Commit 7ff9554b authored by Kay Sievers's avatar Kay Sievers Committed by Greg Kroah-Hartman

printk: convert byte-buffer to variable-length record buffer

- Record-based stream instead of the traditional byte stream
  buffer. All records carry a 64 bit timestamp, the syslog facility
  and priority in the record header.

- Records consume almost the same amount, sometimes less memory than
  the traditional byte stream buffer (if printk_time is enabled). The record
  header is 16 bytes long, plus some padding bytes at the end if needed.
  The byte-stream buffer needed 3 chars for the syslog prefix, 15 char for
  the timestamp and a newline.

- Buffer management is based on message sequence numbers. When records
  need to be discarded, the reading heads move on to the next full
  record. Unlike the byte-stream buffer, no old logged lines get
  truncated or partly overwritten by new ones. Sequence numbers also
  allow consumers of the log stream to get notified if any message in
  the stream they are about to read gets discarded during the time
  of reading.

- Better buffered IO support for KERN_CONT continuation lines, when printk()
  is called multiple times for a single line. The use of KERN_CONT is now
  mandatory to use continuation; a few places in the kernel need trivial fixes
  here. The buffering could possibly be extended to per-cpu variables to allow
  better thread-safety for multiple printk() invocations for a single line.

- Full-featured syslog facility value support. Different facilities
  can tag their messages. All userspace-injected messages enforce a
  facility value > 0 now, to be able to reliably distinguish them from
  the kernel-generated messages. Independent subsystems like a
  baseband processor running its own firmware, or a kernel-related
  userspace process can use their own unique facility values. Multiple
  independent log streams can co-exist that way in the same
  buffer. All share the same global sequence number counter to ensure
  proper ordering (and interleaving) and to allow the consumers of the
  log to reliably correlate the events from different facilities.
Tested-by: default avatarWilliam Douglas <william.douglas@intel.com>
Signed-off-by: default avatarKay Sievers <kay@vrfy.org>
Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
parent 89528127
...@@ -810,33 +810,54 @@ static const struct file_operations oldmem_fops = { ...@@ -810,33 +810,54 @@ static const struct file_operations oldmem_fops = {
static ssize_t kmsg_writev(struct kiocb *iocb, const struct iovec *iv, static ssize_t kmsg_writev(struct kiocb *iocb, const struct iovec *iv,
unsigned long count, loff_t pos) unsigned long count, loff_t pos)
{ {
char *line, *p; char *buf, *line;
int i; int i;
ssize_t ret = -EFAULT; int level = default_message_loglevel;
int facility = 1; /* LOG_USER */
size_t len = iov_length(iv, count); size_t len = iov_length(iv, count);
ssize_t ret = len;
line = kmalloc(len + 1, GFP_KERNEL); if (len > 1024)
if (line == NULL) return -EINVAL;
buf = kmalloc(len+1, GFP_KERNEL);
if (buf == NULL)
return -ENOMEM; return -ENOMEM;
/* line = buf;
* copy all vectors into a single string, to ensure we do
* not interleave our log line with other printk calls
*/
p = line;
for (i = 0; i < count; i++) { for (i = 0; i < count; i++) {
if (copy_from_user(p, iv[i].iov_base, iv[i].iov_len)) if (copy_from_user(line, iv[i].iov_base, iv[i].iov_len))
goto out; goto out;
p += iv[i].iov_len; line += iv[i].iov_len;
}
/*
* Extract and skip the syslog prefix <[0-9]*>. Coming from userspace
* the decimal value represents 32bit, the lower 3 bit are the log
* level, the rest are the log facility.
*
* If no prefix or no userspace facility is specified, we
* enforce LOG_USER, to be able to reliably distinguish
* kernel-generated messages from userspace-injected ones.
*/
line = buf;
if (line[0] == '<') {
char *endp = NULL;
i = simple_strtoul(line+1, &endp, 10);
if (endp && endp[0] == '>') {
level = i & 7;
if (i >> 3)
facility = i >> 3;
endp++;
len -= endp - line;
line = endp;
}
} }
p[0] = '\0'; line[len] = '\0';
ret = printk("%s", line); printk_emit(facility, level, NULL, 0, "%s", line);
/* printk can add a prefix */
if (ret > len)
ret = len;
out: out:
kfree(line); kfree(buf);
return ret; return ret;
} }
......
...@@ -95,8 +95,19 @@ extern int printk_needs_cpu(int cpu); ...@@ -95,8 +95,19 @@ extern int printk_needs_cpu(int cpu);
extern void printk_tick(void); extern void printk_tick(void);
#ifdef CONFIG_PRINTK #ifdef CONFIG_PRINTK
asmlinkage __printf(5, 0)
int vprintk_emit(int facility, int level,
const char *dict, size_t dictlen,
const char *fmt, va_list args);
asmlinkage __printf(1, 0) asmlinkage __printf(1, 0)
int vprintk(const char *fmt, va_list args); int vprintk(const char *fmt, va_list args);
asmlinkage __printf(5, 6) __cold
asmlinkage int printk_emit(int facility, int level,
const char *dict, size_t dictlen,
const char *fmt, ...);
asmlinkage __printf(1, 2) __cold asmlinkage __printf(1, 2) __cold
int printk(const char *fmt, ...); int printk(const char *fmt, ...);
......
...@@ -54,8 +54,6 @@ void asmlinkage __attribute__((weak)) early_printk(const char *fmt, ...) ...@@ -54,8 +54,6 @@ void asmlinkage __attribute__((weak)) early_printk(const char *fmt, ...)
{ {
} }
#define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT)
/* printk's without a loglevel use this.. */ /* printk's without a loglevel use this.. */
#define DEFAULT_MESSAGE_LOGLEVEL CONFIG_DEFAULT_MESSAGE_LOGLEVEL #define DEFAULT_MESSAGE_LOGLEVEL CONFIG_DEFAULT_MESSAGE_LOGLEVEL
...@@ -98,24 +96,6 @@ EXPORT_SYMBOL_GPL(console_drivers); ...@@ -98,24 +96,6 @@ EXPORT_SYMBOL_GPL(console_drivers);
*/ */
static int console_locked, console_suspended; static int console_locked, console_suspended;
/*
* logbuf_lock protects log_buf, log_start, log_end, con_start and logged_chars
* It is also used in interesting ways to provide interlocking in
* console_unlock();.
*/
static DEFINE_RAW_SPINLOCK(logbuf_lock);
#define LOG_BUF_MASK (log_buf_len-1)
#define LOG_BUF(idx) (log_buf[(idx) & LOG_BUF_MASK])
/*
* The indices into log_buf are not constrained to log_buf_len - they
* must be masked before subscripting
*/
static unsigned log_start; /* Index into log_buf: next char to be read by syslog() */
static unsigned con_start; /* Index into log_buf: next char to be sent to consoles */
static unsigned log_end; /* Index into log_buf: most-recently-written-char + 1 */
/* /*
* If exclusive_console is non-NULL then only this console is to be printed to. * If exclusive_console is non-NULL then only this console is to be printed to.
*/ */
...@@ -146,12 +126,176 @@ EXPORT_SYMBOL(console_set_on_cmdline); ...@@ -146,12 +126,176 @@ EXPORT_SYMBOL(console_set_on_cmdline);
static int console_may_schedule; static int console_may_schedule;
#ifdef CONFIG_PRINTK #ifdef CONFIG_PRINTK
/*
* The printk log buffer consists of a chain of concatenated variable
* length records. Every record starts with a record header, containing
* the overall length of the record.
*
* The heads to the first and last entry in the buffer, as well as the
* sequence numbers of these both entries are maintained when messages
* are stored..
*
* If the heads indicate available messages, the length in the header
* tells the start next message. A length == 0 for the next message
* indicates a wrap-around to the beginning of the buffer.
*
* Every record carries the monotonic timestamp in microseconds, as well as
* the standard userspace syslog level and syslog facility. The usual
* kernel messages use LOG_KERN; userspace-injected messages always carry
* a matching syslog facility, by default LOG_USER. The origin of every
* message can be reliably determined that way.
*
* The human readable log message directly follows the message header. The
* length of the message text is stored in the header, the stored message
* is not terminated.
*
*/
struct log {
u64 ts_nsec; /* timestamp in nanoseconds */
u16 len; /* length of entire record */
u16 text_len; /* length of text buffer */
u16 dict_len; /* length of dictionary buffer */
u16 level; /* syslog level + facility */
};
/*
* The logbuf_lock protects kmsg buffer, indices, counters. It is also
* used in interesting ways to provide interlocking in console_unlock();
*/
static DEFINE_RAW_SPINLOCK(logbuf_lock);
/* cpu currently holding logbuf_lock */
static volatile unsigned int logbuf_cpu = UINT_MAX;
#define LOG_LINE_MAX 1024
/* record buffer */
#define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT)
static char __log_buf[__LOG_BUF_LEN]; static char __log_buf[__LOG_BUF_LEN];
static char *log_buf = __log_buf; static char *log_buf = __log_buf;
static int log_buf_len = __LOG_BUF_LEN; static u32 log_buf_len = __LOG_BUF_LEN;
static unsigned logged_chars; /* Number of chars produced since last read+clear operation */
static int saved_console_loglevel = -1; /* index and sequence number of the first record stored in the buffer */
static u64 log_first_seq;
static u32 log_first_idx;
/* index and sequence number of the next record to store in the buffer */
static u64 log_next_seq;
static u32 log_next_idx;
/* the next printk record to read after the last 'clear' command */
static u64 clear_seq;
static u32 clear_idx;
/* the next printk record to read by syslog(READ) or /proc/kmsg */
static u64 syslog_seq;
static u32 syslog_idx;
/* human readable text of the record */
static char *log_text(const struct log *msg)
{
return (char *)msg + sizeof(struct log);
}
/* optional key/value pair dictionary attached to the record */
static char *log_dict(const struct log *msg)
{
return (char *)msg + sizeof(struct log) + msg->text_len;
}
/* get record by index; idx must point to valid msg */
static struct log *log_from_idx(u32 idx)
{
struct log *msg = (struct log *)(log_buf + idx);
/*
* A length == 0 record is the end of buffer marker. Wrap around and
* read the message at the start of the buffer.
*/
if (!msg->len)
return (struct log *)log_buf;
return msg;
}
/* get next record; idx must point to valid msg */
static u32 log_next(u32 idx)
{
struct log *msg = (struct log *)(log_buf + idx);
/* length == 0 indicates the end of the buffer; wrap */
/*
* A length == 0 record is the end of buffer marker. Wrap around and
* read the message at the start of the buffer as *this* one, and
* return the one after that.
*/
if (!msg->len) {
msg = (struct log *)log_buf;
return msg->len;
}
return idx + msg->len;
}
#if !defined(CONFIG_64BIT) || defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
#define LOG_ALIGN 4
#else
#define LOG_ALIGN 8
#endif
/* insert record into the buffer, discard old ones, update heads */
static void log_store(int facility, int level,
const char *dict, u16 dict_len,
const char *text, u16 text_len)
{
struct log *msg;
u32 size, pad_len;
/* number of '\0' padding bytes to next message */
size = sizeof(struct log) + text_len + dict_len;
pad_len = (-size) & (LOG_ALIGN - 1);
size += pad_len;
while (log_first_seq < log_next_seq) {
u32 free;
if (log_next_idx > log_first_idx)
free = max(log_buf_len - log_next_idx, log_first_idx);
else
free = log_first_idx - log_next_idx;
if (free > size + sizeof(struct log))
break;
/* drop old messages until we have enough contiuous space */
log_first_idx = log_next(log_first_idx);
log_first_seq++;
}
if (log_next_idx + size + sizeof(struct log) >= log_buf_len) {
/*
* This message + an additional empty header does not fit
* at the end of the buffer. Add an empty header with len == 0
* to signify a wrap around.
*/
memset(log_buf + log_next_idx, 0, sizeof(struct log));
log_next_idx = 0;
}
/* fill message */
msg = (struct log *)(log_buf + log_next_idx);
memcpy(log_text(msg), text, text_len);
msg->text_len = text_len;
memcpy(log_dict(msg), dict, dict_len);
msg->dict_len = dict_len;
msg->level = (facility << 3) | (level & 7);
msg->ts_nsec = local_clock();
memset(log_dict(msg) + dict_len, 0, pad_len);
msg->len = sizeof(struct log) + text_len + dict_len + pad_len;
/* insert message */
log_next_idx += msg->len;
log_next_seq++;
}
#ifdef CONFIG_KEXEC #ifdef CONFIG_KEXEC
/* /*
...@@ -165,9 +309,9 @@ static int saved_console_loglevel = -1; ...@@ -165,9 +309,9 @@ static int saved_console_loglevel = -1;
void log_buf_kexec_setup(void) void log_buf_kexec_setup(void)
{ {
VMCOREINFO_SYMBOL(log_buf); VMCOREINFO_SYMBOL(log_buf);
VMCOREINFO_SYMBOL(log_end);
VMCOREINFO_SYMBOL(log_buf_len); VMCOREINFO_SYMBOL(log_buf_len);
VMCOREINFO_SYMBOL(logged_chars); VMCOREINFO_SYMBOL(log_first_idx);
VMCOREINFO_SYMBOL(log_next_idx);
} }
#endif #endif
...@@ -191,7 +335,6 @@ early_param("log_buf_len", log_buf_len_setup); ...@@ -191,7 +335,6 @@ early_param("log_buf_len", log_buf_len_setup);
void __init setup_log_buf(int early) void __init setup_log_buf(int early)
{ {
unsigned long flags; unsigned long flags;
unsigned start, dest_idx, offset;
char *new_log_buf; char *new_log_buf;
int free; int free;
...@@ -219,20 +362,8 @@ void __init setup_log_buf(int early) ...@@ -219,20 +362,8 @@ void __init setup_log_buf(int early)
log_buf_len = new_log_buf_len; log_buf_len = new_log_buf_len;
log_buf = new_log_buf; log_buf = new_log_buf;
new_log_buf_len = 0; new_log_buf_len = 0;
free = __LOG_BUF_LEN - log_end; free = __LOG_BUF_LEN - log_next_idx;
memcpy(log_buf, __log_buf, __LOG_BUF_LEN);
offset = start = min(con_start, log_start);
dest_idx = 0;
while (start != log_end) {
unsigned log_idx_mask = start & (__LOG_BUF_LEN - 1);
log_buf[dest_idx] = __log_buf[log_idx_mask];
start++;
dest_idx++;
}
log_start -= offset;
con_start -= offset;
log_end -= offset;
raw_spin_unlock_irqrestore(&logbuf_lock, flags); raw_spin_unlock_irqrestore(&logbuf_lock, flags);
pr_info("log_buf_len: %d\n", log_buf_len); pr_info("log_buf_len: %d\n", log_buf_len);
...@@ -332,11 +463,165 @@ static int check_syslog_permissions(int type, bool from_file) ...@@ -332,11 +463,165 @@ static int check_syslog_permissions(int type, bool from_file)
return 0; return 0;
} }
#if defined(CONFIG_PRINTK_TIME)
static bool printk_time = 1;
#else
static bool printk_time;
#endif
module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR);
static int syslog_print_line(u32 idx, char *text, size_t size)
{
struct log *msg;
size_t len;
msg = log_from_idx(idx);
if (!text) {
/* calculate length only */
len = 3;
if (msg->level > 9)
len++;
if (msg->level > 99)
len++;
if (printk_time)
len += 15;
len += msg->text_len;
len++;
return len;
}
len = sprintf(text, "<%u>", msg->level);
if (printk_time) {
unsigned long long t = msg->ts_nsec;
unsigned long rem_ns = do_div(t, 1000000000);
len += sprintf(text + len, "[%5lu.%06lu] ",
(unsigned long) t, rem_ns / 1000);
}
if (len + msg->text_len > size)
return -EINVAL;
memcpy(text + len, log_text(msg), msg->text_len);
len += msg->text_len;
text[len++] = '\n';
return len;
}
static int syslog_print(char __user *buf, int size)
{
char *text;
int len;
text = kmalloc(LOG_LINE_MAX, GFP_KERNEL);
if (!text)
return -ENOMEM;
raw_spin_lock_irq(&logbuf_lock);
if (syslog_seq < log_first_seq) {
/* messages are gone, move to first one */
syslog_seq = log_first_seq;
syslog_idx = log_first_idx;
}
len = syslog_print_line(syslog_idx, text, LOG_LINE_MAX);
syslog_idx = log_next(syslog_idx);
syslog_seq++;
raw_spin_unlock_irq(&logbuf_lock);
if (len > 0 && copy_to_user(buf, text, len))
len = -EFAULT;
kfree(text);
return len;
}
static int syslog_print_all(char __user *buf, int size, bool clear)
{
char *text;
int len = 0;
text = kmalloc(LOG_LINE_MAX, GFP_KERNEL);
if (!text)
return -ENOMEM;
raw_spin_lock_irq(&logbuf_lock);
if (buf) {
u64 next_seq;
u64 seq;
u32 idx;
if (clear_seq < log_first_seq) {
/* messages are gone, move to first available one */
clear_seq = log_first_seq;
clear_idx = log_first_idx;
}
/*
* Find first record that fits, including all following records,
* into the user-provided buffer for this dump.
*/
seq = clear_seq;
idx = clear_idx;
while (seq < log_next_seq) {
len += syslog_print_line(idx, NULL, 0);
idx = log_next(idx);
seq++;
}
seq = clear_seq;
idx = clear_idx;
while (len > size && seq < log_next_seq) {
len -= syslog_print_line(idx, NULL, 0);
idx = log_next(idx);
seq++;
}
/* last message in this dump */
next_seq = log_next_seq;
len = 0;
while (len >= 0 && seq < next_seq) {
int textlen;
textlen = syslog_print_line(idx, text, LOG_LINE_MAX);
if (textlen < 0) {
len = textlen;
break;
}
idx = log_next(idx);
seq++;
raw_spin_unlock_irq(&logbuf_lock);
if (copy_to_user(buf + len, text, textlen))
len = -EFAULT;
else
len += textlen;
raw_spin_lock_irq(&logbuf_lock);
if (seq < log_first_seq) {
/* messages are gone, move to next one */
seq = log_first_seq;
idx = log_first_idx;
}
}
}
if (clear) {
clear_seq = log_next_seq;
clear_idx = log_next_idx;
}
raw_spin_unlock_irq(&logbuf_lock);
kfree(text);
return len;
}
int do_syslog(int type, char __user *buf, int len, bool from_file) int do_syslog(int type, char __user *buf, int len, bool from_file)
{ {
unsigned i, j, limit, count; bool clear = false;
int do_clear = 0; static int saved_console_loglevel = -1;
char c;
int error; int error;
error = check_syslog_permissions(type, from_file); error = check_syslog_permissions(type, from_file);
...@@ -364,28 +649,14 @@ int do_syslog(int type, char __user *buf, int len, bool from_file) ...@@ -364,28 +649,14 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
goto out; goto out;
} }
error = wait_event_interruptible(log_wait, error = wait_event_interruptible(log_wait,
(log_start - log_end)); syslog_seq != log_next_seq);
if (error) if (error)
goto out; goto out;
i = 0; error = syslog_print(buf, len);
raw_spin_lock_irq(&logbuf_lock);
while (!error && (log_start != log_end) && i < len) {
c = LOG_BUF(log_start);
log_start++;
raw_spin_unlock_irq(&logbuf_lock);
error = __put_user(c,buf);
buf++;
i++;
cond_resched();
raw_spin_lock_irq(&logbuf_lock);
}
raw_spin_unlock_irq(&logbuf_lock);
if (!error)
error = i;
break; break;
/* Read/clear last kernel messages */ /* Read/clear last kernel messages */
case SYSLOG_ACTION_READ_CLEAR: case SYSLOG_ACTION_READ_CLEAR:
do_clear = 1; clear = true;
/* FALL THRU */ /* FALL THRU */
/* Read last kernel messages */ /* Read last kernel messages */
case SYSLOG_ACTION_READ_ALL: case SYSLOG_ACTION_READ_ALL:
...@@ -399,52 +670,11 @@ int do_syslog(int type, char __user *buf, int len, bool from_file) ...@@ -399,52 +670,11 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
error = -EFAULT; error = -EFAULT;
goto out; goto out;
} }
count = len; error = syslog_print_all(buf, len, clear);
if (count > log_buf_len)
count = log_buf_len;
raw_spin_lock_irq(&logbuf_lock);
if (count > logged_chars)
count = logged_chars;
if (do_clear)
logged_chars = 0;
limit = log_end;
/*
* __put_user() could sleep, and while we sleep
* printk() could overwrite the messages
* we try to copy to user space. Therefore
* the messages are copied in reverse. <manfreds>
*/
for (i = 0; i < count && !error; i++) {
j = limit-1-i;
if (j + log_buf_len < log_end)
break;
c = LOG_BUF(j);
raw_spin_unlock_irq(&logbuf_lock);
error = __put_user(c,&buf[count-1-i]);
cond_resched();
raw_spin_lock_irq(&logbuf_lock);
}
raw_spin_unlock_irq(&logbuf_lock);
if (error)
break;
error = i;
if (i != count) {
int offset = count-error;
/* buffer overflow during copy, correct user buffer. */
for (i = 0; i < error; i++) {
if (__get_user(c,&buf[i+offset]) ||
__put_user(c,&buf[i])) {
error = -EFAULT;
break;
}
cond_resched();
}
}
break; break;
/* Clear ring buffer */ /* Clear ring buffer */
case SYSLOG_ACTION_CLEAR: case SYSLOG_ACTION_CLEAR:
logged_chars = 0; syslog_print_all(NULL, 0, true);
break;
/* Disable logging to console */ /* Disable logging to console */
case SYSLOG_ACTION_CONSOLE_OFF: case SYSLOG_ACTION_CONSOLE_OFF:
if (saved_console_loglevel == -1) if (saved_console_loglevel == -1)
...@@ -472,7 +702,33 @@ int do_syslog(int type, char __user *buf, int len, bool from_file) ...@@ -472,7 +702,33 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
break; break;
/* Number of chars in the log buffer */ /* Number of chars in the log buffer */
case SYSLOG_ACTION_SIZE_UNREAD: case SYSLOG_ACTION_SIZE_UNREAD:
error = log_end - log_start; raw_spin_lock_irq(&logbuf_lock);
if (syslog_seq < log_first_seq) {
/* messages are gone, move to first one */
syslog_seq = log_first_seq;
syslog_idx = log_first_idx;
}
if (from_file) {
/*
* Short-cut for poll(/"proc/kmsg") which simply checks
* for pending data, not the size; return the count of
* records, not the length.
*/
error = log_next_idx - syslog_idx;
} else {
u64 seq;
u32 idx;
error = 0;
seq = syslog_seq;
idx = syslog_idx;
while (seq < log_next_seq) {
error += syslog_print_line(idx, NULL, 0);
idx = log_next(idx);
seq++;
}
}
raw_spin_unlock_irq(&logbuf_lock);
break; break;
/* Size of the log buffer */ /* Size of the log buffer */
case SYSLOG_ACTION_SIZE_BUFFER: case SYSLOG_ACTION_SIZE_BUFFER:
...@@ -501,29 +757,11 @@ void kdb_syslog_data(char *syslog_data[4]) ...@@ -501,29 +757,11 @@ void kdb_syslog_data(char *syslog_data[4])
{ {
syslog_data[0] = log_buf; syslog_data[0] = log_buf;
syslog_data[1] = log_buf + log_buf_len; syslog_data[1] = log_buf + log_buf_len;
syslog_data[2] = log_buf + log_end - syslog_data[2] = log_buf + log_first_idx;
(logged_chars < log_buf_len ? logged_chars : log_buf_len); syslog_data[3] = log_buf + log_next_idx;
syslog_data[3] = log_buf + log_end;
} }
#endif /* CONFIG_KGDB_KDB */ #endif /* CONFIG_KGDB_KDB */
/*
* Call the console drivers on a range of log_buf
*/
static void __call_console_drivers(unsigned start, unsigned end)
{
struct console *con;
for_each_console(con) {
if (exclusive_console && con != exclusive_console)
continue;
if ((con->flags & CON_ENABLED) && con->write &&
(cpu_online(smp_processor_id()) ||
(con->flags & CON_ANYTIME)))
con->write(con, &LOG_BUF(start), end - start);
}
}
static bool __read_mostly ignore_loglevel; static bool __read_mostly ignore_loglevel;
static int __init ignore_loglevel_setup(char *str) static int __init ignore_loglevel_setup(char *str)
...@@ -539,143 +777,34 @@ module_param(ignore_loglevel, bool, S_IRUGO | S_IWUSR); ...@@ -539,143 +777,34 @@ module_param(ignore_loglevel, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(ignore_loglevel, "ignore loglevel setting, to" MODULE_PARM_DESC(ignore_loglevel, "ignore loglevel setting, to"
"print all kernel messages to the console."); "print all kernel messages to the console.");
/*
* Write out chars from start to end - 1 inclusive
*/
static void _call_console_drivers(unsigned start,
unsigned end, int msg_log_level)
{
trace_console(&LOG_BUF(0), start, end, log_buf_len);
if ((msg_log_level < console_loglevel || ignore_loglevel) &&
console_drivers && start != end) {
if ((start & LOG_BUF_MASK) > (end & LOG_BUF_MASK)) {
/* wrapped write */
__call_console_drivers(start & LOG_BUF_MASK,
log_buf_len);
__call_console_drivers(0, end & LOG_BUF_MASK);
} else {
__call_console_drivers(start, end);
}
}
}
/*
* Parse the syslog header <[0-9]*>. The decimal value represents 32bit, the
* lower 3 bit are the log level, the rest are the log facility. In case
* userspace passes usual userspace syslog messages to /dev/kmsg or
* /dev/ttyprintk, the log prefix might contain the facility. Printk needs
* to extract the correct log level for in-kernel processing, and not mangle
* the original value.
*
* If a prefix is found, the length of the prefix is returned. If 'level' is
* passed, it will be filled in with the log level without a possible facility
* value. If 'special' is passed, the special printk prefix chars are accepted
* and returned. If no valid header is found, 0 is returned and the passed
* variables are not touched.
*/
static size_t log_prefix(const char *p, unsigned int *level, char *special)
{
unsigned int lev = 0;
char sp = '\0';
size_t len;
if (p[0] != '<' || !p[1])
return 0;
if (p[2] == '>') {
/* usual single digit level number or special char */
switch (p[1]) {
case '0' ... '7':
lev = p[1] - '0';
break;
case 'c': /* KERN_CONT */
case 'd': /* KERN_DEFAULT */
sp = p[1];
break;
default:
return 0;
}
len = 3;
} else {
/* multi digit including the level and facility number */
char *endp = NULL;
lev = (simple_strtoul(&p[1], &endp, 10) & 7);
if (endp == NULL || endp[0] != '>')
return 0;
len = (endp + 1) - p;
}
/* do not accept special char if not asked for */
if (sp && !special)
return 0;
if (special) {
*special = sp;
/* return special char, do not touch level */
if (sp)
return len;
}
if (level)
*level = lev;
return len;
}
/* /*
* Call the console drivers, asking them to write out * Call the console drivers, asking them to write out
* log_buf[start] to log_buf[end - 1]. * log_buf[start] to log_buf[end - 1].
* The console_lock must be held. * The console_lock must be held.
*/ */
static void call_console_drivers(unsigned start, unsigned end) static void call_console_drivers(int level, const char *text, size_t len)
{ {
unsigned cur_index, start_print; struct console *con;
static int msg_level = -1;
BUG_ON(((int)(start - end)) > 0); trace_console(text, 0, len, len);
cur_index = start; if (level >= console_loglevel && !ignore_loglevel)
start_print = start; return;
while (cur_index != end) { if (!console_drivers)
if (msg_level < 0 && ((end - cur_index) > 2)) { return;
/* strip log prefix */
cur_index += log_prefix(&LOG_BUF(cur_index), &msg_level, NULL);
start_print = cur_index;
}
while (cur_index != end) {
char c = LOG_BUF(cur_index);
cur_index++; for_each_console(con) {
if (c == '\n') { if (exclusive_console && con != exclusive_console)
if (msg_level < 0) { continue;
/* if (!(con->flags & CON_ENABLED))
* printk() has already given us loglevel tags in continue;
* the buffer. This code is here in case the if (!con->write)
* log buffer has wrapped right round and scribbled continue;
* on those tags if (!cpu_online(smp_processor_id()) &&
*/ !(con->flags & CON_ANYTIME))
msg_level = default_message_loglevel; continue;
} con->write(con, text, len);
_call_console_drivers(start_print, cur_index, msg_level);
msg_level = -1;
start_print = cur_index;
break;
}
}
} }
_call_console_drivers(start_print, end, msg_level);
}
static void emit_log_char(char c)
{
LOG_BUF(log_end) = c;
log_end++;
if (log_end - log_start > log_buf_len)
log_start = log_end - log_buf_len;
if (log_end - con_start > log_buf_len)
con_start = log_end - log_buf_len;
if (logged_chars < log_buf_len)
logged_chars++;
} }
/* /*
...@@ -700,16 +829,6 @@ static void zap_locks(void) ...@@ -700,16 +829,6 @@ static void zap_locks(void)
sema_init(&console_sem, 1); sema_init(&console_sem, 1);
} }
#if defined(CONFIG_PRINTK_TIME)
static bool printk_time = 1;
#else
static bool printk_time = 0;
#endif
module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR);
static bool always_kmsg_dump;
module_param_named(always_kmsg_dump, always_kmsg_dump, bool, S_IRUGO | S_IWUSR);
/* Check if we have any console registered that can be called early in boot. */ /* Check if we have any console registered that can be called early in boot. */
static int have_callable_console(void) static int have_callable_console(void)
{ {
...@@ -722,51 +841,6 @@ static int have_callable_console(void) ...@@ -722,51 +841,6 @@ static int have_callable_console(void)
return 0; return 0;
} }
/**
* printk - print a kernel message
* @fmt: format string
*
* This is printk(). It can be called from any context. We want it to work.
*
* We try to grab the console_lock. If we succeed, it's easy - we log the output and
* call the console drivers. If we fail to get the semaphore we place the output
* into the log buffer and return. The current holder of the console_sem will
* notice the new output in console_unlock(); and will send it to the
* consoles before releasing the lock.
*
* One effect of this deferred printing is that code which calls printk() and
* then changes console_loglevel may break. This is because console_loglevel
* is inspected when the actual printing occurs.
*
* See also:
* printf(3)
*
* See the vsnprintf() documentation for format string extensions over C99.
*/
asmlinkage int printk(const char *fmt, ...)
{
va_list args;
int r;
#ifdef CONFIG_KGDB_KDB
if (unlikely(kdb_trap_printk)) {
va_start(args, fmt);
r = vkdb_printf(fmt, args);
va_end(args);
return r;
}
#endif
va_start(args, fmt);
r = vprintk(fmt, args);
va_end(args);
return r;
}
/* cpu currently holding logbuf_lock */
static volatile unsigned int printk_cpu = UINT_MAX;
/* /*
* Can we actually use the console at this time on this cpu? * Can we actually use the console at this time on this cpu?
* *
...@@ -810,17 +884,12 @@ static int console_trylock_for_printk(unsigned int cpu) ...@@ -810,17 +884,12 @@ static int console_trylock_for_printk(unsigned int cpu)
retval = 0; retval = 0;
} }
} }
printk_cpu = UINT_MAX; logbuf_cpu = UINT_MAX;
if (wake) if (wake)
up(&console_sem); up(&console_sem);
raw_spin_unlock(&logbuf_lock); raw_spin_unlock(&logbuf_lock);
return retval; return retval;
} }
static const char recursion_bug_msg [] =
KERN_CRIT "BUG: recent printk recursion!\n";
static int recursion_bug;
static int new_text_line = 1;
static char printk_buf[1024];
int printk_delay_msec __read_mostly; int printk_delay_msec __read_mostly;
...@@ -836,15 +905,22 @@ static inline void printk_delay(void) ...@@ -836,15 +905,22 @@ static inline void printk_delay(void)
} }
} }
asmlinkage int vprintk(const char *fmt, va_list args) asmlinkage int vprintk_emit(int facility, int level,
const char *dict, size_t dictlen,
const char *fmt, va_list args)
{ {
int printed_len = 0; static int recursion_bug;
int current_log_level = default_message_loglevel; static char buf[LOG_LINE_MAX];
static size_t buflen;
static int buflevel;
static char textbuf[LOG_LINE_MAX];
char *text = textbuf;
size_t textlen;
unsigned long flags; unsigned long flags;
int this_cpu; int this_cpu;
char *p; bool newline = false;
size_t plen; bool cont = false;
char special; int printed_len = 0;
boot_delay_msec(); boot_delay_msec();
printk_delay(); printk_delay();
...@@ -856,7 +932,7 @@ asmlinkage int vprintk(const char *fmt, va_list args) ...@@ -856,7 +932,7 @@ asmlinkage int vprintk(const char *fmt, va_list args)
/* /*
* Ouch, printk recursed into itself! * Ouch, printk recursed into itself!
*/ */
if (unlikely(printk_cpu == this_cpu)) { if (unlikely(logbuf_cpu == this_cpu)) {
/* /*
* If a crash is occurring during printk() on this CPU, * If a crash is occurring during printk() on this CPU,
* then try to get the crash message out but make sure * then try to get the crash message out but make sure
...@@ -873,97 +949,92 @@ asmlinkage int vprintk(const char *fmt, va_list args) ...@@ -873,97 +949,92 @@ asmlinkage int vprintk(const char *fmt, va_list args)
lockdep_off(); lockdep_off();
raw_spin_lock(&logbuf_lock); raw_spin_lock(&logbuf_lock);
printk_cpu = this_cpu; logbuf_cpu = this_cpu;
if (recursion_bug) { if (recursion_bug) {
static const char recursion_msg[] =
"BUG: recent printk recursion!";
recursion_bug = 0; recursion_bug = 0;
strcpy(printk_buf, recursion_bug_msg); printed_len += strlen(recursion_msg);
printed_len = strlen(recursion_bug_msg); /* emit KERN_CRIT message */
log_store(0, 2, NULL, 0, recursion_msg, printed_len);
} }
/* Emit the output into the temporary buffer */
printed_len += vscnprintf(printk_buf + printed_len,
sizeof(printk_buf) - printed_len, fmt, args);
p = printk_buf; /*
* The printf needs to come first; we need the syslog
* prefix which might be passed-in as a parameter.
*/
textlen = vscnprintf(text, sizeof(textbuf), fmt, args);
/* Read log level and handle special printk prefix */ /* mark and strip a trailing newline */
plen = log_prefix(p, &current_log_level, &special); if (textlen && text[textlen-1] == '\n') {
if (plen) { textlen--;
p += plen; newline = true;
}
switch (special) { /* strip syslog prefix and extract log level or flags */
case 'c': /* Strip <c> KERN_CONT, continue line */ if (text[0] == '<' && text[1] && text[2] == '>') {
plen = 0; switch (text[1]) {
case '0' ... '7':
if (level == -1)
level = text[1] - '0';
text += 3;
textlen -= 3;
break;
case 'c': /* KERN_CONT */
cont = true;
case 'd': /* KERN_DEFAULT */
text += 3;
textlen -= 3;
break; break;
case 'd': /* Strip <d> KERN_DEFAULT, start new line */
plen = 0;
default:
if (!new_text_line) {
emit_log_char('\n');
new_text_line = 1;
}
} }
} }
/* if (buflen && (!cont || dict)) {
* Copy the output into log_buf. If the caller didn't provide /* no continuation; flush existing buffer */
* the appropriate log prefix, we insert them here log_store(facility, buflevel, NULL, 0, buf, buflen);
*/ printed_len += buflen;
for (; *p; p++) { buflen = 0;
if (new_text_line) {
new_text_line = 0;
if (plen) {
/* Copy original log prefix */
int i;
for (i = 0; i < plen; i++)
emit_log_char(printk_buf[i]);
printed_len += plen;
} else {
/* Add log prefix */
emit_log_char('<');
emit_log_char(current_log_level + '0');
emit_log_char('>');
printed_len += 3;
} }
if (printk_time) { if (buflen == 0) {
/* Add the current time stamp */ /* remember level for first message in the buffer */
char tbuf[50], *tp; if (level == -1)
unsigned tlen; buflevel = default_message_loglevel;
unsigned long long t; else
unsigned long nanosec_rem; buflevel = level;
t = cpu_clock(printk_cpu);
nanosec_rem = do_div(t, 1000000000);
tlen = sprintf(tbuf, "[%5lu.%06lu] ",
(unsigned long) t,
nanosec_rem / 1000);
for (tp = tbuf; tp < tbuf + tlen; tp++)
emit_log_char(*tp);
printed_len += tlen;
} }
if (!*p) if (buflen || !newline) {
break; /* append to existing buffer, or buffer until next message */
if (buflen + textlen > sizeof(buf))
textlen = sizeof(buf) - buflen;
memcpy(buf + buflen, text, textlen);
buflen += textlen;
} }
emit_log_char(*p); if (newline) {
if (*p == '\n') /* end of line; flush buffer */
new_text_line = 1; if (buflen) {
log_store(facility, buflevel,
dict, dictlen, buf, buflen);
printed_len += buflen;
buflen = 0;
} else {
log_store(facility, buflevel,
dict, dictlen, text, textlen);
printed_len += textlen;
}
} }
/* /*
* Try to acquire and then immediately release the * Try to acquire and then immediately release the console semaphore.
* console semaphore. The release will do all the * The release will print out buffers and wake up /dev/kmsg and syslog()
* actual magic (print out buffers, wake up klogd, * users.
* etc).
* *
* The console_trylock_for_printk() function * The console_trylock_for_printk() function will release 'logbuf_lock'
* will release 'logbuf_lock' regardless of whether it * regardless of whether it actually gets the console semaphore or not.
* actually gets the semaphore or not.
*/ */
if (console_trylock_for_printk(this_cpu)) if (console_trylock_for_printk(this_cpu))
console_unlock(); console_unlock();
...@@ -974,12 +1045,73 @@ asmlinkage int vprintk(const char *fmt, va_list args) ...@@ -974,12 +1045,73 @@ asmlinkage int vprintk(const char *fmt, va_list args)
return printed_len; return printed_len;
} }
EXPORT_SYMBOL(printk); EXPORT_SYMBOL(vprintk_emit);
asmlinkage int vprintk(const char *fmt, va_list args)
{
return vprintk_emit(0, -1, NULL, 0, fmt, args);
}
EXPORT_SYMBOL(vprintk); EXPORT_SYMBOL(vprintk);
asmlinkage int printk_emit(int facility, int level,
const char *dict, size_t dictlen,
const char *fmt, ...)
{
va_list args;
int r;
va_start(args, fmt);
r = vprintk_emit(facility, level, dict, dictlen, fmt, args);
va_end(args);
return r;
}
EXPORT_SYMBOL(printk_emit);
/**
* printk - print a kernel message
* @fmt: format string
*
* This is printk(). It can be called from any context. We want it to work.
*
* We try to grab the console_lock. If we succeed, it's easy - we log the
* output and call the console drivers. If we fail to get the semaphore, we
* place the output into the log buffer and return. The current holder of
* the console_sem will notice the new output in console_unlock(); and will
* send it to the consoles before releasing the lock.
*
* One effect of this deferred printing is that code which calls printk() and
* then changes console_loglevel may break. This is because console_loglevel
* is inspected when the actual printing occurs.
*
* See also:
* printf(3)
*
* See the vsnprintf() documentation for format string extensions over C99.
*/
asmlinkage int printk(const char *fmt, ...)
{
va_list args;
int r;
#ifdef CONFIG_KGDB_KDB
if (unlikely(kdb_trap_printk)) {
va_start(args, fmt);
r = vkdb_printf(fmt, args);
va_end(args);
return r;
}
#endif
va_start(args, fmt);
r = vprintk_emit(0, -1, NULL, 0, fmt, args);
va_end(args);
return r;
}
EXPORT_SYMBOL(printk);
#else #else
static void call_console_drivers(unsigned start, unsigned end) static void call_console_drivers(int level, const char *text, size_t len)
{ {
} }
...@@ -1217,7 +1349,7 @@ int is_console_locked(void) ...@@ -1217,7 +1349,7 @@ int is_console_locked(void)
} }
/* /*
* Delayed printk facility, for scheduler-internal messages: * Delayed printk version, for scheduler-internal messages:
*/ */
#define PRINTK_BUF_SIZE 512 #define PRINTK_BUF_SIZE 512
...@@ -1253,6 +1385,10 @@ void wake_up_klogd(void) ...@@ -1253,6 +1385,10 @@ void wake_up_klogd(void)
this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP); this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP);
} }
/* the next printk record to write to the console */
static u64 console_seq;
static u32 console_idx;
/** /**
* console_unlock - unlock the console system * console_unlock - unlock the console system
* *
...@@ -1263,15 +1399,16 @@ void wake_up_klogd(void) ...@@ -1263,15 +1399,16 @@ void wake_up_klogd(void)
* by printk(). If this is the case, console_unlock(); emits * by printk(). If this is the case, console_unlock(); emits
* the output prior to releasing the lock. * the output prior to releasing the lock.
* *
* If there is output waiting for klogd, we wake it up. * If there is output waiting, we wake it /dev/kmsg and syslog() users.
* *
* console_unlock(); may be called from any context. * console_unlock(); may be called from any context.
*/ */
void console_unlock(void) void console_unlock(void)
{ {
static u64 seen_seq;
unsigned long flags; unsigned long flags;
unsigned _con_start, _log_end; bool wake_klogd = false;
unsigned wake_klogd = 0, retry = 0; bool retry;
if (console_suspended) { if (console_suspended) {
up(&console_sem); up(&console_sem);
...@@ -1281,17 +1418,41 @@ void console_unlock(void) ...@@ -1281,17 +1418,41 @@ void console_unlock(void)
console_may_schedule = 0; console_may_schedule = 0;
again: again:
for ( ; ; ) { for (;;) {
struct log *msg;
static char text[LOG_LINE_MAX];
size_t len;
int level;
raw_spin_lock_irqsave(&logbuf_lock, flags); raw_spin_lock_irqsave(&logbuf_lock, flags);
wake_klogd |= log_start - log_end; if (seen_seq != log_next_seq) {
if (con_start == log_end) wake_klogd = true;
break; /* Nothing to print */ seen_seq = log_next_seq;
_con_start = con_start; }
_log_end = log_end;
con_start = log_end; /* Flush */ if (console_seq < log_first_seq) {
/* messages are gone, move to first one */
console_seq = log_first_seq;
console_idx = log_first_idx;
}
if (console_seq == log_next_seq)
break;
msg = log_from_idx(console_idx);
level = msg->level & 7;
len = msg->text_len;
if (len+1 >= sizeof(text))
len = sizeof(text)-1;
memcpy(text, log_text(msg), len);
text[len++] = '\n';
console_idx = log_next(console_idx);
console_seq++;
raw_spin_unlock(&logbuf_lock); raw_spin_unlock(&logbuf_lock);
stop_critical_timings(); /* don't trace print latency */ stop_critical_timings(); /* don't trace print latency */
call_console_drivers(_con_start, _log_end); call_console_drivers(level, text, len);
start_critical_timings(); start_critical_timings();
local_irq_restore(flags); local_irq_restore(flags);
} }
...@@ -1312,8 +1473,7 @@ void console_unlock(void) ...@@ -1312,8 +1473,7 @@ void console_unlock(void)
* flush, no worries. * flush, no worries.
*/ */
raw_spin_lock(&logbuf_lock); raw_spin_lock(&logbuf_lock);
if (con_start != log_end) retry = console_seq != log_next_seq;
retry = 1;
raw_spin_unlock_irqrestore(&logbuf_lock, flags); raw_spin_unlock_irqrestore(&logbuf_lock, flags);
if (retry && console_trylock()) if (retry && console_trylock())
...@@ -1549,7 +1709,8 @@ void register_console(struct console *newcon) ...@@ -1549,7 +1709,8 @@ void register_console(struct console *newcon)
* for us. * for us.
*/ */
raw_spin_lock_irqsave(&logbuf_lock, flags); raw_spin_lock_irqsave(&logbuf_lock, flags);
con_start = log_start; console_seq = syslog_seq;
console_idx = syslog_idx;
raw_spin_unlock_irqrestore(&logbuf_lock, flags); raw_spin_unlock_irqrestore(&logbuf_lock, flags);
/* /*
* We're about to replay the log buffer. Only do this to the * We're about to replay the log buffer. Only do this to the
...@@ -1758,6 +1919,9 @@ int kmsg_dump_unregister(struct kmsg_dumper *dumper) ...@@ -1758,6 +1919,9 @@ int kmsg_dump_unregister(struct kmsg_dumper *dumper)
} }
EXPORT_SYMBOL_GPL(kmsg_dump_unregister); EXPORT_SYMBOL_GPL(kmsg_dump_unregister);
static bool always_kmsg_dump;
module_param_named(always_kmsg_dump, always_kmsg_dump, bool, S_IRUGO | S_IWUSR);
/** /**
* kmsg_dump - dump kernel log to kernel message dumpers. * kmsg_dump - dump kernel log to kernel message dumpers.
* @reason: the reason (oops, panic etc) for dumping * @reason: the reason (oops, panic etc) for dumping
...@@ -1767,8 +1931,7 @@ EXPORT_SYMBOL_GPL(kmsg_dump_unregister); ...@@ -1767,8 +1931,7 @@ EXPORT_SYMBOL_GPL(kmsg_dump_unregister);
*/ */
void kmsg_dump(enum kmsg_dump_reason reason) void kmsg_dump(enum kmsg_dump_reason reason)
{ {
unsigned long end; u64 idx;
unsigned chars;
struct kmsg_dumper *dumper; struct kmsg_dumper *dumper;
const char *s1, *s2; const char *s1, *s2;
unsigned long l1, l2; unsigned long l1, l2;
...@@ -1780,24 +1943,27 @@ void kmsg_dump(enum kmsg_dump_reason reason) ...@@ -1780,24 +1943,27 @@ void kmsg_dump(enum kmsg_dump_reason reason)
/* Theoretically, the log could move on after we do this, but /* Theoretically, the log could move on after we do this, but
there's not a lot we can do about that. The new messages there's not a lot we can do about that. The new messages
will overwrite the start of what we dump. */ will overwrite the start of what we dump. */
raw_spin_lock_irqsave(&logbuf_lock, flags); raw_spin_lock_irqsave(&logbuf_lock, flags);
end = log_end & LOG_BUF_MASK; if (syslog_seq < log_first_seq)
chars = logged_chars; idx = syslog_idx;
raw_spin_unlock_irqrestore(&logbuf_lock, flags); else
idx = log_first_idx;
if (chars > end) { if (idx > log_next_idx) {
s1 = log_buf + log_buf_len - chars + end; s1 = log_buf;
l1 = chars - end; l1 = log_next_idx;
s2 = log_buf; s2 = log_buf + idx;
l2 = end; l2 = log_buf_len - idx;
} else { } else {
s1 = ""; s1 = "";
l1 = 0; l1 = 0;
s2 = log_buf + end - chars; s2 = log_buf + idx;
l2 = chars; l2 = log_next_idx - idx;
} }
raw_spin_unlock_irqrestore(&logbuf_lock, flags);
rcu_read_lock(); rcu_read_lock();
list_for_each_entry_rcu(dumper, &dump_list, list) list_for_each_entry_rcu(dumper, &dump_list, list)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment