Commit 7d91de74 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/pmladek/printk

Pull printk updates from Petr Mladek:

 - Add Petr Mladek, Sergey Senozhatsky as printk maintainers, and Steven
   Rostedt as the printk reviewer. This idea came up after the
   discussion about printk issues at Kernel Summit. It was formulated
   and discussed at lkml[1].

 - Extend a lock-less NMI per-cpu buffers idea to handle recursive
   printk() calls by Sergey Senozhatsky[2]. It is the first step in
   sanitizing printk as discussed at Kernel Summit.

   The change allows to see messages that would normally get ignored or
   would cause a deadlock.

   Also it allows to enable lockdep in printk(). This already paid off.
   The testing in linux-next helped to discover two old problems that
   were hidden before[3][4].

 - Remove unused parameter by Sergey Senozhatsky. Clean up after a past
   change.

[1] http://lkml.kernel.org/r/1481798878-31898-1-git-send-email-pmladek@suse.com
[2] http://lkml.kernel.org/r/20161227141611.940-1-sergey.senozhatsky@gmail.com
[3] http://lkml.kernel.org/r/20170215044332.30449-1-sergey.senozhatsky@gmail.com
[4] http://lkml.kernel.org/r/20170217015932.11898-1-sergey.senozhatsky@gmail.com

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/pmladek/printk:
  printk: drop call_console_drivers() unused param
  printk: convert the rest to printk-safe
  printk: remove zap_locks() function
  printk: use printk_safe buffers in printk
  printk: report lost messages in printk safe/nmi contexts
  printk: always use deferred printk when flush printk_safe lines
  printk: introduce per-cpu safe_print seq buffer
  printk: rename nmi.c and exported api
  printk: use vprintk_func in vprintk()
  MAINTAINERS: Add printk maintainers
parents 6ef192f2 d9c23523
......@@ -9997,6 +9997,14 @@ S: Supported
F: Documentation/preempt-locking.txt
F: include/linux/preempt.h
PRINTK
M: Petr Mladek <pmladek@suse.com>
M: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
R: Steven Rostedt <rostedt@goodmis.org>
S: Maintained
F: kernel/printk/
F: include/linux/printk.h
PRISM54 WIRELESS DRIVER
M: "Luis R. Rodriguez" <mcgrof@gmail.com>
L: linux-wireless@vger.kernel.org
......
......@@ -147,17 +147,11 @@ void early_printk(const char *s, ...) { }
#endif
#ifdef CONFIG_PRINTK_NMI
extern void printk_nmi_init(void);
extern void printk_nmi_enter(void);
extern void printk_nmi_exit(void);
extern void printk_nmi_flush(void);
extern void printk_nmi_flush_on_panic(void);
#else
static inline void printk_nmi_init(void) { }
static inline void printk_nmi_enter(void) { }
static inline void printk_nmi_exit(void) { }
static inline void printk_nmi_flush(void) { }
static inline void printk_nmi_flush_on_panic(void) { }
#endif /* PRINTK_NMI */
#ifdef CONFIG_PRINTK
......@@ -209,6 +203,9 @@ void __init setup_log_buf(int early);
__printf(1, 2) void dump_stack_set_arch_desc(const char *fmt, ...);
void dump_stack_print_info(const char *log_lvl);
void show_regs_print_info(const char *log_lvl);
extern void printk_safe_init(void);
extern void printk_safe_flush(void);
extern void printk_safe_flush_on_panic(void);
#else
static inline __printf(1, 0)
int vprintk(const char *s, va_list args)
......@@ -268,6 +265,18 @@ static inline void dump_stack_print_info(const char *log_lvl)
static inline void show_regs_print_info(const char *log_lvl)
{
}
static inline void printk_safe_init(void)
{
}
static inline void printk_safe_flush(void)
{
}
static inline void printk_safe_flush_on_panic(void)
{
}
#endif
extern asmlinkage void dump_stack(void) __cold;
......
......@@ -861,17 +861,19 @@ config LOG_CPU_MAX_BUF_SHIFT
13 => 8 KB for each CPU
12 => 4 KB for each CPU
config NMI_LOG_BUF_SHIFT
int "Temporary per-CPU NMI log buffer size (12 => 4KB, 13 => 8KB)"
config PRINTK_SAFE_LOG_BUF_SHIFT
int "Temporary per-CPU printk log buffer size (12 => 4KB, 13 => 8KB)"
range 10 21
default 13
depends on PRINTK_NMI
depends on PRINTK
help
Select the size of a per-CPU buffer where NMI messages are temporary
stored. They are copied to the main log buffer in a safe context
to avoid a deadlock. The value defines the size as a power of 2.
Select the size of an alternate printk per-CPU buffer where messages
printed from usafe contexts are temporary stored. One example would
be NMI messages, another one - printk recursion. The messages are
copied to the main log buffer in a safe context to avoid a deadlock.
The value defines the size as a power of 2.
NMI messages are rare and limited. The largest one is when
Those messages are rare and limited. The largest one is when
a backtrace is printed. It usually fits into 4KB. Select
8KB if you want to be on the safe side.
......
......@@ -581,7 +581,7 @@ asmlinkage __visible void __init start_kernel(void)
timekeeping_init();
time_init();
sched_clock_postinit();
printk_nmi_init();
printk_safe_init();
perf_event_init();
profile_init();
call_function_init();
......
......@@ -916,7 +916,7 @@ void crash_kexec(struct pt_regs *regs)
old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, this_cpu);
if (old_cpu == PANIC_CPU_INVALID) {
/* This is the 1st CPU which comes here, so go ahead. */
printk_nmi_flush_on_panic();
printk_safe_flush_on_panic();
__crash_kexec(regs);
/*
......
......@@ -188,7 +188,7 @@ void panic(const char *fmt, ...)
* Bypass the panic_cpu check and call __crash_kexec directly.
*/
if (!_crash_kexec_post_notifiers) {
printk_nmi_flush_on_panic();
printk_safe_flush_on_panic();
__crash_kexec(NULL);
/*
......@@ -213,7 +213,7 @@ void panic(const char *fmt, ...)
atomic_notifier_call_chain(&panic_notifier_list, 0, buf);
/* Call flush even twice. It tries harder with a single online CPU */
printk_nmi_flush_on_panic();
printk_safe_flush_on_panic();
kmsg_dump(KMSG_DUMP_PANIC);
/*
......
obj-y = printk.o
obj-$(CONFIG_PRINTK_NMI) += nmi.o
obj-$(CONFIG_PRINTK) += printk_safe.o
obj-$(CONFIG_A11Y_BRAILLE_CONSOLE) += braille.o
......@@ -16,42 +16,55 @@
*/
#include <linux/percpu.h>
typedef __printf(1, 0) int (*printk_func_t)(const char *fmt, va_list args);
#ifdef CONFIG_PRINTK
int __printf(1, 0) vprintk_default(const char *fmt, va_list args);
#ifdef CONFIG_PRINTK_NMI
#define PRINTK_SAFE_CONTEXT_MASK 0x7fffffff
#define PRINTK_NMI_CONTEXT_MASK 0x80000000
extern raw_spinlock_t logbuf_lock;
__printf(1, 0) int vprintk_default(const char *fmt, va_list args);
__printf(1, 0) int vprintk_func(const char *fmt, va_list args);
void __printk_safe_enter(void);
void __printk_safe_exit(void);
#define printk_safe_enter_irqsave(flags) \
do { \
local_irq_save(flags); \
__printk_safe_enter(); \
} while (0)
#define printk_safe_exit_irqrestore(flags) \
do { \
__printk_safe_exit(); \
local_irq_restore(flags); \
} while (0)
#define printk_safe_enter_irq() \
do { \
local_irq_disable(); \
__printk_safe_enter(); \
} while (0)
#define printk_safe_exit_irq() \
do { \
__printk_safe_exit(); \
local_irq_enable(); \
} while (0)
#else
__printf(1, 0) int vprintk_func(const char *fmt, va_list args) { return 0; }
/*
* printk() could not take logbuf_lock in NMI context. Instead,
* it temporary stores the strings into a per-CPU buffer.
* The alternative implementation is chosen transparently
* via per-CPU variable.
* In !PRINTK builds we still export logbuf_lock spin_lock, console_sem
* semaphore and some of console functions (console_unlock()/etc.), so
* printk-safe must preserve the existing local IRQ guarantees.
*/
DECLARE_PER_CPU(printk_func_t, printk_func);
static inline __printf(1, 0) int vprintk_func(const char *fmt, va_list args)
{
return this_cpu_read(printk_func)(fmt, args);
}
extern atomic_t nmi_message_lost;
static inline int get_nmi_message_lost(void)
{
return atomic_xchg(&nmi_message_lost, 0);
}
#else /* CONFIG_PRINTK_NMI */
static inline __printf(1, 0) int vprintk_func(const char *fmt, va_list args)
{
return vprintk_default(fmt, args);
}
static inline int get_nmi_message_lost(void)
{
return 0;
}
#endif /* CONFIG_PRINTK_NMI */
#define printk_safe_enter_irqsave(flags) local_irq_save(flags)
#define printk_safe_exit_irqrestore(flags) local_irq_restore(flags)
#define printk_safe_enter_irq() local_irq_disable()
#define printk_safe_exit_irq() local_irq_enable()
#endif /* CONFIG_PRINTK */
......@@ -213,17 +213,36 @@ static int nr_ext_console_drivers;
static int __down_trylock_console_sem(unsigned long ip)
{
if (down_trylock(&console_sem))
int lock_failed;
unsigned long flags;
/*
* Here and in __up_console_sem() we need to be in safe mode,
* because spindump/WARN/etc from under console ->lock will
* deadlock in printk()->down_trylock_console_sem() otherwise.
*/
printk_safe_enter_irqsave(flags);
lock_failed = down_trylock(&console_sem);
printk_safe_exit_irqrestore(flags);
if (lock_failed)
return 1;
mutex_acquire(&console_lock_dep_map, 0, 1, ip);
return 0;
}
#define down_trylock_console_sem() __down_trylock_console_sem(_RET_IP_)
#define up_console_sem() do { \
mutex_release(&console_lock_dep_map, 1, _RET_IP_);\
up(&console_sem);\
} while (0)
static void __up_console_sem(unsigned long ip)
{
unsigned long flags;
mutex_release(&console_lock_dep_map, 1, ip);
printk_safe_enter_irqsave(flags);
up(&console_sem);
printk_safe_exit_irqrestore(flags);
}
#define up_console_sem() __up_console_sem(_RET_IP_)
/*
* This is used for debugging the mess that is the VT code by
......@@ -351,6 +370,34 @@ __packed __aligned(4)
*/
DEFINE_RAW_SPINLOCK(logbuf_lock);
/*
* Helper macros to lock/unlock logbuf_lock and switch between
* printk-safe/unsafe modes.
*/
#define logbuf_lock_irq() \
do { \
printk_safe_enter_irq(); \
raw_spin_lock(&logbuf_lock); \
} while (0)
#define logbuf_unlock_irq() \
do { \
raw_spin_unlock(&logbuf_lock); \
printk_safe_exit_irq(); \
} while (0)
#define logbuf_lock_irqsave(flags) \
do { \
printk_safe_enter_irqsave(flags); \
raw_spin_lock(&logbuf_lock); \
} while (0)
#define logbuf_unlock_irqrestore(flags) \
do { \
raw_spin_unlock(&logbuf_lock); \
printk_safe_exit_irqrestore(flags); \
} while (0)
#ifdef CONFIG_PRINTK
DECLARE_WAIT_QUEUE_HEAD(log_wait);
/* the next printk record to read by syslog(READ) or /proc/kmsg */
......@@ -782,20 +829,21 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
ret = mutex_lock_interruptible(&user->lock);
if (ret)
return ret;
raw_spin_lock_irq(&logbuf_lock);
logbuf_lock_irq();
while (user->seq == log_next_seq) {
if (file->f_flags & O_NONBLOCK) {
ret = -EAGAIN;
raw_spin_unlock_irq(&logbuf_lock);
logbuf_unlock_irq();
goto out;
}
raw_spin_unlock_irq(&logbuf_lock);
logbuf_unlock_irq();
ret = wait_event_interruptible(log_wait,
user->seq != log_next_seq);
if (ret)
goto out;
raw_spin_lock_irq(&logbuf_lock);
logbuf_lock_irq();
}
if (user->seq < log_first_seq) {
......@@ -803,7 +851,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
user->idx = log_first_idx;
user->seq = log_first_seq;
ret = -EPIPE;
raw_spin_unlock_irq(&logbuf_lock);
logbuf_unlock_irq();
goto out;
}
......@@ -816,7 +864,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
user->idx = log_next(user->idx);
user->seq++;
raw_spin_unlock_irq(&logbuf_lock);
logbuf_unlock_irq();
if (len > count) {
ret = -EINVAL;
......@@ -843,7 +891,7 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence)
if (offset)
return -ESPIPE;
raw_spin_lock_irq(&logbuf_lock);
logbuf_lock_irq();
switch (whence) {
case SEEK_SET:
/* the first record */
......@@ -867,7 +915,7 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence)
default:
ret = -EINVAL;
}
raw_spin_unlock_irq(&logbuf_lock);
logbuf_unlock_irq();
return ret;
}
......@@ -881,7 +929,7 @@ static unsigned int devkmsg_poll(struct file *file, poll_table *wait)
poll_wait(file, &log_wait, wait);
raw_spin_lock_irq(&logbuf_lock);
logbuf_lock_irq();
if (user->seq < log_next_seq) {
/* return error when data has vanished underneath us */
if (user->seq < log_first_seq)
......@@ -889,7 +937,7 @@ static unsigned int devkmsg_poll(struct file *file, poll_table *wait)
else
ret = POLLIN|POLLRDNORM;
}
raw_spin_unlock_irq(&logbuf_lock);
logbuf_unlock_irq();
return ret;
}
......@@ -919,10 +967,10 @@ static int devkmsg_open(struct inode *inode, struct file *file)
mutex_init(&user->lock);
raw_spin_lock_irq(&logbuf_lock);
logbuf_lock_irq();
user->idx = log_first_idx;
user->seq = log_first_seq;
raw_spin_unlock_irq(&logbuf_lock);
logbuf_unlock_irq();
file->private_data = user;
return 0;
......@@ -1064,13 +1112,13 @@ void __init setup_log_buf(int early)
return;
}
raw_spin_lock_irqsave(&logbuf_lock, flags);
logbuf_lock_irqsave(flags);
log_buf_len = new_log_buf_len;
log_buf = new_log_buf;
new_log_buf_len = 0;
free = __LOG_BUF_LEN - log_next_idx;
memcpy(log_buf, __log_buf, __LOG_BUF_LEN);
raw_spin_unlock_irqrestore(&logbuf_lock, flags);
logbuf_unlock_irqrestore(flags);
pr_info("log_buf_len: %d bytes\n", log_buf_len);
pr_info("early log buf free: %d(%d%%)\n",
......@@ -1248,7 +1296,7 @@ static int syslog_print(char __user *buf, int size)
size_t n;
size_t skip;
raw_spin_lock_irq(&logbuf_lock);
logbuf_lock_irq();
if (syslog_seq < log_first_seq) {
/* messages are gone, move to first one */
syslog_seq = log_first_seq;
......@@ -1256,7 +1304,7 @@ static int syslog_print(char __user *buf, int size)
syslog_partial = 0;
}
if (syslog_seq == log_next_seq) {
raw_spin_unlock_irq(&logbuf_lock);
logbuf_unlock_irq();
break;
}
......@@ -1275,7 +1323,7 @@ static int syslog_print(char __user *buf, int size)
syslog_partial += n;
} else
n = 0;
raw_spin_unlock_irq(&logbuf_lock);
logbuf_unlock_irq();
if (!n)
break;
......@@ -1304,7 +1352,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
if (!text)
return -ENOMEM;
raw_spin_lock_irq(&logbuf_lock);
logbuf_lock_irq();
if (buf) {
u64 next_seq;
u64 seq;
......@@ -1352,12 +1400,12 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
idx = log_next(idx);
seq++;
raw_spin_unlock_irq(&logbuf_lock);
logbuf_unlock_irq();
if (copy_to_user(buf + len, text, textlen))
len = -EFAULT;
else
len += textlen;
raw_spin_lock_irq(&logbuf_lock);
logbuf_lock_irq();
if (seq < log_first_seq) {
/* messages are gone, move to next one */
......@@ -1371,7 +1419,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
clear_seq = log_next_seq;
clear_idx = log_next_idx;
}
raw_spin_unlock_irq(&logbuf_lock);
logbuf_unlock_irq();
kfree(text);
return len;
......@@ -1458,7 +1506,7 @@ int do_syslog(int type, char __user *buf, int len, int source)
break;
/* Number of chars in the log buffer */
case SYSLOG_ACTION_SIZE_UNREAD:
raw_spin_lock_irq(&logbuf_lock);
logbuf_lock_irq();
if (syslog_seq < log_first_seq) {
/* messages are gone, move to first one */
syslog_seq = log_first_seq;
......@@ -1486,7 +1534,7 @@ int do_syslog(int type, char __user *buf, int len, int source)
}
error -= syslog_partial;
}
raw_spin_unlock_irq(&logbuf_lock);
logbuf_unlock_irq();
break;
/* Size of the log buffer */
case SYSLOG_ACTION_SIZE_BUFFER:
......@@ -1510,8 +1558,7 @@ SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len)
* log_buf[start] to log_buf[end - 1].
* The console_lock must be held.
*/
static void call_console_drivers(int level,
const char *ext_text, size_t ext_len,
static void call_console_drivers(const char *ext_text, size_t ext_len,
const char *text, size_t len)
{
struct console *con;
......@@ -1538,28 +1585,6 @@ static void call_console_drivers(int level,
}
}
/*
* Zap console related locks when oopsing.
* To leave time for slow consoles to print a full oops,
* only zap at most once every 30 seconds.
*/
static void zap_locks(void)
{
static unsigned long oops_timestamp;
if (time_after_eq(jiffies, oops_timestamp) &&
!time_after(jiffies, oops_timestamp + 30 * HZ))
return;
oops_timestamp = jiffies;
debug_locks_off();
/* If a crash is occurring, make sure we can't deadlock */
raw_spin_lock_init(&logbuf_lock);
/* And make sure that we print immediately */
sema_init(&console_sem, 1);
}
int printk_delay_msec __read_mostly;
static inline void printk_delay(void)
......@@ -1669,18 +1694,13 @@ asmlinkage int vprintk_emit(int facility, int level,
const char *dict, size_t dictlen,
const char *fmt, va_list args)
{
static bool recursion_bug;
static char textbuf[LOG_LINE_MAX];
char *text = textbuf;
size_t text_len = 0;
enum log_flags lflags = 0;
unsigned long flags;
int this_cpu;
int printed_len = 0;
int nmi_message_lost;
bool in_sched = false;
/* cpu currently holding logbuf_lock in this function */
static unsigned int logbuf_cpu = UINT_MAX;
if (level == LOGLEVEL_SCHED) {
level = LOGLEVEL_DEFAULT;
......@@ -1690,53 +1710,8 @@ asmlinkage int vprintk_emit(int facility, int level,
boot_delay_msec(level);
printk_delay();
local_irq_save(flags);
this_cpu = smp_processor_id();
/*
* Ouch, printk recursed into itself!
*/
if (unlikely(logbuf_cpu == this_cpu)) {
/*
* If a crash is occurring during printk() on this CPU,
* then try to get the crash message out but make sure
* we can't deadlock. Otherwise just return to avoid the
* recursion and return - but flag the recursion so that
* it can be printed at the next appropriate moment:
*/
if (!oops_in_progress && !lockdep_recursing(current)) {
recursion_bug = true;
local_irq_restore(flags);
return 0;
}
zap_locks();
}
lockdep_off();
/* This stops the holder of console_sem just where we want him */
raw_spin_lock(&logbuf_lock);
logbuf_cpu = this_cpu;
if (unlikely(recursion_bug)) {
static const char recursion_msg[] =
"BUG: recent printk recursion!";
recursion_bug = false;
/* emit KERN_CRIT message */
printed_len += log_store(0, 2, LOG_PREFIX|LOG_NEWLINE, 0,
NULL, 0, recursion_msg,
strlen(recursion_msg));
}
nmi_message_lost = get_nmi_message_lost();
if (unlikely(nmi_message_lost)) {
text_len = scnprintf(textbuf, sizeof(textbuf),
"BAD LUCK: lost %d message(s) from NMI context!",
nmi_message_lost);
printed_len += log_store(0, 2, LOG_PREFIX|LOG_NEWLINE, 0,
NULL, 0, textbuf, text_len);
}
logbuf_lock_irqsave(flags);
/*
* The printf needs to come first; we need the syslog
* prefix which might be passed-in as a parameter.
......@@ -1779,14 +1754,10 @@ asmlinkage int vprintk_emit(int facility, int level,
printed_len += log_output(facility, level, lflags, dict, dictlen, text, text_len);
logbuf_cpu = UINT_MAX;
raw_spin_unlock(&logbuf_lock);
lockdep_on();
local_irq_restore(flags);
logbuf_unlock_irqrestore(flags);
/* If called from the scheduler, we can not call up(). */
if (!in_sched) {
lockdep_off();
/*
* Try to acquire and then immediately release the console
* semaphore. The release will print out buffers and wake up
......@@ -1794,7 +1765,6 @@ asmlinkage int vprintk_emit(int facility, int level,
*/
if (console_trylock())
console_unlock();
lockdep_on();
}
return printed_len;
......@@ -1803,7 +1773,7 @@ EXPORT_SYMBOL(vprintk_emit);
asmlinkage int vprintk(const char *fmt, va_list args)
{
return vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, 0, fmt, args);
return vprintk_func(fmt, args);
}
EXPORT_SYMBOL(vprintk);
......@@ -1895,16 +1865,12 @@ static ssize_t msg_print_ext_header(char *buf, size_t size,
static ssize_t msg_print_ext_body(char *buf, size_t size,
char *dict, size_t dict_len,
char *text, size_t text_len) { return 0; }
static void call_console_drivers(int level,
const char *ext_text, size_t ext_len,
static void call_console_drivers(const char *ext_text, size_t ext_len,
const char *text, size_t len) {}
static size_t msg_print_text(const struct printk_log *msg,
bool syslog, char *buf, size_t size) { return 0; }
static bool suppress_message_printing(int level) { return false; }
/* Still needs to be defined for users */
DEFINE_PER_CPU(printk_func_t, printk_func);
#endif /* CONFIG_PRINTK */
#ifdef CONFIG_EARLY_PRINTK
......@@ -2220,9 +2186,9 @@ void console_unlock(void)
struct printk_log *msg;
size_t ext_len = 0;
size_t len;
int level;
raw_spin_lock_irqsave(&logbuf_lock, flags);
printk_safe_enter_irqsave(flags);
raw_spin_lock(&logbuf_lock);
if (seen_seq != log_next_seq) {
wake_klogd = true;
seen_seq = log_next_seq;
......@@ -2243,8 +2209,7 @@ void console_unlock(void)
break;
msg = log_from_idx(console_idx);
level = msg->level;
if (suppress_message_printing(level)) {
if (suppress_message_printing(msg->level)) {
/*
* Skip record we have buffered and already printed
* directly to the console when we received it, and
......@@ -2270,9 +2235,9 @@ void console_unlock(void)
raw_spin_unlock(&logbuf_lock);
stop_critical_timings(); /* don't trace print latency */
call_console_drivers(level, ext_text, ext_len, text, len);
call_console_drivers(ext_text, ext_len, text, len);
start_critical_timings();
local_irq_restore(flags);
printk_safe_exit_irqrestore(flags);
if (do_cond_resched)
cond_resched();
......@@ -2295,7 +2260,8 @@ void console_unlock(void)
*/
raw_spin_lock(&logbuf_lock);
retry = console_seq != log_next_seq;
raw_spin_unlock_irqrestore(&logbuf_lock, flags);
raw_spin_unlock(&logbuf_lock);
printk_safe_exit_irqrestore(flags);
if (retry && console_trylock())
goto again;
......@@ -2558,10 +2524,10 @@ void register_console(struct console *newcon)
* console_unlock(); will print out the buffered messages
* for us.
*/
raw_spin_lock_irqsave(&logbuf_lock, flags);
logbuf_lock_irqsave(flags);
console_seq = syslog_seq;
console_idx = syslog_idx;
raw_spin_unlock_irqrestore(&logbuf_lock, flags);
logbuf_unlock_irqrestore(flags);
/*
* We're about to replay the log buffer. Only do this to the
* just-registered console to avoid excessive message spam to
......@@ -2860,12 +2826,12 @@ void kmsg_dump(enum kmsg_dump_reason reason)
/* initialize iterator with data about the stored records */
dumper->active = true;
raw_spin_lock_irqsave(&logbuf_lock, flags);
logbuf_lock_irqsave(flags);
dumper->cur_seq = clear_seq;
dumper->cur_idx = clear_idx;
dumper->next_seq = log_next_seq;
dumper->next_idx = log_next_idx;
raw_spin_unlock_irqrestore(&logbuf_lock, flags);
logbuf_unlock_irqrestore(flags);
/* invoke dumper which will iterate over records */
dumper->dump(dumper, reason);
......@@ -2950,9 +2916,9 @@ bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog,
unsigned long flags;
bool ret;
raw_spin_lock_irqsave(&logbuf_lock, flags);
logbuf_lock_irqsave(flags);
ret = kmsg_dump_get_line_nolock(dumper, syslog, line, size, len);
raw_spin_unlock_irqrestore(&logbuf_lock, flags);
logbuf_unlock_irqrestore(flags);
return ret;
}
......@@ -2991,7 +2957,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
if (!dumper->active)
goto out;
raw_spin_lock_irqsave(&logbuf_lock, flags);
logbuf_lock_irqsave(flags);
if (dumper->cur_seq < log_first_seq) {
/* messages are gone, move to first available one */
dumper->cur_seq = log_first_seq;
......@@ -3000,7 +2966,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
/* last entry */
if (dumper->cur_seq >= dumper->next_seq) {
raw_spin_unlock_irqrestore(&logbuf_lock, flags);
logbuf_unlock_irqrestore(flags);
goto out;
}
......@@ -3042,7 +3008,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
dumper->next_seq = next_seq;
dumper->next_idx = next_idx;
ret = true;
raw_spin_unlock_irqrestore(&logbuf_lock, flags);
logbuf_unlock_irqrestore(flags);
out:
if (len)
*len = l;
......@@ -3080,9 +3046,9 @@ void kmsg_dump_rewind(struct kmsg_dumper *dumper)
{
unsigned long flags;
raw_spin_lock_irqsave(&logbuf_lock, flags);
logbuf_lock_irqsave(flags);
kmsg_dump_rewind_nolock(dumper);
raw_spin_unlock_irqrestore(&logbuf_lock, flags);
logbuf_unlock_irqrestore(flags);
}
EXPORT_SYMBOL_GPL(kmsg_dump_rewind);
......
/*
* nmi.c - Safe printk in NMI context
* printk_safe.c - Safe printk for printk-deadlock-prone contexts
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
......@@ -32,36 +32,58 @@
* is later flushed into the main ring buffer via IRQ work.
*
* The alternative implementation is chosen transparently
* via @printk_func per-CPU variable.
* by examinig current printk() context mask stored in @printk_context
* per-CPU variable.
*
* The implementation allows to flush the strings also from another CPU.
* There are situations when we want to make sure that all buffers
* were handled or when IRQs are blocked.
*/
DEFINE_PER_CPU(printk_func_t, printk_func) = vprintk_default;
static int printk_nmi_irq_ready;
atomic_t nmi_message_lost;
static int printk_safe_irq_ready;
#define NMI_LOG_BUF_LEN ((1 << CONFIG_NMI_LOG_BUF_SHIFT) - \
sizeof(atomic_t) - sizeof(struct irq_work))
#define SAFE_LOG_BUF_LEN ((1 << CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT) - \
sizeof(atomic_t) - \
sizeof(atomic_t) - \
sizeof(struct irq_work))
struct nmi_seq_buf {
struct printk_safe_seq_buf {
atomic_t len; /* length of written data */
atomic_t message_lost;
struct irq_work work; /* IRQ work that flushes the buffer */
unsigned char buffer[NMI_LOG_BUF_LEN];
unsigned char buffer[SAFE_LOG_BUF_LEN];
};
static DEFINE_PER_CPU(struct nmi_seq_buf, nmi_print_seq);
static DEFINE_PER_CPU(struct printk_safe_seq_buf, safe_print_seq);
static DEFINE_PER_CPU(int, printk_context);
#ifdef CONFIG_PRINTK_NMI
static DEFINE_PER_CPU(struct printk_safe_seq_buf, nmi_print_seq);
#endif
/* Get flushed in a more safe context. */
static void queue_flush_work(struct printk_safe_seq_buf *s)
{
if (printk_safe_irq_ready) {
/* Make sure that IRQ work is really initialized. */
smp_rmb();
irq_work_queue(&s->work);
}
}
/*
* Safe printk() for NMI context. It uses a per-CPU buffer to
* store the message. NMIs are not nested, so there is always only
* one writer running. But the buffer might get flushed from another
* CPU, so we need to be careful.
* Add a message to per-CPU context-dependent buffer. NMI and printk-safe
* have dedicated buffers, because otherwise printk-safe preempted by
* NMI-printk would have overwritten the NMI messages.
*
* The messages are fushed from irq work (or from panic()), possibly,
* from other CPU, concurrently with printk_safe_log_store(). Should this
* happen, printk_safe_log_store() will notice the buffer->len mismatch
* and repeat the write.
*/
static int vprintk_nmi(const char *fmt, va_list args)
static int printk_safe_log_store(struct printk_safe_seq_buf *s,
const char *fmt, va_list args)
{
struct nmi_seq_buf *s = this_cpu_ptr(&nmi_print_seq);
int add = 0;
int add;
size_t len;
again:
......@@ -69,18 +91,21 @@ static int vprintk_nmi(const char *fmt, va_list args)
/* The trailing '\0' is not counted into len. */
if (len >= sizeof(s->buffer) - 1) {
atomic_inc(&nmi_message_lost);
atomic_inc(&s->message_lost);
queue_flush_work(s);
return 0;
}
/*
* Make sure that all old data have been read before the buffer was
* reseted. This is not needed when we just append data.
* Make sure that all old data have been read before the buffer
* was reset. This is not needed when we just append data.
*/
if (!len)
smp_rmb();
add = vscnprintf(s->buffer + len, sizeof(s->buffer) - len, fmt, args);
if (!add)
return 0;
/*
* Do it once again if the buffer has been flushed in the meantime.
......@@ -90,32 +115,23 @@ static int vprintk_nmi(const char *fmt, va_list args)
if (atomic_cmpxchg(&s->len, len, len + add) != len)
goto again;
/* Get flushed in a more safe context. */
if (add && printk_nmi_irq_ready) {
/* Make sure that IRQ work is really initialized. */
smp_rmb();
irq_work_queue(&s->work);
}
queue_flush_work(s);
return add;
}
static void printk_nmi_flush_line(const char *text, int len)
static inline void printk_safe_flush_line(const char *text, int len)
{
/*
* The buffers are flushed in NMI only on panic. The messages must
* go only into the ring buffer at this stage. Consoles will get
* explicitly called later when a crashdump is not generated.
* Avoid any console drivers calls from here, because we may be
* in NMI or printk_safe context (when in panic). The messages
* must go only into the ring buffer at this stage. Consoles will
* get explicitly called later when a crashdump is not generated.
*/
if (in_nmi())
printk_deferred("%.*s", len, text);
else
printk("%.*s", len, text);
printk_deferred("%.*s", len, text);
}
/* printk part of the temporary buffer line by line */
static int printk_nmi_flush_buffer(const char *start, size_t len)
static int printk_safe_flush_buffer(const char *start, size_t len)
{
const char *c, *end;
bool header;
......@@ -127,7 +143,7 @@ static int printk_nmi_flush_buffer(const char *start, size_t len)
/* Print line by line. */
while (c < end) {
if (*c == '\n') {
printk_nmi_flush_line(start, c - start + 1);
printk_safe_flush_line(start, c - start + 1);
start = ++c;
header = true;
continue;
......@@ -140,7 +156,7 @@ static int printk_nmi_flush_buffer(const char *start, size_t len)
continue;
}
printk_nmi_flush_line(start, c - start);
printk_safe_flush_line(start, c - start);
start = c++;
header = true;
continue;
......@@ -154,22 +170,31 @@ static int printk_nmi_flush_buffer(const char *start, size_t len)
if (start < end && !header) {
static const char newline[] = KERN_CONT "\n";
printk_nmi_flush_line(start, end - start);
printk_nmi_flush_line(newline, strlen(newline));
printk_safe_flush_line(start, end - start);
printk_safe_flush_line(newline, strlen(newline));
}
return len;
}
static void report_message_lost(struct printk_safe_seq_buf *s)
{
int lost = atomic_xchg(&s->message_lost, 0);
if (lost)
printk_deferred("Lost %d message(s)!\n", lost);
}
/*
* Flush data from the associated per_CPU buffer. The function
* Flush data from the associated per-CPU buffer. The function
* can be called either via IRQ work or independently.
*/
static void __printk_nmi_flush(struct irq_work *work)
static void __printk_safe_flush(struct irq_work *work)
{
static raw_spinlock_t read_lock =
__RAW_SPIN_LOCK_INITIALIZER(read_lock);
struct nmi_seq_buf *s = container_of(work, struct nmi_seq_buf, work);
struct printk_safe_seq_buf *s =
container_of(work, struct printk_safe_seq_buf, work);
unsigned long flags;
size_t len;
int i;
......@@ -194,9 +219,9 @@ static void __printk_nmi_flush(struct irq_work *work)
* buffer size.
*/
if ((i && i >= len) || len > sizeof(s->buffer)) {
const char *msg = "printk_nmi_flush: internal error\n";
const char *msg = "printk_safe_flush: internal error\n";
printk_nmi_flush_line(msg, strlen(msg));
printk_safe_flush_line(msg, strlen(msg));
len = 0;
}
......@@ -205,7 +230,7 @@ static void __printk_nmi_flush(struct irq_work *work)
/* Make sure that data has been written up to the @len */
smp_rmb();
i += printk_nmi_flush_buffer(s->buffer + i, len - i);
i += printk_safe_flush_buffer(s->buffer + i, len - i);
/*
* Check that nothing has got added in the meantime and truncate
......@@ -217,35 +242,40 @@ static void __printk_nmi_flush(struct irq_work *work)
goto more;
out:
report_message_lost(s);
raw_spin_unlock_irqrestore(&read_lock, flags);
}
/**
* printk_nmi_flush - flush all per-cpu nmi buffers.
* printk_safe_flush - flush all per-cpu nmi buffers.
*
* The buffers are flushed automatically via IRQ work. This function
* is useful only when someone wants to be sure that all buffers have
* been flushed at some point.
*/
void printk_nmi_flush(void)
void printk_safe_flush(void)
{
int cpu;
for_each_possible_cpu(cpu)
__printk_nmi_flush(&per_cpu(nmi_print_seq, cpu).work);
for_each_possible_cpu(cpu) {
#ifdef CONFIG_PRINTK_NMI
__printk_safe_flush(&per_cpu(nmi_print_seq, cpu).work);
#endif
__printk_safe_flush(&per_cpu(safe_print_seq, cpu).work);
}
}
/**
* printk_nmi_flush_on_panic - flush all per-cpu nmi buffers when the system
* printk_safe_flush_on_panic - flush all per-cpu nmi buffers when the system
* goes down.
*
* Similar to printk_nmi_flush() but it can be called even in NMI context when
* Similar to printk_safe_flush() but it can be called even in NMI context when
* the system goes down. It does the best effort to get NMI messages into
* the main ring buffer.
*
* Note that it could try harder when there is only one CPU online.
*/
void printk_nmi_flush_on_panic(void)
void printk_safe_flush_on_panic(void)
{
/*
* Make sure that we could access the main ring buffer.
......@@ -259,33 +289,97 @@ void printk_nmi_flush_on_panic(void)
raw_spin_lock_init(&logbuf_lock);
}
printk_nmi_flush();
printk_safe_flush();
}
void __init printk_nmi_init(void)
#ifdef CONFIG_PRINTK_NMI
/*
* Safe printk() for NMI context. It uses a per-CPU buffer to
* store the message. NMIs are not nested, so there is always only
* one writer running. But the buffer might get flushed from another
* CPU, so we need to be careful.
*/
static int vprintk_nmi(const char *fmt, va_list args)
{
int cpu;
struct printk_safe_seq_buf *s = this_cpu_ptr(&nmi_print_seq);
for_each_possible_cpu(cpu) {
struct nmi_seq_buf *s = &per_cpu(nmi_print_seq, cpu);
return printk_safe_log_store(s, fmt, args);
}
init_irq_work(&s->work, __printk_nmi_flush);
}
void printk_nmi_enter(void)
{
this_cpu_or(printk_context, PRINTK_NMI_CONTEXT_MASK);
}
/* Make sure that IRQ works are initialized before enabling. */
smp_wmb();
printk_nmi_irq_ready = 1;
void printk_nmi_exit(void)
{
this_cpu_and(printk_context, ~PRINTK_NMI_CONTEXT_MASK);
}
/* Flush pending messages that did not have scheduled IRQ works. */
printk_nmi_flush();
#else
static int vprintk_nmi(const char *fmt, va_list args)
{
return 0;
}
void printk_nmi_enter(void)
#endif /* CONFIG_PRINTK_NMI */
/*
* Lock-less printk(), to avoid deadlocks should the printk() recurse
* into itself. It uses a per-CPU buffer to store the message, just like
* NMI.
*/
static int vprintk_safe(const char *fmt, va_list args)
{
this_cpu_write(printk_func, vprintk_nmi);
struct printk_safe_seq_buf *s = this_cpu_ptr(&safe_print_seq);
return printk_safe_log_store(s, fmt, args);
}
void printk_nmi_exit(void)
/* Can be preempted by NMI. */
void __printk_safe_enter(void)
{
this_cpu_inc(printk_context);
}
/* Can be preempted by NMI. */
void __printk_safe_exit(void)
{
this_cpu_write(printk_func, vprintk_default);
this_cpu_dec(printk_context);
}
__printf(1, 0) int vprintk_func(const char *fmt, va_list args)
{
if (this_cpu_read(printk_context) & PRINTK_NMI_CONTEXT_MASK)
return vprintk_nmi(fmt, args);
if (this_cpu_read(printk_context) & PRINTK_SAFE_CONTEXT_MASK)
return vprintk_safe(fmt, args);
return vprintk_default(fmt, args);
}
void __init printk_safe_init(void)
{
int cpu;
for_each_possible_cpu(cpu) {
struct printk_safe_seq_buf *s;
s = &per_cpu(safe_print_seq, cpu);
init_irq_work(&s->work, __printk_safe_flush);
#ifdef CONFIG_PRINTK_NMI
s = &per_cpu(nmi_print_seq, cpu);
init_irq_work(&s->work, __printk_safe_flush);
#endif
}
/* Make sure that IRQ works are initialized before enabling. */
smp_wmb();
printk_safe_irq_ready = 1;
/* Flush pending messages that did not have scheduled IRQ works. */
printk_safe_flush();
}
......@@ -77,7 +77,7 @@ void nmi_trigger_cpumask_backtrace(const cpumask_t *mask,
* Force flush any remote buffers that might be stuck in IRQ context
* and therefore could not run their irq_work.
*/
printk_nmi_flush();
printk_safe_flush();
clear_bit_unlock(0, &backtrace_flag);
put_cpu();
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment