Commit c0c770e6 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'apei-release' of git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux-acpi-2.6

* 'apei-release' of git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux-acpi-2.6:
  ACPI, APEI, EINJ Param support is disabled by default
  APEI GHES: 32-bit buildfix
  ACPI: APEI build fix
  ACPI, APEI, GHES: Add hardware memory error recovery support
  HWPoison: add memory_failure_queue()
  ACPI, APEI, GHES, Error records content based throttle
  ACPI, APEI, GHES, printk support for recoverable error via NMI
  lib, Make gen_pool memory allocator lockless
  lib, Add lock-less NULL terminated single list
  Add Kconfig option ARCH_HAVE_NMI_SAFE_CMPXCHG
  ACPI, APEI, Add WHEA _OSC support
  ACPI, APEI, Add APEI bit support in generic _OSC call
  ACPI, APEI, GHES, Support disable GHES at boot time
  ACPI, APEI, GHES, Prevent GHES to be built as module
  ACPI, APEI, Use apei_exec_run_optional in APEI EINJ and ERST
  ACPI, APEI, Add apei_exec_run_optional
  ACPI, APEI, GHES, Do not ratelimit fatal error printk before panic
  ACPI, APEI, ERST, Fix erst-dbg long record reading issue
  ACPI, APEI, ERST, Prevent erst_dbg from loading if ERST is disabled
parents a9e4e6e1 d0e323b4
......@@ -48,12 +48,19 @@ directory apei/einj. The following files are provided.
- param1
This file is used to set the first error parameter value. Effect of
parameter depends on error_type specified. For memory error, this is
physical memory address.
physical memory address. Only available if param_extension module
parameter is specified.
- param2
This file is used to set the second error parameter value. Effect of
parameter depends on error_type specified. For memory error, this is
physical memory address mask.
physical memory address mask. Only available if param_extension
module parameter is specified.
Parameter injection support is a BIOS-version-specific extension; that
is, it only works with some BIOS versions. If you want to use it, please
make sure your BIOS version has the proper support and specify
"param_extension=y" as a module parameter.
For more information about EINJ, please refer to ACPI specification
version 4.0, section 17.5.
......@@ -178,4 +178,7 @@ config HAVE_ARCH_MUTEX_CPU_RELAX
config HAVE_RCU_TABLE_FREE
bool
config ARCH_HAVE_NMI_SAFE_CMPXCHG
bool
source "kernel/gcov/Kconfig"
......@@ -14,6 +14,7 @@ config ALPHA
select AUTO_IRQ_AFFINITY if SMP
select GENERIC_IRQ_SHOW
select ARCH_WANT_OPTIONAL_GPIOLIB
select ARCH_HAVE_NMI_SAFE_CMPXCHG
help
The Alpha is a 64-bit general-purpose processor designed and
marketed by the Digital Equipment Corporation of blessed memory,
......
......@@ -10,6 +10,7 @@ config AVR32
select GENERIC_IRQ_PROBE
select HARDIRQS_SW_RESEND
select GENERIC_IRQ_SHOW
select ARCH_HAVE_NMI_SAFE_CMPXCHG
help
AVR32 is a high-performance 32-bit RISC microprocessor core,
designed for cost-sensitive embedded applications, with particular
......
......@@ -7,6 +7,7 @@ config FRV
select HAVE_PERF_EVENTS
select HAVE_GENERIC_HARDIRQS
select GENERIC_IRQ_SHOW
select ARCH_HAVE_NMI_SAFE_CMPXCHG
config ZONE_DMA
bool
......
......@@ -28,6 +28,7 @@ config IA64
select IRQ_PER_CPU
select GENERIC_IRQ_SHOW
select ARCH_WANT_OPTIONAL_GPIOLIB
select ARCH_HAVE_NMI_SAFE_CMPXCHG
default y
help
The Itanium Processor Family is Intel's 64-bit successor to
......
......@@ -6,6 +6,7 @@ config M68K
select GENERIC_ATOMIC64 if MMU
select HAVE_GENERIC_HARDIRQS if !MMU
select GENERIC_IRQ_SHOW if !MMU
select ARCH_HAVE_NMI_SAFE_CMPXCHG if RMW_INSNS
config RWSEM_GENERIC_SPINLOCK
bool
......
......@@ -15,6 +15,7 @@ config PARISC
select HAVE_GENERIC_HARDIRQS
select GENERIC_IRQ_PROBE
select IRQ_PER_CPU
select ARCH_HAVE_NMI_SAFE_CMPXCHG
help
The PA-RISC microprocessor is designed by Hewlett-Packard and used
......
......@@ -136,6 +136,7 @@ config PPC
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_BPF_JIT if (PPC64 && NET)
select HAVE_ARCH_JUMP_LABEL
select ARCH_HAVE_NMI_SAFE_CMPXCHG
config EARLY_PRINTK
bool
......
......@@ -81,6 +81,7 @@ config S390
select INIT_ALL_POSSIBLE
select HAVE_IRQ_WORK
select HAVE_PERF_EVENTS
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select HAVE_KERNEL_GZIP
select HAVE_KERNEL_BZIP2
select HAVE_KERNEL_LZMA
......
......@@ -11,6 +11,7 @@ config SUPERH
select HAVE_DMA_ATTRS
select HAVE_IRQ_WORK
select HAVE_PERF_EVENTS
select ARCH_HAVE_NMI_SAFE_CMPXCHG if (GUSA_RB || CPU_SH4A)
select PERF_USE_VMALLOC
select HAVE_KERNEL_GZIP
select HAVE_KERNEL_BZIP2
......
......@@ -54,6 +54,7 @@ config SPARC64
select HAVE_PERF_EVENTS
select PERF_USE_VMALLOC
select IRQ_PREFLOW_FASTEOI
select ARCH_HAVE_NMI_SAFE_CMPXCHG
config ARCH_DEFCONFIG
string
......
......@@ -12,6 +12,7 @@ config TILE
select GENERIC_PENDING_IRQ if SMP
select GENERIC_IRQ_SHOW
select SYS_HYPERVISOR
select ARCH_HAVE_NMI_SAFE_CMPXCHG if !M386
# FIXME: investigate whether we need/want these options.
# select HAVE_IOREMAP_PROT
......
......@@ -72,6 +72,7 @@ config X86
select USE_GENERIC_SMP_HELPERS if SMP
select HAVE_BPF_JIT if (X86_64 && NET)
select CLKEVT_I8253
select ARCH_HAVE_NMI_SAFE_CMPXCHG
config INSTRUCTION_DECODER
def_bool (KPROBES || PERF_EVENTS)
......
......@@ -10,9 +10,11 @@ config ACPI_APEI
error injection.
config ACPI_APEI_GHES
tristate "APEI Generic Hardware Error Source"
bool "APEI Generic Hardware Error Source"
depends on ACPI_APEI && X86
select ACPI_HED
select LLIST
select GENERIC_ALLOCATOR
help
Generic Hardware Error Source provides a way to report
platform hardware errors (such as that from chipset). It
......@@ -30,6 +32,13 @@ config ACPI_APEI_PCIEAER
PCIe AER errors may be reported via APEI firmware first mode.
Turn on this option to enable the corresponding support.
config ACPI_APEI_MEMORY_FAILURE
bool "APEI memory error recovering support"
depends on ACPI_APEI && MEMORY_FAILURE
help
Memory errors may be reported via APEI firmware first mode.
Turn on this option to enable the memory recovering support.
config ACPI_APEI_EINJ
tristate "APEI Error INJection (EINJ)"
depends on ACPI_APEI && DEBUG_FS
......
......@@ -157,9 +157,10 @@ EXPORT_SYMBOL_GPL(apei_exec_noop);
* Interpret the specified action. Go through whole action table,
* execute all instructions belong to the action.
*/
int apei_exec_run(struct apei_exec_context *ctx, u8 action)
int __apei_exec_run(struct apei_exec_context *ctx, u8 action,
bool optional)
{
int rc;
int rc = -ENOENT;
u32 i, ip;
struct acpi_whea_header *entry;
apei_exec_ins_func_t run;
......@@ -198,9 +199,9 @@ int apei_exec_run(struct apei_exec_context *ctx, u8 action)
goto rewind;
}
return 0;
return !optional && rc < 0 ? rc : 0;
}
EXPORT_SYMBOL_GPL(apei_exec_run);
EXPORT_SYMBOL_GPL(__apei_exec_run);
typedef int (*apei_exec_entry_func_t)(struct apei_exec_context *ctx,
struct acpi_whea_header *entry,
......@@ -603,3 +604,29 @@ struct dentry *apei_get_debugfs_dir(void)
return dapei;
}
EXPORT_SYMBOL_GPL(apei_get_debugfs_dir);
/*
 * Evaluate _OSC on the \_SB object using the WHEA UUID.
 *
 * The capability buffer is sent with the query-enable flag set and with
 * empty support and control dwords.  The buffer returned by a successful
 * _OSC evaluation is not inspected; it is only freed.
 *
 * Returns 0 on success, or -EIO if the \_SB handle cannot be obtained or
 * the _OSC evaluation fails.
 */
int apei_osc_setup(void)
{
	static u8 whea_uuid_str[] = "ed855e0c-6c90-47bf-a62a-26de0fc5ad5c";
	u32 capbuf[3];
	acpi_handle handle;
	struct acpi_osc_context context = {
		.uuid_str = whea_uuid_str,
		.rev = 1,
		.cap.length = sizeof(capbuf),
		.cap.pointer = capbuf,
	};

	capbuf[OSC_QUERY_TYPE] = OSC_QUERY_ENABLE;
	capbuf[OSC_SUPPORT_TYPE] = 0;
	capbuf[OSC_CONTROL_TYPE] = 0;

	if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle)))
		return -EIO;
	if (ACPI_FAILURE(acpi_run_osc(handle, &context)))
		return -EIO;

	/* Only the status matters here; drop the returned buffer. */
	kfree(context.ret.pointer);
	return 0;
}
EXPORT_SYMBOL_GPL(apei_osc_setup);
......@@ -50,7 +50,18 @@ static inline u64 apei_exec_ctx_get_output(struct apei_exec_context *ctx)
return ctx->value;
}
int apei_exec_run(struct apei_exec_context *ctx, u8 action);
int __apei_exec_run(struct apei_exec_context *ctx, u8 action, bool optional);
/* Run @action through __apei_exec_run() with the "optional" flag cleared. */
static inline int apei_exec_run(struct apei_exec_context *ctx, u8 action)
{
	return __apei_exec_run(ctx, action, false);
}
/*
 * Same as apei_exec_run(), but for actions the firmware is not required
 * to provide: __apei_exec_run() is called with the "optional" flag set.
 */
static inline int apei_exec_run_optional(struct apei_exec_context *ctx, u8 action)
{
	return __apei_exec_run(ctx, action, true);
}
/* Common instruction implementation */
......@@ -113,4 +124,6 @@ void apei_estatus_print(const char *pfx,
const struct acpi_hest_generic_status *estatus);
int apei_estatus_check_header(const struct acpi_hest_generic_status *estatus);
int apei_estatus_check(const struct acpi_hest_generic_status *estatus);
int apei_osc_setup(void);
#endif
......@@ -46,7 +46,8 @@
* Some BIOSes allow parameters to the SET_ERROR_TYPE entries in the
* EINJ table through an unpublished extension. Use with caution as
* most will ignore the parameter and make their own choice of address
* for error injection.
* for error injection. This extension is used only if
* param_extension module parameter is specified.
*/
struct einj_parameter {
u64 type;
......@@ -65,6 +66,9 @@ struct einj_parameter {
((struct acpi_whea_header *)((char *)(tab) + \
sizeof(struct acpi_table_einj)))
static bool param_extension;
module_param(param_extension, bool, 0);
static struct acpi_table_einj *einj_tab;
static struct apei_resources einj_resources;
......@@ -285,7 +289,7 @@ static int __einj_error_inject(u32 type, u64 param1, u64 param2)
einj_exec_ctx_init(&ctx);
rc = apei_exec_run(&ctx, ACPI_EINJ_BEGIN_OPERATION);
rc = apei_exec_run_optional(&ctx, ACPI_EINJ_BEGIN_OPERATION);
if (rc)
return rc;
apei_exec_ctx_set_input(&ctx, type);
......@@ -323,7 +327,7 @@ static int __einj_error_inject(u32 type, u64 param1, u64 param2)
rc = __einj_error_trigger(trigger_paddr);
if (rc)
return rc;
rc = apei_exec_run(&ctx, ACPI_EINJ_END_OPERATION);
rc = apei_exec_run_optional(&ctx, ACPI_EINJ_END_OPERATION);
return rc;
}
......@@ -489,14 +493,6 @@ static int __init einj_init(void)
einj_debug_dir, NULL, &error_type_fops);
if (!fentry)
goto err_cleanup;
fentry = debugfs_create_x64("param1", S_IRUSR | S_IWUSR,
einj_debug_dir, &error_param1);
if (!fentry)
goto err_cleanup;
fentry = debugfs_create_x64("param2", S_IRUSR | S_IWUSR,
einj_debug_dir, &error_param2);
if (!fentry)
goto err_cleanup;
fentry = debugfs_create_file("error_inject", S_IWUSR,
einj_debug_dir, NULL, &error_inject_fops);
if (!fentry)
......@@ -513,12 +509,23 @@ static int __init einj_init(void)
rc = apei_exec_pre_map_gars(&ctx);
if (rc)
goto err_release;
param_paddr = einj_get_parameter_address();
if (param_paddr) {
einj_param = ioremap(param_paddr, sizeof(*einj_param));
rc = -ENOMEM;
if (!einj_param)
goto err_unmap;
if (param_extension) {
param_paddr = einj_get_parameter_address();
if (param_paddr) {
einj_param = ioremap(param_paddr, sizeof(*einj_param));
rc = -ENOMEM;
if (!einj_param)
goto err_unmap;
fentry = debugfs_create_x64("param1", S_IRUSR | S_IWUSR,
einj_debug_dir, &error_param1);
if (!fentry)
goto err_unmap;
fentry = debugfs_create_x64("param2", S_IRUSR | S_IWUSR,
einj_debug_dir, &error_param2);
if (!fentry)
goto err_unmap;
} else
pr_warn(EINJ_PFX "Parameter extension is not supported.\n");
}
pr_info(EINJ_PFX "Error INJection is initialized.\n");
......@@ -526,6 +533,8 @@ static int __init einj_init(void)
return 0;
err_unmap:
if (einj_param)
iounmap(einj_param);
apei_exec_post_unmap_gars(&ctx);
err_release:
apei_resources_release(&einj_resources);
......
......@@ -33,7 +33,7 @@
#define ERST_DBG_PFX "ERST DBG: "
#define ERST_DBG_RECORD_LEN_MAX 4096
#define ERST_DBG_RECORD_LEN_MAX 0x4000
static void *erst_dbg_buf;
static unsigned int erst_dbg_buf_len;
......@@ -213,6 +213,10 @@ static struct miscdevice erst_dbg_dev = {
/*
 * Module init: register the ERST debug misc device, unless ERST has been
 * disabled, in which case log that fact and fail with -ENODEV.
 */
static __init int erst_dbg_init(void)
{
	if (!erst_disable)
		return misc_register(&erst_dbg_dev);

	pr_info(ERST_DBG_PFX "ERST support is disabled.\n");
	return -ENODEV;
}
......
......@@ -642,7 +642,7 @@ static int __erst_write_to_storage(u64 offset)
int rc;
erst_exec_ctx_init(&ctx);
rc = apei_exec_run(&ctx, ACPI_ERST_BEGIN_WRITE);
rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_WRITE);
if (rc)
return rc;
apei_exec_ctx_set_input(&ctx, offset);
......@@ -666,7 +666,7 @@ static int __erst_write_to_storage(u64 offset)
if (rc)
return rc;
val = apei_exec_ctx_get_output(&ctx);
rc = apei_exec_run(&ctx, ACPI_ERST_END);
rc = apei_exec_run_optional(&ctx, ACPI_ERST_END);
if (rc)
return rc;
......@@ -681,7 +681,7 @@ static int __erst_read_from_storage(u64 record_id, u64 offset)
int rc;
erst_exec_ctx_init(&ctx);
rc = apei_exec_run(&ctx, ACPI_ERST_BEGIN_READ);
rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_READ);
if (rc)
return rc;
apei_exec_ctx_set_input(&ctx, offset);
......@@ -709,7 +709,7 @@ static int __erst_read_from_storage(u64 record_id, u64 offset)
if (rc)
return rc;
val = apei_exec_ctx_get_output(&ctx);
rc = apei_exec_run(&ctx, ACPI_ERST_END);
rc = apei_exec_run_optional(&ctx, ACPI_ERST_END);
if (rc)
return rc;
......@@ -724,7 +724,7 @@ static int __erst_clear_from_storage(u64 record_id)
int rc;
erst_exec_ctx_init(&ctx);
rc = apei_exec_run(&ctx, ACPI_ERST_BEGIN_CLEAR);
rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_CLEAR);
if (rc)
return rc;
apei_exec_ctx_set_input(&ctx, record_id);
......@@ -748,7 +748,7 @@ static int __erst_clear_from_storage(u64 record_id)
if (rc)
return rc;
val = apei_exec_ctx_get_output(&ctx);
rc = apei_exec_run(&ctx, ACPI_ERST_END);
rc = apei_exec_run_optional(&ctx, ACPI_ERST_END);
if (rc)
return rc;
......
This diff is collapsed.
......@@ -231,16 +231,17 @@ void __init acpi_hest_init(void)
goto err;
}
rc = apei_hest_parse(hest_parse_ghes_count, &ghes_count);
if (rc)
goto err;
rc = hest_ghes_dev_register(ghes_count);
if (!rc) {
pr_info(HEST_PFX "Table parsing has been initialized.\n");
return;
if (!ghes_disable) {
rc = apei_hest_parse(hest_parse_ghes_count, &ghes_count);
if (rc)
goto err;
rc = hest_ghes_dev_register(ghes_count);
if (rc)
goto err;
}
pr_info(HEST_PFX "Table parsing has been initialized.\n");
return;
err:
hest_disable = 1;
}
......@@ -39,6 +39,7 @@
#include <linux/pci.h>
#include <acpi/acpi_bus.h>
#include <acpi/acpi_drivers.h>
#include <acpi/apei.h>
#include <linux/dmi.h>
#include <linux/suspend.h>
......@@ -519,6 +520,7 @@ acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context)
}
EXPORT_SYMBOL(acpi_run_osc);
bool osc_sb_apei_support_acked;
static u8 sb_uuid_str[] = "0811B06E-4A27-44F9-8D60-3CBBC22E7B48";
static void acpi_bus_osc_support(void)
{
......@@ -541,11 +543,19 @@ static void acpi_bus_osc_support(void)
#if defined(CONFIG_ACPI_PROCESSOR) || defined(CONFIG_ACPI_PROCESSOR_MODULE)
capbuf[OSC_SUPPORT_TYPE] |= OSC_SB_PPC_OST_SUPPORT;
#endif
if (!ghes_disable)
capbuf[OSC_SUPPORT_TYPE] |= OSC_SB_APEI_SUPPORT;
if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle)))
return;
if (ACPI_SUCCESS(acpi_run_osc(handle, &context)))
if (ACPI_SUCCESS(acpi_run_osc(handle, &context))) {
u32 *capbuf_ret = context.ret.pointer;
if (context.ret.length > OSC_SUPPORT_TYPE)
osc_sb_apei_support_acked =
capbuf_ret[OSC_SUPPORT_TYPE] & OSC_SB_APEI_SUPPORT;
kfree(context.ret.pointer);
/* do we need to check the returned cap? Sounds no */
}
/* do we need to check other returned cap? Sounds no */
}
/* --------------------------------------------------------------------------
......
......@@ -18,6 +18,11 @@
extern int hest_disable;
extern int erst_disable;
#ifdef CONFIG_ACPI_APEI_GHES
extern int ghes_disable;
#else
#define ghes_disable 1
#endif
#ifdef CONFIG_ACPI_APEI
void __init acpi_hest_init(void);
......
......@@ -279,6 +279,8 @@ acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context);
#define OSC_SB_CPUHP_OST_SUPPORT 8
#define OSC_SB_APEI_SUPPORT 16
extern bool osc_sb_apei_support_acked;
/* PCI defined _OSC bits */
/* _OSC DW1 Definition (OS Support Fields) */
#define OSC_EXT_PCI_CONFIG_SUPPORT 1
......
......@@ -146,6 +146,7 @@ extern int bitmap_allocate_region(unsigned long *bitmap, int pos, int order);
extern void bitmap_copy_le(void *dst, const unsigned long *src, int nbits);
extern int bitmap_ord_to_pos(const unsigned long *bitmap, int n, int bits);
#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) % BITS_PER_LONG))
#define BITMAP_LAST_WORD_MASK(nbits) \
( \
((nbits) % BITS_PER_LONG) ? \
......
/*
* Basic general purpose allocator for managing special purpose memory
* not managed by the regular kmalloc/kfree interface.
* Uses for this includes on-device special memory, uncached memory
* etc.
* Basic general purpose allocator for managing special purpose
* memory, for example, memory that is not managed by the regular
* kmalloc/kfree interface. Uses for this includes on-device special
* memory, uncached memory etc.
*
* It is safe to use the allocator in NMI handlers and other special
* unblockable contexts that could otherwise deadlock on locks. This
* is implemented by using atomic operations and retries on any
* conflicts. The disadvantage is that there may be livelocks in
* extreme cases. For better scalability, one allocator can be used
* for each CPU.
*
* The lockless operation only works if there is enough memory
* available. If new memory is added to the pool a lock has to be
* still taken. So any user relying on locklessness has to ensure
* that sufficient memory is preallocated.
*
* The basic atomic operation of this allocator is cmpxchg on long.
* On architectures that don't have an NMI-safe cmpxchg implementation,
* the allocator can NOT be used in NMI handlers. So code that uses the
* allocator in an NMI handler should depend on
* CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG.
*
* This source code is licensed under the GNU General Public License,
* Version 2. See the file COPYING for more details.
......@@ -15,7 +33,7 @@
* General purpose special memory pool descriptor.
*/
struct gen_pool {
rwlock_t lock;
spinlock_t lock;
struct list_head chunks; /* list of chunks in this pool */
int min_alloc_order; /* minimum allocation order */
};
......@@ -24,8 +42,8 @@ struct gen_pool {
* General purpose special memory pool chunk descriptor.
*/
struct gen_pool_chunk {
spinlock_t lock;
struct list_head next_chunk; /* next chunk in pool */
atomic_t avail;
phys_addr_t phys_addr; /* physical starting address of memory chunk */
unsigned long start_addr; /* starting address of memory chunk */
unsigned long end_addr; /* ending address of memory chunk */
......@@ -56,4 +74,8 @@ static inline int gen_pool_add(struct gen_pool *pool, unsigned long addr,
extern void gen_pool_destroy(struct gen_pool *);
extern unsigned long gen_pool_alloc(struct gen_pool *, size_t);
extern void gen_pool_free(struct gen_pool *, unsigned long, size_t);
extern void gen_pool_for_each_chunk(struct gen_pool *,
void (*)(struct gen_pool *, struct gen_pool_chunk *, void *), void *);
extern size_t gen_pool_avail(struct gen_pool *);
extern size_t gen_pool_size(struct gen_pool *);
#endif /* __GENALLOC_H__ */
#ifndef LLIST_H
#define LLIST_H
/*
* Lock-less NULL terminated single linked list
*
* If there are multiple producers and multiple consumers, llist_add
* can be used in producers and llist_del_all can be used in
* consumers. They can work simultaneously without a lock. But
* llist_del_first cannot be used here, because llist_del_first
* depends on list->first->next not changing as long as list->first
* does not change during its operation, and a llist_del_first,
* llist_add, llist_add (or llist_del_all, llist_add, llist_add)
* sequence in another consumer may violate that.
*
* If there are multiple producers and one consumer, llist_add can be
* used in producers and llist_del_all or llist_del_first can be used
* in the consumer.
*
* This can be summarized as follow:
*
* | add | del_first | del_all
* add | - | - | -
* del_first | | L | L
* del_all | | | -
*
* Where "-" stands for no lock is needed, while "L" stands for lock
* is needed.
*
* The list entries deleted via llist_del_all can be traversed with
* traversing function such as llist_for_each etc. But the list
* entries can not be traversed safely before deleted from the list.
* The order of deleted entries is from the newest to the oldest added
* one. If you want to traverse from the oldest to the newest, you
* must reverse the order by yourself before traversing.
*
* The basic atomic operation of this list is cmpxchg on long. On
* architectures that don't have an NMI-safe cmpxchg implementation,
* the list can NOT be used in NMI handlers. So code that uses the list
* in an NMI handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG.
*/
/* Link element of a lock-less list; points at the next element or NULL. */
struct llist_node {
	struct llist_node *next;
};

/* Anchor of a lock-less list; @first is NULL while the list is empty. */
struct llist_head {
	struct llist_node *first;
};

/* Static initializer for an empty list; @name is accepted but unused. */
#define LLIST_HEAD_INIT(name)	{ .first = NULL }
/* Define a list head variable called @name, initialized as empty. */
#define LLIST_HEAD(name)	struct llist_head name = LLIST_HEAD_INIT(name)

/**
 * init_llist_head - initialize lock-less list head
 * @list:	the head for your lock-less list
 *
 * Marks the list as empty by clearing its first-entry pointer.
 */
static inline void init_llist_head(struct llist_head *list)
{
	list->first = NULL;
}
/**
 * llist_entry - get the struct of this entry
 * @ptr:	the &struct llist_node pointer.
 * @type:	the type of the struct this is embedded in.
 * @member:	the name of the llist_node within the struct.
 *
 * This is a direct alias for container_of().
 */
#define llist_entry(ptr, type, member) \
container_of(ptr, type, member)
/**
 * llist_for_each - iterate over some deleted entries of a lock-less list
 * @pos:	the &struct llist_node to use as a loop cursor
 * @node:	the first entry of deleted list entries
 *
 * In general, entries of a lock-less list can be traversed safely only
 * after they have been deleted from the list, so the walk starts from
 * an entry rather than from the list head.
 *
 * When used directly on entries deleted from a lock-less list, the
 * traversal runs from the newest to the oldest added entry.  To go
 * from oldest to newest, reverse the order yourself before traversing.
 */
#define llist_for_each(pos, node)			\
	for ((pos) = (node); (pos) != NULL; (pos) = (pos)->next)
/**
 * llist_for_each_entry - iterate over some deleted entries of lock-less list of given type
 * @pos:	the type * to use as a loop cursor.
 * @node:	the first entry of deleted list entries.
 * @member:	the name of the llist_node within the struct.
 *
 * In general, entries of a lock-less list can be traversed safely only
 * after they have been removed from the list, so the walk starts from
 * an entry rather than from the list head.
 *
 * When used directly on entries deleted from a lock-less list, the
 * traversal runs from the newest to the oldest added entry.  To go
 * from oldest to newest, reverse the order yourself before traversing.
 *
 * The loop ends when the llist_node embedded in @pos is at address 0,
 * i.e. when @pos was derived from a NULL node pointer.  The check is
 * done with integer arithmetic rather than by comparing the member's
 * address with NULL, because a compiler may assume that the address of
 * a struct member is never NULL and remove such a comparison.
 */
#define llist_for_each_entry(pos, node, member)				\
	for ((pos) = llist_entry((node), typeof(*(pos)), member);	\
	     (unsigned long)(pos) + offsetof(typeof(*(pos)), member) != 0; \
	     (pos) = llist_entry((pos)->member.next, typeof(*(pos)), member))
/**
 * llist_empty - tests whether a lock-less list is empty
 * @head:	the list to test
 *
 * Returns non-zero when the first-entry pointer of @head reads as NULL.
 * The result is not guaranteed to be accurate or up to date; it is just
 * a quick way to test for emptiness without deleting anything from the
 * list.
 */
static inline int llist_empty(const struct llist_head *head)
{
	return !ACCESS_ONCE(head->first);
}
void llist_add(struct llist_node *new, struct llist_head *head);
void llist_add_batch(struct llist_node *new_first, struct llist_node *new_last,
struct llist_head *head);
struct llist_node *llist_del_first(struct llist_head *head);
struct llist_node *llist_del_all(struct llist_head *head);
#endif /* LLIST_H */
......@@ -1600,6 +1600,7 @@ enum mf_flags {
};
extern void memory_failure(unsigned long pfn, int trapno);
extern int __memory_failure(unsigned long pfn, int trapno, int flags);
extern void memory_failure_queue(unsigned long pfn, int trapno, int flags);
extern int unpoison_memory(unsigned long pfn);
extern int sysctl_memory_failure_early_kill;
extern int sysctl_memory_failure_recovery;
......
......@@ -276,4 +276,7 @@ config CORDIC
so its calculations are in fixed point. Modules can select this
when they require this function. Module will be called cordic.
config LLIST
bool
endmenu
......@@ -115,6 +115,8 @@ obj-$(CONFIG_CPU_RMAP) += cpu_rmap.o
obj-$(CONFIG_CORDIC) += cordic.o
obj-$(CONFIG_LLIST) += llist.o
hostprogs-y := gen_crc32table
clean-files := crc32table.h
......
......@@ -271,8 +271,6 @@ int __bitmap_weight(const unsigned long *bitmap, int bits)
}
EXPORT_SYMBOL(__bitmap_weight);
#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) % BITS_PER_LONG))
void bitmap_set(unsigned long *map, int start, int nr)
{
unsigned long *p = map + BIT_WORD(start);
......
This diff is collapsed.
/*
* Lock-less NULL terminated single linked list
*
* The basic atomic operation of this list is cmpxchg on long. On
* architectures that don't have an NMI-safe cmpxchg implementation,
* the list can NOT be used in NMI handlers. So code that uses the list
* in an NMI handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG.
*
* Copyright 2010,2011 Intel Corp.
* Author: Huang Ying <ying.huang@intel.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version
* 2 as published by the Free Software Foundation;
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/llist.h>
#include <asm/system.h>
/**
 * llist_add - add a new entry
 * @new:	new entry to be added
 * @head:	the head for your lock-less list
 *
 * Pushes @new in front of the current first entry.  The head pointer is
 * swapped with cmpxchg; if another update got in between, the attempt is
 * repeated against the value cmpxchg observed.
 */
void llist_add(struct llist_node *new, struct llist_head *head)
{
	struct llist_node *expected, *seen;

#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
	BUG_ON(in_nmi());
#endif

	seen = head->first;
	for (;;) {
		expected = seen;
		new->next = expected;
		cpu_relax();
		seen = cmpxchg(&head->first, expected, new);
		if (seen == expected)
			break;
	}
}
EXPORT_SYMBOL_GPL(llist_add);
/**
 * llist_add_batch - add several linked entries in batch
 * @new_first:	first entry in batch to be added
 * @new_last:	last entry in batch to be added
 * @head:	the head for your lock-less list
 *
 * Links @new_last to the current first entry and installs @new_first as
 * the new first entry with cmpxchg, retrying against the observed value
 * whenever the head changed in the meantime.
 */
void llist_add_batch(struct llist_node *new_first, struct llist_node *new_last,
		     struct llist_head *head)
{
	struct llist_node *expected, *seen;

#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
	BUG_ON(in_nmi());
#endif

	seen = head->first;
	for (;;) {
		expected = seen;
		new_last->next = expected;
		cpu_relax();
		seen = cmpxchg(&head->first, expected, new_first);
		if (seen == expected)
			break;
	}
}
EXPORT_SYMBOL_GPL(llist_add_batch);
/**
 * llist_del_first - delete the first entry of lock-less list
 * @head:	the head for your lock-less list
 *
 * Returns NULL if the list is empty; otherwise removes and returns the
 * first entry, which is the most recently added one.
 *
 * Only one llist_del_first user can run simultaneously with multiple
 * llist_add users without a lock.  Otherwise a llist_del_first,
 * llist_add, llist_add (or llist_del_all, llist_add, llist_add)
 * sequence in another user may change @head->first->next while keeping
 * @head->first.  If multiple consumers are needed, use llist_del_all or
 * a lock between the consumers.
 */
struct llist_node *llist_del_first(struct llist_head *head)
{
	struct llist_node *expected, *seen, *successor;

#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
	BUG_ON(in_nmi());
#endif

	seen = head->first;
	for (;;) {
		if (!seen)
			return NULL;
		expected = seen;
		successor = seen->next;
		cpu_relax();
		seen = cmpxchg(&head->first, expected, successor);
		if (seen == expected)
			return seen;
	}
}
EXPORT_SYMBOL_GPL(llist_del_first);
/**
 * llist_del_all - delete all entries from lock-less list
 * @head:	the head of lock-less list to delete all entries
 *
 * Exchanges the first-entry pointer of @head with NULL and returns the
 * previous value: NULL if the list was empty, otherwise the first entry
 * of the removed chain.  The removed entries are ordered from the newest
 * to the oldest added one.
 */
struct llist_node *llist_del_all(struct llist_head *head)
{
	struct llist_node *detached;

#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
	BUG_ON(in_nmi());
#endif

	detached = xchg(&head->first, NULL);
	return detached;
}
EXPORT_SYMBOL_GPL(llist_del_all);
......@@ -53,6 +53,7 @@
#include <linux/hugetlb.h>
#include <linux/memory_hotplug.h>
#include <linux/mm_inline.h>
#include <linux/kfifo.h>
#include "internal.h"
int sysctl_memory_failure_early_kill __read_mostly = 0;
......@@ -1178,6 +1179,97 @@ void memory_failure(unsigned long pfn, int trapno)
__memory_failure(pfn, trapno, 0);
}
/* Each per-CPU FIFO holds 1 << MEMORY_FAILURE_FIFO_ORDER (16) entries. */
#define MEMORY_FAILURE_FIFO_ORDER 4
#define MEMORY_FAILURE_FIFO_SIZE (1 << MEMORY_FAILURE_FIFO_ORDER)
/*
 * One queued memory failure: the three arguments that the work function
 * later hands to __memory_failure().
 */
struct memory_failure_entry {
unsigned long pfn;
int trapno;
int flags;
};
/*
 * Per-CPU queue state.
 * fifo: pending memory_failure_entry records.
 * lock: taken (with interrupts disabled) around every kfifo_put() and
 *       kfifo_get() on fifo.
 * work: work item that drains fifo; scheduled by memory_failure_queue().
 */
struct memory_failure_cpu {
DECLARE_KFIFO(fifo, struct memory_failure_entry,
MEMORY_FAILURE_FIFO_SIZE);
spinlock_t lock;
struct work_struct work;
};
/* One queue per CPU; set up for every possible CPU in memory_failure_init(). */
static DEFINE_PER_CPU(struct memory_failure_cpu, memory_failure_cpu);
/**
 * memory_failure_queue - Schedule handling memory failure of a page.
 * @pfn: Page Number of the corrupted page
 * @trapno: Trap number reported in the signal to user space.
 * @flags: Flags for memory failure handling
 *
 * This function is called by the low level hardware error handler
 * when it detects hardware memory corruption of a page. It schedules
 * the recovering of error page, including dropping pages, killing
 * processes etc.
 *
 * The function is primarily of use for corruptions that
 * happen outside the current execution context (e.g. when
 * detected by a background scrubber)
 *
 * The request is appended to the current CPU's FIFO and that CPU's work
 * item is scheduled.  If the FIFO is full, the request is dropped and an
 * error is logged.
 *
 * Can run in IRQ context.
 */
void memory_failure_queue(unsigned long pfn, int trapno, int flags)
{
	struct memory_failure_cpu *mf_cpu;
	unsigned long proc_flags;
	struct memory_failure_entry entry = {
		.pfn =		pfn,
		.trapno =	trapno,
		.flags =	flags,
	};

	mf_cpu = &get_cpu_var(memory_failure_cpu);
	spin_lock_irqsave(&mf_cpu->lock, proc_flags);
	if (kfifo_put(&mf_cpu->fifo, &entry))
		schedule_work_on(smp_processor_id(), &mf_cpu->work);
	else
		/* "%#lx" already emits the "0x" prefix; do not add another. */
		pr_err("Memory failure: buffer overflow when queuing memory failure at %#lx\n",
		       pfn);
	spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
	put_cpu_var(memory_failure_cpu);
}
EXPORT_SYMBOL_GPL(memory_failure_queue);
/*
 * Work handler: drain this CPU's memory failure FIFO.
 *
 * Entries are taken out one at a time under the queue lock, and each is
 * passed to __memory_failure() after the lock has been released.  The
 * function returns once the FIFO yields no more entries.
 */
static void memory_failure_work_func(struct work_struct *work)
{
	struct memory_failure_cpu *mf_cpu;
	struct memory_failure_entry entry = { 0, };
	unsigned long proc_flags;
	int gotten;

	mf_cpu = &__get_cpu_var(memory_failure_cpu);
	do {
		spin_lock_irqsave(&mf_cpu->lock, proc_flags);
		gotten = kfifo_get(&mf_cpu->fifo, &entry);
		spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
		if (gotten)
			__memory_failure(entry.pfn, entry.trapno, entry.flags);
	} while (gotten);
}
/*
 * Initialize the memory failure queue of every possible CPU: its lock,
 * its FIFO, and the work item bound to memory_failure_work_func().
 * Always returns 0.
 */
static int __init memory_failure_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		struct memory_failure_cpu *mf_cpu =
			&per_cpu(memory_failure_cpu, cpu);

		spin_lock_init(&mf_cpu->lock);
		INIT_KFIFO(mf_cpu->fifo);
		INIT_WORK(&mf_cpu->work, memory_failure_work_func);
	}

	return 0;
}
core_initcall(memory_failure_init);
/**
* unpoison_memory - Unpoison a previously poisoned page
* @pfn: Page number of the to be unpoisoned page
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment