Commit 885b976f authored by Huang Ying's avatar Huang Ying Committed by Len Brown

ACPI, APEI, Add ERST record ID cache

APEI ERST firmware interface and implementation has no multiple users
in mind.  For example, if there is four records in storage with ID: 1,
2, 3 and 4, if two ERST readers enumerate the records via
GET_NEXT_RECORD_ID as follow,

reader 1		reader 2
1
			2
3
			4
-1
			-1

where -1 signals there is no more record ID.

Reader 1 has no chance to check record 2 and 4, while reader 2 has no
chance to check record 1 and 3.  And any other GET_NEXT_RECORD_ID will
return -1, that is, other readers will has no chance to check any
record even they are not cleared by anyone.

This makes raw GET_NEXT_RECORD_ID not suitable for used by multiple
users.

To solve the issue, an in-memory ERST record ID cache is designed and
implemented.  When enumerating record ID, the ID returned by
GET_NEXT_RECORD_ID is added into cache in addition to be returned to
caller.  So other readers can check the cache to get all record ID
available.
Signed-off-by: default avatarHuang Ying <ying.huang@intel.com>
Reviewed-by: default avatarAndi Kleen <ak@linux.intel.com>
Signed-off-by: default avatarLen Brown <len.brown@intel.com>
parent dd9c1549
......@@ -106,24 +106,34 @@ int apei_write_mce(struct mce *m)
ssize_t apei_read_mce(struct mce *m, u64 *record_id)
{
struct cper_mce_record rcd;
ssize_t len;
len = erst_read_next(&rcd.hdr, sizeof(rcd));
if (len <= 0)
return len;
/* Can not skip other records in storage via ERST unless clear them */
else if (len != sizeof(rcd) ||
uuid_le_cmp(rcd.hdr.creator_id, CPER_CREATOR_MCE)) {
if (printk_ratelimit())
pr_warning(
"MCE-APEI: Can not skip the unknown record in ERST");
return -EIO;
}
int rc, pos;
rc = erst_get_record_id_begin(&pos);
if (rc)
return rc;
retry:
rc = erst_get_record_id_next(&pos, record_id);
if (rc)
goto out;
/* no more record */
if (*record_id == APEI_ERST_INVALID_RECORD_ID)
goto out;
rc = erst_read(*record_id, &rcd.hdr, sizeof(rcd));
/* someone else has cleared the record, try next one */
if (rc == -ENOENT)
goto retry;
else if (rc < 0)
goto out;
/* try to skip other type records in storage */
else if (rc != sizeof(rcd) ||
uuid_le_cmp(rcd.hdr.creator_id, CPER_CREATOR_MCE))
goto retry;
memcpy(m, &rcd.mce, sizeof(*m));
*record_id = rcd.hdr.record_id;
rc = sizeof(*m);
out:
erst_get_record_id_end();
return sizeof(*m);
return rc;
}
/* Check whether there is record in ERST */
......
......@@ -43,12 +43,27 @@ static DEFINE_MUTEX(erst_dbg_mutex);
static int erst_dbg_open(struct inode *inode, struct file *file)
{
int rc, *pos;
if (erst_disable)
return -ENODEV;
pos = (int *)&file->private_data;
rc = erst_get_record_id_begin(pos);
if (rc)
return rc;
return nonseekable_open(inode, file);
}
static int erst_dbg_release(struct inode *inode, struct file *file)
{
erst_get_record_id_end();
return 0;
}
static long erst_dbg_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
{
int rc;
......@@ -79,18 +94,20 @@ static long erst_dbg_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
static ssize_t erst_dbg_read(struct file *filp, char __user *ubuf,
size_t usize, loff_t *off)
{
int rc;
int rc, *pos;
ssize_t len = 0;
u64 id;
if (*off != 0)
if (*off)
return -EINVAL;
if (mutex_lock_interruptible(&erst_dbg_mutex) != 0)
return -EINTR;
pos = (int *)&filp->private_data;
retry_next:
rc = erst_get_next_record_id(&id);
rc = erst_get_record_id_next(pos, &id);
if (rc)
goto out;
/* no more record */
......@@ -181,6 +198,7 @@ static ssize_t erst_dbg_write(struct file *filp, const char __user *ubuf,
static const struct file_operations erst_dbg_ops = {
.owner = THIS_MODULE,
.open = erst_dbg_open,
.release = erst_dbg_release,
.read = erst_dbg_read,
.write = erst_dbg_write,
.unlocked_ioctl = erst_dbg_ioctl,
......
......@@ -429,6 +429,22 @@ ssize_t erst_get_record_count(void)
}
EXPORT_SYMBOL_GPL(erst_get_record_count);
#define ERST_RECORD_ID_CACHE_SIZE_MIN 16
#define ERST_RECORD_ID_CACHE_SIZE_MAX 1024
struct erst_record_id_cache {
struct mutex lock;
u64 *entries;
int len;
int size;
int refcount;
};
static struct erst_record_id_cache erst_record_id_cache = {
.lock = __MUTEX_INITIALIZER(erst_record_id_cache.lock),
.refcount = 0,
};
static int __erst_get_next_record_id(u64 *record_id)
{
struct apei_exec_context ctx;
......@@ -443,26 +459,179 @@ static int __erst_get_next_record_id(u64 *record_id)
return 0;
}
int erst_get_record_id_begin(int *pos)
{
int rc;
if (erst_disable)
return -ENODEV;
rc = mutex_lock_interruptible(&erst_record_id_cache.lock);
if (rc)
return rc;
erst_record_id_cache.refcount++;
mutex_unlock(&erst_record_id_cache.lock);
*pos = 0;
return 0;
}
EXPORT_SYMBOL_GPL(erst_get_record_id_begin);
/* erst_record_id_cache.lock must be held by caller */
static int __erst_record_id_cache_add_one(void)
{
u64 id, prev_id, first_id;
int i, rc;
u64 *entries;
unsigned long flags;
id = prev_id = first_id = APEI_ERST_INVALID_RECORD_ID;
retry:
raw_spin_lock_irqsave(&erst_lock, flags);
rc = __erst_get_next_record_id(&id);
raw_spin_unlock_irqrestore(&erst_lock, flags);
if (rc == -ENOENT)
return 0;
if (rc)
return rc;
if (id == APEI_ERST_INVALID_RECORD_ID)
return 0;
/* can not skip current ID, or loop back to first ID */
if (id == prev_id || id == first_id)
return 0;
if (first_id == APEI_ERST_INVALID_RECORD_ID)
first_id = id;
prev_id = id;
entries = erst_record_id_cache.entries;
for (i = 0; i < erst_record_id_cache.len; i++) {
if (entries[i] == id)
break;
}
/* record id already in cache, try next */
if (i < erst_record_id_cache.len)
goto retry;
if (erst_record_id_cache.len >= erst_record_id_cache.size) {
int new_size, alloc_size;
u64 *new_entries;
new_size = erst_record_id_cache.size * 2;
new_size = clamp_val(new_size, ERST_RECORD_ID_CACHE_SIZE_MIN,
ERST_RECORD_ID_CACHE_SIZE_MAX);
if (new_size <= erst_record_id_cache.size) {
if (printk_ratelimit())
pr_warning(FW_WARN ERST_PFX
"too many record ID!\n");
return 0;
}
alloc_size = new_size * sizeof(entries[0]);
if (alloc_size < PAGE_SIZE)
new_entries = kmalloc(alloc_size, GFP_KERNEL);
else
new_entries = vmalloc(alloc_size);
if (!new_entries)
return -ENOMEM;
memcpy(new_entries, entries,
erst_record_id_cache.len * sizeof(entries[0]));
if (erst_record_id_cache.size < PAGE_SIZE)
kfree(entries);
else
vfree(entries);
erst_record_id_cache.entries = entries = new_entries;
erst_record_id_cache.size = new_size;
}
entries[i] = id;
erst_record_id_cache.len++;
return 1;
}
/*
* Get the record ID of an existing error record on the persistent
* storage. If there is no error record on the persistent storage, the
* returned record_id is APEI_ERST_INVALID_RECORD_ID.
*/
int erst_get_next_record_id(u64 *record_id)
int erst_get_record_id_next(int *pos, u64 *record_id)
{
int rc;
unsigned long flags;
int rc = 0;
u64 *entries;
if (erst_disable)
return -ENODEV;
raw_spin_lock_irqsave(&erst_lock, flags);
rc = __erst_get_next_record_id(record_id);
raw_spin_unlock_irqrestore(&erst_lock, flags);
/* must be enclosed by erst_get_record_id_begin/end */
BUG_ON(!erst_record_id_cache.refcount);
BUG_ON(*pos < 0 || *pos > erst_record_id_cache.len);
mutex_lock(&erst_record_id_cache.lock);
entries = erst_record_id_cache.entries;
for (; *pos < erst_record_id_cache.len; (*pos)++)
if (entries[*pos] != APEI_ERST_INVALID_RECORD_ID)
break;
/* found next record id in cache */
if (*pos < erst_record_id_cache.len) {
*record_id = entries[*pos];
(*pos)++;
goto out_unlock;
}
/* Try to add one more record ID to cache */
rc = __erst_record_id_cache_add_one();
if (rc < 0)
goto out_unlock;
/* successfully add one new ID */
if (rc == 1) {
*record_id = erst_record_id_cache.entries[*pos];
(*pos)++;
rc = 0;
} else {
*pos = -1;
*record_id = APEI_ERST_INVALID_RECORD_ID;
}
out_unlock:
mutex_unlock(&erst_record_id_cache.lock);
return rc;
}
EXPORT_SYMBOL_GPL(erst_get_next_record_id);
EXPORT_SYMBOL_GPL(erst_get_record_id_next);
/* erst_record_id_cache.lock must be held by caller */
static void __erst_record_id_cache_compact(void)
{
int i, wpos = 0;
u64 *entries;
if (erst_record_id_cache.refcount)
return;
entries = erst_record_id_cache.entries;
for (i = 0; i < erst_record_id_cache.len; i++) {
if (entries[i] == APEI_ERST_INVALID_RECORD_ID)
continue;
if (wpos != i)
memcpy(&entries[wpos], &entries[i], sizeof(entries[i]));
wpos++;
}
erst_record_id_cache.len = wpos;
}
void erst_get_record_id_end(void)
{
/*
* erst_disable != 0 should be detected by invoker via the
* return value of erst_get_record_id_begin/next, so this
* function should not be called for erst_disable != 0.
*/
BUG_ON(erst_disable);
mutex_lock(&erst_record_id_cache.lock);
erst_record_id_cache.refcount--;
BUG_ON(erst_record_id_cache.refcount < 0);
__erst_record_id_cache_compact();
mutex_unlock(&erst_record_id_cache.lock);
}
EXPORT_SYMBOL_GPL(erst_get_record_id_end);
static int __erst_write_to_storage(u64 offset)
{
......@@ -703,56 +872,34 @@ ssize_t erst_read(u64 record_id, struct cper_record_header *record,
}
EXPORT_SYMBOL_GPL(erst_read);
/*
* If return value > buflen, the buffer size is not big enough,
* else if return value = 0, there is no more record to read,
* else if return value < 0, something goes wrong,
* else everything is OK, and return value is record length
*/
ssize_t erst_read_next(struct cper_record_header *record, size_t buflen)
{
int rc;
ssize_t len;
unsigned long flags;
u64 record_id;
if (erst_disable)
return -ENODEV;
raw_spin_lock_irqsave(&erst_lock, flags);
rc = __erst_get_next_record_id(&record_id);
if (rc) {
raw_spin_unlock_irqrestore(&erst_lock, flags);
return rc;
}
/* no more record */
if (record_id == APEI_ERST_INVALID_RECORD_ID) {
raw_spin_unlock_irqrestore(&erst_lock, flags);
return 0;
}
len = __erst_read(record_id, record, buflen);
raw_spin_unlock_irqrestore(&erst_lock, flags);
return len;
}
EXPORT_SYMBOL_GPL(erst_read_next);
int erst_clear(u64 record_id)
{
int rc;
int rc, i;
unsigned long flags;
u64 *entries;
if (erst_disable)
return -ENODEV;
rc = mutex_lock_interruptible(&erst_record_id_cache.lock);
if (rc)
return rc;
raw_spin_lock_irqsave(&erst_lock, flags);
if (erst_erange.attr & ERST_RANGE_NVRAM)
rc = __erst_clear_from_nvram(record_id);
else
rc = __erst_clear_from_storage(record_id);
raw_spin_unlock_irqrestore(&erst_lock, flags);
if (rc)
goto out;
entries = erst_record_id_cache.entries;
for (i = 0; i < erst_record_id_cache.len; i++) {
if (entries[i] == record_id)
entries[i] = APEI_ERST_INVALID_RECORD_ID;
}
__erst_record_id_cache_compact();
out:
mutex_unlock(&erst_record_id_cache.lock);
return rc;
}
EXPORT_SYMBOL_GPL(erst_clear);
......
......@@ -30,10 +30,11 @@ int apei_hest_parse(apei_hest_func_t func, void *data);
int erst_write(const struct cper_record_header *record);
ssize_t erst_get_record_count(void);
int erst_get_next_record_id(u64 *record_id);
int erst_get_record_id_begin(int *pos);
int erst_get_record_id_next(int *pos, u64 *record_id);
void erst_get_record_id_end(void);
ssize_t erst_read(u64 record_id, struct cper_record_header *record,
size_t buflen);
ssize_t erst_read_next(struct cper_record_header *record, size_t buflen);
int erst_clear(u64 record_id);
#endif
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment