Commit b1f4c00e authored by Greg Kroah-Hartman's avatar Greg Kroah-Hartman

Merge tag 'fsi-for-v5.16' of...

Merge tag 'fsi-for-v5.16' of git://git.kernel.org/pub/scm/linux/kernel/git/joel/fsi into char-misc-next

Joel writes:

FSI changes for v5.16

 - SBEFIFO userspace interfaces to perform FFDC (First Failure
   Data Capture) and detect timeouts

 - A fix to handle multiple messages in flight

* tag 'fsi-for-v5.16' of git://git.kernel.org/pub/scm/linux/kernel/git/joel/fsi:
  fsi: sbefifo: Use interruptible mutex locking
  fsi: sbefifo: Add sysfs file indicating a timeout error
  docs: ABI: testing: Document the SBEFIFO timeout interface
  hwmon: (occ) Provide the SBEFIFO FFDC in binary sysfs
  docs: ABI: testing: Document the OCC hwmon FFDC binary interface
  fsi: occ: Store the SBEFIFO FFDC in the user response buffer
  fsi: occ: Use a large buffer for responses
  hwmon: (occ) Remove sequence numbering and checksum calculation
  fsi: occ: Force sequence numbering per OCC
parents 8ac33b8b 7cc2f34e
What: /sys/bus/fsi/devices/XX.XX.00:06/sbefifoX/timeout
KernelVersion: 5.15
Contact: eajames@linux.ibm.com
Description:
Indicates whether or not this SBE device has experienced a
timeout; i.e. the SBE did not respond within the time allotted
by the driver. A value of 1 indicates that a timeout has
occurred and no transfers have completed since the timeout. A
value of 0 indicates that no timeout has occurred, or if one
has, more recent transfers have completed successfully.
What: /sys/bus/platform/devices/occ-hwmon.X/ffdc
KernelVersion: 5.15
Contact: eajames@linux.ibm.com
Description:
Contains the First Failure Data Capture from the SBEFIFO
hardware, if there is any from a previous transfer. Otherwise,
the file is empty. The data is cleared when it's been
completely read by a user. As the name suggests, only the data
from the first error is saved, until it's cleared upon read. The OCC hwmon driver, running on
a Baseboard Management Controller (BMC), communicates with
POWER9 and up processors over the Self-Boot Engine (SBE) FIFO.
In many error conditions, the SBEFIFO will return error data
indicating the type of error and system state, etc.
This diff is collapsed.
...@@ -124,6 +124,7 @@ struct sbefifo { ...@@ -124,6 +124,7 @@ struct sbefifo {
bool broken; bool broken;
bool dead; bool dead;
bool async_ffdc; bool async_ffdc;
bool timed_out;
}; };
struct sbefifo_user { struct sbefifo_user {
...@@ -136,6 +137,14 @@ struct sbefifo_user { ...@@ -136,6 +137,14 @@ struct sbefifo_user {
static DEFINE_MUTEX(sbefifo_ffdc_mutex); static DEFINE_MUTEX(sbefifo_ffdc_mutex);
/*
 * Read handler for the "timeout" device attribute.
 *
 * Writes "1\n" into @buf when the enclosing sbefifo's timed_out flag is
 * set and "0\n" otherwise, returning whatever sysfs_emit() returns.
 */
static ssize_t timeout_show(struct device *dev, struct device_attribute *attr,
			    char *buf)
{
	struct sbefifo *fifo = container_of(dev, struct sbefifo, dev);
	int flag = 0;

	if (fifo->timed_out)
		flag = 1;

	return sysfs_emit(buf, "%d\n", flag);
}
static DEVICE_ATTR_RO(timeout);
static void __sbefifo_dump_ffdc(struct device *dev, const __be32 *ffdc, static void __sbefifo_dump_ffdc(struct device *dev, const __be32 *ffdc,
size_t ffdc_sz, bool internal) size_t ffdc_sz, bool internal)
...@@ -462,11 +471,14 @@ static int sbefifo_wait(struct sbefifo *sbefifo, bool up, ...@@ -462,11 +471,14 @@ static int sbefifo_wait(struct sbefifo *sbefifo, bool up,
break; break;
} }
if (!ready) { if (!ready) {
sysfs_notify(&sbefifo->dev.kobj, NULL, dev_attr_timeout.attr.name);
sbefifo->timed_out = true;
dev_err(dev, "%s FIFO Timeout ! status=%08x\n", up ? "UP" : "DOWN", sts); dev_err(dev, "%s FIFO Timeout ! status=%08x\n", up ? "UP" : "DOWN", sts);
return -ETIMEDOUT; return -ETIMEDOUT;
} }
dev_vdbg(dev, "End of wait status: %08x\n", sts); dev_vdbg(dev, "End of wait status: %08x\n", sts);
sbefifo->timed_out = false;
*status = sts; *status = sts;
return 0; return 0;
...@@ -740,7 +752,9 @@ int sbefifo_submit(struct device *dev, const __be32 *command, size_t cmd_len, ...@@ -740,7 +752,9 @@ int sbefifo_submit(struct device *dev, const __be32 *command, size_t cmd_len,
iov_iter_kvec(&resp_iter, WRITE, &resp_iov, 1, rbytes); iov_iter_kvec(&resp_iter, WRITE, &resp_iov, 1, rbytes);
/* Perform the command */ /* Perform the command */
mutex_lock(&sbefifo->lock); rc = mutex_lock_interruptible(&sbefifo->lock);
if (rc)
return rc;
rc = __sbefifo_submit(sbefifo, command, cmd_len, &resp_iter); rc = __sbefifo_submit(sbefifo, command, cmd_len, &resp_iter);
mutex_unlock(&sbefifo->lock); mutex_unlock(&sbefifo->lock);
...@@ -820,7 +834,9 @@ static ssize_t sbefifo_user_read(struct file *file, char __user *buf, ...@@ -820,7 +834,9 @@ static ssize_t sbefifo_user_read(struct file *file, char __user *buf,
iov_iter_init(&resp_iter, WRITE, &resp_iov, 1, len); iov_iter_init(&resp_iter, WRITE, &resp_iov, 1, len);
/* Perform the command */ /* Perform the command */
mutex_lock(&sbefifo->lock); rc = mutex_lock_interruptible(&sbefifo->lock);
if (rc)
goto bail;
rc = __sbefifo_submit(sbefifo, user->pending_cmd, cmd_len, &resp_iter); rc = __sbefifo_submit(sbefifo, user->pending_cmd, cmd_len, &resp_iter);
mutex_unlock(&sbefifo->lock); mutex_unlock(&sbefifo->lock);
if (rc < 0) if (rc < 0)
...@@ -875,7 +891,9 @@ static ssize_t sbefifo_user_write(struct file *file, const char __user *buf, ...@@ -875,7 +891,9 @@ static ssize_t sbefifo_user_write(struct file *file, const char __user *buf,
user->pending_len = 0; user->pending_len = 0;
/* Trigger reset request */ /* Trigger reset request */
mutex_lock(&sbefifo->lock); rc = mutex_lock_interruptible(&sbefifo->lock);
if (rc)
goto bail;
rc = sbefifo_request_reset(user->sbefifo); rc = sbefifo_request_reset(user->sbefifo);
mutex_unlock(&sbefifo->lock); mutex_unlock(&sbefifo->lock);
if (rc == 0) if (rc == 0)
...@@ -993,6 +1011,8 @@ static int sbefifo_probe(struct device *dev) ...@@ -993,6 +1011,8 @@ static int sbefifo_probe(struct device *dev)
child_name); child_name);
} }
device_create_file(&sbefifo->dev, &dev_attr_timeout);
return 0; return 0;
err_free_minor: err_free_minor:
fsi_free_minor(sbefifo->dev.devt); fsi_free_minor(sbefifo->dev.devt);
...@@ -1018,6 +1038,8 @@ static int sbefifo_remove(struct device *dev) ...@@ -1018,6 +1038,8 @@ static int sbefifo_remove(struct device *dev)
dev_dbg(dev, "Removing sbefifo device...\n"); dev_dbg(dev, "Removing sbefifo device...\n");
device_remove_file(&sbefifo->dev, &dev_attr_timeout);
mutex_lock(&sbefifo->lock); mutex_lock(&sbefifo->lock);
sbefifo->dead = true; sbefifo->dead = true;
mutex_unlock(&sbefifo->lock); mutex_unlock(&sbefifo->lock);
......
...@@ -132,22 +132,20 @@ struct extended_sensor { ...@@ -132,22 +132,20 @@ struct extended_sensor {
static int occ_poll(struct occ *occ) static int occ_poll(struct occ *occ)
{ {
int rc; int rc;
u16 checksum = occ->poll_cmd_data + occ->seq_no + 1; u8 cmd[7];
u8 cmd[8];
struct occ_poll_response_header *header; struct occ_poll_response_header *header;
/* big endian */ /* big endian */
cmd[0] = occ->seq_no++; /* sequence number */ cmd[0] = 0; /* sequence number */
cmd[1] = 0; /* cmd type */ cmd[1] = 0; /* cmd type */
cmd[2] = 0; /* data length msb */ cmd[2] = 0; /* data length msb */
cmd[3] = 1; /* data length lsb */ cmd[3] = 1; /* data length lsb */
cmd[4] = occ->poll_cmd_data; /* data */ cmd[4] = occ->poll_cmd_data; /* data */
cmd[5] = checksum >> 8; /* checksum msb */ cmd[5] = 0; /* checksum msb */
cmd[6] = checksum & 0xFF; /* checksum lsb */ cmd[6] = 0; /* checksum lsb */
cmd[7] = 0;
/* mutex should already be locked if necessary */ /* mutex should already be locked if necessary */
rc = occ->send_cmd(occ, cmd); rc = occ->send_cmd(occ, cmd, sizeof(cmd));
if (rc) { if (rc) {
occ->last_error = rc; occ->last_error = rc;
if (occ->error_count++ > OCC_ERROR_COUNT_THRESHOLD) if (occ->error_count++ > OCC_ERROR_COUNT_THRESHOLD)
...@@ -184,25 +182,23 @@ static int occ_set_user_power_cap(struct occ *occ, u16 user_power_cap) ...@@ -184,25 +182,23 @@ static int occ_set_user_power_cap(struct occ *occ, u16 user_power_cap)
{ {
int rc; int rc;
u8 cmd[8]; u8 cmd[8];
u16 checksum = 0x24;
__be16 user_power_cap_be = cpu_to_be16(user_power_cap); __be16 user_power_cap_be = cpu_to_be16(user_power_cap);
cmd[0] = 0; cmd[0] = 0; /* sequence number */
cmd[1] = 0x22; cmd[1] = 0x22; /* cmd type */
cmd[2] = 0; cmd[2] = 0; /* data length msb */
cmd[3] = 2; cmd[3] = 2; /* data length lsb */
memcpy(&cmd[4], &user_power_cap_be, 2); memcpy(&cmd[4], &user_power_cap_be, 2);
checksum += cmd[4] + cmd[5]; cmd[6] = 0; /* checksum msb */
cmd[6] = checksum >> 8; cmd[7] = 0; /* checksum lsb */
cmd[7] = checksum & 0xFF;
rc = mutex_lock_interruptible(&occ->lock); rc = mutex_lock_interruptible(&occ->lock);
if (rc) if (rc)
return rc; return rc;
rc = occ->send_cmd(occ, cmd); rc = occ->send_cmd(occ, cmd, sizeof(cmd));
mutex_unlock(&occ->lock); mutex_unlock(&occ->lock);
...@@ -1144,8 +1140,6 @@ int occ_setup(struct occ *occ, const char *name) ...@@ -1144,8 +1140,6 @@ int occ_setup(struct occ *occ, const char *name)
{ {
int rc; int rc;
/* start with 1 to avoid false match with zero-initialized SRAM buffer */
occ->seq_no = 1;
mutex_init(&occ->lock); mutex_init(&occ->lock);
occ->groups[0] = &occ->group; occ->groups[0] = &occ->group;
......
...@@ -95,9 +95,8 @@ struct occ { ...@@ -95,9 +95,8 @@ struct occ {
struct occ_sensors sensors; struct occ_sensors sensors;
int powr_sample_time_us; /* average power sample time */ int powr_sample_time_us; /* average power sample time */
u8 seq_no;
u8 poll_cmd_data; /* to perform OCC poll command */ u8 poll_cmd_data; /* to perform OCC poll command */
int (*send_cmd)(struct occ *occ, u8 *cmd); int (*send_cmd)(struct occ *occ, u8 *cmd, size_t len);
unsigned long next_update; unsigned long next_update;
struct mutex lock; /* lock OCC access */ struct mutex lock; /* lock OCC access */
......
...@@ -97,18 +97,21 @@ static int p8_i2c_occ_putscom_u32(struct i2c_client *client, u32 address, ...@@ -97,18 +97,21 @@ static int p8_i2c_occ_putscom_u32(struct i2c_client *client, u32 address,
} }
static int p8_i2c_occ_putscom_be(struct i2c_client *client, u32 address, static int p8_i2c_occ_putscom_be(struct i2c_client *client, u32 address,
u8 *data) u8 *data, size_t len)
{ {
__be32 data0, data1; __be32 data0 = 0, data1 = 0;
memcpy(&data0, data, 4); memcpy(&data0, data, min_t(size_t, len, 4));
memcpy(&data1, data + 4, 4); if (len > 4) {
len -= 4;
memcpy(&data1, data + 4, min_t(size_t, len, 4));
}
return p8_i2c_occ_putscom_u32(client, address, be32_to_cpu(data0), return p8_i2c_occ_putscom_u32(client, address, be32_to_cpu(data0),
be32_to_cpu(data1)); be32_to_cpu(data1));
} }
static int p8_i2c_occ_send_cmd(struct occ *occ, u8 *cmd) static int p8_i2c_occ_send_cmd(struct occ *occ, u8 *cmd, size_t len)
{ {
int i, rc; int i, rc;
unsigned long start; unsigned long start;
...@@ -127,7 +130,7 @@ static int p8_i2c_occ_send_cmd(struct occ *occ, u8 *cmd) ...@@ -127,7 +130,7 @@ static int p8_i2c_occ_send_cmd(struct occ *occ, u8 *cmd)
return rc; return rc;
/* write command (expected to already be BE), we need bus-endian... */ /* write command (expected to already be BE), we need bus-endian... */
rc = p8_i2c_occ_putscom_be(client, OCB_DATA3, cmd); rc = p8_i2c_occ_putscom_be(client, OCB_DATA3, cmd, len);
if (rc) if (rc)
return rc; return rc;
......
...@@ -4,28 +4,96 @@ ...@@ -4,28 +4,96 @@
#include <linux/device.h> #include <linux/device.h>
#include <linux/errno.h> #include <linux/errno.h>
#include <linux/fsi-occ.h> #include <linux/fsi-occ.h>
#include <linux/mm.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/mutex.h>
#include <linux/platform_device.h> #include <linux/platform_device.h>
#include <linux/string.h>
#include <linux/sysfs.h>
#include "common.h" #include "common.h"
struct p9_sbe_occ { struct p9_sbe_occ {
struct occ occ; struct occ occ;
bool sbe_error;
void *ffdc;
size_t ffdc_len;
size_t ffdc_size;
struct mutex sbe_error_lock; /* lock access to ffdc data */
struct device *sbe; struct device *sbe;
}; };
#define to_p9_sbe_occ(x) container_of((x), struct p9_sbe_occ, occ) #define to_p9_sbe_occ(x) container_of((x), struct p9_sbe_occ, occ)
static int p9_sbe_occ_send_cmd(struct occ *occ, u8 *cmd) static ssize_t ffdc_read(struct file *filp, struct kobject *kobj,
struct bin_attribute *battr, char *buf, loff_t pos,
size_t count)
{
/*
 * Binary sysfs read handler for the "ffdc" attribute.
 *
 * Copies up to @count bytes of the saved FFDC buffer, starting at @pos,
 * into @buf while holding sbe_error_lock. Returns the result of
 * memory_read_from_buffer(), or 0 when no SBE error is currently flagged.
 */
ssize_t rc = 0;
struct occ *occ = dev_get_drvdata(kobj_to_dev(kobj));
struct p9_sbe_occ *ctx = to_p9_sbe_occ(occ);
mutex_lock(&ctx->sbe_error_lock);
if (ctx->sbe_error) {
rc = memory_read_from_buffer(buf, count, &pos, ctx->ffdc,
ctx->ffdc_len);
/*
 * Once the position has reached the end of the saved data, clear the
 * error flag; p9_sbe_occ_save_ffdc() only stores new data while the
 * flag is clear.
 */
if (pos >= ctx->ffdc_len)
ctx->sbe_error = false;
}
mutex_unlock(&ctx->sbe_error_lock);
return rc;
}
static BIN_ATTR_RO(ffdc, OCC_MAX_RESP_WORDS * 4);
/*
 * Save a failed transfer's response data as FFDC, unless an earlier
 * capture is still flagged (sbe_error set).
 *
 * @ctx:      driver context owning the FFDC buffer
 * @resp:     response data to copy
 * @resp_len: number of bytes at @resp
 *
 * The buffer is reallocated only when @resp_len exceeds the current
 * capacity (ffdc_size). On allocation failure the buffer bookkeeping is
 * reset to empty and nothing is saved.
 *
 * Returns true when new data was stored (the caller uses this to decide
 * whether to send a sysfs notification), false otherwise.
 */
static bool p9_sbe_occ_save_ffdc(struct p9_sbe_occ *ctx, const void *resp,
				 size_t resp_len)
{
	bool notify = false;

	mutex_lock(&ctx->sbe_error_lock);

	/* Keep the first unread capture; ignore later ones. */
	if (ctx->sbe_error)
		goto done;

	if (resp_len > ctx->ffdc_size) {
		/* kvfree() is a no-op on NULL, so no guard is needed. */
		kvfree(ctx->ffdc);
		ctx->ffdc = kvmalloc(resp_len, GFP_KERNEL);
		if (!ctx->ffdc) {
			ctx->ffdc_len = 0;
			ctx->ffdc_size = 0;
			goto done;
		}

		ctx->ffdc_size = resp_len;
	}

	notify = true;
	ctx->sbe_error = true;
	ctx->ffdc_len = resp_len;
	memcpy(ctx->ffdc, resp, resp_len);

done:
	mutex_unlock(&ctx->sbe_error_lock);
	return notify;
}
static int p9_sbe_occ_send_cmd(struct occ *occ, u8 *cmd, size_t len)
{ {
struct occ_response *resp = &occ->resp; struct occ_response *resp = &occ->resp;
struct p9_sbe_occ *ctx = to_p9_sbe_occ(occ); struct p9_sbe_occ *ctx = to_p9_sbe_occ(occ);
size_t resp_len = sizeof(*resp); size_t resp_len = sizeof(*resp);
int rc; int rc;
rc = fsi_occ_submit(ctx->sbe, cmd, 8, resp, &resp_len); rc = fsi_occ_submit(ctx->sbe, cmd, len, resp, &resp_len);
if (rc < 0) if (rc < 0) {
if (resp_len) {
if (p9_sbe_occ_save_ffdc(ctx, resp, resp_len))
sysfs_notify(&occ->bus_dev->kobj, NULL,
bin_attr_ffdc.attr.name);
}
return rc; return rc;
}
switch (resp->return_status) { switch (resp->return_status) {
case OCC_RESP_CMD_IN_PRG: case OCC_RESP_CMD_IN_PRG:
...@@ -65,6 +133,8 @@ static int p9_sbe_occ_probe(struct platform_device *pdev) ...@@ -65,6 +133,8 @@ static int p9_sbe_occ_probe(struct platform_device *pdev)
if (!ctx) if (!ctx)
return -ENOMEM; return -ENOMEM;
mutex_init(&ctx->sbe_error_lock);
ctx->sbe = pdev->dev.parent; ctx->sbe = pdev->dev.parent;
occ = &ctx->occ; occ = &ctx->occ;
occ->bus_dev = &pdev->dev; occ->bus_dev = &pdev->dev;
...@@ -78,6 +148,15 @@ static int p9_sbe_occ_probe(struct platform_device *pdev) ...@@ -78,6 +148,15 @@ static int p9_sbe_occ_probe(struct platform_device *pdev)
if (rc == -ESHUTDOWN) if (rc == -ESHUTDOWN)
rc = -ENODEV; /* Host is shutdown, don't spew errors */ rc = -ENODEV; /* Host is shutdown, don't spew errors */
if (!rc) {
rc = device_create_bin_file(occ->bus_dev, &bin_attr_ffdc);
if (rc) {
dev_warn(occ->bus_dev,
"failed to create SBE error ffdc file\n");
rc = 0;
}
}
return rc; return rc;
} }
...@@ -86,9 +165,14 @@ static int p9_sbe_occ_remove(struct platform_device *pdev) ...@@ -86,9 +165,14 @@ static int p9_sbe_occ_remove(struct platform_device *pdev)
struct occ *occ = platform_get_drvdata(pdev); struct occ *occ = platform_get_drvdata(pdev);
struct p9_sbe_occ *ctx = to_p9_sbe_occ(occ); struct p9_sbe_occ *ctx = to_p9_sbe_occ(occ);
device_remove_bin_file(occ->bus_dev, &bin_attr_ffdc);
ctx->sbe = NULL; ctx->sbe = NULL;
occ_shutdown(occ); occ_shutdown(occ);
if (ctx->ffdc)
kvfree(ctx->ffdc);
return 0; return 0;
} }
......
...@@ -19,6 +19,8 @@ struct device; ...@@ -19,6 +19,8 @@ struct device;
#define OCC_RESP_CRIT_OCB 0xE3 #define OCC_RESP_CRIT_OCB 0xE3
#define OCC_RESP_CRIT_HW 0xE4 #define OCC_RESP_CRIT_HW 0xE4
#define OCC_MAX_RESP_WORDS 2048
int fsi_occ_submit(struct device *dev, const void *request, size_t req_len, int fsi_occ_submit(struct device *dev, const void *request, size_t req_len,
void *response, size_t *resp_len); void *response, size_t *resp_len);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment