Commit b1f4c00e authored by Greg Kroah-Hartman's avatar Greg Kroah-Hartman

Merge tag 'fsi-for-v5.16' of...

Merge tag 'fsi-for-v5.16' of git://git.kernel.org/pub/scm/linux/kernel/git/joel/fsi into char-misc-next

Joel writes:

FSI changes for v5.16

 - SBEFIFO usersapce interfaces to perform FFDC (First Failure
   Data Capture) and detect timeouts

 - A fix to handle multiple messages in flight

* tag 'fsi-for-v5.16' of git://git.kernel.org/pub/scm/linux/kernel/git/joel/fsi:
  fsi: sbefifo: Use interruptible mutex locking
  fsi: sbefifo: Add sysfs file indicating a timeout error
  docs: ABI: testing: Document the SBEFIFO timeout interface
  hwmon: (occ) Provide the SBEFIFO FFDC in binary sysfs
  docs: ABI: testing: Document the OCC hwmon FFDC binary interface
  fsi: occ: Store the SBEFIFO FFDC in the user response buffer
  fsi: occ: Use a large buffer for responses
  hwmon: (occ) Remove sequence numbering and checksum calculation
  fsi: occ: Force sequence numbering per OCC
parents 8ac33b8b 7cc2f34e
What: /sys/bus/fsi/devices/XX.XX.00:06/sbefifoX/timeout
KernelVersion: 5.15
Contact: eajames@linux.ibm.com
Description:
Indicates whether or not this SBE device has experienced a
timeout; i.e. the SBE did not respond within the time allotted
by the driver. A value of 1 indicates that a timeout has
ocurred and no transfers have completed since the timeout. A
value of 0 indicates that no timeout has ocurred, or if one
has, more recent transfers have completed successful.
What: /sys/bus/platform/devices/occ-hwmon.X/ffdc
KernelVersion: 5.15
Contact: eajames@linux.ibm.com
Description:
Contains the First Failure Data Capture from the SBEFIFO
hardware, if there is any from a previous transfer. Otherwise,
the file is empty. The data is cleared when it's been
completely read by a user. As the name suggests, only the data
from the first error is saved, until it's cleared upon read. The OCC hwmon driver, running on
a Baseboard Management Controller (BMC), communicates with
POWER9 and up processors over the Self-Boot Engine (SBE) FIFO.
In many error conditions, the SBEFIFO will return error data
indicating the type of error and system state, etc.
This diff is collapsed.
......@@ -124,6 +124,7 @@ struct sbefifo {
bool broken;
bool dead;
bool async_ffdc;
bool timed_out;
};
struct sbefifo_user {
......@@ -136,6 +137,14 @@ struct sbefifo_user {
static DEFINE_MUTEX(sbefifo_ffdc_mutex);
static ssize_t timeout_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct sbefifo *sbefifo = container_of(dev, struct sbefifo, dev);
return sysfs_emit(buf, "%d\n", sbefifo->timed_out ? 1 : 0);
}
static DEVICE_ATTR_RO(timeout);
static void __sbefifo_dump_ffdc(struct device *dev, const __be32 *ffdc,
size_t ffdc_sz, bool internal)
......@@ -462,11 +471,14 @@ static int sbefifo_wait(struct sbefifo *sbefifo, bool up,
break;
}
if (!ready) {
sysfs_notify(&sbefifo->dev.kobj, NULL, dev_attr_timeout.attr.name);
sbefifo->timed_out = true;
dev_err(dev, "%s FIFO Timeout ! status=%08x\n", up ? "UP" : "DOWN", sts);
return -ETIMEDOUT;
}
dev_vdbg(dev, "End of wait status: %08x\n", sts);
sbefifo->timed_out = false;
*status = sts;
return 0;
......@@ -740,7 +752,9 @@ int sbefifo_submit(struct device *dev, const __be32 *command, size_t cmd_len,
iov_iter_kvec(&resp_iter, WRITE, &resp_iov, 1, rbytes);
/* Perform the command */
mutex_lock(&sbefifo->lock);
rc = mutex_lock_interruptible(&sbefifo->lock);
if (rc)
return rc;
rc = __sbefifo_submit(sbefifo, command, cmd_len, &resp_iter);
mutex_unlock(&sbefifo->lock);
......@@ -820,7 +834,9 @@ static ssize_t sbefifo_user_read(struct file *file, char __user *buf,
iov_iter_init(&resp_iter, WRITE, &resp_iov, 1, len);
/* Perform the command */
mutex_lock(&sbefifo->lock);
rc = mutex_lock_interruptible(&sbefifo->lock);
if (rc)
goto bail;
rc = __sbefifo_submit(sbefifo, user->pending_cmd, cmd_len, &resp_iter);
mutex_unlock(&sbefifo->lock);
if (rc < 0)
......@@ -875,7 +891,9 @@ static ssize_t sbefifo_user_write(struct file *file, const char __user *buf,
user->pending_len = 0;
/* Trigger reset request */
mutex_lock(&sbefifo->lock);
rc = mutex_lock_interruptible(&sbefifo->lock);
if (rc)
goto bail;
rc = sbefifo_request_reset(user->sbefifo);
mutex_unlock(&sbefifo->lock);
if (rc == 0)
......@@ -993,6 +1011,8 @@ static int sbefifo_probe(struct device *dev)
child_name);
}
device_create_file(&sbefifo->dev, &dev_attr_timeout);
return 0;
err_free_minor:
fsi_free_minor(sbefifo->dev.devt);
......@@ -1018,6 +1038,8 @@ static int sbefifo_remove(struct device *dev)
dev_dbg(dev, "Removing sbefifo device...\n");
device_remove_file(&sbefifo->dev, &dev_attr_timeout);
mutex_lock(&sbefifo->lock);
sbefifo->dead = true;
mutex_unlock(&sbefifo->lock);
......
......@@ -132,22 +132,20 @@ struct extended_sensor {
static int occ_poll(struct occ *occ)
{
int rc;
u16 checksum = occ->poll_cmd_data + occ->seq_no + 1;
u8 cmd[8];
u8 cmd[7];
struct occ_poll_response_header *header;
/* big endian */
cmd[0] = occ->seq_no++; /* sequence number */
cmd[0] = 0; /* sequence number */
cmd[1] = 0; /* cmd type */
cmd[2] = 0; /* data length msb */
cmd[3] = 1; /* data length lsb */
cmd[4] = occ->poll_cmd_data; /* data */
cmd[5] = checksum >> 8; /* checksum msb */
cmd[6] = checksum & 0xFF; /* checksum lsb */
cmd[7] = 0;
cmd[5] = 0; /* checksum msb */
cmd[6] = 0; /* checksum lsb */
/* mutex should already be locked if necessary */
rc = occ->send_cmd(occ, cmd);
rc = occ->send_cmd(occ, cmd, sizeof(cmd));
if (rc) {
occ->last_error = rc;
if (occ->error_count++ > OCC_ERROR_COUNT_THRESHOLD)
......@@ -184,25 +182,23 @@ static int occ_set_user_power_cap(struct occ *occ, u16 user_power_cap)
{
int rc;
u8 cmd[8];
u16 checksum = 0x24;
__be16 user_power_cap_be = cpu_to_be16(user_power_cap);
cmd[0] = 0;
cmd[1] = 0x22;
cmd[2] = 0;
cmd[3] = 2;
cmd[0] = 0; /* sequence number */
cmd[1] = 0x22; /* cmd type */
cmd[2] = 0; /* data length msb */
cmd[3] = 2; /* data length lsb */
memcpy(&cmd[4], &user_power_cap_be, 2);
checksum += cmd[4] + cmd[5];
cmd[6] = checksum >> 8;
cmd[7] = checksum & 0xFF;
cmd[6] = 0; /* checksum msb */
cmd[7] = 0; /* checksum lsb */
rc = mutex_lock_interruptible(&occ->lock);
if (rc)
return rc;
rc = occ->send_cmd(occ, cmd);
rc = occ->send_cmd(occ, cmd, sizeof(cmd));
mutex_unlock(&occ->lock);
......@@ -1144,8 +1140,6 @@ int occ_setup(struct occ *occ, const char *name)
{
int rc;
/* start with 1 to avoid false match with zero-initialized SRAM buffer */
occ->seq_no = 1;
mutex_init(&occ->lock);
occ->groups[0] = &occ->group;
......
......@@ -95,9 +95,8 @@ struct occ {
struct occ_sensors sensors;
int powr_sample_time_us; /* average power sample time */
u8 seq_no;
u8 poll_cmd_data; /* to perform OCC poll command */
int (*send_cmd)(struct occ *occ, u8 *cmd);
int (*send_cmd)(struct occ *occ, u8 *cmd, size_t len);
unsigned long next_update;
struct mutex lock; /* lock OCC access */
......
......@@ -97,18 +97,21 @@ static int p8_i2c_occ_putscom_u32(struct i2c_client *client, u32 address,
}
static int p8_i2c_occ_putscom_be(struct i2c_client *client, u32 address,
u8 *data)
u8 *data, size_t len)
{
__be32 data0, data1;
__be32 data0 = 0, data1 = 0;
memcpy(&data0, data, 4);
memcpy(&data1, data + 4, 4);
memcpy(&data0, data, min_t(size_t, len, 4));
if (len > 4) {
len -= 4;
memcpy(&data1, data + 4, min_t(size_t, len, 4));
}
return p8_i2c_occ_putscom_u32(client, address, be32_to_cpu(data0),
be32_to_cpu(data1));
}
static int p8_i2c_occ_send_cmd(struct occ *occ, u8 *cmd)
static int p8_i2c_occ_send_cmd(struct occ *occ, u8 *cmd, size_t len)
{
int i, rc;
unsigned long start;
......@@ -127,7 +130,7 @@ static int p8_i2c_occ_send_cmd(struct occ *occ, u8 *cmd)
return rc;
/* write command (expected to already be BE), we need bus-endian... */
rc = p8_i2c_occ_putscom_be(client, OCB_DATA3, cmd);
rc = p8_i2c_occ_putscom_be(client, OCB_DATA3, cmd, len);
if (rc)
return rc;
......
......@@ -4,28 +4,96 @@
#include <linux/device.h>
#include <linux/errno.h>
#include <linux/fsi-occ.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/platform_device.h>
#include <linux/string.h>
#include <linux/sysfs.h>
#include "common.h"
struct p9_sbe_occ {
struct occ occ;
bool sbe_error;
void *ffdc;
size_t ffdc_len;
size_t ffdc_size;
struct mutex sbe_error_lock; /* lock access to ffdc data */
struct device *sbe;
};
#define to_p9_sbe_occ(x) container_of((x), struct p9_sbe_occ, occ)
static int p9_sbe_occ_send_cmd(struct occ *occ, u8 *cmd)
static ssize_t ffdc_read(struct file *filp, struct kobject *kobj,
struct bin_attribute *battr, char *buf, loff_t pos,
size_t count)
{
ssize_t rc = 0;
struct occ *occ = dev_get_drvdata(kobj_to_dev(kobj));
struct p9_sbe_occ *ctx = to_p9_sbe_occ(occ);
mutex_lock(&ctx->sbe_error_lock);
if (ctx->sbe_error) {
rc = memory_read_from_buffer(buf, count, &pos, ctx->ffdc,
ctx->ffdc_len);
if (pos >= ctx->ffdc_len)
ctx->sbe_error = false;
}
mutex_unlock(&ctx->sbe_error_lock);
return rc;
}
static BIN_ATTR_RO(ffdc, OCC_MAX_RESP_WORDS * 4);
static bool p9_sbe_occ_save_ffdc(struct p9_sbe_occ *ctx, const void *resp,
size_t resp_len)
{
bool notify = false;
mutex_lock(&ctx->sbe_error_lock);
if (!ctx->sbe_error) {
if (resp_len > ctx->ffdc_size) {
if (ctx->ffdc)
kvfree(ctx->ffdc);
ctx->ffdc = kvmalloc(resp_len, GFP_KERNEL);
if (!ctx->ffdc) {
ctx->ffdc_len = 0;
ctx->ffdc_size = 0;
goto done;
}
ctx->ffdc_size = resp_len;
}
notify = true;
ctx->sbe_error = true;
ctx->ffdc_len = resp_len;
memcpy(ctx->ffdc, resp, resp_len);
}
done:
mutex_unlock(&ctx->sbe_error_lock);
return notify;
}
static int p9_sbe_occ_send_cmd(struct occ *occ, u8 *cmd, size_t len)
{
struct occ_response *resp = &occ->resp;
struct p9_sbe_occ *ctx = to_p9_sbe_occ(occ);
size_t resp_len = sizeof(*resp);
int rc;
rc = fsi_occ_submit(ctx->sbe, cmd, 8, resp, &resp_len);
if (rc < 0)
rc = fsi_occ_submit(ctx->sbe, cmd, len, resp, &resp_len);
if (rc < 0) {
if (resp_len) {
if (p9_sbe_occ_save_ffdc(ctx, resp, resp_len))
sysfs_notify(&occ->bus_dev->kobj, NULL,
bin_attr_ffdc.attr.name);
}
return rc;
}
switch (resp->return_status) {
case OCC_RESP_CMD_IN_PRG:
......@@ -65,6 +133,8 @@ static int p9_sbe_occ_probe(struct platform_device *pdev)
if (!ctx)
return -ENOMEM;
mutex_init(&ctx->sbe_error_lock);
ctx->sbe = pdev->dev.parent;
occ = &ctx->occ;
occ->bus_dev = &pdev->dev;
......@@ -78,6 +148,15 @@ static int p9_sbe_occ_probe(struct platform_device *pdev)
if (rc == -ESHUTDOWN)
rc = -ENODEV; /* Host is shutdown, don't spew errors */
if (!rc) {
rc = device_create_bin_file(occ->bus_dev, &bin_attr_ffdc);
if (rc) {
dev_warn(occ->bus_dev,
"failed to create SBE error ffdc file\n");
rc = 0;
}
}
return rc;
}
......@@ -86,9 +165,14 @@ static int p9_sbe_occ_remove(struct platform_device *pdev)
struct occ *occ = platform_get_drvdata(pdev);
struct p9_sbe_occ *ctx = to_p9_sbe_occ(occ);
device_remove_bin_file(occ->bus_dev, &bin_attr_ffdc);
ctx->sbe = NULL;
occ_shutdown(occ);
if (ctx->ffdc)
kvfree(ctx->ffdc);
return 0;
}
......
......@@ -19,6 +19,8 @@ struct device;
#define OCC_RESP_CRIT_OCB 0xE3
#define OCC_RESP_CRIT_HW 0xE4
#define OCC_MAX_RESP_WORDS 2048
int fsi_occ_submit(struct device *dev, const void *request, size_t req_len,
void *response, size_t *resp_len);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment