Commit c1f0fcd8 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'cxl-for-6.2' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl

Pull cxl updates from Dan Williams:
 "Compute Express Link (CXL) updates for 6.2.

  While it may seem backwards, the CXL update this time around includes
  some focus on CXL 1.x enabling where the work to date had been with
  CXL 2.0 (VH topologies) in mind.

  First generation CXL can mostly be supported via BIOS, similar to DDR,
  however it became clear there are use cases for OS native CXL error
  handling and some CXL 3.0 endpoint features can be deployed on CXL 1.x
  hosts (Restricted CXL Host (RCH) topologies). So, this update brings
  RCH topologies into the Linux CXL device model.

  In support of the ongoing CXL 2.0+ enabling two new core kernel
  facilities are added.

  One is the ability for the kernel to flag collisions between userspace
  access to PCI configuration registers and kernel accesses. This is
  brought on by the PCIe Data-Object-Exchange (DOE) facility, a hardware
  mailbox over config-cycles.

  The other is a cpu_cache_invalidate_memregion() API that maps to
  wbinvd_on_all_cpus() on x86. To prevent abuse it is disabled in guest
  VMs and architectures that do not support it yet. The CXL paths that
  need it, dynamic memory region creation and security commands (erase /
  unlock), are disabled when it is not present.

  As for CXL 2.0+, this cycle the subsystem gains support for Persistent
  Memory Security commands, error handling in response to PCIe AER
  notifications, and support for the "XOR" host bridge interleave
  algorithm.

  Summary:

   - Add the cpu_cache_invalidate_memregion() API for cache flushing in
     response to physical memory reconfiguration, or memory-side data
     invalidation from operations like secure erase or memory-device
     unlock.

   - Add a facility for the kernel to warn about collisions between
     kernel and userspace access to PCI configuration registers

   - Add support for Restricted CXL Host (RCH) topologies (formerly CXL
     1.1)

   - Add handling and reporting of CXL errors reported via the PCIe AER
     mechanism

   - Add support for CXL Persistent Memory Security commands

   - Add support for the "XOR" algorithm for CXL host bridge interleave

   - Rework / simplify CXL to NVDIMM interactions

   - Miscellaneous cleanups and fixes"

* tag 'cxl-for-6.2' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl: (71 commits)
  cxl/region: Fix memdev reuse check
  cxl/pci: Remove endian confusion
  cxl/pci: Add some type-safety to the AER trace points
  cxl/security: Drop security command ioctl uapi
  cxl/mbox: Add variable output size validation for internal commands
  cxl/mbox: Enable cxl_mbox_send_cmd() users to validate output size
  cxl/security: Fix Get Security State output payload endian handling
  cxl: update names for interleave ways conversion macros
  cxl: update names for interleave granularity conversion macros
  cxl/acpi: Warn about an invalid CHBCR in an existing CHBS entry
  tools/testing/cxl: Require cache invalidation bypass
  cxl/acpi: Fail decoder add if CXIMS for HBIG is missing
  cxl/region: Fix spelling mistake "memergion" -> "memregion"
  cxl/regs: Fix sparse warning
  cxl/acpi: Set ACPI's CXL _OSC to indicate RCD mode support
  tools/testing/cxl: Add an RCH topology
  cxl/port: Add RCD endpoint port enumeration
  cxl/mem: Move devm_cxl_add_endpoint() from cxl_core to cxl_mem
  tools/testing/cxl: Add XOR Math support to cxl_test
  cxl/acpi: Support CXL XOR Interleave Math (CXIMS)
  ...
parents 691806e9 f04facfb
......@@ -41,3 +41,17 @@ KernelVersion: 5.18
Contact: Kajol Jain <kjain@linux.ibm.com>
Description: (RO) This sysfs file exposes the cpumask which is designated to
retrieve nvdimm pmu event counter data.
What: /sys/bus/nd/devices/nmemX/cxl/id
Date: November 2022
KernelVersion: 6.2
Contact: Dave Jiang <dave.jiang@intel.com>
Description: (RO) Show the id (serial) of the device. This is CXL specific.
What: /sys/bus/nd/devices/nmemX/cxl/provider
Date: November 2022
KernelVersion: 6.2
Contact: Dave Jiang <dave.jiang@intel.com>
Description: (RO) Shows the CXL bridge device that ties a CXL memory device
to this NVDIMM device. I.e. the parent of the device returned is
a /sys/bus/cxl/devices/memX instance.
......@@ -83,6 +83,7 @@ This structure has the form::
int (*mmio_enabled)(struct pci_dev *dev);
int (*slot_reset)(struct pci_dev *dev);
void (*resume)(struct pci_dev *dev);
void (*cor_error_detected)(struct pci_dev *dev);
};
The possible channel states are::
......@@ -422,5 +423,11 @@ That is, the recovery API only requires that:
- drivers/net/cxgb3
- drivers/net/s2io.c
The cor_error_detected() callback is invoked in handle_error_source() when
the error severity is "correctable". The callback is optional and allows
additional logging to be done if desired. See example:
- drivers/cxl/pci.c
The End
-------
......@@ -69,6 +69,7 @@ config X86
select ARCH_ENABLE_THP_MIGRATION if X86_64 && TRANSPARENT_HUGEPAGE
select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
select ARCH_HAS_CACHE_LINE_SIZE
select ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION
select ARCH_HAS_CURRENT_STACK_POINTER
select ARCH_HAS_DEBUG_VIRTUAL
select ARCH_HAS_DEBUG_VM_PGTABLE if !X86_PAE
......
......@@ -20,6 +20,7 @@
#include <linux/kernel.h>
#include <linux/cc_platform.h>
#include <linux/set_memory.h>
#include <linux/memregion.h>
#include <asm/e820/api.h>
#include <asm/processor.h>
......@@ -330,6 +331,23 @@ void arch_invalidate_pmem(void *addr, size_t size)
EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
#endif
#ifdef CONFIG_ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION
bool cpu_cache_has_invalidate_memregion(void)
{
return !cpu_feature_enabled(X86_FEATURE_HYPERVISOR);
}
EXPORT_SYMBOL_NS_GPL(cpu_cache_has_invalidate_memregion, DEVMEM);
/*
 * Invalidate CPU caches by calling wbinvd_on_all_cpus().
 * @res_desc: not consulted by this implementation
 *
 * Returns 0 on success. When cpu_cache_has_invalidate_memregion() reports
 * no support, warns once and returns -ENXIO without touching the caches.
 */
int cpu_cache_invalidate_memregion(int res_desc)
{
	int rc = 0;

	if (WARN_ON_ONCE(!cpu_cache_has_invalidate_memregion()))
		rc = -ENXIO;
	else
		wbinvd_on_all_cpus();

	return rc;
}
EXPORT_SYMBOL_NS_GPL(cpu_cache_invalidate_memregion, DEVMEM);
#endif
static void __cpa_flush_all(void *arg)
{
unsigned long cache = (unsigned long)arg;
......
......@@ -3,6 +3,7 @@
#include <linux/libnvdimm.h>
#include <linux/ndctl.h>
#include <linux/acpi.h>
#include <linux/memregion.h>
#include <asm/smp.h>
#include "intel.h"
#include "nfit.h"
......@@ -190,8 +191,6 @@ static int intel_security_change_key(struct nvdimm *nvdimm,
}
}
static void nvdimm_invalidate_cache(void);
static int __maybe_unused intel_security_unlock(struct nvdimm *nvdimm,
const struct nvdimm_key_data *key_data)
{
......@@ -227,9 +226,6 @@ static int __maybe_unused intel_security_unlock(struct nvdimm *nvdimm,
return -EIO;
}
/* DIMM unlocked, invalidate all CPU caches before we read it */
nvdimm_invalidate_cache();
return 0;
}
......@@ -297,8 +293,6 @@ static int __maybe_unused intel_security_erase(struct nvdimm *nvdimm,
if (!test_bit(cmd, &nfit_mem->dsm_mask))
return -ENOTTY;
/* flush all cache before we erase DIMM */
nvdimm_invalidate_cache();
memcpy(nd_cmd.cmd.passphrase, key->data,
sizeof(nd_cmd.cmd.passphrase));
rc = nvdimm_ctl(nvdimm, ND_CMD_CALL, &nd_cmd, sizeof(nd_cmd), NULL);
......@@ -317,8 +311,6 @@ static int __maybe_unused intel_security_erase(struct nvdimm *nvdimm,
return -ENXIO;
}
/* DIMM erased, invalidate all CPU caches before we read it */
nvdimm_invalidate_cache();
return 0;
}
......@@ -354,8 +346,6 @@ static int __maybe_unused intel_security_query_overwrite(struct nvdimm *nvdimm)
return -ENXIO;
}
/* flush all cache before we make the nvdimms available */
nvdimm_invalidate_cache();
return 0;
}
......@@ -380,8 +370,6 @@ static int __maybe_unused intel_security_overwrite(struct nvdimm *nvdimm,
if (!test_bit(NVDIMM_INTEL_OVERWRITE, &nfit_mem->dsm_mask))
return -ENOTTY;
/* flush all cache before we erase DIMM */
nvdimm_invalidate_cache();
memcpy(nd_cmd.cmd.passphrase, nkey->data,
sizeof(nd_cmd.cmd.passphrase));
rc = nvdimm_ctl(nvdimm, ND_CMD_CALL, &nd_cmd, sizeof(nd_cmd), NULL);
......@@ -401,22 +389,6 @@ static int __maybe_unused intel_security_overwrite(struct nvdimm *nvdimm,
}
}
/*
* TODO: define a cross arch wbinvd equivalent when/if
* NVDIMM_FAMILY_INTEL command support arrives on another arch.
*/
#ifdef CONFIG_X86
static void nvdimm_invalidate_cache(void)
{
wbinvd_on_all_cpus();
}
#else
static void nvdimm_invalidate_cache(void)
{
WARN_ON_ONCE("cache invalidation required after unlock\n");
}
#endif
static const struct nvdimm_security_ops __intel_security_ops = {
.get_flags = intel_security_flags,
.freeze = intel_security_freeze,
......
......@@ -493,6 +493,7 @@ static u32 calculate_cxl_support(void)
u32 support;
support = OSC_CXL_2_0_PORT_DEV_REG_ACCESS_SUPPORT;
support |= OSC_CXL_1_1_PORT_REG_ACCESS_SUPPORT;
if (pci_aer_available())
support |= OSC_CXL_PROTOCOL_ERR_REPORTING_SUPPORT;
if (IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE))
......
......@@ -111,4 +111,22 @@ config CXL_REGION
select MEMREGION
select GET_FREE_REGION
config CXL_REGION_INVALIDATION_TEST
bool "CXL: Region Cache Management Bypass (TEST)"
depends on CXL_REGION
help
CXL Region management and security operations potentially invalidate
the content of CPU caches without notifying those caches to
invalidate the affected cachelines. The CXL Region driver attempts
to invalidate caches when those events occur. If that invalidation
fails the region will fail to enable. Reasons for cache
invalidation failure are due to the CPU not providing a cache
invalidation mechanism. For example usage of wbinvd is restricted to
bare metal x86. However, for testing purposes toggling this option
can disable that data integrity safety and proceed with enabling
regions when there might be conflicting contents in the CPU cache.
If unsure, or if this kernel is meant for production environments,
say N.
endif
......@@ -9,5 +9,5 @@ obj-$(CONFIG_CXL_PORT) += cxl_port.o
cxl_mem-y := mem.o
cxl_pci-y := pci.o
cxl_acpi-y := acpi.o
cxl_pmem-y := pmem.o
cxl_pmem-y := pmem.o security.o
cxl_port-y := port.o
......@@ -6,9 +6,120 @@
#include <linux/kernel.h>
#include <linux/acpi.h>
#include <linux/pci.h>
#include <asm/div64.h>
#include "cxlpci.h"
#include "cxl.h"
#define CXL_RCRB_SIZE SZ_8K
/*
 * XOR interleave maps copied out of a CEDT CXIMS entry and attached to a
 * root decoder as its platform_data.
 * @nr_maps: number of valid entries in @xormaps
 * @xormaps: bitmasks that are ANDed with a host physical address; the
 *           parity of each result contributes one bit of the target index
 */
struct cxl_cxims_data {
int nr_maps;
u64 xormaps[];
};
/*
 * Find a targets entry (n) in the host bridge interleave list.
 * CXL Specification 3.0 Table 9-22
 *
 * @hpa: host physical address to decode
 * @cximsd: xormaps for this window; only dereferenced when @iw != 3
 * @iw: interleave ways (decoded, e.g. 2, 3, 4, 6, ...)
 * @ig: interleave granularity in its decoded form, i.e. the value stored
 *      in cxld->interleave_granularity
 *
 * Returns the target index, or -1 if @iw or @ig cannot be encoded.
 */
static int cxl_xor_calc_n(u64 hpa, struct cxl_cxims_data *cximsd, int iw,
			  int ig)
{
	int i = 0, n = 0;
	u16 eig;
	u8 eiw;

	/* IW: 2,4,6,8,12,16 begin building 'n' using xormaps */
	if (iw != 3) {
		for (i = 0; i < cximsd->nr_maps; i++)
			n |= (hweight64(hpa & cximsd->xormaps[i]) & 1) << i;
	}

	/* IW: 3,6,12 add a modulo calculation to 'n' */
	if (!is_power_of_2(iw)) {
		/*
		 * The low end of the mask is a bit position, so it must be
		 * built from the *encoded* ways and granularity. Using the
		 * decoded granularity (256 and up) would ask GENMASK_ULL()
		 * for a bit index far beyond 63.
		 */
		if (ways_to_eiw(iw, &eiw) || granularity_to_eig(ig, &eig))
			return -1;
		hpa &= GENMASK_ULL(51, eiw + eig);
		/*
		 * do_div() divides @hpa in place and returns the remainder.
		 * NOTE(review): the masked HPA is not shifted down before the
		 * modulo - confirm this matches the specification's intent.
		 */
		n |= do_div(hpa, 3) << i;
	}
	return n;
}
/*
 * Host bridge lookup for root decoders that use XOR interleave arithmetic.
 * @cxlrd: root decoder describing the CXL window
 * @pos: position within the interleave set
 *
 * Returns the dport selected for @pos, or NULL when the decoder is
 * misconfigured or the target index cannot be calculated.
 */
static struct cxl_dport *cxl_hb_xor(struct cxl_root_decoder *cxlrd, int pos)
{
	struct cxl_switch_decoder *cxlsd = &cxlrd->cxlsd;
	struct cxl_decoder *cxld = &cxlsd->cxld;
	struct cxl_cxims_data *cximsd = cxlrd->platform_data;
	int granularity = cxld->interleave_granularity;
	int ways = cxld->interleave_ways;
	int index;
	u64 hpa;

	if (dev_WARN_ONCE(&cxld->dev,
			  cxld->interleave_ways != cxlsd->nr_targets,
			  "misconfigured root decoder\n"))
		return NULL;

	hpa = cxlrd->res->start + pos * granularity;

	/* Entry (n) is 0 for no interleave (iw == 1) */
	if (ways == 1)
		return cxlsd->target[0];

	index = cxl_xor_calc_n(hpa, cximsd, ways, granularity);
	if (index < 0)
		return NULL;

	return cxlsd->target[index];
}
/*
 * Argument handed to cxl_parse_cxims() via acpi_table_parse_cedt().
 * @dev: device used for the devm allocation and for debug messages
 * @cxlrd: root decoder whose platform_data receives the parsed xormaps
 */
struct cxl_cxims_context {
struct device *dev;
struct cxl_root_decoder *cxlrd;
};
/*
 * CEDT subtable callback: capture the xormaps of the CXIMS entry whose
 * granularity (HBIG) matches the root decoder carried in @arg.
 * @header: the CXIMS subtable
 * @arg: a struct cxl_cxims_context
 * @end: unused
 *
 * Returns 0 when the entry does not apply or was captured, otherwise a
 * negative errno (invalid HBIG, too few xormaps, or allocation failure).
 */
static int cxl_parse_cxims(union acpi_subtable_headers *header, void *arg,
			   const unsigned long end)
{
	struct cxl_cxims_context *ctx = arg;
	struct acpi_cedt_cxims *cxims = (struct acpi_cedt_cxims *)header;
	struct cxl_decoder *cxld = &ctx->cxlrd->cxlsd.cxld;
	int ways = cxld->interleave_ways;
	struct cxl_cxims_data *cximsd;
	unsigned int hbig, nr_maps;
	int rc;

	rc = eig_to_granularity(cxims->hbig, &hbig);
	if (rc)
		return rc;

	/* Does this CXIMS entry apply to the given CXL Window? */
	if (hbig != cxld->interleave_granularity)
		return 0;

	/*
	 * IW 1,3 do not use xormaps and skip this parsing entirely.
	 * 2, 4, 8, 16 way need log2(ways) maps; 6, 12 way need log2(ways / 3).
	 */
	nr_maps = is_power_of_2(ways) ? ilog2(ways) : ilog2(ways / 3);

	if (cxims->nr_xormaps < nr_maps) {
		dev_dbg(ctx->dev, "CXIMS nr_xormaps[%d] expected[%d]\n",
			cxims->nr_xormaps, nr_maps);
		return -ENXIO;
	}

	cximsd = devm_kzalloc(ctx->dev, struct_size(cximsd, xormaps, nr_maps),
			      GFP_KERNEL);
	if (!cximsd)
		return -ENOMEM;

	/* Only the first nr_maps entries are needed for this window */
	cximsd->nr_maps = nr_maps;
	memcpy(cximsd->xormaps, cxims->xormap_list,
	       nr_maps * sizeof(*cximsd->xormaps));
	ctx->cxlrd->platform_data = cximsd;

	return 0;
}
static unsigned long cfmws_to_decoder_flags(int restrictions)
{
unsigned long flags = CXL_DECODER_F_ENABLE;
......@@ -33,8 +144,10 @@ static int cxl_acpi_cfmws_verify(struct device *dev,
int rc, expected_len;
unsigned int ways;
if (cfmws->interleave_arithmetic != ACPI_CEDT_CFMWS_ARITHMETIC_MODULO) {
dev_err(dev, "CFMWS Unsupported Interleave Arithmetic\n");
if (cfmws->interleave_arithmetic != ACPI_CEDT_CFMWS_ARITHMETIC_MODULO &&
cfmws->interleave_arithmetic != ACPI_CEDT_CFMWS_ARITHMETIC_XOR) {
dev_err(dev, "CFMWS Unknown Interleave Arithmetic: %d\n",
cfmws->interleave_arithmetic);
return -EINVAL;
}
......@@ -48,7 +161,7 @@ static int cxl_acpi_cfmws_verify(struct device *dev,
return -EINVAL;
}
rc = cxl_to_ways(cfmws->interleave_ways, &ways);
rc = eiw_to_ways(cfmws->interleave_ways, &ways);
if (rc) {
dev_err(dev, "CFMWS Interleave Ways (%d) invalid\n",
cfmws->interleave_ways);
......@@ -70,6 +183,10 @@ static int cxl_acpi_cfmws_verify(struct device *dev,
return 0;
}
/*
* Note, @dev must be the first member, see 'struct cxl_chbs_context'
* and mock_acpi_table_parse_cedt()
*/
struct cxl_cfmws_context {
struct device *dev;
struct cxl_port *root_port;
......@@ -84,9 +201,11 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,
struct cxl_cfmws_context *ctx = arg;
struct cxl_port *root_port = ctx->root_port;
struct resource *cxl_res = ctx->cxl_res;
struct cxl_cxims_context cxims_ctx;
struct cxl_root_decoder *cxlrd;
struct device *dev = ctx->dev;
struct acpi_cedt_cfmws *cfmws;
cxl_calc_hb_fn cxl_calc_hb;
struct cxl_decoder *cxld;
unsigned int ways, i, ig;
struct resource *res;
......@@ -102,10 +221,10 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,
return 0;
}
rc = cxl_to_ways(cfmws->interleave_ways, &ways);
rc = eiw_to_ways(cfmws->interleave_ways, &ways);
if (rc)
return rc;
rc = cxl_to_granularity(cfmws->granularity, &ig);
rc = eig_to_granularity(cfmws->granularity, &ig);
if (rc)
return rc;
for (i = 0; i < ways; i++)
......@@ -128,7 +247,12 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,
if (rc)
goto err_insert;
cxlrd = cxl_root_decoder_alloc(root_port, ways);
if (cfmws->interleave_arithmetic == ACPI_CEDT_CFMWS_ARITHMETIC_MODULO)
cxl_calc_hb = cxl_hb_modulo;
else
cxl_calc_hb = cxl_hb_xor;
cxlrd = cxl_root_decoder_alloc(root_port, ways, cxl_calc_hb);
if (IS_ERR(cxlrd))
return 0;
......@@ -148,7 +272,25 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,
ig = CXL_DECODER_MIN_GRANULARITY;
cxld->interleave_granularity = ig;
if (cfmws->interleave_arithmetic == ACPI_CEDT_CFMWS_ARITHMETIC_XOR) {
if (ways != 1 && ways != 3) {
cxims_ctx = (struct cxl_cxims_context) {
.dev = dev,
.cxlrd = cxlrd,
};
rc = acpi_table_parse_cedt(ACPI_CEDT_TYPE_CXIMS,
cxl_parse_cxims, &cxims_ctx);
if (rc < 0)
goto err_xormap;
if (!cxlrd->platform_data) {
dev_err(dev, "No CXIMS for HBIG %u\n", ig);
rc = -EINVAL;
goto err_xormap;
}
}
}
rc = cxl_decoder_add(cxld, target_map);
err_xormap:
if (rc)
put_device(&cxld->dev);
else
......@@ -193,34 +335,39 @@ static int add_host_bridge_uport(struct device *match, void *arg)
{
struct cxl_port *root_port = arg;
struct device *host = root_port->dev.parent;
struct acpi_device *bridge = to_cxl_host_bridge(host, match);
struct acpi_device *hb = to_cxl_host_bridge(host, match);
struct acpi_pci_root *pci_root;
struct cxl_dport *dport;
struct cxl_port *port;
struct device *bridge;
int rc;
if (!bridge)
if (!hb)
return 0;
dport = cxl_find_dport_by_dev(root_port, match);
pci_root = acpi_pci_find_root(hb->handle);
bridge = pci_root->bus->bridge;
dport = cxl_find_dport_by_dev(root_port, bridge);
if (!dport) {
dev_dbg(host, "host bridge expected and not found\n");
return 0;
}
/*
* Note that this lookup already succeeded in
* to_cxl_host_bridge(), so no need to check for failure here
*/
pci_root = acpi_pci_find_root(bridge->handle);
rc = devm_cxl_register_pci_bus(host, match, pci_root->bus);
if (dport->rch) {
dev_info(bridge, "host supports CXL (restricted)\n");
return 0;
}
rc = devm_cxl_register_pci_bus(host, bridge, pci_root->bus);
if (rc)
return rc;
port = devm_cxl_add_port(host, match, dport->component_reg_phys, dport);
port = devm_cxl_add_port(host, bridge, dport->component_reg_phys,
dport);
if (IS_ERR(port))
return PTR_ERR(port);
dev_dbg(host, "%s: add: %s\n", dev_name(match), dev_name(&port->dev));
dev_info(bridge, "host supports CXL\n");
return 0;
}
......@@ -228,7 +375,9 @@ static int add_host_bridge_uport(struct device *match, void *arg)
/*
 * Lookup state for cxl_get_chbcr() while walking CEDT CHBS entries.
 * @dev: device associated with the lookup; keep this as the first member
 * @uid: host bridge _UID to match against each CHBS entry
 * @rcrb: RCRB base recorded for a matching CXL 1.1 entry, else
 *        CXL_RESOURCE_NONE
 * @chbcr: component register base resolved for the match, else
 *         CXL_RESOURCE_NONE
 * @cxl_version: cxl_version field copied from the matching CHBS entry
 */
struct cxl_chbs_context {
struct device *dev;
unsigned long long uid;
resource_size_t rcrb;
resource_size_t chbcr;
u32 cxl_version;
};
static int cxl_get_chbcr(union acpi_subtable_headers *header, void *arg,
......@@ -244,51 +393,86 @@ static int cxl_get_chbcr(union acpi_subtable_headers *header, void *arg,
if (ctx->uid != chbs->uid)
return 0;
ctx->chbcr = chbs->base;
ctx->cxl_version = chbs->cxl_version;
ctx->rcrb = CXL_RESOURCE_NONE;
ctx->chbcr = CXL_RESOURCE_NONE;
if (!chbs->base)
return 0;
if (chbs->cxl_version != ACPI_CEDT_CHBS_VERSION_CXL11) {
ctx->chbcr = chbs->base;
return 0;
}
if (chbs->length != CXL_RCRB_SIZE)
return 0;
ctx->rcrb = chbs->base;
ctx->chbcr = cxl_rcrb_to_component(ctx->dev, chbs->base,
CXL_RCRB_DOWNSTREAM);
return 0;
}
static int add_host_bridge_dport(struct device *match, void *arg)
{
acpi_status status;
acpi_status rc;
struct device *bridge;
unsigned long long uid;
struct cxl_dport *dport;
struct cxl_chbs_context ctx;
struct acpi_pci_root *pci_root;
struct cxl_port *root_port = arg;
struct device *host = root_port->dev.parent;
struct acpi_device *bridge = to_cxl_host_bridge(host, match);
struct acpi_device *hb = to_cxl_host_bridge(host, match);
if (!bridge)
if (!hb)
return 0;
status = acpi_evaluate_integer(bridge->handle, METHOD_NAME__UID, NULL,
&uid);
if (status != AE_OK) {
dev_err(host, "unable to retrieve _UID of %s\n",
dev_name(match));
rc = acpi_evaluate_integer(hb->handle, METHOD_NAME__UID, NULL, &uid);
if (rc != AE_OK) {
dev_err(match, "unable to retrieve _UID\n");
return -ENODEV;
}
dev_dbg(match, "UID found: %lld\n", uid);
ctx = (struct cxl_chbs_context) {
.dev = host,
.dev = match,
.uid = uid,
};
acpi_table_parse_cedt(ACPI_CEDT_TYPE_CHBS, cxl_get_chbcr, &ctx);
if (ctx.chbcr == 0) {
dev_warn(host, "No CHBS found for Host Bridge: %s\n",
dev_name(match));
if (!ctx.chbcr) {
dev_warn(match, "No CHBS found for Host Bridge (UID %lld)\n",
uid);
return 0;
}
dport = devm_cxl_add_dport(root_port, match, uid, ctx.chbcr);
if (IS_ERR(dport)) {
dev_err(host, "failed to add downstream port: %s\n",
dev_name(match));
return PTR_ERR(dport);
if (ctx.rcrb != CXL_RESOURCE_NONE)
dev_dbg(match, "RCRB found for UID %lld: %pa\n", uid, &ctx.rcrb);
if (ctx.chbcr == CXL_RESOURCE_NONE) {
dev_warn(match, "CHBCR invalid for Host Bridge (UID %lld)\n",
uid);
return 0;
}
dev_dbg(host, "add dport%llu: %s\n", uid, dev_name(match));
dev_dbg(match, "CHBCR found: %pa\n", &ctx.chbcr);
pci_root = acpi_pci_find_root(hb->handle);
bridge = pci_root->bus->bridge;
if (ctx.cxl_version == ACPI_CEDT_CHBS_VERSION_CXL11)
dport = devm_cxl_add_rch_dport(root_port, bridge, uid,
ctx.chbcr, ctx.rcrb);
else
dport = devm_cxl_add_dport(root_port, bridge, uid,
ctx.chbcr);
if (IS_ERR(dport))
return PTR_ERR(dport);
return 0;
}
......@@ -466,7 +650,6 @@ static int cxl_acpi_probe(struct platform_device *pdev)
root_port = devm_cxl_add_port(host, host, CXL_RESOURCE_NONE, NULL);
if (IS_ERR(root_port))
return PTR_ERR(root_port);
dev_dbg(host, "add: %s\n", dev_name(&root_port->dev));
rc = bus_for_each_dev(adev->dev.bus, NULL, root_port,
add_host_bridge_dport);
......@@ -512,7 +695,8 @@ static int cxl_acpi_probe(struct platform_device *pdev)
return rc;
/* In case PCI is scanned before ACPI re-trigger memdev attach */
return cxl_bus_rescan();
cxl_bus_rescan();
return 0;
}
static const struct acpi_device_id cxl_acpi_ids[] = {
......@@ -536,7 +720,20 @@ static struct platform_driver cxl_acpi_driver = {
.id_table = cxl_test_ids,
};
module_platform_driver(cxl_acpi_driver);
/* Module entry point: register the cxl_acpi platform driver. */
static int __init cxl_acpi_init(void)
{
return platform_driver_register(&cxl_acpi_driver);
}
/*
 * Module exit: unregister the platform driver first, then call
 * cxl_bus_drain().
 * NOTE(review): presumably the drain waits for outstanding CXL bus work
 * triggered by the unregistration - confirm against cxl_bus_drain().
 */
static void __exit cxl_acpi_exit(void)
{
platform_driver_unregister(&cxl_acpi_driver);
cxl_bus_drain();
}
module_init(cxl_acpi_init);
module_exit(cxl_acpi_exit);
MODULE_LICENSE("GPL v2");
MODULE_IMPORT_NS(CXL);
MODULE_IMPORT_NS(ACPI);
MODULE_SOFTDEP("pre: cxl_pmem");
......@@ -58,14 +58,6 @@ extern struct rw_semaphore cxl_dpa_rwsem;
bool is_switch_decoder(struct device *dev);
struct cxl_switch_decoder *to_cxl_switch_decoder(struct device *dev);
static inline struct cxl_ep *cxl_ep_load(struct cxl_port *port,
struct cxl_memdev *cxlmd)
{
if (!port)
return NULL;
return xa_load(&port->endpoints, (unsigned long)&cxlmd->dev);
}
int cxl_memdev_init(void);
void cxl_memdev_exit(void);
......
......@@ -82,18 +82,23 @@ static void parse_hdm_decoder_caps(struct cxl_hdm *cxlhdm)
cxlhdm->interleave_mask |= GENMASK(14, 12);
}
static void __iomem *map_hdm_decoder_regs(struct cxl_port *port,
void __iomem *crb)
static int map_hdm_decoder_regs(struct cxl_port *port, void __iomem *crb,
struct cxl_component_regs *regs)
{
struct cxl_component_reg_map map;
struct cxl_register_map map = {
.resource = port->component_reg_phys,
.base = crb,
.max_size = CXL_COMPONENT_REG_BLOCK_SIZE,
};
cxl_probe_component_regs(&port->dev, crb, &map);
if (!map.hdm_decoder.valid) {
cxl_probe_component_regs(&port->dev, crb, &map.component_map);
if (!map.component_map.hdm_decoder.valid) {
dev_err(&port->dev, "HDM decoder registers invalid\n");
return IOMEM_ERR_PTR(-ENXIO);
return -ENXIO;
}
return crb + map.hdm_decoder.offset;
return cxl_map_component_regs(&port->dev, regs, &map,
BIT(CXL_CM_CAP_CAP_ID_HDM));
}
/**
......@@ -103,25 +108,25 @@ static void __iomem *map_hdm_decoder_regs(struct cxl_port *port,
struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port)
{
struct device *dev = &port->dev;
void __iomem *crb, *hdm;
struct cxl_hdm *cxlhdm;
void __iomem *crb;
int rc;
cxlhdm = devm_kzalloc(dev, sizeof(*cxlhdm), GFP_KERNEL);
if (!cxlhdm)
return ERR_PTR(-ENOMEM);
cxlhdm->port = port;
crb = devm_cxl_iomap_block(dev, port->component_reg_phys,
CXL_COMPONENT_REG_BLOCK_SIZE);
crb = ioremap(port->component_reg_phys, CXL_COMPONENT_REG_BLOCK_SIZE);
if (!crb) {
dev_err(dev, "No component registers mapped\n");
return ERR_PTR(-ENXIO);
}
hdm = map_hdm_decoder_regs(port, crb);
if (IS_ERR(hdm))
return ERR_CAST(hdm);
cxlhdm->regs.hdm_decoder = hdm;
rc = map_hdm_decoder_regs(port, crb, &cxlhdm->regs);
iounmap(crb);
if (rc)
return ERR_PTR(rc);
parse_hdm_decoder_caps(cxlhdm);
if (cxlhdm->decoder_count == 0) {
......@@ -489,10 +494,10 @@ static void cxld_set_interleave(struct cxl_decoder *cxld, u32 *ctrl)
* Input validation ensures these warns never fire, but otherwise
* suppress uninitialized variable usage warnings.
*/
if (WARN_ONCE(ways_to_cxl(cxld->interleave_ways, &eiw),
if (WARN_ONCE(ways_to_eiw(cxld->interleave_ways, &eiw),
"invalid interleave_ways: %d\n", cxld->interleave_ways))
return;
if (WARN_ONCE(granularity_to_cxl(cxld->interleave_granularity, &eig),
if (WARN_ONCE(granularity_to_eig(cxld->interleave_granularity, &eig),
"invalid interleave_granularity: %d\n",
cxld->interleave_granularity))
return;
......@@ -736,16 +741,16 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
}
cxld->target_type = CXL_DECODER_EXPANDER;
}
rc = cxl_to_ways(FIELD_GET(CXL_HDM_DECODER0_CTRL_IW_MASK, ctrl),
&cxld->interleave_ways);
rc = eiw_to_ways(FIELD_GET(CXL_HDM_DECODER0_CTRL_IW_MASK, ctrl),
&cxld->interleave_ways);
if (rc) {
dev_warn(&port->dev,
"decoder%d.%d: Invalid interleave ways (ctrl: %#x)\n",
port->id, cxld->id, ctrl);
return rc;
}
rc = cxl_to_granularity(FIELD_GET(CXL_HDM_DECODER0_CTRL_IG_MASK, ctrl),
&cxld->interleave_granularity);
rc = eig_to_granularity(FIELD_GET(CXL_HDM_DECODER0_CTRL_IG_MASK, ctrl),
&cxld->interleave_granularity);
if (rc)
return rc;
......
......@@ -140,13 +140,9 @@ static const char *cxl_mem_opcode_to_name(u16 opcode)
}
/**
* cxl_mbox_send_cmd() - Send a mailbox command to a device.
* cxl_internal_send_cmd() - Kernel internal interface to send a mailbox command
* @cxlds: The device data for the operation
* @opcode: Opcode for the mailbox command.
* @in: The input payload for the mailbox command.
* @in_size: The length of the input payload
* @out: Caller allocated buffer for the output.
* @out_size: Expected size of output.
* @mbox_cmd: initialized command to execute
*
* Context: Any context.
* Return:
......@@ -161,40 +157,40 @@ static const char *cxl_mem_opcode_to_name(u16 opcode)
* error. While this distinction can be useful for commands from userspace, the
* kernel will only be able to use results when both are successful.
*/
int cxl_mbox_send_cmd(struct cxl_dev_state *cxlds, u16 opcode, void *in,
size_t in_size, void *out, size_t out_size)
int cxl_internal_send_cmd(struct cxl_dev_state *cxlds,
struct cxl_mbox_cmd *mbox_cmd)
{
const struct cxl_mem_command *cmd = cxl_mem_find_command(opcode);
struct cxl_mbox_cmd mbox_cmd = {
.opcode = opcode,
.payload_in = in,
.size_in = in_size,
.size_out = out_size,
.payload_out = out,
};
size_t out_size, min_out;
int rc;
if (in_size > cxlds->payload_size || out_size > cxlds->payload_size)
if (mbox_cmd->size_in > cxlds->payload_size ||
mbox_cmd->size_out > cxlds->payload_size)
return -E2BIG;
rc = cxlds->mbox_send(cxlds, &mbox_cmd);
out_size = mbox_cmd->size_out;
min_out = mbox_cmd->min_out;
rc = cxlds->mbox_send(cxlds, mbox_cmd);
if (rc)
return rc;
if (mbox_cmd.return_code != CXL_MBOX_CMD_RC_SUCCESS)
return cxl_mbox_cmd_rc2errno(&mbox_cmd);
if (mbox_cmd->return_code != CXL_MBOX_CMD_RC_SUCCESS)
return cxl_mbox_cmd_rc2errno(mbox_cmd);
if (!out_size)
return 0;
/*
* Variable sized commands can't be validated and so it's up to the
* caller to do that if they wish.
* Variable sized output needs to at least satisfy the caller's
* minimum if not the fully requested size.
*/
if (cmd->info.size_out != CXL_VARIABLE_PAYLOAD) {
if (mbox_cmd.size_out != out_size)
return -EIO;
}
if (min_out == 0)
min_out = out_size;
if (mbox_cmd->size_out < min_out)
return -EIO;
return 0;
}
EXPORT_SYMBOL_NS_GPL(cxl_mbox_send_cmd, CXL);
EXPORT_SYMBOL_NS_GPL(cxl_internal_send_cmd, CXL);
static bool cxl_mem_raw_command_allowed(u16 opcode)
{
......@@ -561,15 +557,25 @@ static int cxl_xfer_log(struct cxl_dev_state *cxlds, uuid_t *uuid, u32 size, u8
while (remaining) {
u32 xfer_size = min_t(u32, remaining, cxlds->payload_size);
struct cxl_mbox_get_log log = {
struct cxl_mbox_cmd mbox_cmd;
struct cxl_mbox_get_log log;
int rc;
log = (struct cxl_mbox_get_log) {
.uuid = *uuid,
.offset = cpu_to_le32(offset),
.length = cpu_to_le32(xfer_size)
.length = cpu_to_le32(xfer_size),
};
int rc;
rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_GET_LOG, &log, sizeof(log),
out, xfer_size);
mbox_cmd = (struct cxl_mbox_cmd) {
.opcode = CXL_MBOX_OP_GET_LOG,
.size_in = sizeof(log),
.payload_in = &log,
.size_out = xfer_size,
.payload_out = out,
};
rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
if (rc < 0)
return rc;
......@@ -615,19 +621,27 @@ static void cxl_walk_cel(struct cxl_dev_state *cxlds, size_t size, u8 *cel)
static struct cxl_mbox_get_supported_logs *cxl_get_gsl(struct cxl_dev_state *cxlds)
{
struct cxl_mbox_get_supported_logs *ret;
struct cxl_mbox_cmd mbox_cmd;
int rc;
ret = kvmalloc(cxlds->payload_size, GFP_KERNEL);
if (!ret)
return ERR_PTR(-ENOMEM);
rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_GET_SUPPORTED_LOGS, NULL, 0, ret,
cxlds->payload_size);
mbox_cmd = (struct cxl_mbox_cmd) {
.opcode = CXL_MBOX_OP_GET_SUPPORTED_LOGS,
.size_out = cxlds->payload_size,
.payload_out = ret,
/* At least the record number field must be valid */
.min_out = 2,
};
rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
if (rc < 0) {
kvfree(ret);
return ERR_PTR(rc);
}
return ret;
}
......@@ -697,7 +711,6 @@ int cxl_enumerate_cmds(struct cxl_dev_state *cxlds)
/* Found the required CEL */
rc = 0;
}
out:
kvfree(gsl);
return rc;
......@@ -719,11 +732,15 @@ EXPORT_SYMBOL_NS_GPL(cxl_enumerate_cmds, CXL);
static int cxl_mem_get_partition_info(struct cxl_dev_state *cxlds)
{
struct cxl_mbox_get_partition_info pi;
struct cxl_mbox_cmd mbox_cmd;
int rc;
rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_GET_PARTITION_INFO, NULL, 0,
&pi, sizeof(pi));
mbox_cmd = (struct cxl_mbox_cmd) {
.opcode = CXL_MBOX_OP_GET_PARTITION_INFO,
.size_out = sizeof(pi),
.payload_out = &pi,
};
rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
if (rc)
return rc;
......@@ -752,10 +769,15 @@ int cxl_dev_state_identify(struct cxl_dev_state *cxlds)
{
/* See CXL 2.0 Table 175 Identify Memory Device Output Payload */
struct cxl_mbox_identify id;
struct cxl_mbox_cmd mbox_cmd;
int rc;
rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_IDENTIFY, NULL, 0, &id,
sizeof(id));
mbox_cmd = (struct cxl_mbox_cmd) {
.opcode = CXL_MBOX_OP_IDENTIFY,
.size_out = sizeof(id),
.payload_out = &id,
};
rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
if (rc < 0)
return rc;
......
......@@ -344,6 +344,7 @@ struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds)
* needed as this is ordered with cdev_add() publishing the device.
*/
cxlmd->cxlds = cxlds;
cxlds->cxlmd = cxlmd;
cdev = &cxlmd->cdev;
rc = cdev_device_add(cdev, dev);
......
......@@ -54,16 +54,13 @@ static int match_add_dports(struct pci_dev *pdev, void *data)
dev_dbg(&port->dev, "failed to find component registers\n");
port_num = FIELD_GET(PCI_EXP_LNKCAP_PN, lnkcap);
dport = devm_cxl_add_dport(port, &pdev->dev, port_num,
cxl_regmap_to_base(pdev, &map));
dport = devm_cxl_add_dport(port, &pdev->dev, port_num, map.resource);
if (IS_ERR(dport)) {
ctx->error = PTR_ERR(dport);
return PTR_ERR(dport);
}
ctx->count++;
dev_dbg(&port->dev, "add dport%d: %s\n", port_num, dev_name(&pdev->dev));
return 0;
}
......
......@@ -99,7 +99,6 @@ static struct cxl_nvdimm_bridge *cxl_nvdimm_bridge_alloc(struct cxl_port *port)
dev = &cxl_nvb->dev;
cxl_nvb->port = port;
cxl_nvb->state = CXL_NVB_NEW;
device_initialize(dev);
lockdep_set_class(&dev->mutex, &cxl_nvdimm_bridge_key);
device_set_pm_not_required(dev);
......@@ -117,28 +116,7 @@ static struct cxl_nvdimm_bridge *cxl_nvdimm_bridge_alloc(struct cxl_port *port)
static void unregister_nvb(void *_cxl_nvb)
{
struct cxl_nvdimm_bridge *cxl_nvb = _cxl_nvb;
bool flush;
/*
* If the bridge was ever activated then there might be in-flight state
* work to flush. Once the state has been changed to 'dead' then no new
* work can be queued by user-triggered bind.
*/
device_lock(&cxl_nvb->dev);
flush = cxl_nvb->state != CXL_NVB_NEW;
cxl_nvb->state = CXL_NVB_DEAD;
device_unlock(&cxl_nvb->dev);
/*
* Even though the device core will trigger device_release_driver()
* before the unregister, it does not know about the fact that
* cxl_nvdimm_bridge_driver defers ->remove() work. So, do the driver
* release not and flush it before tearing down the nvdimm device
* hierarchy.
*/
device_release_driver(&cxl_nvb->dev);
if (flush)
flush_work(&cxl_nvb->state_work);
device_unregister(&cxl_nvb->dev);
}
......@@ -188,7 +166,6 @@ static void cxl_nvdimm_release(struct device *dev)
{
struct cxl_nvdimm *cxl_nvd = to_cxl_nvdimm(dev);
xa_destroy(&cxl_nvd->pmem_regions);
kfree(cxl_nvd);
}
......@@ -220,7 +197,8 @@ EXPORT_SYMBOL_NS_GPL(to_cxl_nvdimm, CXL);
static struct lock_class_key cxl_nvdimm_key;
static struct cxl_nvdimm *cxl_nvdimm_alloc(struct cxl_memdev *cxlmd)
static struct cxl_nvdimm *cxl_nvdimm_alloc(struct cxl_nvdimm_bridge *cxl_nvb,
struct cxl_memdev *cxlmd)
{
struct cxl_nvdimm *cxl_nvd;
struct device *dev;
......@@ -231,38 +209,78 @@ static struct cxl_nvdimm *cxl_nvdimm_alloc(struct cxl_memdev *cxlmd)
dev = &cxl_nvd->dev;
cxl_nvd->cxlmd = cxlmd;
xa_init(&cxl_nvd->pmem_regions);
cxlmd->cxl_nvd = cxl_nvd;
device_initialize(dev);
lockdep_set_class(&dev->mutex, &cxl_nvdimm_key);
device_set_pm_not_required(dev);
dev->parent = &cxlmd->dev;
dev->bus = &cxl_bus_type;
dev->type = &cxl_nvdimm_type;
/*
* A "%llx" string is 17-bytes vs dimm_id that is max
* NVDIMM_KEY_DESC_LEN
*/
BUILD_BUG_ON(sizeof(cxl_nvd->dev_id) < 17 ||
sizeof(cxl_nvd->dev_id) > NVDIMM_KEY_DESC_LEN);
sprintf(cxl_nvd->dev_id, "%llx", cxlmd->cxlds->serial);
return cxl_nvd;
}
static void cxl_nvd_unregister(void *dev)
static void cxl_nvd_unregister(void *_cxl_nvd)
{
device_unregister(dev);
struct cxl_nvdimm *cxl_nvd = _cxl_nvd;
struct cxl_memdev *cxlmd = cxl_nvd->cxlmd;
struct cxl_nvdimm_bridge *cxl_nvb = cxlmd->cxl_nvb;
/*
* Either the bridge is in ->remove() context under the device_lock(),
* or cxlmd_release_nvdimm() is cancelling the bridge's release action
* for @cxl_nvd and doing it itself (while manually holding the bridge
* lock).
*/
device_lock_assert(&cxl_nvb->dev);
cxl_nvd->cxlmd = NULL;
cxlmd->cxl_nvd = NULL;
device_unregister(&cxl_nvd->dev);
}
/*
 * Release callback taking a cxl_memdev as its opaque argument.
 *
 * While holding the nvdimm bridge's device lock, run (and thereby cancel) the
 * bridge's cxl_nvd_unregister() action if the memdev still has a cxl_nvdimm
 * attached. The memdev's reference on the bridge is dropped afterwards.
 */
static void cxlmd_release_nvdimm(void *_cxlmd)
{
	struct cxl_memdev *memdev = _cxlmd;
	struct device *bridge_dev = &memdev->cxl_nvb->dev;

	device_lock(bridge_dev);
	/* cxl_nvd is cleared by cxl_nvd_unregister(), so it may already be gone */
	if (memdev->cxl_nvd)
		devm_release_action(bridge_dev, cxl_nvd_unregister,
				    memdev->cxl_nvd);
	device_unlock(bridge_dev);

	put_device(bridge_dev);
}
/**
* devm_cxl_add_nvdimm() - add a bridge between a cxl_memdev and an nvdimm
* @host: same host as @cxlmd
* @cxlmd: cxl_memdev instance that will perform LIBNVDIMM operations
*
* Return: 0 on success negative error code on failure.
*/
int devm_cxl_add_nvdimm(struct device *host, struct cxl_memdev *cxlmd)
int devm_cxl_add_nvdimm(struct cxl_memdev *cxlmd)
{
struct cxl_nvdimm_bridge *cxl_nvb;
struct cxl_nvdimm *cxl_nvd;
struct device *dev;
int rc;
cxl_nvd = cxl_nvdimm_alloc(cxlmd);
if (IS_ERR(cxl_nvd))
return PTR_ERR(cxl_nvd);
cxl_nvb = cxl_find_nvdimm_bridge(&cxlmd->dev);
if (!cxl_nvb)
return -ENODEV;
cxl_nvd = cxl_nvdimm_alloc(cxl_nvb, cxlmd);
if (IS_ERR(cxl_nvd)) {
rc = PTR_ERR(cxl_nvd);
goto err_alloc;
}
cxlmd->cxl_nvb = cxl_nvb;
dev = &cxl_nvd->dev;
rc = dev_set_name(dev, "pmem%d", cxlmd->id);
......@@ -273,13 +291,34 @@ int devm_cxl_add_nvdimm(struct device *host, struct cxl_memdev *cxlmd)
if (rc)
goto err;
dev_dbg(host, "%s: register %s\n", dev_name(dev->parent),
dev_name(dev));
dev_dbg(&cxlmd->dev, "register %s\n", dev_name(dev));
/*
* The two actions below arrange for @cxl_nvd to be deleted when either
* the top-level PMEM bridge goes down, or the endpoint device goes
* through ->remove().
*/
device_lock(&cxl_nvb->dev);
if (cxl_nvb->dev.driver)
rc = devm_add_action_or_reset(&cxl_nvb->dev, cxl_nvd_unregister,
cxl_nvd);
else
rc = -ENXIO;
device_unlock(&cxl_nvb->dev);
return devm_add_action_or_reset(host, cxl_nvd_unregister, dev);
if (rc)
goto err_alloc;
/* @cxlmd carries a reference on @cxl_nvb until cxlmd_release_nvdimm */
return devm_add_action_or_reset(&cxlmd->dev, cxlmd_release_nvdimm, cxlmd);
err:
put_device(dev);
err_alloc:
cxlmd->cxl_nvb = NULL;
cxlmd->cxl_nvd = NULL;
put_device(&cxl_nvb->dev);
return rc;
}
EXPORT_SYMBOL_NS_GPL(devm_cxl_add_nvdimm, CXL);
......@@ -628,6 +628,8 @@ static struct cxl_port *cxl_port_alloc(struct device *uport,
iter = to_cxl_port(iter->dev.parent);
if (iter->host_bridge)
port->host_bridge = iter->host_bridge;
else if (parent_dport->rch)
port->host_bridge = parent_dport->dport;
else
port->host_bridge = iter->uport;
dev_dbg(uport, "host-bridge: %s\n", dev_name(port->host_bridge));
......@@ -655,16 +657,10 @@ static struct cxl_port *cxl_port_alloc(struct device *uport,
return ERR_PTR(rc);
}
/**
* devm_cxl_add_port - register a cxl_port in CXL memory decode hierarchy
* @host: host device for devm operations
* @uport: "physical" device implementing this upstream port
* @component_reg_phys: (optional) for configurable cxl_port instances
* @parent_dport: next hop up in the CXL memory decode hierarchy
*/
struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport,
resource_size_t component_reg_phys,
struct cxl_dport *parent_dport)
static struct cxl_port *__devm_cxl_add_port(struct device *host,
struct device *uport,
resource_size_t component_reg_phys,
struct cxl_dport *parent_dport)
{
struct cxl_port *port;
struct device *dev;
......@@ -702,6 +698,41 @@ struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport,
put_device(dev);
return ERR_PTR(rc);
}
/**
 * devm_cxl_add_port - register a cxl_port in CXL memory decode hierarchy
 * @host: host device for devm operations
 * @uport: "physical" device implementing this upstream port
 * @component_reg_phys: (optional) for configurable cxl_port instances
 * @parent_dport: next hop up in the CXL memory decode hierarchy
 *
 * Wrapper around __devm_cxl_add_port() that emits a debug message describing
 * the outcome. A NULL @parent_dport is reported as a root port.
 *
 * Return: whatever __devm_cxl_add_port() returned; check it with IS_ERR().
 */
struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport,
				   resource_size_t component_reg_phys,
				   struct cxl_dport *parent_dport)
{
	struct cxl_port *port, *parent_port;

	port = __devm_cxl_add_port(host, uport, component_reg_phys,
				   parent_dport);

	parent_port = parent_dport ? parent_dport->port : NULL;
	if (IS_ERR(port)) {
		/*
		 * @port is an error pointer on this path, so it must not be
		 * dereferenced to obtain a device name. Only the parent (if
		 * any) can be identified in the message.
		 */
		dev_dbg(uport, "Failed to add%s%s%s: %ld\n",
			parent_port ? " port to " : "",
			parent_port ? dev_name(&parent_port->dev) : "",
			parent_port ? "" : " root port",
			PTR_ERR(port));
	} else {
		dev_dbg(uport, "%s added%s%s%s\n",
			dev_name(&port->dev),
			parent_port ? " to " : "",
			parent_port ? dev_name(&parent_port->dev) : "",
			parent_port ? "" : " (root port)");
	}

	return port;
}
EXPORT_SYMBOL_NS_GPL(devm_cxl_add_port, CXL);
struct pci_bus *cxl_port_to_pci_bus(struct cxl_port *port)
......@@ -870,20 +901,10 @@ static void cxl_dport_unlink(void *data)
sysfs_remove_link(&port->dev.kobj, link_name);
}
/**
* devm_cxl_add_dport - append downstream port data to a cxl_port
* @port: the cxl_port that references this dport
* @dport_dev: firmware or PCI device representing the dport
* @port_id: identifier for this dport in a decoder's target list
* @component_reg_phys: optional location of CXL component registers
*
* Note that dports are appended to the devm release action's of the
* either the port's host (for root ports), or the port itself (for
* switch ports)
*/
struct cxl_dport *devm_cxl_add_dport(struct cxl_port *port,
struct device *dport_dev, int port_id,
resource_size_t component_reg_phys)
static struct cxl_dport *
__devm_cxl_add_dport(struct cxl_port *port, struct device *dport_dev,
int port_id, resource_size_t component_reg_phys,
resource_size_t rcrb)
{
char link_name[CXL_TARGET_STRLEN];
struct cxl_dport *dport;
......@@ -913,6 +934,9 @@ struct cxl_dport *devm_cxl_add_dport(struct cxl_port *port,
dport->port_id = port_id;
dport->component_reg_phys = component_reg_phys;
dport->port = port;
if (rcrb != CXL_RESOURCE_NONE)
dport->rch = true;
dport->rcrb = rcrb;
cond_cxl_root_lock(port);
rc = add_dport(port, dport);
......@@ -935,8 +959,74 @@ struct cxl_dport *devm_cxl_add_dport(struct cxl_port *port,
return dport;
}
/**
 * devm_cxl_add_dport - append VH downstream port data to a cxl_port
 * @port: the cxl_port that references this dport
 * @dport_dev: firmware or PCI device representing the dport
 * @port_id: identifier for this dport in a decoder's target list
 * @component_reg_phys: optional location of CXL component registers
 *
 * The dport is created without an RCRB (CXL_RESOURCE_NONE is passed down).
 *
 * Note that dports are appended to the devm release actions of either the
 * port's host (for root ports), or the port itself (for switch ports).
 *
 * Return: the result of __devm_cxl_add_dport(); check it with IS_ERR().
 */
struct cxl_dport *devm_cxl_add_dport(struct cxl_port *port,
				     struct device *dport_dev, int port_id,
				     resource_size_t component_reg_phys)
{
	struct cxl_dport *new_dport =
		__devm_cxl_add_dport(port, dport_dev, port_id,
				     component_reg_phys, CXL_RESOURCE_NONE);

	if (!IS_ERR(new_dport)) {
		dev_dbg(dport_dev, "dport added to %s\n",
			dev_name(&port->dev));
		return new_dport;
	}

	dev_dbg(dport_dev, "failed to add dport to %s: %ld\n",
		dev_name(&port->dev), PTR_ERR(new_dport));
	return new_dport;
}
EXPORT_SYMBOL_NS_GPL(devm_cxl_add_dport, CXL);
/**
 * devm_cxl_add_rch_dport - append RCH downstream port data to a cxl_port
 * @port: the cxl_port that references this dport
 * @dport_dev: firmware or PCI device representing the dport
 * @port_id: identifier for this dport in a decoder's target list
 * @component_reg_phys: optional location of CXL component registers
 * @rcrb: mandatory location of a Root Complex Register Block
 *
 * See CXL 3.0 9.11.8 CXL Devices Attached to an RCH
 *
 * Return: the result of __devm_cxl_add_dport(), or ERR_PTR(-EINVAL) when
 * @rcrb is CXL_RESOURCE_NONE; check it with IS_ERR().
 */
struct cxl_dport *devm_cxl_add_rch_dport(struct cxl_port *port,
					 struct device *dport_dev, int port_id,
					 resource_size_t component_reg_phys,
					 resource_size_t rcrb)
{
	struct cxl_dport *rch_dport;

	/* Unlike the VH variant, an RCRB address is not optional here */
	if (rcrb == CXL_RESOURCE_NONE) {
		dev_dbg(&port->dev, "failed to add RCH dport, missing RCRB\n");
		return ERR_PTR(-EINVAL);
	}

	rch_dport = __devm_cxl_add_dport(port, dport_dev, port_id,
					 component_reg_phys, rcrb);
	if (!IS_ERR(rch_dport)) {
		dev_dbg(dport_dev, "RCH dport added to %s\n",
			dev_name(&port->dev));
		return rch_dport;
	}

	dev_dbg(dport_dev, "failed to add RCH dport to %s: %ld\n",
		dev_name(&port->dev), PTR_ERR(rch_dport));
	return rch_dport;
}
EXPORT_SYMBOL_NS_GPL(devm_cxl_add_rch_dport, CXL);
static int add_ep(struct cxl_ep *new)
{
struct cxl_port *port = new->dport->port;
......@@ -1122,47 +1212,6 @@ static void reap_dports(struct cxl_port *port)
}
}
/*
 * Create the endpoint port for @cxlmd below @parent_dport.
 *
 * Each non-root port between @parent_dport's port and the CXL root has its
 * cxl_ep entry for @cxlmd linked to the next port down the chain. The
 * endpoint port is then added and registered for automatic removal.
 *
 * Return: 0 on success, the error from devm_cxl_add_port() or
 * cxl_endpoint_autoremove(), or -ENXIO if no driver is bound to the new
 * endpoint port.
 */
int devm_cxl_add_endpoint(struct cxl_memdev *cxlmd,
			  struct cxl_dport *parent_dport)
{
	struct cxl_port *parent_port = parent_dport->port;
	struct cxl_dev_state *cxlds = cxlmd->cxlds;
	struct cxl_port *endpoint, *iter, *down;
	int rc;

	/*
	 * Now that the path to the root is established record all the
	 * intervening ports in the chain.
	 */
	down = NULL;
	iter = parent_port;
	while (!is_cxl_root(iter)) {
		struct cxl_ep *ep = cxl_ep_load(iter, cxlmd);

		ep->next = down;

		/* step one level towards the root */
		down = iter;
		iter = to_cxl_port(iter->dev.parent);
	}

	endpoint = devm_cxl_add_port(&parent_port->dev, &cxlmd->dev,
				     cxlds->component_reg_phys, parent_dport);
	if (IS_ERR(endpoint))
		return PTR_ERR(endpoint);

	dev_dbg(&cxlmd->dev, "add: %s\n", dev_name(&endpoint->dev));

	rc = cxl_endpoint_autoremove(cxlmd, endpoint);
	if (rc)
		return rc;

	if (endpoint->dev.driver)
		return 0;

	dev_err(&cxlmd->dev, "%s failed probe\n", dev_name(&endpoint->dev));
	return -ENXIO;
}
EXPORT_SYMBOL_NS_GPL(devm_cxl_add_endpoint, CXL);
static void cxl_detach_ep(void *data)
{
struct cxl_memdev *cxlmd = data;
......@@ -1243,7 +1292,7 @@ static resource_size_t find_component_registers(struct device *dev)
pdev = to_pci_dev(dev);
cxl_find_regblock(pdev, CXL_REGLOC_RBI_COMPONENT, &map);
return cxl_regmap_to_base(pdev, &map);
return map.resource;
}
static int add_port_attach_ep(struct cxl_memdev *cxlmd,
......@@ -1320,6 +1369,13 @@ int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd)
struct device *iter;
int rc;
/*
* Skip intermediate port enumeration in the RCH case, there
* are no ports in between a host bridge and an endpoint.
*/
if (cxlmd->cxlds->rcd)
return 0;
rc = devm_add_action_or_reset(&cxlmd->dev, cxl_detach_ep, cxlmd);
if (rc)
return rc;
......@@ -1428,7 +1484,7 @@ static int decoder_populate_targets(struct cxl_switch_decoder *cxlsd,
return rc;
}
static struct cxl_dport *cxl_hb_modulo(struct cxl_root_decoder *cxlrd, int pos)
struct cxl_dport *cxl_hb_modulo(struct cxl_root_decoder *cxlrd, int pos)
{
struct cxl_switch_decoder *cxlsd = &cxlrd->cxlsd;
struct cxl_decoder *cxld = &cxlsd->cxld;
......@@ -1441,6 +1497,7 @@ static struct cxl_dport *cxl_hb_modulo(struct cxl_root_decoder *cxlrd, int pos)
return cxlrd->cxlsd.target[pos % iw];
}
EXPORT_SYMBOL_NS_GPL(cxl_hb_modulo, CXL);
static struct lock_class_key cxl_decoder_key;
......@@ -1502,6 +1559,7 @@ static int cxl_switch_decoder_init(struct cxl_port *port,
* cxl_root_decoder_alloc - Allocate a root level decoder
* @port: owning CXL root of this decoder
* @nr_targets: static number of downstream targets
* @calc_hb: which host bridge covers the n'th position by granularity
*
* Return: A new cxl decoder to be registered by cxl_decoder_add(). A
* 'CXL root' decoder is one that decodes from a top-level / static platform
......@@ -1509,7 +1567,8 @@ static int cxl_switch_decoder_init(struct cxl_port *port,
* topology.
*/
struct cxl_root_decoder *cxl_root_decoder_alloc(struct cxl_port *port,
unsigned int nr_targets)
unsigned int nr_targets,
cxl_calc_hb_fn calc_hb)
{
struct cxl_root_decoder *cxlrd;
struct cxl_switch_decoder *cxlsd;
......@@ -1531,7 +1590,7 @@ struct cxl_root_decoder *cxl_root_decoder_alloc(struct cxl_port *port,
return ERR_PTR(rc);
}
cxlrd->calc_hb = cxl_hb_modulo;
cxlrd->calc_hb = calc_hb;
cxld = &cxlsd->cxld;
cxld->dev.type = &cxl_decoder_root_type;
......@@ -1797,12 +1856,27 @@ static void cxl_bus_remove(struct device *dev)
static struct workqueue_struct *cxl_bus_wq;
int cxl_bus_rescan(void)
static void cxl_bus_rescan_queue(struct work_struct *w)
{
return bus_rescan_devices(&cxl_bus_type);
int rc = bus_rescan_devices(&cxl_bus_type);
pr_debug("CXL bus rescan result: %d\n", rc);
}
/*
 * Queue a rescan of the CXL bus on cxl_bus_wq. The rescan itself is performed
 * by cxl_bus_rescan_queue() from the workqueue, so no result is returned to
 * the caller.
 */
void cxl_bus_rescan(void)
{
/* One statically allocated work item is shared by all callers */
static DECLARE_WORK(rescan_work, cxl_bus_rescan_queue);
queue_work(cxl_bus_wq, &rescan_work);
}
EXPORT_SYMBOL_NS_GPL(cxl_bus_rescan, CXL);
/* Drain cxl_bus_wq, the workqueue that cxl_bus_rescan() queues onto. */
void cxl_bus_drain(void)
{
drain_workqueue(cxl_bus_wq);
}
EXPORT_SYMBOL_NS_GPL(cxl_bus_drain, CXL);
bool schedule_cxl_memdev_detach(struct cxl_memdev *cxlmd)
{
return queue_work(cxl_bus_wq, &cxlmd->detach_work);
......
......@@ -324,7 +324,7 @@ static ssize_t interleave_ways_store(struct device *dev,
if (rc)
return rc;
rc = ways_to_cxl(val, &iw);
rc = ways_to_eiw(val, &iw);
if (rc)
return rc;
......@@ -391,7 +391,7 @@ static ssize_t interleave_granularity_store(struct device *dev,
if (rc)
return rc;
rc = granularity_to_cxl(val, &ig);
rc = granularity_to_eig(val, &ig);
if (rc)
return rc;
......@@ -1028,7 +1028,7 @@ static int cxl_port_setup_targets(struct cxl_port *port,
parent_iw = parent_cxld->interleave_ways;
}
rc = granularity_to_cxl(parent_ig, &peig);
rc = granularity_to_eig(parent_ig, &peig);
if (rc) {
dev_dbg(&cxlr->dev, "%s:%s: invalid parent granularity: %d\n",
dev_name(parent_port->uport),
......@@ -1036,7 +1036,7 @@ static int cxl_port_setup_targets(struct cxl_port *port,
return rc;
}
rc = ways_to_cxl(parent_iw, &peiw);
rc = ways_to_eiw(parent_iw, &peiw);
if (rc) {
dev_dbg(&cxlr->dev, "%s:%s: invalid parent interleave: %d\n",
dev_name(parent_port->uport),
......@@ -1045,7 +1045,7 @@ static int cxl_port_setup_targets(struct cxl_port *port,
}
iw = cxl_rr->nr_targets;
rc = ways_to_cxl(iw, &eiw);
rc = ways_to_eiw(iw, &eiw);
if (rc) {
dev_dbg(&cxlr->dev, "%s:%s: invalid port interleave: %d\n",
dev_name(port->uport), dev_name(&port->dev), iw);
......@@ -1065,7 +1065,7 @@ static int cxl_port_setup_targets(struct cxl_port *port,
eig = peig;
}
rc = cxl_to_granularity(eig, &ig);
rc = eig_to_granularity(eig, &ig);
if (rc) {
dev_dbg(&cxlr->dev, "%s:%s: invalid interleave: %d\n",
dev_name(port->uport), dev_name(&port->dev),
......@@ -1226,7 +1226,7 @@ static int cxl_region_attach(struct cxl_region *cxlr,
struct cxl_endpoint_decoder *cxled_target;
struct cxl_memdev *cxlmd_target;
cxled_target = p->targets[pos];
cxled_target = p->targets[i];
if (!cxled_target)
continue;
......@@ -1403,6 +1403,8 @@ static int attach_target(struct cxl_region *cxlr, const char *decoder, int pos)
goto out;
down_read(&cxl_dpa_rwsem);
rc = cxl_region_attach(cxlr, to_cxl_endpoint_decoder(dev), pos);
if (rc == 0)
set_bit(CXL_REGION_F_INCOHERENT, &cxlr->flags);
up_read(&cxl_dpa_rwsem);
up_write(&cxl_region_rwsem);
out:
......@@ -1812,6 +1814,7 @@ static struct lock_class_key cxl_pmem_region_key;
static struct cxl_pmem_region *cxl_pmem_region_alloc(struct cxl_region *cxlr)
{
struct cxl_region_params *p = &cxlr->params;
struct cxl_nvdimm_bridge *cxl_nvb;
struct cxl_pmem_region *cxlr_pmem;
struct device *dev;
int i;
......@@ -1839,6 +1842,18 @@ static struct cxl_pmem_region *cxl_pmem_region_alloc(struct cxl_region *cxlr)
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i];
/*
* Regions never span CXL root devices, so by definition the
* bridge for one device is the same for all.
*/
if (i == 0) {
cxl_nvb = cxl_find_nvdimm_bridge(&cxlmd->dev);
if (!cxl_nvb) {
cxlr_pmem = ERR_PTR(-ENODEV);
goto out;
}
cxlr->cxl_nvb = cxl_nvb;
}
m->cxlmd = cxlmd;
get_device(&cxlmd->dev);
m->start = cxled->dpa_res->start;
......@@ -1848,6 +1863,7 @@ static struct cxl_pmem_region *cxl_pmem_region_alloc(struct cxl_region *cxlr)
dev = &cxlr_pmem->dev;
cxlr_pmem->cxlr = cxlr;
cxlr->cxlr_pmem = cxlr_pmem;
device_initialize(dev);
lockdep_set_class(&dev->mutex, &cxl_pmem_region_key);
device_set_pm_not_required(dev);
......@@ -1860,9 +1876,36 @@ static struct cxl_pmem_region *cxl_pmem_region_alloc(struct cxl_region *cxlr)
return cxlr_pmem;
}
static void cxlr_pmem_unregister(void *dev)
static void cxlr_pmem_unregister(void *_cxlr_pmem)
{
device_unregister(dev);
struct cxl_pmem_region *cxlr_pmem = _cxlr_pmem;
struct cxl_region *cxlr = cxlr_pmem->cxlr;
struct cxl_nvdimm_bridge *cxl_nvb = cxlr->cxl_nvb;
/*
* Either the bridge is in ->remove() context under the device_lock(),
* or cxlr_release_nvdimm() is cancelling the bridge's release action
* for @cxlr_pmem and doing it itself (while manually holding the bridge
* lock).
*/
device_lock_assert(&cxl_nvb->dev);
cxlr->cxlr_pmem = NULL;
cxlr_pmem->cxlr = NULL;
device_unregister(&cxlr_pmem->dev);
}
/*
 * Release callback taking a cxl_region as its opaque argument.
 *
 * While holding the nvdimm bridge's device lock, run (and thereby cancel) the
 * bridge's cxlr_pmem_unregister() action if the region still has a pmem
 * region attached. The region then forgets the bridge and drops its reference
 * on it.
 */
static void cxlr_release_nvdimm(void *_cxlr)
{
	struct cxl_region *region = _cxlr;
	struct device *bridge_dev = &region->cxl_nvb->dev;

	device_lock(bridge_dev);
	/* cxlr_pmem is cleared by cxlr_pmem_unregister(), so it may be gone */
	if (region->cxlr_pmem)
		devm_release_action(bridge_dev, cxlr_pmem_unregister,
				    region->cxlr_pmem);
	device_unlock(bridge_dev);

	region->cxl_nvb = NULL;
	put_device(bridge_dev);
}
/**
......@@ -1874,12 +1917,14 @@ static void cxlr_pmem_unregister(void *dev)
static int devm_cxl_add_pmem_region(struct cxl_region *cxlr)
{
struct cxl_pmem_region *cxlr_pmem;
struct cxl_nvdimm_bridge *cxl_nvb;
struct device *dev;
int rc;
cxlr_pmem = cxl_pmem_region_alloc(cxlr);
if (IS_ERR(cxlr_pmem))
return PTR_ERR(cxlr_pmem);
cxl_nvb = cxlr->cxl_nvb;
dev = &cxlr_pmem->dev;
rc = dev_set_name(dev, "pmem_region%d", cxlr->id);
......@@ -1893,13 +1938,52 @@ static int devm_cxl_add_pmem_region(struct cxl_region *cxlr)
dev_dbg(&cxlr->dev, "%s: register %s\n", dev_name(dev->parent),
dev_name(dev));
return devm_add_action_or_reset(&cxlr->dev, cxlr_pmem_unregister, dev);
device_lock(&cxl_nvb->dev);
if (cxl_nvb->dev.driver)
rc = devm_add_action_or_reset(&cxl_nvb->dev,
cxlr_pmem_unregister, cxlr_pmem);
else
rc = -ENXIO;
device_unlock(&cxl_nvb->dev);
if (rc)
goto err_bridge;
/* @cxlr carries a reference on @cxl_nvb until cxlr_release_nvdimm */
return devm_add_action_or_reset(&cxlr->dev, cxlr_release_nvdimm, cxlr);
err:
put_device(dev);
err_bridge:
put_device(&cxl_nvb->dev);
cxlr->cxl_nvb = NULL;
return rc;
}
/*
 * Invalidate CPU caches for @cxlr if it is flagged CXL_REGION_F_INCOHERENT.
 *
 * When the platform cannot invalidate a memory region, the flag is cleared
 * with a warning if CONFIG_CXL_REGION_INVALIDATION_TEST is enabled; otherwise
 * the flag is left set and an error is returned.
 *
 * Return: 0 if the region was already coherent or the flag was cleared,
 * -ENXIO if invalidation is unavailable and the test bypass is disabled.
 */
static int cxl_region_invalidate_memregion(struct cxl_region *cxlr)
{
	/* Nothing to do unless an attach marked the region incoherent */
	if (!test_bit(CXL_REGION_F_INCOHERENT, &cxlr->flags))
		return 0;

	if (cpu_cache_has_invalidate_memregion()) {
		cpu_cache_invalidate_memregion(IORES_DESC_CXL);
	} else if (IS_ENABLED(CONFIG_CXL_REGION_INVALIDATION_TEST)) {
		dev_warn(&cxlr->dev,
			 "Bypassing cpu_cache_invalidate_memregion() for testing!\n");
	} else {
		dev_err(&cxlr->dev, "Failed to synchronize CPU cache state\n");
		return -ENXIO;
	}

	clear_bit(CXL_REGION_F_INCOHERENT, &cxlr->flags);
	return 0;
}
static int cxl_region_probe(struct device *dev)
{
struct cxl_region *cxlr = to_cxl_region(dev);
......@@ -1915,14 +1999,21 @@ static int cxl_region_probe(struct device *dev)
if (p->state < CXL_CONFIG_COMMIT) {
dev_dbg(&cxlr->dev, "config state: %d\n", p->state);
rc = -ENXIO;
goto out;
}
rc = cxl_region_invalidate_memregion(cxlr);
/*
* From this point on any path that changes the region's state away from
* CXL_CONFIG_COMMIT is also responsible for releasing the driver.
*/
out:
up_read(&cxl_region_rwsem);
if (rc)
return rc;
switch (cxlr->mode) {
case CXL_DECODER_PMEM:
return devm_cxl_add_pmem_region(cxlr);
......@@ -1950,4 +2041,5 @@ void cxl_region_exit(void)
}
MODULE_IMPORT_NS(CXL);
MODULE_IMPORT_NS(DEVMEM);
MODULE_ALIAS_CXL(CXL_DEVICE_REGION);
......@@ -7,6 +7,8 @@
#include <cxlmem.h>
#include <cxlpci.h>
#include "core.h"
/**
* DOC: cxl registers
*
......@@ -59,36 +61,48 @@ void cxl_probe_component_regs(struct device *dev, void __iomem *base,
for (cap = 1; cap <= cap_count; cap++) {
void __iomem *register_block;
u32 hdr;
int decoder_cnt;
struct cxl_reg_map *rmap;
u16 cap_id, offset;
u32 length;
u32 length, hdr;
hdr = readl(base + cap * 0x4);
cap_id = FIELD_GET(CXL_CM_CAP_HDR_ID_MASK, hdr);
offset = FIELD_GET(CXL_CM_CAP_PTR_MASK, hdr);
register_block = base + offset;
hdr = readl(register_block);
rmap = NULL;
switch (cap_id) {
case CXL_CM_CAP_CAP_ID_HDM:
case CXL_CM_CAP_CAP_ID_HDM: {
int decoder_cnt;
dev_dbg(dev, "found HDM decoder capability (0x%x)\n",
offset);
hdr = readl(register_block);
decoder_cnt = cxl_hdm_decoder_count(hdr);
length = 0x20 * decoder_cnt + 0x10;
map->hdm_decoder.valid = true;
map->hdm_decoder.offset = CXL_CM_OFFSET + offset;
map->hdm_decoder.size = length;
rmap = &map->hdm_decoder;
break;
}
case CXL_CM_CAP_CAP_ID_RAS:
dev_dbg(dev, "found RAS capability (0x%x)\n",
offset);
length = CXL_RAS_CAPABILITY_LENGTH;
rmap = &map->ras;
break;
default:
dev_dbg(dev, "Unknown CM cap ID: %d (0x%x)\n", cap_id,
offset);
break;
}
if (!rmap)
continue;
rmap->valid = true;
rmap->id = cap_id;
rmap->offset = CXL_CM_OFFSET + offset;
rmap->size = length;
}
}
EXPORT_SYMBOL_NS_GPL(cxl_probe_component_regs, CXL);
......@@ -117,6 +131,7 @@ void cxl_probe_device_regs(struct device *dev, void __iomem *base,
cap_count = FIELD_GET(CXLDEV_CAP_ARRAY_COUNT_MASK, cap_array);
for (cap = 1; cap <= cap_count; cap++) {
struct cxl_reg_map *rmap;
u32 offset, length;
u16 cap_id;
......@@ -125,28 +140,22 @@ void cxl_probe_device_regs(struct device *dev, void __iomem *base,
offset = readl(base + cap * 0x10 + 0x4);
length = readl(base + cap * 0x10 + 0x8);
rmap = NULL;
switch (cap_id) {
case CXLDEV_CAP_CAP_ID_DEVICE_STATUS:
dev_dbg(dev, "found Status capability (0x%x)\n", offset);
map->status.valid = true;
map->status.offset = offset;
map->status.size = length;
rmap = &map->status;
break;
case CXLDEV_CAP_CAP_ID_PRIMARY_MAILBOX:
dev_dbg(dev, "found Mailbox capability (0x%x)\n", offset);
map->mbox.valid = true;
map->mbox.offset = offset;
map->mbox.size = length;
rmap = &map->mbox;
break;
case CXLDEV_CAP_CAP_ID_SECONDARY_MAILBOX:
dev_dbg(dev, "found Secondary Mailbox capability (0x%x)\n", offset);
break;
case CXLDEV_CAP_CAP_ID_MEMDEV:
dev_dbg(dev, "found Memory Device capability (0x%x)\n", offset);
map->memdev.valid = true;
map->memdev.offset = offset;
map->memdev.size = length;
rmap = &map->memdev;
break;
default:
if (cap_id >= 0x8000)
......@@ -155,6 +164,13 @@ void cxl_probe_device_regs(struct device *dev, void __iomem *base,
dev_dbg(dev, "Unknown cap ID: %#x offset: %#x\n", cap_id, offset);
break;
}
if (!rmap)
continue;
rmap->valid = true;
rmap->id = cap_id;
rmap->offset = offset;
rmap->size = length;
}
}
EXPORT_SYMBOL_NS_GPL(cxl_probe_device_regs, CXL);
......@@ -165,6 +181,9 @@ void __iomem *devm_cxl_iomap_block(struct device *dev, resource_size_t addr,
void __iomem *ret_val;
struct resource *res;
if (WARN_ON_ONCE(addr == CXL_RESOURCE_NONE))
return NULL;
res = devm_request_mem_region(dev, addr, length, dev_name(dev));
if (!res) {
resource_size_t end = addr + length - 1;
......@@ -180,67 +199,65 @@ void __iomem *devm_cxl_iomap_block(struct device *dev, resource_size_t addr,
return ret_val;
}
int cxl_map_component_regs(struct pci_dev *pdev,
struct cxl_component_regs *regs,
struct cxl_register_map *map)
int cxl_map_component_regs(struct device *dev, struct cxl_component_regs *regs,
struct cxl_register_map *map, unsigned long map_mask)
{
struct device *dev = &pdev->dev;
resource_size_t phys_addr;
resource_size_t length;
phys_addr = pci_resource_start(pdev, map->barno);
phys_addr += map->block_offset;
struct mapinfo {
struct cxl_reg_map *rmap;
void __iomem **addr;
} mapinfo[] = {
{ &map->component_map.hdm_decoder, &regs->hdm_decoder },
{ &map->component_map.ras, &regs->ras },
};
int i;
for (i = 0; i < ARRAY_SIZE(mapinfo); i++) {
struct mapinfo *mi = &mapinfo[i];
resource_size_t phys_addr;
resource_size_t length;
phys_addr += map->component_map.hdm_decoder.offset;
length = map->component_map.hdm_decoder.size;
regs->hdm_decoder = devm_cxl_iomap_block(dev, phys_addr, length);
if (!regs->hdm_decoder)
return -ENOMEM;
if (!mi->rmap->valid)
continue;
if (!test_bit(mi->rmap->id, &map_mask))
continue;
phys_addr = map->resource + mi->rmap->offset;
length = mi->rmap->size;
*(mi->addr) = devm_cxl_iomap_block(dev, phys_addr, length);
if (!*(mi->addr))
return -ENOMEM;
}
return 0;
}
EXPORT_SYMBOL_NS_GPL(cxl_map_component_regs, CXL);
int cxl_map_device_regs(struct pci_dev *pdev,
int cxl_map_device_regs(struct device *dev,
struct cxl_device_regs *regs,
struct cxl_register_map *map)
{
struct device *dev = &pdev->dev;
resource_size_t phys_addr;
phys_addr = pci_resource_start(pdev, map->barno);
phys_addr += map->block_offset;
if (map->device_map.status.valid) {
resource_size_t addr;
resource_size_t phys_addr = map->resource;
struct mapinfo {
struct cxl_reg_map *rmap;
void __iomem **addr;
} mapinfo[] = {
{ &map->device_map.status, &regs->status, },
{ &map->device_map.mbox, &regs->mbox, },
{ &map->device_map.memdev, &regs->memdev, },
};
int i;
for (i = 0; i < ARRAY_SIZE(mapinfo); i++) {
struct mapinfo *mi = &mapinfo[i];
resource_size_t length;
addr = phys_addr + map->device_map.status.offset;
length = map->device_map.status.size;
regs->status = devm_cxl_iomap_block(dev, addr, length);
if (!regs->status)
return -ENOMEM;
}
if (map->device_map.mbox.valid) {
resource_size_t addr;
resource_size_t length;
addr = phys_addr + map->device_map.mbox.offset;
length = map->device_map.mbox.size;
regs->mbox = devm_cxl_iomap_block(dev, addr, length);
if (!regs->mbox)
return -ENOMEM;
}
if (map->device_map.memdev.valid) {
resource_size_t addr;
resource_size_t length;
if (!mi->rmap->valid)
continue;
addr = phys_addr + map->device_map.memdev.offset;
length = map->device_map.memdev.size;
regs->memdev = devm_cxl_iomap_block(dev, addr, length);
if (!regs->memdev)
addr = phys_addr + mi->rmap->offset;
length = mi->rmap->size;
*(mi->addr) = devm_cxl_iomap_block(dev, addr, length);
if (!*(mi->addr))
return -ENOMEM;
}
......@@ -248,13 +265,24 @@ int cxl_map_device_regs(struct pci_dev *pdev,
}
EXPORT_SYMBOL_NS_GPL(cxl_map_device_regs, CXL);
static void cxl_decode_regblock(u32 reg_lo, u32 reg_hi,
static bool cxl_decode_regblock(struct pci_dev *pdev, u32 reg_lo, u32 reg_hi,
struct cxl_register_map *map)
{
map->block_offset = ((u64)reg_hi << 32) |
(reg_lo & CXL_DVSEC_REG_LOCATOR_BLOCK_OFF_LOW_MASK);
map->barno = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BIR_MASK, reg_lo);
int bar = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BIR_MASK, reg_lo);
u64 offset = ((u64)reg_hi << 32) |
(reg_lo & CXL_DVSEC_REG_LOCATOR_BLOCK_OFF_LOW_MASK);
if (offset > pci_resource_len(pdev, bar)) {
dev_warn(&pdev->dev,
"BAR%d: %pr: too small (offset: %pa, type: %d)\n", bar,
&pdev->resource[bar], &offset, map->reg_type);
return false;
}
map->reg_type = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BLOCK_ID_MASK, reg_lo);
map->resource = pci_resource_start(pdev, bar) + offset;
map->max_size = pci_resource_len(pdev, bar) - offset;
return true;
}
/**
......@@ -274,7 +302,7 @@ int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type,
u32 regloc_size, regblocks;
int regloc, i;
map->block_offset = U64_MAX;
map->resource = CXL_RESOURCE_NONE;
regloc = pci_find_dvsec_capability(pdev, PCI_DVSEC_VENDOR_ID_CXL,
CXL_DVSEC_REG_LOCATOR);
if (!regloc)
......@@ -292,13 +320,79 @@ int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type,
pci_read_config_dword(pdev, regloc, &reg_lo);
pci_read_config_dword(pdev, regloc + 4, &reg_hi);
cxl_decode_regblock(reg_lo, reg_hi, map);
if (!cxl_decode_regblock(pdev, reg_lo, reg_hi, map))
continue;
if (map->reg_type == type)
return 0;
}
map->block_offset = U64_MAX;
map->resource = CXL_RESOURCE_NONE;
return -ENODEV;
}
EXPORT_SYMBOL_NS_GPL(cxl_find_regblock, CXL);
/**
 * cxl_rcrb_to_component - find the component registers referenced by an RCRB
 * @dev: device used for log messages
 * @rcrb: physical base address of the Root Complex Register Block
 * @which: CXL_RCRB_UPSTREAM selects the second 4K page of the RCRB, any other
 *	   value reads the first 4K page
 *
 * Temporarily maps one 4K page of the RCRB, reads the vendor ID, command
 * register and BAR0/BAR1 from it, and decodes the memory BAR into the
 * physical address of the component register block.
 *
 * Return: the component register block's physical address, or
 * CXL_RESOURCE_NONE if the RCRB cannot be claimed or mapped, reads back as
 * all ones, has memory decode disabled, or carries a BAR that is not a
 * non-zero, CXL_COMPONENT_REG_BLOCK_SIZE-aligned memory BAR.
 */
resource_size_t cxl_rcrb_to_component(struct device *dev,
				      resource_size_t rcrb,
				      enum cxl_rcrb which)
{
	resource_size_t component_reg_phys;
	void __iomem *addr;
	u32 bar0, bar1;
	u16 cmd;
	u32 id;

	if (which == CXL_RCRB_UPSTREAM)
		rcrb += SZ_4K;

	/*
	 * RCRB's BAR[0..1] point to component block containing CXL
	 * subsystem component registers. MEMBAR extraction follows
	 * the PCI Base spec here, esp. 64 bit extraction and memory
	 * ranges alignment (6.0, 7.5.1.2.1).
	 */
	if (!request_mem_region(rcrb, SZ_4K, "CXL RCRB"))
		return CXL_RESOURCE_NONE;

	addr = ioremap(rcrb, SZ_4K);
	if (!addr) {
		/*
		 * @addr is NULL here, so report the physical address that
		 * failed to map instead of the (empty) mapping.
		 */
		dev_err(dev, "Failed to map region %pa\n", &rcrb);
		release_mem_region(rcrb, SZ_4K);
		return CXL_RESOURCE_NONE;
	}

	id = readl(addr + PCI_VENDOR_ID);
	cmd = readw(addr + PCI_COMMAND);
	bar0 = readl(addr + PCI_BASE_ADDRESS_0);
	bar1 = readl(addr + PCI_BASE_ADDRESS_1);

	/* Everything needed has been read; drop the temporary mapping */
	iounmap(addr);
	release_mem_region(rcrb, SZ_4K);

	/*
	 * Sanity check, see CXL 3.0 Figure 9-8 CXL Device that Does Not
	 * Remap Upstream Port and Component Registers
	 */
	if (id == U32_MAX) {
		if (which == CXL_RCRB_DOWNSTREAM)
			dev_err(dev, "Failed to access Downstream Port RCRB\n");
		return CXL_RESOURCE_NONE;
	}
	if (!(cmd & PCI_COMMAND_MEMORY))
		return CXL_RESOURCE_NONE;
	/* The RCRB is a Memory Window, and the MEM_TYPE_1M bit is obsolete */
	if (bar0 & (PCI_BASE_ADDRESS_MEM_TYPE_1M | PCI_BASE_ADDRESS_SPACE_IO))
		return CXL_RESOURCE_NONE;

	component_reg_phys = bar0 & PCI_BASE_ADDRESS_MEM_MASK;
	/* A 64-bit BAR keeps its upper address bits in BAR1 */
	if (bar0 & PCI_BASE_ADDRESS_MEM_TYPE_64)
		component_reg_phys |= ((u64)bar1) << 32;

	if (!component_reg_phys)
		return CXL_RESOURCE_NONE;

	/* MEMBAR is block size (64k) aligned. */
	if (!IS_ALIGNED(component_reg_phys, CXL_COMPONENT_REG_BLOCK_SIZE))
		return CXL_RESOURCE_NONE;

	return component_reg_phys;
}
EXPORT_SYMBOL_NS_GPL(cxl_rcrb_to_component, CXL);
......@@ -33,6 +33,7 @@
#define CXL_CM_CAP_HDR_ARRAY_SIZE_MASK GENMASK(31, 24)
#define CXL_CM_CAP_PTR_MASK GENMASK(31, 20)
#define CXL_CM_CAP_CAP_ID_RAS 0x2
#define CXL_CM_CAP_CAP_ID_HDM 0x5
#define CXL_CM_CAP_CAP_HDM_VERSION 1
......@@ -61,6 +62,10 @@
#define CXL_HDM_DECODER0_SKIP_LOW(i) CXL_HDM_DECODER0_TL_LOW(i)
#define CXL_HDM_DECODER0_SKIP_HIGH(i) CXL_HDM_DECODER0_TL_HIGH(i)
/* HDM decoder control register constants CXL 3.0 8.2.5.19.7 */
#define CXL_DECODER_MIN_GRANULARITY 256
#define CXL_DECODER_MAX_ENCODED_IG 6
static inline int cxl_hdm_decoder_count(u32 cap_hdr)
{
int val = FIELD_GET(CXL_HDM_DECODER_COUNT_MASK, cap_hdr);
......@@ -69,23 +74,23 @@ static inline int cxl_hdm_decoder_count(u32 cap_hdr)
}
/* Encode defined in CXL 2.0 8.2.5.12.7 HDM Decoder Control Register */
static inline int cxl_to_granularity(u16 ig, unsigned int *val)
static inline int eig_to_granularity(u16 eig, unsigned int *granularity)
{
if (ig > 6)
if (eig > CXL_DECODER_MAX_ENCODED_IG)
return -EINVAL;
*val = 256 << ig;
*granularity = CXL_DECODER_MIN_GRANULARITY << eig;
return 0;
}
/* Encode defined in CXL ECN "3, 6, 12 and 16-way memory Interleaving" */
static inline int cxl_to_ways(u8 eniw, unsigned int *val)
static inline int eiw_to_ways(u8 eiw, unsigned int *ways)
{
switch (eniw) {
switch (eiw) {
case 0 ... 4:
*val = 1 << eniw;
*ways = 1 << eiw;
break;
case 8 ... 10:
*val = 3 << (eniw - 8);
*ways = 3 << (eiw - 8);
break;
default:
return -EINVAL;
......@@ -94,20 +99,21 @@ static inline int cxl_to_ways(u8 eniw, unsigned int *val)
return 0;
}
static inline int granularity_to_cxl(int g, u16 *ig)
static inline int granularity_to_eig(int granularity, u16 *eig)
{
if (g > SZ_16K || g < 256 || !is_power_of_2(g))
if (granularity > SZ_16K || granularity < CXL_DECODER_MIN_GRANULARITY ||
!is_power_of_2(granularity))
return -EINVAL;
*ig = ilog2(g) - 8;
*eig = ilog2(granularity) - 8;
return 0;
}
static inline int ways_to_cxl(unsigned int ways, u8 *iw)
static inline int ways_to_eiw(unsigned int ways, u8 *eiw)
{
if (ways > 16)
return -EINVAL;
if (is_power_of_2(ways)) {
*iw = ilog2(ways);
*eiw = ilog2(ways);
return 0;
}
if (ways % 3)
......@@ -115,10 +121,26 @@ static inline int ways_to_cxl(unsigned int ways, u8 *iw)
ways /= 3;
if (!is_power_of_2(ways))
return -EINVAL;
*iw = ilog2(ways) + 8;
*eiw = ilog2(ways) + 8;
return 0;
}
/* RAS Registers CXL 2.0 8.2.5.9 CXL RAS Capability Structure */
#define CXL_RAS_UNCORRECTABLE_STATUS_OFFSET 0x0
#define CXL_RAS_UNCORRECTABLE_STATUS_MASK (GENMASK(16, 14) | GENMASK(11, 0))
#define CXL_RAS_UNCORRECTABLE_MASK_OFFSET 0x4
#define CXL_RAS_UNCORRECTABLE_MASK_MASK (GENMASK(16, 14) | GENMASK(11, 0))
#define CXL_RAS_UNCORRECTABLE_SEVERITY_OFFSET 0x8
#define CXL_RAS_UNCORRECTABLE_SEVERITY_MASK (GENMASK(16, 14) | GENMASK(11, 0))
#define CXL_RAS_CORRECTABLE_STATUS_OFFSET 0xC
#define CXL_RAS_CORRECTABLE_STATUS_MASK GENMASK(6, 0)
#define CXL_RAS_CORRECTABLE_MASK_OFFSET 0x10
#define CXL_RAS_CORRECTABLE_MASK_MASK GENMASK(6, 0)
#define CXL_RAS_CAP_CONTROL_OFFSET 0x14
#define CXL_RAS_CAP_CONTROL_FE_MASK GENMASK(5, 0)
#define CXL_RAS_HEADER_LOG_OFFSET 0x18
#define CXL_RAS_CAPABILITY_LENGTH 0x58
/* CXL 2.0 8.2.8.1 Device Capabilities Array Register */
#define CXLDEV_CAP_ARRAY_OFFSET 0x0
#define CXLDEV_CAP_ARRAY_CAP_ID 0
......@@ -153,9 +175,11 @@ struct cxl_regs {
/*
* Common set of CXL Component register block base pointers
* @hdm_decoder: CXL 2.0 8.2.5.12 CXL HDM Decoder Capability Structure
* @ras: CXL 2.0 8.2.5.9 CXL RAS Capability Structure
*/
struct_group_tagged(cxl_component_regs, component,
void __iomem *hdm_decoder;
void __iomem *ras;
);
/*
* Common set of CXL Device register block base pointers
......@@ -170,12 +194,14 @@ struct cxl_regs {
struct cxl_reg_map {
bool valid;
int id;
unsigned long offset;
unsigned long size;
};
struct cxl_component_reg_map {
struct cxl_reg_map hdm_decoder;
struct cxl_reg_map ras;
};
struct cxl_device_reg_map {
......@@ -187,17 +213,17 @@ struct cxl_device_reg_map {
/**
* struct cxl_register_map - DVSEC harvested register block mapping parameters
* @base: virtual base of the register-block-BAR + @block_offset
* @block_offset: offset to start of register block in @barno
* @resource: physical resource base of the register block
* @max_size: maximum mapping size to perform register search
* @reg_type: see enum cxl_regloc_type
* @barno: PCI BAR number containing the register block
* @component_map: cxl_reg_map for component registers
* @device_map: cxl_reg_maps for device registers
*/
struct cxl_register_map {
void __iomem *base;
u64 block_offset;
resource_size_t resource;
resource_size_t max_size;
u8 reg_type;
u8 barno;
union {
struct cxl_component_reg_map component_map;
struct cxl_device_reg_map device_map;
......@@ -208,18 +234,23 @@ void cxl_probe_component_regs(struct device *dev, void __iomem *base,
struct cxl_component_reg_map *map);
void cxl_probe_device_regs(struct device *dev, void __iomem *base,
struct cxl_device_reg_map *map);
int cxl_map_component_regs(struct pci_dev *pdev,
struct cxl_component_regs *regs,
struct cxl_register_map *map);
int cxl_map_device_regs(struct pci_dev *pdev,
struct cxl_device_regs *regs,
int cxl_map_component_regs(struct device *dev, struct cxl_component_regs *regs,
struct cxl_register_map *map,
unsigned long map_mask);
int cxl_map_device_regs(struct device *dev, struct cxl_device_regs *regs,
struct cxl_register_map *map);
enum cxl_regloc_type;
int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type,
struct cxl_register_map *map);
void __iomem *devm_cxl_iomap_block(struct device *dev, resource_size_t addr,
resource_size_t length);
enum cxl_rcrb {
CXL_RCRB_DOWNSTREAM,
CXL_RCRB_UPSTREAM,
};
resource_size_t cxl_rcrb_to_component(struct device *dev,
resource_size_t rcrb,
enum cxl_rcrb which);
#define CXL_RESOURCE_NONE ((resource_size_t) -1)
#define CXL_TARGET_STRLEN 20
......@@ -248,7 +279,6 @@ enum cxl_decoder_type {
*/
#define CXL_DECODER_MAX_INTERLEAVE 16
#define CXL_DECODER_MIN_GRANULARITY 256
/**
* struct cxl_decoder - Common CXL HDM Decoder Attributes
......@@ -324,18 +354,23 @@ struct cxl_switch_decoder {
struct cxl_dport *target[];
};
struct cxl_root_decoder;
typedef struct cxl_dport *(*cxl_calc_hb_fn)(struct cxl_root_decoder *cxlrd,
int pos);
/**
* struct cxl_root_decoder - Static platform CXL address decoder
* @res: host / parent resource for region allocations
* @region_id: region id for next region provisioning event
* @calc_hb: which host bridge covers the n'th position by granularity
* @platform_data: platform specific configuration data
* @cxlsd: base cxl switch decoder
*/
struct cxl_root_decoder {
struct resource *res;
atomic_t region_id;
struct cxl_dport *(*calc_hb)(struct cxl_root_decoder *cxlrd, int pos);
cxl_calc_hb_fn calc_hb;
void *platform_data;
struct cxl_switch_decoder cxlsd;
};
......@@ -379,12 +414,21 @@ struct cxl_region_params {
int nr_targets;
};
/*
* Flag whether this region needs to have its HPA span synchronized with
* CPU cache state at region activation time.
*/
#define CXL_REGION_F_INCOHERENT 0
/**
* struct cxl_region - CXL region
* @dev: This region's device
* @id: This region's id. Id is globally unique across all regions
* @mode: Endpoint decoder allocation / access mode
* @type: Endpoint decoder target type
* @cxl_nvb: nvdimm bridge for coordinating @cxlr_pmem setup / shutdown
* @cxlr_pmem: (for pmem regions) cached copy of the pmem region device
* @flags: Region state flags
* @params: active + config params for the region
*/
struct cxl_region {
......@@ -392,38 +436,26 @@ struct cxl_region {
int id;
enum cxl_decoder_mode mode;
enum cxl_decoder_type type;
struct cxl_nvdimm_bridge *cxl_nvb;
struct cxl_pmem_region *cxlr_pmem;
unsigned long flags;
struct cxl_region_params params;
};
/**
* enum cxl_nvdimm_brige_state - state machine for managing bus rescans
* @CXL_NVB_NEW: Set at bridge create and after cxl_pmem_wq is destroyed
* @CXL_NVB_DEAD: Set at bridge unregistration to preclude async probing
* @CXL_NVB_ONLINE: Target state after successful ->probe()
* @CXL_NVB_OFFLINE: Target state after ->remove() or failed ->probe()
*/
enum cxl_nvdimm_brige_state {
CXL_NVB_NEW,
CXL_NVB_DEAD,
CXL_NVB_ONLINE,
CXL_NVB_OFFLINE,
};
struct cxl_nvdimm_bridge {
int id;
struct device dev;
struct cxl_port *port;
struct nvdimm_bus *nvdimm_bus;
struct nvdimm_bus_descriptor nd_desc;
struct work_struct state_work;
enum cxl_nvdimm_brige_state state;
};
#define CXL_DEV_ID_LEN 19
struct cxl_nvdimm {
struct device dev;
struct cxl_memdev *cxlmd;
struct cxl_nvdimm_bridge *bridge;
struct xarray pmem_regions;
u8 dev_id[CXL_DEV_ID_LEN]; /* for nvdimm, string of 'serial' */
};
struct cxl_pmem_region_mapping {
......@@ -438,7 +470,6 @@ struct cxl_pmem_region {
struct device dev;
struct cxl_region *cxlr;
struct nd_region *nd_region;
struct cxl_nvdimm_bridge *bridge;
struct range hpa_range;
int nr_mappings;
struct cxl_pmem_region_mapping mapping[];
......@@ -500,12 +531,16 @@ cxl_find_dport_by_dev(struct cxl_port *port, const struct device *dport_dev)
* @dport: PCI bridge or firmware device representing the downstream link
* @port_id: unique hardware identifier for dport in decoder target list
* @component_reg_phys: downstream port component registers
* @rcrb: base address for the Root Complex Register Block
* @rch: Indicate whether this dport was enumerated in RCH or VH mode
* @port: reference to cxl_port that contains this downstream port
*/
struct cxl_dport {
struct device *dport;
int port_id;
resource_size_t component_reg_phys;
resource_size_t rcrb;
bool rch;
struct cxl_port *port;
};
......@@ -562,11 +597,10 @@ struct pci_bus *cxl_port_to_pci_bus(struct cxl_port *port);
struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport,
resource_size_t component_reg_phys,
struct cxl_dport *parent_dport);
int devm_cxl_add_endpoint(struct cxl_memdev *cxlmd,
struct cxl_dport *parent_dport);
struct cxl_port *find_cxl_root(struct device *dev);
int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd);
int cxl_bus_rescan(void);
void cxl_bus_rescan(void);
void cxl_bus_drain(void);
struct cxl_port *cxl_mem_find_port(struct cxl_memdev *cxlmd,
struct cxl_dport **dport);
bool schedule_cxl_memdev_detach(struct cxl_memdev *cxlmd);
......@@ -574,6 +608,10 @@ bool schedule_cxl_memdev_detach(struct cxl_memdev *cxlmd);
struct cxl_dport *devm_cxl_add_dport(struct cxl_port *port,
struct device *dport, int port_id,
resource_size_t component_reg_phys);
struct cxl_dport *devm_cxl_add_rch_dport(struct cxl_port *port,
struct device *dport_dev, int port_id,
resource_size_t component_reg_phys,
resource_size_t rcrb);
struct cxl_decoder *to_cxl_decoder(struct device *dev);
struct cxl_root_decoder *to_cxl_root_decoder(struct device *dev);
......@@ -581,7 +619,9 @@ struct cxl_endpoint_decoder *to_cxl_endpoint_decoder(struct device *dev);
bool is_root_decoder(struct device *dev);
bool is_endpoint_decoder(struct device *dev);
struct cxl_root_decoder *cxl_root_decoder_alloc(struct cxl_port *port,
unsigned int nr_targets);
unsigned int nr_targets,
cxl_calc_hb_fn calc_hb);
struct cxl_dport *cxl_hb_modulo(struct cxl_root_decoder *cxlrd, int pos);
struct cxl_switch_decoder *cxl_switch_decoder_alloc(struct cxl_port *port,
unsigned int nr_targets);
int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map);
......@@ -637,7 +677,7 @@ struct cxl_nvdimm_bridge *devm_cxl_add_nvdimm_bridge(struct device *host,
struct cxl_nvdimm *to_cxl_nvdimm(struct device *dev);
bool is_cxl_nvdimm(struct device *dev);
bool is_cxl_nvdimm_bridge(struct device *dev);
int devm_cxl_add_nvdimm(struct device *host, struct cxl_memdev *cxlmd);
int devm_cxl_add_nvdimm(struct cxl_memdev *cxlmd);
struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct device *dev);
#ifdef CONFIG_CXL_REGION
......
......@@ -35,6 +35,8 @@
* @cdev: char dev core object for ioctl operations
* @cxlds: The device state backing this device
* @detach_work: active memdev lost a port in its ancestry
* @cxl_nvb: coordinate removal of @cxl_nvd if present
* @cxl_nvd: optional bridge to an nvdimm if the device supports pmem
* @id: id number of this memdev instance.
*/
struct cxl_memdev {
......@@ -42,6 +44,8 @@ struct cxl_memdev {
struct cdev cdev;
struct cxl_dev_state *cxlds;
struct work_struct detach_work;
struct cxl_nvdimm_bridge *cxl_nvb;
struct cxl_nvdimm *cxl_nvd;
int id;
};
......@@ -76,6 +80,15 @@ static inline bool is_cxl_endpoint(struct cxl_port *port)
struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds);
/*
 * Look up the endpoint tracking entry for @cxlmd in @port.
 *
 * The lookup key in @port->endpoints is the address of @cxlmd's embedded
 * struct device. Returns NULL when @port is NULL; otherwise returns
 * whatever xa_load() finds for that key.
 */
static inline struct cxl_ep *cxl_ep_load(struct cxl_port *port,
struct cxl_memdev *cxlmd)
{
if (!port)
return NULL;
return xa_load(&port->endpoints, (unsigned long)&cxlmd->dev);
}
/**
* struct cxl_mbox_cmd - A command to be submitted to hardware.
* @opcode: (input) The command set and command submitted to hardware.
......@@ -88,6 +101,7 @@ struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds);
* outputs commands this is always expected to be deterministic. For
* variable sized output commands, it tells the exact number of bytes
* written.
* @min_out: (input) internal command output payload size validation
* @return_code: (output) Error code returned from hardware.
*
* This is the primary mechanism used to send commands to the hardware.
......@@ -102,6 +116,7 @@ struct cxl_mbox_cmd {
void *payload_out;
size_t size_in;
size_t size_out;
size_t min_out;
u16 return_code;
};
......@@ -186,8 +201,10 @@ struct cxl_endpoint_dvsec_info {
* Currently only memory devices are represented.
*
* @dev: The device associated with this CXL state
* @cxlmd: The device representing the CXL.mem capabilities of @dev
* @regs: Parsed register blocks
* @cxl_dvsec: Offset to the PCIe device DVSEC
* @rcd: operating in RCD mode (CXL 3.0 9.11.8 CXL Devices Attached to an RCH)
* @payload_size: Size of space for payload
* (CXL 2.0 8.2.8.4.3 Mailbox Capabilities Register)
* @lsa_size: Size of Label Storage Area
......@@ -218,10 +235,12 @@ struct cxl_endpoint_dvsec_info {
*/
struct cxl_dev_state {
struct device *dev;
struct cxl_memdev *cxlmd;
struct cxl_regs regs;
int cxl_dvsec;
bool rcd;
size_t payload_size;
size_t lsa_size;
struct mutex mbox_mutex; /* Protects device mailbox and firmware */
......@@ -273,6 +292,12 @@ enum cxl_opcode {
CXL_MBOX_OP_GET_SCAN_MEDIA_CAPS = 0x4303,
CXL_MBOX_OP_SCAN_MEDIA = 0x4304,
CXL_MBOX_OP_GET_SCAN_MEDIA = 0x4305,
CXL_MBOX_OP_GET_SECURITY_STATE = 0x4500,
CXL_MBOX_OP_SET_PASSPHRASE = 0x4501,
CXL_MBOX_OP_DISABLE_PASSPHRASE = 0x4502,
CXL_MBOX_OP_UNLOCK = 0x4503,
CXL_MBOX_OP_FREEZE_SECURITY = 0x4504,
CXL_MBOX_OP_PASSPHRASE_SECURE_ERASE = 0x4505,
CXL_MBOX_OP_MAX = 0x10000
};
......@@ -372,8 +397,43 @@ struct cxl_mem_command {
#define CXL_CMD_FLAG_FORCE_ENABLE BIT(0)
};
int cxl_mbox_send_cmd(struct cxl_dev_state *cxlds, u16 opcode, void *in,
size_t in_size, void *out, size_t out_size);
#define CXL_PMEM_SEC_STATE_USER_PASS_SET 0x01
#define CXL_PMEM_SEC_STATE_MASTER_PASS_SET 0x02
#define CXL_PMEM_SEC_STATE_LOCKED 0x04
#define CXL_PMEM_SEC_STATE_FROZEN 0x08
#define CXL_PMEM_SEC_STATE_USER_PLIMIT 0x10
#define CXL_PMEM_SEC_STATE_MASTER_PLIMIT 0x20
/* set passphrase input payload */
struct cxl_set_pass {
u8 type;
u8 reserved[31];
/* CXL field using NVDIMM define, same length */
u8 old_pass[NVDIMM_PASSPHRASE_LEN];
u8 new_pass[NVDIMM_PASSPHRASE_LEN];
} __packed;
/* disable passphrase input payload */
struct cxl_disable_pass {
u8 type;
u8 reserved[31];
u8 pass[NVDIMM_PASSPHRASE_LEN];
} __packed;
/* passphrase secure erase payload */
struct cxl_pass_erase {
u8 type;
u8 reserved[31];
u8 pass[NVDIMM_PASSPHRASE_LEN];
} __packed;
enum {
CXL_PMEM_SEC_PASS_MASTER = 0,
CXL_PMEM_SEC_PASS_USER,
};
int cxl_internal_send_cmd(struct cxl_dev_state *cxlds,
struct cxl_mbox_cmd *cmd);
int cxl_dev_state_identify(struct cxl_dev_state *cxlds);
int cxl_await_media_ready(struct cxl_dev_state *cxlds);
int cxl_enumerate_cmds(struct cxl_dev_state *cxlds);
......
......@@ -62,15 +62,6 @@ enum cxl_regloc_type {
CXL_REGLOC_RBI_TYPES
};
static inline resource_size_t cxl_regmap_to_base(struct pci_dev *pdev,
struct cxl_register_map *map)
{
if (map->block_offset == U64_MAX)
return CXL_RESOURCE_NONE;
return pci_resource_start(pdev, map->barno) + map->block_offset;
}
int devm_cxl_port_enumerate_dports(struct cxl_port *port);
struct cxl_dev_state;
int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm);
......
......@@ -45,9 +45,60 @@ static int cxl_mem_dpa_show(struct seq_file *file, void *data)
return 0;
}
/*
 * Create the endpoint cxl_port for @cxlmd beneath @parent_dport.
 *
 * @host: device passed through to devm_cxl_add_port() as the devm host
 * @cxlmd: memory device that becomes the uport of the new endpoint port
 * @parent_dport: downstream port the endpoint attaches to
 *
 * Returns 0 on success, the PTR_ERR() of a failed devm_cxl_add_port(),
 * the error from cxl_endpoint_autoremove(), or -ENXIO when the newly
 * added endpoint port ended up without a bound driver.
 */
static int devm_cxl_add_endpoint(struct device *host, struct cxl_memdev *cxlmd,
struct cxl_dport *parent_dport)
{
struct cxl_port *parent_port = parent_dport->port;
struct cxl_dev_state *cxlds = cxlmd->cxlds;
struct cxl_port *endpoint, *iter, *down;
resource_size_t component_reg_phys;
int rc;
/*
* Now that the path to the root is established record all the
* intervening ports in the chain.
*
* Walks upward from @parent_port until a root port is reached, storing
* in each port's cxl_ep entry the port visited just before it (NULL
* for @parent_port itself).
*
* NOTE(review): cxl_ep_load() may return NULL and the result is
* dereferenced unchecked; presumably every port on this path already
* holds an entry for @cxlmd -- confirm against the caller.
*/
for (iter = parent_port, down = NULL; !is_cxl_root(iter);
down = iter, iter = to_cxl_port(iter->dev.parent)) {
struct cxl_ep *ep;
ep = cxl_ep_load(iter, cxlmd);
ep->next = down;
}
/*
* The component registers for an RCD might come from the
* host-bridge RCRB if they are not already mapped via the
* typical register locator mechanism.
*/
if (parent_dport->rch && cxlds->component_reg_phys == CXL_RESOURCE_NONE)
component_reg_phys = cxl_rcrb_to_component(
&cxlmd->dev, parent_dport->rcrb, CXL_RCRB_UPSTREAM);
else
component_reg_phys = cxlds->component_reg_phys;
endpoint = devm_cxl_add_port(host, &cxlmd->dev, component_reg_phys,
parent_dport);
if (IS_ERR(endpoint))
return PTR_ERR(endpoint);
rc = cxl_endpoint_autoremove(cxlmd, endpoint);
if (rc)
return rc;
/* A port that was added but has no driver bound counts as a failure */
if (!endpoint->dev.driver) {
dev_err(&cxlmd->dev, "%s failed probe\n",
dev_name(&endpoint->dev));
return -ENXIO;
}
return 0;
}
static int cxl_mem_probe(struct device *dev)
{
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
struct cxl_dev_state *cxlds = cxlmd->cxlds;
struct device *endpoint_parent;
struct cxl_port *parent_port;
struct cxl_dport *dport;
struct dentry *dentry;
......@@ -80,21 +131,34 @@ static int cxl_mem_probe(struct device *dev)
return -ENXIO;
}
device_lock(&parent_port->dev);
if (!parent_port->dev.driver) {
if (dport->rch)
endpoint_parent = parent_port->uport;
else
endpoint_parent = &parent_port->dev;
device_lock(endpoint_parent);
if (!endpoint_parent->driver) {
dev_err(dev, "CXL port topology %s not enabled\n",
dev_name(&parent_port->dev));
dev_name(endpoint_parent));
rc = -ENXIO;
goto unlock;
}
rc = devm_cxl_add_endpoint(cxlmd, dport);
rc = devm_cxl_add_endpoint(endpoint_parent, cxlmd, dport);
unlock:
device_unlock(&parent_port->dev);
device_unlock(endpoint_parent);
put_device(&parent_port->dev);
if (rc)
return rc;
if (resource_size(&cxlds->pmem_res) && IS_ENABLED(CONFIG_CXL_PMEM)) {
	rc = devm_cxl_add_nvdimm(cxlmd);
	/*
	 * -ENODEV is tolerated (logged only) and any other error aborts
	 * the probe. Success (rc == 0) must fall through: returning here
	 * would skip the remainder of the probe routine below.
	 */
	if (rc == -ENODEV)
		dev_info(dev, "PMEM disabled by platform\n");
	else if (rc)
		return rc;
}
/*
* The kernel may be operating out of CXL memory on this device,
* there is no spec defined way to determine whether this device
......
......@@ -9,10 +9,13 @@
#include <linux/list.h>
#include <linux/pci.h>
#include <linux/pci-doe.h>
#include <linux/aer.h>
#include <linux/io.h>
#include "cxlmem.h"
#include "cxlpci.h"
#include "cxl.h"
#define CREATE_TRACE_POINTS
#include <trace/events/cxl.h>
/**
* DOC: cxl pci
......@@ -276,35 +279,22 @@ static int cxl_pci_setup_mailbox(struct cxl_dev_state *cxlds)
static int cxl_map_regblock(struct pci_dev *pdev, struct cxl_register_map *map)
{
void __iomem *addr;
int bar = map->barno;
struct device *dev = &pdev->dev;
resource_size_t offset = map->block_offset;
/* Basic sanity check that BAR is big enough */
if (pci_resource_len(pdev, bar) < offset) {
dev_err(dev, "BAR%d: %pr: too small (offset: %pa)\n", bar,
&pdev->resource[bar], &offset);
return -ENXIO;
}
addr = pci_iomap(pdev, bar, 0);
if (!addr) {
map->base = ioremap(map->resource, map->max_size);
if (!map->base) {
dev_err(dev, "failed to map registers\n");
return -ENOMEM;
}
dev_dbg(dev, "Mapped CXL Memory Device resource bar %u @ %pa\n",
bar, &offset);
map->base = addr + map->block_offset;
dev_dbg(dev, "Mapped CXL Memory Device resource %pa\n", &map->resource);
return 0;
}
static void cxl_unmap_regblock(struct pci_dev *pdev,
struct cxl_register_map *map)
{
pci_iounmap(pdev, map->base - map->block_offset);
iounmap(map->base);
map->base = NULL;
}
......@@ -324,6 +314,9 @@ static int cxl_probe_regs(struct pci_dev *pdev, struct cxl_register_map *map)
return -ENXIO;
}
if (!comp_map->ras.valid)
dev_dbg(dev, "RAS registers not found\n");
dev_dbg(dev, "Set up component registers\n");
break;
case CXL_REGLOC_RBI_MEMDEV:
......@@ -347,27 +340,6 @@ static int cxl_probe_regs(struct pci_dev *pdev, struct cxl_register_map *map)
return 0;
}
static int cxl_map_regs(struct cxl_dev_state *cxlds, struct cxl_register_map *map)
{
struct device *dev = cxlds->dev;
struct pci_dev *pdev = to_pci_dev(dev);
switch (map->reg_type) {
case CXL_REGLOC_RBI_COMPONENT:
cxl_map_component_regs(pdev, &cxlds->regs.component, map);
dev_dbg(dev, "Mapping component registers...\n");
break;
case CXL_REGLOC_RBI_MEMDEV:
cxl_map_device_regs(pdev, &cxlds->regs.device_regs, map);
dev_dbg(dev, "Probing device registers...\n");
break;
default:
break;
}
return 0;
}
static int cxl_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type,
struct cxl_register_map *map)
{
......@@ -418,6 +390,11 @@ static void devm_cxl_pci_create_doe(struct cxl_dev_state *cxlds)
continue;
}
if (!pci_request_config_region_exclusive(pdev, off,
PCI_DOE_CAP_SIZEOF,
dev_name(dev)))
pci_err(pdev, "Failed to exclude DOE registers\n");
if (xa_insert(&cxlds->doe_mbs, off, doe_mb, GFP_KERNEL)) {
dev_err(dev, "xa_insert failed to insert MB @ %x\n",
off);
......@@ -428,6 +405,20 @@ static void devm_cxl_pci_create_doe(struct cxl_dev_state *cxlds)
}
}
/*
 * Assume that any RCIEP that emits the CXL memory expander class code
 * is an RCD
 *
 * Returns true when @pdev's PCIe device type is PCI_EXP_TYPE_RC_END.
 * The class-code half of the assumption is not checked here.
 */
static bool is_cxl_restricted(struct pci_dev *pdev)
{
return pci_pcie_type(pdev) == PCI_EXP_TYPE_RC_END;
}
/*
 * Callback with a void * argument: turns PCIe error reporting back off
 * for the pci_dev passed in as @pdev.
 */
static void disable_aer(void *pdev)
{
pci_disable_pcie_error_reporting(pdev);
}
static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct cxl_register_map map;
......@@ -449,7 +440,9 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
cxlds = cxl_dev_state_create(&pdev->dev);
if (IS_ERR(cxlds))
return PTR_ERR(cxlds);
pci_set_drvdata(pdev, cxlds);
cxlds->rcd = is_cxl_restricted(pdev);
cxlds->serial = pci_get_dsn(pdev);
cxlds->cxl_dvsec = pci_find_dvsec_capability(
pdev, PCI_DVSEC_VENDOR_ID_CXL, CXL_DVSEC_PCIE_DEVICE);
......@@ -461,7 +454,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (rc)
return rc;
rc = cxl_map_regs(cxlds, &map);
rc = cxl_map_device_regs(&pdev->dev, &cxlds->regs.device_regs, &map);
if (rc)
return rc;
......@@ -474,10 +467,15 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (rc)
dev_warn(&pdev->dev, "No component registers (%d)\n", rc);
cxlds->component_reg_phys = cxl_regmap_to_base(pdev, &map);
cxlds->component_reg_phys = map.resource;
devm_cxl_pci_create_doe(cxlds);
rc = cxl_map_component_regs(&pdev->dev, &cxlds->regs.component,
&map, BIT(CXL_CM_CAP_CAP_ID_RAS));
if (rc)
dev_dbg(&pdev->dev, "Failed to map RAS capability.\n");
rc = cxl_pci_setup_mailbox(cxlds);
if (rc)
return rc;
......@@ -498,8 +496,13 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (IS_ERR(cxlmd))
return PTR_ERR(cxlmd);
if (resource_size(&cxlds->pmem_res) && IS_ENABLED(CONFIG_CXL_PMEM))
rc = devm_cxl_add_nvdimm(&pdev->dev, cxlmd);
if (cxlds->regs.ras) {
pci_enable_pcie_error_reporting(pdev);
rc = devm_add_action_or_reset(&pdev->dev, disable_aer, pdev);
if (rc)
return rc;
}
pci_save_state(pdev);
return rc;
}
......@@ -511,10 +514,151 @@ static const struct pci_device_id cxl_mem_pci_tbl[] = {
};
MODULE_DEVICE_TABLE(pci, cxl_mem_pci_tbl);
/*
 * CXL spec rev3.0 8.2.4.16.1
 *
 * Copy the RAS header log out of the mapped RAS capability into @log,
 * one 32-bit register read at a time. @log must have room for
 * CXL_HEADERLOG_SIZE bytes.
 */
static void header_log_copy(struct cxl_dev_state *cxlds, u32 *log)
{
	void __iomem *src = cxlds->regs.ras + CXL_RAS_HEADER_LOG_OFFSET;
	int nr_dwords = CXL_HEADERLOG_SIZE / sizeof(u32);
	int idx;

	for (idx = 0; idx < nr_dwords; idx++)
		log[idx] = readl(src + idx * sizeof(u32));
}
/*
* Log the state of the RAS status registers and prepare them to log the
* next error status. Return 1 if reset needed.
*/
static bool cxl_report_and_clear(struct cxl_dev_state *cxlds)
{
struct cxl_memdev *cxlmd = cxlds->cxlmd;
struct device *dev = &cxlmd->dev;
u32 hl[CXL_HEADERLOG_SIZE_U32];
void __iomem *addr;
u32 status;
u32 fe;
if (!cxlds->regs.ras)
return false;
addr = cxlds->regs.ras + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET;
status = readl(addr);
if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK))
return false;
/* If multiple errors, log header points to first error from ctrl reg */
if (hweight32(status) > 1) {
addr = cxlds->regs.ras + CXL_RAS_CAP_CONTROL_OFFSET;
fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK, readl(addr)));
} else {
fe = status;
}
header_log_copy(cxlds, hl);
trace_cxl_aer_uncorrectable_error(dev, status, fe, hl);
writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr);
return true;
}
/*
 * error_detected handler for the CXL PCI driver (installed via
 * cxl_error_handlers).
 *
 * Always reports and clears the uncorrectable RAS status first, then
 * decides based on @state:
 *  - pci_channel_io_normal: if an uncorrectable error was reported,
 *    release the memdev's driver and ask for a reset; otherwise report
 *    that recovery can proceed.
 *  - pci_channel_io_frozen: warn, release the memdev's driver, ask for a
 *    reset.
 *  - pci_channel_io_perm_failure: warn and request disconnect.
 * Any other @state value falls out of the switch and asks for a reset.
 */
static pci_ers_result_t cxl_error_detected(struct pci_dev *pdev,
pci_channel_state_t state)
{
struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
struct cxl_memdev *cxlmd = cxlds->cxlmd;
struct device *dev = &cxlmd->dev;
bool ue;
/*
* A frozen channel indicates an impending reset which is fatal to
* CXL.mem operation, and will likely crash the system. On the off
* chance the situation is recoverable dump the status of the RAS
* capability registers and bounce the active state of the memdev.
*/
ue = cxl_report_and_clear(cxlds);
switch (state) {
case pci_channel_io_normal:
if (ue) {
device_release_driver(dev);
return PCI_ERS_RESULT_NEED_RESET;
}
return PCI_ERS_RESULT_CAN_RECOVER;
case pci_channel_io_frozen:
dev_warn(&pdev->dev,
"%s: frozen state error detected, disable CXL.mem\n",
dev_name(dev));
device_release_driver(dev);
return PCI_ERS_RESULT_NEED_RESET;
case pci_channel_io_perm_failure:
dev_warn(&pdev->dev,
"failure state error detected, request disconnect\n");
return PCI_ERS_RESULT_DISCONNECT;
}
/* Unhandled channel states default to requesting a reset */
return PCI_ERS_RESULT_NEED_RESET;
}
/*
 * slot_reset handler for the CXL PCI driver (installed via
 * cxl_error_handlers).
 *
 * Restores the saved PCI state of @pdev and tries to re-attach a driver
 * to the memdev. Returns PCI_ERS_RESULT_RECOVERED when device_attach()
 * returns a positive value, PCI_ERS_RESULT_DISCONNECT otherwise.
 */
static pci_ers_result_t cxl_slot_reset(struct pci_dev *pdev)
{
struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
struct cxl_memdev *cxlmd = cxlds->cxlmd;
struct device *dev = &cxlmd->dev;
dev_info(&pdev->dev, "%s: restart CXL.mem after slot reset\n",
dev_name(dev));
pci_restore_state(pdev);
if (device_attach(dev) <= 0)
return PCI_ERS_RESULT_DISCONNECT;
return PCI_ERS_RESULT_RECOVERED;
}
/*
 * resume handler for the CXL PCI driver (installed via
 * cxl_error_handlers).
 *
 * Only logs the outcome: "successful" when the memdev currently has a
 * driver bound, "failed" otherwise.
 */
static void cxl_error_resume(struct pci_dev *pdev)
{
struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
struct cxl_memdev *cxlmd = cxlds->cxlmd;
struct device *dev = &cxlmd->dev;
dev_info(&pdev->dev, "%s: error resume %s\n", dev_name(dev),
dev->driver ? "successful" : "failed");
}
static void cxl_cor_error_detected(struct pci_dev *pdev)
{
struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
struct cxl_memdev *cxlmd = cxlds->cxlmd;
struct device *dev = &cxlmd->dev;
void __iomem *addr;
u32 status;
if (!cxlds->regs.ras)
return;
addr = cxlds->regs.ras + CXL_RAS_CORRECTABLE_STATUS_OFFSET;
status = readl(addr);
if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) {
writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr);
trace_cxl_aer_correctable_error(dev, status);
}
}
/*
 * PCI error-recovery callbacks for the CXL PCI driver; hooked up through
 * cxl_pci_driver.err_handler.
 */
static const struct pci_error_handlers cxl_error_handlers = {
.error_detected = cxl_error_detected,
.slot_reset = cxl_slot_reset,
.resume = cxl_error_resume,
.cor_error_detected = cxl_cor_error_detected,
};
static struct pci_driver cxl_pci_driver = {
.name = KBUILD_MODNAME,
.id_table = cxl_mem_pci_tbl,
.probe = cxl_pci_probe,
.err_handler = &cxl_error_handlers,
.driver = {
.probe_type = PROBE_PREFER_ASYNCHRONOUS,
},
......
......@@ -11,12 +11,7 @@
#include "cxlmem.h"
#include "cxl.h"
/*
* Ordered workqueue for cxl nvdimm device arrival and departure
* to coordinate bus rescans when a bridge arrives and trigger remove
* operations when the bridge is removed.
*/
static struct workqueue_struct *cxl_pmem_wq;
extern const struct nvdimm_security_ops *cxl_security_ops;
static __read_mostly DECLARE_BITMAP(exclusive_cmds, CXL_MEM_COMMAND_ID_MAX);
......@@ -27,78 +22,81 @@ static void clear_exclusive(void *cxlds)
static void unregister_nvdimm(void *nvdimm)
{
struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
struct cxl_nvdimm_bridge *cxl_nvb = cxl_nvd->bridge;
struct cxl_pmem_region *cxlr_pmem;
unsigned long index;
nvdimm_delete(nvdimm);
}
device_lock(&cxl_nvb->dev);
dev_set_drvdata(&cxl_nvd->dev, NULL);
xa_for_each(&cxl_nvd->pmem_regions, index, cxlr_pmem) {
get_device(&cxlr_pmem->dev);
device_unlock(&cxl_nvb->dev);
static ssize_t provider_show(struct device *dev, struct device_attribute *attr, char *buf)
{
struct nvdimm *nvdimm = to_nvdimm(dev);
struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
device_release_driver(&cxlr_pmem->dev);
put_device(&cxlr_pmem->dev);
return sysfs_emit(buf, "%s\n", dev_name(&cxl_nvd->dev));
}
static DEVICE_ATTR_RO(provider);
device_lock(&cxl_nvb->dev);
}
device_unlock(&cxl_nvb->dev);
static ssize_t id_show(struct device *dev, struct device_attribute *attr, char *buf)
{
struct nvdimm *nvdimm = to_nvdimm(dev);
struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
struct cxl_dev_state *cxlds = cxl_nvd->cxlmd->cxlds;
nvdimm_delete(nvdimm);
cxl_nvd->bridge = NULL;
return sysfs_emit(buf, "%lld\n", cxlds->serial);
}
static DEVICE_ATTR_RO(id);
static struct attribute *cxl_dimm_attributes[] = {
&dev_attr_id.attr,
&dev_attr_provider.attr,
NULL
};
static const struct attribute_group cxl_dimm_attribute_group = {
.name = "cxl",
.attrs = cxl_dimm_attributes,
};
static const struct attribute_group *cxl_dimm_attribute_groups[] = {
&cxl_dimm_attribute_group,
NULL
};
static int cxl_nvdimm_probe(struct device *dev)
{
struct cxl_nvdimm *cxl_nvd = to_cxl_nvdimm(dev);
struct cxl_memdev *cxlmd = cxl_nvd->cxlmd;
struct cxl_nvdimm_bridge *cxl_nvb = cxlmd->cxl_nvb;
unsigned long flags = 0, cmd_mask = 0;
struct cxl_dev_state *cxlds = cxlmd->cxlds;
struct cxl_nvdimm_bridge *cxl_nvb;
struct nvdimm *nvdimm;
int rc;
cxl_nvb = cxl_find_nvdimm_bridge(dev);
if (!cxl_nvb)
return -ENXIO;
device_lock(&cxl_nvb->dev);
if (!cxl_nvb->nvdimm_bus) {
rc = -ENXIO;
goto out;
}
set_exclusive_cxl_commands(cxlds, exclusive_cmds);
rc = devm_add_action_or_reset(dev, clear_exclusive, cxlds);
if (rc)
goto out;
return rc;
set_bit(NDD_LABELING, &flags);
set_bit(ND_CMD_GET_CONFIG_SIZE, &cmd_mask);
set_bit(ND_CMD_GET_CONFIG_DATA, &cmd_mask);
set_bit(ND_CMD_SET_CONFIG_DATA, &cmd_mask);
nvdimm = nvdimm_create(cxl_nvb->nvdimm_bus, cxl_nvd, NULL, flags,
cmd_mask, 0, NULL);
if (!nvdimm) {
rc = -ENOMEM;
goto out;
}
nvdimm = __nvdimm_create(cxl_nvb->nvdimm_bus, cxl_nvd,
cxl_dimm_attribute_groups, flags,
cmd_mask, 0, NULL, cxl_nvd->dev_id,
cxl_security_ops, NULL);
if (!nvdimm)
return -ENOMEM;
dev_set_drvdata(dev, nvdimm);
cxl_nvd->bridge = cxl_nvb;
rc = devm_add_action_or_reset(dev, unregister_nvdimm, nvdimm);
out:
device_unlock(&cxl_nvb->dev);
put_device(&cxl_nvb->dev);
return rc;
return devm_add_action_or_reset(dev, unregister_nvdimm, nvdimm);
}
static struct cxl_driver cxl_nvdimm_driver = {
.name = "cxl_nvdimm",
.probe = cxl_nvdimm_probe,
.id = CXL_DEVICE_NVDIMM,
.drv = {
.suppress_bind_attrs = true,
},
};
static int cxl_pmem_get_config_size(struct cxl_dev_state *cxlds,
......@@ -121,6 +119,7 @@ static int cxl_pmem_get_config_data(struct cxl_dev_state *cxlds,
unsigned int buf_len)
{
struct cxl_mbox_get_lsa get_lsa;
struct cxl_mbox_cmd mbox_cmd;
int rc;
if (sizeof(*cmd) > buf_len)
......@@ -132,9 +131,15 @@ static int cxl_pmem_get_config_data(struct cxl_dev_state *cxlds,
.offset = cpu_to_le32(cmd->in_offset),
.length = cpu_to_le32(cmd->in_length),
};
mbox_cmd = (struct cxl_mbox_cmd) {
.opcode = CXL_MBOX_OP_GET_LSA,
.payload_in = &get_lsa,
.size_in = sizeof(get_lsa),
.size_out = cmd->in_length,
.payload_out = cmd->out_buf,
};
rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_GET_LSA, &get_lsa,
sizeof(get_lsa), cmd->out_buf, cmd->in_length);
rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
cmd->status = 0;
return rc;
......@@ -145,6 +150,7 @@ static int cxl_pmem_set_config_data(struct cxl_dev_state *cxlds,
unsigned int buf_len)
{
struct cxl_mbox_set_lsa *set_lsa;
struct cxl_mbox_cmd mbox_cmd;
int rc;
if (sizeof(*cmd) > buf_len)
......@@ -163,10 +169,13 @@ static int cxl_pmem_set_config_data(struct cxl_dev_state *cxlds,
.offset = cpu_to_le32(cmd->in_offset),
};
memcpy(set_lsa->data, cmd->in_buf, cmd->in_length);
mbox_cmd = (struct cxl_mbox_cmd) {
.opcode = CXL_MBOX_OP_SET_LSA,
.payload_in = set_lsa,
.size_in = struct_size(set_lsa, data, cmd->in_length),
};
rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_SET_LSA, set_lsa,
struct_size(set_lsa, data, cmd->in_length),
NULL, 0);
rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
/*
* Set "firmware" status (4-packed bytes at the end of the input
......@@ -216,204 +225,48 @@ static int cxl_pmem_ctl(struct nvdimm_bus_descriptor *nd_desc,
return cxl_pmem_nvdimm_ctl(nvdimm, cmd, buf, buf_len);
}
static bool online_nvdimm_bus(struct cxl_nvdimm_bridge *cxl_nvb)
{
if (cxl_nvb->nvdimm_bus)
return true;
cxl_nvb->nvdimm_bus =
nvdimm_bus_register(&cxl_nvb->dev, &cxl_nvb->nd_desc);
return cxl_nvb->nvdimm_bus != NULL;
}
static int cxl_nvdimm_release_driver(struct device *dev, void *cxl_nvb)
{
struct cxl_nvdimm *cxl_nvd;
if (!is_cxl_nvdimm(dev))
return 0;
cxl_nvd = to_cxl_nvdimm(dev);
if (cxl_nvd->bridge != cxl_nvb)
return 0;
device_release_driver(dev);
return 0;
}
static int cxl_pmem_region_release_driver(struct device *dev, void *cxl_nvb)
{
struct cxl_pmem_region *cxlr_pmem;
if (!is_cxl_pmem_region(dev))
return 0;
cxlr_pmem = to_cxl_pmem_region(dev);
if (cxlr_pmem->bridge != cxl_nvb)
return 0;
device_release_driver(dev);
return 0;
}
static void offline_nvdimm_bus(struct cxl_nvdimm_bridge *cxl_nvb,
struct nvdimm_bus *nvdimm_bus)
static void unregister_nvdimm_bus(void *_cxl_nvb)
{
if (!nvdimm_bus)
return;
struct cxl_nvdimm_bridge *cxl_nvb = _cxl_nvb;
struct nvdimm_bus *nvdimm_bus = cxl_nvb->nvdimm_bus;
/*
* Set the state of cxl_nvdimm devices to unbound / idle before
* nvdimm_bus_unregister() rips the nvdimm objects out from
* underneath them.
*/
bus_for_each_dev(&cxl_bus_type, NULL, cxl_nvb,
cxl_pmem_region_release_driver);
bus_for_each_dev(&cxl_bus_type, NULL, cxl_nvb,
cxl_nvdimm_release_driver);
cxl_nvb->nvdimm_bus = NULL;
nvdimm_bus_unregister(nvdimm_bus);
}
/*
 * Work handler that brings the nvdimm bus of a bridge in line with the
 * bridge's current ->state.
 *
 * The decision is made under the bridge device lock; the follow-up
 * actions (driver release, bus rescan, bus teardown) are carried out
 * after the lock has been dropped. The device reference taken when the
 * work was queued is dropped at the end.
 */
static void cxl_nvb_update_state(struct work_struct *work)
{
	struct cxl_nvdimm_bridge *cxl_nvb =
		container_of(work, typeof(*cxl_nvb), state_work);
	struct nvdimm_bus *victim_bus = NULL;
	bool release = false, rescan = false;
	device_lock(&cxl_nvb->dev);
	switch (cxl_nvb->state) {
	case CXL_NVB_ONLINE:
		/* No bus could be established: unbind the bridge driver below */
		if (!online_nvdimm_bus(cxl_nvb)) {
			dev_err(&cxl_nvb->dev,
				"failed to establish nvdimm bus\n");
			release = true;
		} else
			rescan = true;
		break;
	case CXL_NVB_OFFLINE:
	case CXL_NVB_DEAD:
		/* Detach the bus from the bridge now, tear it down after unlock */
		victim_bus = cxl_nvb->nvdimm_bus;
		cxl_nvb->nvdimm_bus = NULL;
		break;
	default:
		break;
	}
	device_unlock(&cxl_nvb->dev);
	if (release)
		device_release_driver(&cxl_nvb->dev);
	if (rescan) {
		/* Rescan result is only reported at debug level, not acted upon */
		int rc = bus_rescan_devices(&cxl_bus_type);
		dev_dbg(&cxl_nvb->dev, "rescan: %d\n", rc);
	}
	offline_nvdimm_bus(cxl_nvb, victim_bus);
	/* Pairs with get_device() in cxl_nvdimm_bridge_state_work() */
	put_device(&cxl_nvb->dev);
}
/*
 * Schedule ->state_work for @cxl_nvb on cxl_pmem_wq.
 *
 * A device reference is taken on behalf of the work item. When
 * queue_work() reports that nothing new was queued, that reference is
 * returned immediately; otherwise the work handler drops it.
 */
static void cxl_nvdimm_bridge_state_work(struct cxl_nvdimm_bridge *cxl_nvb)
{
	struct device *bridge_dev = &cxl_nvb->dev;

	get_device(bridge_dev);
	if (queue_work(cxl_pmem_wq, &cxl_nvb->state_work))
		return;

	/* Nothing new was queued, so nobody else will drop this reference */
	put_device(bridge_dev);
}
/*
 * Driver ->remove() for the nvdimm bridge: an online bridge is moved to
 * the offline state, then the state work is scheduled in every case.
 */
static void cxl_nvdimm_bridge_remove(struct device *dev)
{
	struct cxl_nvdimm_bridge *bridge = to_cxl_nvdimm_bridge(dev);

	switch (bridge->state) {
	case CXL_NVB_ONLINE:
		bridge->state = CXL_NVB_OFFLINE;
		break;
	default:
		/* Any other state is left as-is */
		break;
	}

	cxl_nvdimm_bridge_state_work(bridge);
}
static int cxl_nvdimm_bridge_probe(struct device *dev)
{
struct cxl_nvdimm_bridge *cxl_nvb = to_cxl_nvdimm_bridge(dev);
if (cxl_nvb->state == CXL_NVB_DEAD)
return -ENXIO;
if (cxl_nvb->state == CXL_NVB_NEW) {
cxl_nvb->nd_desc = (struct nvdimm_bus_descriptor) {
.provider_name = "CXL",
.module = THIS_MODULE,
.ndctl = cxl_pmem_ctl,
};
cxl_nvb->nd_desc = (struct nvdimm_bus_descriptor) {
.provider_name = "CXL",
.module = THIS_MODULE,
.ndctl = cxl_pmem_ctl,
};
INIT_WORK(&cxl_nvb->state_work, cxl_nvb_update_state);
}
cxl_nvb->nvdimm_bus =
nvdimm_bus_register(&cxl_nvb->dev, &cxl_nvb->nd_desc);
cxl_nvb->state = CXL_NVB_ONLINE;
cxl_nvdimm_bridge_state_work(cxl_nvb);
if (!cxl_nvb->nvdimm_bus)
return -ENOMEM;
return 0;
return devm_add_action_or_reset(dev, unregister_nvdimm_bus, cxl_nvb);
}
/*
 * cxl bus driver for CXL_DEVICE_NVDIMM_BRIDGE devices. Manual bind/unbind
 * through sysfs is suppressed via ->drv.suppress_bind_attrs.
 */
static struct cxl_driver cxl_nvdimm_bridge_driver = {
	/* identity */
	.name = "cxl_nvdimm_bridge",
	.id = CXL_DEVICE_NVDIMM_BRIDGE,
	/* lifecycle callbacks */
	.probe = cxl_nvdimm_bridge_probe,
	.remove = cxl_nvdimm_bridge_remove,
	/* driver-core options */
	.drv = {
		.suppress_bind_attrs = true,
	},
};
/*
 * Device match callback: non-zero when @dev is a cxl_nvdimm device.
 * @data is unused.
 */
static int match_cxl_nvdimm(struct device *dev, void *data)
{
	if (is_cxl_nvdimm(dev))
		return 1;

	return 0;
}
/*
 * Release action with a void * signature: deletes the nvdimm region that
 * was handed over as the opaque @nd_region cookie.
 */
static void unregister_nvdimm_region(void *nd_region)
{
	void *region = nd_region;

	nvdimm_region_delete(region);
}
/*
 * Record @cxlr_pmem in @cxl_nvd's ->pmem_regions xarray, keyed by the
 * region pointer itself, and pin the region device on success.
 *
 * Returns 0 on success or the xa_insert() error; no reference is taken
 * on failure.
 */
static int cxl_nvdimm_add_region(struct cxl_nvdimm *cxl_nvd,
				 struct cxl_pmem_region *cxlr_pmem)
{
	unsigned long key = (unsigned long)cxlr_pmem;
	int rc = xa_insert(&cxl_nvd->pmem_regions, key, cxlr_pmem, GFP_KERNEL);

	/* Only a successfully recorded region gets pinned */
	if (!rc)
		get_device(&cxlr_pmem->dev);

	return rc;
}
/*
 * Drop @cxlr_pmem from @cxl_nvd's ->pmem_regions xarray and release the
 * device reference that was taken when it was added.
 *
 * Callers may invoke this for a region that was never added; in that case
 * xa_erase() finds nothing and no reference is dropped.
 */
static void cxl_nvdimm_del_region(struct cxl_nvdimm *cxl_nvd,
				  struct cxl_pmem_region *cxlr_pmem)
{
	struct cxl_pmem_region *erased;

	erased = xa_erase(&cxl_nvd->pmem_regions, (unsigned long)cxlr_pmem);
	if (!erased)
		return;

	put_device(&erased->dev);
}
/*
 * Release action with a void * signature (@data is the cxl_pmem_region):
 * with the bridge device lock held, remove the region from the tracking
 * xarray of each mapped cxl_nvdimm.
 */
static void release_mappings(void *data)
{
	struct cxl_pmem_region *region = data;
	struct device *bridge_dev = &region->bridge->dev;
	int idx;

	device_lock(bridge_dev);
	for (idx = 0; idx < region->nr_mappings; idx++)
		cxl_nvdimm_del_region(region->mapping[idx].cxl_nvd, region);
	device_unlock(bridge_dev);
}
static void cxlr_pmem_remove_resource(void *res)
{
remove_resource(res);
......@@ -429,8 +282,8 @@ static int cxl_pmem_region_probe(struct device *dev)
struct nd_mapping_desc mappings[CXL_DECODER_MAX_INTERLEAVE];
struct cxl_pmem_region *cxlr_pmem = to_cxl_pmem_region(dev);
struct cxl_region *cxlr = cxlr_pmem->cxlr;
struct cxl_nvdimm_bridge *cxl_nvb = cxlr->cxl_nvb;
struct cxl_pmem_region_info *info = NULL;
struct cxl_nvdimm_bridge *cxl_nvb;
struct nd_interleave_set *nd_set;
struct nd_region_desc ndr_desc;
struct cxl_nvdimm *cxl_nvd;
......@@ -438,28 +291,12 @@ static int cxl_pmem_region_probe(struct device *dev)
struct resource *res;
int rc, i = 0;
cxl_nvb = cxl_find_nvdimm_bridge(&cxlr_pmem->mapping[0].cxlmd->dev);
if (!cxl_nvb) {
dev_dbg(dev, "bridge not found\n");
return -ENXIO;
}
cxlr_pmem->bridge = cxl_nvb;
device_lock(&cxl_nvb->dev);
if (!cxl_nvb->nvdimm_bus) {
dev_dbg(dev, "nvdimm bus not found\n");
rc = -ENXIO;
goto out_nvb;
}
memset(&mappings, 0, sizeof(mappings));
memset(&ndr_desc, 0, sizeof(ndr_desc));
res = devm_kzalloc(dev, sizeof(*res), GFP_KERNEL);
if (!res) {
rc = -ENOMEM;
goto out_nvb;
}
if (!res)
return -ENOMEM;
res->name = "Persistent Memory";
res->start = cxlr_pmem->hpa_range.start;
......@@ -469,11 +306,11 @@ static int cxl_pmem_region_probe(struct device *dev)
rc = insert_resource(&iomem_resource, res);
if (rc)
goto out_nvb;
return rc;
rc = devm_add_action_or_reset(dev, cxlr_pmem_remove_resource, res);
if (rc)
goto out_nvb;
return rc;
ndr_desc.res = res;
ndr_desc.provider_data = cxlr_pmem;
......@@ -487,43 +324,23 @@ static int cxl_pmem_region_probe(struct device *dev)
}
nd_set = devm_kzalloc(dev, sizeof(*nd_set), GFP_KERNEL);
if (!nd_set) {
rc = -ENOMEM;
goto out_nvb;
}
if (!nd_set)
return -ENOMEM;
ndr_desc.memregion = cxlr->id;
set_bit(ND_REGION_CXL, &ndr_desc.flags);
set_bit(ND_REGION_PERSIST_MEMCTRL, &ndr_desc.flags);
info = kmalloc_array(cxlr_pmem->nr_mappings, sizeof(*info), GFP_KERNEL);
if (!info) {
rc = -ENOMEM;
goto out_nvb;
}
rc = devm_add_action_or_reset(dev, release_mappings, cxlr_pmem);
if (rc)
goto out_nvd;
if (!info)
return -ENOMEM;
for (i = 0; i < cxlr_pmem->nr_mappings; i++) {
struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i];
struct cxl_memdev *cxlmd = m->cxlmd;
struct cxl_dev_state *cxlds = cxlmd->cxlds;
struct device *d;
d = device_find_child(&cxlmd->dev, NULL, match_cxl_nvdimm);
if (!d) {
dev_dbg(dev, "[%d]: %s: no cxl_nvdimm found\n", i,
dev_name(&cxlmd->dev));
rc = -ENODEV;
goto out_nvd;
}
/* safe to drop ref now with bridge lock held */
put_device(d);
cxl_nvd = to_cxl_nvdimm(d);
cxl_nvd = cxlmd->cxl_nvd;
nvdimm = dev_get_drvdata(&cxl_nvd->dev);
if (!nvdimm) {
dev_dbg(dev, "[%d]: %s: no nvdimm found\n", i,
......@@ -532,14 +349,6 @@ static int cxl_pmem_region_probe(struct device *dev)
goto out_nvd;
}
/*
* Pin the region per nvdimm device as those may be released
* out-of-order with respect to the region, and a single nvdimm
* maybe associated with multiple regions
*/
rc = cxl_nvdimm_add_region(cxl_nvd, cxlr_pmem);
if (rc)
goto out_nvd;
m->cxl_nvd = cxl_nvd;
mappings[i] = (struct nd_mapping_desc) {
.nvdimm = nvdimm,
......@@ -572,9 +381,6 @@ static int cxl_pmem_region_probe(struct device *dev)
cxlr_pmem->nd_region);
out_nvd:
kfree(info);
out_nvb:
device_unlock(&cxl_nvb->dev);
put_device(&cxl_nvb->dev);
return rc;
}
......@@ -583,33 +389,11 @@ static struct cxl_driver cxl_pmem_region_driver = {
.name = "cxl_pmem_region",
.probe = cxl_pmem_region_probe,
.id = CXL_DEVICE_PMEM_REGION,
.drv = {
.suppress_bind_attrs = true,
},
};
/*
 * bus_for_each_dev() callback that puts every nvdimm bridge back into
 * CXL_NVB_NEW, which invalidates any ->state_work that still refers to
 * the cxl_pmem_wq that has just been destroyed. Non-bridge devices are
 * skipped. Always returns 0 so the walk covers the whole bus.
 */
static int cxl_nvdimm_bridge_reset(struct device *dev, void *data)
{
	if (is_cxl_nvdimm_bridge(dev)) {
		struct cxl_nvdimm_bridge *bridge = to_cxl_nvdimm_bridge(dev);

		/* The state change is made under the device lock */
		device_lock(dev);
		bridge->state = CXL_NVB_NEW;
		device_unlock(dev);
	}

	return 0;
}
/*
 * Destroy cxl_pmem_wq, then walk the cxl bus so that
 * cxl_nvdimm_bridge_reset() can return every bridge to its initial state.
 */
static void destroy_cxl_pmem_wq(void)
{
	destroy_workqueue(cxl_pmem_wq);
	/* Runs after the workqueue is gone: bridges must not keep stale work */
	bus_for_each_dev(&cxl_bus_type, NULL, NULL, cxl_nvdimm_bridge_reset);
}
static __init int cxl_pmem_init(void)
{
int rc;
......@@ -617,13 +401,9 @@ static __init int cxl_pmem_init(void)
set_bit(CXL_MEM_COMMAND_ID_SET_SHUTDOWN_STATE, exclusive_cmds);
set_bit(CXL_MEM_COMMAND_ID_SET_LSA, exclusive_cmds);
cxl_pmem_wq = alloc_ordered_workqueue("cxl_pmem", 0);
if (!cxl_pmem_wq)
return -ENXIO;
rc = cxl_driver_register(&cxl_nvdimm_bridge_driver);
if (rc)
goto err_bridge;
return rc;
rc = cxl_driver_register(&cxl_nvdimm_driver);
if (rc)
......@@ -639,8 +419,6 @@ static __init int cxl_pmem_init(void)
cxl_driver_unregister(&cxl_nvdimm_driver);
err_nvdimm:
cxl_driver_unregister(&cxl_nvdimm_bridge_driver);
err_bridge:
destroy_cxl_pmem_wq();
return rc;
}
......@@ -649,7 +427,6 @@ static __exit void cxl_pmem_exit(void)
cxl_driver_unregister(&cxl_pmem_region_driver);
cxl_driver_unregister(&cxl_nvdimm_driver);
cxl_driver_unregister(&cxl_nvdimm_bridge_driver);
destroy_cxl_pmem_wq();
}
MODULE_LICENSE("GPL v2");
......
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2022 Intel Corporation. All rights reserved. */
#include <linux/libnvdimm.h>
#include <asm/unaligned.h>
#include <linux/module.h>
#include <linux/async.h>
#include <linux/slab.h>
#include <linux/memregion.h>
#include "cxlmem.h"
#include "cxl.h"
static unsigned long cxl_pmem_get_security_flags(struct nvdimm *nvdimm,
enum nvdimm_passphrase_type ptype)
{
struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
struct cxl_memdev *cxlmd = cxl_nvd->cxlmd;
struct cxl_dev_state *cxlds = cxlmd->cxlds;
unsigned long security_flags = 0;
struct cxl_get_security_output {
__le32 flags;
} out;
struct cxl_mbox_cmd mbox_cmd;
u32 sec_out;
int rc;
mbox_cmd = (struct cxl_mbox_cmd) {
.opcode = CXL_MBOX_OP_GET_SECURITY_STATE,
.size_out = sizeof(out),
.payload_out = &out,
};
rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
if (rc < 0)
return 0;
sec_out = le32_to_cpu(out.flags);
if (ptype == NVDIMM_MASTER) {
if (sec_out & CXL_PMEM_SEC_STATE_MASTER_PASS_SET)
set_bit(NVDIMM_SECURITY_UNLOCKED, &security_flags);
else
set_bit(NVDIMM_SECURITY_DISABLED, &security_flags);
if (sec_out & CXL_PMEM_SEC_STATE_MASTER_PLIMIT)
set_bit(NVDIMM_SECURITY_FROZEN, &security_flags);
return security_flags;
}
if (sec_out & CXL_PMEM_SEC_STATE_USER_PASS_SET) {
if (sec_out & CXL_PMEM_SEC_STATE_FROZEN ||
sec_out & CXL_PMEM_SEC_STATE_USER_PLIMIT)
set_bit(NVDIMM_SECURITY_FROZEN, &security_flags);
if (sec_out & CXL_PMEM_SEC_STATE_LOCKED)
set_bit(NVDIMM_SECURITY_LOCKED, &security_flags);
else
set_bit(NVDIMM_SECURITY_UNLOCKED, &security_flags);
} else {
set_bit(NVDIMM_SECURITY_DISABLED, &security_flags);
}
return security_flags;
}
static int cxl_pmem_security_change_key(struct nvdimm *nvdimm,
const struct nvdimm_key_data *old_data,
const struct nvdimm_key_data *new_data,
enum nvdimm_passphrase_type ptype)
{
struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
struct cxl_memdev *cxlmd = cxl_nvd->cxlmd;
struct cxl_dev_state *cxlds = cxlmd->cxlds;
struct cxl_mbox_cmd mbox_cmd;
struct cxl_set_pass set_pass;
set_pass = (struct cxl_set_pass) {
.type = ptype == NVDIMM_MASTER ? CXL_PMEM_SEC_PASS_MASTER :
CXL_PMEM_SEC_PASS_USER,
};
memcpy(set_pass.old_pass, old_data->data, NVDIMM_PASSPHRASE_LEN);
memcpy(set_pass.new_pass, new_data->data, NVDIMM_PASSPHRASE_LEN);
mbox_cmd = (struct cxl_mbox_cmd) {
.opcode = CXL_MBOX_OP_SET_PASSPHRASE,
.size_in = sizeof(set_pass),
.payload_in = &set_pass,
};
return cxl_internal_send_cmd(cxlds, &mbox_cmd);
}
static int __cxl_pmem_security_disable(struct nvdimm *nvdimm,
const struct nvdimm_key_data *key_data,
enum nvdimm_passphrase_type ptype)
{
struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
struct cxl_memdev *cxlmd = cxl_nvd->cxlmd;
struct cxl_dev_state *cxlds = cxlmd->cxlds;
struct cxl_disable_pass dis_pass;
struct cxl_mbox_cmd mbox_cmd;
dis_pass = (struct cxl_disable_pass) {
.type = ptype == NVDIMM_MASTER ? CXL_PMEM_SEC_PASS_MASTER :
CXL_PMEM_SEC_PASS_USER,
};
memcpy(dis_pass.pass, key_data->data, NVDIMM_PASSPHRASE_LEN);
mbox_cmd = (struct cxl_mbox_cmd) {
.opcode = CXL_MBOX_OP_DISABLE_PASSPHRASE,
.size_in = sizeof(dis_pass),
.payload_in = &dis_pass,
};
return cxl_internal_send_cmd(cxlds, &mbox_cmd);
}
static int cxl_pmem_security_disable(struct nvdimm *nvdimm,
const struct nvdimm_key_data *key_data)
{
return __cxl_pmem_security_disable(nvdimm, key_data, NVDIMM_USER);
}
static int cxl_pmem_security_disable_master(struct nvdimm *nvdimm,
const struct nvdimm_key_data *key_data)
{
return __cxl_pmem_security_disable(nvdimm, key_data, NVDIMM_MASTER);
}
static int cxl_pmem_security_freeze(struct nvdimm *nvdimm)
{
struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
struct cxl_memdev *cxlmd = cxl_nvd->cxlmd;
struct cxl_dev_state *cxlds = cxlmd->cxlds;
struct cxl_mbox_cmd mbox_cmd = {
.opcode = CXL_MBOX_OP_FREEZE_SECURITY,
};
return cxl_internal_send_cmd(cxlds, &mbox_cmd);
}
static int cxl_pmem_security_unlock(struct nvdimm *nvdimm,
const struct nvdimm_key_data *key_data)
{
struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
struct cxl_memdev *cxlmd = cxl_nvd->cxlmd;
struct cxl_dev_state *cxlds = cxlmd->cxlds;
u8 pass[NVDIMM_PASSPHRASE_LEN];
struct cxl_mbox_cmd mbox_cmd;
int rc;
memcpy(pass, key_data->data, NVDIMM_PASSPHRASE_LEN);
mbox_cmd = (struct cxl_mbox_cmd) {
.opcode = CXL_MBOX_OP_UNLOCK,
.size_in = NVDIMM_PASSPHRASE_LEN,
.payload_in = pass,
};
rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
if (rc < 0)
return rc;
return 0;
}
static int cxl_pmem_security_passphrase_erase(struct nvdimm *nvdimm,
const struct nvdimm_key_data *key,
enum nvdimm_passphrase_type ptype)
{
struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
struct cxl_memdev *cxlmd = cxl_nvd->cxlmd;
struct cxl_dev_state *cxlds = cxlmd->cxlds;
struct cxl_mbox_cmd mbox_cmd;
struct cxl_pass_erase erase;
int rc;
erase = (struct cxl_pass_erase) {
.type = ptype == NVDIMM_MASTER ? CXL_PMEM_SEC_PASS_MASTER :
CXL_PMEM_SEC_PASS_USER,
};
memcpy(erase.pass, key->data, NVDIMM_PASSPHRASE_LEN);
mbox_cmd = (struct cxl_mbox_cmd) {
.opcode = CXL_MBOX_OP_PASSPHRASE_SECURE_ERASE,
.size_in = sizeof(erase),
.payload_in = &erase,
};
rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
if (rc < 0)
return rc;
return 0;
}
/*
 * nvdimm security operations implemented on top of CXL mailbox commands.
 * No ->overwrite / ->query_overwrite handlers are provided here.
 */
static const struct nvdimm_security_ops __cxl_security_ops = {
	/* state query */
	.get_flags = cxl_pmem_get_security_flags,
	/* passphrase management */
	.change_key = cxl_pmem_security_change_key,
	.disable = cxl_pmem_security_disable,
	.disable_master = cxl_pmem_security_disable_master,
	/* state transitions */
	.freeze = cxl_pmem_security_freeze,
	.unlock = cxl_pmem_security_unlock,
	.erase = cxl_pmem_security_passphrase_erase,
};

/* Exported handle on the ops table for users outside this file */
const struct nvdimm_security_ops *cxl_security_ops = &__cxl_security_ops;
......@@ -114,4 +114,16 @@ config NVDIMM_TEST_BUILD
core devm_memremap_pages() implementation and other
infrastructure.
config NVDIMM_SECURITY_TEST
bool "Enable NVDIMM security unit tests"
depends on NVDIMM_KEYS
help
The NVDIMM and CXL subsystems support unit testing of their device
security state machines. The NVDIMM_SECURITY_TEST option disables CPU
cache maintenance operations around events like secure erase and
overwrite. Also, when enabled, the NVDIMM subsystem core helps the unit
test implement a mock state machine.
Select N if unsure.
endif
......@@ -349,11 +349,18 @@ static ssize_t available_slots_show(struct device *dev,
}
static DEVICE_ATTR_RO(available_slots);
__weak ssize_t security_show(struct device *dev,
ssize_t security_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct nvdimm *nvdimm = to_nvdimm(dev);
/*
* For the test version we need to poll the "hardware" in order
* to get the updated status for unlock testing.
*/
if (IS_ENABLED(CONFIG_NVDIMM_SECURITY_TEST))
nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER);
if (test_bit(NVDIMM_SECURITY_OVERWRITE, &nvdimm->sec.flags))
return sprintf(buf, "overwrite\n");
if (test_bit(NVDIMM_SECURITY_DISABLED, &nvdimm->sec.flags))
......
......@@ -2,6 +2,7 @@
/*
* Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
*/
#include <linux/memregion.h>
#include <linux/cpumask.h>
#include <linux/module.h>
#include <linux/device.h>
......@@ -100,6 +101,16 @@ static void nd_region_remove(struct device *dev)
*/
sysfs_put(nd_region->bb_state);
nd_region->bb_state = NULL;
/*
* Try to flush caches here since a disabled region may be subject to
* secure erase while disabled, and previous dirty data should not be
* written back to a new instance of the region. This only matters on
* bare metal where security commands are available, so silent failure
* here is ok.
*/
if (cpu_cache_has_invalidate_memregion())
cpu_cache_invalidate_memregion(IORES_DESC_PERSISTENT_MEMORY);
}
static int child_notify(struct device *dev, void *data)
......
......@@ -59,9 +59,51 @@ static int nvdimm_map_flush(struct device *dev, struct nvdimm *nvdimm, int dimm,
return 0;
}
/*
 * Invalidate CPU caches for a region when any of its DIMMs is flagged
 * NDD_INCOHERENT, then clear that flag on every mapped DIMM.
 *
 * Nothing is done (and 0 is returned) when no mapped DIMM is incoherent.
 * When the platform cannot invalidate caches, the invalidation is skipped
 * only for CONFIG_NVDIMM_SECURITY_TEST builds; otherwise -ENXIO is
 * returned. A failure reported by cpu_cache_invalidate_memregion() is
 * propagated and the NDD_INCOHERENT flags are left set, so that a later
 * activation attempt retries the invalidation.
 *
 * Returns 0 on success or a negative error code.
 */
static int nd_region_invalidate_memregion(struct nd_region *nd_region)
{
	int i, rc, incoherent = 0;

	/* One incoherent DIMM is enough to require invalidation */
	for (i = 0; i < nd_region->ndr_mappings; i++) {
		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
		struct nvdimm *nvdimm = nd_mapping->nvdimm;

		if (test_bit(NDD_INCOHERENT, &nvdimm->flags)) {
			incoherent++;
			break;
		}
	}

	if (!incoherent)
		return 0;

	if (!cpu_cache_has_invalidate_memregion()) {
		if (IS_ENABLED(CONFIG_NVDIMM_SECURITY_TEST)) {
			dev_warn(
				&nd_region->dev,
				"Bypassing cpu_cache_invalidate_memregion() for testing!\n");
			goto out;
		} else {
			dev_err(&nd_region->dev,
				"Failed to synchronize CPU cache state\n");
			return -ENXIO;
		}
	}

	rc = cpu_cache_invalidate_memregion(IORES_DESC_PERSISTENT_MEMORY);
	if (rc) {
		dev_err(&nd_region->dev,
			"Failed to synchronize CPU cache state\n");
		return rc;
	}
out:
	/* Caches are coherent again (or deliberately bypassed for testing) */
	for (i = 0; i < nd_region->ndr_mappings; i++) {
		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
		struct nvdimm *nvdimm = nd_mapping->nvdimm;

		clear_bit(NDD_INCOHERENT, &nvdimm->flags);
	}

	return 0;
}
int nd_region_activate(struct nd_region *nd_region)
{
int i, j, num_flush = 0;
int i, j, rc, num_flush = 0;
struct nd_region_data *ndrd;
struct device *dev = &nd_region->dev;
size_t flush_data_size = sizeof(void *);
......@@ -85,6 +127,10 @@ int nd_region_activate(struct nd_region *nd_region)
}
nvdimm_bus_unlock(&nd_region->dev);
rc = nd_region_invalidate_memregion(nd_region);
if (rc)
return rc;
ndrd = devm_kzalloc(dev, sizeof(*ndrd) + flush_data_size, GFP_KERNEL);
if (!ndrd)
return -ENOMEM;
......@@ -1222,3 +1268,5 @@ int nd_region_conflict(struct nd_region *nd_region, resource_size_t start,
return device_for_each_child(&nvdimm_bus->dev, &ctx, region_conflict);
}
MODULE_IMPORT_NS(DEVMEM);
......@@ -177,6 +177,10 @@ static int __nvdimm_security_unlock(struct nvdimm *nvdimm)
|| !nvdimm->sec.flags)
return -EIO;
/* cxl_test needs this to pre-populate the security state */
if (IS_ENABLED(CONFIG_NVDIMM_SECURITY_TEST))
nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER);
/* No need to go further if security is disabled */
if (test_bit(NVDIMM_SECURITY_DISABLED, &nvdimm->sec.flags))
return 0;
......@@ -204,6 +208,8 @@ static int __nvdimm_security_unlock(struct nvdimm *nvdimm)
rc = nvdimm->sec.ops->unlock(nvdimm, data);
dev_dbg(dev, "key: %d unlock: %s\n", key_serial(key),
rc == 0 ? "success" : "fail");
if (rc == 0)
set_bit(NDD_INCOHERENT, &nvdimm->flags);
nvdimm_put_key(key);
nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER);
......@@ -239,7 +245,8 @@ static int check_security_state(struct nvdimm *nvdimm)
return 0;
}
static int security_disable(struct nvdimm *nvdimm, unsigned int keyid)
static int security_disable(struct nvdimm *nvdimm, unsigned int keyid,
enum nvdimm_passphrase_type pass_type)
{
struct device *dev = &nvdimm->dev;
struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
......@@ -250,8 +257,13 @@ static int security_disable(struct nvdimm *nvdimm, unsigned int keyid)
/* The bus lock should be held at the top level of the call stack */
lockdep_assert_held(&nvdimm_bus->reconfig_mutex);
if (!nvdimm->sec.ops || !nvdimm->sec.ops->disable
|| !nvdimm->sec.flags)
if (!nvdimm->sec.ops || !nvdimm->sec.flags)
return -EOPNOTSUPP;
if (pass_type == NVDIMM_USER && !nvdimm->sec.ops->disable)
return -EOPNOTSUPP;
if (pass_type == NVDIMM_MASTER && !nvdimm->sec.ops->disable_master)
return -EOPNOTSUPP;
rc = check_security_state(nvdimm);
......@@ -263,12 +275,21 @@ static int security_disable(struct nvdimm *nvdimm, unsigned int keyid)
if (!data)
return -ENOKEY;
rc = nvdimm->sec.ops->disable(nvdimm, data);
dev_dbg(dev, "key: %d disable: %s\n", key_serial(key),
if (pass_type == NVDIMM_MASTER) {
rc = nvdimm->sec.ops->disable_master(nvdimm, data);
dev_dbg(dev, "key: %d disable_master: %s\n", key_serial(key),
rc == 0 ? "success" : "fail");
} else {
rc = nvdimm->sec.ops->disable(nvdimm, data);
dev_dbg(dev, "key: %d disable: %s\n", key_serial(key),
rc == 0 ? "success" : "fail");
}
nvdimm_put_key(key);
nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER);
if (pass_type == NVDIMM_MASTER)
nvdimm->sec.ext_flags = nvdimm_security_flags(nvdimm, NVDIMM_MASTER);
else
nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER);
return rc;
}
......@@ -355,6 +376,8 @@ static int security_erase(struct nvdimm *nvdimm, unsigned int keyid,
return -ENOKEY;
rc = nvdimm->sec.ops->erase(nvdimm, data, pass_type);
if (rc == 0)
set_bit(NDD_INCOHERENT, &nvdimm->flags);
dev_dbg(dev, "key: %d erase%s: %s\n", key_serial(key),
pass_type == NVDIMM_MASTER ? "(master)" : "(user)",
rc == 0 ? "success" : "fail");
......@@ -389,6 +412,8 @@ static int security_overwrite(struct nvdimm *nvdimm, unsigned int keyid)
return -ENOKEY;
rc = nvdimm->sec.ops->overwrite(nvdimm, data);
if (rc == 0)
set_bit(NDD_INCOHERENT, &nvdimm->flags);
dev_dbg(dev, "key: %d overwrite submission: %s\n", key_serial(key),
rc == 0 ? "success" : "fail");
......@@ -473,6 +498,7 @@ void nvdimm_security_overwrite_query(struct work_struct *work)
#define OPS \
C( OP_FREEZE, "freeze", 1), \
C( OP_DISABLE, "disable", 2), \
C( OP_DISABLE_MASTER, "disable_master", 2), \
C( OP_UPDATE, "update", 3), \
C( OP_ERASE, "erase", 2), \
C( OP_OVERWRITE, "overwrite", 2), \
......@@ -524,7 +550,10 @@ ssize_t nvdimm_security_store(struct device *dev, const char *buf, size_t len)
rc = nvdimm_security_freeze(nvdimm);
} else if (i == OP_DISABLE) {
dev_dbg(dev, "disable %u\n", key);
rc = security_disable(nvdimm, key);
rc = security_disable(nvdimm, key, NVDIMM_USER);
} else if (i == OP_DISABLE_MASTER) {
dev_dbg(dev, "disable_master %u\n", key);
rc = security_disable(nvdimm, key, NVDIMM_MASTER);
} else if (i == OP_UPDATE || i == OP_MASTER_UPDATE) {
dev_dbg(dev, "%s %u %u\n", ops[i].name, key, newkey);
rc = security_update(nvdimm, key, newkey, i == OP_UPDATE
......
......@@ -756,6 +756,13 @@ static ssize_t pci_write_config(struct file *filp, struct kobject *kobj,
if (ret)
return ret;
if (resource_is_exclusive(&dev->driver_exclusive_resource, off,
count)) {
pci_warn_once(dev, "%s: Unexpected write to kernel-exclusive config offset %llx",
current->comm, off);
add_taint(TAINT_USER, LOCKDEP_STILL_OK);
}
if (off > dev->cfg_size)
return 0;
if (off + count > dev->cfg_size) {
......
......@@ -961,8 +961,14 @@ static void handle_error_source(struct pci_dev *dev, struct aer_err_info *info)
if (aer)
pci_write_config_dword(dev, aer + PCI_ERR_COR_STATUS,
info->status);
if (pcie_aer_is_native(dev))
if (pcie_aer_is_native(dev)) {
struct pci_driver *pdrv = dev->driver;
if (pdrv && pdrv->err_handler &&
pdrv->err_handler->cor_error_detected)
pdrv->err_handler->cor_error_detected(dev);
pcie_clear_device_status(dev);
}
} else if (info->severity == AER_NONFATAL)
pcie_do_recovery(dev, pci_channel_io_normal, aer_root_reset);
else if (info->severity == AER_FATAL)
......
......@@ -2305,6 +2305,12 @@ struct pci_dev *pci_alloc_dev(struct pci_bus *bus)
INIT_LIST_HEAD(&dev->bus_list);
dev->dev.type = &pci_dev_type;
dev->bus = pci_bus_get(bus);
dev->driver_exclusive_resource = (struct resource) {
.name = "PCI Exclusive",
.start = 0,
.end = -1,
};
#ifdef CONFIG_PCI_MSI
raw_spin_lock_init(&dev->msi_lock);
#endif
......
......@@ -318,6 +318,8 @@ extern void __devm_release_region(struct device *dev, struct resource *parent,
resource_size_t start, resource_size_t n);
extern int iomem_map_sanity_check(resource_size_t addr, unsigned long size);
extern bool iomem_is_exclusive(u64 addr);
extern bool resource_is_exclusive(struct resource *resource, u64 addr,
resource_size_t size);
extern int
walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,
......
......@@ -35,6 +35,11 @@ enum {
NDD_WORK_PENDING = 4,
/* dimm supports namespace labels */
NDD_LABELING = 6,
/*
* dimm contents have changed requiring invalidation of CPU caches prior
* to activation of a region that includes this device
*/
NDD_INCOHERENT = 7,
/* need to set a limit somewhere, but yes, this is likely overkill */
ND_IOCTL_MAX_BUFLEN = SZ_4M,
......@@ -183,6 +188,8 @@ struct nvdimm_security_ops {
int (*overwrite)(struct nvdimm *nvdimm,
const struct nvdimm_key_data *key_data);
int (*query_overwrite)(struct nvdimm *nvdimm);
int (*disable_master)(struct nvdimm *nvdimm,
const struct nvdimm_key_data *key_data);
};
enum nvdimm_fwa_state {
......
......@@ -3,6 +3,7 @@
#define _MEMREGION_H_
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/bug.h>
struct memregion_info {
int target_node;
......@@ -20,4 +21,41 @@ static inline void memregion_free(int id)
{
}
#endif
/**
* cpu_cache_invalidate_memregion - drop any CPU cached data for
* memregions described by @res_desc
* @res_desc: one of the IORES_DESC_* types
*
* Perform cache maintenance after a memory event / operation that
* changes the contents of physical memory in a cache-incoherent manner.
* For example, device memory technologies like NVDIMM and CXL have
* device secure erase, and dynamic region provision that can replace
* the memory mapped to a given physical address.
*
* Limit the functionality to architectures that have an efficient way
* to writeback and invalidate potentially terabytes of address space at
* once. Note that this routine may or may not write back any dirty
* contents while performing the invalidation. It is only exported for
* the explicit usage of the NVDIMM and CXL modules in the 'DEVMEM'
* symbol namespace on bare metal platforms.
*
* Returns 0 on success or negative error code on a failure to perform
* the cache maintenance.
*/
#ifdef CONFIG_ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION
int cpu_cache_invalidate_memregion(int res_desc);
bool cpu_cache_has_invalidate_memregion(void);
#else
/*
 * Stub used when CONFIG_ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION is not
 * set: memregion cache invalidation is never reported as available.
 */
static inline bool cpu_cache_has_invalidate_memregion(void)
{
	return false;
}
/*
 * Stub used when CONFIG_ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION is not
 * set. Reaching it means a caller needed cache invalidation that this
 * build cannot provide, so warn once (with a readable message) and fail.
 *
 * Always returns -ENXIO.
 */
static inline int cpu_cache_invalidate_memregion(int res_desc)
{
	/*
	 * WARN_ONCE() takes an explicit condition plus a format string; the
	 * text is actually printed instead of being used as the condition.
	 */
	WARN_ONCE(1, "CPU cache invalidation required\n");
	return -ENXIO;
}
#endif
#endif /* _MEMREGION_H_ */
......@@ -410,6 +410,7 @@ struct pci_dev {
*/
unsigned int irq;
struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */
struct resource driver_exclusive_resource; /* driver exclusive resource ranges */
bool match_driver; /* Skip attaching driver */
......@@ -844,6 +845,9 @@ struct pci_error_handlers {
/* Device driver may resume normal operations */
void (*resume)(struct pci_dev *dev);
/* Allow device driver to record more details of a correctable error */
void (*cor_error_detected)(struct pci_dev *dev);
};
......@@ -1408,6 +1412,21 @@ int pci_request_selected_regions(struct pci_dev *, int, const char *);
int pci_request_selected_regions_exclusive(struct pci_dev *, int, const char *);
void pci_release_selected_regions(struct pci_dev *, int);
/*
 * Claim @len bytes of @pdev's configuration space starting at @offset as
 * kernel-exclusive, by requesting an IORESOURCE_EXCLUSIVE region named
 * @name under the device's ->driver_exclusive_resource.
 *
 * Returns whatever __request_region() returns; the result must be checked.
 */
static inline __must_check struct resource *
pci_request_config_region_exclusive(struct pci_dev *pdev, unsigned int offset,
				    unsigned int len, const char *name)
{
	struct resource *root = &pdev->driver_exclusive_resource;

	return __request_region(root, offset, len, name, IORESOURCE_EXCLUSIVE);
}
/*
 * Release the [@offset, @offset + @len) range of @pdev's
 * ->driver_exclusive_resource; the counterpart of
 * pci_request_config_region_exclusive().
 */
static inline void pci_release_config_region(struct pci_dev *pdev,
					     unsigned int offset,
					     unsigned int len)
{
	struct resource *root = &pdev->driver_exclusive_resource;

	__release_region(root, offset, len);
}
/* drivers/pci/bus.c */
void pci_add_resource(struct list_head *resources, struct resource *res);
void pci_add_resource_offset(struct list_head *resources, struct resource *res,
......@@ -2505,6 +2524,7 @@ void pci_ims_free_irq(struct pci_dev *pdev, struct msi_map map);
#define pci_crit(pdev, fmt, arg...) dev_crit(&(pdev)->dev, fmt, ##arg)
#define pci_err(pdev, fmt, arg...) dev_err(&(pdev)->dev, fmt, ##arg)
#define pci_warn(pdev, fmt, arg...) dev_warn(&(pdev)->dev, fmt, ##arg)
#define pci_warn_once(pdev, fmt, arg...) dev_warn_once(&(pdev)->dev, fmt, ##arg)
#define pci_notice(pdev, fmt, arg...) dev_notice(&(pdev)->dev, fmt, ##arg)
#define pci_info(pdev, fmt, arg...) dev_info(&(pdev)->dev, fmt, ##arg)
#define pci_dbg(pdev, fmt, arg...) dev_dbg(&(pdev)->dev, fmt, ##arg)
......
/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM cxl
#if !defined(_CXL_EVENTS_H) || defined(TRACE_HEADER_MULTI_READ)
#define _CXL_EVENTS_H
#include <linux/tracepoint.h>
/* Size of the header log captured in the trace record, in bytes */
#define CXL_HEADERLOG_SIZE SZ_512
/*
 * Same size expressed in u32 words. Parenthesized so the division cannot
 * be re-associated by operators surrounding the macro at its use site.
 */
#define CXL_HEADERLOG_SIZE_U32 (SZ_512 / sizeof(u32))
/*
 * Bit definitions for the uncorrectable error status value reported by
 * the cxl_aer_uncorrectable_error trace event.
 * NOTE(review): presumably these mirror the CXL RAS capability
 * uncorrectable error status register layout - confirm against the spec.
 * Bits 12 and 13 have no definition here.
 */
#define CXL_RAS_UC_CACHE_DATA_PARITY BIT(0)
#define CXL_RAS_UC_CACHE_ADDR_PARITY BIT(1)
#define CXL_RAS_UC_CACHE_BE_PARITY BIT(2)
#define CXL_RAS_UC_CACHE_DATA_ECC BIT(3)
#define CXL_RAS_UC_MEM_DATA_PARITY BIT(4)
#define CXL_RAS_UC_MEM_ADDR_PARITY BIT(5)
#define CXL_RAS_UC_MEM_BE_PARITY BIT(6)
#define CXL_RAS_UC_MEM_DATA_ECC BIT(7)
#define CXL_RAS_UC_REINIT_THRESH BIT(8)
#define CXL_RAS_UC_RSVD_ENCODE BIT(9)
#define CXL_RAS_UC_POISON BIT(10)
#define CXL_RAS_UC_RECV_OVERFLOW BIT(11)
#define CXL_RAS_UC_INTERNAL_ERR BIT(14)
#define CXL_RAS_UC_IDE_TX_ERR BIT(15)
#define CXL_RAS_UC_IDE_RX_ERR BIT(16)
/*
 * Render an uncorrectable error bitmask as human-readable names joined
 * by " | ", for use in TP_printk().
 */
#define show_uc_errs(status) __print_flags(status, " | ", \
	{ CXL_RAS_UC_CACHE_DATA_PARITY, "Cache Data Parity Error" }, \
	{ CXL_RAS_UC_CACHE_ADDR_PARITY, "Cache Address Parity Error" }, \
	{ CXL_RAS_UC_CACHE_BE_PARITY, "Cache Byte Enable Parity Error" }, \
	{ CXL_RAS_UC_CACHE_DATA_ECC, "Cache Data ECC Error" }, \
	{ CXL_RAS_UC_MEM_DATA_PARITY, "Memory Data Parity Error" }, \
	{ CXL_RAS_UC_MEM_ADDR_PARITY, "Memory Address Parity Error" }, \
	{ CXL_RAS_UC_MEM_BE_PARITY, "Memory Byte Enable Parity Error" }, \
	{ CXL_RAS_UC_MEM_DATA_ECC, "Memory Data ECC Error" }, \
	{ CXL_RAS_UC_REINIT_THRESH, "REINIT Threshold Hit" }, \
	{ CXL_RAS_UC_RSVD_ENCODE, "Received Unrecognized Encoding" }, \
	{ CXL_RAS_UC_POISON, "Received Poison From Peer" }, \
	{ CXL_RAS_UC_RECV_OVERFLOW, "Receiver Overflow" }, \
	{ CXL_RAS_UC_INTERNAL_ERR, "Component Specific Error" }, \
	{ CXL_RAS_UC_IDE_TX_ERR, "IDE Tx Error" }, \
	{ CXL_RAS_UC_IDE_RX_ERR, "IDE Rx Error" } \
)
/*
 * Trace event for an uncorrectable CXL error.
 *
 * Records the device name, the error status bitmask, the first-error
 * value and a CXL_HEADERLOG_SIZE byte copy of the header log @hl. The
 * printed form decodes status and first_error with show_uc_errs(); the
 * header log is stored in the record but not printed.
 * NOTE(review): @hl must reference at least CXL_HEADERLOG_SIZE readable
 * bytes, since that many are copied unconditionally - confirm at callers.
 */
TRACE_EVENT(cxl_aer_uncorrectable_error,
	TP_PROTO(const struct device *dev, u32 status, u32 fe, u32 *hl),
	TP_ARGS(dev, status, fe, hl),
	TP_STRUCT__entry(
		__string(dev_name, dev_name(dev))
		__field(u32, status)
		__field(u32, first_error)
		__array(u32, header_log, CXL_HEADERLOG_SIZE_U32)
	),
	TP_fast_assign(
		__assign_str(dev_name, dev_name(dev));
		__entry->status = status;
		__entry->first_error = fe;
		/*
		 * Embed the 512B headerlog data for user app retrieval and
		 * parsing, but no need to print this in the trace buffer.
		 */
		memcpy(__entry->header_log, hl, CXL_HEADERLOG_SIZE);
	),
	TP_printk("%s: status: '%s' first_error: '%s'",
		__get_str(dev_name),
		show_uc_errs(__entry->status),
		show_uc_errs(__entry->first_error)
	)
);
/*
 * Bit definitions for the correctable error status value reported by the
 * cxl_aer_correctable_error trace event.
 */
#define CXL_RAS_CE_CACHE_DATA_ECC BIT(0)
#define CXL_RAS_CE_MEM_DATA_ECC BIT(1)
#define CXL_RAS_CE_CRC_THRESH BIT(2)
#define CXL_RAS_CE_RETRY_THRESH BIT(3)
#define CXL_RAS_CE_CACHE_POISON BIT(4)
#define CXL_RAS_CE_MEM_POISON BIT(5)
#define CXL_RAS_CE_PHYS_LAYER_ERR BIT(6)
/*
 * Historical misspelling ("CLX") of CXL_RAS_CE_RETRY_THRESH, kept as an
 * alias so that any existing user of the old name keeps building.
 */
#define CLX_RAS_CE_RETRY_THRESH CXL_RAS_CE_RETRY_THRESH
/*
 * Render a correctable error bitmask as human-readable names joined by
 * " | ", for use in TP_printk().
 */
#define show_ce_errs(status) __print_flags(status, " | ", \
	{ CXL_RAS_CE_CACHE_DATA_ECC, "Cache Data ECC Error" }, \
	{ CXL_RAS_CE_MEM_DATA_ECC, "Memory Data ECC Error" }, \
	{ CXL_RAS_CE_CRC_THRESH, "CRC Threshold Hit" }, \
	{ CXL_RAS_CE_RETRY_THRESH, "Retry Threshold" }, \
	{ CXL_RAS_CE_CACHE_POISON, "Received Cache Poison From Peer" }, \
	{ CXL_RAS_CE_MEM_POISON, "Received Memory Poison From Peer" }, \
	{ CXL_RAS_CE_PHYS_LAYER_ERR, "Received Error From Physical Layer" } \
)
/*
 * Trace event for a correctable CXL error.
 *
 * Records the device name and the error status bitmask; the printed form
 * decodes the status with show_ce_errs().
 */
TRACE_EVENT(cxl_aer_correctable_error,
	TP_PROTO(const struct device *dev, u32 status),
	TP_ARGS(dev, status),
	TP_STRUCT__entry(
		__string(dev_name, dev_name(dev))
		__field(u32, status)
	),
	TP_fast_assign(
		__assign_str(dev_name, dev_name(dev));
		__entry->status = status;
	),
	TP_printk("%s: status: '%s'",
		__get_str(dev_name), show_ce_errs(__entry->status)
	)
);
#endif /* _CXL_EVENTS_H */
/* This part must be outside protection */
#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_FILE cxl
#include <trace/define_trace.h>
......@@ -1119,6 +1119,7 @@
#define PCI_DOE_STATUS_DATA_OBJECT_READY 0x80000000 /* Data Object Ready */
#define PCI_DOE_WRITE 0x10 /* DOE Write Data Mailbox Register */
#define PCI_DOE_READ 0x14 /* DOE Read Data Mailbox Register */
#define PCI_DOE_CAP_SIZEOF 0x18 /* Size of DOE register block */
/* DOE Data Object - note not actually registers */
#define PCI_DOE_DATA_OBJECT_HEADER_1_VID 0x0000ffff
......
......@@ -1707,18 +1707,15 @@ static int strict_iomem_checks;
*
* Returns true if exclusive to the kernel, otherwise returns false.
*/
bool iomem_is_exclusive(u64 addr)
bool resource_is_exclusive(struct resource *root, u64 addr, resource_size_t size)
{
const unsigned int exclusive_system_ram = IORESOURCE_SYSTEM_RAM |
IORESOURCE_EXCLUSIVE;
bool skip_children = false, err = false;
int size = PAGE_SIZE;
struct resource *p;
addr = addr & PAGE_MASK;
read_lock(&resource_lock);
for_each_resource(&iomem_resource, p, skip_children) {
for_each_resource(root, p, skip_children) {
if (p->start >= addr + size)
break;
if (p->end < addr) {
......@@ -1757,6 +1754,12 @@ bool iomem_is_exclusive(u64 addr)
return err;
}
/*
 * Page-granular convenience wrapper: checks the page containing @addr
 * against the iomem_resource tree via resource_is_exclusive().
 */
bool iomem_is_exclusive(u64 addr)
{
	const u64 page_start = addr & PAGE_MASK;

	return resource_is_exclusive(&iomem_resource, page_start, PAGE_SIZE);
}
struct resource_entry *resource_list_create_entry(struct resource *res,
size_t extra_size)
{
......
......@@ -672,6 +672,9 @@ config ARCH_HAS_PMEM_API
config MEMREGION
bool
config ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION
bool
config ARCH_HAS_MEMREMAP_COMPAT_ALIGN
bool
......
......@@ -10,6 +10,7 @@ ldflags-y += --wrap=devm_cxl_add_passthrough_decoder
ldflags-y += --wrap=devm_cxl_enumerate_decoders
ldflags-y += --wrap=cxl_await_media_ready
ldflags-y += --wrap=cxl_hdm_decode_init
ldflags-y += --wrap=cxl_rcrb_to_component
DRIVERS := ../../../drivers
CXL_SRC := $(DRIVERS)/cxl
......@@ -26,6 +27,7 @@ cxl_acpi-y += config_check.o
obj-m += cxl_pmem.o
cxl_pmem-y := $(CXL_SRC)/pmem.o
cxl_pmem-y += $(CXL_SRC)/security.o
cxl_pmem-y += config_check.o
obj-m += cxl_port.o
......
......@@ -10,4 +10,6 @@ void check(void)
BUILD_BUG_ON(!IS_MODULE(CONFIG_CXL_BUS));
BUILD_BUG_ON(!IS_MODULE(CONFIG_CXL_ACPI));
BUILD_BUG_ON(!IS_MODULE(CONFIG_CXL_PMEM));
BUILD_BUG_ON(!IS_ENABLED(CONFIG_CXL_REGION_INVALIDATION_TEST));
BUILD_BUG_ON(!IS_ENABLED(CONFIG_NVDIMM_SECURITY_TEST));
}
......@@ -11,11 +11,15 @@
#include <cxlmem.h>
#include "mock.h"
static int interleave_arithmetic;
#define NR_CXL_HOST_BRIDGES 2
#define NR_CXL_SINGLE_HOST 1
#define NR_CXL_RCH 1
#define NR_CXL_ROOT_PORTS 2
#define NR_CXL_SWITCH_PORTS 2
#define NR_CXL_PORT_DECODERS 8
#define NR_BRIDGES (NR_CXL_HOST_BRIDGES + NR_CXL_SINGLE_HOST + NR_CXL_RCH)
static struct platform_device *cxl_acpi;
static struct platform_device *cxl_host_bridge[NR_CXL_HOST_BRIDGES];
......@@ -35,6 +39,8 @@ static struct platform_device *cxl_swd_single[NR_MEM_SINGLE];
struct platform_device *cxl_mem[NR_MEM_MULTI];
struct platform_device *cxl_mem_single[NR_MEM_SINGLE];
static struct platform_device *cxl_rch[NR_CXL_RCH];
static struct platform_device *cxl_rcd[NR_CXL_RCH];
static inline bool is_multi_bridge(struct device *dev)
{
......@@ -57,7 +63,7 @@ static inline bool is_single_bridge(struct device *dev)
}
static struct acpi_device acpi0017_mock;
static struct acpi_device host_bridge[NR_CXL_HOST_BRIDGES + NR_CXL_SINGLE_HOST] = {
static struct acpi_device host_bridge[NR_BRIDGES] = {
[0] = {
.handle = &host_bridge[0],
},
......@@ -67,7 +73,9 @@ static struct acpi_device host_bridge[NR_CXL_HOST_BRIDGES + NR_CXL_SINGLE_HOST]
[2] = {
.handle = &host_bridge[2],
},
[3] = {
.handle = &host_bridge[3],
},
};
static bool is_mock_dev(struct device *dev)
......@@ -80,6 +88,9 @@ static bool is_mock_dev(struct device *dev)
for (i = 0; i < ARRAY_SIZE(cxl_mem_single); i++)
if (dev == &cxl_mem_single[i]->dev)
return true;
for (i = 0; i < ARRAY_SIZE(cxl_rcd); i++)
if (dev == &cxl_rcd[i]->dev)
return true;
if (dev == &cxl_acpi->dev)
return true;
return false;
......@@ -101,7 +112,7 @@ static bool is_mock_adev(struct acpi_device *adev)
static struct {
struct acpi_table_cedt cedt;
struct acpi_cedt_chbs chbs[NR_CXL_HOST_BRIDGES + NR_CXL_SINGLE_HOST];
struct acpi_cedt_chbs chbs[NR_BRIDGES];
struct {
struct acpi_cedt_cfmws cfmws;
u32 target[1];
......@@ -122,6 +133,26 @@ static struct {
struct acpi_cedt_cfmws cfmws;
u32 target[1];
} cfmws4;
struct {
struct acpi_cedt_cfmws cfmws;
u32 target[1];
} cfmws5;
struct {
struct acpi_cedt_cfmws cfmws;
u32 target[1];
} cfmws6;
struct {
struct acpi_cedt_cfmws cfmws;
u32 target[2];
} cfmws7;
struct {
struct acpi_cedt_cfmws cfmws;
u32 target[4];
} cfmws8;
struct {
struct acpi_cedt_cxims cxims;
u64 xormap_list[2];
} cxims0;
} __packed mock_cedt = {
.cedt = {
.header = {
......@@ -154,6 +185,14 @@ static struct {
.uid = 2,
.cxl_version = ACPI_CEDT_CHBS_VERSION_CXL20,
},
.chbs[3] = {
.header = {
.type = ACPI_CEDT_TYPE_CHBS,
.length = sizeof(mock_cedt.chbs[0]),
},
.uid = 3,
.cxl_version = ACPI_CEDT_CHBS_VERSION_CXL11,
},
.cfmws0 = {
.cfmws = {
.header = {
......@@ -229,6 +268,81 @@ static struct {
},
.target = { 2 },
},
.cfmws5 = {
.cfmws = {
.header = {
.type = ACPI_CEDT_TYPE_CFMWS,
.length = sizeof(mock_cedt.cfmws5),
},
.interleave_ways = 0,
.granularity = 4,
.restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
ACPI_CEDT_CFMWS_RESTRICT_VOLATILE,
.qtg_id = 5,
.window_size = SZ_256M,
},
.target = { 3 },
},
/* .cfmws6,7,8 use ACPI_CEDT_CFMWS_ARITHMETIC_XOR */
.cfmws6 = {
.cfmws = {
.header = {
.type = ACPI_CEDT_TYPE_CFMWS,
.length = sizeof(mock_cedt.cfmws6),
},
.interleave_arithmetic = ACPI_CEDT_CFMWS_ARITHMETIC_XOR,
.interleave_ways = 0,
.granularity = 4,
.restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
ACPI_CEDT_CFMWS_RESTRICT_PMEM,
.qtg_id = 0,
.window_size = SZ_256M * 8UL,
},
.target = { 0, },
},
.cfmws7 = {
.cfmws = {
.header = {
.type = ACPI_CEDT_TYPE_CFMWS,
.length = sizeof(mock_cedt.cfmws7),
},
.interleave_arithmetic = ACPI_CEDT_CFMWS_ARITHMETIC_XOR,
.interleave_ways = 1,
.granularity = 0,
.restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
ACPI_CEDT_CFMWS_RESTRICT_PMEM,
.qtg_id = 1,
.window_size = SZ_256M * 8UL,
},
.target = { 0, 1, },
},
.cfmws8 = {
.cfmws = {
.header = {
.type = ACPI_CEDT_TYPE_CFMWS,
.length = sizeof(mock_cedt.cfmws8),
},
.interleave_arithmetic = ACPI_CEDT_CFMWS_ARITHMETIC_XOR,
.interleave_ways = 2,
.granularity = 0,
.restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
ACPI_CEDT_CFMWS_RESTRICT_PMEM,
.qtg_id = 0,
.window_size = SZ_256M * 16UL,
},
.target = { 0, 1, 0, 1, },
},
.cxims0 = {
.cxims = {
.header = {
.type = ACPI_CEDT_TYPE_CXIMS,
.length = sizeof(mock_cedt.cxims0),
},
.hbig = 0,
.nr_xormaps = 2,
},
.xormap_list = { 0x404100, 0x808200, },
},
};
struct acpi_cedt_cfmws *mock_cfmws[] = {
......@@ -237,6 +351,22 @@ struct acpi_cedt_cfmws *mock_cfmws[] = {
[2] = &mock_cedt.cfmws2.cfmws,
[3] = &mock_cedt.cfmws3.cfmws,
[4] = &mock_cedt.cfmws4.cfmws,
[5] = &mock_cedt.cfmws5.cfmws,
/* Modulo Math above, XOR Math below */
[6] = &mock_cedt.cfmws6.cfmws,
[7] = &mock_cedt.cfmws7.cfmws,
[8] = &mock_cedt.cfmws8.cfmws,
};
/*
 * Inclusive index range into mock_cfmws[] that is currently published.
 * cxl_test_init() picks the range from the CFMWS_*_ARRAY_* values below
 * depending on the interleave_arithmetic module parameter.
 */
static int cfmws_start;
static int cfmws_end;
/* mock_cfmws[] entries using modulo interleave arithmetic (inclusive) */
#define CFMWS_MOD_ARRAY_START 0
#define CFMWS_MOD_ARRAY_END 5
/* mock_cfmws[] entries using XOR interleave arithmetic (inclusive) */
#define CFMWS_XOR_ARRAY_START 6
#define CFMWS_XOR_ARRAY_END 8
/* CXIMS subtables handed out for ACPI_CEDT_TYPE_CXIMS parse requests */
struct acpi_cedt_cxims *mock_cxims[1] = {
[0] = &mock_cedt.cxims0.cxims,
};
struct cxl_mock_res {
......@@ -262,11 +392,11 @@ static void depopulate_all_mock_resources(void)
mutex_unlock(&mock_res_lock);
}
static struct cxl_mock_res *alloc_mock_res(resource_size_t size)
static struct cxl_mock_res *alloc_mock_res(resource_size_t size, int align)
{
struct cxl_mock_res *res = kzalloc(sizeof(*res), GFP_KERNEL);
struct genpool_data_align data = {
.align = SZ_256M,
.align = align,
};
unsigned long phys;
......@@ -301,17 +431,17 @@ static int populate_cedt(void)
else
size = ACPI_CEDT_CHBS_LENGTH_CXL11;
res = alloc_mock_res(size);
res = alloc_mock_res(size, size);
if (!res)
return -ENOMEM;
chbs->base = res->range.start;
chbs->length = size;
}
for (i = 0; i < ARRAY_SIZE(mock_cfmws); i++) {
for (i = cfmws_start; i <= cfmws_end; i++) {
struct acpi_cedt_cfmws *window = mock_cfmws[i];
res = alloc_mock_res(window->window_size);
res = alloc_mock_res(window->window_size, SZ_256M);
if (!res)
return -ENOMEM;
window->base_hpa = res->range.start;
......@@ -320,10 +450,12 @@ static int populate_cedt(void)
return 0;
}
static bool is_mock_port(struct device *dev);
/*
* WARNING, this hack assumes the format of 'struct
* cxl_cfmws_context' and 'struct cxl_chbs_context' share the property that
* the first struct member is the device being probed by the cxl_acpi
* WARNING, this hack assumes the format of 'struct cxl_cfmws_context'
* and 'struct cxl_chbs_context' share the property that the first
* struct member is a cxl_test device being probed by the cxl_acpi
* driver.
*/
struct cxl_cedt_context {
......@@ -340,7 +472,7 @@ static int mock_acpi_table_parse_cedt(enum acpi_cedt_type id,
unsigned long end;
int i;
if (dev != &cxl_acpi->dev)
if (!is_mock_port(dev) && !is_mock_dev(dev))
return acpi_table_parse_cedt(id, handler_arg, arg);
if (id == ACPI_CEDT_TYPE_CHBS)
......@@ -351,12 +483,19 @@ static int mock_acpi_table_parse_cedt(enum acpi_cedt_type id,
}
if (id == ACPI_CEDT_TYPE_CFMWS)
for (i = 0; i < ARRAY_SIZE(mock_cfmws); i++) {
for (i = cfmws_start; i <= cfmws_end; i++) {
h = (union acpi_subtable_headers *) mock_cfmws[i];
end = (unsigned long) h + mock_cfmws[i]->header.length;
handler_arg(h, arg, end);
}
if (id == ACPI_CEDT_TYPE_CXIMS)
for (i = 0; i < ARRAY_SIZE(mock_cxims); i++) {
h = (union acpi_subtable_headers *)mock_cxims[i];
end = (unsigned long)h + mock_cxims[i]->header.length;
handler_arg(h, arg, end);
}
return 0;
}
......@@ -370,6 +509,10 @@ static bool is_mock_bridge(struct device *dev)
for (i = 0; i < ARRAY_SIZE(cxl_hb_single); i++)
if (dev == &cxl_hb_single[i]->dev)
return true;
for (i = 0; i < ARRAY_SIZE(cxl_rch); i++)
if (dev == &cxl_rch[i]->dev)
return true;
return false;
}
......@@ -439,14 +582,21 @@ mock_acpi_evaluate_integer(acpi_handle handle, acpi_string pathname,
return AE_OK;
}
static struct pci_bus mock_pci_bus[NR_CXL_HOST_BRIDGES];
static struct acpi_pci_root mock_pci_root[NR_CXL_HOST_BRIDGES] = {
static struct pci_bus mock_pci_bus[NR_BRIDGES];
static struct acpi_pci_root mock_pci_root[ARRAY_SIZE(mock_pci_bus)] = {
[0] = {
.bus = &mock_pci_bus[0],
},
[1] = {
.bus = &mock_pci_bus[1],
},
[2] = {
.bus = &mock_pci_bus[2],
},
[3] = {
.bus = &mock_pci_bus[3],
},
};
static bool is_mock_bus(struct pci_bus *bus)
......@@ -634,7 +784,6 @@ static int mock_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm)
static int mock_cxl_port_enumerate_dports(struct cxl_port *port)
{
struct device *dev = &port->dev;
struct platform_device **array;
int i, array_size;
......@@ -684,19 +833,22 @@ static int mock_cxl_port_enumerate_dports(struct cxl_port *port)
dport = devm_cxl_add_dport(port, &pdev->dev, pdev->id,
CXL_RESOURCE_NONE);
if (IS_ERR(dport)) {
dev_err(dev, "failed to add dport: %s (%ld)\n",
dev_name(&pdev->dev), PTR_ERR(dport));
if (IS_ERR(dport))
return PTR_ERR(dport);
}
dev_dbg(dev, "add dport%d: %s\n", pdev->id,
dev_name(&pdev->dev));
}
return 0;
}
/*
 * Mock stand-in for cxl_rcrb_to_component(): @rcrb is only logged and
 * the value handed back is simply @which + 1.
 */
resource_size_t mock_cxl_rcrb_to_component(struct device *dev,
					   resource_size_t rcrb,
					   enum cxl_rcrb which)
{
	resource_size_t fake_base = (resource_size_t) which;

	dev_dbg(dev, "rcrb: %pa which: %d\n", &rcrb, which);

	fake_base += 1;
	return fake_base;
}
static struct cxl_mock_ops cxl_mock_ops = {
.is_mock_adev = is_mock_adev,
.is_mock_bridge = is_mock_bridge,
......@@ -705,6 +857,7 @@ static struct cxl_mock_ops cxl_mock_ops = {
.is_mock_dev = is_mock_dev,
.acpi_table_parse_cedt = mock_acpi_table_parse_cedt,
.acpi_evaluate_integer = mock_acpi_evaluate_integer,
.cxl_rcrb_to_component = mock_cxl_rcrb_to_component,
.acpi_pci_find_root = mock_acpi_pci_find_root,
.devm_cxl_port_enumerate_dports = mock_cxl_port_enumerate_dports,
.devm_cxl_setup_hdm = mock_cxl_setup_hdm,
......@@ -729,6 +882,87 @@ static void mock_companion(struct acpi_device *adev, struct device *dev)
#define SZ_512G (SZ_64G * 8)
#endif
/*
 * Register the mock Restricted CXL Host bridge(s) and, underneath each
 * one, a mock "cxl_rcd" memory device.
 *
 * Returns 0 on success or a negative errno. On failure every device
 * registered so far is unregistered again before returning.
 */
static __init int cxl_rch_init(void)
{
	int rc, i;

	for (i = 0; i < ARRAY_SIZE(cxl_rch); i++) {
		/* RCH bridges are placed after the multi and single host bridges */
		int idx = NR_CXL_HOST_BRIDGES + NR_CXL_SINGLE_HOST + i;
		struct acpi_device *adev = &host_bridge[idx];
		struct platform_device *pdev;

		pdev = platform_device_alloc("cxl_host_bridge", idx);
		if (!pdev) {
			/* rc would otherwise be uninitialized or a stale 0 */
			rc = -ENOMEM;
			goto err_bridge;
		}

		mock_companion(adev, &pdev->dev);
		rc = platform_device_add(pdev);
		if (rc) {
			platform_device_put(pdev);
			goto err_bridge;
		}

		cxl_rch[i] = pdev;
		mock_pci_bus[idx].bridge = &pdev->dev;
		rc = sysfs_create_link(&pdev->dev.kobj, &pdev->dev.kobj,
				       "firmware_node");
		if (rc)
			goto err_bridge;
	}

	for (i = 0; i < ARRAY_SIZE(cxl_rcd); i++) {
		/* RCD ids continue after the multi and single memdev ids */
		int idx = NR_MEM_MULTI + NR_MEM_SINGLE + i;
		struct platform_device *rch = cxl_rch[i];
		struct platform_device *pdev;

		pdev = platform_device_alloc("cxl_rcd", idx);
		if (!pdev) {
			/* rc still holds 0 from the last successful call */
			rc = -ENOMEM;
			goto err_mem;
		}

		pdev->dev.parent = &rch->dev;
		set_dev_node(&pdev->dev, i % 2);

		rc = platform_device_add(pdev);
		if (rc) {
			platform_device_put(pdev);
			goto err_mem;
		}
		cxl_rcd[i] = pdev;
	}

	return 0;

err_mem:
	for (i = ARRAY_SIZE(cxl_rcd) - 1; i >= 0; i--)
		platform_device_unregister(cxl_rcd[i]);
err_bridge:
	for (i = ARRAY_SIZE(cxl_rch) - 1; i >= 0; i--) {
		struct platform_device *pdev = cxl_rch[i];

		if (!pdev)
			continue;
		sysfs_remove_link(&pdev->dev.kobj, "firmware_node");
		platform_device_unregister(cxl_rch[i]);
	}

	return rc;
}
/*
 * Tear down what cxl_rch_init() registered: first the mock RCD memory
 * devices, then the RCH bridges, each walked from the last slot to the
 * first.
 */
static void cxl_rch_exit(void)
{
	int i;

	i = ARRAY_SIZE(cxl_rcd);
	while (--i >= 0)
		platform_device_unregister(cxl_rcd[i]);

	i = ARRAY_SIZE(cxl_rch);
	while (--i >= 0) {
		struct platform_device *bridge = cxl_rch[i];

		/* Slots that were never populated are skipped */
		if (bridge) {
			sysfs_remove_link(&bridge->dev.kobj, "firmware_node");
			platform_device_unregister(bridge);
		}
	}
}
static __init int cxl_single_init(void)
{
int i, rc;
......@@ -751,6 +985,7 @@ static __init int cxl_single_init(void)
}
cxl_hb_single[i] = pdev;
mock_pci_bus[i + NR_CXL_HOST_BRIDGES].bridge = &pdev->dev;
rc = sysfs_create_link(&pdev->dev.kobj, &pdev->dev.kobj,
"physical_node");
if (rc)
......@@ -897,6 +1132,16 @@ static __init int cxl_test_init(void)
if (rc)
goto err_gen_pool_add;
if (interleave_arithmetic == 1) {
cfmws_start = CFMWS_XOR_ARRAY_START;
cfmws_end = CFMWS_XOR_ARRAY_END;
dev_dbg(NULL, "cxl_test loading xor math option\n");
} else {
cfmws_start = CFMWS_MOD_ARRAY_START;
cfmws_end = CFMWS_MOD_ARRAY_END;
dev_dbg(NULL, "cxl_test loading modulo math option\n");
}
rc = populate_cedt();
if (rc)
goto err_populate;
......@@ -917,6 +1162,7 @@ static __init int cxl_test_init(void)
}
cxl_host_bridge[i] = pdev;
mock_pci_bus[i].bridge = &pdev->dev;
rc = sysfs_create_link(&pdev->dev.kobj, &pdev->dev.kobj,
"physical_node");
if (rc)
......@@ -999,9 +1245,13 @@ static __init int cxl_test_init(void)
if (rc)
goto err_mem;
rc = cxl_rch_init();
if (rc)
goto err_single;
cxl_acpi = platform_device_alloc("cxl_acpi", 0);
if (!cxl_acpi)
goto err_single;
goto err_rch;
mock_companion(&acpi0017_mock, &cxl_acpi->dev);
acpi0017_mock.dev.bus = &platform_bus_type;
......@@ -1014,6 +1264,8 @@ static __init int cxl_test_init(void)
err_add:
platform_device_put(cxl_acpi);
err_rch:
cxl_rch_exit();
err_single:
cxl_single_exit();
err_mem:
......@@ -1051,6 +1303,7 @@ static __exit void cxl_test_exit(void)
int i;
platform_device_unregister(cxl_acpi);
cxl_rch_exit();
cxl_single_exit();
for (i = ARRAY_SIZE(cxl_mem) - 1; i >= 0; i--)
platform_device_unregister(cxl_mem[i]);
......@@ -1073,6 +1326,8 @@ static __exit void cxl_test_exit(void)
unregister_cxl_mock_ops(&cxl_mock_ops);
}
module_param(interleave_arithmetic, int, 0000);
MODULE_PARM_DESC(interleave_arithmetic, "Modulo:0, XOR:1");
module_init(cxl_test_init);
module_exit(cxl_test_exit);
MODULE_LICENSE("GPL v2");
......
......@@ -65,6 +65,18 @@ static struct {
},
};
/* Number of failed passphrase attempts after which the PLIMIT state is set */
#define PASS_TRY_LIMIT 3
/* Per-device state of a mock CXL memory device, stored as driver data */
struct cxl_mockmem_data {
/* backing buffer for the label storage area */
void *lsa;
/* CXL_PMEM_SEC_STATE_* bits reported by Get Security State */
u32 security_state;
/* currently stored user passphrase (all zero when none is set) */
u8 user_pass[NVDIMM_PASSPHRASE_LEN];
/* currently stored master passphrase (all zero when none is set) */
u8 master_pass[NVDIMM_PASSPHRASE_LEN];
/* failed user passphrase attempts, counted up to PASS_TRY_LIMIT */
int user_limit;
/* failed master passphrase attempts, counted up to PASS_TRY_LIMIT */
int master_limit;
};
static int mock_gsl(struct cxl_mbox_cmd *cmd)
{
if (cmd->size_out < sizeof(mock_gsl_payload))
......@@ -100,6 +112,24 @@ static int mock_get_log(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd)
return 0;
}
static int mock_rcd_id(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd)
{
struct cxl_mbox_identify id = {
.fw_revision = { "mock fw v1 " },
.total_capacity =
cpu_to_le64(DEV_SIZE / CXL_CAPACITY_MULTIPLIER),
.volatile_capacity =
cpu_to_le64(DEV_SIZE / CXL_CAPACITY_MULTIPLIER),
};
if (cmd->size_out < sizeof(id))
return -EINVAL;
memcpy(cmd->payload_out, &id, sizeof(id));
return 0;
}
static int mock_id(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd)
{
struct cxl_mbox_identify id = {
......@@ -137,10 +167,334 @@ static int mock_partition_info(struct cxl_dev_state *cxlds,
return 0;
}
/*
 * Get Security State: copy the mock device's security_state word into
 * the output payload.
 *
 * Returns 0 on success, -EINVAL unless the command carries no input and
 * asks for exactly one u32 of output.
 */
static int mock_get_security_state(struct cxl_dev_state *cxlds,
				   struct cxl_mbox_cmd *cmd)
{
	struct cxl_mockmem_data *mock = dev_get_drvdata(cxlds->dev);

	if (cmd->size_in || cmd->size_out != sizeof(u32))
		return -EINVAL;

	memcpy(cmd->payload_out, &mock->security_state, sizeof(u32));

	return 0;
}
/*
 * Account one failed master passphrase attempt. The counter stops at
 * PASS_TRY_LIMIT; the attempt that reaches the limit also raises
 * CXL_PMEM_SEC_STATE_MASTER_PLIMIT.
 */
static void master_plimit_check(struct cxl_mockmem_data *mdata)
{
	if (mdata->master_limit != PASS_TRY_LIMIT &&
	    ++mdata->master_limit == PASS_TRY_LIMIT)
		mdata->security_state |= CXL_PMEM_SEC_STATE_MASTER_PLIMIT;
}
/*
 * Account one failed user passphrase attempt. The counter stops at
 * PASS_TRY_LIMIT; the attempt that reaches the limit also raises
 * CXL_PMEM_SEC_STATE_USER_PLIMIT.
 */
static void user_plimit_check(struct cxl_mockmem_data *mdata)
{
	if (mdata->user_limit != PASS_TRY_LIMIT &&
	    ++mdata->user_limit == PASS_TRY_LIMIT)
		mdata->security_state |= CXL_PMEM_SEC_STATE_USER_PLIMIT;
}
static int mock_set_passphrase(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd)
{
struct cxl_mockmem_data *mdata = dev_get_drvdata(cxlds->dev);
struct cxl_set_pass *set_pass;
if (cmd->size_in != sizeof(*set_pass))
return -EINVAL;
if (cmd->size_out != 0)
return -EINVAL;
if (mdata->security_state & CXL_PMEM_SEC_STATE_FROZEN) {
cmd->return_code = CXL_MBOX_CMD_RC_SECURITY;
return -ENXIO;
}
set_pass = cmd->payload_in;
switch (set_pass->type) {
case CXL_PMEM_SEC_PASS_MASTER:
if (mdata->security_state & CXL_PMEM_SEC_STATE_MASTER_PLIMIT) {
cmd->return_code = CXL_MBOX_CMD_RC_SECURITY;
return -ENXIO;
}
/*
* CXL spec rev3.0 8.2.9.8.6.2, The master pasphrase shall only be set in
* the security disabled state when the user passphrase is not set.
*/
if (mdata->security_state & CXL_PMEM_SEC_STATE_USER_PASS_SET) {
cmd->return_code = CXL_MBOX_CMD_RC_SECURITY;
return -ENXIO;
}
if (memcmp(mdata->master_pass, set_pass->old_pass, NVDIMM_PASSPHRASE_LEN)) {
master_plimit_check(mdata);
cmd->return_code = CXL_MBOX_CMD_RC_PASSPHRASE;
return -ENXIO;
}
memcpy(mdata->master_pass, set_pass->new_pass, NVDIMM_PASSPHRASE_LEN);
mdata->security_state |= CXL_PMEM_SEC_STATE_MASTER_PASS_SET;
return 0;
case CXL_PMEM_SEC_PASS_USER:
if (mdata->security_state & CXL_PMEM_SEC_STATE_USER_PLIMIT) {
cmd->return_code = CXL_MBOX_CMD_RC_SECURITY;
return -ENXIO;
}
if (memcmp(mdata->user_pass, set_pass->old_pass, NVDIMM_PASSPHRASE_LEN)) {
user_plimit_check(mdata);
cmd->return_code = CXL_MBOX_CMD_RC_PASSPHRASE;
return -ENXIO;
}
memcpy(mdata->user_pass, set_pass->new_pass, NVDIMM_PASSPHRASE_LEN);
mdata->security_state |= CXL_PMEM_SEC_STATE_USER_PASS_SET;
return 0;
default:
cmd->return_code = CXL_MBOX_CMD_RC_INPUT;
}
return -EINVAL;
}
static int mock_disable_passphrase(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd)
{
struct cxl_mockmem_data *mdata = dev_get_drvdata(cxlds->dev);
struct cxl_disable_pass *dis_pass;
if (cmd->size_in != sizeof(*dis_pass))
return -EINVAL;
if (cmd->size_out != 0)
return -EINVAL;
if (mdata->security_state & CXL_PMEM_SEC_STATE_FROZEN) {
cmd->return_code = CXL_MBOX_CMD_RC_SECURITY;
return -ENXIO;
}
dis_pass = cmd->payload_in;
switch (dis_pass->type) {
case CXL_PMEM_SEC_PASS_MASTER:
if (mdata->security_state & CXL_PMEM_SEC_STATE_MASTER_PLIMIT) {
cmd->return_code = CXL_MBOX_CMD_RC_SECURITY;
return -ENXIO;
}
if (!(mdata->security_state & CXL_PMEM_SEC_STATE_MASTER_PASS_SET)) {
cmd->return_code = CXL_MBOX_CMD_RC_SECURITY;
return -ENXIO;
}
if (memcmp(dis_pass->pass, mdata->master_pass, NVDIMM_PASSPHRASE_LEN)) {
master_plimit_check(mdata);
cmd->return_code = CXL_MBOX_CMD_RC_PASSPHRASE;
return -ENXIO;
}
mdata->master_limit = 0;
memset(mdata->master_pass, 0, NVDIMM_PASSPHRASE_LEN);
mdata->security_state &= ~CXL_PMEM_SEC_STATE_MASTER_PASS_SET;
return 0;
case CXL_PMEM_SEC_PASS_USER:
if (mdata->security_state & CXL_PMEM_SEC_STATE_USER_PLIMIT) {
cmd->return_code = CXL_MBOX_CMD_RC_SECURITY;
return -ENXIO;
}
if (!(mdata->security_state & CXL_PMEM_SEC_STATE_USER_PASS_SET)) {
cmd->return_code = CXL_MBOX_CMD_RC_SECURITY;
return -ENXIO;
}
if (memcmp(dis_pass->pass, mdata->user_pass, NVDIMM_PASSPHRASE_LEN)) {
user_plimit_check(mdata);
cmd->return_code = CXL_MBOX_CMD_RC_PASSPHRASE;
return -ENXIO;
}
mdata->user_limit = 0;
memset(mdata->user_pass, 0, NVDIMM_PASSPHRASE_LEN);
mdata->security_state &= ~(CXL_PMEM_SEC_STATE_USER_PASS_SET |
CXL_PMEM_SEC_STATE_LOCKED);
return 0;
default:
cmd->return_code = CXL_MBOX_CMD_RC_INPUT;
return -EINVAL;
}
return 0;
}
/*
 * Freeze Security State: set CXL_PMEM_SEC_STATE_FROZEN on the mock
 * device. Freezing an already frozen device succeeds as well.
 *
 * Returns 0 on success, -EINVAL if the command carries any payload.
 */
static int mock_freeze_security(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd)
{
	struct cxl_mockmem_data *mdata = dev_get_drvdata(cxlds->dev);

	if (cmd->size_in != 0 || cmd->size_out != 0)
		return -EINVAL;

	/* Setting a bit that is already set changes nothing */
	mdata->security_state |= CXL_PMEM_SEC_STATE_FROZEN;

	return 0;
}
static int mock_unlock_security(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd)
{
struct cxl_mockmem_data *mdata = dev_get_drvdata(cxlds->dev);
if (cmd->size_in != NVDIMM_PASSPHRASE_LEN)
return -EINVAL;
if (cmd->size_out != 0)
return -EINVAL;
if (mdata->security_state & CXL_PMEM_SEC_STATE_FROZEN) {
cmd->return_code = CXL_MBOX_CMD_RC_SECURITY;
return -ENXIO;
}
if (!(mdata->security_state & CXL_PMEM_SEC_STATE_USER_PASS_SET)) {
cmd->return_code = CXL_MBOX_CMD_RC_SECURITY;
return -ENXIO;
}
if (mdata->security_state & CXL_PMEM_SEC_STATE_USER_PLIMIT) {
cmd->return_code = CXL_MBOX_CMD_RC_SECURITY;
return -ENXIO;
}
if (!(mdata->security_state & CXL_PMEM_SEC_STATE_LOCKED)) {
cmd->return_code = CXL_MBOX_CMD_RC_SECURITY;
return -ENXIO;
}
if (memcmp(cmd->payload_in, mdata->user_pass, NVDIMM_PASSPHRASE_LEN)) {
if (++mdata->user_limit == PASS_TRY_LIMIT)
mdata->security_state |= CXL_PMEM_SEC_STATE_USER_PLIMIT;
cmd->return_code = CXL_MBOX_CMD_RC_PASSPHRASE;
return -ENXIO;
}
mdata->user_limit = 0;
mdata->security_state &= ~CXL_PMEM_SEC_STATE_LOCKED;
return 0;
}
static int mock_passphrase_secure_erase(struct cxl_dev_state *cxlds,
struct cxl_mbox_cmd *cmd)
{
struct cxl_mockmem_data *mdata = dev_get_drvdata(cxlds->dev);
struct cxl_pass_erase *erase;
if (cmd->size_in != sizeof(*erase))
return -EINVAL;
if (cmd->size_out != 0)
return -EINVAL;
erase = cmd->payload_in;
if (mdata->security_state & CXL_PMEM_SEC_STATE_FROZEN) {
cmd->return_code = CXL_MBOX_CMD_RC_SECURITY;
return -ENXIO;
}
if (mdata->security_state & CXL_PMEM_SEC_STATE_USER_PLIMIT &&
erase->type == CXL_PMEM_SEC_PASS_USER) {
cmd->return_code = CXL_MBOX_CMD_RC_SECURITY;
return -ENXIO;
}
if (mdata->security_state & CXL_PMEM_SEC_STATE_MASTER_PLIMIT &&
erase->type == CXL_PMEM_SEC_PASS_MASTER) {
cmd->return_code = CXL_MBOX_CMD_RC_SECURITY;
return -ENXIO;
}
switch (erase->type) {
case CXL_PMEM_SEC_PASS_MASTER:
/*
* The spec does not clearly define the behavior of the scenario
* where a master passphrase is passed in while the master
* passphrase is not set and user passphrase is not set. The
* code will take the assumption that it will behave the same
* as a CXL secure erase command without passphrase (0x4401).
*/
if (mdata->security_state & CXL_PMEM_SEC_STATE_MASTER_PASS_SET) {
if (memcmp(mdata->master_pass, erase->pass,
NVDIMM_PASSPHRASE_LEN)) {
master_plimit_check(mdata);
cmd->return_code = CXL_MBOX_CMD_RC_PASSPHRASE;
return -ENXIO;
}
mdata->master_limit = 0;
mdata->user_limit = 0;
mdata->security_state &= ~CXL_PMEM_SEC_STATE_USER_PASS_SET;
memset(mdata->user_pass, 0, NVDIMM_PASSPHRASE_LEN);
mdata->security_state &= ~CXL_PMEM_SEC_STATE_LOCKED;
} else {
/*
* CXL rev3 8.2.9.8.6.3 Disable Passphrase
* When master passphrase is disabled, the device shall
* return Invalid Input for the Passphrase Secure Erase
* command with master passphrase.
*/
return -EINVAL;
}
/* Scramble encryption keys so that data is effectively erased */
break;
case CXL_PMEM_SEC_PASS_USER:
/*
* The spec does not clearly define the behavior of the scenario
* where a user passphrase is passed in while the user
* passphrase is not set. The code will take the assumption that
* it will behave the same as a CXL secure erase command without
* passphrase (0x4401).
*/
if (mdata->security_state & CXL_PMEM_SEC_STATE_USER_PASS_SET) {
if (memcmp(mdata->user_pass, erase->pass,
NVDIMM_PASSPHRASE_LEN)) {
user_plimit_check(mdata);
cmd->return_code = CXL_MBOX_CMD_RC_PASSPHRASE;
return -ENXIO;
}
mdata->user_limit = 0;
mdata->security_state &= ~CXL_PMEM_SEC_STATE_USER_PASS_SET;
memset(mdata->user_pass, 0, NVDIMM_PASSPHRASE_LEN);
}
/*
* CXL rev3 Table 8-118
* If user passphrase is not set or supported by device, current
* passphrase value is ignored. Will make the assumption that
* the operation will proceed as secure erase w/o passphrase
* since spec is not explicit.
*/
/* Scramble encryption keys so that data is effectively erased */
break;
default:
return -EINVAL;
}
return 0;
}
static int mock_get_lsa(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd)
{
struct cxl_mbox_get_lsa *get_lsa = cmd->payload_in;
void *lsa = dev_get_drvdata(cxlds->dev);
struct cxl_mockmem_data *mdata = dev_get_drvdata(cxlds->dev);
void *lsa = mdata->lsa;
u32 offset, length;
if (sizeof(*get_lsa) > cmd->size_in)
......@@ -159,7 +513,8 @@ static int mock_get_lsa(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd)
static int mock_set_lsa(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd)
{
struct cxl_mbox_set_lsa *set_lsa = cmd->payload_in;
void *lsa = dev_get_drvdata(cxlds->dev);
struct cxl_mockmem_data *mdata = dev_get_drvdata(cxlds->dev);
void *lsa = mdata->lsa;
u32 offset, length;
if (sizeof(*set_lsa) > cmd->size_in)
......@@ -216,7 +571,10 @@ static int cxl_mock_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *
rc = mock_get_log(cxlds, cmd);
break;
case CXL_MBOX_OP_IDENTIFY:
rc = mock_id(cxlds, cmd);
if (cxlds->rcd)
rc = mock_rcd_id(cxlds, cmd);
else
rc = mock_id(cxlds, cmd);
break;
case CXL_MBOX_OP_GET_LSA:
rc = mock_get_lsa(cxlds, cmd);
......@@ -230,6 +588,24 @@ static int cxl_mock_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *
case CXL_MBOX_OP_GET_HEALTH_INFO:
rc = mock_health_info(cxlds, cmd);
break;
case CXL_MBOX_OP_GET_SECURITY_STATE:
rc = mock_get_security_state(cxlds, cmd);
break;
case CXL_MBOX_OP_SET_PASSPHRASE:
rc = mock_set_passphrase(cxlds, cmd);
break;
case CXL_MBOX_OP_DISABLE_PASSPHRASE:
rc = mock_disable_passphrase(cxlds, cmd);
break;
case CXL_MBOX_OP_FREEZE_SECURITY:
rc = mock_freeze_security(cxlds, cmd);
break;
case CXL_MBOX_OP_UNLOCK:
rc = mock_unlock_security(cxlds, cmd);
break;
case CXL_MBOX_OP_PASSPHRASE_SECURE_ERASE:
rc = mock_passphrase_secure_erase(cxlds, cmd);
break;
default:
break;
}
......@@ -245,21 +621,32 @@ static void label_area_release(void *lsa)
vfree(lsa);
}
/*
 * Tell whether @pdev was matched through the "cxl_rcd" entry of the
 * driver's id table, i.e. the entry with non-zero driver_data.
 *
 * platform_get_device_id() returns NULL when the device was not bound
 * via the id table (for example through a driver_override); such a
 * device is treated as a regular, non-RCD memory device instead of
 * dereferencing the NULL entry.
 */
static bool is_rcd(struct platform_device *pdev)
{
	const struct platform_device_id *id = platform_get_device_id(pdev);

	return id && id->driver_data;
}
static int cxl_mock_mem_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
struct cxl_memdev *cxlmd;
struct cxl_dev_state *cxlds;
void *lsa;
struct cxl_mockmem_data *mdata;
int rc;
lsa = vmalloc(LSA_SIZE);
if (!lsa)
mdata = devm_kzalloc(dev, sizeof(*mdata), GFP_KERNEL);
if (!mdata)
return -ENOMEM;
rc = devm_add_action_or_reset(dev, label_area_release, lsa);
dev_set_drvdata(dev, mdata);
mdata->lsa = vmalloc(LSA_SIZE);
if (!mdata->lsa)
return -ENOMEM;
rc = devm_add_action_or_reset(dev, label_area_release, mdata->lsa);
if (rc)
return rc;
dev_set_drvdata(dev, lsa);
cxlds = cxl_dev_state_create(dev);
if (IS_ERR(cxlds))
......@@ -268,6 +655,10 @@ static int cxl_mock_mem_probe(struct platform_device *pdev)
cxlds->serial = pdev->id;
cxlds->mbox_send = cxl_mock_mbox_send;
cxlds->payload_size = SZ_4K;
if (is_rcd(pdev)) {
cxlds->rcd = true;
cxlds->component_reg_phys = CXL_RESOURCE_NONE;
}
rc = cxl_enumerate_cmds(cxlds);
if (rc)
......@@ -285,14 +676,51 @@ static int cxl_mock_mem_probe(struct platform_device *pdev)
if (IS_ERR(cxlmd))
return PTR_ERR(cxlmd);
if (resource_size(&cxlds->pmem_res) && IS_ENABLED(CONFIG_CXL_PMEM))
rc = devm_cxl_add_nvdimm(dev, cxlmd);
return 0;
}
/*
 * sysfs "security_lock" read: emits 1 when the mock device is in the
 * locked security state, 0 otherwise.
 */
static ssize_t security_lock_show(struct device *dev,
				  struct device_attribute *attr, char *buf)
{
	struct cxl_mockmem_data *mdata = dev_get_drvdata(dev);
	unsigned int locked = 0;

	if (mdata->security_state & CXL_PMEM_SEC_STATE_LOCKED)
		locked = 1;

	return sysfs_emit(buf, "%u\n", locked);
}
/*
 * sysfs "security_lock" write: writing 1 puts the mock device into the
 * locked state, which requires a user passphrase to be set. The frozen
 * and passphrase-attempt-limit flags are cleared at the same time.
 *
 * The failed-attempt counters are reset together with the limit flags.
 * Otherwise a counter that already reached PASS_TRY_LIMIT would stay
 * saturated and the limit could never trigger again after a re-lock.
 *
 * Returns @count on success, -EINVAL for unparsable input or any value
 * other than 1, and -ENXIO when no user passphrase is set.
 */
static ssize_t security_lock_store(struct device *dev, struct device_attribute *attr,
				   const char *buf, size_t count)
{
	struct cxl_mockmem_data *mdata = dev_get_drvdata(dev);
	u32 mask = CXL_PMEM_SEC_STATE_FROZEN | CXL_PMEM_SEC_STATE_USER_PLIMIT |
		   CXL_PMEM_SEC_STATE_MASTER_PLIMIT;
	int val;

	if (kstrtoint(buf, 0, &val) < 0)
		return -EINVAL;

	/* Only "lock" (1) is supported; unlocking goes through the mailbox */
	if (val != 1)
		return -EINVAL;

	if (!(mdata->security_state & CXL_PMEM_SEC_STATE_USER_PASS_SET))
		return -ENXIO;

	mdata->security_state |= CXL_PMEM_SEC_STATE_LOCKED;
	mdata->security_state &= ~mask;
	mdata->user_limit = 0;
	mdata->master_limit = 0;

	return count;
}
static DEVICE_ATTR_RW(security_lock);
static struct attribute *cxl_mock_mem_attrs[] = {
&dev_attr_security_lock.attr,
NULL
};
ATTRIBUTE_GROUPS(cxl_mock_mem);
static const struct platform_device_id cxl_mock_mem_ids[] = {
{ .name = "cxl_mem", },
{ .name = "cxl_mem", 0 },
{ .name = "cxl_rcd", 1 },
{ },
};
MODULE_DEVICE_TABLE(platform, cxl_mock_mem_ids);
......@@ -302,6 +730,7 @@ static struct platform_driver cxl_mock_mem_driver = {
.id_table = cxl_mock_mem_ids,
.driver = {
.name = KBUILD_MODNAME,
.dev_groups = cxl_mock_mem_groups,
},
};
......
......@@ -224,6 +224,25 @@ int __wrap_cxl_hdm_decode_init(struct cxl_dev_state *cxlds,
}
EXPORT_SYMBOL_NS_GPL(__wrap_cxl_hdm_decode_init, CXL);
/*
 * Link-time wrapper for cxl_rcrb_to_component(): mock ports are served
 * by the registered mock ops, everything else falls through to the real
 * implementation.
 */
resource_size_t __wrap_cxl_rcrb_to_component(struct device *dev,
					     resource_size_t rcrb,
					     enum cxl_rcrb which)
{
	int index;
	struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
	resource_size_t base;

	if (!ops || !ops->is_mock_port(dev))
		base = cxl_rcrb_to_component(dev, rcrb, which);
	else
		base = ops->cxl_rcrb_to_component(dev, rcrb, which);

	/* Drop the reference taken by get_cxl_mock_ops() */
	put_cxl_mock_ops(index);

	return base;
}
EXPORT_SYMBOL_NS_GPL(__wrap_cxl_rcrb_to_component, CXL);
MODULE_LICENSE("GPL v2");
MODULE_IMPORT_NS(ACPI);
MODULE_IMPORT_NS(CXL);
......@@ -15,6 +15,9 @@ struct cxl_mock_ops {
acpi_string pathname,
struct acpi_object_list *arguments,
unsigned long long *data);
resource_size_t (*cxl_rcrb_to_component)(struct device *dev,
resource_size_t rcrb,
enum cxl_rcrb which);
struct acpi_pci_root *(*acpi_pci_find_root)(acpi_handle handle);
bool (*is_mock_bus)(struct pci_bus *bus);
bool (*is_mock_port)(struct device *dev);
......
......@@ -79,7 +79,6 @@ libnvdimm-$(CONFIG_BTT) += $(NVDIMM_SRC)/btt_devs.o
libnvdimm-$(CONFIG_NVDIMM_PFN) += $(NVDIMM_SRC)/pfn_devs.o
libnvdimm-$(CONFIG_NVDIMM_DAX) += $(NVDIMM_SRC)/dax_devs.o
libnvdimm-$(CONFIG_NVDIMM_KEYS) += $(NVDIMM_SRC)/security.o
libnvdimm-y += dimm_devs.o
libnvdimm-y += libnvdimm_test.o
libnvdimm-y += config_check.o
......
// SPDX-License-Identifier: GPL-2.0
/* Copyright Intel Corp. 2018 */
#include <linux/init.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/nd.h>
#include "pmem.h"
#include "pfn.h"
#include "nd.h"
#include "nd-core.h"
/*
 * Test variant of the nvdimm "security" attribute: the security flags
 * are re-read on every call before being reported as "disabled",
 * "unlocked" or "locked". Returns -ENOTTY when none of those states is
 * flagged.
 */
ssize_t security_show(struct device *dev,
		      struct device_attribute *attr, char *buf)
{
	struct nvdimm *nvdimm = to_nvdimm(dev);
	ssize_t len;

	/*
	 * For the test version we need to poll the "hardware" in order
	 * to get the updated status for unlock testing.
	 */
	nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER);

	if (test_bit(NVDIMM_SECURITY_DISABLED, &nvdimm->sec.flags))
		len = sprintf(buf, "disabled\n");
	else if (test_bit(NVDIMM_SECURITY_UNLOCKED, &nvdimm->sec.flags))
		len = sprintf(buf, "unlocked\n");
	else if (test_bit(NVDIMM_SECURITY_LOCKED, &nvdimm->sec.flags))
		len = sprintf(buf, "locked\n");
	else
		len = -ENOTTY;

	return len;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment