Commit 16c7a368 authored by Linus Torvalds

Merge tag 'cxl-fixes-for-6.1-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl

Pull cxl fixes from Dan Williams:
 "Several fixes for CXL region creation crashes, leaks and failures.

  This is mainly fallout from the original implementation of dynamic CXL
  region creation (instantiate new physical memory pools) that arrived
  in v6.0-rc1.

  Given the theme of "failures in the presence of pass-through decoders"
  this also includes new regression test infrastructure for that case.

  Summary:

   - Fix region creation crash with pass-through decoders

   - Fix region creation crash when decoder allocation fails

   - Fix region creation crash when scanning regions to enforce the
     increasing physical address order constraint that CXL mandates

   - Fix a memory leak for cxl_pmem_region objects, track 1:N instead of
     1:1 memory-device-to-region associations.

   - Fix a memory leak for cxl_region objects when regions with active
     targets are deleted

   - Fix assignment of NUMA nodes to CXL regions by CFMWS (CXL Window)
     emulated proximity domains.

   - Fix region creation failure for switch attached devices downstream
     of a single-port host-bridge

   - Fix false positive memory leak of cxl_region objects by recycling
     recently used region ids rather than freeing them

   - Add regression test infrastructure for a pass-through decoder
     configuration

   - Fix some mailbox payload handling corner cases"

* tag 'cxl-fixes-for-6.1-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl:
  cxl/region: Recycle region ids
  cxl/region: Fix 'distance' calculation with passthrough ports
  tools/testing/cxl: Add a single-port host-bridge regression config
  tools/testing/cxl: Fix some error exits
  cxl/pmem: Fix cxl_pmem_region and cxl_memdev leak
  cxl/region: Fix cxl_region leak, cleanup targets at region delete
  cxl/region: Fix region HPA ordering validation
  cxl/pmem: Use size_add() against integer overflow
  cxl/region: Fix decoder allocation crash
  ACPI: NUMA: Add CXL CFMWS 'nodes' to the possible nodes set
  cxl/pmem: Fix failure to account for 8 byte header for writes to the device LSA.
  cxl/region: Fix null pointer dereference due to pass through decoder commit
  cxl/mbox: Add a check on input payload size
parents aa529949 8f401ec1
......@@ -327,6 +327,7 @@ static int __init acpi_parse_cfmws(union acpi_subtable_headers *header,
pr_warn("ACPI NUMA: Failed to add memblk for CFMWS node %d [mem %#llx-%#llx]\n",
node, start, end);
}
node_set(node, numa_nodes_parsed);
/* Set the next available fake_pxm value */
(*fake_pxm)++;
......
......@@ -174,7 +174,7 @@ int cxl_mbox_send_cmd(struct cxl_dev_state *cxlds, u16 opcode, void *in,
};
int rc;
if (out_size > cxlds->payload_size)
if (in_size > cxlds->payload_size || out_size > cxlds->payload_size)
return -E2BIG;
rc = cxlds->mbox_send(cxlds, &mbox_cmd);
......
......@@ -188,6 +188,7 @@ static void cxl_nvdimm_release(struct device *dev)
{
struct cxl_nvdimm *cxl_nvd = to_cxl_nvdimm(dev);
xa_destroy(&cxl_nvd->pmem_regions);
kfree(cxl_nvd);
}
......@@ -230,6 +231,7 @@ static struct cxl_nvdimm *cxl_nvdimm_alloc(struct cxl_memdev *cxlmd)
dev = &cxl_nvd->dev;
cxl_nvd->cxlmd = cxlmd;
xa_init(&cxl_nvd->pmem_regions);
device_initialize(dev);
lockdep_set_class(&dev->mutex, &cxl_nvdimm_key);
device_set_pm_not_required(dev);
......
......@@ -811,6 +811,7 @@ static struct cxl_dport *find_dport(struct cxl_port *port, int id)
static int add_dport(struct cxl_port *port, struct cxl_dport *new)
{
struct cxl_dport *dup;
int rc;
device_lock_assert(&port->dev);
dup = find_dport(port, new->port_id);
......@@ -821,8 +822,14 @@ static int add_dport(struct cxl_port *port, struct cxl_dport *new)
dev_name(dup->dport));
return -EBUSY;
}
return xa_insert(&port->dports, (unsigned long)new->dport, new,
GFP_KERNEL);
rc = xa_insert(&port->dports, (unsigned long)new->dport, new,
GFP_KERNEL);
if (rc)
return rc;
port->nr_dports++;
return 0;
}
/*
......
......@@ -174,7 +174,8 @@ static int cxl_region_decode_commit(struct cxl_region *cxlr)
iter = to_cxl_port(iter->dev.parent)) {
cxl_rr = cxl_rr_load(iter, cxlr);
cxld = cxl_rr->decoder;
rc = cxld->commit(cxld);
if (cxld->commit)
rc = cxld->commit(cxld);
if (rc)
break;
}
......@@ -657,6 +658,9 @@ static struct cxl_region_ref *alloc_region_ref(struct cxl_port *port,
xa_for_each(&port->regions, index, iter) {
struct cxl_region_params *ip = &iter->region->params;
if (!ip->res)
continue;
if (ip->res->start > p->res->start) {
dev_dbg(&cxlr->dev,
"%s: HPA order violation %s:%pr vs %pr\n",
......@@ -686,18 +690,27 @@ static struct cxl_region_ref *alloc_region_ref(struct cxl_port *port,
return cxl_rr;
}
static void free_region_ref(struct cxl_region_ref *cxl_rr)
/*
 * Undo the decoder <-> region link tracked by @cxl_rr.
 *
 * Does nothing when no decoder is recorded in the region reference.
 * Otherwise a one-time warning is raised if the decoder does not point
 * back at this reference's region; when it does, the decoder's region
 * pointer is cleared and a reference on the region device is dropped.
 */
static void cxl_rr_free_decoder(struct cxl_region_ref *cxl_rr)
{
	struct cxl_region *cxlr = cxl_rr->region;
	struct cxl_decoder *cxld = cxl_rr->decoder;

	if (!cxld)
		return;

	dev_WARN_ONCE(&cxlr->dev, cxld->region != cxlr, "region mismatch\n");
	if (cxld->region == cxlr) {
		cxld->region = NULL;
		put_device(&cxlr->dev);
	}
}
static void free_region_ref(struct cxl_region_ref *cxl_rr)
{
struct cxl_port *port = cxl_rr->port;
struct cxl_region *cxlr = cxl_rr->region;
cxl_rr_free_decoder(cxl_rr);
xa_erase(&port->regions, (unsigned long)cxlr);
xa_destroy(&cxl_rr->endpoints);
kfree(cxl_rr);
......@@ -728,6 +741,33 @@ static int cxl_rr_ep_add(struct cxl_region_ref *cxl_rr,
return 0;
}
static int cxl_rr_alloc_decoder(struct cxl_port *port, struct cxl_region *cxlr,
struct cxl_endpoint_decoder *cxled,
struct cxl_region_ref *cxl_rr)
{
struct cxl_decoder *cxld;
if (port == cxled_to_port(cxled))
cxld = &cxled->cxld;
else
cxld = cxl_region_find_decoder(port, cxlr);
if (!cxld) {
dev_dbg(&cxlr->dev, "%s: no decoder available\n",
dev_name(&port->dev));
return -EBUSY;
}
if (cxld->region) {
dev_dbg(&cxlr->dev, "%s: %s already attached to %s\n",
dev_name(&port->dev), dev_name(&cxld->dev),
dev_name(&cxld->region->dev));
return -EBUSY;
}
cxl_rr->decoder = cxld;
return 0;
}
/**
* cxl_port_attach_region() - track a region's interest in a port by endpoint
* @port: port to add a new region reference 'struct cxl_region_ref'
......@@ -794,12 +834,6 @@ static int cxl_port_attach_region(struct cxl_port *port,
cxl_rr->nr_targets++;
nr_targets_inc = true;
}
/*
* The decoder for @cxlr was allocated when the region was first
* attached to @port.
*/
cxld = cxl_rr->decoder;
} else {
cxl_rr = alloc_region_ref(port, cxlr);
if (IS_ERR(cxl_rr)) {
......@@ -810,26 +844,11 @@ static int cxl_port_attach_region(struct cxl_port *port,
}
nr_targets_inc = true;
if (port == cxled_to_port(cxled))
cxld = &cxled->cxld;
else
cxld = cxl_region_find_decoder(port, cxlr);
if (!cxld) {
dev_dbg(&cxlr->dev, "%s: no decoder available\n",
dev_name(&port->dev));
goto out_erase;
}
if (cxld->region) {
dev_dbg(&cxlr->dev, "%s: %s already attached to %s\n",
dev_name(&port->dev), dev_name(&cxld->dev),
dev_name(&cxld->region->dev));
rc = -EBUSY;
rc = cxl_rr_alloc_decoder(port, cxlr, cxled, cxl_rr);
if (rc)
goto out_erase;
}
cxl_rr->decoder = cxld;
}
cxld = cxl_rr->decoder;
rc = cxl_rr_ep_add(cxl_rr, cxled);
if (rc) {
......@@ -971,7 +990,14 @@ static int cxl_port_setup_targets(struct cxl_port *port,
if (cxl_rr->nr_targets_set) {
int i, distance;
distance = p->nr_targets / cxl_rr->nr_targets;
/*
* Passthrough ports impose no distance requirements between
* peers
*/
if (port->nr_dports == 1)
distance = 0;
else
distance = p->nr_targets / cxl_rr->nr_targets;
for (i = 0; i < cxl_rr->nr_targets_set; i++)
if (ep->dport == cxlsd->target[i]) {
rc = check_last_peer(cxled, ep, cxl_rr,
......@@ -1508,9 +1534,24 @@ static const struct attribute_group *region_groups[] = {
/*
 * Release callback for a region device.
 *
 * Gives one region id back to the memregion allocator, drops the
 * reference held on the parent device, and frees the region object.
 */
static void cxl_region_release(struct device *dev)
{
	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev->parent);
	struct cxl_region *cxlr = to_cxl_region(dev);
	int cached_id = atomic_read(&cxlrd->region_id);
	int id_to_free = cxlr->id;

	/*
	 * Prefer recycling the id that just went idle over keeping the
	 * cached next id, so the region id space does not grow needlessly.
	 * When the swap succeeds the root decoder now caches this region's
	 * id, and the previously cached id is the one handed back.
	 */
	if (cxlr->id < cached_id &&
	    atomic_try_cmpxchg(&cxlrd->region_id, &cached_id, cxlr->id))
		id_to_free = cached_id;

	memregion_free(id_to_free);
	put_device(dev->parent);
	kfree(cxlr);
}
......@@ -1538,8 +1579,19 @@ static struct cxl_region *to_cxl_region(struct device *dev)
/*
 * Tear down a region device: remove it from the device hierarchy, detach
 * every interleave target, release its iomem resource, and drop the
 * reference on the device.
 */
static void unregister_region(void *dev)
{
	struct cxl_region *cxlr = to_cxl_region(dev);
	struct cxl_region_params *params = &cxlr->params;
	int pos = 0;

	device_del(dev);

	/*
	 * With region sysfs shut down the parameter block is read-only,
	 * so the region rwsem does not need to be held while reading the
	 * region parameters.
	 */
	while (pos < params->interleave_ways) {
		detach_target(cxlr, pos);
		pos++;
	}

	cxl_region_iomem_release(cxlr);
	put_device(dev);
}
......@@ -1561,6 +1613,11 @@ static struct cxl_region *cxl_region_alloc(struct cxl_root_decoder *cxlrd, int i
device_initialize(dev);
lockdep_set_class(&dev->mutex, &cxl_region_key);
dev->parent = &cxlrd->cxlsd.cxld.dev;
/*
* Keep root decoder pinned through cxl_region_release to fixup
* region id allocations
*/
get_device(dev->parent);
device_set_pm_not_required(dev);
dev->bus = &cxl_bus_type;
dev->type = &cxl_region_type;
......
......@@ -423,7 +423,7 @@ struct cxl_nvdimm {
struct device dev;
struct cxl_memdev *cxlmd;
struct cxl_nvdimm_bridge *bridge;
struct cxl_pmem_region *region;
struct xarray pmem_regions;
};
struct cxl_pmem_region_mapping {
......@@ -457,6 +457,7 @@ struct cxl_pmem_region {
* @regions: cxl_region_ref instances, regions mapped by this port
* @parent_dport: dport that points to this port in the parent
* @decoder_ida: allocator for decoder ids
* @nr_dports: number of entries in @dports
* @hdm_end: track last allocated HDM decoder instance for allocation ordering
* @commit_end: cursor to track highest committed decoder for commit ordering
* @component_reg_phys: component register capability base address (optional)
......@@ -475,6 +476,7 @@ struct cxl_port {
struct xarray regions;
struct cxl_dport *parent_dport;
struct ida decoder_ida;
int nr_dports;
int hdm_end;
int commit_end;
resource_size_t component_reg_phys;
......
......@@ -30,17 +30,20 @@ static void unregister_nvdimm(void *nvdimm)
struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
struct cxl_nvdimm_bridge *cxl_nvb = cxl_nvd->bridge;
struct cxl_pmem_region *cxlr_pmem;
unsigned long index;
device_lock(&cxl_nvb->dev);
cxlr_pmem = cxl_nvd->region;
dev_set_drvdata(&cxl_nvd->dev, NULL);
cxl_nvd->region = NULL;
device_unlock(&cxl_nvb->dev);
xa_for_each(&cxl_nvd->pmem_regions, index, cxlr_pmem) {
get_device(&cxlr_pmem->dev);
device_unlock(&cxl_nvb->dev);
if (cxlr_pmem) {
device_release_driver(&cxlr_pmem->dev);
put_device(&cxlr_pmem->dev);
device_lock(&cxl_nvb->dev);
}
device_unlock(&cxl_nvb->dev);
nvdimm_delete(nvdimm);
cxl_nvd->bridge = NULL;
......@@ -107,7 +110,7 @@ static int cxl_pmem_get_config_size(struct cxl_dev_state *cxlds,
*cmd = (struct nd_cmd_get_config_size) {
.config_size = cxlds->lsa_size,
.max_xfer = cxlds->payload_size,
.max_xfer = cxlds->payload_size - sizeof(struct cxl_mbox_set_lsa),
};
return 0;
......@@ -148,7 +151,7 @@ static int cxl_pmem_set_config_data(struct cxl_dev_state *cxlds,
return -EINVAL;
/* 4-byte status follows the input data in the payload */
if (struct_size(cmd, in_buf, cmd->in_length) + 4 > buf_len)
if (size_add(struct_size(cmd, in_buf, cmd->in_length), 4) > buf_len)
return -EINVAL;
set_lsa =
......@@ -366,25 +369,49 @@ static int match_cxl_nvdimm(struct device *dev, void *data)
static void unregister_nvdimm_region(void *nd_region)
{
struct cxl_nvdimm_bridge *cxl_nvb;
struct cxl_pmem_region *cxlr_pmem;
nvdimm_region_delete(nd_region);
}
/*
 * Record @cxlr_pmem in @cxl_nvd's pmem_regions xarray, keyed by the
 * region's address, and take a reference on the region device once the
 * insertion has succeeded.
 *
 * Return: 0 on success, otherwise the error reported by xa_insert().
 */
static int cxl_nvdimm_add_region(struct cxl_nvdimm *cxl_nvd,
				 struct cxl_pmem_region *cxlr_pmem)
{
	int rc = xa_insert(&cxl_nvd->pmem_regions, (unsigned long)cxlr_pmem,
			   cxlr_pmem, GFP_KERNEL);

	/* Only pin the region when it is actually tracked */
	if (!rc)
		get_device(&cxlr_pmem->dev);

	return rc;
}
/*
 * Remove @cxlr_pmem from @cxl_nvd's pmem_regions xarray and, if an entry
 * was present, drop the reference on the region device.
 */
static void cxl_nvdimm_del_region(struct cxl_nvdimm *cxl_nvd,
				  struct cxl_pmem_region *cxlr_pmem)
{
	struct cxl_pmem_region *tracked;

	/*
	 * This may run without a matching cxl_nvdimm_add_region() for
	 * @cxlr_pmem, in which case nothing was stored and there is no
	 * reference to drop.
	 */
	tracked = xa_erase(&cxl_nvd->pmem_regions, (unsigned long)cxlr_pmem);
	if (!tracked)
		return;

	put_device(&tracked->dev);
}
static void release_mappings(void *data)
{
int i;
struct cxl_pmem_region *cxlr_pmem = data;
struct cxl_nvdimm_bridge *cxl_nvb = cxlr_pmem->bridge;
cxlr_pmem = nd_region_provider_data(nd_region);
cxl_nvb = cxlr_pmem->bridge;
device_lock(&cxl_nvb->dev);
for (i = 0; i < cxlr_pmem->nr_mappings; i++) {
struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i];
struct cxl_nvdimm *cxl_nvd = m->cxl_nvd;
if (cxl_nvd->region) {
put_device(&cxlr_pmem->dev);
cxl_nvd->region = NULL;
}
cxl_nvdimm_del_region(cxl_nvd, cxlr_pmem);
}
device_unlock(&cxl_nvb->dev);
nvdimm_region_delete(nd_region);
}
static void cxlr_pmem_remove_resource(void *res)
......@@ -422,7 +449,7 @@ static int cxl_pmem_region_probe(struct device *dev)
if (!cxl_nvb->nvdimm_bus) {
dev_dbg(dev, "nvdimm bus not found\n");
rc = -ENXIO;
goto err;
goto out_nvb;
}
memset(&mappings, 0, sizeof(mappings));
......@@ -431,7 +458,7 @@ static int cxl_pmem_region_probe(struct device *dev)
res = devm_kzalloc(dev, sizeof(*res), GFP_KERNEL);
if (!res) {
rc = -ENOMEM;
goto err;
goto out_nvb;
}
res->name = "Persistent Memory";
......@@ -442,11 +469,11 @@ static int cxl_pmem_region_probe(struct device *dev)
rc = insert_resource(&iomem_resource, res);
if (rc)
goto err;
goto out_nvb;
rc = devm_add_action_or_reset(dev, cxlr_pmem_remove_resource, res);
if (rc)
goto err;
goto out_nvb;
ndr_desc.res = res;
ndr_desc.provider_data = cxlr_pmem;
......@@ -462,7 +489,7 @@ static int cxl_pmem_region_probe(struct device *dev)
nd_set = devm_kzalloc(dev, sizeof(*nd_set), GFP_KERNEL);
if (!nd_set) {
rc = -ENOMEM;
goto err;
goto out_nvb;
}
ndr_desc.memregion = cxlr->id;
......@@ -472,9 +499,13 @@ static int cxl_pmem_region_probe(struct device *dev)
info = kmalloc_array(cxlr_pmem->nr_mappings, sizeof(*info), GFP_KERNEL);
if (!info) {
rc = -ENOMEM;
goto err;
goto out_nvb;
}
rc = devm_add_action_or_reset(dev, release_mappings, cxlr_pmem);
if (rc)
goto out_nvd;
for (i = 0; i < cxlr_pmem->nr_mappings; i++) {
struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i];
struct cxl_memdev *cxlmd = m->cxlmd;
......@@ -486,7 +517,7 @@ static int cxl_pmem_region_probe(struct device *dev)
dev_dbg(dev, "[%d]: %s: no cxl_nvdimm found\n", i,
dev_name(&cxlmd->dev));
rc = -ENODEV;
goto err;
goto out_nvd;
}
/* safe to drop ref now with bridge lock held */
......@@ -498,10 +529,17 @@ static int cxl_pmem_region_probe(struct device *dev)
dev_dbg(dev, "[%d]: %s: no nvdimm found\n", i,
dev_name(&cxlmd->dev));
rc = -ENODEV;
goto err;
goto out_nvd;
}
cxl_nvd->region = cxlr_pmem;
get_device(&cxlr_pmem->dev);
/*
* Pin the region per nvdimm device as those may be released
* out-of-order with respect to the region, and a single nvdimm
* may be associated with multiple regions
*/
rc = cxl_nvdimm_add_region(cxl_nvd, cxlr_pmem);
if (rc)
goto out_nvd;
m->cxl_nvd = cxl_nvd;
mappings[i] = (struct nd_mapping_desc) {
.nvdimm = nvdimm,
......@@ -527,27 +565,18 @@ static int cxl_pmem_region_probe(struct device *dev)
nvdimm_pmem_region_create(cxl_nvb->nvdimm_bus, &ndr_desc);
if (!cxlr_pmem->nd_region) {
rc = -ENOMEM;
goto err;
goto out_nvd;
}
rc = devm_add_action_or_reset(dev, unregister_nvdimm_region,
cxlr_pmem->nd_region);
out:
out_nvd:
kfree(info);
out_nvb:
device_unlock(&cxl_nvb->dev);
put_device(&cxl_nvb->dev);
return rc;
err:
dev_dbg(dev, "failed to create nvdimm region\n");
for (i--; i >= 0; i--) {
nvdimm = mappings[i].nvdimm;
cxl_nvd = nvdimm_provider_data(nvdimm);
put_device(&cxl_nvd->region->dev);
cxl_nvd->region = NULL;
}
goto out;
}
static struct cxl_driver cxl_pmem_region_driver = {
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment