Commit 3be134e5 authored by Linus Torvalds

Merge tag 'libnvdimm-for-4.10' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm

Pull libnvdimm updates from Dan Williams:
 "The libnvdimm pull request is relatively small this time around due to
  some development topics being deferred to 4.11.

  As for this pull request, the bulk of it has been in -next for several
  releases, with one late fix added (commit 868f036f ("libnvdimm: fix
  mishandled nvdimm_clear_poison() return value")). It has received a
  build-success notification from the 0day kbuild robot and passes the
  latest libnvdimm unit tests.

  Summary:

   - Dynamic label support: To date namespace label support has been
     limited to disambiguating cases where capacity accessed via PMEM
     (direct load/store) and via BLK (mmio aperture) aliases on the same
     DIMM. Since 4.9 added support for multiple namespaces per PMEM
     region, there is value in supporting namespace labels even in the
     non-aliasing case. The presence of a valid namespace index block
     force-enables label support when the kernel would otherwise rely on
     region boundaries, and permits the region to be sub-divided.

   - Handle media errors in namespace metadata: Complement the error
     handling for media errors in namespace data areas with support for
     clearing errors on writes and for downgrading potential
     machine-check exceptions to simple i/o errors on reads.

   - Device-DAX region attributes: Add 'align', 'id', and 'size' as
     attributes for device-dax regions. In particular, this enables
     userspace tooling to generically size memory mapping and i/o
     operations (a brief userspace sketch follows the patch list below).
     It also prevents userspace from growing assumptions / dependencies
     about the parent device topology of a dax region, since a libnvdimm
     namespace may not always be the parent device of a dax region.

   - Various cleanups and small fixes"

* tag 'libnvdimm-for-4.10' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm:
  dax: add region 'id', 'size', and 'align' attributes
  libnvdimm: fix mishandled nvdimm_clear_poison() return value
  libnvdimm: replace mutex_is_locked() warnings with lockdep_assert_held
  libnvdimm, pfn: fix align attribute
  libnvdimm, e820: use module_platform_driver
  libnvdimm, namespace: use octal for permissions
  libnvdimm, namespace: avoid multiple sector calculations
  libnvdimm: remove else after return in nsio_rw_bytes()
  libnvdimm, namespace: fix the type of name variable
  libnvdimm: use consistent naming for request_mem_region()
  nvdimm: use the right length of "pmem"
  libnvdimm: check and clear poison before writing to pmem
  tools/testing/nvdimm: dynamic label support
  libnvdimm: allow a platform to force enable label support
  libnvdimm: use generic iostat interfaces
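
As a brief illustration of how the new device-dax region attributes might be
consumed, here is a minimal userspace sketch; it is not part of this series.
The sysfs path and the /dev/dax0.0 node below are illustrative assumptions
and will differ between systems: the program reads the region 'align'
attribute and uses it to size an mmap() of the device.

/*
 * Illustrative only: read the device-dax region 'align' attribute and map
 * that many bytes of the corresponding character device.  Paths are
 * assumptions for a hypothetical dax0.0 instance.
 */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	unsigned long align = 0;
	FILE *f = fopen("/sys/class/dax/dax0.0/device/dax_region/align", "r");

	if (!f || fscanf(f, "%lu", &align) != 1 || align == 0) {
		fprintf(stderr, "failed to read region align\n");
		return EXIT_FAILURE;
	}
	fclose(f);

	int fd = open("/dev/dax0.0", O_RDWR);
	if (fd < 0) {
		perror("open /dev/dax0.0");
		return EXIT_FAILURE;
	}

	/* device-dax mappings are expected to be multiples of 'align' */
	void *addr = mmap(NULL, align, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (addr == MAP_FAILED) {
		perror("mmap");
		close(fd);
		return EXIT_FAILURE;
	}

	printf("mapped %lu bytes of dax0.0 at %p\n", align, addr);
	munmap(addr, align);
	close(fd);
	return EXIT_SUCCESS;
}

Tooling such as daxctl does this generically; the point is only that sizing
decisions can now come from the region attributes rather than from
assumptions about the parent device topology.
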
parents 8421c604 c44ef859
......@@ -75,6 +75,73 @@ struct dax_dev {
struct resource res[0];
};
static ssize_t id_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct dax_region *dax_region;
ssize_t rc = -ENXIO;
device_lock(dev);
dax_region = dev_get_drvdata(dev);
if (dax_region)
rc = sprintf(buf, "%d\n", dax_region->id);
device_unlock(dev);
return rc;
}
static DEVICE_ATTR_RO(id);
static ssize_t region_size_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct dax_region *dax_region;
ssize_t rc = -ENXIO;
device_lock(dev);
dax_region = dev_get_drvdata(dev);
if (dax_region)
rc = sprintf(buf, "%llu\n", (unsigned long long)
resource_size(&dax_region->res));
device_unlock(dev);
return rc;
}
static struct device_attribute dev_attr_region_size = __ATTR(size, 0444,
region_size_show, NULL);
static ssize_t align_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct dax_region *dax_region;
ssize_t rc = -ENXIO;
device_lock(dev);
dax_region = dev_get_drvdata(dev);
if (dax_region)
rc = sprintf(buf, "%u\n", dax_region->align);
device_unlock(dev);
return rc;
}
static DEVICE_ATTR_RO(align);
static struct attribute *dax_region_attributes[] = {
&dev_attr_region_size.attr,
&dev_attr_align.attr,
&dev_attr_id.attr,
NULL,
};
static const struct attribute_group dax_region_attribute_group = {
.name = "dax_region",
.attrs = dax_region_attributes,
};
static const struct attribute_group *dax_region_attribute_groups[] = {
&dax_region_attribute_group,
NULL,
};
static struct inode *dax_alloc_inode(struct super_block *sb)
{
return kmem_cache_alloc(dax_cache, GFP_KERNEL);
......@@ -200,12 +267,31 @@ void dax_region_put(struct dax_region *dax_region)
}
EXPORT_SYMBOL_GPL(dax_region_put);
static void dax_region_unregister(void *region)
{
struct dax_region *dax_region = region;
sysfs_remove_groups(&dax_region->dev->kobj,
dax_region_attribute_groups);
dax_region_put(dax_region);
}
struct dax_region *alloc_dax_region(struct device *parent, int region_id,
struct resource *res, unsigned int align, void *addr,
unsigned long pfn_flags)
{
struct dax_region *dax_region;
/*
* The DAX core assumes that it can store its private data in
* parent->driver_data. This WARN is a reminder / safeguard for
* developers of device-dax drivers.
*/
if (dev_get_drvdata(parent)) {
dev_WARN(parent, "dax core failed to setup private data\n");
return NULL;
}
if (!IS_ALIGNED(res->start, align)
|| !IS_ALIGNED(resource_size(res), align))
return NULL;
......@@ -214,6 +300,7 @@ struct dax_region *alloc_dax_region(struct device *parent, int region_id,
if (!dax_region)
return NULL;
dev_set_drvdata(parent, dax_region);
memcpy(&dax_region->res, res, sizeof(*res));
dax_region->pfn_flags = pfn_flags;
kref_init(&dax_region->kref);
......@@ -222,7 +309,14 @@ struct dax_region *alloc_dax_region(struct device *parent, int region_id,
dax_region->align = align;
dax_region->dev = parent;
dax_region->base = addr;
if (sysfs_create_groups(&parent->kobj, dax_region_attribute_groups)) {
kfree(dax_region);
return NULL;
}
kref_get(&dax_region->kref);
if (devm_add_action_or_reset(parent, dax_region_unregister, dax_region))
return NULL;
return dax_region;
}
EXPORT_SYMBOL_GPL(alloc_dax_region);
......
......@@ -89,7 +89,8 @@ static int dax_pmem_probe(struct device *dev)
pfn_sb = nd_pfn->pfn_sb;
if (!devm_request_mem_region(dev, nsio->res.start,
- resource_size(&nsio->res), dev_name(dev))) {
+ resource_size(&nsio->res),
+ dev_name(&ndns->dev))) {
dev_warn(dev, "could not reserve region %pR\n", &nsio->res);
return -EBUSY;
}
......
......@@ -22,9 +22,8 @@ void __nd_detach_ndns(struct device *dev, struct nd_namespace_common **_ndns)
{
struct nd_namespace_common *ndns = *_ndns;
- dev_WARN_ONCE(dev, !mutex_is_locked(&ndns->dev.mutex)
- || ndns->claim != dev,
- "%s: invalid claim\n", __func__);
+ lockdep_assert_held(&ndns->dev.mutex);
+ dev_WARN_ONCE(dev, ndns->claim != dev, "%s: invalid claim\n", __func__);
ndns->claim = NULL;
*_ndns = NULL;
put_device(&ndns->dev);
......@@ -49,9 +48,8 @@ bool __nd_attach_ndns(struct device *dev, struct nd_namespace_common *attach,
{
if (attach->claim)
return false;
- dev_WARN_ONCE(dev, !mutex_is_locked(&attach->dev.mutex)
- || *_ndns,
- "%s: invalid claim\n", __func__);
+ lockdep_assert_held(&attach->dev.mutex);
+ dev_WARN_ONCE(dev, *_ndns, "%s: invalid claim\n", __func__);
attach->claim = dev;
*_ndns = attach;
get_device(&attach->dev);
......@@ -226,6 +224,12 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
resource_size_t offset, void *buf, size_t size, int rw)
{
struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
unsigned int sz_align = ALIGN(size + (offset & (512 - 1)), 512);
sector_t sector = offset >> 9;
int rc = 0;
if (unlikely(!size))
return 0;
if (unlikely(offset + size > nsio->size)) {
dev_WARN_ONCE(&ndns->dev, 1, "request out of range\n");
......@@ -233,17 +237,31 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
}
if (rw == READ) {
- unsigned int sz_align = ALIGN(size + (offset & (512 - 1)), 512);
- if (unlikely(is_bad_pmem(&nsio->bb, offset / 512, sz_align)))
+ if (unlikely(is_bad_pmem(&nsio->bb, sector, sz_align)))
return -EIO;
return memcpy_from_pmem(buf, nsio->addr + offset, size);
- } else {
- memcpy_to_pmem(nsio->addr + offset, buf, size);
- nvdimm_flush(to_nd_region(ndns->dev.parent));
}
- return 0;
+ if (unlikely(is_bad_pmem(&nsio->bb, sector, sz_align))) {
+ if (IS_ALIGNED(offset, 512) && IS_ALIGNED(size, 512)) {
+ long cleared;
+ cleared = nvdimm_clear_poison(&ndns->dev, offset, size);
+ if (cleared < size)
+ rc = -EIO;
+ if (cleared > 0 && cleared / 512) {
+ cleared /= 512;
+ badblocks_clear(&nsio->bb, sector, cleared);
+ }
+ invalidate_pmem(nsio->addr + offset, size);
+ } else
+ rc = -EIO;
+ }
+ memcpy_to_pmem(nsio->addr + offset, buf, size);
+ nvdimm_flush(to_nd_region(ndns->dev.parent));
+ return rc;
}
int devm_nsio_enable(struct device *dev, struct nd_namespace_io *nsio)
......@@ -253,7 +271,7 @@ int devm_nsio_enable(struct device *dev, struct nd_namespace_io *nsio)
nsio->size = resource_size(res);
if (!devm_request_mem_region(dev, res->start, resource_size(res),
- dev_name(dev))) {
+ dev_name(&ndns->dev))) {
dev_warn(dev, "could not reserve region %pR\n", res);
return -EBUSY;
}
......
......@@ -317,35 +317,6 @@ ssize_t nd_sector_size_store(struct device *dev, const char *buf,
}
}
- void __nd_iostat_start(struct bio *bio, unsigned long *start)
- {
- struct gendisk *disk = bio->bi_bdev->bd_disk;
- const int rw = bio_data_dir(bio);
- int cpu = part_stat_lock();
- *start = jiffies;
- part_round_stats(cpu, &disk->part0);
- part_stat_inc(cpu, &disk->part0, ios[rw]);
- part_stat_add(cpu, &disk->part0, sectors[rw], bio_sectors(bio));
- part_inc_in_flight(&disk->part0, rw);
- part_stat_unlock();
- }
- EXPORT_SYMBOL(__nd_iostat_start);
- void nd_iostat_end(struct bio *bio, unsigned long start)
- {
- struct gendisk *disk = bio->bi_bdev->bd_disk;
- unsigned long duration = jiffies - start;
- const int rw = bio_data_dir(bio);
- int cpu = part_stat_lock();
- part_stat_add(cpu, &disk->part0, ticks[rw], duration);
- part_round_stats(cpu, &disk->part0);
- part_dec_in_flight(&disk->part0, rw);
- part_stat_unlock();
- }
- EXPORT_SYMBOL(nd_iostat_end);
static ssize_t commands_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
......
......@@ -64,6 +64,8 @@ static int nvdimm_probe(struct device *dev)
nd_label_copy(ndd, to_next_namespace_index(ndd),
to_current_namespace_index(ndd));
rc = nd_label_reserve_dpa(ndd);
if (ndd->ns_current >= 0)
nvdimm_set_aliasing(dev);
nvdimm_bus_unlock(dev);
if (rc)
......
......@@ -184,6 +184,13 @@ int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset,
return rc;
}
void nvdimm_set_aliasing(struct device *dev)
{
struct nvdimm *nvdimm = to_nvdimm(dev);
nvdimm->flags |= NDD_ALIASING;
}
static void nvdimm_release(struct device *dev)
{
struct nvdimm *nvdimm = to_nvdimm(dev);
......
......@@ -84,18 +84,8 @@ static struct platform_driver e820_pmem_driver = {
},
};
- static __init int e820_pmem_init(void)
- {
- return platform_driver_register(&e820_pmem_driver);
- }
- static __exit void e820_pmem_exit(void)
- {
- platform_driver_unregister(&e820_pmem_driver);
- }
+ module_platform_driver(e820_pmem_driver);
MODULE_ALIAS("platform:e820_pmem*");
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Intel Corporation");
- module_init(e820_pmem_init);
- module_exit(e820_pmem_exit);
......@@ -938,7 +938,7 @@ int nd_pmem_namespace_label_update(struct nd_region *nd_region,
}
for_each_dpa_resource(ndd, res)
- if (strncmp(res->name, "pmem", 3) == 0)
+ if (strncmp(res->name, "pmem", 4) == 0)
count++;
WARN_ON_ONCE(!count);
......
......@@ -1132,7 +1132,7 @@ static ssize_t size_show(struct device *dev,
return sprintf(buf, "%llu\n", (unsigned long long)
nvdimm_namespace_capacity(to_ndns(dev)));
}
- static DEVICE_ATTR(size, S_IRUGO, size_show, size_store);
+ static DEVICE_ATTR(size, 0444, size_show, size_store);
static u8 *namespace_to_uuid(struct device *dev)
{
......@@ -1456,7 +1456,7 @@ static umode_t namespace_visible(struct kobject *kobj,
if (is_namespace_pmem(dev) || is_namespace_blk(dev)) {
if (a == &dev_attr_size.attr)
- return S_IWUSR | S_IRUGO;
+ return 0644;
if (is_namespace_pmem(dev) && a == &dev_attr_sector_size.attr)
return 0;
......@@ -1653,7 +1653,7 @@ static int select_pmem_id(struct nd_region *nd_region, u8 *pmem_id)
u64 hw_start, hw_end, pmem_start, pmem_end;
struct nd_label_ent *label_ent;
- WARN_ON(!mutex_is_locked(&nd_mapping->lock));
+ lockdep_assert_held(&nd_mapping->lock);
list_for_each_entry(label_ent, &nd_mapping->labels, list) {
nd_label = label_ent->label;
if (!nd_label)
......@@ -1997,7 +1997,7 @@ struct device *create_namespace_blk(struct nd_region *nd_region,
struct nd_mapping *nd_mapping = &nd_region->mapping[0];
struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
struct nd_namespace_blk *nsblk;
- char *name[NSLABEL_NAME_LEN];
+ char name[NSLABEL_NAME_LEN];
struct device *dev = NULL;
struct resource *res;
......
......@@ -238,6 +238,7 @@ int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset,
void *buf, size_t len);
long nvdimm_clear_poison(struct device *dev, phys_addr_t phys,
unsigned int len);
void nvdimm_set_aliasing(struct device *dev);
struct nd_btt *to_nd_btt(struct device *dev);
struct nd_gen_sb {
......@@ -377,10 +378,17 @@ static inline bool nd_iostat_start(struct bio *bio, unsigned long *start)
if (!blk_queue_io_stat(disk->queue))
return false;
- __nd_iostat_start(bio, start);
+ *start = jiffies;
+ generic_start_io_acct(bio_data_dir(bio),
+ bio_sectors(bio), &disk->part0);
return true;
}
- void nd_iostat_end(struct bio *bio, unsigned long start);
+ static inline void nd_iostat_end(struct bio *bio, unsigned long start)
+ {
+ struct gendisk *disk = bio->bi_bdev->bd_disk;
+ generic_end_io_acct(bio_data_dir(bio), &disk->part0, start);
+ }
static inline bool is_bad_pmem(struct badblocks *bb, sector_t sector,
unsigned int len)
{
......
......@@ -108,7 +108,7 @@ static ssize_t align_show(struct device *dev,
{
struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
return sprintf(buf, "%lx\n", nd_pfn->align);
return sprintf(buf, "%ld\n", nd_pfn->align);
}
static ssize_t __align_store(struct nd_pfn *nd_pfn, const char *buf)
......
......@@ -53,21 +53,24 @@ static int pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset,
struct device *dev = to_dev(pmem);
sector_t sector;
long cleared;
+ int rc = 0;
sector = (offset - pmem->data_offset) / 512;
cleared = nvdimm_clear_poison(dev, pmem->phys_addr + offset, len);
+ if (cleared < len)
+ rc = -EIO;
if (cleared > 0 && cleared / 512) {
- dev_dbg(dev, "%s: %#llx clear %ld sector%s\n",
- __func__, (unsigned long long) sector,
- cleared / 512, cleared / 512 > 1 ? "s" : "");
- badblocks_clear(&pmem->bb, sector, cleared / 512);
- } else {
- return -EIO;
+ cleared /= 512;
+ dev_dbg(dev, "%s: %#llx clear %ld sector%s\n", __func__,
+ (unsigned long long) sector, cleared,
+ cleared > 1 ? "s" : "");
+ badblocks_clear(&pmem->bb, sector, cleared);
}
invalidate_pmem(pmem->virt_addr + offset, len);
- return 0;
+ return rc;
}
static void write_pmem(void *pmem_addr, struct page *page,
......@@ -270,7 +273,7 @@ static int pmem_attach_disk(struct device *dev,
dev_warn(dev, "unable to guarantee persistence of writes\n");
if (!devm_request_mem_region(dev, res->start, resource_size(res),
- dev_name(dev))) {
+ dev_name(&ndns->dev))) {
dev_warn(dev, "could not reserve region %pR\n", res);
return -EBUSY;
}
......
......@@ -509,7 +509,7 @@ void nd_mapping_free_labels(struct nd_mapping *nd_mapping)
{
struct nd_label_ent *label_ent, *e;
- WARN_ON(!mutex_is_locked(&nd_mapping->lock));
+ lockdep_assert_held(&nd_mapping->lock);
list_for_each_entry_safe(label_ent, e, &nd_mapping->labels, list) {
list_del(&label_ent->list);
kfree(label_ent);
......
......@@ -125,12 +125,13 @@ struct nfit_test_dcr {
(((node & 0xfff) << 16) | ((socket & 0xf) << 12) \
| ((imc & 0xf) << 8) | ((chan & 0xf) << 4) | (dimm & 0xf))
- static u32 handle[NUM_DCR] = {
+ static u32 handle[] = {
[0] = NFIT_DIMM_HANDLE(0, 0, 0, 0, 0),
[1] = NFIT_DIMM_HANDLE(0, 0, 0, 0, 1),
[2] = NFIT_DIMM_HANDLE(0, 0, 1, 0, 0),
[3] = NFIT_DIMM_HANDLE(0, 0, 1, 0, 1),
[4] = NFIT_DIMM_HANDLE(0, 1, 0, 0, 0),
[5] = NFIT_DIMM_HANDLE(1, 0, 0, 0, 0),
};
static unsigned long dimm_fail_cmd_flags[NUM_DCR];
......@@ -142,6 +143,7 @@ struct nfit_test {
void *nfit_buf;
dma_addr_t nfit_dma;
size_t nfit_size;
int dcr_idx;
int num_dcr;
int num_pm;
void **dimm;
......@@ -426,11 +428,11 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
break;
case ND_CMD_GET_CONFIG_DATA:
rc = nfit_test_cmd_get_config_data(buf, buf_len,
- t->label[i]);
+ t->label[i - t->dcr_idx]);
break;
case ND_CMD_SET_CONFIG_DATA:
rc = nfit_test_cmd_set_config_data(buf, buf_len,
- t->label[i]);
+ t->label[i - t->dcr_idx]);
break;
case ND_CMD_SMART:
rc = nfit_test_cmd_smart(buf, buf_len);
......@@ -682,7 +684,7 @@ static int nfit_test0_alloc(struct nfit_test *t)
if (!t->spa_set[2])
return -ENOMEM;
- for (i = 0; i < NUM_DCR; i++) {
+ for (i = 0; i < t->num_dcr; i++) {
t->dimm[i] = test_alloc(t, DIMM_SIZE, &t->dimm_dma[i]);
if (!t->dimm[i])
return -ENOMEM;
......@@ -699,7 +701,7 @@ static int nfit_test0_alloc(struct nfit_test *t)
return -ENOMEM;
}
- for (i = 0; i < NUM_DCR; i++) {
+ for (i = 0; i < t->num_dcr; i++) {
t->dcr[i] = test_alloc(t, LABEL_SIZE, &t->dcr_dma[i]);
if (!t->dcr[i])
return -ENOMEM;
......@@ -728,6 +730,7 @@ static int nfit_test1_alloc(struct nfit_test *t)
size_t nfit_size = sizeof(struct acpi_nfit_system_address) * 2
+ sizeof(struct acpi_nfit_memory_map)
+ offsetof(struct acpi_nfit_control_region, window_size);
int i;
t->nfit_buf = test_alloc(t, nfit_size, &t->nfit_dma);
if (!t->nfit_buf)
......@@ -738,6 +741,13 @@ static int nfit_test1_alloc(struct nfit_test *t)
if (!t->spa_set[0])
return -ENOMEM;
for (i = 0; i < t->num_dcr; i++) {
t->label[i] = test_alloc(t, LABEL_SIZE, &t->label_dma[i]);
if (!t->label[i])
return -ENOMEM;
sprintf(t->label[i], "label%d", i);
}
t->spa_set[1] = test_alloc(t, SPA_VCD_SIZE, &t->spa_set_dma[1]);
if (!t->spa_set[1])
return -ENOMEM;
......@@ -1450,7 +1460,7 @@ static void nfit_test1_setup(struct nfit_test *t)
memdev = nfit_buf + offset;
memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
memdev->header.length = sizeof(*memdev);
- memdev->device_handle = 0;
+ memdev->device_handle = handle[5];
memdev->physical_id = 0;
memdev->region_id = 0;
memdev->range_index = 0+1;
......@@ -1472,7 +1482,7 @@ static void nfit_test1_setup(struct nfit_test *t)
window_size);
dcr->region_index = 0+1;
dcr_common_init(dcr);
- dcr->serial_number = ~0;
+ dcr->serial_number = ~handle[5];
dcr->code = NFIT_FIC_BYTE;
dcr->windows = 0;
......@@ -1483,6 +1493,9 @@ static void nfit_test1_setup(struct nfit_test *t)
set_bit(ND_CMD_ARS_START, &acpi_desc->bus_cmd_force_en);
set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en);
set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en);
set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_cmd_force_en);
set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en);
set_bit(ND_CMD_SET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en);
}
static int nfit_test_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa,
......@@ -1886,12 +1899,15 @@ static __init int nfit_test_init(void)
switch (i) {
case 0:
nfit_test->num_pm = NUM_PM;
nfit_test->dcr_idx = 0;
nfit_test->num_dcr = NUM_DCR;
nfit_test->alloc = nfit_test0_alloc;
nfit_test->setup = nfit_test0_setup;
break;
case 1:
nfit_test->num_pm = 1;
nfit_test->dcr_idx = NUM_DCR;
nfit_test->num_dcr = 1;
nfit_test->alloc = nfit_test1_alloc;
nfit_test->setup = nfit_test1_setup;
break;
......