Commit f6d2b802 authored by Dan Williams

Merge branch 'for-5.7/libnvdimm' into libnvdimm-for-next

- Introduce 'zero_page_range' as a dax operation. This facilitates
  filesystem-dax operation without a block-device.

- Advertise a persistence-domain for of_pmem and papr_scm. The
  persistence domain indicates where cpu-store cycles need to reach in
  the platform-memory subsystem before the platform will consider them
  power-fail protected.

- Fix up some flexible-array declarations.
parents d3b88655 4e4ced93
...@@ -342,8 +342,10 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p) ...@@ -342,8 +342,10 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
if (p->is_volatile) if (p->is_volatile)
p->region = nvdimm_volatile_region_create(p->bus, &ndr_desc); p->region = nvdimm_volatile_region_create(p->bus, &ndr_desc);
else else {
set_bit(ND_REGION_PERSIST_MEMCTRL, &ndr_desc.flags);
p->region = nvdimm_pmem_region_create(p->bus, &ndr_desc); p->region = nvdimm_pmem_region_create(p->bus, &ndr_desc);
}
if (!p->region) { if (!p->region) {
dev_err(dev, "Error registering region %pR from %pOF\n", dev_err(dev, "Error registering region %pR from %pOF\n",
ndr_desc.res, p->dn); ndr_desc.res, p->dn);
......
...@@ -145,32 +145,32 @@ struct nfit_spa { ...@@ -145,32 +145,32 @@ struct nfit_spa {
unsigned long ars_state; unsigned long ars_state;
u32 clear_err_unit; u32 clear_err_unit;
u32 max_ars; u32 max_ars;
struct acpi_nfit_system_address spa[0]; struct acpi_nfit_system_address spa[];
}; };
struct nfit_dcr { struct nfit_dcr {
struct list_head list; struct list_head list;
struct acpi_nfit_control_region dcr[0]; struct acpi_nfit_control_region dcr[];
}; };
struct nfit_bdw { struct nfit_bdw {
struct list_head list; struct list_head list;
struct acpi_nfit_data_region bdw[0]; struct acpi_nfit_data_region bdw[];
}; };
struct nfit_idt { struct nfit_idt {
struct list_head list; struct list_head list;
struct acpi_nfit_interleave idt[0]; struct acpi_nfit_interleave idt[];
}; };
struct nfit_flush { struct nfit_flush {
struct list_head list; struct list_head list;
struct acpi_nfit_flush_address flush[0]; struct acpi_nfit_flush_address flush[];
}; };
struct nfit_memdev { struct nfit_memdev {
struct list_head list; struct list_head list;
struct acpi_nfit_memory_map memdev[0]; struct acpi_nfit_memory_map memdev[];
}; };
enum nfit_mem_flags { enum nfit_mem_flags {
......
...@@ -421,8 +421,10 @@ struct dev_dax *__devm_create_dev_dax(struct dax_region *dax_region, int id, ...@@ -421,8 +421,10 @@ struct dev_dax *__devm_create_dev_dax(struct dax_region *dax_region, int id,
* device outside of mmap of the resulting character device. * device outside of mmap of the resulting character device.
*/ */
dax_dev = alloc_dax(dev_dax, NULL, NULL, DAXDEV_F_SYNC); dax_dev = alloc_dax(dev_dax, NULL, NULL, DAXDEV_F_SYNC);
if (!dax_dev) if (IS_ERR(dax_dev)) {
rc = PTR_ERR(dax_dev);
goto err; goto err;
}
/* a device_dax instance is dead while the driver is not attached */ /* a device_dax instance is dead while the driver is not attached */
kill_dax(dax_dev); kill_dax(dax_dev);
......
...@@ -344,6 +344,23 @@ size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, ...@@ -344,6 +344,23 @@ size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
} }
EXPORT_SYMBOL_GPL(dax_copy_to_iter); EXPORT_SYMBOL_GPL(dax_copy_to_iter);
/*
 * dax_zero_page_range() - ask the dax driver to zero a run of pages
 * @dax_dev:  device whose ->zero_page_range() operation is invoked
 * @pgoff:    page offset handed through to the driver unchanged
 * @nr_pages: number of pages to zero; must currently be exactly 1
 *
 * Returns -ENXIO when dax_alive() reports the device is not alive, -EIO
 * when @nr_pages is anything other than 1, and otherwise whatever the
 * driver's ->zero_page_range() returns.
 */
int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
			size_t nr_pages)
{
	if (dax_alive(dax_dev)) {
		/*
		 * Nothing asks for more than one page today.  The single-page
		 * restriction can go away once such users exist and the
		 * device-mapper code knows how to split a range across
		 * targets.
		 */
		if (nr_pages == 1)
			return dax_dev->ops->zero_page_range(dax_dev, pgoff,
							     nr_pages);
		return -EIO;
	}

	return -ENXIO;
}
EXPORT_SYMBOL_GPL(dax_zero_page_range);
#ifdef CONFIG_ARCH_HAS_PMEM_API #ifdef CONFIG_ARCH_HAS_PMEM_API
void arch_wb_cache_pmem(void *addr, size_t size); void arch_wb_cache_pmem(void *addr, size_t size);
void dax_flush(struct dax_device *dax_dev, void *addr, size_t size) void dax_flush(struct dax_device *dax_dev, void *addr, size_t size)
...@@ -551,9 +568,16 @@ struct dax_device *alloc_dax(void *private, const char *__host, ...@@ -551,9 +568,16 @@ struct dax_device *alloc_dax(void *private, const char *__host,
dev_t devt; dev_t devt;
int minor; int minor;
if (ops && !ops->zero_page_range) {
pr_debug("%s: error: device does not provide dax"
" operation zero_page_range()\n",
__host ? __host : "Unknown");
return ERR_PTR(-EINVAL);
}
host = kstrdup(__host, GFP_KERNEL); host = kstrdup(__host, GFP_KERNEL);
if (__host && !host) if (__host && !host)
return NULL; return ERR_PTR(-ENOMEM);
minor = ida_simple_get(&dax_minor_ida, 0, MINORMASK+1, GFP_KERNEL); minor = ida_simple_get(&dax_minor_ida, 0, MINORMASK+1, GFP_KERNEL);
if (minor < 0) if (minor < 0)
...@@ -576,7 +600,7 @@ struct dax_device *alloc_dax(void *private, const char *__host, ...@@ -576,7 +600,7 @@ struct dax_device *alloc_dax(void *private, const char *__host,
ida_simple_remove(&dax_minor_ida, minor); ida_simple_remove(&dax_minor_ida, minor);
err_minor: err_minor:
kfree(host); kfree(host);
return NULL; return ERR_PTR(-ENOMEM);
} }
EXPORT_SYMBOL_GPL(alloc_dax); EXPORT_SYMBOL_GPL(alloc_dax);
......
...@@ -201,10 +201,27 @@ static size_t linear_dax_copy_to_iter(struct dm_target *ti, pgoff_t pgoff, ...@@ -201,10 +201,27 @@ static size_t linear_dax_copy_to_iter(struct dm_target *ti, pgoff_t pgoff,
return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i); return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i);
} }
/*
 * Zero @nr_pages pages of a linear target through the underlying dax device.
 *
 * The target-relative page offset is converted to a sector, remapped with
 * linear_map_sector(), translated back into a page offset on the backing
 * device by bdev_dax_pgoff(), and then passed to dax_zero_page_range().
 *
 * Returns the error from bdev_dax_pgoff() if that translation fails,
 * otherwise the result of dax_zero_page_range().
 */
static int linear_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,
				      size_t nr_pages)
{
	struct linear_c *lc = ti->private;
	sector_t target_sector = pgoff * PAGE_SECTORS;
	sector_t backing_sector = linear_map_sector(ti, target_sector);
	int err;

	/* pgoff is rewritten in place to the backing device's page offset */
	err = bdev_dax_pgoff(lc->dev->bdev, backing_sector,
			     nr_pages << PAGE_SHIFT, &pgoff);
	if (err)
		return err;

	return dax_zero_page_range(lc->dev->dax_dev, pgoff, nr_pages);
}
#else #else
#define linear_dax_direct_access NULL #define linear_dax_direct_access NULL
#define linear_dax_copy_from_iter NULL #define linear_dax_copy_from_iter NULL
#define linear_dax_copy_to_iter NULL #define linear_dax_copy_to_iter NULL
#define linear_dax_zero_page_range NULL
#endif #endif
static struct target_type linear_target = { static struct target_type linear_target = {
...@@ -226,6 +243,7 @@ static struct target_type linear_target = { ...@@ -226,6 +243,7 @@ static struct target_type linear_target = {
.direct_access = linear_dax_direct_access, .direct_access = linear_dax_direct_access,
.dax_copy_from_iter = linear_dax_copy_from_iter, .dax_copy_from_iter = linear_dax_copy_from_iter,
.dax_copy_to_iter = linear_dax_copy_to_iter, .dax_copy_to_iter = linear_dax_copy_to_iter,
.dax_zero_page_range = linear_dax_zero_page_range,
}; };
int __init dm_linear_init(void) int __init dm_linear_init(void)
......
...@@ -994,10 +994,26 @@ static size_t log_writes_dax_copy_to_iter(struct dm_target *ti, ...@@ -994,10 +994,26 @@ static size_t log_writes_dax_copy_to_iter(struct dm_target *ti,
return dax_copy_to_iter(lc->dev->dax_dev, pgoff, addr, bytes, i); return dax_copy_to_iter(lc->dev->dax_dev, pgoff, addr, bytes, i);
} }
/*
 * Zero @nr_pages pages of a log-writes target through the underlying dax
 * device.
 *
 * The target-relative page offset is converted to a sector and translated
 * by bdev_dax_pgoff() into a page offset on the backing device, which is
 * then handed to dax_zero_page_range().
 *
 * Returns the error from bdev_dax_pgoff() if the translation fails,
 * otherwise the result of dax_zero_page_range().
 */
static int log_writes_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,
					  size_t nr_pages)
{
	int ret;
	struct log_writes_c *lc = ti->private;
	sector_t sector = pgoff * PAGE_SECTORS;

	/* bdev_dax_pgoff() takes a size in bytes, hence the shift here ... */
	ret = bdev_dax_pgoff(lc->dev->bdev, sector, nr_pages << PAGE_SHIFT,
			     &pgoff);
	if (ret)
		return ret;

	/*
	 * ... but dax_zero_page_range() takes a count of pages.  Passing the
	 * byte count here would trip its single-page check and fail every
	 * request with -EIO.
	 */
	return dax_zero_page_range(lc->dev->dax_dev, pgoff, nr_pages);
}
#else #else
#define log_writes_dax_direct_access NULL #define log_writes_dax_direct_access NULL
#define log_writes_dax_copy_from_iter NULL #define log_writes_dax_copy_from_iter NULL
#define log_writes_dax_copy_to_iter NULL #define log_writes_dax_copy_to_iter NULL
#define log_writes_dax_zero_page_range NULL
#endif #endif
static struct target_type log_writes_target = { static struct target_type log_writes_target = {
...@@ -1016,6 +1032,7 @@ static struct target_type log_writes_target = { ...@@ -1016,6 +1032,7 @@ static struct target_type log_writes_target = {
.direct_access = log_writes_dax_direct_access, .direct_access = log_writes_dax_direct_access,
.dax_copy_from_iter = log_writes_dax_copy_from_iter, .dax_copy_from_iter = log_writes_dax_copy_from_iter,
.dax_copy_to_iter = log_writes_dax_copy_to_iter, .dax_copy_to_iter = log_writes_dax_copy_to_iter,
.dax_zero_page_range = log_writes_dax_zero_page_range,
}; };
static int __init dm_log_writes_init(void) static int __init dm_log_writes_init(void)
......
...@@ -360,10 +360,32 @@ static size_t stripe_dax_copy_to_iter(struct dm_target *ti, pgoff_t pgoff, ...@@ -360,10 +360,32 @@ static size_t stripe_dax_copy_to_iter(struct dm_target *ti, pgoff_t pgoff,
return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i); return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i);
} }
/*
 * Zero @nr_pages pages of a striped target through the dax device of the
 * stripe that stripe_map_sector() selects for the starting sector.
 *
 * The stripe-relative sector reported by stripe_map_sector() is offset by
 * that stripe's physical_start, translated by bdev_dax_pgoff() into a page
 * offset on the stripe's block device, and passed to dax_zero_page_range().
 *
 * Returns the error from bdev_dax_pgoff() if the translation fails,
 * otherwise the result of dax_zero_page_range().
 */
static int stripe_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,
				      size_t nr_pages)
{
	struct stripe_c *sc = ti->private;
	sector_t target_sector = pgoff * PAGE_SECTORS;
	sector_t stripe_sector;
	uint32_t idx;
	int err;

	stripe_map_sector(sc, target_sector, &idx, &stripe_sector);
	stripe_sector += sc->stripe[idx].physical_start;

	/* pgoff is rewritten in place to the stripe device's page offset */
	err = bdev_dax_pgoff(sc->stripe[idx].dev->bdev, stripe_sector,
			     nr_pages << PAGE_SHIFT, &pgoff);
	if (err)
		return err;

	return dax_zero_page_range(sc->stripe[idx].dev->dax_dev, pgoff,
				   nr_pages);
}
#else #else
#define stripe_dax_direct_access NULL #define stripe_dax_direct_access NULL
#define stripe_dax_copy_from_iter NULL #define stripe_dax_copy_from_iter NULL
#define stripe_dax_copy_to_iter NULL #define stripe_dax_copy_to_iter NULL
#define stripe_dax_zero_page_range NULL
#endif #endif
/* /*
...@@ -486,6 +508,7 @@ static struct target_type stripe_target = { ...@@ -486,6 +508,7 @@ static struct target_type stripe_target = {
.direct_access = stripe_dax_direct_access, .direct_access = stripe_dax_direct_access,
.dax_copy_from_iter = stripe_dax_copy_from_iter, .dax_copy_from_iter = stripe_dax_copy_from_iter,
.dax_copy_to_iter = stripe_dax_copy_to_iter, .dax_copy_to_iter = stripe_dax_copy_to_iter,
.dax_zero_page_range = stripe_dax_zero_page_range,
}; };
int __init dm_stripe_init(void) int __init dm_stripe_init(void)
......
...@@ -1198,6 +1198,35 @@ static size_t dm_dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, ...@@ -1198,6 +1198,35 @@ static size_t dm_dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff,
return ret; return ret;
} }
/*
 * dax ->zero_page_range() operation for a mapped device: look up the live
 * target covering @pgoff and forward the request to that target's
 * ->dax_zero_page_range() method.
 *
 * Returns -EIO when no live target is found or the target lacks the method,
 * otherwise whatever the target's method returns.
 */
static int dm_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
				  size_t nr_pages)
{
	struct mapped_device *md = dax_get_private(dax_dev);
	sector_t sector = pgoff * PAGE_SECTORS;
	struct dm_target *ti;
	int ret = -EIO;
	int srcu_idx;

	ti = dm_dax_get_live_target(md, sector, &srcu_idx);
	if (!ti)
		goto out;

	if (WARN_ON(!ti->type->dax_zero_page_range)) {
		/*
		 * ->zero_page_range() is a mandatory dax operation. If we are
		 * here, something is wrong.
		 *
		 * Do not call dm_put_live_table() here: the common exit path
		 * below already does, and releasing twice would unbalance
		 * the live-table reference identified by srcu_idx.
		 */
		goto out;
	}

	ret = ti->type->dax_zero_page_range(ti, pgoff, nr_pages);

out:
	/* single release point for every path, including the !ti case */
	dm_put_live_table(md, srcu_idx);

	return ret;
}
/* /*
* A target may call dm_accept_partial_bio only from the map routine. It is * A target may call dm_accept_partial_bio only from the map routine. It is
* allowed for all bio types except REQ_PREFLUSH, REQ_OP_ZONE_RESET, * allowed for all bio types except REQ_PREFLUSH, REQ_OP_ZONE_RESET,
...@@ -1976,7 +2005,7 @@ static struct mapped_device *alloc_dev(int minor) ...@@ -1976,7 +2005,7 @@ static struct mapped_device *alloc_dev(int minor)
if (IS_ENABLED(CONFIG_DAX_DRIVER)) { if (IS_ENABLED(CONFIG_DAX_DRIVER)) {
md->dax_dev = alloc_dax(md, md->disk->disk_name, md->dax_dev = alloc_dax(md, md->disk->disk_name,
&dm_dax_ops, 0); &dm_dax_ops, 0);
if (!md->dax_dev) if (IS_ERR(md->dax_dev))
goto bad; goto bad;
} }
...@@ -3199,6 +3228,7 @@ static const struct dax_operations dm_dax_ops = { ...@@ -3199,6 +3228,7 @@ static const struct dax_operations dm_dax_ops = {
.dax_supported = dm_dax_supported, .dax_supported = dm_dax_supported,
.copy_from_iter = dm_dax_copy_from_iter, .copy_from_iter = dm_dax_copy_from_iter,
.copy_to_iter = dm_dax_copy_to_iter, .copy_to_iter = dm_dax_copy_to_iter,
.zero_page_range = dm_dax_zero_page_range,
}; };
/* /*
......
...@@ -62,7 +62,7 @@ struct nd_namespace_index { ...@@ -62,7 +62,7 @@ struct nd_namespace_index {
__le16 major; __le16 major;
__le16 minor; __le16 minor;
__le64 checksum; __le64 checksum;
u8 free[0]; u8 free[];
}; };
/** /**
......
...@@ -39,7 +39,7 @@ struct nd_region_data { ...@@ -39,7 +39,7 @@ struct nd_region_data {
int ns_count; int ns_count;
int ns_active; int ns_active;
unsigned int hints_shift; unsigned int hints_shift;
void __iomem *flush_wpq[0]; void __iomem *flush_wpq[];
}; };
static inline void __iomem *ndrd_get_flush_wpq(struct nd_region_data *ndrd, static inline void __iomem *ndrd_get_flush_wpq(struct nd_region_data *ndrd,
...@@ -157,7 +157,7 @@ struct nd_region { ...@@ -157,7 +157,7 @@ struct nd_region {
struct nd_interleave_set *nd_set; struct nd_interleave_set *nd_set;
struct nd_percpu_lane __percpu *lane; struct nd_percpu_lane __percpu *lane;
int (*flush)(struct nd_region *nd_region, struct bio *bio); int (*flush)(struct nd_region *nd_region, struct bio *bio);
struct nd_mapping mapping[0]; struct nd_mapping mapping[];
}; };
struct nd_blk_region { struct nd_blk_region {
......
...@@ -62,8 +62,10 @@ static int of_pmem_region_probe(struct platform_device *pdev) ...@@ -62,8 +62,10 @@ static int of_pmem_region_probe(struct platform_device *pdev)
if (is_volatile) if (is_volatile)
region = nvdimm_volatile_region_create(bus, &ndr_desc); region = nvdimm_volatile_region_create(bus, &ndr_desc);
else else {
set_bit(ND_REGION_PERSIST_MEMCTRL, &ndr_desc.flags);
region = nvdimm_pmem_region_create(bus, &ndr_desc); region = nvdimm_pmem_region_create(bus, &ndr_desc);
}
if (!region) if (!region)
dev_warn(&pdev->dev, "Unable to register region %pR from %pOF\n", dev_warn(&pdev->dev, "Unable to register region %pR from %pOF\n",
......
...@@ -136,9 +136,25 @@ static blk_status_t read_pmem(struct page *page, unsigned int off, ...@@ -136,9 +136,25 @@ static blk_status_t read_pmem(struct page *page, unsigned int off,
return BLK_STS_OK; return BLK_STS_OK;
} }
static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page, static blk_status_t pmem_do_read(struct pmem_device *pmem,
unsigned int len, unsigned int off, unsigned int op, struct page *page, unsigned int page_off,
sector_t sector) sector_t sector, unsigned int len)
{
blk_status_t rc;
phys_addr_t pmem_off = sector * 512 + pmem->data_offset;
void *pmem_addr = pmem->virt_addr + pmem_off;
if (unlikely(is_bad_pmem(&pmem->bb, sector, len)))
return BLK_STS_IOERR;
rc = read_pmem(page, page_off, pmem_addr, len);
flush_dcache_page(page);
return rc;
}
static blk_status_t pmem_do_write(struct pmem_device *pmem,
struct page *page, unsigned int page_off,
sector_t sector, unsigned int len)
{ {
blk_status_t rc = BLK_STS_OK; blk_status_t rc = BLK_STS_OK;
bool bad_pmem = false; bool bad_pmem = false;
...@@ -148,34 +164,25 @@ static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page, ...@@ -148,34 +164,25 @@ static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page,
if (unlikely(is_bad_pmem(&pmem->bb, sector, len))) if (unlikely(is_bad_pmem(&pmem->bb, sector, len)))
bad_pmem = true; bad_pmem = true;
if (!op_is_write(op)) { /*
if (unlikely(bad_pmem)) * Note that we write the data both before and after
rc = BLK_STS_IOERR; * clearing poison. The write before clear poison
else { * handles situations where the latest written data is
rc = read_pmem(page, off, pmem_addr, len); * preserved and the clear poison operation simply marks
flush_dcache_page(page); * the address range as valid without changing the data.
} * In this case application software can assume that an
} else { * interrupted write will either return the new good
/* * data or an error.
* Note that we write the data both before and after *
* clearing poison. The write before clear poison * However, if pmem_clear_poison() leaves the data in an
* handles situations where the latest written data is * indeterminate state we need to perform the write
* preserved and the clear poison operation simply marks * after clear poison.
* the address range as valid without changing the data. */
* In this case application software can assume that an flush_dcache_page(page);
* interrupted write will either return the new good write_pmem(pmem_addr, page, page_off, len);
* data or an error. if (unlikely(bad_pmem)) {
* rc = pmem_clear_poison(pmem, pmem_off, len);
* However, if pmem_clear_poison() leaves the data in an write_pmem(pmem_addr, page, page_off, len);
* indeterminate state we need to perform the write
* after clear poison.
*/
flush_dcache_page(page);
write_pmem(pmem_addr, page, off, len);
if (unlikely(bad_pmem)) {
rc = pmem_clear_poison(pmem, pmem_off, len);
write_pmem(pmem_addr, page, off, len);
}
} }
return rc; return rc;
...@@ -197,8 +204,12 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio) ...@@ -197,8 +204,12 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
do_acct = nd_iostat_start(bio, &start); do_acct = nd_iostat_start(bio, &start);
bio_for_each_segment(bvec, bio, iter) { bio_for_each_segment(bvec, bio, iter) {
rc = pmem_do_bvec(pmem, bvec.bv_page, bvec.bv_len, if (op_is_write(bio_op(bio)))
bvec.bv_offset, bio_op(bio), iter.bi_sector); rc = pmem_do_write(pmem, bvec.bv_page, bvec.bv_offset,
iter.bi_sector, bvec.bv_len);
else
rc = pmem_do_read(pmem, bvec.bv_page, bvec.bv_offset,
iter.bi_sector, bvec.bv_len);
if (rc) { if (rc) {
bio->bi_status = rc; bio->bi_status = rc;
break; break;
...@@ -223,9 +234,12 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector, ...@@ -223,9 +234,12 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
struct pmem_device *pmem = bdev->bd_queue->queuedata; struct pmem_device *pmem = bdev->bd_queue->queuedata;
blk_status_t rc; blk_status_t rc;
rc = pmem_do_bvec(pmem, page, hpage_nr_pages(page) * PAGE_SIZE, if (op_is_write(op))
0, op, sector); rc = pmem_do_write(pmem, page, 0, sector,
hpage_nr_pages(page) * PAGE_SIZE);
else
rc = pmem_do_read(pmem, page, 0, sector,
hpage_nr_pages(page) * PAGE_SIZE);
/* /*
* The ->rw_page interface is subtle and tricky. The core * The ->rw_page interface is subtle and tricky. The core
* retries on any error, so we can only invoke page_endio() in * retries on any error, so we can only invoke page_endio() in
...@@ -268,6 +282,16 @@ static const struct block_device_operations pmem_fops = { ...@@ -268,6 +282,16 @@ static const struct block_device_operations pmem_fops = {
.revalidate_disk = nvdimm_revalidate_disk, .revalidate_disk = nvdimm_revalidate_disk,
}; };
/*
 * dax ->zero_page_range() operation for pmem: write one PAGE_SIZE worth of
 * zeroes, sourced from ZERO_PAGE(0), at page offset @pgoff by way of
 * pmem_do_write().
 *
 * @nr_pages is not consulted; exactly one page is written per call.
 *
 * Returns the blk_status_t from pmem_do_write() converted to an errno.
 */
static int pmem_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
				    size_t nr_pages)
{
	struct pmem_device *pmem = dax_get_private(dax_dev);
	/* page offset -> byte offset -> 512-byte sector number */
	sector_t first_sector = PFN_PHYS(pgoff) >> SECTOR_SHIFT;
	blk_status_t status;

	status = pmem_do_write(pmem, ZERO_PAGE(0), 0, first_sector, PAGE_SIZE);

	return blk_status_to_errno(status);
}
static long pmem_dax_direct_access(struct dax_device *dax_dev, static long pmem_dax_direct_access(struct dax_device *dax_dev,
pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn) pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn)
{ {
...@@ -299,6 +323,7 @@ static const struct dax_operations pmem_dax_ops = { ...@@ -299,6 +323,7 @@ static const struct dax_operations pmem_dax_ops = {
.dax_supported = generic_fsdax_supported, .dax_supported = generic_fsdax_supported,
.copy_from_iter = pmem_copy_from_iter, .copy_from_iter = pmem_copy_from_iter,
.copy_to_iter = pmem_copy_to_iter, .copy_to_iter = pmem_copy_to_iter,
.zero_page_range = pmem_dax_zero_page_range,
}; };
static const struct attribute_group *pmem_attribute_groups[] = { static const struct attribute_group *pmem_attribute_groups[] = {
...@@ -462,9 +487,9 @@ static int pmem_attach_disk(struct device *dev, ...@@ -462,9 +487,9 @@ static int pmem_attach_disk(struct device *dev,
if (is_nvdimm_sync(nd_region)) if (is_nvdimm_sync(nd_region))
flags = DAXDEV_F_SYNC; flags = DAXDEV_F_SYNC;
dax_dev = alloc_dax(pmem, disk->disk_name, &pmem_dax_ops, flags); dax_dev = alloc_dax(pmem, disk->disk_name, &pmem_dax_ops, flags);
if (!dax_dev) { if (IS_ERR(dax_dev)) {
put_disk(disk); put_disk(disk);
return -ENOMEM; return PTR_ERR(dax_dev);
} }
dax_write_cache(dax_dev, nvdimm_has_cache(nd_region)); dax_write_cache(dax_dev, nvdimm_has_cache(nd_region));
pmem->dax_dev = dax_dev; pmem->dax_dev = dax_dev;
......
...@@ -57,11 +57,26 @@ static size_t dcssblk_dax_copy_to_iter(struct dax_device *dax_dev, ...@@ -57,11 +57,26 @@ static size_t dcssblk_dax_copy_to_iter(struct dax_device *dax_dev,
return copy_to_iter(addr, bytes, i); return copy_to_iter(addr, bytes, i);
} }
/*
 * dax ->zero_page_range() operation for dcssblk: obtain a kernel address
 * for the range with dax_direct_access(), clear it with memset() and then
 * call dax_flush() over the same bytes.
 *
 * Returns the negative value from dax_direct_access() on failure, 0 on
 * success.
 */
static int dcssblk_dax_zero_page_range(struct dax_device *dax_dev,
				       pgoff_t pgoff, size_t nr_pages)
{
	const size_t bytes = nr_pages << PAGE_SHIFT;
	void *base;
	long mapped;

	mapped = dax_direct_access(dax_dev, pgoff, nr_pages, &base, NULL);
	if (mapped < 0)
		return mapped;

	/*
	 * NOTE(review): a non-negative return is treated as "all nr_pages
	 * are addressable" - confirm dax_direct_access() cannot map fewer.
	 */
	memset(base, 0, bytes);
	dax_flush(dax_dev, base, bytes);

	return 0;
}
static const struct dax_operations dcssblk_dax_ops = { static const struct dax_operations dcssblk_dax_ops = {
.direct_access = dcssblk_dax_direct_access, .direct_access = dcssblk_dax_direct_access,
.dax_supported = generic_fsdax_supported, .dax_supported = generic_fsdax_supported,
.copy_from_iter = dcssblk_dax_copy_from_iter, .copy_from_iter = dcssblk_dax_copy_from_iter,
.copy_to_iter = dcssblk_dax_copy_to_iter, .copy_to_iter = dcssblk_dax_copy_to_iter,
.zero_page_range = dcssblk_dax_zero_page_range,
}; };
struct dcssblk_dev_info { struct dcssblk_dev_info {
...@@ -680,8 +695,9 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char ...@@ -680,8 +695,9 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char
dev_info->dax_dev = alloc_dax(dev_info, dev_info->gd->disk_name, dev_info->dax_dev = alloc_dax(dev_info, dev_info->gd->disk_name,
&dcssblk_dax_ops, DAXDEV_F_SYNC); &dcssblk_dax_ops, DAXDEV_F_SYNC);
if (!dev_info->dax_dev) { if (IS_ERR(dev_info->dax_dev)) {
rc = -ENOMEM; rc = PTR_ERR(dev_info->dax_dev);
dev_info->dax_dev = NULL;
goto put_dev; goto put_dev;
} }
......
...@@ -1038,50 +1038,43 @@ static vm_fault_t dax_load_hole(struct xa_state *xas, ...@@ -1038,50 +1038,43 @@ static vm_fault_t dax_load_hole(struct xa_state *xas,
return ret; return ret;
} }
static bool dax_range_is_aligned(struct block_device *bdev, int dax_iomap_zero(loff_t pos, unsigned offset, unsigned size,
unsigned int offset, unsigned int length) struct iomap *iomap)
{ {
unsigned short sector_size = bdev_logical_block_size(bdev); sector_t sector = iomap_sector(iomap, pos & PAGE_MASK);
pgoff_t pgoff;
long rc, id;
void *kaddr;
bool page_aligned = false;
if (!IS_ALIGNED(offset, sector_size))
return false;
if (!IS_ALIGNED(length, sector_size))
return false;
return true; if (IS_ALIGNED(sector << SECTOR_SHIFT, PAGE_SIZE) &&
} IS_ALIGNED(size, PAGE_SIZE))
page_aligned = true;
int __dax_zero_page_range(struct block_device *bdev, rc = bdev_dax_pgoff(iomap->bdev, sector, PAGE_SIZE, &pgoff);
struct dax_device *dax_dev, sector_t sector, if (rc)
unsigned int offset, unsigned int size) return rc;
{
if (dax_range_is_aligned(bdev, offset, size)) {
sector_t start_sector = sector + (offset >> 9);
return blkdev_issue_zeroout(bdev, start_sector, id = dax_read_lock();
size >> 9, GFP_NOFS, 0);
} else {
pgoff_t pgoff;
long rc, id;
void *kaddr;
rc = bdev_dax_pgoff(bdev, sector, PAGE_SIZE, &pgoff); if (page_aligned)
if (rc) rc = dax_zero_page_range(iomap->dax_dev, pgoff,
return rc; size >> PAGE_SHIFT);
else
rc = dax_direct_access(iomap->dax_dev, pgoff, 1, &kaddr, NULL);
if (rc < 0) {
dax_read_unlock(id);
return rc;
}
id = dax_read_lock(); if (!page_aligned) {
rc = dax_direct_access(dax_dev, pgoff, 1, &kaddr, NULL);
if (rc < 0) {
dax_read_unlock(id);
return rc;
}
memset(kaddr + offset, 0, size); memset(kaddr + offset, 0, size);
dax_flush(dax_dev, kaddr + offset, size); dax_flush(iomap->dax_dev, kaddr + offset, size);
dax_read_unlock(id);
} }
dax_read_unlock(id);
return 0; return 0;
} }
EXPORT_SYMBOL_GPL(__dax_zero_page_range);
static loff_t static loff_t
dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data, dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
......
...@@ -974,13 +974,6 @@ static int iomap_zero(struct inode *inode, loff_t pos, unsigned offset, ...@@ -974,13 +974,6 @@ static int iomap_zero(struct inode *inode, loff_t pos, unsigned offset,
return iomap_write_end(inode, pos, bytes, bytes, page, iomap, srcmap); return iomap_write_end(inode, pos, bytes, bytes, page, iomap, srcmap);
} }
static int iomap_dax_zero(loff_t pos, unsigned offset, unsigned bytes,
struct iomap *iomap)
{
return __dax_zero_page_range(iomap->bdev, iomap->dax_dev,
iomap_sector(iomap, pos & PAGE_MASK), offset, bytes);
}
static loff_t static loff_t
iomap_zero_range_actor(struct inode *inode, loff_t pos, loff_t count, iomap_zero_range_actor(struct inode *inode, loff_t pos, loff_t count,
void *data, struct iomap *iomap, struct iomap *srcmap) void *data, struct iomap *iomap, struct iomap *srcmap)
...@@ -1000,7 +993,7 @@ iomap_zero_range_actor(struct inode *inode, loff_t pos, loff_t count, ...@@ -1000,7 +993,7 @@ iomap_zero_range_actor(struct inode *inode, loff_t pos, loff_t count,
bytes = min_t(loff_t, PAGE_SIZE - offset, count); bytes = min_t(loff_t, PAGE_SIZE - offset, count);
if (IS_DAX(inode)) if (IS_DAX(inode))
status = iomap_dax_zero(pos, offset, bytes, iomap); status = dax_iomap_zero(pos, offset, bytes, iomap);
else else
status = iomap_zero(inode, pos, offset, bytes, iomap, status = iomap_zero(inode, pos, offset, bytes, iomap,
srcmap); srcmap);
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
typedef unsigned long dax_entry_t; typedef unsigned long dax_entry_t;
struct iomap_ops; struct iomap_ops;
struct iomap;
struct dax_device; struct dax_device;
struct dax_operations { struct dax_operations {
/* /*
...@@ -34,6 +35,8 @@ struct dax_operations { ...@@ -34,6 +35,8 @@ struct dax_operations {
/* copy_to_iter: required operation for fs-dax direct-i/o */ /* copy_to_iter: required operation for fs-dax direct-i/o */
size_t (*copy_to_iter)(struct dax_device *, pgoff_t, void *, size_t, size_t (*copy_to_iter)(struct dax_device *, pgoff_t, void *, size_t,
struct iov_iter *); struct iov_iter *);
/* zero_page_range: required operation. Zero page range */
int (*zero_page_range)(struct dax_device *, pgoff_t, size_t);
}; };
extern struct attribute_group dax_attribute_group; extern struct attribute_group dax_attribute_group;
...@@ -199,6 +202,8 @@ size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, ...@@ -199,6 +202,8 @@ size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
size_t bytes, struct iov_iter *i); size_t bytes, struct iov_iter *i);
size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
size_t bytes, struct iov_iter *i); size_t bytes, struct iov_iter *i);
int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
size_t nr_pages);
void dax_flush(struct dax_device *dax_dev, void *addr, size_t size); void dax_flush(struct dax_device *dax_dev, void *addr, size_t size);
ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
...@@ -210,20 +215,8 @@ vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf, ...@@ -210,20 +215,8 @@ vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf,
int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index); int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
int dax_invalidate_mapping_entry_sync(struct address_space *mapping, int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
pgoff_t index); pgoff_t index);
int dax_iomap_zero(loff_t pos, unsigned offset, unsigned size,
#ifdef CONFIG_FS_DAX struct iomap *iomap);
int __dax_zero_page_range(struct block_device *bdev,
struct dax_device *dax_dev, sector_t sector,
unsigned int offset, unsigned int length);
#else
static inline int __dax_zero_page_range(struct block_device *bdev,
struct dax_device *dax_dev, sector_t sector,
unsigned int offset, unsigned int length)
{
return -ENXIO;
}
#endif
static inline bool dax_mapping(struct address_space *mapping) static inline bool dax_mapping(struct address_space *mapping)
{ {
return mapping->host && IS_DAX(mapping->host); return mapping->host && IS_DAX(mapping->host);
......
...@@ -141,6 +141,8 @@ typedef long (*dm_dax_direct_access_fn) (struct dm_target *ti, pgoff_t pgoff, ...@@ -141,6 +141,8 @@ typedef long (*dm_dax_direct_access_fn) (struct dm_target *ti, pgoff_t pgoff,
long nr_pages, void **kaddr, pfn_t *pfn); long nr_pages, void **kaddr, pfn_t *pfn);
typedef size_t (*dm_dax_copy_iter_fn)(struct dm_target *ti, pgoff_t pgoff, typedef size_t (*dm_dax_copy_iter_fn)(struct dm_target *ti, pgoff_t pgoff,
void *addr, size_t bytes, struct iov_iter *i); void *addr, size_t bytes, struct iov_iter *i);
typedef int (*dm_dax_zero_page_range_fn)(struct dm_target *ti, pgoff_t pgoff,
size_t nr_pages);
#define PAGE_SECTORS (PAGE_SIZE / 512) #define PAGE_SECTORS (PAGE_SIZE / 512)
void dm_error(const char *message); void dm_error(const char *message);
...@@ -195,6 +197,7 @@ struct target_type { ...@@ -195,6 +197,7 @@ struct target_type {
dm_dax_direct_access_fn direct_access; dm_dax_direct_access_fn direct_access;
dm_dax_copy_iter_fn dax_copy_from_iter; dm_dax_copy_iter_fn dax_copy_from_iter;
dm_dax_copy_iter_fn dax_copy_to_iter; dm_dax_copy_iter_fn dax_copy_to_iter;
dm_dax_zero_page_range_fn dax_zero_page_range;
/* For internal device-mapper use. */ /* For internal device-mapper use. */
struct list_head list; struct list_head list;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment