Commit 89fd915c authored by Linus Torvalds

Merge tag 'libnvdimm-for-4.14' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm

Pull libnvdimm from Dan Williams:
 "A rework of media error handling in the BTT driver and other updates.
  It has appeared in a few -next releases and collected some late-
  breaking build-error and warning fixups as a result.

  Summary:

   - Media error handling support in the Block Translation Table (BTT)
     driver is reworked to address sleeping-while-atomic locking and
     memory-allocation-context conflicts.

   - The dax_device lookup overhead for xfs and ext4 is moved out of the
     iomap hot-path to a mount-time lookup.

   - A new 'ecc_unit_size' sysfs attribute is added to advertise the
     read-modify-write boundary property of a persistent memory range.

   - Preparatory fix-ups for arm and powerpc pmem support are included
     along with other miscellaneous fixes"

* tag 'libnvdimm-for-4.14' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (26 commits)
  libnvdimm, btt: fix format string warnings
  libnvdimm, btt: clean up warning and error messages
  ext4: fix null pointer dereference on sbi
  libnvdimm, nfit: move the check on nd_reserved2 to the endpoint
  dax: fix FS_DAX=n BLOCK=y compilation
  libnvdimm: fix integer overflow static analysis warning
  libnvdimm, nd_blk: remove mmio_flush_range()
  libnvdimm, btt: rework error clearing
  libnvdimm: fix potential deadlock while clearing errors
  libnvdimm, btt: cache sector_size in arena_info
  libnvdimm, btt: ensure that flags were also unchanged during a map_read
  libnvdimm, btt: refactor map entry operations with macros
  libnvdimm, btt: fix a missed NVDIMM_IO_ATOMIC case in the write path
  libnvdimm, nfit: export an 'ecc_unit_size' sysfs attribute
  ext4: perform dax_device lookup at mount
  ext2: perform dax_device lookup at mount
  xfs: perform dax_device lookup at mount
  dax: introduce a fs_dax_get_by_bdev() helper
  libnvdimm, btt: check memory allocation failure
  libnvdimm, label: fix index block size calculation
  ...
parents 66c9457d 04c3c982
...@@ -53,7 +53,6 @@ config X86 ...@@ -53,7 +53,6 @@ config X86
select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_KCOV if X86_64 select ARCH_HAS_KCOV if X86_64
select ARCH_HAS_MMIO_FLUSH
select ARCH_HAS_PMEM_API if X86_64 select ARCH_HAS_PMEM_API if X86_64
# Causing hangs/crashes, see the commit that added this change for details. # Causing hangs/crashes, see the commit that added this change for details.
select ARCH_HAS_REFCOUNT if BROKEN select ARCH_HAS_REFCOUNT if BROKEN
......
...@@ -7,6 +7,4 @@ ...@@ -7,6 +7,4 @@
void clflush_cache_range(void *addr, unsigned int size); void clflush_cache_range(void *addr, unsigned int size);
#define mmio_flush_range(addr, size) clflush_cache_range(addr, size)
#endif /* _ASM_X86_CACHEFLUSH_H */ #endif /* _ASM_X86_CACHEFLUSH_H */
...@@ -2,7 +2,7 @@ config ACPI_NFIT ...@@ -2,7 +2,7 @@ config ACPI_NFIT
tristate "ACPI NVDIMM Firmware Interface Table (NFIT)" tristate "ACPI NVDIMM Firmware Interface Table (NFIT)"
depends on PHYS_ADDR_T_64BIT depends on PHYS_ADDR_T_64BIT
depends on BLK_DEV depends on BLK_DEV
depends on ARCH_HAS_MMIO_FLUSH depends on ARCH_HAS_PMEM_API
select LIBNVDIMM select LIBNVDIMM
help help
Infrastructure to probe ACPI 6 compliant platforms for Infrastructure to probe ACPI 6 compliant platforms for
......
...@@ -228,6 +228,10 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, ...@@ -228,6 +228,10 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
if (cmd == ND_CMD_CALL) { if (cmd == ND_CMD_CALL) {
call_pkg = buf; call_pkg = buf;
func = call_pkg->nd_command; func = call_pkg->nd_command;
for (i = 0; i < ARRAY_SIZE(call_pkg->nd_reserved2); i++)
if (call_pkg->nd_reserved2[i])
return -EINVAL;
} }
if (nvdimm) { if (nvdimm) {
...@@ -1674,8 +1678,19 @@ static ssize_t range_index_show(struct device *dev, ...@@ -1674,8 +1678,19 @@ static ssize_t range_index_show(struct device *dev,
} }
static DEVICE_ATTR_RO(range_index); static DEVICE_ATTR_RO(range_index);
/*
 * sysfs 'show' handler for the region's ecc_unit_size attribute.
 *
 * Looks up the nfit_spa registered as provider data for the nd_region
 * backing @dev and prints its clear_err_unit value, in decimal followed
 * by a newline, into @buf. @attr is unused. Returns whatever sprintf()
 * returns.
 */
static ssize_t ecc_unit_size_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct nfit_spa *spa = nd_region_provider_data(to_nd_region(dev));

	return sprintf(buf, "%d\n", spa->clear_err_unit);
}
static DEVICE_ATTR_RO(ecc_unit_size);
static struct attribute *acpi_nfit_region_attributes[] = { static struct attribute *acpi_nfit_region_attributes[] = {
&dev_attr_range_index.attr, &dev_attr_range_index.attr,
&dev_attr_ecc_unit_size.attr,
NULL, NULL,
}; };
...@@ -1804,6 +1819,7 @@ static int acpi_nfit_init_interleave_set(struct acpi_nfit_desc *acpi_desc, ...@@ -1804,6 +1819,7 @@ static int acpi_nfit_init_interleave_set(struct acpi_nfit_desc *acpi_desc,
struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
struct acpi_nfit_memory_map *memdev = memdev_from_spa(acpi_desc, struct acpi_nfit_memory_map *memdev = memdev_from_spa(acpi_desc,
spa->range_index, i); spa->range_index, i);
struct acpi_nfit_control_region *dcr = nfit_mem->dcr;
if (!memdev || !nfit_mem->dcr) { if (!memdev || !nfit_mem->dcr) {
dev_err(dev, "%s: failed to find DCR\n", __func__); dev_err(dev, "%s: failed to find DCR\n", __func__);
...@@ -1811,13 +1827,13 @@ static int acpi_nfit_init_interleave_set(struct acpi_nfit_desc *acpi_desc, ...@@ -1811,13 +1827,13 @@ static int acpi_nfit_init_interleave_set(struct acpi_nfit_desc *acpi_desc,
} }
map->region_offset = memdev->region_offset; map->region_offset = memdev->region_offset;
map->serial_number = nfit_mem->dcr->serial_number; map->serial_number = dcr->serial_number;
map2->region_offset = memdev->region_offset; map2->region_offset = memdev->region_offset;
map2->serial_number = nfit_mem->dcr->serial_number; map2->serial_number = dcr->serial_number;
map2->vendor_id = nfit_mem->dcr->vendor_id; map2->vendor_id = dcr->vendor_id;
map2->manufacturing_date = nfit_mem->dcr->manufacturing_date; map2->manufacturing_date = dcr->manufacturing_date;
map2->manufacturing_location = nfit_mem->dcr->manufacturing_location; map2->manufacturing_location = dcr->manufacturing_location;
} }
/* v1.1 namespaces */ /* v1.1 namespaces */
...@@ -1835,6 +1851,28 @@ static int acpi_nfit_init_interleave_set(struct acpi_nfit_desc *acpi_desc, ...@@ -1835,6 +1851,28 @@ static int acpi_nfit_init_interleave_set(struct acpi_nfit_desc *acpi_desc,
cmp_map_compat, NULL); cmp_map_compat, NULL);
nd_set->altcookie = nd_fletcher64(info, sizeof_nfit_set_info(nr), 0); nd_set->altcookie = nd_fletcher64(info, sizeof_nfit_set_info(nr), 0);
/* record the result of the sort for the mapping position */
for (i = 0; i < nr; i++) {
struct nfit_set_info_map2 *map2 = &info2->mapping[i];
int j;
for (j = 0; j < nr; j++) {
struct nd_mapping_desc *mapping = &ndr_desc->mapping[j];
struct nvdimm *nvdimm = mapping->nvdimm;
struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
struct acpi_nfit_control_region *dcr = nfit_mem->dcr;
if (map2->serial_number == dcr->serial_number &&
map2->vendor_id == dcr->vendor_id &&
map2->manufacturing_date == dcr->manufacturing_date &&
map2->manufacturing_location
== dcr->manufacturing_location) {
mapping->position = i;
break;
}
}
}
ndr_desc->nd_set = nd_set; ndr_desc->nd_set = nd_set;
devm_kfree(dev, info); devm_kfree(dev, info);
devm_kfree(dev, info2); devm_kfree(dev, info2);
...@@ -1930,7 +1968,7 @@ static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk, ...@@ -1930,7 +1968,7 @@ static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk,
memcpy_flushcache(mmio->addr.aperture + offset, iobuf + copied, c); memcpy_flushcache(mmio->addr.aperture + offset, iobuf + copied, c);
else { else {
if (nfit_blk->dimm_flags & NFIT_BLK_READ_FLUSH) if (nfit_blk->dimm_flags & NFIT_BLK_READ_FLUSH)
mmio_flush_range((void __force *) arch_invalidate_pmem((void __force *)
mmio->addr.aperture + offset, c); mmio->addr.aperture + offset, c);
memcpy(iobuf + copied, mmio->addr.aperture + offset, c); memcpy(iobuf + copied, mmio->addr.aperture + offset, c);
......
...@@ -46,6 +46,8 @@ void dax_read_unlock(int id) ...@@ -46,6 +46,8 @@ void dax_read_unlock(int id)
EXPORT_SYMBOL_GPL(dax_read_unlock); EXPORT_SYMBOL_GPL(dax_read_unlock);
#ifdef CONFIG_BLOCK #ifdef CONFIG_BLOCK
#include <linux/blkdev.h>
int bdev_dax_pgoff(struct block_device *bdev, sector_t sector, size_t size, int bdev_dax_pgoff(struct block_device *bdev, sector_t sector, size_t size,
pgoff_t *pgoff) pgoff_t *pgoff)
{ {
...@@ -59,6 +61,16 @@ int bdev_dax_pgoff(struct block_device *bdev, sector_t sector, size_t size, ...@@ -59,6 +61,16 @@ int bdev_dax_pgoff(struct block_device *bdev, sector_t sector, size_t size,
} }
EXPORT_SYMBOL(bdev_dax_pgoff); EXPORT_SYMBOL(bdev_dax_pgoff);
#if IS_ENABLED(CONFIG_FS_DAX)
/*
 * Look up the dax_device associated with a block device.
 *
 * Returns NULL when blk_queue_dax() reports that the request queue of
 * @bdev is not dax-capable; otherwise returns the result of looking the
 * device up by its gendisk name via fs_dax_get_by_host().
 */
struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev)
{
	if (blk_queue_dax(bdev->bd_queue))
		return fs_dax_get_by_host(bdev->bd_disk->disk_name);

	return NULL;
}
EXPORT_SYMBOL_GPL(fs_dax_get_by_bdev);
#endif
/** /**
* __bdev_dax_supported() - Check if the device supports dax for filesystem * __bdev_dax_supported() - Check if the device supports dax for filesystem
* @sb: The superblock of the device * @sb: The superblock of the device
......
...@@ -31,6 +31,16 @@ enum log_ent_request { ...@@ -31,6 +31,16 @@ enum log_ent_request {
LOG_OLD_ENT LOG_OLD_ENT
}; };
static struct device *to_dev(struct arena_info *arena)
{
return &arena->nd_btt->dev;
}
/*
 * Arena offsets may be shifted from the base of the device: translate
 * @offset by adding the initial_offset recorded in @nd_btt.
 */
static u64 adjust_initial_offset(struct nd_btt *nd_btt, u64 offset)
{
	u64 shifted = nd_btt->initial_offset + offset;

	return shifted;
}
static int arena_read_bytes(struct arena_info *arena, resource_size_t offset, static int arena_read_bytes(struct arena_info *arena, resource_size_t offset,
void *buf, size_t n, unsigned long flags) void *buf, size_t n, unsigned long flags)
{ {
...@@ -38,7 +48,7 @@ static int arena_read_bytes(struct arena_info *arena, resource_size_t offset, ...@@ -38,7 +48,7 @@ static int arena_read_bytes(struct arena_info *arena, resource_size_t offset,
struct nd_namespace_common *ndns = nd_btt->ndns; struct nd_namespace_common *ndns = nd_btt->ndns;
/* arena offsets may be shifted from the base of the device */ /* arena offsets may be shifted from the base of the device */
offset += arena->nd_btt->initial_offset; offset = adjust_initial_offset(nd_btt, offset);
return nvdimm_read_bytes(ndns, offset, buf, n, flags); return nvdimm_read_bytes(ndns, offset, buf, n, flags);
} }
...@@ -49,7 +59,7 @@ static int arena_write_bytes(struct arena_info *arena, resource_size_t offset, ...@@ -49,7 +59,7 @@ static int arena_write_bytes(struct arena_info *arena, resource_size_t offset,
struct nd_namespace_common *ndns = nd_btt->ndns; struct nd_namespace_common *ndns = nd_btt->ndns;
/* arena offsets may be shifted from the base of the device */ /* arena offsets may be shifted from the base of the device */
offset += arena->nd_btt->initial_offset; offset = adjust_initial_offset(nd_btt, offset);
return nvdimm_write_bytes(ndns, offset, buf, n, flags); return nvdimm_write_bytes(ndns, offset, buf, n, flags);
} }
...@@ -62,8 +72,10 @@ static int btt_info_write(struct arena_info *arena, struct btt_sb *super) ...@@ -62,8 +72,10 @@ static int btt_info_write(struct arena_info *arena, struct btt_sb *super)
* We rely on that to make sure rw_bytes does error clearing * We rely on that to make sure rw_bytes does error clearing
* correctly, so make sure that is the case. * correctly, so make sure that is the case.
*/ */
WARN_ON_ONCE(!IS_ALIGNED(arena->infooff, 512)); dev_WARN_ONCE(to_dev(arena), !IS_ALIGNED(arena->infooff, 512),
WARN_ON_ONCE(!IS_ALIGNED(arena->info2off, 512)); "arena->infooff: %#llx is unaligned\n", arena->infooff);
dev_WARN_ONCE(to_dev(arena), !IS_ALIGNED(arena->info2off, 512),
"arena->info2off: %#llx is unaligned\n", arena->info2off);
ret = arena_write_bytes(arena, arena->info2off, super, ret = arena_write_bytes(arena, arena->info2off, super,
sizeof(struct btt_sb), 0); sizeof(struct btt_sb), 0);
...@@ -76,7 +88,6 @@ static int btt_info_write(struct arena_info *arena, struct btt_sb *super) ...@@ -76,7 +88,6 @@ static int btt_info_write(struct arena_info *arena, struct btt_sb *super)
static int btt_info_read(struct arena_info *arena, struct btt_sb *super) static int btt_info_read(struct arena_info *arena, struct btt_sb *super)
{ {
WARN_ON(!super);
return arena_read_bytes(arena, arena->infooff, super, return arena_read_bytes(arena, arena->infooff, super,
sizeof(struct btt_sb), 0); sizeof(struct btt_sb), 0);
} }
...@@ -92,7 +103,10 @@ static int __btt_map_write(struct arena_info *arena, u32 lba, __le32 mapping, ...@@ -92,7 +103,10 @@ static int __btt_map_write(struct arena_info *arena, u32 lba, __le32 mapping,
{ {
u64 ns_off = arena->mapoff + (lba * MAP_ENT_SIZE); u64 ns_off = arena->mapoff + (lba * MAP_ENT_SIZE);
WARN_ON(lba >= arena->external_nlba); if (unlikely(lba >= arena->external_nlba))
dev_err_ratelimited(to_dev(arena),
"%s: lba %#x out of range (max: %#x)\n",
__func__, lba, arena->external_nlba);
return arena_write_bytes(arena, ns_off, &mapping, MAP_ENT_SIZE, flags); return arena_write_bytes(arena, ns_off, &mapping, MAP_ENT_SIZE, flags);
} }
...@@ -106,7 +120,7 @@ static int btt_map_write(struct arena_info *arena, u32 lba, u32 mapping, ...@@ -106,7 +120,7 @@ static int btt_map_write(struct arena_info *arena, u32 lba, u32 mapping,
* This 'mapping' is supposed to be just the LBA mapping, without * This 'mapping' is supposed to be just the LBA mapping, without
* any flags set, so strip the flag bits. * any flags set, so strip the flag bits.
*/ */
mapping &= MAP_LBA_MASK; mapping = ent_lba(mapping);
ze = (z_flag << 1) + e_flag; ze = (z_flag << 1) + e_flag;
switch (ze) { switch (ze) {
...@@ -131,7 +145,8 @@ static int btt_map_write(struct arena_info *arena, u32 lba, u32 mapping, ...@@ -131,7 +145,8 @@ static int btt_map_write(struct arena_info *arena, u32 lba, u32 mapping,
* construed as a valid 'normal' case, but we decide not to, * construed as a valid 'normal' case, but we decide not to,
* to avoid confusion * to avoid confusion
*/ */
WARN_ONCE(1, "Invalid use of Z and E flags\n"); dev_err_ratelimited(to_dev(arena),
"Invalid use of Z and E flags\n");
return -EIO; return -EIO;
} }
...@@ -147,7 +162,10 @@ static int btt_map_read(struct arena_info *arena, u32 lba, u32 *mapping, ...@@ -147,7 +162,10 @@ static int btt_map_read(struct arena_info *arena, u32 lba, u32 *mapping,
u32 raw_mapping, postmap, ze, z_flag, e_flag; u32 raw_mapping, postmap, ze, z_flag, e_flag;
u64 ns_off = arena->mapoff + (lba * MAP_ENT_SIZE); u64 ns_off = arena->mapoff + (lba * MAP_ENT_SIZE);
WARN_ON(lba >= arena->external_nlba); if (unlikely(lba >= arena->external_nlba))
dev_err_ratelimited(to_dev(arena),
"%s: lba %#x out of range (max: %#x)\n",
__func__, lba, arena->external_nlba);
ret = arena_read_bytes(arena, ns_off, &in, MAP_ENT_SIZE, rwb_flags); ret = arena_read_bytes(arena, ns_off, &in, MAP_ENT_SIZE, rwb_flags);
if (ret) if (ret)
...@@ -155,10 +173,10 @@ static int btt_map_read(struct arena_info *arena, u32 lba, u32 *mapping, ...@@ -155,10 +173,10 @@ static int btt_map_read(struct arena_info *arena, u32 lba, u32 *mapping,
raw_mapping = le32_to_cpu(in); raw_mapping = le32_to_cpu(in);
z_flag = (raw_mapping & MAP_TRIM_MASK) >> MAP_TRIM_SHIFT; z_flag = ent_z_flag(raw_mapping);
e_flag = (raw_mapping & MAP_ERR_MASK) >> MAP_ERR_SHIFT; e_flag = ent_e_flag(raw_mapping);
ze = (z_flag << 1) + e_flag; ze = (z_flag << 1) + e_flag;
postmap = raw_mapping & MAP_LBA_MASK; postmap = ent_lba(raw_mapping);
/* Reuse the {z,e}_flag variables for *trim and *error */ /* Reuse the {z,e}_flag variables for *trim and *error */
z_flag = 0; z_flag = 0;
...@@ -195,7 +213,6 @@ static int btt_map_read(struct arena_info *arena, u32 lba, u32 *mapping, ...@@ -195,7 +213,6 @@ static int btt_map_read(struct arena_info *arena, u32 lba, u32 *mapping,
static int btt_log_read_pair(struct arena_info *arena, u32 lane, static int btt_log_read_pair(struct arena_info *arena, u32 lane,
struct log_entry *ent) struct log_entry *ent)
{ {
WARN_ON(!ent);
return arena_read_bytes(arena, return arena_read_bytes(arena,
arena->logoff + (2 * lane * LOG_ENT_SIZE), ent, arena->logoff + (2 * lane * LOG_ENT_SIZE), ent,
2 * LOG_ENT_SIZE, 0); 2 * LOG_ENT_SIZE, 0);
...@@ -299,11 +316,6 @@ static int btt_log_get_old(struct log_entry *ent) ...@@ -299,11 +316,6 @@ static int btt_log_get_old(struct log_entry *ent)
return old; return old;
} }
static struct device *to_dev(struct arena_info *arena)
{
return &arena->nd_btt->dev;
}
/* /*
* This function copies the desired (old/new) log entry into ent if * This function copies the desired (old/new) log entry into ent if
* it is not NULL. It returns the sub-slot number (0 or 1) * it is not NULL. It returns the sub-slot number (0 or 1)
...@@ -381,7 +393,9 @@ static int btt_flog_write(struct arena_info *arena, u32 lane, u32 sub, ...@@ -381,7 +393,9 @@ static int btt_flog_write(struct arena_info *arena, u32 lane, u32 sub,
arena->freelist[lane].sub = 1 - arena->freelist[lane].sub; arena->freelist[lane].sub = 1 - arena->freelist[lane].sub;
if (++(arena->freelist[lane].seq) == 4) if (++(arena->freelist[lane].seq) == 4)
arena->freelist[lane].seq = 1; arena->freelist[lane].seq = 1;
arena->freelist[lane].block = le32_to_cpu(ent->old_map); if (ent_e_flag(ent->old_map))
arena->freelist[lane].has_err = 1;
arena->freelist[lane].block = le32_to_cpu(ent_lba(ent->old_map));
return ret; return ret;
} }
...@@ -407,12 +421,14 @@ static int btt_map_init(struct arena_info *arena) ...@@ -407,12 +421,14 @@ static int btt_map_init(struct arena_info *arena)
* make sure rw_bytes does error clearing correctly, so make sure that * make sure rw_bytes does error clearing correctly, so make sure that
* is the case. * is the case.
*/ */
WARN_ON_ONCE(!IS_ALIGNED(arena->mapoff, 512)); dev_WARN_ONCE(to_dev(arena), !IS_ALIGNED(arena->mapoff, 512),
"arena->mapoff: %#llx is unaligned\n", arena->mapoff);
while (mapsize) { while (mapsize) {
size_t size = min(mapsize, chunk_size); size_t size = min(mapsize, chunk_size);
WARN_ON_ONCE(size < 512); dev_WARN_ONCE(to_dev(arena), size < 512,
"chunk size: %#zx is unaligned\n", size);
ret = arena_write_bytes(arena, arena->mapoff + offset, zerobuf, ret = arena_write_bytes(arena, arena->mapoff + offset, zerobuf,
size, 0); size, 0);
if (ret) if (ret)
...@@ -449,12 +465,14 @@ static int btt_log_init(struct arena_info *arena) ...@@ -449,12 +465,14 @@ static int btt_log_init(struct arena_info *arena)
* make sure rw_bytes does error clearing correctly, so make sure that * make sure rw_bytes does error clearing correctly, so make sure that
* is the case. * is the case.
*/ */
WARN_ON_ONCE(!IS_ALIGNED(arena->logoff, 512)); dev_WARN_ONCE(to_dev(arena), !IS_ALIGNED(arena->logoff, 512),
"arena->logoff: %#llx is unaligned\n", arena->logoff);
while (logsize) { while (logsize) {
size_t size = min(logsize, chunk_size); size_t size = min(logsize, chunk_size);
WARN_ON_ONCE(size < 512); dev_WARN_ONCE(to_dev(arena), size < 512,
"chunk size: %#zx is unaligned\n", size);
ret = arena_write_bytes(arena, arena->logoff + offset, zerobuf, ret = arena_write_bytes(arena, arena->logoff + offset, zerobuf,
size, 0); size, 0);
if (ret) if (ret)
...@@ -480,6 +498,40 @@ static int btt_log_init(struct arena_info *arena) ...@@ -480,6 +498,40 @@ static int btt_log_init(struct arena_info *arena)
return ret; return ret;
} }
/*
 * Convert a block number within @arena into a byte offset: @lba blocks of
 * internal_lbasize bytes each, counted from the arena's dataoff.
 */
static u64 to_namespace_offset(struct arena_info *arena, u64 lba)
{
	u64 block_bytes = lba * arena->internal_lbasize;

	return arena->dataoff + block_bytes;
}
/*
 * Clear a known error on the free block owned by @lane.
 *
 * If the lane's freelist entry has has_err set, zeroes are written over
 * sector_size bytes starting at the namespace offset of that free block,
 * at most PAGE_SIZE bytes per write, while holding arena->err_lock.
 * has_err is reset only once the final chunk has been written
 * successfully.
 *
 * Returns 0 if there was nothing to clear or every write succeeded,
 * otherwise the first non-zero value returned by arena_write_bytes().
 */
static int arena_clear_freelist_error(struct arena_info *arena, u32 lane)
{
	void *zeroes;
	u64 dst;
	unsigned long remaining;
	int rc = 0;

	/* Nothing recorded against this lane's free block. */
	if (!arena->freelist[lane].has_err)
		return 0;

	zeroes = page_address(ZERO_PAGE(0));
	dst = to_namespace_offset(arena, arena->freelist[lane].block);
	remaining = arena->sector_size;

	mutex_lock(&arena->err_lock);
	while (remaining) {
		unsigned long step = min(remaining, PAGE_SIZE);

		rc = arena_write_bytes(arena, dst, zeroes, step, 0);
		if (rc)
			break;

		dst += step;
		remaining -= step;

		/* The whole block has been overwritten: drop the marker. */
		if (!remaining)
			arena->freelist[lane].has_err = 0;
	}
	mutex_unlock(&arena->err_lock);

	return rc;
}
static int btt_freelist_init(struct arena_info *arena) static int btt_freelist_init(struct arena_info *arena)
{ {
int old, new, ret; int old, new, ret;
...@@ -505,6 +557,17 @@ static int btt_freelist_init(struct arena_info *arena) ...@@ -505,6 +557,17 @@ static int btt_freelist_init(struct arena_info *arena)
arena->freelist[i].seq = nd_inc_seq(le32_to_cpu(log_new.seq)); arena->freelist[i].seq = nd_inc_seq(le32_to_cpu(log_new.seq));
arena->freelist[i].block = le32_to_cpu(log_new.old_map); arena->freelist[i].block = le32_to_cpu(log_new.old_map);
/*
* FIXME: if error clearing fails during init, we want to make
* the BTT read-only
*/
if (ent_e_flag(log_new.old_map)) {
ret = arena_clear_freelist_error(arena, i);
if (ret)
dev_err_ratelimited(to_dev(arena),
"Unable to clear known errors\n");
}
/* This implies a newly created or untouched flog entry */ /* This implies a newly created or untouched flog entry */
if (log_new.old_map == log_new.new_map) if (log_new.old_map == log_new.new_map)
continue; continue;
...@@ -525,7 +588,6 @@ static int btt_freelist_init(struct arena_info *arena) ...@@ -525,7 +588,6 @@ static int btt_freelist_init(struct arena_info *arena)
if (ret) if (ret)
return ret; return ret;
} }
} }
return 0; return 0;
...@@ -566,6 +628,7 @@ static struct arena_info *alloc_arena(struct btt *btt, size_t size, ...@@ -566,6 +628,7 @@ static struct arena_info *alloc_arena(struct btt *btt, size_t size,
if (!arena) if (!arena)
return NULL; return NULL;
arena->nd_btt = btt->nd_btt; arena->nd_btt = btt->nd_btt;
arena->sector_size = btt->sector_size;
if (!size) if (!size)
return arena; return arena;
...@@ -694,6 +757,7 @@ static int discover_arenas(struct btt *btt) ...@@ -694,6 +757,7 @@ static int discover_arenas(struct btt *btt)
arena->external_lba_start = cur_nlba; arena->external_lba_start = cur_nlba;
parse_arena_meta(arena, super, cur_off); parse_arena_meta(arena, super, cur_off);
mutex_init(&arena->err_lock);
ret = btt_freelist_init(arena); ret = btt_freelist_init(arena);
if (ret) if (ret)
goto out; goto out;
...@@ -904,11 +968,6 @@ static void unlock_map(struct arena_info *arena, u32 premap) ...@@ -904,11 +968,6 @@ static void unlock_map(struct arena_info *arena, u32 premap)
spin_unlock(&arena->map_locks[idx].lock); spin_unlock(&arena->map_locks[idx].lock);
} }
/*
 * Convert a block number within @arena into a byte offset: @lba blocks of
 * internal_lbasize bytes each, counted from the arena's dataoff.
 */
static u64 to_namespace_offset(struct arena_info *arena, u64 lba)
{
	u64 block_bytes = lba * arena->internal_lbasize;

	return arena->dataoff + block_bytes;
}
static int btt_data_read(struct arena_info *arena, struct page *page, static int btt_data_read(struct arena_info *arena, struct page *page,
unsigned int off, u32 lba, u32 len) unsigned int off, u32 lba, u32 len)
{ {
...@@ -1032,6 +1091,7 @@ static int btt_read_pg(struct btt *btt, struct bio_integrity_payload *bip, ...@@ -1032,6 +1091,7 @@ static int btt_read_pg(struct btt *btt, struct bio_integrity_payload *bip,
*/ */
while (1) { while (1) {
u32 new_map; u32 new_map;
int new_t, new_e;
if (t_flag) { if (t_flag) {
zero_fill_data(page, off, cur_len); zero_fill_data(page, off, cur_len);
...@@ -1050,20 +1110,29 @@ static int btt_read_pg(struct btt *btt, struct bio_integrity_payload *bip, ...@@ -1050,20 +1110,29 @@ static int btt_read_pg(struct btt *btt, struct bio_integrity_payload *bip,
*/ */
barrier(); barrier();
ret = btt_map_read(arena, premap, &new_map, &t_flag, ret = btt_map_read(arena, premap, &new_map, &new_t,
&e_flag, NVDIMM_IO_ATOMIC); &new_e, NVDIMM_IO_ATOMIC);
if (ret) if (ret)
goto out_rtt; goto out_rtt;
if (postmap == new_map) if ((postmap == new_map) && (t_flag == new_t) &&
(e_flag == new_e))
break; break;
postmap = new_map; postmap = new_map;
t_flag = new_t;
e_flag = new_e;
} }
ret = btt_data_read(arena, page, off, postmap, cur_len); ret = btt_data_read(arena, page, off, postmap, cur_len);
if (ret) if (ret) {
int rc;
/* Media error - set the e_flag */
rc = btt_map_write(arena, premap, postmap, 0, 1,
NVDIMM_IO_ATOMIC);
goto out_rtt; goto out_rtt;
}
if (bip) { if (bip) {
ret = btt_rw_integrity(btt, bip, arena, postmap, READ); ret = btt_rw_integrity(btt, bip, arena, postmap, READ);
...@@ -1088,6 +1157,21 @@ static int btt_read_pg(struct btt *btt, struct bio_integrity_payload *bip, ...@@ -1088,6 +1157,21 @@ static int btt_read_pg(struct btt *btt, struct bio_integrity_payload *bip,
return ret; return ret;
} }
/*
* Normally, arena_{read,write}_bytes will take care of the initial offset
* adjustment, but in the case of btt_is_badblock, where we query is_bad_pmem,
* we need the final, raw namespace offset here
*/
/*
 * Report whether the block @postmap of @arena overlaps a known bad range.
 *
 * The block's byte offset is computed with to_namespace_offset() and then
 * shifted by the BTT's initial offset, since is_bad_pmem() is queried with
 * the final, raw namespace offset. That offset is converted to a sector
 * number and checked against btt->phys_bb for internal_lbasize bytes.
 *
 * Returns the result of is_bad_pmem().
 */
static bool btt_is_badblock(struct btt *btt, struct arena_info *arena,
		u32 postmap)
{
	u64 nsoff = adjust_initial_offset(arena->nd_btt,
			to_namespace_offset(arena, postmap));
	/* Use the named shift, matching the other sector math in this file. */
	sector_t phys_sector = nsoff >> SECTOR_SHIFT;

	return is_bad_pmem(btt->phys_bb, phys_sector, arena->internal_lbasize);
}
static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip, static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip,
sector_t sector, struct page *page, unsigned int off, sector_t sector, struct page *page, unsigned int off,
unsigned int len) unsigned int len)
...@@ -1100,7 +1184,9 @@ static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip, ...@@ -1100,7 +1184,9 @@ static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip,
while (len) { while (len) {
u32 cur_len; u32 cur_len;
int e_flag;
retry:
lane = nd_region_acquire_lane(btt->nd_region); lane = nd_region_acquire_lane(btt->nd_region);
ret = lba_to_arena(btt, sector, &premap, &arena); ret = lba_to_arena(btt, sector, &premap, &arena);
...@@ -1113,6 +1199,21 @@ static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip, ...@@ -1113,6 +1199,21 @@ static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip,
goto out_lane; goto out_lane;
} }
if (btt_is_badblock(btt, arena, arena->freelist[lane].block))
arena->freelist[lane].has_err = 1;
if (mutex_is_locked(&arena->err_lock)
|| arena->freelist[lane].has_err) {
nd_region_release_lane(btt->nd_region, lane);
ret = arena_clear_freelist_error(arena, lane);
if (ret)
return ret;
/* OK to acquire a different lane/free block */
goto retry;
}
new_postmap = arena->freelist[lane].block; new_postmap = arena->freelist[lane].block;
/* Wait if the new block is being read from */ /* Wait if the new block is being read from */
...@@ -1138,7 +1239,7 @@ static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip, ...@@ -1138,7 +1239,7 @@ static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip,
} }
lock_map(arena, premap); lock_map(arena, premap);
ret = btt_map_read(arena, premap, &old_postmap, NULL, NULL, ret = btt_map_read(arena, premap, &old_postmap, NULL, &e_flag,
NVDIMM_IO_ATOMIC); NVDIMM_IO_ATOMIC);
if (ret) if (ret)
goto out_map; goto out_map;
...@@ -1146,6 +1247,8 @@ static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip, ...@@ -1146,6 +1247,8 @@ static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip,
ret = -EIO; ret = -EIO;
goto out_map; goto out_map;
} }
if (e_flag)
set_e_flag(old_postmap);
log.lba = cpu_to_le32(premap); log.lba = cpu_to_le32(premap);
log.old_map = cpu_to_le32(old_postmap); log.old_map = cpu_to_le32(old_postmap);
...@@ -1156,13 +1259,20 @@ static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip, ...@@ -1156,13 +1259,20 @@ static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip,
if (ret) if (ret)
goto out_map; goto out_map;
ret = btt_map_write(arena, premap, new_postmap, 0, 0, 0); ret = btt_map_write(arena, premap, new_postmap, 0, 0,
NVDIMM_IO_ATOMIC);
if (ret) if (ret)
goto out_map; goto out_map;
unlock_map(arena, premap); unlock_map(arena, premap);
nd_region_release_lane(btt->nd_region, lane); nd_region_release_lane(btt->nd_region, lane);
if (e_flag) {
ret = arena_clear_freelist_error(arena, lane);
if (ret)
return ret;
}
len -= cur_len; len -= cur_len;
off += cur_len; off += cur_len;
sector += btt->sector_size >> SECTOR_SHIFT; sector += btt->sector_size >> SECTOR_SHIFT;
...@@ -1211,11 +1321,13 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio) ...@@ -1211,11 +1321,13 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio)
bio_for_each_segment(bvec, bio, iter) { bio_for_each_segment(bvec, bio, iter) {
unsigned int len = bvec.bv_len; unsigned int len = bvec.bv_len;
BUG_ON(len > PAGE_SIZE); if (len > PAGE_SIZE || len < btt->sector_size ||
/* Make sure len is in multiples of sector size. */ len % btt->sector_size) {
/* XXX is this right? */ dev_err_ratelimited(&btt->nd_btt->dev,
BUG_ON(len < btt->sector_size); "unaligned bio segment (len: %d)\n", len);
BUG_ON(len % btt->sector_size); bio->bi_status = BLK_STS_IOERR;
break;
}
err = btt_do_bvec(btt, bip, bvec.bv_page, len, bvec.bv_offset, err = btt_do_bvec(btt, bip, bvec.bv_page, len, bvec.bv_offset,
op_is_write(bio_op(bio)), iter.bi_sector); op_is_write(bio_op(bio)), iter.bi_sector);
...@@ -1345,6 +1457,7 @@ static struct btt *btt_init(struct nd_btt *nd_btt, unsigned long long rawsize, ...@@ -1345,6 +1457,7 @@ static struct btt *btt_init(struct nd_btt *nd_btt, unsigned long long rawsize,
{ {
int ret; int ret;
struct btt *btt; struct btt *btt;
struct nd_namespace_io *nsio;
struct device *dev = &nd_btt->dev; struct device *dev = &nd_btt->dev;
btt = devm_kzalloc(dev, sizeof(struct btt), GFP_KERNEL); btt = devm_kzalloc(dev, sizeof(struct btt), GFP_KERNEL);
...@@ -1358,6 +1471,8 @@ static struct btt *btt_init(struct nd_btt *nd_btt, unsigned long long rawsize, ...@@ -1358,6 +1471,8 @@ static struct btt *btt_init(struct nd_btt *nd_btt, unsigned long long rawsize,
INIT_LIST_HEAD(&btt->arena_list); INIT_LIST_HEAD(&btt->arena_list);
mutex_init(&btt->init_lock); mutex_init(&btt->init_lock);
btt->nd_region = nd_region; btt->nd_region = nd_region;
nsio = to_nd_namespace_io(&nd_btt->ndns->dev);
btt->phys_bb = &nsio->bb;
ret = discover_arenas(btt); ret = discover_arenas(btt);
if (ret) { if (ret) {
...@@ -1431,6 +1546,8 @@ int nvdimm_namespace_attach_btt(struct nd_namespace_common *ndns) ...@@ -1431,6 +1546,8 @@ int nvdimm_namespace_attach_btt(struct nd_namespace_common *ndns)
} }
btt_sb = devm_kzalloc(&nd_btt->dev, sizeof(*btt_sb), GFP_KERNEL); btt_sb = devm_kzalloc(&nd_btt->dev, sizeof(*btt_sb), GFP_KERNEL);
if (!btt_sb)
return -ENOMEM;
/* /*
* If this returns < 0, that is ok as it just means there wasn't * If this returns < 0, that is ok as it just means there wasn't
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#ifndef _LINUX_BTT_H #ifndef _LINUX_BTT_H
#define _LINUX_BTT_H #define _LINUX_BTT_H
#include <linux/badblocks.h>
#include <linux/types.h> #include <linux/types.h>
#define BTT_SIG_LEN 16 #define BTT_SIG_LEN 16
...@@ -38,6 +39,11 @@ ...@@ -38,6 +39,11 @@
#define IB_FLAG_ERROR 0x00000001 #define IB_FLAG_ERROR 0x00000001
#define IB_FLAG_ERROR_MASK 0x00000001 #define IB_FLAG_ERROR_MASK 0x00000001
/*
 * Accessors for the fields of a map entry value.
 *
 * Every use of the argument is parenthesized so that a compound expression
 * (for example "a | b") is masked as a whole instead of being split apart
 * by operator precedence.
 *
 * ent_lba():    the entry with only the MAP_LBA_MASK bits kept.
 * ent_e_flag(): 1 if any MAP_ERR_MASK bit is set, else 0.
 * ent_z_flag(): 1 if any MAP_TRIM_MASK bit is set, else 0.
 * set_e_flag(): ORs MAP_ERR_MASK into @ent in place; @ent must be a
 *               modifiable lvalue.
 */
#define ent_lba(ent) ((ent) & MAP_LBA_MASK)
#define ent_e_flag(ent) (!!((ent) & MAP_ERR_MASK))
#define ent_z_flag(ent) (!!((ent) & MAP_TRIM_MASK))
#define set_e_flag(ent) ((ent) |= MAP_ERR_MASK)
enum btt_init_state { enum btt_init_state {
INIT_UNCHECKED = 0, INIT_UNCHECKED = 0,
INIT_NOTFOUND, INIT_NOTFOUND,
...@@ -78,6 +84,7 @@ struct free_entry { ...@@ -78,6 +84,7 @@ struct free_entry {
u32 block; u32 block;
u8 sub; u8 sub;
u8 seq; u8 seq;
u8 has_err;
}; };
struct aligned_lock { struct aligned_lock {
...@@ -104,6 +111,7 @@ struct aligned_lock { ...@@ -104,6 +111,7 @@ struct aligned_lock {
* handle incoming writes. * handle incoming writes.
* @version_major: Metadata layout version major. * @version_major: Metadata layout version major.
* @version_minor: Metadata layout version minor. * @version_minor: Metadata layout version minor.
* @sector_size: The Linux sector size - 512 or 4096
* @nextoff: Offset in bytes to the start of the next arena. * @nextoff: Offset in bytes to the start of the next arena.
* @infooff: Offset in bytes to the info block of this arena. * @infooff: Offset in bytes to the info block of this arena.
* @dataoff: Offset in bytes to the data area of this arena. * @dataoff: Offset in bytes to the data area of this arena.
...@@ -131,6 +139,7 @@ struct arena_info { ...@@ -131,6 +139,7 @@ struct arena_info {
u32 nfree; u32 nfree;
u16 version_major; u16 version_major;
u16 version_minor; u16 version_minor;
u32 sector_size;
/* Byte offsets to the different on-media structures */ /* Byte offsets to the different on-media structures */
u64 nextoff; u64 nextoff;
u64 infooff; u64 infooff;
...@@ -147,6 +156,7 @@ struct arena_info { ...@@ -147,6 +156,7 @@ struct arena_info {
struct dentry *debugfs_dir; struct dentry *debugfs_dir;
/* Arena flags */ /* Arena flags */
u32 flags; u32 flags;
struct mutex err_lock;
}; };
/** /**
...@@ -181,6 +191,7 @@ struct btt { ...@@ -181,6 +191,7 @@ struct btt {
struct mutex init_lock; struct mutex init_lock;
int init_state; int init_state;
int num_arenas; int num_arenas;
struct badblocks *phys_bb;
}; };
bool nd_btt_arena_is_valid(struct nd_btt *nd_btt, struct btt_sb *super); bool nd_btt_arena_is_valid(struct nd_btt *nd_btt, struct btt_sb *super);
......
...@@ -61,7 +61,7 @@ static ssize_t sector_size_show(struct device *dev, ...@@ -61,7 +61,7 @@ static ssize_t sector_size_show(struct device *dev,
{ {
struct nd_btt *nd_btt = to_nd_btt(dev); struct nd_btt *nd_btt = to_nd_btt(dev);
return nd_sector_size_show(nd_btt->lbasize, btt_lbasize_supported, buf); return nd_size_select_show(nd_btt->lbasize, btt_lbasize_supported, buf);
} }
static ssize_t sector_size_store(struct device *dev, static ssize_t sector_size_store(struct device *dev,
...@@ -72,7 +72,7 @@ static ssize_t sector_size_store(struct device *dev, ...@@ -72,7 +72,7 @@ static ssize_t sector_size_store(struct device *dev,
device_lock(dev); device_lock(dev);
nvdimm_bus_lock(dev); nvdimm_bus_lock(dev);
rc = nd_sector_size_store(dev, buf, &nd_btt->lbasize, rc = nd_size_select_store(dev, buf, &nd_btt->lbasize,
btt_lbasize_supported); btt_lbasize_supported);
dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__, dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
rc, buf, buf[len - 1] == '\n' ? "" : "\n"); rc, buf, buf[len - 1] == '\n' ? "" : "\n");
......
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
* General Public License for more details. * General Public License for more details.
*/ */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/sched/mm.h>
#include <linux/vmalloc.h> #include <linux/vmalloc.h>
#include <linux/uaccess.h> #include <linux/uaccess.h>
#include <linux/module.h> #include <linux/module.h>
...@@ -234,6 +235,7 @@ long nvdimm_clear_poison(struct device *dev, phys_addr_t phys, ...@@ -234,6 +235,7 @@ long nvdimm_clear_poison(struct device *dev, phys_addr_t phys,
struct nd_cmd_clear_error clear_err; struct nd_cmd_clear_error clear_err;
struct nd_cmd_ars_cap ars_cap; struct nd_cmd_ars_cap ars_cap;
u32 clear_err_unit, mask; u32 clear_err_unit, mask;
unsigned int noio_flag;
int cmd_rc, rc; int cmd_rc, rc;
if (!nvdimm_bus) if (!nvdimm_bus)
...@@ -250,8 +252,10 @@ long nvdimm_clear_poison(struct device *dev, phys_addr_t phys, ...@@ -250,8 +252,10 @@ long nvdimm_clear_poison(struct device *dev, phys_addr_t phys,
memset(&ars_cap, 0, sizeof(ars_cap)); memset(&ars_cap, 0, sizeof(ars_cap));
ars_cap.address = phys; ars_cap.address = phys;
ars_cap.length = len; ars_cap.length = len;
noio_flag = memalloc_noio_save();
rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_CAP, &ars_cap, rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_CAP, &ars_cap,
sizeof(ars_cap), &cmd_rc); sizeof(ars_cap), &cmd_rc);
memalloc_noio_restore(noio_flag);
if (rc < 0) if (rc < 0)
return rc; return rc;
if (cmd_rc < 0) if (cmd_rc < 0)
...@@ -266,8 +270,10 @@ long nvdimm_clear_poison(struct device *dev, phys_addr_t phys, ...@@ -266,8 +270,10 @@ long nvdimm_clear_poison(struct device *dev, phys_addr_t phys,
memset(&clear_err, 0, sizeof(clear_err)); memset(&clear_err, 0, sizeof(clear_err));
clear_err.address = phys; clear_err.address = phys;
clear_err.length = len; clear_err.length = len;
noio_flag = memalloc_noio_save();
rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_CLEAR_ERROR, &clear_err, rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_CLEAR_ERROR, &clear_err,
sizeof(clear_err), &cmd_rc); sizeof(clear_err), &cmd_rc);
memalloc_noio_restore(noio_flag);
if (rc < 0) if (rc < 0)
return rc; return rc;
if (cmd_rc < 0) if (cmd_rc < 0)
...@@ -905,19 +911,20 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, ...@@ -905,19 +911,20 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
int read_only, unsigned int ioctl_cmd, unsigned long arg) int read_only, unsigned int ioctl_cmd, unsigned long arg)
{ {
struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc; struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc;
size_t buf_len = 0, in_len = 0, out_len = 0;
static char out_env[ND_CMD_MAX_ENVELOPE]; static char out_env[ND_CMD_MAX_ENVELOPE];
static char in_env[ND_CMD_MAX_ENVELOPE]; static char in_env[ND_CMD_MAX_ENVELOPE];
const struct nd_cmd_desc *desc = NULL; const struct nd_cmd_desc *desc = NULL;
unsigned int cmd = _IOC_NR(ioctl_cmd); unsigned int cmd = _IOC_NR(ioctl_cmd);
unsigned int func = cmd;
void __user *p = (void __user *) arg;
struct device *dev = &nvdimm_bus->dev; struct device *dev = &nvdimm_bus->dev;
struct nd_cmd_pkg pkg; void __user *p = (void __user *) arg;
const char *cmd_name, *dimm_name; const char *cmd_name, *dimm_name;
u32 in_len = 0, out_len = 0;
unsigned int func = cmd;
unsigned long cmd_mask; unsigned long cmd_mask;
void *buf; struct nd_cmd_pkg pkg;
int rc, i, cmd_rc; int rc, i, cmd_rc;
u64 buf_len = 0;
void *buf;
if (nvdimm) { if (nvdimm) {
desc = nd_cmd_dimm_desc(cmd); desc = nd_cmd_dimm_desc(cmd);
...@@ -977,13 +984,9 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, ...@@ -977,13 +984,9 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
if (cmd == ND_CMD_CALL) { if (cmd == ND_CMD_CALL) {
func = pkg.nd_command; func = pkg.nd_command;
dev_dbg(dev, "%s:%s, idx: %llu, in: %zu, out: %zu, len %zu\n", dev_dbg(dev, "%s:%s, idx: %llu, in: %u, out: %u, len %llu\n",
__func__, dimm_name, pkg.nd_command, __func__, dimm_name, pkg.nd_command,
in_len, out_len, buf_len); in_len, out_len, buf_len);
for (i = 0; i < ARRAY_SIZE(pkg.nd_reserved2); i++)
if (pkg.nd_reserved2[i])
return -EINVAL;
} }
/* process an output envelope */ /* process an output envelope */
...@@ -1007,9 +1010,9 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, ...@@ -1007,9 +1010,9 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
out_len += out_size; out_len += out_size;
} }
buf_len = out_len + in_len; buf_len = (u64) out_len + (u64) in_len;
if (buf_len > ND_IOCTL_MAX_BUFLEN) { if (buf_len > ND_IOCTL_MAX_BUFLEN) {
dev_dbg(dev, "%s:%s cmd: %s buf_len: %zu > %d\n", __func__, dev_dbg(dev, "%s:%s cmd: %s buf_len: %llu > %d\n", __func__,
dimm_name, cmd_name, buf_len, dimm_name, cmd_name, buf_len,
ND_IOCTL_MAX_BUFLEN); ND_IOCTL_MAX_BUFLEN);
return -EINVAL; return -EINVAL;
......
...@@ -280,18 +280,11 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns, ...@@ -280,18 +280,11 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
} }
if (unlikely(is_bad_pmem(&nsio->bb, sector, sz_align))) { if (unlikely(is_bad_pmem(&nsio->bb, sector, sz_align))) {
/*
* FIXME: nsio_rw_bytes() may be called from atomic
* context in the btt case and the ACPI DSM path for
* clearing the error takes sleeping locks and allocates
* memory. An explicit error clearing path, and support
* for tracking badblocks in BTT metadata is needed to
* work around this collision.
*/
if (IS_ALIGNED(offset, 512) && IS_ALIGNED(size, 512) if (IS_ALIGNED(offset, 512) && IS_ALIGNED(size, 512)
&& !(flags & NVDIMM_IO_ATOMIC)) { && !(flags & NVDIMM_IO_ATOMIC)) {
long cleared; long cleared;
might_sleep();
cleared = nvdimm_clear_poison(&ndns->dev, cleared = nvdimm_clear_poison(&ndns->dev,
nsio->res.start + offset, size); nsio->res.start + offset, size);
if (cleared < size) if (cleared < size)
......
...@@ -277,14 +277,14 @@ int nd_uuid_store(struct device *dev, u8 **uuid_out, const char *buf, ...@@ -277,14 +277,14 @@ int nd_uuid_store(struct device *dev, u8 **uuid_out, const char *buf,
return 0; return 0;
} }
ssize_t nd_sector_size_show(unsigned long current_lbasize, ssize_t nd_size_select_show(unsigned long current_size,
const unsigned long *supported, char *buf) const unsigned long *supported, char *buf)
{ {
ssize_t len = 0; ssize_t len = 0;
int i; int i;
for (i = 0; supported[i]; i++) for (i = 0; supported[i]; i++)
if (current_lbasize == supported[i]) if (current_size == supported[i])
len += sprintf(buf + len, "[%ld] ", supported[i]); len += sprintf(buf + len, "[%ld] ", supported[i]);
else else
len += sprintf(buf + len, "%ld ", supported[i]); len += sprintf(buf + len, "%ld ", supported[i]);
...@@ -292,8 +292,8 @@ ssize_t nd_sector_size_show(unsigned long current_lbasize, ...@@ -292,8 +292,8 @@ ssize_t nd_sector_size_show(unsigned long current_lbasize,
return len; return len;
} }
ssize_t nd_sector_size_store(struct device *dev, const char *buf, ssize_t nd_size_select_store(struct device *dev, const char *buf,
unsigned long *current_lbasize, const unsigned long *supported) unsigned long *current_size, const unsigned long *supported)
{ {
unsigned long lbasize; unsigned long lbasize;
int rc, i; int rc, i;
...@@ -310,7 +310,7 @@ ssize_t nd_sector_size_store(struct device *dev, const char *buf, ...@@ -310,7 +310,7 @@ ssize_t nd_sector_size_store(struct device *dev, const char *buf,
break; break;
if (supported[i]) { if (supported[i]) {
*current_lbasize = lbasize; *current_size = lbasize;
return 0; return 0;
} else { } else {
return -EINVAL; return -EINVAL;
......
...@@ -45,12 +45,14 @@ unsigned sizeof_namespace_label(struct nvdimm_drvdata *ndd) ...@@ -45,12 +45,14 @@ unsigned sizeof_namespace_label(struct nvdimm_drvdata *ndd)
return ndd->nslabel_size; return ndd->nslabel_size;
} }
size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd) int nvdimm_num_label_slots(struct nvdimm_drvdata *ndd)
{ {
u32 index_span; return ndd->nsarea.config_size / (sizeof_namespace_label(ndd) + 1);
}
if (ndd->nsindex_size) size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd)
return ndd->nsindex_size; {
u32 nslot, space, size;
/* /*
* The minimum index space is 512 bytes, with that amount of * The minimum index space is 512 bytes, with that amount of
...@@ -60,16 +62,16 @@ size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd) ...@@ -60,16 +62,16 @@ size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd)
* starts to waste space at larger config_sizes, but it's * starts to waste space at larger config_sizes, but it's
* unlikely we'll ever see anything but 128K. * unlikely we'll ever see anything but 128K.
*/ */
index_span = ndd->nsarea.config_size / (sizeof_namespace_label(ndd) + 1); nslot = nvdimm_num_label_slots(ndd);
index_span /= NSINDEX_ALIGN * 2; space = ndd->nsarea.config_size - nslot * sizeof_namespace_label(ndd);
ndd->nsindex_size = index_span * NSINDEX_ALIGN; size = ALIGN(sizeof(struct nd_namespace_index) + DIV_ROUND_UP(nslot, 8),
NSINDEX_ALIGN) * 2;
return ndd->nsindex_size; if (size <= space)
} return size / 2;
int nvdimm_num_label_slots(struct nvdimm_drvdata *ndd) dev_err(ndd->dev, "label area (%d) too small to host (%d byte) labels\n",
{ ndd->nsarea.config_size, sizeof_namespace_label(ndd));
return ndd->nsarea.config_size / (sizeof_namespace_label(ndd) + 1); return 0;
} }
static int __nd_label_validate(struct nvdimm_drvdata *ndd) static int __nd_label_validate(struct nvdimm_drvdata *ndd)
......
...@@ -1313,14 +1313,14 @@ static ssize_t sector_size_show(struct device *dev, ...@@ -1313,14 +1313,14 @@ static ssize_t sector_size_show(struct device *dev,
if (is_namespace_blk(dev)) { if (is_namespace_blk(dev)) {
struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev); struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
return nd_sector_size_show(nsblk->lbasize, return nd_size_select_show(nsblk->lbasize,
blk_lbasize_supported, buf); blk_lbasize_supported, buf);
} }
if (is_namespace_pmem(dev)) { if (is_namespace_pmem(dev)) {
struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
return nd_sector_size_show(nspm->lbasize, return nd_size_select_show(nspm->lbasize,
pmem_lbasize_supported, buf); pmem_lbasize_supported, buf);
} }
return -ENXIO; return -ENXIO;
...@@ -1352,7 +1352,7 @@ static ssize_t sector_size_store(struct device *dev, ...@@ -1352,7 +1352,7 @@ static ssize_t sector_size_store(struct device *dev,
if (to_ndns(dev)->claim) if (to_ndns(dev)->claim)
rc = -EBUSY; rc = -EBUSY;
if (rc >= 0) if (rc >= 0)
rc = nd_sector_size_store(dev, buf, lbasize, supported); rc = nd_size_select_store(dev, buf, lbasize, supported);
if (rc >= 0) if (rc >= 0)
rc = nd_namespace_label_update(nd_region, dev); rc = nd_namespace_label_update(nd_region, dev);
dev_dbg(dev, "%s: result: %zd %s: %s%s", __func__, dev_dbg(dev, "%s: result: %zd %s: %s%s", __func__,
......
...@@ -42,7 +42,7 @@ struct nd_poison { ...@@ -42,7 +42,7 @@ struct nd_poison {
struct nvdimm_drvdata { struct nvdimm_drvdata {
struct device *dev; struct device *dev;
int nsindex_size, nslabel_size; int nslabel_size;
struct nd_cmd_get_config_size nsarea; struct nd_cmd_get_config_size nsarea;
void *data; void *data;
int ns_current, ns_next; int ns_current, ns_next;
...@@ -134,6 +134,7 @@ struct nd_mapping { ...@@ -134,6 +134,7 @@ struct nd_mapping {
struct nvdimm *nvdimm; struct nvdimm *nvdimm;
u64 start; u64 start;
u64 size; u64 size;
int position;
struct list_head labels; struct list_head labels;
struct mutex lock; struct mutex lock;
/* /*
...@@ -233,10 +234,10 @@ void nd_device_unregister(struct device *dev, enum nd_async_mode mode); ...@@ -233,10 +234,10 @@ void nd_device_unregister(struct device *dev, enum nd_async_mode mode);
void nd_device_notify(struct device *dev, enum nvdimm_event event); void nd_device_notify(struct device *dev, enum nvdimm_event event);
int nd_uuid_store(struct device *dev, u8 **uuid_out, const char *buf, int nd_uuid_store(struct device *dev, u8 **uuid_out, const char *buf,
size_t len); size_t len);
ssize_t nd_sector_size_show(unsigned long current_lbasize, ssize_t nd_size_select_show(unsigned long current_size,
const unsigned long *supported, char *buf); const unsigned long *supported, char *buf);
ssize_t nd_sector_size_store(struct device *dev, const char *buf, ssize_t nd_size_select_store(struct device *dev, const char *buf,
unsigned long *current_lbasize, const unsigned long *supported); unsigned long *current_size, const unsigned long *supported);
int __init nvdimm_init(void); int __init nvdimm_init(void);
int __init nd_region_init(void); int __init nd_region_init(void);
int __init nd_label_init(void); int __init nd_label_init(void);
...@@ -285,6 +286,13 @@ static inline struct device *nd_btt_create(struct nd_region *nd_region) ...@@ -285,6 +286,13 @@ static inline struct device *nd_btt_create(struct nd_region *nd_region)
struct nd_pfn *to_nd_pfn(struct device *dev); struct nd_pfn *to_nd_pfn(struct device *dev);
#if IS_ENABLED(CONFIG_NVDIMM_PFN) #if IS_ENABLED(CONFIG_NVDIMM_PFN)
/*
 * Default alignment for a pfn device: the PMD-level huge page size when
 * transparent hugepages are configured, otherwise a single page.
 */
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define PFN_DEFAULT_ALIGNMENT HPAGE_PMD_SIZE
#else
#define PFN_DEFAULT_ALIGNMENT PAGE_SIZE
#endif
int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns); int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns);
bool is_nd_pfn(struct device *dev); bool is_nd_pfn(struct device *dev);
struct device *nd_pfn_create(struct nd_region *nd_region); struct device *nd_pfn_create(struct nd_region *nd_region);
......
...@@ -111,24 +111,27 @@ static ssize_t align_show(struct device *dev, ...@@ -111,24 +111,27 @@ static ssize_t align_show(struct device *dev,
return sprintf(buf, "%ld\n", nd_pfn->align); return sprintf(buf, "%ld\n", nd_pfn->align);
} }
static ssize_t __align_store(struct nd_pfn *nd_pfn, const char *buf) static const unsigned long *nd_pfn_supported_alignments(void)
{ {
unsigned long val; /*
int rc; * This needs to be a non-static variable because the *_SIZE
* macros aren't always constants.
rc = kstrtoul(buf, 0, &val); */
if (rc) const unsigned long supported_alignments[] = {
return rc; PAGE_SIZE,
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
if (!is_power_of_2(val) || val < PAGE_SIZE || val > SZ_1G) HPAGE_PMD_SIZE,
return -EINVAL; #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
HPAGE_PUD_SIZE,
#endif
#endif
0,
};
static unsigned long data[ARRAY_SIZE(supported_alignments)];
if (nd_pfn->dev.driver) memcpy(data, supported_alignments, sizeof(data));
return -EBUSY;
else
nd_pfn->align = val;
return 0; return data;
} }
static ssize_t align_store(struct device *dev, static ssize_t align_store(struct device *dev,
...@@ -139,7 +142,8 @@ static ssize_t align_store(struct device *dev, ...@@ -139,7 +142,8 @@ static ssize_t align_store(struct device *dev,
device_lock(dev); device_lock(dev);
nvdimm_bus_lock(dev); nvdimm_bus_lock(dev);
rc = __align_store(nd_pfn, buf); rc = nd_size_select_store(dev, buf, &nd_pfn->align,
nd_pfn_supported_alignments());
dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__, dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
rc, buf, buf[len - 1] == '\n' ? "" : "\n"); rc, buf, buf[len - 1] == '\n' ? "" : "\n");
nvdimm_bus_unlock(dev); nvdimm_bus_unlock(dev);
...@@ -260,6 +264,13 @@ static ssize_t size_show(struct device *dev, ...@@ -260,6 +264,13 @@ static ssize_t size_show(struct device *dev,
} }
static DEVICE_ATTR_RO(size); static DEVICE_ATTR_RO(size);
/*
 * sysfs 'supported_alignments' (read-only): print the list returned by
 * nd_pfn_supported_alignments() into @buf.
 *
 * A current size of 0 is handed to nd_size_select_show(); because that
 * helper stops at the list's 0 terminator, no entry can compare equal,
 * so no value is emitted in the bracketed "selected" form.
 *
 * Returns the number of bytes written to @buf.
 */
static ssize_t supported_alignments_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	const unsigned long *alignments = nd_pfn_supported_alignments();

	return nd_size_select_show(0, alignments, buf);
}
static DEVICE_ATTR_RO(supported_alignments);
static struct attribute *nd_pfn_attributes[] = { static struct attribute *nd_pfn_attributes[] = {
&dev_attr_mode.attr, &dev_attr_mode.attr,
&dev_attr_namespace.attr, &dev_attr_namespace.attr,
...@@ -267,6 +278,7 @@ static struct attribute *nd_pfn_attributes[] = { ...@@ -267,6 +278,7 @@ static struct attribute *nd_pfn_attributes[] = {
&dev_attr_align.attr, &dev_attr_align.attr,
&dev_attr_resource.attr, &dev_attr_resource.attr,
&dev_attr_size.attr, &dev_attr_size.attr,
&dev_attr_supported_alignments.attr,
NULL, NULL,
}; };
...@@ -290,7 +302,7 @@ struct device *nd_pfn_devinit(struct nd_pfn *nd_pfn, ...@@ -290,7 +302,7 @@ struct device *nd_pfn_devinit(struct nd_pfn *nd_pfn,
return NULL; return NULL;
nd_pfn->mode = PFN_MODE_NONE; nd_pfn->mode = PFN_MODE_NONE;
nd_pfn->align = HPAGE_SIZE; nd_pfn->align = PFN_DEFAULT_ALIGNMENT;
dev = &nd_pfn->dev; dev = &nd_pfn->dev;
device_initialize(&nd_pfn->dev); device_initialize(&nd_pfn->dev);
if (ndns && !__nd_attach_ndns(&nd_pfn->dev, ndns, &nd_pfn->ndns)) { if (ndns && !__nd_attach_ndns(&nd_pfn->dev, ndns, &nd_pfn->ndns)) {
...@@ -638,11 +650,12 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) ...@@ -638,11 +650,12 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
/ PAGE_SIZE); / PAGE_SIZE);
if (nd_pfn->mode == PFN_MODE_PMEM) { if (nd_pfn->mode == PFN_MODE_PMEM) {
/* /*
* vmemmap_populate_hugepages() allocates the memmap array in * The altmap should be padded out to the block size used
* HPAGE_SIZE chunks. * when populating the vmemmap. This *should* be equal to
* PMD_SIZE for most architectures.
*/ */
offset = ALIGN(start + SZ_8K + 64 * npfns + dax_label_reserve, offset = ALIGN(start + SZ_8K + 64 * npfns + dax_label_reserve,
max(nd_pfn->align, HPAGE_SIZE)) - start; max(nd_pfn->align, PMD_SIZE)) - start;
} else if (nd_pfn->mode == PFN_MODE_RAM) } else if (nd_pfn->mode == PFN_MODE_RAM)
offset = ALIGN(start + SZ_8K + dax_label_reserve, offset = ALIGN(start + SZ_8K + dax_label_reserve,
nd_pfn->align) - start; nd_pfn->align) - start;
......
...@@ -5,20 +5,6 @@ ...@@ -5,20 +5,6 @@
#include <linux/pfn_t.h> #include <linux/pfn_t.h>
#include <linux/fs.h> #include <linux/fs.h>
#ifdef CONFIG_ARCH_HAS_PMEM_API
#define ARCH_MEMREMAP_PMEM MEMREMAP_WB
void arch_wb_cache_pmem(void *addr, size_t size);
void arch_invalidate_pmem(void *addr, size_t size);
#else
#define ARCH_MEMREMAP_PMEM MEMREMAP_WT
static inline void arch_wb_cache_pmem(void *addr, size_t size)
{
}
static inline void arch_invalidate_pmem(void *addr, size_t size)
{
}
#endif
/* this definition is in it's own header for tools/testing/nvdimm to consume */ /* this definition is in it's own header for tools/testing/nvdimm to consume */
struct pmem_device { struct pmem_device {
/* One contiguous memory region per device */ /* One contiguous memory region per device */
......
...@@ -723,8 +723,9 @@ static ssize_t mappingN(struct device *dev, char *buf, int n) ...@@ -723,8 +723,9 @@ static ssize_t mappingN(struct device *dev, char *buf, int n)
nd_mapping = &nd_region->mapping[n]; nd_mapping = &nd_region->mapping[n];
nvdimm = nd_mapping->nvdimm; nvdimm = nd_mapping->nvdimm;
return sprintf(buf, "%s,%llu,%llu\n", dev_name(&nvdimm->dev), return sprintf(buf, "%s,%llu,%llu,%d\n", dev_name(&nvdimm->dev),
nd_mapping->start, nd_mapping->size); nd_mapping->start, nd_mapping->size,
nd_mapping->position);
} }
#define REGION_MAPPING(idx) \ #define REGION_MAPPING(idx) \
...@@ -965,6 +966,7 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus, ...@@ -965,6 +966,7 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
nd_region->mapping[i].nvdimm = nvdimm; nd_region->mapping[i].nvdimm = nvdimm;
nd_region->mapping[i].start = mapping->start; nd_region->mapping[i].start = mapping->start;
nd_region->mapping[i].size = mapping->size; nd_region->mapping[i].size = mapping->size;
nd_region->mapping[i].position = mapping->position;
INIT_LIST_HEAD(&nd_region->mapping[i].labels); INIT_LIST_HEAD(&nd_region->mapping[i].labels);
mutex_init(&nd_region->mapping[i].lock); mutex_init(&nd_region->mapping[i].lock);
......
...@@ -114,6 +114,7 @@ struct ext2_sb_info { ...@@ -114,6 +114,7 @@ struct ext2_sb_info {
*/ */
spinlock_t s_lock; spinlock_t s_lock;
struct mb_cache *s_ea_block_cache; struct mb_cache *s_ea_block_cache;
struct dax_device *s_daxdev;
}; };
static inline spinlock_t * static inline spinlock_t *
......
...@@ -800,10 +800,10 @@ int ext2_get_block(struct inode *inode, sector_t iblock, ...@@ -800,10 +800,10 @@ int ext2_get_block(struct inode *inode, sector_t iblock,
static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length, static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
unsigned flags, struct iomap *iomap) unsigned flags, struct iomap *iomap)
{ {
struct block_device *bdev;
unsigned int blkbits = inode->i_blkbits; unsigned int blkbits = inode->i_blkbits;
unsigned long first_block = offset >> blkbits; unsigned long first_block = offset >> blkbits;
unsigned long max_blocks = (length + (1 << blkbits) - 1) >> blkbits; unsigned long max_blocks = (length + (1 << blkbits) - 1) >> blkbits;
struct ext2_sb_info *sbi = EXT2_SB(inode->i_sb);
bool new = false, boundary = false; bool new = false, boundary = false;
u32 bno; u32 bno;
int ret; int ret;
...@@ -814,13 +814,9 @@ static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length, ...@@ -814,13 +814,9 @@ static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
return ret; return ret;
iomap->flags = 0; iomap->flags = 0;
bdev = inode->i_sb->s_bdev; iomap->bdev = inode->i_sb->s_bdev;
iomap->bdev = bdev;
iomap->offset = (u64)first_block << blkbits; iomap->offset = (u64)first_block << blkbits;
if (blk_queue_dax(bdev->bd_queue)) iomap->dax_dev = sbi->s_daxdev;
iomap->dax_dev = fs_dax_get_by_host(bdev->bd_disk->disk_name);
else
iomap->dax_dev = NULL;
if (ret == 0) { if (ret == 0) {
iomap->type = IOMAP_HOLE; iomap->type = IOMAP_HOLE;
...@@ -842,7 +838,6 @@ static int ...@@ -842,7 +838,6 @@ static int
ext2_iomap_end(struct inode *inode, loff_t offset, loff_t length, ext2_iomap_end(struct inode *inode, loff_t offset, loff_t length,
ssize_t written, unsigned flags, struct iomap *iomap) ssize_t written, unsigned flags, struct iomap *iomap)
{ {
fs_put_dax(iomap->dax_dev);
if (iomap->type == IOMAP_MAPPED && if (iomap->type == IOMAP_MAPPED &&
written < length && written < length &&
(flags & IOMAP_WRITE)) (flags & IOMAP_WRITE))
......
...@@ -171,6 +171,7 @@ static void ext2_put_super (struct super_block * sb) ...@@ -171,6 +171,7 @@ static void ext2_put_super (struct super_block * sb)
brelse (sbi->s_sbh); brelse (sbi->s_sbh);
sb->s_fs_info = NULL; sb->s_fs_info = NULL;
kfree(sbi->s_blockgroup_lock); kfree(sbi->s_blockgroup_lock);
fs_put_dax(sbi->s_daxdev);
kfree(sbi); kfree(sbi);
} }
...@@ -813,6 +814,7 @@ static unsigned long descriptor_loc(struct super_block *sb, ...@@ -813,6 +814,7 @@ static unsigned long descriptor_loc(struct super_block *sb,
static int ext2_fill_super(struct super_block *sb, void *data, int silent) static int ext2_fill_super(struct super_block *sb, void *data, int silent)
{ {
struct dax_device *dax_dev = fs_dax_get_by_bdev(sb->s_bdev);
struct buffer_head * bh; struct buffer_head * bh;
struct ext2_sb_info * sbi; struct ext2_sb_info * sbi;
struct ext2_super_block * es; struct ext2_super_block * es;
...@@ -842,6 +844,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) ...@@ -842,6 +844,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
} }
sb->s_fs_info = sbi; sb->s_fs_info = sbi;
sbi->s_sb_block = sb_block; sbi->s_sb_block = sb_block;
sbi->s_daxdev = dax_dev;
spin_lock_init(&sbi->s_lock); spin_lock_init(&sbi->s_lock);
...@@ -1200,6 +1203,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) ...@@ -1200,6 +1203,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
kfree(sbi->s_blockgroup_lock); kfree(sbi->s_blockgroup_lock);
kfree(sbi); kfree(sbi);
failed: failed:
fs_put_dax(dax_dev);
return ret; return ret;
} }
......
...@@ -1526,6 +1526,7 @@ struct ext4_sb_info { ...@@ -1526,6 +1526,7 @@ struct ext4_sb_info {
/* Barrier between changing inodes' journal flags and writepages ops. */ /* Barrier between changing inodes' journal flags and writepages ops. */
struct percpu_rw_semaphore s_journal_flag_rwsem; struct percpu_rw_semaphore s_journal_flag_rwsem;
struct dax_device *s_daxdev;
}; };
static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
......
...@@ -3397,7 +3397,7 @@ static int ext4_releasepage(struct page *page, gfp_t wait) ...@@ -3397,7 +3397,7 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length, static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
unsigned flags, struct iomap *iomap) unsigned flags, struct iomap *iomap)
{ {
struct block_device *bdev; struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
unsigned int blkbits = inode->i_blkbits; unsigned int blkbits = inode->i_blkbits;
unsigned long first_block = offset >> blkbits; unsigned long first_block = offset >> blkbits;
unsigned long last_block = (offset + length - 1) >> blkbits; unsigned long last_block = (offset + length - 1) >> blkbits;
...@@ -3466,12 +3466,8 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length, ...@@ -3466,12 +3466,8 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
} }
iomap->flags = 0; iomap->flags = 0;
bdev = inode->i_sb->s_bdev; iomap->bdev = inode->i_sb->s_bdev;
iomap->bdev = bdev; iomap->dax_dev = sbi->s_daxdev;
if (blk_queue_dax(bdev->bd_queue))
iomap->dax_dev = fs_dax_get_by_host(bdev->bd_disk->disk_name);
else
iomap->dax_dev = NULL;
iomap->offset = first_block << blkbits; iomap->offset = first_block << blkbits;
if (ret == 0) { if (ret == 0) {
...@@ -3504,7 +3500,6 @@ static int ext4_iomap_end(struct inode *inode, loff_t offset, loff_t length, ...@@ -3504,7 +3500,6 @@ static int ext4_iomap_end(struct inode *inode, loff_t offset, loff_t length,
int blkbits = inode->i_blkbits; int blkbits = inode->i_blkbits;
bool truncate = false; bool truncate = false;
fs_put_dax(iomap->dax_dev);
if (!(flags & IOMAP_WRITE) || (flags & IOMAP_FAULT)) if (!(flags & IOMAP_WRITE) || (flags & IOMAP_FAULT))
return 0; return 0;
......
...@@ -951,6 +951,7 @@ static void ext4_put_super(struct super_block *sb) ...@@ -951,6 +951,7 @@ static void ext4_put_super(struct super_block *sb)
if (sbi->s_chksum_driver) if (sbi->s_chksum_driver)
crypto_free_shash(sbi->s_chksum_driver); crypto_free_shash(sbi->s_chksum_driver);
kfree(sbi->s_blockgroup_lock); kfree(sbi->s_blockgroup_lock);
fs_put_dax(sbi->s_daxdev);
kfree(sbi); kfree(sbi);
} }
...@@ -3398,6 +3399,7 @@ static void ext4_set_resv_clusters(struct super_block *sb) ...@@ -3398,6 +3399,7 @@ static void ext4_set_resv_clusters(struct super_block *sb)
static int ext4_fill_super(struct super_block *sb, void *data, int silent) static int ext4_fill_super(struct super_block *sb, void *data, int silent)
{ {
struct dax_device *dax_dev = fs_dax_get_by_bdev(sb->s_bdev);
char *orig_data = kstrdup(data, GFP_KERNEL); char *orig_data = kstrdup(data, GFP_KERNEL);
struct buffer_head *bh; struct buffer_head *bh;
struct ext4_super_block *es = NULL; struct ext4_super_block *es = NULL;
...@@ -3423,6 +3425,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) ...@@ -3423,6 +3425,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
if ((data && !orig_data) || !sbi) if ((data && !orig_data) || !sbi)
goto out_free_base; goto out_free_base;
sbi->s_daxdev = dax_dev;
sbi->s_blockgroup_lock = sbi->s_blockgroup_lock =
kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL); kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
if (!sbi->s_blockgroup_lock) if (!sbi->s_blockgroup_lock)
...@@ -4399,6 +4402,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) ...@@ -4399,6 +4402,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
out_free_base: out_free_base:
kfree(sbi); kfree(sbi);
kfree(orig_data); kfree(orig_data);
fs_put_dax(dax_dev);
return err ? err : ret; return err ? err : ret;
} }
......
...@@ -80,6 +80,19 @@ xfs_find_bdev_for_inode( ...@@ -80,6 +80,19 @@ xfs_find_bdev_for_inode(
return mp->m_ddev_targp->bt_bdev; return mp->m_ddev_targp->bt_bdev;
} }
struct dax_device *
xfs_find_daxdev_for_inode(
struct inode *inode)
{
struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount;
if (XFS_IS_REALTIME_INODE(ip))
return mp->m_rtdev_targp->bt_daxdev;
else
return mp->m_ddev_targp->bt_daxdev;
}
/* /*
* We're now finished for good with this page. Update the page state via the * We're now finished for good with this page. Update the page state via the
* associated buffer_heads, paying attention to the start and end offsets that * associated buffer_heads, paying attention to the start and end offsets that
......
...@@ -59,5 +59,6 @@ int xfs_setfilesize(struct xfs_inode *ip, xfs_off_t offset, size_t size); ...@@ -59,5 +59,6 @@ int xfs_setfilesize(struct xfs_inode *ip, xfs_off_t offset, size_t size);
extern void xfs_count_page_state(struct page *, int *, int *); extern void xfs_count_page_state(struct page *, int *, int *);
extern struct block_device *xfs_find_bdev_for_inode(struct inode *); extern struct block_device *xfs_find_bdev_for_inode(struct inode *);
extern struct dax_device *xfs_find_daxdev_for_inode(struct inode *);
#endif /* __XFS_AOPS_H__ */ #endif /* __XFS_AOPS_H__ */
...@@ -1802,7 +1802,8 @@ xfs_setsize_buftarg_early( ...@@ -1802,7 +1802,8 @@ xfs_setsize_buftarg_early(
xfs_buftarg_t * xfs_buftarg_t *
xfs_alloc_buftarg( xfs_alloc_buftarg(
struct xfs_mount *mp, struct xfs_mount *mp,
struct block_device *bdev) struct block_device *bdev,
struct dax_device *dax_dev)
{ {
xfs_buftarg_t *btp; xfs_buftarg_t *btp;
...@@ -1811,6 +1812,7 @@ xfs_alloc_buftarg( ...@@ -1811,6 +1812,7 @@ xfs_alloc_buftarg(
btp->bt_mount = mp; btp->bt_mount = mp;
btp->bt_dev = bdev->bd_dev; btp->bt_dev = bdev->bd_dev;
btp->bt_bdev = bdev; btp->bt_bdev = bdev;
btp->bt_daxdev = dax_dev;
if (xfs_setsize_buftarg_early(btp, bdev)) if (xfs_setsize_buftarg_early(btp, bdev))
goto error; goto error;
......
...@@ -108,6 +108,7 @@ typedef unsigned int xfs_buf_flags_t; ...@@ -108,6 +108,7 @@ typedef unsigned int xfs_buf_flags_t;
typedef struct xfs_buftarg { typedef struct xfs_buftarg {
dev_t bt_dev; dev_t bt_dev;
struct block_device *bt_bdev; struct block_device *bt_bdev;
struct dax_device *bt_daxdev;
struct xfs_mount *bt_mount; struct xfs_mount *bt_mount;
unsigned int bt_meta_sectorsize; unsigned int bt_meta_sectorsize;
size_t bt_meta_sectormask; size_t bt_meta_sectormask;
...@@ -385,7 +386,7 @@ xfs_buf_update_cksum(struct xfs_buf *bp, unsigned long cksum_offset) ...@@ -385,7 +386,7 @@ xfs_buf_update_cksum(struct xfs_buf *bp, unsigned long cksum_offset)
* Handling of buftargs. * Handling of buftargs.
*/ */
extern xfs_buftarg_t *xfs_alloc_buftarg(struct xfs_mount *, extern xfs_buftarg_t *xfs_alloc_buftarg(struct xfs_mount *,
struct block_device *); struct block_device *, struct dax_device *);
extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *); extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *);
extern void xfs_wait_buftarg(xfs_buftarg_t *); extern void xfs_wait_buftarg(xfs_buftarg_t *);
extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int); extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int);
......
...@@ -69,6 +69,7 @@ xfs_bmbt_to_iomap( ...@@ -69,6 +69,7 @@ xfs_bmbt_to_iomap(
iomap->offset = XFS_FSB_TO_B(mp, imap->br_startoff); iomap->offset = XFS_FSB_TO_B(mp, imap->br_startoff);
iomap->length = XFS_FSB_TO_B(mp, imap->br_blockcount); iomap->length = XFS_FSB_TO_B(mp, imap->br_blockcount);
iomap->bdev = xfs_find_bdev_for_inode(VFS_I(ip)); iomap->bdev = xfs_find_bdev_for_inode(VFS_I(ip));
iomap->dax_dev = xfs_find_daxdev_for_inode(VFS_I(ip));
} }
xfs_extlen_t xfs_extlen_t
...@@ -975,7 +976,6 @@ xfs_file_iomap_begin( ...@@ -975,7 +976,6 @@ xfs_file_iomap_begin(
int nimaps = 1, error = 0; int nimaps = 1, error = 0;
bool shared = false, trimmed = false; bool shared = false, trimmed = false;
unsigned lockmode; unsigned lockmode;
struct block_device *bdev;
if (XFS_FORCED_SHUTDOWN(mp)) if (XFS_FORCED_SHUTDOWN(mp))
return -EIO; return -EIO;
...@@ -1085,13 +1085,6 @@ xfs_file_iomap_begin( ...@@ -1085,13 +1085,6 @@ xfs_file_iomap_begin(
xfs_bmbt_to_iomap(ip, iomap, &imap); xfs_bmbt_to_iomap(ip, iomap, &imap);
/* optionally associate a dax device with the iomap bdev */
bdev = iomap->bdev;
if (blk_queue_dax(bdev->bd_queue))
iomap->dax_dev = fs_dax_get_by_host(bdev->bd_disk->disk_name);
else
iomap->dax_dev = NULL;
if (shared) if (shared)
iomap->flags |= IOMAP_F_SHARED; iomap->flags |= IOMAP_F_SHARED;
return 0; return 0;
...@@ -1169,7 +1162,6 @@ xfs_file_iomap_end( ...@@ -1169,7 +1162,6 @@ xfs_file_iomap_end(
unsigned flags, unsigned flags,
struct iomap *iomap) struct iomap *iomap)
{ {
fs_put_dax(iomap->dax_dev);
if ((flags & IOMAP_WRITE) && iomap->type == IOMAP_DELALLOC) if ((flags & IOMAP_WRITE) && iomap->type == IOMAP_DELALLOC)
return xfs_file_iomap_end_delalloc(XFS_I(inode), offset, return xfs_file_iomap_end_delalloc(XFS_I(inode), offset,
length, written, iomap); length, written, iomap);
......
...@@ -714,17 +714,26 @@ STATIC void ...@@ -714,17 +714,26 @@ STATIC void
xfs_close_devices( xfs_close_devices(
struct xfs_mount *mp) struct xfs_mount *mp)
{ {
struct dax_device *dax_ddev = mp->m_ddev_targp->bt_daxdev;
if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) { if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
struct block_device *logdev = mp->m_logdev_targp->bt_bdev; struct block_device *logdev = mp->m_logdev_targp->bt_bdev;
struct dax_device *dax_logdev = mp->m_logdev_targp->bt_daxdev;
xfs_free_buftarg(mp, mp->m_logdev_targp); xfs_free_buftarg(mp, mp->m_logdev_targp);
xfs_blkdev_put(logdev); xfs_blkdev_put(logdev);
fs_put_dax(dax_logdev);
} }
if (mp->m_rtdev_targp) { if (mp->m_rtdev_targp) {
struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev; struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev;
struct dax_device *dax_rtdev = mp->m_rtdev_targp->bt_daxdev;
xfs_free_buftarg(mp, mp->m_rtdev_targp); xfs_free_buftarg(mp, mp->m_rtdev_targp);
xfs_blkdev_put(rtdev); xfs_blkdev_put(rtdev);
fs_put_dax(dax_rtdev);
} }
xfs_free_buftarg(mp, mp->m_ddev_targp); xfs_free_buftarg(mp, mp->m_ddev_targp);
fs_put_dax(dax_ddev);
} }
/* /*
...@@ -742,6 +751,8 @@ xfs_open_devices( ...@@ -742,6 +751,8 @@ xfs_open_devices(
struct xfs_mount *mp) struct xfs_mount *mp)
{ {
struct block_device *ddev = mp->m_super->s_bdev; struct block_device *ddev = mp->m_super->s_bdev;
struct dax_device *dax_ddev = fs_dax_get_by_bdev(ddev);
struct dax_device *dax_logdev = NULL, *dax_rtdev = NULL;
struct block_device *logdev = NULL, *rtdev = NULL; struct block_device *logdev = NULL, *rtdev = NULL;
int error; int error;
...@@ -752,6 +763,7 @@ xfs_open_devices( ...@@ -752,6 +763,7 @@ xfs_open_devices(
error = xfs_blkdev_get(mp, mp->m_logname, &logdev); error = xfs_blkdev_get(mp, mp->m_logname, &logdev);
if (error) if (error)
goto out; goto out;
dax_logdev = fs_dax_get_by_bdev(logdev);
} }
if (mp->m_rtname) { if (mp->m_rtname) {
...@@ -765,24 +777,25 @@ xfs_open_devices( ...@@ -765,24 +777,25 @@ xfs_open_devices(
error = -EINVAL; error = -EINVAL;
goto out_close_rtdev; goto out_close_rtdev;
} }
dax_rtdev = fs_dax_get_by_bdev(rtdev);
} }
/* /*
* Setup xfs_mount buffer target pointers * Setup xfs_mount buffer target pointers
*/ */
error = -ENOMEM; error = -ENOMEM;
mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev); mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev, dax_ddev);
if (!mp->m_ddev_targp) if (!mp->m_ddev_targp)
goto out_close_rtdev; goto out_close_rtdev;
if (rtdev) { if (rtdev) {
mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev); mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev, dax_rtdev);
if (!mp->m_rtdev_targp) if (!mp->m_rtdev_targp)
goto out_free_ddev_targ; goto out_free_ddev_targ;
} }
if (logdev && logdev != ddev) { if (logdev && logdev != ddev) {
mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev); mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev, dax_logdev);
if (!mp->m_logdev_targp) if (!mp->m_logdev_targp)
goto out_free_rtdev_targ; goto out_free_rtdev_targ;
} else { } else {
...@@ -798,10 +811,14 @@ xfs_open_devices( ...@@ -798,10 +811,14 @@ xfs_open_devices(
xfs_free_buftarg(mp, mp->m_ddev_targp); xfs_free_buftarg(mp, mp->m_ddev_targp);
out_close_rtdev: out_close_rtdev:
xfs_blkdev_put(rtdev); xfs_blkdev_put(rtdev);
fs_put_dax(dax_rtdev);
out_close_logdev: out_close_logdev:
if (logdev && logdev != ddev) if (logdev && logdev != ddev) {
xfs_blkdev_put(logdev); xfs_blkdev_put(logdev);
fs_put_dax(dax_logdev);
}
out: out:
fs_put_dax(dax_ddev);
return error; return error;
} }
......
...@@ -57,6 +57,7 @@ static inline void fs_put_dax(struct dax_device *dax_dev) ...@@ -57,6 +57,7 @@ static inline void fs_put_dax(struct dax_device *dax_dev)
put_dax(dax_dev); put_dax(dax_dev);
} }
struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev);
#else #else
static inline int bdev_dax_supported(struct super_block *sb, int blocksize) static inline int bdev_dax_supported(struct super_block *sb, int blocksize)
{ {
...@@ -71,6 +72,11 @@ static inline struct dax_device *fs_dax_get_by_host(const char *host) ...@@ -71,6 +72,11 @@ static inline struct dax_device *fs_dax_get_by_host(const char *host)
static inline void fs_put_dax(struct dax_device *dax_dev) static inline void fs_put_dax(struct dax_device *dax_dev)
{ {
} }
/*
 * Fallback definition for the #else configuration branch (the controlling
 * option is outside this excerpt).  Ignores @bdev and always returns NULL,
 * so callers see "no dax_device" for every block device.
 */
static inline struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev)
{
return NULL;
}
#endif #endif
int dax_read_lock(void); int dax_read_lock(void);
......
...@@ -87,6 +87,7 @@ struct nd_mapping_desc { ...@@ -87,6 +87,7 @@ struct nd_mapping_desc {
struct nvdimm *nvdimm; struct nvdimm *nvdimm;
u64 start; u64 start;
u64 size; u64 size;
int position;
}; };
struct nd_region_desc { struct nd_region_desc {
...@@ -173,4 +174,19 @@ u64 nd_fletcher64(void *addr, size_t len, bool le); ...@@ -173,4 +174,19 @@ u64 nd_fletcher64(void *addr, size_t len, bool le);
void nvdimm_flush(struct nd_region *nd_region); void nvdimm_flush(struct nd_region *nd_region);
int nvdimm_has_flush(struct nd_region *nd_region); int nvdimm_has_flush(struct nd_region *nd_region);
int nvdimm_has_cache(struct nd_region *nd_region); int nvdimm_has_cache(struct nd_region *nd_region);
#ifdef CONFIG_ARCH_HAS_PMEM_API
#define ARCH_MEMREMAP_PMEM MEMREMAP_WB
void arch_wb_cache_pmem(void *addr, size_t size);
void arch_invalidate_pmem(void *addr, size_t size);
#else
#define ARCH_MEMREMAP_PMEM MEMREMAP_WT
/*
 * Stub used when CONFIG_ARCH_HAS_PMEM_API is not defined: does nothing
 * with @addr / @size.
 */
static inline void arch_wb_cache_pmem(void *addr, size_t size)
{
}
/*
 * Stub used when CONFIG_ARCH_HAS_PMEM_API is not defined: does nothing
 * with @addr / @size.
 */
static inline void arch_invalidate_pmem(void *addr, size_t size)
{
}
#endif
#endif /* __LIBNVDIMM_H__ */ #endif /* __LIBNVDIMM_H__ */
...@@ -559,9 +559,6 @@ config ARCH_HAS_PMEM_API ...@@ -559,9 +559,6 @@ config ARCH_HAS_PMEM_API
config ARCH_HAS_UACCESS_FLUSHCACHE config ARCH_HAS_UACCESS_FLUSHCACHE
bool bool
config ARCH_HAS_MMIO_FLUSH
bool
config STACKDEPOT config STACKDEPOT
bool bool
select STACKTRACE select STACKTRACE
......
...@@ -1546,8 +1546,8 @@ static int nfit_test_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa, ...@@ -1546,8 +1546,8 @@ static int nfit_test_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa,
else { else {
memcpy(iobuf, mmio->addr.base + dpa, len); memcpy(iobuf, mmio->addr.base + dpa, len);
/* give us some some coverage of the mmio_flush_range() API */ /* give us some some coverage of the arch_invalidate_pmem() API */
mmio_flush_range(mmio->addr.base + dpa, len); arch_invalidate_pmem(mmio->addr.base + dpa, len);
} }
nd_region_release_lane(nd_region, lane); nd_region_release_lane(nd_region, lane);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment