Commit b6ffe9ba authored by Linus Torvalds

Merge tag 'libnvdimm-for-4.13' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm

Pull libnvdimm updates from Dan Williams:
 "libnvdimm updates for the latest ACPI and UEFI specifications. This
  pull request also includes new 'struct dax_operations' operations that
  make it possible to undo the abuse of copy_user_nocache() for copy
  operations to pmem.

  The dax work originally missed 4.12 to address concerns raised by Al.

  Summary:

   - Introduce the _flushcache() family of memory copy helpers and use
     them for persistent memory write operations on x86. The
     _flushcache() semantic indicates that the cache is either bypassed
     for the copy operation (movnt) or any lines dirtied by the copy
     operation are written back (clwb, clflushopt, or clflush).

   - Extend dax_operations with ->copy_from_iter() and ->flush()
     operations. These operations and other infrastructure updates allow
     all persistent memory specific dax functionality to be pushed into
     libnvdimm and the pmem driver directly. It also allows dax-specific
     sysfs attributes to be linked to a host device, for example:
     /sys/block/pmem0/dax/write_cache

   - Add support for the new NVDIMM platform/firmware mechanisms
     introduced in ACPI 6.2 and UEFI 2.7. This support includes the v1.2
     namespace label format, extensions to the address-range-scrub
     command set, new error injection commands, and a new BTT
     (block-translation-table) layout. These updates support inter-OS
     and pre-OS compatibility.

   - Fix a longstanding memory corruption bug in nfit_test.

   - Make the pmem and nvdimm-region 'badblocks' sysfs files poll(2)
     capable.

   - Miscellaneous fixes and small updates across libnvdimm and the nfit
     driver.

  Acknowledgements that came after the branch was pushed: commit
  6aa734a2 ("libnvdimm, region, pmem: fix 'badblocks'
  sysfs_get_dirent() reference lifetime") was reviewed by Toshi Kani
  <toshi.kani@hpe.com>"

* tag 'libnvdimm-for-4.13' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (42 commits)
  libnvdimm, namespace: record 'lbasize' for pmem namespaces
  acpi/nfit: Issue Start ARS to retrieve existing records
  libnvdimm: New ACPI 6.2 DSM functions
  acpi, nfit: Show bus_dsm_mask in sysfs
  libnvdimm, acpi, nfit: Add bus level dsm mask for pass thru.
  acpi, nfit: Enable DSM pass thru for root functions.
  libnvdimm: passthru functions clear to send
  libnvdimm, btt: convert some info messages to warn/err
  libnvdimm, region, pmem: fix 'badblocks' sysfs_get_dirent() reference lifetime
  libnvdimm: fix the clear-error check in nsio_rw_bytes
  libnvdimm, btt: fix btt_rw_page not returning errors
  acpi, nfit: quiet invalid block-aperture-region warnings
  libnvdimm, btt: BTT updates for UEFI 2.7 format
  acpi, nfit: constify *_attribute_group
  libnvdimm, pmem: disable dax flushing when pmem is fronting a volatile region
  libnvdimm, pmem, dax: export a cache control attribute
  dax: convert to bitmask for flags
  dax: remove default copy_from_iter fallback
  libnvdimm, nfit: enable support for volatile ranges
  libnvdimm, pmem: fix persistence warning
  ...
parents 9f45efb9 9d92573f
...@@ -7680,9 +7680,7 @@ M: Ross Zwisler <ross.zwisler@linux.intel.com> ...@@ -7680,9 +7680,7 @@ M: Ross Zwisler <ross.zwisler@linux.intel.com>
L: linux-nvdimm@lists.01.org L: linux-nvdimm@lists.01.org
Q: https://patchwork.kernel.org/project/linux-nvdimm/list/ Q: https://patchwork.kernel.org/project/linux-nvdimm/list/
S: Supported S: Supported
F: drivers/nvdimm/pmem.c F: drivers/nvdimm/pmem*
F: include/linux/pmem.h
F: arch/*/include/asm/pmem.h
LIGHTNVM PLATFORM SUPPORT LIGHTNVM PLATFORM SUPPORT
M: Matias Bjorling <mb@lightnvm.io> M: Matias Bjorling <mb@lightnvm.io>
......
...@@ -45,6 +45,7 @@ ...@@ -45,6 +45,7 @@
#include <linux/of_device.h> #include <linux/of_device.h>
#include <linux/of_platform.h> #include <linux/of_platform.h>
#include <linux/pfn_t.h> #include <linux/pfn_t.h>
#include <linux/uio.h>
#include <asm/page.h> #include <asm/page.h>
#include <asm/prom.h> #include <asm/prom.h>
...@@ -163,8 +164,15 @@ axon_ram_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pa ...@@ -163,8 +164,15 @@ axon_ram_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pa
return __axon_ram_direct_access(bank, pgoff, nr_pages, kaddr, pfn); return __axon_ram_direct_access(bank, pgoff, nr_pages, kaddr, pfn);
} }
/*
 * ->copy_from_iter() hook installed in axon_ram_dax_ops: forwards to the
 * generic copy_from_iter() and returns its result unchanged.
 * @dax_dev and @pgoff are accepted to match the hook signature but are
 * not used by this implementation.
 */
static size_t axon_ram_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
void *addr, size_t bytes, struct iov_iter *i)
{
return copy_from_iter(addr, bytes, i);
}
static const struct dax_operations axon_ram_dax_ops = { static const struct dax_operations axon_ram_dax_ops = {
.direct_access = axon_ram_dax_direct_access, .direct_access = axon_ram_dax_direct_access,
.copy_from_iter = axon_ram_copy_from_iter,
}; };
/** /**
......
...@@ -54,6 +54,7 @@ config X86 ...@@ -54,6 +54,7 @@ config X86
select ARCH_HAS_KCOV if X86_64 select ARCH_HAS_KCOV if X86_64
select ARCH_HAS_MMIO_FLUSH select ARCH_HAS_MMIO_FLUSH
select ARCH_HAS_PMEM_API if X86_64 select ARCH_HAS_PMEM_API if X86_64
select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64
select ARCH_HAS_SET_MEMORY select ARCH_HAS_SET_MEMORY
select ARCH_HAS_SG_CHAIN select ARCH_HAS_SG_CHAIN
select ARCH_HAS_STRICT_KERNEL_RWX select ARCH_HAS_STRICT_KERNEL_RWX
......
/*
* Copyright(c) 2015 Intel Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#ifndef __ASM_X86_PMEM_H__
#define __ASM_X86_PMEM_H__
#include <linux/uaccess.h>
#include <asm/cacheflush.h>
#include <asm/cpufeature.h>
#include <asm/special_insns.h>
#ifdef CONFIG_ARCH_HAS_PMEM_API
/**
 * arch_memcpy_to_pmem - copy a kernel buffer into persistent memory
 * @dst: destination buffer for the copy
 * @src: source buffer for the copy
 * @n: length of the copy in bytes
 *
 * The copy is delegated to __copy_from_user_inatomic_nocache() (with @src
 * cast to a __user pointer) so that a subsequent pmem driver flush
 * operation will drain posted write queues.
 */
static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n)
{
	/*
	 * Both buffers are kernel memory.  Should the copy helper report
	 * unwritten bytes (page fault), a general protection fault would
	 * already have been reported before we reach the WARN+BUG below.
	 */
	int unwritten = __copy_from_user_inatomic_nocache(dst,
			(void __user *) src, n);

	if (WARN(unwritten, "%s: fault copying %p <- %p unwritten: %d\n",
				__func__, dst, src, unwritten))
		BUG();
}
/**
 * arch_wb_cache_pmem - write back a cache range with CLWB
 * @addr: virtual start address
 * @size: number of bytes to write back
 *
 * Issue clwb() on every cache line that overlaps [@addr, @addr + @size).
 * The start is rounded down to a cache-line boundary, so the range is
 * effectively widened to whole cache lines.
 */
static inline void arch_wb_cache_pmem(void *addr, size_t size)
{
	const unsigned long line = boot_cpu_data.x86_clflush_size;
	const unsigned long end = (unsigned long)addr + size;
	unsigned long cur = (unsigned long)addr & ~(line - 1);

	while (cur < end) {
		clwb((void *)cur);
		cur += line;
	}
}
/**
* arch_copy_from_iter_pmem - copy data from an iterator to PMEM
* @addr: PMEM destination address
* @bytes: number of bytes to copy
* @i: iterator with source data
*
* Copy data from the iterator 'i' to the PMEM buffer starting at 'addr',
* then write back any part of the destination that may have been written
* through the CPU cache.
*
* Return: the byte count reported by copy_from_iter_nocache().
*/
static inline size_t arch_copy_from_iter_pmem(void *addr, size_t bytes,
struct iov_iter *i)
{
size_t len;
/* TODO: skip the write-back by always using non-temporal stores */
len = copy_from_iter_nocache(addr, bytes, i);
/*
* In the iovec case on x86_64 copy_from_iter_nocache() uses
* non-temporal stores for the bulk of the transfer, but we need
* to manually flush if the transfer is unaligned. A cached
* memory copy is used when destination or size is not naturally
* aligned. That is:
* - Require 8-byte alignment when size is 8 bytes or larger.
* - Require 4-byte alignment when size is 4 bytes.
*
* In the non-iovec case the entire destination needs to be
* flushed.
*
* Note that the write-back ranges below are derived from the
* requested 'bytes', not from the returned 'len'.
*/
if (iter_is_iovec(i)) {
unsigned long flushed, dest = (unsigned long) addr;
if (bytes < 8) {
/* short copy: flush all of it unless it is one aligned 4-byte store */
if (!IS_ALIGNED(dest, 4) || (bytes != 4))
arch_wb_cache_pmem(addr, bytes);
} else {
/* unaligned head: write back the cache line holding the first byte */
if (!IS_ALIGNED(dest, 8)) {
dest = ALIGN(dest, boot_cpu_data.x86_clflush_size);
arch_wb_cache_pmem(addr, 1);
}
/* 'flushed' is the number of leading bytes covered by the head flush */
flushed = dest - (unsigned long) addr;
/* unaligned tail: write back the cache line holding the last byte */
if (bytes > flushed && !IS_ALIGNED(bytes - flushed, 8))
arch_wb_cache_pmem(addr + bytes - 1, 1);
}
} else
arch_wb_cache_pmem(addr, bytes);
return len;
}
/**
* arch_clear_pmem - zero a PMEM memory range
* @addr: virtual start address
* @size: number of bytes to zero
*
* Write zeros into the memory range starting at 'addr' for 'size' bytes,
* then write the same range back with arch_wb_cache_pmem().
*/
static inline void arch_clear_pmem(void *addr, size_t size)
{
/* zero the range, then write back the cache lines that cover it */
memset(addr, 0, size);
arch_wb_cache_pmem(addr, size);
}
/**
* arch_invalidate_pmem - flush a PMEM range out of the CPU cache
* @addr: virtual start address
* @size: number of bytes to flush
*
* Thin wrapper that hands the range to clflush_cache_range().
*/
static inline void arch_invalidate_pmem(void *addr, size_t size)
{
clflush_cache_range(addr, size);
}
#endif /* CONFIG_ARCH_HAS_PMEM_API */
#endif /* __ASM_X86_PMEM_H__ */
...@@ -109,6 +109,11 @@ memcpy_mcsafe(void *dst, const void *src, size_t cnt) ...@@ -109,6 +109,11 @@ memcpy_mcsafe(void *dst, const void *src, size_t cnt)
return 0; return 0;
} }
#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
#define __HAVE_ARCH_MEMCPY_FLUSHCACHE 1
void memcpy_flushcache(void *dst, const void *src, size_t cnt);
#endif
#endif /* __KERNEL__ */ #endif /* __KERNEL__ */
#endif /* _ASM_X86_STRING_64_H */ #endif /* _ASM_X86_STRING_64_H */
...@@ -171,6 +171,10 @@ unsigned long raw_copy_in_user(void __user *dst, const void __user *src, unsigne ...@@ -171,6 +171,10 @@ unsigned long raw_copy_in_user(void __user *dst, const void __user *src, unsigne
extern long __copy_user_nocache(void *dst, const void __user *src, extern long __copy_user_nocache(void *dst, const void __user *src,
unsigned size, int zerorest); unsigned size, int zerorest);
extern long __copy_user_flushcache(void *dst, const void __user *src, unsigned size);
extern void memcpy_page_flushcache(char *to, struct page *page, size_t offset,
size_t len);
static inline int static inline int
__copy_from_user_inatomic_nocache(void *dst, const void __user *src, __copy_from_user_inatomic_nocache(void *dst, const void __user *src,
unsigned size) unsigned size)
...@@ -179,6 +183,13 @@ __copy_from_user_inatomic_nocache(void *dst, const void __user *src, ...@@ -179,6 +183,13 @@ __copy_from_user_inatomic_nocache(void *dst, const void __user *src,
return __copy_user_nocache(dst, src, size, 0); return __copy_user_nocache(dst, src, size, 0);
} }
/*
 * KASAN-checked front end for __copy_user_flushcache(): validates the
 * @size-byte write to @dst with kasan_check_write() and then performs the
 * copy, returning the helper's result.
 * NOTE(review): __copy_user_flushcache() is declared to return long while
 * this wrapper returns int -- confirm the narrowing is intended.
 */
static inline int
__copy_from_user_flushcache(void *dst, const void __user *src, unsigned size)
{
kasan_check_write(dst, size);
return __copy_user_flushcache(dst, src, size);
}
unsigned long unsigned long
copy_user_handle_tail(char *to, char *from, unsigned len); copy_user_handle_tail(char *to, char *from, unsigned len);
......
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
*/ */
#include <linux/export.h> #include <linux/export.h>
#include <linux/uaccess.h> #include <linux/uaccess.h>
#include <linux/highmem.h>
/* /*
* Zero Userspace * Zero Userspace
...@@ -73,3 +74,136 @@ copy_user_handle_tail(char *to, char *from, unsigned len) ...@@ -73,3 +74,136 @@ copy_user_handle_tail(char *to, char *from, unsigned len)
clac(); clac();
return len; return len;
} }
#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
/**
 * clean_cache_range - write back a cache range with CLWB
 * @addr: virtual start address
 * @size: number of bytes to write back
 *
 * Issue clwb() on every cache line that overlaps [@addr, @addr + @size).
 * The start is rounded down to a cache-line boundary, so the range is
 * effectively widened to whole cache lines.
 */
static void clean_cache_range(void *addr, size_t size)
{
	const unsigned long line = boot_cpu_data.x86_clflush_size;
	const unsigned long end = (unsigned long)addr + size;
	unsigned long cur = (unsigned long)addr & ~(line - 1);

	while (cur < end) {
		clwb((void *)cur);
		cur += line;
	}
}
/*
 * Exported (GPL-only) entry point for writing back a cache range;
 * simply forwards @addr and @size to clean_cache_range().
 */
void arch_wb_cache_pmem(void *addr, size_t size)
{
clean_cache_range(addr, size);
}
EXPORT_SYMBOL_GPL(arch_wb_cache_pmem);
/**
 * __copy_user_flushcache - copy from user space without leaving dirty lines
 * @dst: kernel destination buffer
 * @src: user-space source buffer
 * @size: number of bytes to copy
 *
 * Performs the copy with __copy_user_nocache() and then writes back the
 * parts of the destination that may have gone through the CPU cache.
 *
 * Return: the value returned by __copy_user_nocache().
 */
long __copy_user_flushcache(void *dst, const void __user *src, unsigned size)
{
	unsigned long flushed, dest = (unsigned long) dst;
	long rc = __copy_user_nocache(dst, src, size, 0);

	/*
	 * __copy_user_nocache() uses non-temporal stores for the bulk
	 * of the transfer, but we need to manually flush if the
	 * transfer is unaligned. A cached memory copy is used when
	 * destination or size is not naturally aligned. That is:
	 *  - Require 8-byte alignment when size is 8 bytes or larger.
	 *  - Require 4-byte alignment when size is 4 bytes.
	 */
	if (size < 8) {
		/*
		 * A copy shorter than 8 bytes can still straddle a cache-line
		 * boundary, so write back the whole [dst, dst + size) range
		 * rather than only the line that holds the first byte.
		 */
		if (!IS_ALIGNED(dest, 4) || size != 4)
			clean_cache_range(dst, size);
	} else {
		/* unaligned head: write back the line holding the first byte */
		if (!IS_ALIGNED(dest, 8)) {
			dest = ALIGN(dest, boot_cpu_data.x86_clflush_size);
			clean_cache_range(dst, 1);
		}

		/* number of leading bytes already covered by the head flush */
		flushed = dest - (unsigned long) dst;

		/* unaligned tail: write back the line holding the last byte */
		if (size > flushed && !IS_ALIGNED(size - flushed, 8))
			clean_cache_range(dst + size - 1, 1);
	}

	return rc;
}
void memcpy_flushcache(void *_dst, const void *_src, size_t size)
{
unsigned long dest = (unsigned long) _dst;
unsigned long source = (unsigned long) _src;
/* cache copy and flush to align dest */
if (!IS_ALIGNED(dest, 8)) {
unsigned len = min_t(unsigned, size, ALIGN(dest, 8) - dest);
memcpy((void *) dest, (void *) source, len);
clean_cache_range((void *) dest, len);
dest += len;
source += len;
size -= len;
if (!size)
return;
}
/* 4x8 movnti loop */
while (size >= 32) {
asm("movq (%0), %%r8\n"
"movq 8(%0), %%r9\n"
"movq 16(%0), %%r10\n"
"movq 24(%0), %%r11\n"
"movnti %%r8, (%1)\n"
"movnti %%r9, 8(%1)\n"
"movnti %%r10, 16(%1)\n"
"movnti %%r11, 24(%1)\n"
:: "r" (source), "r" (dest)
: "memory", "r8", "r9", "r10", "r11");
dest += 32;
source += 32;
size -= 32;
}
/* 1x8 movnti loop */
while (size >= 8) {
asm("movq (%0), %%r8\n"
"movnti %%r8, (%1)\n"
:: "r" (source), "r" (dest)
: "memory", "r8");
dest += 8;
source += 8;
size -= 8;
}
/* 1x4 movnti loop */
while (size >= 4) {
asm("movl (%0), %%r8d\n"
"movnti %%r8d, (%1)\n"
:: "r" (source), "r" (dest)
: "memory", "r8");
dest += 4;
source += 4;
size -= 4;
}
/* cache copy for remaining bytes */
if (size) {
memcpy((void *) dest, (void *) source, size);
clean_cache_range((void *) dest, size);
}
}
EXPORT_SYMBOL_GPL(memcpy_flushcache);
/*
 * Copy @len bytes, starting @offset bytes into @page, to @to using
 * memcpy_flushcache().  The page is mapped with kmap_atomic() for the
 * duration of the copy and unmapped again afterwards.
 */
void memcpy_page_flushcache(char *to, struct page *page, size_t offset,
		size_t len)
{
	char *mapped = kmap_atomic(page);
	const char *src = mapped + offset;

	memcpy_flushcache(to, src, len);
	kunmap_atomic(mapped);
}
#endif
...@@ -150,6 +150,12 @@ void clflush_cache_range(void *vaddr, unsigned int size) ...@@ -150,6 +150,12 @@ void clflush_cache_range(void *vaddr, unsigned int size)
} }
EXPORT_SYMBOL_GPL(clflush_cache_range); EXPORT_SYMBOL_GPL(clflush_cache_range);
/*
 * Exported (GPL-only) entry point that flushes [@addr, @addr + @size) by
 * forwarding to clflush_cache_range().
 * NOTE(review): clflush_cache_range() takes an 'unsigned int' size, so a
 * size_t @size above UINT_MAX would be narrowed here -- confirm callers
 * never pass such ranges.
 */
void arch_invalidate_pmem(void *addr, size_t size)
{
clflush_cache_range(addr, size);
}
EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
static void __cpa_flush_all(void *arg) static void __cpa_flush_all(void *arg)
{ {
unsigned long cache = (unsigned long)arg; unsigned long cache = (unsigned long)arg;
......
...@@ -20,7 +20,6 @@ ...@@ -20,7 +20,6 @@
#include <linux/list.h> #include <linux/list.h>
#include <linux/acpi.h> #include <linux/acpi.h>
#include <linux/sort.h> #include <linux/sort.h>
#include <linux/pmem.h>
#include <linux/io.h> #include <linux/io.h>
#include <linux/nd.h> #include <linux/nd.h>
#include <asm/cacheflush.h> #include <asm/cacheflush.h>
...@@ -253,6 +252,8 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, ...@@ -253,6 +252,8 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
cmd_name = nvdimm_bus_cmd_name(cmd); cmd_name = nvdimm_bus_cmd_name(cmd);
cmd_mask = nd_desc->cmd_mask; cmd_mask = nd_desc->cmd_mask;
dsm_mask = cmd_mask; dsm_mask = cmd_mask;
if (cmd == ND_CMD_CALL)
dsm_mask = nd_desc->bus_dsm_mask;
desc = nd_cmd_bus_desc(cmd); desc = nd_cmd_bus_desc(cmd);
guid = to_nfit_uuid(NFIT_DEV_BUS); guid = to_nfit_uuid(NFIT_DEV_BUS);
handle = adev->handle; handle = adev->handle;
...@@ -927,6 +928,17 @@ static int nfit_mem_init(struct acpi_nfit_desc *acpi_desc) ...@@ -927,6 +928,17 @@ static int nfit_mem_init(struct acpi_nfit_desc *acpi_desc)
return 0; return 0;
} }
/*
 * sysfs show handler for the read-only (0444) 'dsm_mask' attribute:
 * prints the bus descriptor's bus_dsm_mask as a '0x'-prefixed hex value
 * followed by a newline.
 */
static ssize_t bus_dsm_mask_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct nvdimm_bus_descriptor *desc = to_nd_desc(to_nvdimm_bus(dev));

	return sprintf(buf, "%#lx\n", desc->bus_dsm_mask);
}
static struct device_attribute dev_attr_bus_dsm_mask =
		__ATTR(dsm_mask, 0444, bus_dsm_mask_show, NULL);
static ssize_t revision_show(struct device *dev, static ssize_t revision_show(struct device *dev,
struct device_attribute *attr, char *buf) struct device_attribute *attr, char *buf)
{ {
...@@ -1031,7 +1043,7 @@ static ssize_t scrub_store(struct device *dev, ...@@ -1031,7 +1043,7 @@ static ssize_t scrub_store(struct device *dev,
if (nd_desc) { if (nd_desc) {
struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
rc = acpi_nfit_ars_rescan(acpi_desc); rc = acpi_nfit_ars_rescan(acpi_desc, 0);
} }
device_unlock(dev); device_unlock(dev);
if (rc) if (rc)
...@@ -1063,10 +1075,11 @@ static struct attribute *acpi_nfit_attributes[] = { ...@@ -1063,10 +1075,11 @@ static struct attribute *acpi_nfit_attributes[] = {
&dev_attr_revision.attr, &dev_attr_revision.attr,
&dev_attr_scrub.attr, &dev_attr_scrub.attr,
&dev_attr_hw_error_scrub.attr, &dev_attr_hw_error_scrub.attr,
&dev_attr_bus_dsm_mask.attr,
NULL, NULL,
}; };
static struct attribute_group acpi_nfit_attribute_group = { static const struct attribute_group acpi_nfit_attribute_group = {
.name = "nfit", .name = "nfit",
.attrs = acpi_nfit_attributes, .attrs = acpi_nfit_attributes,
.is_visible = nfit_visible, .is_visible = nfit_visible,
...@@ -1346,7 +1359,7 @@ static umode_t acpi_nfit_dimm_attr_visible(struct kobject *kobj, ...@@ -1346,7 +1359,7 @@ static umode_t acpi_nfit_dimm_attr_visible(struct kobject *kobj,
return a->mode; return a->mode;
} }
static struct attribute_group acpi_nfit_dimm_attribute_group = { static const struct attribute_group acpi_nfit_dimm_attribute_group = {
.name = "nfit", .name = "nfit",
.attrs = acpi_nfit_dimm_attributes, .attrs = acpi_nfit_dimm_attributes,
.is_visible = acpi_nfit_dimm_attr_visible, .is_visible = acpi_nfit_dimm_attr_visible,
...@@ -1608,11 +1621,23 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc) ...@@ -1608,11 +1621,23 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
acpi_desc); acpi_desc);
} }
/*
 * These constants are private because there are no kernel consumers of
 * these commands.
 *
 * Each value is used as a bit position: acpi_nfit_init_dsms() builds its
 * candidate mask from (1 << NFIT_CMD_*) and probes each function with
 * acpi_check_dsm() before setting the bit in nd_desc->bus_dsm_mask.
 */
enum nfit_aux_cmds {
NFIT_CMD_TRANSLATE_SPA = 5,
NFIT_CMD_ARS_INJECT_SET = 7,
NFIT_CMD_ARS_INJECT_CLEAR = 8,
NFIT_CMD_ARS_INJECT_GET = 9,
};
static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc) static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc)
{ {
struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc; struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
const guid_t *guid = to_nfit_uuid(NFIT_DEV_BUS); const guid_t *guid = to_nfit_uuid(NFIT_DEV_BUS);
struct acpi_device *adev; struct acpi_device *adev;
unsigned long dsm_mask;
int i; int i;
nd_desc->cmd_mask = acpi_desc->bus_cmd_force_en; nd_desc->cmd_mask = acpi_desc->bus_cmd_force_en;
...@@ -1623,6 +1648,20 @@ static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc) ...@@ -1623,6 +1648,20 @@ static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc)
for (i = ND_CMD_ARS_CAP; i <= ND_CMD_CLEAR_ERROR; i++) for (i = ND_CMD_ARS_CAP; i <= ND_CMD_CLEAR_ERROR; i++)
if (acpi_check_dsm(adev->handle, guid, 1, 1ULL << i)) if (acpi_check_dsm(adev->handle, guid, 1, 1ULL << i))
set_bit(i, &nd_desc->cmd_mask); set_bit(i, &nd_desc->cmd_mask);
set_bit(ND_CMD_CALL, &nd_desc->cmd_mask);
dsm_mask =
(1 << ND_CMD_ARS_CAP) |
(1 << ND_CMD_ARS_START) |
(1 << ND_CMD_ARS_STATUS) |
(1 << ND_CMD_CLEAR_ERROR) |
(1 << NFIT_CMD_TRANSLATE_SPA) |
(1 << NFIT_CMD_ARS_INJECT_SET) |
(1 << NFIT_CMD_ARS_INJECT_CLEAR) |
(1 << NFIT_CMD_ARS_INJECT_GET);
for_each_set_bit(i, &dsm_mask, BITS_PER_LONG)
if (acpi_check_dsm(adev->handle, guid, 1, 1ULL << i))
set_bit(i, &nd_desc->bus_dsm_mask);
} }
static ssize_t range_index_show(struct device *dev, static ssize_t range_index_show(struct device *dev,
...@@ -1640,7 +1679,7 @@ static struct attribute *acpi_nfit_region_attributes[] = { ...@@ -1640,7 +1679,7 @@ static struct attribute *acpi_nfit_region_attributes[] = {
NULL, NULL,
}; };
static struct attribute_group acpi_nfit_region_attribute_group = { static const struct attribute_group acpi_nfit_region_attribute_group = {
.name = "nfit", .name = "nfit",
.attrs = acpi_nfit_region_attributes, .attrs = acpi_nfit_region_attributes,
}; };
...@@ -1663,12 +1702,29 @@ struct nfit_set_info { ...@@ -1663,12 +1702,29 @@ struct nfit_set_info {
} mapping[0]; } mapping[0];
}; };
/*
 * Per-mapping records that are sorted by region_offset and checksummed
 * with nd_fletcher64() to produce the interleave-set cookie used for v1.2
 * namespaces (nd_set->cookie2).  The layout feeds the checksum directly,
 * so field order, widths and the zeroed reserved[] padding must not change.
 */
struct nfit_set_info2 {
struct nfit_set_info_map2 {
u64 region_offset;
u32 serial_number;
u16 vendor_id;
u16 manufacturing_date;
u8 manufacturing_location;
u8 reserved[31];
} mapping[0];
};
static size_t sizeof_nfit_set_info(int num_mappings) static size_t sizeof_nfit_set_info(int num_mappings)
{ {
return sizeof(struct nfit_set_info) return sizeof(struct nfit_set_info)
+ num_mappings * sizeof(struct nfit_set_info_map); + num_mappings * sizeof(struct nfit_set_info_map);
} }
/*
 * Bytes needed for a struct nfit_set_info2 that carries @num_mappings
 * trailing struct nfit_set_info_map2 entries.
 */
static size_t sizeof_nfit_set_info2(int num_mappings)
{
	size_t total = sizeof(struct nfit_set_info2);

	total += num_mappings * sizeof(struct nfit_set_info_map2);
	return total;
}
static int cmp_map_compat(const void *m0, const void *m1) static int cmp_map_compat(const void *m0, const void *m1)
{ {
const struct nfit_set_info_map *map0 = m0; const struct nfit_set_info_map *map0 = m0;
...@@ -1690,6 +1746,18 @@ static int cmp_map(const void *m0, const void *m1) ...@@ -1690,6 +1746,18 @@ static int cmp_map(const void *m0, const void *m1)
return 0; return 0;
} }
/*
 * sort() comparator for struct nfit_set_info_map2: orders entries by
 * ascending region_offset.  Returns -1, 0 or 1.
 */
static int cmp_map2(const void *m0, const void *m1)
{
	const struct nfit_set_info_map2 *lhs = m0, *rhs = m1;

	/* (greater) - (less) yields 1, 0 or -1 without branching */
	return (lhs->region_offset > rhs->region_offset) -
		(lhs->region_offset < rhs->region_offset);
}
/* Retrieve the nth entry referencing this spa */ /* Retrieve the nth entry referencing this spa */
static struct acpi_nfit_memory_map *memdev_from_spa( static struct acpi_nfit_memory_map *memdev_from_spa(
struct acpi_nfit_desc *acpi_desc, u16 range_index, int n) struct acpi_nfit_desc *acpi_desc, u16 range_index, int n)
...@@ -1707,27 +1775,31 @@ static int acpi_nfit_init_interleave_set(struct acpi_nfit_desc *acpi_desc, ...@@ -1707,27 +1775,31 @@ static int acpi_nfit_init_interleave_set(struct acpi_nfit_desc *acpi_desc,
struct nd_region_desc *ndr_desc, struct nd_region_desc *ndr_desc,
struct acpi_nfit_system_address *spa) struct acpi_nfit_system_address *spa)
{ {
int i, spa_type = nfit_spa_type(spa);
struct device *dev = acpi_desc->dev; struct device *dev = acpi_desc->dev;
struct nd_interleave_set *nd_set; struct nd_interleave_set *nd_set;
u16 nr = ndr_desc->num_mappings; u16 nr = ndr_desc->num_mappings;
struct nfit_set_info2 *info2;
struct nfit_set_info *info; struct nfit_set_info *info;
int i;
if (spa_type == NFIT_SPA_PM || spa_type == NFIT_SPA_VOLATILE)
/* pass */;
else
return 0;
nd_set = devm_kzalloc(dev, sizeof(*nd_set), GFP_KERNEL); nd_set = devm_kzalloc(dev, sizeof(*nd_set), GFP_KERNEL);
if (!nd_set) if (!nd_set)
return -ENOMEM; return -ENOMEM;
ndr_desc->nd_set = nd_set;
guid_copy(&nd_set->type_guid, (guid_t *) spa->range_guid);
info = devm_kzalloc(dev, sizeof_nfit_set_info(nr), GFP_KERNEL); info = devm_kzalloc(dev, sizeof_nfit_set_info(nr), GFP_KERNEL);
if (!info) if (!info)
return -ENOMEM; return -ENOMEM;
info2 = devm_kzalloc(dev, sizeof_nfit_set_info2(nr), GFP_KERNEL);
if (!info2)
return -ENOMEM;
for (i = 0; i < nr; i++) { for (i = 0; i < nr; i++) {
struct nd_mapping_desc *mapping = &ndr_desc->mapping[i]; struct nd_mapping_desc *mapping = &ndr_desc->mapping[i];
struct nfit_set_info_map *map = &info->mapping[i]; struct nfit_set_info_map *map = &info->mapping[i];
struct nfit_set_info_map2 *map2 = &info2->mapping[i];
struct nvdimm *nvdimm = mapping->nvdimm; struct nvdimm *nvdimm = mapping->nvdimm;
struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
struct acpi_nfit_memory_map *memdev = memdev_from_spa(acpi_desc, struct acpi_nfit_memory_map *memdev = memdev_from_spa(acpi_desc,
...@@ -1740,19 +1812,32 @@ static int acpi_nfit_init_interleave_set(struct acpi_nfit_desc *acpi_desc, ...@@ -1740,19 +1812,32 @@ static int acpi_nfit_init_interleave_set(struct acpi_nfit_desc *acpi_desc,
map->region_offset = memdev->region_offset; map->region_offset = memdev->region_offset;
map->serial_number = nfit_mem->dcr->serial_number; map->serial_number = nfit_mem->dcr->serial_number;
map2->region_offset = memdev->region_offset;
map2->serial_number = nfit_mem->dcr->serial_number;
map2->vendor_id = nfit_mem->dcr->vendor_id;
map2->manufacturing_date = nfit_mem->dcr->manufacturing_date;
map2->manufacturing_location = nfit_mem->dcr->manufacturing_location;
} }
/* v1.1 namespaces */
sort(&info->mapping[0], nr, sizeof(struct nfit_set_info_map), sort(&info->mapping[0], nr, sizeof(struct nfit_set_info_map),
cmp_map, NULL); cmp_map, NULL);
nd_set->cookie = nd_fletcher64(info, sizeof_nfit_set_info(nr), 0); nd_set->cookie1 = nd_fletcher64(info, sizeof_nfit_set_info(nr), 0);
/* v1.2 namespaces */
sort(&info2->mapping[0], nr, sizeof(struct nfit_set_info_map2),
cmp_map2, NULL);
nd_set->cookie2 = nd_fletcher64(info2, sizeof_nfit_set_info2(nr), 0);
/* support namespaces created with the wrong sort order */ /* support v1.1 namespaces created with the wrong sort order */
sort(&info->mapping[0], nr, sizeof(struct nfit_set_info_map), sort(&info->mapping[0], nr, sizeof(struct nfit_set_info_map),
cmp_map_compat, NULL); cmp_map_compat, NULL);
nd_set->altcookie = nd_fletcher64(info, sizeof_nfit_set_info(nr), 0); nd_set->altcookie = nd_fletcher64(info, sizeof_nfit_set_info(nr), 0);
ndr_desc->nd_set = nd_set; ndr_desc->nd_set = nd_set;
devm_kfree(dev, info); devm_kfree(dev, info);
devm_kfree(dev, info2);
return 0; return 0;
} }
...@@ -1842,8 +1927,7 @@ static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk, ...@@ -1842,8 +1927,7 @@ static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk,
} }
if (rw) if (rw)
memcpy_to_pmem(mmio->addr.aperture + offset, memcpy_flushcache(mmio->addr.aperture + offset, iobuf + copied, c);
iobuf + copied, c);
else { else {
if (nfit_blk->dimm_flags & NFIT_BLK_READ_FLUSH) if (nfit_blk->dimm_flags & NFIT_BLK_READ_FLUSH)
mmio_flush_range((void __force *) mmio_flush_range((void __force *)
...@@ -1957,7 +2041,7 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus, ...@@ -1957,7 +2041,7 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
nfit_blk->bdw_offset = nfit_mem->bdw->offset; nfit_blk->bdw_offset = nfit_mem->bdw->offset;
mmio = &nfit_blk->mmio[BDW]; mmio = &nfit_blk->mmio[BDW];
mmio->addr.base = devm_nvdimm_memremap(dev, nfit_mem->spa_bdw->address, mmio->addr.base = devm_nvdimm_memremap(dev, nfit_mem->spa_bdw->address,
nfit_mem->spa_bdw->length, ARCH_MEMREMAP_PMEM); nfit_mem->spa_bdw->length, nd_blk_memremap_flags(ndbr));
if (!mmio->addr.base) { if (!mmio->addr.base) {
dev_dbg(dev, "%s: %s failed to map bdw\n", __func__, dev_dbg(dev, "%s: %s failed to map bdw\n", __func__,
nvdimm_name(nvdimm)); nvdimm_name(nvdimm));
...@@ -2051,6 +2135,7 @@ static int ars_start(struct acpi_nfit_desc *acpi_desc, struct nfit_spa *nfit_spa ...@@ -2051,6 +2135,7 @@ static int ars_start(struct acpi_nfit_desc *acpi_desc, struct nfit_spa *nfit_spa
memset(&ars_start, 0, sizeof(ars_start)); memset(&ars_start, 0, sizeof(ars_start));
ars_start.address = spa->address; ars_start.address = spa->address;
ars_start.length = spa->length; ars_start.length = spa->length;
ars_start.flags = acpi_desc->ars_start_flags;
if (nfit_spa_type(spa) == NFIT_SPA_PM) if (nfit_spa_type(spa) == NFIT_SPA_PM)
ars_start.type = ND_ARS_PERSISTENT; ars_start.type = ND_ARS_PERSISTENT;
else if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE) else if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE)
...@@ -2077,6 +2162,7 @@ static int ars_continue(struct acpi_nfit_desc *acpi_desc) ...@@ -2077,6 +2162,7 @@ static int ars_continue(struct acpi_nfit_desc *acpi_desc)
ars_start.address = ars_status->restart_address; ars_start.address = ars_status->restart_address;
ars_start.length = ars_status->restart_length; ars_start.length = ars_status->restart_length;
ars_start.type = ars_status->type; ars_start.type = ars_status->type;
ars_start.flags = acpi_desc->ars_start_flags;
rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_START, &ars_start, rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_START, &ars_start,
sizeof(ars_start), &cmd_rc); sizeof(ars_start), &cmd_rc);
if (rc < 0) if (rc < 0)
...@@ -2179,7 +2265,7 @@ static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc, ...@@ -2179,7 +2265,7 @@ static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc,
struct acpi_nfit_system_address *spa = nfit_spa->spa; struct acpi_nfit_system_address *spa = nfit_spa->spa;
struct nd_blk_region_desc *ndbr_desc; struct nd_blk_region_desc *ndbr_desc;
struct nfit_mem *nfit_mem; struct nfit_mem *nfit_mem;
int blk_valid = 0; int blk_valid = 0, rc;
if (!nvdimm) { if (!nvdimm) {
dev_err(acpi_desc->dev, "spa%d dimm: %#x not found\n", dev_err(acpi_desc->dev, "spa%d dimm: %#x not found\n",
...@@ -2211,6 +2297,9 @@ static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc, ...@@ -2211,6 +2297,9 @@ static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc,
ndbr_desc = to_blk_region_desc(ndr_desc); ndbr_desc = to_blk_region_desc(ndr_desc);
ndbr_desc->enable = acpi_nfit_blk_region_enable; ndbr_desc->enable = acpi_nfit_blk_region_enable;
ndbr_desc->do_io = acpi_desc->blk_do_io; ndbr_desc->do_io = acpi_desc->blk_do_io;
rc = acpi_nfit_init_interleave_set(acpi_desc, ndr_desc, spa);
if (rc)
return rc;
nfit_spa->nd_region = nvdimm_blk_region_create(acpi_desc->nvdimm_bus, nfit_spa->nd_region = nvdimm_blk_region_create(acpi_desc->nvdimm_bus,
ndr_desc); ndr_desc);
if (!nfit_spa->nd_region) if (!nfit_spa->nd_region)
...@@ -2229,6 +2318,13 @@ static bool nfit_spa_is_virtual(struct acpi_nfit_system_address *spa) ...@@ -2229,6 +2318,13 @@ static bool nfit_spa_is_virtual(struct acpi_nfit_system_address *spa)
nfit_spa_type(spa) == NFIT_SPA_PCD); nfit_spa_type(spa) == NFIT_SPA_PCD);
} }
/*
 * True when @spa's range type is one of NFIT_SPA_VDISK, NFIT_SPA_VCD or
 * NFIT_SPA_VOLATILE; false for every other type.
 */
static bool nfit_spa_is_volatile(struct acpi_nfit_system_address *spa)
{
	switch (nfit_spa_type(spa)) {
	case NFIT_SPA_VDISK:
	case NFIT_SPA_VCD:
	case NFIT_SPA_VOLATILE:
		return true;
	default:
		return false;
	}
}
static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc, static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
struct nfit_spa *nfit_spa) struct nfit_spa *nfit_spa)
{ {
...@@ -2303,7 +2399,7 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc, ...@@ -2303,7 +2399,7 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
ndr_desc); ndr_desc);
if (!nfit_spa->nd_region) if (!nfit_spa->nd_region)
rc = -ENOMEM; rc = -ENOMEM;
} else if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE) { } else if (nfit_spa_is_volatile(spa)) {
nfit_spa->nd_region = nvdimm_volatile_region_create(nvdimm_bus, nfit_spa->nd_region = nvdimm_volatile_region_create(nvdimm_bus,
ndr_desc); ndr_desc);
if (!nfit_spa->nd_region) if (!nfit_spa->nd_region)
...@@ -2595,6 +2691,7 @@ static void acpi_nfit_scrub(struct work_struct *work) ...@@ -2595,6 +2691,7 @@ static void acpi_nfit_scrub(struct work_struct *work)
list_for_each_entry(nfit_spa, &acpi_desc->spas, list) list_for_each_entry(nfit_spa, &acpi_desc->spas, list)
acpi_nfit_async_scrub(acpi_desc, nfit_spa); acpi_nfit_async_scrub(acpi_desc, nfit_spa);
acpi_desc->scrub_count++; acpi_desc->scrub_count++;
acpi_desc->ars_start_flags = 0;
if (acpi_desc->scrub_count_state) if (acpi_desc->scrub_count_state)
sysfs_notify_dirent(acpi_desc->scrub_count_state); sysfs_notify_dirent(acpi_desc->scrub_count_state);
mutex_unlock(&acpi_desc->init_mutex); mutex_unlock(&acpi_desc->init_mutex);
...@@ -2613,6 +2710,7 @@ static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc) ...@@ -2613,6 +2710,7 @@ static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc)
return rc; return rc;
} }
acpi_desc->ars_start_flags = 0;
if (!acpi_desc->cancel) if (!acpi_desc->cancel)
queue_work(nfit_wq, &acpi_desc->work); queue_work(nfit_wq, &acpi_desc->work);
return 0; return 0;
...@@ -2817,7 +2915,7 @@ static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc, ...@@ -2817,7 +2915,7 @@ static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc,
return 0; return 0;
} }
int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc) int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, u8 flags)
{ {
struct device *dev = acpi_desc->dev; struct device *dev = acpi_desc->dev;
struct nfit_spa *nfit_spa; struct nfit_spa *nfit_spa;
...@@ -2839,6 +2937,7 @@ int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc) ...@@ -2839,6 +2937,7 @@ int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc)
nfit_spa->ars_required = 1; nfit_spa->ars_required = 1;
} }
acpi_desc->ars_start_flags = flags;
queue_work(nfit_wq, &acpi_desc->work); queue_work(nfit_wq, &acpi_desc->work);
dev_dbg(dev, "%s: ars_scan triggered\n", __func__); dev_dbg(dev, "%s: ars_scan triggered\n", __func__);
mutex_unlock(&acpi_desc->init_mutex); mutex_unlock(&acpi_desc->init_mutex);
...@@ -2967,7 +3066,7 @@ static int acpi_nfit_remove(struct acpi_device *adev) ...@@ -2967,7 +3066,7 @@ static int acpi_nfit_remove(struct acpi_device *adev)
return 0; return 0;
} }
void __acpi_nfit_notify(struct device *dev, acpi_handle handle, u32 event) static void acpi_nfit_update_notify(struct device *dev, acpi_handle handle)
{ {
struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(dev); struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(dev);
struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL }; struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL };
...@@ -2975,11 +3074,6 @@ void __acpi_nfit_notify(struct device *dev, acpi_handle handle, u32 event) ...@@ -2975,11 +3074,6 @@ void __acpi_nfit_notify(struct device *dev, acpi_handle handle, u32 event)
acpi_status status; acpi_status status;
int ret; int ret;
dev_dbg(dev, "%s: event: %d\n", __func__, event);
if (event != NFIT_NOTIFY_UPDATE)
return;
if (!dev->driver) { if (!dev->driver) {
/* dev->driver may be null if we're being removed */ /* dev->driver may be null if we're being removed */
dev_dbg(dev, "%s: no driver found for dev\n", __func__); dev_dbg(dev, "%s: no driver found for dev\n", __func__);
...@@ -3016,6 +3110,29 @@ void __acpi_nfit_notify(struct device *dev, acpi_handle handle, u32 event) ...@@ -3016,6 +3110,29 @@ void __acpi_nfit_notify(struct device *dev, acpi_handle handle, u32 event)
dev_err(dev, "Invalid _FIT\n"); dev_err(dev, "Invalid _FIT\n");
kfree(buf.pointer); kfree(buf.pointer);
} }
/*
 * acpi_nfit_uc_error_notify() - handle NFIT_NOTIFY_UC_MEMORY_ERROR
 * @dev: device whose drvdata is the struct acpi_nfit_desc
 * @handle: ACPI handle of the notifying object (unused here)
 *
 * Requests an ARS rescan.  The rescan is started with flags == 0 when
 * scrub_mode is HW_ERROR_SCRUB_ON, and with ND_ARS_RETURN_PREV_DATA for
 * every other scrub_mode value.  The result of acpi_nfit_ars_rescan()
 * is not checked.
 */
static void acpi_nfit_uc_error_notify(struct device *dev, acpi_handle handle)
{
	struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(dev);
	u8 flags = ND_ARS_RETURN_PREV_DATA;

	if (acpi_desc->scrub_mode == HW_ERROR_SCRUB_ON)
		flags = 0;

	acpi_nfit_ars_rescan(acpi_desc, flags);
}
/*
 * __acpi_nfit_notify() - dispatch a root-device NFIT notification
 * @dev: device the notification was delivered for
 * @handle: ACPI handle forwarded to the per-event handler
 * @event: notification code
 *
 * NFIT_NOTIFY_UPDATE is routed to acpi_nfit_update_notify() and
 * NFIT_NOTIFY_UC_MEMORY_ERROR to acpi_nfit_uc_error_notify().  Any other
 * event code is only logged at debug level and otherwise ignored.
 */
void __acpi_nfit_notify(struct device *dev, acpi_handle handle, u32 event)
{
	dev_dbg(dev, "%s: event: 0x%x\n", __func__, event);

	if (event == NFIT_NOTIFY_UPDATE)
		acpi_nfit_update_notify(dev, handle);
	else if (event == NFIT_NOTIFY_UC_MEMORY_ERROR)
		acpi_nfit_uc_error_notify(dev, handle);
	/* all remaining event codes are intentionally dropped */
}
EXPORT_SYMBOL_GPL(__acpi_nfit_notify); EXPORT_SYMBOL_GPL(__acpi_nfit_notify);
static void acpi_nfit_notify(struct acpi_device *adev, u32 event) static void acpi_nfit_notify(struct acpi_device *adev, u32 event)
......
...@@ -79,7 +79,7 @@ static int nfit_handle_mce(struct notifier_block *nb, unsigned long val, ...@@ -79,7 +79,7 @@ static int nfit_handle_mce(struct notifier_block *nb, unsigned long val,
* already in progress, just let that be the last * already in progress, just let that be the last
* authoritative one * authoritative one
*/ */
acpi_nfit_ars_rescan(acpi_desc); acpi_nfit_ars_rescan(acpi_desc, 0);
} }
break; break;
} }
......
...@@ -79,6 +79,7 @@ enum { ...@@ -79,6 +79,7 @@ enum {
enum nfit_root_notifiers { enum nfit_root_notifiers {
NFIT_NOTIFY_UPDATE = 0x80, NFIT_NOTIFY_UPDATE = 0x80,
NFIT_NOTIFY_UC_MEMORY_ERROR = 0x81,
}; };
enum nfit_dimm_notifiers { enum nfit_dimm_notifiers {
...@@ -154,6 +155,7 @@ struct acpi_nfit_desc { ...@@ -154,6 +155,7 @@ struct acpi_nfit_desc {
struct list_head idts; struct list_head idts;
struct nvdimm_bus *nvdimm_bus; struct nvdimm_bus *nvdimm_bus;
struct device *dev; struct device *dev;
u8 ars_start_flags;
struct nd_cmd_ars_status *ars_status; struct nd_cmd_ars_status *ars_status;
size_t ars_status_size; size_t ars_status_size;
struct work_struct work; struct work_struct work;
...@@ -206,7 +208,7 @@ struct nfit_blk { ...@@ -206,7 +208,7 @@ struct nfit_blk {
extern struct list_head acpi_descs; extern struct list_head acpi_descs;
extern struct mutex acpi_desc_lock; extern struct mutex acpi_desc_lock;
int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc); int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, u8 flags);
#ifdef CONFIG_X86_MCE #ifdef CONFIG_X86_MCE
void nfit_mce_register(void); void nfit_mce_register(void);
......
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#ifdef CONFIG_BLK_DEV_RAM_DAX #ifdef CONFIG_BLK_DEV_RAM_DAX
#include <linux/pfn_t.h> #include <linux/pfn_t.h>
#include <linux/dax.h> #include <linux/dax.h>
#include <linux/uio.h>
#endif #endif
#include <linux/uaccess.h> #include <linux/uaccess.h>
...@@ -354,8 +355,15 @@ static long brd_dax_direct_access(struct dax_device *dax_dev, ...@@ -354,8 +355,15 @@ static long brd_dax_direct_access(struct dax_device *dax_dev,
return __brd_direct_access(brd, pgoff, nr_pages, kaddr, pfn); return __brd_direct_access(brd, pgoff, nr_pages, kaddr, pfn);
} }
/*
 * brd_dax_copy_from_iter() - dax ->copy_from_iter() hook for the ram disk
 * @dax_dev: dax device (unused)
 * @pgoff: page offset within the device (unused)
 * @addr: kernel destination address
 * @bytes: number of bytes requested
 * @i: source iterator
 *
 * Forwards straight to copy_from_iter() and returns whatever it returns.
 */
static size_t brd_dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
		void *addr, size_t bytes, struct iov_iter *i)
{
	size_t copied = copy_from_iter(addr, bytes, i);

	return copied;
}
static const struct dax_operations brd_dax_ops = { static const struct dax_operations brd_dax_ops = {
.direct_access = brd_dax_direct_access, .direct_access = brd_dax_direct_access,
.copy_from_iter = brd_dax_copy_from_iter,
}; };
#endif #endif
......
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
#include <linux/cdev.h> #include <linux/cdev.h>
#include <linux/hash.h> #include <linux/hash.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/uio.h>
#include <linux/dax.h> #include <linux/dax.h>
#include <linux/fs.h> #include <linux/fs.h>
...@@ -115,13 +116,20 @@ int __bdev_dax_supported(struct super_block *sb, int blocksize) ...@@ -115,13 +116,20 @@ int __bdev_dax_supported(struct super_block *sb, int blocksize)
EXPORT_SYMBOL_GPL(__bdev_dax_supported); EXPORT_SYMBOL_GPL(__bdev_dax_supported);
#endif #endif
/*
 * Bit numbers used with set_bit()/clear_bit()/test_bit() on
 * struct dax_device::flags.
 */
enum dax_device_flags {
	/* !alive + rcu grace period == no new operations / mappings */
	DAXDEV_ALIVE = 0,
	/* gate whether dax_flush() calls the low level flush routine */
	DAXDEV_WRITE_CACHE = 1,
};
/** /**
* struct dax_device - anchor object for dax services * struct dax_device - anchor object for dax services
* @inode: core vfs * @inode: core vfs
* @cdev: optional character interface for "device dax" * @cdev: optional character interface for "device dax"
* @host: optional name for lookups where the device path is not available * @host: optional name for lookups where the device path is not available
* @private: dax driver private data * @private: dax driver private data
* @alive: !alive + rcu grace period == no new operations / mappings * @flags: state and boolean properties
*/ */
struct dax_device { struct dax_device {
struct hlist_node list; struct hlist_node list;
...@@ -129,10 +137,75 @@ struct dax_device { ...@@ -129,10 +137,75 @@ struct dax_device {
struct cdev cdev; struct cdev cdev;
const char *host; const char *host;
void *private; void *private;
bool alive; unsigned long flags;
const struct dax_operations *ops; const struct dax_operations *ops;
}; };
/*
 * write_cache_show() - sysfs read handler for the "write_cache" attribute
 * @dev: host device; its name is used to look up the dax device
 * @attr: attribute being read (unused)
 * @buf: output buffer
 *
 * Prints "1\n" or "0\n" according to the DAXDEV_WRITE_CACHE bit.
 * Returns the sprintf() result, or -ENXIO (after a one-time warning)
 * when no dax device is found for @dev.  The reference obtained from
 * dax_get_by_host() is released with put_dax() before returning.
 */
static ssize_t write_cache_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dax_device *dax_dev = dax_get_by_host(dev_name(dev));
	ssize_t count;
	int enabled;

	if (WARN_ON_ONCE(!dax_dev))
		return -ENXIO;

	enabled = test_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags) ? 1 : 0;
	count = sprintf(buf, "%d\n", enabled);
	put_dax(dax_dev);

	return count;
}
/*
 * write_cache_store() - sysfs write handler for the "write_cache" attribute
 * @dev: host device; its name is used to look up the dax device
 * @attr: attribute being written (unused)
 * @buf: user supplied text, parsed with strtobool()
 * @len: length of @buf
 *
 * Sets or clears DAXDEV_WRITE_CACHE according to the parsed boolean.
 * Returns @len on success, the strtobool() error when parsing fails, or
 * -ENXIO (after a one-time warning) when no dax device is found.  A
 * missing device takes precedence over a parse error.
 */
static ssize_t write_cache_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	bool enable;
	int parse_err = strtobool(buf, &enable);
	struct dax_device *dax_dev = dax_get_by_host(dev_name(dev));
	ssize_t result = len;

	if (WARN_ON_ONCE(!dax_dev))
		return -ENXIO;

	if (parse_err) {
		/* leave the flag untouched on malformed input */
		result = parse_err;
	} else if (enable) {
		set_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags);
	} else {
		clear_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags);
	}

	put_dax(dax_dev);
	return result;
}
static DEVICE_ATTR_RW(write_cache);
/*
 * dax_visible() - attribute_group ->is_visible() callback for the "dax" group
 * @kobj: kobject embedded in the host struct device
 * @a: attribute whose visibility is being queried
 * @n: index of @a in the group (unused)
 *
 * Hides "write_cache" when the dax device's operations provide no
 * ->flush() method; every other attribute keeps its declared mode.
 * Returns 0 (hidden), after a one-time warning, when no dax device is
 * found for the host device name.
 *
 * The lookup via dax_get_by_host() is balanced with put_dax() here, the
 * same way write_cache_show() and write_cache_store() do it, so that a
 * visibility query does not leave a reference behind.
 */
static umode_t dax_visible(struct kobject *kobj, struct attribute *a, int n)
{
	struct device *dev = container_of(kobj, typeof(*dev), kobj);
	struct dax_device *dax_dev = dax_get_by_host(dev_name(dev));
	umode_t mode;

	WARN_ON_ONCE(!dax_dev);
	if (!dax_dev)
		return 0;

	if (a == &dev_attr_write_cache.attr && !dax_dev->ops->flush)
		mode = 0;
	else
		mode = a->mode;

	/* drop the reference taken by dax_get_by_host() */
	put_dax(dax_dev);
	return mode;
}
/* NULL-terminated list of attributes published in the "dax" sysfs group */
static struct attribute *dax_attributes[] = {
&dev_attr_write_cache.attr,
NULL,
};
/*
 * Attribute group named "dax"; dax_visible() decides per attribute
 * whether it is exposed.  Exported (GPL-only) for use outside this file.
 */
struct attribute_group dax_attribute_group = {
.name = "dax",
.attrs = dax_attributes,
.is_visible = dax_visible,
};
EXPORT_SYMBOL_GPL(dax_attribute_group);
/** /**
* dax_direct_access() - translate a device pgoff to an absolute pfn * dax_direct_access() - translate a device pgoff to an absolute pfn
* @dax_dev: a dax_device instance representing the logical memory range * @dax_dev: a dax_device instance representing the logical memory range
...@@ -172,10 +245,43 @@ long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, ...@@ -172,10 +245,43 @@ long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
} }
EXPORT_SYMBOL_GPL(dax_direct_access); EXPORT_SYMBOL_GPL(dax_direct_access);
/*
 * dax_copy_from_iter() - copy from an iov_iter through the driver's hook
 * @dax_dev: dax device to operate on
 * @pgoff: page offset handed to the driver
 * @addr: kernel destination address
 * @bytes: number of bytes requested
 * @i: source iterator
 *
 * Returns 0 without calling the driver when dax_alive() reports the
 * device as dead; otherwise returns the result of the driver's
 * ->copy_from_iter() operation.
 */
size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
		size_t bytes, struct iov_iter *i)
{
	size_t copied = 0;

	if (dax_alive(dax_dev))
		copied = dax_dev->ops->copy_from_iter(dax_dev, pgoff, addr,
						      bytes, i);

	return copied;
}
EXPORT_SYMBOL_GPL(dax_copy_from_iter);
/*
 * dax_flush() - invoke the driver's flush routine for a range
 * @dax_dev: dax device to operate on
 * @pgoff: page offset handed to the driver
 * @addr: start address of the range
 * @size: length of the range in bytes
 *
 * The driver's ->flush() is called only when all of the following hold,
 * checked in this order: the device is alive, DAXDEV_WRITE_CACHE is set,
 * and the driver provides a ->flush() method.  Otherwise this is a no-op.
 */
void dax_flush(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
		size_t size)
{
	if (dax_alive(dax_dev) &&
	    test_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags) &&
	    dax_dev->ops->flush)
		dax_dev->ops->flush(dax_dev, pgoff, addr, size);
}
EXPORT_SYMBOL_GPL(dax_flush);
/*
 * dax_write_cache() - set or clear the DAXDEV_WRITE_CACHE flag
 * @dax_dev: dax device to update
 * @wc: true to set the flag, false to clear it
 */
void dax_write_cache(struct dax_device *dax_dev, bool wc)
{
	if (!wc) {
		clear_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags);
		return;
	}

	set_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags);
}
EXPORT_SYMBOL_GPL(dax_write_cache);
bool dax_alive(struct dax_device *dax_dev) bool dax_alive(struct dax_device *dax_dev)
{ {
lockdep_assert_held(&dax_srcu); lockdep_assert_held(&dax_srcu);
return dax_dev->alive; return test_bit(DAXDEV_ALIVE, &dax_dev->flags);
} }
EXPORT_SYMBOL_GPL(dax_alive); EXPORT_SYMBOL_GPL(dax_alive);
...@@ -195,7 +301,7 @@ void kill_dax(struct dax_device *dax_dev) ...@@ -195,7 +301,7 @@ void kill_dax(struct dax_device *dax_dev)
if (!dax_dev) if (!dax_dev)
return; return;
dax_dev->alive = false; clear_bit(DAXDEV_ALIVE, &dax_dev->flags);
synchronize_srcu(&dax_srcu); synchronize_srcu(&dax_srcu);
...@@ -239,7 +345,7 @@ static void dax_destroy_inode(struct inode *inode) ...@@ -239,7 +345,7 @@ static void dax_destroy_inode(struct inode *inode)
{ {
struct dax_device *dax_dev = to_dax_dev(inode); struct dax_device *dax_dev = to_dax_dev(inode);
WARN_ONCE(dax_dev->alive, WARN_ONCE(test_bit(DAXDEV_ALIVE, &dax_dev->flags),
"kill_dax() must be called before final iput()\n"); "kill_dax() must be called before final iput()\n");
call_rcu(&inode->i_rcu, dax_i_callback); call_rcu(&inode->i_rcu, dax_i_callback);
} }
...@@ -291,7 +397,7 @@ static struct dax_device *dax_dev_get(dev_t devt) ...@@ -291,7 +397,7 @@ static struct dax_device *dax_dev_get(dev_t devt)
dax_dev = to_dax_dev(inode); dax_dev = to_dax_dev(inode);
if (inode->i_state & I_NEW) { if (inode->i_state & I_NEW) {
dax_dev->alive = true; set_bit(DAXDEV_ALIVE, &dax_dev->flags);
inode->i_cdev = &dax_dev->cdev; inode->i_cdev = &dax_dev->cdev;
inode->i_mode = S_IFCHR; inode->i_mode = S_IFCHR;
inode->i_flags = S_DAX; inode->i_flags = S_DAX;
......
...@@ -170,6 +170,34 @@ static long linear_dax_direct_access(struct dm_target *ti, pgoff_t pgoff, ...@@ -170,6 +170,34 @@ static long linear_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn); return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn);
} }
/*
 * linear_dax_copy_from_iter() - dm-linear ->dax_copy_from_iter() hook
 * @ti: linear target
 * @pgoff: page offset relative to the mapped device
 * @addr: kernel destination address
 * @bytes: number of bytes requested
 * @i: source iterator
 *
 * Maps @pgoff (scaled by PAGE_SECTORS) through linear_map_sector(), lets
 * bdev_dax_pgoff() rewrite @pgoff for the underlying block device, and
 * forwards the copy to that device's dax_device.  Returns 0 when
 * bdev_dax_pgoff() fails.
 */
static size_t linear_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff,
		void *addr, size_t bytes, struct iov_iter *i)
{
	struct linear_c *lc = ti->private;
	struct dax_device *dax_dev = lc->dev->dax_dev;
	struct block_device *bdev = lc->dev->bdev;
	sector_t start = pgoff * PAGE_SECTORS;
	sector_t mapped = linear_map_sector(ti, start);

	if (bdev_dax_pgoff(bdev, mapped, ALIGN(bytes, PAGE_SIZE), &pgoff) != 0)
		return 0;

	return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i);
}
/*
 * linear_dax_flush() - dm-linear ->dax_flush() hook
 * @ti: linear target
 * @pgoff: page offset relative to the mapped device
 * @addr: start address of the range
 * @size: length of the range in bytes
 *
 * Maps @pgoff (scaled by PAGE_SECTORS) through linear_map_sector(), lets
 * bdev_dax_pgoff() rewrite @pgoff for the underlying block device, and
 * forwards the flush to that device's dax_device.  Does nothing when
 * bdev_dax_pgoff() fails.
 */
static void linear_dax_flush(struct dm_target *ti, pgoff_t pgoff, void *addr,
		size_t size)
{
	struct linear_c *lc = ti->private;
	struct dax_device *dax_dev = lc->dev->dax_dev;
	struct block_device *bdev = lc->dev->bdev;
	sector_t start = pgoff * PAGE_SECTORS;
	sector_t mapped = linear_map_sector(ti, start);

	if (bdev_dax_pgoff(bdev, mapped, ALIGN(size, PAGE_SIZE), &pgoff) == 0)
		dax_flush(dax_dev, pgoff, addr, size);
}
static struct target_type linear_target = { static struct target_type linear_target = {
.name = "linear", .name = "linear",
.version = {1, 4, 0}, .version = {1, 4, 0},
...@@ -183,6 +211,8 @@ static struct target_type linear_target = { ...@@ -183,6 +211,8 @@ static struct target_type linear_target = {
.prepare_ioctl = linear_prepare_ioctl, .prepare_ioctl = linear_prepare_ioctl,
.iterate_devices = linear_iterate_devices, .iterate_devices = linear_iterate_devices,
.direct_access = linear_dax_direct_access, .direct_access = linear_dax_direct_access,
.dax_copy_from_iter = linear_dax_copy_from_iter,
.dax_flush = linear_dax_flush,
}; };
int __init dm_linear_init(void) int __init dm_linear_init(void)
......
...@@ -332,6 +332,44 @@ static long stripe_dax_direct_access(struct dm_target *ti, pgoff_t pgoff, ...@@ -332,6 +332,44 @@ static long stripe_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn); return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn);
} }
/*
 * stripe_dax_copy_from_iter() - dm-stripe ->dax_copy_from_iter() hook
 * @ti: stripe target
 * @pgoff: page offset relative to the mapped device
 * @addr: kernel destination address
 * @bytes: number of bytes requested
 * @i: source iterator
 *
 * stripe_map_sector() selects the stripe and the sector within it; the
 * stripe's physical_start is then added.  bdev_dax_pgoff() rewrites
 * @pgoff for that stripe's block device and the copy is forwarded to
 * the stripe's dax_device.  Returns 0 when bdev_dax_pgoff() fails.
 */
static size_t stripe_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff,
		void *addr, size_t bytes, struct iov_iter *i)
{
	struct stripe_c *sc = ti->private;
	sector_t start = pgoff * PAGE_SECTORS;
	struct block_device *member_bdev;
	struct dax_device *member_dax;
	sector_t member_sector;
	uint32_t idx;

	stripe_map_sector(sc, start, &idx, &member_sector);
	member_sector += sc->stripe[idx].physical_start;
	member_dax = sc->stripe[idx].dev->dax_dev;
	member_bdev = sc->stripe[idx].dev->bdev;

	if (bdev_dax_pgoff(member_bdev, member_sector,
			   ALIGN(bytes, PAGE_SIZE), &pgoff) != 0)
		return 0;

	return dax_copy_from_iter(member_dax, pgoff, addr, bytes, i);
}
/*
 * stripe_dax_flush() - dm-stripe ->dax_flush() hook
 * @ti: stripe target
 * @pgoff: page offset relative to the mapped device
 * @addr: start address of the range
 * @size: length of the range in bytes
 *
 * stripe_map_sector() selects the stripe and the sector within it; the
 * stripe's physical_start is then added.  bdev_dax_pgoff() rewrites
 * @pgoff for that stripe's block device and the flush is forwarded to
 * the stripe's dax_device.  Does nothing when bdev_dax_pgoff() fails.
 */
static void stripe_dax_flush(struct dm_target *ti, pgoff_t pgoff, void *addr,
		size_t size)
{
	struct stripe_c *sc = ti->private;
	sector_t start = pgoff * PAGE_SECTORS;
	struct block_device *member_bdev;
	struct dax_device *member_dax;
	sector_t member_sector;
	uint32_t idx;

	stripe_map_sector(sc, start, &idx, &member_sector);
	member_sector += sc->stripe[idx].physical_start;
	member_dax = sc->stripe[idx].dev->dax_dev;
	member_bdev = sc->stripe[idx].dev->bdev;

	if (bdev_dax_pgoff(member_bdev, member_sector,
			   ALIGN(size, PAGE_SIZE), &pgoff) == 0)
		dax_flush(member_dax, pgoff, addr, size);
}
/* /*
* Stripe status: * Stripe status:
* *
...@@ -452,6 +490,8 @@ static struct target_type stripe_target = { ...@@ -452,6 +490,8 @@ static struct target_type stripe_target = {
.iterate_devices = stripe_iterate_devices, .iterate_devices = stripe_iterate_devices,
.io_hints = stripe_io_hints, .io_hints = stripe_io_hints,
.direct_access = stripe_dax_direct_access, .direct_access = stripe_dax_direct_access,
.dax_copy_from_iter = stripe_dax_copy_from_iter,
.dax_flush = stripe_dax_flush,
}; };
int __init dm_stripe_init(void) int __init dm_stripe_init(void)
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#include <linux/dax.h> #include <linux/dax.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/idr.h> #include <linux/idr.h>
#include <linux/uio.h>
#include <linux/hdreg.h> #include <linux/hdreg.h>
#include <linux/delay.h> #include <linux/delay.h>
#include <linux/wait.h> #include <linux/wait.h>
...@@ -972,6 +973,48 @@ static long dm_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, ...@@ -972,6 +973,48 @@ static long dm_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
return ret; return ret;
} }
/*
 * dm_dax_copy_from_iter() - device-mapper dax ->copy_from_iter() hook
 * @dax_dev: dax device whose private data is the mapped_device
 * @pgoff: page offset relative to the mapped device
 * @addr: kernel destination address
 * @bytes: number of bytes requested
 * @i: source iterator
 *
 * Looks up the live target covering @pgoff.  If the target type has a
 * ->dax_copy_from_iter() method it is used; otherwise the copy falls
 * back to plain copy_from_iter().  Returns 0 when no live target is
 * found.  dm_put_live_table() is called on every path, including the
 * no-target one.
 */
static size_t dm_dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
		void *addr, size_t bytes, struct iov_iter *i)
{
	struct mapped_device *md = dax_get_private(dax_dev);
	sector_t sector = pgoff * PAGE_SECTORS;
	struct dm_target *ti;
	long copied = 0;
	int srcu_idx;

	ti = dm_dax_get_live_target(md, sector, &srcu_idx);
	if (ti) {
		if (ti->type->dax_copy_from_iter)
			copied = ti->type->dax_copy_from_iter(ti, pgoff, addr,
							      bytes, i);
		else
			copied = copy_from_iter(addr, bytes, i);
	}
	dm_put_live_table(md, srcu_idx);

	return copied;
}
/*
 * dm_dax_flush() - device-mapper dax ->flush() hook
 * @dax_dev: dax device whose private data is the mapped_device
 * @pgoff: page offset relative to the mapped device
 * @addr: start address of the range
 * @size: length of the range in bytes
 *
 * Looks up the live target covering @pgoff and calls its ->dax_flush()
 * method when the target type provides one.  dm_put_live_table() is
 * called on every path, including when no live target is found.
 */
static void dm_dax_flush(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
		size_t size)
{
	struct mapped_device *md = dax_get_private(dax_dev);
	sector_t sector = pgoff * PAGE_SECTORS;
	struct dm_target *ti;
	int srcu_idx;

	ti = dm_dax_get_live_target(md, sector, &srcu_idx);
	if (ti && ti->type->dax_flush)
		ti->type->dax_flush(ti, pgoff, addr, size);
	dm_put_live_table(md, srcu_idx);
}
/* /*
* A target may call dm_accept_partial_bio only from the map routine. It is * A target may call dm_accept_partial_bio only from the map routine. It is
* allowed for all bio types except REQ_PREFLUSH. * allowed for all bio types except REQ_PREFLUSH.
...@@ -2958,6 +3001,8 @@ static const struct block_device_operations dm_blk_dops = { ...@@ -2958,6 +3001,8 @@ static const struct block_device_operations dm_blk_dops = {
static const struct dax_operations dm_dax_ops = { static const struct dax_operations dm_dax_ops = {
.direct_access = dm_dax_direct_access, .direct_access = dm_dax_direct_access,
.copy_from_iter = dm_dax_copy_from_iter,
.flush = dm_dax_flush,
}; };
/* /*
......
...@@ -37,8 +37,8 @@ static int arena_read_bytes(struct arena_info *arena, resource_size_t offset, ...@@ -37,8 +37,8 @@ static int arena_read_bytes(struct arena_info *arena, resource_size_t offset,
struct nd_btt *nd_btt = arena->nd_btt; struct nd_btt *nd_btt = arena->nd_btt;
struct nd_namespace_common *ndns = nd_btt->ndns; struct nd_namespace_common *ndns = nd_btt->ndns;
/* arena offsets are 4K from the base of the device */ /* arena offsets may be shifted from the base of the device */
offset += SZ_4K; offset += arena->nd_btt->initial_offset;
return nvdimm_read_bytes(ndns, offset, buf, n, flags); return nvdimm_read_bytes(ndns, offset, buf, n, flags);
} }
...@@ -48,8 +48,8 @@ static int arena_write_bytes(struct arena_info *arena, resource_size_t offset, ...@@ -48,8 +48,8 @@ static int arena_write_bytes(struct arena_info *arena, resource_size_t offset,
struct nd_btt *nd_btt = arena->nd_btt; struct nd_btt *nd_btt = arena->nd_btt;
struct nd_namespace_common *ndns = nd_btt->ndns; struct nd_namespace_common *ndns = nd_btt->ndns;
/* arena offsets are 4K from the base of the device */ /* arena offsets may be shifted from the base of the device */
offset += SZ_4K; offset += arena->nd_btt->initial_offset;
return nvdimm_write_bytes(ndns, offset, buf, n, flags); return nvdimm_write_bytes(ndns, offset, buf, n, flags);
} }
...@@ -323,7 +323,7 @@ static int btt_log_read(struct arena_info *arena, u32 lane, ...@@ -323,7 +323,7 @@ static int btt_log_read(struct arena_info *arena, u32 lane,
old_ent = btt_log_get_old(log); old_ent = btt_log_get_old(log);
if (old_ent < 0 || old_ent > 1) { if (old_ent < 0 || old_ent > 1) {
dev_info(to_dev(arena), dev_err(to_dev(arena),
"log corruption (%d): lane %d seq [%d, %d]\n", "log corruption (%d): lane %d seq [%d, %d]\n",
old_ent, lane, log[0].seq, log[1].seq); old_ent, lane, log[0].seq, log[1].seq);
/* TODO set error state? */ /* TODO set error state? */
...@@ -576,8 +576,8 @@ static struct arena_info *alloc_arena(struct btt *btt, size_t size, ...@@ -576,8 +576,8 @@ static struct arena_info *alloc_arena(struct btt *btt, size_t size,
arena->internal_lbasize = roundup(arena->external_lbasize, arena->internal_lbasize = roundup(arena->external_lbasize,
INT_LBASIZE_ALIGNMENT); INT_LBASIZE_ALIGNMENT);
arena->nfree = BTT_DEFAULT_NFREE; arena->nfree = BTT_DEFAULT_NFREE;
arena->version_major = 1; arena->version_major = btt->nd_btt->version_major;
arena->version_minor = 1; arena->version_minor = btt->nd_btt->version_minor;
if (available % BTT_PG_SIZE) if (available % BTT_PG_SIZE)
available -= (available % BTT_PG_SIZE); available -= (available % BTT_PG_SIZE);
...@@ -684,7 +684,7 @@ static int discover_arenas(struct btt *btt) ...@@ -684,7 +684,7 @@ static int discover_arenas(struct btt *btt)
dev_info(to_dev(arena), "No existing arenas\n"); dev_info(to_dev(arena), "No existing arenas\n");
goto out; goto out;
} else { } else {
dev_info(to_dev(arena), dev_err(to_dev(arena),
"Found corrupted metadata!\n"); "Found corrupted metadata!\n");
ret = -ENODEV; ret = -ENODEV;
goto out; goto out;
...@@ -1227,7 +1227,7 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio) ...@@ -1227,7 +1227,7 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio)
err = btt_do_bvec(btt, bip, bvec.bv_page, len, bvec.bv_offset, err = btt_do_bvec(btt, bip, bvec.bv_page, len, bvec.bv_offset,
op_is_write(bio_op(bio)), iter.bi_sector); op_is_write(bio_op(bio)), iter.bi_sector);
if (err) { if (err) {
dev_info(&btt->nd_btt->dev, dev_err(&btt->nd_btt->dev,
"io error in %s sector %lld, len %d,\n", "io error in %s sector %lld, len %d,\n",
(op_is_write(bio_op(bio))) ? "WRITE" : (op_is_write(bio_op(bio))) ? "WRITE" :
"READ", "READ",
...@@ -1248,10 +1248,13 @@ static int btt_rw_page(struct block_device *bdev, sector_t sector, ...@@ -1248,10 +1248,13 @@ static int btt_rw_page(struct block_device *bdev, sector_t sector,
struct page *page, bool is_write) struct page *page, bool is_write)
{ {
struct btt *btt = bdev->bd_disk->private_data; struct btt *btt = bdev->bd_disk->private_data;
int rc;
btt_do_bvec(btt, NULL, page, PAGE_SIZE, 0, is_write, sector); rc = btt_do_bvec(btt, NULL, page, PAGE_SIZE, 0, is_write, sector);
page_endio(page, is_write, 0); if (rc == 0)
return 0; page_endio(page, is_write, 0);
return rc;
} }
...@@ -1369,7 +1372,7 @@ static struct btt *btt_init(struct nd_btt *nd_btt, unsigned long long rawsize, ...@@ -1369,7 +1372,7 @@ static struct btt *btt_init(struct nd_btt *nd_btt, unsigned long long rawsize,
} }
if (btt->init_state != INIT_READY && nd_region->ro) { if (btt->init_state != INIT_READY && nd_region->ro) {
dev_info(dev, "%s is read-only, unable to init btt metadata\n", dev_warn(dev, "%s is read-only, unable to init btt metadata\n",
dev_name(&nd_region->dev)); dev_name(&nd_region->dev));
return NULL; return NULL;
} else if (btt->init_state != INIT_READY) { } else if (btt->init_state != INIT_READY) {
...@@ -1424,6 +1427,7 @@ int nvdimm_namespace_attach_btt(struct nd_namespace_common *ndns) ...@@ -1424,6 +1427,7 @@ int nvdimm_namespace_attach_btt(struct nd_namespace_common *ndns)
{ {
struct nd_btt *nd_btt = to_nd_btt(ndns->claim); struct nd_btt *nd_btt = to_nd_btt(ndns->claim);
struct nd_region *nd_region; struct nd_region *nd_region;
struct btt_sb *btt_sb;
struct btt *btt; struct btt *btt;
size_t rawsize; size_t rawsize;
...@@ -1432,10 +1436,21 @@ int nvdimm_namespace_attach_btt(struct nd_namespace_common *ndns) ...@@ -1432,10 +1436,21 @@ int nvdimm_namespace_attach_btt(struct nd_namespace_common *ndns)
return -ENODEV; return -ENODEV;
} }
rawsize = nvdimm_namespace_capacity(ndns) - SZ_4K; btt_sb = devm_kzalloc(&nd_btt->dev, sizeof(*btt_sb), GFP_KERNEL);
/*
* If this returns < 0, that is ok as it just means there wasn't
* an existing BTT, and we're creating a new one. We still need to
* call this as we need the version dependent fields in nd_btt to be
* set correctly based on the holder class
*/
nd_btt_version(nd_btt, ndns, btt_sb);
rawsize = nvdimm_namespace_capacity(ndns) - nd_btt->initial_offset;
if (rawsize < ARENA_MIN_SIZE) { if (rawsize < ARENA_MIN_SIZE) {
dev_dbg(&nd_btt->dev, "%s must be at least %ld bytes\n", dev_dbg(&nd_btt->dev, "%s must be at least %ld bytes\n",
dev_name(&ndns->dev), ARENA_MIN_SIZE + SZ_4K); dev_name(&ndns->dev),
ARENA_MIN_SIZE + nd_btt->initial_offset);
return -ENXIO; return -ENXIO;
} }
nd_region = to_nd_region(nd_btt->dev.parent); nd_region = to_nd_region(nd_btt->dev.parent);
......
...@@ -184,5 +184,7 @@ struct btt { ...@@ -184,5 +184,7 @@ struct btt {
}; };
bool nd_btt_arena_is_valid(struct nd_btt *nd_btt, struct btt_sb *super); bool nd_btt_arena_is_valid(struct nd_btt *nd_btt, struct btt_sb *super);
int nd_btt_version(struct nd_btt *nd_btt, struct nd_namespace_common *ndns,
struct btt_sb *btt_sb);
#endif #endif
...@@ -260,20 +260,55 @@ bool nd_btt_arena_is_valid(struct nd_btt *nd_btt, struct btt_sb *super) ...@@ -260,20 +260,55 @@ bool nd_btt_arena_is_valid(struct nd_btt *nd_btt, struct btt_sb *super)
} }
EXPORT_SYMBOL(nd_btt_arena_is_valid); EXPORT_SYMBOL(nd_btt_arena_is_valid);
/*
 * nd_btt_version() - set version dependent BTT fields and probe the superblock
 * @nd_btt: btt device whose initial_offset/version_major/version_minor are set
 * @ndns: namespace to read from; its claim_class selects the layout
 * @btt_sb: buffer that receives the superblock read from the namespace
 *
 * NVDIMM_CCLASS_BTT2 selects v2.0 with the superblock at offset 0; every
 * other claim class (NVDIMM_CCLASS_NONE or NVDIMM_CCLASS_BTT) selects
 * v1.1 with the superblock at SZ_4K.  The nd_btt fields are written
 * before the superblock is read, so they are valid even when this
 * function fails.
 *
 * Returns 0 when a valid superblock of the expected version is found,
 * -ENXIO when the read fails, and -ENODEV when the arena is not valid
 * or the on-media version does not match.
 */
int nd_btt_version(struct nd_btt *nd_btt, struct nd_namespace_common *ndns,
		struct btt_sb *btt_sb)
{
	int sb_offset, major, minor;

	if (ndns->claim_class == NVDIMM_CCLASS_BTT2) {
		/* Probe/setup for BTT v2.0 */
		sb_offset = 0;
		major = 2;
		minor = 0;
	} else {
		/*
		 * Probe/setup for BTT v1.1 (NVDIMM_CCLASS_NONE or
		 * NVDIMM_CCLASS_BTT)
		 */
		sb_offset = SZ_4K;
		major = 1;
		minor = 1;
	}

	nd_btt->initial_offset = sb_offset;
	nd_btt->version_major = major;
	nd_btt->version_minor = minor;

	if (nvdimm_read_bytes(ndns, sb_offset, btt_sb, sizeof(*btt_sb), 0))
		return -ENXIO;
	if (!nd_btt_arena_is_valid(nd_btt, btt_sb))
		return -ENODEV;
	if (le16_to_cpu(btt_sb->version_major) != major ||
	    le16_to_cpu(btt_sb->version_minor) != minor)
		return -ENODEV;

	return 0;
}
EXPORT_SYMBOL(nd_btt_version);
static int __nd_btt_probe(struct nd_btt *nd_btt, static int __nd_btt_probe(struct nd_btt *nd_btt,
struct nd_namespace_common *ndns, struct btt_sb *btt_sb) struct nd_namespace_common *ndns, struct btt_sb *btt_sb)
{ {
int rc;
if (!btt_sb || !ndns || !nd_btt) if (!btt_sb || !ndns || !nd_btt)
return -ENODEV; return -ENODEV;
if (nvdimm_read_bytes(ndns, SZ_4K, btt_sb, sizeof(*btt_sb), 0))
return -ENXIO;
if (nvdimm_namespace_capacity(ndns) < SZ_16M) if (nvdimm_namespace_capacity(ndns) < SZ_16M)
return -ENXIO; return -ENXIO;
if (!nd_btt_arena_is_valid(nd_btt, btt_sb)) rc = nd_btt_version(nd_btt, ndns, btt_sb);
return -ENODEV; if (rc < 0)
return rc;
nd_btt->lbasize = le32_to_cpu(btt_sb->external_lbasize); nd_btt->lbasize = le32_to_cpu(btt_sb->external_lbasize);
nd_btt->uuid = kmemdup(btt_sb->uuid, 16, GFP_KERNEL); nd_btt->uuid = kmemdup(btt_sb->uuid, 16, GFP_KERNEL);
...@@ -295,6 +330,15 @@ int nd_btt_probe(struct device *dev, struct nd_namespace_common *ndns) ...@@ -295,6 +330,15 @@ int nd_btt_probe(struct device *dev, struct nd_namespace_common *ndns)
if (ndns->force_raw) if (ndns->force_raw)
return -ENODEV; return -ENODEV;
switch (ndns->claim_class) {
case NVDIMM_CCLASS_NONE:
case NVDIMM_CCLASS_BTT:
case NVDIMM_CCLASS_BTT2:
break;
default:
return -ENODEV;
}
nvdimm_bus_lock(&ndns->dev); nvdimm_bus_lock(&ndns->dev);
btt_dev = __nd_btt_create(nd_region, 0, NULL, ndns); btt_dev = __nd_btt_create(nd_region, 0, NULL, ndns);
nvdimm_bus_unlock(&ndns->dev); nvdimm_bus_unlock(&ndns->dev);
......
...@@ -38,13 +38,13 @@ static int to_nd_device_type(struct device *dev) ...@@ -38,13 +38,13 @@ static int to_nd_device_type(struct device *dev)
{ {
if (is_nvdimm(dev)) if (is_nvdimm(dev))
return ND_DEVICE_DIMM; return ND_DEVICE_DIMM;
else if (is_nd_pmem(dev)) else if (is_memory(dev))
return ND_DEVICE_REGION_PMEM; return ND_DEVICE_REGION_PMEM;
else if (is_nd_blk(dev)) else if (is_nd_blk(dev))
return ND_DEVICE_REGION_BLK; return ND_DEVICE_REGION_BLK;
else if (is_nd_dax(dev)) else if (is_nd_dax(dev))
return ND_DEVICE_DAX_PMEM; return ND_DEVICE_DAX_PMEM;
else if (is_nd_pmem(dev->parent) || is_nd_blk(dev->parent)) else if (is_nd_region(dev->parent))
return nd_region_to_nstype(to_nd_region(dev->parent)); return nd_region_to_nstype(to_nd_region(dev->parent));
return 0; return 0;
...@@ -56,7 +56,7 @@ static int nvdimm_bus_uevent(struct device *dev, struct kobj_uevent_env *env) ...@@ -56,7 +56,7 @@ static int nvdimm_bus_uevent(struct device *dev, struct kobj_uevent_env *env)
* Ensure that region devices always have their numa node set as * Ensure that region devices always have their numa node set as
* early as possible. * early as possible.
*/ */
if (is_nd_pmem(dev) || is_nd_blk(dev)) if (is_nd_region(dev))
set_dev_node(dev, to_nd_region(dev)->numa_node); set_dev_node(dev, to_nd_region(dev)->numa_node);
return add_uevent_var(env, "MODALIAS=" ND_DEVICE_MODALIAS_FMT, return add_uevent_var(env, "MODALIAS=" ND_DEVICE_MODALIAS_FMT,
to_nd_device_type(dev)); to_nd_device_type(dev));
...@@ -65,7 +65,7 @@ static int nvdimm_bus_uevent(struct device *dev, struct kobj_uevent_env *env) ...@@ -65,7 +65,7 @@ static int nvdimm_bus_uevent(struct device *dev, struct kobj_uevent_env *env)
static struct module *to_bus_provider(struct device *dev) static struct module *to_bus_provider(struct device *dev)
{ {
/* pin bus providers while regions are enabled */ /* pin bus providers while regions are enabled */
if (is_nd_pmem(dev) || is_nd_blk(dev)) { if (is_nd_region(dev)) {
struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
return nvdimm_bus->nd_desc->module; return nvdimm_bus->nd_desc->module;
...@@ -198,6 +198,9 @@ static int nvdimm_clear_badblocks_region(struct device *dev, void *data) ...@@ -198,6 +198,9 @@ static int nvdimm_clear_badblocks_region(struct device *dev, void *data)
sector = (ctx->phys - nd_region->ndr_start) / 512; sector = (ctx->phys - nd_region->ndr_start) / 512;
badblocks_clear(&nd_region->bb, sector, ctx->cleared / 512); badblocks_clear(&nd_region->bb, sector, ctx->cleared / 512);
if (nd_region->bb_state)
sysfs_notify_dirent(nd_region->bb_state);
return 0; return 0;
} }
...@@ -907,6 +910,7 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, ...@@ -907,6 +910,7 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
static char in_env[ND_CMD_MAX_ENVELOPE]; static char in_env[ND_CMD_MAX_ENVELOPE];
const struct nd_cmd_desc *desc = NULL; const struct nd_cmd_desc *desc = NULL;
unsigned int cmd = _IOC_NR(ioctl_cmd); unsigned int cmd = _IOC_NR(ioctl_cmd);
unsigned int func = cmd;
void __user *p = (void __user *) arg; void __user *p = (void __user *) arg;
struct device *dev = &nvdimm_bus->dev; struct device *dev = &nvdimm_bus->dev;
struct nd_cmd_pkg pkg; struct nd_cmd_pkg pkg;
...@@ -972,6 +976,7 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, ...@@ -972,6 +976,7 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
} }
if (cmd == ND_CMD_CALL) { if (cmd == ND_CMD_CALL) {
func = pkg.nd_command;
dev_dbg(dev, "%s:%s, idx: %llu, in: %zu, out: %zu, len %zu\n", dev_dbg(dev, "%s:%s, idx: %llu, in: %zu, out: %zu, len %zu\n",
__func__, dimm_name, pkg.nd_command, __func__, dimm_name, pkg.nd_command,
in_len, out_len, buf_len); in_len, out_len, buf_len);
...@@ -1020,7 +1025,7 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, ...@@ -1020,7 +1025,7 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
} }
nvdimm_bus_lock(&nvdimm_bus->dev); nvdimm_bus_lock(&nvdimm_bus->dev);
rc = nd_cmd_clear_to_send(nvdimm_bus, nvdimm, cmd, buf); rc = nd_cmd_clear_to_send(nvdimm_bus, nvdimm, func, buf);
if (rc) if (rc)
goto out_unlock; goto out_unlock;
......
...@@ -12,8 +12,8 @@ ...@@ -12,8 +12,8 @@
*/ */
#include <linux/device.h> #include <linux/device.h>
#include <linux/sizes.h> #include <linux/sizes.h>
#include <linux/pmem.h>
#include "nd-core.h" #include "nd-core.h"
#include "pmem.h"
#include "pfn.h" #include "pfn.h"
#include "btt.h" #include "btt.h"
#include "nd.h" #include "nd.h"
...@@ -184,6 +184,35 @@ ssize_t nd_namespace_store(struct device *dev, ...@@ -184,6 +184,35 @@ ssize_t nd_namespace_store(struct device *dev,
} }
ndns = to_ndns(found); ndns = to_ndns(found);
switch (ndns->claim_class) {
case NVDIMM_CCLASS_NONE:
break;
case NVDIMM_CCLASS_BTT:
case NVDIMM_CCLASS_BTT2:
if (!is_nd_btt(dev)) {
len = -EBUSY;
goto out_attach;
}
break;
case NVDIMM_CCLASS_PFN:
if (!is_nd_pfn(dev)) {
len = -EBUSY;
goto out_attach;
}
break;
case NVDIMM_CCLASS_DAX:
if (!is_nd_dax(dev)) {
len = -EBUSY;
goto out_attach;
}
break;
default:
len = -EBUSY;
goto out_attach;
break;
}
if (__nvdimm_namespace_capacity(ndns) < SZ_16M) { if (__nvdimm_namespace_capacity(ndns) < SZ_16M) {
dev_dbg(dev, "%s too small to host\n", name); dev_dbg(dev, "%s too small to host\n", name);
len = -ENXIO; len = -ENXIO;
...@@ -260,8 +289,7 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns, ...@@ -260,8 +289,7 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
* work around this collision. * work around this collision.
*/ */
if (IS_ALIGNED(offset, 512) && IS_ALIGNED(size, 512) if (IS_ALIGNED(offset, 512) && IS_ALIGNED(size, 512)
&& !(flags & NVDIMM_IO_ATOMIC) && !(flags & NVDIMM_IO_ATOMIC)) {
&& !ndns->claim) {
long cleared; long cleared;
cleared = nvdimm_clear_poison(&ndns->dev, cleared = nvdimm_clear_poison(&ndns->dev,
...@@ -272,12 +300,12 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns, ...@@ -272,12 +300,12 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
cleared /= 512; cleared /= 512;
badblocks_clear(&nsio->bb, sector, cleared); badblocks_clear(&nsio->bb, sector, cleared);
} }
invalidate_pmem(nsio->addr + offset, size); arch_invalidate_pmem(nsio->addr + offset, size);
} else } else
rc = -EIO; rc = -EIO;
} }
memcpy_to_pmem(nsio->addr + offset, buf, size); memcpy_flushcache(nsio->addr + offset, buf, size);
nvdimm_flush(to_nd_region(ndns->dev.parent)); nvdimm_flush(to_nd_region(ndns->dev.parent));
return rc; return rc;
......
...@@ -504,7 +504,7 @@ void nvdimm_badblocks_populate(struct nd_region *nd_region, ...@@ -504,7 +504,7 @@ void nvdimm_badblocks_populate(struct nd_region *nd_region,
struct nvdimm_bus *nvdimm_bus; struct nvdimm_bus *nvdimm_bus;
struct list_head *poison_list; struct list_head *poison_list;
if (!is_nd_pmem(&nd_region->dev)) { if (!is_memory(&nd_region->dev)) {
dev_WARN_ONCE(&nd_region->dev, 1, dev_WARN_ONCE(&nd_region->dev, 1,
"%s only valid for pmem regions\n", __func__); "%s only valid for pmem regions\n", __func__);
return; return;
...@@ -699,6 +699,9 @@ static __init int libnvdimm_init(void) ...@@ -699,6 +699,9 @@ static __init int libnvdimm_init(void)
rc = nd_region_init(); rc = nd_region_init();
if (rc) if (rc)
goto err_region; goto err_region;
nd_label_init();
return 0; return 0;
err_region: err_region:
nvdimm_exit(); nvdimm_exit();
......
...@@ -89,7 +89,7 @@ struct device *nd_dax_create(struct nd_region *nd_region) ...@@ -89,7 +89,7 @@ struct device *nd_dax_create(struct nd_region *nd_region)
struct device *dev = NULL; struct device *dev = NULL;
struct nd_dax *nd_dax; struct nd_dax *nd_dax;
if (!is_nd_pmem(&nd_region->dev)) if (!is_memory(&nd_region->dev))
return NULL; return NULL;
nd_dax = nd_dax_alloc(nd_region); nd_dax = nd_dax_alloc(nd_region);
...@@ -111,6 +111,14 @@ int nd_dax_probe(struct device *dev, struct nd_namespace_common *ndns) ...@@ -111,6 +111,14 @@ int nd_dax_probe(struct device *dev, struct nd_namespace_common *ndns)
if (ndns->force_raw) if (ndns->force_raw)
return -ENODEV; return -ENODEV;
switch (ndns->claim_class) {
case NVDIMM_CCLASS_NONE:
case NVDIMM_CCLASS_DAX:
break;
default:
return -ENODEV;
}
nvdimm_bus_lock(&ndns->dev); nvdimm_bus_lock(&ndns->dev);
nd_dax = nd_dax_alloc(nd_region); nd_dax = nd_dax_alloc(nd_region);
nd_pfn = &nd_dax->nd_pfn; nd_pfn = &nd_dax->nd_pfn;
......
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
#include <linux/mm.h> #include <linux/mm.h>
#include "nd-core.h" #include "nd-core.h"
#include "label.h" #include "label.h"
#include "pmem.h"
#include "nd.h" #include "nd.h"
static DEFINE_IDA(dimm_ida); static DEFINE_IDA(dimm_ida);
...@@ -235,6 +236,13 @@ struct nvdimm *nd_blk_region_to_dimm(struct nd_blk_region *ndbr) ...@@ -235,6 +236,13 @@ struct nvdimm *nd_blk_region_to_dimm(struct nd_blk_region *ndbr)
} }
EXPORT_SYMBOL_GPL(nd_blk_region_to_dimm); EXPORT_SYMBOL_GPL(nd_blk_region_to_dimm);
unsigned long nd_blk_memremap_flags(struct nd_blk_region *ndbr)
{
/* pmem mapping properties are private to libnvdimm */
return ARCH_MEMREMAP_PMEM;
}
EXPORT_SYMBOL_GPL(nd_blk_memremap_flags);
struct nvdimm_drvdata *to_ndd(struct nd_mapping *nd_mapping) struct nvdimm_drvdata *to_ndd(struct nd_mapping *nd_mapping)
{ {
struct nvdimm *nvdimm = nd_mapping->nvdimm; struct nvdimm *nvdimm = nd_mapping->nvdimm;
...@@ -411,7 +419,7 @@ int alias_dpa_busy(struct device *dev, void *data) ...@@ -411,7 +419,7 @@ int alias_dpa_busy(struct device *dev, void *data)
struct resource *res; struct resource *res;
int i; int i;
if (!is_nd_pmem(dev)) if (!is_memory(dev))
return 0; return 0;
nd_region = to_nd_region(dev); nd_region = to_nd_region(dev);
......
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
*/ */
#include <linux/device.h> #include <linux/device.h>
#include <linux/ndctl.h> #include <linux/ndctl.h>
#include <linux/uuid.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/io.h> #include <linux/io.h>
#include <linux/nd.h> #include <linux/nd.h>
...@@ -19,6 +20,11 @@ ...@@ -19,6 +20,11 @@
#include "label.h" #include "label.h"
#include "nd.h" #include "nd.h"
static guid_t nvdimm_btt_guid;
static guid_t nvdimm_btt2_guid;
static guid_t nvdimm_pfn_guid;
static guid_t nvdimm_dax_guid;
static u32 best_seq(u32 a, u32 b) static u32 best_seq(u32 a, u32 b)
{ {
a &= NSINDEX_SEQ_MASK; a &= NSINDEX_SEQ_MASK;
...@@ -34,6 +40,11 @@ static u32 best_seq(u32 a, u32 b) ...@@ -34,6 +40,11 @@ static u32 best_seq(u32 a, u32 b)
return a; return a;
} }
unsigned sizeof_namespace_label(struct nvdimm_drvdata *ndd)
{
return ndd->nslabel_size;
}
size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd) size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd)
{ {
u32 index_span; u32 index_span;
...@@ -49,7 +60,7 @@ size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd) ...@@ -49,7 +60,7 @@ size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd)
* starts to waste space at larger config_sizes, but it's * starts to waste space at larger config_sizes, but it's
* unlikely we'll ever see anything but 128K. * unlikely we'll ever see anything but 128K.
*/ */
index_span = ndd->nsarea.config_size / 129; index_span = ndd->nsarea.config_size / (sizeof_namespace_label(ndd) + 1);
index_span /= NSINDEX_ALIGN * 2; index_span /= NSINDEX_ALIGN * 2;
ndd->nsindex_size = index_span * NSINDEX_ALIGN; ndd->nsindex_size = index_span * NSINDEX_ALIGN;
...@@ -58,10 +69,10 @@ size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd) ...@@ -58,10 +69,10 @@ size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd)
int nvdimm_num_label_slots(struct nvdimm_drvdata *ndd) int nvdimm_num_label_slots(struct nvdimm_drvdata *ndd)
{ {
return ndd->nsarea.config_size / 129; return ndd->nsarea.config_size / (sizeof_namespace_label(ndd) + 1);
} }
int nd_label_validate(struct nvdimm_drvdata *ndd) static int __nd_label_validate(struct nvdimm_drvdata *ndd)
{ {
/* /*
* On media label format consists of two index blocks followed * On media label format consists of two index blocks followed
...@@ -104,6 +115,7 @@ int nd_label_validate(struct nvdimm_drvdata *ndd) ...@@ -104,6 +115,7 @@ int nd_label_validate(struct nvdimm_drvdata *ndd)
u32 nslot; u32 nslot;
u8 sig[NSINDEX_SIG_LEN]; u8 sig[NSINDEX_SIG_LEN];
u64 sum_save, sum, size; u64 sum_save, sum, size;
unsigned int version, labelsize;
memcpy(sig, nsindex[i]->sig, NSINDEX_SIG_LEN); memcpy(sig, nsindex[i]->sig, NSINDEX_SIG_LEN);
if (memcmp(sig, NSINDEX_SIGNATURE, NSINDEX_SIG_LEN) != 0) { if (memcmp(sig, NSINDEX_SIGNATURE, NSINDEX_SIG_LEN) != 0) {
...@@ -111,6 +123,21 @@ int nd_label_validate(struct nvdimm_drvdata *ndd) ...@@ -111,6 +123,21 @@ int nd_label_validate(struct nvdimm_drvdata *ndd)
__func__, i); __func__, i);
continue; continue;
} }
/* label sizes larger than 128 arrived with v1.2 */
version = __le16_to_cpu(nsindex[i]->major) * 100
+ __le16_to_cpu(nsindex[i]->minor);
if (version >= 102)
labelsize = 1 << (7 + nsindex[i]->labelsize);
else
labelsize = 128;
if (labelsize != sizeof_namespace_label(ndd)) {
dev_dbg(dev, "%s: nsindex%d labelsize %d invalid\n",
__func__, i, nsindex[i]->labelsize);
continue;
}
sum_save = __le64_to_cpu(nsindex[i]->checksum); sum_save = __le64_to_cpu(nsindex[i]->checksum);
nsindex[i]->checksum = __cpu_to_le64(0); nsindex[i]->checksum = __cpu_to_le64(0);
sum = nd_fletcher64(nsindex[i], sizeof_namespace_index(ndd), 1); sum = nd_fletcher64(nsindex[i], sizeof_namespace_index(ndd), 1);
...@@ -153,7 +180,7 @@ int nd_label_validate(struct nvdimm_drvdata *ndd) ...@@ -153,7 +180,7 @@ int nd_label_validate(struct nvdimm_drvdata *ndd)
} }
nslot = __le32_to_cpu(nsindex[i]->nslot); nslot = __le32_to_cpu(nsindex[i]->nslot);
if (nslot * sizeof(struct nd_namespace_label) if (nslot * sizeof_namespace_label(ndd)
+ 2 * sizeof_namespace_index(ndd) + 2 * sizeof_namespace_index(ndd)
> ndd->nsarea.config_size) { > ndd->nsarea.config_size) {
dev_dbg(dev, "%s: nsindex%d nslot: %u invalid, config_size: %#x\n", dev_dbg(dev, "%s: nsindex%d nslot: %u invalid, config_size: %#x\n",
...@@ -189,6 +216,29 @@ int nd_label_validate(struct nvdimm_drvdata *ndd) ...@@ -189,6 +216,29 @@ int nd_label_validate(struct nvdimm_drvdata *ndd)
return -1; return -1;
} }
int nd_label_validate(struct nvdimm_drvdata *ndd)
{
/*
* In order to probe for and validate namespace index blocks we
* need to know the size of the labels, and we can't trust the
* size of the labels until we validate the index blocks.
* Resolve this dependency loop by probing for known label
* sizes, but default to v1.2 256-byte namespace labels if
* discovery fails.
*/
int label_size[] = { 128, 256 };
int i, rc;
for (i = 0; i < ARRAY_SIZE(label_size); i++) {
ndd->nslabel_size = label_size[i];
rc = __nd_label_validate(ndd);
if (rc >= 0)
return rc;
}
return -1;
}
void nd_label_copy(struct nvdimm_drvdata *ndd, struct nd_namespace_index *dst, void nd_label_copy(struct nvdimm_drvdata *ndd, struct nd_namespace_index *dst,
struct nd_namespace_index *src) struct nd_namespace_index *src)
{ {
...@@ -210,7 +260,22 @@ static struct nd_namespace_label *nd_label_base(struct nvdimm_drvdata *ndd) ...@@ -210,7 +260,22 @@ static struct nd_namespace_label *nd_label_base(struct nvdimm_drvdata *ndd)
static int to_slot(struct nvdimm_drvdata *ndd, static int to_slot(struct nvdimm_drvdata *ndd,
struct nd_namespace_label *nd_label) struct nd_namespace_label *nd_label)
{ {
return nd_label - nd_label_base(ndd); unsigned long label, base;
label = (unsigned long) nd_label;
base = (unsigned long) nd_label_base(ndd);
return (label - base) / sizeof_namespace_label(ndd);
}
static struct nd_namespace_label *to_label(struct nvdimm_drvdata *ndd, int slot)
{
unsigned long label, base;
base = (unsigned long) nd_label_base(ndd);
label = base + sizeof_namespace_label(ndd) * slot;
return (struct nd_namespace_label *) label;
} }
#define for_each_clear_bit_le(bit, addr, size) \ #define for_each_clear_bit_le(bit, addr, size) \
...@@ -268,7 +333,8 @@ static bool preamble_next(struct nvdimm_drvdata *ndd, ...@@ -268,7 +333,8 @@ static bool preamble_next(struct nvdimm_drvdata *ndd,
free, nslot); free, nslot);
} }
static bool slot_valid(struct nd_namespace_label *nd_label, u32 slot) static bool slot_valid(struct nvdimm_drvdata *ndd,
struct nd_namespace_label *nd_label, u32 slot)
{ {
/* check that we are written where we expect to be written */ /* check that we are written where we expect to be written */
if (slot != __le32_to_cpu(nd_label->slot)) if (slot != __le32_to_cpu(nd_label->slot))
...@@ -279,6 +345,21 @@ static bool slot_valid(struct nd_namespace_label *nd_label, u32 slot) ...@@ -279,6 +345,21 @@ static bool slot_valid(struct nd_namespace_label *nd_label, u32 slot)
| __le64_to_cpu(nd_label->rawsize)) % SZ_4K) | __le64_to_cpu(nd_label->rawsize)) % SZ_4K)
return false; return false;
/* check checksum */
if (namespace_label_has(ndd, checksum)) {
u64 sum, sum_save;
sum_save = __le64_to_cpu(nd_label->checksum);
nd_label->checksum = __cpu_to_le64(0);
sum = nd_fletcher64(nd_label, sizeof_namespace_label(ndd), 1);
nd_label->checksum = __cpu_to_le64(sum_save);
if (sum != sum_save) {
dev_dbg(ndd->dev, "%s fail checksum. slot: %d expect: %#llx\n",
__func__, slot, sum);
return false;
}
}
return true; return true;
} }
...@@ -299,9 +380,9 @@ int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd) ...@@ -299,9 +380,9 @@ int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd)
struct resource *res; struct resource *res;
u32 flags; u32 flags;
nd_label = nd_label_base(ndd) + slot; nd_label = to_label(ndd, slot);
if (!slot_valid(nd_label, slot)) if (!slot_valid(ndd, nd_label, slot))
continue; continue;
memcpy(label_uuid, nd_label->uuid, NSLABEL_UUID_LEN); memcpy(label_uuid, nd_label->uuid, NSLABEL_UUID_LEN);
...@@ -331,9 +412,9 @@ int nd_label_active_count(struct nvdimm_drvdata *ndd) ...@@ -331,9 +412,9 @@ int nd_label_active_count(struct nvdimm_drvdata *ndd)
for_each_clear_bit_le(slot, free, nslot) { for_each_clear_bit_le(slot, free, nslot) {
struct nd_namespace_label *nd_label; struct nd_namespace_label *nd_label;
nd_label = nd_label_base(ndd) + slot; nd_label = to_label(ndd, slot);
if (!slot_valid(nd_label, slot)) { if (!slot_valid(ndd, nd_label, slot)) {
u32 label_slot = __le32_to_cpu(nd_label->slot); u32 label_slot = __le32_to_cpu(nd_label->slot);
u64 size = __le64_to_cpu(nd_label->rawsize); u64 size = __le64_to_cpu(nd_label->rawsize);
u64 dpa = __le64_to_cpu(nd_label->dpa); u64 dpa = __le64_to_cpu(nd_label->dpa);
...@@ -360,12 +441,12 @@ struct nd_namespace_label *nd_label_active(struct nvdimm_drvdata *ndd, int n) ...@@ -360,12 +441,12 @@ struct nd_namespace_label *nd_label_active(struct nvdimm_drvdata *ndd, int n)
for_each_clear_bit_le(slot, free, nslot) { for_each_clear_bit_le(slot, free, nslot) {
struct nd_namespace_label *nd_label; struct nd_namespace_label *nd_label;
nd_label = nd_label_base(ndd) + slot; nd_label = to_label(ndd, slot);
if (!slot_valid(nd_label, slot)) if (!slot_valid(ndd, nd_label, slot))
continue; continue;
if (n-- == 0) if (n-- == 0)
return nd_label_base(ndd) + slot; return to_label(ndd, slot);
} }
return NULL; return NULL;
...@@ -437,7 +518,8 @@ static int nd_label_write_index(struct nvdimm_drvdata *ndd, int index, u32 seq, ...@@ -437,7 +518,8 @@ static int nd_label_write_index(struct nvdimm_drvdata *ndd, int index, u32 seq,
nslot = __le32_to_cpu(nsindex->nslot); nslot = __le32_to_cpu(nsindex->nslot);
memcpy(nsindex->sig, NSINDEX_SIGNATURE, NSINDEX_SIG_LEN); memcpy(nsindex->sig, NSINDEX_SIGNATURE, NSINDEX_SIG_LEN);
nsindex->flags = __cpu_to_le32(0); memset(&nsindex->flags, 0, 3);
nsindex->labelsize = sizeof_namespace_label(ndd) >> 8;
nsindex->seq = __cpu_to_le32(seq); nsindex->seq = __cpu_to_le32(seq);
offset = (unsigned long) nsindex offset = (unsigned long) nsindex
- (unsigned long) to_namespace_index(ndd, 0); - (unsigned long) to_namespace_index(ndd, 0);
...@@ -452,7 +534,10 @@ static int nd_label_write_index(struct nvdimm_drvdata *ndd, int index, u32 seq, ...@@ -452,7 +534,10 @@ static int nd_label_write_index(struct nvdimm_drvdata *ndd, int index, u32 seq,
nsindex->labeloff = __cpu_to_le64(offset); nsindex->labeloff = __cpu_to_le64(offset);
nsindex->nslot = __cpu_to_le32(nslot); nsindex->nslot = __cpu_to_le32(nslot);
nsindex->major = __cpu_to_le16(1); nsindex->major = __cpu_to_le16(1);
nsindex->minor = __cpu_to_le16(1); if (sizeof_namespace_label(ndd) < 256)
nsindex->minor = __cpu_to_le16(1);
else
nsindex->minor = __cpu_to_le16(2);
nsindex->checksum = __cpu_to_le64(0); nsindex->checksum = __cpu_to_le64(0);
if (flags & ND_NSINDEX_INIT) { if (flags & ND_NSINDEX_INIT) {
unsigned long *free = (unsigned long *) nsindex->free; unsigned long *free = (unsigned long *) nsindex->free;
...@@ -490,11 +575,49 @@ static unsigned long nd_label_offset(struct nvdimm_drvdata *ndd, ...@@ -490,11 +575,49 @@ static unsigned long nd_label_offset(struct nvdimm_drvdata *ndd,
- (unsigned long) to_namespace_index(ndd, 0); - (unsigned long) to_namespace_index(ndd, 0);
} }
enum nvdimm_claim_class to_nvdimm_cclass(guid_t *guid)
{
if (guid_equal(guid, &nvdimm_btt_guid))
return NVDIMM_CCLASS_BTT;
else if (guid_equal(guid, &nvdimm_btt2_guid))
return NVDIMM_CCLASS_BTT2;
else if (guid_equal(guid, &nvdimm_pfn_guid))
return NVDIMM_CCLASS_PFN;
else if (guid_equal(guid, &nvdimm_dax_guid))
return NVDIMM_CCLASS_DAX;
else if (guid_equal(guid, &guid_null))
return NVDIMM_CCLASS_NONE;
return NVDIMM_CCLASS_UNKNOWN;
}
static const guid_t *to_abstraction_guid(enum nvdimm_claim_class claim_class,
guid_t *target)
{
if (claim_class == NVDIMM_CCLASS_BTT)
return &nvdimm_btt_guid;
else if (claim_class == NVDIMM_CCLASS_BTT2)
return &nvdimm_btt2_guid;
else if (claim_class == NVDIMM_CCLASS_PFN)
return &nvdimm_pfn_guid;
else if (claim_class == NVDIMM_CCLASS_DAX)
return &nvdimm_dax_guid;
else if (claim_class == NVDIMM_CCLASS_UNKNOWN) {
/*
* If we're modifying a namespace for which we don't
* know the claim_class, don't touch the existing guid.
*/
return target;
} else
return &guid_null;
}
static int __pmem_label_update(struct nd_region *nd_region, static int __pmem_label_update(struct nd_region *nd_region,
struct nd_mapping *nd_mapping, struct nd_namespace_pmem *nspm, struct nd_mapping *nd_mapping, struct nd_namespace_pmem *nspm,
int pos) int pos)
{ {
u64 cookie = nd_region_interleave_set_cookie(nd_region); struct nd_namespace_common *ndns = &nspm->nsio.common;
struct nd_interleave_set *nd_set = nd_region->nd_set;
struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
struct nd_label_ent *label_ent, *victim = NULL; struct nd_label_ent *label_ent, *victim = NULL;
struct nd_namespace_label *nd_label; struct nd_namespace_label *nd_label;
...@@ -504,11 +627,13 @@ static int __pmem_label_update(struct nd_region *nd_region, ...@@ -504,11 +627,13 @@ static int __pmem_label_update(struct nd_region *nd_region,
unsigned long *free; unsigned long *free;
u32 nslot, slot; u32 nslot, slot;
size_t offset; size_t offset;
u64 cookie;
int rc; int rc;
if (!preamble_next(ndd, &nsindex, &free, &nslot)) if (!preamble_next(ndd, &nsindex, &free, &nslot))
return -ENXIO; return -ENXIO;
cookie = nd_region_interleave_set_cookie(nd_region, nsindex);
nd_label_gen_id(&label_id, nspm->uuid, 0); nd_label_gen_id(&label_id, nspm->uuid, 0);
for_each_dpa_resource(ndd, res) for_each_dpa_resource(ndd, res)
if (strcmp(res->name, label_id.id) == 0) if (strcmp(res->name, label_id.id) == 0)
...@@ -525,8 +650,8 @@ static int __pmem_label_update(struct nd_region *nd_region, ...@@ -525,8 +650,8 @@ static int __pmem_label_update(struct nd_region *nd_region,
return -ENXIO; return -ENXIO;
dev_dbg(ndd->dev, "%s: allocated: %d\n", __func__, slot); dev_dbg(ndd->dev, "%s: allocated: %d\n", __func__, slot);
nd_label = nd_label_base(ndd) + slot; nd_label = to_label(ndd, slot);
memset(nd_label, 0, sizeof(struct nd_namespace_label)); memset(nd_label, 0, sizeof_namespace_label(ndd));
memcpy(nd_label->uuid, nspm->uuid, NSLABEL_UUID_LEN); memcpy(nd_label->uuid, nspm->uuid, NSLABEL_UUID_LEN);
if (nspm->alt_name) if (nspm->alt_name)
memcpy(nd_label->name, nspm->alt_name, NSLABEL_NAME_LEN); memcpy(nd_label->name, nspm->alt_name, NSLABEL_NAME_LEN);
...@@ -535,14 +660,28 @@ static int __pmem_label_update(struct nd_region *nd_region, ...@@ -535,14 +660,28 @@ static int __pmem_label_update(struct nd_region *nd_region,
nd_label->position = __cpu_to_le16(pos); nd_label->position = __cpu_to_le16(pos);
nd_label->isetcookie = __cpu_to_le64(cookie); nd_label->isetcookie = __cpu_to_le64(cookie);
nd_label->rawsize = __cpu_to_le64(resource_size(res)); nd_label->rawsize = __cpu_to_le64(resource_size(res));
nd_label->lbasize = __cpu_to_le64(nspm->lbasize);
nd_label->dpa = __cpu_to_le64(res->start); nd_label->dpa = __cpu_to_le64(res->start);
nd_label->slot = __cpu_to_le32(slot); nd_label->slot = __cpu_to_le32(slot);
if (namespace_label_has(ndd, type_guid))
guid_copy(&nd_label->type_guid, &nd_set->type_guid);
if (namespace_label_has(ndd, abstraction_guid))
guid_copy(&nd_label->abstraction_guid,
to_abstraction_guid(ndns->claim_class,
&nd_label->abstraction_guid));
if (namespace_label_has(ndd, checksum)) {
u64 sum;
nd_label->checksum = __cpu_to_le64(0);
sum = nd_fletcher64(nd_label, sizeof_namespace_label(ndd), 1);
nd_label->checksum = __cpu_to_le64(sum);
}
nd_dbg_dpa(nd_region, ndd, res, "%s\n", __func__); nd_dbg_dpa(nd_region, ndd, res, "%s\n", __func__);
/* update label */ /* update label */
offset = nd_label_offset(ndd, nd_label); offset = nd_label_offset(ndd, nd_label);
rc = nvdimm_set_config_data(ndd, offset, nd_label, rc = nvdimm_set_config_data(ndd, offset, nd_label,
sizeof(struct nd_namespace_label)); sizeof_namespace_label(ndd));
if (rc < 0) if (rc < 0)
return rc; return rc;
...@@ -624,6 +763,8 @@ static int __blk_label_update(struct nd_region *nd_region, ...@@ -624,6 +763,8 @@ static int __blk_label_update(struct nd_region *nd_region,
int num_labels) int num_labels)
{ {
int i, alloc, victims, nfree, old_num_resources, nlabel, rc = -ENXIO; int i, alloc, victims, nfree, old_num_resources, nlabel, rc = -ENXIO;
struct nd_interleave_set *nd_set = nd_region->nd_set;
struct nd_namespace_common *ndns = &nsblk->common;
struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
struct nd_namespace_label *nd_label; struct nd_namespace_label *nd_label;
struct nd_label_ent *label_ent, *e; struct nd_label_ent *label_ent, *e;
...@@ -632,6 +773,7 @@ static int __blk_label_update(struct nd_region *nd_region, ...@@ -632,6 +773,7 @@ static int __blk_label_update(struct nd_region *nd_region,
struct resource *res, **old_res_list; struct resource *res, **old_res_list;
struct nd_label_id label_id; struct nd_label_id label_id;
u8 uuid[NSLABEL_UUID_LEN]; u8 uuid[NSLABEL_UUID_LEN];
int min_dpa_idx = 0;
LIST_HEAD(list); LIST_HEAD(list);
u32 nslot, slot; u32 nslot, slot;
...@@ -668,7 +810,7 @@ static int __blk_label_update(struct nd_region *nd_region, ...@@ -668,7 +810,7 @@ static int __blk_label_update(struct nd_region *nd_region,
/* mark unused labels for garbage collection */ /* mark unused labels for garbage collection */
for_each_clear_bit_le(slot, free, nslot) { for_each_clear_bit_le(slot, free, nslot) {
nd_label = nd_label_base(ndd) + slot; nd_label = to_label(ndd, slot);
memcpy(uuid, nd_label->uuid, NSLABEL_UUID_LEN); memcpy(uuid, nd_label->uuid, NSLABEL_UUID_LEN);
if (memcmp(uuid, nsblk->uuid, NSLABEL_UUID_LEN) != 0) if (memcmp(uuid, nsblk->uuid, NSLABEL_UUID_LEN) != 0)
continue; continue;
...@@ -703,6 +845,18 @@ static int __blk_label_update(struct nd_region *nd_region, ...@@ -703,6 +845,18 @@ static int __blk_label_update(struct nd_region *nd_region,
} }
} }
/*
* Find the resource associated with the first label in the set
* per the v1.2 namespace specification.
*/
for (i = 0; i < nsblk->num_resources; i++) {
struct resource *min = nsblk->res[min_dpa_idx];
res = nsblk->res[i];
if (res->start < min->start)
min_dpa_idx = i;
}
for (i = 0; i < nsblk->num_resources; i++) { for (i = 0; i < nsblk->num_resources; i++) {
size_t offset; size_t offset;
...@@ -714,25 +868,58 @@ static int __blk_label_update(struct nd_region *nd_region, ...@@ -714,25 +868,58 @@ static int __blk_label_update(struct nd_region *nd_region,
goto abort; goto abort;
dev_dbg(ndd->dev, "%s: allocated: %d\n", __func__, slot); dev_dbg(ndd->dev, "%s: allocated: %d\n", __func__, slot);
nd_label = nd_label_base(ndd) + slot; nd_label = to_label(ndd, slot);
memset(nd_label, 0, sizeof(struct nd_namespace_label)); memset(nd_label, 0, sizeof_namespace_label(ndd));
memcpy(nd_label->uuid, nsblk->uuid, NSLABEL_UUID_LEN); memcpy(nd_label->uuid, nsblk->uuid, NSLABEL_UUID_LEN);
if (nsblk->alt_name) if (nsblk->alt_name)
memcpy(nd_label->name, nsblk->alt_name, memcpy(nd_label->name, nsblk->alt_name,
NSLABEL_NAME_LEN); NSLABEL_NAME_LEN);
nd_label->flags = __cpu_to_le32(NSLABEL_FLAG_LOCAL); nd_label->flags = __cpu_to_le32(NSLABEL_FLAG_LOCAL);
nd_label->nlabel = __cpu_to_le16(0); /* N/A */
nd_label->position = __cpu_to_le16(0); /* N/A */ /*
nd_label->isetcookie = __cpu_to_le64(0); /* N/A */ * Use the presence of the type_guid as a flag to
* determine isetcookie usage and nlabel + position
* policy for blk-aperture namespaces.
*/
if (namespace_label_has(ndd, type_guid)) {
if (i == min_dpa_idx) {
nd_label->nlabel = __cpu_to_le16(nsblk->num_resources);
nd_label->position = __cpu_to_le16(0);
} else {
nd_label->nlabel = __cpu_to_le16(0xffff);
nd_label->position = __cpu_to_le16(0xffff);
}
nd_label->isetcookie = __cpu_to_le64(nd_set->cookie2);
} else {
nd_label->nlabel = __cpu_to_le16(0); /* N/A */
nd_label->position = __cpu_to_le16(0); /* N/A */
nd_label->isetcookie = __cpu_to_le64(0); /* N/A */
}
nd_label->dpa = __cpu_to_le64(res->start); nd_label->dpa = __cpu_to_le64(res->start);
nd_label->rawsize = __cpu_to_le64(resource_size(res)); nd_label->rawsize = __cpu_to_le64(resource_size(res));
nd_label->lbasize = __cpu_to_le64(nsblk->lbasize); nd_label->lbasize = __cpu_to_le64(nsblk->lbasize);
nd_label->slot = __cpu_to_le32(slot); nd_label->slot = __cpu_to_le32(slot);
if (namespace_label_has(ndd, type_guid))
guid_copy(&nd_label->type_guid, &nd_set->type_guid);
if (namespace_label_has(ndd, abstraction_guid))
guid_copy(&nd_label->abstraction_guid,
to_abstraction_guid(ndns->claim_class,
&nd_label->abstraction_guid));
if (namespace_label_has(ndd, checksum)) {
u64 sum;
nd_label->checksum = __cpu_to_le64(0);
sum = nd_fletcher64(nd_label,
sizeof_namespace_label(ndd), 1);
nd_label->checksum = __cpu_to_le64(sum);
}
/* update label */ /* update label */
offset = nd_label_offset(ndd, nd_label); offset = nd_label_offset(ndd, nd_label);
rc = nvdimm_set_config_data(ndd, offset, nd_label, rc = nvdimm_set_config_data(ndd, offset, nd_label,
sizeof(struct nd_namespace_label)); sizeof_namespace_label(ndd));
if (rc < 0) if (rc < 0)
goto abort; goto abort;
} }
...@@ -790,7 +977,7 @@ static int __blk_label_update(struct nd_region *nd_region, ...@@ -790,7 +977,7 @@ static int __blk_label_update(struct nd_region *nd_region,
goto out; goto out;
} }
for_each_clear_bit_le(slot, free, nslot) { for_each_clear_bit_le(slot, free, nslot) {
nd_label = nd_label_base(ndd) + slot; nd_label = to_label(ndd, slot);
memcpy(uuid, nd_label->uuid, NSLABEL_UUID_LEN); memcpy(uuid, nd_label->uuid, NSLABEL_UUID_LEN);
if (memcmp(uuid, nsblk->uuid, NSLABEL_UUID_LEN) != 0) if (memcmp(uuid, nsblk->uuid, NSLABEL_UUID_LEN) != 0)
continue; continue;
...@@ -973,3 +1160,13 @@ int nd_blk_namespace_label_update(struct nd_region *nd_region, ...@@ -973,3 +1160,13 @@ int nd_blk_namespace_label_update(struct nd_region *nd_region,
return __blk_label_update(nd_region, nd_mapping, nsblk, count); return __blk_label_update(nd_region, nd_mapping, nsblk, count);
} }
int __init nd_label_init(void)
{
WARN_ON(guid_parse(NVDIMM_BTT_GUID, &nvdimm_btt_guid));
WARN_ON(guid_parse(NVDIMM_BTT2_GUID, &nvdimm_btt2_guid));
WARN_ON(guid_parse(NVDIMM_PFN_GUID, &nvdimm_pfn_guid));
WARN_ON(guid_parse(NVDIMM_DAX_GUID, &nvdimm_dax_guid));
return 0;
}
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#include <linux/ndctl.h> #include <linux/ndctl.h>
#include <linux/sizes.h> #include <linux/sizes.h>
#include <linux/uuid.h>
#include <linux/io.h> #include <linux/io.h>
enum { enum {
...@@ -60,7 +61,8 @@ static const char NSINDEX_SIGNATURE[] = "NAMESPACE_INDEX\0"; ...@@ -60,7 +61,8 @@ static const char NSINDEX_SIGNATURE[] = "NAMESPACE_INDEX\0";
*/ */
struct nd_namespace_index { struct nd_namespace_index {
u8 sig[NSINDEX_SIG_LEN]; u8 sig[NSINDEX_SIG_LEN];
__le32 flags; u8 flags[3];
u8 labelsize;
__le32 seq; __le32 seq;
__le64 myoff; __le64 myoff;
__le64 mysize; __le64 mysize;
...@@ -98,9 +100,23 @@ struct nd_namespace_label { ...@@ -98,9 +100,23 @@ struct nd_namespace_label {
__le64 dpa; __le64 dpa;
__le64 rawsize; __le64 rawsize;
__le32 slot; __le32 slot;
__le32 unused; /*
* Accessing fields past this point should be gated by a
* namespace_label_has() check.
*/
u8 align;
u8 reserved[3];
guid_t type_guid;
guid_t abstraction_guid;
u8 reserved2[88];
__le64 checksum;
}; };
#define NVDIMM_BTT_GUID "8aed63a2-29a2-4c66-8b12-f05d15d3922a"
#define NVDIMM_BTT2_GUID "18633bfc-1735-4217-8ac9-17239282d3f8"
#define NVDIMM_PFN_GUID "266400ba-fb9f-4677-bcb0-968f11d0d225"
#define NVDIMM_DAX_GUID "97a86d9c-3cdd-4eda-986f-5068b4f80088"
/** /**
* struct nd_label_id - identifier string for dpa allocation * struct nd_label_id - identifier string for dpa allocation
* @id: "{blk|pmem}-<namespace uuid>" * @id: "{blk|pmem}-<namespace uuid>"
...@@ -131,6 +147,7 @@ struct nd_namespace_label *nd_label_active(struct nvdimm_drvdata *ndd, int n); ...@@ -131,6 +147,7 @@ struct nd_namespace_label *nd_label_active(struct nvdimm_drvdata *ndd, int n);
u32 nd_label_alloc_slot(struct nvdimm_drvdata *ndd); u32 nd_label_alloc_slot(struct nvdimm_drvdata *ndd);
bool nd_label_free_slot(struct nvdimm_drvdata *ndd, u32 slot); bool nd_label_free_slot(struct nvdimm_drvdata *ndd, u32 slot);
u32 nd_label_nfree(struct nvdimm_drvdata *ndd); u32 nd_label_nfree(struct nvdimm_drvdata *ndd);
enum nvdimm_claim_class to_nvdimm_cclass(guid_t *guid);
struct nd_region; struct nd_region;
struct nd_namespace_pmem; struct nd_namespace_pmem;
struct nd_namespace_blk; struct nd_namespace_blk;
......
...@@ -14,10 +14,10 @@ ...@@ -14,10 +14,10 @@
#include <linux/device.h> #include <linux/device.h>
#include <linux/sort.h> #include <linux/sort.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/pmem.h>
#include <linux/list.h> #include <linux/list.h>
#include <linux/nd.h> #include <linux/nd.h>
#include "nd-core.h" #include "nd-core.h"
#include "pmem.h"
#include "nd.h" #include "nd.h"
static void namespace_io_release(struct device *dev) static void namespace_io_release(struct device *dev)
...@@ -112,7 +112,7 @@ static int is_uuid_busy(struct device *dev, void *data) ...@@ -112,7 +112,7 @@ static int is_uuid_busy(struct device *dev, void *data)
static int is_namespace_uuid_busy(struct device *dev, void *data) static int is_namespace_uuid_busy(struct device *dev, void *data)
{ {
if (is_nd_pmem(dev) || is_nd_blk(dev)) if (is_nd_region(dev))
return device_for_each_child(dev, data, is_uuid_busy); return device_for_each_child(dev, data, is_uuid_busy);
return 0; return 0;
} }
...@@ -155,14 +155,33 @@ bool pmem_should_map_pages(struct device *dev) ...@@ -155,14 +155,33 @@ bool pmem_should_map_pages(struct device *dev)
IORES_DESC_NONE) == REGION_MIXED) IORES_DESC_NONE) == REGION_MIXED)
return false; return false;
#ifdef ARCH_MEMREMAP_PMEM
return ARCH_MEMREMAP_PMEM == MEMREMAP_WB; return ARCH_MEMREMAP_PMEM == MEMREMAP_WB;
#else
return false;
#endif
} }
EXPORT_SYMBOL(pmem_should_map_pages); EXPORT_SYMBOL(pmem_should_map_pages);
/**
 * pmem_sector_size() - pick the logical sector size for a namespace
 * @ndns: namespace whose sector size is being queried
 *
 * Return: 4096 when @ndns is a pmem namespace whose lbasize is exactly
 * 4096, otherwise 512.  An lbasize other than 0, 512 or 4096 triggers a
 * dev_WARN() and still yields the 512 default.
 */
unsigned int pmem_sector_size(struct nd_namespace_common *ndns)
{
	struct nd_namespace_pmem *nspm;

	/*
	 * There is no namespace label (is_namespace_io()), so only the
	 * default sector size applies.
	 */
	if (!is_namespace_pmem(&ndns->dev))
		return 512;

	nspm = to_nd_namespace_pmem(&ndns->dev);
	switch (nspm->lbasize) {
	case 0:
	case 512:
		/* the label indicates the default sector size */
		break;
	case 4096:
		return 4096;
	default:
		/* lbasize is an unsigned long, hence %lu rather than %ld */
		dev_WARN(&ndns->dev, "unsupported sector size: %lu\n",
				nspm->lbasize);
		break;
	}

	return 512;
}
EXPORT_SYMBOL(pmem_sector_size);
const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns, const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns,
char *name) char *name)
{ {
...@@ -787,7 +806,7 @@ static int __reserve_free_pmem(struct device *dev, void *data) ...@@ -787,7 +806,7 @@ static int __reserve_free_pmem(struct device *dev, void *data)
struct nd_label_id label_id; struct nd_label_id label_id;
int i; int i;
if (!is_nd_pmem(dev)) if (!is_memory(dev))
return 0; return 0;
nd_region = to_nd_region(dev); nd_region = to_nd_region(dev);
...@@ -1283,28 +1302,49 @@ static ssize_t resource_show(struct device *dev, ...@@ -1283,28 +1302,49 @@ static ssize_t resource_show(struct device *dev,
} }
static DEVICE_ATTR_RO(resource); static DEVICE_ATTR_RO(resource);
static const unsigned long ns_lbasize_supported[] = { 512, 520, 528, static const unsigned long blk_lbasize_supported[] = { 512, 520, 528,
4096, 4104, 4160, 4224, 0 }; 4096, 4104, 4160, 4224, 0 };
static const unsigned long pmem_lbasize_supported[] = { 512, 4096, 0 };
static ssize_t sector_size_show(struct device *dev, static ssize_t sector_size_show(struct device *dev,
struct device_attribute *attr, char *buf) struct device_attribute *attr, char *buf)
{ {
struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev); if (is_namespace_blk(dev)) {
struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
if (!is_namespace_blk(dev)) return nd_sector_size_show(nsblk->lbasize,
return -ENXIO; blk_lbasize_supported, buf);
}
if (is_namespace_pmem(dev)) {
struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
return nd_sector_size_show(nsblk->lbasize, ns_lbasize_supported, buf); return nd_sector_size_show(nspm->lbasize,
pmem_lbasize_supported, buf);
}
return -ENXIO;
} }
static ssize_t sector_size_store(struct device *dev, static ssize_t sector_size_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t len) struct device_attribute *attr, const char *buf, size_t len)
{ {
struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
struct nd_region *nd_region = to_nd_region(dev->parent); struct nd_region *nd_region = to_nd_region(dev->parent);
const unsigned long *supported;
unsigned long *lbasize;
ssize_t rc = 0; ssize_t rc = 0;
if (!is_namespace_blk(dev)) if (is_namespace_blk(dev)) {
struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
lbasize = &nsblk->lbasize;
supported = blk_lbasize_supported;
} else if (is_namespace_pmem(dev)) {
struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
lbasize = &nspm->lbasize;
supported = pmem_lbasize_supported;
} else
return -ENXIO; return -ENXIO;
device_lock(dev); device_lock(dev);
...@@ -1312,8 +1352,7 @@ static ssize_t sector_size_store(struct device *dev, ...@@ -1312,8 +1352,7 @@ static ssize_t sector_size_store(struct device *dev,
if (to_ndns(dev)->claim) if (to_ndns(dev)->claim)
rc = -EBUSY; rc = -EBUSY;
if (rc >= 0) if (rc >= 0)
rc = nd_sector_size_store(dev, buf, &nsblk->lbasize, rc = nd_sector_size_store(dev, buf, lbasize, supported);
ns_lbasize_supported);
if (rc >= 0) if (rc >= 0)
rc = nd_namespace_label_update(nd_region, dev); rc = nd_namespace_label_update(nd_region, dev);
dev_dbg(dev, "%s: result: %zd %s: %s%s", __func__, dev_dbg(dev, "%s: result: %zd %s: %s%s", __func__,
...@@ -1368,6 +1407,58 @@ static ssize_t dpa_extents_show(struct device *dev, ...@@ -1368,6 +1407,58 @@ static ssize_t dpa_extents_show(struct device *dev,
} }
static DEVICE_ATTR_RO(dpa_extents); static DEVICE_ATTR_RO(dpa_extents);
/*
 * btt_claim_class() - choose the BTT claim class for a namespace
 * @dev: namespace device; its parent is converted with to_nd_region()
 *
 * Inspects the current namespace index block of every mapping in the
 * parent region and returns NVDIMM_CCLASS_BTT, NVDIMM_CCLASS_BTT2, or
 * -ENXIO when the mappings disagree about their label version.
 */
static int btt_claim_class(struct device *dev)
{
	struct nd_region *nd_region = to_nd_region(dev->parent);
	int i, loop_bitmask = 0;

	for (i = 0; i < nd_region->ndr_mappings; i++) {
		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
		struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
		struct nd_namespace_index *nsindex;

		/*
		 * NOTE(review): to_ndd() is assumed to be able to return
		 * NULL when a DIMM has no label data attached - confirm
		 * against its definition.  Without label support the only
		 * possible BTT format is v1.1, so fall back to the same
		 * result as the "loop never entered" case instead of
		 * dereferencing a NULL ndd below.
		 */
		if (!ndd) {
			loop_bitmask = 0;
			break;
		}

		nsindex = to_namespace_index(ndd, ndd->ns_current);
		if (nsindex == NULL)
			loop_bitmask |= 1;
		else {
			/* check whether existing labels are v1.1 or v1.2 */
			if (__le16_to_cpu(nsindex->major) == 1
					&& __le16_to_cpu(nsindex->minor) == 1)
				loop_bitmask |= 2;
			else
				loop_bitmask |= 4;
		}
	}
	/*
	 * If nsindex is null loop_bitmask's bit 0 will be set, and if an index
	 * block is found, a v1.1 label for any mapping will set bit 1, and a
	 * v1.2 label will set bit 2.
	 *
	 * At the end of the loop, at most one of the three bits must be set.
	 * If multiple bits were set, it means the different mappings disagree
	 * about their labels, and this must be cleaned up first.
	 *
	 * If all the label index blocks are found to agree, nsindex of NULL
	 * implies labels haven't been initialized yet, and when they will,
	 * they will be of the 1.2 format, so we can assume BTT2.0
	 *
	 * If 1.1 labels are found, we enforce BTT1.1, and if 1.2 labels are
	 * found, we enforce BTT2.0
	 *
	 * If the loop was never entered, default to BTT1.1 (legacy namespaces)
	 */
	switch (loop_bitmask) {
	case 0:
	case 2:
		return NVDIMM_CCLASS_BTT;
	case 1:
	case 4:
		return NVDIMM_CCLASS_BTT2;
	default:
		return -ENXIO;
	}
}
static ssize_t holder_show(struct device *dev, static ssize_t holder_show(struct device *dev,
struct device_attribute *attr, char *buf) struct device_attribute *attr, char *buf)
{ {
...@@ -1382,6 +1473,74 @@ static ssize_t holder_show(struct device *dev, ...@@ -1382,6 +1473,74 @@ static ssize_t holder_show(struct device *dev,
} }
static DEVICE_ATTR_RO(holder); static DEVICE_ATTR_RO(holder);
/*
 * __holder_class_store() - parse a holder_class string into a claim class
 * @dev: namespace device being configured
 * @buf: user supplied string; "btt", "pfn", "dax" or "" with an optional
 *       trailing newline
 *
 * Return: 0 on success, -EBUSY when the namespace has a driver bound or
 * is already claimed, -EINVAL for an unrecognised string, or the negative
 * error reported by btt_claim_class().
 */
static ssize_t __holder_class_store(struct device *dev, const char *buf)
{
	struct nd_namespace_common *ndns = to_ndns(dev);

	if (dev->driver || ndns->claim)
		return -EBUSY;

	if (strcmp(buf, "btt") == 0 || strcmp(buf, "btt\n") == 0) {
		/*
		 * btt_claim_class() can return a negative errno.  Check it
		 * in a plain int before touching ndns->claim_class: if that
		 * field is an enum the compiler represents as unsigned, a
		 * "< 0" test on it never fires, and the error value would be
		 * left behind as the namespace's claim class.
		 */
		int rc = btt_claim_class(dev);

		if (rc < 0)
			return rc;
		ndns->claim_class = rc;
	} else if (strcmp(buf, "pfn") == 0 || strcmp(buf, "pfn\n") == 0)
		ndns->claim_class = NVDIMM_CCLASS_PFN;
	else if (strcmp(buf, "dax") == 0 || strcmp(buf, "dax\n") == 0)
		ndns->claim_class = NVDIMM_CCLASS_DAX;
	else if (strcmp(buf, "") == 0 || strcmp(buf, "\n") == 0)
		ndns->claim_class = NVDIMM_CCLASS_NONE;
	else
		return -EINVAL;

	return 0;
}
/*
 * holder_class_store() - sysfs store handler for the holder_class attribute
 * @dev: namespace device the attribute belongs to
 * @attr: unused
 * @buf: user supplied string, handed to __holder_class_store()
 * @len: number of bytes written by the caller
 *
 * Returns @len on success, otherwise the negative error from either
 * __holder_class_store() or nd_namespace_label_update().
 */
static ssize_t holder_class_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t len)
{
struct nd_region *nd_region = to_nd_region(dev->parent);
ssize_t rc;
/* device lock is taken first, then the bus lock; released in reverse */
device_lock(dev);
nvdimm_bus_lock(dev);
wait_nvdimm_bus_probe_idle(dev);
rc = __holder_class_store(dev, buf);
/* only update the label when the new class was accepted */
if (rc >= 0)
rc = nd_namespace_label_update(nd_region, dev);
dev_dbg(dev, "%s: %s(%zd)\n", __func__, rc < 0 ? "fail " : "", rc);
nvdimm_bus_unlock(dev);
device_unlock(dev);
/* a successful sysfs write reports the full input length as consumed */
return rc < 0 ? rc : len;
}
/*
 * holder_class_show() - sysfs show handler for the holder_class attribute
 * @dev: namespace device the attribute belongs to
 * @attr: unused
 * @buf: output buffer
 *
 * Prints the namespace's claim class as text while holding the device
 * lock: an empty line for NVDIMM_CCLASS_NONE, "btt" for either BTT class,
 * "pfn", "dax", or "<unknown>" for any other value.
 *
 * Returns the number of bytes written to @buf.
 */
static ssize_t holder_class_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct nd_namespace_common *ndns = to_ndns(dev);
	ssize_t count;

	device_lock(dev);
	switch (ndns->claim_class) {
	case NVDIMM_CCLASS_NONE:
		count = sprintf(buf, "\n");
		break;
	case NVDIMM_CCLASS_BTT:
	case NVDIMM_CCLASS_BTT2:
		/* both BTT flavours are reported under the same name */
		count = sprintf(buf, "btt\n");
		break;
	case NVDIMM_CCLASS_PFN:
		count = sprintf(buf, "pfn\n");
		break;
	case NVDIMM_CCLASS_DAX:
		count = sprintf(buf, "dax\n");
		break;
	default:
		count = sprintf(buf, "<unknown>\n");
		break;
	}
	device_unlock(dev);

	return count;
}
static DEVICE_ATTR_RW(holder_class);
static ssize_t mode_show(struct device *dev, static ssize_t mode_show(struct device *dev,
struct device_attribute *attr, char *buf) struct device_attribute *attr, char *buf)
{ {
...@@ -1440,6 +1599,7 @@ static struct attribute *nd_namespace_attributes[] = { ...@@ -1440,6 +1599,7 @@ static struct attribute *nd_namespace_attributes[] = {
&dev_attr_force_raw.attr, &dev_attr_force_raw.attr,
&dev_attr_sector_size.attr, &dev_attr_sector_size.attr,
&dev_attr_dpa_extents.attr, &dev_attr_dpa_extents.attr,
&dev_attr_holder_class.attr,
NULL, NULL,
}; };
...@@ -1458,14 +1618,12 @@ static umode_t namespace_visible(struct kobject *kobj, ...@@ -1458,14 +1618,12 @@ static umode_t namespace_visible(struct kobject *kobj,
if (a == &dev_attr_size.attr) if (a == &dev_attr_size.attr)
return 0644; return 0644;
if (is_namespace_pmem(dev) && a == &dev_attr_sector_size.attr)
return 0;
return a->mode; return a->mode;
} }
if (a == &dev_attr_nstype.attr || a == &dev_attr_size.attr if (a == &dev_attr_nstype.attr || a == &dev_attr_size.attr
|| a == &dev_attr_holder.attr || a == &dev_attr_holder.attr
|| a == &dev_attr_holder_class.attr
|| a == &dev_attr_force_raw.attr || a == &dev_attr_force_raw.attr
|| a == &dev_attr_mode.attr) || a == &dev_attr_mode.attr)
return a->mode; return a->mode;
...@@ -1599,6 +1757,8 @@ static bool has_uuid_at_pos(struct nd_region *nd_region, u8 *uuid, ...@@ -1599,6 +1757,8 @@ static bool has_uuid_at_pos(struct nd_region *nd_region, u8 *uuid,
for (i = 0; i < nd_region->ndr_mappings; i++) { for (i = 0; i < nd_region->ndr_mappings; i++) {
struct nd_mapping *nd_mapping = &nd_region->mapping[i]; struct nd_mapping *nd_mapping = &nd_region->mapping[i];
struct nd_interleave_set *nd_set = nd_region->nd_set;
struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
struct nd_label_ent *label_ent; struct nd_label_ent *label_ent;
bool found_uuid = false; bool found_uuid = false;
...@@ -1619,8 +1779,17 @@ static bool has_uuid_at_pos(struct nd_region *nd_region, u8 *uuid, ...@@ -1619,8 +1779,17 @@ static bool has_uuid_at_pos(struct nd_region *nd_region, u8 *uuid,
if (memcmp(nd_label->uuid, uuid, NSLABEL_UUID_LEN) != 0) if (memcmp(nd_label->uuid, uuid, NSLABEL_UUID_LEN) != 0)
continue; continue;
if (namespace_label_has(ndd, type_guid)
&& !guid_equal(&nd_set->type_guid,
&nd_label->type_guid)) {
dev_dbg(ndd->dev, "expect type_guid %pUb got %pUb\n",
nd_set->type_guid.b,
nd_label->type_guid.b);
continue;
}
if (found_uuid) { if (found_uuid) {
dev_dbg(to_ndd(nd_mapping)->dev, dev_dbg(ndd->dev,
"%s duplicate entry for uuid\n", "%s duplicate entry for uuid\n",
__func__); __func__);
return false; return false;
...@@ -1698,10 +1867,11 @@ static int select_pmem_id(struct nd_region *nd_region, u8 *pmem_id) ...@@ -1698,10 +1867,11 @@ static int select_pmem_id(struct nd_region *nd_region, u8 *pmem_id)
* @nd_label: target pmem namespace label to evaluate * @nd_label: target pmem namespace label to evaluate
*/ */
struct device *create_namespace_pmem(struct nd_region *nd_region, struct device *create_namespace_pmem(struct nd_region *nd_region,
struct nd_namespace_index *nsindex,
struct nd_namespace_label *nd_label) struct nd_namespace_label *nd_label)
{ {
u64 cookie = nd_region_interleave_set_cookie(nd_region, nsindex);
u64 altcookie = nd_region_interleave_set_altcookie(nd_region); u64 altcookie = nd_region_interleave_set_altcookie(nd_region);
u64 cookie = nd_region_interleave_set_cookie(nd_region);
struct nd_label_ent *label_ent; struct nd_label_ent *label_ent;
struct nd_namespace_pmem *nspm; struct nd_namespace_pmem *nspm;
struct nd_mapping *nd_mapping; struct nd_mapping *nd_mapping;
...@@ -1775,6 +1945,7 @@ struct device *create_namespace_pmem(struct nd_region *nd_region, ...@@ -1775,6 +1945,7 @@ struct device *create_namespace_pmem(struct nd_region *nd_region,
/* Calculate total size and populate namespace properties from label0 */ /* Calculate total size and populate namespace properties from label0 */
for (i = 0; i < nd_region->ndr_mappings; i++) { for (i = 0; i < nd_region->ndr_mappings; i++) {
struct nd_namespace_label *label0; struct nd_namespace_label *label0;
struct nvdimm_drvdata *ndd;
nd_mapping = &nd_region->mapping[i]; nd_mapping = &nd_region->mapping[i];
label_ent = list_first_entry_or_null(&nd_mapping->labels, label_ent = list_first_entry_or_null(&nd_mapping->labels,
...@@ -1794,6 +1965,12 @@ struct device *create_namespace_pmem(struct nd_region *nd_region, ...@@ -1794,6 +1965,12 @@ struct device *create_namespace_pmem(struct nd_region *nd_region,
NSLABEL_NAME_LEN, GFP_KERNEL); NSLABEL_NAME_LEN, GFP_KERNEL);
nspm->uuid = kmemdup((void __force *) label0->uuid, nspm->uuid = kmemdup((void __force *) label0->uuid,
NSLABEL_UUID_LEN, GFP_KERNEL); NSLABEL_UUID_LEN, GFP_KERNEL);
nspm->lbasize = __le64_to_cpu(label0->lbasize);
ndd = to_ndd(nd_mapping);
if (namespace_label_has(ndd, abstraction_guid))
nspm->nsio.common.claim_class
= to_nvdimm_cclass(&label0->abstraction_guid);
} }
if (!nspm->alt_name || !nspm->uuid) { if (!nspm->alt_name || !nspm->uuid) {
...@@ -1876,7 +2053,7 @@ static struct device *nd_namespace_pmem_create(struct nd_region *nd_region) ...@@ -1876,7 +2053,7 @@ static struct device *nd_namespace_pmem_create(struct nd_region *nd_region)
struct resource *res; struct resource *res;
struct device *dev; struct device *dev;
if (!is_nd_pmem(&nd_region->dev)) if (!is_memory(&nd_region->dev))
return NULL; return NULL;
nspm = kzalloc(sizeof(*nspm), GFP_KERNEL); nspm = kzalloc(sizeof(*nspm), GFP_KERNEL);
...@@ -2005,12 +2182,29 @@ struct device *create_namespace_blk(struct nd_region *nd_region, ...@@ -2005,12 +2182,29 @@ struct device *create_namespace_blk(struct nd_region *nd_region,
{ {
struct nd_mapping *nd_mapping = &nd_region->mapping[0]; struct nd_mapping *nd_mapping = &nd_region->mapping[0];
struct nd_interleave_set *nd_set = nd_region->nd_set;
struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
struct nd_namespace_blk *nsblk; struct nd_namespace_blk *nsblk;
char name[NSLABEL_NAME_LEN]; char name[NSLABEL_NAME_LEN];
struct device *dev = NULL; struct device *dev = NULL;
struct resource *res; struct resource *res;
if (namespace_label_has(ndd, type_guid)) {
if (!guid_equal(&nd_set->type_guid, &nd_label->type_guid)) {
dev_dbg(ndd->dev, "expect type_guid %pUb got %pUb\n",
nd_set->type_guid.b,
nd_label->type_guid.b);
return ERR_PTR(-EAGAIN);
}
if (nd_label->isetcookie != __cpu_to_le64(nd_set->cookie2)) {
dev_dbg(ndd->dev, "expect cookie %#llx got %#llx\n",
nd_set->cookie2,
__le64_to_cpu(nd_label->isetcookie));
return ERR_PTR(-EAGAIN);
}
}
nsblk = kzalloc(sizeof(*nsblk), GFP_KERNEL); nsblk = kzalloc(sizeof(*nsblk), GFP_KERNEL);
if (!nsblk) if (!nsblk)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
...@@ -2021,6 +2215,9 @@ struct device *create_namespace_blk(struct nd_region *nd_region, ...@@ -2021,6 +2215,9 @@ struct device *create_namespace_blk(struct nd_region *nd_region,
nsblk->lbasize = __le64_to_cpu(nd_label->lbasize); nsblk->lbasize = __le64_to_cpu(nd_label->lbasize);
nsblk->uuid = kmemdup(nd_label->uuid, NSLABEL_UUID_LEN, nsblk->uuid = kmemdup(nd_label->uuid, NSLABEL_UUID_LEN,
GFP_KERNEL); GFP_KERNEL);
if (namespace_label_has(ndd, abstraction_guid))
nsblk->common.claim_class
= to_nvdimm_cclass(&nd_label->abstraction_guid);
if (!nsblk->uuid) if (!nsblk->uuid)
goto blk_err; goto blk_err;
memcpy(name, nd_label->name, NSLABEL_NAME_LEN); memcpy(name, nd_label->name, NSLABEL_NAME_LEN);
...@@ -2102,27 +2299,30 @@ static struct device **scan_labels(struct nd_region *nd_region) ...@@ -2102,27 +2299,30 @@ static struct device **scan_labels(struct nd_region *nd_region)
kfree(devs); kfree(devs);
devs = __devs; devs = __devs;
if (is_nd_blk(&nd_region->dev)) { if (is_nd_blk(&nd_region->dev))
dev = create_namespace_blk(nd_region, nd_label, count); dev = create_namespace_blk(nd_region, nd_label, count);
if (IS_ERR(dev)) else {
struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
struct nd_namespace_index *nsindex;
nsindex = to_namespace_index(ndd, ndd->ns_current);
dev = create_namespace_pmem(nd_region, nsindex, nd_label);
}
if (IS_ERR(dev)) {
switch (PTR_ERR(dev)) {
case -EAGAIN:
/* skip invalid labels */
continue;
case -ENODEV:
/* fallthrough to seed creation */
break;
default:
goto err; goto err;
}
} else
devs[count++] = dev; devs[count++] = dev;
} else {
dev = create_namespace_pmem(nd_region, nd_label);
if (IS_ERR(dev)) {
switch (PTR_ERR(dev)) {
case -EAGAIN:
/* skip invalid labels */
continue;
case -ENODEV:
/* fallthrough to seed creation */
break;
default:
goto err;
}
} else
devs[count++] = dev;
}
} }
dev_dbg(&nd_region->dev, "%s: discovered %d %s namespace%s\n", dev_dbg(&nd_region->dev, "%s: discovered %d %s namespace%s\n",
...@@ -2156,7 +2356,7 @@ static struct device **scan_labels(struct nd_region *nd_region) ...@@ -2156,7 +2356,7 @@ static struct device **scan_labels(struct nd_region *nd_region)
} }
dev->parent = &nd_region->dev; dev->parent = &nd_region->dev;
devs[count++] = dev; devs[count++] = dev;
} else if (is_nd_pmem(&nd_region->dev)) { } else if (is_memory(&nd_region->dev)) {
/* clean unselected labels */ /* clean unselected labels */
for (i = 0; i < nd_region->ndr_mappings; i++) { for (i = 0; i < nd_region->ndr_mappings; i++) {
struct list_head *l, *e; struct list_head *l, *e;
......
...@@ -64,7 +64,16 @@ struct blk_alloc_info { ...@@ -64,7 +64,16 @@ struct blk_alloc_info {
bool is_nvdimm(struct device *dev); bool is_nvdimm(struct device *dev);
bool is_nd_pmem(struct device *dev); bool is_nd_pmem(struct device *dev);
bool is_nd_volatile(struct device *dev);
bool is_nd_blk(struct device *dev); bool is_nd_blk(struct device *dev);
/*
 * is_nd_region() - true when @dev passes any of the is_nd_pmem(),
 * is_nd_blk() or is_nd_volatile() checks.
 */
static inline bool is_nd_region(struct device *dev)
{
return is_nd_pmem(dev) || is_nd_blk(dev) || is_nd_volatile(dev);
}
/*
 * is_memory() - true when @dev passes is_nd_pmem() or is_nd_volatile();
 * unlike is_nd_region() the is_nd_blk() case is not included.
 */
static inline bool is_memory(struct device *dev)
{
return is_nd_pmem(dev) || is_nd_volatile(dev);
}
struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev); struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev);
int __init nvdimm_bus_init(void); int __init nvdimm_bus_init(void);
void nvdimm_bus_exit(void); void nvdimm_bus_exit(void);
......
...@@ -42,7 +42,7 @@ struct nd_poison { ...@@ -42,7 +42,7 @@ struct nd_poison {
struct nvdimm_drvdata { struct nvdimm_drvdata {
struct device *dev; struct device *dev;
int nsindex_size; int nsindex_size, nslabel_size;
struct nd_cmd_get_config_size nsarea; struct nd_cmd_get_config_size nsarea;
void *data; void *data;
int ns_current, ns_next; int ns_current, ns_next;
...@@ -96,6 +96,12 @@ static inline struct nd_namespace_index *to_next_namespace_index( ...@@ -96,6 +96,12 @@ static inline struct nd_namespace_index *to_next_namespace_index(
return to_namespace_index(ndd, ndd->ns_next); return to_namespace_index(ndd, ndd->ns_next);
} }
/* size of one namespace label for @ndd, as used by namespace_label_has() */
unsigned sizeof_namespace_label(struct nvdimm_drvdata *ndd);
/*
 * namespace_label_has() - true when @field of struct nd_namespace_label
 * starts before the end of a label of sizeof_namespace_label(@ndd) bytes.
 * Only the field's starting offset is compared, not its full extent.
 */
#define namespace_label_has(ndd, field) \
(offsetof(struct nd_namespace_label, field) \
< sizeof_namespace_label(ndd))
#define nd_dbg_dpa(r, d, res, fmt, arg...) \ #define nd_dbg_dpa(r, d, res, fmt, arg...) \
dev_dbg((r) ? &(r)->dev : (d)->dev, "%s: %.13s: %#llx @ %#llx " fmt, \ dev_dbg((r) ? &(r)->dev : (d)->dev, "%s: %.13s: %#llx @ %#llx " fmt, \
(r) ? dev_name((d)->dev) : "", res ? res->name : "null", \ (r) ? dev_name((d)->dev) : "", res ? res->name : "null", \
...@@ -155,6 +161,7 @@ struct nd_region { ...@@ -155,6 +161,7 @@ struct nd_region {
u64 ndr_start; u64 ndr_start;
int id, num_lanes, ro, numa_node; int id, num_lanes, ro, numa_node;
void *provider_data; void *provider_data;
struct kernfs_node *bb_state;
struct badblocks bb; struct badblocks bb;
struct nd_interleave_set *nd_set; struct nd_interleave_set *nd_set;
struct nd_percpu_lane __percpu *lane; struct nd_percpu_lane __percpu *lane;
...@@ -188,6 +195,9 @@ struct nd_btt { ...@@ -188,6 +195,9 @@ struct nd_btt {
u64 size; u64 size;
u8 *uuid; u8 *uuid;
int id; int id;
int initial_offset;
u16 version_major;
u16 version_minor;
}; };
enum nd_pfn_mode { enum nd_pfn_mode {
...@@ -229,6 +239,7 @@ ssize_t nd_sector_size_store(struct device *dev, const char *buf, ...@@ -229,6 +239,7 @@ ssize_t nd_sector_size_store(struct device *dev, const char *buf,
unsigned long *current_lbasize, const unsigned long *supported); unsigned long *current_lbasize, const unsigned long *supported);
int __init nvdimm_init(void); int __init nvdimm_init(void);
int __init nd_region_init(void); int __init nd_region_init(void);
int __init nd_label_init(void);
void nvdimm_exit(void); void nvdimm_exit(void);
void nd_region_exit(void); void nd_region_exit(void);
struct nvdimm; struct nvdimm;
...@@ -330,7 +341,8 @@ static inline struct device *nd_dax_create(struct nd_region *nd_region) ...@@ -330,7 +341,8 @@ static inline struct device *nd_dax_create(struct nd_region *nd_region)
struct nd_region *to_nd_region(struct device *dev); struct nd_region *to_nd_region(struct device *dev);
int nd_region_to_nstype(struct nd_region *nd_region); int nd_region_to_nstype(struct nd_region *nd_region);
int nd_region_register_namespaces(struct nd_region *nd_region, int *err); int nd_region_register_namespaces(struct nd_region *nd_region, int *err);
u64 nd_region_interleave_set_cookie(struct nd_region *nd_region); u64 nd_region_interleave_set_cookie(struct nd_region *nd_region,
struct nd_namespace_index *nsindex);
u64 nd_region_interleave_set_altcookie(struct nd_region *nd_region); u64 nd_region_interleave_set_altcookie(struct nd_region *nd_region);
void nvdimm_bus_lock(struct device *dev); void nvdimm_bus_lock(struct device *dev);
void nvdimm_bus_unlock(struct device *dev); void nvdimm_bus_unlock(struct device *dev);
...@@ -349,6 +361,7 @@ int nvdimm_namespace_attach_btt(struct nd_namespace_common *ndns); ...@@ -349,6 +361,7 @@ int nvdimm_namespace_attach_btt(struct nd_namespace_common *ndns);
int nvdimm_namespace_detach_btt(struct nd_btt *nd_btt); int nvdimm_namespace_detach_btt(struct nd_btt *nd_btt);
const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns, const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns,
char *name); char *name);
unsigned int pmem_sector_size(struct nd_namespace_common *ndns);
void nvdimm_badblocks_populate(struct nd_region *nd_region, void nvdimm_badblocks_populate(struct nd_region *nd_region,
struct badblocks *bb, const struct resource *res); struct badblocks *bb, const struct resource *res);
#if IS_ENABLED(CONFIG_ND_CLAIM) #if IS_ENABLED(CONFIG_ND_CLAIM)
......
...@@ -331,7 +331,7 @@ struct device *nd_pfn_create(struct nd_region *nd_region) ...@@ -331,7 +331,7 @@ struct device *nd_pfn_create(struct nd_region *nd_region)
struct nd_pfn *nd_pfn; struct nd_pfn *nd_pfn;
struct device *dev; struct device *dev;
if (!is_nd_pmem(&nd_region->dev)) if (!is_memory(&nd_region->dev))
return NULL; return NULL;
nd_pfn = nd_pfn_alloc(nd_region); nd_pfn = nd_pfn_alloc(nd_region);
...@@ -354,7 +354,7 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig) ...@@ -354,7 +354,7 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
if (!pfn_sb || !ndns) if (!pfn_sb || !ndns)
return -ENODEV; return -ENODEV;
if (!is_nd_pmem(nd_pfn->dev.parent)) if (!is_memory(nd_pfn->dev.parent))
return -ENODEV; return -ENODEV;
if (nvdimm_read_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb), 0)) if (nvdimm_read_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb), 0))
...@@ -471,6 +471,14 @@ int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns) ...@@ -471,6 +471,14 @@ int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns)
if (ndns->force_raw) if (ndns->force_raw)
return -ENODEV; return -ENODEV;
switch (ndns->claim_class) {
case NVDIMM_CCLASS_NONE:
case NVDIMM_CCLASS_PFN:
break;
default:
return -ENODEV;
}
nvdimm_bus_lock(&ndns->dev); nvdimm_bus_lock(&ndns->dev);
nd_pfn = nd_pfn_alloc(nd_region); nd_pfn = nd_pfn_alloc(nd_region);
pfn_dev = nd_pfn_devinit(nd_pfn, ndns); pfn_dev = nd_pfn_devinit(nd_pfn, ndns);
......
...@@ -28,7 +28,7 @@ ...@@ -28,7 +28,7 @@
#include <linux/blk-mq.h> #include <linux/blk-mq.h>
#include <linux/pfn_t.h> #include <linux/pfn_t.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/pmem.h> #include <linux/uio.h>
#include <linux/dax.h> #include <linux/dax.h>
#include <linux/nd.h> #include <linux/nd.h>
#include "pmem.h" #include "pmem.h"
...@@ -68,9 +68,11 @@ static blk_status_t pmem_clear_poison(struct pmem_device *pmem, ...@@ -68,9 +68,11 @@ static blk_status_t pmem_clear_poison(struct pmem_device *pmem,
(unsigned long long) sector, cleared, (unsigned long long) sector, cleared,
cleared > 1 ? "s" : ""); cleared > 1 ? "s" : "");
badblocks_clear(&pmem->bb, sector, cleared); badblocks_clear(&pmem->bb, sector, cleared);
if (pmem->bb_state)
sysfs_notify_dirent(pmem->bb_state);
} }
invalidate_pmem(pmem->virt_addr + offset, len); arch_invalidate_pmem(pmem->virt_addr + offset, len);
return rc; return rc;
} }
...@@ -80,7 +82,7 @@ static void write_pmem(void *pmem_addr, struct page *page, ...@@ -80,7 +82,7 @@ static void write_pmem(void *pmem_addr, struct page *page,
{ {
void *mem = kmap_atomic(page); void *mem = kmap_atomic(page);
memcpy_to_pmem(pmem_addr, mem + off, len); memcpy_flushcache(pmem_addr, mem + off, len);
kunmap_atomic(mem); kunmap_atomic(mem);
} }
...@@ -235,8 +237,27 @@ static long pmem_dax_direct_access(struct dax_device *dax_dev, ...@@ -235,8 +237,27 @@ static long pmem_dax_direct_access(struct dax_device *dax_dev,
return __pmem_direct_access(pmem, pgoff, nr_pages, kaddr, pfn); return __pmem_direct_access(pmem, pgoff, nr_pages, kaddr, pfn);
} }
/*
 * pmem_copy_from_iter() - dax ->copy_from_iter() implementation for pmem
 *
 * Forwards @addr, @bytes and @i to copy_from_iter_flushcache() and returns
 * its result.  @dax_dev and @pgoff are not used here.
 */
static size_t pmem_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
void *addr, size_t bytes, struct iov_iter *i)
{
return copy_from_iter_flushcache(addr, bytes, i);
}
/*
 * pmem_dax_flush() - dax ->flush() implementation for pmem
 *
 * Forwards @addr and @size to arch_wb_cache_pmem().  @dax_dev and @pgoff
 * are not used here.
 */
static void pmem_dax_flush(struct dax_device *dax_dev, pgoff_t pgoff,
void *addr, size_t size)
{
arch_wb_cache_pmem(addr, size);
}
static const struct dax_operations pmem_dax_ops = { static const struct dax_operations pmem_dax_ops = {
.direct_access = pmem_dax_direct_access, .direct_access = pmem_dax_direct_access,
.copy_from_iter = pmem_copy_from_iter,
.flush = pmem_dax_flush,
};
/*
 * NULL-terminated list of sysfs attribute groups for the pmem disk;
 * currently only the dax attribute group.
 */
static const struct attribute_group *pmem_attribute_groups[] = {
&dax_attribute_group,
NULL,
};
static void pmem_release_queue(void *q) static void pmem_release_queue(void *q)
...@@ -265,14 +286,15 @@ static int pmem_attach_disk(struct device *dev, ...@@ -265,14 +286,15 @@ static int pmem_attach_disk(struct device *dev,
struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
struct nd_region *nd_region = to_nd_region(dev->parent); struct nd_region *nd_region = to_nd_region(dev->parent);
struct vmem_altmap __altmap, *altmap = NULL; struct vmem_altmap __altmap, *altmap = NULL;
int nid = dev_to_node(dev), fua, wbc;
struct resource *res = &nsio->res; struct resource *res = &nsio->res;
struct nd_pfn *nd_pfn = NULL; struct nd_pfn *nd_pfn = NULL;
struct dax_device *dax_dev; struct dax_device *dax_dev;
int nid = dev_to_node(dev);
struct nd_pfn_sb *pfn_sb; struct nd_pfn_sb *pfn_sb;
struct pmem_device *pmem; struct pmem_device *pmem;
struct resource pfn_res; struct resource pfn_res;
struct request_queue *q; struct request_queue *q;
struct device *gendev;
struct gendisk *disk; struct gendisk *disk;
void *addr; void *addr;
...@@ -294,8 +316,12 @@ static int pmem_attach_disk(struct device *dev, ...@@ -294,8 +316,12 @@ static int pmem_attach_disk(struct device *dev,
dev_set_drvdata(dev, pmem); dev_set_drvdata(dev, pmem);
pmem->phys_addr = res->start; pmem->phys_addr = res->start;
pmem->size = resource_size(res); pmem->size = resource_size(res);
if (nvdimm_has_flush(nd_region) < 0) fua = nvdimm_has_flush(nd_region);
if (!IS_ENABLED(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) || fua < 0) {
dev_warn(dev, "unable to guarantee persistence of writes\n"); dev_warn(dev, "unable to guarantee persistence of writes\n");
fua = 0;
}
wbc = nvdimm_has_cache(nd_region);
if (!devm_request_mem_region(dev, res->start, resource_size(res), if (!devm_request_mem_region(dev, res->start, resource_size(res),
dev_name(&ndns->dev))) { dev_name(&ndns->dev))) {
...@@ -339,9 +365,10 @@ static int pmem_attach_disk(struct device *dev, ...@@ -339,9 +365,10 @@ static int pmem_attach_disk(struct device *dev,
return PTR_ERR(addr); return PTR_ERR(addr);
pmem->virt_addr = addr; pmem->virt_addr = addr;
blk_queue_write_cache(q, true, true); blk_queue_write_cache(q, wbc, fua);
blk_queue_make_request(q, pmem_make_request); blk_queue_make_request(q, pmem_make_request);
blk_queue_physical_block_size(q, PAGE_SIZE); blk_queue_physical_block_size(q, PAGE_SIZE);
blk_queue_logical_block_size(q, pmem_sector_size(ndns));
blk_queue_max_hw_sectors(q, UINT_MAX); blk_queue_max_hw_sectors(q, UINT_MAX);
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
queue_flag_set_unlocked(QUEUE_FLAG_DAX, q); queue_flag_set_unlocked(QUEUE_FLAG_DAX, q);
...@@ -368,14 +395,23 @@ static int pmem_attach_disk(struct device *dev, ...@@ -368,14 +395,23 @@ static int pmem_attach_disk(struct device *dev,
put_disk(disk); put_disk(disk);
return -ENOMEM; return -ENOMEM;
} }
dax_write_cache(dax_dev, wbc);
pmem->dax_dev = dax_dev; pmem->dax_dev = dax_dev;
gendev = disk_to_dev(disk);
gendev->groups = pmem_attribute_groups;
device_add_disk(dev, disk); device_add_disk(dev, disk);
if (devm_add_action_or_reset(dev, pmem_release_disk, pmem)) if (devm_add_action_or_reset(dev, pmem_release_disk, pmem))
return -ENOMEM; return -ENOMEM;
revalidate_disk(disk); revalidate_disk(disk);
pmem->bb_state = sysfs_get_dirent(disk_to_dev(disk)->kobj.sd,
"badblocks");
if (!pmem->bb_state)
dev_warn(dev, "'badblocks' notification disabled\n");
return 0; return 0;
} }
...@@ -407,8 +443,18 @@ static int nd_pmem_probe(struct device *dev) ...@@ -407,8 +443,18 @@ static int nd_pmem_probe(struct device *dev)
static int nd_pmem_remove(struct device *dev) static int nd_pmem_remove(struct device *dev)
{ {
struct pmem_device *pmem = dev_get_drvdata(dev);
if (is_nd_btt(dev)) if (is_nd_btt(dev))
nvdimm_namespace_detach_btt(to_nd_btt(dev)); nvdimm_namespace_detach_btt(to_nd_btt(dev));
else {
/*
* Note, this assumes device_lock() context to not race
* nd_pmem_notify()
*/
sysfs_put(pmem->bb_state);
pmem->bb_state = NULL;
}
nvdimm_flush(to_nd_region(dev->parent)); nvdimm_flush(to_nd_region(dev->parent));
return 0; return 0;
...@@ -427,6 +473,7 @@ static void nd_pmem_notify(struct device *dev, enum nvdimm_event event) ...@@ -427,6 +473,7 @@ static void nd_pmem_notify(struct device *dev, enum nvdimm_event event)
struct nd_namespace_io *nsio; struct nd_namespace_io *nsio;
struct resource res; struct resource res;
struct badblocks *bb; struct badblocks *bb;
struct kernfs_node *bb_state;
if (event != NVDIMM_REVALIDATE_POISON) if (event != NVDIMM_REVALIDATE_POISON)
return; return;
...@@ -438,11 +485,13 @@ static void nd_pmem_notify(struct device *dev, enum nvdimm_event event) ...@@ -438,11 +485,13 @@ static void nd_pmem_notify(struct device *dev, enum nvdimm_event event)
nd_region = to_nd_region(ndns->dev.parent); nd_region = to_nd_region(ndns->dev.parent);
nsio = to_nd_namespace_io(&ndns->dev); nsio = to_nd_namespace_io(&ndns->dev);
bb = &nsio->bb; bb = &nsio->bb;
bb_state = NULL;
} else { } else {
struct pmem_device *pmem = dev_get_drvdata(dev); struct pmem_device *pmem = dev_get_drvdata(dev);
nd_region = to_region(pmem); nd_region = to_region(pmem);
bb = &pmem->bb; bb = &pmem->bb;
bb_state = pmem->bb_state;
if (is_nd_pfn(dev)) { if (is_nd_pfn(dev)) {
struct nd_pfn *nd_pfn = to_nd_pfn(dev); struct nd_pfn *nd_pfn = to_nd_pfn(dev);
...@@ -462,6 +511,8 @@ static void nd_pmem_notify(struct device *dev, enum nvdimm_event event) ...@@ -462,6 +511,8 @@ static void nd_pmem_notify(struct device *dev, enum nvdimm_event event)
res.start = nsio->res.start + offset; res.start = nsio->res.start + offset;
res.end = nsio->res.end - end_trunc; res.end = nsio->res.end - end_trunc;
nvdimm_badblocks_populate(nd_region, bb, &res); nvdimm_badblocks_populate(nd_region, bb, &res);
if (bb_state)
sysfs_notify_dirent(bb_state);
} }
MODULE_ALIAS("pmem"); MODULE_ALIAS("pmem");
......
...@@ -5,6 +5,20 @@ ...@@ -5,6 +5,20 @@
#include <linux/pfn_t.h> #include <linux/pfn_t.h>
#include <linux/fs.h> #include <linux/fs.h>
#ifdef CONFIG_ARCH_HAS_PMEM_API
#define ARCH_MEMREMAP_PMEM MEMREMAP_WB
void arch_wb_cache_pmem(void *addr, size_t size);
void arch_invalidate_pmem(void *addr, size_t size);
#else
#define ARCH_MEMREMAP_PMEM MEMREMAP_WT
/*
 * Empty fallback: this definition sits in the #else arm of
 * #ifdef CONFIG_ARCH_HAS_PMEM_API, so it is the one compiled when that
 * option is not set, and it deliberately does nothing.
 */
static inline void arch_wb_cache_pmem(void *addr, size_t size)
{
}
/*
 * Empty fallback: compiled only from the #else arm of
 * #ifdef CONFIG_ARCH_HAS_PMEM_API (option not set); does nothing.
 */
static inline void arch_invalidate_pmem(void *addr, size_t size)
{
}
#endif
/* this definition is in its own header for tools/testing/nvdimm to consume */ /* this definition is in its own header for tools/testing/nvdimm to consume */
struct pmem_device { struct pmem_device {
/* One contiguous memory region per device */ /* One contiguous memory region per device */
...@@ -17,6 +31,7 @@ struct pmem_device { ...@@ -17,6 +31,7 @@ struct pmem_device {
size_t size; size_t size;
/* trim size when namespace capacity has been section aligned */ /* trim size when namespace capacity has been section aligned */
u32 pfn_pad; u32 pfn_pad;
struct kernfs_node *bb_state;
struct badblocks bb; struct badblocks bb;
struct dax_device *dax_dev; struct dax_device *dax_dev;
struct gendisk *disk; struct gendisk *disk;
......
...@@ -58,10 +58,14 @@ static int nd_region_probe(struct device *dev) ...@@ -58,10 +58,14 @@ static int nd_region_probe(struct device *dev)
if (devm_init_badblocks(dev, &nd_region->bb)) if (devm_init_badblocks(dev, &nd_region->bb))
return -ENODEV; return -ENODEV;
nd_region->bb_state = sysfs_get_dirent(nd_region->dev.kobj.sd,
"badblocks");
if (!nd_region->bb_state)
dev_warn(&nd_region->dev,
"'badblocks' notification disabled\n");
ndr_res.start = nd_region->ndr_start; ndr_res.start = nd_region->ndr_start;
ndr_res.end = nd_region->ndr_start + nd_region->ndr_size - 1; ndr_res.end = nd_region->ndr_start + nd_region->ndr_size - 1;
nvdimm_badblocks_populate(nd_region, nvdimm_badblocks_populate(nd_region, &nd_region->bb, &ndr_res);
&nd_region->bb, &ndr_res);
} }
nd_region->btt_seed = nd_btt_create(nd_region); nd_region->btt_seed = nd_btt_create(nd_region);
...@@ -105,6 +109,13 @@ static int nd_region_remove(struct device *dev) ...@@ -105,6 +109,13 @@ static int nd_region_remove(struct device *dev)
dev_set_drvdata(dev, NULL); dev_set_drvdata(dev, NULL);
nvdimm_bus_unlock(dev); nvdimm_bus_unlock(dev);
/*
* Note, this assumes device_lock() context to not race
* nd_region_notify()
*/
sysfs_put(nd_region->bb_state);
nd_region->bb_state = NULL;
return 0; return 0;
} }
...@@ -126,6 +137,8 @@ static void nd_region_notify(struct device *dev, enum nvdimm_event event) ...@@ -126,6 +137,8 @@ static void nd_region_notify(struct device *dev, enum nvdimm_event event)
nd_region->ndr_size - 1; nd_region->ndr_size - 1;
nvdimm_badblocks_populate(nd_region, nvdimm_badblocks_populate(nd_region,
&nd_region->bb, &res); &nd_region->bb, &res);
if (nd_region->bb_state)
sysfs_notify_dirent(nd_region->bb_state);
} }
} }
device_for_each_child(dev, &event, child_notify); device_for_each_child(dev, &event, child_notify);
......
...@@ -15,7 +15,6 @@ ...@@ -15,7 +15,6 @@
#include <linux/sched.h> #include <linux/sched.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/hash.h> #include <linux/hash.h>
#include <linux/pmem.h>
#include <linux/sort.h> #include <linux/sort.h>
#include <linux/io.h> #include <linux/io.h>
#include <linux/nd.h> #include <linux/nd.h>
...@@ -169,6 +168,11 @@ bool is_nd_blk(struct device *dev) ...@@ -169,6 +168,11 @@ bool is_nd_blk(struct device *dev)
return dev ? dev->type == &nd_blk_device_type : false; return dev ? dev->type == &nd_blk_device_type : false;
} }
/*
 * is_nd_volatile - test whether @dev is a volatile-region device
 *
 * Returns true only when @dev is non-NULL and its ->type points at
 * nd_volatile_device_type; a NULL @dev yields false.
 */
bool is_nd_volatile(struct device *dev)
{
	if (!dev)
		return false;

	return dev->type == &nd_volatile_device_type;
}
struct nd_region *to_nd_region(struct device *dev) struct nd_region *to_nd_region(struct device *dev)
{ {
struct nd_region *nd_region = container_of(dev, struct nd_region, dev); struct nd_region *nd_region = container_of(dev, struct nd_region, dev);
...@@ -215,7 +219,7 @@ EXPORT_SYMBOL_GPL(nd_blk_region_set_provider_data); ...@@ -215,7 +219,7 @@ EXPORT_SYMBOL_GPL(nd_blk_region_set_provider_data);
*/ */
int nd_region_to_nstype(struct nd_region *nd_region) int nd_region_to_nstype(struct nd_region *nd_region)
{ {
if (is_nd_pmem(&nd_region->dev)) { if (is_memory(&nd_region->dev)) {
u16 i, alias; u16 i, alias;
for (i = 0, alias = 0; i < nd_region->ndr_mappings; i++) { for (i = 0, alias = 0; i < nd_region->ndr_mappings; i++) {
...@@ -243,7 +247,7 @@ static ssize_t size_show(struct device *dev, ...@@ -243,7 +247,7 @@ static ssize_t size_show(struct device *dev,
struct nd_region *nd_region = to_nd_region(dev); struct nd_region *nd_region = to_nd_region(dev);
unsigned long long size = 0; unsigned long long size = 0;
if (is_nd_pmem(dev)) { if (is_memory(dev)) {
size = nd_region->ndr_size; size = nd_region->ndr_size;
} else if (nd_region->ndr_mappings == 1) { } else if (nd_region->ndr_mappings == 1) {
struct nd_mapping *nd_mapping = &nd_region->mapping[0]; struct nd_mapping *nd_mapping = &nd_region->mapping[0];
...@@ -307,13 +311,41 @@ static ssize_t set_cookie_show(struct device *dev, ...@@ -307,13 +311,41 @@ static ssize_t set_cookie_show(struct device *dev,
{ {
struct nd_region *nd_region = to_nd_region(dev); struct nd_region *nd_region = to_nd_region(dev);
struct nd_interleave_set *nd_set = nd_region->nd_set; struct nd_interleave_set *nd_set = nd_region->nd_set;
ssize_t rc = 0;
if (is_nd_pmem(dev) && nd_set) if (is_memory(dev) && nd_set)
/* pass, should be precluded by region_visible */; /* pass, should be precluded by region_visible */;
else else
return -ENXIO; return -ENXIO;
return sprintf(buf, "%#llx\n", nd_set->cookie); /*
* The cookie to show depends on which specification of the
* labels we are using. If there are not labels then default to
* the v1.1 namespace label cookie definition. To read all this
* data we need to wait for probing to settle.
*/
device_lock(dev);
nvdimm_bus_lock(dev);
wait_nvdimm_bus_probe_idle(dev);
if (nd_region->ndr_mappings) {
struct nd_mapping *nd_mapping = &nd_region->mapping[0];
struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
if (ndd) {
struct nd_namespace_index *nsindex;
nsindex = to_namespace_index(ndd, ndd->ns_current);
rc = sprintf(buf, "%#llx\n",
nd_region_interleave_set_cookie(nd_region,
nsindex));
}
}
nvdimm_bus_unlock(dev);
device_unlock(dev);
if (rc)
return rc;
return sprintf(buf, "%#llx\n", nd_set->cookie1);
} }
static DEVICE_ATTR_RO(set_cookie); static DEVICE_ATTR_RO(set_cookie);
...@@ -335,7 +367,7 @@ resource_size_t nd_region_available_dpa(struct nd_region *nd_region) ...@@ -335,7 +367,7 @@ resource_size_t nd_region_available_dpa(struct nd_region *nd_region)
if (!ndd) if (!ndd)
return 0; return 0;
if (is_nd_pmem(&nd_region->dev)) { if (is_memory(&nd_region->dev)) {
available += nd_pmem_available_dpa(nd_region, available += nd_pmem_available_dpa(nd_region,
nd_mapping, &overlap); nd_mapping, &overlap);
if (overlap > blk_max_overlap) { if (overlap > blk_max_overlap) {
...@@ -521,10 +553,10 @@ static umode_t region_visible(struct kobject *kobj, struct attribute *a, int n) ...@@ -521,10 +553,10 @@ static umode_t region_visible(struct kobject *kobj, struct attribute *a, int n)
struct nd_interleave_set *nd_set = nd_region->nd_set; struct nd_interleave_set *nd_set = nd_region->nd_set;
int type = nd_region_to_nstype(nd_region); int type = nd_region_to_nstype(nd_region);
if (!is_nd_pmem(dev) && a == &dev_attr_pfn_seed.attr) if (!is_memory(dev) && a == &dev_attr_pfn_seed.attr)
return 0; return 0;
if (!is_nd_pmem(dev) && a == &dev_attr_dax_seed.attr) if (!is_memory(dev) && a == &dev_attr_dax_seed.attr)
return 0; return 0;
if (!is_nd_pmem(dev) && a == &dev_attr_badblocks.attr) if (!is_nd_pmem(dev) && a == &dev_attr_badblocks.attr)
...@@ -552,7 +584,7 @@ static umode_t region_visible(struct kobject *kobj, struct attribute *a, int n) ...@@ -552,7 +584,7 @@ static umode_t region_visible(struct kobject *kobj, struct attribute *a, int n)
|| type == ND_DEVICE_NAMESPACE_BLK) || type == ND_DEVICE_NAMESPACE_BLK)
&& a == &dev_attr_available_size.attr) && a == &dev_attr_available_size.attr)
return a->mode; return a->mode;
else if (is_nd_pmem(dev) && nd_set) else if (is_memory(dev) && nd_set)
return a->mode; return a->mode;
return 0; return 0;
...@@ -564,13 +596,18 @@ struct attribute_group nd_region_attribute_group = { ...@@ -564,13 +596,18 @@ struct attribute_group nd_region_attribute_group = {
}; };
EXPORT_SYMBOL_GPL(nd_region_attribute_group); EXPORT_SYMBOL_GPL(nd_region_attribute_group);
u64 nd_region_interleave_set_cookie(struct nd_region *nd_region) u64 nd_region_interleave_set_cookie(struct nd_region *nd_region,
struct nd_namespace_index *nsindex)
{ {
struct nd_interleave_set *nd_set = nd_region->nd_set; struct nd_interleave_set *nd_set = nd_region->nd_set;
if (nd_set) if (!nd_set)
return nd_set->cookie; return 0;
return 0;
if (nsindex && __le16_to_cpu(nsindex->major) == 1
&& __le16_to_cpu(nsindex->minor) == 1)
return nd_set->cookie1;
return nd_set->cookie2;
} }
u64 nd_region_interleave_set_altcookie(struct nd_region *nd_region) u64 nd_region_interleave_set_altcookie(struct nd_region *nd_region)
...@@ -604,7 +641,7 @@ static void nd_region_notify_driver_action(struct nvdimm_bus *nvdimm_bus, ...@@ -604,7 +641,7 @@ static void nd_region_notify_driver_action(struct nvdimm_bus *nvdimm_bus,
{ {
struct nd_region *nd_region; struct nd_region *nd_region;
if (!probe && (is_nd_pmem(dev) || is_nd_blk(dev))) { if (!probe && is_nd_region(dev)) {
int i; int i;
nd_region = to_nd_region(dev); nd_region = to_nd_region(dev);
...@@ -622,12 +659,8 @@ static void nd_region_notify_driver_action(struct nvdimm_bus *nvdimm_bus, ...@@ -622,12 +659,8 @@ static void nd_region_notify_driver_action(struct nvdimm_bus *nvdimm_bus,
if (ndd) if (ndd)
atomic_dec(&nvdimm->busy); atomic_dec(&nvdimm->busy);
} }
if (is_nd_pmem(dev))
return;
} }
if (dev->parent && (is_nd_blk(dev->parent) || is_nd_pmem(dev->parent)) if (dev->parent && is_nd_region(dev->parent) && probe) {
&& probe) {
nd_region = to_nd_region(dev->parent); nd_region = to_nd_region(dev->parent);
nvdimm_bus_lock(dev); nvdimm_bus_lock(dev);
if (nd_region->ns_seed == dev) if (nd_region->ns_seed == dev)
...@@ -800,7 +833,7 @@ int nd_blk_region_init(struct nd_region *nd_region) ...@@ -800,7 +833,7 @@ int nd_blk_region_init(struct nd_region *nd_region)
return 0; return 0;
if (nd_region->ndr_mappings < 1) { if (nd_region->ndr_mappings < 1) {
dev_err(dev, "invalid BLK region\n"); dev_dbg(dev, "invalid BLK region\n");
return -ENXIO; return -ENXIO;
} }
...@@ -1015,8 +1048,8 @@ void nvdimm_flush(struct nd_region *nd_region) ...@@ -1015,8 +1048,8 @@ void nvdimm_flush(struct nd_region *nd_region)
* The first wmb() is needed to 'sfence' all previous writes * The first wmb() is needed to 'sfence' all previous writes
* such that they are architecturally visible for the platform * such that they are architecturally visible for the platform
* buffer flush. Note that we've already arranged for pmem * buffer flush. Note that we've already arranged for pmem
* writes to avoid the cache via arch_memcpy_to_pmem(). The * writes to avoid the cache via memcpy_flushcache(). The final
* final wmb() ensures ordering for the NVDIMM flush write. * wmb() ensures ordering for the NVDIMM flush write.
*/ */
wmb(); wmb();
for (i = 0; i < nd_region->ndr_mappings; i++) for (i = 0; i < nd_region->ndr_mappings; i++)
...@@ -1038,8 +1071,9 @@ int nvdimm_has_flush(struct nd_region *nd_region) ...@@ -1038,8 +1071,9 @@ int nvdimm_has_flush(struct nd_region *nd_region)
{ {
int i; int i;
/* no nvdimm == flushing capability unknown */ /* no nvdimm or pmem api == flushing capability unknown */
if (nd_region->ndr_mappings == 0) if (nd_region->ndr_mappings == 0
|| !IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API))
return -ENXIO; return -ENXIO;
for (i = 0; i < nd_region->ndr_mappings; i++) { for (i = 0; i < nd_region->ndr_mappings; i++) {
...@@ -1059,6 +1093,12 @@ int nvdimm_has_flush(struct nd_region *nd_region) ...@@ -1059,6 +1093,12 @@ int nvdimm_has_flush(struct nd_region *nd_region)
} }
EXPORT_SYMBOL_GPL(nvdimm_has_flush); EXPORT_SYMBOL_GPL(nvdimm_has_flush);
/*
 * nvdimm_has_cache - report whether @nd_region is treated as cached
 *
 * The answer is simply whether the region's embedded device passes
 * is_nd_pmem(); the result is returned as an int.
 */
int nvdimm_has_cache(struct nd_region *nd_region)
{
	struct device *region_dev = &nd_region->dev;

	return is_nd_pmem(region_dev);
}
EXPORT_SYMBOL_GPL(nvdimm_has_cache);
void __exit nd_region_devs_exit(void) void __exit nd_region_devs_exit(void)
{ {
ida_destroy(&region_ida); ida_destroy(&region_ida);
......
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
#include <linux/interrupt.h> #include <linux/interrupt.h>
#include <linux/platform_device.h> #include <linux/platform_device.h>
#include <linux/pfn_t.h> #include <linux/pfn_t.h>
#include <linux/uio.h>
#include <linux/dax.h> #include <linux/dax.h>
#include <asm/extmem.h> #include <asm/extmem.h>
#include <asm/io.h> #include <asm/io.h>
...@@ -43,8 +44,15 @@ static const struct block_device_operations dcssblk_devops = { ...@@ -43,8 +44,15 @@ static const struct block_device_operations dcssblk_devops = {
.release = dcssblk_release, .release = dcssblk_release,
}; };
/*
 * ->copy_from_iter() implementation installed in dcssblk_dax_ops.
 *
 * @dax_dev and @pgoff are not consulted; the request is forwarded
 * unchanged to copy_from_iter() and its byte count is returned.
 */
static size_t dcssblk_dax_copy_from_iter(struct dax_device *dax_dev,
		pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i)
{
	size_t copied = copy_from_iter(addr, bytes, i);

	return copied;
}
static const struct dax_operations dcssblk_dax_ops = { static const struct dax_operations dcssblk_dax_ops = {
.direct_access = dcssblk_dax_direct_access, .direct_access = dcssblk_dax_direct_access,
.copy_from_iter = dcssblk_dax_copy_from_iter,
}; };
struct dcssblk_dev_info { struct dcssblk_dev_info {
......
...@@ -25,7 +25,6 @@ ...@@ -25,7 +25,6 @@
#include <linux/mm.h> #include <linux/mm.h>
#include <linux/mutex.h> #include <linux/mutex.h>
#include <linux/pagevec.h> #include <linux/pagevec.h>
#include <linux/pmem.h>
#include <linux/sched.h> #include <linux/sched.h>
#include <linux/sched/signal.h> #include <linux/sched/signal.h>
#include <linux/uio.h> #include <linux/uio.h>
...@@ -784,7 +783,7 @@ static int dax_writeback_one(struct block_device *bdev, ...@@ -784,7 +783,7 @@ static int dax_writeback_one(struct block_device *bdev,
} }
dax_mapping_entry_mkclean(mapping, index, pfn_t_to_pfn(pfn)); dax_mapping_entry_mkclean(mapping, index, pfn_t_to_pfn(pfn));
wb_cache_pmem(kaddr, size); dax_flush(dax_dev, pgoff, kaddr, size);
/* /*
* After we have flushed the cache, we can clear the dirty tag. There * After we have flushed the cache, we can clear the dirty tag. There
* cannot be new dirty data in the pfn after the flush has completed as * cannot be new dirty data in the pfn after the flush has completed as
...@@ -976,7 +975,8 @@ int __dax_zero_page_range(struct block_device *bdev, ...@@ -976,7 +975,8 @@ int __dax_zero_page_range(struct block_device *bdev,
dax_read_unlock(id); dax_read_unlock(id);
return rc; return rc;
} }
clear_pmem(kaddr + offset, size); memset(kaddr + offset, 0, size);
dax_flush(dax_dev, pgoff, kaddr + offset, size);
dax_read_unlock(id); dax_read_unlock(id);
} }
return 0; return 0;
...@@ -1055,7 +1055,8 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data, ...@@ -1055,7 +1055,8 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
map_len = end - pos; map_len = end - pos;
if (iov_iter_rw(iter) == WRITE) if (iov_iter_rw(iter) == WRITE)
map_len = copy_from_iter_pmem(kaddr, map_len, iter); map_len = dax_copy_from_iter(dax_dev, pgoff, kaddr,
map_len, iter);
else else
map_len = copy_to_iter(kaddr, map_len, iter); map_len = copy_to_iter(kaddr, map_len, iter);
if (map_len <= 0) { if (map_len <= 0) {
......
...@@ -16,8 +16,15 @@ struct dax_operations { ...@@ -16,8 +16,15 @@ struct dax_operations {
*/ */
long (*direct_access)(struct dax_device *, pgoff_t, long, long (*direct_access)(struct dax_device *, pgoff_t, long,
void **, pfn_t *); void **, pfn_t *);
/* copy_from_iter: required operation for fs-dax direct-i/o */
size_t (*copy_from_iter)(struct dax_device *, pgoff_t, void *, size_t,
struct iov_iter *);
/* flush: optional driver-specific cache management after writes */
void (*flush)(struct dax_device *, pgoff_t, void *, size_t);
}; };
extern struct attribute_group dax_attribute_group;
#if IS_ENABLED(CONFIG_DAX) #if IS_ENABLED(CONFIG_DAX)
struct dax_device *dax_get_by_host(const char *host); struct dax_device *dax_get_by_host(const char *host);
void put_dax(struct dax_device *dax_dev); void put_dax(struct dax_device *dax_dev);
...@@ -75,6 +82,11 @@ void kill_dax(struct dax_device *dax_dev); ...@@ -75,6 +82,11 @@ void kill_dax(struct dax_device *dax_dev);
void *dax_get_private(struct dax_device *dax_dev); void *dax_get_private(struct dax_device *dax_dev);
long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
void **kaddr, pfn_t *pfn); void **kaddr, pfn_t *pfn);
size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
size_t bytes, struct iov_iter *i);
void dax_flush(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
size_t size);
void dax_write_cache(struct dax_device *dax_dev, bool wc);
/* /*
* We use lowest available bit in exceptional entry for locking, one bit for * We use lowest available bit in exceptional entry for locking, one bit for
......
...@@ -132,6 +132,10 @@ typedef int (*dm_busy_fn) (struct dm_target *ti); ...@@ -132,6 +132,10 @@ typedef int (*dm_busy_fn) (struct dm_target *ti);
*/ */
typedef long (*dm_dax_direct_access_fn) (struct dm_target *ti, pgoff_t pgoff, typedef long (*dm_dax_direct_access_fn) (struct dm_target *ti, pgoff_t pgoff,
long nr_pages, void **kaddr, pfn_t *pfn); long nr_pages, void **kaddr, pfn_t *pfn);
typedef size_t (*dm_dax_copy_from_iter_fn)(struct dm_target *ti, pgoff_t pgoff,
void *addr, size_t bytes, struct iov_iter *i);
typedef void (*dm_dax_flush_fn)(struct dm_target *ti, pgoff_t pgoff, void *addr,
size_t size);
#define PAGE_SECTORS (PAGE_SIZE / 512) #define PAGE_SECTORS (PAGE_SIZE / 512)
void dm_error(const char *message); void dm_error(const char *message);
...@@ -181,6 +185,8 @@ struct target_type { ...@@ -181,6 +185,8 @@ struct target_type {
dm_iterate_devices_fn iterate_devices; dm_iterate_devices_fn iterate_devices;
dm_io_hints_fn io_hints; dm_io_hints_fn io_hints;
dm_dax_direct_access_fn direct_access; dm_dax_direct_access_fn direct_access;
dm_dax_copy_from_iter_fn dax_copy_from_iter;
dm_dax_flush_fn dax_flush;
/* For internal device-mapper use. */ /* For internal device-mapper use. */
struct list_head list; struct list_head list;
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/sizes.h> #include <linux/sizes.h>
#include <linux/types.h> #include <linux/types.h>
#include <linux/uuid.h>
enum { enum {
/* when a dimm supports both PMEM and BLK access a label is required */ /* when a dimm supports both PMEM and BLK access a label is required */
...@@ -54,6 +55,7 @@ typedef int (*ndctl_fn)(struct nvdimm_bus_descriptor *nd_desc, ...@@ -54,6 +55,7 @@ typedef int (*ndctl_fn)(struct nvdimm_bus_descriptor *nd_desc,
struct nvdimm_bus_descriptor { struct nvdimm_bus_descriptor {
const struct attribute_group **attr_groups; const struct attribute_group **attr_groups;
unsigned long bus_dsm_mask;
unsigned long cmd_mask; unsigned long cmd_mask;
struct module *module; struct module *module;
char *provider_name; char *provider_name;
...@@ -71,9 +73,14 @@ struct nd_cmd_desc { ...@@ -71,9 +73,14 @@ struct nd_cmd_desc {
}; };
struct nd_interleave_set { struct nd_interleave_set {
u64 cookie; /* v1.1 definition of the interleave-set-cookie algorithm */
u64 cookie1;
/* v1.2 definition of the interleave-set-cookie algorithm */
u64 cookie2;
/* compatibility with initial buggy Linux implementation */ /* compatibility with initial buggy Linux implementation */
u64 altcookie; u64 altcookie;
guid_t type_guid;
}; };
struct nd_mapping_desc { struct nd_mapping_desc {
...@@ -159,9 +166,11 @@ void *nd_region_provider_data(struct nd_region *nd_region); ...@@ -159,9 +166,11 @@ void *nd_region_provider_data(struct nd_region *nd_region);
void *nd_blk_region_provider_data(struct nd_blk_region *ndbr); void *nd_blk_region_provider_data(struct nd_blk_region *ndbr);
void nd_blk_region_set_provider_data(struct nd_blk_region *ndbr, void *data); void nd_blk_region_set_provider_data(struct nd_blk_region *ndbr, void *data);
struct nvdimm *nd_blk_region_to_dimm(struct nd_blk_region *ndbr); struct nvdimm *nd_blk_region_to_dimm(struct nd_blk_region *ndbr);
unsigned long nd_blk_memremap_flags(struct nd_blk_region *ndbr);
unsigned int nd_region_acquire_lane(struct nd_region *nd_region); unsigned int nd_region_acquire_lane(struct nd_region *nd_region);
void nd_region_release_lane(struct nd_region *nd_region, unsigned int lane); void nd_region_release_lane(struct nd_region *nd_region, unsigned int lane);
u64 nd_fletcher64(void *addr, size_t len, bool le); u64 nd_fletcher64(void *addr, size_t len, bool le);
void nvdimm_flush(struct nd_region *nd_region); void nvdimm_flush(struct nd_region *nd_region);
int nvdimm_has_flush(struct nd_region *nd_region); int nvdimm_has_flush(struct nd_region *nd_region);
int nvdimm_has_cache(struct nd_region *nd_region);
#endif /* __LIBNVDIMM_H__ */ #endif /* __LIBNVDIMM_H__ */
...@@ -21,6 +21,15 @@ enum nvdimm_event { ...@@ -21,6 +21,15 @@ enum nvdimm_event {
NVDIMM_REVALIDATE_POISON, NVDIMM_REVALIDATE_POISON,
}; };
/*
 * Claim classes for a namespace; a value of this type is stored in
 * nd_namespace_common.claim_class to restrict which kind of claim may be
 * taken. Values are implicit, starting at 0 for NVDIMM_CCLASS_NONE.
 * NOTE(review): BTT vs. BTT2 presumably distinguish the original and the
 * new block-translation-table layouts; confirm against the btt code.
 */
enum nvdimm_claim_class {
NVDIMM_CCLASS_NONE,
NVDIMM_CCLASS_BTT,
NVDIMM_CCLASS_BTT2,
NVDIMM_CCLASS_PFN,
NVDIMM_CCLASS_DAX,
NVDIMM_CCLASS_UNKNOWN,
};
struct nd_device_driver { struct nd_device_driver {
struct device_driver drv; struct device_driver drv;
unsigned long type; unsigned long type;
...@@ -41,12 +50,14 @@ static inline struct nd_device_driver *to_nd_device_driver( ...@@ -41,12 +50,14 @@ static inline struct nd_device_driver *to_nd_device_driver(
* @force_raw: ignore other personalities for the namespace (e.g. btt) * @force_raw: ignore other personalities for the namespace (e.g. btt)
* @dev: device model node * @dev: device model node
* @claim: when set, another personality has taken ownership of the namespace * @claim: when set, another personality has taken ownership of the namespace
* @claim_class: restrict claim type to a given class
* @rw_bytes: access the raw namespace capacity with byte-aligned transfers * @rw_bytes: access the raw namespace capacity with byte-aligned transfers
*/ */
struct nd_namespace_common { struct nd_namespace_common {
int force_raw; int force_raw;
struct device dev; struct device dev;
struct device *claim; struct device *claim;
enum nvdimm_claim_class claim_class;
int (*rw_bytes)(struct nd_namespace_common *, resource_size_t offset, int (*rw_bytes)(struct nd_namespace_common *, resource_size_t offset,
void *buf, size_t size, int rw, unsigned long flags); void *buf, size_t size, int rw, unsigned long flags);
}; };
...@@ -75,12 +86,14 @@ struct nd_namespace_io { ...@@ -75,12 +86,14 @@ struct nd_namespace_io {
/** /**
* struct nd_namespace_pmem - namespace device for dimm-backed interleaved memory * struct nd_namespace_pmem - namespace device for dimm-backed interleaved memory
* @nsio: device and system physical address range to drive * @nsio: device and system physical address range to drive
* @lbasize: logical sector size for the namespace in block-device-mode
* @alt_name: namespace name supplied in the dimm label * @alt_name: namespace name supplied in the dimm label
* @uuid: namespace uuid supplied in the dimm label * @uuid: namespace uuid supplied in the dimm label
* @id: ida allocated id * @id: ida allocated id
*/ */
struct nd_namespace_pmem { struct nd_namespace_pmem {
struct nd_namespace_io nsio; struct nd_namespace_io nsio;
unsigned long lbasize;
char *alt_name; char *alt_name;
u8 *uuid; u8 *uuid;
int id; int id;
......
/*
* Copyright(c) 2015 Intel Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#ifndef __PMEM_H__
#define __PMEM_H__
#include <linux/io.h>
#include <linux/uio.h>
#ifdef CONFIG_ARCH_HAS_PMEM_API
#define ARCH_MEMREMAP_PMEM MEMREMAP_WB
#include <asm/pmem.h>
#else
#define ARCH_MEMREMAP_PMEM MEMREMAP_WT
/*
* These are simply here to enable compilation, all call sites gate
* calling these symbols with arch_has_pmem_api() and redirect to the
* implementation in asm/pmem.h.
*/
/*
 * Compile-only placeholder for builds without CONFIG_ARCH_HAS_PMEM_API:
 * hits BUG() if it is ever actually called.
 */
static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n)
{
BUG();
}
/*
 * Compile-only placeholder for builds without CONFIG_ARCH_HAS_PMEM_API:
 * hits BUG() if it is ever actually called.
 */
static inline size_t arch_copy_from_iter_pmem(void *addr, size_t bytes,
struct iov_iter *i)
{
BUG();
/* written after BUG(); gives the non-void function a return value */
return 0;
}
/*
 * Compile-only placeholder for builds without CONFIG_ARCH_HAS_PMEM_API:
 * hits BUG() if it is ever actually called.
 */
static inline void arch_clear_pmem(void *addr, size_t size)
{
BUG();
}
/*
 * Compile-only placeholder for builds without CONFIG_ARCH_HAS_PMEM_API:
 * hits BUG() if it is ever actually called.
 */
static inline void arch_wb_cache_pmem(void *addr, size_t size)
{
BUG();
}
/*
 * Compile-only placeholder for builds without CONFIG_ARCH_HAS_PMEM_API:
 * hits BUG() if it is ever actually called.
 */
static inline void arch_invalidate_pmem(void *addr, size_t size)
{
BUG();
}
#endif
/*
 * arch_has_pmem_api - compile-time check for the arch pmem helpers
 *
 * Evaluates IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API) and returns the result.
 */
static inline bool arch_has_pmem_api(void)
{
	const bool have_api = IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API);

	return have_api;
}
/**
 * memcpy_to_pmem - copy a buffer into persistent memory
 * @dst: destination buffer for the copy
 * @src: source buffer for the copy
 * @n: length of the copy in bytes
 *
 * Dispatches to arch_memcpy_to_pmem() when arch_has_pmem_api() is true
 * and to plain memcpy() otherwise.
 *
 * The copy is meant to leave the destination effectively evicted from,
 * or never written to, the processor cache hierarchy once it completes.
 * Data may nevertheless still reside in cpu or platform buffers, so the
 * caller must follow up with blkdev_issue_flush() on the pmem block
 * device.
 */
static inline void memcpy_to_pmem(void *dst, const void *src, size_t n)
{
	if (!arch_has_pmem_api()) {
		memcpy(dst, src, n);
		return;
	}

	arch_memcpy_to_pmem(dst, src, n);
}
/**
 * copy_from_iter_pmem - fill a PMEM buffer from an iterator
 * @addr: PMEM destination address
 * @bytes: number of bytes to copy
 * @i: iterator supplying the source data
 *
 * Copies from iterator @i into the PMEM buffer at @addr, using
 * arch_copy_from_iter_pmem() when arch_has_pmem_api() is true and
 * copy_from_iter_nocache() otherwise; the chosen helper's return value
 * is passed back. The blkdev_issue_flush() requirement described for
 * memcpy_to_pmem() applies here as well.
 */
static inline size_t copy_from_iter_pmem(void *addr, size_t bytes,
		struct iov_iter *i)
{
	return arch_has_pmem_api() ?
		arch_copy_from_iter_pmem(addr, bytes, i) :
		copy_from_iter_nocache(addr, bytes, i);
}
/**
 * clear_pmem - zero-fill a PMEM memory range
 * @addr: virtual start address
 * @size: number of bytes to zero
 *
 * Writes zeros over @size bytes starting at @addr, via arch_clear_pmem()
 * when arch_has_pmem_api() is true and memset() otherwise. The
 * blkdev_issue_flush() requirement described for memcpy_to_pmem()
 * applies here as well.
 */
static inline void clear_pmem(void *addr, size_t size)
{
	if (!arch_has_pmem_api()) {
		memset(addr, 0, size);
		return;
	}

	arch_clear_pmem(addr, size);
}
/**
 * invalidate_pmem - flush a pmem range out of the cache hierarchy
 * @addr: virtual start address
 * @size: bytes to invalidate (internally aligned to cache line size)
 *
 * On platforms that support clearing poison this flushes any poisoned
 * ranges out of the cache. Nothing is done when arch_has_pmem_api()
 * is false.
 */
static inline void invalidate_pmem(void *addr, size_t size)
{
	if (!arch_has_pmem_api())
		return;

	arch_invalidate_pmem(addr, size);
}
/**
 * wb_cache_pmem - write back the processor cache for a PMEM range
 * @addr: virtual start address
 * @size: number of bytes to write back
 *
 * Writes back the processor cache for @size bytes starting at @addr by
 * calling arch_wb_cache_pmem(); nothing is done when arch_has_pmem_api()
 * is false. The blkdev_issue_flush() requirement described for
 * memcpy_to_pmem() applies here as well.
 */
static inline void wb_cache_pmem(void *addr, size_t size)
{
	if (!arch_has_pmem_api())
		return;

	arch_wb_cache_pmem(addr, size);
}
#endif /* __PMEM_H__ */
...@@ -122,6 +122,12 @@ static inline __must_check int memcpy_mcsafe(void *dst, const void *src, ...@@ -122,6 +122,12 @@ static inline __must_check int memcpy_mcsafe(void *dst, const void *src,
return 0; return 0;
} }
#endif #endif
#ifndef __HAVE_ARCH_MEMCPY_FLUSHCACHE
/*
 * Generic fallback, compiled only when the architecture has not defined
 * __HAVE_ARCH_MEMCPY_FLUSHCACHE: a plain memcpy() of @cnt bytes from
 * @src to @dst with no additional cache handling.
 */
static inline void memcpy_flushcache(void *dst, const void *src, size_t cnt)
{
	(void)memcpy(dst, src, cnt);
}
#endif
void *memchr_inv(const void *s, int c, size_t n); void *memchr_inv(const void *s, int c, size_t n);
char *strreplace(char *s, char old, char new); char *strreplace(char *s, char old, char new);
......
...@@ -95,6 +95,21 @@ size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i); ...@@ -95,6 +95,21 @@ size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i);
size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i); size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i);
bool copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i); bool copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i);
size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i); size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i);
#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
/*
* Note, users like pmem that depend on the stricter semantics of
* copy_from_iter_flushcache() than copy_from_iter_nocache() must check for
* IS_ENABLED(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) before assuming that the
* destination is flushed from the cache on return.
*/
size_t copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i);
#else
/*
 * Fallback for builds without CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE: the
 * request is handed straight to copy_from_iter_nocache() and its result
 * is returned, so no stronger flush guarantee is provided here.
 */
static inline size_t copy_from_iter_flushcache(void *addr, size_t bytes,
		struct iov_iter *i)
{
	size_t copied = copy_from_iter_nocache(addr, bytes, i);

	return copied;
}
#endif
bool copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i); bool copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i);
size_t iov_iter_zero(size_t bytes, struct iov_iter *); size_t iov_iter_zero(size_t bytes, struct iov_iter *);
unsigned long iov_iter_alignment(const struct iov_iter *i); unsigned long iov_iter_alignment(const struct iov_iter *i);
......
...@@ -105,7 +105,8 @@ struct nd_cmd_ars_cap { ...@@ -105,7 +105,8 @@ struct nd_cmd_ars_cap {
__u32 status; __u32 status;
__u32 max_ars_out; __u32 max_ars_out;
__u32 clear_err_unit; __u32 clear_err_unit;
__u32 reserved; __u16 flags;
__u16 reserved;
} __packed; } __packed;
struct nd_cmd_ars_start { struct nd_cmd_ars_start {
...@@ -144,6 +145,43 @@ struct nd_cmd_clear_error { ...@@ -144,6 +145,43 @@ struct nd_cmd_clear_error {
__u64 cleared; __u64 cleared;
} __packed; } __packed;
/*
 * Packed (no padding) payload for a "trans_spa" command.
 * NOTE(review): presumably the ACPI 6.2 "Translate SPA" bus function
 * (system physical address to per-DIMM addresses); confirm against the
 * spec and the nfit driver.
 */
struct nd_cmd_trans_spa {
__u64 spa;
__u32 status;
__u8 flags;
__u8 _reserved[3];
__u64 trans_length;
__u32 num_nvdimms;
/* trailing zero-length array; presumably num_nvdimms entries (confirm) */
struct nd_nvdimm_device {
__u32 nfit_device_handle;
__u32 _reserved;
__u64 dpa;
} __packed devices[0];
} __packed;
/*
 * Packed payload for an ARS error-injection command: an SPA range
 * (base + length), an options byte, and a status word.
 * NOTE(review): field semantics presumably follow the ACPI 6.2 ARS Error
 * Inject function; confirm against the spec.
 */
struct nd_cmd_ars_err_inj {
__u64 err_inj_spa_range_base;
__u64 err_inj_spa_range_length;
__u8 err_inj_options;
/* struct is __packed: status directly follows the single options byte */
__u32 status;
} __packed;
/*
 * Packed command payload: a base/length range followed by a status word.
 * NOTE(review): presumably the "clear" counterpart of
 * struct nd_cmd_ars_err_inj -- confirm against the command definitions.
 */
struct nd_cmd_ars_err_inj_clr {
__u64 err_inj_clr_spa_range_base;
__u64 err_inj_clr_spa_range_length;
__u32 status;
} __packed;
/*
 * Packed command payload: a status word and a record count, followed by a
 * variable-length tail of base/length records.
 *
 * NOTE(review): inj_err_rec_count presumably gives the number of entries
 * in record[] -- confirm against the ACPI NFIT _DSM definition.
 */
struct nd_cmd_ars_err_inj_stat {
__u32 status;
__u32 inj_err_rec_count;
/* Zero-length trailing array: contributes no storage to the header. */
struct nd_error_stat_query_record {
__u64 err_inj_stat_spa_range_base;
__u64 err_inj_stat_spa_range_length;
} __packed record[0];
} __packed;
enum { enum {
ND_CMD_IMPLEMENTED = 0, ND_CMD_IMPLEMENTED = 0,
...@@ -169,6 +207,7 @@ enum { ...@@ -169,6 +207,7 @@ enum {
enum { enum {
ND_ARS_VOLATILE = 1, ND_ARS_VOLATILE = 1,
ND_ARS_PERSISTENT = 2, ND_ARS_PERSISTENT = 2,
ND_ARS_RETURN_PREV_DATA = 1 << 1,
ND_CONFIG_LOCKED = 1, ND_CONFIG_LOCKED = 1,
}; };
...@@ -179,6 +218,7 @@ static inline const char *nvdimm_bus_cmd_name(unsigned cmd) ...@@ -179,6 +218,7 @@ static inline const char *nvdimm_bus_cmd_name(unsigned cmd)
[ND_CMD_ARS_START] = "ars_start", [ND_CMD_ARS_START] = "ars_start",
[ND_CMD_ARS_STATUS] = "ars_status", [ND_CMD_ARS_STATUS] = "ars_status",
[ND_CMD_CLEAR_ERROR] = "clear_error", [ND_CMD_CLEAR_ERROR] = "clear_error",
[ND_CMD_CALL] = "cmd_call",
}; };
if (cmd < ARRAY_SIZE(names) && names[cmd]) if (cmd < ARRAY_SIZE(names) && names[cmd])
......
...@@ -556,6 +556,9 @@ config ARCH_HAS_SG_CHAIN ...@@ -556,6 +556,9 @@ config ARCH_HAS_SG_CHAIN
config ARCH_HAS_PMEM_API config ARCH_HAS_PMEM_API
bool bool
config ARCH_HAS_UACCESS_FLUSHCACHE
bool
config ARCH_HAS_MMIO_FLUSH config ARCH_HAS_MMIO_FLUSH
bool bool
......
...@@ -615,6 +615,28 @@ size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i) ...@@ -615,6 +615,28 @@ size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
} }
EXPORT_SYMBOL(copy_from_iter_nocache); EXPORT_SYMBOL(copy_from_iter_nocache);
#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
/*
 * Copy data described by the iterator @i into the buffer at @addr using
 * the *_flushcache() copy helpers, walking the iterator with
 * iterate_and_advance().
 *
 * @addr:  destination buffer
 * @bytes: number of bytes requested
 * @i:     source iterator
 *
 * If ITER_PIPE is set in i->type the request is rejected: WARN_ON(1) fires
 * and 0 is returned without copying. Otherwise the function returns @bytes
 * as it stands after iterate_and_advance().
 * NOTE(review): the macro is not visible here and may adjust @bytes --
 * confirm.
 */
size_t copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
{
char *to = addr;
if (unlikely(i->type & ITER_PIPE)) {
WARN_ON(1);
return 0;
}
/*
 * Three copy expressions are handed to the macro, one per segment
 * flavour it distinguishes (presumably user iovec, bio_vec page and
 * kernel kvec, judging by the fields and helpers used -- confirm).
 *
 * "(to += len) - len" evaluates to the destination for the current
 * segment while moving `to` past it for the next one.
 */
iterate_and_advance(i, bytes, v,
__copy_from_user_flushcache((to += v.iov_len) - v.iov_len,
v.iov_base, v.iov_len),
memcpy_page_flushcache((to += v.bv_len) - v.bv_len, v.bv_page,
v.bv_offset, v.bv_len),
memcpy_flushcache((to += v.iov_len) - v.iov_len, v.iov_base,
v.iov_len)
)
return bytes;
}
EXPORT_SYMBOL_GPL(copy_from_iter_flushcache);
#endif
bool copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i) bool copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
{ {
char *to = addr; char *to = addr;
......
...@@ -1943,7 +1943,7 @@ static __init int nfit_test_init(void) ...@@ -1943,7 +1943,7 @@ static __init int nfit_test_init(void)
nfit_test->setup = nfit_test0_setup; nfit_test->setup = nfit_test0_setup;
break; break;
case 1: case 1:
nfit_test->num_pm = 1; nfit_test->num_pm = 2;
nfit_test->dcr_idx = NUM_DCR; nfit_test->dcr_idx = NUM_DCR;
nfit_test->num_dcr = 2; nfit_test->num_dcr = 2;
nfit_test->alloc = nfit_test1_alloc; nfit_test->alloc = nfit_test1_alloc;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment