Commit c1d6e828 authored by Dan Williams

pmem: add dax_operations support

Set up a dax_device to have the same lifetime as the pmem block device
and add a ->direct_access() method that is equivalent to
pmem_direct_access(). Once fs/dax.c has been converted to use
dax_operations, the old pmem_direct_access() will be removed.
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
parent 6568b08b
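
For orientation, the shape of the new API as a driver consumes it is roughly the following. This is a sketch distilled from the diff below, not part of the commit; the foo_* names are hypothetical, and only alloc_dax(), kill_dax(), put_dax(), dax_get_private() and the ->direct_access() signature come from this series.

/*
 * Minimal sketch of the dax_operations registration pattern.
 * foo_* is a hypothetical driver; pmem uses struct pmem_device.
 */
#include <linux/dax.h>
#include <linux/pfn_t.h>
#include <linux/types.h>
#include <linux/errno.h>

struct foo_device {
	void *virt_addr;		/* kernel mapping of the media */
	phys_addr_t phys_addr;		/* base physical address */
	struct dax_device *dax_dev;
};

/* translate a page offset into a kernel address + pfn for DAX */
static long foo_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
		long nr_pages, void **kaddr, pfn_t *pfn)
{
	struct foo_device *foo = dax_get_private(dax_dev);

	*kaddr = foo->virt_addr + PFN_PHYS(pgoff);
	*pfn = phys_to_pfn_t(foo->phys_addr + PFN_PHYS(pgoff), PFN_DEV);
	return nr_pages;	/* pages usable at *kaddr, or -errno */
}

static const struct dax_operations foo_dax_ops = {
	.direct_access = foo_dax_direct_access,
};

/* at probe: tie the dax_device lifetime to the driver instance */
static int foo_enable_dax(struct foo_device *foo, const char *host)
{
	foo->dax_dev = alloc_dax(foo, host, &foo_dax_ops);
	if (!foo->dax_dev)
		return -ENOMEM;
	return 0;
}

/* at teardown: invalidate outstanding users, then drop the reference */
static void foo_disable_dax(struct foo_device *foo)
{
	kill_dax(foo->dax_dev);
	put_dax(foo->dax_dev);
}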
@@ -13,13 +13,6 @@
 #ifndef __DAX_H__
 #define __DAX_H__
 struct dax_device;
-struct dax_operations;
-struct dax_device *alloc_dax(void *private, const char *host,
-		const struct dax_operations *ops);
-void put_dax(struct dax_device *dax_dev);
-bool dax_alive(struct dax_device *dax_dev);
-void kill_dax(struct dax_device *dax_dev);
 struct dax_device *inode_dax(struct inode *inode);
 struct inode *dax_inode(struct dax_device *dax_dev);
-void *dax_get_private(struct dax_device *dax_dev);
 #endif /* __DAX_H__ */
@@ -20,6 +20,7 @@ if LIBNVDIMM
 config BLK_DEV_PMEM
 	tristate "PMEM: Persistent memory block device support"
 	default LIBNVDIMM
+	select DAX
 	select ND_BTT if BTT
 	select ND_PFN if NVDIMM_PFN
 	help
...
@@ -28,6 +28,7 @@
 #include <linux/pfn_t.h>
 #include <linux/slab.h>
 #include <linux/pmem.h>
+#include <linux/dax.h>
 #include <linux/nd.h>
 #include "pmem.h"
 #include "pfn.h"
@@ -199,13 +200,13 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
 }
 
 /* see "strong" declaration in tools/testing/nvdimm/pmem-dax.c */
-__weak long pmem_direct_access(struct block_device *bdev, sector_t sector,
-		void **kaddr, pfn_t *pfn, long size)
+__weak long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff,
+		long nr_pages, void **kaddr, pfn_t *pfn)
 {
-	struct pmem_device *pmem = bdev->bd_queue->queuedata;
-	resource_size_t offset = sector * 512 + pmem->data_offset;
+	resource_size_t offset = PFN_PHYS(pgoff) + pmem->data_offset;
 
-	if (unlikely(is_bad_pmem(&pmem->bb, sector, size)))
+	if (unlikely(is_bad_pmem(&pmem->bb, PFN_PHYS(pgoff) / 512,
+					PFN_PHYS(nr_pages))))
 		return -EIO;
 	*kaddr = pmem->virt_addr + offset;
 	*pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags);
@@ -215,26 +216,51 @@ __weak long pmem_direct_access(struct block_device *bdev, sector_t sector,
 	 * requested range.
 	 */
 	if (unlikely(pmem->bb.count))
-		return size;
-	return pmem->size - pmem->pfn_pad - offset;
+		return nr_pages;
+	return PHYS_PFN(pmem->size - pmem->pfn_pad - offset);
+}
+
+static long pmem_blk_direct_access(struct block_device *bdev, sector_t sector,
+		void **kaddr, pfn_t *pfn, long size)
+{
+	struct pmem_device *pmem = bdev->bd_queue->queuedata;
+
+	return __pmem_direct_access(pmem, PHYS_PFN(sector * 512),
+			PHYS_PFN(size), kaddr, pfn);
 }
 
 static const struct block_device_operations pmem_fops = {
 	.owner = THIS_MODULE,
 	.rw_page = pmem_rw_page,
-	.direct_access = pmem_direct_access,
+	.direct_access = pmem_blk_direct_access,
 	.revalidate_disk = nvdimm_revalidate_disk,
 };
 
+static long pmem_dax_direct_access(struct dax_device *dax_dev,
+		pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn)
+{
+	struct pmem_device *pmem = dax_get_private(dax_dev);
+
+	return __pmem_direct_access(pmem, pgoff, nr_pages, kaddr, pfn);
+}
+
+static const struct dax_operations pmem_dax_ops = {
+	.direct_access = pmem_dax_direct_access,
+};
+
 static void pmem_release_queue(void *q)
 {
 	blk_cleanup_queue(q);
 }
 
-static void pmem_release_disk(void *disk)
+static void pmem_release_disk(void *__pmem)
 {
-	del_gendisk(disk);
-	put_disk(disk);
+	struct pmem_device *pmem = __pmem;
+
+	kill_dax(pmem->dax_dev);
+	put_dax(pmem->dax_dev);
+	del_gendisk(pmem->disk);
+	put_disk(pmem->disk);
 }
 
 static int pmem_attach_disk(struct device *dev,
@@ -245,6 +271,7 @@ static int pmem_attach_disk(struct device *dev,
 	struct vmem_altmap __altmap, *altmap = NULL;
 	struct resource *res = &nsio->res;
 	struct nd_pfn *nd_pfn = NULL;
+	struct dax_device *dax_dev;
 	int nid = dev_to_node(dev);
 	struct nd_pfn_sb *pfn_sb;
 	struct pmem_device *pmem;
@@ -325,6 +352,7 @@ static int pmem_attach_disk(struct device *dev,
 	disk = alloc_disk_node(0, nid);
 	if (!disk)
 		return -ENOMEM;
+	pmem->disk = disk;
 
 	disk->fops = &pmem_fops;
 	disk->queue = q;
@@ -336,9 +364,16 @@ static int pmem_attach_disk(struct device *dev,
 		return -ENOMEM;
 	nvdimm_badblocks_populate(nd_region, &pmem->bb, res);
 	disk->bb = &pmem->bb;
-	device_add_disk(dev, disk);
 
-	if (devm_add_action_or_reset(dev, pmem_release_disk, disk))
+	dax_dev = alloc_dax(pmem, disk->disk_name, &pmem_dax_ops);
+	if (!dax_dev) {
+		put_disk(disk);
+		return -ENOMEM;
+	}
+	pmem->dax_dev = dax_dev;
+
+	device_add_disk(dev, disk);
+	if (devm_add_action_or_reset(dev, pmem_release_disk, pmem))
 		return -ENOMEM;
 
 	revalidate_disk(disk);
...
@@ -5,8 +5,6 @@
 #include <linux/pfn_t.h>
 #include <linux/fs.h>
 
-long pmem_direct_access(struct block_device *bdev, sector_t sector,
-		void **kaddr, pfn_t *pfn, long size);
 /* this definition is in it's own header for tools/testing/nvdimm to consume */
 struct pmem_device {
 	/* One contiguous memory region per device */
@@ -20,5 +18,10 @@ struct pmem_device {
 	/* trim size when namespace capacity has been section aligned */
 	u32 pfn_pad;
 	struct badblocks bb;
+	struct dax_device *dax_dev;
+	struct gendisk *disk;
 };
+
+long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff,
+		long nr_pages, void **kaddr, pfn_t *pfn);
 #endif /* __NVDIMM_PMEM_H__ */
@@ -21,6 +21,12 @@ struct dax_operations {
 int dax_read_lock(void);
 void dax_read_unlock(int id);
 struct dax_device *dax_get_by_host(const char *host);
+struct dax_device *alloc_dax(void *private, const char *host,
+		const struct dax_operations *ops);
+void put_dax(struct dax_device *dax_dev);
+bool dax_alive(struct dax_device *dax_dev);
+void kill_dax(struct dax_device *dax_dev);
+void *dax_get_private(struct dax_device *dax_dev);
 
 /*
  * We use lowest available bit in exceptional entry for locking, one bit for
...
@@ -15,13 +15,13 @@
 #include <pmem.h>
 #include <nd.h>
 
-long pmem_direct_access(struct block_device *bdev, sector_t sector,
-		void **kaddr, pfn_t *pfn, long size)
+long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff,
+		long nr_pages, void **kaddr, pfn_t *pfn)
 {
-	struct pmem_device *pmem = bdev->bd_queue->queuedata;
-	resource_size_t offset = sector * 512 + pmem->data_offset;
+	resource_size_t offset = PFN_PHYS(pgoff) + pmem->data_offset;
 
-	if (unlikely(is_bad_pmem(&pmem->bb, sector, size)))
+	if (unlikely(is_bad_pmem(&pmem->bb, PFN_PHYS(pgoff) / 512,
+					PFN_PHYS(nr_pages))))
 		return -EIO;
 
 	/*
@@ -34,11 +34,10 @@ long pmem_direct_access(struct block_device *bdev, sector_t sector,
 		*kaddr = pmem->virt_addr + offset;
 		page = vmalloc_to_page(pmem->virt_addr + offset);
 		*pfn = page_to_pfn_t(page);
-		dev_dbg_ratelimited(disk_to_dev(bdev->bd_disk)->parent,
-				"%s: sector: %#llx pfn: %#lx\n", __func__,
-				(unsigned long long) sector, page_to_pfn(page));
+		pr_debug_ratelimited("%s: pmem: %p pgoff: %#lx pfn: %#lx\n",
+				__func__, pmem, pgoff, page_to_pfn(page));
 
-		return PAGE_SIZE;
+		return 1;
 	}
 
 	*kaddr = pmem->virt_addr + offset;
@@ -49,6 +48,6 @@ long pmem_direct_access(struct block_device *bdev, sector_t sector,
 	 * requested range.
 	 */
 	if (unlikely(pmem->bb.count))
-		return size;
-	return pmem->size - pmem->pfn_pad - offset;
+		return nr_pages;
+	return PHYS_PFN(pmem->size - pmem->pfn_pad - offset);
 }