Commit effa04cc authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for-4.4/lightnvm' of git://git.kernel.dk/linux-block

Pull lightnvm support from Jens Axboe:
 "This adds support for lightnvm, and adds support to NVMe as well.
  This is pretty exciting, in that it enables new and interesting use
  cases for compatible flash devices.  There's a LWN writeup about an
  earlier posting here:

      https://lwn.net/Articles/641247/

  This has been underway for a while, and should be ready for merging at
  this point"

* 'for-4.4/lightnvm' of git://git.kernel.dk/linux-block:
  nvme: lightnvm: clean up a data type
  lightnvm: refactor phys addrs type to u64
  nvme: LightNVM support
  rrpc: Round-robin sector target with cost-based gc
  gennvm: Generic NVM manager
  lightnvm: Support for Open-Channel SSDs
parents a9aa31cd 5f436e5e
...@@ -149,6 +149,7 @@ Code Seq#(hex) Include File Comments ...@@ -149,6 +149,7 @@ Code Seq#(hex) Include File Comments
'K' all linux/kd.h 'K' all linux/kd.h
'L' 00-1F linux/loop.h conflict! 'L' 00-1F linux/loop.h conflict!
'L' 10-1F drivers/scsi/mpt2sas/mpt2sas_ctl.h conflict! 'L' 10-1F drivers/scsi/mpt2sas/mpt2sas_ctl.h conflict!
'L' 20-2F linux/lightnvm.h
'L' E0-FF linux/ppdd.h encrypted disk device driver 'L' E0-FF linux/ppdd.h encrypted disk device driver
<http://linux01.gwdg.de/~alatham/ppdd.html> <http://linux01.gwdg.de/~alatham/ppdd.html>
'M' all linux/soundcard.h conflict! 'M' all linux/soundcard.h conflict!
......
...@@ -6279,6 +6279,14 @@ F: drivers/nvdimm/pmem.c ...@@ -6279,6 +6279,14 @@ F: drivers/nvdimm/pmem.c
F: include/linux/pmem.h F: include/linux/pmem.h
F: arch/*/include/asm/pmem.h F: arch/*/include/asm/pmem.h
LIGHTNVM PLATFORM SUPPORT
M: Matias Bjorling <mb@lightnvm.io>
W: http://github.com/OpenChannelSSD
S: Maintained
F: drivers/lightnvm/
F: include/linux/lightnvm.h
F: include/uapi/linux/lightnvm.h
LINUX FOR IBM pSERIES (RS/6000) LINUX FOR IBM pSERIES (RS/6000)
M: Paul Mackerras <paulus@au.ibm.com> M: Paul Mackerras <paulus@au.ibm.com>
W: http://www.ibm.com/linux/ltc/projects/ppc W: http://www.ibm.com/linux/ltc/projects/ppc
......
...@@ -44,6 +44,8 @@ source "drivers/net/Kconfig" ...@@ -44,6 +44,8 @@ source "drivers/net/Kconfig"
source "drivers/isdn/Kconfig" source "drivers/isdn/Kconfig"
source "drivers/lightnvm/Kconfig"
# input before char - char/joystick depends on it. As does USB. # input before char - char/joystick depends on it. As does USB.
source "drivers/input/Kconfig" source "drivers/input/Kconfig"
......
...@@ -70,6 +70,7 @@ obj-$(CONFIG_NUBUS) += nubus/ ...@@ -70,6 +70,7 @@ obj-$(CONFIG_NUBUS) += nubus/
obj-y += macintosh/ obj-y += macintosh/
obj-$(CONFIG_IDE) += ide/ obj-$(CONFIG_IDE) += ide/
obj-$(CONFIG_SCSI) += scsi/ obj-$(CONFIG_SCSI) += scsi/
obj-$(CONFIG_NVM) += lightnvm/
obj-y += nvme/ obj-y += nvme/
obj-$(CONFIG_ATA) += ata/ obj-$(CONFIG_ATA) += ata/
obj-$(CONFIG_TARGET_CORE) += target/ obj-$(CONFIG_TARGET_CORE) += target/
......
#
# Open-Channel SSD NVM configuration
#
menuconfig NVM
bool "Open-Channel SSD target support"
depends on BLOCK
help
Say Y here to enable support for Open-Channel SSDs.
Open-Channel SSDs implement a set of extensions to SSDs that
expose direct access to the underlying non-volatile memory.
If you say N, all options in this submenu will be skipped and
disabled; only do this if you know what you are doing.
if NVM
config NVM_DEBUG
bool "Open-Channel SSD debugging support"
---help---
Exposes a debug management interface to create/remove targets at:
/sys/module/lnvm/parameters/configure_debug
It is required to create/remove targets without IOCTLs.
config NVM_GENNVM
tristate "Generic NVM manager for Open-Channel SSDs"
---help---
NVM media manager for Open-Channel SSDs that offload management
functionality to device, while keeping data placement and garbage
collection decisions on the host.
config NVM_RRPC
tristate "Round-robin Hybrid Open-Channel SSD target"
---help---
Allows an open-channel SSD to be exposed as a block device to the
host. The target is implemented using a linear mapping table and
cost-based garbage collection. It is optimized for 4K IO sizes.
endif # NVM
#
# Makefile for Open-Channel SSDs.
#
obj-$(CONFIG_NVM) := core.o
obj-$(CONFIG_NVM_GENNVM) += gennvm.o
obj-$(CONFIG_NVM_RRPC) += rrpc.o
/*
* Copyright (C) 2015 IT University of Copenhagen. All rights reserved.
* Initial release: Matias Bjorling <m@bjorling.me>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version
* 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
* USA.
*
*/
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/list.h>
#include <linux/types.h>
#include <linux/sem.h>
#include <linux/bitmap.h>
#include <linux/module.h>
#include <linux/miscdevice.h>
#include <linux/lightnvm.h>
#include <uapi/linux/lightnvm.h>
/* Registered target types (struct nvm_tgt_type), linked through ->list. */
static LIST_HEAD(nvm_targets);
/* Registered media managers (struct nvmm_type), linked through ->list. */
static LIST_HEAD(nvm_mgrs);
/* Registered devices (struct nvm_dev), linked through ->devices. */
static LIST_HEAD(nvm_devices);
/*
 * Taken for writing around updates of the three lists above and of each
 * device's online_targets list.
 */
static DECLARE_RWSEM(nvm_lock);
static struct nvm_tgt_type *nvm_find_target_type(const char *name)
{
struct nvm_tgt_type *tt;
list_for_each_entry(tt, &nvm_targets, list)
if (!strcmp(name, tt->name))
return tt;
return NULL;
}
/*
 * Add @tt to the list of known target types.
 *
 * Returns 0 on success, or -EEXIST when a target type with the same name is
 * already registered. The list is updated under nvm_lock.
 */
int nvm_register_target(struct nvm_tgt_type *tt)
{
	int err;

	down_write(&nvm_lock);
	if (!nvm_find_target_type(tt->name)) {
		list_add(&tt->list, &nvm_targets);
		err = 0;
	} else {
		err = -EEXIST;
	}
	up_write(&nvm_lock);

	return err;
}
EXPORT_SYMBOL(nvm_register_target);
/*
 * Remove @tt from the list of known target types. A NULL @tt is ignored.
 * The list is updated under nvm_lock.
 */
void nvm_unregister_target(struct nvm_tgt_type *tt)
{
	if (tt) {
		down_write(&nvm_lock);
		list_del(&tt->list);
		up_write(&nvm_lock);
	}
}
EXPORT_SYMBOL(nvm_unregister_target);
/*
 * Allocate a buffer from the device's PPA-list pool.
 *
 * Forwards to the driver's dev_dma_alloc() callback with the device queue,
 * dev->ppalist_pool, @mem_flags and @dma_handler, and returns whatever the
 * callback returns.
 */
void *nvm_dev_dma_alloc(struct nvm_dev *dev, gfp_t mem_flags,
			dma_addr_t *dma_handler)
{
	void *buf;

	buf = dev->ops->dev_dma_alloc(dev->q, dev->ppalist_pool, mem_flags,
				      dma_handler);
	return buf;
}
EXPORT_SYMBOL(nvm_dev_dma_alloc);
/*
 * Counterpart of nvm_dev_dma_alloc(): hands @ppa_list and its DMA handle
 * back to dev->ppalist_pool through the driver's dev_dma_free() callback.
 */
void nvm_dev_dma_free(struct nvm_dev *dev, void *ppa_list,
dma_addr_t dma_handler)
{
dev->ops->dev_dma_free(dev->ppalist_pool, ppa_list, dma_handler);
}
EXPORT_SYMBOL(nvm_dev_dma_free);
static struct nvmm_type *nvm_find_mgr_type(const char *name)
{
struct nvmm_type *mt;
list_for_each_entry(mt, &nvm_mgrs, list)
if (!strcmp(name, mt->name))
return mt;
return NULL;
}
/*
 * Add @mt to the list of known media managers.
 *
 * Returns 0 on success, or -EEXIST when a manager with the same name is
 * already registered. The list is updated under nvm_lock.
 */
int nvm_register_mgr(struct nvmm_type *mt)
{
	int err;

	down_write(&nvm_lock);
	if (!nvm_find_mgr_type(mt->name)) {
		list_add(&mt->list, &nvm_mgrs);
		err = 0;
	} else {
		err = -EEXIST;
	}
	up_write(&nvm_lock);

	return err;
}
EXPORT_SYMBOL(nvm_register_mgr);
/*
 * Remove @mt from the list of known media managers. A NULL @mt is ignored.
 * The list is updated under nvm_lock.
 */
void nvm_unregister_mgr(struct nvmm_type *mt)
{
	if (mt) {
		down_write(&nvm_lock);
		list_del(&mt->list);
		up_write(&nvm_lock);
	}
}
EXPORT_SYMBOL(nvm_unregister_mgr);
static struct nvm_dev *nvm_find_nvm_dev(const char *name)
{
struct nvm_dev *dev;
list_for_each_entry(dev, &nvm_devices, devices)
if (!strcmp(name, dev->name))
return dev;
return NULL;
}
/*
 * Ask the media manager attached to @dev for a block from @lun.
 *
 * @flags is passed through to the manager's get_blk() callback unchanged;
 * the callback's return value is returned as-is.
 */
struct nvm_block *nvm_get_blk(struct nvm_dev *dev, struct nvm_lun *lun,
			      unsigned long flags)
{
	struct nvm_block *blk;

	blk = dev->mt->get_blk(dev, lun, flags);
	return blk;
}
EXPORT_SYMBOL(nvm_get_blk);
/*
 * Hand @blk back to the media manager attached to @dev.
 *
 * Assumes that all valid pages have already been moved out of the block
 * before it is released to the manager.
 */
void nvm_put_blk(struct nvm_dev *dev, struct nvm_block *blk)
{
	/* A void function must not "return <expr>;", so just make the call. */
	dev->mt->put_blk(dev, blk);
}
EXPORT_SYMBOL(nvm_put_blk);
/*
 * Submit @rqd through the media manager attached to @dev and return the
 * manager's submit_io() result unchanged.
 */
int nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
{
	int status;

	status = dev->mt->submit_io(dev, rqd);
	return status;
}
EXPORT_SYMBOL(nvm_submit_io);
/*
 * Erase @blk through the media manager attached to @dev. The manager's
 * erase_blk() callback is invoked with flags set to 0 and its result is
 * returned unchanged.
 */
int nvm_erase_blk(struct nvm_dev *dev, struct nvm_block *blk)
{
	int status;

	status = dev->mt->erase_blk(dev, blk, 0);
	return status;
}
EXPORT_SYMBOL(nvm_erase_blk);
/* Release the memory backing @dev with kfree(). */
static void nvm_core_free(struct nvm_dev *dev)
{
kfree(dev);
}
static int nvm_core_init(struct nvm_dev *dev)
{
struct nvm_id *id = &dev->identity;
struct nvm_id_group *grp = &id->groups[0];
/* device values */
dev->nr_chnls = grp->num_ch;
dev->luns_per_chnl = grp->num_lun;
dev->pgs_per_blk = grp->num_pg;
dev->blks_per_lun = grp->num_blk;
dev->nr_planes = grp->num_pln;
dev->sec_size = grp->csecs;
dev->oob_size = grp->sos;
dev->sec_per_pg = grp->fpg_sz / grp->csecs;
dev->addr_mode = id->ppat;
dev->addr_format = id->ppaf;
dev->plane_mode = NVM_PLANE_SINGLE;
dev->max_rq_size = dev->ops->max_phys_sect * dev->sec_size;
if (grp->mpos & 0x020202)
dev->plane_mode = NVM_PLANE_DOUBLE;
if (grp->mpos & 0x040404)
dev->plane_mode = NVM_PLANE_QUAD;
/* calculated values */
dev->sec_per_pl = dev->sec_per_pg * dev->nr_planes;
dev->sec_per_blk = dev->sec_per_pl * dev->pgs_per_blk;
dev->sec_per_lun = dev->sec_per_blk * dev->blks_per_lun;
dev->nr_luns = dev->luns_per_chnl * dev->nr_chnls;
dev->total_blocks = dev->nr_planes *
dev->blks_per_lun *
dev->luns_per_chnl *
dev->nr_chnls;
dev->total_pages = dev->total_blocks * dev->pgs_per_blk;
INIT_LIST_HEAD(&dev->online_targets);
return 0;
}
/*
 * Detach the media manager from @dev (when one is attached) and release the
 * device structure. A NULL @dev is ignored.
 */
static void nvm_free(struct nvm_dev *dev)
{
	if (dev) {
		if (dev->mt)
			dev->mt->unregister_mgr(dev);

		nvm_core_free(dev);
	}
}
/*
 * Identify @dev, derive its geometry and try to attach a media manager.
 *
 * Each registered manager's register_mgr() is tried in list order: a
 * negative return aborts initialisation, a positive return selects that
 * manager, and zero moves on to the next one. Finding no manager is not an
 * error; the device is left without one and 0 is returned.
 *
 * Returns 0 on success or a negative errno. On failure @dev is NOT freed
 * here: the caller allocated it and releases it, so freeing it in this
 * function as well would be a double free. Every failure path also returns
 * a non-zero code so that a rejected device is never treated as registered.
 */
static int nvm_init(struct nvm_dev *dev)
{
	struct nvmm_type *mt;
	int ret = -EINVAL;

	if (!dev->q || !dev->ops)
		return ret;

	if (dev->ops->identity(dev->q, &dev->identity)) {
		pr_err("nvm: device could not be identified\n");
		goto err;
	}

	pr_debug("nvm: ver:%x nvm_vendor:%x groups:%u\n",
		 dev->identity.ver_id, dev->identity.vmnt,
		 dev->identity.cgrps);

	/* ret is still -EINVAL for the two compatibility checks below. */
	if (dev->identity.ver_id != 1) {
		pr_err("nvm: device not supported by kernel.\n");
		goto err;
	}

	if (dev->identity.cgrps != 1) {
		pr_err("nvm: only one group configuration supported.\n");
		goto err;
	}

	ret = nvm_core_init(dev);
	if (ret) {
		pr_err("nvm: could not initialize core structures.\n");
		goto err;
	}

	/* register with device with a supported manager */
	list_for_each_entry(mt, &nvm_mgrs, list) {
		ret = mt->register_mgr(dev);
		if (ret < 0)
			goto err; /* initialization failed */
		if (ret > 0) {
			dev->mt = mt;
			break; /* successfully initialized */
		}
	}

	if (!ret) {
		pr_info("nvm: no compatible manager found.\n");
		return 0;
	}

	pr_info("nvm: registered %s [%u/%u/%u/%u/%u/%u]\n",
		dev->name, dev->sec_per_pg, dev->nr_planes,
		dev->pgs_per_blk, dev->blks_per_lun, dev->nr_luns,
		dev->nr_chnls);
	return 0;
err:
	/* dev->mt is only set on the success path, so nothing to detach. */
	pr_err("nvm: failed to initialize nvm\n");
	return ret;
}
/*
 * Tear down @dev: destroy its PPA-list pool when one was created, then
 * detach the media manager and free the device through nvm_free().
 * @dev must not be touched after this returns.
 */
static void nvm_exit(struct nvm_dev *dev)
{
if (dev->ppalist_pool)
dev->ops->destroy_dma_pool(dev->ppalist_pool);
nvm_free(dev);
pr_info("nvm: successfully unloaded\n");
}
/*
 * Register an Open-Channel SSD with the lightnvm core.
 *
 * @q:         request queue of the underlying device
 * @disk_name: name the device is registered (and later looked up) under
 * @ops:       driver callbacks; ->identity is mandatory
 *
 * Allocates the device structure, initialises it, validates the driver's
 * max_phys_sect limit, creates the PPA-list pool when more than one sector
 * per request is supported, and only then publishes the device on
 * nvm_devices. A device that fails any step is therefore never visible to
 * lookups.
 *
 * Returns 0 on success or a negative errno.
 */
int nvm_register(struct request_queue *q, char *disk_name,
		 struct nvm_dev_ops *ops)
{
	struct nvm_dev *dev;
	int ret;

	if (!ops->identity)
		return -EINVAL;

	dev = kzalloc(sizeof(struct nvm_dev), GFP_KERNEL);
	if (!dev)
		return -ENOMEM;

	dev->q = q;
	dev->ops = ops;
	/*
	 * The buffer was zeroed by kzalloc(); copying at most
	 * DISK_NAME_LEN - 1 bytes keeps the name NUL-terminated.
	 */
	strncpy(dev->name, disk_name, DISK_NAME_LEN - 1);

	ret = nvm_init(dev);
	if (ret)
		goto err_init;

	/* Test the upper limit first; "> 1" would otherwise shadow it. */
	if (dev->ops->max_phys_sect > 256) {
		pr_info("nvm: max sectors supported is 256.\n");
		ret = -EINVAL;
		goto err_free;
	}

	if (dev->ops->max_phys_sect > 1) {
		dev->ppalist_pool = dev->ops->create_dma_pool(dev->q,
							      "ppalist");
		if (!dev->ppalist_pool) {
			pr_err("nvm: could not create ppa pool\n");
			ret = -ENOMEM;
			goto err_free;
		}
	}

	down_write(&nvm_lock);
	list_add(&dev->devices, &nvm_devices);
	up_write(&nvm_lock);

	return 0;

err_free:
	/* Initialisation succeeded, so a manager may be attached: detach it. */
	nvm_free(dev);
	return ret;
err_init:
	kfree(dev);
	return ret;
}
EXPORT_SYMBOL(nvm_register);
/*
 * Unregister the device previously registered under @disk_name.
 *
 * The device is looked up and unlinked from nvm_devices under nvm_lock, and
 * only afterwards torn down. nvm_exit() frees the device structure, so the
 * list entry must be removed before it is called.
 */
void nvm_unregister(char *disk_name)
{
	struct nvm_dev *dev;

	down_write(&nvm_lock);
	dev = nvm_find_nvm_dev(disk_name);
	if (!dev) {
		up_write(&nvm_lock);
		pr_err("nvm: could not find device %s to unregister\n",
		       disk_name);
		return;
	}
	list_del(&dev->devices);
	up_write(&nvm_lock);

	nvm_exit(dev);
}
EXPORT_SYMBOL(nvm_unregister);
/*
 * Block device operations installed on every target disk created by
 * nvm_create_target(). Only the owning module is set.
 */
static const struct block_device_operations nvm_fops = {
.owner = THIS_MODULE,
};
static int nvm_create_target(struct nvm_dev *dev,
struct nvm_ioctl_create *create)
{
struct nvm_ioctl_create_simple *s = &create->conf.s;
struct request_queue *tqueue;
struct nvmm_type *mt;
struct gendisk *tdisk;
struct nvm_tgt_type *tt;
struct nvm_target *t;
void *targetdata;
int ret = 0;
if (!dev->mt) {
/* register with device with a supported NVM manager */
list_for_each_entry(mt, &nvm_mgrs, list) {
ret = mt->register_mgr(dev);
if (ret < 0)
return ret; /* initialization failed */
if (ret > 0) {
dev->mt = mt;
break; /* successfully initialized */
}
}
if (!ret) {
pr_info("nvm: no compatible nvm manager found.\n");
return -ENODEV;
}
}
tt = nvm_find_target_type(create->tgttype);
if (!tt) {
pr_err("nvm: target type %s not found\n", create->tgttype);
return -EINVAL;
}
down_write(&nvm_lock);
list_for_each_entry(t, &dev->online_targets, list) {
if (!strcmp(create->tgtname, t->disk->disk_name)) {
pr_err("nvm: target name already exists.\n");
up_write(&nvm_lock);
return -EINVAL;
}
}
up_write(&nvm_lock);
t = kmalloc(sizeof(struct nvm_target), GFP_KERNEL);
if (!t)
return -ENOMEM;
tqueue = blk_alloc_queue_node(GFP_KERNEL, dev->q->node);
if (!tqueue)
goto err_t;
blk_queue_make_request(tqueue, tt->make_rq);
tdisk = alloc_disk(0);
if (!tdisk)
goto err_queue;
sprintf(tdisk->disk_name, "%s", create->tgtname);
tdisk->flags = GENHD_FL_EXT_DEVT;
tdisk->major = 0;
tdisk->first_minor = 0;
tdisk->fops = &nvm_fops;
tdisk->queue = tqueue;
targetdata = tt->init(dev, tdisk, s->lun_begin, s->lun_end);
if (IS_ERR(targetdata))
goto err_init;
tdisk->private_data = targetdata;
tqueue->queuedata = targetdata;
blk_queue_max_hw_sectors(tqueue, 8 * dev->ops->max_phys_sect);
set_capacity(tdisk, tt->capacity(targetdata));
add_disk(tdisk);
t->type = tt;
t->disk = tdisk;
down_write(&nvm_lock);
list_add_tail(&t->list, &dev->online_targets);
up_write(&nvm_lock);
return 0;
err_init:
put_disk(tdisk);
err_queue:
blk_cleanup_queue(tqueue);
err_t:
kfree(t);
return -ENOMEM;
}
/*
 * Tear down target @t and free it.
 *
 * The caller must hold nvm_lock (asserted via lockdep). The disk is removed
 * first, then the target type's optional exit() hook runs on the disk's
 * private data, after which the queue and disk are released and @t is
 * unlinked from its device's target list and freed.
 */
static void nvm_remove_target(struct nvm_target *t)
{
struct nvm_tgt_type *tt = t->type;
struct gendisk *tdisk = t->disk;
struct request_queue *q = tdisk->queue;
lockdep_assert_held(&nvm_lock);
del_gendisk(tdisk);
/* exit() is optional for a target type. */
if (tt->exit)
tt->exit(tdisk->private_data);
blk_cleanup_queue(q);
put_disk(tdisk);
list_del(&t->list);
kfree(t);
}
static int __nvm_configure_create(struct nvm_ioctl_create *create)
{
struct nvm_dev *dev;
struct nvm_ioctl_create_simple *s;
dev = nvm_find_nvm_dev(create->dev);
if (!dev) {
pr_err("nvm: device not found\n");
return -EINVAL;
}
if (create->conf.type != NVM_CONFIG_TYPE_SIMPLE) {
pr_err("nvm: config type not valid\n");
return -EINVAL;
}
s = &create->conf.s;
if (s->lun_begin > s->lun_end || s->lun_end > dev->nr_luns) {
pr_err("nvm: lun out of bound (%u:%u > %u)\n",
s->lun_begin, s->lun_end, dev->nr_luns);
return -EINVAL;
}
return nvm_create_target(dev, create);
}
static int __nvm_configure_remove(struct nvm_ioctl_remove *remove)
{
struct nvm_target *t = NULL;
struct nvm_dev *dev;
int ret = -1;
down_write(&nvm_lock);
list_for_each_entry(dev, &nvm_devices, devices)
list_for_each_entry(t, &dev->online_targets, list) {
if (!strcmp(remove->tgtname, t->disk->disk_name)) {
nvm_remove_target(t);
ret = 0;
break;
}
}
up_write(&nvm_lock);
if (ret) {
pr_err("nvm: target \"%s\" doesn't exist.\n", remove->tgtname);
return -EINVAL;
}
return 0;
}
#ifdef CONFIG_NVM_DEBUG
static int nvm_configure_show(const char *val)
{
struct nvm_dev *dev;
char opcode, devname[DISK_NAME_LEN];
int ret;
ret = sscanf(val, "%c %32s", &opcode, devname);
if (ret != 2) {
pr_err("nvm: invalid command. Use \"opcode devicename\".\n");
return -EINVAL;
}
dev = nvm_find_nvm_dev(devname);
if (!dev) {
pr_err("nvm: device not found\n");
return -EINVAL;
}
if (!dev->mt)
return 0;
dev->mt->free_blocks_print(dev);
return 0;
}
/*
 * Debug command "d <targetname>": remove a target.
 *
 * Parses the opcode and target name from @val into an on-stack
 * nvm_ioctl_remove request and passes it to __nvm_configure_remove().
 *
 * Returns 0 on success or -EINVAL for a malformed command or unknown target.
 */
static int nvm_configure_remove(const char *val)
{
	struct nvm_ioctl_remove remove;
	char opcode;
	int ret;

	/*
	 * tgtname holds DISK_NAME_LEN (32) bytes; a %Ns conversion stores up
	 * to N characters plus a NUL, so the width is capped at 31.
	 */
	ret = sscanf(val, "%c %31s", &opcode, remove.tgtname);
	if (ret != 2) {
		pr_err("nvm: invalid command. Use \"d targetname\".\n");
		return -EINVAL;
	}

	remove.flags = 0;

	return __nvm_configure_remove(&remove);
}
/*
 * Debug command "a <device> <name> <tgttype> <lun_begin>:<lun_end>":
 * create a target.
 *
 * Parses the fields from @val into an on-stack nvm_ioctl_create request
 * with the "simple" configuration type and passes it to
 * __nvm_configure_create().
 *
 * Returns 0 on success or a negative errno.
 */
static int nvm_configure_create(const char *val)
{
	struct nvm_ioctl_create create;
	char opcode;
	unsigned int lun_begin, lun_end;
	int ret;

	/*
	 * Field widths are one less than the destination sizes because a
	 * %Ns conversion stores up to N characters plus a NUL: dev and
	 * tgtname hold DISK_NAME_LEN (32) bytes, tgttype holds
	 * NVM_TTYPE_NAME_MAX (48) bytes. %u requires unsigned int storage.
	 */
	ret = sscanf(val, "%c %31s %31s %47s %u:%u", &opcode, create.dev,
		     create.tgtname, create.tgttype,
		     &lun_begin, &lun_end);
	if (ret != 6) {
		pr_err("nvm: invalid command. Use \"opcode device name tgttype lun_begin:lun_end\".\n");
		return -EINVAL;
	}

	create.flags = 0;
	create.conf.type = NVM_CONFIG_TYPE_SIMPLE;
	create.conf.s.lun_begin = lun_begin;
	create.conf.s.lun_end = lun_end;

	return __nvm_configure_create(&create);
}
/*
 * Set handler of the "configure_debug" module parameter (exposed at
 * /sys/module/lnvm/parameters/configure_debug).
 *
 * The first character of @val selects the command: 'a' creates a target,
 * 'd' removes one and 's' shows free-block counters. The whole string is
 * handed to the matching helper for parsing. Anything else is rejected with
 * -EINVAL.
 */
static int nvm_configure_by_str_event(const char *val,
				      const struct kernel_param *kp)
{
	char cmd;

	if (sscanf(val, "%c", &cmd) != 1) {
		pr_err("nvm: string must have the format of \"cmd ...\"\n");
		return -EINVAL;
	}

	if (cmd == 'a')
		return nvm_configure_create(val);
	if (cmd == 'd')
		return nvm_configure_remove(val);
	if (cmd == 's')
		return nvm_configure_show(val);

	pr_err("nvm: invalid command\n");
	return -EINVAL;
}
/*
 * Get handler of the "configure_debug" module parameter: list the names of
 * all registered devices, one per line, after a header line.
 *
 * Output stops early once another full line might no longer fit below the
 * 4095-byte mark. The returned length excludes the final newline.
 */
static int nvm_configure_get(char *buf, const struct kernel_param *kp)
{
	char *buf_start = buf;
	struct nvm_dev *dev;

	buf += sprintf(buf, "available devices:\n");
	down_write(&nvm_lock);
	list_for_each_entry(dev, &nvm_devices, devices) {
		/*
		 * Bytes written so far. Each entry is a space, the name
		 * padded to 32 columns and a newline, plus the NUL that
		 * sprintf() appends: DISK_NAME_LEN + 3 bytes.
		 */
		int sz = buf - buf_start;

		if (sz > 4095 - (DISK_NAME_LEN + 3))
			break;
		buf += sprintf(buf, " %32s\n", dev->name);
	}
	up_write(&nvm_lock);

	return buf - buf_start - 1;
}
/* Set/get callbacks backing the "configure_debug" module parameter. */
static const struct kernel_param_ops nvm_configure_by_str_event_param_ops = {
.set = nvm_configure_by_str_event,
.get = nvm_configure_get,
};
/* Override the parameter prefix so the parameter is named "lnvm.configure_debug". */
#undef MODULE_PARAM_PREFIX
#define MODULE_PARAM_PREFIX "lnvm."
/* No backing variable (NULL arg); mode 0644. */
module_param_cb(configure_debug, &nvm_configure_by_str_event_param_ops, NULL,
0644);
#endif /* CONFIG_NVM_DEBUG */
/*
 * NVM_INFO ioctl: report the core version and the registered target types.
 *
 * The user's struct nvm_ioctl_info is copied in, the version fields and one
 * tgts[] entry per registered target type are filled in (at most as many as
 * tgts[] can hold), tgtsize is set to the number of entries written, and
 * the structure is copied back.
 *
 * Requires CAP_SYS_ADMIN. Returns 0 on success, -EPERM without the
 * capability, the memdup_user() error on copy-in failure, or -EFAULT when
 * the result cannot be copied back.
 */
static long nvm_ioctl_info(struct file *file, void __user *arg)
{
	struct nvm_ioctl_info *info;
	struct nvm_tgt_type *tt;
	int tgt_iter = 0;
	long ret = 0;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	info = memdup_user(arg, sizeof(struct nvm_ioctl_info));
	if (IS_ERR(info))
		return PTR_ERR(info);

	info->version[0] = NVM_VERSION_MAJOR;
	info->version[1] = NVM_VERSION_MINOR;
	info->version[2] = NVM_VERSION_PATCH;

	down_write(&nvm_lock);
	list_for_each_entry(tt, &nvm_targets, list) {
		struct nvm_ioctl_info_tgt *tgt;

		/* Never write past the end of the fixed-size tgts[] array. */
		if (tgt_iter >= ARRAY_SIZE(info->tgts))
			break;
		tgt = &info->tgts[tgt_iter];

		tgt->version[0] = tt->version[0];
		tgt->version[1] = tt->version[1];
		tgt->version[2] = tt->version[2];
		strncpy(tgt->tgtname, tt->name, NVM_TTYPE_NAME_MAX);

		tgt_iter++;
	}

	info->tgtsize = tgt_iter;
	up_write(&nvm_lock);

	if (copy_to_user(arg, info, sizeof(struct nvm_ioctl_info)))
		ret = -EFAULT;

	/* Free on every path, including a failed copy-out. */
	kfree(info);
	return ret;
}
/*
 * NVM_GET_DEVICES ioctl: report the registered devices and, for each, the
 * name and version of its media manager ("none" when no manager is
 * attached).
 *
 * At most 31 devices are reported. The limit is checked before an entry is
 * written, so a 32nd entry is never stored.
 *
 * Requires CAP_SYS_ADMIN. Returns 0 on success, -EPERM without the
 * capability, -ENOMEM on allocation failure, or -EFAULT when the result
 * cannot be copied to user space.
 */
static long nvm_ioctl_get_devices(struct file *file, void __user *arg)
{
	struct nvm_ioctl_get_devices *devices;
	struct nvm_dev *dev;
	long ret = 0;
	int i = 0;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	devices = kzalloc(sizeof(struct nvm_ioctl_get_devices), GFP_KERNEL);
	if (!devices)
		return -ENOMEM;

	down_write(&nvm_lock);
	list_for_each_entry(dev, &nvm_devices, devices) {
		struct nvm_ioctl_device_info *info;

		if (i >= 31) {
			pr_err("nvm: max 31 devices can be reported.\n");
			break;
		}
		info = &devices->info[i];

		sprintf(info->devname, "%s", dev->name);
		if (dev->mt) {
			info->bmversion[0] = dev->mt->version[0];
			info->bmversion[1] = dev->mt->version[1];
			info->bmversion[2] = dev->mt->version[2];
			sprintf(info->bmname, "%s", dev->mt->name);
		} else {
			sprintf(info->bmname, "none");
		}

		i++;
	}
	up_write(&nvm_lock);

	devices->nr_devices = i;

	if (copy_to_user(arg, devices, sizeof(struct nvm_ioctl_get_devices)))
		ret = -EFAULT;

	/* Free on every path, including a failed copy-out. */
	kfree(devices);
	return ret;
}
/*
 * NVM_DEV_CREATE ioctl: create a target from a user-supplied request.
 *
 * The request is copied in and each of its three name strings is forcibly
 * NUL-terminated before use. No flags are supported yet, so a non-zero
 * flags field is rejected.
 *
 * Requires CAP_SYS_ADMIN. Returns the result of __nvm_configure_create(),
 * or -EPERM / -EFAULT / -EINVAL for the checks above.
 */
static long nvm_ioctl_dev_create(struct file *file, void __user *arg)
{
	struct nvm_ioctl_create req;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (copy_from_user(&req, arg, sizeof(struct nvm_ioctl_create)) != 0)
		return -EFAULT;

	req.dev[DISK_NAME_LEN - 1] = '\0';
	req.tgttype[NVM_TTYPE_NAME_MAX - 1] = '\0';
	req.tgtname[DISK_NAME_LEN - 1] = '\0';

	if (req.flags == 0)
		return __nvm_configure_create(&req);

	pr_err("nvm: no flags supported\n");
	return -EINVAL;
}
/*
 * NVM_DEV_REMOVE ioctl: remove a target named in a user-supplied request.
 *
 * The request is copied in and its target name is forcibly NUL-terminated
 * before use. No flags are supported yet, so a non-zero flags field is
 * rejected.
 *
 * Requires CAP_SYS_ADMIN. Returns the result of __nvm_configure_remove(),
 * or -EPERM / -EFAULT / -EINVAL for the checks above.
 */
static long nvm_ioctl_dev_remove(struct file *file, void __user *arg)
{
	struct nvm_ioctl_remove req;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (copy_from_user(&req, arg, sizeof(struct nvm_ioctl_remove)) != 0)
		return -EFAULT;

	req.tgtname[DISK_NAME_LEN - 1] = '\0';

	if (req.flags == 0)
		return __nvm_configure_remove(&req);

	pr_err("nvm: no flags supported\n");
	return -EINVAL;
}
/*
 * ioctl entry point of the lightnvm control device.
 *
 * Dispatches NVM_INFO, NVM_GET_DEVICES, NVM_DEV_CREATE and NVM_DEV_REMOVE
 * to their handlers. Any other command returns 0.
 */
static long nvm_ctl_ioctl(struct file *file, uint cmd, unsigned long arg)
{
	void __user *uarg = (void __user *)arg;

	if (cmd == NVM_INFO)
		return nvm_ioctl_info(file, uarg);
	if (cmd == NVM_GET_DEVICES)
		return nvm_ioctl_get_devices(file, uarg);
	if (cmd == NVM_DEV_CREATE)
		return nvm_ioctl_dev_create(file, uarg);
	if (cmd == NVM_DEV_REMOVE)
		return nvm_ioctl_dev_remove(file, uarg);

	return 0;
}
/* File operations of the control device; all commands go through ioctl. */
static const struct file_operations _ctl_fops = {
.open = nonseekable_open,
.unlocked_ioctl = nvm_ctl_ioctl,
.owner = THIS_MODULE,
.llseek = noop_llseek,
};
/*
 * Control device registered as a misc device with a dynamically assigned
 * minor; its node name is "lightnvm/control".
 */
static struct miscdevice _nvm_misc = {
.minor = MISC_DYNAMIC_MINOR,
.name = "lightnvm",
.nodename = "lightnvm/control",
.fops = &_ctl_fops,
};
MODULE_ALIAS_MISCDEV(MISC_DYNAMIC_MINOR);
/*
 * Module init: register the lightnvm control misc device.
 * Returns the misc_register() result, logging an error when it is non-zero.
 */
static int __init nvm_mod_init(void)
{
	int err = misc_register(&_nvm_misc);

	if (err != 0)
		pr_err("nvm: misc_register failed for control device");

	return err;
}
/* Module exit: deregister the lightnvm control misc device. */
static void __exit nvm_mod_exit(void)
{
misc_deregister(&_nvm_misc);
}
/* Module metadata and entry points. */
MODULE_AUTHOR("Matias Bjorling <m@bjorling.me>");
MODULE_LICENSE("GPL v2");
MODULE_VERSION("0.1");
module_init(nvm_mod_init);
module_exit(nvm_mod_exit);
/*
* Copyright (C) 2015 Matias Bjorling <m@bjorling.me>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version
* 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
* USA.
*
* Implementation of a generic nvm manager for Open-Channel SSDs.
*/
#include "gennvm.h"
static void gennvm_blocks_free(struct nvm_dev *dev)
{
struct gen_nvm *gn = dev->mp;
struct gen_lun *lun;
int i;
gennvm_for_each_lun(gn, lun, i) {
if (!lun->vlun.blocks)
break;
vfree(lun->vlun.blocks);
}
}
/* Free the LUN array of the manager instance stored in dev->mp. */
static void gennvm_luns_free(struct nvm_dev *dev)
{
struct gen_nvm *gn = dev->mp;
kfree(gn->luns);
}
static int gennvm_luns_init(struct nvm_dev *dev, struct gen_nvm *gn)
{
struct gen_lun *lun;
int i;
gn->luns = kcalloc(dev->nr_luns, sizeof(struct gen_lun), GFP_KERNEL);
if (!gn->luns)
return -ENOMEM;
gennvm_for_each_lun(gn, lun, i) {
spin_lock_init(&lun->vlun.lock);
INIT_LIST_HEAD(&lun->free_list);
INIT_LIST_HEAD(&lun->used_list);
INIT_LIST_HEAD(&lun->bb_list);
lun->reserved_blocks = 2; /* for GC only */
lun->vlun.id = i;
lun->vlun.lun_id = i % dev->luns_per_chnl;
lun->vlun.chnl_id = i / dev->luns_per_chnl;
lun->vlun.nr_free_blocks = dev->blks_per_lun;
}
return 0;
}
/*
 * Bad-block table callback: move every block flagged in @bb_bitmap to the
 * bad-block list of LUN @lun_id.
 *
 * @lun_id:    flat index into the manager's LUN array
 * @bb_bitmap: bitmap with one bit per block; set bits mark bad blocks
 * @nr_blocks: number of valid bits in @bb_bitmap
 * @private:   the struct gen_nvm instance
 *
 * Returns 0 on success or -EINVAL when @lun_id does not name a LUN of this
 * manager or the LUN has no block array.
 */
static int gennvm_block_bb(u32 lun_id, void *bb_bitmap, unsigned int nr_blocks,
			   void *private)
{
	struct gen_nvm *gn = private;
	struct gen_lun *lun;
	struct nvm_block *blk;
	unsigned long i;

	if (unlikely(bitmap_empty(bb_bitmap, nr_blocks)))
		return 0;

	/*
	 * The address of an array element is never NULL, so validate the
	 * index itself rather than the resulting pointer.
	 */
	if (unlikely(lun_id >= (u32)gn->nr_luns)) {
		pr_err("gennvm: BB data is out of bounds.\n");
		return -EINVAL;
	}

	lun = &gn->luns[lun_id];
	if (unlikely(!lun->vlun.blocks)) {
		pr_err("gennvm: BB data is out of bounds.\n");
		return -EINVAL;
	}

	/*
	 * NOTE(review): nr_blocks is trusted to be at most the LUN's block
	 * count; the device geometry is not reachable from here to verify it.
	 */
	for (i = find_next_bit(bb_bitmap, nr_blocks, 0); i < nr_blocks;
	     i = find_next_bit(bb_bitmap, nr_blocks, i + 1)) {
		blk = &lun->vlun.blocks[i];
		list_move_tail(&blk->list, &lun->bb_list);
	}

	return 0;
}
/*
 * L2P table callback: mark every block referenced by the device's
 * logical-to-physical table as in use.
 *
 * @slba:    first logical address covered by @entries
 * @nlb:     number of entries
 * @entries: little-endian physical addresses, one per logical address
 * @private: the struct nvm_dev
 *
 * Entries equal to 0 or U64_MAX do not reference a block and are skipped.
 * For any other entry the owning LUN and block are derived from the
 * physical address; a block seen for the first time is moved to the LUN's
 * used list and the LUN's free-block count is decremented.
 *
 * Returns 0 on success or -EINVAL when the table is out of bounds.
 */
static int gennvm_block_map(u64 slba, u32 nlb, __le64 *entries, void *private)
{
	struct nvm_dev *dev = private;
	struct gen_nvm *gn = dev->mp;
	sector_t max_pages = dev->total_pages * (dev->sec_size >> 9);
	u64 elba = slba + nlb;
	struct gen_lun *lun;
	struct nvm_block *blk;
	u64 i;
	u64 lun_id;

	if (unlikely(elba > dev->total_pages)) {
		pr_err("gennvm: L2P data from device is out of bounds!\n");
		return -EINVAL;
	}

	for (i = 0; i < nlb; i++) {
		u64 pba = le64_to_cpu(entries[i]);

		if (unlikely(pba >= max_pages && pba != U64_MAX)) {
			pr_err("gennvm: L2P data entry is out of bounds!\n");
			return -EINVAL;
		}

		/* Address zero is a special one. The first page on a disk is
		 * protected. It often holds internal device boot
		 * information.
		 */
		if (!pba)
			continue;

		/*
		 * U64_MAX is exempt from the range check above, so it must
		 * not be turned into a LUN/block index below.
		 */
		if (pba == U64_MAX)
			continue;

		/* resolve block from physical address */
		lun_id = div_u64(pba, dev->sec_per_lun);
		if (unlikely(lun_id >= dev->nr_luns)) {
			pr_err("gennvm: L2P data entry is out of bounds!\n");
			return -EINVAL;
		}
		lun = &gn->luns[lun_id];

		/* Calculate block offset into lun */
		pba = pba - (dev->sec_per_lun * lun_id);
		blk = &lun->vlun.blocks[div_u64(pba, dev->sec_per_blk)];

		if (!blk->type) {
			/* at this point, we don't know anything about the
			 * block. It's up to the FTL on top to re-establish the
			 * block state
			 */
			list_move_tail(&blk->list, &lun->used_list);
			blk->type = 1;
			lun->vlun.nr_free_blocks--;
		}
	}

	return 0;
}
static int gennvm_blocks_init(struct nvm_dev *dev, struct gen_nvm *gn)
{
struct gen_lun *lun;
struct nvm_block *block;
sector_t lun_iter, blk_iter, cur_block_id = 0;
int ret;
gennvm_for_each_lun(gn, lun, lun_iter) {
lun->vlun.blocks = vzalloc(sizeof(struct nvm_block) *
dev->blks_per_lun);
if (!lun->vlun.blocks)
return -ENOMEM;
for (blk_iter = 0; blk_iter < dev->blks_per_lun; blk_iter++) {
block = &lun->vlun.blocks[blk_iter];
INIT_LIST_HEAD(&block->list);
block->lun = &lun->vlun;
block->id = cur_block_id++;
/* First block is reserved for device */
if (unlikely(lun_iter == 0 && blk_iter == 0))
continue;
list_add_tail(&block->list, &lun->free_list);
}
if (dev->ops->get_bb_tbl) {
ret = dev->ops->get_bb_tbl(dev->q, lun->vlun.id,
dev->blks_per_lun, gennvm_block_bb, gn);
if (ret)
pr_err("gennvm: could not read BB table\n");
}
}
if (dev->ops->get_l2p_tbl) {
ret = dev->ops->get_l2p_tbl(dev->q, 0, dev->total_pages,
gennvm_block_map, dev);
if (ret) {
pr_err("gennvm: could not read L2P table.\n");
pr_warn("gennvm: default block initialization");
}
}
return 0;
}
static int gennvm_register(struct nvm_dev *dev)
{
struct gen_nvm *gn;
int ret;
gn = kzalloc(sizeof(struct gen_nvm), GFP_KERNEL);
if (!gn)
return -ENOMEM;
gn->nr_luns = dev->nr_luns;
dev->mp = gn;
ret = gennvm_luns_init(dev, gn);
if (ret) {
pr_err("gennvm: could not initialize luns\n");
goto err;
}
ret = gennvm_blocks_init(dev, gn);
if (ret) {
pr_err("gennvm: could not initialize blocks\n");
goto err;
}
return 1;
err:
kfree(gn);
return ret;
}
/*
 * unregister_mgr() hook: release the block arrays, the LUN array and the
 * manager state of @dev, then clear dev->mp.
 */
static void gennvm_unregister(struct nvm_dev *dev)
{
gennvm_blocks_free(dev);
gennvm_luns_free(dev);
kfree(dev->mp);
dev->mp = NULL;
}
/*
 * get_blk() hook: take a free block from @vlun.
 *
 * Under the LUN lock, the first block of the free list is moved to the used
 * list, marked with type 1, and the free-block count is decremented.
 * Requests without NVM_IOTYPE_GC in @flags are refused once the free-block
 * count drops below the LUN's reserve.
 *
 * Returns the block, or NULL when the free list is empty or the reserve
 * would be touched.
 */
static struct nvm_block *gennvm_get_blk(struct nvm_dev *dev,
				struct nvm_lun *vlun, unsigned long flags)
{
	struct gen_lun *glun = container_of(vlun, struct gen_lun, vlun);
	struct nvm_block *picked = NULL;
	int for_gc = flags & NVM_IOTYPE_GC;

	spin_lock(&vlun->lock);

	if (list_empty(&glun->free_list)) {
		pr_err_ratelimited("gennvm: lun %u have no free pages available",
				   glun->vlun.id);
		goto unlock;
	}

	/* Keep the reserved blocks for GC. */
	if (!for_gc && glun->vlun.nr_free_blocks < glun->reserved_blocks)
		goto unlock;

	picked = list_first_entry(&glun->free_list, struct nvm_block, list);
	list_move_tail(&picked->list, &glun->used_list);
	picked->type = 1;
	glun->vlun.nr_free_blocks--;

unlock:
	spin_unlock(&vlun->lock);
	return picked;
}
/*
 * put_blk() hook: return @blk to its LUN.
 *
 * Under the LUN lock, a block of type 1 goes back to the free list (type
 * reset to 0, free-block count incremented) and a block of type 2 goes to
 * the bad-block list. Any other type triggers a one-time warning and an
 * error message, and the block is parked on the bad-block list.
 */
static void gennvm_put_blk(struct nvm_dev *dev, struct nvm_block *blk)
{
	struct nvm_lun *vlun = blk->lun;
	struct gen_lun *glun = container_of(vlun, struct gen_lun, vlun);

	spin_lock(&vlun->lock);

	if (blk->type == 1) {
		list_move_tail(&blk->list, &glun->free_list);
		glun->vlun.nr_free_blocks++;
		blk->type = 0;
	} else if (blk->type == 2) {
		list_move_tail(&blk->list, &glun->bb_list);
	} else {
		WARN_ON_ONCE(1);
		pr_err("gennvm: erroneous block type (%lu -> %u)\n",
		       blk->id, blk->type);
		list_move_tail(&blk->list, &glun->bb_list);
	}

	spin_unlock(&vlun->lock);
}
/*
 * Convert the address(es) of @rqd in place with addr_to_generic_mode().
 * Requests with more than one page carry their addresses in ppa_list;
 * otherwise the single address lives in ppa_addr.
 */
static void gennvm_addr_to_generic_mode(struct nvm_dev *dev, struct nvm_rq *rqd)
{
	int idx;

	if (rqd->nr_pages <= 1) {
		rqd->ppa_addr = addr_to_generic_mode(dev, rqd->ppa_addr);
		return;
	}

	for (idx = 0; idx < rqd->nr_pages; idx++)
		rqd->ppa_list[idx] = addr_to_generic_mode(dev,
							  rqd->ppa_list[idx]);
}
/*
 * Convert the address(es) of @rqd in place with generic_to_addr_mode().
 * Requests with more than one page carry their addresses in ppa_list;
 * otherwise the single address lives in ppa_addr.
 */
static void gennvm_generic_to_addr_mode(struct nvm_dev *dev, struct nvm_rq *rqd)
{
	int idx;

	if (rqd->nr_pages <= 1) {
		rqd->ppa_addr = generic_to_addr_mode(dev, rqd->ppa_addr);
		return;
	}

	for (idx = 0; idx < rqd->nr_pages; idx++)
		rqd->ppa_list[idx] = generic_to_addr_mode(dev,
							  rqd->ppa_list[idx]);
}
/*
 * submit_io() hook: translate the request's addresses to the device's
 * address mode, record @dev in the request and pass it to the driver.
 *
 * Returns 0 without doing anything when the driver has no submit_io()
 * callback; otherwise returns the driver's result.
 */
static int gennvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
{
	int status = 0;

	if (dev->ops->submit_io) {
		/* Convert address space */
		gennvm_generic_to_addr_mode(dev, rqd);

		rqd->dev = dev;
		status = dev->ops->submit_io(dev->q, rqd);
	}

	return status;
}
/*
 * Set the type of the block addressed by @ppa to @type.
 *
 * The channel, LUN and block fields of @ppa are validated against the
 * device geometry first; an address outside it triggers a one-time warning
 * and is ignored. Indices are zero-based, so a value equal to the
 * corresponding count is already out of range.
 *
 * The LUN array is laid out channel-major (a LUN's flat index is
 * chnl_id * luns_per_chnl + lun_id), and the same mapping is used here.
 */
static void gennvm_blk_set_type(struct nvm_dev *dev, struct ppa_addr *ppa,
				int type)
{
	struct gen_nvm *gn = dev->mp;
	struct gen_lun *lun;
	struct nvm_block *blk;

	if (unlikely(ppa->g.ch >= dev->nr_chnls ||
		     ppa->g.lun >= dev->luns_per_chnl ||
		     ppa->g.blk >= dev->blks_per_lun)) {
		WARN_ON_ONCE(1);
		pr_err("gennvm: ppa broken (ch: %u >= %u lun: %u >= %u blk: %u >= %u)\n",
		       ppa->g.ch, dev->nr_chnls,
		       ppa->g.lun, dev->luns_per_chnl,
		       ppa->g.blk, dev->blks_per_lun);
		return;
	}

	lun = &gn->luns[ppa->g.ch * dev->luns_per_chnl + ppa->g.lun];
	blk = &lun->vlun.blocks[ppa->g.blk];

	/* will be moved to bb list on put_blk from target */
	blk->type = type;
}
/*
 * Mark the block(s) addressed by @rqd as bad. The target is expected to
 * recover from the error itself.
 *
 * Nothing happens when the driver has no set_bb() callback or when that
 * callback returns non-zero. Otherwise the request's addresses are
 * converted back to generic mode and each addressed block gets type 2.
 */
static void gennvm_mark_blk_bad(struct nvm_dev *dev, struct nvm_rq *rqd)
{
	int idx;

	if (!dev->ops->set_bb || dev->ops->set_bb(dev->q, rqd, 1))
		return;

	gennvm_addr_to_generic_mode(dev, rqd);

	/* look up blocks and mark them as bad */
	if (rqd->nr_pages <= 1) {
		gennvm_blk_set_type(dev, &rqd->ppa_addr, 2);
		return;
	}

	for (idx = 0; idx < rqd->nr_pages; idx++)
		gennvm_blk_set_type(dev, &rqd->ppa_list[idx], 2);
}
static int gennvm_end_io(struct nvm_rq *rqd, int error)
{
struct nvm_tgt_instance *ins = rqd->ins;
int ret = 0;
switch (error) {
case NVM_RSP_SUCCESS:
break;
case NVM_RSP_ERR_EMPTYPAGE:
break;
case NVM_RSP_ERR_FAILWRITE:
gennvm_mark_blk_bad(rqd->dev, rqd);
default:
ret++;
}
ret += ins->tt->end_io(rqd, error);
return ret;
}
/*
 * erase_blk() hook: erase @blk on the device.
 *
 * In single-plane mode one address is sent. Otherwise one address per plane
 * (1 << plane_mode of them) is placed in a DMA-able list that is freed
 * again after the driver call. Addresses are converted to the device's
 * address mode before submission.
 *
 * Returns 0 when the driver has no erase_block() callback, -ENOMEM when the
 * address list cannot be allocated, or the driver's result.
 */
static int gennvm_erase_blk(struct nvm_dev *dev, struct nvm_block *blk,
			    unsigned long flags)
{
	int plane_cnt = 0, pl_idx, ret;
	struct ppa_addr addr;
	struct nvm_rq rqd;

	if (!dev->ops->erase_block)
		return 0;

	/*
	 * The request lives on the stack; clear it so the driver never sees
	 * indeterminate values in the fields that are not set below.
	 */
	memset(&rqd, 0, sizeof(rqd));

	addr = block_to_ppa(dev, blk);

	if (dev->plane_mode == NVM_PLANE_SINGLE) {
		rqd.nr_pages = 1;
		rqd.ppa_addr = addr;
	} else {
		plane_cnt = (1 << dev->plane_mode);
		rqd.nr_pages = plane_cnt;

		rqd.ppa_list = nvm_dev_dma_alloc(dev, GFP_KERNEL,
						 &rqd.dma_ppa_list);
		if (!rqd.ppa_list) {
			pr_err("gennvm: failed to allocate dma memory\n");
			return -ENOMEM;
		}

		for (pl_idx = 0; pl_idx < plane_cnt; pl_idx++) {
			addr.g.pl = pl_idx;
			rqd.ppa_list[pl_idx] = addr;
		}
	}

	gennvm_generic_to_addr_mode(dev, &rqd);

	ret = dev->ops->erase_block(dev->q, &rqd);

	/* plane_cnt is non-zero only when the list was allocated. */
	if (plane_cnt)
		nvm_dev_dma_free(dev, rqd.ppa_list, rqd.dma_ppa_list);

	return ret;
}
static struct nvm_lun *gennvm_get_lun(struct nvm_dev *dev, int lunid)
{
struct gen_nvm *gn = dev->mp;
return &gn->luns[lunid].vlun;
}
static void gennvm_free_blocks_print(struct nvm_dev *dev)
{
struct gen_nvm *gn = dev->mp;
struct gen_lun *lun;
unsigned int i;
gennvm_for_each_lun(gn, lun, i)
pr_info("%s: lun%8u\t%u\n",
dev->name, i, lun->vlun.nr_free_blocks);
}
/*
 * Media manager descriptor registered with the lightnvm core under the name
 * "gennvm"; wires the core's manager hooks to the functions in this file.
 */
static struct nvmm_type gennvm = {
.name = "gennvm",
.version = {0, 1, 0},
.register_mgr = gennvm_register,
.unregister_mgr = gennvm_unregister,
.get_blk = gennvm_get_blk,
.put_blk = gennvm_put_blk,
.submit_io = gennvm_submit_io,
.end_io = gennvm_end_io,
.erase_blk = gennvm_erase_blk,
.get_lun = gennvm_get_lun,
.free_blocks_print = gennvm_free_blocks_print,
};
/* Module init: register the gennvm media manager with the lightnvm core. */
static int __init gennvm_module_init(void)
{
return nvm_register_mgr(&gennvm);
}
/* Module exit: unregister the gennvm media manager from the lightnvm core. */
static void gennvm_module_exit(void)
{
nvm_unregister_mgr(&gennvm);
}
/* Module entry points and metadata. */
module_init(gennvm_module_init);
module_exit(gennvm_module_exit);
MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("Generic media manager for Open-Channel SSDs");
/*
* Copyright: Matias Bjorling <mb@bjorling.me>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version
* 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
*/
#ifndef GENNVM_H_
#define GENNVM_H_
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/lightnvm.h>
/* Per-lun state of the generic media manager. */
struct gen_lun {
/* lun object returned to callers by gennvm_get_lun() */
struct nvm_lun vlun;
int reserved_blocks;
/* lun block lists */
struct list_head used_list; /* In-use blocks */
struct list_head free_list; /* Unused blocks, i.e. released
* and ready for use
*/
struct list_head bb_list; /* Bad blocks. Mutually exclusive with
* free_list and used_list
*/
};
/* Manager-private device state, reached through nvm_dev->mp. */
struct gen_nvm {
/* number of entries in @luns */
int nr_luns;
/* array of per-lun state, indexed by lun id */
struct gen_lun *luns;
};
/* Iterate over every lun of @bm: @i runs from 0 to nr_luns - 1 and @lun
 * points at luns[i]. When the loop ends, @lun holds the address one past the
 * last element; it is computed but must not be dereferenced.
 */
#define gennvm_for_each_lun(bm, lun, i) \
for ((i) = 0, lun = &(bm)->luns[0]; \
(i) < (bm)->nr_luns; (i)++, lun = &(bm)->luns[(i)])
#endif /* GENNVM_H_ */
/*
* Copyright (C) 2015 IT University of Copenhagen
* Initial release: Matias Bjorling <m@bjorling.me>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version
* 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* Implementation of a Round-robin page-based Hybrid FTL for Open-channel SSDs.
*/
#include "rrpc.h"
/* File-wide slab caches backing the per-instance gcb and request mempools.
 * They are created by rrpc_core_init() when rrpc_gcb_cache is still NULL.
 */
static struct kmem_cache *rrpc_gcb_cache, *rrpc_rq_cache;
/* Taken for writing around the creation of the caches above. */
static DECLARE_RWSEM(rrpc_lock);
/* Forward declaration; the GC path calls this before its definition. */
static int rrpc_submit_io(struct rrpc *rrpc, struct bio *bio,
struct nvm_rq *rqd, unsigned long flags);
/* Iterate over every lun of @rrpc: @i runs from 0 to nr_luns - 1 and @rlun
 * points at luns[i]. After the loop @rlun holds the one-past-the-end address
 * and must not be dereferenced.
 */
#define rrpc_for_each_lun(rrpc, rlun, i) \
for ((i) = 0, rlun = &(rrpc)->luns[0]; \
(i) < (rrpc)->nr_luns; (i)++, rlun = &(rrpc)->luns[(i)])
/* Mark the physical page referenced by @a as invalid in its owning block and
 * clear the matching reverse-map entry. Does nothing when @a has no block or
 * no address. Caller must hold rrpc->rev_lock.
 */
static void rrpc_page_invalidate(struct rrpc *rrpc, struct rrpc_addr *a)
{
	struct rrpc_block *owner = a->rblk;
	unsigned int slot;

	lockdep_assert_held(&rrpc->rev_lock);

	if (!owner || a->addr == ADDR_EMPTY)
		return;

	/* page index inside the block */
	div_u64_rem(a->addr, rrpc->dev->pgs_per_blk, &slot);

	spin_lock(&owner->lock);
	WARN_ON(test_and_set_bit(slot, owner->invalid_pages));
	owner->nr_invalid_pages++;
	spin_unlock(&owner->lock);

	rrpc->rev_trans_map[a->addr - rrpc->poffset].addr = ADDR_EMPTY;
}
/* Invalidate @len logical pages starting at @slba: each mapped page is marked
 * invalid and the logical entry is detached from its block. The whole range
 * is processed under rrpc->rev_lock.
 */
static void rrpc_invalidate_range(struct rrpc *rrpc, sector_t slba,
				  unsigned len)
{
	sector_t end = slba + len;
	sector_t cur;

	spin_lock(&rrpc->rev_lock);
	for (cur = slba; cur < end; cur++) {
		struct rrpc_addr *entry = rrpc->trans_map + cur;

		rrpc_page_invalidate(rrpc, entry);
		entry->rblk = NULL;
	}
	spin_unlock(&rrpc->rev_lock);
}
/* Allocate a request from the rq pool and use it to lock the logical range
 * [@laddr, @laddr + @pages).
 *
 * Returns the request on success, NULL when the range could not be locked
 * (the request is returned to the pool), or ERR_PTR(-ENOMEM) when the pool
 * allocation failed.
 */
static struct nvm_rq *rrpc_inflight_laddr_acquire(struct rrpc *rrpc,
					sector_t laddr, unsigned int pages)
{
	struct nvm_rq *rqd = mempool_alloc(rrpc->rq_pool, GFP_ATOMIC);

	if (!rqd)
		return ERR_PTR(-ENOMEM);

	if (!rrpc_lock_laddr(rrpc, laddr, pages, rrpc_get_inflight_rq(rqd)))
		return rqd;

	mempool_free(rqd, rrpc->rq_pool);
	return NULL;
}
/* Undo rrpc_inflight_laddr_acquire(): drop the logical range lock held by
 * @rqd and hand the request back to the rq pool.
 */
static void rrpc_inflight_laddr_release(struct rrpc *rrpc, struct nvm_rq *rqd)
{
	rrpc_unlock_laddr(rrpc, rrpc_get_inflight_rq(rqd));
	mempool_free(rqd, rrpc->rq_pool);
}
/* Handle a discard bio: lock the affected logical range, invalidate every
 * page in it and release the lock again. Retries (yielding the CPU each
 * round) while the range is busy; fails the bio if no request could be
 * allocated.
 *
 * NOTE(review): on the success path this function does not complete @bio;
 * confirm whether a caller is expected to end it.
 */
static void rrpc_discard(struct rrpc *rrpc, struct bio *bio)
{
	sector_t first = bio->bi_iter.bi_sector / NR_PHY_IN_LOG;
	sector_t count = bio->bi_iter.bi_size / RRPC_EXPOSED_PAGE_SIZE;
	struct nvm_rq *rqd;

	for (;;) {
		rqd = rrpc_inflight_laddr_acquire(rrpc, first, count);
		schedule();
		if (rqd)
			break;
	}

	if (IS_ERR(rqd)) {
		pr_err("rrpc: unable to acquire inflight IO\n");
		bio_io_error(bio);
		return;
	}

	rrpc_invalidate_range(rrpc, first, count);
	rrpc_inflight_laddr_release(rrpc, rqd);
}
/* Return 1 when the block's write pointer has reached the device's
 * pages-per-block count, 0 otherwise.
 */
static int block_is_full(struct rrpc *rrpc, struct rrpc_block *rblk)
{
	if (rblk->next_page == rrpc->dev->pgs_per_blk)
		return 1;

	return 0;
}
/* Linear address of the first page of @rblk: block id times pages per
 * block.
 */
static u64 block_to_addr(struct rrpc *rrpc, struct rrpc_block *rblk)
{
	return rblk->parent->id * rrpc->dev->pgs_per_blk;
}
/* Wrap the linear page address @addr in a ppa_addr and convert it with
 * __linear_to_generic_addr().
 */
static struct ppa_addr rrpc_ppa_to_gaddr(struct nvm_dev *dev, u64 addr)
{
	struct ppa_addr linear;

	linear.ppa = addr;

	return __linear_to_generic_addr(dev, linear);
}
/* requires lun->lock taken */
static void rrpc_set_lun_cur(struct rrpc_lun *rlun, struct rrpc_block *rblk)
{
struct rrpc *rrpc = rlun->rrpc;
BUG_ON(!rblk);
if (rlun->cur) {
spin_lock(&rlun->cur->lock);
WARN_ON(!block_is_full(rrpc, rlun->cur));
spin_unlock(&rlun->cur->lock);
}
rlun->cur = rblk;
}
/* Obtain a fresh block for @rlun from the media manager and reset the
 * target's per-block state (invalid bitmap, write pointer, counters).
 *
 * @flags is forwarded to nvm_get_blk(); callers in this file pass 0 for
 * regular allocations and 1 for the emergency GC block. Previously the
 * argument was dropped and 0 was always passed, so the two kinds of request
 * could not be told apart by the manager.
 *
 * Returns the rrpc block, or NULL when the manager has none to give.
 */
static struct rrpc_block *rrpc_get_blk(struct rrpc *rrpc, struct rrpc_lun *rlun,
							unsigned long flags)
{
	struct nvm_block *blk;
	struct rrpc_block *rblk;

	blk = nvm_get_blk(rrpc->dev, rlun->parent, flags);
	if (!blk)
		return NULL;

	/* rrpc blocks mirror the manager's blocks by id */
	rblk = &rlun->blocks[blk->id];
	blk->priv = rblk;

	bitmap_zero(rblk->invalid_pages, rrpc->dev->pgs_per_blk);
	rblk->next_page = 0;
	rblk->nr_invalid_pages = 0;
	atomic_set(&rblk->data_cmnt_size, 0);

	return rblk;
}
/* Hand the media-manager block behind @rblk back via nvm_put_blk(). */
static void rrpc_put_blk(struct rrpc *rrpc, struct rrpc_block *rblk)
{
	struct nvm_block *blk = rblk->parent;

	nvm_put_blk(rrpc->dev, blk);
}
/* Pick the next lun in round-robin order by advancing rrpc->next_lun.
 *
 * The counter value is handled as unsigned: once the atomic counter wraps
 * into the negative range, a signed modulo would produce a negative index
 * and address memory before the luns array.
 */
static struct rrpc_lun *get_next_lun(struct rrpc *rrpc)
{
	unsigned int next = atomic_inc_return(&rrpc->next_lun);

	return &rrpc->luns[next % (unsigned int)rrpc->nr_luns];
}
/* Queue the per-lun GC work item of every lun on the krqd workqueue. */
static void rrpc_gc_kick(struct rrpc *rrpc)
{
	unsigned int idx = 0;

	while (idx < rrpc->nr_luns) {
		queue_work(rrpc->krqd_wq, &rrpc->luns[idx].ws_gc);
		idx++;
	}
}
/*
 * Timer callback: kick GC on all luns, then re-arm the timer to fire again
 * 10 ms from now. @data carries the rrpc instance.
 */
static void rrpc_gc_timer(unsigned long data)
{
	struct rrpc *rrpc = (struct rrpc *)data;
	unsigned long next_run;

	rrpc_gc_kick(rrpc);

	next_run = jiffies + msecs_to_jiffies(10);
	mod_timer(&rrpc->gc_timer, next_run);
}
/* Completion handler for the synchronous GC bios: logs a failure, if any,
 * and wakes the waiter stored in bio->bi_private.
 *
 * bi_error carries a signed (negative) errno, so it is printed with %d;
 * %u rendered it as a huge unsigned number.
 */
static void rrpc_end_sync_bio(struct bio *bio)
{
	struct completion *waiting = bio->bi_private;

	if (bio->bi_error)
		pr_err("nvm: gc request failed (%d).\n", bio->bi_error);

	complete(waiting);
}
/*
 * rrpc_move_valid_pages -- migrate live data off the block
 * @rrpc: the 'rrpc' structure
 * @rblk: the block from which to migrate live pages
 *
 * Description:
 * GC algorithms may call this function to migrate remaining live
 * pages off the block prior to erasing it. This function blocks
 * further execution until the operation is complete.
 *
 * For every page not yet marked invalid, the logical address is looked up
 * in the reverse map, the page is read into a scratch page and then written
 * back through the regular write path with NVM_IOTYPE_GC. The write path
 * remaps the logical page (rrpc_map_page() -> rrpc_update_map()), which
 * invalidates the old physical page and so advances the loop.
 *
 * Returns 0 when the block holds no valid pages afterwards, -ENOMEM if the
 * bio could not be allocated, or -EIO if valid pages remain.
 *
 * NOTE(review): the scratch page returned by mempool_alloc() is not checked
 * for NULL, and the result of the GC read is not inspected before the data
 * is written back -- confirm both are acceptable.
 */
static int rrpc_move_valid_pages(struct rrpc *rrpc, struct rrpc_block *rblk)
{
struct request_queue *q = rrpc->dev->q;
struct rrpc_rev_addr *rev;
struct nvm_rq *rqd;
struct bio *bio;
struct page *page;
int slot;
int nr_pgs_per_blk = rrpc->dev->pgs_per_blk;
u64 phys_addr;
DECLARE_COMPLETION_ONSTACK(wait);
/* nothing to move when every page is already invalid */
if (bitmap_full(rblk->invalid_pages, nr_pgs_per_blk))
return 0;
/* a single bio and a single page are reused for all read/write rounds */
bio = bio_alloc(GFP_NOIO, 1);
if (!bio) {
pr_err("nvm: could not alloc bio to gc\n");
return -ENOMEM;
}
page = mempool_alloc(rrpc->page_pool, GFP_NOIO);
while ((slot = find_first_zero_bit(rblk->invalid_pages,
nr_pgs_per_blk)) < nr_pgs_per_blk) {
/* Lock laddr */
phys_addr = (rblk->parent->id * nr_pgs_per_blk) + slot;
try:
spin_lock(&rrpc->rev_lock);
/* Get logical address from physical to logical table */
rev = &rrpc->rev_trans_map[phys_addr - rrpc->poffset];
/* already updated by previous regular write */
if (rev->addr == ADDR_EMPTY) {
spin_unlock(&rrpc->rev_lock);
continue;
}
/* the logical page may be in flight; back off and retry until locked */
rqd = rrpc_inflight_laddr_acquire(rrpc, rev->addr, 1);
if (IS_ERR_OR_NULL(rqd)) {
spin_unlock(&rrpc->rev_lock);
schedule();
goto try;
}
spin_unlock(&rrpc->rev_lock);
/* Perform read to do GC */
bio->bi_iter.bi_sector = rrpc_get_sector(rev->addr);
bio->bi_rw = READ;
bio->bi_private = &wait;
bio->bi_end_io = rrpc_end_sync_bio;
/* TODO: may fail when EXP_PG_SIZE > PAGE_SIZE */
bio_add_pc_page(q, bio, page, RRPC_EXPOSED_PAGE_SIZE, 0);
if (rrpc_submit_io(rrpc, bio, rqd, NVM_IOTYPE_GC)) {
pr_err("rrpc: gc read failed.\n");
rrpc_inflight_laddr_release(rrpc, rqd);
goto finished;
}
wait_for_completion_io(&wait);
/* recycle the bio and the completion for the write-back */
bio_reset(bio);
reinit_completion(&wait);
bio->bi_iter.bi_sector = rrpc_get_sector(rev->addr);
bio->bi_rw = WRITE;
bio->bi_private = &wait;
bio->bi_end_io = rrpc_end_sync_bio;
bio_add_pc_page(q, bio, page, RRPC_EXPOSED_PAGE_SIZE, 0);
/* turn the command around and write the data back to a new
* address
*/
if (rrpc_submit_io(rrpc, bio, rqd, NVM_IOTYPE_GC)) {
pr_err("rrpc: gc write failed.\n");
rrpc_inflight_laddr_release(rrpc, rqd);
goto finished;
}
wait_for_completion_io(&wait);
rrpc_inflight_laddr_release(rrpc, rqd);
bio_reset(bio);
}
finished:
mempool_free(page, rrpc->page_pool);
bio_put(bio);
/* any page still valid at this point means the migration was cut short */
if (!bitmap_full(rblk->invalid_pages, nr_pgs_per_blk)) {
pr_err("nvm: failed to garbage collect block\n");
return -EIO;
}
return 0;
}
static void rrpc_block_gc(struct work_struct *work)
{
struct rrpc_block_gc *gcb = container_of(work, struct rrpc_block_gc,
ws_gc);
struct rrpc *rrpc = gcb->rrpc;
struct rrpc_block *rblk = gcb->rblk;
struct nvm_dev *dev = rrpc->dev;
pr_debug("nvm: block '%lu' being reclaimed\n", rblk->parent->id);
if (rrpc_move_valid_pages(rrpc, rblk))
goto done;
nvm_erase_blk(dev, rblk->parent);
rrpc_put_blk(rrpc, rblk);
done:
mempool_free(gcb, rrpc->gcb_pool);
}
/* Return whichever of the two blocks has more invalid pages; @ra wins a
 * tie.
 */
static struct rrpc_block *rblock_max_invalid(struct rrpc_block *ra,
							struct rrpc_block *rb)
{
	if (rb->nr_invalid_pages > ra->nr_invalid_pages)
		return rb;

	return ra;
}
/* linearly find the block with highest number of invalid pages
* requires lun->lock
*/
static struct rrpc_block *block_prio_find_max(struct rrpc_lun *rlun)
{
struct list_head *prio_list = &rlun->prio_list;
struct rrpc_block *rblock, *max;
BUG_ON(list_empty(prio_list));
max = list_first_entry(prio_list, struct rrpc_block, prio);
list_for_each_entry(rblock, prio_list, prio)
max = rblock_max_invalid(max, rblock);
return max;
}
/* Per-lun GC work handler. While the lun has fewer free blocks than the
 * target (blks_per_lun / GC_LIMIT_INVERSE, but at least nr_luns), pick the
 * block with the most invalid pages from prio_list and queue it for
 * reclamation via rrpc_block_gc().
 *
 * prio_list is protected by rlun->lock (rrpc_gc_queue() adds to it under
 * that lock), so that is the lock taken here rather than the parent lun's.
 * The block is unlinked only after the gcb allocation succeeded; unlinking
 * first would drop the block from GC for good when the allocation fails.
 */
static void rrpc_lun_gc(struct work_struct *work)
{
	struct rrpc_lun *rlun = container_of(work, struct rrpc_lun, ws_gc);
	struct rrpc *rrpc = rlun->rrpc;
	struct nvm_lun *lun = rlun->parent;
	struct rrpc_block_gc *gcb;
	unsigned int nr_blocks_need;

	nr_blocks_need = rrpc->dev->blks_per_lun / GC_LIMIT_INVERSE;

	if (nr_blocks_need < rrpc->nr_luns)
		nr_blocks_need = rrpc->nr_luns;

	spin_lock(&rlun->lock);
	while (nr_blocks_need > lun->nr_free_blocks &&
					!list_empty(&rlun->prio_list)) {
		struct rrpc_block *rblock = block_prio_find_max(rlun);
		struct nvm_block *block = rblock->parent;

		/* best candidate has nothing to reclaim, so neither do others */
		if (!rblock->nr_invalid_pages)
			break;

		gcb = mempool_alloc(rrpc->gcb_pool, GFP_ATOMIC);
		if (!gcb)
			break;

		list_del_init(&rblock->prio);

		BUG_ON(!block_is_full(rrpc, rblock));

		pr_debug("rrpc: selected block '%lu' for GC\n", block->id);

		gcb->rrpc = rrpc;
		gcb->rblk = rblock;
		INIT_WORK(&gcb->ws_gc, rrpc_block_gc);

		queue_work(rrpc->kgc_wq, &gcb->ws_gc);

		nr_blocks_need--;
	}
	spin_unlock(&rlun->lock);

	/* TODO: Hint that request queue can be started again */
}
/* Work handler that makes a fully written block eligible for GC by
 * appending it to its lun's prio_list, then frees the work descriptor.
 */
static void rrpc_gc_queue(struct work_struct *work)
{
	struct rrpc_block_gc *gcb = container_of(work, struct rrpc_block_gc,
									ws_gc);
	struct rrpc *rrpc = gcb->rrpc;
	struct rrpc_block *rblk = gcb->rblk;
	struct nvm_lun *lun = rblk->parent->lun;
	struct rrpc_lun *owner = rrpc->luns + (lun->id - rrpc->lun_offset);

	spin_lock(&owner->lock);
	list_add_tail(&rblk->prio, &owner->prio_list);
	spin_unlock(&owner->lock);

	mempool_free(gcb, rrpc->gcb_pool);

	pr_debug("nvm: block '%lu' is full, allow GC (sched)\n",
							rblk->parent->id);
}
/* Block device operations for the target; only the owning module is set. */
static const struct block_device_operations rrpc_fops = {
.owner = THIS_MODULE,
};
static struct rrpc_lun *rrpc_get_lun_rr(struct rrpc *rrpc, int is_gc)
{
unsigned int i;
struct rrpc_lun *rlun, *max_free;
if (!is_gc)
return get_next_lun(rrpc);
/* during GC, we don't care about RR, instead we want to make
* sure that we maintain evenness between the block luns.
*/
max_free = &rrpc->luns[0];
/* prevent GC-ing lun from devouring pages of a lun with
* little free blocks. We don't take the lock as we only need an
* estimate.
*/
rrpc_for_each_lun(rrpc, rlun, i) {
if (rlun->parent->nr_free_blocks >
max_free->parent->nr_free_blocks)
max_free = rlun;
}
return max_free;
}
/* Point logical page @laddr at physical page @paddr in block @rblk. Any
 * previous mapping of @laddr is invalidated first, and the reverse map entry
 * of @paddr is set to @laddr. Runs under rrpc->rev_lock.
 *
 * Returns the forward-map entry for @laddr.
 */
static struct rrpc_addr *rrpc_update_map(struct rrpc *rrpc, sector_t laddr,
				struct rrpc_block *rblk, u64 paddr)
{
	struct rrpc_addr *entry;

	BUG_ON(laddr >= rrpc->nr_pages);

	entry = rrpc->trans_map + laddr;

	spin_lock(&rrpc->rev_lock);
	if (entry->rblk)
		rrpc_page_invalidate(rrpc, entry);

	entry->addr = paddr;
	entry->rblk = rblk;
	rrpc->rev_trans_map[paddr - rrpc->poffset].addr = laddr;
	spin_unlock(&rrpc->rev_lock);

	return entry;
}
/* Reserve the next free page of @rblk under the block lock.
 *
 * Returns the page's linear address, or ADDR_EMPTY when the block is full.
 */
static u64 rrpc_alloc_addr(struct rrpc *rrpc, struct rrpc_block *rblk)
{
	u64 addr;

	spin_lock(&rblk->lock);
	if (block_is_full(rrpc, rblk)) {
		addr = ADDR_EMPTY;
	} else {
		addr = block_to_addr(rrpc, rblk) + rblk->next_page;
		rblk->next_page++;
	}
	spin_unlock(&rblk->lock);

	return addr;
}
/* Simple round-robin logical to physical address translation.
 *
 * Selects a lun, takes the next page from that lun's current block (fetching
 * a new block when the current one is full) and records the new mapping for
 * @laddr. GC writes may additionally fall back to the lun's emergency GC
 * block.
 *
 * Returns the forward-map entry for @laddr, which lives in rrpc->trans_map
 * and is not separately allocated, or NULL when no page could be allocated.
 * Regular writes also get NULL while the lun has fewer than nr_luns * 4
 * free blocks.
 */
static struct rrpc_addr *rrpc_map_page(struct rrpc *rrpc, sector_t laddr,
								int is_gc)
{
	struct rrpc_lun *rlun;
	struct rrpc_block *rblk;
	struct nvm_lun *lun;
	u64 paddr;

	rlun = rrpc_get_lun_rr(rrpc, is_gc);
	lun = rlun->parent;

	if (!is_gc && lun->nr_free_blocks < rrpc->nr_luns * 4)
		return NULL;

	spin_lock(&rlun->lock);

	rblk = rlun->cur;
retry:
	paddr = rrpc_alloc_addr(rrpc, rblk);

	if (paddr == ADDR_EMPTY) {
		rblk = rrpc_get_blk(rrpc, rlun, 0);
		if (rblk) {
			rrpc_set_lun_cur(rlun, rblk);
			goto retry;
		}

		if (is_gc) {
			/* retry from emergency gc block */
			paddr = rrpc_alloc_addr(rrpc, rlun->gc_cur);
			if (paddr == ADDR_EMPTY) {
				rblk = rrpc_get_blk(rrpc, rlun, 1);
				if (!rblk) {
					pr_err("rrpc: no more blocks");
					goto err;
				}

				rlun->gc_cur = rblk;
				paddr = rrpc_alloc_addr(rrpc, rlun->gc_cur);
			}
			rblk = rlun->gc_cur;
		}
	}

	/* Nothing could be allocated (e.g. a regular write with no new block
	 * available). Recording ADDR_EMPTY with a NULL block would index the
	 * reverse map far out of bounds, so fail the mapping instead.
	 */
	if (paddr == ADDR_EMPTY)
		goto err;

	spin_unlock(&rlun->lock);
	return rrpc_update_map(rrpc, laddr, rblk, paddr);
err:
	spin_unlock(&rlun->lock);
	return NULL;
}
/* Schedule rrpc_gc_queue() for @rblk on the kgc workqueue. Logs an error
 * and gives up if no work descriptor can be allocated.
 */
static void rrpc_run_gc(struct rrpc *rrpc, struct rrpc_block *rblk)
{
	struct rrpc_block_gc *item = mempool_alloc(rrpc->gcb_pool, GFP_ATOMIC);

	if (item) {
		item->rrpc = rrpc;
		item->rblk = rblk;

		INIT_WORK(&item->ws_gc, rrpc_gc_queue);
		queue_work(rrpc->kgc_wq, &item->ws_gc);
		return;
	}

	pr_err("rrpc: unable to queue block for gc.");
}
/* Write-completion accounting: for each of the @npages logical pages
 * starting at @laddr, bump the committed-page counter of the block the page
 * now maps to. When a block's counter reaches pages-per-block, the block is
 * handed to rrpc_run_gc() so it becomes a GC candidate.
 *
 * @rrqd is accepted for interface compatibility and not used here.
 */
static void rrpc_end_io_write(struct rrpc *rrpc, struct rrpc_rq *rrqd,
						sector_t laddr, uint8_t npages)
{
	int i;

	for (i = 0; i < npages; i++) {
		struct rrpc_addr *p = &rrpc->trans_map[laddr + i];
		struct rrpc_block *rblk = p->rblk;

		if (unlikely(atomic_inc_return(&rblk->data_cmnt_size) ==
						rrpc->dev->pgs_per_blk))
			rrpc_run_gc(rrpc, rblk);
	}
}
/* Target end_io callback. Performs write accounting, then, for non-GC
 * requests, releases the range lock, the bio reference, any ppa list and
 * metadata buffers, and the request itself. GC requests are left to their
 * submitter. Always returns 0; @error is not examined.
 */
static int rrpc_end_io(struct nvm_rq *rqd, int error)
{
	struct rrpc *rrpc = container_of(rqd->ins, struct rrpc, instance);
	struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd);
	uint8_t npages = rqd->nr_pages;
	sector_t first = rrpc_get_laddr(rqd->bio) - npages;

	if (bio_data_dir(rqd->bio) == WRITE)
		rrpc_end_io_write(rrpc, rrqd, first, npages);

	if (!(rrqd->flags & NVM_IOTYPE_GC)) {
		rrpc_unlock_rq(rrpc, rqd);
		bio_put(rqd->bio);

		if (npages > 1)
			nvm_dev_dma_free(rrpc->dev, rqd->ppa_list,
							rqd->dma_ppa_list);
		if (rqd->metadata)
			nvm_dev_dma_free(rrpc->dev, rqd->metadata,
							rqd->dma_metadata);

		mempool_free(rqd, rrpc->rq_pool);
	}

	return 0;
}
/* Prepare a multi-page read: lock the logical range (unless GC) and fill
 * rqd->ppa_list with the physical address of every page.
 *
 * Returns NVM_IO_OK when the request is ready, NVM_IO_REQUEUE if the range
 * is busy, or NVM_IO_DONE if an unmapped page is encountered. On the latter
 * two the ppa list is freed here.
 */
static int rrpc_read_ppalist_rq(struct rrpc *rrpc, struct bio *bio,
			struct nvm_rq *rqd, unsigned long flags, int npages)
{
	struct rrpc_inflight_rq *r = rrpc_get_inflight_rq(rqd);
	sector_t laddr = rrpc_get_laddr(bio);
	int is_gc = flags & NVM_IOTYPE_GC;
	int i;

	if (!is_gc && rrpc_lock_rq(rrpc, bio, rqd)) {
		nvm_dev_dma_free(rrpc->dev, rqd->ppa_list, rqd->dma_ppa_list);
		return NVM_IO_REQUEUE;
	}

	for (i = 0; i < npages; i++) {
		sector_t cur = laddr + i;
		struct rrpc_addr *gp;

		/* We assume that mapping occurs at 4KB granularity */
		BUG_ON(cur < 0 || cur >= rrpc->nr_pages);
		gp = &rrpc->trans_map[cur];

		if (!gp->rblk) {
			BUG_ON(is_gc);
			rrpc_unlock_laddr(rrpc, r);
			nvm_dev_dma_free(rrpc->dev, rqd->ppa_list,
							rqd->dma_ppa_list);
			return NVM_IO_DONE;
		}

		rqd->ppa_list[i] = rrpc_ppa_to_gaddr(rrpc->dev, gp->addr);
	}

	rqd->opcode = NVM_OP_HBREAD;

	return NVM_IO_OK;
}
/* Prepare a single-page read: lock the logical page (unless GC) and
 * translate it to a physical address.
 *
 * Returns NVM_IO_OK when the request is ready, NVM_IO_REQUEUE if the page is
 * busy, or NVM_IO_DONE if the page is not mapped.
 */
static int rrpc_read_rq(struct rrpc *rrpc, struct bio *bio, struct nvm_rq *rqd,
							unsigned long flags)
{
	struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd);
	int is_gc = flags & NVM_IOTYPE_GC;
	sector_t laddr = rrpc_get_laddr(bio);
	struct rrpc_addr *entry;

	if (!is_gc && rrpc_lock_rq(rrpc, bio, rqd))
		return NVM_IO_REQUEUE;

	BUG_ON(laddr < 0 || laddr >= rrpc->nr_pages);
	entry = rrpc->trans_map + laddr;

	if (!entry->rblk) {
		BUG_ON(is_gc);
		rrpc_unlock_rq(rrpc, rqd);
		return NVM_IO_DONE;
	}

	rqd->ppa_addr = rrpc_ppa_to_gaddr(rrpc->dev, entry->addr);
	rqd->opcode = NVM_OP_HBREAD;
	rrqd->addr = entry;

	return NVM_IO_OK;
}
/* Prepare a multi-page write: lock the logical range (unless GC), map each
 * page to a fresh physical page and store the addresses in rqd->ppa_list.
 *
 * Returns NVM_IO_OK when the request is ready, or NVM_IO_REQUEUE if the
 * range is busy or a page could not be mapped; in the latter case GC is
 * kicked. The ppa list is freed here on NVM_IO_REQUEUE.
 */
static int rrpc_write_ppalist_rq(struct rrpc *rrpc, struct bio *bio,
			struct nvm_rq *rqd, unsigned long flags, int npages)
{
	struct rrpc_inflight_rq *r = rrpc_get_inflight_rq(rqd);
	sector_t laddr = rrpc_get_laddr(bio);
	int is_gc = flags & NVM_IOTYPE_GC;
	int i;

	if (!is_gc && rrpc_lock_rq(rrpc, bio, rqd)) {
		nvm_dev_dma_free(rrpc->dev, rqd->ppa_list, rqd->dma_ppa_list);
		return NVM_IO_REQUEUE;
	}

	for (i = 0; i < npages; i++) {
		/* We assume that mapping occurs at 4KB granularity */
		struct rrpc_addr *p = rrpc_map_page(rrpc, laddr + i, is_gc);

		if (p) {
			rqd->ppa_list[i] = rrpc_ppa_to_gaddr(rrpc->dev,
								p->addr);
			continue;
		}

		BUG_ON(is_gc);
		rrpc_unlock_laddr(rrpc, r);
		nvm_dev_dma_free(rrpc->dev, rqd->ppa_list,
						rqd->dma_ppa_list);
		rrpc_gc_kick(rrpc);
		return NVM_IO_REQUEUE;
	}

	rqd->opcode = NVM_OP_HBWRITE;

	return NVM_IO_OK;
}
/* Prepare a single-page write: lock the logical page (unless GC) and map it
 * to a fresh physical page.
 *
 * Returns NVM_IO_OK when the request is ready, or NVM_IO_REQUEUE if the page
 * is busy or could not be mapped; in the latter case GC is kicked.
 */
static int rrpc_write_rq(struct rrpc *rrpc, struct bio *bio,
				struct nvm_rq *rqd, unsigned long flags)
{
	struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd);
	int is_gc = flags & NVM_IOTYPE_GC;
	sector_t laddr = rrpc_get_laddr(bio);
	struct rrpc_addr *mapped;

	if (!is_gc && rrpc_lock_rq(rrpc, bio, rqd))
		return NVM_IO_REQUEUE;

	mapped = rrpc_map_page(rrpc, laddr, is_gc);
	if (mapped) {
		rqd->ppa_addr = rrpc_ppa_to_gaddr(rrpc->dev, mapped->addr);
		rqd->opcode = NVM_OP_HBWRITE;
		rrqd->addr = mapped;
		return NVM_IO_OK;
	}

	BUG_ON(is_gc);
	rrpc_unlock_rq(rrpc, rqd);
	rrpc_gc_kick(rrpc);

	return NVM_IO_REQUEUE;
}
/* Dispatch request preparation by size and direction. Requests of more than
 * one page get a DMA-able ppa list allocated first and go through the
 * *_ppalist_rq helpers; single-page requests use the plain helpers.
 *
 * Returns the helper's NVM_IO_* code, or NVM_IO_ERR if the ppa list cannot
 * be allocated.
 */
static int rrpc_setup_rq(struct rrpc *rrpc, struct bio *bio,
			struct nvm_rq *rqd, unsigned long flags, uint8_t npages)
{
	if (npages <= 1) {
		if (bio_rw(bio) == WRITE)
			return rrpc_write_rq(rrpc, bio, rqd, flags);

		return rrpc_read_rq(rrpc, bio, rqd, flags);
	}

	rqd->ppa_list = nvm_dev_dma_alloc(rrpc->dev, GFP_KERNEL,
						&rqd->dma_ppa_list);
	if (!rqd->ppa_list) {
		pr_err("rrpc: not able to allocate ppa list\n");
		return NVM_IO_ERR;
	}

	if (bio_rw(bio) == WRITE)
		return rrpc_write_ppalist_rq(rrpc, bio, rqd, flags, npages);

	return rrpc_read_ppalist_rq(rrpc, bio, rqd, flags, npages);
}
static int rrpc_submit_io(struct rrpc *rrpc, struct bio *bio,
struct nvm_rq *rqd, unsigned long flags)
{
int err;
struct rrpc_rq *rrq = nvm_rq_to_pdu(rqd);
uint8_t nr_pages = rrpc_get_pages(bio);
int bio_size = bio_sectors(bio) << 9;
if (bio_size < rrpc->dev->sec_size)
return NVM_IO_ERR;
else if (bio_size > rrpc->dev->max_rq_size)
return NVM_IO_ERR;
err = rrpc_setup_rq(rrpc, bio, rqd, flags, nr_pages);
if (err)
return err;
bio_get(bio);
rqd->bio = bio;
rqd->ins = &rrpc->instance;
rqd->nr_pages = nr_pages;
rrq->flags = flags;
err = nvm_submit_io(rrpc->dev, rqd);
if (err) {
pr_err("rrpc: I/O submission failed: %d\n", err);
return NVM_IO_ERR;
}
return NVM_IO_OK;
}
/* make_request entry point of the target. Discards are handled separately;
 * all other bios get a request from the pool and are submitted. Depending on
 * the outcome the bio is failed, completed, or parked on the requeue list
 * for rrpc_requeue(); in all of those cases the request goes back to the
 * pool. A successfully submitted request is owned by the completion path.
 */
static void rrpc_make_rq(struct request_queue *q, struct bio *bio)
{
	struct rrpc *rrpc = q->queuedata;
	struct nvm_rq *rqd;
	int status;

	if (bio->bi_rw & REQ_DISCARD) {
		rrpc_discard(rrpc, bio);
		return;
	}

	rqd = mempool_alloc(rrpc->rq_pool, GFP_KERNEL);
	if (!rqd) {
		pr_err_ratelimited("rrpc: not able to queue bio.");
		bio_io_error(bio);
		return;
	}
	memset(rqd, 0, sizeof(struct nvm_rq));

	status = rrpc_submit_io(rrpc, bio, rqd, NVM_IOTYPE_NONE);
	if (status == NVM_IO_OK)
		return;

	if (status == NVM_IO_ERR) {
		bio_io_error(bio);
	} else if (status == NVM_IO_DONE) {
		bio_endio(bio);
	} else if (status == NVM_IO_REQUEUE) {
		spin_lock(&rrpc->bio_lock);
		bio_list_add(&rrpc->requeue_bios, bio);
		spin_unlock(&rrpc->bio_lock);
		queue_work(rrpc->kgc_wq, &rrpc->ws_requeue);
	}

	mempool_free(rqd, rrpc->rq_pool);
}
static void rrpc_requeue(struct work_struct *work)
{
struct rrpc *rrpc = container_of(work, struct rrpc, ws_requeue);
struct bio_list bios;
struct bio *bio;
bio_list_init(&bios);
spin_lock(&rrpc->bio_lock);
bio_list_merge(&bios, &rrpc->requeue_bios);
bio_list_init(&rrpc->requeue_bios);
spin_unlock(&rrpc->bio_lock);
while ((bio = bio_list_pop(&bios)))
rrpc_make_rq(rrpc->disk->queue, bio);
}
/* Tear down GC resources: destroy both workqueues if they exist and free
 * the per-lun block arrays. Freeing stops at the first lun without a block
 * array.
 */
static void rrpc_gc_free(struct rrpc *rrpc)
{
	int i;

	if (rrpc->krqd_wq)
		destroy_workqueue(rrpc->krqd_wq);

	if (rrpc->kgc_wq)
		destroy_workqueue(rrpc->kgc_wq);

	if (!rrpc->luns)
		return;

	for (i = 0; i < rrpc->nr_luns && rrpc->luns[i].blocks; i++)
		vfree(rrpc->luns[i].blocks);
}
/* Create the two GC workqueues and set up (but do not start) the GC timer.
 *
 * Returns 0 on success or -ENOMEM if a workqueue cannot be created; a queue
 * created before the failure stays in rrpc for rrpc_gc_free() to destroy.
 */
static int rrpc_gc_init(struct rrpc *rrpc)
{
	struct workqueue_struct *wq;

	wq = alloc_workqueue("rrpc-lun", WQ_MEM_RECLAIM|WQ_UNBOUND,
								rrpc->nr_luns);
	rrpc->krqd_wq = wq;
	if (!wq)
		return -ENOMEM;

	wq = alloc_workqueue("rrpc-bg", WQ_MEM_RECLAIM, 1);
	rrpc->kgc_wq = wq;
	if (!wq)
		return -ENOMEM;

	setup_timer(&rrpc->gc_timer, rrpc_gc_timer, (unsigned long)rrpc);

	return 0;
}
/* Release the reverse and forward translation maps. */
static void rrpc_map_free(struct rrpc *rrpc)
{
	struct rrpc_rev_addr *rev = rrpc->rev_trans_map;
	struct rrpc_addr *fwd = rrpc->trans_map;

	vfree(rev);
	vfree(fwd);
}
/* Callback for dev->ops->get_l2p_tbl(): import @nlb logical-to-physical
 * entries starting at logical page @slba into the forward and reverse maps.
 * Entries with physical address zero are skipped.
 *
 * Returns 0 on success or -EINVAL if the range or an entry is out of
 * bounds. Entries imported before a bad one are kept.
 */
static int rrpc_l2p_update(u64 slba, u32 nlb, __le64 *entries, void *private)
{
	struct rrpc *rrpc = (struct rrpc *)private;
	struct nvm_dev *dev = rrpc->dev;
	struct rrpc_addr *fwd = &rrpc->trans_map[slba];
	struct rrpc_rev_addr *rev = rrpc->rev_trans_map;
	sector_t max_pages = dev->total_pages * (dev->sec_size >> 9);
	u64 elba = slba + nlb;
	u64 idx;

	if (unlikely(elba > dev->total_pages)) {
		pr_err("nvm: L2P data from device is out of bounds!\n");
		return -EINVAL;
	}

	for (idx = 0; idx < nlb; idx++) {
		u64 pba = le64_to_cpu(entries[idx]);

		/* LNVM treats address-spaces as silos, LBA and PBA are
		 * equally large and zero-indexed.
		 */
		if (unlikely(pba >= max_pages && pba != U64_MAX)) {
			pr_err("nvm: L2P data entry is out of bounds!\n");
			return -EINVAL;
		}

		/* Address zero is a special one. The first page on a disk is
		 * protected. As it often holds internal device boot
		 * information.
		 */
		if (pba) {
			fwd[idx].addr = pba;
			rev[pba].addr = slba + idx;
		}
	}

	return 0;
}
/* Allocate the forward and reverse translation maps (nr_pages entries
 * each), mark every entry empty and, if the device provides get_l2p_tbl,
 * populate the maps from the device's L2P table.
 *
 * Returns 0 on success, -ENOMEM on allocation failure, or -EINVAL when the
 * device table cannot be read.
 */
static int rrpc_map_init(struct rrpc *rrpc)
{
	struct nvm_dev *dev = rrpc->dev;
	sector_t idx;
	int ret;

	rrpc->trans_map = vzalloc(sizeof(struct rrpc_addr) * rrpc->nr_pages);
	if (!rrpc->trans_map)
		return -ENOMEM;

	rrpc->rev_trans_map = vmalloc(sizeof(struct rrpc_rev_addr)
							* rrpc->nr_pages);
	if (!rrpc->rev_trans_map)
		return -ENOMEM;

	for (idx = 0; idx < rrpc->nr_pages; idx++) {
		rrpc->trans_map[idx].addr = ADDR_EMPTY;
		rrpc->rev_trans_map[idx].addr = ADDR_EMPTY;
	}

	if (dev->ops->get_l2p_tbl) {
		/* Bring up the mapping table from device */
		ret = dev->ops->get_l2p_tbl(dev->q, 0, dev->total_pages,
							rrpc_l2p_update, rrpc);
		if (ret) {
			pr_err("nvm: rrpc: could not read L2P table.\n");
			return -EINVAL;
		}
	}

	return 0;
}
/* Minimum pages needed within a lun; passed as the minimum element count of
 * the page mempool created in rrpc_core_init().
 */
#define PAGE_POOL_SIZE 16
/* NOTE(review): not referenced in the visible code -- confirm whether it is
 * still needed.
 */
#define ADDR_POOL_SIZE 64
/* Set up the memory pools of an rrpc instance and its inflight-request
 * list. The file-wide slab caches are created first, under rrpc_lock, if
 * they do not exist yet.
 *
 * Returns 0 on success or -ENOMEM. Pools created before a failure remain in
 * rrpc for rrpc_core_free() to destroy.
 */
static int rrpc_core_init(struct rrpc *rrpc)
{
	down_write(&rrpc_lock);
	if (!rrpc_gcb_cache) {
		rrpc_gcb_cache = kmem_cache_create("rrpc_gcb",
				sizeof(struct rrpc_block_gc), 0, 0, NULL);
		if (!rrpc_gcb_cache) {
			up_write(&rrpc_lock);
			return -ENOMEM;
		}

		rrpc_rq_cache = kmem_cache_create("rrpc_rq",
				sizeof(struct nvm_rq) + sizeof(struct rrpc_rq),
				0, 0, NULL);
		if (!rrpc_rq_cache) {
			kmem_cache_destroy(rrpc_gcb_cache);
			/* Clear the stale pointer: the check above keys on
			 * it, so leaving it set would make the next caller
			 * skip creation and use a destroyed cache.
			 */
			rrpc_gcb_cache = NULL;
			up_write(&rrpc_lock);
			return -ENOMEM;
		}
	}
	up_write(&rrpc_lock);

	rrpc->page_pool = mempool_create_page_pool(PAGE_POOL_SIZE, 0);
	if (!rrpc->page_pool)
		return -ENOMEM;

	rrpc->gcb_pool = mempool_create_slab_pool(rrpc->dev->nr_luns,
								rrpc_gcb_cache);
	if (!rrpc->gcb_pool)
		return -ENOMEM;

	rrpc->rq_pool = mempool_create_slab_pool(64, rrpc_rq_cache);
	if (!rrpc->rq_pool)
		return -ENOMEM;

	spin_lock_init(&rrpc->inflights.lock);
	INIT_LIST_HEAD(&rrpc->inflights.reqs);

	return 0;
}
static void rrpc_core_free(struct rrpc *rrpc)
{
mempool_destroy(rrpc->page_pool);
mempool_destroy(rrpc->gcb_pool);
mempool_destroy(rrpc->rq_pool);
}
/* Free the array of per-lun descriptors. */
static void rrpc_luns_free(struct rrpc *rrpc)
{
	struct rrpc_lun *luns = rrpc->luns;

	kfree(luns);
}
/* Allocate and initialise the per-lun descriptors, mapping target lun i 1:1
 * onto device lun @lun_begin + i, and allocate each lun's rrpc block array
 * mirroring the manager's blocks. Also accumulates total_blocks and nr_pages
 * for the instance.
 *
 * Returns 0 on success or -ENOMEM; the latter is also returned when the
 * device has more pages per block than the invalid-page bitmap can hold.
 * Partially initialised state is left for the caller's cleanup.
 */
static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end)
{
	struct nvm_dev *dev = rrpc->dev;
	int lun_idx, blk_idx;

	spin_lock_init(&rrpc->rev_lock);

	rrpc->luns = kcalloc(rrpc->nr_luns, sizeof(struct rrpc_lun),
								GFP_KERNEL);
	if (!rrpc->luns)
		return -ENOMEM;

	/* 1:1 mapping */
	for (lun_idx = 0; lun_idx < rrpc->nr_luns; lun_idx++) {
		struct nvm_lun *lun = dev->mt->get_lun(dev,
							lun_begin + lun_idx);
		struct rrpc_lun *rlun;

		if (dev->pgs_per_blk >
				MAX_INVALID_PAGES_STORAGE * BITS_PER_LONG) {
			pr_err("rrpc: number of pages per block too high.");
			return -ENOMEM;
		}

		rlun = rrpc->luns + lun_idx;
		rlun->rrpc = rrpc;
		rlun->parent = lun;
		INIT_LIST_HEAD(&rlun->prio_list);
		INIT_WORK(&rlun->ws_gc, rrpc_lun_gc);
		spin_lock_init(&rlun->lock);

		rrpc->total_blocks += dev->blks_per_lun;
		rrpc->nr_pages += dev->sec_per_lun;

		rlun->blocks = vzalloc(sizeof(struct rrpc_block) *
						rrpc->dev->blks_per_lun);
		if (!rlun->blocks)
			return -ENOMEM;

		for (blk_idx = 0; blk_idx < rrpc->dev->blks_per_lun;
								blk_idx++) {
			struct rrpc_block *rblk = rlun->blocks + blk_idx;

			rblk->parent = &lun->blocks[blk_idx];
			INIT_LIST_HEAD(&rblk->prio);
			spin_lock_init(&rblk->lock);
		}
	}

	return 0;
}
/* Release everything owned by an rrpc instance, then the instance itself.
 * The order matters: rrpc_gc_free() walks rrpc->luns to free the per-lun
 * block arrays, so it must run before rrpc_luns_free() releases that array.
 */
static void rrpc_free(struct rrpc *rrpc)
{
rrpc_gc_free(rrpc);
rrpc_map_free(rrpc);
rrpc_core_free(rrpc);
rrpc_luns_free(rrpc);
kfree(rrpc);
}
/* Target exit callback: stop the GC timer, drain both workqueues and free
 * the instance passed in @private.
 *
 * The timer handler re-arms itself and queues work on krqd_wq, so the timer
 * is stopped with del_timer_sync(): that also waits for a handler running
 * on another CPU, which plain del_timer() does not, before the workqueues
 * are flushed and the instance is freed.
 */
static void rrpc_exit(void *private)
{
	struct rrpc *rrpc = private;

	del_timer_sync(&rrpc->gc_timer);

	flush_workqueue(rrpc->krqd_wq);
	flush_workqueue(rrpc->kgc_wq);

	rrpc_free(rrpc);
}
/* Target capacity callback. Four blocks' worth of pages per lun (cur, gc and
 * two emergency blocks) are held back; nine tenths of the remainder are
 * exposed, expressed in 512-byte sectors.
 *
 * Returns 0 if the reservation exceeds the available pages.
 */
static sector_t rrpc_capacity(void *private)
{
	struct rrpc *rrpc = private;
	struct nvm_dev *dev = rrpc->dev;
	sector_t held_back, usable;

	/* cur, gc, and two emergency blocks for each lun */
	held_back = rrpc->nr_luns * dev->max_pages_per_blk * 4;

	if (held_back > rrpc->nr_pages) {
		pr_err("rrpc: not enough space available to expose storage.\n");
		return 0;
	}

	usable = rrpc->nr_pages - held_back;
	sector_div(usable, 10);

	return usable * 9 * NR_PHY_IN_LOG;
}
/*
 * Reconcile one block with the translation maps. For each page of the
 * block, the logical address is looked up in the reverse map; if the forward
 * map of that logical address points back at this page, the forward entry is
 * attached to the block, otherwise the page is marked invalid. Pages with no
 * reverse mapping are left untouched.
 */
static void rrpc_block_map_update(struct rrpc *rrpc, struct rrpc_block *rblk)
{
	struct nvm_dev *dev = rrpc->dev;
	int pg;

	for (pg = 0; pg < dev->pgs_per_blk; pg++) {
		u64 paddr = block_to_addr(rrpc, rblk) + pg;
		u64 pladdr = rrpc->rev_trans_map[paddr].addr;
		struct rrpc_addr *fwd;

		if (pladdr == ADDR_EMPTY)
			continue;

		fwd = rrpc->trans_map + pladdr;

		if (paddr != fwd->addr) {
			set_bit(pg, rblk->invalid_pages);
			rblk->nr_invalid_pages++;
		} else {
			fwd->rblk = rblk;
		}
	}
}
/* Run rrpc_block_map_update() on every block of every lun. Always returns
 * 0.
 */
static int rrpc_blocks_init(struct rrpc *rrpc)
{
	int lun_idx = 0;

	while (lun_idx < rrpc->nr_luns) {
		struct rrpc_lun *rlun = rrpc->luns + lun_idx;
		int blk_idx;

		for (blk_idx = 0; blk_idx < rrpc->dev->blks_per_lun;
								blk_idx++)
			rrpc_block_map_update(rrpc, &rlun->blocks[blk_idx]);

		lun_idx++;
	}

	return 0;
}
/* Give every lun its initial write block and its emergency GC block.
 *
 * Returns 0 on success or -EINVAL as soon as a block cannot be obtained.
 */
static int rrpc_luns_configure(struct rrpc *rrpc)
{
	int idx;

	for (idx = 0; idx < rrpc->nr_luns; idx++) {
		struct rrpc_lun *rlun = rrpc->luns + idx;
		struct rrpc_block *blk;

		blk = rrpc_get_blk(rrpc, rlun, 0);
		if (!blk)
			return -EINVAL;

		rrpc_set_lun_cur(rlun, blk);

		/* Emergency gc block */
		blk = rrpc_get_blk(rrpc, rlun, 1);
		if (!blk)
			return -EINVAL;

		rlun->gc_cur = blk;
	}

	return 0;
}
static struct nvm_tgt_type tt_rrpc;
/* Target init callback: create an rrpc instance on @dev covering luns
 * @lun_begin..@lun_end (inclusive) and exposed through @tdisk.
 *
 * Initialisation runs in dependency order: luns, core pools, translation
 * maps, per-block state, initial blocks per lun, then GC. On any failure the
 * partially built instance is torn down with rrpc_free().
 *
 * Returns the instance, or an ERR_PTR: -EINVAL if the device does not
 * advertise NVM_RSP_L2P, -ENOMEM if the instance cannot be allocated, or the
 * error of the failing init step.
 */
static void *rrpc_init(struct nvm_dev *dev, struct gendisk *tdisk,
int lun_begin, int lun_end)
{
struct request_queue *bqueue = dev->q;
struct request_queue *tqueue = tdisk->queue;
struct rrpc *rrpc;
int ret;
if (!(dev->identity.dom & NVM_RSP_L2P)) {
pr_err("nvm: rrpc: device does not support l2p (%x)\n",
dev->identity.dom);
return ERR_PTR(-EINVAL);
}
rrpc = kzalloc(sizeof(struct rrpc), GFP_KERNEL);
if (!rrpc)
return ERR_PTR(-ENOMEM);
rrpc->instance.tt = &tt_rrpc;
rrpc->dev = dev;
rrpc->disk = tdisk;
bio_list_init(&rrpc->requeue_bios);
spin_lock_init(&rrpc->bio_lock);
INIT_WORK(&rrpc->ws_requeue, rrpc_requeue);
/* the lun range is inclusive on both ends */
rrpc->nr_luns = lun_end - lun_begin + 1;
/* simple round-robin strategy */
/* starts at -1 so that the first increment selects lun 0 */
atomic_set(&rrpc->next_lun, -1);
ret = rrpc_luns_init(rrpc, lun_begin, lun_end);
if (ret) {
pr_err("nvm: rrpc: could not initialize luns\n");
goto err;
}
/* physical page and lun offsets of this target within the device */
rrpc->poffset = dev->sec_per_lun * lun_begin;
rrpc->lun_offset = lun_begin;
ret = rrpc_core_init(rrpc);
if (ret) {
pr_err("nvm: rrpc: could not initialize core\n");
goto err;
}
ret = rrpc_map_init(rrpc);
if (ret) {
pr_err("nvm: rrpc: could not initialize maps\n");
goto err;
}
ret = rrpc_blocks_init(rrpc);
if (ret) {
pr_err("nvm: rrpc: could not initialize state for blocks\n");
goto err;
}
ret = rrpc_luns_configure(rrpc);
if (ret) {
pr_err("nvm: rrpc: not enough blocks available in LUNs.\n");
goto err;
}
ret = rrpc_gc_init(rrpc);
if (ret) {
pr_err("nvm: rrpc: could not initialize gc\n");
goto err;
}
/* inherit the size from the underlying device */
blk_queue_logical_block_size(tqueue, queue_physical_block_size(bqueue));
blk_queue_max_hw_sectors(tqueue, queue_max_hw_sectors(bqueue));
pr_info("nvm: rrpc initialized with %u luns and %llu pages.\n",
rrpc->nr_luns, (unsigned long long)rrpc->nr_pages);
/* start periodic GC; the timer handler re-arms itself */
mod_timer(&rrpc->gc_timer, jiffies + msecs_to_jiffies(10));
return rrpc;
err:
rrpc_free(rrpc);
return ERR_PTR(ret);
}
/* round robin, page-based FTL, and cost-based GC */
/* Target type descriptor registered with nvm_register_target(); it binds
 * the rrpc_* entry points of this file to the target operations.
 */
static struct nvm_tgt_type tt_rrpc = {
.name = "rrpc",
.version = {1, 0, 0},
.make_rq = rrpc_make_rq,
.capacity = rrpc_capacity,
.end_io = rrpc_end_io,
.init = rrpc_init,
.exit = rrpc_exit,
};
/* Module load hook: register the rrpc target type and propagate the
 * registration result.
 */
static int __init rrpc_module_init(void)
{
	int rc = nvm_register_target(&tt_rrpc);

	return rc;
}
/* Module unload hook: unregister the rrpc target type. */
static void rrpc_module_exit(void)
{
	struct nvm_tgt_type *tt = &tt_rrpc;

	nvm_unregister_target(tt);
}
/* Module entry/exit hooks and metadata for the rrpc target. */
module_init(rrpc_module_init);
module_exit(rrpc_module_exit);
MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("Block-Device Target for Open-Channel SSDs");
/*
* Copyright (C) 2015 IT University of Copenhagen
* Initial release: Matias Bjorling <m@bjorling.me>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version
* 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* Implementation of a Round-robin page-based Hybrid FTL for Open-channel SSDs.
*/
#ifndef RRPC_H_
#define RRPC_H_
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/bio.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/vmalloc.h>
#include <linux/lightnvm.h>
/* Run only GC if less than 1/X blocks are free */
#define GC_LIMIT_INVERSE 10
/* NOTE(review): not referenced in the visible code; the GC timer uses a
 * fixed 10 ms interval -- confirm whether this is still needed.
 */
#define GC_TIME_SECS 100
/* Size in bytes of a block-layer sector */
#define RRPC_SECTOR (512)
/* Size in bytes of the page granularity rrpc maps at */
#define RRPC_EXPOSED_PAGE_SIZE (4096)
/* Number of 512-byte sectors per exposed page */
#define NR_PHY_IN_LOG (RRPC_EXPOSED_PAGE_SIZE / RRPC_SECTOR)
/* List of logical address ranges currently locked by in-flight requests. */
struct rrpc_inflight {
/* list of struct rrpc_inflight_rq, linked through their list member */
struct list_head reqs;
/* protects reqs */
spinlock_t lock;
};
/* One locked logical address range; l_start and l_end are both inclusive
 * (see request_intersects()).
 */
struct rrpc_inflight_rq {
struct list_head list;
sector_t l_start;
sector_t l_end;
};
/* rrpc-private per-request data, obtained from an nvm_rq through
 * nvm_rq_to_pdu().
 */
struct rrpc_rq {
/* range lock record of this request */
struct rrpc_inflight_rq inflight_rq;
/* translation entry of a single-page request */
struct rrpc_addr *addr;
/* NVM_IOTYPE_* flags the request was submitted with */
unsigned long flags;
};
/* rrpc's per-block state, shadowing a media-manager block. */
struct rrpc_block {
/* the media-manager block this entry shadows */
struct nvm_block *parent;
/* link in the owning lun's prio_list of GC candidates */
struct list_head prio;
/* capacity of the bitmap below, in longs */
#define MAX_INVALID_PAGES_STORAGE 8
/* Bitmap for invalid page entries */
unsigned long invalid_pages[MAX_INVALID_PAGES_STORAGE];
/* points to the next writable page within a block */
unsigned int next_page;
/* number of pages that are invalid, wrt host page size */
unsigned int nr_invalid_pages;
/* protects the invalid-page bookkeeping and next_page */
spinlock_t lock;
atomic_t data_cmnt_size; /* data pages committed to stable storage */
};
/* rrpc's per-lun state. */
struct rrpc_lun {
/* owning target instance */
struct rrpc *rrpc;
/* the media-manager lun backing this entry */
struct nvm_lun *parent;
/* current write block and emergency GC write block */
struct rrpc_block *cur, *gc_cur;
struct rrpc_block *blocks; /* Reference to block allocation */
struct list_head prio_list; /* Blocks that may be GC'ed */
/* per-lun GC work item, handled by rrpc_lun_gc() */
struct work_struct ws_gc;
spinlock_t lock;
};
/* One rrpc target instance. */
struct rrpc {
/* instance must be kept in top to resolve rrpc in unprep */
struct nvm_tgt_instance instance;
struct nvm_dev *dev;
struct gendisk *disk;
u64 poffset; /* physical page offset */
/* index of the first device lun used by this target */
int lun_offset;
int nr_luns;
struct rrpc_lun *luns;
/* calculated values */
unsigned long long nr_pages;
unsigned long total_blocks;
/* Write strategy variables. These should eventually move into a
* separate structure per strategy.
*/
atomic_t next_lun; /* Whenever a page is written, this is updated
* to point to the next write lun
*/
/* protects requeue_bios */
spinlock_t bio_lock;
/* bios waiting to be resubmitted by rrpc_requeue() */
struct bio_list requeue_bios;
struct work_struct ws_requeue;
/* Simple translation map of logical addresses to physical addresses.
* The logical addresses are known by the host system, while the physical
* addresses are used when writing to the disk block device.
*/
struct rrpc_addr *trans_map;
/* also store a reverse map for garbage collection */
struct rrpc_rev_addr *rev_trans_map;
spinlock_t rev_lock;
struct rrpc_inflight inflights;
/* NOTE(review): addr_pool is not used in the visible code -- confirm */
mempool_t *addr_pool;
mempool_t *page_pool;
mempool_t *gcb_pool;
mempool_t *rq_pool;
/* periodic GC trigger, see rrpc_gc_timer() */
struct timer_list gc_timer;
/* runs the per-lun GC work items */
struct workqueue_struct *krqd_wq;
/* runs block reclaim, GC queueing and bio requeue work */
struct workqueue_struct *kgc_wq;
};
/* Work item describing the garbage collection of a single block. */
struct rrpc_block_gc {
/* target instance the block belongs to */
struct rrpc *rrpc;
/* block to be collected */
struct rrpc_block *rblk;
/* work_struct through which the job is queued */
struct work_struct ws_gc;
};
/* Logical to physical mapping: one entry of rrpc.trans_map */
struct rrpc_addr {
/* physical address the logical page maps to */
u64 addr;
/* block that holds the physical address */
struct rrpc_block *rblk;
};
/* Physical to logical mapping: one entry of rrpc.rev_trans_map */
struct rrpc_rev_addr {
/* logical address stored at this physical location */
u64 addr;
};
/*
 * Convert the starting sector of @bio into a logical page address by
 * dividing it by the number of sectors per exposed page.
 */
static inline sector_t rrpc_get_laddr(struct bio *bio)
{
	sector_t first_sector = bio->bi_iter.bi_sector;

	return first_sector / NR_PHY_IN_LOG;
}
/*
 * Number of whole exposed pages covered by the remaining payload of @bio
 * (byte count divided by RRPC_EXPOSED_PAGE_SIZE, rounded down).
 */
static inline unsigned int rrpc_get_pages(struct bio *bio)
{
	unsigned int payload_bytes = bio->bi_iter.bi_size;

	return payload_bytes / RRPC_EXPOSED_PAGE_SIZE;
}
/*
 * Inverse of rrpc_get_laddr(): first sector of the exposed page with
 * logical address @laddr.
 */
static inline sector_t rrpc_get_sector(sector_t laddr)
{
	sector_t sector = laddr;

	sector *= NR_PHY_IN_LOG;
	return sector;
}
/*
 * Test whether the inclusive logical range [@laddr_start, @laddr_end]
 * overlaps the range recorded in @r.
 *
 * Two inclusive ranges overlap exactly when each one starts no later than
 * the other one ends. The previous form required both end points of the
 * new range to lie inside @r, so partially overlapping ranges and ranges
 * that fully enclose @r were not detected and could be locked twice.
 *
 * Returns non-zero on overlap, 0 otherwise.
 */
static inline int request_intersects(struct rrpc_inflight_rq *r,
				sector_t laddr_start, sector_t laddr_end)
{
	return (laddr_end >= r->l_start) && (laddr_start <= r->l_end);
}
/*
 * Try to register the logical range starting at @laddr and spanning @pages
 * pages as in flight, using @r as the record.
 *
 * The in-flight list is scanned under inflights.lock with interrupts
 * disabled. If an existing entry intersects the new range nothing is
 * changed and 1 is returned so the caller can retry later; otherwise @r is
 * filled in, appended to the list and 0 is returned.
 */
static int __rrpc_lock_laddr(struct rrpc *rrpc, sector_t laddr,
			     unsigned pages, struct rrpc_inflight_rq *r)
{
	struct rrpc_inflight *inflights = &rrpc->inflights;
	sector_t last = laddr + pages - 1;
	struct rrpc_inflight_rq *cur;
	int busy = 0;

	spin_lock_irq(&inflights->lock);

	list_for_each_entry(cur, &inflights->reqs, list) {
		if (unlikely(request_intersects(cur, laddr, last))) {
			/* existing, overlapping request, come back later */
			busy = 1;
			break;
		}
	}

	if (!busy) {
		r->l_start = laddr;
		r->l_end = last;
		list_add_tail(&r->list, &inflights->reqs);
	}

	spin_unlock_irq(&inflights->lock);

	return busy;
}
/*
 * Bounds-checked front end of __rrpc_lock_laddr(): the range must end
 * within the rrpc->nr_pages logical pages of the target, otherwise the
 * kernel BUGs. Returns what __rrpc_lock_laddr() returns.
 */
static inline int rrpc_lock_laddr(struct rrpc *rrpc, sector_t laddr,
				 unsigned pages,
				 struct rrpc_inflight_rq *r)
{
	sector_t past_end = laddr + pages;

	BUG_ON(past_end > rrpc->nr_pages);

	return __rrpc_lock_laddr(rrpc, laddr, pages, r);
}
/*
 * Return the range lock record embedded in the rrpc private data that
 * follows @rqd.
 */
static inline struct rrpc_inflight_rq *rrpc_get_inflight_rq(struct nvm_rq *rqd)
{
	return &((struct rrpc_rq *)nvm_rq_to_pdu(rqd))->inflight_rq;
}
/*
 * Lock the logical range addressed by @bio on behalf of @rqd. The start
 * address and page count are derived from the bio; the lock record is the
 * one embedded behind @rqd. Returns 0 on success, 1 if the range is busy.
 */
static inline int rrpc_lock_rq(struct rrpc *rrpc, struct bio *bio,
			       struct nvm_rq *rqd)
{
	return rrpc_lock_laddr(rrpc, rrpc_get_laddr(bio), rrpc_get_pages(bio),
			       rrpc_get_inflight_rq(rqd));
}
static inline void rrpc_unlock_laddr(struct rrpc *rrpc,
struct rrpc_inflight_rq *r)
{
unsigned long flags;
spin_lock_irqsave(&rrpc->inflights.lock, flags);
list_del_init(&r->list);
spin_unlock_irqrestore(&rrpc->inflights.lock, flags);
}
/*
 * Release the logical range locked for @rqd.
 *
 * The sanity check uses the full width of rqd->nr_pages (a uint16_t);
 * keeping the count in a uint8_t silently dropped the upper bits and
 * weakened the BUG_ON for requests of 256 pages or more.
 */
static inline void rrpc_unlock_rq(struct rrpc *rrpc, struct nvm_rq *rqd)
{
	struct rrpc_inflight_rq *r = rrpc_get_inflight_rq(rqd);
	unsigned int pages = rqd->nr_pages;

	BUG_ON((r->l_start + pages) > rrpc->nr_pages);

	rrpc_unlock_laddr(rrpc, r);
}
#endif /* RRPC_H_ */
obj-$(CONFIG_BLK_DEV_NVME) += nvme.o obj-$(CONFIG_BLK_DEV_NVME) += nvme.o
nvme-y += pci.o scsi.o nvme-y += pci.o scsi.o lightnvm.o
/*
* nvme-lightnvm.c - LightNVM NVMe device
*
* Copyright (C) 2014-2015 IT University of Copenhagen
* Initial release: Matias Bjorling <mb@lightnvm.io>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version
* 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
* USA.
*
*/
#include "nvme.h"
#ifdef CONFIG_NVM
#include <linux/nvme.h>
#include <linux/bitops.h>
#include <linux/lightnvm.h>
#include <linux/vmalloc.h>
/*
 * Admin opcodes of the LightNVM extension. Only identity, get_l2p_tbl and
 * get_bb_tbl are issued by this file; set_bb_tbl is defined but unused here.
 */
enum nvme_nvm_admin_opcode {
nvme_nvm_admin_identity = 0xe2,
nvme_nvm_admin_get_l2p_tbl = 0xea,
nvme_nvm_admin_get_bb_tbl = 0xf2,
nvme_nvm_admin_set_bb_tbl = 0xf1,
};
/*
 * Hybrid read/write command: same layout as nvme_nvm_ph_rw, with the last
 * 8 bytes carrying a logical block address. Must be exactly 64 bytes.
 */
struct nvme_nvm_hb_rw {
__u8 opcode;
__u8 flags;
__u16 command_id;
__le32 nsid;
__u64 rsvd2;
__le64 metadata;
__le64 prp1;
__le64 prp2;
/* physical address (taken from rqd->ppa_addr.ppa) */
__le64 spba;
/* number of pages minus one */
__le16 length;
__le16 control;
__le32 dsmgmt;
/* logical block address, filled in only for NVM_OP_HBREAD/NVM_OP_HBWRITE */
__le64 slba;
};
/* Physical read/write command. Must be exactly 64 bytes. */
struct nvme_nvm_ph_rw {
/* opcode copied from the nvm_rq */
__u8 opcode;
__u8 flags;
__u16 command_id;
__le32 nsid;
__u64 rsvd2;
__le64 metadata;
__le64 prp1;
__le64 prp2;
/* physical address (taken from rqd->ppa_addr.ppa) */
__le64 spba;
/* number of pages minus one */
__le16 length;
/* request flags copied from the nvm_rq */
__le16 control;
__le32 dsmgmt;
__le64 resv;
};
/* Identify command of the LightNVM extension. Must be exactly 64 bytes. */
struct nvme_nvm_identity {
__u8 opcode;
__u8 flags;
__u16 command_id;
__le32 nsid;
__u64 rsvd[2];
__le64 prp1;
__le64 prp2;
/* channel offset; this driver always sends 0 */
__le32 chnl_off;
__u32 rsvd11[5];
};
/* Command to read a slice of the device's L2P table. Must be exactly 64 bytes. */
struct nvme_nvm_l2ptbl {
__u8 opcode;
__u8 flags;
__u16 command_id;
__le32 nsid;
__le32 cdw2[4];
__le64 prp1;
__le64 prp2;
/* first logical block address of the requested slice */
__le64 slba;
/* number of table entries requested */
__le32 nlb;
__le16 cdw14[6];
};
/* Get/set bad block table command. Must be exactly 64 bytes. */
struct nvme_nvm_bbtbl {
__u8 opcode;
__u8 flags;
__u16 command_id;
__le32 nsid;
__u64 rsvd[2];
__le64 prp1;
__le64 prp2;
__le32 prp1_len;
__le32 prp2_len;
/* this driver stores the LUN id here when fetching the table */
__le32 lbb;
__u32 rsvd11[3];
};
/* Block erase command. Must be exactly 64 bytes. */
struct nvme_nvm_erase_blk {
__u8 opcode;
__u8 flags;
__u16 command_id;
__le32 nsid;
__u64 rsvd[2];
__le64 prp1;
__le64 prp2;
/* physical address (taken from rqd->ppa_addr.ppa) */
__le64 spba;
/* number of pages minus one */
__le16 length;
__le16 control;
__le32 dsmgmt;
__le64 resv;
};
/*
 * Union of all LightNVM command layouts. Callers in this file cast a
 * pointer to it to struct nvme_command before submitting it.
 */
struct nvme_nvm_command {
union {
struct nvme_common_command common;
struct nvme_nvm_identity identity;
struct nvme_nvm_hb_rw hb_rw;
struct nvme_nvm_ph_rw ph_rw;
struct nvme_nvm_l2ptbl l2p;
/* get_bb and set_bb share one layout */
struct nvme_nvm_bbtbl get_bb;
struct nvme_nvm_bbtbl set_bb;
struct nvme_nvm_erase_blk erase;
};
};
/*
 * One configuration group as returned by the identify command
 * (little-endian, packed, 960 bytes). Converted field by field into
 * struct nvm_id_group by init_grps().
 */
struct nvme_nvm_id_group {
__u8 mtype;
__u8 fmtype;
__le16 res16;
__u8 num_ch;
__u8 num_lun;
__u8 num_pln;
__le16 num_blk;
__le16 num_pg;
__le16 fpg_sz;
__le16 csecs;
__le16 sos;
__le32 trdt;
__le32 trdm;
__le32 tprt;
__le32 tprm;
__le32 tbet;
__le32 tbem;
__le32 mpos;
__le16 cpar;
/* pads the group to 960 bytes */
__u8 reserved[913];
} __packed;
/*
 * Offset/length pair for each component of a physical address, as
 * reported by the device. Twelve one-byte fields plus four reserved bytes:
 * 16 bytes in total.
 */
struct nvme_nvm_addr_format {
__u8 ch_offset;
__u8 ch_len;
__u8 lun_offset;
__u8 lun_len;
__u8 pln_offset;
__u8 pln_len;
__u8 blk_offset;
__u8 blk_len;
__u8 pg_offset;
__u8 pg_len;
__u8 sect_offset;
__u8 sect_len;
__u8 res[4];
} __packed;
/*
 * Identify data returned by the device: a 256-byte header followed by
 * four 960-byte groups, 4096 bytes in total.
 */
struct nvme_nvm_id {
__u8 ver_id;
__u8 vmnt;
/* number of valid entries in groups (at most 4 are read) */
__u8 cgrps;
__u8 res[5];
__le32 cap;
__le32 dom;
/* NOTE(review): ppaf and ppat are not copied into struct nvm_id by nvme_nvm_identity() */
struct nvme_nvm_addr_format ppaf;
__u8 ppat;
/* pads the header to 256 bytes */
__u8 resv[223];
struct nvme_nvm_id_group groups[4];
} __packed;
/*
 * Check we didn't inadvertently grow the command or identify structures.
 *
 * All sizes are in bytes: every command is one 64-byte submission queue
 * entry, an identify group is 960 bytes, the address format is 16 bytes
 * (twelve offset/length bytes plus four reserved ones, i.e. 128 bits) and
 * the whole identify page is 4096 bytes.
 */
static inline void _nvme_nvm_check_size(void)
{
	BUILD_BUG_ON(sizeof(struct nvme_nvm_identity) != 64);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_hb_rw) != 64);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_ph_rw) != 64);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_bbtbl) != 64);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_l2ptbl) != 64);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_erase_blk) != 64);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_id_group) != 960);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_addr_format) != 16);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_id) != 4096);
}
/*
 * Copy the configuration groups from the little-endian identify page
 * @nvme_nvm_id into the CPU-endian @nvm_id.
 *
 * nvm_id->cgrps must already be set; at most four groups are converted.
 * Always returns 0.
 */
static int init_grps(struct nvm_id *nvm_id, struct nvme_nvm_id *nvme_nvm_id)
{
	int nr_grps = min_t(u32, 4, nvm_id->cgrps);
	int idx;

	for (idx = 0; idx < nr_grps; idx++) {
		struct nvme_nvm_id_group *wire = &nvme_nvm_id->groups[idx];
		struct nvm_id_group *host = &nvm_id->groups[idx];

		/* single-byte fields need no conversion */
		host->mtype = wire->mtype;
		host->fmtype = wire->fmtype;
		host->num_ch = wire->num_ch;
		host->num_lun = wire->num_lun;
		host->num_pln = wire->num_pln;

		/* 16-bit fields */
		host->num_pg = le16_to_cpu(wire->num_pg);
		host->num_blk = le16_to_cpu(wire->num_blk);
		host->fpg_sz = le16_to_cpu(wire->fpg_sz);
		host->csecs = le16_to_cpu(wire->csecs);
		host->sos = le16_to_cpu(wire->sos);
		host->cpar = le16_to_cpu(wire->cpar);

		/* 32-bit fields */
		host->trdt = le32_to_cpu(wire->trdt);
		host->trdm = le32_to_cpu(wire->trdm);
		host->tprt = le32_to_cpu(wire->tprt);
		host->tprm = le32_to_cpu(wire->tprm);
		host->tbet = le32_to_cpu(wire->tbet);
		host->tbem = le32_to_cpu(wire->tbem);
		host->mpos = le32_to_cpu(wire->mpos);
	}

	return 0;
}
/*
 * Issue the LightNVM identify command on @q and translate the reply into
 * @nvm_id.
 *
 * Returns 0 on success, -ENOMEM if the reply buffer cannot be allocated
 * and -EIO if the command fails (the original status is not propagated).
 */
static int nvme_nvm_identity(struct request_queue *q, struct nvm_id *nvm_id)
{
	struct nvme_ns *ns = q->queuedata;
	struct nvme_nvm_command c = {};
	struct nvme_nvm_id *id_page;
	int ret;

	c.identity.opcode = nvme_nvm_admin_identity;
	c.identity.nsid = cpu_to_le32(ns->ns_id);
	c.identity.chnl_off = 0;

	id_page = kmalloc(sizeof(*id_page), GFP_KERNEL);
	if (!id_page)
		return -ENOMEM;

	if (nvme_submit_sync_cmd(q, (struct nvme_command *)&c, id_page,
				 sizeof(*id_page))) {
		ret = -EIO;
	} else {
		nvm_id->ver_id = id_page->ver_id;
		nvm_id->vmnt = id_page->vmnt;
		nvm_id->cgrps = id_page->cgrps;
		nvm_id->cap = le32_to_cpu(id_page->cap);
		nvm_id->dom = le32_to_cpu(id_page->dom);

		/* needs nvm_id->cgrps, which was set just above */
		ret = init_grps(nvm_id, id_page);
	}

	kfree(id_page);
	return ret;
}
/*
 * Read @nlb entries of the device's L2P table, starting at @slba, and feed
 * them to @update_l2p.
 *
 * The table is fetched in chunks. The bounce buffer is sized from the
 * queue's max_hw_sectors, and each chunk holds as many u64 entries as fit
 * into it. @update_l2p is called once per chunk with the chunk's start
 * address, its entry count, the buffer and @priv.
 *
 * Returns 0 on success, -ENOMEM if the buffer cannot be allocated, -EIO if
 * a transfer fails and -EINTR if @update_l2p returns non-zero.
 */
static int nvme_nvm_get_l2p_tbl(struct request_queue *q, u64 slba, u32 nlb,
				nvm_l2p_update_fn *update_l2p, void *priv)
{
	struct nvme_ns *ns = q->queuedata;
	struct nvme_dev *dev = ns->dev;
	struct nvme_nvm_command c = {};
	u32 len = queue_max_hw_sectors(q) << 9;
	u32 nlb_pr_rq = len / sizeof(u64);
	u64 cmd_slba = slba;
	u32 chunk;
	void *entries;
	int ret = 0;

	c.l2p.opcode = nvme_nvm_admin_get_l2p_tbl;
	c.l2p.nsid = cpu_to_le32(ns->ns_id);

	entries = kmalloc(len, GFP_KERNEL);
	if (!entries)
		return -ENOMEM;

	for (; nlb; cmd_slba += chunk, nlb -= chunk) {
		chunk = min(nlb_pr_rq, nlb);

		c.l2p.slba = cpu_to_le64(cmd_slba);
		c.l2p.nlb = cpu_to_le32(chunk);

		ret = nvme_submit_sync_cmd(q, (struct nvme_command *)&c,
					   entries, len);
		if (ret) {
			dev_err(dev->dev, "L2P table transfer failed (%d)\n",
				ret);
			ret = -EIO;
			break;
		}

		if (update_l2p(cmd_slba, chunk, entries, priv)) {
			ret = -EINTR;
			break;
		}
	}

	kfree(entries);
	return ret;
}
/*
 * Fetch the bad block bitmap of LUN @lunid and pass it to @update_bbtbl.
 *
 * @nr_blocks is the number of blocks (bits) the caller expects. The buffer
 * is a whole number of pages: one page plus one more per 32768 blocks.
 * Its size is kept in a size_t; with the former u16 the value wrapped
 * (e.g. to 0 when PAGE_SIZE is 64K, or when 16 or more 4K pages are
 * needed), so too little memory was allocated and a wrong transfer length
 * was handed to the device.
 *
 * Returns 0 on success, -ENOMEM if the bitmap cannot be allocated, -EIO if
 * the command fails and -EINTR if @update_bbtbl returns non-zero.
 */
static int nvme_nvm_get_bb_tbl(struct request_queue *q, int lunid,
				unsigned int nr_blocks,
				nvm_bb_update_fn *update_bbtbl, void *priv)
{
	struct nvme_ns *ns = q->queuedata;
	struct nvme_dev *dev = ns->dev;
	struct nvme_nvm_command c = {};
	void *bb_bitmap;
	size_t bb_bitmap_size;
	int ret = 0;

	c.get_bb.opcode = nvme_nvm_admin_get_bb_tbl;
	c.get_bb.nsid = cpu_to_le32(ns->ns_id);
	c.get_bb.lbb = cpu_to_le32(lunid);

	bb_bitmap_size = ((size_t)(nr_blocks >> 15) + 1) * PAGE_SIZE;
	bb_bitmap = kmalloc(bb_bitmap_size, GFP_KERNEL);
	if (!bb_bitmap)
		return -ENOMEM;

	/* blocks the device does not report must read as "good" */
	bitmap_zero(bb_bitmap, nr_blocks);

	ret = nvme_submit_sync_cmd(q, (struct nvme_command *)&c, bb_bitmap,
				   bb_bitmap_size);
	if (ret) {
		dev_err(dev->dev, "get bad block table failed (%d)\n", ret);
		ret = -EIO;
		goto out;
	}

	if (update_bbtbl(lunid, bb_bitmap, nr_blocks, priv))
		ret = -EINTR;

out:
	kfree(bb_bitmap);
	return ret;
}
static inline void nvme_nvm_rqtocmd(struct request *rq, struct nvm_rq *rqd,
struct nvme_ns *ns, struct nvme_nvm_command *c)
{
c->ph_rw.opcode = rqd->opcode;
c->ph_rw.nsid = cpu_to_le32(ns->ns_id);
c->ph_rw.spba = cpu_to_le64(rqd->ppa_addr.ppa);
c->ph_rw.control = cpu_to_le16(rqd->flags);
c->ph_rw.length = cpu_to_le16(rqd->nr_pages - 1);
if (rqd->opcode == NVM_OP_HBWRITE || rqd->opcode == NVM_OP_HBREAD)
c->hb_rw.slba = cpu_to_le64(nvme_block_nr(ns,
rqd->bio->bi_iter.bi_sector));
}
static void nvme_nvm_end_io(struct request *rq, int error)
{
struct nvm_rq *rqd = rq->end_io_data;
struct nvm_dev *dev = rqd->dev;
if (dev->mt->end_io(rqd, error))
pr_err("nvme: err status: %x result: %lx\n",
rq->errors, (unsigned long)rq->special);
kfree(rq->cmd);
blk_mq_free_request(rq);
}
/*
 * Submit the LightNVM request @rqd asynchronously on @q.
 *
 * A blk-mq request and a command buffer are allocated, the bio of @rqd is
 * attached to the request by hand, and the request is queued with
 * nvme_nvm_end_io() as completion handler. That handler frees both
 * allocations.
 *
 * Returns 0 once the request is queued, -ENOMEM if either allocation
 * fails.
 */
static int nvme_nvm_submit_io(struct request_queue *q, struct nvm_rq *rqd)
{
	struct nvme_ns *ns = q->queuedata;
	struct bio *bio = rqd->bio;
	struct nvme_nvm_command *nvm_cmd;
	struct request *req;

	req = blk_mq_alloc_request(q, bio_rw(bio), GFP_KERNEL, 0);
	if (IS_ERR(req))
		return -ENOMEM;

	nvm_cmd = kzalloc(sizeof(*nvm_cmd), GFP_KERNEL);
	if (!nvm_cmd) {
		blk_mq_free_request(req);
		return -ENOMEM;
	}

	req->cmd_type = REQ_TYPE_DRV_PRIV;
	req->ioprio = bio_prio(bio);

	if (bio_has_data(bio))
		req->nr_phys_segments = bio_phys_segments(q, bio);

	req->__data_len = bio->bi_iter.bi_size;
	req->bio = req->biotail = bio;

	nvme_nvm_rqtocmd(req, rqd, ns, nvm_cmd);

	req->cmd = (unsigned char *)nvm_cmd;
	req->cmd_len = sizeof(*nvm_cmd);
	req->special = (void *)0;

	req->end_io_data = rqd;

	blk_execute_rq_nowait(q, NULL, req, 0, nvme_nvm_end_io);

	return 0;
}
static int nvme_nvm_erase_block(struct request_queue *q, struct nvm_rq *rqd)
{
struct nvme_ns *ns = q->queuedata;
struct nvme_nvm_command c = {};
c.erase.opcode = NVM_OP_ERASE;
c.erase.nsid = cpu_to_le32(ns->ns_id);
c.erase.spba = cpu_to_le64(rqd->ppa_addr.ppa);
c.erase.length = cpu_to_le16(rqd->nr_pages - 1);
return nvme_submit_sync_cmd(q, (struct nvme_command *)&c, NULL, 0);
}
/*
 * Create a DMA pool called @name on the device behind @q. Every element
 * is PAGE_SIZE bytes large and PAGE_SIZE aligned. Returns the pool, or
 * whatever dma_pool_create() returns on failure.
 */
static void *nvme_nvm_create_dma_pool(struct request_queue *q, char *name)
{
	struct nvme_ns *ns = q->queuedata;

	return dma_pool_create(name, ns->dev->dev, PAGE_SIZE, PAGE_SIZE, 0);
}
/* Destroy a pool obtained from nvme_nvm_create_dma_pool(). */
static void nvme_nvm_destroy_dma_pool(void *pool)
{
	dma_pool_destroy((struct dma_pool *)pool);
}
/*
 * Allocate one element from @pool. The bus address is stored in
 * @dma_handler and the CPU address is returned. @q is not used.
 */
static void *nvme_nvm_dev_dma_alloc(struct request_queue *q, void *pool,
				    gfp_t mem_flags, dma_addr_t *dma_handler)
{
	void *cpu_addr = dma_pool_alloc(pool, mem_flags, dma_handler);

	return cpu_addr;
}
/*
 * Return an element to @pool. @ppa_list is the CPU address and
 * @dma_handler the bus address obtained from nvme_nvm_dev_dma_alloc().
 */
static void nvme_nvm_dev_dma_free(void *pool, void *ppa_list,
				  dma_addr_t dma_handler)
{
	struct dma_pool *dma_pool = pool;

	dma_pool_free(dma_pool, ppa_list, dma_handler);
}
/*
 * Device operations handed to the LightNVM core by nvme_nvm_register().
 * The set_bb callback is left unset (NULL).
 */
static struct nvm_dev_ops nvme_nvm_dev_ops = {
.identity = nvme_nvm_identity,
.get_l2p_tbl = nvme_nvm_get_l2p_tbl,
.get_bb_tbl = nvme_nvm_get_bb_tbl,
.submit_io = nvme_nvm_submit_io,
.erase_block = nvme_nvm_erase_block,
.create_dma_pool = nvme_nvm_create_dma_pool,
.destroy_dma_pool = nvme_nvm_destroy_dma_pool,
.dev_dma_alloc = nvme_nvm_dev_dma_alloc,
.dev_dma_free = nvme_nvm_dev_dma_free,
/* presumably the maximum number of physical sectors per request (TODO confirm in core) */
.max_phys_sect = 64,
};
/*
 * Register the namespace behind @q with the LightNVM core under
 * @disk_name, using the NVMe device operations. Returns the result of
 * nvm_register().
 */
int nvme_nvm_register(struct request_queue *q, char *disk_name)
{
	int err = nvm_register(q, disk_name, &nvme_nvm_dev_ops);

	return err;
}
/*
 * Remove the device registered as @disk_name from the LightNVM core.
 * The device is looked up by name only; @q is not used.
 */
void nvme_nvm_unregister(struct request_queue *q, char *disk_name)
{
	nvm_unregister(disk_name);
}
/*
 * Decide whether namespace @ns is an Open-Channel (LightNVM) namespace.
 *
 * A namespace qualifies when the first vendor specific byte of its
 * identify data is 0x1 and the PCI function is one of the known devices.
 * Returns 1 if supported, 0 otherwise.
 */
int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id)
{
	struct pci_dev *pdev = to_pci_dev(ns->dev->dev);

	/* both known devices advertise support through the same byte */
	if (id->vs[0] != 0x1)
		return 0;

	/* QEMU NVMe simulator */
	if (pdev->vendor == PCI_VENDOR_ID_INTEL && pdev->device == 0x5845)
		return 1;

	/* CNEX Labs */
	if (pdev->vendor == 0x1d1d && pdev->device == 0x2807)
		return 1;

	return 0;
}
#else
/*
 * Stub used when CONFIG_NVM is disabled: nothing is registered and
 * success (0) is reported.
 */
int nvme_nvm_register(struct request_queue *q, char *disk_name)
{
	const int nothing_to_do = 0;

	return nothing_to_do;
}
/*
 * Stub used when CONFIG_NVM is disabled: there is nothing to unregister.
 * The stray ';' after the body (an empty file-scope declaration that ISO C
 * does not allow) has been removed.
 */
void nvme_nvm_unregister(struct request_queue *q, char *disk_name)
{
}
/*
 * Stub used when CONFIG_NVM is disabled: no namespace is ever treated as
 * a LightNVM namespace, so 0 is returned.
 */
int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id)
{
	const int unsupported = 0;

	return unsupported;
}
#endif /* CONFIG_NVM */
...@@ -22,6 +22,11 @@ ...@@ -22,6 +22,11 @@
extern unsigned char nvme_io_timeout; extern unsigned char nvme_io_timeout;
#define NVME_IO_TIMEOUT (nvme_io_timeout * HZ) #define NVME_IO_TIMEOUT (nvme_io_timeout * HZ)
enum {
NVME_NS_LBA = 0,
NVME_NS_LIGHTNVM = 1,
};
/* /*
* Represents an NVM Express device. Each nvme_dev is a PCI function. * Represents an NVM Express device. Each nvme_dev is a PCI function.
*/ */
...@@ -84,6 +89,7 @@ struct nvme_ns { ...@@ -84,6 +89,7 @@ struct nvme_ns {
u16 ms; u16 ms;
bool ext; bool ext;
u8 pi_type; u8 pi_type;
int type;
u64 mode_select_num_blocks; u64 mode_select_num_blocks;
u32 mode_select_block_len; u32 mode_select_block_len;
}; };
...@@ -130,4 +136,8 @@ int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr); ...@@ -130,4 +136,8 @@ int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr);
int nvme_sg_io32(struct nvme_ns *ns, unsigned long arg); int nvme_sg_io32(struct nvme_ns *ns, unsigned long arg);
int nvme_sg_get_version_num(int __user *ip); int nvme_sg_get_version_num(int __user *ip);
int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id);
int nvme_nvm_register(struct request_queue *q, char *disk_name);
void nvme_nvm_unregister(struct request_queue *q, char *disk_name);
#endif /* _NVME_H */ #endif /* _NVME_H */
...@@ -1952,6 +1952,9 @@ static void nvme_free_ns(struct kref *kref) ...@@ -1952,6 +1952,9 @@ static void nvme_free_ns(struct kref *kref)
{ {
struct nvme_ns *ns = container_of(kref, struct nvme_ns, kref); struct nvme_ns *ns = container_of(kref, struct nvme_ns, kref);
if (ns->type == NVME_NS_LIGHTNVM)
nvme_nvm_unregister(ns->queue, ns->disk->disk_name);
spin_lock(&dev_list_lock); spin_lock(&dev_list_lock);
ns->disk->private_data = NULL; ns->disk->private_data = NULL;
spin_unlock(&dev_list_lock); spin_unlock(&dev_list_lock);
...@@ -2021,6 +2024,16 @@ static int nvme_revalidate_disk(struct gendisk *disk) ...@@ -2021,6 +2024,16 @@ static int nvme_revalidate_disk(struct gendisk *disk)
return -ENODEV; return -ENODEV;
} }
if (nvme_nvm_ns_supported(ns, id) && ns->type != NVME_NS_LIGHTNVM) {
if (nvme_nvm_register(ns->queue, disk->disk_name)) {
dev_warn(dev->dev,
"%s: LightNVM init failure\n", __func__);
kfree(id);
return -ENODEV;
}
ns->type = NVME_NS_LIGHTNVM;
}
old_ms = ns->ms; old_ms = ns->ms;
lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK; lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK;
ns->lba_shift = id->lbaf[lbaf].ds; ns->lba_shift = id->lbaf[lbaf].ds;
...@@ -2052,7 +2065,9 @@ static int nvme_revalidate_disk(struct gendisk *disk) ...@@ -2052,7 +2065,9 @@ static int nvme_revalidate_disk(struct gendisk *disk)
!ns->ext) !ns->ext)
nvme_init_integrity(ns); nvme_init_integrity(ns);
if (ns->ms && !(ns->ms == 8 && ns->pi_type) && !blk_get_integrity(disk)) if ((ns->ms && !(ns->ms == 8 && ns->pi_type) &&
!blk_get_integrity(disk)) ||
ns->type == NVME_NS_LIGHTNVM)
set_capacity(disk, 0); set_capacity(disk, 0);
else else
set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9)); set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));
...@@ -2175,17 +2190,19 @@ static void nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid) ...@@ -2175,17 +2190,19 @@ static void nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid)
goto out_free_disk; goto out_free_disk;
kref_get(&dev->kref); kref_get(&dev->kref);
add_disk(ns->disk); if (ns->type != NVME_NS_LIGHTNVM) {
if (ns->ms) { add_disk(ns->disk);
struct block_device *bd = bdget_disk(ns->disk, 0); if (ns->ms) {
if (!bd) struct block_device *bd = bdget_disk(ns->disk, 0);
return; if (!bd)
if (blkdev_get(bd, FMODE_READ, NULL)) { return;
bdput(bd); if (blkdev_get(bd, FMODE_READ, NULL)) {
return; bdput(bd);
return;
}
blkdev_reread_part(bd);
blkdev_put(bd, FMODE_READ);
} }
blkdev_reread_part(bd);
blkdev_put(bd, FMODE_READ);
} }
return; return;
out_free_disk: out_free_disk:
......
#ifndef NVM_H
#define NVM_H
/*
 * Constants that are available regardless of CONFIG_NVM. Two independent
 * groups share this enum, which is why values repeat.
 */
enum {
/* I/O status values */
NVM_IO_OK = 0,
NVM_IO_REQUEUE = 1,
NVM_IO_DONE = 2,
NVM_IO_ERR = 3,
/* I/O types */
NVM_IOTYPE_NONE = 0,
NVM_IOTYPE_GC = 1,
};
#ifdef CONFIG_NVM
#include <linux/blkdev.h>
#include <linux/types.h>
#include <linux/file.h>
#include <linux/dmapool.h>
/*
 * Device level constants. Several independent groups share this enum, so
 * the numeric values are only unique within a group.
 */
enum {
/* HW Responsibilities */
NVM_RSP_L2P = 1 << 0,
NVM_RSP_ECC = 1 << 1,
/* Physical Addressing Mode */
NVM_ADDRMODE_LINEAR = 0,
NVM_ADDRMODE_CHANNEL = 1,
/* Plane programming mode for LUN */
NVM_PLANE_SINGLE = 0,
NVM_PLANE_DOUBLE = 1,
NVM_PLANE_QUAD = 2,
/* Status codes */
NVM_RSP_SUCCESS = 0x0,
NVM_RSP_NOT_CHANGEABLE = 0x1,
NVM_RSP_ERR_FAILWRITE = 0x40ff,
NVM_RSP_ERR_EMPTYPAGE = 0x42ff,
/* Device opcodes */
NVM_OP_HBREAD = 0x02,
NVM_OP_HBWRITE = 0x81,
NVM_OP_PWRITE = 0x91,
NVM_OP_PREAD = 0x92,
NVM_OP_ERASE = 0x90,
/* PPA Command Flags */
NVM_IO_SNGL_ACCESS = 0x0,
NVM_IO_DUAL_ACCESS = 0x1,
NVM_IO_QUAD_ACCESS = 0x2,
NVM_IO_SUSPEND = 0x80,
NVM_IO_SLC_MODE = 0x100,
NVM_IO_SCRAMBLE_DISABLE = 0x200,
};
/*
 * One configuration group of a device in CPU byte order. The fields and
 * their order match the NVMe driver's struct nvme_nvm_id_group.
 */
struct nvm_id_group {
u8 mtype;
u8 fmtype;
u16 res16;
u8 num_ch;
u8 num_lun;
u8 num_pln;
u16 num_blk;
u16 num_pg;
u16 fpg_sz;
u16 csecs;
u16 sos;
u32 trdt;
u32 trdm;
u32 tprt;
u32 tprm;
u32 tbet;
u32 tbem;
u32 mpos;
u16 cpar;
/* pads the structure to 960 bytes */
u8 res[913];
} __packed;
/*
 * Offset/length pair for each component of a physical address. All
 * members are single bytes; 16 bytes in total.
 */
struct nvm_addr_format {
u8 ch_offset;
u8 ch_len;
u8 lun_offset;
u8 lun_len;
u8 pln_offset;
u8 pln_len;
u8 blk_offset;
u8 blk_len;
u8 pg_offset;
u8 pg_len;
u8 sect_offset;
u8 sect_len;
u8 res[4];
};
/* Device identity in CPU byte order, filled in by the driver's identity op. */
struct nvm_id {
u8 ver_id;
u8 vmnt;
/* number of valid entries in groups */
u8 cgrps;
u8 res[5];
u32 cap;
u32 dom;
struct nvm_addr_format ppaf;
u8 ppat;
/* NOTE(review): 224 bytes here, 223 in nvme_nvm_id, so the two layouts differ by one byte - confirm intended */
u8 resv[224];
struct nvm_id_group groups[4];
} __packed;
/* One instantiated target. */
struct nvm_target {
/* list linkage; presumably on nvm_dev.online_targets (TODO confirm in core) */
struct list_head list;
/* target type that created this instance */
struct nvm_tgt_type *type;
/* gendisk exposing the target */
struct gendisk *disk;
};
/* Common header of a target's private structure; struct nvm_rq points at it via ins. */
struct nvm_tgt_instance {
/* target type of the instance */
struct nvm_tgt_type *tt;
};
#define ADDR_EMPTY (~0ULL)
#define NVM_VERSION_MAJOR 1
#define NVM_VERSION_MINOR 0
#define NVM_VERSION_PATCH 0
#define NVM_SEC_BITS (8)
#define NVM_PL_BITS (6)
#define NVM_PG_BITS (16)
#define NVM_BLK_BITS (16)
#define NVM_LUN_BITS (10)
#define NVM_CH_BITS (8)
/*
 * A 64-bit physical page address, viewable in three ways: the
 * channel-based device format (chnl), the generic format (g), or the raw
 * value (ppa).
 */
struct ppa_addr {
union {
/* Channel-based PPA format in nand 4x2x2x2x8x10 */
struct {
u64 ch : 4;
u64 sec : 2; /* 4 sectors per page */
u64 pl : 2; /* 4 planes per LUN */
u64 lun : 2; /* 4 LUNs per channel */
u64 pg : 8; /* 256 pages per block */
u64 blk : 10;/* 1024 blocks per plane */
u64 resved : 36;
} chnl;
/* Generic structure for all addresses */
struct {
u64 sec : NVM_SEC_BITS;
u64 pl : NVM_PL_BITS;
u64 pg : NVM_PG_BITS;
u64 blk : NVM_BLK_BITS;
u64 lun : NVM_LUN_BITS;
u64 ch : NVM_CH_BITS;
} g;
/* raw value; ADDR_EMPTY marks an unset address */
u64 ppa;
};
} __packed;
/*
 * One LightNVM I/O request. Target private data is expected directly
 * behind this structure (see nvm_rq_to_pdu()/nvm_rq_from_pdu()).
 */
struct nvm_rq {
/* target instance that issued the request */
struct nvm_tgt_instance *ins;
/* device the request is for */
struct nvm_dev *dev;
/* bio carrying the data */
struct bio *bio;
/* either a single physical address or the bus address of a list of them */
union {
struct ppa_addr ppa_addr;
dma_addr_t dma_ppa_list;
};
/* CPU address of the address list, if one is used */
struct ppa_addr *ppa_list;
void *metadata;
dma_addr_t dma_metadata;
/* one of the NVM_OP_* device opcodes */
uint8_t opcode;
/* number of pages in the request */
uint16_t nr_pages;
/* PPA command flags (NVM_IO_*) */
uint16_t flags;
};
static inline struct nvm_rq *nvm_rq_from_pdu(void *pdu)
{
return pdu - sizeof(struct nvm_rq);
}
/*
 * Return the private data area of a request: the memory that starts
 * right after @rqdata.
 */
static inline void *nvm_rq_to_pdu(struct nvm_rq *rqdata)
{
	return &rqdata[1];
}
struct nvm_block;
/*
 * Callback types.
 *
 * nvm_l2p_update_fn: receives (start lba, number of entries, entries,
 * private data) for one slice of the L2P table.
 * nvm_bb_update_fn: receives (lun id, bad block bitmap, number of blocks,
 * private data).
 * The NVMe driver aborts with -EINTR when either callback returns non-zero.
 */
typedef int (nvm_l2p_update_fn)(u64, u32, __le64 *, void *);
typedef int (nvm_bb_update_fn)(u32, void *, unsigned int, void *);
/* Signatures of the operations a device driver provides in struct nvm_dev_ops */
typedef int (nvm_id_fn)(struct request_queue *, struct nvm_id *);
typedef int (nvm_get_l2p_tbl_fn)(struct request_queue *, u64, u32,
nvm_l2p_update_fn *, void *);
typedef int (nvm_op_bb_tbl_fn)(struct request_queue *, int, unsigned int,
nvm_bb_update_fn *, void *);
typedef int (nvm_op_set_bb_fn)(struct request_queue *, struct nvm_rq *, int);
typedef int (nvm_submit_io_fn)(struct request_queue *, struct nvm_rq *);
typedef int (nvm_erase_blk_fn)(struct request_queue *, struct nvm_rq *);
typedef void *(nvm_create_dma_pool_fn)(struct request_queue *, char *);
typedef void (nvm_destroy_dma_pool_fn)(void *);
typedef void *(nvm_dev_dma_alloc_fn)(struct request_queue *, void *, gfp_t,
dma_addr_t *);
typedef void (nvm_dev_dma_free_fn)(void *, void*, dma_addr_t);
/* Operations a device driver hands to nvm_register(). */
struct nvm_dev_ops {
/* fill in a struct nvm_id for the device */
nvm_id_fn *identity;
/* read the device's L2P table in slices */
nvm_get_l2p_tbl_fn *get_l2p_tbl;
/* read the bad block table of one LUN */
nvm_op_bb_tbl_fn *get_bb_tbl;
/* update the bad block table */
nvm_op_set_bb_fn *set_bb;
/* submit an I/O request */
nvm_submit_io_fn *submit_io;
/* erase a block */
nvm_erase_blk_fn *erase_block;
/* DMA pool management */
nvm_create_dma_pool_fn *create_dma_pool;
nvm_destroy_dma_pool_fn *destroy_dma_pool;
nvm_dev_dma_alloc_fn *dev_dma_alloc;
nvm_dev_dma_free_fn *dev_dma_free;
/* presumably the maximum number of physical sectors per request (TODO confirm in core) */
uint8_t max_phys_sect;
};
/* One LUN as seen by a media manager. */
struct nvm_lun {
/* LUN identifier; presumably the device-wide index (TODO confirm) */
int id;
/* LUN number, used as the lun field of a generic address */
int lun_id;
/* channel number, used as the ch field of a generic address */
int chnl_id;
unsigned int nr_free_blocks; /* Number of unused blocks */
/* blocks of the LUN */
struct nvm_block *blocks;
spinlock_t lock;
};
/* One block as seen by a media manager. */
struct nvm_block {
struct list_head list;
/* LUN the block belongs to */
struct nvm_lun *lun;
/* block number; block_to_ppa() reduces it modulo blks_per_lun */
unsigned long id;
/* private data pointer; owner is not defined in this header */
void *priv;
int type;
};
/* One registered open-channel device. */
struct nvm_dev {
/* driver supplied operations */
struct nvm_dev_ops *ops;
/* list linkage of the device */
struct list_head devices;
/* list of targets; presumably those created on this device (TODO confirm) */
struct list_head online_targets;
/* Media manager */
struct nvmm_type *mt;
/* presumably the media manager's private data (TODO confirm) */
void *mp;
/* Device information */
int nr_chnls;
int nr_planes;
int luns_per_chnl;
int sec_per_pg; /* only sectors for a single page */
int pgs_per_blk;
int blks_per_lun;
int sec_size;
int oob_size;
/* NVM_ADDRMODE_LINEAR or NVM_ADDRMODE_CHANNEL; selects the address conversion */
int addr_mode;
struct nvm_addr_format addr_format;
/* Calculated/Cached values. These do not reflect the actual usable
* blocks at run-time.
*/
int max_rq_size;
int plane_mode; /* drive device in single, double or quad mode */
int sec_per_pl; /* all sectors across planes */
int sec_per_blk;
int sec_per_lun;
unsigned long total_pages;
unsigned long total_blocks;
int nr_luns;
unsigned max_pages_per_blk;
/* DMA pool handle; presumably for physical address lists (TODO confirm) */
void *ppalist_pool;
struct nvm_id identity;
/* Backend device */
struct request_queue *q;
char name[DISK_NAME_LEN];
};
/* fallback conversion */
static struct ppa_addr __generic_to_linear_addr(struct nvm_dev *dev,
struct ppa_addr r)
{
struct ppa_addr l;
l.ppa = r.g.sec +
r.g.pg * dev->sec_per_pg +
r.g.blk * (dev->pgs_per_blk *
dev->sec_per_pg) +
r.g.lun * (dev->blks_per_lun *
dev->pgs_per_blk *
dev->sec_per_pg) +
r.g.ch * (dev->blks_per_lun *
dev->pgs_per_blk *
dev->luns_per_chnl *
dev->sec_per_pg);
return l;
}
/* fallback conversion */
static struct ppa_addr __linear_to_generic_addr(struct nvm_dev *dev,
struct ppa_addr r)
{
struct ppa_addr l;
int secs, pgs, blks, luns;
sector_t ppa = r.ppa;
l.ppa = 0;
div_u64_rem(ppa, dev->sec_per_pg, &secs);
l.g.sec = secs;
sector_div(ppa, dev->sec_per_pg);
div_u64_rem(ppa, dev->sec_per_blk, &pgs);
l.g.pg = pgs;
sector_div(ppa, dev->pgs_per_blk);
div_u64_rem(ppa, dev->blks_per_lun, &blks);
l.g.blk = blks;
sector_div(ppa, dev->blks_per_lun);
div_u64_rem(ppa, dev->luns_per_chnl, &luns);
l.g.lun = luns;
sector_div(ppa, dev->luns_per_chnl);
l.g.ch = ppa;
return l;
}
static struct ppa_addr __generic_to_chnl_addr(struct ppa_addr r)
{
struct ppa_addr l;
l.ppa = 0;
l.chnl.sec = r.g.sec;
l.chnl.pl = r.g.pl;
l.chnl.pg = r.g.pg;
l.chnl.blk = r.g.blk;
l.chnl.lun = r.g.lun;
l.chnl.ch = r.g.ch;
return l;
}
static struct ppa_addr __chnl_to_generic_addr(struct ppa_addr r)
{
struct ppa_addr l;
l.ppa = 0;
l.g.sec = r.chnl.sec;
l.g.pl = r.chnl.pl;
l.g.pg = r.chnl.pg;
l.g.blk = r.chnl.blk;
l.g.lun = r.chnl.lun;
l.g.ch = r.chnl.ch;
return l;
}
/*
 * Convert the device format address @gppa into the generic format,
 * according to the addressing mode of @dev. An unknown mode is a BUG.
 */
static inline struct ppa_addr addr_to_generic_mode(struct nvm_dev *dev,
						struct ppa_addr gppa)
{
	if (dev->addr_mode == NVM_ADDRMODE_LINEAR)
		return __linear_to_generic_addr(dev, gppa);

	if (dev->addr_mode == NVM_ADDRMODE_CHANNEL)
		return __chnl_to_generic_addr(gppa);

	BUG();

	return gppa;
}
/*
 * Convert the generic address @gppa into the format the device expects,
 * according to the addressing mode of @dev. An unknown mode is a BUG.
 */
static inline struct ppa_addr generic_to_addr_mode(struct nvm_dev *dev,
						struct ppa_addr gppa)
{
	if (dev->addr_mode == NVM_ADDRMODE_LINEAR)
		return __generic_to_linear_addr(dev, gppa);

	if (dev->addr_mode == NVM_ADDRMODE_CHANNEL)
		return __generic_to_chnl_addr(gppa);

	BUG();

	return gppa;
}
/* Return 1 if @ppa_addr holds the ADDR_EMPTY marker, 0 otherwise. */
static inline int ppa_empty(struct ppa_addr ppa_addr)
{
	if (ppa_addr.ppa == ADDR_EMPTY)
		return 1;

	return 0;
}
/* Mark *@ppa_addr as unset by storing the ADDR_EMPTY marker in it. */
static inline void ppa_set_empty(struct ppa_addr *ppa_addr)
{
	(*ppa_addr).ppa = ADDR_EMPTY;
}
static inline struct ppa_addr block_to_ppa(struct nvm_dev *dev,
struct nvm_block *blk)
{
struct ppa_addr ppa;
struct nvm_lun *lun = blk->lun;
ppa.ppa = 0;
ppa.g.blk = blk->id % dev->blks_per_lun;
ppa.g.lun = lun->lun_id;
ppa.g.ch = lun->chnl_id;
return ppa;
}
/* Signatures of the entry points a target type provides in struct nvm_tgt_type */
typedef void (nvm_tgt_make_rq_fn)(struct request_queue *, struct bio *);
typedef sector_t (nvm_tgt_capacity_fn)(void *);
typedef int (nvm_tgt_end_io_fn)(struct nvm_rq *, int);
typedef void *(nvm_tgt_init_fn)(struct nvm_dev *, struct gendisk *, int, int);
typedef void (nvm_tgt_exit_fn)(void *);
/* Description of a target type, registered with nvm_register_target(). */
struct nvm_tgt_type {
const char *name;
/* three version numbers; presumably major, minor, patch (TODO confirm) */
unsigned int version[3];
/* target entry points */
nvm_tgt_make_rq_fn *make_rq;
nvm_tgt_capacity_fn *capacity;
nvm_tgt_end_io_fn *end_io;
/* module-specific init/teardown */
nvm_tgt_init_fn *init;
nvm_tgt_exit_fn *exit;
/* For internal use */
struct list_head list;
};
/* Target type registration and DMA helpers; defined outside this header */
extern int nvm_register_target(struct nvm_tgt_type *);
extern void nvm_unregister_target(struct nvm_tgt_type *);
extern void *nvm_dev_dma_alloc(struct nvm_dev *, gfp_t, dma_addr_t *);
extern void nvm_dev_dma_free(struct nvm_dev *, void *, dma_addr_t);
/* Signatures of the callbacks a media manager provides in struct nvmm_type */
typedef int (nvmm_register_fn)(struct nvm_dev *);
typedef void (nvmm_unregister_fn)(struct nvm_dev *);
typedef struct nvm_block *(nvmm_get_blk_fn)(struct nvm_dev *,
struct nvm_lun *, unsigned long);
typedef void (nvmm_put_blk_fn)(struct nvm_dev *, struct nvm_block *);
typedef int (nvmm_open_blk_fn)(struct nvm_dev *, struct nvm_block *);
typedef int (nvmm_close_blk_fn)(struct nvm_dev *, struct nvm_block *);
typedef void (nvmm_flush_blk_fn)(struct nvm_dev *, struct nvm_block *);
typedef int (nvmm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *);
typedef int (nvmm_end_io_fn)(struct nvm_rq *, int);
typedef int (nvmm_erase_blk_fn)(struct nvm_dev *, struct nvm_block *,
unsigned long);
typedef struct nvm_lun *(nvmm_get_lun_fn)(struct nvm_dev *, int);
typedef void (nvmm_free_blocks_print_fn)(struct nvm_dev *);
/* Description of a media manager, registered with nvm_register_mgr(). */
struct nvmm_type {
const char *name;
/* three version numbers; presumably major, minor, patch (TODO confirm) */
unsigned int version[3];
/* attach the manager to / detach it from a device */
nvmm_register_fn *register_mgr;
nvmm_unregister_fn *unregister_mgr;
/* Block administration callbacks */
nvmm_get_blk_fn *get_blk;
nvmm_put_blk_fn *put_blk;
nvmm_open_blk_fn *open_blk;
nvmm_close_blk_fn *close_blk;
nvmm_flush_blk_fn *flush_blk;
nvmm_submit_io_fn *submit_io;
/* called by the device driver when a request completes */
nvmm_end_io_fn *end_io;
nvmm_erase_blk_fn *erase_blk;
/* Configuration management */
nvmm_get_lun_fn *get_lun;
/* Statistics */
nvmm_free_blocks_print_fn *free_blocks_print;
/* list linkage; presumably for the core's list of managers (TODO confirm) */
struct list_head list;
};
/* Core entry points for media managers, device drivers and targets; defined outside this header */
extern int nvm_register_mgr(struct nvmm_type *);
extern void nvm_unregister_mgr(struct nvmm_type *);
extern struct nvm_block *nvm_get_blk(struct nvm_dev *, struct nvm_lun *,
unsigned long);
extern void nvm_put_blk(struct nvm_dev *, struct nvm_block *);
/* register/unregister a device under a disk name */
extern int nvm_register(struct request_queue *, char *,
struct nvm_dev_ops *);
extern void nvm_unregister(char *);
extern int nvm_submit_io(struct nvm_dev *, struct nvm_rq *);
extern int nvm_erase_blk(struct nvm_dev *, struct nvm_block *);
#else /* CONFIG_NVM */
struct nvm_dev_ops;

/*
 * Stub used when CONFIG_NVM is disabled: devices cannot be registered,
 * so every attempt fails with -EINVAL.
 */
static inline int nvm_register(struct request_queue *q, char *disk_name,
			       struct nvm_dev_ops *ops)
{
	const int err = -EINVAL;

	return err;
}
/* Stub used when CONFIG_NVM is disabled: there is nothing to unregister. */
static inline void nvm_unregister(char *disk_name)
{
}
#endif /* CONFIG_NVM */
#endif /* LIGHTNVM.H */
/*
* Copyright (C) 2015 CNEX Labs. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version
* 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
* USA.
*/
#ifndef _UAPI_LINUX_LIGHTNVM_H
#define _UAPI_LINUX_LIGHTNVM_H
#ifdef __KERNEL__
#include <linux/kernel.h>
#include <linux/ioctl.h>
#else /* __KERNEL__ */
#include <stdio.h>
#include <sys/ioctl.h>
#define DISK_NAME_LEN 32
#endif /* __KERNEL__ */
#include <linux/types.h>
#include <linux/ioctl.h>
/* Size in bytes of a target type or media manager name buffer */
#define NVM_TTYPE_NAME_MAX 48
/* Number of target type slots in struct nvm_ioctl_info */
#define NVM_TTYPE_MAX 63
/* Path of the LightNVM control device node */
#define NVM_CTRL_FILE "/dev/lightnvm/control"
/* Description of one target type, as reported by NVM_INFO (64 bytes). */
struct nvm_ioctl_info_tgt {
/* three version numbers of the target type */
__u32 version[3];
__u32 reserved;
/* name of the target type */
char tgtname[NVM_TTYPE_NAME_MAX];
};
/*
 * Argument of NVM_INFO. A 64-byte header followed by 63 target slots of
 * 64 bytes each, 4096 bytes in total.
 */
struct nvm_ioctl_info {
__u32 version[3]; /* in/out - major, minor, patch */
__u16 tgtsize; /* number of targets */
__u16 reserved16; /* pad to 4K page */
__u32 reserved[12];
struct nvm_ioctl_info_tgt tgts[NVM_TTYPE_MAX];
};
/* Device flag bits; presumably reported in nvm_ioctl_device_info.flags (TODO confirm) */
enum {
NVM_DEVICE_ACTIVE = 1 << 0,
};
/* Description of one device, as reported by NVM_GET_DEVICES. */
struct nvm_ioctl_device_info {
/* device name */
char devname[DISK_NAME_LEN];
/* name of the device's media manager (presumably; TODO confirm) */
char bmname[NVM_TTYPE_NAME_MAX];
/* three version numbers belonging to bmname */
__u32 bmversion[3];
__u32 flags;
__u32 reserved[8];
};
/* Argument of NVM_GET_DEVICES; room for up to 31 device descriptions. */
struct nvm_ioctl_get_devices {
/* number of valid entries in info (presumably; TODO confirm) */
__u32 nr_devices;
__u32 reserved[31];
struct nvm_ioctl_device_info info[31];
};
/* "Simple" target configuration: a range of LUNs. */
struct nvm_ioctl_create_simple {
/* first LUN of the range */
__u32 lun_begin;
/* last LUN of the range (whether inclusive is decided by the core; TODO confirm) */
__u32 lun_end;
};
/* Values for nvm_ioctl_create_conf.type */
enum {
NVM_CONFIG_TYPE_SIMPLE = 0,
};
/* Target configuration: a type tag followed by the matching union member. */
struct nvm_ioctl_create_conf {
/* NVM_CONFIG_TYPE_*; selects which union member is meaningful */
__u32 type;
union {
/* used with NVM_CONFIG_TYPE_SIMPLE */
struct nvm_ioctl_create_simple s;
};
};
/* Argument of NVM_DEV_CREATE: create a target on a device. */
struct nvm_ioctl_create {
char dev[DISK_NAME_LEN]; /* open-channel SSD device */
char tgttype[NVM_TTYPE_NAME_MAX]; /* target type name */
char tgtname[DISK_NAME_LEN]; /* dev to expose target as */
__u32 flags;
struct nvm_ioctl_create_conf conf;
};
/* Argument of NVM_DEV_REMOVE: remove a target by name. */
struct nvm_ioctl_remove {
/* name of the target to remove */
char tgtname[DISK_NAME_LEN];
__u32 flags;
};
/* The ioctl type, 'L', 0x20 - 0x2F documented in ioctl-number.txt */
/* Command numbers; they count up from 0x20, so the last one is 0x23 */
enum {
/* top level cmds */
NVM_INFO_CMD = 0x20,
NVM_GET_DEVICES_CMD,
/* device level cmds */
NVM_DEV_CREATE_CMD,
NVM_DEV_REMOVE_CMD,
};
/* ioctl type character shared by all LightNVM ioctls */
#define NVM_IOCTL 'L' /* 0x4c */
/* Full ioctl request codes: direction, type, command number and argument structure */
#define NVM_INFO _IOWR(NVM_IOCTL, NVM_INFO_CMD, \
struct nvm_ioctl_info)
#define NVM_GET_DEVICES _IOR(NVM_IOCTL, NVM_GET_DEVICES_CMD, \
struct nvm_ioctl_get_devices)
#define NVM_DEV_CREATE _IOW(NVM_IOCTL, NVM_DEV_CREATE_CMD, \
struct nvm_ioctl_create)
#define NVM_DEV_REMOVE _IOW(NVM_IOCTL, NVM_DEV_REMOVE_CMD, \
struct nvm_ioctl_remove)
/*
 * Interface version.
 * NOTE(review): linux/lightnvm.h also defines NVM_VERSION_MAJOR/MINOR and
 * names the third component NVM_VERSION_PATCH rather than
 * NVM_VERSION_PATCHLEVEL - confirm which name is intended.
 */
#define NVM_VERSION_MAJOR 1
#define NVM_VERSION_MINOR 0
#define NVM_VERSION_PATCHLEVEL 0
#endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment