Commit 9a1d6c9e authored by Linus Torvalds

Merge tag 'for-5.15/drivers-2021-08-30' of git://git.kernel.dk/linux-block

Pull block driver updates from Jens Axboe:
 "Sitting on top of the core block changes, here are the driver changes
  for the 5.15 merge window:

   - NVMe updates via Christoph:
       - suspend improvements for devices with an HMB (Keith Busch)
       - handle double completions more gracefully (Sagi Grimberg)
       - cleanup the selects for the nvme core code a bit (Sagi Grimberg)
       - don't update queue count when failing to set io queues (Ruozhu Li)
       - various nvmet connect fixes (Amit Engel)
       - cleanup lightnvm leftovers (Keith Busch, me)
       - small cleanups (Colin Ian King, Hou Pu)
       - add tracing for the Set Features command (Hou Pu)
       - CMB sysfs cleanups (Keith Busch)
       - add a mutex_destroy call (Keith Busch)

   - remove lightnvm subsystem. It has served its purpose and ultimately
     led to zoned nvme support; we no longer need it (Christoph)

   - revert floppy O_NDELAY fix (Denis)

   - nbd fixes (Hou, Pavel, Baokun)

   - nbd locking fixes (Tetsuo)

   - nbd device removal fixes (Christoph)

   - raid10 rcu warning fix (Xiao)

   - raid1 write behind fix (Guoqing)

   - rnbd fixes (Gioh, Md Haris)

   - misc fixes (Colin)"

* tag 'for-5.15/drivers-2021-08-30' of git://git.kernel.dk/linux-block: (42 commits)
  Revert "floppy: reintroduce O_NDELAY fix"
  raid1: ensure write behind bio has less than BIO_MAX_VECS sectors
  md/raid10: Remove unnecessary rcu_dereference in raid10_handle_discard
  nbd: remove nbd->destroy_complete
  nbd: only return usable devices from nbd_find_unused
  nbd: set nbd->index before releasing nbd_index_mutex
  nbd: prevent IDR lookups from finding partially initialized devices
  nbd: reset NBD to NULL when restarting in nbd_genl_connect
  nbd: add missing locking to the nbd_dev_add error path
  nvme: remove the unused NVME_NS_* enum
  nvme: remove nvm_ndev from ns
  nvme: Have NVME_FABRICS select NVME_CORE instead of transport drivers
  block: nbd: add sanity check for first_minor
  nvmet: check that host sqsize does not exceed ctrl MQES
  nvmet: avoid duplicate qid in connect cmd
  nvmet: pass back cntlid on successful completion
  nvme-rdma: don't update queue count when failing to set io queues
  nvme-tcp: don't update queue count when failing to set io queues
  nvme-tcp: pair send_mutex init with destroy
  nvme: allow user toggling hmb usage
  ...
parents 67936911 b5b0eba5
......@@ -85,7 +85,6 @@ available subsections can be seen below.
io-mapping
io_ordering
generic-counter
lightnvm-pblk
memory-devices/index
men-chameleon-bus
ntb
......
pblk: Physical Block Device Target
==================================
pblk implements a fully associative, host-based FTL that exposes a traditional
block I/O interface. Its primary responsibilities are:
- Map logical addresses onto physical addresses (4KB granularity) in a
logical-to-physical (L2P) table.
- Maintain the integrity and consistency of the L2P table as well as its
recovery from normal tear down and power outage.
- Deal with controller- and media-specific constraints.
- Handle I/O errors.
- Implement garbage collection.
- Maintain consistency across the I/O stack during synchronization points.
For more information please refer to:
http://lightnvm.io
which maintains updated FAQs, manual pages, technical documentation, tools,
contacts, etc.
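
To make the L2P idea described above concrete, here is a minimal, self-contained C sketch of a flat logical-to-physical table at 4KB granularity. It is an illustration only, under the assumption of a simple array-backed map; the names (l2p_table, l2p_update, L2P_EMPTY) are hypothetical and do not mirror pblk's actual data structures.

/*
 * Illustrative sketch of a logical-to-physical (L2P) table: one entry
 * per 4KB logical block, holding the physical address it currently
 * maps to. Not pblk code.
 */
#include <stdint.h>
#include <stdlib.h>

#define L2P_EMPTY ((uint64_t)-1)	/* logical block currently unmapped */

struct l2p_table {
	uint64_t *ppa;		/* physical address per 4KB logical block */
	size_t nr_entries;
};

static struct l2p_table *l2p_create(size_t nr_entries)
{
	struct l2p_table *t = malloc(sizeof(*t));

	if (!t)
		return NULL;
	t->ppa = malloc(nr_entries * sizeof(*t->ppa));
	if (!t->ppa) {
		free(t);
		return NULL;
	}
	for (size_t i = 0; i < nr_entries; i++)
		t->ppa[i] = L2P_EMPTY;
	t->nr_entries = nr_entries;
	return t;
}

/* Remap a logical block after a host write (or a GC move) lands on a new PPA. */
static void l2p_update(struct l2p_table *t, uint64_t lba, uint64_t new_ppa)
{
	t->ppa[lba] = new_ppa;
}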
......@@ -160,7 +160,6 @@ Code Seq# Include File Comments
'K' all linux/kd.h
'L' 00-1F linux/loop.h conflict!
'L' 10-1F drivers/scsi/mpt3sas/mpt3sas_ctl.h conflict!
'L' 20-2F linux/lightnvm.h
'L' E0-FF linux/ppdd.h encrypted disk device driver
<http://linux01.gwdg.de/~alatham/ppdd.html>
'M' all linux/soundcard.h conflict!
......
......@@ -10619,15 +10619,6 @@ F: LICENSES/
F: scripts/spdxcheck-test.sh
F: scripts/spdxcheck.py
LIGHTNVM PLATFORM SUPPORT
M: Matias Bjorling <mb@lightnvm.io>
L: linux-block@vger.kernel.org
S: Maintained
W: http://github/OpenChannelSSD
F: drivers/lightnvm/
F: include/linux/lightnvm.h
F: include/uapi/linux/lightnvm.h
LINEAR RANGES HELPERS
M: Mark Brown <broonie@kernel.org>
R: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
......
......@@ -51,8 +51,6 @@ source "drivers/net/Kconfig"
source "drivers/isdn/Kconfig"
source "drivers/lightnvm/Kconfig"
# input before char - char/joystick depends on it. As does USB.
source "drivers/input/Kconfig"
......
......@@ -70,7 +70,6 @@ obj-$(CONFIG_FB_I810) += video/fbdev/i810/
obj-$(CONFIG_FB_INTEL) += video/fbdev/intelfb/
obj-$(CONFIG_PARPORT) += parport/
obj-$(CONFIG_NVM) += lightnvm/
obj-y += base/ block/ misc/ mfd/ nfc/
obj-$(CONFIG_LIBNVDIMM) += nvdimm/
obj-$(CONFIG_DAX) += dax/
......
......@@ -4029,9 +4029,11 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)
if (fdc_state[FDC(drive)].rawcmd == 1)
fdc_state[FDC(drive)].rawcmd = 2;
if (!(mode & FMODE_NDELAY)) {
if (mode & (FMODE_READ|FMODE_WRITE)) {
drive_state[drive].last_checked = 0;
clear_bit(FD_OPEN_SHOULD_FAIL_BIT, &drive_state[drive].flags);
clear_bit(FD_OPEN_SHOULD_FAIL_BIT,
&drive_state[drive].flags);
if (bdev_check_media_change(bdev))
floppy_revalidate(bdev->bd_disk);
if (test_bit(FD_DISK_CHANGED_BIT, &drive_state[drive].flags))
......@@ -4039,13 +4041,11 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)
if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &drive_state[drive].flags))
goto out;
}
res = -EROFS;
if ((mode & FMODE_WRITE) &&
!test_bit(FD_DISK_WRITABLE_BIT, &drive_state[drive].flags))
goto out;
}
mutex_unlock(&open_lock);
mutex_unlock(&floppy_mutex);
return 0;
......
......@@ -49,6 +49,7 @@
static DEFINE_IDR(nbd_index_idr);
static DEFINE_MUTEX(nbd_index_mutex);
static struct workqueue_struct *nbd_del_wq;
static int nbd_total_devices = 0;
struct nbd_sock {
......@@ -113,12 +114,12 @@ struct nbd_device {
struct mutex config_lock;
struct gendisk *disk;
struct workqueue_struct *recv_workq;
struct work_struct remove_work;
struct list_head list;
struct task_struct *task_recv;
struct task_struct *task_setup;
struct completion *destroy_complete;
unsigned long flags;
char *backend;
......@@ -237,32 +238,36 @@ static void nbd_dev_remove(struct nbd_device *nbd)
{
struct gendisk *disk = nbd->disk;
if (disk) {
del_gendisk(disk);
blk_cleanup_disk(disk);
blk_mq_free_tag_set(&nbd->tag_set);
}
/*
* Place this in the last just before the nbd is freed to
* make sure that the disk and the related kobject are also
* totally removed to avoid duplicate creation of the same
* one.
* Remove from idr after del_gendisk() completes, so if the same ID is
* reused, the following add_disk() will succeed.
*/
if (test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags) && nbd->destroy_complete)
complete(nbd->destroy_complete);
mutex_lock(&nbd_index_mutex);
idr_remove(&nbd_index_idr, nbd->index);
mutex_unlock(&nbd_index_mutex);
kfree(nbd);
}
static void nbd_dev_remove_work(struct work_struct *work)
{
nbd_dev_remove(container_of(work, struct nbd_device, remove_work));
}
static void nbd_put(struct nbd_device *nbd)
{
if (refcount_dec_and_mutex_lock(&nbd->refs,
&nbd_index_mutex)) {
idr_remove(&nbd_index_idr, nbd->index);
if (!refcount_dec_and_test(&nbd->refs))
return;
/* Call del_gendisk() asynchronously to prevent deadlock */
if (test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags))
queue_work(nbd_del_wq, &nbd->remove_work);
else
nbd_dev_remove(nbd);
mutex_unlock(&nbd_index_mutex);
}
}
static int nbd_disconnected(struct nbd_config *config)
......@@ -1388,6 +1393,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
unsigned int cmd, unsigned long arg)
{
struct nbd_config *config = nbd->config;
loff_t bytesize;
switch (cmd) {
case NBD_DISCONNECT:
......@@ -1402,8 +1408,9 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
case NBD_SET_SIZE:
return nbd_set_size(nbd, arg, config->blksize);
case NBD_SET_SIZE_BLOCKS:
return nbd_set_size(nbd, arg * config->blksize,
config->blksize);
if (check_mul_overflow((loff_t)arg, config->blksize, &bytesize))
return -EINVAL;
return nbd_set_size(nbd, bytesize, config->blksize);
case NBD_SET_TIMEOUT:
nbd_set_cmd_timeout(nbd, arg);
return 0;
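
The NBD_SET_SIZE_BLOCKS change above rejects a byte size whose blocks-times-blocksize product would overflow, instead of letting it wrap silently. Below is a hedged userspace sketch of the same guard using the GCC/Clang builtin that the kernel's check_mul_overflow() helper is built on; the function name set_size_blocks is invented for illustration.

#include <stdint.h>
#include <stdio.h>

static int set_size_blocks(uint64_t nr_blocks, uint64_t blksize)
{
	uint64_t bytes;

	/* returns true when nr_blocks * blksize does not fit in uint64_t */
	if (__builtin_mul_overflow(nr_blocks, blksize, &bytes))
		return -1;	/* mirror the -EINVAL path above */

	printf("new size: %llu bytes\n", (unsigned long long)bytes);
	return 0;
}

int main(void)
{
	/* deliberately overflowing request: expect the guard to trip */
	return set_size_blocks(UINT64_MAX, 4096) ? 1 : 0;
}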
......@@ -1665,7 +1672,7 @@ static const struct blk_mq_ops nbd_mq_ops = {
.timeout = nbd_xmit_timeout,
};
static int nbd_dev_add(int index)
static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
{
struct nbd_device *nbd;
struct gendisk *disk;
......@@ -1683,13 +1690,14 @@ static int nbd_dev_add(int index)
nbd->tag_set.flags = BLK_MQ_F_SHOULD_MERGE |
BLK_MQ_F_BLOCKING;
nbd->tag_set.driver_data = nbd;
nbd->destroy_complete = NULL;
INIT_WORK(&nbd->remove_work, nbd_dev_remove_work);
nbd->backend = NULL;
err = blk_mq_alloc_tag_set(&nbd->tag_set);
if (err)
goto out_free_nbd;
mutex_lock(&nbd_index_mutex);
if (index >= 0) {
err = idr_alloc(&nbd_index_idr, nbd, index, index + 1,
GFP_KERNEL);
......@@ -1700,9 +1708,10 @@ static int nbd_dev_add(int index)
if (err >= 0)
index = err;
}
nbd->index = index;
mutex_unlock(&nbd_index_mutex);
if (err < 0)
goto out_free_tags;
nbd->index = index;
disk = blk_mq_alloc_disk(&nbd->tag_set, NULL);
if (IS_ERR(disk)) {
......@@ -1726,38 +1735,65 @@ static int nbd_dev_add(int index)
mutex_init(&nbd->config_lock);
refcount_set(&nbd->config_refs, 0);
refcount_set(&nbd->refs, 1);
/*
* Start out with zero references to keep other threads from using
* this device until it is fully initialized.
*/
refcount_set(&nbd->refs, 0);
INIT_LIST_HEAD(&nbd->list);
disk->major = NBD_MAJOR;
/* Too big first_minor can cause duplicate creation of
* sysfs files/links, since first_minor will be truncated to
* byte in __device_add_disk().
*/
disk->first_minor = index << part_shift;
if (disk->first_minor > 0xff) {
err = -EINVAL;
goto out_free_idr;
}
disk->minors = 1 << part_shift;
disk->fops = &nbd_fops;
disk->private_data = nbd;
sprintf(disk->disk_name, "nbd%d", index);
add_disk(disk);
/*
* Now publish the device.
*/
refcount_set(&nbd->refs, refs);
nbd_total_devices++;
return index;
return nbd;
out_free_idr:
mutex_lock(&nbd_index_mutex);
idr_remove(&nbd_index_idr, index);
mutex_unlock(&nbd_index_mutex);
out_free_tags:
blk_mq_free_tag_set(&nbd->tag_set);
out_free_nbd:
kfree(nbd);
out:
return err;
return ERR_PTR(err);
}
static int find_free_cb(int id, void *ptr, void *data)
static struct nbd_device *nbd_find_get_unused(void)
{
struct nbd_device *nbd = ptr;
struct nbd_device **found = data;
struct nbd_device *nbd;
int id;
lockdep_assert_held(&nbd_index_mutex);
if (!refcount_read(&nbd->config_refs)) {
*found = nbd;
return 1;
idr_for_each_entry(&nbd_index_idr, nbd, id) {
if (refcount_read(&nbd->config_refs) ||
test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags))
continue;
if (refcount_inc_not_zero(&nbd->refs))
return nbd;
}
return 0;
return NULL;
}
/* Netlink interface. */
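
For readers unfamiliar with the reference pattern used in nbd_dev_add() and nbd_find_get_unused() above (insert the object with a refcount of 0, publish a non-zero count only after setup is complete, and let lookups take a reference only via an increment that fails on zero), here is a hedged userspace sketch with C11 atomics. The names obj, obj_get_unless_zero and obj_publish are invented for illustration and are not kernel APIs.

#include <stdatomic.h>
#include <stdbool.h>

struct obj {
	atomic_uint refs;	/* 0 = not yet published, or being torn down */
};

/* Lookup side: take a reference only if the object is already live. */
static bool obj_get_unless_zero(struct obj *o)
{
	unsigned int old = atomic_load(&o->refs);

	while (old != 0) {
		/* on failure, old is reloaded and the loop retries */
		if (atomic_compare_exchange_weak(&o->refs, &old, old + 1))
			return true;	/* got a reference */
	}
	return false;			/* not usable, skip this object */
}

/* Setup side: only now may obj_get_unless_zero() start succeeding. */
static void obj_publish(struct obj *o, unsigned int initial_refs)
{
	atomic_store(&o->refs, initial_refs);
}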
......@@ -1806,8 +1842,7 @@ static int nbd_genl_size_set(struct genl_info *info, struct nbd_device *nbd)
static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info)
{
DECLARE_COMPLETION_ONSTACK(destroy_complete);
struct nbd_device *nbd = NULL;
struct nbd_device *nbd;
struct nbd_config *config;
int index = -1;
int ret;
......@@ -1829,55 +1864,29 @@ static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info)
again:
mutex_lock(&nbd_index_mutex);
if (index == -1) {
ret = idr_for_each(&nbd_index_idr, &find_free_cb, &nbd);
if (ret == 0) {
int new_index;
new_index = nbd_dev_add(-1);
if (new_index < 0) {
mutex_unlock(&nbd_index_mutex);
printk(KERN_ERR "nbd: failed to add new device\n");
return new_index;
}
nbd = idr_find(&nbd_index_idr, new_index);
}
nbd = nbd_find_get_unused();
} else {
nbd = idr_find(&nbd_index_idr, index);
if (!nbd) {
ret = nbd_dev_add(index);
if (ret < 0) {
if (nbd) {
if ((test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags) &&
test_bit(NBD_DISCONNECT_REQUESTED, &nbd->flags)) ||
!refcount_inc_not_zero(&nbd->refs)) {
mutex_unlock(&nbd_index_mutex);
printk(KERN_ERR "nbd: failed to add new device\n");
return ret;
}
nbd = idr_find(&nbd_index_idr, index);
}
}
if (!nbd) {
printk(KERN_ERR "nbd: couldn't find device at index %d\n",
pr_err("nbd: device at index %d is going down\n",
index);
mutex_unlock(&nbd_index_mutex);
return -EINVAL;
}
if (test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags) &&
test_bit(NBD_DISCONNECT_REQUESTED, &nbd->flags)) {
nbd->destroy_complete = &destroy_complete;
}
}
mutex_unlock(&nbd_index_mutex);
/* Wait untill the the nbd stuff is totally destroyed */
wait_for_completion(&destroy_complete);
goto again;
if (!nbd) {
nbd = nbd_dev_add(index, 2);
if (IS_ERR(nbd)) {
pr_err("nbd: failed to add new device\n");
return PTR_ERR(nbd);
}
if (!refcount_inc_not_zero(&nbd->refs)) {
mutex_unlock(&nbd_index_mutex);
if (index == -1)
goto again;
printk(KERN_ERR "nbd: device at index %d is going down\n",
index);
return -EINVAL;
}
mutex_unlock(&nbd_index_mutex);
mutex_lock(&nbd->config_lock);
if (refcount_read(&nbd->config_refs)) {
......@@ -2424,16 +2433,21 @@ static int __init nbd_init(void)
if (register_blkdev(NBD_MAJOR, "nbd"))
return -EIO;
nbd_del_wq = alloc_workqueue("nbd-del", WQ_UNBOUND, 0);
if (!nbd_del_wq) {
unregister_blkdev(NBD_MAJOR, "nbd");
return -ENOMEM;
}
if (genl_register_family(&nbd_genl_family)) {
destroy_workqueue(nbd_del_wq);
unregister_blkdev(NBD_MAJOR, "nbd");
return -EINVAL;
}
nbd_dbg_init();
mutex_lock(&nbd_index_mutex);
for (i = 0; i < nbds_max; i++)
nbd_dev_add(i);
mutex_unlock(&nbd_index_mutex);
nbd_dev_add(i, 1);
return 0;
}
......@@ -2442,7 +2456,10 @@ static int nbd_exit_cb(int id, void *ptr, void *data)
struct list_head *list = (struct list_head *)data;
struct nbd_device *nbd = ptr;
/* Skip nbd that is being removed asynchronously */
if (refcount_read(&nbd->refs))
list_add_tail(&nbd->list, list);
return 0;
}
......@@ -2465,6 +2482,9 @@ static void __exit nbd_cleanup(void)
nbd_put(nbd);
}
/* Also wait for nbd_dev_remove_work() completes */
destroy_workqueue(nbd_del_wq);
idr_destroy(&nbd_index_idr);
genl_unregister_family(&nbd_genl_family);
unregister_blkdev(NBD_MAJOR, "nbd");
......
......@@ -227,17 +227,17 @@ static ssize_t state_show(struct kobject *kobj,
switch (dev->dev_state) {
case DEV_STATE_INIT:
return snprintf(page, PAGE_SIZE, "init\n");
return sysfs_emit(page, "init\n");
case DEV_STATE_MAPPED:
/* TODO fix cli tool before changing to proper state */
return snprintf(page, PAGE_SIZE, "open\n");
return sysfs_emit(page, "open\n");
case DEV_STATE_MAPPED_DISCONNECTED:
/* TODO fix cli tool before changing to proper state */
return snprintf(page, PAGE_SIZE, "closed\n");
return sysfs_emit(page, "closed\n");
case DEV_STATE_UNMAPPED:
return snprintf(page, PAGE_SIZE, "unmapped\n");
return sysfs_emit(page, "unmapped\n");
default:
return snprintf(page, PAGE_SIZE, "unknown\n");
return sysfs_emit(page, "unknown\n");
}
}
......@@ -263,7 +263,7 @@ static ssize_t mapping_path_show(struct kobject *kobj,
dev = container_of(kobj, struct rnbd_clt_dev, kobj);
return scnprintf(page, PAGE_SIZE, "%s\n", dev->pathname);
return sysfs_emit(page, "%s\n", dev->pathname);
}
static struct kobj_attribute rnbd_clt_mapping_path_attr =
......@@ -276,8 +276,7 @@ static ssize_t access_mode_show(struct kobject *kobj,
dev = container_of(kobj, struct rnbd_clt_dev, kobj);
return snprintf(page, PAGE_SIZE, "%s\n",
rnbd_access_mode_str(dev->access_mode));
return sysfs_emit(page, "%s\n", rnbd_access_mode_str(dev->access_mode));
}
static struct kobj_attribute rnbd_clt_access_mode =
......@@ -286,7 +285,7 @@ static struct kobj_attribute rnbd_clt_access_mode =
static ssize_t rnbd_clt_unmap_dev_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
return scnprintf(page, PAGE_SIZE, "Usage: echo <normal|force> > %s\n",
return sysfs_emit(page, "Usage: echo <normal|force> > %s\n",
attr->attr.name);
}
......@@ -357,8 +356,7 @@ static ssize_t rnbd_clt_resize_dev_show(struct kobject *kobj,
struct kobj_attribute *attr,
char *page)
{
return scnprintf(page, PAGE_SIZE,
"Usage: echo <new size in sectors> > %s\n",
return sysfs_emit(page, "Usage: echo <new size in sectors> > %s\n",
attr->attr.name);
}
......@@ -390,8 +388,7 @@ static struct kobj_attribute rnbd_clt_resize_dev_attr =
static ssize_t rnbd_clt_remap_dev_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
return scnprintf(page, PAGE_SIZE, "Usage: echo <1> > %s\n",
attr->attr.name);
return sysfs_emit(page, "Usage: echo <1> > %s\n", attr->attr.name);
}
static ssize_t rnbd_clt_remap_dev_store(struct kobject *kobj,
......@@ -436,7 +433,7 @@ static ssize_t session_show(struct kobject *kobj, struct kobj_attribute *attr,
dev = container_of(kobj, struct rnbd_clt_dev, kobj);
return scnprintf(page, PAGE_SIZE, "%s\n", dev->sess->sessname);
return sysfs_emit(page, "%s\n", dev->sess->sessname);
}
static struct kobj_attribute rnbd_clt_session_attr =
......@@ -499,7 +496,7 @@ static ssize_t rnbd_clt_map_device_show(struct kobject *kobj,
struct kobj_attribute *attr,
char *page)
{
return scnprintf(page, PAGE_SIZE,
return sysfs_emit(page,
"Usage: echo \"[dest_port=server port number] sessname=<name of the rtrs session> path=<[srcaddr@]dstaddr> [path=<[srcaddr@]dstaddr>] device_path=<full path on remote side> [access_mode=<ro|rw|migration>] [nr_poll_queues=<number of queues>]\" > %s\n\naddr ::= [ ip:<ipv4> | ip:<ipv6> | gid:<gid> ]\n",
attr->attr.name);
}
......
......@@ -271,7 +271,7 @@ static bool rnbd_rerun_if_needed(struct rnbd_clt_session *sess)
*/
if (cpu_q)
*cpup = cpu_q->cpu;
put_cpu_var(sess->cpu_rr);
put_cpu_ptr(sess->cpu_rr);
if (q)
rnbd_clt_dev_requeue(q);
......
......@@ -90,7 +90,7 @@ static ssize_t read_only_show(struct kobject *kobj, struct kobj_attribute *attr,
sess_dev = container_of(kobj, struct rnbd_srv_sess_dev, kobj);
return scnprintf(page, PAGE_SIZE, "%d\n",
return sysfs_emit(page, "%d\n",
!(sess_dev->open_flags & FMODE_WRITE));
}
......@@ -105,7 +105,7 @@ static ssize_t access_mode_show(struct kobject *kobj,
sess_dev = container_of(kobj, struct rnbd_srv_sess_dev, kobj);
return scnprintf(page, PAGE_SIZE, "%s\n",
return sysfs_emit(page, "%s\n",
rnbd_access_mode_str(sess_dev->access_mode));
}
......@@ -119,7 +119,7 @@ static ssize_t mapping_path_show(struct kobject *kobj,
sess_dev = container_of(kobj, struct rnbd_srv_sess_dev, kobj);
return scnprintf(page, PAGE_SIZE, "%s\n", sess_dev->pathname);
return sysfs_emit(page, "%s\n", sess_dev->pathname);
}
static struct kobj_attribute rnbd_srv_dev_session_mapping_path_attr =
......@@ -128,7 +128,7 @@ static struct kobj_attribute rnbd_srv_dev_session_mapping_path_attr =
static ssize_t rnbd_srv_dev_session_force_close_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
return scnprintf(page, PAGE_SIZE, "Usage: echo 1 > %s\n",
return sysfs_emit(page, "Usage: echo 1 > %s\n",
attr->attr.name);
}
......
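
All of the rnbd sysfs hunks above follow the same conversion: sysfs_emit() already knows the buffer it is handed is a single page, so show() callbacks no longer pass PAGE_SIZE by hand. A minimal kernel-style sketch of the resulting shape is below; the attribute name example is made up, while sysfs_emit() and __ATTR_RO() are the existing helpers from <linux/sysfs.h>.

#include <linux/kobject.h>
#include <linux/sysfs.h>

static ssize_t example_show(struct kobject *kobj, struct kobj_attribute *attr,
			    char *page)
{
	/* sysfs_emit() bounds the output to PAGE_SIZE internally */
	return sysfs_emit(page, "%s\n", "example");
}

static struct kobj_attribute example_attr = __ATTR_RO(example);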
......@@ -1092,7 +1092,6 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
err = xlbd_reserve_minors(minor, nr_minors);
if (err)
return err;
err = -ENODEV;
memset(&info->tag_set, 0, sizeof(info->tag_set));
info->tag_set.ops = &blkfront_mq_ops;
......
# SPDX-License-Identifier: GPL-2.0-only
#
# Open-Channel SSD NVM configuration
#
menuconfig NVM
bool "Open-Channel SSD target support (DEPRECATED)"
depends on BLOCK
help
Say Y here to get to enable Open-channel SSDs.
Open-Channel SSDs implement a set of extension to SSDs, that
exposes direct access to the underlying non-volatile memory.
If you say N, all options in this submenu will be skipped and disabled
only do this if you know what you are doing.
This code is deprecated and will be removed in Linux 5.15.
if NVM
config NVM_PBLK
tristate "Physical Block Device Open-Channel SSD target"
select CRC32
help
Allows an open-channel SSD to be exposed as a block device to the
host. The target assumes the device exposes raw flash and must be
explicitly managed by the host.
Please note the disk format is considered EXPERIMENTAL for now.
if NVM_PBLK
config NVM_PBLK_DEBUG
bool "PBlk Debug Support"
default n
help
Enables debug support for pblk. This includes extra checks, more
vocal error messages, and extra tracking fields in the pblk sysfs
entries.
endif # NVM_PBLK_DEBUG
endif # NVM
# SPDX-License-Identifier: GPL-2.0
#
# Makefile for Open-Channel SSDs.
#
obj-$(CONFIG_NVM) := core.o
obj-$(CONFIG_NVM_PBLK) += pblk.o
pblk-y := pblk-init.o pblk-core.o pblk-rb.o \
pblk-write.o pblk-cache.o pblk-read.o \
pblk-gc.o pblk-recovery.o pblk-map.o \
pblk-rl.o pblk-sysfs.o
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2016 CNEX Labs
* Initial release: Javier Gonzalez <javier@cnexlabs.com>
* Matias Bjorling <matias@cnexlabs.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version
* 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* pblk-cache.c - pblk's write cache
*/
#include "pblk.h"
void pblk_write_to_cache(struct pblk *pblk, struct bio *bio,
unsigned long flags)
{
struct pblk_w_ctx w_ctx;
sector_t lba = pblk_get_lba(bio);
unsigned long start_time;
unsigned int bpos, pos;
int nr_entries = pblk_get_secs(bio);
int i, ret;
start_time = bio_start_io_acct(bio);
/* Update the write buffer head (mem) with the entries that we can
* write. The write in itself cannot fail, so there is no need to
* rollback from here on.
*/
retry:
ret = pblk_rb_may_write_user(&pblk->rwb, bio, nr_entries, &bpos);
switch (ret) {
case NVM_IO_REQUEUE:
io_schedule();
goto retry;
case NVM_IO_ERR:
pblk_pipeline_stop(pblk);
bio_io_error(bio);
goto out;
}
pblk_ppa_set_empty(&w_ctx.ppa);
w_ctx.flags = flags;
if (bio->bi_opf & REQ_PREFLUSH) {
w_ctx.flags |= PBLK_FLUSH_ENTRY;
pblk_write_kick(pblk);
}
if (unlikely(!bio_has_data(bio)))
goto out;
for (i = 0; i < nr_entries; i++) {
void *data = bio_data(bio);
w_ctx.lba = lba + i;
pos = pblk_rb_wrap_pos(&pblk->rwb, bpos + i);
pblk_rb_write_entry_user(&pblk->rwb, data, w_ctx, pos);
bio_advance(bio, PBLK_EXPOSED_PAGE_SIZE);
}
atomic64_add(nr_entries, &pblk->user_wa);
#ifdef CONFIG_NVM_PBLK_DEBUG
atomic_long_add(nr_entries, &pblk->inflight_writes);
atomic_long_add(nr_entries, &pblk->req_writes);
#endif
pblk_rl_inserted(&pblk->rl, nr_entries);
out:
bio_end_io_acct(bio, start_time);
pblk_write_should_kick(pblk);
if (ret == NVM_IO_DONE)
bio_endio(bio);
}
/*
* On GC the incoming lbas are not necessarily sequential. Also, some of the
* lbas might not be valid entries, which are marked as empty by the GC thread
*/
int pblk_write_gc_to_cache(struct pblk *pblk, struct pblk_gc_rq *gc_rq)
{
struct pblk_w_ctx w_ctx;
unsigned int bpos, pos;
void *data = gc_rq->data;
int i, valid_entries;
/* Update the write buffer head (mem) with the entries that we can
* write. The write in itself cannot fail, so there is no need to
* rollback from here on.
*/
retry:
if (!pblk_rb_may_write_gc(&pblk->rwb, gc_rq->secs_to_gc, &bpos)) {
io_schedule();
goto retry;
}
w_ctx.flags = PBLK_IOTYPE_GC;
pblk_ppa_set_empty(&w_ctx.ppa);
for (i = 0, valid_entries = 0; i < gc_rq->nr_secs; i++) {
if (gc_rq->lba_list[i] == ADDR_EMPTY)
continue;
w_ctx.lba = gc_rq->lba_list[i];
pos = pblk_rb_wrap_pos(&pblk->rwb, bpos + valid_entries);
pblk_rb_write_entry_gc(&pblk->rwb, data, w_ctx, gc_rq->line,
gc_rq->paddr_list[i], pos);
data += PBLK_EXPOSED_PAGE_SIZE;
valid_entries++;
}
WARN_ONCE(gc_rq->secs_to_gc != valid_entries,
"pblk: inconsistent GC write\n");
atomic64_add(valid_entries, &pblk->gc_wa);
#ifdef CONFIG_NVM_PBLK_DEBUG
atomic_long_add(valid_entries, &pblk->inflight_writes);
atomic_long_add(valid_entries, &pblk->recov_gc_writes);
#endif
pblk_write_should_kick(pblk);
return NVM_IO_OK;
}
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2016 CNEX Labs
* Initial release: Javier Gonzalez <javier@cnexlabs.com>
* Matias Bjorling <matias@cnexlabs.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version
* 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* pblk-map.c - pblk's lba-ppa mapping strategy
*
*/
#include "pblk.h"
static int pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
struct ppa_addr *ppa_list,
unsigned long *lun_bitmap,
void *meta_list,
unsigned int valid_secs)
{
struct pblk_line *line = pblk_line_get_data(pblk);
struct pblk_emeta *emeta;
struct pblk_w_ctx *w_ctx;
__le64 *lba_list;
u64 paddr;
int nr_secs = pblk->min_write_pgs;
int i;
if (!line)
return -ENOSPC;
if (pblk_line_is_full(line)) {
struct pblk_line *prev_line = line;
/* If we cannot allocate a new line, make sure to store metadata
* on current line and then fail
*/
line = pblk_line_replace_data(pblk);
pblk_line_close_meta(pblk, prev_line);
if (!line) {
pblk_pipeline_stop(pblk);
return -ENOSPC;
}
}
emeta = line->emeta;
lba_list = emeta_to_lbas(pblk, emeta->buf);
paddr = pblk_alloc_page(pblk, line, nr_secs);
for (i = 0; i < nr_secs; i++, paddr++) {
struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i);
__le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
/* ppa to be sent to the device */
ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
/* Write context for target bio completion on write buffer. Note
* that the write buffer is protected by the sync backpointer,
* and a single writer thread have access to each specific entry
* at a time. Thus, it is safe to modify the context for the
* entry we are setting up for submission without taking any
* lock or memory barrier.
*/
if (i < valid_secs) {
kref_get(&line->ref);
atomic_inc(&line->sec_to_update);
w_ctx = pblk_rb_w_ctx(&pblk->rwb, sentry + i);
w_ctx->ppa = ppa_list[i];
meta->lba = cpu_to_le64(w_ctx->lba);
lba_list[paddr] = cpu_to_le64(w_ctx->lba);
if (lba_list[paddr] != addr_empty)
line->nr_valid_lbas++;
else
atomic64_inc(&pblk->pad_wa);
} else {
lba_list[paddr] = addr_empty;
meta->lba = addr_empty;
__pblk_map_invalidate(pblk, line, paddr);
}
}
pblk_down_rq(pblk, ppa_list[0], lun_bitmap);
return 0;
}
int pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry,
unsigned long *lun_bitmap, unsigned int valid_secs,
unsigned int off)
{
void *meta_list = pblk_get_meta_for_writes(pblk, rqd);
void *meta_buffer;
struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
unsigned int map_secs;
int min = pblk->min_write_pgs;
int i;
int ret;
for (i = off; i < rqd->nr_ppas; i += min) {
map_secs = (i + min > valid_secs) ? (valid_secs % min) : min;
meta_buffer = pblk_get_meta(pblk, meta_list, i);
ret = pblk_map_page_data(pblk, sentry + i, &ppa_list[i],
lun_bitmap, meta_buffer, map_secs);
if (ret)
return ret;
}
return 0;
}
/* only if erase_ppa is set, acquire erase semaphore */
int pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
unsigned int sentry, unsigned long *lun_bitmap,
unsigned int valid_secs, struct ppa_addr *erase_ppa)
{
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
struct pblk_line_meta *lm = &pblk->lm;
void *meta_list = pblk_get_meta_for_writes(pblk, rqd);
void *meta_buffer;
struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
struct pblk_line *e_line, *d_line;
unsigned int map_secs;
int min = pblk->min_write_pgs;
int i, erase_lun;
int ret;
for (i = 0; i < rqd->nr_ppas; i += min) {
map_secs = (i + min > valid_secs) ? (valid_secs % min) : min;
meta_buffer = pblk_get_meta(pblk, meta_list, i);
ret = pblk_map_page_data(pblk, sentry + i, &ppa_list[i],
lun_bitmap, meta_buffer, map_secs);
if (ret)
return ret;
erase_lun = pblk_ppa_to_pos(geo, ppa_list[i]);
/* line can change after page map. We might also be writing the
* last line.
*/
e_line = pblk_line_get_erase(pblk);
if (!e_line)
return pblk_map_rq(pblk, rqd, sentry, lun_bitmap,
valid_secs, i + min);
spin_lock(&e_line->lock);
if (!test_bit(erase_lun, e_line->erase_bitmap)) {
set_bit(erase_lun, e_line->erase_bitmap);
atomic_dec(&e_line->left_eblks);
*erase_ppa = ppa_list[i];
erase_ppa->a.blk = e_line->id;
erase_ppa->a.reserved = 0;
spin_unlock(&e_line->lock);
/* Avoid evaluating e_line->left_eblks */
return pblk_map_rq(pblk, rqd, sentry, lun_bitmap,
valid_secs, i + min);
}
spin_unlock(&e_line->lock);
}
d_line = pblk_line_get_data(pblk);
/* line can change after page map. We might also be writing the
* last line.
*/
e_line = pblk_line_get_erase(pblk);
if (!e_line)
return -ENOSPC;
/* Erase blocks that are bad in this line but might not be in next */
if (unlikely(pblk_ppa_empty(*erase_ppa)) &&
bitmap_weight(d_line->blk_bitmap, lm->blk_per_line)) {
int bit = -1;
retry:
bit = find_next_bit(d_line->blk_bitmap,
lm->blk_per_line, bit + 1);
if (bit >= lm->blk_per_line)
return 0;
spin_lock(&e_line->lock);
if (test_bit(bit, e_line->erase_bitmap)) {
spin_unlock(&e_line->lock);
goto retry;
}
spin_unlock(&e_line->lock);
set_bit(bit, e_line->erase_bitmap);
atomic_dec(&e_line->left_eblks);
*erase_ppa = pblk->luns[bit].bppa; /* set ch and lun */
erase_ppa->a.blk = e_line->id;
}
return 0;
}
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2016 CNEX Labs
* Initial release: Javier Gonzalez <javier@cnexlabs.com>
* Matias Bjorling <matias@cnexlabs.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version
* 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* pblk-rl.c - pblk's rate limiter for user I/O
*
*/
#include "pblk.h"
static void pblk_rl_kick_u_timer(struct pblk_rl *rl)
{
mod_timer(&rl->u_timer, jiffies + msecs_to_jiffies(5000));
}
int pblk_rl_is_limit(struct pblk_rl *rl)
{
int rb_space;
rb_space = atomic_read(&rl->rb_space);
return (rb_space == 0);
}
int pblk_rl_user_may_insert(struct pblk_rl *rl, int nr_entries)
{
int rb_user_cnt = atomic_read(&rl->rb_user_cnt);
int rb_space = atomic_read(&rl->rb_space);
if (unlikely(rb_space >= 0) && (rb_space - nr_entries < 0))
return NVM_IO_ERR;
if (rb_user_cnt >= rl->rb_user_max)
return NVM_IO_REQUEUE;
return NVM_IO_OK;
}
void pblk_rl_inserted(struct pblk_rl *rl, int nr_entries)
{
int rb_space = atomic_read(&rl->rb_space);
if (unlikely(rb_space >= 0))
atomic_sub(nr_entries, &rl->rb_space);
}
int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries)
{
int rb_gc_cnt = atomic_read(&rl->rb_gc_cnt);
int rb_user_active;
/* If there is no user I/O let GC take over space on the write buffer */
rb_user_active = READ_ONCE(rl->rb_user_active);
return (!(rb_gc_cnt >= rl->rb_gc_max && rb_user_active));
}
void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries)
{
atomic_add(nr_entries, &rl->rb_user_cnt);
/* Release user I/O state. Protect from GC */
smp_store_release(&rl->rb_user_active, 1);
pblk_rl_kick_u_timer(rl);
}
void pblk_rl_werr_line_in(struct pblk_rl *rl)
{
atomic_inc(&rl->werr_lines);
}
void pblk_rl_werr_line_out(struct pblk_rl *rl)
{
atomic_dec(&rl->werr_lines);
}
void pblk_rl_gc_in(struct pblk_rl *rl, int nr_entries)
{
atomic_add(nr_entries, &rl->rb_gc_cnt);
}
void pblk_rl_out(struct pblk_rl *rl, int nr_user, int nr_gc)
{
atomic_sub(nr_user, &rl->rb_user_cnt);
atomic_sub(nr_gc, &rl->rb_gc_cnt);
}
unsigned long pblk_rl_nr_free_blks(struct pblk_rl *rl)
{
return atomic_read(&rl->free_blocks);
}
unsigned long pblk_rl_nr_user_free_blks(struct pblk_rl *rl)
{
return atomic_read(&rl->free_user_blocks);
}
static void __pblk_rl_update_rates(struct pblk_rl *rl,
unsigned long free_blocks)
{
struct pblk *pblk = container_of(rl, struct pblk, rl);
int max = rl->rb_budget;
int werr_gc_needed = atomic_read(&rl->werr_lines);
if (free_blocks >= rl->high) {
if (werr_gc_needed) {
/* Allocate a small budget for recovering
* lines with write errors
*/
rl->rb_gc_max = 1 << rl->rb_windows_pw;
rl->rb_user_max = max - rl->rb_gc_max;
rl->rb_state = PBLK_RL_WERR;
} else {
rl->rb_user_max = max;
rl->rb_gc_max = 0;
rl->rb_state = PBLK_RL_OFF;
}
} else if (free_blocks < rl->high) {
int shift = rl->high_pw - rl->rb_windows_pw;
int user_windows = free_blocks >> shift;
int user_max = user_windows << ilog2(NVM_MAX_VLBA);
rl->rb_user_max = user_max;
rl->rb_gc_max = max - user_max;
if (free_blocks <= rl->rsv_blocks) {
rl->rb_user_max = 0;
rl->rb_gc_max = max;
}
/* In the worst case, we will need to GC lines in the low list
* (high valid sector count). If there are lines to GC on high
* or mid lists, these will be prioritized
*/
rl->rb_state = PBLK_RL_LOW;
}
if (rl->rb_state != PBLK_RL_OFF)
pblk_gc_should_start(pblk);
else
pblk_gc_should_stop(pblk);
}
void pblk_rl_update_rates(struct pblk_rl *rl)
{
__pblk_rl_update_rates(rl, pblk_rl_nr_user_free_blks(rl));
}
void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line)
{
int blk_in_line = atomic_read(&line->blk_in_line);
int free_blocks;
atomic_add(blk_in_line, &rl->free_blocks);
free_blocks = atomic_add_return(blk_in_line, &rl->free_user_blocks);
__pblk_rl_update_rates(rl, free_blocks);
}
void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line,
bool used)
{
int blk_in_line = atomic_read(&line->blk_in_line);
int free_blocks;
atomic_sub(blk_in_line, &rl->free_blocks);
if (used)
free_blocks = atomic_sub_return(blk_in_line,
&rl->free_user_blocks);
else
free_blocks = atomic_read(&rl->free_user_blocks);
__pblk_rl_update_rates(rl, free_blocks);
}
int pblk_rl_high_thrs(struct pblk_rl *rl)
{
return rl->high;
}
int pblk_rl_max_io(struct pblk_rl *rl)
{
return rl->rb_max_io;
}
static void pblk_rl_u_timer(struct timer_list *t)
{
struct pblk_rl *rl = from_timer(rl, t, u_timer);
/* Release user I/O state. Protect from GC */
smp_store_release(&rl->rb_user_active, 0);
}
void pblk_rl_free(struct pblk_rl *rl)
{
del_timer(&rl->u_timer);
}
void pblk_rl_init(struct pblk_rl *rl, int budget, int threshold)
{
struct pblk *pblk = container_of(rl, struct pblk, rl);
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_line_meta *lm = &pblk->lm;
int sec_meta, blk_meta;
unsigned int rb_windows;
/* Consider sectors used for metadata */
sec_meta = (lm->smeta_sec + lm->emeta_sec[0]) * l_mg->nr_free_lines;
blk_meta = DIV_ROUND_UP(sec_meta, geo->clba);
rl->high = pblk->op_blks - blk_meta - lm->blk_per_line;
rl->high_pw = get_count_order(rl->high);
rl->rsv_blocks = pblk_get_min_chks(pblk);
/* This will always be a power-of-2 */
rb_windows = budget / NVM_MAX_VLBA;
rl->rb_windows_pw = get_count_order(rb_windows);
/* To start with, all buffer is available to user I/O writers */
rl->rb_budget = budget;
rl->rb_user_max = budget;
rl->rb_gc_max = 0;
rl->rb_state = PBLK_RL_HIGH;
/* Maximize I/O size and ensure that back threshold is respected */
if (threshold)
rl->rb_max_io = budget - pblk->min_write_pgs_data - threshold;
else
rl->rb_max_io = budget - pblk->min_write_pgs_data - 1;
atomic_set(&rl->rb_user_cnt, 0);
atomic_set(&rl->rb_gc_cnt, 0);
atomic_set(&rl->rb_space, -1);
atomic_set(&rl->werr_lines, 0);
timer_setup(&rl->u_timer, pblk_rl_u_timer, 0);
rl->rb_user_active = 0;
rl->rb_gc_active = 0;
}
/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM pblk
#if !defined(_TRACE_PBLK_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_PBLK_H
#include <linux/tracepoint.h>
struct ppa_addr;
#define show_chunk_flags(state) __print_flags(state, "", \
{ NVM_CHK_ST_FREE, "FREE", }, \
{ NVM_CHK_ST_CLOSED, "CLOSED", }, \
{ NVM_CHK_ST_OPEN, "OPEN", }, \
{ NVM_CHK_ST_OFFLINE, "OFFLINE", })
#define show_line_state(state) __print_symbolic(state, \
{ PBLK_LINESTATE_NEW, "NEW", }, \
{ PBLK_LINESTATE_FREE, "FREE", }, \
{ PBLK_LINESTATE_OPEN, "OPEN", }, \
{ PBLK_LINESTATE_CLOSED, "CLOSED", }, \
{ PBLK_LINESTATE_GC, "GC", }, \
{ PBLK_LINESTATE_BAD, "BAD", }, \
{ PBLK_LINESTATE_CORRUPT, "CORRUPT" })
#define show_pblk_state(state) __print_symbolic(state, \
{ PBLK_STATE_RUNNING, "RUNNING", }, \
{ PBLK_STATE_STOPPING, "STOPPING", }, \
{ PBLK_STATE_RECOVERING, "RECOVERING", }, \
{ PBLK_STATE_STOPPED, "STOPPED" })
#define show_chunk_erase_state(state) __print_symbolic(state, \
{ PBLK_CHUNK_RESET_START, "START", }, \
{ PBLK_CHUNK_RESET_DONE, "OK", }, \
{ PBLK_CHUNK_RESET_FAILED, "FAILED" })
TRACE_EVENT(pblk_chunk_reset,
TP_PROTO(const char *name, struct ppa_addr *ppa, int state),
TP_ARGS(name, ppa, state),
TP_STRUCT__entry(
__string(name, name)
__field(u64, ppa)
__field(int, state)
),
TP_fast_assign(
__assign_str(name, name);
__entry->ppa = ppa->ppa;
__entry->state = state;
),
TP_printk("dev=%s grp=%llu pu=%llu chk=%llu state=%s", __get_str(name),
(u64)(((struct ppa_addr *)(&__entry->ppa))->m.grp),
(u64)(((struct ppa_addr *)(&__entry->ppa))->m.pu),
(u64)(((struct ppa_addr *)(&__entry->ppa))->m.chk),
show_chunk_erase_state((int)__entry->state))
);
TRACE_EVENT(pblk_chunk_state,
TP_PROTO(const char *name, struct ppa_addr *ppa, int state),
TP_ARGS(name, ppa, state),
TP_STRUCT__entry(
__string(name, name)
__field(u64, ppa)
__field(int, state)
),
TP_fast_assign(
__assign_str(name, name);
__entry->ppa = ppa->ppa;
__entry->state = state;
),
TP_printk("dev=%s grp=%llu pu=%llu chk=%llu state=%s", __get_str(name),
(u64)(((struct ppa_addr *)(&__entry->ppa))->m.grp),
(u64)(((struct ppa_addr *)(&__entry->ppa))->m.pu),
(u64)(((struct ppa_addr *)(&__entry->ppa))->m.chk),
show_chunk_flags((int)__entry->state))
);
TRACE_EVENT(pblk_line_state,
TP_PROTO(const char *name, int line, int state),
TP_ARGS(name, line, state),
TP_STRUCT__entry(
__string(name, name)
__field(int, line)
__field(int, state)
),
TP_fast_assign(
__assign_str(name, name);
__entry->line = line;
__entry->state = state;
),
TP_printk("dev=%s line=%d state=%s", __get_str(name),
(int)__entry->line,
show_line_state((int)__entry->state))
);
TRACE_EVENT(pblk_state,
TP_PROTO(const char *name, int state),
TP_ARGS(name, state),
TP_STRUCT__entry(
__string(name, name)
__field(int, state)
),
TP_fast_assign(
__assign_str(name, name);
__entry->state = state;
),
TP_printk("dev=%s state=%s", __get_str(name),
show_pblk_state((int)__entry->state))
);
#endif /* !defined(_TRACE_PBLK_H) || defined(TRACE_HEADER_MULTI_READ) */
/* This part must be outside protection */
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH ../../drivers/lightnvm
#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_FILE pblk-trace
#include <trace/define_trace.h>
......@@ -1329,6 +1329,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
struct raid1_plug_cb *plug = NULL;
int first_clone;
int max_sectors;
bool write_behind = false;
if (mddev_is_clustered(mddev) &&
md_cluster_ops->area_resyncing(mddev, WRITE,
......@@ -1381,6 +1382,15 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
max_sectors = r1_bio->sectors;
for (i = 0; i < disks; i++) {
struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
/*
* The write-behind io is only attempted on drives marked as
* write-mostly, which means we could allocate write behind
* bio later.
*/
if (rdev && test_bit(WriteMostly, &rdev->flags))
write_behind = true;
if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
atomic_inc(&rdev->nr_pending);
blocked_rdev = rdev;
......@@ -1454,6 +1464,15 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
goto retry_write;
}
/*
* When using a bitmap, we may call alloc_behind_master_bio below.
* alloc_behind_master_bio allocates a copy of the data payload a page
* at a time and thus needs a new bio that can fit the whole payload of
* this bio in page sized chunks.
*/
if (write_behind && bitmap)
max_sectors = min_t(int, max_sectors,
BIO_MAX_VECS * (PAGE_SIZE >> 9));
if (max_sectors < bio_sectors(bio)) {
struct bio *split = bio_split(bio, max_sectors,
GFP_NOIO, &conf->bio_split);
......
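
A quick sanity check on the clamp added above, using common defaults rather than values stated in this diff: with BIO_MAX_VECS = 256 and 4 KiB pages, max_sectors is limited to 256 * (4096 >> 9) = 2048 sectors, i.e. 1 MiB, so a write-behind bio that copies the payload one page per bio_vec can always hold it.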
......@@ -33,12 +33,12 @@ config NVME_HWMON
in the system.
config NVME_FABRICS
select NVME_CORE
tristate
config NVME_RDMA
tristate "NVM Express over Fabrics RDMA host driver"
depends on INFINIBAND && INFINIBAND_ADDR_TRANS && BLOCK
select NVME_CORE
select NVME_FABRICS
select SG_POOL
help
......@@ -55,7 +55,6 @@ config NVME_FC
tristate "NVM Express over Fabrics FC host driver"
depends on BLOCK
depends on HAS_DMA
select NVME_CORE
select NVME_FABRICS
select SG_POOL
help
......@@ -72,7 +71,6 @@ config NVME_TCP
tristate "NVM Express over Fabrics TCP host driver"
depends on INET
depends on BLOCK
select NVME_CORE
select NVME_FABRICS
select CRYPTO
select CRYPTO_CRC32C
......
......@@ -12,7 +12,6 @@ obj-$(CONFIG_NVME_TCP) += nvme-tcp.o
nvme-core-y := core.o ioctl.o
nvme-core-$(CONFIG_TRACING) += trace.o
nvme-core-$(CONFIG_NVME_MULTIPATH) += multipath.o
nvme-core-$(CONFIG_NVM) += lightnvm.o
nvme-core-$(CONFIG_BLK_DEV_ZONED) += zns.o
nvme-core-$(CONFIG_FAULT_INJECTION_DEBUG_FS) += fault_inject.o
nvme-core-$(CONFIG_NVME_HWMON) += hwmon.o
......