Commit 52b084d3 authored by Linus Torvalds

Merge branch 'for-4.3/drivers' of git://git.kernel.dk/linux-block

Pull block driver updates from Jens Axboe:
 "On top of the 4.3 core block IO changes, here are the driver related
  changes for 4.3.  Basically just NVMe and nbd this time around:

   - NVMe:
      - PRACT PI improvement from Alok Pandey.
      - Cleanups and improvements to submission queue entry writing and
        doorbell ringing, using the CMB if available.  From Jon Derrick.
      - From Keith, support for setting queue maximum segments, and
        reset support.
      - Also from Jon, fixup of a u64 division issue on 32-bit archs and
        wiring up of the reset support through an ioctl.
      - Two small cleanups from Matias and Sunad.

   - nbd: various code cleanups and fixes from Markus Pargmann"

* 'for-4.3/drivers' of git://git.kernel.dk/linux-block:
  NVMe: Using PRACT bit to generate and verify PI by controller
  NVMe:Remove unreachable code in nvme_abort_req
  NVMe: Add nvme subsystem reset IOCTL
  NVMe: Add nvme subsystem reset support
  NVMe: removed unused nn var from nvme_dev_add
  NVMe: Set queue max segments
  nbd: flags is a u32 variable
  nbd: Rename functions for clearness of recv/send path
  nbd: Change 'disconnect' to be boolean
  nbd: Add debugfs entries
  nbd: Remove variable 'pid'
  nbd: Move clear queue debug message
  nbd: Remove 'harderror' and propagate error properly
  nbd: restructure sock_shutdown
  nbd: sock_shutdown, remove conditional lock
  nbd: Fix timeout detection
  nvme: Fixes u64 division which breaks i386 builds
  NVMe: Use CMB for the IO SQes if available
  NVMe: Unify SQ entry writing and doorbell ringing
parents 1081230b e19b127f
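For context on the new reset wiring mentioned above: the series adds an NVME_IOCTL_SUBSYS_RESET ioctl (see the include/uapi change at the end of this diff), handled on the NVMe controller character device; per the patch it returns -ENOTTY when the controller does not report subsystem-reset support. A minimal user-space sketch follows; the /dev/nvme0 node name is an assumption and not part of this series:

/* Minimal sketch: trigger the new NVMe subsystem reset from user space.
 * NVME_IOCTL_SUBSYS_RESET comes from the uapi header added in this diff;
 * the device path below is an assumption. */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/ioctl.h>
#include <linux/nvme.h>

int main(void)
{
    /* Assumed per-controller character device node. */
    int fd = open("/dev/nvme0", O_RDWR);

    if (fd < 0) {
        perror("open");
        return 1;
    }

    /* The kernel side writes "NVMe" to the NSSR register;
     * errno is ENOTTY if the controller lacks NSSR support. */
    if (ioctl(fd, NVME_IOCTL_SUBSYS_RESET) < 0)
        perror("NVME_IOCTL_SUBSYS_RESET");

    close(fd);
    return 0;
}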
@@ -33,6 +33,7 @@
 #include <linux/net.h>
 #include <linux/kthread.h>
 #include <linux/types.h>
+#include <linux/debugfs.h>
 #include <asm/uaccess.h>
 #include <asm/types.h>
@@ -40,8 +41,7 @@
 #include <linux/nbd.h>
 struct nbd_device {
-    int flags;
-    int harderror; /* Code of hard error */
+    u32 flags;
     struct socket * sock; /* If == NULL, device is not ready, yet */
     int magic;
@@ -56,11 +56,24 @@ struct nbd_device {
     struct gendisk *disk;
     int blksize;
     loff_t bytesize;
-    pid_t pid; /* pid of nbd-client, if attached */
     int xmit_timeout;
-    int disconnect; /* a disconnect has been requested by user */
+    bool disconnect; /* a disconnect has been requested by user */
+    struct timer_list timeout_timer;
+    struct task_struct *task_recv;
+    struct task_struct *task_send;
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+    struct dentry *dbg_dir;
+#endif
 };
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+static struct dentry *nbd_dbg_dir;
+#endif
+#define nbd_name(nbd) ((nbd)->disk->disk_name)
 #define NBD_MAGIC 0x68797548
 static unsigned int nbds_max = 16;
@@ -113,26 +126,36 @@ static void nbd_end_request(struct nbd_device *nbd, struct request *req)
 /*
  * Forcibly shutdown the socket causing all listeners to error
  */
-static void sock_shutdown(struct nbd_device *nbd, int lock)
+static void sock_shutdown(struct nbd_device *nbd)
 {
-    if (lock)
-        mutex_lock(&nbd->tx_lock);
-    if (nbd->sock) {
+    if (!nbd->sock)
+        return;
     dev_warn(disk_to_dev(nbd->disk), "shutting down socket\n");
     kernel_sock_shutdown(nbd->sock, SHUT_RDWR);
     nbd->sock = NULL;
-    }
-    if (lock)
-        mutex_unlock(&nbd->tx_lock);
+    del_timer_sync(&nbd->timeout_timer);
 }
 static void nbd_xmit_timeout(unsigned long arg)
 {
-    struct task_struct *task = (struct task_struct *)arg;
-    printk(KERN_WARNING "nbd: killing hung xmit (%s, pid: %d)\n",
-        task->comm, task->pid);
-    force_sig(SIGKILL, task);
+    struct nbd_device *nbd = (struct nbd_device *)arg;
+    struct task_struct *task;
+    if (list_empty(&nbd->queue_head))
+        return;
+    nbd->disconnect = true;
+    task = READ_ONCE(nbd->task_recv);
+    if (task)
+        force_sig(SIGKILL, task);
+    task = READ_ONCE(nbd->task_send);
+    if (task)
+        force_sig(SIGKILL, nbd->task_send);
+    dev_err(nbd_to_dev(nbd), "Connection timed out, killed receiver and sender, shutting down connection\n");
 }
 /*
@@ -171,33 +194,12 @@ static int sock_xmit(struct nbd_device *nbd, int send, void *buf, int size,
     msg.msg_controllen = 0;
     msg.msg_flags = msg_flags | MSG_NOSIGNAL;
-    if (send) {
-        struct timer_list ti;
-        if (nbd->xmit_timeout) {
-            init_timer(&ti);
-            ti.function = nbd_xmit_timeout;
-            ti.data = (unsigned long)current;
-            ti.expires = jiffies + nbd->xmit_timeout;
-            add_timer(&ti);
-        }
+    if (send)
         result = kernel_sendmsg(sock, &msg, &iov, 1, size);
-        if (nbd->xmit_timeout)
-            del_timer_sync(&ti);
-    } else
+    else
         result = kernel_recvmsg(sock, &msg, &iov, 1, size,
                     msg.msg_flags);
-    if (signal_pending(current)) {
-        siginfo_t info;
-        printk(KERN_WARNING "nbd (pid %d: %s) got signal %d\n",
-            task_pid_nr(current), current->comm,
-            dequeue_signal_lock(current, &current->blocked, &info));
-        result = -EINTR;
-        sock_shutdown(nbd, !send);
-        break;
-    }
     if (result <= 0) {
         if (result == 0)
             result = -EPIPE; /* short read */
@@ -210,6 +212,9 @@ static int sock_xmit(struct nbd_device *nbd, int send, void *buf, int size,
     sigprocmask(SIG_SETMASK, &oldset, NULL);
     tsk_restore_flags(current, pflags, PF_MEMALLOC);
+    if (!send && nbd->xmit_timeout)
+        mod_timer(&nbd->timeout_timer, jiffies + nbd->xmit_timeout);
     return result;
 }
@@ -333,26 +338,24 @@ static struct request *nbd_read_stat(struct nbd_device *nbd)
     if (result <= 0) {
         dev_err(disk_to_dev(nbd->disk),
             "Receive control failed (result %d)\n", result);
-        goto harderror;
+        return ERR_PTR(result);
     }
     if (ntohl(reply.magic) != NBD_REPLY_MAGIC) {
         dev_err(disk_to_dev(nbd->disk), "Wrong magic (0x%lx)\n",
                 (unsigned long)ntohl(reply.magic));
-        result = -EPROTO;
-        goto harderror;
+        return ERR_PTR(-EPROTO);
     }
     req = nbd_find_request(nbd, *(struct request **)reply.handle);
     if (IS_ERR(req)) {
         result = PTR_ERR(req);
         if (result != -ENOENT)
-            goto harderror;
+            return ERR_PTR(result);
         dev_err(disk_to_dev(nbd->disk), "Unexpected reply (%p)\n",
             reply.handle);
-        result = -EBADR;
-        goto harderror;
+        return ERR_PTR(-EBADR);
     }
     if (ntohl(reply.error)) {
@@ -380,18 +383,15 @@ static struct request *nbd_read_stat(struct nbd_device *nbd)
         }
     }
     return req;
-harderror:
-    nbd->harderror = result;
-    return NULL;
 }
 static ssize_t pid_show(struct device *dev,
             struct device_attribute *attr, char *buf)
 {
     struct gendisk *disk = dev_to_disk(dev);
-    return sprintf(buf, "%ld\n",
-        (long) ((struct nbd_device *)disk->private_data)->pid);
+    struct nbd_device *nbd = (struct nbd_device *)disk->private_data;
+    return sprintf(buf, "%d\n", task_pid_nr(nbd->task_recv));
 }
 static struct device_attribute pid_attr = {
@@ -399,7 +399,7 @@ static struct device_attribute pid_attr = {
     .show = pid_show,
 };
-static int nbd_do_it(struct nbd_device *nbd)
+static int nbd_thread_recv(struct nbd_device *nbd)
 {
     struct request *req;
     int ret;
@@ -407,20 +407,43 @@ static int nbd_do_it(struct nbd_device *nbd)
     BUG_ON(nbd->magic != NBD_MAGIC);
     sk_set_memalloc(nbd->sock->sk);
-    nbd->pid = task_pid_nr(current);
+    nbd->task_recv = current;
     ret = device_create_file(disk_to_dev(nbd->disk), &pid_attr);
     if (ret) {
         dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n");
-        nbd->pid = 0;
+        nbd->task_recv = NULL;
         return ret;
     }
-    while ((req = nbd_read_stat(nbd)) != NULL)
+    while (1) {
+        req = nbd_read_stat(nbd);
+        if (IS_ERR(req)) {
+            ret = PTR_ERR(req);
+            break;
+        }
         nbd_end_request(nbd, req);
+    }
     device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
-    nbd->pid = 0;
-    return 0;
+    nbd->task_recv = NULL;
+    if (signal_pending(current)) {
+        siginfo_t info;
+        ret = dequeue_signal_lock(current, &current->blocked, &info);
+        dev_warn(nbd_to_dev(nbd), "pid %d, %s, got signal %d\n",
+             task_pid_nr(current), current->comm, ret);
+        mutex_lock(&nbd->tx_lock);
+        sock_shutdown(nbd);
+        mutex_unlock(&nbd->tx_lock);
+        ret = -ETIMEDOUT;
+    }
+    return ret;
 }
 static void nbd_clear_que(struct nbd_device *nbd)
@@ -455,6 +478,7 @@ static void nbd_clear_que(struct nbd_device *nbd)
         req->errors++;
         nbd_end_request(nbd, req);
     }
+    dev_dbg(disk_to_dev(nbd->disk), "queue cleared\n");
 }
@@ -482,6 +506,9 @@ static void nbd_handle_req(struct nbd_device *nbd, struct request *req)
     nbd->active_req = req;
+    if (nbd->xmit_timeout && list_empty_careful(&nbd->queue_head))
+        mod_timer(&nbd->timeout_timer, jiffies + nbd->xmit_timeout);
     if (nbd_send_req(nbd, req) != 0) {
         dev_err(disk_to_dev(nbd->disk), "Request send failed\n");
         req->errors++;
@@ -503,11 +530,13 @@ static void nbd_handle_req(struct nbd_device *nbd, struct request *req)
         nbd_end_request(nbd, req);
 }
-static int nbd_thread(void *data)
+static int nbd_thread_send(void *data)
 {
     struct nbd_device *nbd = data;
     struct request *req;
+    nbd->task_send = current;
     set_user_nice(current, MIN_NICE);
     while (!kthread_should_stop() || !list_empty(&nbd->waiting_queue)) {
         /* wait for something to do */
@@ -515,6 +544,20 @@ static int nbd_thread(void *data)
                      kthread_should_stop() ||
                      !list_empty(&nbd->waiting_queue));
+        if (signal_pending(current)) {
+            siginfo_t info;
+            int ret;
+            ret = dequeue_signal_lock(current, &current->blocked,
+                          &info);
+            dev_warn(nbd_to_dev(nbd), "pid %d, %s, got signal %d\n",
+                 task_pid_nr(current), current->comm, ret);
+            mutex_lock(&nbd->tx_lock);
+            sock_shutdown(nbd);
+            mutex_unlock(&nbd->tx_lock);
+            break;
+        }
         /* extract request */
         if (list_empty(&nbd->waiting_queue))
             continue;
@@ -528,6 +571,9 @@ static int nbd_thread(void *data)
         /* handle request */
         nbd_handle_req(nbd, req);
     }
+    nbd->task_send = NULL;
     return 0;
 }
@@ -538,7 +584,7 @@ static int nbd_thread(void *data)
  * { printk( "Warning: Ignoring result!\n"); nbd_end_request( req ); }
  */
-static void do_nbd_request(struct request_queue *q)
+static void nbd_request_handler(struct request_queue *q)
         __releases(q->queue_lock) __acquires(q->queue_lock)
 {
     struct request *req;
@@ -574,6 +620,9 @@ static void do_nbd_request(struct request_queue *q)
     }
 }
+static int nbd_dev_dbg_init(struct nbd_device *nbd);
+static void nbd_dev_dbg_close(struct nbd_device *nbd);
 /* Must be called with tx_lock held */
 static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
@@ -597,7 +646,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
         if (!nbd->sock)
             return -EINVAL;
-        nbd->disconnect = 1;
+        nbd->disconnect = true;
         nbd_send_req(nbd, &sreq);
         return 0;
@@ -625,7 +674,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
             nbd->sock = sock;
             if (max_part > 0)
                 bdev->bd_invalidated = 1;
-            nbd->disconnect = 0; /* we're connected now */
+            nbd->disconnect = false; /* we're connected now */
             return 0;
         }
         return -EINVAL;
@@ -648,6 +697,12 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
     case NBD_SET_TIMEOUT:
         nbd->xmit_timeout = arg * HZ;
+        if (arg)
+            mod_timer(&nbd->timeout_timer,
+                  jiffies + nbd->xmit_timeout);
+        else
+            del_timer_sync(&nbd->timeout_timer);
         return 0;
     case NBD_SET_FLAGS:
@@ -666,7 +721,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
         struct socket *sock;
         int error;
-        if (nbd->pid)
+        if (nbd->task_recv)
             return -EBUSY;
         if (!nbd->sock)
             return -EINVAL;
@@ -683,24 +738,24 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
         else
            blk_queue_flush(nbd->disk->queue, 0);
-        thread = kthread_run(nbd_thread, nbd, "%s",
-                     nbd->disk->disk_name);
+        thread = kthread_run(nbd_thread_send, nbd, "%s",
+                     nbd_name(nbd));
         if (IS_ERR(thread)) {
             mutex_lock(&nbd->tx_lock);
             return PTR_ERR(thread);
         }
-        error = nbd_do_it(nbd);
+        nbd_dev_dbg_init(nbd);
+        error = nbd_thread_recv(nbd);
+        nbd_dev_dbg_close(nbd);
         kthread_stop(thread);
         mutex_lock(&nbd->tx_lock);
-        if (error)
-            return error;
-        sock_shutdown(nbd, 0);
+        sock_shutdown(nbd);
         sock = nbd->sock;
         nbd->sock = NULL;
         nbd_clear_que(nbd);
-        dev_warn(disk_to_dev(nbd->disk), "queue cleared\n");
         kill_bdev(bdev);
         queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
         set_device_ro(bdev, false);
@@ -714,7 +769,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
             blkdev_reread_part(bdev);
         if (nbd->disconnect) /* user requested, ignore socket errors */
             return 0;
-        return nbd->harderror;
+        return error;
     }
     case NBD_CLEAR_QUE:
@@ -758,6 +813,161 @@ static const struct block_device_operations nbd_fops =
     .ioctl = nbd_ioctl,
 };
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+static int nbd_dbg_tasks_show(struct seq_file *s, void *unused)
+{
+    struct nbd_device *nbd = s->private;
+    if (nbd->task_recv)
+        seq_printf(s, "recv: %d\n", task_pid_nr(nbd->task_recv));
+    if (nbd->task_send)
+        seq_printf(s, "send: %d\n", task_pid_nr(nbd->task_send));
+    return 0;
+}
+static int nbd_dbg_tasks_open(struct inode *inode, struct file *file)
+{
+    return single_open(file, nbd_dbg_tasks_show, inode->i_private);
+}
+static const struct file_operations nbd_dbg_tasks_ops = {
+    .open = nbd_dbg_tasks_open,
+    .read = seq_read,
+    .llseek = seq_lseek,
+    .release = single_release,
+};
+static int nbd_dbg_flags_show(struct seq_file *s, void *unused)
+{
+    struct nbd_device *nbd = s->private;
+    u32 flags = nbd->flags;
+    seq_printf(s, "Hex: 0x%08x\n\n", flags);
+    seq_puts(s, "Known flags:\n");
+    if (flags & NBD_FLAG_HAS_FLAGS)
+        seq_puts(s, "NBD_FLAG_HAS_FLAGS\n");
+    if (flags & NBD_FLAG_READ_ONLY)
+        seq_puts(s, "NBD_FLAG_READ_ONLY\n");
+    if (flags & NBD_FLAG_SEND_FLUSH)
+        seq_puts(s, "NBD_FLAG_SEND_FLUSH\n");
+    if (flags & NBD_FLAG_SEND_TRIM)
+        seq_puts(s, "NBD_FLAG_SEND_TRIM\n");
+    return 0;
+}
+static int nbd_dbg_flags_open(struct inode *inode, struct file *file)
+{
+    return single_open(file, nbd_dbg_flags_show, inode->i_private);
+}
+static const struct file_operations nbd_dbg_flags_ops = {
+    .open = nbd_dbg_flags_open,
+    .read = seq_read,
+    .llseek = seq_lseek,
+    .release = single_release,
+};
+static int nbd_dev_dbg_init(struct nbd_device *nbd)
+{
+    struct dentry *dir;
+    struct dentry *f;
+    dir = debugfs_create_dir(nbd_name(nbd), nbd_dbg_dir);
+    if (IS_ERR_OR_NULL(dir)) {
+        dev_err(nbd_to_dev(nbd), "Failed to create debugfs dir for '%s' (%ld)\n",
+            nbd_name(nbd), PTR_ERR(dir));
+        return PTR_ERR(dir);
+    }
+    nbd->dbg_dir = dir;
+    f = debugfs_create_file("tasks", 0444, dir, nbd, &nbd_dbg_tasks_ops);
+    if (IS_ERR_OR_NULL(f)) {
+        dev_err(nbd_to_dev(nbd), "Failed to create debugfs file 'tasks', %ld\n",
+            PTR_ERR(f));
+        return PTR_ERR(f);
+    }
+    f = debugfs_create_u64("size_bytes", 0444, dir, &nbd->bytesize);
+    if (IS_ERR_OR_NULL(f)) {
+        dev_err(nbd_to_dev(nbd), "Failed to create debugfs file 'size_bytes', %ld\n",
+            PTR_ERR(f));
+        return PTR_ERR(f);
+    }
+    f = debugfs_create_u32("timeout", 0444, dir, &nbd->xmit_timeout);
+    if (IS_ERR_OR_NULL(f)) {
+        dev_err(nbd_to_dev(nbd), "Failed to create debugfs file 'timeout', %ld\n",
+            PTR_ERR(f));
+        return PTR_ERR(f);
+    }
+    f = debugfs_create_u32("blocksize", 0444, dir, &nbd->blksize);
+    if (IS_ERR_OR_NULL(f)) {
+        dev_err(nbd_to_dev(nbd), "Failed to create debugfs file 'blocksize', %ld\n",
+            PTR_ERR(f));
+        return PTR_ERR(f);
+    }
+    f = debugfs_create_file("flags", 0444, dir, &nbd, &nbd_dbg_flags_ops);
+    if (IS_ERR_OR_NULL(f)) {
+        dev_err(nbd_to_dev(nbd), "Failed to create debugfs file 'flags', %ld\n",
+            PTR_ERR(f));
+        return PTR_ERR(f);
+    }
+    return 0;
+}
+static void nbd_dev_dbg_close(struct nbd_device *nbd)
+{
+    debugfs_remove_recursive(nbd->dbg_dir);
+}
+static int nbd_dbg_init(void)
+{
+    struct dentry *dbg_dir;
+    dbg_dir = debugfs_create_dir("nbd", NULL);
+    if (IS_ERR(dbg_dir))
+        return PTR_ERR(dbg_dir);
+    nbd_dbg_dir = dbg_dir;
+    return 0;
+}
+static void nbd_dbg_close(void)
+{
+    debugfs_remove_recursive(nbd_dbg_dir);
+}
+#else /* IS_ENABLED(CONFIG_DEBUG_FS) */
+static int nbd_dev_dbg_init(struct nbd_device *nbd)
+{
+    return 0;
+}
+static void nbd_dev_dbg_close(struct nbd_device *nbd)
+{
+}
+static int nbd_dbg_init(void)
+{
+    return 0;
+}
+static void nbd_dbg_close(void)
+{
+}
+#endif
 /*
  * And here should be modules and kernel interface
  *  (Just smiley confuses emacs :-)
@@ -811,7 +1021,7 @@ static int __init nbd_init(void)
          * every gendisk to have its very own request_queue struct.
          * These structs are big so we dynamically allocate them.
          */
-        disk->queue = blk_init_queue(do_nbd_request, &nbd_lock);
+        disk->queue = blk_init_queue(nbd_request_handler, &nbd_lock);
         if (!disk->queue) {
             put_disk(disk);
             goto out;
@@ -835,6 +1045,8 @@ static int __init nbd_init(void)
     printk(KERN_INFO "nbd: registered device at major %d\n", NBD_MAJOR);
+    nbd_dbg_init();
     for (i = 0; i < nbds_max; i++) {
         struct gendisk *disk = nbd_dev[i].disk;
         nbd_dev[i].magic = NBD_MAGIC;
@@ -842,6 +1054,9 @@ static int __init nbd_init(void)
         spin_lock_init(&nbd_dev[i].queue_lock);
         INIT_LIST_HEAD(&nbd_dev[i].queue_head);
         mutex_init(&nbd_dev[i].tx_lock);
+        init_timer(&nbd_dev[i].timeout_timer);
+        nbd_dev[i].timeout_timer.function = nbd_xmit_timeout;
+        nbd_dev[i].timeout_timer.data = (unsigned long)&nbd_dev[i];
         init_waitqueue_head(&nbd_dev[i].active_wq);
         init_waitqueue_head(&nbd_dev[i].waiting_wq);
         nbd_dev[i].blksize = 1024;
@@ -868,6 +1083,9 @@ static int __init nbd_init(void)
 static void __exit nbd_cleanup(void)
 {
     int i;
+    nbd_dbg_close();
     for (i = 0; i < nbds_max; i++) {
         struct gendisk *disk = nbd_dev[i].disk;
         nbd_dev[i].magic = 0;
...
@@ -72,6 +72,10 @@ module_param(nvme_char_major, int, 0);
 static int use_threaded_interrupts;
 module_param(use_threaded_interrupts, int, 0);
+static bool use_cmb_sqes = true;
+module_param(use_cmb_sqes, bool, 0644);
+MODULE_PARM_DESC(use_cmb_sqes, "use controller's memory buffer for I/O SQes");
 static DEFINE_SPINLOCK(dev_list_lock);
 static LIST_HEAD(dev_list);
 static struct task_struct *nvme_thread;
@@ -103,6 +107,7 @@ struct nvme_queue {
     char irqname[24]; /* nvme4294967295-65535\0 */
     spinlock_t q_lock;
     struct nvme_command *sq_cmds;
+    struct nvme_command __iomem *sq_cmds_io;
     volatile struct nvme_completion *cqes;
     struct blk_mq_tags **tags;
     dma_addr_t sq_dma_addr;
@@ -379,27 +384,28 @@ static void *nvme_finish_cmd(struct nvme_queue *nvmeq, int tag,
  *
  * Safe to use from interrupt context
  */
-static int __nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd)
+static void __nvme_submit_cmd(struct nvme_queue *nvmeq,
+                        struct nvme_command *cmd)
 {
     u16 tail = nvmeq->sq_tail;
-    memcpy(&nvmeq->sq_cmds[tail], cmd, sizeof(*cmd));
+    if (nvmeq->sq_cmds_io)
+        memcpy_toio(&nvmeq->sq_cmds_io[tail], cmd, sizeof(*cmd));
+    else
+        memcpy(&nvmeq->sq_cmds[tail], cmd, sizeof(*cmd));
     if (++tail == nvmeq->q_depth)
         tail = 0;
     writel(tail, nvmeq->q_db);
     nvmeq->sq_tail = tail;
-    return 0;
 }
-static int nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd)
+static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd)
 {
     unsigned long flags;
-    int ret;
     spin_lock_irqsave(&nvmeq->q_lock, flags);
-    ret = __nvme_submit_cmd(nvmeq, cmd);
+    __nvme_submit_cmd(nvmeq, cmd);
     spin_unlock_irqrestore(&nvmeq->q_lock, flags);
-    return ret;
 }
 static __le64 **iod_list(struct nvme_iod *iod)
@@ -730,18 +736,16 @@ static int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod,
 static void nvme_submit_priv(struct nvme_queue *nvmeq, struct request *req,
         struct nvme_iod *iod)
 {
-    struct nvme_command *cmnd = &nvmeq->sq_cmds[nvmeq->sq_tail];
-    memcpy(cmnd, req->cmd, sizeof(struct nvme_command));
-    cmnd->rw.command_id = req->tag;
+    struct nvme_command cmnd;
+    memcpy(&cmnd, req->cmd, sizeof(cmnd));
+    cmnd.rw.command_id = req->tag;
     if (req->nr_phys_segments) {
-        cmnd->rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
-        cmnd->rw.prp2 = cpu_to_le64(iod->first_dma);
+        cmnd.rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
+        cmnd.rw.prp2 = cpu_to_le64(iod->first_dma);
     }
-    if (++nvmeq->sq_tail == nvmeq->q_depth)
-        nvmeq->sq_tail = 0;
-    writel(nvmeq->sq_tail, nvmeq->q_db);
+    __nvme_submit_cmd(nvmeq, &cmnd);
 }
 /*
@@ -754,45 +758,41 @@ static void nvme_submit_discard(struct nvme_queue *nvmeq, struct nvme_ns *ns,
 {
     struct nvme_dsm_range *range =
                 (struct nvme_dsm_range *)iod_list(iod)[0];
-    struct nvme_command *cmnd = &nvmeq->sq_cmds[nvmeq->sq_tail];
+    struct nvme_command cmnd;
     range->cattr = cpu_to_le32(0);
     range->nlb = cpu_to_le32(blk_rq_bytes(req) >> ns->lba_shift);
     range->slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
-    memset(cmnd, 0, sizeof(*cmnd));
-    cmnd->dsm.opcode = nvme_cmd_dsm;
-    cmnd->dsm.command_id = req->tag;
-    cmnd->dsm.nsid = cpu_to_le32(ns->ns_id);
-    cmnd->dsm.prp1 = cpu_to_le64(iod->first_dma);
-    cmnd->dsm.nr = 0;
-    cmnd->dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD);
-    if (++nvmeq->sq_tail == nvmeq->q_depth)
-        nvmeq->sq_tail = 0;
-    writel(nvmeq->sq_tail, nvmeq->q_db);
+    memset(&cmnd, 0, sizeof(cmnd));
+    cmnd.dsm.opcode = nvme_cmd_dsm;
+    cmnd.dsm.command_id = req->tag;
+    cmnd.dsm.nsid = cpu_to_le32(ns->ns_id);
+    cmnd.dsm.prp1 = cpu_to_le64(iod->first_dma);
+    cmnd.dsm.nr = 0;
+    cmnd.dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD);
+    __nvme_submit_cmd(nvmeq, &cmnd);
 }
 static void nvme_submit_flush(struct nvme_queue *nvmeq, struct nvme_ns *ns,
                                 int cmdid)
 {
-    struct nvme_command *cmnd = &nvmeq->sq_cmds[nvmeq->sq_tail];
+    struct nvme_command cmnd;
-    memset(cmnd, 0, sizeof(*cmnd));
-    cmnd->common.opcode = nvme_cmd_flush;
-    cmnd->common.command_id = cmdid;
-    cmnd->common.nsid = cpu_to_le32(ns->ns_id);
-    if (++nvmeq->sq_tail == nvmeq->q_depth)
-        nvmeq->sq_tail = 0;
-    writel(nvmeq->sq_tail, nvmeq->q_db);
+    memset(&cmnd, 0, sizeof(cmnd));
+    cmnd.common.opcode = nvme_cmd_flush;
+    cmnd.common.command_id = cmdid;
+    cmnd.common.nsid = cpu_to_le32(ns->ns_id);
+    __nvme_submit_cmd(nvmeq, &cmnd);
 }
 static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod,
                             struct nvme_ns *ns)
 {
     struct request *req = iod_get_private(iod);
-    struct nvme_command *cmnd;
+    struct nvme_command cmnd;
     u16 control = 0;
     u32 dsmgmt = 0;
@@ -804,19 +804,16 @@ static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod,
     if (req->cmd_flags & REQ_RAHEAD)
         dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH;
-    cmnd = &nvmeq->sq_cmds[nvmeq->sq_tail];
-    memset(cmnd, 0, sizeof(*cmnd));
-    cmnd->rw.opcode = (rq_data_dir(req) ? nvme_cmd_write : nvme_cmd_read);
-    cmnd->rw.command_id = req->tag;
-    cmnd->rw.nsid = cpu_to_le32(ns->ns_id);
-    cmnd->rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
-    cmnd->rw.prp2 = cpu_to_le64(iod->first_dma);
-    cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
-    cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
-    if (blk_integrity_rq(req)) {
-        cmnd->rw.metadata = cpu_to_le64(sg_dma_address(iod->meta_sg));
+    memset(&cmnd, 0, sizeof(cmnd));
+    cmnd.rw.opcode = (rq_data_dir(req) ? nvme_cmd_write : nvme_cmd_read);
+    cmnd.rw.command_id = req->tag;
+    cmnd.rw.nsid = cpu_to_le32(ns->ns_id);
+    cmnd.rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
+    cmnd.rw.prp2 = cpu_to_le64(iod->first_dma);
+    cmnd.rw.slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
+    cmnd.rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
+    if (ns->ms) {
         switch (ns->pi_type) {
         case NVME_NS_DPS_PI_TYPE3:
             control |= NVME_RW_PRINFO_PRCHK_GUARD;
@@ -825,19 +822,21 @@ static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod,
         case NVME_NS_DPS_PI_TYPE2:
             control |= NVME_RW_PRINFO_PRCHK_GUARD |
                     NVME_RW_PRINFO_PRCHK_REF;
-            cmnd->rw.reftag = cpu_to_le32(
+            cmnd.rw.reftag = cpu_to_le32(
                     nvme_block_nr(ns, blk_rq_pos(req)));
             break;
         }
-    } else if (ns->ms)
-        control |= NVME_RW_PRINFO_PRACT;
+        if (blk_integrity_rq(req))
+            cmnd.rw.metadata =
+                cpu_to_le64(sg_dma_address(iod->meta_sg));
+        else
+            control |= NVME_RW_PRINFO_PRACT;
+    }
-    cmnd->rw.control = cpu_to_le16(control);
-    cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt);
-    if (++nvmeq->sq_tail == nvmeq->q_depth)
-        nvmeq->sq_tail = 0;
-    writel(nvmeq->sq_tail, nvmeq->q_db);
+    cmnd.rw.control = cpu_to_le16(control);
+    cmnd.rw.dsmgmt = cpu_to_le32(dsmgmt);
+    __nvme_submit_cmd(nvmeq, &cmnd);
     return 0;
 }
@@ -1080,7 +1079,8 @@ static int nvme_submit_async_admin_req(struct nvme_dev *dev)
     c.common.command_id = req->tag;
     blk_mq_free_request(req);
-    return __nvme_submit_cmd(nvmeq, &c);
+    __nvme_submit_cmd(nvmeq, &c);
+    return 0;
 }
 static int nvme_submit_admin_async_cmd(struct nvme_dev *dev,
@@ -1103,7 +1103,8 @@ static int nvme_submit_admin_async_cmd(struct nvme_dev *dev,
     cmd->common.command_id = req->tag;
-    return nvme_submit_cmd(nvmeq, cmd);
+    nvme_submit_cmd(nvmeq, cmd);
+    return 0;
 }
 static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
@@ -1315,12 +1316,7 @@ static void nvme_abort_req(struct request *req)
     dev_warn(nvmeq->q_dmadev, "Aborting I/O %d QID %d\n", req->tag,
                             nvmeq->qid);
-    if (nvme_submit_cmd(dev->queues[0], &cmd) < 0) {
-        dev_warn(nvmeq->q_dmadev,
-                "Could not abort I/O %d QID %d",
-                req->tag, nvmeq->qid);
-        blk_mq_free_request(abort_req);
-    }
+    nvme_submit_cmd(dev->queues[0], &cmd);
 }
 static void nvme_cancel_queue_ios(struct request *req, void *data, bool reserved)
@@ -1374,7 +1370,8 @@ static void nvme_free_queue(struct nvme_queue *nvmeq)
 {
     dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth),
                 (void *)nvmeq->cqes, nvmeq->cq_dma_addr);
-    dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
+    if (nvmeq->sq_cmds)
+        dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
                     nvmeq->sq_cmds, nvmeq->sq_dma_addr);
     kfree(nvmeq);
 }
@@ -1447,6 +1444,47 @@ static void nvme_disable_queue(struct nvme_dev *dev, int qid)
     spin_unlock_irq(&nvmeq->q_lock);
 }
+static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues,
+                int entry_size)
+{
+    int q_depth = dev->q_depth;
+    unsigned q_size_aligned = roundup(q_depth * entry_size, dev->page_size);
+    if (q_size_aligned * nr_io_queues > dev->cmb_size) {
+        u64 mem_per_q = div_u64(dev->cmb_size, nr_io_queues);
+        mem_per_q = round_down(mem_per_q, dev->page_size);
+        q_depth = div_u64(mem_per_q, entry_size);
+        /*
+         * Ensure the reduced q_depth is above some threshold where it
+         * would be better to map queues in system memory with the
+         * original depth
+         */
+        if (q_depth < 64)
+            return -ENOMEM;
+    }
+    return q_depth;
+}
+static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq,
+                int qid, int depth)
+{
+    if (qid && dev->cmb && use_cmb_sqes && NVME_CMB_SQS(dev->cmbsz)) {
+        unsigned offset = (qid - 1) *
+                    roundup(SQ_SIZE(depth), dev->page_size);
+        nvmeq->sq_dma_addr = dev->cmb_dma_addr + offset;
+        nvmeq->sq_cmds_io = dev->cmb + offset;
+    } else {
+        nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth),
+                    &nvmeq->sq_dma_addr, GFP_KERNEL);
+        if (!nvmeq->sq_cmds)
+            return -ENOMEM;
+    }
+    return 0;
+}
 static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
                             int depth)
 {
@@ -1459,9 +1497,7 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
     if (!nvmeq->cqes)
         goto free_nvmeq;
-    nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth),
-                    &nvmeq->sq_dma_addr, GFP_KERNEL);
-    if (!nvmeq->sq_cmds)
+    if (nvme_alloc_sq_cmds(dev, nvmeq, qid, depth))
         goto free_cqdma;
     nvmeq->q_dmadev = dev->dev;
@@ -1696,6 +1732,12 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
         page_shift = dev_page_max;
     }
+    dev->subsystem = readl(&dev->bar->vs) >= NVME_VS(1, 1) ?
+                        NVME_CAP_NSSRC(cap) : 0;
+    if (dev->subsystem && (readl(&dev->bar->csts) & NVME_CSTS_NSSRO))
+        writel(NVME_CSTS_NSSRO, &dev->bar->csts);
     result = nvme_disable_ctrl(dev, cap);
     if (result < 0)
         return result;
@@ -1856,6 +1898,15 @@ static int nvme_user_cmd(struct nvme_dev *dev, struct nvme_ns *ns,
     return status;
 }
+static int nvme_subsys_reset(struct nvme_dev *dev)
+{
+    if (!dev->subsystem)
+        return -ENOTTY;
+    writel(0x4E564D65, &dev->bar->nssr); /* "NVMe" */
+    return 0;
+}
 static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
                             unsigned long arg)
 {
@@ -1989,7 +2040,7 @@ static int nvme_revalidate_disk(struct gendisk *disk)
                                 !ns->ext)
         nvme_init_integrity(ns);
-    if (ns->ms && !blk_get_integrity(disk))
+    if (ns->ms && !(ns->ms == 8 && ns->pi_type) && !blk_get_integrity(disk))
         set_capacity(disk, 0);
     else
         set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));
@@ -2020,7 +2071,10 @@ static int nvme_kthread(void *data)
         spin_lock(&dev_list_lock);
         list_for_each_entry_safe(dev, next, &dev_list, node) {
             int i;
-            if (readl(&dev->bar->csts) & NVME_CSTS_CFS) {
+            u32 csts = readl(&dev->bar->csts);
+            if ((dev->subsystem && (csts & NVME_CSTS_NSSRO)) ||
+                            csts & NVME_CSTS_CFS) {
                 if (work_busy(&dev->reset_work))
                     continue;
                 list_del_init(&dev->node);
@@ -2080,8 +2134,11 @@ static void nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid)
     list_add_tail(&ns->list, &dev->namespaces);
     blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
-    if (dev->max_hw_sectors)
+    if (dev->max_hw_sectors) {
         blk_queue_max_hw_sectors(ns->queue, dev->max_hw_sectors);
+        blk_queue_max_segments(ns->queue,
+            ((dev->max_hw_sectors << 9) / dev->page_size) + 1);
+    }
     if (dev->stripe_size)
         blk_queue_chunk_sectors(ns->queue, dev->stripe_size >> 9);
     if (dev->vwc & NVME_CTRL_VWC_PRESENT)
@@ -2159,6 +2216,58 @@ static int set_queue_count(struct nvme_dev *dev, int count)
     return min(result & 0xffff, result >> 16) + 1;
 }
+static void __iomem *nvme_map_cmb(struct nvme_dev *dev)
+{
+    u64 szu, size, offset;
+    u32 cmbloc;
+    resource_size_t bar_size;
+    struct pci_dev *pdev = to_pci_dev(dev->dev);
+    void __iomem *cmb;
+    dma_addr_t dma_addr;
+    if (!use_cmb_sqes)
+        return NULL;
+    dev->cmbsz = readl(&dev->bar->cmbsz);
+    if (!(NVME_CMB_SZ(dev->cmbsz)))
+        return NULL;
+    cmbloc = readl(&dev->bar->cmbloc);
+    szu = (u64)1 << (12 + 4 * NVME_CMB_SZU(dev->cmbsz));
+    size = szu * NVME_CMB_SZ(dev->cmbsz);
+    offset = szu * NVME_CMB_OFST(cmbloc);
+    bar_size = pci_resource_len(pdev, NVME_CMB_BIR(cmbloc));
+    if (offset > bar_size)
+        return NULL;
+    /*
+     * Controllers may support a CMB size larger than their BAR,
+     * for example, due to being behind a bridge. Reduce the CMB to
+     * the reported size of the BAR
+     */
+    if (size > bar_size - offset)
+        size = bar_size - offset;
+    dma_addr = pci_resource_start(pdev, NVME_CMB_BIR(cmbloc)) + offset;
+    cmb = ioremap_wc(dma_addr, size);
+    if (!cmb)
+        return NULL;
+    dev->cmb_dma_addr = dma_addr;
+    dev->cmb_size = size;
+    return cmb;
+}
+static inline void nvme_release_cmb(struct nvme_dev *dev)
+{
+    if (dev->cmb) {
+        iounmap(dev->cmb);
+        dev->cmb = NULL;
+    }
+}
 static size_t db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues)
 {
     return 4096 + ((nr_io_queues + 1) * 8 * dev->db_stride);
@@ -2177,6 +2286,15 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
     if (result < nr_io_queues)
         nr_io_queues = result;
+    if (dev->cmb && NVME_CMB_SQS(dev->cmbsz)) {
+        result = nvme_cmb_qdepth(dev, nr_io_queues,
+                sizeof(struct nvme_command));
+        if (result > 0)
+            dev->q_depth = result;
+        else
+            nvme_release_cmb(dev);
+    }
     size = db_bar_size(dev, nr_io_queues);
     if (size > 8192) {
         iounmap(dev->bar);
@@ -2344,7 +2462,6 @@ static int nvme_dev_add(struct nvme_dev *dev)
 {
     struct pci_dev *pdev = to_pci_dev(dev->dev);
     int res;
-    unsigned nn;
     struct nvme_id_ctrl *ctrl;
     int shift = NVME_CAP_MPSMIN(readq(&dev->bar->cap)) + 12;
@@ -2354,7 +2471,6 @@ static int nvme_dev_add(struct nvme_dev *dev)
         return -EIO;
     }
-    nn = le32_to_cpup(&ctrl->nn);
     dev->oncs = le16_to_cpup(&ctrl->oncs);
     dev->abort_limit = ctrl->acl + 1;
     dev->vwc = ctrl->vwc;
@@ -2440,6 +2556,8 @@ static int nvme_dev_map(struct nvme_dev *dev)
     dev->q_depth = min_t(int, NVME_CAP_MQES(cap) + 1, NVME_Q_DEPTH);
     dev->db_stride = 1 << NVME_CAP_STRIDE(cap);
     dev->dbs = ((void __iomem *)dev->bar) + 4096;
+    if (readl(&dev->bar->vs) >= NVME_VS(1, 2))
+        dev->cmb = nvme_map_cmb(dev);
     return 0;
@@ -2820,6 +2938,8 @@ static long nvme_dev_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
     case NVME_IOCTL_RESET:
         dev_warn(dev->dev, "resetting controller\n");
         return nvme_reset(dev);
+    case NVME_IOCTL_SUBSYS_RESET:
+        return nvme_subsys_reset(dev);
     default:
         return -ENOTTY;
     }
@@ -3145,6 +3265,7 @@ static void nvme_remove(struct pci_dev *pdev)
     nvme_dev_remove_admin(dev);
    device_destroy(nvme_class, MKDEV(nvme_char_major, dev->instance));
     nvme_free_queues(dev, 0);
+    nvme_release_cmb(dev);
     nvme_release_prp_pools(dev);
     kref_put(&dev->kref, nvme_free_dev);
 }
...
@@ -28,18 +28,32 @@ struct nvme_bar {
     __u32 cc; /* Controller Configuration */
     __u32 rsvd1; /* Reserved */
     __u32 csts; /* Controller Status */
-    __u32 rsvd2; /* Reserved */
+    __u32 nssr; /* Subsystem Reset */
     __u32 aqa; /* Admin Queue Attributes */
     __u64 asq; /* Admin SQ Base Address */
     __u64 acq; /* Admin CQ Base Address */
+    __u32 cmbloc; /* Controller Memory Buffer Location */
+    __u32 cmbsz; /* Controller Memory Buffer Size */
 };
 #define NVME_CAP_MQES(cap) ((cap) & 0xffff)
 #define NVME_CAP_TIMEOUT(cap) (((cap) >> 24) & 0xff)
 #define NVME_CAP_STRIDE(cap) (((cap) >> 32) & 0xf)
+#define NVME_CAP_NSSRC(cap) (((cap) >> 36) & 0x1)
 #define NVME_CAP_MPSMIN(cap) (((cap) >> 48) & 0xf)
 #define NVME_CAP_MPSMAX(cap) (((cap) >> 52) & 0xf)
+#define NVME_CMB_BIR(cmbloc) ((cmbloc) & 0x7)
+#define NVME_CMB_OFST(cmbloc) (((cmbloc) >> 12) & 0xfffff)
+#define NVME_CMB_SZ(cmbsz) (((cmbsz) >> 12) & 0xfffff)
+#define NVME_CMB_SZU(cmbsz) (((cmbsz) >> 8) & 0xf)
+#define NVME_CMB_WDS(cmbsz) ((cmbsz) & 0x10)
+#define NVME_CMB_RDS(cmbsz) ((cmbsz) & 0x8)
+#define NVME_CMB_LISTS(cmbsz) ((cmbsz) & 0x4)
+#define NVME_CMB_CQS(cmbsz) ((cmbsz) & 0x2)
+#define NVME_CMB_SQS(cmbsz) ((cmbsz) & 0x1)
 enum {
     NVME_CC_ENABLE = 1 << 0,
     NVME_CC_CSS_NVM = 0 << 4,
@@ -55,6 +69,7 @@ enum {
     NVME_CC_IOCQES = 4 << 20,
     NVME_CSTS_RDY = 1 << 0,
     NVME_CSTS_CFS = 1 << 1,
+    NVME_CSTS_NSSRO = 1 << 4,
     NVME_CSTS_SHST_NORMAL = 0 << 2,
     NVME_CSTS_SHST_OCCUR = 1 << 2,
     NVME_CSTS_SHST_CMPLT = 2 << 2,
@@ -97,9 +112,14 @@ struct nvme_dev {
     char serial[20];
     char model[40];
     char firmware_rev[8];
+    bool subsystem;
     u32 max_hw_sectors;
     u32 stripe_size;
     u32 page_size;
+    void __iomem *cmb;
+    dma_addr_t cmb_dma_addr;
+    u64 cmb_size;
+    u32 cmbsz;
     u16 oncs;
     u16 abort_limit;
     u8 event_limit;
...
@@ -584,5 +584,6 @@ struct nvme_passthru_cmd {
 #define NVME_IOCTL_SUBMIT_IO _IOW('N', 0x42, struct nvme_user_io)
 #define NVME_IOCTL_IO_CMD _IOWR('N', 0x43, struct nvme_passthru_cmd)
 #define NVME_IOCTL_RESET _IO('N', 0x44)
+#define NVME_IOCTL_SUBSYS_RESET _IO('N', 0x45)
 #endif /* _UAPI_LINUX_NVME_H */