Commit 28507135 authored by Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:
 "A collection of fixes from the past few weeks that should go into 4.5.
  This contains:

   - Overflow fix for sysfs discard show function from Alan.

   - A stacking limit init fix for max_dev_sectors, so we don't end up
     artificially capping some use cases.  From Keith.

   - Have blk-mq properly end unstarted requests on a dying queue,
     instead of pushing that to the driver.  From Keith.

   - NVMe:
        - Update to Kconfig description for NVME_SCSI, since it was
          vague and having it on is important for some SUSE distros.
          From Christoph.
        - Set of fixes from Keith, around surprise removal. Also kills
          the no-merge flag, so it supports merging.

   - Set of fixes for lightnvm from Matias, Javier, and Wenwei.

   - Fix null_blk oops when asked for lightnvm, but not available.  From
     Matias.

   - Copy-to-user EINTR fix from Hannes, fixing a case where SG_IO fails
     if interrupted by a signal.

   - Two floppy fixes from Jiri, fixing signal handling and blocking
     open.

   - A use-after-free fix for O_DIRECT, from Mike Krinkin.

   - A block module ref count fix from Roman Pen.

   - An fs IO wait accounting fix for O_DSYNC from Stephane Gasparini.

   - Smaller realloc fix for xen-blkfront from Bob Liu.

   - Removal of an unused struct member in the deadline IO scheduler,
     from Tahsin.

   - Also from Tahsin, properly initialize inode struct members
     associated with cgroup writeback, if enabled.

   - From Tejun, ensure that we keep the superblock pinned during cgroup
     writeback"

* 'for-linus' of git://git.kernel.dk/linux-block: (25 commits)
  blk: fix overflow in queue_discard_max_hw_show
  writeback: initialize inode members that track writeback history
  writeback: keep superblock pinned during cgroup writeback association switches
  bio: return EINTR if copying to user space got interrupted
  NVMe: Rate limit nvme IO warnings
  NVMe: Poll device while still active during remove
  NVMe: Requeue requests on suspended queues
  NVMe: Allow request merges
  NVMe: Fix io incapable return values
  blk-mq: End unstarted requests on dying queue
  block: Initialize max_dev_sectors to 0
  null_blk: oops when initializing without lightnvm
  block: fix module reference leak on put_disk() call for cgroups throttle
  nvme: fix Kconfig description for BLK_DEV_NVME_SCSI
  kernel/fs: fix I/O wait not accounted for RW O_DSYNC
  floppy: refactor open() flags handling
  lightnvm: allow to force mm initialization
  lightnvm: check overflow and correct mlc pairs
  lightnvm: fix request intersection locking in rrpc
  lightnvm: warn if irqs are disabled in lock laddr
  ...
parents c28b947d 18f922d0
@@ -874,7 +874,7 @@ int submit_bio_wait(int rw, struct bio *bio)
     bio->bi_private = &ret;
     bio->bi_end_io = submit_bio_wait_endio;
     submit_bio(rw, bio);
-    wait_for_completion(&ret.event);
+    wait_for_completion_io(&ret.event);

     return ret.error;
 }
@@ -1090,9 +1090,12 @@ int bio_uncopy_user(struct bio *bio)
     if (!bio_flagged(bio, BIO_NULL_MAPPED)) {
         /*
          * if we're in a workqueue, the request is orphaned, so
-         * don't copy into a random user address space, just free.
+         * don't copy into a random user address space, just free
+         * and return -EINTR so user space doesn't expect any data.
          */
-        if (current->mm && bio_data_dir(bio) == READ)
+        if (!current->mm)
+            ret = -EINTR;
+        else if (bio_data_dir(bio) == READ)
             ret = bio_copy_to_iter(bio, bmd->iter);
         if (bmd->is_our_pages)
             bio_free_pages(bio);
...
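
With this fix, an SG_IO request that was orphaned by a signal now completes with -EINTR instead of pretending data was transferred. As a minimal user-space sketch (the device path and retry count are illustrative assumptions, not part of the patch), the caller can simply reissue the command:

    /* Sketch: retry an SG_IO INQUIRY when the ioctl is interrupted by a
     * signal. /dev/sg0 and the retry limit of 3 are assumptions. */
    #include <errno.h>
    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <scsi/sg.h>
    #include <sys/ioctl.h>

    int main(void)
    {
        unsigned char cdb[6] = { 0x12, 0, 0, 0, 96, 0 };    /* INQUIRY */
        unsigned char buf[96], sense[32];
        struct sg_io_hdr hdr;
        int fd, ret = -1, tries;

        fd = open("/dev/sg0", O_RDWR);
        if (fd < 0)
            return 1;

        memset(&hdr, 0, sizeof(hdr));
        hdr.interface_id = 'S';
        hdr.cmd_len = sizeof(cdb);
        hdr.cmdp = cdb;
        hdr.dxfer_direction = SG_DXFER_FROM_DEV;
        hdr.dxferp = buf;
        hdr.dxfer_len = sizeof(buf);
        hdr.sbp = sense;
        hdr.mx_sb_len = sizeof(sense);
        hdr.timeout = 5000;    /* milliseconds */

        for (tries = 0; tries < 3; tries++) {
            ret = ioctl(fd, SG_IO, &hdr);
            if (ret == 0 || errno != EINTR)
                break;    /* done, or a real error */
            /* EINTR: no data was copied back; simply reissue */
        }
        printf("SG_IO: ret=%d errno=%d\n", ret, ret ? errno : 0);
        return ret ? 1 : 0;
    }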
@@ -788,6 +788,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
 {
     struct gendisk *disk;
     struct blkcg_gq *blkg;
+    struct module *owner;
     unsigned int major, minor;
     int key_len, part, ret;
     char *body;
@@ -804,7 +805,9 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
     if (!disk)
         return -ENODEV;
     if (part) {
+        owner = disk->fops->owner;
         put_disk(disk);
+        module_put(owner);
         return -ENODEV;
     }
@@ -820,7 +823,9 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
         ret = PTR_ERR(blkg);
         rcu_read_unlock();
         spin_unlock_irq(disk->queue->queue_lock);
+        owner = disk->fops->owner;
         put_disk(disk);
+        module_put(owner);
         /*
          * If queue was bypassing, we should retry. Do so after a
          * short msleep(). It isn't strictly necessary but queue
@@ -851,9 +856,13 @@ EXPORT_SYMBOL_GPL(blkg_conf_prep);
 void blkg_conf_finish(struct blkg_conf_ctx *ctx)
     __releases(ctx->disk->queue->queue_lock) __releases(rcu)
 {
+    struct module *owner;
+
     spin_unlock_irq(ctx->disk->queue->queue_lock);
     rcu_read_unlock();
+    owner = ctx->disk->fops->owner;
     put_disk(ctx->disk);
+    module_put(owner);
 }
 EXPORT_SYMBOL_GPL(blkg_conf_finish);
...
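
The ordering here matters: put_disk() may drop the last reference and free the gendisk, after which disk->fops->owner is unreadable, so the owner pointer has to be saved first. A self-contained user-space stand-in for the pattern (the struct and refcount are illustrative, not the kernel types):

    /* Illustration of why the owner pointer must be read before
     * dropping the last reference: after the put, the object may be
     * freed and none of its fields may be touched. */
    #include <stdio.h>
    #include <stdlib.h>

    struct disk_sim {
        int refcount;
        const char *owner;    /* stands in for disk->fops->owner */
    };

    static void put_disk_sim(struct disk_sim *d)
    {
        if (--d->refcount == 0)
            free(d);    /* the last put frees the object */
    }

    int main(void)
    {
        struct disk_sim *d = malloc(sizeof(*d));
        const char *owner;

        d->refcount = 1;
        d->owner = "null_blk";

        /* Correct order, mirroring the fix: save owner, then put. */
        owner = d->owner;
        put_disk_sim(d);
        printf("released module reference for %s\n", owner);

        /* Reading d->owner here, after put_disk_sim(), would be a
         * use-after-free -- exactly what the patch avoids. */
        return 0;
    }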
@@ -599,8 +599,10 @@ static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
          * If a request wasn't started before the queue was
          * marked dying, kill it here or it'll go unnoticed.
          */
-        if (unlikely(blk_queue_dying(rq->q)))
-            blk_mq_complete_request(rq, -EIO);
+        if (unlikely(blk_queue_dying(rq->q))) {
+            rq->errors = -EIO;
+            blk_mq_end_request(rq, rq->errors);
+        }
         return;
     }
...
@@ -91,8 +91,8 @@ void blk_set_default_limits(struct queue_limits *lim)
     lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
     lim->virt_boundary_mask = 0;
     lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
-    lim->max_sectors = lim->max_dev_sectors = lim->max_hw_sectors =
-        BLK_SAFE_MAX_SECTORS;
+    lim->max_sectors = lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS;
+    lim->max_dev_sectors = 0;
     lim->chunk_sectors = 0;
     lim->max_write_same_sectors = 0;
     lim->max_discard_sectors = 0;
...
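
The point of the zero default is that max_dev_sectors means "no device-imposed limit" until a driver sets it; the stacking code combines limits with a min-not-zero rule, so a non-zero default would artificially cap stacked devices. A sketch of the arithmetic, assuming a min_not_zero() helper that mirrors the kernel macro of the same name:

    /* Why the default matters when limits are stacked: zero must mean
     * "no limit" and must not win the min. */
    #include <stdio.h>

    static unsigned int min_not_zero(unsigned int x, unsigned int y)
    {
        if (x == 0)
            return y;
        if (y == 0)
            return x;
        return x < y ? x : y;
    }

    int main(void)
    {
        unsigned int bottom_dev_limit = 65536;    /* sectors, from hardware */

        /* Old behaviour: top initialized to BLK_SAFE_MAX_SECTORS (255),
         * artificially capping the stacked device at 255 sectors. */
        printf("old: %u\n", min_not_zero(255, bottom_dev_limit));

        /* New behaviour: top initialized to 0 ("no limit"), so the
         * bottom device's real limit survives the stacking. */
        printf("new: %u\n", min_not_zero(0, bottom_dev_limit));
        return 0;
    }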
@@ -147,10 +147,9 @@ static ssize_t queue_discard_granularity_show(struct request_queue *q, char *pag

 static ssize_t queue_discard_max_hw_show(struct request_queue *q, char *page)
 {
-    unsigned long long val;
-
-    val = q->limits.max_hw_discard_sectors << 9;
-    return sprintf(page, "%llu\n", val);
+    return sprintf(page, "%llu\n",
+        (unsigned long long)q->limits.max_hw_discard_sectors << 9);
 }

 static ssize_t queue_discard_max_show(struct request_queue *q, char *page)
...
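
The bug is that max_hw_discard_sectors is 32 bits wide, so the shift by 9 wraps before the result is widened for printing; the cast has to come before the shift. A stand-alone demonstration:

    /* Demonstration of the overflow the sysfs fix addresses. */
    #include <stdio.h>

    int main(void)
    {
        unsigned int max_hw_discard_sectors = 0xffffffffu;    /* 32-bit limit */
        unsigned long long wrong, right;

        wrong = max_hw_discard_sectors << 9;    /* shifts in 32 bits, wraps */
        right = (unsigned long long)max_hw_discard_sectors << 9;

        printf("wrong: %llu\n", wrong);    /* 4294966784 after wrap */
        printf("right: %llu\n", right);    /* 2199023255040 */
        return 0;
    }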
@@ -39,7 +39,6 @@ struct deadline_data {
      */
     struct request *next_rq[2];
     unsigned int batching;        /* number of sequential requests made */
-    sector_t last_sector;         /* head position */
     unsigned int starved;         /* times reads have starved writes */

     /*
@@ -210,8 +209,6 @@ deadline_move_request(struct deadline_data *dd, struct request *rq)
     dd->next_rq[WRITE] = NULL;
     dd->next_rq[data_dir] = deadline_latter_request(rq);

-    dd->last_sector = rq_end_sector(rq);
-
     /*
      * take it off the sort and fifo list, move
      * to dispatch queue
...
@@ -866,7 +866,7 @@ static void set_fdc(int drive)
 }

 /* locks the driver */
-static int lock_fdc(int drive, bool interruptible)
+static int lock_fdc(int drive)
 {
     if (WARN(atomic_read(&usage_count) == 0,
              "Trying to lock fdc while usage count=0\n"))
@@ -2173,7 +2173,7 @@ static int do_format(int drive, struct format_descr *tmp_format_req)
 {
     int ret;

-    if (lock_fdc(drive, true))
+    if (lock_fdc(drive))
         return -EINTR;

     set_floppy(drive);
@@ -2960,7 +2960,7 @@ static int user_reset_fdc(int drive, int arg, bool interruptible)
 {
     int ret;

-    if (lock_fdc(drive, interruptible))
+    if (lock_fdc(drive))
         return -EINTR;

     if (arg == FD_RESET_ALWAYS)
@@ -3243,7 +3243,7 @@ static int set_geometry(unsigned int cmd, struct floppy_struct *g,
     if (!capable(CAP_SYS_ADMIN))
         return -EPERM;
     mutex_lock(&open_lock);
-    if (lock_fdc(drive, true)) {
+    if (lock_fdc(drive)) {
         mutex_unlock(&open_lock);
         return -EINTR;
     }
@@ -3263,7 +3263,7 @@ static int set_geometry(unsigned int cmd, struct floppy_struct *g,
     } else {
         int oldStretch;

-        if (lock_fdc(drive, true))
+        if (lock_fdc(drive))
             return -EINTR;
         if (cmd != FDDEFPRM) {
             /* notice a disk change immediately, else
@@ -3349,7 +3349,7 @@ static int get_floppy_geometry(int drive, int type, struct floppy_struct **g)
     if (type)
         *g = &floppy_type[type];
     else {
-        if (lock_fdc(drive, false))
+        if (lock_fdc(drive))
             return -EINTR;
         if (poll_drive(false, 0) == -EINTR)
             return -EINTR;
@@ -3433,7 +3433,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
         if (UDRS->fd_ref != 1)
             /* somebody else has this drive open */
             return -EBUSY;
-        if (lock_fdc(drive, true))
+        if (lock_fdc(drive))
             return -EINTR;

         /* do the actual eject. Fails on
@@ -3445,7 +3445,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
         process_fd_request();
         return ret;
     case FDCLRPRM:
-        if (lock_fdc(drive, true))
+        if (lock_fdc(drive))
             return -EINTR;
         current_type[drive] = NULL;
         floppy_sizes[drive] = MAX_DISK_SIZE << 1;
@@ -3467,7 +3467,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
         UDP->flags &= ~FTD_MSG;
         return 0;
     case FDFMTBEG:
-        if (lock_fdc(drive, true))
+        if (lock_fdc(drive))
             return -EINTR;
         if (poll_drive(true, FD_RAW_NEED_DISK) == -EINTR)
             return -EINTR;
@@ -3484,7 +3484,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
         return do_format(drive, &inparam.f);
     case FDFMTEND:
     case FDFLUSH:
-        if (lock_fdc(drive, true))
+        if (lock_fdc(drive))
             return -EINTR;
         return invalidate_drive(bdev);
     case FDSETEMSGTRESH:
@@ -3507,7 +3507,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
         outparam = UDP;
         break;
     case FDPOLLDRVSTAT:
-        if (lock_fdc(drive, true))
+        if (lock_fdc(drive))
             return -EINTR;
         if (poll_drive(true, FD_RAW_NEED_DISK) == -EINTR)
             return -EINTR;
@@ -3530,7 +3530,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
     case FDRAWCMD:
         if (type)
             return -EINVAL;
-        if (lock_fdc(drive, true))
+        if (lock_fdc(drive))
             return -EINTR;
         set_floppy(drive);
         i = raw_cmd_ioctl(cmd, (void __user *)param);
@@ -3539,7 +3539,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
         process_fd_request();
         return i;
     case FDTWADDLE:
-        if (lock_fdc(drive, true))
+        if (lock_fdc(drive))
             return -EINTR;
         twaddle();
         process_fd_request();
@@ -3663,6 +3663,11 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)
     opened_bdev[drive] = bdev;

+    if (!(mode & (FMODE_READ|FMODE_WRITE))) {
+        res = -EINVAL;
+        goto out;
+    }
+
     res = -ENXIO;

     if (!floppy_track_buffer) {
@@ -3706,21 +3711,20 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)
     if (UFDCS->rawcmd == 1)
         UFDCS->rawcmd = 2;

-    if (!(mode & FMODE_NDELAY)) {
-        if (mode & (FMODE_READ|FMODE_WRITE)) {
-            UDRS->last_checked = 0;
-            clear_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags);
-            check_disk_change(bdev);
-            if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags))
-                goto out;
-            if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags))
-                goto out;
-        }
-        res = -EROFS;
-        if ((mode & FMODE_WRITE) &&
-            !test_bit(FD_DISK_WRITABLE_BIT, &UDRS->flags))
-            goto out;
-    }
+    UDRS->last_checked = 0;
+    clear_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags);
+    check_disk_change(bdev);
+    if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags))
+        goto out;
+    if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags))
+        goto out;
+
+    res = -EROFS;
+
+    if ((mode & FMODE_WRITE) &&
+        !test_bit(FD_DISK_WRITABLE_BIT, &UDRS->flags))
+        goto out;
+
     mutex_unlock(&open_lock);
     mutex_unlock(&floppy_mutex);
     return 0;
@@ -3748,7 +3752,8 @@ static unsigned int floppy_check_events(struct gendisk *disk,
         return DISK_EVENT_MEDIA_CHANGE;

     if (time_after(jiffies, UDRS->last_checked + UDP->checkfreq)) {
-        lock_fdc(drive, false);
+        if (lock_fdc(drive))
+            return -EINTR;
         poll_drive(false, 0);
         process_fd_request();
     }
@@ -3847,7 +3852,9 @@ static int floppy_revalidate(struct gendisk *disk)
              "VFS: revalidate called on non-open device.\n"))
         return -EFAULT;

-    lock_fdc(drive, false);
+    res = lock_fdc(drive);
+    if (res)
+        return res;
     cf = (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags) ||
           test_bit(FD_VERIFY_BIT, &UDRS->flags));
     if (!(cf || test_bit(drive, &fake_change) || drive_no_geom(drive))) {
...
@@ -478,7 +478,7 @@ static int null_lnvm_id(struct nvm_dev *dev, struct nvm_id *id)
     id->ver_id = 0x1;
     id->vmnt = 0;
     id->cgrps = 1;
-    id->cap = 0x3;
+    id->cap = 0x2;
     id->dom = 0x1;

     id->ppaf.blk_offset = 0;
@@ -707,9 +707,7 @@ static int null_add_dev(void)
     queue_flag_set_unlocked(QUEUE_FLAG_NONROT, nullb->q);
     queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, nullb->q);

     mutex_lock(&lock);
-    list_add_tail(&nullb->list, &nullb_list);
     nullb->index = nullb_indexes++;
     mutex_unlock(&lock);
@@ -743,6 +741,10 @@ static int null_add_dev(void)
     strncpy(disk->disk_name, nullb->disk_name, DISK_NAME_LEN);

     add_disk(disk);
+
+    mutex_lock(&lock);
+    list_add_tail(&nullb->list, &nullb_list);
+    mutex_unlock(&lock);
 done:
     return 0;
...
@@ -1873,6 +1873,43 @@ static int talk_to_blkback(struct xenbus_device *dev,
     return err;
 }

+static int negotiate_mq(struct blkfront_info *info)
+{
+    unsigned int backend_max_queues = 0;
+    int err;
+    unsigned int i;
+
+    BUG_ON(info->nr_rings);
+
+    /* Check if backend supports multiple queues. */
+    err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
+                       "multi-queue-max-queues", "%u", &backend_max_queues);
+    if (err < 0)
+        backend_max_queues = 1;
+
+    info->nr_rings = min(backend_max_queues, xen_blkif_max_queues);
+    /* We need at least one ring. */
+    if (!info->nr_rings)
+        info->nr_rings = 1;
+
+    info->rinfo = kzalloc(sizeof(struct blkfront_ring_info) * info->nr_rings, GFP_KERNEL);
+    if (!info->rinfo) {
+        xenbus_dev_fatal(info->xbdev, -ENOMEM, "allocating ring_info structure");
+        return -ENOMEM;
+    }
+
+    for (i = 0; i < info->nr_rings; i++) {
+        struct blkfront_ring_info *rinfo;
+
+        rinfo = &info->rinfo[i];
+        INIT_LIST_HEAD(&rinfo->indirect_pages);
+        INIT_LIST_HEAD(&rinfo->grants);
+        rinfo->dev_info = info;
+        INIT_WORK(&rinfo->work, blkif_restart_queue);
+        spin_lock_init(&rinfo->ring_lock);
+    }
+    return 0;
+}
+
 /**
  * Entry point to this code when a new device is created. Allocate the basic
  * structures and the ring buffer for communication with the backend, and
@@ -1883,9 +1920,7 @@ static int blkfront_probe(struct xenbus_device *dev,
                           const struct xenbus_device_id *id)
 {
     int err, vdevice;
-    unsigned int r_index;
     struct blkfront_info *info;
-    unsigned int backend_max_queues = 0;

     /* FIXME: Use dynamic device id if this is not set. */
     err = xenbus_scanf(XBT_NIL, dev->nodename,
@@ -1936,33 +1971,10 @@ static int blkfront_probe(struct xenbus_device *dev,
     }

     info->xbdev = dev;
-    /* Check if backend supports multiple queues. */
-    err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
-                       "multi-queue-max-queues", "%u", &backend_max_queues);
-    if (err < 0)
-        backend_max_queues = 1;
-
-    info->nr_rings = min(backend_max_queues, xen_blkif_max_queues);
-    /* We need at least one ring. */
-    if (!info->nr_rings)
-        info->nr_rings = 1;
-
-    info->rinfo = kzalloc(sizeof(struct blkfront_ring_info) * info->nr_rings, GFP_KERNEL);
-    if (!info->rinfo) {
-        xenbus_dev_fatal(dev, -ENOMEM, "allocating ring_info structure");
+
+    err = negotiate_mq(info);
+    if (err) {
         kfree(info);
-        return -ENOMEM;
-    }
-
-    for (r_index = 0; r_index < info->nr_rings; r_index++) {
-        struct blkfront_ring_info *rinfo;
-
-        rinfo = &info->rinfo[r_index];
-        INIT_LIST_HEAD(&rinfo->indirect_pages);
-        INIT_LIST_HEAD(&rinfo->grants);
-        rinfo->dev_info = info;
-        INIT_WORK(&rinfo->work, blkif_restart_queue);
-        spin_lock_init(&rinfo->ring_lock);
+        return err;
     }

     mutex_init(&info->mutex);
@@ -2123,12 +2135,16 @@ static int blkif_recover(struct blkfront_info *info)
 static int blkfront_resume(struct xenbus_device *dev)
 {
     struct blkfront_info *info = dev_get_drvdata(&dev->dev);
-    int err;
+    int err = 0;

     dev_dbg(&dev->dev, "blkfront_resume: %s\n", dev->nodename);

     blkif_free(info, info->connected == BLKIF_STATE_CONNECTED);

+    err = negotiate_mq(info);
+    if (err)
+        return err;
+
     err = talk_to_blkback(dev, info);

     /*
...
@@ -572,11 +572,13 @@ int nvm_register(struct request_queue *q, char *disk_name,
         }
     }

-    ret = nvm_get_sysblock(dev, &dev->sb);
-    if (!ret)
-        pr_err("nvm: device not initialized.\n");
-    else if (ret < 0)
-        pr_err("nvm: err (%d) on device initialization\n", ret);
+    if (dev->identity.cap & NVM_ID_DCAP_BBLKMGMT) {
+        ret = nvm_get_sysblock(dev, &dev->sb);
+        if (!ret)
+            pr_err("nvm: device not initialized.\n");
+        else if (ret < 0)
+            pr_err("nvm: err (%d) on device initialization\n", ret);
+    }

     /* register device with a supported media manager */
     down_write(&nvm_lock);
@@ -1055,9 +1057,11 @@ static long __nvm_ioctl_dev_init(struct nvm_ioctl_dev_init *init)
     strncpy(info.mmtype, init->mmtype, NVM_MMTYPE_LEN);
     info.fs_ppa.ppa = -1;

-    ret = nvm_init_sysblock(dev, &info);
-    if (ret)
-        return ret;
+    if (dev->identity.cap & NVM_ID_DCAP_BBLKMGMT) {
+        ret = nvm_init_sysblock(dev, &info);
+        if (ret)
+            return ret;
+    }

     memcpy(&dev->sb, &info, sizeof(struct nvm_sb_info));
@@ -1117,7 +1121,10 @@ static long nvm_ioctl_dev_factory(struct file *file, void __user *arg)
         dev->mt = NULL;
     }

-    return nvm_dev_factory(dev, fact.flags);
+    if (dev->identity.cap & NVM_ID_DCAP_BBLKMGMT)
+        return nvm_dev_factory(dev, fact.flags);
+
+    return 0;
 }

 static long nvm_ctl_ioctl(struct file *file, uint cmd, unsigned long arg)
...
@@ -300,8 +300,10 @@ static int rrpc_move_valid_pages(struct rrpc *rrpc, struct rrpc_block *rblk)
     }

     page = mempool_alloc(rrpc->page_pool, GFP_NOIO);
-    if (!page)
+    if (!page) {
+        bio_put(bio);
         return -ENOMEM;
+    }

     while ((slot = find_first_zero_bit(rblk->invalid_pages,
                                        nr_pgs_per_blk)) < nr_pgs_per_blk) {
...
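
The failure path previously returned without releasing the bio allocated earlier in the function, leaking it. A self-contained sketch of the unwind rule the fix restores (the names are stand-ins, not the kernel API):

    /* Every resource acquired before a failing step must be released
     * on that step's error path. */
    #include <stdio.h>
    #include <stdlib.h>

    struct bio_sim { int dummy; };

    static int move_valid_pages_sim(void)
    {
        struct bio_sim *bio;
        void *page;

        bio = malloc(sizeof(*bio));    /* stands in for bio_alloc() */
        if (!bio)
            return -1;

        page = NULL;                   /* simulate mempool_alloc() failing */
        if (!page) {
            free(bio);                 /* the previously-leaked release */
            return -1;                 /* -ENOMEM in the kernel code */
        }

        /* ... use bio and page ... */
        free(bio);
        return 0;
    }

    int main(void)
    {
        printf("result: %d\n", move_valid_pages_sim());
        return 0;
    }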
@@ -174,8 +174,7 @@ static inline sector_t rrpc_get_sector(sector_t laddr)
 static inline int request_intersects(struct rrpc_inflight_rq *r,
                                      sector_t laddr_start, sector_t laddr_end)
 {
-    return (laddr_end >= r->l_start && laddr_end <= r->l_end) &&
-        (laddr_start >= r->l_start && laddr_start <= r->l_end);
+    return (laddr_end >= r->l_start) && (laddr_start <= r->l_end);
 }

 static int __rrpc_lock_laddr(struct rrpc *rrpc, sector_t laddr,
@@ -184,6 +183,8 @@ static int __rrpc_lock_laddr(struct rrpc *rrpc, sector_t laddr,
     sector_t laddr_end = laddr + pages - 1;
     struct rrpc_inflight_rq *rtmp;

+    WARN_ON(irqs_disabled());
+
     spin_lock_irq(&rrpc->inflights.lock);
     list_for_each_entry(rtmp, &rrpc->inflights.reqs, list) {
         if (unlikely(request_intersects(rtmp, laddr, laddr_end))) {
...
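
The old predicate required both endpoints of the new request to lie inside the inflight range, so it only caught the subset case and missed partial overlaps and the case where the new request fully covers the inflight one. Two inclusive ranges [s1,e1] and [s2,e2] intersect iff e1 >= s2 && s1 <= e2, which is exactly the fixed test. A quick check of both predicates:

    /* Verification of the corrected closed-interval overlap test. */
    #include <assert.h>
    #include <stdio.h>

    typedef unsigned long long sector_t;

    struct range { sector_t start, end; };    /* inclusive, like l_start/l_end */

    /* Old predicate: true only when [s,e] lies wholly inside the
     * inflight range. */
    static int old_intersects(struct range infl, sector_t s, sector_t e)
    {
        return (e >= infl.start && e <= infl.end) &&
               (s >= infl.start && s <= infl.end);
    }

    /* Fixed predicate: standard interval-overlap test. */
    static int new_intersects(struct range infl, sector_t s, sector_t e)
    {
        return (e >= infl.start) && (s <= infl.end);
    }

    int main(void)
    {
        struct range inflight = { 100, 199 };

        /* Partial overlap from the left: the old test misses it. */
        assert(!old_intersects(inflight, 50, 149));
        assert(new_intersects(inflight, 50, 149));

        /* New request fully covers the inflight one: also missed. */
        assert(!old_intersects(inflight, 0, 299));
        assert(new_intersects(inflight, 0, 299));

        /* Disjoint ranges still do not intersect. */
        assert(!new_intersects(inflight, 200, 250));
        printf("overlap checks passed\n");
        return 0;
    }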
@@ -17,5 +17,6 @@ config BLK_DEV_NVME_SCSI
       and block devices nodes, as well a a translation for a small
       number of selected SCSI commands to NVMe commands to the NVMe
       driver. If you don't know what this means you probably want
-      to say N here, and if you know what it means you probably
-      want to say N as well.
+      to say N here, unless you run a distro that abuses the SCSI
+      emulation to provide stable device names for mount by id, like
+      some OpenSuSE and SLES versions.
@@ -1121,7 +1121,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
     ns->queue = blk_mq_init_queue(ctrl->tagset);
     if (IS_ERR(ns->queue))
         goto out_free_ns;
-    queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, ns->queue);
     queue_flag_set_unlocked(QUEUE_FLAG_NONROT, ns->queue);
     ns->queue->queuedata = ns;
     ns->ctrl = ctrl;
...
@@ -146,9 +146,10 @@ struct nvme_nvm_command {
     };
 };

+#define NVME_NVM_LP_MLC_PAIRS 886
 struct nvme_nvm_lp_mlc {
     __u16 num_pairs;
-    __u8 pairs[886];
+    __u8 pairs[NVME_NVM_LP_MLC_PAIRS];
 };

 struct nvme_nvm_lp_tbl {
@@ -282,9 +283,14 @@ static int init_grps(struct nvm_id *nvm_id, struct nvme_nvm_id *nvme_nvm_id)
             memcpy(dst->lptbl.id, src->lptbl.id, 8);
             dst->lptbl.mlc.num_pairs =
                     le16_to_cpu(src->lptbl.mlc.num_pairs);
-            /* 4 bits per pair */
+
+            if (dst->lptbl.mlc.num_pairs > NVME_NVM_LP_MLC_PAIRS) {
+                pr_err("nvm: number of MLC pairs not supported\n");
+                return -EINVAL;
+            }
+
             memcpy(dst->lptbl.mlc.pairs, src->lptbl.mlc.pairs,
                    dst->lptbl.mlc.num_pairs);
         }
     }
...
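
num_pairs comes from device-reported identify data, so it must be validated against the fixed-size destination array before the memcpy; the named constant replaces the magic 886. A stand-alone sketch of the same check (the structs are user-space stand-ins for the kernel types):

    /* A length field read from device data must be bounds-checked
     * against the fixed-size destination before memcpy. */
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define NVME_NVM_LP_MLC_PAIRS 886

    struct lp_mlc {
        uint16_t num_pairs;
        uint8_t pairs[NVME_NVM_LP_MLC_PAIRS];
    };

    static int copy_mlc_pairs(struct lp_mlc *dst, const uint8_t *src,
                              uint16_t reported_pairs)
    {
        dst->num_pairs = reported_pairs;    /* device-controlled value */
        if (dst->num_pairs > NVME_NVM_LP_MLC_PAIRS) {
            fprintf(stderr, "number of MLC pairs not supported\n");
            return -1;    /* -EINVAL in the kernel */
        }
        memcpy(dst->pairs, src, dst->num_pairs);
        return 0;
    }

    int main(void)
    {
        static uint8_t src[65536];
        struct lp_mlc dst;

        printf("ok:  %d\n", copy_mlc_pairs(&dst, src, 886));
        printf("bad: %d\n", copy_mlc_pairs(&dst, src, 2048));    /* rejected */
        return 0;
    }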
@@ -139,9 +139,9 @@ static inline bool nvme_io_incapable(struct nvme_ctrl *ctrl)
     u32 val = 0;

     if (ctrl->ops->io_incapable(ctrl))
-        return false;
+        return true;
     if (ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &val))
-        return false;
+        return true;
     return val & NVME_CSTS_CFS;
 }
...
@@ -678,6 +678,11 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
     blk_mq_start_request(req);

     spin_lock_irq(&nvmeq->q_lock);
+    if (unlikely(nvmeq->cq_vector < 0)) {
+        ret = BLK_MQ_RQ_QUEUE_BUSY;
+        spin_unlock_irq(&nvmeq->q_lock);
+        goto out;
+    }
     __nvme_submit_cmd(nvmeq, &cmnd);
     nvme_process_cq(nvmeq);
     spin_unlock_irq(&nvmeq->q_lock);
@@ -999,7 +1004,7 @@ static void nvme_cancel_queue_ios(struct request *req, void *data, bool reserved
     if (!blk_mq_request_started(req))
         return;

-    dev_warn(nvmeq->q_dmadev,
+    dev_dbg_ratelimited(nvmeq->q_dmadev,
              "Cancelling I/O %d QID %d\n", req->tag, nvmeq->qid);

     status = NVME_SC_ABORT_REQ;
@@ -2111,16 +2116,12 @@ static void nvme_remove(struct pci_dev *pdev)
 {
     struct nvme_dev *dev = pci_get_drvdata(pdev);

-    spin_lock(&dev_list_lock);
-    list_del_init(&dev->node);
-    spin_unlock(&dev_list_lock);
-
     pci_set_drvdata(pdev, NULL);
-    flush_work(&dev->reset_work);
     flush_work(&dev->scan_work);
     nvme_remove_namespaces(&dev->ctrl);
     nvme_uninit_ctrl(&dev->ctrl);
     nvme_dev_disable(dev, true);
+    flush_work(&dev->reset_work);
     nvme_dev_remove_admin(dev);
     nvme_free_queues(dev, 0);
     nvme_release_cmb(dev);
...
@@ -472,8 +472,8 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio)
         dio->io_error = -EIO;

     if (dio->is_async && dio->rw == READ && dio->should_dirty) {
-        bio_check_pages_dirty(bio);    /* transfers ownership */
         err = bio->bi_error;
+        bio_check_pages_dirty(bio);    /* transfers ownership */
     } else {
         bio_for_each_segment_all(bvec, bio, i) {
             struct page *page = bvec->bv_page;
...
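
bio_check_pages_dirty() transfers ownership and may drop the last reference to the bio, so bi_error has to be read before the call; the fix simply swaps the two lines. A minimal stand-in showing the hazard (user-space types, with free() standing in for the final reference drop):

    /* Once ownership of an object is transferred (and it may be
     * freed), none of its fields may be read. */
    #include <stdio.h>
    #include <stdlib.h>

    struct bio_sim {
        int bi_error;
    };

    static void check_pages_dirty_sim(struct bio_sim *bio)
    {
        free(bio);    /* may free the bio, as bio_check_pages_dirty() can */
    }

    int main(void)
    {
        struct bio_sim *bio = malloc(sizeof(*bio));
        int err;

        bio->bi_error = -5;    /* -EIO */

        /* Fixed order: read the field first, then hand the bio away. */
        err = bio->bi_error;
        check_pages_dirty_sim(bio);

        /* The old order read bio->bi_error here, after the transfer:
         * a use-after-free. */
        printf("err = %d\n", err);
        return 0;
    }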
@@ -317,6 +317,7 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
     struct inode_switch_wbs_context *isw =
         container_of(work, struct inode_switch_wbs_context, work);
     struct inode *inode = isw->inode;
+    struct super_block *sb = inode->i_sb;
     struct address_space *mapping = inode->i_mapping;
     struct bdi_writeback *old_wb = inode->i_wb;
     struct bdi_writeback *new_wb = isw->new_wb;
@@ -423,6 +424,7 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
     wb_put(new_wb);

     iput(inode);
+    deactivate_super(sb);
     kfree(isw);
 }
@@ -469,11 +471,14 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
     /* while holding I_WB_SWITCH, no one else can update the association */
     spin_lock(&inode->i_lock);
     if (inode->i_state & (I_WB_SWITCH | I_FREEING) ||
-        inode_to_wb(inode) == isw->new_wb) {
-        spin_unlock(&inode->i_lock);
-        goto out_free;
-    }
+        inode_to_wb(inode) == isw->new_wb)
+        goto out_unlock;
+
+    if (!atomic_inc_not_zero(&inode->i_sb->s_active))
+        goto out_unlock;
+
     inode->i_state |= I_WB_SWITCH;
     spin_unlock(&inode->i_lock);
@@ -489,6 +494,8 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
     call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn);
     return;

+out_unlock:
+    spin_unlock(&inode->i_lock);
 out_free:
     if (isw->new_wb)
         wb_put(isw->new_wb);
...
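
The switch work runs asynchronously, so the superblock could otherwise be torn down before inode_switch_wbs_work_fn() ran. The fix pins it with atomic_inc_not_zero() on s_active, which refuses to take a reference once the count has already hit zero, and drops the pin with deactivate_super() when the work finishes. A self-contained sketch of that try-get pattern (plain int stands in for the atomic counter):

    /* "Pin only if still alive": an inc-not-zero try-get fails once
     * the count has reached zero, so late work can't resurrect the
     * object. */
    #include <stdio.h>

    static int atomic_inc_not_zero_sim(int *v)
    {
        if (*v == 0)
            return 0;    /* already dead: refuse to pin */
        (*v)++;
        return 1;
    }

    int main(void)
    {
        int s_active = 1;    /* superblock still active */

        if (atomic_inc_not_zero_sim(&s_active))
            printf("pinned, count=%d\n", s_active);    /* count=2 */
        s_active--;    /* deactivate_super() once the work is done */

        s_active = 0;    /* superblock torn down */
        if (!atomic_inc_not_zero_sim(&s_active))
            printf("sb gone, switch aborted\n");
        return 0;
    }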
@@ -154,6 +154,12 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
     inode->i_rdev = 0;
     inode->dirtied_when = 0;

+#ifdef CONFIG_CGROUP_WRITEBACK
+    inode->i_wb_frn_winner = 0;
+    inode->i_wb_frn_avg_time = 0;
+    inode->i_wb_frn_history = 0;
+#endif
+
     if (security_inode_alloc(inode))
         goto out;
     spin_lock_init(&inode->i_lock);
...
@@ -135,6 +135,10 @@ enum {
     /* Memory types */
     NVM_ID_FMTYPE_SLC = 0,
     NVM_ID_FMTYPE_MLC = 1,
+
+    /* Device capabilities */
+    NVM_ID_DCAP_BBLKMGMT = 0x1,
+    NVM_UD_DCAP_ECC = 0x2,
 };

 struct nvm_id_lp_mlc {
...