Commit ed535f2c authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'block-5.6-2020-02-05' of git://git.kernel.dk/linux-block

Pull more block updates from Jens Axboe:
 "Some later arrivals, but all fixes at this point:

   - bcache fix series (Coly)

   - Series of BFQ fixes (Paolo)

   - NVMe pull request from Keith with a few minor NVMe fixes

   - Various little tweaks"

* tag 'block-5.6-2020-02-05' of git://git.kernel.dk/linux-block: (23 commits)
  nvmet: update AEN list and array at one place
  nvmet: Fix controller use after free
  nvmet: Fix error print message at nvmet_install_queue function
  brd: check and limit max_part par
  nvme-pci: remove nvmeq->tags
  nvmet: fix dsm failure when payload does not match sgl descriptor
  nvmet: Pass lockdep expression to RCU lists
  block, bfq: clarify the goal of bfq_split_bfqq()
  block, bfq: get a ref to a group when adding it to a service tree
  block, bfq: remove ifdefs from around gets/puts of bfq groups
  block, bfq: extend incomplete name of field on_st
  block, bfq: get extra ref to prevent a queue from being freed during a group move
  block, bfq: do not insert oom queue into position tree
  block, bfq: do not plug I/O for bfq_queues with no proc refs
  bcache: check return value of prio_read()
  bcache: fix incorrect data type usage in btree_flush_write()
  bcache: add readahead cache policy options via sysfs interface
  bcache: explicity type cast in bset_bkey_last()
  bcache: fix memory corruption in bch_cache_accounting_clear()
  xen/blkfront: limit allocated memory size to actual use case
  ...
parents 03840663 b74e58cd
......@@ -14947,8 +14947,8 @@ S: Maintained
F: drivers/mmc/host/sdhci-omap.c
SECURE ENCRYPTING DEVICE (SED) OPAL DRIVER
M: Scott Bauer <scott.bauer@intel.com>
M: Jonathan Derrick <jonathan.derrick@intel.com>
M: Revanth Rajashekar <revanth.rajashekar@intel.com>
L: linux-block@vger.kernel.org
S: Supported
F: block/sed*
......
......@@ -332,7 +332,7 @@ static void bfqg_put(struct bfq_group *bfqg)
kfree(bfqg);
}
static void bfqg_and_blkg_get(struct bfq_group *bfqg)
void bfqg_and_blkg_get(struct bfq_group *bfqg)
{
/* see comments in bfq_bic_update_cgroup for why refcounting bfqg */
bfqg_get(bfqg);
......@@ -651,9 +651,15 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
bfq_bfqq_expire(bfqd, bfqd->in_service_queue,
false, BFQQE_PREEMPTED);
/*
* get extra reference to prevent bfqq from being freed in
* next possible deactivate
*/
bfqq->ref++;
if (bfq_bfqq_busy(bfqq))
bfq_deactivate_bfqq(bfqd, bfqq, false, false);
else if (entity->on_st)
else if (entity->on_st_or_in_serv)
bfq_put_idle_entity(bfq_entity_service_tree(entity), entity);
bfqg_and_blkg_put(bfqq_group(bfqq));
......@@ -670,6 +676,8 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
if (!bfqd->in_service_queue && !bfqd->rq_in_driver)
bfq_schedule_dispatch(bfqd);
/* release extra ref taken above */
bfq_put_queue(bfqq);
}
/**
......@@ -1398,6 +1406,10 @@ struct bfq_group *bfqq_group(struct bfq_queue *bfqq)
return bfqq->bfqd->root_group;
}
void bfqg_and_blkg_get(struct bfq_group *bfqg) {}
void bfqg_and_blkg_put(struct bfq_group *bfqg) {}
struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node)
{
struct bfq_group *bfqg;
......
......@@ -613,6 +613,10 @@ bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq)
bfqq->pos_root = NULL;
}
/* oom_bfqq does not participate in queue merging */
if (bfqq == &bfqd->oom_bfqq)
return;
/*
* bfqq cannot be merged any longer (see comments in
* bfq_setup_cooperator): no point in adding bfqq into the
......@@ -1055,7 +1059,7 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_data *bfqd,
static int bfqq_process_refs(struct bfq_queue *bfqq)
{
return bfqq->ref - bfqq->allocated - bfqq->entity.on_st -
return bfqq->ref - bfqq->allocated - bfqq->entity.on_st_or_in_serv -
(bfqq->weight_counter != NULL);
}
......@@ -3443,6 +3447,10 @@ static void bfq_dispatch_remove(struct request_queue *q, struct request *rq)
static bool idling_needed_for_service_guarantees(struct bfq_data *bfqd,
struct bfq_queue *bfqq)
{
/* No point in idling for bfqq if it won't get requests any longer */
if (unlikely(!bfqq_process_refs(bfqq)))
return false;
return (bfqq->wr_coeff > 1 &&
(bfqd->wr_busy_queues <
bfq_tot_busy_queues(bfqd) ||
......@@ -4076,6 +4084,10 @@ static bool idling_boosts_thr_without_issues(struct bfq_data *bfqd,
bfqq_sequential_and_IO_bound,
idling_boosts_thr;
/* No point in idling for bfqq if it won't get requests any longer */
if (unlikely(!bfqq_process_refs(bfqq)))
return false;
bfqq_sequential_and_IO_bound = !BFQQ_SEEKY(bfqq) &&
bfq_bfqq_IO_bound(bfqq) && bfq_bfqq_has_short_ttime(bfqq);
......@@ -4169,6 +4181,10 @@ static bool bfq_better_to_idle(struct bfq_queue *bfqq)
struct bfq_data *bfqd = bfqq->bfqd;
bool idling_boosts_thr_with_no_issue, idling_needed_for_service_guar;
/* No point in idling for bfqq if it won't get requests any longer */
if (unlikely(!bfqq_process_refs(bfqq)))
return false;
if (unlikely(bfqd->strict_guarantees))
return true;
......@@ -4809,9 +4825,7 @@ void bfq_put_queue(struct bfq_queue *bfqq)
{
struct bfq_queue *item;
struct hlist_node *n;
#ifdef CONFIG_BFQ_GROUP_IOSCHED
struct bfq_group *bfqg = bfqq_group(bfqq);
#endif
if (bfqq->bfqd)
bfq_log_bfqq(bfqq->bfqd, bfqq, "put_queue: %p %d",
......@@ -4884,9 +4898,7 @@ void bfq_put_queue(struct bfq_queue *bfqq)
bfqq->bfqd->last_completed_rq_bfqq = NULL;
kmem_cache_free(bfq_pool, bfqq);
#ifdef CONFIG_BFQ_GROUP_IOSCHED
bfqg_and_blkg_put(bfqg);
#endif
}
static void bfq_put_cooperator(struct bfq_queue *bfqq)
......@@ -5967,6 +5979,8 @@ static void bfq_finish_requeue_request(struct request *rq)
}
/*
* Removes the association between the current task and bfqq, assuming
* that bic points to the bfq iocontext of the task.
* Returns NULL if a new bfqq should be allocated, or the old bfqq if this
* was the last process referring to that bfqq.
*/
......@@ -6374,10 +6388,10 @@ static void bfq_exit_queue(struct elevator_queue *e)
hrtimer_cancel(&bfqd->idle_slice_timer);
#ifdef CONFIG_BFQ_GROUP_IOSCHED
/* release oom-queue reference to root group */
bfqg_and_blkg_put(bfqd->root_group);
#ifdef CONFIG_BFQ_GROUP_IOSCHED
blkcg_deactivate_policy(bfqd->queue, &blkcg_policy_bfq);
#else
spin_lock_irq(&bfqd->lock);
......
......@@ -150,7 +150,7 @@ struct bfq_entity {
* Flag, true if the entity is on a tree (either the active or
* the idle one of its service_tree) or is in service.
*/
bool on_st;
bool on_st_or_in_serv;
/* B-WF2Q+ start and finish timestamps [sectors/weight] */
u64 start, finish;
......@@ -921,6 +921,7 @@ struct bfq_group {
#else
struct bfq_group {
struct bfq_entity entity;
struct bfq_sched_data sched_data;
struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR];
......@@ -984,6 +985,7 @@ struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg);
struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node);
void bfqg_and_blkg_get(struct bfq_group *bfqg);
void bfqg_and_blkg_put(struct bfq_group *bfqg);
#ifdef CONFIG_BFQ_GROUP_IOSCHED
......
......@@ -533,7 +533,9 @@ static void bfq_get_entity(struct bfq_entity *entity)
bfqq->ref++;
bfq_log_bfqq(bfqq->bfqd, bfqq, "get_entity: %p %d",
bfqq, bfqq->ref);
}
} else
bfqg_and_blkg_get(container_of(entity, struct bfq_group,
entity));
}
/**
......@@ -645,10 +647,16 @@ static void bfq_forget_entity(struct bfq_service_tree *st,
{
struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
entity->on_st = false;
entity->on_st_or_in_serv = false;
st->wsum -= entity->weight;
if (bfqq && !is_in_service)
if (is_in_service)
return;
if (bfqq)
bfq_put_queue(bfqq);
else
bfqg_and_blkg_put(container_of(entity, struct bfq_group,
entity));
}
/**
......@@ -999,7 +1007,7 @@ static void __bfq_activate_entity(struct bfq_entity *entity,
*/
bfq_get_entity(entity);
entity->on_st = true;
entity->on_st_or_in_serv = true;
}
#ifdef CONFIG_BFQ_GROUP_IOSCHED
......@@ -1165,7 +1173,10 @@ bool __bfq_deactivate_entity(struct bfq_entity *entity, bool ins_into_idle_tree)
struct bfq_service_tree *st;
bool is_in_service;
if (!entity->on_st) /* entity never activated, or already inactive */
if (!entity->on_st_or_in_serv) /*
* entity never activated, or
* already inactive
*/
return false;
/*
......@@ -1620,7 +1631,7 @@ bool __bfq_bfqd_reset_in_service(struct bfq_data *bfqd)
* service tree either, then release the service reference to
* the queue it represents (taken with bfq_get_entity).
*/
if (!in_serv_entity->on_st) {
if (!in_serv_entity->on_st_or_in_serv) {
/*
* If no process is referencing in_serv_bfqq any
* longer, then the service reference may be the only
......
......@@ -473,6 +473,25 @@ static struct kobject *brd_probe(dev_t dev, int *part, void *data)
return kobj;
}
static inline void brd_check_and_reset_par(void)
{
if (unlikely(!max_part))
max_part = 1;
/*
* make sure 'max_part' can be divided exactly by (1U << MINORBITS),
* otherwise, it is possiable to get same dev_t when adding partitions.
*/
if ((1U << MINORBITS) % max_part != 0)
max_part = 1UL << fls(max_part);
if (max_part > DISK_MAX_PARTS) {
pr_info("brd: max_part can't be larger than %d, reset max_part = %d.\n",
DISK_MAX_PARTS, DISK_MAX_PARTS);
max_part = DISK_MAX_PARTS;
}
}
static int __init brd_init(void)
{
struct brd_device *brd, *next;
......@@ -496,8 +515,7 @@ static int __init brd_init(void)
if (register_blkdev(RAMDISK_MAJOR, "ramdisk"))
return -EIO;
if (unlikely(!max_part))
max_part = 1;
brd_check_and_reset_par();
for (i = 0; i < rd_nr; i++) {
brd = brd_alloc(i);
......
......@@ -622,7 +622,7 @@ struct fifo_buffer {
int total; /* sum of all values */
int values[0];
};
extern struct fifo_buffer *fifo_alloc(int fifo_size);
extern struct fifo_buffer *fifo_alloc(unsigned int fifo_size);
/* flag bits per connection */
enum {
......
......@@ -1575,7 +1575,8 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
struct drbd_device *device;
struct disk_conf *new_disk_conf, *old_disk_conf;
struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
int err, fifo_size;
int err;
unsigned int fifo_size;
retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
if (!adm_ctx.reply_skb)
......
......@@ -3887,7 +3887,7 @@ static int receive_SyncParam(struct drbd_connection *connection, struct packet_i
struct disk_conf *old_disk_conf = NULL, *new_disk_conf = NULL;
const int apv = connection->agreed_pro_version;
struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
int fifo_size = 0;
unsigned int fifo_size = 0;
int err;
peer_device = conn_peer_device(connection, pi->vnr);
......
......@@ -482,11 +482,11 @@ static void fifo_add_val(struct fifo_buffer *fb, int value)
fb->values[i] += value;
}
struct fifo_buffer *fifo_alloc(int fifo_size)
struct fifo_buffer *fifo_alloc(unsigned int fifo_size)
{
struct fifo_buffer *fb;
fb = kzalloc(sizeof(struct fifo_buffer) + sizeof(int) * fifo_size, GFP_NOIO);
fb = kzalloc(struct_size(fb, values, fifo_size), GFP_NOIO);
if (!fb)
return NULL;
......
......@@ -1265,6 +1265,16 @@ static int nbd_start_device(struct nbd_device *nbd)
args = kzalloc(sizeof(*args), GFP_KERNEL);
if (!args) {
sock_shutdown(nbd);
/*
* If num_connections is m (2 < m),
* and NO.1 ~ NO.n(1 < n < m) kzallocs are successful.
* But NO.(n + 1) failed. We still have n recv threads.
* So, add flush_workqueue here to prevent recv threads
* dropping the last config_refs and trying to destroy
* the workqueue from inside the workqueue.
*/
if (i)
flush_workqueue(nbd->recv_workq);
return -ENOMEM;
}
sk_set_memalloc(config->socks[i]->sock->sk);
......
......@@ -151,9 +151,6 @@ MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the
#define BLK_RING_SIZE(info) \
__CONST_RING_SIZE(blkif, XEN_PAGE_SIZE * (info)->nr_ring_pages)
#define BLK_MAX_RING_SIZE \
__CONST_RING_SIZE(blkif, XEN_PAGE_SIZE * XENBUS_MAX_RING_GRANTS)
/*
* ring-ref%u i=(-1UL) would take 11 characters + 'ring-ref' is 8, so 19
* characters are enough. Define to 20 to keep consistent with backend.
......@@ -177,12 +174,12 @@ struct blkfront_ring_info {
unsigned int evtchn, irq;
struct work_struct work;
struct gnttab_free_callback callback;
struct blk_shadow shadow[BLK_MAX_RING_SIZE];
struct list_head indirect_pages;
struct list_head grants;
unsigned int persistent_gnts_c;
unsigned long shadow_free;
struct blkfront_info *dev_info;
struct blk_shadow shadow[];
};
/*
......@@ -1915,7 +1912,8 @@ static int negotiate_mq(struct blkfront_info *info)
info->nr_rings = 1;
info->rinfo = kvcalloc(info->nr_rings,
sizeof(struct blkfront_ring_info),
struct_size(info->rinfo, shadow,
BLK_RING_SIZE(info)),
GFP_KERNEL);
if (!info->rinfo) {
xenbus_dev_fatal(info->xbdev, -ENOMEM, "allocating ring_info structure");
......
......@@ -330,6 +330,9 @@ struct cached_dev {
*/
atomic_t has_dirty;
#define BCH_CACHE_READA_ALL 0
#define BCH_CACHE_READA_META_ONLY 1
unsigned int cache_readahead_policy;
struct bch_ratelimit writeback_rate;
struct delayed_work writeback_rate_update;
......
......@@ -397,7 +397,8 @@ void bch_btree_keys_stats(struct btree_keys *b, struct bset_stats *state);
/* Bkey utility code */
#define bset_bkey_last(i) bkey_idx((struct bkey *) (i)->d, (i)->keys)
#define bset_bkey_last(i) bkey_idx((struct bkey *) (i)->d, \
(unsigned int)(i)->keys)
static inline struct bkey *bset_bkey_idx(struct bset *i, unsigned int idx)
{
......
......@@ -422,7 +422,8 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list)
static void btree_flush_write(struct cache_set *c)
{
struct btree *b, *t, *btree_nodes[BTREE_FLUSH_NR];
unsigned int i, nr, ref_nr;
unsigned int i, nr;
int ref_nr;
atomic_t *fifo_front_p, *now_fifo_front_p;
size_t mask;
......
......@@ -379,13 +379,20 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio)
goto skip;
/*
* Flag for bypass if the IO is for read-ahead or background,
* unless the read-ahead request is for metadata
* If the bio is for read-ahead or background IO, bypass it or
* not depends on the following situations,
* - If the IO is for meta data, always cache it and no bypass
* - If the IO is not meta data, check dc->cache_reada_policy,
* BCH_CACHE_READA_ALL: cache it and not bypass
* BCH_CACHE_READA_META_ONLY: not cache it and bypass
* That is, read-ahead request for metadata always get cached
* (eg, for gfs2 or xfs).
*/
if (bio->bi_opf & (REQ_RAHEAD|REQ_BACKGROUND) &&
!(bio->bi_opf & (REQ_META|REQ_PRIO)))
goto skip;
if ((bio->bi_opf & (REQ_RAHEAD|REQ_BACKGROUND))) {
if (!(bio->bi_opf & (REQ_META|REQ_PRIO)) &&
(dc->cache_readahead_policy != BCH_CACHE_READA_ALL))
goto skip;
}
if (bio->bi_iter.bi_sector & (c->sb.block_size - 1) ||
bio_sectors(bio) & (c->sb.block_size - 1)) {
......
......@@ -109,9 +109,13 @@ int bch_cache_accounting_add_kobjs(struct cache_accounting *acc,
void bch_cache_accounting_clear(struct cache_accounting *acc)
{
memset(&acc->total.cache_hits,
0,
sizeof(struct cache_stats));
acc->total.cache_hits = 0;
acc->total.cache_misses = 0;
acc->total.cache_bypass_hits = 0;
acc->total.cache_bypass_misses = 0;
acc->total.cache_readaheads = 0;
acc->total.cache_miss_collisions = 0;
acc->total.sectors_bypassed = 0;
}
void bch_cache_accounting_destroy(struct cache_accounting *acc)
......
......@@ -609,12 +609,13 @@ int bch_prio_write(struct cache *ca, bool wait)
return 0;
}
static void prio_read(struct cache *ca, uint64_t bucket)
static int prio_read(struct cache *ca, uint64_t bucket)
{
struct prio_set *p = ca->disk_buckets;
struct bucket_disk *d = p->data + prios_per_bucket(ca), *end = d;
struct bucket *b;
unsigned int bucket_nr = 0;
int ret = -EIO;
for (b = ca->buckets;
b < ca->buckets + ca->sb.nbuckets;
......@@ -627,11 +628,15 @@ static void prio_read(struct cache *ca, uint64_t bucket)
prio_io(ca, bucket, REQ_OP_READ, 0);
if (p->csum !=
bch_crc64(&p->magic, bucket_bytes(ca) - 8))
bch_crc64(&p->magic, bucket_bytes(ca) - 8)) {
pr_warn("bad csum reading priorities");
goto out;
}
if (p->magic != pset_magic(&ca->sb))
if (p->magic != pset_magic(&ca->sb)) {
pr_warn("bad magic reading priorities");
goto out;
}
bucket = p->next_bucket;
d = p->data;
......@@ -640,6 +645,10 @@ static void prio_read(struct cache *ca, uint64_t bucket)
b->prio = le16_to_cpu(d->prio);
b->gen = b->last_gc = d->gen;
}
ret = 0;
out:
return ret;
}
/* Bcache device */
......@@ -1873,8 +1882,10 @@ static int run_cache_set(struct cache_set *c)
j = &list_entry(journal.prev, struct journal_replay, list)->j;
err = "IO error reading priorities";
for_each_cache(ca, c, i)
prio_read(ca, j->prio_bucket[ca->sb.nr_this_dev]);
for_each_cache(ca, c, i) {
if (prio_read(ca, j->prio_bucket[ca->sb.nr_this_dev]))
goto err;
}
/*
* If prio_read() fails it'll call cache_set_error and we'll
......
......@@ -27,6 +27,12 @@ static const char * const bch_cache_modes[] = {
NULL
};
static const char * const bch_reada_cache_policies[] = {
"all",
"meta-only",
NULL
};
/* Default is 0 ("auto") */
static const char * const bch_stop_on_failure_modes[] = {
"auto",
......@@ -100,6 +106,7 @@ rw_attribute(congested_write_threshold_us);
rw_attribute(sequential_cutoff);
rw_attribute(data_csum);
rw_attribute(cache_mode);
rw_attribute(readahead_cache_policy);
rw_attribute(stop_when_cache_set_failed);
rw_attribute(writeback_metadata);
rw_attribute(writeback_running);
......@@ -168,6 +175,11 @@ SHOW(__bch_cached_dev)
bch_cache_modes,
BDEV_CACHE_MODE(&dc->sb));
if (attr == &sysfs_readahead_cache_policy)
return bch_snprint_string_list(buf, PAGE_SIZE,
bch_reada_cache_policies,
dc->cache_readahead_policy);
if (attr == &sysfs_stop_when_cache_set_failed)
return bch_snprint_string_list(buf, PAGE_SIZE,
bch_stop_on_failure_modes,
......@@ -353,6 +365,15 @@ STORE(__cached_dev)
}
}
if (attr == &sysfs_readahead_cache_policy) {
v = __sysfs_match_string(bch_reada_cache_policies, -1, buf);
if (v < 0)
return v;
if ((unsigned int) v != dc->cache_readahead_policy)
dc->cache_readahead_policy = v;
}
if (attr == &sysfs_stop_when_cache_set_failed) {
v = __sysfs_match_string(bch_stop_on_failure_modes, -1, buf);
if (v < 0)
......@@ -467,6 +488,7 @@ static struct attribute *bch_cached_dev_files[] = {
&sysfs_data_csum,
#endif
&sysfs_cache_mode,
&sysfs_readahead_cache_policy,
&sysfs_stop_when_cache_set_failed,
&sysfs_writeback_metadata,
&sysfs_writeback_running,
......
......@@ -167,7 +167,6 @@ struct nvme_queue {
/* only used for poll queues: */
spinlock_t cq_poll_lock ____cacheline_aligned_in_smp;
volatile struct nvme_completion *cqes;
struct blk_mq_tags **tags;
dma_addr_t sq_dma_addr;
dma_addr_t cq_dma_addr;
u32 __iomem *q_db;
......@@ -376,29 +375,17 @@ static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
WARN_ON(hctx_idx != 0);
WARN_ON(dev->admin_tagset.tags[0] != hctx->tags);
WARN_ON(nvmeq->tags);
hctx->driver_data = nvmeq;
nvmeq->tags = &dev->admin_tagset.tags[0];
return 0;
}
static void nvme_admin_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
{
struct nvme_queue *nvmeq = hctx->driver_data;
nvmeq->tags = NULL;
}
static int nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
unsigned int hctx_idx)
{
struct nvme_dev *dev = data;
struct nvme_queue *nvmeq = &dev->queues[hctx_idx + 1];
if (!nvmeq->tags)
nvmeq->tags = &dev->tagset.tags[hctx_idx];
WARN_ON(dev->tagset.tags[hctx_idx] != hctx->tags);
hctx->driver_data = nvmeq;
return 0;
......@@ -948,6 +935,13 @@ static inline void nvme_ring_cq_doorbell(struct nvme_queue *nvmeq)
writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
}
static inline struct blk_mq_tags *nvme_queue_tagset(struct nvme_queue *nvmeq)
{
if (!nvmeq->qid)
return nvmeq->dev->admin_tagset.tags[0];
return nvmeq->dev->tagset.tags[nvmeq->qid - 1];
}
static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx)
{
volatile struct nvme_completion *cqe = &nvmeq->cqes[idx];
......@@ -972,7 +966,7 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx)
return;
}
req = blk_mq_tag_to_rq(*nvmeq->tags, cqe->command_id);
req = blk_mq_tag_to_rq(nvme_queue_tagset(nvmeq), cqe->command_id);
trace_nvme_sq(req, cqe->sq_head, nvmeq->sq_tail);
nvme_end_request(req, cqe->status, cqe->result);
}
......@@ -1572,7 +1566,6 @@ static const struct blk_mq_ops nvme_mq_admin_ops = {
.queue_rq = nvme_queue_rq,
.complete = nvme_pci_complete_rq,
.init_hctx = nvme_admin_init_hctx,
.exit_hctx = nvme_admin_exit_hctx,
.init_request = nvme_init_request,
.timeout = nvme_timeout,
};
......
......@@ -129,27 +129,8 @@ static u32 nvmet_async_event_result(struct nvmet_async_event *aen)
return aen->event_type | (aen->event_info << 8) | (aen->log_page << 16);
}
static void nvmet_async_events_free(struct nvmet_ctrl *ctrl)
{
struct nvmet_req *req;
while (1) {
mutex_lock(&ctrl->lock);
if (!ctrl->nr_async_event_cmds) {
mutex_unlock(&ctrl->lock);
return;
}
req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
mutex_unlock(&ctrl->lock);
nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_SC_DNR);
}
}
static void nvmet_async_event_work(struct work_struct *work)
static void nvmet_async_events_process(struct nvmet_ctrl *ctrl, u16 status)
{
struct nvmet_ctrl *ctrl =
container_of(work, struct nvmet_ctrl, async_event_work);
struct nvmet_async_event *aen;
struct nvmet_req *req;
......@@ -159,18 +140,41 @@ static void nvmet_async_event_work(struct work_struct *work)
struct nvmet_async_event, entry);
if (!aen || !ctrl->nr_async_event_cmds) {
mutex_unlock(&ctrl->lock);
return;
break;
}
req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
nvmet_set_result(req, nvmet_async_event_result(aen));
if (status == 0)
nvmet_set_result(req, nvmet_async_event_result(aen));
list_del(&aen->entry);
kfree(aen);
mutex_unlock(&ctrl->lock);
nvmet_req_complete(req, 0);
nvmet_req_complete(req, status);
}
}
static void nvmet_async_events_free(struct nvmet_ctrl *ctrl)
{
struct nvmet_req *req;
mutex_lock(&ctrl->lock);
while (ctrl->nr_async_event_cmds) {
req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
mutex_unlock(&ctrl->lock);
nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_SC_DNR);
mutex_lock(&ctrl->lock);
}
mutex_unlock(&ctrl->lock);
}
static void nvmet_async_event_work(struct work_struct *work)
{
struct nvmet_ctrl *ctrl =
container_of(work, struct nvmet_ctrl, async_event_work);
nvmet_async_events_process(ctrl, 0);
}
void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type,
......@@ -555,7 +559,8 @@ int nvmet_ns_enable(struct nvmet_ns *ns)
} else {
struct nvmet_ns *old;
list_for_each_entry_rcu(old, &subsys->namespaces, dev_link) {
list_for_each_entry_rcu(old, &subsys->namespaces, dev_link,
lockdep_is_held(&subsys->lock)) {
BUG_ON(ns->nsid == old->nsid);
if (ns->nsid < old->nsid)
break;
......@@ -752,19 +757,24 @@ static void nvmet_confirm_sq(struct percpu_ref *ref)
void nvmet_sq_destroy(struct nvmet_sq *sq)
{
u16 status = NVME_SC_INTERNAL | NVME_SC_DNR;
struct nvmet_ctrl *ctrl = sq->ctrl;
/*
* If this is the admin queue, complete all AERs so that our
* queue doesn't have outstanding requests on it.
*/
if (sq->ctrl && sq->ctrl->sqs && sq->ctrl->sqs[0] == sq)
nvmet_async_events_free(sq->ctrl);
if (ctrl && ctrl->sqs && ctrl->sqs[0] == sq) {
nvmet_async_events_process(ctrl, status);
nvmet_async_events_free(ctrl);
}
percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq);
wait_for_completion(&sq->confirm_done);
wait_for_completion(&sq->free_done);
percpu_ref_exit(&sq->ref);
if (sq->ctrl) {
nvmet_ctrl_put(sq->ctrl);
if (ctrl) {
nvmet_ctrl_put(ctrl);
sq->ctrl = NULL; /* allows reusing the queue later */
}
}
......@@ -938,6 +948,17 @@ bool nvmet_check_data_len(struct nvmet_req *req, size_t data_len)
}
EXPORT_SYMBOL_GPL(nvmet_check_data_len);
bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len)
{
if (unlikely(data_len > req->transfer_len)) {
req->error_loc = offsetof(struct nvme_common_command, dptr);
nvmet_req_complete(req, NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR);
return false;
}
return true;
}
int nvmet_req_alloc_sgl(struct nvmet_req *req)
{
struct pci_dev *p2p_dev = NULL;
......@@ -1172,7 +1193,8 @@ static void nvmet_setup_p2p_ns_map(struct nvmet_ctrl *ctrl,
ctrl->p2p_client = get_device(req->p2p_client);
list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link)
list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link,
lockdep_is_held(&ctrl->subsys->lock))
nvmet_p2pmem_ns_add_p2p(ctrl, ns);
}
......
......@@ -109,6 +109,7 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req)
u16 qid = le16_to_cpu(c->qid);
u16 sqsize = le16_to_cpu(c->sqsize);
struct nvmet_ctrl *old;
u16 ret;
old = cmpxchg(&req->sq->ctrl, NULL, ctrl);
if (old) {
......@@ -119,7 +120,8 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req)
if (!sqsize) {
pr_warn("queue size zero!\n");
req->error_loc = offsetof(struct nvmf_connect_command, sqsize);
return NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
ret = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
goto err;
}
/* note: convert queue size from 0's-based value to 1's-based value */
......@@ -132,16 +134,19 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req)
}
if (ctrl->ops->install_queue) {
u16 ret = ctrl->ops->install_queue(req->sq);
ret = ctrl->ops->install_queue(req->sq);
if (ret) {
pr_err("failed to install queue %d cntlid %d ret %x\n",
qid, ret, ctrl->cntlid);
return ret;
qid, ctrl->cntlid, ret);
goto err;
}
}
return 0;
err:
req->sq->ctrl = NULL;
return ret;
}
static void nvmet_execute_admin_connect(struct nvmet_req *req)
......
......@@ -280,7 +280,7 @@ static void nvmet_bdev_execute_discard(struct nvmet_req *req)
static void nvmet_bdev_execute_dsm(struct nvmet_req *req)
{
if (!nvmet_check_data_len(req, nvmet_dsm_len(req)))
if (!nvmet_check_data_len_lte(req, nvmet_dsm_len(req)))
return;
switch (le32_to_cpu(req->cmd->dsm.attributes)) {
......
......@@ -336,7 +336,7 @@ static void nvmet_file_dsm_work(struct work_struct *w)
static void nvmet_file_execute_dsm(struct nvmet_req *req)
{
if (!nvmet_check_data_len(req, nvmet_dsm_len(req)))
if (!nvmet_check_data_len_lte(req, nvmet_dsm_len(req)))
return;
INIT_WORK(&req->f.work, nvmet_file_dsm_work);
schedule_work(&req->f.work);
......
......@@ -374,6 +374,7 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops);
void nvmet_req_uninit(struct nvmet_req *req);
bool nvmet_check_data_len(struct nvmet_req *req, size_t data_len);
bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len);
void nvmet_req_complete(struct nvmet_req *req, u16 status);
int nvmet_req_alloc_sgl(struct nvmet_req *req);
void nvmet_req_free_sgl(struct nvmet_req *req);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment