Commit 126e76ff authored by Linus Torvalds

Merge branch 'for-4.14/block-postmerge' of git://git.kernel.dk/linux-block

Pull followup block layer updates from Jens Axboe:
 "I ended up splitting the main pull request for this series into two,
  mainly because of clashes between NVMe fixes that went into 4.13 after
  the for-4.14 branches were split off. This pull request is mostly
  NVMe, but not exclusively. In detail, it contains:

   - Two pull requests for NVMe changes from Christoph. Nothing new on
     the feature front, basically just fixes all over the map for the
     core bits, transport, rdma, etc.

   - Series from Bart, cleaning up various bits in the BFQ scheduler.

   - Series of bcache fixes, which have been lingering for a release or
     two. Coly sent this in, but it contains patches from various people
     in this area.

   - Set of patches for BFQ from Paolo himself, updating both
     documentation and fixing some corner cases in performance.

   - Series from Omar, another attempt at getting the 4k loop support
     correct. Our confidence level is higher this time.

   - Series from Shaohua for loop as well, improving O_DIRECT
     performance and fixing a use-after-free"

* 'for-4.14/block-postmerge' of git://git.kernel.dk/linux-block: (74 commits)
  bcache: initialize dirty stripes in flash_dev_run()
  loop: set physical block size to logical block size
  bcache: fix bch_hprint crash and improve output
  bcache: Update continue_at() documentation
  bcache: silence static checker warning
  bcache: fix for gc and write-back race
  bcache: increase the number of open buckets
  bcache: Correct return value for sysfs attach errors
  bcache: correct cache_dirty_target in __update_writeback_rate()
  bcache: gc does not work when triggering by manual command
  bcache: Don't reinvent the wheel but use existing llist API
  bcache: do not subtract sectors_to_gc for bypassed IO
  bcache: fix sequential large write IO bypass
  bcache: Fix leak of bdev reference
  block/loop: remove unused field
  block/loop: fix use after free
  bfq: Use icq_to_bic() consistently
  bfq: Suppress compiler warnings about comparisons
  bfq: Check kstrtoul() return value
  bfq: Declare local functions static
  ...
parents fbd01410 175206cf
@@ -206,7 +206,7 @@ static void bfqg_get(struct bfq_group *bfqg)
 	bfqg->ref++;
 }
-void bfqg_put(struct bfq_group *bfqg)
+static void bfqg_put(struct bfq_group *bfqg)
 {
 	bfqg->ref--;
@@ -385,7 +385,7 @@ static struct bfq_group_data *blkcg_to_bfqgd(struct blkcg *blkcg)
 	return cpd_to_bfqgd(blkcg_to_cpd(blkcg, &blkcg_policy_bfq));
 }
-struct blkcg_policy_data *bfq_cpd_alloc(gfp_t gfp)
+static struct blkcg_policy_data *bfq_cpd_alloc(gfp_t gfp)
 {
 	struct bfq_group_data *bgd;
@@ -395,7 +395,7 @@ struct blkcg_policy_data *bfq_cpd_alloc(gfp_t gfp)
 	return &bgd->pd;
 }
-void bfq_cpd_init(struct blkcg_policy_data *cpd)
+static void bfq_cpd_init(struct blkcg_policy_data *cpd)
 {
 	struct bfq_group_data *d = cpd_to_bfqgd(cpd);
@@ -403,12 +403,12 @@ void bfq_cpd_init(struct blkcg_policy_data *cpd)
 		CGROUP_WEIGHT_DFL : BFQ_WEIGHT_LEGACY_DFL;
 }
-void bfq_cpd_free(struct blkcg_policy_data *cpd)
+static void bfq_cpd_free(struct blkcg_policy_data *cpd)
 {
 	kfree(cpd_to_bfqgd(cpd));
 }
-struct blkg_policy_data *bfq_pd_alloc(gfp_t gfp, int node)
+static struct blkg_policy_data *bfq_pd_alloc(gfp_t gfp, int node)
 {
 	struct bfq_group *bfqg;
@@ -426,7 +426,7 @@ struct blkg_policy_data *bfq_pd_alloc(gfp_t gfp, int node)
 	return &bfqg->pd;
 }
-void bfq_pd_init(struct blkg_policy_data *pd)
+static void bfq_pd_init(struct blkg_policy_data *pd)
 {
 	struct blkcg_gq *blkg = pd_to_blkg(pd);
 	struct bfq_group *bfqg = blkg_to_bfqg(blkg);
@@ -445,7 +445,7 @@ void bfq_pd_init(struct blkg_policy_data *pd)
 	bfqg->rq_pos_tree = RB_ROOT;
 }
-void bfq_pd_free(struct blkg_policy_data *pd)
+static void bfq_pd_free(struct blkg_policy_data *pd)
 {
 	struct bfq_group *bfqg = pd_to_bfqg(pd);
@@ -453,7 +453,7 @@ void bfq_pd_free(struct blkg_policy_data *pd)
 	bfqg_put(bfqg);
 }
-void bfq_pd_reset_stats(struct blkg_policy_data *pd)
+static void bfq_pd_reset_stats(struct blkg_policy_data *pd)
 {
 	struct bfq_group *bfqg = pd_to_bfqg(pd);
@@ -740,7 +740,7 @@ static void bfq_reparent_active_entities(struct bfq_data *bfqd,
 	 * blkio already grabs the queue_lock for us, so no need to use
 	 * RCU-based magic
 	 */
-void bfq_pd_offline(struct blkg_policy_data *pd)
+static void bfq_pd_offline(struct blkg_policy_data *pd)
 {
 	struct bfq_service_tree *st;
 	struct bfq_group *bfqg = pd_to_bfqg(pd);
...
@@ -239,7 +239,7 @@ static int T_slow[2];
 static int T_fast[2];
 static int device_speed_thresh[2];
-#define RQ_BIC(rq)	((struct bfq_io_cq *) (rq)->elv.priv[0])
+#define RQ_BIC(rq)	icq_to_bic((rq)->elv.priv[0])
 #define RQ_BFQQ(rq)	((rq)->elv.priv[1])
 struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, bool is_sync)
@@ -720,7 +720,7 @@ static void bfq_updated_next_req(struct bfq_data *bfqd,
 		entity->budget = new_budget;
 		bfq_log_bfqq(bfqd, bfqq, "updated next rq: new budget %lu",
 			     new_budget);
-		bfq_requeue_bfqq(bfqd, bfqq);
+		bfq_requeue_bfqq(bfqd, bfqq, false);
 	}
 }
@@ -2563,7 +2563,7 @@ static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq)
 		bfq_del_bfqq_busy(bfqd, bfqq, true);
 	} else {
-		bfq_requeue_bfqq(bfqd, bfqq);
+		bfq_requeue_bfqq(bfqd, bfqq, true);
 		/*
 		 * Resort priority tree of potential close cooperators.
 		 */
@@ -3780,6 +3780,7 @@ bfq_set_next_ioprio_data(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
 	default:
 		dev_err(bfqq->bfqd->queue->backing_dev_info->dev,
 			"bfq: bad prio class %d\n", ioprio_class);
+		/* fall through */
 	case IOPRIO_CLASS_NONE:
 		/*
 		 * No prio set, inherit CPU scheduling settings.
@@ -4801,13 +4802,15 @@ static ssize_t bfq_var_show(unsigned int var, char *page)
 	return sprintf(page, "%u\n", var);
 }
-static void bfq_var_store(unsigned long *var, const char *page)
+static int bfq_var_store(unsigned long *var, const char *page)
 {
 	unsigned long new_val;
 	int ret = kstrtoul(page, 10, &new_val);
-	if (ret == 0)
-		*var = new_val;
+	if (ret)
+		return ret;
+	*var = new_val;
+	return 0;
 }
 #define SHOW_FUNCTION(__FUNC, __VAR, __CONV)				\
@@ -4848,12 +4851,16 @@ static ssize_t \
 __FUNC(struct elevator_queue *e, const char *page, size_t count)	\
 {									\
 	struct bfq_data *bfqd = e->elevator_data;			\
-	unsigned long uninitialized_var(__data);			\
-	bfq_var_store(&__data, (page));					\
-	if (__data < (MIN))						\
-		__data = (MIN);						\
-	else if (__data > (MAX))					\
-		__data = (MAX);						\
+	unsigned long __data, __min = (MIN), __max = (MAX);		\
+	int ret;							\
+									\
+	ret = bfq_var_store(&__data, (page));				\
+	if (ret)							\
+		return ret;						\
+	if (__data < __min)						\
+		__data = __min;						\
+	else if (__data > __max)					\
+		__data = __max;						\
 	if (__CONV == 1)						\
 		*(__PTR) = msecs_to_jiffies(__data);			\
 	else if (__CONV == 2)						\
@@ -4876,12 +4883,16 @@ STORE_FUNCTION(bfq_slice_idle_store, &bfqd->bfq_slice_idle, 0, INT_MAX, 2);
 static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count)\
 {									\
 	struct bfq_data *bfqd = e->elevator_data;			\
-	unsigned long uninitialized_var(__data);			\
-	bfq_var_store(&__data, (page));					\
-	if (__data < (MIN))						\
-		__data = (MIN);						\
-	else if (__data > (MAX))					\
-		__data = (MAX);						\
+	unsigned long __data, __min = (MIN), __max = (MAX);		\
+	int ret;							\
+									\
+	ret = bfq_var_store(&__data, (page));				\
+	if (ret)							\
+		return ret;						\
+	if (__data < __min)						\
+		__data = __min;						\
+	else if (__data > __max)					\
+		__data = __max;						\
 	*(__PTR) = (u64)__data * NSEC_PER_USEC;				\
 	return count;							\
 }
@@ -4893,9 +4904,12 @@ static ssize_t bfq_max_budget_store(struct elevator_queue *e,
 				    const char *page, size_t count)
 {
 	struct bfq_data *bfqd = e->elevator_data;
-	unsigned long uninitialized_var(__data);
+	unsigned long __data;
+	int ret;
-	bfq_var_store(&__data, (page));
+	ret = bfq_var_store(&__data, (page));
+	if (ret)
+		return ret;
 	if (__data == 0)
 		bfqd->bfq_max_budget = bfq_calc_max_budget(bfqd);
@@ -4918,9 +4932,12 @@ static ssize_t bfq_timeout_sync_store(struct elevator_queue *e,
 				      const char *page, size_t count)
 {
 	struct bfq_data *bfqd = e->elevator_data;
-	unsigned long uninitialized_var(__data);
+	unsigned long __data;
+	int ret;
-	bfq_var_store(&__data, (page));
+	ret = bfq_var_store(&__data, (page));
+	if (ret)
+		return ret;
 	if (__data < 1)
 		__data = 1;
@@ -4938,9 +4955,12 @@ static ssize_t bfq_strict_guarantees_store(struct elevator_queue *e,
 					   const char *page, size_t count)
 {
 	struct bfq_data *bfqd = e->elevator_data;
-	unsigned long uninitialized_var(__data);
+	unsigned long __data;
+	int ret;
-	bfq_var_store(&__data, (page));
+	ret = bfq_var_store(&__data, (page));
+	if (ret)
+		return ret;
 	if (__data > 1)
 		__data = 1;
@@ -4957,9 +4977,12 @@ static ssize_t bfq_low_latency_store(struct elevator_queue *e,
 				     const char *page, size_t count)
 {
 	struct bfq_data *bfqd = e->elevator_data;
-	unsigned long uninitialized_var(__data);
+	unsigned long __data;
+	int ret;
-	bfq_var_store(&__data, (page));
+	ret = bfq_var_store(&__data, (page));
+	if (ret)
+		return ret;
 	if (__data > 1)
 		__data = 1;
...
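Note on the sysfs changes above: bfq_var_store() now propagates the kstrtoul() error instead of silently leaving the tunable untouched, so malformed input is reported back to the writer. A minimal userspace sketch of the user-visible effect follows; it is illustrative only and assumes a disk named sda that is currently using the bfq scheduler.

/* Illustrative sketch (not part of this series): writing a non-numeric
 * string to a BFQ tunable should now fail with EINVAL rather than be
 * ignored. The sysfs path is an assumption about the local setup.
 */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *path = "/sys/block/sda/queue/iosched/slice_idle";
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, "bogus", 5) < 0)
		printf("write rejected: %s\n", strerror(errno)); /* expect EINVAL */
	close(fd);
	return 0;
}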
@@ -817,7 +817,6 @@ extern const int bfq_timeout;
 struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, bool is_sync);
 void bic_set_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq, bool is_sync);
 struct bfq_data *bic_to_bfqd(struct bfq_io_cq *bic);
-void bfq_requeue_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq);
 void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq);
 void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_entity *entity,
 			  struct rb_root *root);
@@ -917,7 +916,8 @@ void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd);
 void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 			 bool ins_into_idle_tree, bool expiration);
 void bfq_activate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq);
-void bfq_requeue_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq);
+void bfq_requeue_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+		      bool expiration);
 void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 		       bool expiration);
 void bfq_add_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq);
...
@@ -44,7 +44,8 @@ static unsigned int bfq_class_idx(struct bfq_entity *entity)
 		BFQ_DEFAULT_GRP_CLASS - 1;
 }
-static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd);
+static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,
+						 bool expiration);
 static bool bfq_update_parent_budget(struct bfq_entity *next_in_service);
@@ -54,6 +55,8 @@ static bool bfq_update_parent_budget(struct bfq_entity *next_in_service);
  * @new_entity: if not NULL, pointer to the entity whose activation,
  *		requeueing or repositionig triggered the invocation of
  *		this function.
+ * @expiration: id true, this function is being invoked after the
+ *		expiration of the in-service entity
  *
  * This function is called to update sd->next_in_service, which, in
  * its turn, may change as a consequence of the insertion or
@@ -72,19 +75,20 @@ static bool bfq_update_parent_budget(struct bfq_entity *next_in_service);
  * entity.
  */
 static bool bfq_update_next_in_service(struct bfq_sched_data *sd,
-				       struct bfq_entity *new_entity)
+				       struct bfq_entity *new_entity,
+				       bool expiration)
 {
 	struct bfq_entity *next_in_service = sd->next_in_service;
 	bool parent_sched_may_change = false;
+	bool change_without_lookup = false;
 	/*
 	 * If this update is triggered by the activation, requeueing
 	 * or repositiong of an entity that does not coincide with
 	 * sd->next_in_service, then a full lookup in the active tree
 	 * can be avoided. In fact, it is enough to check whether the
-	 * just-modified entity has a higher priority than
-	 * sd->next_in_service, or, even if it has the same priority
-	 * as sd->next_in_service, is eligible and has a lower virtual
+	 * just-modified entity has the same priority as
+	 * sd->next_in_service, is eligible and has a lower virtual
 	 * finish time than sd->next_in_service. If this compound
 	 * condition holds, then the new entity becomes the new
 	 * next_in_service. Otherwise no change is needed.
@@ -96,13 +100,12 @@ static bool bfq_update_next_in_service(struct bfq_sched_data *sd,
 		 * set to true, and left as true if
 		 * sd->next_in_service is NULL.
 		 */
-		bool replace_next = true;
+		change_without_lookup = true;
 		/*
 		 * If there is already a next_in_service candidate
-		 * entity, then compare class priorities or timestamps
-		 * to decide whether to replace sd->service_tree with
-		 * new_entity.
+		 * entity, then compare timestamps to decide whether
+		 * to replace sd->service_tree with new_entity.
 		 */
 		if (next_in_service) {
 			unsigned int new_entity_class_idx =
@@ -110,32 +113,26 @@ static bool bfq_update_next_in_service(struct bfq_sched_data *sd,
 			struct bfq_service_tree *st =
 				sd->service_tree + new_entity_class_idx;
-			/*
-			 * For efficiency, evaluate the most likely
-			 * sub-condition first.
-			 */
-			replace_next =
+			change_without_lookup =
 				(new_entity_class_idx ==
 				 bfq_class_idx(next_in_service)
 				 &&
 				 !bfq_gt(new_entity->start, st->vtime)
 				 &&
 				 bfq_gt(next_in_service->finish,
-					new_entity->finish))
-				||
-				new_entity_class_idx <
-				bfq_class_idx(next_in_service);
+					new_entity->finish));
 		}
-		if (replace_next)
+		if (change_without_lookup)
 			next_in_service = new_entity;
-	} else /* invoked because of a deactivation: lookup needed */
-		next_in_service = bfq_lookup_next_entity(sd);
+	}
+	if (!change_without_lookup) /* lookup needed */
+		next_in_service = bfq_lookup_next_entity(sd, expiration);
-	if (next_in_service) {
+	if (next_in_service)
 		parent_sched_may_change = !sd->next_in_service ||
 			bfq_update_parent_budget(next_in_service);
-	}
 	sd->next_in_service = next_in_service;
@@ -1127,10 +1124,12 @@ static void __bfq_activate_requeue_entity(struct bfq_entity *entity,
  * @requeue: true if this is a requeue, which implies that bfqq is
  * being expired; thus ALL its ancestors stop being served and must
  * therefore be requeued
+ * @expiration: true if this function is being invoked in the expiration path
+ *             of the in-service queue
  */
 static void bfq_activate_requeue_entity(struct bfq_entity *entity,
 					bool non_blocking_wait_rq,
-					bool requeue)
+					bool requeue, bool expiration)
 {
 	struct bfq_sched_data *sd;
@@ -1138,7 +1137,8 @@ static void bfq_activate_requeue_entity(struct bfq_entity *entity,
 		sd = entity->sched_data;
 		__bfq_activate_requeue_entity(entity, sd, non_blocking_wait_rq);
-		if (!bfq_update_next_in_service(sd, entity) && !requeue)
+		if (!bfq_update_next_in_service(sd, entity, expiration) &&
+		    !requeue)
 			break;
 	}
 }
@@ -1194,6 +1194,8 @@ bool __bfq_deactivate_entity(struct bfq_entity *entity, bool ins_into_idle_tree)
 * bfq_deactivate_entity - deactivate an entity representing a bfq_queue.
 * @entity: the entity to deactivate.
 * @ins_into_idle_tree: true if the entity can be put into the idle tree
+ * @expiration: true if this function is being invoked in the expiration path
+ *             of the in-service queue
 */
 static void bfq_deactivate_entity(struct bfq_entity *entity,
 				  bool ins_into_idle_tree,
@@ -1222,7 +1224,7 @@ static void bfq_deactivate_entity(struct bfq_entity *entity,
 			 * then, since entity has just been
 			 * deactivated, a new one must be found.
 			 */
-			bfq_update_next_in_service(sd, NULL);
+			bfq_update_next_in_service(sd, NULL, expiration);
 		if (sd->next_in_service || sd->in_service_entity) {
 			/*
@@ -1281,7 +1283,7 @@ static void bfq_deactivate_entity(struct bfq_entity *entity,
 		__bfq_requeue_entity(entity);
 		sd = entity->sched_data;
-		if (!bfq_update_next_in_service(sd, entity) &&
+		if (!bfq_update_next_in_service(sd, entity, expiration) &&
 		    !expiration)
 			/*
 			 * next_in_service unchanged or not causing
@@ -1416,12 +1418,14 @@ __bfq_lookup_next_entity(struct bfq_service_tree *st, bool in_service)
 /**
  * bfq_lookup_next_entity - return the first eligible entity in @sd.
  * @sd: the sched_data.
+ * @expiration: true if we are on the expiration path of the in-service queue
  *
  * This function is invoked when there has been a change in the trees
- * for sd, and we need know what is the new next entity after this
- * change.
+ * for sd, and we need to know what is the new next entity to serve
+ * after this change.
  */
-static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd)
+static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,
+						 bool expiration)
 {
 	struct bfq_service_tree *st = sd->service_tree;
 	struct bfq_service_tree *idle_class_st = st + (BFQ_IOPRIO_CLASSES - 1);
@@ -1448,8 +1452,24 @@ static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd)
 	 * class, unless the idle class needs to be served.
 	 */
 	for (; class_idx < BFQ_IOPRIO_CLASSES; class_idx++) {
+		/*
+		 * If expiration is true, then bfq_lookup_next_entity
+		 * is being invoked as a part of the expiration path
+		 * of the in-service queue. In this case, even if
+		 * sd->in_service_entity is not NULL,
+		 * sd->in_service_entiy at this point is actually not
+		 * in service any more, and, if needed, has already
+		 * been properly queued or requeued into the right
+		 * tree. The reason why sd->in_service_entity is still
+		 * not NULL here, even if expiration is true, is that
+		 * sd->in_service_entiy is reset as a last step in the
+		 * expiration path. So, if expiration is true, tell
+		 * __bfq_lookup_next_entity that there is no
+		 * sd->in_service_entity.
+		 */
 		entity = __bfq_lookup_next_entity(st + class_idx,
-						  sd->in_service_entity);
+						  sd->in_service_entity &&
+						  !expiration);
 		if (entity)
 			break;
@@ -1562,7 +1582,7 @@ struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd)
 	for_each_entity(entity) {
 		struct bfq_sched_data *sd = entity->sched_data;
-		if (!bfq_update_next_in_service(sd, NULL))
+		if (!bfq_update_next_in_service(sd, NULL, false))
 			break;
 	}
@@ -1610,16 +1630,17 @@ void bfq_activate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
 	struct bfq_entity *entity = &bfqq->entity;
 	bfq_activate_requeue_entity(entity, bfq_bfqq_non_blocking_wait_rq(bfqq),
-				    false);
+				    false, false);
 	bfq_clear_bfqq_non_blocking_wait_rq(bfqq);
 }
-void bfq_requeue_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+void bfq_requeue_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+		      bool expiration)
 {
 	struct bfq_entity *entity = &bfqq->entity;
 	bfq_activate_requeue_entity(entity, false,
-				    bfqq == bfqd->in_service_queue);
+				    bfqq == bfqd->in_service_queue, expiration);
 }
 /*
...
@@ -213,10 +213,13 @@ static void __loop_update_dio(struct loop_device *lo, bool dio)
 	 */
 	blk_mq_freeze_queue(lo->lo_queue);
 	lo->use_dio = use_dio;
-	if (use_dio)
+	if (use_dio) {
+		queue_flag_clear_unlocked(QUEUE_FLAG_NOMERGES, lo->lo_queue);
 		lo->lo_flags |= LO_FLAGS_DIRECT_IO;
-	else
+	} else {
+		queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, lo->lo_queue);
 		lo->lo_flags &= ~LO_FLAGS_DIRECT_IO;
+	}
 	blk_mq_unfreeze_queue(lo->lo_queue);
 }
@@ -460,12 +463,21 @@ static void lo_complete_rq(struct request *rq)
 	blk_mq_end_request(rq, cmd->ret < 0 ? BLK_STS_IOERR : BLK_STS_OK);
 }
+static void lo_rw_aio_do_completion(struct loop_cmd *cmd)
+{
+	if (!atomic_dec_and_test(&cmd->ref))
+		return;
+	kfree(cmd->bvec);
+	cmd->bvec = NULL;
+	blk_mq_complete_request(cmd->rq);
+}
 static void lo_rw_aio_complete(struct kiocb *iocb, long ret, long ret2)
 {
 	struct loop_cmd *cmd = container_of(iocb, struct loop_cmd, iocb);
 	cmd->ret = ret;
-	blk_mq_complete_request(cmd->rq);
+	lo_rw_aio_do_completion(cmd);
 }
 static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
@@ -473,22 +485,51 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
 {
 	struct iov_iter iter;
 	struct bio_vec *bvec;
-	struct bio *bio = cmd->rq->bio;
+	struct request *rq = cmd->rq;
+	struct bio *bio = rq->bio;
 	struct file *file = lo->lo_backing_file;
+	unsigned int offset;
+	int segments = 0;
 	int ret;
-	/* nomerge for loop request queue */
-	WARN_ON(cmd->rq->bio != cmd->rq->biotail);
-	bvec = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter);
+	if (rq->bio != rq->biotail) {
+		struct req_iterator iter;
+		struct bio_vec tmp;
+		__rq_for_each_bio(bio, rq)
+			segments += bio_segments(bio);
+		bvec = kmalloc(sizeof(struct bio_vec) * segments, GFP_NOIO);
+		if (!bvec)
+			return -EIO;
+		cmd->bvec = bvec;
+		/*
+		 * The bios of the request may be started from the middle of
+		 * the 'bvec' because of bio splitting, so we can't directly
+		 * copy bio->bi_iov_vec to new bvec. The rq_for_each_segment
+		 * API will take care of all details for us.
+		 */
+		rq_for_each_segment(tmp, rq, iter) {
+			*bvec = tmp;
+			bvec++;
+		}
+		bvec = cmd->bvec;
+		offset = 0;
+	} else {
+		/*
+		 * Same here, this bio may be started from the middle of the
+		 * 'bvec' because of bio splitting, so offset from the bvec
+		 * must be passed to iov iterator
+		 */
+		offset = bio->bi_iter.bi_bvec_done;
+		bvec = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter);
+		segments = bio_segments(bio);
+	}
+	atomic_set(&cmd->ref, 2);
 	iov_iter_bvec(&iter, ITER_BVEC | rw, bvec,
-		      bio_segments(bio), blk_rq_bytes(cmd->rq));
-	/*
-	 * This bio may be started from the middle of the 'bvec'
-	 * because of bio splitting, so offset from the bvec must
-	 * be passed to iov iterator
-	 */
-	iter.iov_offset = bio->bi_iter.bi_bvec_done;
+		      segments, blk_rq_bytes(rq));
+	iter.iov_offset = offset;
 	cmd->iocb.ki_pos = pos;
 	cmd->iocb.ki_filp = file;
@@ -500,6 +541,8 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
 	else
 		ret = call_read_iter(file, &cmd->iocb, &iter);
+	lo_rw_aio_do_completion(cmd);
 	if (ret != -EIOCBQUEUED)
 		cmd->iocb.ki_complete(&cmd->iocb, ret, 0);
 	return 0;
@@ -546,74 +589,12 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq)
 	}
 }
-struct switch_request {
-	struct file *file;
-	struct completion wait;
-};
 static inline void loop_update_dio(struct loop_device *lo)
 {
 	__loop_update_dio(lo, io_is_direct(lo->lo_backing_file) |
 			lo->use_dio);
 }
-/*
- * Do the actual switch; called from the BIO completion routine
- */
-static void do_loop_switch(struct loop_device *lo, struct switch_request *p)
-{
-	struct file *file = p->file;
-	struct file *old_file = lo->lo_backing_file;
-	struct address_space *mapping;
-	/* if no new file, only flush of queued bios requested */
-	if (!file)
-		return;
-	mapping = file->f_mapping;
-	mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask);
-	lo->lo_backing_file = file;
-	lo->lo_blocksize = S_ISBLK(mapping->host->i_mode) ?
-		mapping->host->i_bdev->bd_block_size : PAGE_SIZE;
-	lo->old_gfp_mask = mapping_gfp_mask(mapping);
-	mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
-	loop_update_dio(lo);
-}
-/*
- * loop_switch performs the hard work of switching a backing store.
- * First it needs to flush existing IO, it does this by sending a magic
- * BIO down the pipe. The completion of this BIO does the actual switch.
- */
-static int loop_switch(struct loop_device *lo, struct file *file)
-{
-	struct switch_request w;
-	w.file = file;
-	/* freeze queue and wait for completion of scheduled requests */
-	blk_mq_freeze_queue(lo->lo_queue);
-	/* do the switch action */
-	do_loop_switch(lo, &w);
-	/* unfreeze */
-	blk_mq_unfreeze_queue(lo->lo_queue);
-	return 0;
-}
-/*
- * Helper to flush the IOs in loop, but keeping loop thread running
- */
-static int loop_flush(struct loop_device *lo)
-{
-	/* loop not yet configured, no running thread, nothing to flush */
-	if (lo->lo_state != Lo_bound)
-		return 0;
-	return loop_switch(lo, NULL);
-}
 static void loop_reread_partitions(struct loop_device *lo,
 				   struct block_device *bdev)
 {
@@ -678,9 +659,14 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
 		goto out_putf;
 	/* and ... switch */
-	error = loop_switch(lo, file);
-	if (error)
-		goto out_putf;
+	blk_mq_freeze_queue(lo->lo_queue);
+	mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask);
+	lo->lo_backing_file = file;
+	lo->old_gfp_mask = mapping_gfp_mask(file->f_mapping);
+	mapping_set_gfp_mask(file->f_mapping,
+			     lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
+	loop_update_dio(lo);
+	blk_mq_unfreeze_queue(lo->lo_queue);
 	fput(old_file);
 	if (lo->lo_flags & LO_FLAGS_PARTSCAN)
@@ -867,7 +853,6 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
 	struct file	*file, *f;
 	struct inode	*inode;
 	struct address_space *mapping;
-	unsigned	lo_blocksize;
 	int		lo_flags = 0;
 	int		error;
 	loff_t		size;
@@ -911,9 +896,6 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
 	    !file->f_op->write_iter)
 		lo_flags |= LO_FLAGS_READ_ONLY;
-	lo_blocksize = S_ISBLK(inode->i_mode) ?
-		inode->i_bdev->bd_block_size : PAGE_SIZE;
 	error = -EFBIG;
 	size = get_loop_size(lo, file);
 	if ((loff_t)(sector_t)size != size)
@@ -927,7 +909,6 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
 	set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0);
 	lo->use_dio = false;
-	lo->lo_blocksize = lo_blocksize;
 	lo->lo_device = bdev;
 	lo->lo_flags = lo_flags;
 	lo->lo_backing_file = file;
@@ -947,7 +928,8 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
 	/* let user-space know about the new size */
 	kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
-	set_blocksize(bdev, lo_blocksize);
+	set_blocksize(bdev, S_ISBLK(inode->i_mode) ?
+		      block_size(inode->i_bdev) : PAGE_SIZE);
 	lo->lo_state = Lo_bound;
 	if (part_shift)
@@ -1053,6 +1035,9 @@ static int loop_clr_fd(struct loop_device *lo)
 	memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE);
 	memset(lo->lo_crypt_name, 0, LO_NAME_SIZE);
 	memset(lo->lo_file_name, 0, LO_NAME_SIZE);
+	blk_queue_logical_block_size(lo->lo_queue, 512);
+	blk_queue_physical_block_size(lo->lo_queue, 512);
+	blk_queue_io_min(lo->lo_queue, 512);
 	if (bdev) {
 		bdput(bdev);
 		invalidate_bdev(bdev);
@@ -1336,6 +1321,26 @@ static int loop_set_dio(struct loop_device *lo, unsigned long arg)
 	return error;
 }
+static int loop_set_block_size(struct loop_device *lo, unsigned long arg)
+{
+	if (lo->lo_state != Lo_bound)
+		return -ENXIO;
+	if (arg < 512 || arg > PAGE_SIZE || !is_power_of_2(arg))
+		return -EINVAL;
+	blk_mq_freeze_queue(lo->lo_queue);
+	blk_queue_logical_block_size(lo->lo_queue, arg);
+	blk_queue_physical_block_size(lo->lo_queue, arg);
+	blk_queue_io_min(lo->lo_queue, arg);
+	loop_update_dio(lo);
+	blk_mq_unfreeze_queue(lo->lo_queue);
+	return 0;
+}
 static int lo_ioctl(struct block_device *bdev, fmode_t mode,
 	unsigned int cmd, unsigned long arg)
 {
@@ -1384,6 +1389,11 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode,
 		if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN))
 			err = loop_set_dio(lo, arg);
 		break;
+	case LOOP_SET_BLOCK_SIZE:
+		err = -EPERM;
+		if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN))
+			err = loop_set_block_size(lo, arg);
+		break;
 	default:
 		err = lo->ioctl ? lo->ioctl(lo, cmd, arg) : -EINVAL;
 	}
@@ -1583,12 +1593,13 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
 		err = loop_clr_fd(lo);
 		if (!err)
 			return;
-	} else {
+	} else if (lo->lo_state == Lo_bound) {
 		/*
 		 * Otherwise keep thread (if running) and config,
 		 * but flush possible ongoing bios in thread.
 		 */
-		loop_flush(lo);
+		blk_mq_freeze_queue(lo->lo_queue);
+		blk_mq_unfreeze_queue(lo->lo_queue);
 	}
 	mutex_unlock(&lo->lo_ctl_mutex);
@@ -1770,9 +1781,13 @@ static int loop_add(struct loop_device **l, int i)
 	}
 	lo->lo_queue->queuedata = lo;
+	blk_queue_max_hw_sectors(lo->lo_queue, BLK_DEF_MAX_SECTORS);
 	/*
-	 * It doesn't make sense to enable merge because the I/O
-	 * submitted to backing file is handled page by page.
+	 * By default, we do buffer IO, so it doesn't make sense to enable
+	 * merge because the I/O submitted to backing file is handled page by
+	 * page. For directio mode, merge does help to dispatch bigger request
+	 * to underlayer disk. We will enable merge once directio is enabled.
	 */
 	queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, lo->lo_queue);
...
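The new LOOP_SET_BLOCK_SIZE ioctl above only accepts a power of two between 512 and PAGE_SIZE, and only on an already-bound device. A hypothetical userspace sketch follows; it assumes /dev/loop0 is bound to a backing file and that <linux/loop.h> exports LOOP_SET_BLOCK_SIZE (the fallback value matches the constant added by this series, but treat it as an assumption).

/* Hypothetical usage sketch (not part of this series): switch a bound
 * loop device to a 4096-byte logical block size.
 */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/loop.h>

#ifndef LOOP_SET_BLOCK_SIZE
#define LOOP_SET_BLOCK_SIZE 0x4C09	/* assumed value, see uapi loop.h */
#endif

int main(void)
{
	int fd = open("/dev/loop0", O_RDWR);

	if (fd < 0) {
		perror("open /dev/loop0");
		return 1;
	}
	/* must be a power of two in [512, PAGE_SIZE], per loop_set_block_size() */
	if (ioctl(fd, LOOP_SET_BLOCK_SIZE, 4096UL) < 0)
		perror("LOOP_SET_BLOCK_SIZE");
	close(fd);
	return 0;
}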
@@ -48,7 +48,6 @@ struct loop_device {
 	struct file	*lo_backing_file;
 	struct block_device *lo_device;
-	unsigned	lo_blocksize;
 	void		*key_data;
 	gfp_t		old_gfp_mask;
@@ -68,10 +67,13 @@ struct loop_device {
 struct loop_cmd {
 	struct kthread_work work;
 	struct request *rq;
-	struct list_head list;
-	bool use_aio; /* use AIO interface to handle I/O */
+	union {
+		bool use_aio; /* use AIO interface to handle I/O */
+		atomic_t ref; /* only for aio */
+	};
 	long ret;
 	struct kiocb iocb;
+	struct bio_vec *bvec;
 };
 /* Support for loadable transfer modules */
...
@@ -68,6 +68,8 @@
 #include <linux/random.h>
 #include <trace/events/bcache.h>
+#define MAX_OPEN_BUCKETS 128
 /* Bucket heap / gen */
 uint8_t bch_inc_gen(struct cache *ca, struct bucket *b)
@@ -671,7 +673,7 @@ int bch_open_buckets_alloc(struct cache_set *c)
 	spin_lock_init(&c->data_bucket_lock);
-	for (i = 0; i < 6; i++) {
+	for (i = 0; i < MAX_OPEN_BUCKETS; i++) {
 		struct open_bucket *b = kzalloc(sizeof(*b), GFP_KERNEL);
 		if (!b)
 			return -ENOMEM;
...
@@ -333,6 +333,7 @@ struct cached_dev {
 	/* Limit number of writeback bios in flight */
 	struct semaphore	in_flight;
 	struct task_struct	*writeback_thread;
+	struct workqueue_struct	*writeback_write_wq;
 	struct keybuf		writeback_keys;
...
@@ -70,21 +70,10 @@ void __closure_wake_up(struct closure_waitlist *wait_list)
 	list = llist_del_all(&wait_list->list);
 	/* We first reverse the list to preserve FIFO ordering and fairness */
-	while (list) {
-		struct llist_node *t = list;
-		list = llist_next(list);
-		t->next = reverse;
-		reverse = t;
-	}
+	reverse = llist_reverse_order(list);
 	/* Then do the wakeups */
-	while (reverse) {
-		cl = container_of(reverse, struct closure, list);
-		reverse = llist_next(reverse);
+	llist_for_each_entry(cl, reverse, list) {
 		closure_set_waiting(cl, 0);
 		closure_sub(cl, CLOSURE_WAITING + 1);
 	}
...
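For reference, a generic kernel-style sketch of the llist pattern the closure code switches to above: drain the lock-free list with llist_del_all(), restore FIFO order with llist_reverse_order(), then walk it with llist_for_each_entry(). The struct waiter type and its wake callback are illustrative only, not part of this series.

#include <linux/llist.h>

struct waiter {
	struct llist_node node;
	void (*wake)(struct waiter *w);
};

static void wake_all(struct llist_head *list)
{
	struct llist_node *entries = llist_del_all(list);
	struct waiter *w;

	/* llist_add() builds the list in LIFO order; reverse for fairness */
	entries = llist_reverse_order(entries);
	llist_for_each_entry(w, entries, node)
		w->wake(w);
}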
@@ -312,8 +312,6 @@ static inline void closure_wake_up(struct closure_waitlist *list)
 * been dropped with closure_put()), it will resume execution at @fn running out
 * of @wq (or, if @wq is NULL, @fn will be called by closure_put() directly).
 *
- * NOTE: This macro expands to a return in the calling function!
- *
 * This is because after calling continue_at() you no longer have a ref on @cl,
 * and whatever @cl owns may be freed out from under you - a running closure fn
 * has a ref on its own closure which continue_at() drops.
@@ -340,8 +338,6 @@ do {									\
 * Causes @fn to be executed out of @cl, in @wq context (or called directly if
 * @wq is NULL).
 *
- * NOTE: like continue_at(), this macro expands to a return in the caller!
- *
 * The ref the caller of continue_at_nobarrier() had on @cl is now owned by @fn,
 * thus it's not safe to touch anything protected by @cl after a
 * continue_at_nobarrier().
...
@@ -196,12 +196,12 @@ static void bch_data_insert_start(struct closure *cl)
 	struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
 	struct bio *bio = op->bio, *n;
-	if (atomic_sub_return(bio_sectors(bio), &op->c->sectors_to_gc) < 0)
-		wake_up_gc(op->c);
 	if (op->bypass)
 		return bch_data_invalidate(cl);
+	if (atomic_sub_return(bio_sectors(bio), &op->c->sectors_to_gc) < 0)
+		wake_up_gc(op->c);
 	/*
 	 * Journal writes are marked REQ_PREFLUSH; if the original write was a
 	 * flush, it'll wait on the journal write.
@@ -400,12 +400,6 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio)
 	if (!congested && !dc->sequential_cutoff)
 		goto rescale;
-	if (!congested &&
-	    mode == CACHE_MODE_WRITEBACK &&
-	    op_is_write(bio->bi_opf) &&
-	    op_is_sync(bio->bi_opf))
-		goto rescale;
 	spin_lock(&dc->io_lock);
 	hlist_for_each_entry(i, iohash(dc, bio->bi_iter.bi_sector), hash)
...
@@ -1026,7 +1026,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c)
 	}
 	if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) {
-		bch_sectors_dirty_init(dc);
+		bch_sectors_dirty_init(&dc->disk);
 		atomic_set(&dc->has_dirty, 1);
 		atomic_inc(&dc->count);
 		bch_writeback_queue(dc);
@@ -1059,6 +1059,8 @@ static void cached_dev_free(struct closure *cl)
 	cancel_delayed_work_sync(&dc->writeback_rate_update);
 	if (!IS_ERR_OR_NULL(dc->writeback_thread))
 		kthread_stop(dc->writeback_thread);
+	if (dc->writeback_write_wq)
+		destroy_workqueue(dc->writeback_write_wq);
 	mutex_lock(&bch_register_lock);
@@ -1228,6 +1230,7 @@ static int flash_dev_run(struct cache_set *c, struct uuid_entry *u)
 		goto err;
 	bcache_device_attach(d, c, u - c->uuids);
+	bch_sectors_dirty_init(d);
 	bch_flash_dev_request_init(d);
 	add_disk(d->disk);
@@ -1374,9 +1377,6 @@ static void cache_set_flush(struct closure *cl)
 	struct btree *b;
 	unsigned i;
-	if (!c)
-		closure_return(cl);
 	bch_cache_accounting_destroy(&c->accounting);
 	kobject_put(&c->internal);
@@ -1964,6 +1964,8 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
 		else
 			err = "device busy";
 		mutex_unlock(&bch_register_lock);
+		if (!IS_ERR(bdev))
+			bdput(bdev);
 		if (attr == &ksysfs_register_quiet)
 			goto out;
 	}
...
@@ -192,7 +192,7 @@ STORE(__cached_dev)
 {
 	struct cached_dev *dc = container_of(kobj, struct cached_dev,
 					     disk.kobj);
-	unsigned v = size;
+	ssize_t v = size;
 	struct cache_set *c;
 	struct kobj_uevent_env *env;
@@ -227,7 +227,7 @@ STORE(__cached_dev)
 			bch_cached_dev_run(dc);
 	if (attr == &sysfs_cache_mode) {
-		ssize_t v = bch_read_string_list(buf, bch_cache_modes + 1);
+		v = bch_read_string_list(buf, bch_cache_modes + 1);
 		if (v < 0)
 			return v;
@@ -615,8 +615,21 @@ STORE(__bch_cache_set)
 		bch_cache_accounting_clear(&c->accounting);
 	}
-	if (attr == &sysfs_trigger_gc)
+	if (attr == &sysfs_trigger_gc) {
+		/*
+		 * Garbage collection thread only works when sectors_to_gc < 0,
+		 * when users write to sysfs entry trigger_gc, most of time
+		 * they want to forcibly triger gargage collection. Here -1 is
+		 * set to c->sectors_to_gc, to make gc_should_run() give a
+		 * chance to permit gc thread to run. "give a chance" means
+		 * before going into gc_should_run(), there is still chance
+		 * that c->sectors_to_gc being set to other positive value. So
+		 * writing sysfs entry trigger_gc won't always make sure gc
+		 * thread takes effect.
+		 */
+		atomic_set(&c->sectors_to_gc, -1);
 		wake_up_gc(c);
+	}
 	if (attr == &sysfs_prune_cache) {
 		struct shrink_control sc;
...
@@ -74,24 +74,44 @@ STRTO_H(strtouint, unsigned int)
 STRTO_H(strtoll, long long)
 STRTO_H(strtoull, unsigned long long)
+/**
+ * bch_hprint() - formats @v to human readable string for sysfs.
+ *
+ * @v - signed 64 bit integer
+ * @buf - the (at least 8 byte) buffer to format the result into.
+ *
+ * Returns the number of bytes used by format.
+ */
 ssize_t bch_hprint(char *buf, int64_t v)
 {
 	static const char units[] = "?kMGTPEZY";
-	char dec[4] = "";
-	int u, t = 0;
-	for (u = 0; v >= 1024 || v <= -1024; u++) {
-		t = v & ~(~0 << 10);
-		v >>= 10;
-	}
-	if (!u)
-		return sprintf(buf, "%llu", v);
-	if (v < 100 && v > -100)
-		snprintf(dec, sizeof(dec), ".%i", t / 100);
-	return sprintf(buf, "%lli%s%c", v, dec, units[u]);
+	int u = 0, t;
+	uint64_t q;
+	if (v < 0)
+		q = -v;
+	else
+		q = v;
+	/* For as long as the number is more than 3 digits, but at least
+	 * once, shift right / divide by 1024. Keep the remainder for
+	 * a digit after the decimal point.
+	 */
+	do {
+		u++;
+		t = q & ~(~0 << 10);
+		q >>= 10;
+	} while (q >= 1000);
+	if (v < 0)
+		/* '-', up to 3 digits, '.', 1 digit, 1 character, null;
+		 * yields 8 bytes.
+		 */
+		return sprintf(buf, "-%llu.%i%c", q, t * 10 / 1024, units[u]);
+	else
+		return sprintf(buf, "%llu.%i%c", q, t * 10 / 1024, units[u]);
 }
 ssize_t bch_snprint_string_list(char *buf, size_t size, const char * const list[],
...
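Worked example for the new bch_hprint() rounding above: with v = 1536 the do/while loop runs once, leaving q = 1 and t = 512, so the fractional digit is 512 * 10 / 1024 = 5 and the output is "1.5k". The worst case (sign, up to three digits, decimal point, one digit, unit character, terminating NUL) is what the 8-byte buffer requirement in the comment accounts for.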
@@ -21,7 +21,8 @@
 static void __update_writeback_rate(struct cached_dev *dc)
 {
 	struct cache_set *c = dc->disk.c;
-	uint64_t cache_sectors = c->nbuckets * c->sb.bucket_size;
+	uint64_t cache_sectors = c->nbuckets * c->sb.bucket_size -
+				bcache_flash_devs_sectors_dirty(c);
 	uint64_t cache_dirty_target =
 		div_u64(cache_sectors * dc->writeback_percent, 100);
@@ -186,7 +187,7 @@ static void write_dirty(struct closure *cl)
 	closure_bio_submit(&io->bio, cl);
-	continue_at(cl, write_dirty_finish, system_wq);
+	continue_at(cl, write_dirty_finish, io->dc->writeback_write_wq);
 }
 static void read_dirty_endio(struct bio *bio)
@@ -206,7 +207,7 @@ static void read_dirty_submit(struct closure *cl)
 	closure_bio_submit(&io->bio, cl);
-	continue_at(cl, write_dirty, system_wq);
+	continue_at(cl, write_dirty, io->dc->writeback_write_wq);
 }
 static void read_dirty(struct cached_dev *dc)
@@ -481,17 +482,17 @@ static int sectors_dirty_init_fn(struct btree_op *_op, struct btree *b,
 	return MAP_CONTINUE;
 }
-void bch_sectors_dirty_init(struct cached_dev *dc)
+void bch_sectors_dirty_init(struct bcache_device *d)
 {
 	struct sectors_dirty_init op;
 	bch_btree_op_init(&op.op, -1);
-	op.inode = dc->disk.id;
-	bch_btree_map_keys(&op.op, dc->disk.c, &KEY(op.inode, 0, 0),
+	op.inode = d->id;
+	bch_btree_map_keys(&op.op, d->c, &KEY(op.inode, 0, 0),
 			   sectors_dirty_init_fn, 0);
-	dc->disk.sectors_dirty_last = bcache_dev_sectors_dirty(&dc->disk);
+	d->sectors_dirty_last = bcache_dev_sectors_dirty(d);
 }
 void bch_cached_dev_writeback_init(struct cached_dev *dc)
@@ -515,6 +516,11 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc)
 int bch_cached_dev_writeback_start(struct cached_dev *dc)
 {
+	dc->writeback_write_wq = alloc_workqueue("bcache_writeback_wq",
+						WQ_MEM_RECLAIM, 0);
+	if (!dc->writeback_write_wq)
+		return -ENOMEM;
 	dc->writeback_thread = kthread_create(bch_writeback_thread, dc,
 					      "bcache_writeback");
 	if (IS_ERR(dc->writeback_thread))
...
@@ -14,6 +14,25 @@ static inline uint64_t bcache_dev_sectors_dirty(struct bcache_device *d)
 	return ret;
 }
+static inline uint64_t bcache_flash_devs_sectors_dirty(struct cache_set *c)
+{
+	uint64_t i, ret = 0;
+	mutex_lock(&bch_register_lock);
+	for (i = 0; i < c->nr_uuids; i++) {
+		struct bcache_device *d = c->devices[i];
+		if (!d || !UUID_FLASH_ONLY(&c->uuids[i]))
+			continue;
+		ret += bcache_dev_sectors_dirty(d);
+	}
+	mutex_unlock(&bch_register_lock);
+	return ret;
+}
 static inline unsigned offset_to_stripe(struct bcache_device *d,
 					uint64_t offset)
 {
@@ -84,7 +103,7 @@ static inline void bch_writeback_add(struct cached_dev *dc)
 void bcache_dev_sectors_dirty_add(struct cache_set *, unsigned, uint64_t, int);
-void bch_sectors_dirty_init(struct cached_dev *dc);
+void bch_sectors_dirty_init(struct bcache_device *);
 void bch_cached_dev_writeback_init(struct cached_dev *);
 int bch_cached_dev_writeback_start(struct cached_dev *);
...
@@ -22,7 +22,7 @@
 #include "fabrics.h"
 
 static LIST_HEAD(nvmf_transports);
-static DEFINE_MUTEX(nvmf_transports_mutex);
+static DECLARE_RWSEM(nvmf_transports_rwsem);
 
 static LIST_HEAD(nvmf_hosts);
 static DEFINE_MUTEX(nvmf_hosts_mutex);
@@ -75,7 +75,7 @@ static struct nvmf_host *nvmf_host_default(void)
     kref_init(&host->ref);
     snprintf(host->nqn, NVMF_NQN_SIZE,
-        "nqn.2014-08.org.nvmexpress:NVMf:uuid:%pUb", &host->id);
+        "nqn.2014-08.org.nvmexpress:uuid:%pUb", &host->id);
 
     mutex_lock(&nvmf_hosts_mutex);
     list_add_tail(&host->list, &nvmf_hosts);
@@ -495,9 +495,9 @@ int nvmf_register_transport(struct nvmf_transport_ops *ops)
     if (!ops->create_ctrl)
         return -EINVAL;
 
-    mutex_lock(&nvmf_transports_mutex);
+    down_write(&nvmf_transports_rwsem);
     list_add_tail(&ops->entry, &nvmf_transports);
-    mutex_unlock(&nvmf_transports_mutex);
+    up_write(&nvmf_transports_rwsem);
 
     return 0;
 }
@@ -514,9 +514,9 @@ EXPORT_SYMBOL_GPL(nvmf_register_transport);
  */
 void nvmf_unregister_transport(struct nvmf_transport_ops *ops)
 {
-    mutex_lock(&nvmf_transports_mutex);
+    down_write(&nvmf_transports_rwsem);
     list_del(&ops->entry);
-    mutex_unlock(&nvmf_transports_mutex);
+    up_write(&nvmf_transports_rwsem);
 }
 EXPORT_SYMBOL_GPL(nvmf_unregister_transport);
 
@@ -525,7 +525,7 @@ static struct nvmf_transport_ops *nvmf_lookup_transport(
 {
     struct nvmf_transport_ops *ops;
 
-    lockdep_assert_held(&nvmf_transports_mutex);
+    lockdep_assert_held(&nvmf_transports_rwsem);
 
     list_for_each_entry(ops, &nvmf_transports, entry) {
         if (strcmp(ops->name, opts->transport) == 0)
@@ -735,6 +735,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
                 goto out;
             }
             if (uuid_parse(p, &hostid)) {
+                pr_err("Invalid hostid %s\n", p);
                 ret = -EINVAL;
                 goto out;
             }
@@ -850,7 +851,7 @@ nvmf_create_ctrl(struct device *dev, const char *buf, size_t count)
         goto out_free_opts;
     opts->mask &= ~NVMF_REQUIRED_OPTS;
 
-    mutex_lock(&nvmf_transports_mutex);
+    down_read(&nvmf_transports_rwsem);
     ops = nvmf_lookup_transport(opts);
     if (!ops) {
         pr_info("no handler found for transport %s.\n",
@@ -877,16 +878,16 @@ nvmf_create_ctrl(struct device *dev, const char *buf, size_t count)
         dev_warn(ctrl->device,
             "controller returned incorrect NQN: \"%s\".\n",
             ctrl->subnqn);
-        mutex_unlock(&nvmf_transports_mutex);
+        up_read(&nvmf_transports_rwsem);
         ctrl->ops->delete_ctrl(ctrl);
         return ERR_PTR(-EINVAL);
     }
 
-    mutex_unlock(&nvmf_transports_mutex);
+    up_read(&nvmf_transports_rwsem);
     return ctrl;
 
 out_unlock:
-    mutex_unlock(&nvmf_transports_mutex);
+    up_read(&nvmf_transports_rwsem);
 out_free_opts:
     nvmf_free_options(opts);
     return ERR_PTR(ret);
...
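Converting the nvmf_transports protection from a mutex to an rw_semaphore lets transport lookups during controller creation run concurrently, while registration and unregistration still serialize as writers. A standalone sketch of that reader/writer pattern, assuming only the usual kernel list and rwsem headers; the demo_* names are hypothetical and not part of the NVMe code:

#include <linux/list.h>
#include <linux/rwsem.h>
#include <linux/string.h>

struct demo_entry {
    struct list_head    entry;
    const char          *name;
};

static DECLARE_RWSEM(demo_rwsem);   /* plays the role of nvmf_transports_rwsem */
static LIST_HEAD(demo_list);

/* Writer side: modifying the list requires exclusive access. */
static void demo_register(struct demo_entry *e)
{
    down_write(&demo_rwsem);
    list_add_tail(&e->entry, &demo_list);
    up_write(&demo_rwsem);
}

/* Reader side: many lookups may hold the semaphore at the same time. */
static struct demo_entry *demo_lookup(const char *name)
{
    struct demo_entry *e, *found = NULL;

    down_read(&demo_rwsem);
    list_for_each_entry(e, &demo_list, entry) {
        if (strcmp(e->name, name) == 0) {
            found = e;
            break;
        }
    }
    up_read(&demo_rwsem);

    return found;
}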
@@ -220,6 +220,90 @@ static int __nvme_fc_del_ctrl(struct nvme_fc_ctrl *);
 static void __nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *,
             struct nvme_fc_queue *, unsigned int);
 
+static void
+nvme_fc_free_lport(struct kref *ref)
+{
+    struct nvme_fc_lport *lport =
+        container_of(ref, struct nvme_fc_lport, ref);
+    unsigned long flags;
+
+    WARN_ON(lport->localport.port_state != FC_OBJSTATE_DELETED);
+    WARN_ON(!list_empty(&lport->endp_list));
+
+    /* remove from transport list */
+    spin_lock_irqsave(&nvme_fc_lock, flags);
+    list_del(&lport->port_list);
+    spin_unlock_irqrestore(&nvme_fc_lock, flags);
+
+    /* let the LLDD know we've finished tearing it down */
+    lport->ops->localport_delete(&lport->localport);
+
+    ida_simple_remove(&nvme_fc_local_port_cnt, lport->localport.port_num);
+    ida_destroy(&lport->endp_cnt);
+
+    put_device(lport->dev);
+
+    kfree(lport);
+}
+
+static void
+nvme_fc_lport_put(struct nvme_fc_lport *lport)
+{
+    kref_put(&lport->ref, nvme_fc_free_lport);
+}
+
+static int
+nvme_fc_lport_get(struct nvme_fc_lport *lport)
+{
+    return kref_get_unless_zero(&lport->ref);
+}
+
+static struct nvme_fc_lport *
+nvme_fc_attach_to_unreg_lport(struct nvme_fc_port_info *pinfo)
+{
+    struct nvme_fc_lport *lport;
+    unsigned long flags;
+
+    spin_lock_irqsave(&nvme_fc_lock, flags);
+
+    list_for_each_entry(lport, &nvme_fc_lport_list, port_list) {
+        if (lport->localport.node_name != pinfo->node_name ||
+            lport->localport.port_name != pinfo->port_name)
+            continue;
+
+        if (lport->localport.port_state != FC_OBJSTATE_DELETED) {
+            lport = ERR_PTR(-EEXIST);
+            goto out_done;
+        }
+
+        if (!nvme_fc_lport_get(lport)) {
+            /*
+             * fails if ref cnt already 0. If so,
+             * act as if lport already deleted
+             */
+            lport = NULL;
+            goto out_done;
+        }
+
+        /* resume the lport */
+
+        lport->localport.port_role = pinfo->port_role;
+        lport->localport.port_id = pinfo->port_id;
+        lport->localport.port_state = FC_OBJSTATE_ONLINE;
+
+        spin_unlock_irqrestore(&nvme_fc_lock, flags);
+
+        return lport;
+    }
+
+    lport = NULL;
+
+out_done:
+    spin_unlock_irqrestore(&nvme_fc_lock, flags);
+
+    return lport;
+}
+
 /**
  * nvme_fc_register_localport - transport entry point called by an
@@ -257,6 +341,28 @@ nvme_fc_register_localport(struct nvme_fc_port_info *pinfo,
         goto out_reghost_failed;
     }
 
+    /*
+     * look to see if there is already a localport that had been
+     * deregistered and in the process of waiting for all the
+     * references to fully be removed. If the references haven't
+     * expired, we can simply re-enable the localport. Remoteports
+     * and controller reconnections should resume naturally.
+     */
+    newrec = nvme_fc_attach_to_unreg_lport(pinfo);
+
+    /* found an lport, but something about its state is bad */
+    if (IS_ERR(newrec)) {
+        ret = PTR_ERR(newrec);
+        goto out_reghost_failed;
+
+    /* found existing lport, which was resumed */
+    } else if (newrec) {
+        *portptr = &newrec->localport;
+        return 0;
+    }
+
+    /* nothing found - allocate a new localport struct */
+
     newrec = kmalloc((sizeof(*newrec) + template->local_priv_sz),
              GFP_KERNEL);
     if (!newrec) {
@@ -310,44 +416,6 @@ nvme_fc_register_localport(struct nvme_fc_port_info *pinfo,
 }
 EXPORT_SYMBOL_GPL(nvme_fc_register_localport);
 
-static void
-nvme_fc_free_lport(struct kref *ref)
-{
-    struct nvme_fc_lport *lport =
-        container_of(ref, struct nvme_fc_lport, ref);
-    unsigned long flags;
-
-    WARN_ON(lport->localport.port_state != FC_OBJSTATE_DELETED);
-    WARN_ON(!list_empty(&lport->endp_list));
-
-    /* remove from transport list */
-    spin_lock_irqsave(&nvme_fc_lock, flags);
-    list_del(&lport->port_list);
-    spin_unlock_irqrestore(&nvme_fc_lock, flags);
-
-    /* let the LLDD know we've finished tearing it down */
-    lport->ops->localport_delete(&lport->localport);
-
-    ida_simple_remove(&nvme_fc_local_port_cnt, lport->localport.port_num);
-    ida_destroy(&lport->endp_cnt);
-
-    put_device(lport->dev);
-
-    kfree(lport);
-}
-
-static void
-nvme_fc_lport_put(struct nvme_fc_lport *lport)
-{
-    kref_put(&lport->ref, nvme_fc_free_lport);
-}
-
-static int
-nvme_fc_lport_get(struct nvme_fc_lport *lport)
-{
-    return kref_get_unless_zero(&lport->ref);
-}
-
 /**
  * nvme_fc_unregister_localport - transport entry point called by an
  *                                LLDD to deregister/remove a previously
@@ -2731,6 +2799,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
     ret = blk_mq_alloc_tag_set(&ctrl->admin_tag_set);
     if (ret)
         goto out_free_queues;
+    ctrl->ctrl.admin_tagset = &ctrl->admin_tag_set;
 
     ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set);
     if (IS_ERR(ctrl->ctrl.admin_q)) {
...
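The localport re-registration path above hinges on kref_get_unless_zero(): a port that is pending deletion can only be revived while its reference count is still non-zero. A stripped-down sketch of that resurrect-or-allocate pattern; struct demo_port and its fields are hypothetical stand-ins, not NVMe FC code:

#include <linux/err.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/types.h>

struct demo_port {
    struct kref         ref;
    struct list_head    node;
    u64                 id;
    bool                deleted;    /* stands in for FC_OBJSTATE_DELETED */
};

/*
 * Returns an existing live port as ERR_PTR(-EEXIST), a revived port,
 * or NULL (nothing usable found, caller allocates a fresh object).
 */
static struct demo_port *demo_find_and_resume(struct list_head *ports, u64 id)
{
    struct demo_port *p;

    list_for_each_entry(p, ports, node) {
        if (p->id != id)
            continue;

        if (!p->deleted)
            return ERR_PTR(-EEXIST);        /* duplicate registration */

        if (!kref_get_unless_zero(&p->ref))
            return NULL;                    /* refcount hit zero: too late */

        p->deleted = false;                 /* resume the old object */
        return p;
    }

    return NULL;
}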
@@ -125,6 +125,7 @@ struct nvme_ctrl {
     struct kref kref;
     int instance;
     struct blk_mq_tag_set *tagset;
+    struct blk_mq_tag_set *admin_tagset;
     struct list_head namespaces;
     struct mutex namespaces_mutex;
     struct device *device;  /* char device */
@@ -142,6 +143,7 @@ struct nvme_ctrl {
     u16 cntlid;
     u32 ctrl_config;
+    u16 mtfa;
     u32 queue_count;
     u64 cap;
@@ -160,6 +162,7 @@ struct nvme_ctrl {
     u16 kas;
     u8 npss;
     u8 apsta;
+    unsigned int shutdown_timeout;
     unsigned int kato;
     bool subsystem;
     unsigned long quirks;
@@ -167,6 +170,7 @@ struct nvme_ctrl {
     struct work_struct scan_work;
     struct work_struct async_event_work;
     struct delayed_work ka_work;
+    struct work_struct fw_act_work;
     /* Power saving configuration */
     u64 ps_max_latency_us;
@@ -207,13 +211,9 @@ struct nvme_ns {
     bool ext;
     u8 pi_type;
     unsigned long flags;
+    u16 noiob;
 #define NVME_NS_REMOVING 0
 #define NVME_NS_DEAD     1
-    u16 noiob;
-    u64 mode_select_num_blocks;
-    u32 mode_select_block_len;
 };
 
 struct nvme_ctrl_ops {
@@ -314,13 +314,6 @@ int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
 int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
         union nvme_result *result, void *buffer, unsigned bufflen,
         unsigned timeout, int qid, int at_head, int flags);
-int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
-        void __user *ubuffer, unsigned bufflen, u32 *result,
-        unsigned timeout);
-int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
-        void __user *ubuffer, unsigned bufflen,
-        void __user *meta_buffer, unsigned meta_len, u32 meta_seed,
-        u32 *result, unsigned timeout);
 int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count);
 void nvme_start_keep_alive(struct nvme_ctrl *ctrl);
 void nvme_stop_keep_alive(struct nvme_ctrl *ctrl);
...
@@ -556,8 +556,10 @@ static blk_status_t nvme_setup_prps(struct nvme_dev *dev, struct request *req)
     int nprps, i;
 
     length -= (page_size - offset);
-    if (length <= 0)
+    if (length <= 0) {
+        iod->first_dma = 0;
         return BLK_STS_OK;
+    }
 
     dma_len -= (page_size - offset);
     if (dma_len) {
@@ -667,7 +669,7 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
         if (blk_rq_map_integrity_sg(q, req->bio, &iod->meta_sg) != 1)
             goto out_unmap;
 
-        if (rq_data_dir(req))
+        if (req_op(req) == REQ_OP_WRITE)
             nvme_dif_remap(req, nvme_dif_prep);
 
         if (!dma_map_sg(dev->dev, &iod->meta_sg, 1, dma_dir))
@@ -695,7 +697,7 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
     if (iod->nents) {
         dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir);
         if (blk_integrity_rq(req)) {
-            if (!rq_data_dir(req))
+            if (req_op(req) == REQ_OP_READ)
                 nvme_dif_remap(req, nvme_dif_complete);
             dma_unmap_sg(dev->dev, &iod->meta_sg, 1, dma_dir);
         }
@@ -1377,6 +1379,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
         if (blk_mq_alloc_tag_set(&dev->admin_tagset))
             return -ENOMEM;
+        dev->ctrl.admin_tagset = &dev->admin_tagset;
 
         dev->ctrl.admin_q = blk_mq_init_queue(&dev->admin_tagset);
         if (IS_ERR(dev->ctrl.admin_q)) {
...
This diff is collapsed.
@@ -100,7 +100,7 @@ static u16 nvmet_get_smart_log(struct nvmet_req *req,
     u16 status;
 
     WARN_ON(req == NULL || slog == NULL);
-    if (req->cmd->get_log_page.nsid == cpu_to_le32(0xFFFFFFFF))
+    if (req->cmd->get_log_page.nsid == cpu_to_le32(NVME_NSID_ALL))
         status = nvmet_get_smart_log_all(req, slog);
     else
         status = nvmet_get_smart_log_nsid(req, slog);
@@ -168,15 +168,6 @@ static void nvmet_execute_get_log_page(struct nvmet_req *req)
     nvmet_req_complete(req, status);
 }
 
-static void copy_and_pad(char *dst, int dst_len, const char *src, int src_len)
-{
-    int len = min(src_len, dst_len);
-
-    memcpy(dst, src, len);
-
-    if (dst_len > len)
-        memset(dst + len, ' ', dst_len - len);
-}
-
 static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
 {
     struct nvmet_ctrl *ctrl = req->sq->ctrl;
@@ -196,8 +187,9 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
     bin2hex(id->sn, &ctrl->subsys->serial,
         min(sizeof(ctrl->subsys->serial), sizeof(id->sn) / 2));
-    copy_and_pad(id->mn, sizeof(id->mn), model, sizeof(model) - 1);
-    copy_and_pad(id->fr, sizeof(id->fr), UTS_RELEASE, strlen(UTS_RELEASE));
+    memcpy_and_pad(id->mn, sizeof(id->mn), model, sizeof(model) - 1, ' ');
+    memcpy_and_pad(id->fr, sizeof(id->fr),
+               UTS_RELEASE, strlen(UTS_RELEASE), ' ');
 
     id->rab = 6;
@@ -451,7 +443,7 @@ static void nvmet_execute_set_features(struct nvmet_req *req)
     u32 val32;
     u16 status = 0;
 
-    switch (cdw10 & 0xf) {
+    switch (cdw10 & 0xff) {
     case NVME_FEAT_NUM_QUEUES:
         nvmet_set_result(req,
             (subsys->max_qid - 1) | ((subsys->max_qid - 1) << 16));
@@ -461,6 +453,9 @@ static void nvmet_execute_set_features(struct nvmet_req *req)
         req->sq->ctrl->kato = DIV_ROUND_UP(val32, 1000);
         nvmet_set_result(req, req->sq->ctrl->kato);
         break;
+    case NVME_FEAT_HOST_ID:
+        status = NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
+        break;
     default:
         status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
         break;
@@ -475,7 +470,7 @@ static void nvmet_execute_get_features(struct nvmet_req *req)
     u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10[0]);
     u16 status = 0;
 
-    switch (cdw10 & 0xf) {
+    switch (cdw10 & 0xff) {
     /*
      * These features are mandatory in the spec, but we don't
      * have a useful way to implement them.  We'll eventually
@@ -509,6 +504,16 @@ static void nvmet_execute_get_features(struct nvmet_req *req)
     case NVME_FEAT_KATO:
         nvmet_set_result(req, req->sq->ctrl->kato * 1000);
         break;
+    case NVME_FEAT_HOST_ID:
+        /* need 128-bit host identifier flag */
+        if (!(req->cmd->common.cdw10[1] & cpu_to_le32(1 << 0))) {
+            status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+            break;
+        }
+
+        status = nvmet_copy_to_sgl(req, 0, &req->sq->ctrl->hostid,
+                sizeof(req->sq->ctrl->hostid));
+        break;
     default:
         status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
         break;
...
@@ -444,7 +444,7 @@ static struct config_group *nvmet_ns_make(struct config_group *group,
         goto out;
 
     ret = -EINVAL;
-    if (nsid == 0 || nsid == 0xffffffff)
+    if (nsid == 0 || nsid == NVME_NSID_ALL)
         goto out;
 
     ret = -ENOMEM;
...
@@ -538,37 +538,37 @@ EXPORT_SYMBOL_GPL(nvmet_req_uninit);
 
 static inline bool nvmet_cc_en(u32 cc)
 {
-    return cc & 0x1;
+    return (cc >> NVME_CC_EN_SHIFT) & 0x1;
 }
 
 static inline u8 nvmet_cc_css(u32 cc)
 {
-    return (cc >> 4) & 0x7;
+    return (cc >> NVME_CC_CSS_SHIFT) & 0x7;
 }
 
 static inline u8 nvmet_cc_mps(u32 cc)
 {
-    return (cc >> 7) & 0xf;
+    return (cc >> NVME_CC_MPS_SHIFT) & 0xf;
 }
 
 static inline u8 nvmet_cc_ams(u32 cc)
 {
-    return (cc >> 11) & 0x7;
+    return (cc >> NVME_CC_AMS_SHIFT) & 0x7;
 }
 
 static inline u8 nvmet_cc_shn(u32 cc)
 {
-    return (cc >> 14) & 0x3;
+    return (cc >> NVME_CC_SHN_SHIFT) & 0x3;
 }
 
 static inline u8 nvmet_cc_iosqes(u32 cc)
 {
-    return (cc >> 16) & 0xf;
+    return (cc >> NVME_CC_IOSQES_SHIFT) & 0xf;
 }
 
 static inline u8 nvmet_cc_iocqes(u32 cc)
 {
-    return (cc >> 20) & 0xf;
+    return (cc >> NVME_CC_IOCQES_SHIFT) & 0xf;
 }
 
 static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
@@ -749,6 +749,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
             hostnqn, subsysnqn);
         req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(hostnqn);
         up_read(&nvmet_config_sem);
+        status = NVME_SC_CONNECT_INVALID_HOST | NVME_SC_DNR;
         goto out_put_subsystem;
     }
     up_read(&nvmet_config_sem);
...
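The reworked nvmet_cc_* helpers decode the controller configuration register through named NVME_CC_*_SHIFT constants instead of magic shift values. A small user-space sketch of the same field extraction, with the shift values copied from the NVME_CC enum added in this series; the sample register value is made up:

#include <stdio.h>
#include <stdint.h>

/* Shift values as defined in the NVME_CC enum in include/linux/nvme.h */
#define NVME_CC_EN_SHIFT     0
#define NVME_CC_CSS_SHIFT    4
#define NVME_CC_MPS_SHIFT    7
#define NVME_CC_IOSQES_SHIFT 16
#define NVME_CC_IOCQES_SHIFT 20

int main(void)
{
    /* Example CC value: EN=1, CSS=0, MPS=0 (4 KiB), IOSQES=6, IOCQES=4 */
    uint32_t cc = (1u << NVME_CC_EN_SHIFT) |
                  (6u << NVME_CC_IOSQES_SHIFT) |
                  (4u << NVME_CC_IOCQES_SHIFT);

    printf("EN=%u CSS=%u MPS=%u IOSQES=%u IOCQES=%u\n",
           (cc >> NVME_CC_EN_SHIFT) & 0x1,
           (cc >> NVME_CC_CSS_SHIFT) & 0x7,
           (cc >> NVME_CC_MPS_SHIFT) & 0xf,
           (cc >> NVME_CC_IOSQES_SHIFT) & 0xf,
           (cc >> NVME_CC_IOCQES_SHIFT) & 0xf);
    return 0;
}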
@@ -154,6 +154,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
             le32_to_cpu(c->kato), &ctrl);
     if (status)
         goto out;
+    uuid_copy(&ctrl->hostid, &d->hostid);
 
     status = nvmet_install_queue(ctrl, req);
     if (status) {
...
@@ -58,7 +58,8 @@ struct nvmet_fc_ls_iod {
     struct work_struct      work;
 } __aligned(sizeof(unsigned long long));
 
-#define NVMET_FC_MAX_KB_PER_XFR     256
+#define NVMET_FC_MAX_SEQ_LENGTH     (256 * 1024)
+#define NVMET_FC_MAX_XFR_SGENTS     (NVMET_FC_MAX_SEQ_LENGTH / PAGE_SIZE)
 
 enum nvmet_fcp_datadir {
     NVMET_FCP_NODATA,
@@ -74,9 +75,7 @@ struct nvmet_fc_fcp_iod {
     struct nvme_fc_ersp_iu      rspiubuf;
     dma_addr_t                  rspdma;
     struct scatterlist          *data_sg;
-    struct scatterlist          *next_sg;
     int                         data_sg_cnt;
-    u32                         next_sg_offset;
     u32                         total_length;
     u32                         offset;
     enum nvmet_fcp_datadir      io_dir;
@@ -112,6 +111,7 @@ struct nvmet_fc_tgtport {
     struct ida                  assoc_cnt;
     struct nvmet_port           *port;
     struct kref                 ref;
+    u32                         max_sg_cnt;
 };
 
 struct nvmet_fc_defer_fcp_req {
@@ -994,6 +994,8 @@ nvmet_fc_register_targetport(struct nvmet_fc_port_info *pinfo,
     INIT_LIST_HEAD(&newrec->assoc_list);
     kref_init(&newrec->ref);
     ida_init(&newrec->assoc_cnt);
+    newrec->max_sg_cnt = min_t(u32, NVMET_FC_MAX_XFR_SGENTS,
+                    template->max_sgl_segments);
 
     ret = nvmet_fc_alloc_ls_iodlist(newrec);
     if (ret) {
@@ -1866,51 +1868,23 @@ nvmet_fc_transfer_fcp_data(struct nvmet_fc_tgtport *tgtport,
                 struct nvmet_fc_fcp_iod *fod, u8 op)
 {
     struct nvmefc_tgt_fcp_req *fcpreq = fod->fcpreq;
-    struct scatterlist *sg, *datasg;
     unsigned long flags;
-    u32 tlen, sg_off;
+    u32 tlen;
     int ret;
 
     fcpreq->op = op;
     fcpreq->offset = fod->offset;
     fcpreq->timeout = NVME_FC_TGTOP_TIMEOUT_SEC;
-    tlen = min_t(u32, (NVMET_FC_MAX_KB_PER_XFR * 1024),
+
+    tlen = min_t(u32, tgtport->max_sg_cnt * PAGE_SIZE,
             (fod->total_length - fod->offset));
-    tlen = min_t(u32, tlen, NVME_FC_MAX_SEGMENTS * PAGE_SIZE);
-    tlen = min_t(u32, tlen, fod->tgtport->ops->max_sgl_segments
-                    * PAGE_SIZE);
     fcpreq->transfer_length = tlen;
     fcpreq->transferred_length = 0;
     fcpreq->fcp_error = 0;
     fcpreq->rsplen = 0;
 
-    fcpreq->sg_cnt = 0;
-
-    datasg = fod->next_sg;
-    sg_off = fod->next_sg_offset;
-
-    for (sg = fcpreq->sg ; tlen; sg++) {
-        *sg = *datasg;
-        if (sg_off) {
-            sg->offset += sg_off;
-            sg->length -= sg_off;
-            sg->dma_address += sg_off;
-            sg_off = 0;
-        }
-        if (tlen < sg->length) {
-            sg->length = tlen;
-            fod->next_sg = datasg;
-            fod->next_sg_offset += tlen;
-        } else if (tlen == sg->length) {
-            fod->next_sg_offset = 0;
-            fod->next_sg = sg_next(datasg);
-        } else {
-            fod->next_sg_offset = 0;
-            datasg = sg_next(datasg);
-        }
-        tlen -= sg->length;
-        fcpreq->sg_cnt++;
-    }
+    fcpreq->sg = &fod->data_sg[fod->offset / PAGE_SIZE];
+    fcpreq->sg_cnt = DIV_ROUND_UP(tlen, PAGE_SIZE);
 
     /*
      * If the last READDATA request: check if LLDD supports
@@ -2225,8 +2199,6 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport,
     fod->req.sg = fod->data_sg;
     fod->req.sg_cnt = fod->data_sg_cnt;
     fod->offset = 0;
-    fod->next_sg = fod->data_sg;
-    fod->next_sg_offset = 0;
 
     if (fod->io_dir == NVMET_FCP_WRITE) {
         /* pull the data over before invoking nvmet layer */
...
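With the per-request scatterlist copying removed, the transfer setup above simply points fcpreq->sg into the existing data_sg table and derives the entry count arithmetically; the index math works because each scatterlist entry covers exactly one page. A user-space sketch of that calculation, with illustrative values for the I/O size and segment limit:

#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE 4096u
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
    uint32_t total_length = 1310720;   /* 1.25 MiB I/O, illustrative */
    uint32_t max_sg_cnt   = 64;        /* min(LLDD limit, NVMET_FC_MAX_XFR_SGENTS) */
    uint32_t offset = 0;

    while (offset < total_length) {
        uint32_t remaining = total_length - offset;
        uint32_t tlen = max_sg_cnt * PAGE_SIZE;

        if (tlen > remaining)
            tlen = remaining;

        /* Mirrors: fcpreq->sg = &fod->data_sg[fod->offset / PAGE_SIZE];
         *          fcpreq->sg_cnt = DIV_ROUND_UP(tlen, PAGE_SIZE);     */
        printf("xfer at offset %7u: sg index %4u, sg_cnt %3u, len %7u\n",
               offset, offset / PAGE_SIZE, DIV_ROUND_UP(tlen, PAGE_SIZE), tlen);

        offset += tlen;
    }
    return 0;
}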
...@@ -193,9 +193,6 @@ fcloop_parse_nm_options(struct device *dev, u64 *nname, u64 *pname, ...@@ -193,9 +193,6 @@ fcloop_parse_nm_options(struct device *dev, u64 *nname, u64 *pname,
#define TGTPORT_OPTS (NVMF_OPT_WWNN | NVMF_OPT_WWPN) #define TGTPORT_OPTS (NVMF_OPT_WWNN | NVMF_OPT_WWPN)
#define ALL_OPTS (NVMF_OPT_WWNN | NVMF_OPT_WWPN | NVMF_OPT_ROLES | \
NVMF_OPT_FCADDR | NVMF_OPT_LPWWNN | NVMF_OPT_LPWWPN)
static DEFINE_SPINLOCK(fcloop_lock); static DEFINE_SPINLOCK(fcloop_lock);
static LIST_HEAD(fcloop_lports); static LIST_HEAD(fcloop_lports);
......
@@ -375,6 +375,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
     error = blk_mq_alloc_tag_set(&ctrl->admin_tag_set);
     if (error)
         goto out_free_sq;
+    ctrl->ctrl.admin_tagset = &ctrl->admin_tag_set;
 
     ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set);
     if (IS_ERR(ctrl->ctrl.admin_q)) {
...
@@ -115,6 +115,7 @@ struct nvmet_ctrl {
     u32         cc;
     u32         csts;
+    uuid_t      hostid;
     u16         cntlid;
     u32         kato;
...
@@ -624,7 +624,7 @@ struct nvmefc_tgt_fcp_req {
     u32                 timeout;
     u32                 transfer_length;
     struct fc_ba_rjt    ba_rjt;
-    struct scatterlist  sg[NVME_FC_MAX_SEGMENTS];
+    struct scatterlist  *sg;
     int                 sg_cnt;
     void                *rspaddr;
     dma_addr_t          rspdma;
...
@@ -32,6 +32,8 @@
 #define NVME_RDMA_IP_PORT   4420
 
+#define NVME_NSID_ALL       0xffffffff
+
 enum nvme_subsys_type {
     NVME_NQN_DISC   = 1,        /* Discovery type target subsystem */
     NVME_NQN_NVME   = 2,        /* NVME type target subsystem */
@@ -133,19 +135,26 @@ enum {
 enum {
     NVME_CC_ENABLE      = 1 << 0,
     NVME_CC_CSS_NVM     = 0 << 4,
+    NVME_CC_EN_SHIFT    = 0,
+    NVME_CC_CSS_SHIFT   = 4,
     NVME_CC_MPS_SHIFT   = 7,
-    NVME_CC_ARB_RR      = 0 << 11,
-    NVME_CC_ARB_WRRU    = 1 << 11,
-    NVME_CC_ARB_VS      = 7 << 11,
-    NVME_CC_SHN_NONE    = 0 << 14,
-    NVME_CC_SHN_NORMAL  = 1 << 14,
-    NVME_CC_SHN_ABRUPT  = 2 << 14,
-    NVME_CC_SHN_MASK    = 3 << 14,
-    NVME_CC_IOSQES      = NVME_NVM_IOSQES << 16,
-    NVME_CC_IOCQES      = NVME_NVM_IOCQES << 20,
+    NVME_CC_AMS_SHIFT   = 11,
+    NVME_CC_SHN_SHIFT   = 14,
+    NVME_CC_IOSQES_SHIFT    = 16,
+    NVME_CC_IOCQES_SHIFT    = 20,
+    NVME_CC_AMS_RR      = 0 << NVME_CC_AMS_SHIFT,
+    NVME_CC_AMS_WRRU    = 1 << NVME_CC_AMS_SHIFT,
+    NVME_CC_AMS_VS      = 7 << NVME_CC_AMS_SHIFT,
+    NVME_CC_SHN_NONE    = 0 << NVME_CC_SHN_SHIFT,
+    NVME_CC_SHN_NORMAL  = 1 << NVME_CC_SHN_SHIFT,
+    NVME_CC_SHN_ABRUPT  = 2 << NVME_CC_SHN_SHIFT,
+    NVME_CC_SHN_MASK    = 3 << NVME_CC_SHN_SHIFT,
+    NVME_CC_IOSQES      = NVME_NVM_IOSQES << NVME_CC_IOSQES_SHIFT,
+    NVME_CC_IOCQES      = NVME_NVM_IOCQES << NVME_CC_IOCQES_SHIFT,
     NVME_CSTS_RDY       = 1 << 0,
     NVME_CSTS_CFS       = 1 << 1,
     NVME_CSTS_NSSRO     = 1 << 4,
+    NVME_CSTS_PP        = 1 << 5,
     NVME_CSTS_SHST_NORMAL   = 0 << 2,
     NVME_CSTS_SHST_OCCUR    = 1 << 2,
     NVME_CSTS_SHST_CMPLT    = 2 << 2,
@@ -251,6 +260,7 @@ enum {
     NVME_CTRL_ONCS_WRITE_UNCORRECTABLE  = 1 << 1,
     NVME_CTRL_ONCS_DSM          = 1 << 2,
     NVME_CTRL_ONCS_WRITE_ZEROES     = 1 << 3,
+    NVME_CTRL_ONCS_TIMESTAMP        = 1 << 6,
     NVME_CTRL_VWC_PRESENT           = 1 << 0,
     NVME_CTRL_OACS_SEC_SUPP         = 1 << 0,
     NVME_CTRL_OACS_DIRECTIVES       = 1 << 5,
@@ -376,6 +386,13 @@ struct nvme_smart_log {
     __u8            rsvd216[296];
 };
 
+struct nvme_fw_slot_info_log {
+    __u8            afi;
+    __u8            rsvd1[7];
+    __le64          frs[7];
+    __u8            rsvd64[448];
+};
+
 enum {
     NVME_SMART_CRIT_SPARE       = 1 << 0,
     NVME_SMART_CRIT_TEMPERATURE = 1 << 1,
@@ -386,6 +403,7 @@ enum {
 enum {
     NVME_AER_NOTICE_NS_CHANGED  = 0x0002,
+    NVME_AER_NOTICE_FW_ACT_STARTING = 0x0102,
 };
 
 struct nvme_lba_range_type {
@@ -677,6 +695,7 @@ enum {
     NVME_FEAT_ASYNC_EVENT   = 0x0b,
     NVME_FEAT_AUTO_PST  = 0x0c,
     NVME_FEAT_HOST_MEM_BUF  = 0x0d,
+    NVME_FEAT_TIMESTAMP = 0x0e,
     NVME_FEAT_KATO      = 0x0f,
     NVME_FEAT_SW_PROGRESS   = 0x80,
     NVME_FEAT_HOST_ID   = 0x81,
...
@@ -230,6 +230,7 @@ static inline const char *kbasename(const char *path)
 void fortify_panic(const char *name) __noreturn __cold;
 void __read_overflow(void) __compiletime_error("detected read beyond size of object passed as 1st parameter");
 void __read_overflow2(void) __compiletime_error("detected read beyond size of object passed as 2nd parameter");
+void __read_overflow3(void) __compiletime_error("detected read beyond size of object passed as 3rd parameter");
 void __write_overflow(void) __compiletime_error("detected write beyond size of object passed as 1st parameter");
 
 #if !defined(__NO_FORTIFY) && defined(__OPTIMIZE__) && defined(CONFIG_FORTIFY_SOURCE)
@@ -425,4 +426,33 @@ __FORTIFY_INLINE char *strcpy(char *p, const char *q)
 
 #endif
 
+/**
+ * memcpy_and_pad - Copy one buffer to another with padding
+ * @dest: Where to copy to
+ * @dest_len: The destination buffer size
+ * @src: Where to copy from
+ * @count: The number of bytes to copy
+ * @pad: Character to use for padding if space is left in destination.
+ */
+__FORTIFY_INLINE void memcpy_and_pad(void *dest, size_t dest_len,
+                     const void *src, size_t count, int pad)
+{
+    size_t dest_size = __builtin_object_size(dest, 0);
+    size_t src_size = __builtin_object_size(src, 0);
+
+    if (__builtin_constant_p(dest_len) && __builtin_constant_p(count)) {
+        if (dest_size < dest_len && dest_size < count)
+            __write_overflow();
+        else if (src_size < dest_len && src_size < count)
+            __read_overflow3();
+    }
+    if (dest_size < dest_len)
+        fortify_panic(__func__);
+    if (dest_len > count) {
+        memcpy(dest, src, count);
+        memset(dest + count, pad, dest_len - count);
+    } else
+        memcpy(dest, src, dest_len);
+}
+
 #endif /* _LINUX_STRING_H_ */
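memcpy_and_pad() is what the NVMe target code above now uses to build fixed-width, space-padded model and firmware strings for Identify Controller. A small user-space usage sketch; the helper is re-implemented locally (without the fortify checks) so the example compiles outside the kernel, and the buffer name is illustrative:

#include <stdio.h>
#include <string.h>

/* Same copy-then-pad behaviour as the kernel helper above. */
static void memcpy_and_pad(void *dest, size_t dest_len,
                           const void *src, size_t count, int pad)
{
    if (dest_len > count) {
        memcpy(dest, src, count);
        memset((char *)dest + count, pad, dest_len - count);
    } else {
        memcpy(dest, src, dest_len);
    }
}

int main(void)
{
    char mn[16];    /* fixed-width field, e.g. an Identify "model number" */

    memcpy_and_pad(mn, sizeof(mn), "Linux", strlen("Linux"), ' ');
    printf("[%.*s]\n", (int)sizeof(mn), mn);    /* "Linux" followed by 11 spaces */
    return 0;
}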
@@ -88,6 +88,7 @@ struct loop_info64 {
 #define LOOP_CHANGE_FD      0x4C06
 #define LOOP_SET_CAPACITY   0x4C07
 #define LOOP_SET_DIRECT_IO  0x4C08
+#define LOOP_SET_BLOCK_SIZE 0x4C09
 
 /* /dev/loop-control interface */
 #define LOOP_CTL_ADD        0x4C80
...
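The new LOOP_SET_BLOCK_SIZE ioctl lets user space request a logical block size for a loop device, with the size passed as the ioctl argument. A hedged user-space sketch; the /dev/loop0 path and the 4096-byte choice are assumptions for the example:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

#ifndef LOOP_SET_BLOCK_SIZE
#define LOOP_SET_BLOCK_SIZE 0x4C09
#endif

int main(void)
{
    int fd = open("/dev/loop0", O_RDWR);    /* assumed device node */

    if (fd < 0) {
        perror("open /dev/loop0");
        return 1;
    }

    /* Ask the loop driver for 4096-byte logical blocks */
    if (ioctl(fd, LOOP_SET_BLOCK_SIZE, 4096UL) < 0)
        perror("LOOP_SET_BLOCK_SIZE");

    close(fd);
    return 0;
}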