Commit 7cd4ecd9 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'drivers-5.10-2020-10-12' of git://git.kernel.dk/linux-block

Pull block driver updates from Jens Axboe:
 "Here are the driver updates for 5.10.

  A few SCSI updates in here too, in coordination with Martin as they
  depend on core block changes for the shared tag bitmap.

  This contains:

   - NVMe pull requests via Christoph:
      - fix keep alive timer modification (Amit Engel)
      - order the PCI ID list more sensibly (Andy Shevchenko)
      - cleanup the open by controller helper (Chaitanya Kulkarni)
      - use an xarray for the CSE log lookup (Chaitanya Kulkarni)
      - support ZNS in nvmet passthrough mode (Chaitanya Kulkarni)
      - fix nvme_ns_report_zones (Christoph Hellwig)
      - add a sanity check to nvmet-fc (James Smart)
      - fix interrupt allocation when too many polled queues are
        specified (Jeffle Xu)
      - small nvmet-tcp optimization (Mark Wunderlich)
      - fix a controller refcount leak on init failure (Chaitanya
        Kulkarni)
      - misc cleanups (Chaitanya Kulkarni)
      - major refactoring of the scanning code (Christoph Hellwig)

   - MD updates via Song:
      - Bug fixes in bitmap code, from Zhao Heming
      - Fix a work queue check, from Guoqing Jiang
      - Fix raid5 oops with reshape, from Song Liu
      - Clean up unused code, from Jason Yan
      - Discard improvements, from Xiao Ni
      - raid5/6 page offset support, from Yufen Yu

   - Shared tag bitmap for SCSI/hisi_sas/null_blk (John, Kashyap,
     Hannes)

   - null_blk open/active zone limit support (Niklas)

   - Set of bcache updates (Coly, Dongsheng, Qinglang)"

* tag 'drivers-5.10-2020-10-12' of git://git.kernel.dk/linux-block: (78 commits)
  md/raid5: fix oops during stripe resizing
  md/bitmap: fix memory leak of temporary bitmap
  md: fix the checking of wrong work queue
  md/bitmap: md_bitmap_get_counter returns wrong blocks
  md/bitmap: md_bitmap_read_sb uses wrong bitmap blocks
  md/raid0: remove unused function is_io_in_chunk_boundary()
  nvme-core: remove extra condition for vwc
  nvme-core: remove extra variable
  nvme: remove nvme_identify_ns_list
  nvme: refactor nvme_validate_ns
  nvme: move nvme_validate_ns
  nvme: query namespace identifiers before adding the namespace
  nvme: revalidate zone bitmaps in nvme_update_ns_info
  nvme: remove nvme_update_formats
  nvme: update the known admin effects
  nvme: set the queue limits in nvme_update_ns_info
  nvme: remove the 0 lba_shift check in nvme_update_ns_info
  nvme: clean up the check for too large logic block sizes
  nvme: freeze the queue over ->lba_shift updates
  nvme: factor out a nvme_configure_metadata helper
  ...
parents 79ec6d9c 79cd1668
......@@ -644,7 +644,7 @@ struct compat_cdrom_generic_command {
unsigned char pad[3];
compat_int_t quiet;
compat_int_t timeout;
compat_caddr_t reserved[1];
compat_caddr_t unused;
};
#endif
......@@ -666,7 +666,7 @@ static int scsi_get_cdrom_generic_arg(struct cdrom_generic_command *cgc,
.data_direction = cgc32.data_direction,
.quiet = cgc32.quiet,
.timeout = cgc32.timeout,
.reserved[0] = compat_ptr(cgc32.reserved[0]),
.unused = compat_ptr(cgc32.unused),
};
memcpy(&cgc->cmd, &cgc32.cmd, CDROM_PACKET_SIZE);
return 0;
......@@ -691,7 +691,7 @@ static int scsi_put_cdrom_generic_arg(const struct cdrom_generic_command *cgc,
.data_direction = cgc->data_direction,
.quiet = cgc->quiet,
.timeout = cgc->timeout,
.reserved[0] = (uintptr_t)(cgc->reserved[0]),
.unused = (uintptr_t)(cgc->unused),
};
memcpy(&cgc32.cmd, &cgc->cmd, CDROM_PACKET_SIZE);
......
......@@ -104,7 +104,7 @@ do_async_gen_syndrome(struct dma_chan *chan,
* do_sync_gen_syndrome - synchronously calculate a raid6 syndrome
*/
static void
do_sync_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
do_sync_gen_syndrome(struct page **blocks, unsigned int *offsets, int disks,
size_t len, struct async_submit_ctl *submit)
{
void **srcs;
......@@ -121,7 +121,8 @@ do_sync_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
BUG_ON(i > disks - 3); /* P or Q can't be zero */
srcs[i] = (void*)raid6_empty_zero_page;
} else {
srcs[i] = page_address(blocks[i]) + offset;
srcs[i] = page_address(blocks[i]) + offsets[i];
if (i < disks - 2) {
stop = i;
if (start == -1)
......@@ -138,10 +139,23 @@ do_sync_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
async_tx_sync_epilog(submit);
}
static inline bool
is_dma_pq_aligned_offs(struct dma_device *dev, unsigned int *offs,
int src_cnt, size_t len)
{
int i;
for (i = 0; i < src_cnt; i++) {
if (!is_dma_pq_aligned(dev, offs[i], 0, len))
return false;
}
return true;
}
/**
* async_gen_syndrome - asynchronously calculate a raid6 syndrome
* @blocks: source blocks from idx 0..disks-3, P @ disks-2 and Q @ disks-1
* @offset: common offset into each block (src and dest) to start transaction
* @offsets: offset array into each block (src and dest) to start transaction
* @disks: number of blocks (including missing P or Q, see below)
* @len: length of operation in bytes
* @submit: submission/completion modifiers
......@@ -160,7 +174,7 @@ do_sync_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
* path.
*/
struct dma_async_tx_descriptor *
async_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
async_gen_syndrome(struct page **blocks, unsigned int *offsets, int disks,
size_t len, struct async_submit_ctl *submit)
{
int src_cnt = disks - 2;
......@@ -179,7 +193,7 @@ async_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
if (unmap && !(submit->flags & ASYNC_TX_PQ_XOR_DST) &&
(src_cnt <= dma_maxpq(device, 0) ||
dma_maxpq(device, DMA_PREP_CONTINUE) > 0) &&
is_dma_pq_aligned(device, offset, 0, len)) {
is_dma_pq_aligned_offs(device, offsets, disks, len)) {
struct dma_async_tx_descriptor *tx;
enum dma_ctrl_flags dma_flags = 0;
unsigned char coefs[MAX_DISKS];
......@@ -196,8 +210,8 @@ async_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
for (i = 0, j = 0; i < src_cnt; i++) {
if (blocks[i] == NULL)
continue;
unmap->addr[j] = dma_map_page(device->dev, blocks[i], offset,
len, DMA_TO_DEVICE);
unmap->addr[j] = dma_map_page(device->dev, blocks[i],
offsets[i], len, DMA_TO_DEVICE);
coefs[j] = raid6_gfexp[i];
unmap->to_cnt++;
j++;
......@@ -210,7 +224,8 @@ async_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
unmap->bidi_cnt++;
if (P(blocks, disks))
unmap->addr[j++] = dma_map_page(device->dev, P(blocks, disks),
offset, len, DMA_BIDIRECTIONAL);
P(offsets, disks),
len, DMA_BIDIRECTIONAL);
else {
unmap->addr[j++] = 0;
dma_flags |= DMA_PREP_PQ_DISABLE_P;
......@@ -219,7 +234,8 @@ async_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
unmap->bidi_cnt++;
if (Q(blocks, disks))
unmap->addr[j++] = dma_map_page(device->dev, Q(blocks, disks),
offset, len, DMA_BIDIRECTIONAL);
Q(offsets, disks),
len, DMA_BIDIRECTIONAL);
else {
unmap->addr[j++] = 0;
dma_flags |= DMA_PREP_PQ_DISABLE_Q;
......@@ -240,13 +256,13 @@ async_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
if (!P(blocks, disks)) {
P(blocks, disks) = pq_scribble_page;
BUG_ON(len + offset > PAGE_SIZE);
P(offsets, disks) = 0;
}
if (!Q(blocks, disks)) {
Q(blocks, disks) = pq_scribble_page;
BUG_ON(len + offset > PAGE_SIZE);
Q(offsets, disks) = 0;
}
do_sync_gen_syndrome(blocks, offset, disks, len, submit);
do_sync_gen_syndrome(blocks, offsets, disks, len, submit);
return NULL;
}
......@@ -270,6 +286,7 @@ pq_val_chan(struct async_submit_ctl *submit, struct page **blocks, int disks, si
* @len: length of operation in bytes
* @pqres: on val failure SUM_CHECK_P_RESULT and/or SUM_CHECK_Q_RESULT are set
* @spare: temporary result buffer for the synchronous case
* @s_off: spare buffer page offset
* @submit: submission / completion modifiers
*
* The same notes from async_gen_syndrome apply to the 'blocks',
......@@ -278,9 +295,9 @@ pq_val_chan(struct async_submit_ctl *submit, struct page **blocks, int disks, si
* specified.
*/
struct dma_async_tx_descriptor *
async_syndrome_val(struct page **blocks, unsigned int offset, int disks,
async_syndrome_val(struct page **blocks, unsigned int *offsets, int disks,
size_t len, enum sum_check_flags *pqres, struct page *spare,
struct async_submit_ctl *submit)
unsigned int s_off, struct async_submit_ctl *submit)
{
struct dma_chan *chan = pq_val_chan(submit, blocks, disks, len);
struct dma_device *device = chan ? chan->device : NULL;
......@@ -295,7 +312,7 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int disks,
unmap = dmaengine_get_unmap_data(device->dev, disks, GFP_NOWAIT);
if (unmap && disks <= dma_maxpq(device, 0) &&
is_dma_pq_aligned(device, offset, 0, len)) {
is_dma_pq_aligned_offs(device, offsets, disks, len)) {
struct device *dev = device->dev;
dma_addr_t pq[2];
int i, j = 0, src_cnt = 0;
......@@ -307,7 +324,7 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int disks,
for (i = 0; i < disks-2; i++)
if (likely(blocks[i])) {
unmap->addr[j] = dma_map_page(dev, blocks[i],
offset, len,
offsets[i], len,
DMA_TO_DEVICE);
coefs[j] = raid6_gfexp[i];
unmap->to_cnt++;
......@@ -320,7 +337,7 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int disks,
dma_flags |= DMA_PREP_PQ_DISABLE_P;
} else {
pq[0] = dma_map_page(dev, P(blocks, disks),
offset, len,
P(offsets, disks), len,
DMA_TO_DEVICE);
unmap->addr[j++] = pq[0];
unmap->to_cnt++;
......@@ -330,7 +347,7 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int disks,
dma_flags |= DMA_PREP_PQ_DISABLE_Q;
} else {
pq[1] = dma_map_page(dev, Q(blocks, disks),
offset, len,
Q(offsets, disks), len,
DMA_TO_DEVICE);
unmap->addr[j++] = pq[1];
unmap->to_cnt++;
......@@ -355,7 +372,9 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int disks,
async_tx_submit(chan, tx, submit);
} else {
struct page *p_src = P(blocks, disks);
unsigned int p_off = P(offsets, disks);
struct page *q_src = Q(blocks, disks);
unsigned int q_off = Q(offsets, disks);
enum async_tx_flags flags_orig = submit->flags;
dma_async_tx_callback cb_fn_orig = submit->cb_fn;
void *scribble = submit->scribble;
......@@ -381,27 +400,32 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int disks,
if (p_src) {
init_async_submit(submit, ASYNC_TX_XOR_ZERO_DST, NULL,
NULL, NULL, scribble);
tx = async_xor(spare, blocks, offset, disks-2, len, submit);
tx = async_xor_offs(spare, s_off,
blocks, offsets, disks-2, len, submit);
async_tx_quiesce(&tx);
p = page_address(p_src) + offset;
s = page_address(spare) + offset;
p = page_address(p_src) + p_off;
s = page_address(spare) + s_off;
*pqres |= !!memcmp(p, s, len) << SUM_CHECK_P;
}
if (q_src) {
P(blocks, disks) = NULL;
Q(blocks, disks) = spare;
Q(offsets, disks) = s_off;
init_async_submit(submit, 0, NULL, NULL, NULL, scribble);
tx = async_gen_syndrome(blocks, offset, disks, len, submit);
tx = async_gen_syndrome(blocks, offsets, disks,
len, submit);
async_tx_quiesce(&tx);
q = page_address(q_src) + offset;
s = page_address(spare) + offset;
q = page_address(q_src) + q_off;
s = page_address(spare) + s_off;
*pqres |= !!memcmp(q, s, len) << SUM_CHECK_Q;
}
/* restore P, Q and submit */
P(blocks, disks) = p_src;
P(offsets, disks) = p_off;
Q(blocks, disks) = q_src;
Q(offsets, disks) = q_off;
submit->cb_fn = cb_fn_orig;
submit->cb_param = cb_param_orig;
......
This diff is collapsed.
......@@ -97,7 +97,8 @@ do_async_xor(struct dma_chan *chan, struct dmaengine_unmap_data *unmap,
}
static void
do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset,
do_sync_xor_offs(struct page *dest, unsigned int offset,
struct page **src_list, unsigned int *src_offs,
int src_cnt, size_t len, struct async_submit_ctl *submit)
{
int i;
......@@ -114,7 +115,8 @@ do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset,
/* convert to buffer pointers */
for (i = 0; i < src_cnt; i++)
if (src_list[i])
srcs[xor_src_cnt++] = page_address(src_list[i]) + offset;
srcs[xor_src_cnt++] = page_address(src_list[i]) +
(src_offs ? src_offs[i] : offset);
src_cnt = xor_src_cnt;
/* set destination address */
dest_buf = page_address(dest) + offset;
......@@ -135,11 +137,31 @@ do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset,
async_tx_sync_epilog(submit);
}
static inline bool
dma_xor_aligned_offsets(struct dma_device *device, unsigned int offset,
unsigned int *src_offs, int src_cnt, int len)
{
int i;
if (!is_dma_xor_aligned(device, offset, 0, len))
return false;
if (!src_offs)
return true;
for (i = 0; i < src_cnt; i++) {
if (!is_dma_xor_aligned(device, src_offs[i], 0, len))
return false;
}
return true;
}
/**
* async_xor - attempt to xor a set of blocks with a dma engine.
* async_xor_offs - attempt to xor a set of blocks with a dma engine.
* @dest: destination page
* @offset: dst offset to start transaction
* @src_list: array of source pages
* @offset: common src/dst offset to start transaction
* @src_offs: array of source pages offset, NULL means common src/dst offset
* @src_cnt: number of source pages
* @len: length in bytes
* @submit: submission / completion modifiers
......@@ -157,7 +179,8 @@ do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset,
* is not specified.
*/
struct dma_async_tx_descriptor *
async_xor(struct page *dest, struct page **src_list, unsigned int offset,
async_xor_offs(struct page *dest, unsigned int offset,
struct page **src_list, unsigned int *src_offs,
int src_cnt, size_t len, struct async_submit_ctl *submit)
{
struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR,
......@@ -171,7 +194,8 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset,
if (device)
unmap = dmaengine_get_unmap_data(device->dev, src_cnt+1, GFP_NOWAIT);
if (unmap && is_dma_xor_aligned(device, offset, 0, len)) {
if (unmap && dma_xor_aligned_offsets(device, offset,
src_offs, src_cnt, len)) {
struct dma_async_tx_descriptor *tx;
int i, j;
......@@ -184,7 +208,8 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset,
continue;
unmap->to_cnt++;
unmap->addr[j++] = dma_map_page(device->dev, src_list[i],
offset, len, DMA_TO_DEVICE);
src_offs ? src_offs[i] : offset,
len, DMA_TO_DEVICE);
}
/* map it bidirectional as it may be re-used as a source */
......@@ -213,11 +238,42 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset,
/* wait for any prerequisite operations */
async_tx_quiesce(&submit->depend_tx);
do_sync_xor(dest, src_list, offset, src_cnt, len, submit);
do_sync_xor_offs(dest, offset, src_list, src_offs,
src_cnt, len, submit);
return NULL;
}
}
EXPORT_SYMBOL_GPL(async_xor_offs);
/**
* async_xor - attempt to xor a set of blocks with a dma engine.
* @dest: destination page
* @src_list: array of source pages
* @offset: common src/dst offset to start transaction
* @src_cnt: number of source pages
* @len: length in bytes
* @submit: submission / completion modifiers
*
* honored flags: ASYNC_TX_ACK, ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DST
*
* xor_blocks always uses the dest as a source so the
* ASYNC_TX_XOR_ZERO_DST flag must be set to not include dest data in
* the calculation. The assumption with dma eninges is that they only
* use the destination buffer as a source when it is explicity specified
* in the source list.
*
* src_list note: if the dest is also a source it must be at index zero.
* The contents of this array will be overwritten if a scribble region
* is not specified.
*/
struct dma_async_tx_descriptor *
async_xor(struct page *dest, struct page **src_list, unsigned int offset,
int src_cnt, size_t len, struct async_submit_ctl *submit)
{
return async_xor_offs(dest, offset, src_list, NULL,
src_cnt, len, submit);
}
EXPORT_SYMBOL_GPL(async_xor);
static int page_is_zero(struct page *p, unsigned int offset, size_t len)
......@@ -237,10 +293,11 @@ xor_val_chan(struct async_submit_ctl *submit, struct page *dest,
}
/**
* async_xor_val - attempt a xor parity check with a dma engine.
* async_xor_val_offs - attempt a xor parity check with a dma engine.
* @dest: destination page used if the xor is performed synchronously
* @offset: des offset in pages to start transaction
* @src_list: array of source pages
* @offset: offset in pages to start transaction
* @src_offs: array of source pages offset, NULL means common src/det offset
* @src_cnt: number of source pages
* @len: length in bytes
* @result: 0 if sum == 0 else non-zero
......@@ -253,7 +310,8 @@ xor_val_chan(struct async_submit_ctl *submit, struct page *dest,
* is not specified.
*/
struct dma_async_tx_descriptor *
async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
async_xor_val_offs(struct page *dest, unsigned int offset,
struct page **src_list, unsigned int *src_offs,
int src_cnt, size_t len, enum sum_check_flags *result,
struct async_submit_ctl *submit)
{
......@@ -268,7 +326,7 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
unmap = dmaengine_get_unmap_data(device->dev, src_cnt, GFP_NOWAIT);
if (unmap && src_cnt <= device->max_xor &&
is_dma_xor_aligned(device, offset, 0, len)) {
dma_xor_aligned_offsets(device, offset, src_offs, src_cnt, len)) {
unsigned long dma_prep_flags = 0;
int i;
......@@ -281,7 +339,8 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
for (i = 0; i < src_cnt; i++) {
unmap->addr[i] = dma_map_page(device->dev, src_list[i],
offset, len, DMA_TO_DEVICE);
src_offs ? src_offs[i] : offset,
len, DMA_TO_DEVICE);
unmap->to_cnt++;
}
unmap->len = len;
......@@ -312,7 +371,8 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
submit->flags |= ASYNC_TX_XOR_DROP_DST;
submit->flags &= ~ASYNC_TX_ACK;
tx = async_xor(dest, src_list, offset, src_cnt, len, submit);
tx = async_xor_offs(dest, offset, src_list, src_offs,
src_cnt, len, submit);
async_tx_quiesce(&tx);
......@@ -325,6 +385,32 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
return tx;
}
EXPORT_SYMBOL_GPL(async_xor_val_offs);
/**
* async_xor_val - attempt a xor parity check with a dma engine.
* @dest: destination page used if the xor is performed synchronously
* @src_list: array of source pages
* @offset: offset in pages to start transaction
* @src_cnt: number of source pages
* @len: length in bytes
* @result: 0 if sum == 0 else non-zero
* @submit: submission / completion modifiers
*
* honored flags: ASYNC_TX_ACK
*
* src_list note: if the dest is also a source it must be at index zero.
* The contents of this array will be overwritten if a scribble region
* is not specified.
*/
struct dma_async_tx_descriptor *
async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
int src_cnt, size_t len, enum sum_check_flags *result,
struct async_submit_ctl *submit)
{
return async_xor_val_offs(dest, offset, src_list, NULL, src_cnt,
len, result, submit);
}
EXPORT_SYMBOL_GPL(async_xor_val);
MODULE_AUTHOR("Intel Corporation");
......
......@@ -18,6 +18,7 @@
#define NDISKS 64 /* Including P and Q */
static struct page *dataptrs[NDISKS];
unsigned int dataoffs[NDISKS];
static addr_conv_t addr_conv[NDISKS];
static struct page *data[NDISKS+3];
static struct page *spare;
......@@ -38,6 +39,7 @@ static void makedata(int disks)
for (i = 0; i < disks; i++) {
prandom_bytes(page_address(data[i]), PAGE_SIZE);
dataptrs[i] = data[i];
dataoffs[i] = 0;
}
}
......@@ -52,7 +54,8 @@ static char disk_type(int d, int disks)
}
/* Recover two failed blocks. */
static void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, struct page **ptrs)
static void raid6_dual_recov(int disks, size_t bytes, int faila, int failb,
struct page **ptrs, unsigned int *offs)
{
struct async_submit_ctl submit;
struct completion cmp;
......@@ -66,7 +69,8 @@ static void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, stru
if (faila == disks-2) {
/* P+Q failure. Just rebuild the syndrome. */
init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv);
tx = async_gen_syndrome(ptrs, 0, disks, bytes, &submit);
tx = async_gen_syndrome(ptrs, offs,
disks, bytes, &submit);
} else {
struct page *blocks[NDISKS];
struct page *dest;
......@@ -89,22 +93,26 @@ static void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, stru
tx = async_xor(dest, blocks, 0, count, bytes, &submit);
init_async_submit(&submit, 0, tx, NULL, NULL, addr_conv);
tx = async_gen_syndrome(ptrs, 0, disks, bytes, &submit);
tx = async_gen_syndrome(ptrs, offs,
disks, bytes, &submit);
}
} else {
if (failb == disks-2) {
/* data+P failure. */
init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv);
tx = async_raid6_datap_recov(disks, bytes, faila, ptrs, &submit);
tx = async_raid6_datap_recov(disks, bytes,
faila, ptrs, offs, &submit);
} else {
/* data+data failure. */
init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv);
tx = async_raid6_2data_recov(disks, bytes, faila, failb, ptrs, &submit);
tx = async_raid6_2data_recov(disks, bytes,
faila, failb, ptrs, offs, &submit);
}
}
init_completion(&cmp);
init_async_submit(&submit, ASYNC_TX_ACK, tx, callback, &cmp, addr_conv);
tx = async_syndrome_val(ptrs, 0, disks, bytes, &result, spare, &submit);
tx = async_syndrome_val(ptrs, offs,
disks, bytes, &result, spare, 0, &submit);
async_tx_issue_pending(tx);
if (wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)) == 0)
......@@ -126,7 +134,7 @@ static int test_disks(int i, int j, int disks)
dataptrs[i] = recovi;
dataptrs[j] = recovj;
raid6_dual_recov(disks, PAGE_SIZE, i, j, dataptrs);
raid6_dual_recov(disks, PAGE_SIZE, i, j, dataptrs, dataoffs);
erra = memcmp(page_address(data[i]), page_address(recovi), PAGE_SIZE);
errb = memcmp(page_address(data[j]), page_address(recovj), PAGE_SIZE);
......@@ -162,7 +170,7 @@ static int test(int disks, int *tests)
/* Generate assumed good syndrome */
init_completion(&cmp);
init_async_submit(&submit, ASYNC_TX_ACK, NULL, callback, &cmp, addr_conv);
tx = async_gen_syndrome(dataptrs, 0, disks, PAGE_SIZE, &submit);
tx = async_gen_syndrome(dataptrs, dataoffs, disks, PAGE_SIZE, &submit);
async_tx_issue_pending(tx);
if (wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)) == 0) {
......
......@@ -42,6 +42,9 @@ struct nullb_device {
struct badblocks badblocks;
unsigned int nr_zones;
unsigned int nr_zones_imp_open;
unsigned int nr_zones_exp_open;
unsigned int nr_zones_closed;
struct blk_zone *zones;
sector_t zone_size_sects;
......@@ -51,6 +54,8 @@ struct nullb_device {
unsigned long zone_size; /* zone size in MB if device is zoned */
unsigned long zone_capacity; /* zone capacity in MB if device is zoned */
unsigned int zone_nr_conv; /* number of conventional zones */
unsigned int zone_max_open; /* max number of open zones */
unsigned int zone_max_active; /* max number of active zones */
unsigned int submit_queues; /* number of submission queues */
unsigned int home_node; /* home node for the device */
unsigned int queue_mode; /* block interface */
......
......@@ -164,6 +164,10 @@ static bool shared_tags;
module_param(shared_tags, bool, 0444);
MODULE_PARM_DESC(shared_tags, "Share tag set between devices for blk-mq");
static bool g_shared_tag_bitmap;
module_param_named(shared_tag_bitmap, g_shared_tag_bitmap, bool, 0444);
MODULE_PARM_DESC(shared_tag_bitmap, "Use shared tag bitmap for all submission queues for blk-mq");
static int g_irqmode = NULL_IRQ_SOFTIRQ;
static int null_set_irqmode(const char *str, const struct kernel_param *kp)
......@@ -208,6 +212,14 @@ static unsigned int g_zone_nr_conv;
module_param_named(zone_nr_conv, g_zone_nr_conv, uint, 0444);
MODULE_PARM_DESC(zone_nr_conv, "Number of conventional zones when block device is zoned. Default: 0");
static unsigned int g_zone_max_open;
module_param_named(zone_max_open, g_zone_max_open, uint, 0444);
MODULE_PARM_DESC(zone_max_open, "Maximum number of open zones when block device is zoned. Default: 0 (no limit)");
static unsigned int g_zone_max_active;
module_param_named(zone_max_active, g_zone_max_active, uint, 0444);
MODULE_PARM_DESC(zone_max_active, "Maximum number of active zones when block device is zoned. Default: 0 (no limit)");
static struct nullb_device *null_alloc_dev(void);
static void null_free_dev(struct nullb_device *dev);
static void null_del_dev(struct nullb *nullb);
......@@ -347,6 +359,8 @@ NULLB_DEVICE_ATTR(zoned, bool, NULL);
NULLB_DEVICE_ATTR(zone_size, ulong, NULL);
NULLB_DEVICE_ATTR(zone_capacity, ulong, NULL);
NULLB_DEVICE_ATTR(zone_nr_conv, uint, NULL);
NULLB_DEVICE_ATTR(zone_max_open, uint, NULL);
NULLB_DEVICE_ATTR(zone_max_active, uint, NULL);
static ssize_t nullb_device_power_show(struct config_item *item, char *page)
{
......@@ -464,6 +478,8 @@ static struct configfs_attribute *nullb_device_attrs[] = {
&nullb_device_attr_zone_size,
&nullb_device_attr_zone_capacity,
&nullb_device_attr_zone_nr_conv,
&nullb_device_attr_zone_max_open,
&nullb_device_attr_zone_max_active,
NULL,
};
......@@ -517,7 +533,7 @@ nullb_group_drop_item(struct config_group *group, struct config_item *item)
static ssize_t memb_group_features_show(struct config_item *item, char *page)
{
return snprintf(page, PAGE_SIZE,
"memory_backed,discard,bandwidth,cache,badblocks,zoned,zone_size,zone_capacity,zone_nr_conv\n");
"memory_backed,discard,bandwidth,cache,badblocks,zoned,zone_size,zone_capacity,zone_nr_conv,zone_max_open,zone_max_active\n");
}
CONFIGFS_ATTR_RO(memb_group_, features);
......@@ -580,6 +596,8 @@ static struct nullb_device *null_alloc_dev(void)
dev->zone_size = g_zone_size;
dev->zone_capacity = g_zone_capacity;
dev->zone_nr_conv = g_zone_nr_conv;
dev->zone_max_open = g_zone_max_open;
dev->zone_max_active = g_zone_max_active;
return dev;
}
......@@ -1692,6 +1710,8 @@ static int null_init_tag_set(struct nullb *nullb, struct blk_mq_tag_set *set)
set->flags = BLK_MQ_F_SHOULD_MERGE;
if (g_no_sched)
set->flags |= BLK_MQ_F_NO_SCHED;
if (g_shared_tag_bitmap)
set->flags |= BLK_MQ_F_TAG_HCTX_SHARED;
set->driver_data = NULL;
if ((nullb && nullb->dev->blocking) || g_blocking)
......
......@@ -51,6 +51,22 @@ int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q)
dev->zone_nr_conv);
}
/* Max active zones has to be < nbr of seq zones in order to be enforceable */
if (dev->zone_max_active >= dev->nr_zones - dev->zone_nr_conv) {
dev->zone_max_active = 0;
pr_info("zone_max_active limit disabled, limit >= zone count\n");
}
/* Max open zones has to be <= max active zones */
if (dev->zone_max_active && dev->zone_max_open > dev->zone_max_active) {
dev->zone_max_open = dev->zone_max_active;
pr_info("changed the maximum number of open zones to %u\n",
dev->nr_zones);
} else if (dev->zone_max_open >= dev->nr_zones - dev->zone_nr_conv) {
dev->zone_max_open = 0;
pr_info("zone_max_open limit disabled, limit >= zone count\n");
}
for (i = 0; i < dev->zone_nr_conv; i++) {
struct blk_zone *zone = &dev->zones[i];
......@@ -99,6 +115,8 @@ int null_register_zoned_dev(struct nullb *nullb)
}
blk_queue_max_zone_append_sectors(q, dev->zone_size_sects);
blk_queue_max_open_zones(q, dev->zone_max_open);
blk_queue_max_active_zones(q, dev->zone_max_active);
return 0;
}
......@@ -159,6 +177,103 @@ size_t null_zone_valid_read_len(struct nullb *nullb,
return (zone->wp - sector) << SECTOR_SHIFT;
}
static blk_status_t null_close_zone(struct nullb_device *dev, struct blk_zone *zone)
{
if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
return BLK_STS_IOERR;
switch (zone->cond) {
case BLK_ZONE_COND_CLOSED:
/* close operation on closed is not an error */
return BLK_STS_OK;
case BLK_ZONE_COND_IMP_OPEN:
dev->nr_zones_imp_open--;
break;
case BLK_ZONE_COND_EXP_OPEN:
dev->nr_zones_exp_open--;
break;
case BLK_ZONE_COND_EMPTY:
case BLK_ZONE_COND_FULL:
default:
return BLK_STS_IOERR;
}
if (zone->wp == zone->start) {
zone->cond = BLK_ZONE_COND_EMPTY;
} else {
zone->cond = BLK_ZONE_COND_CLOSED;
dev->nr_zones_closed++;
}
return BLK_STS_OK;
}
static void null_close_first_imp_zone(struct nullb_device *dev)
{
unsigned int i;
for (i = dev->zone_nr_conv; i < dev->nr_zones; i++) {
if (dev->zones[i].cond == BLK_ZONE_COND_IMP_OPEN) {
null_close_zone(dev, &dev->zones[i]);
return;
}
}
}
static bool null_can_set_active(struct nullb_device *dev)
{
if (!dev->zone_max_active)
return true;
return dev->nr_zones_exp_open + dev->nr_zones_imp_open +
dev->nr_zones_closed < dev->zone_max_active;
}
static bool null_can_open(struct nullb_device *dev)
{
if (!dev->zone_max_open)
return true;
if (dev->nr_zones_exp_open + dev->nr_zones_imp_open < dev->zone_max_open)
return true;
if (dev->nr_zones_imp_open && null_can_set_active(dev)) {
null_close_first_imp_zone(dev);
return true;
}
return false;
}
/*
* This function matches the manage open zone resources function in the ZBC standard,
* with the addition of max active zones support (added in the ZNS standard).
*
* The function determines if a zone can transition to implicit open or explicit open,
* while maintaining the max open zone (and max active zone) limit(s). It may close an
* implicit open zone in order to make additional zone resources available.
*
* ZBC states that an implicit open zone shall be closed only if there is not
* room within the open limit. However, with the addition of an active limit,
* it is not certain that closing an implicit open zone will allow a new zone
* to be opened, since we might already be at the active limit capacity.
*/
static bool null_has_zone_resources(struct nullb_device *dev, struct blk_zone *zone)
{
switch (zone->cond) {
case BLK_ZONE_COND_EMPTY:
if (!null_can_set_active(dev))
return false;
fallthrough;
case BLK_ZONE_COND_CLOSED:
return null_can_open(dev);
default:
/* Should never be called for other states */
WARN_ON(1);
return false;
}
}
static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
unsigned int nr_sectors, bool append)
{
......@@ -177,9 +292,18 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
/* Cannot write to a full zone */
return BLK_STS_IOERR;
case BLK_ZONE_COND_EMPTY:
case BLK_ZONE_COND_CLOSED:
if (!null_has_zone_resources(dev, zone))
return BLK_STS_IOERR;
break;
case BLK_ZONE_COND_IMP_OPEN:
case BLK_ZONE_COND_EXP_OPEN:
case BLK_ZONE_COND_CLOSED:
break;
default:
/* Invalid zone condition */
return BLK_STS_IOERR;
}
/*
* Regular writes must be at the write pointer position.
* Zone append writes are automatically issued at the write
......@@ -199,6 +323,12 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
if (zone->wp + nr_sectors > zone->start + zone->capacity)
return BLK_STS_IOERR;
if (zone->cond == BLK_ZONE_COND_CLOSED) {
dev->nr_zones_closed--;
dev->nr_zones_imp_open++;
} else if (zone->cond == BLK_ZONE_COND_EMPTY) {
dev->nr_zones_imp_open++;
}
if (zone->cond != BLK_ZONE_COND_EXP_OPEN)
zone->cond = BLK_ZONE_COND_IMP_OPEN;
......@@ -207,13 +337,110 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
return ret;
zone->wp += nr_sectors;
if (zone->wp == zone->start + zone->capacity)
if (zone->wp == zone->start + zone->capacity) {
if (zone->cond == BLK_ZONE_COND_EXP_OPEN)
dev->nr_zones_exp_open--;
else if (zone->cond == BLK_ZONE_COND_IMP_OPEN)
dev->nr_zones_imp_open--;
zone->cond = BLK_ZONE_COND_FULL;
}
return BLK_STS_OK;
}
static blk_status_t null_open_zone(struct nullb_device *dev, struct blk_zone *zone)
{
if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
return BLK_STS_IOERR;
switch (zone->cond) {
case BLK_ZONE_COND_EXP_OPEN:
/* open operation on exp open is not an error */
return BLK_STS_OK;
case BLK_ZONE_COND_EMPTY:
if (!null_has_zone_resources(dev, zone))
return BLK_STS_IOERR;
break;
case BLK_ZONE_COND_IMP_OPEN:
dev->nr_zones_imp_open--;
break;
case BLK_ZONE_COND_CLOSED:
if (!null_has_zone_resources(dev, zone))
return BLK_STS_IOERR;
dev->nr_zones_closed--;
break;
case BLK_ZONE_COND_FULL:
default:
return BLK_STS_IOERR;
}
zone->cond = BLK_ZONE_COND_EXP_OPEN;
dev->nr_zones_exp_open++;
return BLK_STS_OK;
}
static blk_status_t null_finish_zone(struct nullb_device *dev, struct blk_zone *zone)
{
if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
return BLK_STS_IOERR;
switch (zone->cond) {
case BLK_ZONE_COND_FULL:
/* finish operation on full is not an error */
return BLK_STS_OK;
case BLK_ZONE_COND_EMPTY:
if (!null_has_zone_resources(dev, zone))
return BLK_STS_IOERR;
break;
case BLK_ZONE_COND_IMP_OPEN:
dev->nr_zones_imp_open--;
break;
case BLK_ZONE_COND_EXP_OPEN:
dev->nr_zones_exp_open--;
break;
case BLK_ZONE_COND_CLOSED:
if (!null_has_zone_resources(dev, zone))
return BLK_STS_IOERR;
dev->nr_zones_closed--;
break;
default:
return BLK_STS_IOERR;
}
zone->cond = BLK_ZONE_COND_FULL;
zone->wp = zone->start + zone->len;
return BLK_STS_OK;
}
static blk_status_t null_reset_zone(struct nullb_device *dev, struct blk_zone *zone)
{
if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
return BLK_STS_IOERR;
switch (zone->cond) {
case BLK_ZONE_COND_EMPTY:
/* reset operation on empty is not an error */
return BLK_STS_OK;
case BLK_ZONE_COND_IMP_OPEN:
dev->nr_zones_imp_open--;
break;
case BLK_ZONE_COND_EXP_OPEN:
dev->nr_zones_exp_open--;
break;
case BLK_ZONE_COND_CLOSED:
dev->nr_zones_closed--;
break;
case BLK_ZONE_COND_FULL:
break;
default:
/* Invalid zone condition */
return BLK_STS_IOERR;
}
zone->cond = BLK_ZONE_COND_EMPTY;
zone->wp = zone->start;
return BLK_STS_OK;
}
static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op,
......@@ -222,56 +449,34 @@ static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op,
struct nullb_device *dev = cmd->nq->dev;
unsigned int zone_no = null_zone_no(dev, sector);
struct blk_zone *zone = &dev->zones[zone_no];
blk_status_t ret = BLK_STS_OK;
size_t i;
switch (op) {
case REQ_OP_ZONE_RESET_ALL:
for (i = 0; i < dev->nr_zones; i++) {
if (zone[i].type == BLK_ZONE_TYPE_CONVENTIONAL)
continue;
zone[i].cond = BLK_ZONE_COND_EMPTY;
zone[i].wp = zone[i].start;
}
for (i = dev->zone_nr_conv; i < dev->nr_zones; i++)
null_reset_zone(dev, &dev->zones[i]);
break;
case REQ_OP_ZONE_RESET:
if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
return BLK_STS_IOERR;
zone->cond = BLK_ZONE_COND_EMPTY;
zone->wp = zone->start;
ret = null_reset_zone(dev, zone);
break;
case REQ_OP_ZONE_OPEN:
if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
return BLK_STS_IOERR;
if (zone->cond == BLK_ZONE_COND_FULL)
return BLK_STS_IOERR;
zone->cond = BLK_ZONE_COND_EXP_OPEN;
ret = null_open_zone(dev, zone);
break;
case REQ_OP_ZONE_CLOSE:
if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
return BLK_STS_IOERR;
if (zone->cond == BLK_ZONE_COND_FULL)
return BLK_STS_IOERR;
if (zone->wp == zone->start)
zone->cond = BLK_ZONE_COND_EMPTY;
else
zone->cond = BLK_ZONE_COND_CLOSED;
ret = null_close_zone(dev, zone);
break;
case REQ_OP_ZONE_FINISH:
if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
return BLK_STS_IOERR;
zone->cond = BLK_ZONE_COND_FULL;
zone->wp = zone->start + zone->len;
ret = null_finish_zone(dev, zone);
break;
default:
return BLK_STS_NOTSUPP;
}
if (ret == BLK_STS_OK)
trace_nullb_zone_op(cmd, zone_no, zone->cond);
return BLK_STS_OK;
return ret;
}
blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd, enum req_opf op,
......
......@@ -439,7 +439,7 @@ static void card_state_change(struct rsxx_cardinfo *card,
case CARD_STATE_FAULT:
dev_crit(CARD_TO_DEV(card),
"Hardware Fault reported!\n");
/* Fall through. */
fallthrough;
/* Everything else, detach DMA interface if it's attached. */
case CARD_STATE_SHUTDOWN:
......
......@@ -49,7 +49,7 @@
*
* bch_bucket_alloc() allocates a single bucket from a specific cache.
*
* bch_bucket_alloc_set() allocates one or more buckets from different caches
* bch_bucket_alloc_set() allocates one bucket from different caches
* out of a cache set.
*
* free_some_buckets() drives all the processes described above. It's called
......@@ -87,8 +87,7 @@ void bch_rescale_priorities(struct cache_set *c, int sectors)
{
struct cache *ca;
struct bucket *b;
unsigned long next = c->nbuckets * c->sb.bucket_size / 1024;
unsigned int i;
unsigned long next = c->nbuckets * c->cache->sb.bucket_size / 1024;
int r;
atomic_sub(sectors, &c->rescale);
......@@ -104,7 +103,7 @@ void bch_rescale_priorities(struct cache_set *c, int sectors)
c->min_prio = USHRT_MAX;
for_each_cache(ca, c, i)
ca = c->cache;
for_each_bucket(b, ca)
if (b->prio &&
b->prio != BTREE_PRIO &&
......@@ -362,7 +361,7 @@ static int bch_allocator_thread(void *arg)
* new stuff to them:
*/
allocator_wait(ca, !atomic_read(&ca->set->prio_blocked));
if (CACHE_SYNC(&ca->set->sb)) {
if (CACHE_SYNC(&ca->sb)) {
/*
* This could deadlock if an allocation with a btree
* node locked ever blocked - having the btree node
......@@ -488,34 +487,29 @@ void bch_bucket_free(struct cache_set *c, struct bkey *k)
}
int __bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve,
struct bkey *k, int n, bool wait)
struct bkey *k, bool wait)
{
int i;
struct cache *ca;
long b;
/* No allocation if CACHE_SET_IO_DISABLE bit is set */
if (unlikely(test_bit(CACHE_SET_IO_DISABLE, &c->flags)))
return -1;
lockdep_assert_held(&c->bucket_lock);
BUG_ON(!n || n > c->caches_loaded || n > MAX_CACHES_PER_SET);
bkey_init(k);
/* sort by free space/prio of oldest data in caches */
for (i = 0; i < n; i++) {
struct cache *ca = c->cache_by_alloc[i];
long b = bch_bucket_alloc(ca, reserve, wait);
ca = c->cache;
b = bch_bucket_alloc(ca, reserve, wait);
if (b == -1)
goto err;
k->ptr[i] = MAKE_PTR(ca->buckets[b].gen,
k->ptr[0] = MAKE_PTR(ca->buckets[b].gen,
bucket_to_sector(c, b),
ca->sb.nr_this_dev);
SET_KEY_PTRS(k, i + 1);
}
SET_KEY_PTRS(k, 1);
return 0;
err:
......@@ -525,12 +519,12 @@ int __bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve,
}
int bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve,
struct bkey *k, int n, bool wait)
struct bkey *k, bool wait)
{
int ret;
mutex_lock(&c->bucket_lock);
ret = __bch_bucket_alloc_set(c, reserve, k, n, wait);
ret = __bch_bucket_alloc_set(c, reserve, k, wait);
mutex_unlock(&c->bucket_lock);
return ret;
}
......@@ -589,7 +583,7 @@ static struct open_bucket *pick_data_bucket(struct cache_set *c,
struct open_bucket, list);
found:
if (!ret->sectors_free && KEY_PTRS(alloc)) {
ret->sectors_free = c->sb.bucket_size;
ret->sectors_free = c->cache->sb.bucket_size;
bkey_copy(&ret->key, alloc);
bkey_init(alloc);
}
......@@ -638,7 +632,7 @@ bool bch_alloc_sectors(struct cache_set *c,
spin_unlock(&c->data_bucket_lock);
if (bch_bucket_alloc_set(c, watermark, &alloc.key, 1, wait))
if (bch_bucket_alloc_set(c, watermark, &alloc.key, wait))
return false;
spin_lock(&c->data_bucket_lock);
......@@ -683,7 +677,7 @@ bool bch_alloc_sectors(struct cache_set *c,
&PTR_CACHE(c, &b->key, i)->sectors_written);
}
if (b->sectors_free < c->sb.block_size)
if (b->sectors_free < c->cache->sb.block_size)
b->sectors_free = 0;
/*
......
......@@ -517,11 +517,7 @@ struct cache_set {
atomic_t idle_counter;
atomic_t at_max_writeback_rate;
struct cache_sb sb;
struct cache *cache[MAX_CACHES_PER_SET];
struct cache *cache_by_alloc[MAX_CACHES_PER_SET];
int caches_loaded;
struct cache *cache;
struct bcache_device **devices;
unsigned int devices_max_used;
......@@ -670,6 +666,7 @@ struct cache_set {
struct mutex verify_lock;
#endif
uint8_t set_uuid[16];
unsigned int nr_uuids;
struct uuid_entry *uuids;
BKEY_PADDED(uuid_bucket);
......@@ -758,9 +755,8 @@ struct bbio {
#define btree_default_blocks(c) \
((unsigned int) ((PAGE_SECTORS * (c)->btree_pages) >> (c)->block_bits))
#define bucket_pages(c) ((c)->sb.bucket_size / PAGE_SECTORS)
#define bucket_bytes(c) ((c)->sb.bucket_size << 9)
#define block_bytes(c) ((c)->sb.block_size << 9)
#define bucket_bytes(ca) ((ca)->sb.bucket_size << 9)
#define block_bytes(ca) ((ca)->sb.block_size << 9)
static inline unsigned int meta_bucket_pages(struct cache_sb *sb)
{
......@@ -801,14 +797,14 @@ static inline sector_t bucket_to_sector(struct cache_set *c, size_t b)
static inline sector_t bucket_remainder(struct cache_set *c, sector_t s)
{
return s & (c->sb.bucket_size - 1);
return s & (c->cache->sb.bucket_size - 1);
}
static inline struct cache *PTR_CACHE(struct cache_set *c,
const struct bkey *k,
unsigned int ptr)
{
return c->cache[PTR_DEV(k, ptr)];
return c->cache;
}
static inline size_t PTR_BUCKET_NR(struct cache_set *c,
......@@ -889,9 +885,6 @@ do { \
/* Looping macros */
#define for_each_cache(ca, cs, iter) \
for (iter = 0; ca = cs->cache[iter], iter < (cs)->sb.nr_in_set; iter++)
#define for_each_bucket(b, ca) \
for (b = (ca)->buckets + (ca)->sb.first_bucket; \
b < (ca)->buckets + (ca)->sb.nbuckets; b++)
......@@ -933,10 +926,8 @@ static inline uint8_t bucket_gc_gen(struct bucket *b)
static inline void wake_up_allocators(struct cache_set *c)
{
struct cache *ca;
unsigned int i;
struct cache *ca = c->cache;
for_each_cache(ca, c, i)
wake_up_process(ca->alloc_thread);
}
......@@ -994,9 +985,9 @@ void bch_bucket_free(struct cache_set *c, struct bkey *k);
long bch_bucket_alloc(struct cache *ca, unsigned int reserve, bool wait);
int __bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve,
struct bkey *k, int n, bool wait);
struct bkey *k, bool wait);
int bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve,
struct bkey *k, int n, bool wait);
struct bkey *k, bool wait);
bool bch_alloc_sectors(struct cache_set *c, struct bkey *k,
unsigned int sectors, unsigned int write_point,
unsigned int write_prio, bool wait);
......
......@@ -104,7 +104,7 @@
static inline struct bset *write_block(struct btree *b)
{
return ((void *) btree_bset_first(b)) + b->written * block_bytes(b->c);
return ((void *) btree_bset_first(b)) + b->written * block_bytes(b->c->cache);
}
static void bch_btree_init_next(struct btree *b)
......@@ -117,7 +117,7 @@ static void bch_btree_init_next(struct btree *b)
if (b->written < btree_blocks(b))
bch_bset_init_next(&b->keys, write_block(b),
bset_magic(&b->c->sb));
bset_magic(&b->c->cache->sb));
}
......@@ -155,7 +155,7 @@ void bch_btree_node_read_done(struct btree *b)
* See the comment arount cache_set->fill_iter.
*/
iter = mempool_alloc(&b->c->fill_iter, GFP_NOIO);
iter->size = b->c->sb.bucket_size / b->c->sb.block_size;
iter->size = b->c->cache->sb.bucket_size / b->c->cache->sb.block_size;
iter->used = 0;
#ifdef CONFIG_BCACHE_DEBUG
......@@ -173,12 +173,12 @@ void bch_btree_node_read_done(struct btree *b)
goto err;
err = "bad btree header";
if (b->written + set_blocks(i, block_bytes(b->c)) >
if (b->written + set_blocks(i, block_bytes(b->c->cache)) >
btree_blocks(b))
goto err;
err = "bad magic";
if (i->magic != bset_magic(&b->c->sb))
if (i->magic != bset_magic(&b->c->cache->sb))
goto err;
err = "bad checksum";
......@@ -199,13 +199,13 @@ void bch_btree_node_read_done(struct btree *b)
bch_btree_iter_push(iter, i->start, bset_bkey_last(i));
b->written += set_blocks(i, block_bytes(b->c));
b->written += set_blocks(i, block_bytes(b->c->cache));
}
err = "corrupted btree";
for (i = write_block(b);
bset_sector_offset(&b->keys, i) < KEY_SIZE(&b->key);
i = ((void *) i) + block_bytes(b->c))
i = ((void *) i) + block_bytes(b->c->cache))
if (i->seq == b->keys.set[0].data->seq)
goto err;
......@@ -219,7 +219,7 @@ void bch_btree_node_read_done(struct btree *b)
if (b->written < btree_blocks(b))
bch_bset_init_next(&b->keys, write_block(b),
bset_magic(&b->c->sb));
bset_magic(&b->c->cache->sb));
out:
mempool_free(iter, &b->c->fill_iter);
return;
......@@ -347,7 +347,7 @@ static void do_btree_node_write(struct btree *b)
b->bio->bi_end_io = btree_node_write_endio;
b->bio->bi_private = cl;
b->bio->bi_iter.bi_size = roundup(set_bytes(i), block_bytes(b->c));
b->bio->bi_iter.bi_size = roundup(set_bytes(i), block_bytes(b->c->cache));
b->bio->bi_opf = REQ_OP_WRITE | REQ_META | REQ_FUA;
bch_bio_map(b->bio, i);
......@@ -423,10 +423,10 @@ void __bch_btree_node_write(struct btree *b, struct closure *parent)
do_btree_node_write(b);
atomic_long_add(set_blocks(i, block_bytes(b->c)) * b->c->sb.block_size,
atomic_long_add(set_blocks(i, block_bytes(b->c->cache)) * b->c->cache->sb.block_size,
&PTR_CACHE(b->c, &b->key, 0)->btree_sectors_written);
b->written += set_blocks(i, block_bytes(b->c));
b->written += set_blocks(i, block_bytes(b->c->cache));
}
void bch_btree_node_write(struct btree *b, struct closure *parent)
......@@ -514,7 +514,7 @@ static void bch_btree_leaf_dirty(struct btree *b, atomic_t *journal_ref)
* mca -> memory cache
*/
#define mca_reserve(c) (((c->root && c->root->level) \
#define mca_reserve(c) (((!IS_ERR_OR_NULL(c->root) && c->root->level) \
? c->root->level : 1) * 8 + 16)
#define mca_can_free(c) \
max_t(int, 0, c->btree_cache_used - mca_reserve(c))
......@@ -738,7 +738,7 @@ void bch_btree_cache_free(struct cache_set *c)
if (c->verify_data)
list_move(&c->verify_data->list, &c->btree_cache);
free_pages((unsigned long) c->verify_ondisk, ilog2(meta_bucket_pages(&c->sb)));
free_pages((unsigned long) c->verify_ondisk, ilog2(meta_bucket_pages(&c->cache->sb)));
#endif
list_splice(&c->btree_cache_freeable,
......@@ -785,7 +785,8 @@ int bch_btree_cache_alloc(struct cache_set *c)
mutex_init(&c->verify_lock);
c->verify_ondisk = (void *)
__get_free_pages(GFP_KERNEL|__GFP_COMP, ilog2(meta_bucket_pages(&c->sb)));
__get_free_pages(GFP_KERNEL|__GFP_COMP,
ilog2(meta_bucket_pages(&c->cache->sb)));
if (!c->verify_ondisk) {
/*
* Don't worry about the mca_rereserve buckets
......@@ -1091,7 +1092,7 @@ struct btree *__bch_btree_node_alloc(struct cache_set *c, struct btree_op *op,
mutex_lock(&c->bucket_lock);
retry:
if (__bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, 1, wait))
if (__bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, wait))
goto err;
bkey_put(c, &k.key);
......@@ -1108,7 +1109,7 @@ struct btree *__bch_btree_node_alloc(struct cache_set *c, struct btree_op *op,
}
b->parent = parent;
bch_bset_init_next(&b->keys, b->keys.set->data, bset_magic(&b->c->sb));
bch_bset_init_next(&b->keys, b->keys.set->data, bset_magic(&b->c->cache->sb));
mutex_unlock(&c->bucket_lock);
......@@ -1167,12 +1168,11 @@ static void make_btree_freeing_key(struct btree *b, struct bkey *k)
static int btree_check_reserve(struct btree *b, struct btree_op *op)
{
struct cache_set *c = b->c;
struct cache *ca;
unsigned int i, reserve = (c->root->level - b->level) * 2 + 1;
struct cache *ca = c->cache;
unsigned int reserve = (c->root->level - b->level) * 2 + 1;
mutex_lock(&c->bucket_lock);
for_each_cache(ca, c, i)
if (fifo_used(&ca->free[RESERVE_BTREE]) < reserve) {
if (op)
prepare_to_wait(&c->btree_cache_wait, &op->wait,
......@@ -1345,7 +1345,7 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
if (nodes < 2 ||
__set_blocks(b->keys.set[0].data, keys,
block_bytes(b->c)) > blocks * (nodes - 1))
block_bytes(b->c->cache)) > blocks * (nodes - 1))
return 0;
for (i = 0; i < nodes; i++) {
......@@ -1379,7 +1379,7 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
k = bkey_next(k)) {
if (__set_blocks(n1, n1->keys + keys +
bkey_u64s(k),
block_bytes(b->c)) > blocks)
block_bytes(b->c->cache)) > blocks)
break;
last = k;
......@@ -1395,7 +1395,7 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
* though)
*/
if (__set_blocks(n1, n1->keys + n2->keys,
block_bytes(b->c)) >
block_bytes(b->c->cache)) >
btree_blocks(new_nodes[i]))
goto out_unlock_nocoalesce;
......@@ -1404,7 +1404,7 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
last = &r->b->key;
}
BUG_ON(__set_blocks(n1, n1->keys + keys, block_bytes(b->c)) >
BUG_ON(__set_blocks(n1, n1->keys + keys, block_bytes(b->c->cache)) >
btree_blocks(new_nodes[i]));
if (last)
......@@ -1695,7 +1695,6 @@ static void btree_gc_start(struct cache_set *c)
{
struct cache *ca;
struct bucket *b;
unsigned int i;
if (!c->gc_mark_valid)
return;
......@@ -1705,7 +1704,7 @@ static void btree_gc_start(struct cache_set *c)
c->gc_mark_valid = 0;
c->gc_done = ZERO_KEY;
for_each_cache(ca, c, i)
ca = c->cache;
for_each_bucket(b, ca) {
b->last_gc = b->gen;
if (!atomic_read(&b->pin)) {
......@@ -1721,7 +1720,8 @@ static void bch_btree_gc_finish(struct cache_set *c)
{
struct bucket *b;
struct cache *ca;
unsigned int i;
unsigned int i, j;
uint64_t *k;
mutex_lock(&c->bucket_lock);
......@@ -1739,7 +1739,6 @@ static void bch_btree_gc_finish(struct cache_set *c)
struct bcache_device *d = c->devices[i];
struct cached_dev *dc;
struct keybuf_key *w, *n;
unsigned int j;
if (!d || UUID_FLASH_ONLY(&c->uuids[i]))
continue;
......@@ -1756,17 +1755,16 @@ static void bch_btree_gc_finish(struct cache_set *c)
rcu_read_unlock();
c->avail_nbuckets = 0;
for_each_cache(ca, c, i) {
uint64_t *i;
ca = c->cache;
ca->invalidate_needs_gc = 0;
for (i = ca->sb.d; i < ca->sb.d + ca->sb.keys; i++)
SET_GC_MARK(ca->buckets + *i, GC_MARK_METADATA);
for (k = ca->sb.d; k < ca->sb.d + ca->sb.keys; k++)
SET_GC_MARK(ca->buckets + *k, GC_MARK_METADATA);
for (i = ca->prio_buckets;
i < ca->prio_buckets + prio_buckets(ca) * 2; i++)
SET_GC_MARK(ca->buckets + *i, GC_MARK_METADATA);
for (k = ca->prio_buckets;
k < ca->prio_buckets + prio_buckets(ca) * 2; k++)
SET_GC_MARK(ca->buckets + *k, GC_MARK_METADATA);
for_each_bucket(b, ca) {
c->need_gc = max(c->need_gc, bucket_gc_gen(b));
......@@ -1779,7 +1777,6 @@ static void bch_btree_gc_finish(struct cache_set *c)
if (!GC_MARK(b) || GC_MARK(b) == GC_MARK_RECLAIMABLE)
c->avail_nbuckets++;
}
}
mutex_unlock(&c->bucket_lock);
}
......@@ -1830,10 +1827,8 @@ static void bch_btree_gc(struct cache_set *c)
static bool gc_should_run(struct cache_set *c)
{
struct cache *ca;
unsigned int i;
struct cache *ca = c->cache;
for_each_cache(ca, c, i)
if (ca->invalidate_needs_gc)
return true;
......@@ -2081,9 +2076,8 @@ int bch_btree_check(struct cache_set *c)
void bch_initial_gc_finish(struct cache_set *c)
{
struct cache *ca;
struct cache *ca = c->cache;
struct bucket *b;
unsigned int i;
bch_btree_gc_finish(c);
......@@ -2098,7 +2092,6 @@ void bch_initial_gc_finish(struct cache_set *c)
* This is only safe for buckets that have no live data in them, which
* there should always be some of.
*/
for_each_cache(ca, c, i) {
for_each_bucket(b, ca) {
if (fifo_full(&ca->free[RESERVE_PRIO]) &&
fifo_full(&ca->free[RESERVE_BTREE]))
......@@ -2113,7 +2106,6 @@ void bch_initial_gc_finish(struct cache_set *c)
b - ca->buckets);
}
}
}
mutex_unlock(&c->bucket_lock);
}
......@@ -2219,7 +2211,7 @@ static int btree_split(struct btree *b, struct btree_op *op,
goto err;
split = set_blocks(btree_bset_first(n1),
block_bytes(n1->c)) > (btree_blocks(b) * 4) / 5;
block_bytes(n1->c->cache)) > (btree_blocks(b) * 4) / 5;
if (split) {
unsigned int keys = 0;
......
......@@ -194,7 +194,7 @@ static inline unsigned int bset_block_offset(struct btree *b, struct bset *i)
static inline void set_gc_sectors(struct cache_set *c)
{
atomic_set(&c->sectors_to_gc, c->sb.bucket_size * c->nbuckets / 16);
atomic_set(&c->sectors_to_gc, c->cache->sb.bucket_size * c->nbuckets / 16);
}
void bkey_put(struct cache_set *c, struct bkey *k);
......
......@@ -159,7 +159,7 @@ void closure_debug_destroy(struct closure *cl)
static struct dentry *closure_debug;
static int debug_seq_show(struct seq_file *f, void *data)
static int debug_show(struct seq_file *f, void *data)
{
struct closure *cl;
......@@ -188,17 +188,7 @@ static int debug_seq_show(struct seq_file *f, void *data)
return 0;
}
static int debug_seq_open(struct inode *inode, struct file *file)
{
return single_open(file, debug_seq_show, NULL);
}
static const struct file_operations debug_ops = {
.owner = THIS_MODULE,
.open = debug_seq_open,
.read = seq_read,
.release = single_release
};
DEFINE_SHOW_ATTRIBUTE(debug);
void __init closure_debug_init(void)
{
......@@ -209,7 +199,7 @@ void __init closure_debug_init(void)
* about this.
*/
closure_debug = debugfs_create_file(
"closures", 0400, bcache_debug, NULL, &debug_ops);
"closures", 0400, bcache_debug, NULL, &debug_fops);
}
#endif
......
......@@ -25,8 +25,8 @@ struct dentry *bcache_debug;
for (i = (start); \
(void *) i < (void *) (start) + (KEY_SIZE(&b->key) << 9) &&\
i->seq == (start)->seq; \
i = (void *) i + set_blocks(i, block_bytes(b->c)) * \
block_bytes(b->c))
i = (void *) i + set_blocks(i, block_bytes(b->c->cache)) * \
block_bytes(b->c->cache))
void bch_btree_verify(struct btree *b)
{
......@@ -82,14 +82,14 @@ void bch_btree_verify(struct btree *b)
for_each_written_bset(b, ondisk, i) {
unsigned int block = ((void *) i - (void *) ondisk) /
block_bytes(b->c);
block_bytes(b->c->cache);
pr_err("*** on disk block %u:\n", block);
bch_dump_bset(&b->keys, i, block);
}
pr_err("*** block %zu not written\n",
((void *) i - (void *) ondisk) / block_bytes(b->c));
((void *) i - (void *) ondisk) / block_bytes(b->c->cache));
for (j = 0; j < inmemory->keys; j++)
if (inmemory->d[j] != sorted->d[j])
......@@ -238,7 +238,7 @@ void bch_debug_init_cache_set(struct cache_set *c)
if (!IS_ERR_OR_NULL(bcache_debug)) {
char name[50];
snprintf(name, 50, "bcache-%pU", c->sb.set_uuid);
snprintf(name, 50, "bcache-%pU", c->set_uuid);
c->debug = debugfs_create_file(name, 0400, bcache_debug, c,
&cache_set_debug_ops);
}
......
......@@ -54,7 +54,7 @@ static bool __ptr_invalid(struct cache_set *c, const struct bkey *k)
size_t bucket = PTR_BUCKET_NR(c, k, i);
size_t r = bucket_remainder(c, PTR_OFFSET(k, i));
if (KEY_SIZE(k) + r > c->sb.bucket_size ||
if (KEY_SIZE(k) + r > c->cache->sb.bucket_size ||
bucket < ca->sb.first_bucket ||
bucket >= ca->sb.nbuckets)
return true;
......@@ -75,7 +75,7 @@ static const char *bch_ptr_status(struct cache_set *c, const struct bkey *k)
size_t bucket = PTR_BUCKET_NR(c, k, i);
size_t r = bucket_remainder(c, PTR_OFFSET(k, i));
if (KEY_SIZE(k) + r > c->sb.bucket_size)
if (KEY_SIZE(k) + r > c->cache->sb.bucket_size)
return "bad, length too big";
if (bucket < ca->sb.first_bucket)
return "bad, short offset";
......@@ -136,7 +136,7 @@ static void bch_bkey_dump(struct btree_keys *keys, const struct bkey *k)
size_t n = PTR_BUCKET_NR(b->c, k, j);
pr_cont(" bucket %zu", n);
if (n >= b->c->sb.first_bucket && n < b->c->sb.nbuckets)
if (n >= b->c->cache->sb.first_bucket && n < b->c->cache->sb.nbuckets)
pr_cont(" prio %i",
PTR_BUCKET(b->c, k, j)->prio);
}
......
......@@ -30,7 +30,7 @@ static struct feature feature_list[] = {
for (f = &feature_list[0]; f->compat != 0; f++) { \
if (f->compat != BCH_FEATURE_ ## type) \
continue; \
if (BCH_HAS_ ## type ## _FEATURE(&c->sb, f->mask)) { \
if (BCH_HAS_ ## type ## _FEATURE(&c->cache->sb, f->mask)) { \
if (first) { \
out += snprintf(out, buf + size - out, \
"["); \
......@@ -44,7 +44,7 @@ static struct feature feature_list[] = {
\
out += snprintf(out, buf + size - out, "%s", f->string);\
\
if (BCH_HAS_ ## type ## _FEATURE(&c->sb, f->mask)) \
if (BCH_HAS_ ## type ## _FEATURE(&c->cache->sb, f->mask)) \
out += snprintf(out, buf + size - out, "]"); \
\
first = false; \
......
......@@ -26,7 +26,7 @@ struct bio *bch_bbio_alloc(struct cache_set *c)
struct bbio *b = mempool_alloc(&c->bio_meta, GFP_NOIO);
struct bio *bio = &b->bio;
bio_init(bio, bio->bi_inline_vecs, meta_bucket_pages(&c->sb));
bio_init(bio, bio->bi_inline_vecs, meta_bucket_pages(&c->cache->sb));
return bio;
}
......
......@@ -98,7 +98,7 @@ reread: left = ca->sb.bucket_size - offset;
return ret;
}
blocks = set_blocks(j, block_bytes(ca->set));
blocks = set_blocks(j, block_bytes(ca));
/*
* Nodes in 'list' are in linear increasing order of
......@@ -179,11 +179,8 @@ int bch_journal_read(struct cache_set *c, struct list_head *list)
ret; \
})
struct cache *ca;
unsigned int iter;
struct cache *ca = c->cache;
int ret = 0;
for_each_cache(ca, c, iter) {
struct journal_device *ja = &ca->journal;
DECLARE_BITMAP(bitmap, SB_JOURNAL_BUCKETS);
unsigned int i, l, r, m;
......@@ -223,7 +220,7 @@ int bch_journal_read(struct cache_set *c, struct list_head *list)
/* no journal entries on this device? */
if (l == ca->sb.njournal_buckets)
continue;
goto out;
bsearch:
BUG_ON(list_empty(list));
......@@ -283,8 +280,8 @@ int bch_journal_read(struct cache_set *c, struct list_head *list)
ca->sb.njournal_buckets;
}
}
out:
if (!list_empty(list))
c->journal.seq = list_entry(list->prev,
struct journal_replay,
......@@ -342,10 +339,8 @@ void bch_journal_mark(struct cache_set *c, struct list_head *list)
static bool is_discard_enabled(struct cache_set *s)
{
struct cache *ca;
unsigned int i;
struct cache *ca = s->cache;
for_each_cache(ca, s, i)
if (ca->discard)
return true;
......@@ -633,9 +628,10 @@ static void do_journal_discard(struct cache *ca)
static void journal_reclaim(struct cache_set *c)
{
struct bkey *k = &c->journal.key;
struct cache *ca;
struct cache *ca = c->cache;
uint64_t last_seq;
unsigned int iter, n = 0;
unsigned int next;
struct journal_device *ja = &ca->journal;
atomic_t p __maybe_unused;
atomic_long_inc(&c->reclaim);
......@@ -647,46 +643,31 @@ static void journal_reclaim(struct cache_set *c)
/* Update last_idx */
for_each_cache(ca, c, iter) {
struct journal_device *ja = &ca->journal;
while (ja->last_idx != ja->cur_idx &&
ja->seq[ja->last_idx] < last_seq)
ja->last_idx = (ja->last_idx + 1) %
ca->sb.njournal_buckets;
}
for_each_cache(ca, c, iter)
do_journal_discard(ca);
if (c->journal.blocks_free)
goto out;
/*
* Allocate:
* XXX: Sort by free journal space
*/
for_each_cache(ca, c, iter) {
struct journal_device *ja = &ca->journal;
unsigned int next = (ja->cur_idx + 1) % ca->sb.njournal_buckets;
next = (ja->cur_idx + 1) % ca->sb.njournal_buckets;
/* No space available on this device */
if (next == ja->discard_idx)
continue;
goto out;
ja->cur_idx = next;
k->ptr[n++] = MAKE_PTR(0,
k->ptr[0] = MAKE_PTR(0,
bucket_to_sector(c, ca->sb.d[ja->cur_idx]),
ca->sb.nr_this_dev);
atomic_long_inc(&c->reclaimed_journal_buckets);
}
if (n) {
bkey_init(k);
SET_KEY_PTRS(k, n);
c->journal.blocks_free = c->sb.bucket_size >> c->block_bits;
}
SET_KEY_PTRS(k, 1);
c->journal.blocks_free = ca->sb.bucket_size >> c->block_bits;
out:
if (!journal_full(&c->journal))
__closure_wake_up(&c->journal.wait);
......@@ -750,11 +731,11 @@ static void journal_write_unlocked(struct closure *cl)
__releases(c->journal.lock)
{
struct cache_set *c = container_of(cl, struct cache_set, journal.io);
struct cache *ca;
struct cache *ca = c->cache;
struct journal_write *w = c->journal.cur;
struct bkey *k = &c->journal.key;
unsigned int i, sectors = set_blocks(w->data, block_bytes(c)) *
c->sb.block_size;
unsigned int i, sectors = set_blocks(w->data, block_bytes(ca)) *
ca->sb.block_size;
struct bio *bio;
struct bio_list list;
......@@ -773,17 +754,15 @@ static void journal_write_unlocked(struct closure *cl)
return;
}
c->journal.blocks_free -= set_blocks(w->data, block_bytes(c));
c->journal.blocks_free -= set_blocks(w->data, block_bytes(ca));
w->data->btree_level = c->root->level;
bkey_copy(&w->data->btree_root, &c->root->key);
bkey_copy(&w->data->uuid_bucket, &c->uuid_bucket);
for_each_cache(ca, c, i)
w->data->prio_bucket[ca->sb.nr_this_dev] = ca->prio_buckets[0];
w->data->magic = jset_magic(&c->sb);
w->data->magic = jset_magic(&ca->sb);
w->data->version = BCACHE_JSET_VERSION;
w->data->last_seq = last_seq(&c->journal);
w->data->csum = csum_set(w->data);
......@@ -859,6 +838,7 @@ static struct journal_write *journal_wait_for_write(struct cache_set *c,
size_t sectors;
struct closure cl;
bool wait = false;
struct cache *ca = c->cache;
closure_init_stack(&cl);
......@@ -868,10 +848,10 @@ static struct journal_write *journal_wait_for_write(struct cache_set *c,
struct journal_write *w = c->journal.cur;
sectors = __set_blocks(w->data, w->data->keys + nkeys,
block_bytes(c)) * c->sb.block_size;
block_bytes(ca)) * ca->sb.block_size;
if (sectors <= min_t(size_t,
c->journal.blocks_free * c->sb.block_size,
c->journal.blocks_free * ca->sb.block_size,
PAGE_SECTORS << JSET_BITS))
return w;
......@@ -936,7 +916,7 @@ atomic_t *bch_journal(struct cache_set *c,
if (unlikely(test_bit(CACHE_SET_IO_DISABLE, &c->flags)))
return NULL;
if (!CACHE_SYNC(&c->sb))
if (!CACHE_SYNC(&c->cache->sb))
return NULL;
w = journal_wait_for_write(c, bch_keylist_nkeys(keys));
......
......@@ -196,18 +196,17 @@ static unsigned int bucket_heap_top(struct cache *ca)
void bch_moving_gc(struct cache_set *c)
{
struct cache *ca;
struct cache *ca = c->cache;
struct bucket *b;
unsigned int i;
unsigned long sectors_to_move, reserve_sectors;
if (!c->copy_gc_enabled)
return;
mutex_lock(&c->bucket_lock);
for_each_cache(ca, c, i) {
unsigned long sectors_to_move = 0;
unsigned long reserve_sectors = ca->sb.bucket_size *
sectors_to_move = 0;
reserve_sectors = ca->sb.bucket_size *
fifo_used(&ca->free[RESERVE_MOVINGGC]);
ca->heap.used = 0;
......@@ -238,7 +237,6 @@ void bch_moving_gc(struct cache_set *c)
while (heap_pop(&ca->heap, b, bucket_cmp))
SET_GC_MOVE(b, 1);
}
mutex_unlock(&c->bucket_lock);
......
......@@ -99,7 +99,7 @@ static int bch_keylist_realloc(struct keylist *l, unsigned int u64s,
* bch_data_insert_keys() will insert the keys created so far
* and finish the rest when the keylist is empty.
*/
if (newsize * sizeof(uint64_t) > block_bytes(c) - sizeof(struct jset))
if (newsize * sizeof(uint64_t) > block_bytes(c->cache) - sizeof(struct jset))
return -ENOMEM;
return __bch_keylist_realloc(l, u64s);
......@@ -394,8 +394,8 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio)
goto skip;
}
if (bio->bi_iter.bi_sector & (c->sb.block_size - 1) ||
bio_sectors(bio) & (c->sb.block_size - 1)) {
if (bio->bi_iter.bi_sector & (c->cache->sb.block_size - 1) ||
bio_sectors(bio) & (c->cache->sb.block_size - 1)) {
pr_debug("skipping unaligned io\n");
goto skip;
}
......
This diff is collapsed.
......@@ -711,10 +711,10 @@ SHOW(__bch_cache_set)
{
struct cache_set *c = container_of(kobj, struct cache_set, kobj);
sysfs_print(synchronous, CACHE_SYNC(&c->sb));
sysfs_print(synchronous, CACHE_SYNC(&c->cache->sb));
sysfs_print(journal_delay_ms, c->journal_delay_ms);
sysfs_hprint(bucket_size, bucket_bytes(c));
sysfs_hprint(block_size, block_bytes(c));
sysfs_hprint(bucket_size, bucket_bytes(c->cache));
sysfs_hprint(block_size, block_bytes(c->cache));
sysfs_print(tree_depth, c->root->level);
sysfs_print(root_usage_percent, bch_root_usage(c));
......@@ -812,8 +812,8 @@ STORE(__bch_cache_set)
if (attr == &sysfs_synchronous) {
bool sync = strtoul_or_return(buf);
if (sync != CACHE_SYNC(&c->sb)) {
SET_CACHE_SYNC(&c->sb, sync);
if (sync != CACHE_SYNC(&c->cache->sb)) {
SET_CACHE_SYNC(&c->cache->sb, sync);
bcache_write_super(c);
}
}
......
......@@ -35,7 +35,7 @@ static uint64_t __calc_target_rate(struct cached_dev *dc)
* This is the size of the cache, minus the amount used for
* flash-only devices
*/
uint64_t cache_sectors = c->nbuckets * c->sb.bucket_size -
uint64_t cache_sectors = c->nbuckets * c->cache->sb.bucket_size -
atomic_long_read(&c->flash_dev_dirty_sectors);
/*
......
......@@ -357,11 +357,12 @@ static int read_page(struct file *file, unsigned long index,
struct inode *inode = file_inode(file);
struct buffer_head *bh;
sector_t block, blk_cur;
unsigned long blocksize = i_blocksize(inode);
pr_debug("read bitmap file (%dB @ %llu)\n", (int)PAGE_SIZE,
(unsigned long long)index << PAGE_SHIFT);
bh = alloc_page_buffers(page, 1<<inode->i_blkbits, false);
bh = alloc_page_buffers(page, blocksize, false);
if (!bh) {
ret = -ENOMEM;
goto out;
......@@ -383,10 +384,10 @@ static int read_page(struct file *file, unsigned long index,
bh->b_blocknr = block;
bh->b_bdev = inode->i_sb->s_bdev;
if (count < (1<<inode->i_blkbits))
if (count < blocksize)
count = 0;
else
count -= (1<<inode->i_blkbits);
count -= blocksize;
bh->b_end_io = end_bitmap_write;
bh->b_private = bitmap;
......@@ -605,8 +606,8 @@ static int md_bitmap_read_sb(struct bitmap *bitmap)
if (bitmap->cluster_slot >= 0) {
sector_t bm_blocks = bitmap->mddev->resync_max_sectors;
sector_div(bm_blocks,
bitmap->mddev->bitmap_info.chunksize >> 9);
bm_blocks = DIV_ROUND_UP_SECTOR_T(bm_blocks,
(bitmap->mddev->bitmap_info.chunksize >> 9));
/* bits to bytes */
bm_blocks = ((bm_blocks+7) >> 3) + sizeof(bitmap_super_t);
/* to 4k blocks */
......@@ -1367,7 +1368,7 @@ __acquires(bitmap->lock)
if (bitmap->bp[page].hijacked ||
bitmap->bp[page].map == NULL)
csize = ((sector_t)1) << (bitmap->chunkshift +
PAGE_COUNTER_SHIFT - 1);
PAGE_COUNTER_SHIFT);
else
csize = ((sector_t)1) << bitmap->chunkshift;
*blocks = csize - (offset & (csize - 1));
......@@ -1949,6 +1950,7 @@ int md_bitmap_load(struct mddev *mddev)
}
EXPORT_SYMBOL_GPL(md_bitmap_load);
/* caller need to free returned bitmap with md_bitmap_free() */
struct bitmap *get_bitmap_from_slot(struct mddev *mddev, int slot)
{
int rv = 0;
......@@ -2012,6 +2014,7 @@ int md_bitmap_copy_from_slot(struct mddev *mddev, int slot,
md_bitmap_unplug(mddev->bitmap);
*low = lo;
*high = hi;
md_bitmap_free(bitmap);
return rv;
}
......@@ -2615,4 +2618,3 @@ struct attribute_group md_bitmap_group = {
.name = "bitmap",
.attrs = md_bitmap_attrs,
};
......@@ -1166,6 +1166,7 @@ static int resize_bitmaps(struct mddev *mddev, sector_t newsize, sector_t oldsiz
* can't resize bitmap
*/
goto out;
md_bitmap_free(bitmap);
}
return 0;
......
......@@ -8582,6 +8582,26 @@ void md_write_end(struct mddev *mddev)
EXPORT_SYMBOL(md_write_end);
/* This is used by raid0 and raid10 */
void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
struct bio *bio, sector_t start, sector_t size)
{
struct bio *discard_bio = NULL;
if (__blkdev_issue_discard(rdev->bdev, start, size,
GFP_NOIO, 0, &discard_bio) || !discard_bio)
return;
bio_chain(discard_bio, bio);
bio_clone_blkg_association(discard_bio, bio);
if (mddev->gendisk)
trace_block_bio_remap(bdev_get_queue(rdev->bdev),
discard_bio, disk_devt(mddev->gendisk),
bio->bi_iter.bi_sector);
submit_bio_noacct(discard_bio);
}
EXPORT_SYMBOL(md_submit_discard_bio);
/* md_allow_write(mddev)
* Calling this ensures that the array is marked 'active' so that writes
* may proceed without blocking. It is important to call this before
......@@ -9544,7 +9564,7 @@ static int __init md_init(void)
goto err_misc_wq;
md_rdev_misc_wq = alloc_workqueue("md_rdev_misc", 0, 0);
if (!md_misc_wq)
if (!md_rdev_misc_wq)
goto err_rdev_misc_wq;
if ((ret = register_blkdev(MD_MAJOR, "md")) < 0)
......
......@@ -713,6 +713,8 @@ extern void md_write_end(struct mddev *mddev);
extern void md_done_sync(struct mddev *mddev, int blocks, int ok);
extern void md_error(struct mddev *mddev, struct md_rdev *rdev);
extern void md_finish_reshape(struct mddev *mddev);
extern void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
struct bio *bio, sector_t start, sector_t size);
extern bool __must_check md_flush_request(struct mddev *mddev, struct bio *bio);
extern void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
......
......@@ -426,23 +426,6 @@ static void raid0_free(struct mddev *mddev, void *priv)
kfree(conf);
}
/*
* Is io distribute over 1 or more chunks ?
*/
static inline int is_io_in_chunk_boundary(struct mddev *mddev,
unsigned int chunk_sects, struct bio *bio)
{
if (likely(is_power_of_2(chunk_sects))) {
return chunk_sects >=
((bio->bi_iter.bi_sector & (chunk_sects-1))
+ bio_sectors(bio));
} else{
sector_t sector = bio->bi_iter.bi_sector;
return chunk_sects >= (sector_div(sector, chunk_sects)
+ bio_sectors(bio));
}
}
static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
{
struct r0conf *conf = mddev->private;
......@@ -494,7 +477,6 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
for (disk = 0; disk < zone->nb_dev; disk++) {
sector_t dev_start, dev_end;
struct bio *discard_bio = NULL;
struct md_rdev *rdev;
if (disk < start_disk_index)
......@@ -517,18 +499,9 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
rdev = conf->devlist[(zone - conf->strip_zone) *
conf->strip_zone[0].nb_dev + disk];
if (__blkdev_issue_discard(rdev->bdev,
md_submit_discard_bio(mddev, rdev, bio,
dev_start + zone->dev_start + rdev->data_offset,
dev_end - dev_start, GFP_NOIO, 0, &discard_bio) ||
!discard_bio)
continue;
bio_chain(discard_bio, bio);
bio_clone_blkg_association(discard_bio, bio);
if (mddev->gendisk)
trace_block_bio_remap(bdev_get_queue(rdev->bdev),
discard_bio, disk_devt(mddev->gendisk),
bio->bi_iter.bi_sector);
submit_bio_noacct(discard_bio);
dev_end - dev_start);
}
bio_endio(bio);
}
......
This diff is collapsed.
......@@ -179,5 +179,6 @@ enum r10bio_state {
R10BIO_Previous,
/* failfast devices did receive failfast requests. */
R10BIO_FailFast,
R10BIO_Discard,
};
#endif
This diff is collapsed.
......@@ -195,6 +195,7 @@ enum reconstruct_states {
reconstruct_state_result,
};
#define DEFAULT_STRIPE_SIZE 4096
struct stripe_head {
struct hlist_node hash;
struct list_head lru; /* inactive_list or handle_list */
......@@ -246,6 +247,13 @@ struct stripe_head {
int target, target2;
enum sum_check_flags zero_sum_result;
} ops;
#if PAGE_SIZE != DEFAULT_STRIPE_SIZE
/* These pages will be used by bios in dev[i] */
struct page **pages;
int nr_pages; /* page array size */
int stripes_per_page;
#endif
struct r5dev {
/* rreq and rvec are used for the replacement device when
* writing data to both devices.
......@@ -253,6 +261,7 @@ struct stripe_head {
struct bio req, rreq;
struct bio_vec vec, rvec;
struct page *page, *orig_page;
unsigned int offset; /* offset of the page */
struct bio *toread, *read, *towrite, *written;
sector_t sector; /* sector of this page */
unsigned long flags;
......@@ -472,7 +481,6 @@ struct disk_info {
*/
#define NR_STRIPES 256
#define DEFAULT_STRIPE_SIZE 4096
#if PAGE_SIZE == DEFAULT_STRIPE_SIZE
#define STRIPE_SIZE PAGE_SIZE
......@@ -771,6 +779,25 @@ static inline int algorithm_is_DDF(int layout)
return layout >= 8 && layout <= 10;
}
#if PAGE_SIZE != DEFAULT_STRIPE_SIZE
/*
* Return offset of the corresponding page for r5dev.
*/
static inline int raid5_get_page_offset(struct stripe_head *sh, int disk_idx)
{
return (disk_idx % sh->stripes_per_page) * RAID5_STRIPE_SIZE(sh->raid_conf);
}
/*
* Return corresponding page address for r5dev.
*/
static inline struct page *
raid5_get_dev_page(struct stripe_head *sh, int disk_idx)
{
return sh->pages[disk_idx / sh->stripes_per_page];
}
#endif
extern void md_raid5_kick_device(struct r5conf *conf);
extern int raid5_set_cache_size(struct mddev *mddev, int size);
extern sector_t raid5_compute_blocknr(struct stripe_head *sh, int i, int previous);
......
This diff is collapsed.
......@@ -300,7 +300,7 @@ struct nvme_ctrl {
unsigned long quirks;
struct nvme_id_power_state psd[32];
struct nvme_effects_log *effects;
struct list_head cels;
struct xarray cels;
struct work_struct scan_work;
struct work_struct async_event_work;
struct delayed_work ka_work;
......@@ -758,10 +758,9 @@ static inline void nvme_mpath_start_freeze(struct nvme_subsystem *subsys)
}
#endif /* CONFIG_NVME_MULTIPATH */
int nvme_revalidate_zones(struct nvme_ns *ns);
#ifdef CONFIG_BLK_DEV_ZONED
int nvme_update_zone_info(struct gendisk *disk, struct nvme_ns *ns,
unsigned lbaf);
int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf);
int nvme_report_zones(struct gendisk *disk, sector_t sector,
unsigned int nr_zones, report_zones_cb cb, void *data);
......@@ -778,9 +777,7 @@ static inline blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns,
return BLK_STS_NOTSUPP;
}
static inline int nvme_update_zone_info(struct gendisk *disk,
struct nvme_ns *ns,
unsigned lbaf)
static inline int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf)
{
dev_warn(ns->ctrl->device,
"Please enable CONFIG_BLK_DEV_ZONED to support ZNS devices\n");
......@@ -825,7 +822,7 @@ static inline int nvme_hwmon_init(struct nvme_ctrl *ctrl)
u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
u8 opcode);
void nvme_execute_passthru_rq(struct request *rq);
struct nvme_ctrl *nvme_ctrl_get_by_path(const char *path);
struct nvme_ctrl *nvme_ctrl_from_file(struct file *file);
struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid);
void nvme_put_ns(struct nvme_ns *ns);
......
This diff is collapsed.
This diff is collapsed.
......@@ -727,7 +727,9 @@ u16 nvmet_set_feat_kato(struct nvmet_req *req)
{
u32 val32 = le32_to_cpu(req->cmd->common.cdw11);
nvmet_stop_keep_alive_timer(req->sq->ctrl);
req->sq->ctrl->kato = DIV_ROUND_UP(val32, 1000);
nvmet_start_keep_alive_timer(req->sq->ctrl);
nvmet_set_result(req, req->sq->ctrl->kato);
......
......@@ -395,7 +395,7 @@ static void nvmet_keep_alive_timer(struct work_struct *work)
nvmet_ctrl_fatal_error(ctrl);
}
static void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl)
void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl)
{
if (unlikely(ctrl->kato == 0))
return;
......@@ -407,7 +407,7 @@ static void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl)
schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
}
static void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl)
void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl)
{
if (unlikely(ctrl->kato == 0))
return;
......
......@@ -1019,7 +1019,7 @@ static void
nvmet_fc_free_hostport(struct nvmet_fc_hostport *hostport)
{
/* if LLDD not implemented, leave as NULL */
if (!hostport->hosthandle)
if (!hostport || !hostport->hosthandle)
return;
nvmet_fc_hostport_put(hostport);
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment