Commit 1355b37f authored by Jens Axboe's avatar Jens Axboe

Merge branch 'for-3.13/post-mq-drivers' into for-linus

parents f618ef7c c8694948
......@@ -39,15 +39,15 @@ Module configuration options
============================
If you use the floppy driver as a module, use the following syntax:
modprobe floppy <options>
modprobe floppy floppy="<options>"
Example:
modprobe floppy omnibook messages
modprobe floppy floppy="omnibook messages"
If you need certain options enabled every time you load the floppy driver,
you can put:
options floppy omnibook messages
options floppy floppy="omnibook messages"
in a configuration file in /etc/modprobe.d/.
......
......@@ -110,7 +110,7 @@ source "drivers/block/mtip32xx/Kconfig"
config BLK_CPQ_DA
tristate "Compaq SMART2 support"
depends on PCI && VIRT_TO_BUS
depends on PCI && VIRT_TO_BUS && 0
help
This is the driver for Compaq Smart Array controllers. Everyone
using these boards should say Y here. See the file
......@@ -319,6 +319,16 @@ config BLK_DEV_NVME
To compile this driver as a module, choose M here: the
module will be called nvme.
config BLK_DEV_SKD
tristate "STEC S1120 Block Driver"
depends on PCI
depends on 64BIT
---help---
Saying Y or M here will enable support for the
STEC, Inc. S1120 PCIe SSD.
Use device /dev/skd$N and /dev/skd$Np$M.
config BLK_DEV_OSD
tristate "OSD object-as-blkdev support"
depends on SCSI_OSD_ULD
......
......@@ -23,6 +23,7 @@ obj-$(CONFIG_CDROM_PKTCDVD) += pktcdvd.o
obj-$(CONFIG_MG_DISK) += mg_disk.o
obj-$(CONFIG_SUNVDC) += sunvdc.o
obj-$(CONFIG_BLK_DEV_NVME) += nvme.o
obj-$(CONFIG_BLK_DEV_SKD) += skd.o
obj-$(CONFIG_BLK_DEV_OSD) += osdblk.o
obj-$(CONFIG_BLK_DEV_UMEM) += umem.o
......@@ -44,4 +45,5 @@ obj-$(CONFIG_BLK_DEV_RSXX) += rsxx/
obj-$(CONFIG_BLK_DEV_NULL_BLK) += null_blk.o
nvme-y := nvme-core.o nvme-scsi.o
skd-y := skd_main.o
swim_mod-y := swim.o swim_asm.o
......@@ -5183,7 +5183,7 @@ static int cciss_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
rebuild_lun_table(h, 1, 0);
cciss_engage_scsi(h);
h->busy_initializing = 0;
return 1;
return 0;
clean4:
cciss_free_cmd_pool(h);
......
......@@ -1474,7 +1474,8 @@ enum determine_dev_size {
DS_ERROR = -1,
DS_UNCHANGED = 0,
DS_SHRUNK = 1,
DS_GREW = 2
DS_GREW = 2,
DS_GREW_FROM_ZERO = 3,
};
extern enum determine_dev_size
drbd_determine_dev_size(struct drbd_conf *, enum dds_flags, struct resize_parms *) __must_hold(local);
......
......@@ -2750,13 +2750,6 @@ int __init drbd_init(void)
return err;
}
err = drbd_genl_register();
if (err) {
printk(KERN_ERR "drbd: unable to register generic netlink family\n");
goto fail;
}
register_reboot_notifier(&drbd_notifier);
/*
......@@ -2767,6 +2760,15 @@ int __init drbd_init(void)
drbd_proc = NULL; /* play safe for drbd_cleanup */
idr_init(&minors);
rwlock_init(&global_state_lock);
INIT_LIST_HEAD(&drbd_tconns);
err = drbd_genl_register();
if (err) {
printk(KERN_ERR "drbd: unable to register generic netlink family\n");
goto fail;
}
err = drbd_create_mempools();
if (err)
goto fail;
......@@ -2778,9 +2780,6 @@ int __init drbd_init(void)
goto fail;
}
rwlock_init(&global_state_lock);
INIT_LIST_HEAD(&drbd_tconns);
retry.wq = create_singlethread_workqueue("drbd-reissue");
if (!retry.wq) {
printk(KERN_ERR "drbd: unable to create retry workqueue\n");
......
......@@ -955,7 +955,7 @@ drbd_determine_dev_size(struct drbd_conf *mdev, enum dds_flags flags, struct res
}
if (size > la_size_sect)
rv = DS_GREW;
rv = la_size_sect ? DS_GREW : DS_GREW_FROM_ZERO;
if (size < la_size_sect)
rv = DS_SHRUNK;
......@@ -1132,9 +1132,9 @@ void drbd_reconsider_max_bio_size(struct drbd_conf *mdev)
/* We may ignore peer limits if the peer is modern enough.
Because new from 8.3.8 onwards the peer can use multiple
BIOs for a single peer_request */
if (mdev->state.conn >= C_CONNECTED) {
if (mdev->state.conn >= C_WF_REPORT_PARAMS) {
if (mdev->tconn->agreed_pro_version < 94)
peer = min( mdev->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
peer = min(mdev->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
/* Correct old drbd (up to 8.3.7) if it believes it can do more than 32KiB */
else if (mdev->tconn->agreed_pro_version == 94)
peer = DRBD_MAX_SIZE_H80_PACKET;
......
......@@ -1890,29 +1890,11 @@ static u32 seq_max(u32 a, u32 b)
return seq_greater(a, b) ? a : b;
}
static bool need_peer_seq(struct drbd_conf *mdev)
{
struct drbd_tconn *tconn = mdev->tconn;
int tp;
/*
* We only need to keep track of the last packet_seq number of our peer
* if we are in dual-primary mode and we have the resolve-conflicts flag set; see
* handle_write_conflicts().
*/
rcu_read_lock();
tp = rcu_dereference(mdev->tconn->net_conf)->two_primaries;
rcu_read_unlock();
return tp && test_bit(RESOLVE_CONFLICTS, &tconn->flags);
}
static void update_peer_seq(struct drbd_conf *mdev, unsigned int peer_seq)
{
unsigned int newest_peer_seq;
if (need_peer_seq(mdev)) {
if (test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags)) {
spin_lock(&mdev->peer_seq_lock);
newest_peer_seq = seq_max(mdev->peer_seq, peer_seq);
mdev->peer_seq = newest_peer_seq;
......@@ -1972,22 +1954,31 @@ static int wait_for_and_update_peer_seq(struct drbd_conf *mdev, const u32 peer_s
{
DEFINE_WAIT(wait);
long timeout;
int ret;
int ret = 0, tp;
if (!need_peer_seq(mdev))
if (!test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags))
return 0;
spin_lock(&mdev->peer_seq_lock);
for (;;) {
if (!seq_greater(peer_seq - 1, mdev->peer_seq)) {
mdev->peer_seq = seq_max(mdev->peer_seq, peer_seq);
ret = 0;
break;
}
if (signal_pending(current)) {
ret = -ERESTARTSYS;
break;
}
rcu_read_lock();
tp = rcu_dereference(mdev->tconn->net_conf)->two_primaries;
rcu_read_unlock();
if (!tp)
break;
/* Only need to wait if two_primaries is enabled */
prepare_to_wait(&mdev->seq_wait, &wait, TASK_INTERRUPTIBLE);
spin_unlock(&mdev->peer_seq_lock);
rcu_read_lock();
......@@ -2228,8 +2219,10 @@ static int receive_Data(struct drbd_tconn *tconn, struct packet_info *pi)
}
goto out_interrupted;
}
} else
} else {
update_peer_seq(mdev, peer_seq);
spin_lock_irq(&mdev->tconn->req_lock);
}
list_add(&peer_req->w.list, &mdev->active_ee);
spin_unlock_irq(&mdev->tconn->req_lock);
......@@ -4132,7 +4125,11 @@ recv_bm_rle_bits(struct drbd_conf *mdev,
(unsigned int)bs.buf_len);
return -EIO;
}
look_ahead >>= bits;
/* if we consumed all 64 bits, assign 0; >> 64 is "undefined"; */
if (likely(bits < 64))
look_ahead >>= bits;
else
look_ahead = 0;
have -= bits;
bits = bitstream_get_bits(&bs, &tmp, 64 - have);
......
......@@ -1306,6 +1306,7 @@ int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct
int backing_limit;
if (bio_size && get_ldev(mdev)) {
unsigned int max_hw_sectors = queue_max_hw_sectors(q);
struct request_queue * const b =
mdev->ldev->backing_bdev->bd_disk->queue;
if (b->merge_bvec_fn) {
......@@ -1313,6 +1314,8 @@ int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct
limit = min(limit, backing_limit);
}
put_ldev(mdev);
if ((limit >> 9) > max_hw_sectors)
limit = max_hw_sectors << 9;
}
return limit;
}
......
......@@ -894,13 +894,6 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
bio_list_init(&lo->lo_bio_list);
/*
* set queue make_request_fn, and add limits based on lower level
* device
*/
blk_queue_make_request(lo->lo_queue, loop_make_request);
lo->lo_queue->queuedata = lo;
if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync)
blk_queue_flush(lo->lo_queue, REQ_FLUSH);
......@@ -1618,6 +1611,8 @@ static int loop_add(struct loop_device **l, int i)
if (!lo)
goto out;
lo->lo_state = Lo_unbound;
/* allocate id, if @id >= 0, we're requesting that specific id */
if (i >= 0) {
err = idr_alloc(&loop_index_idr, lo, i, i + 1, GFP_KERNEL);
......@@ -1635,6 +1630,12 @@ static int loop_add(struct loop_device **l, int i)
if (!lo->lo_queue)
goto out_free_idr;
/*
* set queue make_request_fn
*/
blk_queue_make_request(lo->lo_queue, loop_make_request);
lo->lo_queue->queuedata = lo;
disk = lo->lo_disk = alloc_disk(1 << part_shift);
if (!disk)
goto out_free_queue;
......
......@@ -936,7 +936,7 @@ static int mg_probe(struct platform_device *plat_dev)
goto probe_err_3b;
}
err = request_irq(host->irq, mg_irq,
IRQF_DISABLED | IRQF_TRIGGER_RISING,
IRQF_TRIGGER_RISING,
MG_DEV_NAME, host);
if (err) {
printk(KERN_ERR "%s:%d fail (request_irq err=%d)\n",
......
This diff is collapsed.
......@@ -140,6 +140,7 @@ enum {
MTIP_PF_SVC_THD_ACTIVE_BIT = 4,
MTIP_PF_ISSUE_CMDS_BIT = 5,
MTIP_PF_REBUILD_BIT = 6,
MTIP_PF_SR_CLEANUP_BIT = 7,
MTIP_PF_SVC_THD_STOP_BIT = 8,
/* below are bit numbers in 'dd_flag' defined in driver_data */
......@@ -147,15 +148,18 @@ enum {
MTIP_DDF_REMOVE_PENDING_BIT = 1,
MTIP_DDF_OVER_TEMP_BIT = 2,
MTIP_DDF_WRITE_PROTECT_BIT = 3,
MTIP_DDF_STOP_IO = ((1 << MTIP_DDF_REMOVE_PENDING_BIT) |
(1 << MTIP_DDF_SEC_LOCK_BIT) |
(1 << MTIP_DDF_OVER_TEMP_BIT) |
(1 << MTIP_DDF_WRITE_PROTECT_BIT)),
MTIP_DDF_REMOVE_DONE_BIT = 4,
MTIP_DDF_CLEANUP_BIT = 5,
MTIP_DDF_RESUME_BIT = 6,
MTIP_DDF_INIT_DONE_BIT = 7,
MTIP_DDF_REBUILD_FAILED_BIT = 8,
MTIP_DDF_STOP_IO = ((1 << MTIP_DDF_REMOVE_PENDING_BIT) |
(1 << MTIP_DDF_SEC_LOCK_BIT) |
(1 << MTIP_DDF_OVER_TEMP_BIT) |
(1 << MTIP_DDF_WRITE_PROTECT_BIT) |
(1 << MTIP_DDF_REBUILD_FAILED_BIT)),
};
struct smart_attr {
......@@ -499,6 +503,8 @@ struct driver_data {
bool trim_supp; /* flag indicating trim support */
bool sr;
int numa_node; /* NUMA support */
char workq_name[32];
......@@ -511,6 +517,8 @@ struct driver_data {
int isr_binding;
struct block_device *bdev;
int unal_qdepth; /* qdepth of unaligned IO queue */
struct list_head online_list; /* linkage for online list */
......
......@@ -473,45 +473,31 @@ static void pkt_debugfs_dev_new(struct pktcdvd_device *pd)
{
if (!pkt_debugfs_root)
return;
pd->dfs_f_info = NULL;
pd->dfs_d_root = debugfs_create_dir(pd->name, pkt_debugfs_root);
if (IS_ERR(pd->dfs_d_root)) {
pd->dfs_d_root = NULL;
if (!pd->dfs_d_root)
return;
}
pd->dfs_f_info = debugfs_create_file("info", S_IRUGO,
pd->dfs_d_root, pd, &debug_fops);
if (IS_ERR(pd->dfs_f_info)) {
pd->dfs_f_info = NULL;
return;
}
}
static void pkt_debugfs_dev_remove(struct pktcdvd_device *pd)
{
if (!pkt_debugfs_root)
return;
if (pd->dfs_f_info)
debugfs_remove(pd->dfs_f_info);
debugfs_remove(pd->dfs_f_info);
debugfs_remove(pd->dfs_d_root);
pd->dfs_f_info = NULL;
if (pd->dfs_d_root)
debugfs_remove(pd->dfs_d_root);
pd->dfs_d_root = NULL;
}
static void pkt_debugfs_init(void)
{
pkt_debugfs_root = debugfs_create_dir(DRIVER_NAME, NULL);
if (IS_ERR(pkt_debugfs_root)) {
pkt_debugfs_root = NULL;
return;
}
}
static void pkt_debugfs_cleanup(void)
{
if (!pkt_debugfs_root)
return;
debugfs_remove(pkt_debugfs_root);
pkt_debugfs_root = NULL;
}
......
......@@ -654,7 +654,8 @@ static void rsxx_eeh_failure(struct pci_dev *dev)
for (i = 0; i < card->n_targets; i++) {
spin_lock_bh(&card->ctrl[i].queue_lock);
cnt = rsxx_cleanup_dma_queue(&card->ctrl[i],
&card->ctrl[i].queue);
&card->ctrl[i].queue,
COMPLETE_DMA);
spin_unlock_bh(&card->ctrl[i].queue_lock);
cnt += rsxx_dma_cancel(&card->ctrl[i]);
......@@ -748,10 +749,6 @@ static pci_ers_result_t rsxx_slot_reset(struct pci_dev *dev)
card->eeh_state = 0;
st = rsxx_eeh_remap_dmas(card);
if (st)
goto failed_remap_dmas;
spin_lock_irqsave(&card->irq_lock, flags);
if (card->n_targets & RSXX_MAX_TARGETS)
rsxx_enable_ier_and_isr(card, CR_INTR_ALL_G);
......@@ -778,7 +775,6 @@ static pci_ers_result_t rsxx_slot_reset(struct pci_dev *dev)
return PCI_ERS_RESULT_RECOVERED;
failed_hw_buffers_init:
failed_remap_dmas:
for (i = 0; i < card->n_targets; i++) {
if (card->ctrl[i].status.buf)
pci_free_consistent(card->dev,
......
......@@ -295,13 +295,15 @@ int rsxx_setup_dev(struct rsxx_cardinfo *card)
return -ENOMEM;
}
blk_size = card->config.data.block_size;
if (card->config_valid) {
blk_size = card->config.data.block_size;
blk_queue_dma_alignment(card->queue, blk_size - 1);
blk_queue_logical_block_size(card->queue, blk_size);
}
blk_queue_make_request(card->queue, rsxx_make_request);
blk_queue_bounce_limit(card->queue, BLK_BOUNCE_ANY);
blk_queue_dma_alignment(card->queue, blk_size - 1);
blk_queue_max_hw_sectors(card->queue, blkdev_max_hw_sectors);
blk_queue_logical_block_size(card->queue, blk_size);
blk_queue_physical_block_size(card->queue, RSXX_HW_BLK_SIZE);
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, card->queue);
......
......@@ -221,6 +221,21 @@ static void dma_intr_coal_auto_tune(struct rsxx_cardinfo *card)
}
/*----------------- RSXX DMA Handling -------------------*/
/*
 * Release a DMA descriptor without invoking its completion callback.
 *
 * For any command other than HW_CMD_BLK_DISCARD whose dma_addr is not a
 * mapping error, the page is unmapped first (HW_CMD_BLK_WRITE as
 * PCI_DMA_TODEVICE, everything else as PCI_DMA_FROMDEVICE).  The descriptor
 * is then handed back to rsxx_dma_pool.
 */
static void rsxx_free_dma(struct rsxx_dma_ctrl *ctrl, struct rsxx_dma *dma)
{
	int dir;

	if (dma->cmd != HW_CMD_BLK_DISCARD &&
	    !pci_dma_mapping_error(ctrl->card->dev, dma->dma_addr)) {
		if (dma->cmd == HW_CMD_BLK_WRITE)
			dir = PCI_DMA_TODEVICE;
		else
			dir = PCI_DMA_FROMDEVICE;

		pci_unmap_page(ctrl->card->dev, dma->dma_addr,
			       get_dma_size(dma), dir);
	}

	kmem_cache_free(rsxx_dma_pool, dma);
}
static void rsxx_complete_dma(struct rsxx_dma_ctrl *ctrl,
struct rsxx_dma *dma,
unsigned int status)
......@@ -232,21 +247,14 @@ static void rsxx_complete_dma(struct rsxx_dma_ctrl *ctrl,
if (status & DMA_CANCELLED)
ctrl->stats.dma_cancelled++;
if (dma->dma_addr)
pci_unmap_page(ctrl->card->dev, dma->dma_addr,
get_dma_size(dma),
dma->cmd == HW_CMD_BLK_WRITE ?
PCI_DMA_TODEVICE :
PCI_DMA_FROMDEVICE);
if (dma->cb)
dma->cb(ctrl->card, dma->cb_data, status ? 1 : 0);
kmem_cache_free(rsxx_dma_pool, dma);
rsxx_free_dma(ctrl, dma);
}
int rsxx_cleanup_dma_queue(struct rsxx_dma_ctrl *ctrl,
struct list_head *q)
struct list_head *q, unsigned int done)
{
struct rsxx_dma *dma;
struct rsxx_dma *tmp;
......@@ -254,7 +262,10 @@ int rsxx_cleanup_dma_queue(struct rsxx_dma_ctrl *ctrl,
list_for_each_entry_safe(dma, tmp, q, list) {
list_del(&dma->list);
rsxx_complete_dma(ctrl, dma, DMA_CANCELLED);
if (done & COMPLETE_DMA)
rsxx_complete_dma(ctrl, dma, DMA_CANCELLED);
else
rsxx_free_dma(ctrl, dma);
cnt++;
}
......@@ -370,7 +381,7 @@ static void dma_engine_stalled(unsigned long data)
/* Clean up the DMA queue */
spin_lock(&ctrl->queue_lock);
cnt = rsxx_cleanup_dma_queue(ctrl, &ctrl->queue);
cnt = rsxx_cleanup_dma_queue(ctrl, &ctrl->queue, COMPLETE_DMA);
spin_unlock(&ctrl->queue_lock);
cnt += rsxx_dma_cancel(ctrl);
......@@ -388,6 +399,7 @@ static void rsxx_issue_dmas(struct rsxx_dma_ctrl *ctrl)
int tag;
int cmds_pending = 0;
struct hw_cmd *hw_cmd_buf;
int dir;
hw_cmd_buf = ctrl->cmd.buf;
......@@ -424,6 +436,31 @@ static void rsxx_issue_dmas(struct rsxx_dma_ctrl *ctrl)
continue;
}
if (dma->cmd != HW_CMD_BLK_DISCARD) {
if (dma->cmd == HW_CMD_BLK_WRITE)
dir = PCI_DMA_TODEVICE;
else
dir = PCI_DMA_FROMDEVICE;
/*
* The function pci_map_page is placed here because we
* can only, by design, issue up to 255 commands to the
* hardware at one time per DMA channel. So the maximum
* amount of mapped memory would be 255 * 4 channels *
* 4096 Bytes which is less than 2GB, the limit of a x8
* Non-HWWD PCIe slot. This way the pci_map_page
* function should never fail because of a lack of
* mappable memory.
*/
dma->dma_addr = pci_map_page(ctrl->card->dev, dma->page,
dma->pg_off, dma->sub_page.cnt << 9, dir);
if (pci_dma_mapping_error(ctrl->card->dev, dma->dma_addr)) {
push_tracker(ctrl->trackers, tag);
rsxx_complete_dma(ctrl, dma, DMA_CANCELLED);
continue;
}
}
set_tracker_dma(ctrl->trackers, tag, dma);
hw_cmd_buf[ctrl->cmd.idx].command = dma->cmd;
hw_cmd_buf[ctrl->cmd.idx].tag = tag;
......@@ -620,14 +657,6 @@ static int rsxx_queue_dma(struct rsxx_cardinfo *card,
if (!dma)
return -ENOMEM;
dma->dma_addr = pci_map_page(card->dev, page, pg_off, dma_len,
dir ? PCI_DMA_TODEVICE :
PCI_DMA_FROMDEVICE);
if (!dma->dma_addr) {
kmem_cache_free(rsxx_dma_pool, dma);
return -ENOMEM;
}
dma->cmd = dir ? HW_CMD_BLK_WRITE : HW_CMD_BLK_READ;
dma->laddr = laddr;
dma->sub_page.off = (dma_off >> 9);
......@@ -736,11 +765,9 @@ int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
return 0;
bvec_err:
for (i = 0; i < card->n_targets; i++) {
spin_lock_bh(&card->ctrl[i].queue_lock);
rsxx_cleanup_dma_queue(&card->ctrl[i], &dma_list[i]);
spin_unlock_bh(&card->ctrl[i].queue_lock);
}
for (i = 0; i < card->n_targets; i++)
rsxx_cleanup_dma_queue(&card->ctrl[i], &dma_list[i],
FREE_DMA);
return st;
}
......@@ -990,7 +1017,7 @@ void rsxx_dma_destroy(struct rsxx_cardinfo *card)
/* Clean up the DMA queue */
spin_lock_bh(&ctrl->queue_lock);
rsxx_cleanup_dma_queue(ctrl, &ctrl->queue);
rsxx_cleanup_dma_queue(ctrl, &ctrl->queue, COMPLETE_DMA);
spin_unlock_bh(&ctrl->queue_lock);
rsxx_dma_cancel(ctrl);
......@@ -1032,6 +1059,14 @@ int rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card)
else
card->ctrl[i].stats.reads_issued--;
if (dma->cmd != HW_CMD_BLK_DISCARD) {
pci_unmap_page(card->dev, dma->dma_addr,
get_dma_size(dma),
dma->cmd == HW_CMD_BLK_WRITE ?
PCI_DMA_TODEVICE :
PCI_DMA_FROMDEVICE);
}
list_add_tail(&dma->list, &issued_dmas[i]);
push_tracker(card->ctrl[i].trackers, j);
cnt++;
......@@ -1043,15 +1078,6 @@ int rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card)
atomic_sub(cnt, &card->ctrl[i].stats.hw_q_depth);
card->ctrl[i].stats.sw_q_depth += cnt;
card->ctrl[i].e_cnt = 0;
list_for_each_entry(dma, &card->ctrl[i].queue, list) {
if (dma->dma_addr)
pci_unmap_page(card->dev, dma->dma_addr,
get_dma_size(dma),
dma->cmd == HW_CMD_BLK_WRITE ?
PCI_DMA_TODEVICE :
PCI_DMA_FROMDEVICE);
}
spin_unlock_bh(&card->ctrl[i].queue_lock);
}
......@@ -1060,31 +1086,6 @@ int rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card)
return 0;
}
int rsxx_eeh_remap_dmas(struct rsxx_cardinfo *card)
{
struct rsxx_dma *dma;
int i;
for (i = 0; i < card->n_targets; i++) {
spin_lock_bh(&card->ctrl[i].queue_lock);
list_for_each_entry(dma, &card->ctrl[i].queue, list) {
dma->dma_addr = pci_map_page(card->dev, dma->page,
dma->pg_off, get_dma_size(dma),
dma->cmd == HW_CMD_BLK_WRITE ?
PCI_DMA_TODEVICE :
PCI_DMA_FROMDEVICE);
if (!dma->dma_addr) {
spin_unlock_bh(&card->ctrl[i].queue_lock);
kmem_cache_free(rsxx_dma_pool, dma);
return -ENOMEM;
}
}
spin_unlock_bh(&card->ctrl[i].queue_lock);
}
return 0;
}
int rsxx_dma_init(void)
{
rsxx_dma_pool = KMEM_CACHE(rsxx_dma, SLAB_HWCACHE_ALIGN);
......
......@@ -52,7 +52,7 @@ struct proc_cmd;
#define RS70_PCI_REV_SUPPORTED 4
#define DRIVER_NAME "rsxx"
#define DRIVER_VERSION "4.0"
#define DRIVER_VERSION "4.0.3.2516"
/* Block size is 4096 */
#define RSXX_HW_BLK_SHIFT 12
......@@ -345,6 +345,11 @@ enum rsxx_creg_stat {
CREG_STAT_TAG_MASK = 0x0000ff00,
};
/*
 * Disposal mode passed as the 'done' argument of rsxx_cleanup_dma_queue().
 *
 * That function tests 'done & COMPLETE_DMA': when the bit is set each queued
 * DMA is finished through rsxx_complete_dma() with DMA_CANCELLED, otherwise
 * (FREE_DMA) it is released through rsxx_free_dma().
 */
enum rsxx_dma_finish {
FREE_DMA = 0x0,
COMPLETE_DMA = 0x1,
};
static inline unsigned int CREG_DATA(int N)
{
return CREG_DATA0 + (N << 2);
......@@ -379,7 +384,9 @@ typedef void (*rsxx_dma_cb)(struct rsxx_cardinfo *card,
int rsxx_dma_setup(struct rsxx_cardinfo *card);
void rsxx_dma_destroy(struct rsxx_cardinfo *card);
int rsxx_dma_init(void);
int rsxx_cleanup_dma_queue(struct rsxx_dma_ctrl *ctrl, struct list_head *q);
int rsxx_cleanup_dma_queue(struct rsxx_dma_ctrl *ctrl,
struct list_head *q,
unsigned int done);
int rsxx_dma_cancel(struct rsxx_dma_ctrl *ctrl);
void rsxx_dma_cleanup(void);
void rsxx_dma_queue_reset(struct rsxx_cardinfo *card);
......
This diff is collapsed.
/* Copyright 2012 STEC, Inc.
*
* This file is licensed under the terms of the 3-clause
* BSD License (http://opensource.org/licenses/BSD-3-Clause)
* or the GNU GPL-2.0 (http://www.gnu.org/licenses/gpl-2.0.html),
* at your option. Both licenses are also available in the LICENSE file
* distributed with this project. This file may not be copied, modified,
* or distributed except in accordance with those terms.
*/
#ifndef SKD_S1120_H
#define SKD_S1120_H
#pragma pack(push, s1120_h, 1)
/*
* Q-channel, 64-bit r/w
*/
#define FIT_Q_COMMAND 0x400u
#define FIT_QCMD_QID_MASK (0x3 << 1)
#define FIT_QCMD_QID0 (0x0 << 1)
#define FIT_QCMD_QID_NORMAL FIT_QCMD_QID0
#define FIT_QCMD_QID1 (0x1 << 1)
#define FIT_QCMD_QID2 (0x2 << 1)
#define FIT_QCMD_QID3 (0x3 << 1)
#define FIT_QCMD_FLUSH_QUEUE (0ull) /* add QID */
#define FIT_QCMD_MSGSIZE_MASK (0x3 << 4)
#define FIT_QCMD_MSGSIZE_64 (0x0 << 4)
#define FIT_QCMD_MSGSIZE_128 (0x1 << 4)
#define FIT_QCMD_MSGSIZE_256 (0x2 << 4)
#define FIT_QCMD_MSGSIZE_512 (0x3 << 4)
#define FIT_QCMD_BASE_ADDRESS_MASK (0xFFFFFFFFFFFFFFC0ull)
/*
* Control, 32-bit r/w
*/
#define FIT_CONTROL 0x500u
#define FIT_CR_HARD_RESET (1u << 0u)
#define FIT_CR_SOFT_RESET (1u << 1u)
#define FIT_CR_DIS_TIMESTAMPS (1u << 6u)
#define FIT_CR_ENABLE_INTERRUPTS (1u << 7u)
/*
* Status, 32-bit, r/o
*/
#define FIT_STATUS 0x510u
/* Field masks applied to a value read from FIT_STATUS. */
#define FIT_SR_DRIVE_STATE_MASK 0x000000FFu
#define FIT_SR_SIGNATURE (0xFF << 8)
#define FIT_SR_PIO_DMA (1 << 16)
/*
 * Drive state codes.  NOTE(review): presumably these are the values of the
 * field selected by FIT_SR_DRIVE_STATE_MASK -- confirm against the users.
 */
#define FIT_SR_DRIVE_OFFLINE 0x00
#define FIT_SR_DRIVE_INIT 0x01
/* #define FIT_SR_DRIVE_READY 0x02 */
#define FIT_SR_DRIVE_ONLINE 0x03
#define FIT_SR_DRIVE_BUSY 0x04
#define FIT_SR_DRIVE_FAULT 0x05
#define FIT_SR_DRIVE_DEGRADED 0x06
#define FIT_SR_PCIE_LINK_DOWN 0x07
#define FIT_SR_DRIVE_SOFT_RESET 0x08
#define FIT_SR_DRIVE_INIT_FAULT 0x09
#define FIT_SR_DRIVE_BUSY_SANITIZE 0x0A
#define FIT_SR_DRIVE_BUSY_ERASE 0x0B
#define FIT_SR_DRIVE_FW_BOOTING 0x0C
#define FIT_SR_DRIVE_NEED_FW_DOWNLOAD 0xFE
#define FIT_SR_DEVICE_MISSING 0xFF
#define FIT_SR__RESERVED 0xFFFFFF00u
/*
 * FIT_STATUS - Status register data definition
 *
 * FIT_SR_STATE_MASK covers the same low byte as FIT_SR_DRIVE_STATE_MASK.
 * FIT_SR_SIGNATURE and FIT_SR_PIO_DMA are defined once, with the other
 * field masks at the top of this section; the identical second definitions
 * that used to follow here were removed so the values cannot drift apart.
 */
#define FIT_SR_STATE_MASK (0xFF << 0)
/*
* Interrupt status, 32-bit r/w1c (w1c ==> write 1 to clear)
*/
#define FIT_INT_STATUS_HOST 0x520u
#define FIT_ISH_FW_STATE_CHANGE (1u << 0u)
#define FIT_ISH_COMPLETION_POSTED (1u << 1u)
#define FIT_ISH_MSG_FROM_DEV (1u << 2u)
#define FIT_ISH_UNDEFINED_3 (1u << 3u)
#define FIT_ISH_UNDEFINED_4 (1u << 4u)
#define FIT_ISH_Q0_FULL (1u << 5u)
#define FIT_ISH_Q1_FULL (1u << 6u)
#define FIT_ISH_Q2_FULL (1u << 7u)
#define FIT_ISH_Q3_FULL (1u << 8u)
#define FIT_ISH_QCMD_FIFO_OVERRUN (1u << 9u)
#define FIT_ISH_BAD_EXP_ROM_READ (1u << 10u)
#define FIT_INT_DEF_MASK \
(FIT_ISH_FW_STATE_CHANGE | \
FIT_ISH_COMPLETION_POSTED | \
FIT_ISH_MSG_FROM_DEV | \
FIT_ISH_Q0_FULL | \
FIT_ISH_Q1_FULL | \
FIT_ISH_Q2_FULL | \
FIT_ISH_Q3_FULL | \
FIT_ISH_QCMD_FIFO_OVERRUN | \
FIT_ISH_BAD_EXP_ROM_READ)
#define FIT_INT_QUEUE_FULL \
(FIT_ISH_Q0_FULL | \
FIT_ISH_Q1_FULL | \
FIT_ISH_Q2_FULL | \
FIT_ISH_Q3_FULL)
#define MSI_MSG_NWL_ERROR_0 0x00000000
#define MSI_MSG_NWL_ERROR_1 0x00000001
#define MSI_MSG_NWL_ERROR_2 0x00000002
#define MSI_MSG_NWL_ERROR_3 0x00000003
#define MSI_MSG_STATE_CHANGE 0x00000004
#define MSI_MSG_COMPLETION_POSTED 0x00000005
#define MSI_MSG_MSG_FROM_DEV 0x00000006
#define MSI_MSG_RESERVED_0 0x00000007
#define MSI_MSG_RESERVED_1 0x00000008
#define MSI_MSG_QUEUE_0_FULL 0x00000009
#define MSI_MSG_QUEUE_1_FULL 0x0000000A
#define MSI_MSG_QUEUE_2_FULL 0x0000000B
#define MSI_MSG_QUEUE_3_FULL 0x0000000C
#define FIT_INT_RESERVED_MASK \
(FIT_ISH_UNDEFINED_3 | \
FIT_ISH_UNDEFINED_4)
/*
* Interrupt mask, 32-bit r/w
* Bit definitions are the same as FIT_INT_STATUS_HOST
*/
#define FIT_INT_MASK_HOST 0x528u
/*
* Message to device, 32-bit r/w
*/
#define FIT_MSG_TO_DEVICE 0x540u
/*
* Message from device, 32-bit, r/o
*/
#define FIT_MSG_FROM_DEVICE 0x548u
/*
* 32-bit messages to/from device, composition/extraction macros
*/
/*
 * Message word layout: bits 31..24 type, bits 23..16 parameter,
 * bits 15..0 data.  FIT_MXD_CONS() packs the three fields (each argument is
 * masked to its field width first); the other three macros unpack them.
 */
#define FIT_MXD_CONS(TYPE, PARAM, DATA) \
	(((DATA) & 0xFFFFu) | \
	 (((PARAM) & 0xFFu) << 16u) | \
	 (((TYPE) & 0xFFu) << 24u))
#define FIT_MXD_TYPE(MXD) (((MXD) >> 24u) & 0xFFu)
#define FIT_MXD_PARAM(MXD) (((MXD) >> 16u) & 0xFFu)
#define FIT_MXD_DATA(MXD) ((MXD) & 0xFFFFu)
/*
* Types of messages to/from device
*/
#define FIT_MTD_FITFW_INIT 0x01u
#define FIT_MTD_GET_CMDQ_DEPTH 0x02u
#define FIT_MTD_SET_COMPQ_DEPTH 0x03u
#define FIT_MTD_SET_COMPQ_ADDR 0x04u
#define FIT_MTD_ARM_QUEUE 0x05u
#define FIT_MTD_CMD_LOG_HOST_ID 0x07u
#define FIT_MTD_CMD_LOG_TIME_STAMP_LO 0x08u
#define FIT_MTD_CMD_LOG_TIME_STAMP_HI 0x09u
#define FIT_MFD_SMART_EXCEEDED 0x10u
#define FIT_MFD_POWER_DOWN 0x11u
#define FIT_MFD_OFFLINE 0x12u
#define FIT_MFD_ONLINE 0x13u
#define FIT_MFD_FW_RESTARTING 0x14u
#define FIT_MFD_PM_ACTIVE 0x15u
#define FIT_MFD_PM_STANDBY 0x16u
#define FIT_MFD_PM_SLEEP 0x17u
#define FIT_MFD_CMD_PROGRESS 0x18u
#define FIT_MTD_DEBUG 0xFEu
#define FIT_MFD_DEBUG 0xFFu
/*
 * Decoding helpers for a 32-bit message word: FIT_MFD_MSG() extracts
 * bits 31..24 using FIT_MFD_MASK, FIT_MFD_DATA() extracts bits 7..0 using
 * FIT_MFD_DATA_MASK.  Each helper uses its own mask so the two field widths
 * can be adjusted independently.
 */
#define FIT_MFD_MASK (0xFFu)
#define FIT_MFD_DATA_MASK (0xFFu)
#define FIT_MFD_MSG(x) (((x) >> 24) & FIT_MFD_MASK)
#define FIT_MFD_DATA(x) ((x) & FIT_MFD_DATA_MASK)
/*
* Extra arg to FIT_MSG_TO_DEVICE, 64-bit r/w
* Used to set completion queue address (FIT_MTD_SET_COMPQ_ADDR)
* (was Response buffer in docs)
*/
#define FIT_MSG_TO_DEVICE_ARG 0x580u
/*
* Hardware (ASIC) version, 32-bit r/o
*/
#define FIT_HW_VERSION 0x588u
/*
* Scatter/gather list descriptor.
* 32-bytes and must be aligned on a 32-byte boundary.
* All fields are in little endian order.
*/
/*
 * Member widths add up to 4 + 4 + 8 + 8 + 8 = 32 bytes, and every member
 * sits at a naturally aligned offset, so the 1-byte packing in effect for
 * this header does not change the layout.
 */
struct fit_sg_descriptor {
/* NOTE(review): presumably one of the FIT_SGD_CONTROL_* values -- confirm */
uint32_t control;
uint32_t byte_count;
uint64_t host_side_addr;
uint64_t dev_side_addr;
uint64_t next_desc_ptr;
};
#define FIT_SGD_CONTROL_NOT_LAST 0x000u
#define FIT_SGD_CONTROL_LAST 0x40Eu
/*
* Header at the beginning of a FIT message. The header
* is followed by SSDI requests each 64 bytes.
* A FIT message can be up to 512 bytes long and must start
* on a 64-byte boundary.
*/
/*
 * 64 bytes in total (1 + 1 + 62): two one-byte fields followed by reserved
 * padding.
 */
struct fit_msg_hdr {
/* NOTE(review): presumably one of the FIT_PROTOCOL_ID_* values -- confirm */
uint8_t protocol_id;
uint8_t num_protocol_cmds_coalesced;
uint8_t _reserved[62];
};
#define FIT_PROTOCOL_ID_FIT 1
#define FIT_PROTOCOL_ID_SSDI 2
#define FIT_PROTOCOL_ID_SOFIT 3
/*
 * Extract the 4-bit protocol version fields from a message value:
 * minor version from bits 19..16, major version from bits 23..20.
 * The argument is parenthesized so that expressions containing
 * lower-precedence operators (e.g. "a | b") are shifted as a whole.
 */
#define FIT_PROTOCOL_MINOR_VER(mtd_val) (((mtd_val) >> 16) & 0xF)
#define FIT_PROTOCOL_MAJOR_VER(mtd_val) (((mtd_val) >> 20) & 0xF)
/*
* Format of a completion entry. The completion queue is circular
* and must have at least as many entries as the maximum number
* of commands that may be issued to the device.
*
* There are no head/tail pointers. The cycle value is used to
* infer the presence of new completion records.
* Initially the cycle in all entries is 0, the index is 0, and
* the cycle value to expect is 1. When completions are added
* their cycle values are set to 1. When the index wraps the
* cycle value to expect is incremented.
*
* Command_context is opaque and taken verbatim from the SSDI command.
* All other fields are big endian.
*/
#define FIT_PROTOCOL_VERSION_0 0
/*
* Protocol major version 1 completion entry.
* The major protocol version is found in bits
* 20-23 of the FIT_MTD_FITFW_INIT response.
*/
/*
 * One completion queue entry, 8 bytes in total (4 + 2 + 1 + 1).
 * Per the completion queue description in this header, the cycle value is
 * what a consumer compares against the expected cycle to detect new entries.
 */
struct fit_completion_entry_v1 {
uint32_t num_returned_bytes;
uint16_t tag;
uint8_t status; /* SCSI status */
uint8_t cycle;
};
#define FIT_PROTOCOL_VERSION_1 1
#define FIT_PROTOCOL_VERSION_CURRENT FIT_PROTOCOL_VERSION_1
/*
 * Sense-data style error record, 32 (0x20) bytes in total.
 *
 * The hexadecimal number that starts each member comment is the member's
 * byte offset within the structure.  'per' sits at offset 0x16, which is not
 * a multiple of 8, so the layout depends on the 1-byte packing established by
 * the #pragma pack at the top of this header.
 *
 * NOTE(review): the bit positions quoted in the comments (e.g. 'type' in
 * bits 0-6 and 'valid' in bit 7, with 'type' declared first) assume the
 * compiler allocates bit-fields starting from the least significant bit.
 * Bit-field allocation order is implementation-defined in C -- confirm for
 * every ABI this header is built for.
 */
struct fit_comp_error_info {
uint8_t type:7; /* 00: Bits0-6 indicates the type of sense data. */
uint8_t valid:1; /* 00: Bit 7 := 1 ==> info field is valid. */
uint8_t reserved0; /* 01: Obsolete field */
uint8_t key:4; /* 02: Bits0-3 indicate the sense key. */
uint8_t reserved2:1; /* 02: Reserved bit. */
uint8_t bad_length:1; /* 02: Incorrect Length Indicator */
uint8_t end_medium:1; /* 02: End of Medium */
uint8_t file_mark:1; /* 02: Filemark */
uint8_t info[4]; /* 03: */
uint8_t reserved1; /* 07: Additional Sense Length */
uint8_t cmd_spec[4]; /* 08: Command Specific Information */
uint8_t code; /* 0C: Additional Sense Code */
uint8_t qual; /* 0D: Additional Sense Code Qualifier */
uint8_t fruc; /* 0E: Field Replaceable Unit Code */
uint8_t sks_high:7; /* 0F: Sense Key Specific (MSB) */
uint8_t sks_valid:1; /* 0F: Sense Key Specific Valid */
uint16_t sks_low; /* 10: Sense Key Specific (LSW) */
uint16_t reserved3; /* 12: Part of additional sense bytes (unused) */
uint16_t uec; /* 14: Additional Sense Bytes */
uint64_t per; /* 16: Additional Sense Bytes */
uint8_t reserved4[2]; /* 1E: Additional Sense Bytes (unused) */
};
/* Task management constants */
#define SOFT_TASK_SIMPLE 0x00
#define SOFT_TASK_HEAD_OF_QUEUE 0x01
#define SOFT_TASK_ORDERED 0x02
/* Version zero has the last 32 bits reserved,
* Version one has the last 32 bits sg_list_len_bytes;
*/
/*
 * Fixed 16-byte command header (8 + 2 + 1 + 1 + 4).
 * The trailing 32-bit word is the one whose meaning differs between protocol
 * versions: reserved in version zero, sg_list_len_bytes in version one.
 */
struct skd_command_header {
uint64_t sg_list_dma_address;
uint16_t tag;
uint8_t attribute;
uint8_t add_cdb_len; /* In 32 bit words */
uint32_t sg_list_len_bytes;
};
/*
 * A command header followed by a 16-byte CDB: 32 bytes in total as declared.
 * The commented-out _reserved member is not part of the structure.
 */
struct skd_scsi_request {
struct skd_command_header hdr;
unsigned char cdb[16];
/* unsigned char _reserved[16]; */
};
/*
 * Driver inquiry page layout, 44 bytes in total as declared.
 *
 * The first byte is split into a 5-bit peripheral_device_type and a 3-bit
 * qualifier; which of the two occupies the low-order bits depends on the
 * compiler's bit-field allocation order (implementation-defined in C).
 * driver_version holds up to 0x14 (20) bytes.
 * NOTE(review): presumably driver_version_length gives the number of valid
 * bytes in driver_version -- confirm against the code that fills this in.
 */
struct driver_inquiry_data {
uint8_t peripheral_device_type:5;
uint8_t qualifier:3;
uint8_t page_code;
uint16_t page_length;
uint16_t pcie_bus_number;
uint8_t pcie_device_number;
uint8_t pcie_function_number;
uint8_t pcie_link_speed;
uint8_t pcie_link_lanes;
uint16_t pcie_vendor_id;
uint16_t pcie_device_id;
uint16_t pcie_subsystem_vendor_id;
uint16_t pcie_subsystem_device_id;
uint8_t reserved1[2];
uint8_t reserved2[3];
uint8_t driver_version_length;
uint8_t driver_version[0x14];
};
#pragma pack(pop, s1120_h)
#endif /* SKD_S1120_H */
......@@ -887,6 +887,8 @@ static int dispatch_discard_io(struct xen_blkif *blkif,
unsigned long secure;
struct phys_req preq;
xen_blkif_get(blkif);
preq.sector_number = req->u.discard.sector_number;
preq.nr_sects = req->u.discard.nr_sectors;
......@@ -899,7 +901,6 @@ static int dispatch_discard_io(struct xen_blkif *blkif,
}
blkif->st_ds_req++;
xen_blkif_get(blkif);
secure = (blkif->vbd.discard_secure &&
(req->u.discard.flag & BLKIF_DISCARD_SECURE)) ?
BLKDEV_DISCARD_SECURE : 0;
......
This diff is collapsed.
......@@ -13,15 +13,8 @@ config BCACHE_DEBUG
---help---
Don't select this option unless you're a developer
Enables extra debugging tools (primarily a fuzz tester)
config BCACHE_EDEBUG
bool "Extended runtime checks"
depends on BCACHE
---help---
Don't select this option unless you're a developer
Enables extra runtime checks which significantly affect performance
Enables extra debugging tools, allows expensive runtime checks to be
turned on.
config BCACHE_CLOSURES_DEBUG
bool "Debug closures"
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -148,6 +148,9 @@
struct btree_iter {
size_t size, used;
#ifdef CONFIG_BCACHE_DEBUG
struct btree *b;
#endif
struct btree_iter_set {
struct bkey *k, *end;
} data[MAX_BSETS];
......@@ -193,54 +196,26 @@ static __always_inline int64_t bkey_cmp(const struct bkey *l,
: (int64_t) KEY_OFFSET(l) - (int64_t) KEY_OFFSET(r);
}
static inline size_t bkey_u64s(const struct bkey *k)
{
BUG_ON(KEY_CSUM(k) > 1);
return 2 + KEY_PTRS(k) + (KEY_CSUM(k) ? 1 : 0);
}
static inline size_t bkey_bytes(const struct bkey *k)
{
return bkey_u64s(k) * sizeof(uint64_t);
}
static inline void bkey_copy(struct bkey *dest, const struct bkey *src)
{
memcpy(dest, src, bkey_bytes(src));
}
static inline void bkey_copy_key(struct bkey *dest, const struct bkey *src)
{
if (!src)
src = &KEY(0, 0, 0);
SET_KEY_INODE(dest, KEY_INODE(src));
SET_KEY_OFFSET(dest, KEY_OFFSET(src));
}
static inline struct bkey *bkey_next(const struct bkey *k)
{
uint64_t *d = (void *) k;
return (struct bkey *) (d + bkey_u64s(k));
}
/* Keylists */
struct keylist {
struct bkey *top;
union {
uint64_t *list;
struct bkey *bottom;
struct bkey *keys;
uint64_t *keys_p;
};
union {
struct bkey *top;
uint64_t *top_p;
};
/* Enough room for btree_split's keys without realloc */
#define KEYLIST_INLINE 16
uint64_t d[KEYLIST_INLINE];
uint64_t inline_keys[KEYLIST_INLINE];
};
static inline void bch_keylist_init(struct keylist *l)
{
l->top = (void *) (l->list = l->d);
l->top_p = l->keys_p = l->inline_keys;
}
static inline void bch_keylist_push(struct keylist *l)
......@@ -256,17 +231,32 @@ static inline void bch_keylist_add(struct keylist *l, struct bkey *k)
static inline bool bch_keylist_empty(struct keylist *l)
{
return l->top == (void *) l->list;
return l->top == l->keys;
}
/*
 * Rewind the list's top pointer to the start of its key storage.
 * The storage itself is neither freed nor cleared.
 */
static inline void bch_keylist_reset(struct keylist *l)
{
l->top = l->keys;
}
static inline void bch_keylist_free(struct keylist *l)
{
if (l->list != l->d)
kfree(l->list);
if (l->keys_p != l->inline_keys)
kfree(l->keys_p);
}
/*
 * Distance between the top and the start of the list.
 *
 * Both top_p and keys_p are uint64_t pointers, so despite the name the
 * result is a count of 64-bit words occupied by the list, not a count of
 * struct bkey entries.
 */
static inline size_t bch_keylist_nkeys(struct keylist *l)
{
return l->top_p - l->keys_p;
}
/* Size in bytes of the occupied part of the list (64-bit words * 8). */
static inline size_t bch_keylist_bytes(struct keylist *l)
{
	const size_t words = bch_keylist_nkeys(l);

	return words * sizeof(uint64_t);
}
void bch_keylist_copy(struct keylist *, struct keylist *);
struct bkey *bch_keylist_pop(struct keylist *);
void bch_keylist_pop_front(struct keylist *);
int bch_keylist_realloc(struct keylist *, int, struct cache_set *);
void bch_bkey_copy_single_ptr(struct bkey *, const struct bkey *,
......@@ -287,7 +277,9 @@ static inline bool bch_cut_back(const struct bkey *where, struct bkey *k)
}
const char *bch_ptr_status(struct cache_set *, const struct bkey *);
bool __bch_ptr_invalid(struct cache_set *, int level, const struct bkey *);
bool bch_btree_ptr_invalid(struct cache_set *, const struct bkey *);
bool bch_extent_ptr_invalid(struct cache_set *, const struct bkey *);
bool bch_ptr_bad(struct btree *, const struct bkey *);
static inline uint8_t gen_after(uint8_t a, uint8_t b)
......@@ -311,7 +303,6 @@ static inline bool ptr_available(struct cache_set *c, const struct bkey *k,
typedef bool (*ptr_filter_fn)(struct btree *, const struct bkey *);
struct bkey *bch_next_recurse_key(struct btree *, struct bkey *);
struct bkey *bch_btree_iter_next(struct btree_iter *);
struct bkey *bch_btree_iter_next_filter(struct btree_iter *,
struct btree *, ptr_filter_fn);
......@@ -361,12 +352,30 @@ void bch_bset_fix_lookup_table(struct btree *, struct bkey *);
struct bkey *__bch_bset_search(struct btree *, struct bset_tree *,
const struct bkey *);
/*
 * Returns the first key that is strictly greater than search.
 *
 * A NULL @search skips the lookup and returns the start of the set's data.
 */
static inline struct bkey *bch_bset_search(struct btree *b, struct bset_tree *t,
					   const struct bkey *search)
{
	if (!search)
		return t->data->start;

	return __bch_bset_search(b, t, search);
}
/*
 * PRECEDING_KEY(_k) - pointer to a temporary key positioned just before _k
 *
 * Evaluates to NULL when both KEY_INODE(_k) and KEY_OFFSET(_k) are zero.
 * Otherwise a temporary is built with KEY(inode, offset, 0) and its
 * (high, low) pair is decremented by one: low is decremented, and high is
 * decremented first when low is zero (a borrow).
 *
 * _k is expanded several times, so it must not have side effects.
 *
 * NOTE(review): the temporary comes from &KEY(...) inside the if block of
 * the statement expression. If KEY() expands to a compound literal, its
 * storage is only guaranteed for that block, yet the pointer is handed to
 * the caller afterwards - confirm and consider caller-provided storage.
 */
#define PRECEDING_KEY(_k) \
({ \
struct bkey *_ret = NULL; \
\
if (KEY_INODE(_k) || KEY_OFFSET(_k)) { \
_ret = &KEY(KEY_INODE(_k), KEY_OFFSET(_k), 0); \
\
if (!_ret->low) \
_ret->high--; \
_ret->low--; \
} \
\
_ret; \
})
bool bch_bkey_try_merge(struct btree *, struct bkey *, struct bkey *);
void bch_btree_sort_lazy(struct btree *);
void bch_btree_sort_into(struct btree *, struct btree *);
......
This diff is collapsed.
......@@ -125,6 +125,7 @@ struct btree {
unsigned long seq;
struct rw_semaphore lock;
struct cache_set *c;
struct btree *parent;
unsigned long flags;
uint16_t written; /* would be nice to kill */
......@@ -200,12 +201,7 @@ static inline bool bkey_written(struct btree *b, struct bkey *k)
static inline void set_gc_sectors(struct cache_set *c)
{
atomic_set(&c->sectors_to_gc, c->sb.bucket_size * c->nbuckets / 8);
}
static inline bool bch_ptr_invalid(struct btree *b, const struct bkey *k)
{
return __bch_ptr_invalid(b->c, b->level, k);
atomic_set(&c->sectors_to_gc, c->sb.bucket_size * c->nbuckets / 16);
}
static inline struct bkey *bch_btree_iter_init(struct btree *b,
......@@ -215,6 +211,16 @@ static inline struct bkey *bch_btree_iter_init(struct btree *b,
return __bch_btree_iter_init(b, iter, search, b->sets);
}
/*
 * Validate key @k for node @b, choosing the checker by node level:
 * a non-zero level uses bch_btree_ptr_invalid(), level zero uses
 * bch_extent_ptr_invalid(). Returns whatever the chosen checker returns.
 */
static inline bool bch_ptr_invalid(struct btree *b, const struct bkey *k)
{
	return b->level
		? bch_btree_ptr_invalid(b->c, k)
		: bch_extent_ptr_invalid(b->c, k);
}
void bkey_put(struct cache_set *c, struct bkey *k);
/* Looping macros */
#define for_each_cached_btree(b, c, iter) \
......@@ -234,51 +240,17 @@ static inline struct bkey *bch_btree_iter_init(struct btree *b,
/* Recursing down the btree */
struct btree_op {
struct closure cl;
struct cache_set *c;
/* Journal entry we have a refcount on */
atomic_t *journal;
/* Bio to be inserted into the cache */
struct bio *cache_bio;
unsigned inode;
uint16_t write_prio;
/* Btree level at which we start taking write locks */
short lock;
/* Btree insertion type */
enum {
BTREE_INSERT,
BTREE_REPLACE
} type:8;
unsigned csum:1;
unsigned skip:1;
unsigned flush_journal:1;
unsigned insert_data_done:1;
unsigned lookup_done:1;
unsigned insert_collision:1;
/* Anything after this point won't get zeroed in do_bio_hook() */
/* Keys to be inserted */
struct keylist keys;
BKEY_PADDED(replace);
};
enum {
BTREE_INSERT_STATUS_INSERT,
BTREE_INSERT_STATUS_BACK_MERGE,
BTREE_INSERT_STATUS_OVERWROTE,
BTREE_INSERT_STATUS_FRONT_MERGE,
};
void bch_btree_op_init_stack(struct btree_op *);
/*
 * Zero the whole of @op, then record @write_lock_level in op->lock.
 */
static inline void bch_btree_op_init(struct btree_op *op, int write_lock_level)
{
	memset(op, 0, sizeof(*op));

	op->lock = write_lock_level;
}
static inline void rw_lock(bool w, struct btree *b, int level)
{
......@@ -290,108 +262,71 @@ static inline void rw_lock(bool w, struct btree *b, int level)
/*
 * Release the lock held on node @b.
 *
 * @w: true if the lock being dropped is the write lock; in that case
 *     b->seq is incremented before the lock is released.
 *
 * With CONFIG_BCACHE_EDEBUG, a write unlock of a node whose key has a
 * non-zero first pointer first checks key order in every set of the node.
 */
static inline void rw_unlock(bool w, struct btree *b)
{
#ifdef CONFIG_BCACHE_EDEBUG
	if (w && b->key.ptr[0]) {
		unsigned set;

		for (set = 0; set <= b->nsets; set++)
			bch_check_key_order(b, b->sets[set].data);
	}
#endif
	if (w) {
		b->seq++;
		up_write(&b->lock);
	} else {
		up_read(&b->lock);
	}
}
#define insert_lock(s, b) ((b)->level <= (s)->lock)
void bch_btree_node_read(struct btree *);
void bch_btree_node_write(struct btree *, struct closure *);
/*
* These macros are for recursing down the btree - they handle the details of
* locking and looking up nodes in the cache for you. They're best treated as
* mere syntax when reading code that uses them.
*
* op->lock determines whether we take a read or a write lock at a given depth.
* If you've got a read lock and find that you need a write lock (i.e. you're
* going to have to split), set op->lock and return -EINTR; btree_root() will
* call you again and you'll have the correct lock.
*/
void bch_btree_set_root(struct btree *);
struct btree *bch_btree_node_alloc(struct cache_set *, int, bool);
struct btree *bch_btree_node_get(struct cache_set *, struct bkey *, int, bool);
/**
 * btree - recurse down the btree on a specified key
 * @fn: function to call, which will be passed the child node
 * @key: key to recurse on
 * @b: parent btree node
 * @op: pointer to struct btree_op
 *
 * Looks up the child one level below @b with bch_btree_node_get(), calls
 * bch_btree_<fn>(child, op, ...) on it and then releases the child with
 * rw_unlock(). _w, which is what rw_unlock() is told, is true when the
 * child's level is <= op->lock.
 *
 * Evaluates to the int returned by bch_btree_<fn>(), or to PTR_ERR() of the
 * lookup result when the lookup returned an error pointer (in which case
 * the function is not called and nothing is unlocked).
 */
#define btree(fn, key, b, op, ...) \
({ \
int _r, l = (b)->level - 1; \
bool _w = l <= (op)->lock; \
struct btree *_b = bch_btree_node_get((b)->c, key, l, op); \
if (!IS_ERR(_b)) { \
_r = bch_btree_ ## fn(_b, op, ##__VA_ARGS__); \
rw_unlock(_w, _b); \
} else \
_r = PTR_ERR(_b); \
_r; \
})
/**
 * btree_root - call a function on the root of the btree
 * @fn: function to call, which will be passed the child node
 * @c: cache set
 * @op: pointer to struct btree_op
 *
 * Each pass samples (c)->root, locks it with rw_lock() (write lock when
 * insert_lock(op, root) is true) and then re-checks that the node is still
 * the root and that the lock type is still the one insert_lock() asks for.
 * Only then is bch_btree_<fn>(root, op, ...) called; otherwise the result
 * stays -EINTR. The node is unlocked and bch_cannibalize_unlock() is called
 * at the end of every pass.
 *
 * The loop repeats for as long as the result is -EINTR; the macro evaluates
 * to the first result that is not -EINTR.
 */
#define btree_root(fn, c, op, ...) \
({ \
int _r = -EINTR; \
do { \
struct btree *_b = (c)->root; \
bool _w = insert_lock(op, _b); \
rw_lock(_w, _b, _b->level); \
if (_b == (c)->root && \
_w == insert_lock(op, _b)) \
_r = bch_btree_ ## fn(_b, op, ##__VA_ARGS__); \
rw_unlock(_w, _b); \
bch_cannibalize_unlock(c, &(op)->cl); \
} while (_r == -EINTR); \
\
_r; \
})
int bch_btree_insert_check_key(struct btree *, struct btree_op *,
struct bkey *);
int bch_btree_insert(struct cache_set *, struct keylist *,
atomic_t *, struct bkey *);
int bch_gc_thread_start(struct cache_set *);
size_t bch_btree_gc_finish(struct cache_set *);
void bch_moving_gc(struct cache_set *);
int bch_btree_check(struct cache_set *);
uint8_t __bch_btree_mark_key(struct cache_set *, int, struct bkey *);
static inline bool should_split(struct btree *b)
static inline void wake_up_gc(struct cache_set *c)
{
struct bset *i = write_block(b);
return b->written >= btree_blocks(b) ||
(i->seq == b->sets[0].data->seq &&
b->written + __set_blocks(i, i->keys + 15, b->c)
> btree_blocks(b));
if (c->gc_thread)
wake_up_process(c->gc_thread);
}
void bch_btree_node_read(struct btree *);
void bch_btree_node_write(struct btree *, struct closure *);
#define MAP_DONE 0
#define MAP_CONTINUE 1
void bch_cannibalize_unlock(struct cache_set *, struct closure *);
void bch_btree_set_root(struct btree *);
struct btree *bch_btree_node_alloc(struct cache_set *, int, struct closure *);
struct btree *bch_btree_node_get(struct cache_set *, struct bkey *,
int, struct btree_op *);
#define MAP_ALL_NODES 0
#define MAP_LEAF_NODES 1
bool bch_btree_insert_check_key(struct btree *, struct btree_op *,
struct bio *);
int bch_btree_insert(struct btree_op *, struct cache_set *);
#define MAP_END_KEY 1
int bch_btree_search_recurse(struct btree *, struct btree_op *);
typedef int (btree_map_nodes_fn)(struct btree_op *, struct btree *);
int __bch_btree_map_nodes(struct btree_op *, struct cache_set *,
struct bkey *, btree_map_nodes_fn *, int);
void bch_queue_gc(struct cache_set *);
size_t bch_btree_gc_finish(struct cache_set *);
void bch_moving_gc(struct closure *);
int bch_btree_check(struct cache_set *, struct btree_op *);
uint8_t __bch_btree_mark_key(struct cache_set *, int, struct bkey *);
/*
 * Convenience wrapper: __bch_btree_map_nodes() with the MAP_ALL_NODES flag.
 * All other arguments and the return value are passed through unchanged.
 */
static inline int bch_btree_map_nodes(struct btree_op *op, struct cache_set *c,
				      struct bkey *from, btree_map_nodes_fn *fn)
{
	const int flags = MAP_ALL_NODES;

	return __bch_btree_map_nodes(op, c, from, fn, flags);
}
/*
 * Convenience wrapper: __bch_btree_map_nodes() with the MAP_LEAF_NODES flag.
 * All other arguments and the return value are passed through unchanged.
 */
static inline int bch_btree_map_leaf_nodes(struct btree_op *op,
					   struct cache_set *c,
					   struct bkey *from,
					   btree_map_nodes_fn *fn)
{
	const int flags = MAP_LEAF_NODES;

	return __bch_btree_map_nodes(op, c, from, fn, flags);
}
typedef int (btree_map_keys_fn)(struct btree_op *, struct btree *,
struct bkey *);
int bch_btree_map_keys(struct btree_op *, struct cache_set *,
struct bkey *, btree_map_keys_fn *, int);
typedef bool (keybuf_pred_fn)(struct keybuf *, struct bkey *);
void bch_keybuf_init(struct keybuf *);
void bch_refill_keybuf(struct cache_set *, struct keybuf *, struct bkey *,
keybuf_pred_fn *);
void bch_refill_keybuf(struct cache_set *, struct keybuf *,
struct bkey *, keybuf_pred_fn *);
bool bch_keybuf_check_overlapping(struct keybuf *, struct bkey *,
struct bkey *);
void bch_keybuf_del(struct keybuf *, struct keybuf_key *);
......
......@@ -11,17 +11,6 @@
#include "closure.h"
/*
 * Run the closure's function: directly via cl->fn(cl) when the closure has
 * no workqueue, otherwise by (re)initialising cl->work with its current
 * work function and queueing it on cl->wq. Failing to queue is a BUG.
 */
void closure_queue(struct closure *cl)
{
	struct workqueue_struct *wq = cl->wq;

	if (!wq) {
		cl->fn(cl);
		return;
	}

	INIT_WORK(&cl->work, cl->work.func);
	BUG_ON(!queue_work(wq, &cl->work));
}
EXPORT_SYMBOL_GPL(closure_queue);
#define CL_FIELD(type, field) \
case TYPE_ ## type: \
return &container_of(cl, struct type, cl)->field
......@@ -30,17 +19,6 @@ static struct closure_waitlist *closure_waitlist(struct closure *cl)
{
switch (cl->type) {
CL_FIELD(closure_with_waitlist, wait);
CL_FIELD(closure_with_waitlist_and_timer, wait);
default:
return NULL;
}
}
static struct timer_list *closure_timer(struct closure *cl)
{
switch (cl->type) {
CL_FIELD(closure_with_timer, timer);
CL_FIELD(closure_with_waitlist_and_timer, timer);
default:
return NULL;
}
......@@ -51,7 +29,7 @@ static inline void closure_put_after_sub(struct closure *cl, int flags)
int r = flags & CLOSURE_REMAINING_MASK;
BUG_ON(flags & CLOSURE_GUARD_MASK);
BUG_ON(!r && (flags & ~(CLOSURE_DESTRUCTOR|CLOSURE_BLOCKING)));
BUG_ON(!r && (flags & ~CLOSURE_DESTRUCTOR));
/* Must deliver precisely one wakeup */
if (r == 1 && (flags & CLOSURE_SLEEPING))
......@@ -59,7 +37,6 @@ static inline void closure_put_after_sub(struct closure *cl, int flags)
if (!r) {
if (cl->fn && !(flags & CLOSURE_DESTRUCTOR)) {
/* CLOSURE_BLOCKING might be set - clear it */
atomic_set(&cl->remaining,
CLOSURE_REMAINING_INITIALIZER);
closure_queue(cl);
......@@ -90,13 +67,13 @@ void closure_sub(struct closure *cl, int v)
{
closure_put_after_sub(cl, atomic_sub_return(v, &cl->remaining));
}
EXPORT_SYMBOL_GPL(closure_sub);
EXPORT_SYMBOL(closure_sub);
void closure_put(struct closure *cl)
{
closure_put_after_sub(cl, atomic_dec_return(&cl->remaining));
}
EXPORT_SYMBOL_GPL(closure_put);
EXPORT_SYMBOL(closure_put);
static void set_waiting(struct closure *cl, unsigned long f)
{
......@@ -133,7 +110,7 @@ void __closure_wake_up(struct closure_waitlist *wait_list)
closure_sub(cl, CLOSURE_WAITING + 1);
}
}
EXPORT_SYMBOL_GPL(__closure_wake_up);
EXPORT_SYMBOL(__closure_wake_up);
bool closure_wait(struct closure_waitlist *list, struct closure *cl)
{
......@@ -146,7 +123,7 @@ bool closure_wait(struct closure_waitlist *list, struct closure *cl)
return true;
}
EXPORT_SYMBOL_GPL(closure_wait);
EXPORT_SYMBOL(closure_wait);
/**
* closure_sync() - sleep until a closure has nothing left to wait on
......@@ -169,7 +146,7 @@ void closure_sync(struct closure *cl)
__closure_end_sleep(cl);
}
EXPORT_SYMBOL_GPL(closure_sync);
EXPORT_SYMBOL(closure_sync);
/**
* closure_trylock() - try to acquire the closure, without waiting
......@@ -183,17 +160,17 @@ bool closure_trylock(struct closure *cl, struct closure *parent)
CLOSURE_REMAINING_INITIALIZER) != -1)
return false;
closure_set_ret_ip(cl);
smp_mb();
cl->parent = parent;
if (parent)
closure_get(parent);
closure_set_ret_ip(cl);
closure_debug_create(cl);
return true;
}
EXPORT_SYMBOL_GPL(closure_trylock);
EXPORT_SYMBOL(closure_trylock);
void __closure_lock(struct closure *cl, struct closure *parent,
struct closure_waitlist *wait_list)
......@@ -205,57 +182,11 @@ void __closure_lock(struct closure *cl, struct closure *parent,
if (closure_trylock(cl, parent))
return;
closure_wait_event_sync(wait_list, &wait,
atomic_read(&cl->remaining) == -1);
closure_wait_event(wait_list, &wait,
atomic_read(&cl->remaining) == -1);
}
}
EXPORT_SYMBOL_GPL(__closure_lock);
/*
 * Timer callback: @data carries the closure pointer stored by
 * do_closure_timer_init(); subtract CLOSURE_TIMER + 1 from it via
 * closure_sub().
 */
static void closure_delay_timer_fn(unsigned long data)
{
	closure_sub((struct closure *) data, CLOSURE_TIMER + 1);
}
/*
 * Initialise the timer returned by closure_timer(cl) so that it calls
 * closure_delay_timer_fn() with @cl as its argument.
 */
void do_closure_timer_init(struct closure *cl)
{
	struct timer_list *t = closure_timer(cl);

	init_timer(t);
	t->function = closure_delay_timer_fn;
	t->data = (unsigned long) cl;
}
EXPORT_SYMBOL_GPL(do_closure_timer_init);
/*
 * Arm @timer to fire @delay jiffies from now on behalf of @cl.
 *
 * Returns false without doing anything if CLOSURE_TIMER is already set in
 * cl->remaining. Otherwise adds CLOSURE_TIMER + 1 to cl->remaining, starts
 * the timer and returns true. A timer that is already pending is a BUG.
 */
bool __closure_delay(struct closure *cl, unsigned long delay,
		     struct timer_list *timer)
{
	if (!(atomic_read(&cl->remaining) & CLOSURE_TIMER)) {
		BUG_ON(timer_pending(timer));

		timer->expires = jiffies + delay;
		atomic_add(CLOSURE_TIMER + 1, &cl->remaining);
		add_timer(timer);
		return true;
	}

	return false;
}
EXPORT_SYMBOL_GPL(__closure_delay);
/*
 * Cancel @timer with del_timer(); if that reports non-zero, subtract
 * CLOSURE_TIMER + 1 from @cl via closure_sub().
 */
void __closure_flush(struct closure *cl, struct timer_list *timer)
{
	if (!del_timer(timer))
		return;

	closure_sub(cl, CLOSURE_TIMER + 1);
}
EXPORT_SYMBOL_GPL(__closure_flush);
/*
 * Same as __closure_flush() but cancels with del_timer_sync(): if that
 * reports non-zero, subtract CLOSURE_TIMER + 1 from @cl via closure_sub().
 */
void __closure_flush_sync(struct closure *cl, struct timer_list *timer)
{
	if (!del_timer_sync(timer))
		return;

	closure_sub(cl, CLOSURE_TIMER + 1);
}
EXPORT_SYMBOL_GPL(__closure_flush_sync);
EXPORT_SYMBOL(__closure_lock);
#ifdef CONFIG_BCACHE_CLOSURES_DEBUG
......@@ -273,7 +204,7 @@ void closure_debug_create(struct closure *cl)
list_add(&cl->all, &closure_list);
spin_unlock_irqrestore(&closure_list_lock, flags);
}
EXPORT_SYMBOL_GPL(closure_debug_create);
EXPORT_SYMBOL(closure_debug_create);
void closure_debug_destroy(struct closure *cl)
{
......@@ -286,7 +217,7 @@ void closure_debug_destroy(struct closure *cl)
list_del(&cl->all);
spin_unlock_irqrestore(&closure_list_lock, flags);
}
EXPORT_SYMBOL_GPL(closure_debug_destroy);
EXPORT_SYMBOL(closure_debug_destroy);
static struct dentry *debug;
......@@ -304,14 +235,12 @@ static int debug_seq_show(struct seq_file *f, void *data)
cl, (void *) cl->ip, cl->fn, cl->parent,
r & CLOSURE_REMAINING_MASK);
seq_printf(f, "%s%s%s%s%s%s\n",
seq_printf(f, "%s%s%s%s\n",
test_bit(WORK_STRUCT_PENDING,
work_data_bits(&cl->work)) ? "Q" : "",
r & CLOSURE_RUNNING ? "R" : "",
r & CLOSURE_BLOCKING ? "B" : "",
r & CLOSURE_STACK ? "S" : "",
r & CLOSURE_SLEEPING ? "Sl" : "",
r & CLOSURE_TIMER ? "T" : "");
r & CLOSURE_SLEEPING ? "Sl" : "");
if (r & CLOSURE_WAITING)
seq_printf(f, " W %pF\n",
......
This diff is collapsed.
......@@ -8,7 +8,6 @@
#include "bcache.h"
#include "btree.h"
#include "debug.h"
#include "request.h"
#include <linux/console.h>
#include <linux/debugfs.h>
......@@ -77,29 +76,17 @@ int bch_bkey_to_text(char *buf, size_t size, const struct bkey *k)
return out - buf;
}
/*
 * Format a short description of node @b into @buf (at most @size bytes):
 * the bucket number of the node key's pointer 0, then the node's level and
 * the root's level (-1 when the cache set has no root).
 *
 * Returns the value returned by scnprintf().
 */
int bch_btree_to_text(char *buf, size_t size, const struct btree *b)
{
	const struct btree *root = b->c->root;
	int root_level = root ? root->level : -1;

	return scnprintf(buf, size, "%zu level %i/%i",
			 PTR_BUCKET_NR(b->c, &b->key, 0),
			 b->level, root_level);
}
#if defined(CONFIG_BCACHE_DEBUG) || defined(CONFIG_BCACHE_EDEBUG)
/*
 * Return true if key @k compares greater than the key that follows it.
 * At level 0 the comparison is against START_KEY() of the next key; at
 * other levels it is against the next key itself.
 */
static bool skipped_backwards(struct btree *b, struct bkey *k)
{
	struct bkey *next = bkey_next(k);
	/* Keep the START_KEY() temporary at function scope while it is used. */
	struct bkey *bound = b->level ? next : &START_KEY(next);

	return bkey_cmp(k, bound) > 0;
}
#ifdef CONFIG_BCACHE_DEBUG
static void dump_bset(struct btree *b, struct bset *i)
{
struct bkey *k;
struct bkey *k, *next;
unsigned j;
char buf[80];
for (k = i->start; k < end(i); k = bkey_next(k)) {
for (k = i->start; k < end(i); k = next) {
next = bkey_next(k);
bch_bkey_to_text(buf, sizeof(buf), k);
printk(KERN_ERR "block %zu key %zi/%u: %s", index(i, b),
(uint64_t *) k - i->d, i->keys, buf);
......@@ -115,15 +102,21 @@ static void dump_bset(struct btree *b, struct bset *i)
printk(" %s\n", bch_ptr_status(b->c, k));
if (bkey_next(k) < end(i) &&
skipped_backwards(b, k))
if (next < end(i) &&
bkey_cmp(k, !b->level ? &START_KEY(next) : next) > 0)
printk(KERN_ERR "Key skipped backwards\n");
}
}
#endif
static void bch_dump_bucket(struct btree *b)
{
unsigned i;
#ifdef CONFIG_BCACHE_DEBUG
console_lock();
for (i = 0; i <= b->nsets; i++)
dump_bset(b, b->sets[i].data);
console_unlock();
}
void bch_btree_verify(struct btree *b, struct bset *new)
{
......@@ -176,66 +169,44 @@ void bch_btree_verify(struct btree *b, struct bset *new)
mutex_unlock(&b->c->verify_lock);
}
static void data_verify_endio(struct bio *bio, int error)
{
struct closure *cl = bio->bi_private;
closure_put(cl);
}
void bch_data_verify(struct search *s)
void bch_data_verify(struct cached_dev *dc, struct bio *bio)
{
char name[BDEVNAME_SIZE];
struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
struct closure *cl = &s->cl;
struct bio *check;
struct bio_vec *bv;
int i;
if (!s->unaligned_bvec)
bio_for_each_segment(bv, s->orig_bio, i)
bv->bv_offset = 0, bv->bv_len = PAGE_SIZE;
check = bio_clone(s->orig_bio, GFP_NOIO);
check = bio_clone(bio, GFP_NOIO);
if (!check)
return;
if (bio_alloc_pages(check, GFP_NOIO))
goto out_put;
check->bi_rw = READ_SYNC;
check->bi_private = cl;
check->bi_end_io = data_verify_endio;
closure_bio_submit(check, cl, &dc->disk);
closure_sync(cl);
submit_bio_wait(READ_SYNC, check);
bio_for_each_segment(bv, s->orig_bio, i) {
void *p1 = kmap(bv->bv_page);
void *p2 = kmap(check->bi_io_vec[i].bv_page);
bio_for_each_segment(bv, bio, i) {
void *p1 = kmap_atomic(bv->bv_page);
void *p2 = page_address(check->bi_io_vec[i].bv_page);
if (memcmp(p1 + bv->bv_offset,
p2 + bv->bv_offset,
bv->bv_len))
printk(KERN_ERR
"bcache (%s): verify failed at sector %llu\n",
bdevname(dc->bdev, name),
(uint64_t) s->orig_bio->bi_sector);
cache_set_err_on(memcmp(p1 + bv->bv_offset,
p2 + bv->bv_offset,
bv->bv_len),
dc->disk.c,
"verify failed at dev %s sector %llu",
bdevname(dc->bdev, name),
(uint64_t) bio->bi_sector);
kunmap(bv->bv_page);
kunmap(check->bi_io_vec[i].bv_page);
kunmap_atomic(p1);
}
__bio_for_each_segment(bv, check, i, 0)
bio_for_each_segment_all(bv, check, i)
__free_page(bv->bv_page);
out_put:
bio_put(check);
}
#endif
#ifdef CONFIG_BCACHE_EDEBUG
unsigned bch_count_data(struct btree *b)
int __bch_count_data(struct btree *b)
{
unsigned ret = 0;
struct btree_iter iter;
......@@ -247,72 +218,60 @@ unsigned bch_count_data(struct btree *b)
return ret;
}
/*
 * Dump every set of node @b and the caller's formatted message while
 * holding the console lock, then panic with the node's text description.
 */
static void vdump_bucket_and_panic(struct btree *b, const char *fmt,
				   va_list args)
{
	char desc[80];
	unsigned set = 0;

	console_lock();

	while (set <= b->nsets) {
		dump_bset(b, b->sets[set].data);
		set++;
	}

	vprintk(fmt, args);
	console_unlock();

	bch_btree_to_text(desc, sizeof(desc), b);
	panic("at %s\n", desc);
}
/*
 * Walk the keys of set @i in node @b; if any key compares greater than its
 * successor (see skipped_backwards()), dump the node and panic with the
 * caller's printf-style message. A set with no keys is not checked.
 */
void bch_check_key_order_msg(struct btree *b, struct bset *i,
			     const char *fmt, ...)
{
	struct bkey *cur, *next;
	va_list args;

	if (!i->keys)
		return;

	for (cur = i->start; (next = bkey_next(cur)) < end(i); cur = next) {
		if (!skipped_backwards(b, cur))
			continue;

		va_start(args, fmt);
		vdump_bucket_and_panic(b, fmt, args);
		va_end(args);
	}
}
void bch_check_keys(struct btree *b, const char *fmt, ...)
void __bch_check_keys(struct btree *b, const char *fmt, ...)
{
va_list args;
struct bkey *k, *p = NULL;
struct btree_iter iter;
if (b->level)
return;
const char *err;
for_each_key(b, k, &iter) {
if (p && bkey_cmp(&START_KEY(p), &START_KEY(k)) > 0) {
printk(KERN_ERR "Keys out of order:\n");
goto bug;
}
if (bch_ptr_invalid(b, k))
continue;
if (p && bkey_cmp(p, &START_KEY(k)) > 0) {
printk(KERN_ERR "Overlapping keys:\n");
goto bug;
if (!b->level) {
err = "Keys out of order";
if (p && bkey_cmp(&START_KEY(p), &START_KEY(k)) > 0)
goto bug;
if (bch_ptr_invalid(b, k))
continue;
err = "Overlapping keys";
if (p && bkey_cmp(p, &START_KEY(k)) > 0)
goto bug;
} else {
if (bch_ptr_bad(b, k))
continue;
err = "Duplicate keys";
if (p && !bkey_cmp(p, k))
goto bug;
}
p = k;
}
err = "Key larger than btree node key";
if (p && bkey_cmp(p, &b->key) > 0)
goto bug;
return;
bug:
bch_dump_bucket(b);
va_start(args, fmt);
vdump_bucket_and_panic(b, fmt, args);
vprintk(fmt, args);
va_end(args);
panic("bcache error: %s:\n", err);
}
/*
 * Debug check on the iterator's current set: if the key after the current
 * one is still inside the set and the current key compares greater than it
 * (greater than its START_KEY() at level 0), dump the node and panic.
 */
void bch_btree_iter_next_check(struct btree_iter *iter)
{
	struct bkey *k = iter->data->k;
	struct bkey *next = bkey_next(k);

	if (next >= iter->data->end)
		return;

	if (bkey_cmp(k, iter->b->level ? next : &START_KEY(next)) <= 0)
		return;

	bch_dump_bucket(iter->b);
	panic("Key skipped backwards\n");
}
#endif
......
......@@ -4,40 +4,44 @@
/* Btree/bkey debug printing */
int bch_bkey_to_text(char *buf, size_t size, const struct bkey *k);
int bch_btree_to_text(char *buf, size_t size, const struct btree *b);
#ifdef CONFIG_BCACHE_EDEBUG
unsigned bch_count_data(struct btree *);
void bch_check_key_order_msg(struct btree *, struct bset *, const char *, ...);
void bch_check_keys(struct btree *, const char *, ...);
#define bch_check_key_order(b, i) \
bch_check_key_order_msg(b, i, "keys out of order")
#define EBUG_ON(cond) BUG_ON(cond)
#else /* EDEBUG */
#define bch_count_data(b) 0
#define bch_check_key_order(b, i) do {} while (0)
#define bch_check_key_order_msg(b, i, ...) do {} while (0)
#define bch_check_keys(b, ...) do {} while (0)
#define EBUG_ON(cond) do {} while (0)
#endif
#ifdef CONFIG_BCACHE_DEBUG
void bch_btree_verify(struct btree *, struct bset *);
void bch_data_verify(struct search *);
void bch_data_verify(struct cached_dev *, struct bio *);
int __bch_count_data(struct btree *);
void __bch_check_keys(struct btree *, const char *, ...);
void bch_btree_iter_next_check(struct btree_iter *);
#define EBUG_ON(cond) BUG_ON(cond)
#define expensive_debug_checks(c) ((c)->expensive_debug_checks)
#define key_merging_disabled(c) ((c)->key_merging_disabled)
#define bypass_torture_test(d) ((d)->bypass_torture_test)
#else /* DEBUG */
static inline void bch_btree_verify(struct btree *b, struct bset *i) {}
static inline void bch_data_verify(struct search *s) {};
static inline void bch_data_verify(struct cached_dev *dc, struct bio *bio) {}
static inline int __bch_count_data(struct btree *b) { return -1; }
static inline void __bch_check_keys(struct btree *b, const char *fmt, ...) {}
static inline void bch_btree_iter_next_check(struct btree_iter *iter) {}
#define EBUG_ON(cond) do { if (cond); } while (0)
#define expensive_debug_checks(c) 0
#define key_merging_disabled(c) 0
#define bypass_torture_test(d) 0
#endif
#define bch_count_data(b) \
(expensive_debug_checks((b)->c) ? __bch_count_data(b) : -1)
#define bch_check_keys(b, ...) \
do { \
if (expensive_debug_checks((b)->c)) \
__bch_check_keys(b, __VA_ARGS__); \
} while (0)
#ifdef CONFIG_DEBUG_FS
void bch_debug_init_cache_set(struct cache_set *);
#else
......
This diff is collapsed.
......@@ -75,43 +75,6 @@
* nodes that are pinning the oldest journal entries first.
*/
#define BCACHE_JSET_VERSION_UUIDv1 1
/* Always latest UUID format */
#define BCACHE_JSET_VERSION_UUID 1
#define BCACHE_JSET_VERSION 1
/*
 * On disk format for a journal entry:
 * seq is monotonically increasing; every journal entry has its own unique
 * sequence number.
 *
 * last_seq is the oldest journal entry that still has keys the btree hasn't
 * flushed to disk yet.
 *
 * version is for on disk format changes.
 *
 * The entry ends with a zero-length array that can be addressed either as
 * keys (start) or as raw 64-bit words (d).
 */
struct jset {
uint64_t csum; /* NOTE(review): presumably a checksum of the entry - confirm */
uint64_t magic;
uint64_t seq; /* unique, monotonically increasing sequence number */
uint32_t version; /* on disk format version */
uint32_t keys; /* NOTE(review): presumably the amount of key data that follows - confirm units */
uint64_t last_seq; /* oldest entry with keys not yet flushed by the btree */
BKEY_PADDED(uuid_bucket);
BKEY_PADDED(btree_root);
uint16_t btree_level; /* NOTE(review): presumably the level of btree_root - confirm */
uint16_t pad[3];
uint64_t prio_bucket[MAX_CACHES_PER_SET]; /* one slot per possible cache in the set */
union {
struct bkey start[0];
uint64_t d[0];
};
};
/*
* Only used for holding the journal entries we read in btree_journal_read()
* during cache_registration
......@@ -140,7 +103,8 @@ struct journal {
spinlock_t lock;
/* used when waiting because the journal was full */
struct closure_waitlist wait;
struct closure_with_timer io;
struct closure io;
struct delayed_work work;
/* Number of blocks free in the bucket(s) we're currently writing to */
unsigned blocks_free;
......@@ -188,8 +152,7 @@ struct journal_device {
};
#define journal_pin_cmp(c, l, r) \
(fifo_idx(&(c)->journal.pin, (l)->journal) > \
fifo_idx(&(c)->journal.pin, (r)->journal))
(fifo_idx(&(c)->journal.pin, (l)) > fifo_idx(&(c)->journal.pin, (r)))
#define JOURNAL_PIN 20000
......@@ -199,15 +162,14 @@ struct journal_device {
struct closure;
struct cache_set;
struct btree_op;
struct keylist;
void bch_journal(struct closure *);
atomic_t *bch_journal(struct cache_set *, struct keylist *, struct closure *);
void bch_journal_next(struct journal *);
void bch_journal_mark(struct cache_set *, struct list_head *);
void bch_journal_meta(struct cache_set *, struct closure *);
int bch_journal_read(struct cache_set *, struct list_head *,
struct btree_op *);
int bch_journal_replay(struct cache_set *, struct list_head *,
struct btree_op *);
int bch_journal_read(struct cache_set *, struct list_head *);
int bch_journal_replay(struct cache_set *, struct list_head *);
void bch_journal_free(struct cache_set *);
int bch_journal_alloc(struct cache_set *);
......
......@@ -12,8 +12,9 @@
#include <trace/events/bcache.h>
struct moving_io {
struct closure cl;
struct keybuf_key *w;
struct search s;
struct data_insert_op op;
struct bbio bio;
};
......@@ -38,13 +39,13 @@ static bool moving_pred(struct keybuf *buf, struct bkey *k)
static void moving_io_destructor(struct closure *cl)
{
struct moving_io *io = container_of(cl, struct moving_io, s.cl);
struct moving_io *io = container_of(cl, struct moving_io, cl);
kfree(io);
}
static void write_moving_finish(struct closure *cl)
{
struct moving_io *io = container_of(cl, struct moving_io, s.cl);
struct moving_io *io = container_of(cl, struct moving_io, cl);
struct bio *bio = &io->bio.bio;
struct bio_vec *bv;
int i;
......@@ -52,13 +53,12 @@ static void write_moving_finish(struct closure *cl)
bio_for_each_segment_all(bv, bio, i)
__free_page(bv->bv_page);
if (io->s.op.insert_collision)
if (io->op.replace_collision)
trace_bcache_gc_copy_collision(&io->w->key);
bch_keybuf_del(&io->s.op.c->moving_gc_keys, io->w);
bch_keybuf_del(&io->op.c->moving_gc_keys, io->w);
atomic_dec_bug(&io->s.op.c->in_flight);
closure_wake_up(&io->s.op.c->moving_gc_wait);
up(&io->op.c->moving_in_flight);
closure_return_with_destructor(cl, moving_io_destructor);
}
......@@ -66,12 +66,12 @@ static void write_moving_finish(struct closure *cl)
static void read_moving_endio(struct bio *bio, int error)
{
struct moving_io *io = container_of(bio->bi_private,
struct moving_io, s.cl);
struct moving_io, cl);
if (error)
io->s.error = error;
io->op.error = error;
bch_bbio_endio(io->s.op.c, bio, error, "reading data to move");
bch_bbio_endio(io->op.c, bio, error, "reading data to move");
}
static void moving_init(struct moving_io *io)
......@@ -85,54 +85,53 @@ static void moving_init(struct moving_io *io)
bio->bi_size = KEY_SIZE(&io->w->key) << 9;
bio->bi_max_vecs = DIV_ROUND_UP(KEY_SIZE(&io->w->key),
PAGE_SECTORS);
bio->bi_private = &io->s.cl;
bio->bi_private = &io->cl;
bio->bi_io_vec = bio->bi_inline_vecs;
bch_bio_map(bio, NULL);
}
static void write_moving(struct closure *cl)
{
struct search *s = container_of(cl, struct search, cl);
struct moving_io *io = container_of(s, struct moving_io, s);
struct moving_io *io = container_of(cl, struct moving_io, cl);
struct data_insert_op *op = &io->op;
if (!s->error) {
if (!op->error) {
moving_init(io);
io->bio.bio.bi_sector = KEY_START(&io->w->key);
s->op.lock = -1;
s->op.write_prio = 1;
s->op.cache_bio = &io->bio.bio;
io->bio.bio.bi_sector = KEY_START(&io->w->key);
op->write_prio = 1;
op->bio = &io->bio.bio;
s->writeback = KEY_DIRTY(&io->w->key);
s->op.csum = KEY_CSUM(&io->w->key);
op->writeback = KEY_DIRTY(&io->w->key);
op->csum = KEY_CSUM(&io->w->key);
s->op.type = BTREE_REPLACE;
bkey_copy(&s->op.replace, &io->w->key);
bkey_copy(&op->replace_key, &io->w->key);
op->replace = true;
closure_init(&s->op.cl, cl);
bch_insert_data(&s->op.cl);
closure_call(&op->cl, bch_data_insert, NULL, cl);
}
continue_at(cl, write_moving_finish, NULL);
continue_at(cl, write_moving_finish, system_wq);
}
static void read_moving_submit(struct closure *cl)
{
struct search *s = container_of(cl, struct search, cl);
struct moving_io *io = container_of(s, struct moving_io, s);
struct moving_io *io = container_of(cl, struct moving_io, cl);
struct bio *bio = &io->bio.bio;
bch_submit_bbio(bio, s->op.c, &io->w->key, 0);
bch_submit_bbio(bio, io->op.c, &io->w->key, 0);
continue_at(cl, write_moving, bch_gc_wq);
continue_at(cl, write_moving, system_wq);
}
static void read_moving(struct closure *cl)
static void read_moving(struct cache_set *c)
{
struct cache_set *c = container_of(cl, struct cache_set, moving_gc);
struct keybuf_key *w;
struct moving_io *io;
struct bio *bio;
struct closure cl;
closure_init_stack(&cl);
/* XXX: if we error, background writeback could stall indefinitely */
......@@ -150,8 +149,8 @@ static void read_moving(struct closure *cl)
w->private = io;
io->w = w;
io->s.op.inode = KEY_INODE(&w->key);
io->s.op.c = c;
io->op.inode = KEY_INODE(&w->key);
io->op.c = c;
moving_init(io);
bio = &io->bio.bio;
......@@ -164,13 +163,8 @@ static void read_moving(struct closure *cl)
trace_bcache_gc_copy(&w->key);
closure_call(&io->s.cl, read_moving_submit, NULL, &c->gc.cl);
if (atomic_inc_return(&c->in_flight) >= 64) {
closure_wait_event(&c->moving_gc_wait, cl,
atomic_read(&c->in_flight) < 64);
continue_at(cl, read_moving, bch_gc_wq);
}
down(&c->moving_in_flight);
closure_call(&io->cl, read_moving_submit, NULL, &cl);
}
if (0) {
......@@ -180,7 +174,7 @@ err: if (!IS_ERR_OR_NULL(w->private))
bch_keybuf_del(&c->moving_gc_keys, w);
}
closure_return(cl);
closure_sync(&cl);
}
static bool bucket_cmp(struct bucket *l, struct bucket *r)
......@@ -193,15 +187,14 @@ static unsigned bucket_heap_top(struct cache *ca)
return GC_SECTORS_USED(heap_peek(&ca->heap));
}
void bch_moving_gc(struct closure *cl)
void bch_moving_gc(struct cache_set *c)
{
struct cache_set *c = container_of(cl, struct cache_set, gc.cl);
struct cache *ca;
struct bucket *b;
unsigned i;
if (!c->copy_gc_enabled)
closure_return(cl);
return;
mutex_lock(&c->bucket_lock);
......@@ -242,13 +235,11 @@ void bch_moving_gc(struct closure *cl)
c->moving_gc_keys.last_scanned = ZERO_KEY;
closure_init(&c->moving_gc, cl);
read_moving(&c->moving_gc);
closure_return(cl);
read_moving(c);
}
/*
 * Set up the moving-GC state of cache set @c: the moving_in_flight
 * semaphore starts with a count of 64 and the moving_gc_keys keybuf is
 * initialised.
 */
void bch_moving_init_cache_set(struct cache_set *c)
{
	sema_init(&c->moving_in_flight, 64);
	bch_keybuf_init(&c->moving_gc_keys);
}
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -38,7 +38,9 @@ struct cache_accounting {
struct cache_stats day;
};
struct search;
struct cache_set;
struct cached_dev;
struct bcache_device;
void bch_cache_accounting_init(struct cache_accounting *acc,
struct closure *parent);
......@@ -50,9 +52,10 @@ void bch_cache_accounting_clear(struct cache_accounting *acc);
void bch_cache_accounting_destroy(struct cache_accounting *acc);
void bch_mark_cache_accounting(struct search *s, bool hit, bool bypass);
void bch_mark_cache_readahead(struct search *s);
void bch_mark_cache_miss_collision(struct search *s);
void bch_mark_sectors_bypassed(struct search *s, int sectors);
void bch_mark_cache_accounting(struct cache_set *, struct bcache_device *,
bool, bool);
void bch_mark_cache_readahead(struct cache_set *, struct bcache_device *);
void bch_mark_cache_miss_collision(struct cache_set *, struct bcache_device *);
void bch_mark_sectors_bypassed(struct cache_set *, struct cached_dev *, int);
#endif /* _BCACHE_STATS_H_ */
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment