Commit b2cc9c19 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.dk/linux-block

Pull block layer fixes from Jens Axboe:
 "Outside of bcache (which really isn't super big), these are all
  few-liners.  There are a few important fixes in here:

   - Fix blk pm sleeping when holding the queue lock

   - A small collection of bcache fixes that have been done and tested
     since bcache was included in this merge window.

   - A fix for a raid5 regression introduced with the bio changes.

   - Two important fixes for mtip32xx, fixing an oops and potential data
     corruption (or hang) due to wrong bio iteration on stacked devices."

* 'for-linus' of git://git.kernel.dk/linux-block:
  scatterlist: sg_set_buf() argument must be in linear mapping
  raid5: Initialize bi_vcnt
  pktcdvd: silence static checker warning
  block: remove refs to XD disks from documentation
  blkpm: avoid sleep when holding queue lock
  mtip32xx: Correctly handle bio->bi_idx != 0 conditions
  mtip32xx: Fix NULL pointer dereference during module unload
  bcache: Fix error handling in init code
  bcache: clarify free/available/unused space
  bcache: drop "select CLOSURES"
  bcache: Fix incompatible pointer type warning
parents a568fa1c ac4e97ab
......@@ -319,7 +319,10 @@ cache<0..n>
Symlink to each of the cache devices comprising this cache set.
cache_available_percent
Percentage of cache device free.
Percentage of cache device which doesn't contain dirty data, and could
potentially be used for writeback. This doesn't mean this space isn't used
for clean cached data; the unused statistic (in priority_stats) is typically
much lower.
clear_stats
Clears the statistics associated with this cache
......@@ -423,8 +426,11 @@ nbuckets
Total buckets in this cache
priority_stats
Statistics about how recently data in the cache has been accessed. This can
reveal your working set size.
Statistics about how recently data in the cache has been accessed.
This can reveal your working set size. Unused is the percentage of
the cache that doesn't contain any data. Metadata is bcache's
metadata overhead. Average is the average priority of cache buckets.
Next is a list of quantiles with the priority threshold of each.
written
Sum of all data that has been written to the cache; comparison with
......
......@@ -498,12 +498,8 @@ Your cooperation is appreciated.
Each device type has 5 bits (32 minors).
13 block 8-bit MFM/RLL/IDE controller
0 = /dev/xda First XT disk whole disk
64 = /dev/xdb Second XT disk whole disk
Partitions are handled in the same way as IDE disks
(see major number 3).
13 block Previously used for the XT disk (/dev/xdN)
Deleted in kernel v3.9.
14 char Open Sound System (OSS)
0 = /dev/mixer Mixer control
......
......@@ -3351,9 +3351,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
plus one apbt timer for broadcast timer.
x86_mrst_timer=apbt_only | lapic_and_apbt
xd= [HW,XT] Original XT pre-IDE (RLL encoded) disks.
xd_geo= See header of drivers/block/xd.c.
xen_emul_unplug= [HW,X86,XEN]
Unplug Xen emulated devices
Format: [unplug0,][unplug1]
......
......@@ -80,8 +80,6 @@ Valid names are:
/dev/sdd: -> 0x0830 (forth SCSI disk)
/dev/sde: -> 0x0840 (fifth SCSI disk)
/dev/fd : -> 0x0200 (floppy disk)
/dev/xda: -> 0x0c00 (first XT disk, unused in Linux/m68k)
/dev/xdb: -> 0x0c40 (second XT disk, unused in Linux/m68k)
The name must be followed by a decimal number, that stands for the
partition number. Internally, the value of the number is just
......
......@@ -3164,7 +3164,7 @@ void blk_post_runtime_resume(struct request_queue *q, int err)
q->rpm_status = RPM_ACTIVE;
__blk_run_queue(q);
pm_runtime_mark_last_busy(q->dev);
pm_runtime_autosuspend(q->dev);
pm_request_autosuspend(q->dev);
} else {
q->rpm_status = RPM_SUSPENDED;
}
......
......@@ -3002,6 +3002,7 @@ static int mtip_hw_debugfs_init(struct driver_data *dd)
static void mtip_hw_debugfs_exit(struct driver_data *dd)
{
if (dd->dfs_node)
debugfs_remove_recursive(dd->dfs_node);
}
......@@ -3863,7 +3864,7 @@ static void mtip_make_request(struct request_queue *queue, struct bio *bio)
struct driver_data *dd = queue->queuedata;
struct scatterlist *sg;
struct bio_vec *bvec;
int nents = 0;
int i, nents = 0;
int tag = 0, unaligned = 0;
if (unlikely(dd->dd_flag & MTIP_DDF_STOP_IO)) {
......@@ -3921,11 +3922,12 @@ static void mtip_make_request(struct request_queue *queue, struct bio *bio)
}
/* Create the scatter list for this bio. */
bio_for_each_segment(bvec, bio, nents) {
bio_for_each_segment(bvec, bio, i) {
sg_set_page(&sg[nents],
bvec->bv_page,
bvec->bv_len,
bvec->bv_offset);
nents++;
}
/* Issue the read/write. */
......
......@@ -83,7 +83,8 @@
#define MAX_SPEED 0xffff
#define ZONE(sector, pd) (((sector) + (pd)->offset) & ~((pd)->settings.size - 1))
#define ZONE(sector, pd) (((sector) + (pd)->offset) & \
~(sector_t)((pd)->settings.size - 1))
static DEFINE_MUTEX(pktcdvd_mutex);
static struct pktcdvd_device *pkt_devs[MAX_WRITERS];
......
config BCACHE
tristate "Block device as cache"
select CLOSURES
---help---
Allows a block device to be used as cache for other devices; uses
a btree for indexing and the layout is optimized for SSDs.
......
......@@ -1241,7 +1241,7 @@ void bch_cache_set_stop(struct cache_set *);
struct cache_set *bch_cache_set_alloc(struct cache_sb *);
void bch_btree_cache_free(struct cache_set *);
int bch_btree_cache_alloc(struct cache_set *);
void bch_writeback_init_cached_dev(struct cached_dev *);
void bch_cached_dev_writeback_init(struct cached_dev *);
void bch_moving_init_cache_set(struct cache_set *);
void bch_cache_allocator_exit(struct cache *ca);
......
......@@ -93,24 +93,6 @@ static struct attribute *bch_stats_files[] = {
};
static KTYPE(bch_stats);
static void scale_accounting(unsigned long data);
void bch_cache_accounting_init(struct cache_accounting *acc,
struct closure *parent)
{
kobject_init(&acc->total.kobj, &bch_stats_ktype);
kobject_init(&acc->five_minute.kobj, &bch_stats_ktype);
kobject_init(&acc->hour.kobj, &bch_stats_ktype);
kobject_init(&acc->day.kobj, &bch_stats_ktype);
closure_init(&acc->cl, parent);
init_timer(&acc->timer);
acc->timer.expires = jiffies + accounting_delay;
acc->timer.data = (unsigned long) acc;
acc->timer.function = scale_accounting;
add_timer(&acc->timer);
}
int bch_cache_accounting_add_kobjs(struct cache_accounting *acc,
struct kobject *parent)
{
......@@ -244,3 +226,19 @@ void bch_mark_sectors_bypassed(struct search *s, int sectors)
atomic_add(sectors, &dc->accounting.collector.sectors_bypassed);
atomic_add(sectors, &s->op.c->accounting.collector.sectors_bypassed);
}
void bch_cache_accounting_init(struct cache_accounting *acc,
struct closure *parent)
{
kobject_init(&acc->total.kobj, &bch_stats_ktype);
kobject_init(&acc->five_minute.kobj, &bch_stats_ktype);
kobject_init(&acc->hour.kobj, &bch_stats_ktype);
kobject_init(&acc->day.kobj, &bch_stats_ktype);
closure_init(&acc->cl, parent);
init_timer(&acc->timer);
acc->timer.expires = jiffies + accounting_delay;
acc->timer.data = (unsigned long) acc;
acc->timer.function = scale_accounting;
add_timer(&acc->timer);
}
......@@ -634,11 +634,10 @@ static int open_dev(struct block_device *b, fmode_t mode)
return 0;
}
static int release_dev(struct gendisk *b, fmode_t mode)
static void release_dev(struct gendisk *b, fmode_t mode)
{
struct bcache_device *d = b->private_data;
closure_put(&d->cl);
return 0;
}
static int ioctl_dev(struct block_device *b, fmode_t mode,
......@@ -732,8 +731,7 @@ static void bcache_device_free(struct bcache_device *d)
if (d->c)
bcache_device_detach(d);
if (d->disk)
if (d->disk && d->disk->flags & GENHD_FL_UP)
del_gendisk(d->disk);
if (d->disk && d->disk->queue)
blk_cleanup_queue(d->disk->queue);
......@@ -756,12 +754,9 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size)
if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio))) ||
!(d->unaligned_bvec = mempool_create_kmalloc_pool(1,
sizeof(struct bio_vec) * BIO_MAX_PAGES)) ||
bio_split_pool_init(&d->bio_split_hook))
return -ENOMEM;
d->disk = alloc_disk(1);
if (!d->disk)
bio_split_pool_init(&d->bio_split_hook) ||
!(d->disk = alloc_disk(1)) ||
!(q = blk_alloc_queue(GFP_KERNEL)))
return -ENOMEM;
snprintf(d->disk->disk_name, DISK_NAME_LEN, "bcache%i", bcache_minor);
......@@ -771,10 +766,6 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size)
d->disk->fops = &bcache_ops;
d->disk->private_data = d;
q = blk_alloc_queue(GFP_KERNEL);
if (!q)
return -ENOMEM;
blk_queue_make_request(q, NULL);
d->disk->queue = q;
q->queuedata = d;
......@@ -999,6 +990,7 @@ static void cached_dev_free(struct closure *cl)
mutex_lock(&bch_register_lock);
if (atomic_read(&dc->running))
bd_unlink_disk_holder(dc->bdev, dc->disk.disk);
bcache_device_free(&dc->disk);
list_del(&dc->list);
......@@ -1006,7 +998,9 @@ static void cached_dev_free(struct closure *cl)
mutex_unlock(&bch_register_lock);
if (!IS_ERR_OR_NULL(dc->bdev)) {
if (dc->bdev->bd_disk)
blk_sync_queue(bdev_get_queue(dc->bdev));
blkdev_put(dc->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
}
......@@ -1028,73 +1022,67 @@ static void cached_dev_flush(struct closure *cl)
static int cached_dev_init(struct cached_dev *dc, unsigned block_size)
{
int err;
int ret;
struct io *io;
closure_init(&dc->disk.cl, NULL);
set_closure_fn(&dc->disk.cl, cached_dev_flush, system_wq);
struct request_queue *q = bdev_get_queue(dc->bdev);
__module_get(THIS_MODULE);
INIT_LIST_HEAD(&dc->list);
closure_init(&dc->disk.cl, NULL);
set_closure_fn(&dc->disk.cl, cached_dev_flush, system_wq);
kobject_init(&dc->disk.kobj, &bch_cached_dev_ktype);
bch_cache_accounting_init(&dc->accounting, &dc->disk.cl);
err = bcache_device_init(&dc->disk, block_size);
if (err)
goto err;
spin_lock_init(&dc->io_lock);
closure_init_unlocked(&dc->sb_write);
INIT_WORK(&dc->detach, cached_dev_detach_finish);
closure_init_unlocked(&dc->sb_write);
INIT_LIST_HEAD(&dc->io_lru);
spin_lock_init(&dc->io_lock);
bch_cache_accounting_init(&dc->accounting, &dc->disk.cl);
dc->sequential_merge = true;
dc->sequential_cutoff = 4 << 20;
INIT_LIST_HEAD(&dc->io_lru);
dc->sb_bio.bi_max_vecs = 1;
dc->sb_bio.bi_io_vec = dc->sb_bio.bi_inline_vecs;
for (io = dc->io; io < dc->io + RECENT_IO; io++) {
list_add(&io->lru, &dc->io_lru);
hlist_add_head(&io->hash, dc->io_hash + RECENT_IO);
}
bch_writeback_init_cached_dev(dc);
ret = bcache_device_init(&dc->disk, block_size);
if (ret)
return ret;
set_capacity(dc->disk.disk,
dc->bdev->bd_part->nr_sects - dc->sb.data_offset);
dc->disk.disk->queue->backing_dev_info.ra_pages =
max(dc->disk.disk->queue->backing_dev_info.ra_pages,
q->backing_dev_info.ra_pages);
bch_cached_dev_request_init(dc);
bch_cached_dev_writeback_init(dc);
return 0;
err:
bcache_device_stop(&dc->disk);
return err;
}
/* Cached device - bcache superblock */
static const char *register_bdev(struct cache_sb *sb, struct page *sb_page,
static void register_bdev(struct cache_sb *sb, struct page *sb_page,
struct block_device *bdev,
struct cached_dev *dc)
{
char name[BDEVNAME_SIZE];
const char *err = "cannot allocate memory";
struct gendisk *g;
struct cache_set *c;
if (!dc || cached_dev_init(dc, sb->block_size << 9) != 0)
return err;
memcpy(&dc->sb, sb, sizeof(struct cache_sb));
dc->sb_bio.bi_io_vec[0].bv_page = sb_page;
dc->bdev = bdev;
dc->bdev->bd_holder = dc;
g = dc->disk.disk;
set_capacity(g, dc->bdev->bd_part->nr_sects - dc->sb.data_offset);
g->queue->backing_dev_info.ra_pages =
max(g->queue->backing_dev_info.ra_pages,
bdev->bd_queue->backing_dev_info.ra_pages);
bio_init(&dc->sb_bio);
dc->sb_bio.bi_max_vecs = 1;
dc->sb_bio.bi_io_vec = dc->sb_bio.bi_inline_vecs;
dc->sb_bio.bi_io_vec[0].bv_page = sb_page;
get_page(sb_page);
bch_cached_dev_request_init(dc);
if (cached_dev_init(dc, sb->block_size << 9))
goto err;
err = "error creating kobject";
if (kobject_add(&dc->disk.kobj, &part_to_dev(bdev->bd_part)->kobj,
......@@ -1103,6 +1091,8 @@ static const char *register_bdev(struct cache_sb *sb, struct page *sb_page,
if (bch_cache_accounting_add_kobjs(&dc->accounting, &dc->disk.kobj))
goto err;
pr_info("registered backing device %s", bdevname(bdev, name));
list_add(&dc->list, &uncached_devices);
list_for_each_entry(c, &bch_cache_sets, list)
bch_cached_dev_attach(dc, c);
......@@ -1111,15 +1101,10 @@ static const char *register_bdev(struct cache_sb *sb, struct page *sb_page,
BDEV_STATE(&dc->sb) == BDEV_STATE_STALE)
bch_cached_dev_run(dc);
return NULL;
return;
err:
kobject_put(&dc->disk.kobj);
pr_notice("error opening %s: %s", bdevname(bdev, name), err);
/*
* Return NULL instead of an error because kobject_put() cleans
* everything up
*/
return NULL;
bcache_device_stop(&dc->disk);
}
/* Flash only volumes */
......@@ -1717,20 +1702,11 @@ static int cache_alloc(struct cache_sb *sb, struct cache *ca)
size_t free;
struct bucket *b;
if (!ca)
return -ENOMEM;
__module_get(THIS_MODULE);
kobject_init(&ca->kobj, &bch_cache_ktype);
memcpy(&ca->sb, sb, sizeof(struct cache_sb));
INIT_LIST_HEAD(&ca->discards);
bio_init(&ca->sb_bio);
ca->sb_bio.bi_max_vecs = 1;
ca->sb_bio.bi_io_vec = ca->sb_bio.bi_inline_vecs;
bio_init(&ca->journal.bio);
ca->journal.bio.bi_max_vecs = 8;
ca->journal.bio.bi_io_vec = ca->journal.bio.bi_inline_vecs;
......@@ -1742,18 +1718,17 @@ static int cache_alloc(struct cache_sb *sb, struct cache *ca)
!init_fifo(&ca->free_inc, free << 2, GFP_KERNEL) ||
!init_fifo(&ca->unused, free << 2, GFP_KERNEL) ||
!init_heap(&ca->heap, free << 3, GFP_KERNEL) ||
!(ca->buckets = vmalloc(sizeof(struct bucket) *
!(ca->buckets = vzalloc(sizeof(struct bucket) *
ca->sb.nbuckets)) ||
!(ca->prio_buckets = kzalloc(sizeof(uint64_t) * prio_buckets(ca) *
2, GFP_KERNEL)) ||
!(ca->disk_buckets = alloc_bucket_pages(GFP_KERNEL, ca)) ||
!(ca->alloc_workqueue = alloc_workqueue("bch_allocator", 0, 1)) ||
bio_split_pool_init(&ca->bio_split_hook))
goto err;
return -ENOMEM;
ca->prio_last_buckets = ca->prio_buckets + prio_buckets(ca);
memset(ca->buckets, 0, ca->sb.nbuckets * sizeof(struct bucket));
for_each_bucket(b, ca)
atomic_set(&b->pin, 0);
......@@ -1766,22 +1741,28 @@ static int cache_alloc(struct cache_sb *sb, struct cache *ca)
return -ENOMEM;
}
static const char *register_cache(struct cache_sb *sb, struct page *sb_page,
static void register_cache(struct cache_sb *sb, struct page *sb_page,
struct block_device *bdev, struct cache *ca)
{
char name[BDEVNAME_SIZE];
const char *err = "cannot allocate memory";
if (cache_alloc(sb, ca) != 0)
return err;
ca->sb_bio.bi_io_vec[0].bv_page = sb_page;
memcpy(&ca->sb, sb, sizeof(struct cache_sb));
ca->bdev = bdev;
ca->bdev->bd_holder = ca;
bio_init(&ca->sb_bio);
ca->sb_bio.bi_max_vecs = 1;
ca->sb_bio.bi_io_vec = ca->sb_bio.bi_inline_vecs;
ca->sb_bio.bi_io_vec[0].bv_page = sb_page;
get_page(sb_page);
if (blk_queue_discard(bdev_get_queue(ca->bdev)))
ca->discard = CACHE_DISCARD(&ca->sb);
if (cache_alloc(sb, ca) != 0)
goto err;
err = "error creating kobject";
if (kobject_add(&ca->kobj, &part_to_dev(bdev->bd_part)->kobj, "bcache"))
goto err;
......@@ -1791,15 +1772,10 @@ static const char *register_cache(struct cache_sb *sb, struct page *sb_page,
goto err;
pr_info("registered cache device %s", bdevname(bdev, name));
return NULL;
return;
err:
pr_notice("error opening %s: %s", bdevname(bdev, name), err);
kobject_put(&ca->kobj);
pr_info("error opening %s: %s", bdevname(bdev, name), err);
/* Return NULL instead of an error because kobject_put() cleans
* everything up
*/
return NULL;
}
/* Global interfaces/init */
......@@ -1833,12 +1809,15 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
bdev = blkdev_get_by_path(strim(path),
FMODE_READ|FMODE_WRITE|FMODE_EXCL,
sb);
if (IS_ERR(bdev)) {
if (bdev == ERR_PTR(-EBUSY))
err = "device busy";
if (IS_ERR(bdev) ||
set_blocksize(bdev, 4096))
goto err;
}
err = "failed to set blocksize";
if (set_blocksize(bdev, 4096))
goto err_close;
err = read_super(sb, bdev, &sb_page);
if (err)
......@@ -1846,33 +1825,33 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
if (SB_IS_BDEV(sb)) {
struct cached_dev *dc = kzalloc(sizeof(*dc), GFP_KERNEL);
if (!dc)
goto err_close;
err = register_bdev(sb, sb_page, bdev, dc);
register_bdev(sb, sb_page, bdev, dc);
} else {
struct cache *ca = kzalloc(sizeof(*ca), GFP_KERNEL);
if (!ca)
goto err_close;
err = register_cache(sb, sb_page, bdev, ca);
register_cache(sb, sb_page, bdev, ca);
}
if (err) {
/* register_(bdev|cache) will only return an error if they
* didn't get far enough to create the kobject - if they did,
* the kobject destructor will do this cleanup.
*/
out:
if (sb_page)
put_page(sb_page);
kfree(sb);
kfree(path);
mutex_unlock(&bch_register_lock);
module_put(THIS_MODULE);
return ret;
err_close:
blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
err:
if (attr != &ksysfs_register_quiet)
pr_info("error opening %s: %s", path, err);
ret = -EINVAL;
}
kfree(sb);
kfree(path);
mutex_unlock(&bch_register_lock);
module_put(THIS_MODULE);
return ret;
goto out;
}
static int bcache_reboot(struct notifier_block *n, unsigned long code, void *x)
......
......@@ -375,7 +375,7 @@ static void read_dirty(struct closure *cl)
refill_dirty(cl);
}
void bch_writeback_init_cached_dev(struct cached_dev *dc)
void bch_cached_dev_writeback_init(struct cached_dev *dc)
{
closure_init_unlocked(&dc->writeback);
init_rwsem(&dc->writeback_lock);
......
......@@ -664,6 +664,7 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags))
bi->bi_rw |= REQ_FLUSH;
bi->bi_vcnt = 1;
bi->bi_io_vec[0].bv_len = STRIPE_SIZE;
bi->bi_io_vec[0].bv_offset = 0;
bi->bi_size = STRIPE_SIZE;
......@@ -701,6 +702,7 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
else
rbi->bi_sector = (sh->sector
+ rrdev->data_offset);
rbi->bi_vcnt = 1;
rbi->bi_io_vec[0].bv_len = STRIPE_SIZE;
rbi->bi_io_vec[0].bv_offset = 0;
rbi->bi_size = STRIPE_SIZE;
......
......@@ -111,6 +111,9 @@ static inline struct page *sg_page(struct scatterlist *sg)
static inline void sg_set_buf(struct scatterlist *sg, const void *buf,
unsigned int buflen)
{
#ifdef CONFIG_DEBUG_SG
BUG_ON(!virt_addr_valid(buf));
#endif
sg_set_page(sg, virt_to_page(buf), buflen, offset_in_page(buf));
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment