Commit c37511b8 authored by Kent Overstreet's avatar Kent Overstreet

bcache: Fix/revamp tracepoints

The tracepoints were reworked to be more sensible, and fixed a null
pointer deref in one of the tracepoints.

Converted some of the pr_debug()s to tracepoints - this is partly a
performance optimization; it used to be that with DEBUG or
CONFIG_DYNAMIC_DEBUG pr_debug() was an empty macro; but at some point it
was changed to an empty inline function.

Some of the pr_debug() statements had rather expensive function calls as
part of the arguments, so this code was getting run unnecessarily even
on non debug kernels - in some fast paths, too.
Signed-off-by: default avatarKent Overstreet <koverstreet@google.com>
parent 57943511
......@@ -65,6 +65,7 @@
#include <linux/kthread.h>
#include <linux/random.h>
#include <trace/events/bcache.h>
#define MAX_IN_FLIGHT_DISCARDS 8U
......@@ -351,10 +352,7 @@ static void invalidate_buckets(struct cache *ca)
break;
}
pr_debug("free %zu/%zu free_inc %zu/%zu unused %zu/%zu",
fifo_used(&ca->free), ca->free.size,
fifo_used(&ca->free_inc), ca->free_inc.size,
fifo_used(&ca->unused), ca->unused.size);
trace_bcache_alloc_invalidate(ca);
}
#define allocator_wait(ca, cond) \
......@@ -473,9 +471,7 @@ long bch_bucket_alloc(struct cache *ca, unsigned watermark, struct closure *cl)
return r;
}
pr_debug("alloc failure: blocked %i free %zu free_inc %zu unused %zu",
atomic_read(&ca->set->prio_blocked), fifo_used(&ca->free),
fifo_used(&ca->free_inc), fifo_used(&ca->unused));
trace_bcache_alloc_fail(ca);
if (cl) {
closure_wait(&ca->set->bucket_wait, cl);
......
......@@ -178,7 +178,6 @@
#define pr_fmt(fmt) "bcache: %s() " fmt "\n", __func__
#include <linux/bio.h>
#include <linux/blktrace_api.h>
#include <linux/kobject.h>
#include <linux/list.h>
#include <linux/mutex.h>
......@@ -901,8 +900,6 @@ static inline unsigned local_clock_us(void)
return local_clock() >> 10;
}
#define MAX_BSETS 4U
#define BTREE_PRIO USHRT_MAX
#define INITIAL_PRIO 32768
......@@ -1107,23 +1104,6 @@ static inline void __bkey_put(struct cache_set *c, struct bkey *k)
atomic_dec_bug(&PTR_BUCKET(c, k, i)->pin);
}
/* Blktrace macros */
#define blktrace_msg(c, fmt, ...) \
do { \
struct request_queue *q = bdev_get_queue(c->bdev); \
if (q) \
blk_add_trace_msg(q, fmt, ##__VA_ARGS__); \
} while (0)
#define blktrace_msg_all(s, fmt, ...) \
do { \
struct cache *_c; \
unsigned i; \
for_each_cache(_c, (s), i) \
blktrace_msg(_c, fmt, ##__VA_ARGS__); \
} while (0)
static inline void cached_dev_put(struct cached_dev *dc)
{
if (atomic_dec_and_test(&dc->count))
......
#ifndef _BCACHE_BSET_H
#define _BCACHE_BSET_H
#include <linux/slab.h>
/*
* BKEYS:
*
......@@ -142,6 +144,8 @@
/* Btree key comparison/iteration */
#define MAX_BSETS 4U
struct btree_iter {
size_t size, used;
struct btree_iter_set {
......
......@@ -223,8 +223,9 @@ void bch_btree_node_read(struct btree *b)
struct closure cl;
struct bio *bio;
trace_bcache_btree_read(b);
closure_init_stack(&cl);
pr_debug("%s", pbtree(b));
bio = bch_bbio_alloc(b->c);
bio->bi_rw = REQ_META|READ_SYNC;
......@@ -234,7 +235,6 @@ void bch_btree_node_read(struct btree *b)
bch_bio_map(bio, b->sets[0].data);
trace_bcache_btree_read(bio);
bch_submit_bbio(bio, b->c, &b->key, 0);
closure_sync(&cl);
......@@ -343,7 +343,6 @@ static void do_btree_node_write(struct btree *b)
memcpy(page_address(bv->bv_page),
base + j * PAGE_SIZE, PAGE_SIZE);
trace_bcache_btree_write(b->bio);
bch_submit_bbio(b->bio, b->c, &k.key, 0);
continue_at(cl, btree_node_write_done, NULL);
......@@ -351,7 +350,6 @@ static void do_btree_node_write(struct btree *b)
b->bio->bi_vcnt = 0;
bch_bio_map(b->bio, i);
trace_bcache_btree_write(b->bio);
bch_submit_bbio(b->bio, b->c, &k.key, 0);
closure_sync(cl);
......@@ -363,10 +361,13 @@ void bch_btree_node_write(struct btree *b, struct closure *parent)
{
struct bset *i = b->sets[b->nsets].data;
trace_bcache_btree_write(b);
BUG_ON(current->bio_list);
BUG_ON(b->written >= btree_blocks(b));
BUG_ON(b->written && !i->keys);
BUG_ON(b->sets->data->seq != i->seq);
bch_check_key_order(b, i);
cancel_delayed_work(&b->work);
......@@ -376,12 +377,8 @@ void bch_btree_node_write(struct btree *b, struct closure *parent)
clear_bit(BTREE_NODE_dirty, &b->flags);
change_bit(BTREE_NODE_write_idx, &b->flags);
bch_check_key_order(b, i);
do_btree_node_write(b);
pr_debug("%s block %i keys %i", pbtree(b), b->written, i->keys);
b->written += set_blocks(i, b->c);
atomic_long_add(set_blocks(i, b->c) * b->c->sb.block_size,
&PTR_CACHE(b->c, &b->key, 0)->btree_sectors_written);
......@@ -752,6 +749,8 @@ static struct btree *mca_cannibalize(struct cache_set *c, struct bkey *k,
int ret = -ENOMEM;
struct btree *i;
trace_bcache_btree_cache_cannibalize(c);
if (!cl)
return ERR_PTR(-ENOMEM);
......@@ -770,7 +769,6 @@ static struct btree *mca_cannibalize(struct cache_set *c, struct bkey *k,
return ERR_PTR(-EAGAIN);
}
/* XXX: tracepoint */
c->try_harder = cl;
c->try_harder_start = local_clock();
retry:
......@@ -956,13 +954,14 @@ static void btree_node_free(struct btree *b, struct btree_op *op)
{
unsigned i;
trace_bcache_btree_node_free(b);
/*
* The BUG_ON() in btree_node_get() implies that we must have a write
* lock on parent to free or even invalidate a node
*/
BUG_ON(op->lock <= b->level);
BUG_ON(b == b->c->root);
pr_debug("bucket %s", pbtree(b));
if (btree_node_dirty(b))
btree_complete_write(b, btree_current_write(b));
......@@ -1012,12 +1011,16 @@ struct btree *bch_btree_node_alloc(struct cache_set *c, int level,
bch_bset_init_next(b);
mutex_unlock(&c->bucket_lock);
trace_bcache_btree_node_alloc(b);
return b;
err_free:
bch_bucket_free(c, &k.key);
__bkey_put(c, &k.key);
err:
mutex_unlock(&c->bucket_lock);
trace_bcache_btree_node_alloc_fail(b);
return b;
}
......@@ -1254,7 +1257,7 @@ static void btree_gc_coalesce(struct btree *b, struct btree_op *op,
btree_node_free(r->b, op);
up_write(&r->b->lock);
pr_debug("coalesced %u nodes", nodes);
trace_bcache_btree_gc_coalesce(nodes);
gc->nodes--;
nodes--;
......@@ -1479,8 +1482,7 @@ static void bch_btree_gc(struct closure *cl)
struct btree_op op;
uint64_t start_time = local_clock();
trace_bcache_gc_start(c->sb.set_uuid);
blktrace_msg_all(c, "Starting gc");
trace_bcache_gc_start(c);
memset(&stats, 0, sizeof(struct gc_stat));
closure_init_stack(&writes);
......@@ -1496,9 +1498,7 @@ static void bch_btree_gc(struct closure *cl)
closure_sync(&writes);
if (ret) {
blktrace_msg_all(c, "Stopped gc");
pr_warn("gc failed!");
continue_at(cl, bch_btree_gc, bch_gc_wq);
}
......@@ -1519,8 +1519,7 @@ static void bch_btree_gc(struct closure *cl)
stats.in_use = (c->nbuckets - available) * 100 / c->nbuckets;
memcpy(&c->gc_stats, &stats, sizeof(struct gc_stat));
blktrace_msg_all(c, "Finished gc");
trace_bcache_gc_end(c->sb.set_uuid);
trace_bcache_gc_end(c);
continue_at(cl, bch_moving_gc, bch_gc_wq);
}
......@@ -1901,12 +1900,11 @@ static int btree_split(struct btree *b, struct btree_op *op)
split = set_blocks(n1->sets[0].data, n1->c) > (btree_blocks(b) * 4) / 5;
pr_debug("%ssplitting at %s keys %i", split ? "" : "not ",
pbtree(b), n1->sets[0].data->keys);
if (split) {
unsigned keys = 0;
trace_bcache_btree_node_split(b, n1->sets[0].data->keys);
n2 = bch_btree_node_alloc(b->c, b->level, &op->cl);
if (IS_ERR(n2))
goto err_free1;
......@@ -1941,8 +1939,11 @@ static int btree_split(struct btree *b, struct btree_op *op)
bch_keylist_add(&op->keys, &n2->key);
bch_btree_node_write(n2, &op->cl);
rw_unlock(true, n2);
} else
} else {
trace_bcache_btree_node_compact(b, n1->sets[0].data->keys);
bch_btree_insert_keys(n1, op);
}
bch_keylist_add(&op->keys, &n1->key);
bch_btree_node_write(n1, &op->cl);
......@@ -2117,6 +2118,8 @@ void bch_btree_set_root(struct btree *b)
{
unsigned i;
trace_bcache_btree_set_root(b);
BUG_ON(!b->written);
for (i = 0; i < KEY_PTRS(&b->key); i++)
......@@ -2130,7 +2133,6 @@ void bch_btree_set_root(struct btree *b)
__bkey_put(b->c, &b->key);
bch_journal_meta(b->c, NULL);
pr_debug("%s for %pf", pbtree(b), __builtin_return_address(0));
}
/* Cache lookup */
......@@ -2216,7 +2218,6 @@ static int submit_partial_cache_hit(struct btree *b, struct btree_op *op,
n->bi_end_io = bch_cache_read_endio;
n->bi_private = &s->cl;
trace_bcache_cache_hit(n);
__bch_submit_bbio(n, b->c);
}
......
......@@ -9,6 +9,8 @@
#include "bset.h"
#include "debug.h"
#include <linux/blkdev.h>
static void bch_bi_idx_hack_endio(struct bio *bio, int error)
{
struct bio *p = bio->bi_private;
......
......@@ -9,6 +9,8 @@
#include "debug.h"
#include "request.h"
#include <trace/events/bcache.h>
/*
* Journal replay/recovery:
*
......@@ -300,7 +302,8 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list,
for (k = i->j.start;
k < end(&i->j);
k = bkey_next(k)) {
pr_debug("%s", pkey(k));
trace_bcache_journal_replay_key(k);
bkey_copy(op->keys.top, k);
bch_keylist_push(&op->keys);
......@@ -712,7 +715,8 @@ void bch_journal(struct closure *cl)
spin_lock(&c->journal.lock);
if (journal_full(&c->journal)) {
/* XXX: tracepoint */
trace_bcache_journal_full(c);
closure_wait(&c->journal.wait, cl);
journal_reclaim(c);
......@@ -728,13 +732,15 @@ void bch_journal(struct closure *cl)
if (b * c->sb.block_size > PAGE_SECTORS << JSET_BITS ||
b > c->journal.blocks_free) {
/* XXX: If we were inserting so many keys that they won't fit in
trace_bcache_journal_entry_full(c);
/*
* XXX: If we were inserting so many keys that they won't fit in
* an _empty_ journal write, we'll deadlock. For now, handle
* this in bch_keylist_realloc() - but something to think about.
*/
BUG_ON(!w->data->keys);
/* XXX: tracepoint */
BUG_ON(!closure_wait(&w->wait, cl));
closure_flush(&c->journal.io);
......
......@@ -9,6 +9,8 @@
#include "debug.h"
#include "request.h"
#include <trace/events/bcache.h>
struct moving_io {
struct keybuf_key *w;
struct search s;
......@@ -49,9 +51,8 @@ static void write_moving_finish(struct closure *cl)
while (bv-- != bio->bi_io_vec)
__free_page(bv->bv_page);
pr_debug("%s %s", io->s.op.insert_collision
? "collision moving" : "moved",
pkey(&io->w->key));
if (io->s.op.insert_collision)
trace_bcache_gc_copy_collision(&io->w->key);
bch_keybuf_del(&io->s.op.c->moving_gc_keys, io->w);
......@@ -94,8 +95,6 @@ static void write_moving(struct closure *cl)
struct moving_io *io = container_of(s, struct moving_io, s);
if (!s->error) {
trace_bcache_write_moving(&io->bio.bio);
moving_init(io);
io->bio.bio.bi_sector = KEY_START(&io->w->key);
......@@ -122,7 +121,6 @@ static void read_moving_submit(struct closure *cl)
struct moving_io *io = container_of(s, struct moving_io, s);
struct bio *bio = &io->bio.bio;
trace_bcache_read_moving(bio);
bch_submit_bbio(bio, s->op.c, &io->w->key, 0);
continue_at(cl, write_moving, bch_gc_wq);
......@@ -162,7 +160,7 @@ static void read_moving(struct closure *cl)
if (bch_bio_alloc_pages(bio, GFP_KERNEL))
goto err;
pr_debug("%s", pkey(&w->key));
trace_bcache_gc_copy(&w->key);
closure_call(&io->s.cl, read_moving_submit, NULL, &c->gc.cl);
......
......@@ -530,10 +530,9 @@ static void bch_insert_data_loop(struct closure *cl)
if (KEY_CSUM(k))
bio_csum(n, k);
pr_debug("%s", pkey(k));
trace_bcache_cache_insert(k);
bch_keylist_push(&op->keys);
trace_bcache_cache_insert(n, n->bi_sector, n->bi_bdev);
n->bi_rw |= REQ_WRITE;
bch_submit_bbio(n, op->c, k, 0);
} while (n != bio);
......@@ -784,11 +783,8 @@ static void request_read_error(struct closure *cl)
int i;
if (s->recoverable) {
/* The cache read failed, but we can retry from the backing
* device.
*/
pr_debug("recovering at sector %llu",
(uint64_t) s->orig_bio->bi_sector);
/* Retry from the backing device: */
trace_bcache_read_retry(s->orig_bio);
s->error = 0;
bv = s->bio.bio.bi_io_vec;
......@@ -806,7 +802,6 @@ static void request_read_error(struct closure *cl)
/* XXX: invalidate cache */
trace_bcache_read_retry(&s->bio.bio);
closure_bio_submit(&s->bio.bio, &s->cl, s->d);
}
......@@ -899,6 +894,7 @@ static void request_read_done_bh(struct closure *cl)
struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
bch_mark_cache_accounting(s, !s->cache_miss, s->op.skip);
trace_bcache_read(s->orig_bio, !s->cache_miss, s->op.skip);
if (s->error)
continue_at_nobarrier(cl, request_read_error, bcache_wq);
......@@ -969,7 +965,6 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
s->cache_miss = miss;
bio_get(s->op.cache_bio);
trace_bcache_cache_miss(s->orig_bio);
closure_bio_submit(s->op.cache_bio, &s->cl, s->d);
return ret;
......@@ -1040,15 +1035,15 @@ static void request_write(struct cached_dev *dc, struct search *s)
if (should_writeback(dc, s->orig_bio))
s->writeback = true;
trace_bcache_write(s->orig_bio, s->writeback, s->op.skip);
if (!s->writeback) {
s->op.cache_bio = bio_clone_bioset(bio, GFP_NOIO,
dc->disk.bio_split);
trace_bcache_writethrough(s->orig_bio);
closure_bio_submit(bio, cl, s->d);
} else {
s->op.cache_bio = bio;
trace_bcache_writeback(s->orig_bio);
bch_writeback_add(dc, bio_sectors(bio));
}
out:
......@@ -1058,7 +1053,6 @@ static void request_write(struct cached_dev *dc, struct search *s)
s->op.skip = true;
s->op.cache_bio = s->orig_bio;
bio_get(s->op.cache_bio);
trace_bcache_write_skip(s->orig_bio);
if ((bio->bi_rw & REQ_DISCARD) &&
!blk_queue_discard(bdev_get_queue(dc->bdev)))
......@@ -1088,9 +1082,10 @@ static void request_nodata(struct cached_dev *dc, struct search *s)
/* Cached devices - read & write stuff */
int bch_get_congested(struct cache_set *c)
unsigned bch_get_congested(struct cache_set *c)
{
int i;
long rand;
if (!c->congested_read_threshold_us &&
!c->congested_write_threshold_us)
......@@ -1106,7 +1101,13 @@ int bch_get_congested(struct cache_set *c)
i += CONGESTED_MAX;
return i <= 0 ? 1 : fract_exp_two(i, 6);
if (i > 0)
i = fract_exp_two(i, 6);
rand = get_random_int();
i -= bitmap_weight(&rand, BITS_PER_LONG);
return i > 0 ? i : 1;
}
static void add_sequential(struct task_struct *t)
......@@ -1126,10 +1127,8 @@ static void check_should_skip(struct cached_dev *dc, struct search *s)
{
struct cache_set *c = s->op.c;
struct bio *bio = &s->bio.bio;
long rand;
int cutoff = bch_get_congested(c);
unsigned mode = cache_mode(dc, bio);
unsigned sectors, congested = bch_get_congested(c);
if (atomic_read(&dc->disk.detaching) ||
c->gc_stats.in_use > CUTOFF_CACHE_ADD ||
......@@ -1147,17 +1146,14 @@ static void check_should_skip(struct cached_dev *dc, struct search *s)
goto skip;
}
if (!cutoff) {
cutoff = dc->sequential_cutoff >> 9;
if (!congested && !dc->sequential_cutoff)
goto rescale;
if (!cutoff)
goto rescale;
if (mode == CACHE_MODE_WRITEBACK &&
(bio->bi_rw & REQ_WRITE) &&
(bio->bi_rw & REQ_SYNC))
goto rescale;
}
if (!congested &&
mode == CACHE_MODE_WRITEBACK &&
(bio->bi_rw & REQ_WRITE) &&
(bio->bi_rw & REQ_SYNC))
goto rescale;
if (dc->sequential_merge) {
struct io *i;
......@@ -1192,12 +1188,19 @@ static void check_should_skip(struct cached_dev *dc, struct search *s)
add_sequential(s->task);
}
rand = get_random_int();
cutoff -= bitmap_weight(&rand, BITS_PER_LONG);
sectors = max(s->task->sequential_io,
s->task->sequential_io_avg) >> 9;
if (cutoff <= (int) (max(s->task->sequential_io,
s->task->sequential_io_avg) >> 9))
if (dc->sequential_cutoff &&
sectors >= dc->sequential_cutoff >> 9) {
trace_bcache_bypass_sequential(s->orig_bio);
goto skip;
}
if (congested && sectors >= congested) {
trace_bcache_bypass_congested(s->orig_bio);
goto skip;
}
rescale:
bch_rescale_priorities(c, bio_sectors(bio));
......
......@@ -30,7 +30,7 @@ struct search {
};
void bch_cache_read_endio(struct bio *, int);
int bch_get_congested(struct cache_set *);
unsigned bch_get_congested(struct cache_set *);
void bch_insert_data(struct closure *cl);
void bch_btree_insert_async(struct closure *);
void bch_cache_read_endio(struct bio *, int);
......
......@@ -11,6 +11,7 @@
#include "debug.h"
#include "request.h"
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/debugfs.h>
#include <linux/genhd.h>
......@@ -543,7 +544,6 @@ void bch_prio_write(struct cache *ca)
pr_debug("free %zu, free_inc %zu, unused %zu", fifo_used(&ca->free),
fifo_used(&ca->free_inc), fifo_used(&ca->unused));
blktrace_msg(ca, "Starting priorities: " buckets_free(ca));
for (i = prio_buckets(ca) - 1; i >= 0; --i) {
long bucket;
......
......@@ -10,6 +10,7 @@
#include "btree.h"
#include "request.h"
#include <linux/blkdev.h>
#include <linux/sort.h>
static const char * const cache_replacement_policies[] = {
......
......@@ -2,6 +2,7 @@
#include "btree.h"
#include "request.h"
#include <linux/blktrace_api.h>
#include <linux/module.h>
#define CREATE_TRACE_POINTS
......@@ -9,18 +10,42 @@
EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_request_start);
EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_request_end);
EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_passthrough);
EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_cache_hit);
EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_cache_miss);
EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_bypass_sequential);
EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_bypass_congested);
EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_read);
EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_write);
EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_read_retry);
EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_writethrough);
EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_writeback);
EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_write_skip);
EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_cache_insert);
EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_journal_replay_key);
EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_journal_write);
EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_journal_full);
EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_journal_entry_full);
EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_cache_cannibalize);
EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_read);
EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_write);
EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_write_dirty);
EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_read_dirty);
EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_journal_write);
EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_cache_insert);
EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_alloc);
EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_alloc_fail);
EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_free);
EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_gc_coalesce);
EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_gc_start);
EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_gc_end);
EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_gc_copy);
EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_gc_copy_collision);
EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_split);
EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_compact);
EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_set_root);
EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_alloc_invalidate);
EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_alloc_fail);
EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_writeback);
EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_writeback_collision);
......@@ -15,8 +15,6 @@
struct closure;
#include <trace/events/bcache.h>
#ifdef CONFIG_BCACHE_EDEBUG
#define atomic_dec_bug(v) BUG_ON(atomic_dec_return(v) < 0)
......
......@@ -10,6 +10,8 @@
#include "btree.h"
#include "debug.h"
#include <trace/events/bcache.h>
static struct workqueue_struct *dirty_wq;
static void read_dirty(struct closure *);
......@@ -236,10 +238,12 @@ static void write_dirty_finish(struct closure *cl)
for (i = 0; i < KEY_PTRS(&w->key); i++)
atomic_inc(&PTR_BUCKET(dc->disk.c, &w->key, i)->pin);
pr_debug("clearing %s", pkey(&w->key));
bch_btree_insert(&op, dc->disk.c);
closure_sync(&op.cl);
if (op.insert_collision)
trace_bcache_writeback_collision(&w->key);
atomic_long_inc(op.insert_collision
? &dc->disk.c->writeback_keys_failed
: &dc->disk.c->writeback_keys_done);
......@@ -275,7 +279,6 @@ static void write_dirty(struct closure *cl)
io->bio.bi_bdev = io->dc->bdev;
io->bio.bi_end_io = dirty_endio;
trace_bcache_write_dirty(&io->bio);
closure_bio_submit(&io->bio, cl, &io->dc->disk);
continue_at(cl, write_dirty_finish, dirty_wq);
......@@ -296,7 +299,6 @@ static void read_dirty_submit(struct closure *cl)
{
struct dirty_io *io = container_of(cl, struct dirty_io, cl);
trace_bcache_read_dirty(&io->bio);
closure_bio_submit(&io->bio, cl, &io->dc->disk);
continue_at(cl, write_dirty, dirty_wq);
......@@ -352,7 +354,7 @@ static void read_dirty(struct closure *cl)
if (bch_bio_alloc_pages(&io->bio, GFP_KERNEL))
goto err_free;
pr_debug("%s", pkey(&w->key));
trace_bcache_writeback(&w->key);
closure_call(&io->cl, read_dirty_submit, NULL, &dc->disk.cl);
......
......@@ -9,9 +9,7 @@
struct search;
DECLARE_EVENT_CLASS(bcache_request,
TP_PROTO(struct search *s, struct bio *bio),
TP_ARGS(s, bio),
TP_STRUCT__entry(
......@@ -22,7 +20,6 @@ DECLARE_EVENT_CLASS(bcache_request,
__field(dev_t, orig_sector )
__field(unsigned int, nr_sector )
__array(char, rwbs, 6 )
__array(char, comm, TASK_COMM_LEN )
),
TP_fast_assign(
......@@ -33,36 +30,66 @@ DECLARE_EVENT_CLASS(bcache_request,
__entry->orig_sector = bio->bi_sector - 16;
__entry->nr_sector = bio->bi_size >> 9;
blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
),
TP_printk("%d,%d %s %llu + %u [%s] (from %d,%d @ %llu)",
TP_printk("%d,%d %s %llu + %u (from %d,%d @ %llu)",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->rwbs,
(unsigned long long)__entry->sector,
__entry->nr_sector, __entry->comm,
__entry->orig_major, __entry->orig_minor,
__entry->rwbs, (unsigned long long)__entry->sector,
__entry->nr_sector, __entry->orig_major, __entry->orig_minor,
(unsigned long long)__entry->orig_sector)
);
DEFINE_EVENT(bcache_request, bcache_request_start,
DECLARE_EVENT_CLASS(bkey,
TP_PROTO(struct bkey *k),
TP_ARGS(k),
TP_PROTO(struct search *s, struct bio *bio),
TP_STRUCT__entry(
__field(u32, size )
__field(u32, inode )
__field(u64, offset )
__field(bool, dirty )
),
TP_ARGS(s, bio)
TP_fast_assign(
__entry->inode = KEY_INODE(k);
__entry->offset = KEY_OFFSET(k);
__entry->size = KEY_SIZE(k);
__entry->dirty = KEY_DIRTY(k);
),
TP_printk("%u:%llu len %u dirty %u", __entry->inode,
__entry->offset, __entry->size, __entry->dirty)
);
DEFINE_EVENT(bcache_request, bcache_request_end,
DECLARE_EVENT_CLASS(btree_node,
TP_PROTO(struct btree *b),
TP_ARGS(b),
TP_STRUCT__entry(
__field(size_t, bucket )
),
TP_fast_assign(
__entry->bucket = PTR_BUCKET_NR(b->c, &b->key, 0);
),
TP_printk("bucket %zu", __entry->bucket)
);
/* request.c */
DEFINE_EVENT(bcache_request, bcache_request_start,
TP_PROTO(struct search *s, struct bio *bio),
TP_ARGS(s, bio)
);
DEFINE_EVENT(bcache_request, bcache_request_end,
TP_PROTO(struct search *s, struct bio *bio),
TP_ARGS(s, bio)
);
DECLARE_EVENT_CLASS(bcache_bio,
TP_PROTO(struct bio *bio),
TP_ARGS(bio),
TP_STRUCT__entry(
......@@ -70,7 +97,6 @@ DECLARE_EVENT_CLASS(bcache_bio,
__field(sector_t, sector )
__field(unsigned int, nr_sector )
__array(char, rwbs, 6 )
__array(char, comm, TASK_COMM_LEN )
),
TP_fast_assign(
......@@ -78,191 +104,295 @@ DECLARE_EVENT_CLASS(bcache_bio,
__entry->sector = bio->bi_sector;
__entry->nr_sector = bio->bi_size >> 9;
blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
),
TP_printk("%d,%d %s %llu + %u [%s]",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->rwbs,
(unsigned long long)__entry->sector,
__entry->nr_sector, __entry->comm)
TP_printk("%d,%d %s %llu + %u",
MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
(unsigned long long)__entry->sector, __entry->nr_sector)
);
DEFINE_EVENT(bcache_bio, bcache_passthrough,
DEFINE_EVENT(bcache_bio, bcache_bypass_sequential,
TP_PROTO(struct bio *bio),
TP_ARGS(bio)
);
DEFINE_EVENT(bcache_bio, bcache_cache_hit,
DEFINE_EVENT(bcache_bio, bcache_bypass_congested,
TP_PROTO(struct bio *bio),
TP_ARGS(bio)
);
DEFINE_EVENT(bcache_bio, bcache_cache_miss,
TRACE_EVENT(bcache_read,
TP_PROTO(struct bio *bio, bool hit, bool bypass),
TP_ARGS(bio, hit, bypass),
TP_PROTO(struct bio *bio),
TP_STRUCT__entry(
__field(dev_t, dev )
__field(sector_t, sector )
__field(unsigned int, nr_sector )
__array(char, rwbs, 6 )
__field(bool, cache_hit )
__field(bool, bypass )
),
TP_ARGS(bio)
TP_fast_assign(
__entry->dev = bio->bi_bdev->bd_dev;
__entry->sector = bio->bi_sector;
__entry->nr_sector = bio->bi_size >> 9;
blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
__entry->cache_hit = hit;
__entry->bypass = bypass;
),
TP_printk("%d,%d %s %llu + %u hit %u bypass %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->rwbs, (unsigned long long)__entry->sector,
__entry->nr_sector, __entry->cache_hit, __entry->bypass)
);
DEFINE_EVENT(bcache_bio, bcache_read_retry,
TRACE_EVENT(bcache_write,
TP_PROTO(struct bio *bio, bool writeback, bool bypass),
TP_ARGS(bio, writeback, bypass),
TP_PROTO(struct bio *bio),
TP_STRUCT__entry(
__field(dev_t, dev )
__field(sector_t, sector )
__field(unsigned int, nr_sector )
__array(char, rwbs, 6 )
__field(bool, writeback )
__field(bool, bypass )
),
TP_ARGS(bio)
);
TP_fast_assign(
__entry->dev = bio->bi_bdev->bd_dev;
__entry->sector = bio->bi_sector;
__entry->nr_sector = bio->bi_size >> 9;
blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
__entry->writeback = writeback;
__entry->bypass = bypass;
),
DEFINE_EVENT(bcache_bio, bcache_writethrough,
TP_printk("%d,%d %s %llu + %u hit %u bypass %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->rwbs, (unsigned long long)__entry->sector,
__entry->nr_sector, __entry->writeback, __entry->bypass)
);
DEFINE_EVENT(bcache_bio, bcache_read_retry,
TP_PROTO(struct bio *bio),
TP_ARGS(bio)
);
DEFINE_EVENT(bcache_bio, bcache_writeback,
DEFINE_EVENT(bkey, bcache_cache_insert,
TP_PROTO(struct bkey *k),
TP_ARGS(k)
);
TP_PROTO(struct bio *bio),
/* Journal */
TP_ARGS(bio)
);
DECLARE_EVENT_CLASS(cache_set,
TP_PROTO(struct cache_set *c),
TP_ARGS(c),
DEFINE_EVENT(bcache_bio, bcache_write_skip,
TP_STRUCT__entry(
__array(char, uuid, 16 )
),
TP_PROTO(struct bio *bio),
TP_fast_assign(
memcpy(__entry->uuid, c->sb.set_uuid, 16);
),
TP_ARGS(bio)
TP_printk("%pU", __entry->uuid)
);
DEFINE_EVENT(bcache_bio, bcache_btree_read,
TP_PROTO(struct bio *bio),
DEFINE_EVENT(bkey, bcache_journal_replay_key,
TP_PROTO(struct bkey *k),
TP_ARGS(k)
);
TP_ARGS(bio)
DEFINE_EVENT(cache_set, bcache_journal_full,
TP_PROTO(struct cache_set *c),
TP_ARGS(c)
);
DEFINE_EVENT(bcache_bio, bcache_btree_write,
DEFINE_EVENT(cache_set, bcache_journal_entry_full,
TP_PROTO(struct cache_set *c),
TP_ARGS(c)
);
DEFINE_EVENT(bcache_bio, bcache_journal_write,
TP_PROTO(struct bio *bio),
TP_ARGS(bio)
);
DEFINE_EVENT(bcache_bio, bcache_write_dirty,
/* Btree */
TP_PROTO(struct bio *bio),
DEFINE_EVENT(cache_set, bcache_btree_cache_cannibalize,
TP_PROTO(struct cache_set *c),
TP_ARGS(c)
);
TP_ARGS(bio)
DEFINE_EVENT(btree_node, bcache_btree_read,
TP_PROTO(struct btree *b),
TP_ARGS(b)
);
DEFINE_EVENT(bcache_bio, bcache_read_dirty,
TRACE_EVENT(bcache_btree_write,
TP_PROTO(struct btree *b),
TP_ARGS(b),
TP_PROTO(struct bio *bio),
TP_STRUCT__entry(
__field(size_t, bucket )
__field(unsigned, block )
__field(unsigned, keys )
),
TP_ARGS(bio)
TP_fast_assign(
__entry->bucket = PTR_BUCKET_NR(b->c, &b->key, 0);
__entry->block = b->written;
__entry->keys = b->sets[b->nsets].data->keys;
),
TP_printk("bucket %zu", __entry->bucket)
);
DEFINE_EVENT(bcache_bio, bcache_write_moving,
DEFINE_EVENT(btree_node, bcache_btree_node_alloc,
TP_PROTO(struct btree *b),
TP_ARGS(b)
);
TP_PROTO(struct bio *bio),
DEFINE_EVENT(btree_node, bcache_btree_node_alloc_fail,
TP_PROTO(struct btree *b),
TP_ARGS(b)
);
TP_ARGS(bio)
DEFINE_EVENT(btree_node, bcache_btree_node_free,
TP_PROTO(struct btree *b),
TP_ARGS(b)
);
DEFINE_EVENT(bcache_bio, bcache_read_moving,
TRACE_EVENT(bcache_btree_gc_coalesce,
TP_PROTO(unsigned nodes),
TP_ARGS(nodes),
TP_PROTO(struct bio *bio),
TP_STRUCT__entry(
__field(unsigned, nodes )
),
TP_ARGS(bio)
);
TP_fast_assign(
__entry->nodes = nodes;
),
DEFINE_EVENT(bcache_bio, bcache_journal_write,
TP_printk("coalesced %u nodes", __entry->nodes)
);
TP_PROTO(struct bio *bio),
DEFINE_EVENT(cache_set, bcache_gc_start,
TP_PROTO(struct cache_set *c),
TP_ARGS(c)
);
TP_ARGS(bio)
DEFINE_EVENT(cache_set, bcache_gc_end,
TP_PROTO(struct cache_set *c),
TP_ARGS(c)
);
DECLARE_EVENT_CLASS(bcache_cache_bio,
DEFINE_EVENT(bkey, bcache_gc_copy,
TP_PROTO(struct bkey *k),
TP_ARGS(k)
);
TP_PROTO(struct bio *bio,
sector_t orig_sector,
struct block_device* orig_bdev),
DEFINE_EVENT(bkey, bcache_gc_copy_collision,
TP_PROTO(struct bkey *k),
TP_ARGS(k)
);
TP_ARGS(bio, orig_sector, orig_bdev),
DECLARE_EVENT_CLASS(btree_split,
TP_PROTO(struct btree *b, unsigned keys),
TP_ARGS(b, keys),
TP_STRUCT__entry(
__field(dev_t, dev )
__field(dev_t, orig_dev )
__field(sector_t, sector )
__field(sector_t, orig_sector )
__field(unsigned int, nr_sector )
__array(char, rwbs, 6 )
__array(char, comm, TASK_COMM_LEN )
__field(size_t, bucket )
__field(unsigned, keys )
),
TP_fast_assign(
__entry->dev = bio->bi_bdev->bd_dev;
__entry->orig_dev = orig_bdev->bd_dev;
__entry->sector = bio->bi_sector;
__entry->orig_sector = orig_sector;
__entry->nr_sector = bio->bi_size >> 9;
blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
__entry->bucket = PTR_BUCKET_NR(b->c, &b->key, 0);
__entry->keys = keys;
),
TP_printk("%d,%d %s %llu + %u [%s] (from %d,%d %llu)",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->rwbs,
(unsigned long long)__entry->sector,
__entry->nr_sector, __entry->comm,
MAJOR(__entry->orig_dev), MINOR(__entry->orig_dev),
(unsigned long long)__entry->orig_sector)
TP_printk("bucket %zu keys %u", __entry->bucket, __entry->keys)
);
DEFINE_EVENT(bcache_cache_bio, bcache_cache_insert,
TP_PROTO(struct bio *bio,
sector_t orig_sector,
struct block_device *orig_bdev),
DEFINE_EVENT(btree_split, bcache_btree_node_split,
TP_PROTO(struct btree *b, unsigned keys),
TP_ARGS(b, keys)
);
TP_ARGS(bio, orig_sector, orig_bdev)
DEFINE_EVENT(btree_split, bcache_btree_node_compact,
TP_PROTO(struct btree *b, unsigned keys),
TP_ARGS(b, keys)
);
DECLARE_EVENT_CLASS(bcache_gc,
DEFINE_EVENT(btree_node, bcache_btree_set_root,
TP_PROTO(struct btree *b),
TP_ARGS(b)
);
TP_PROTO(uint8_t *uuid),
/* Allocator */
TP_ARGS(uuid),
TRACE_EVENT(bcache_alloc_invalidate,
TP_PROTO(struct cache *ca),
TP_ARGS(ca),
TP_STRUCT__entry(
__field(uint8_t *, uuid)
__field(unsigned, free )
__field(unsigned, free_inc )
__field(unsigned, free_inc_size )
__field(unsigned, unused )
),
TP_fast_assign(
__entry->uuid = uuid;
__entry->free = fifo_used(&ca->free);
__entry->free_inc = fifo_used(&ca->free_inc);
__entry->free_inc_size = ca->free_inc.size;
__entry->unused = fifo_used(&ca->unused);
),
TP_printk("%pU", __entry->uuid)
TP_printk("free %u free_inc %u/%u unused %u", __entry->free,
__entry->free_inc, __entry->free_inc_size, __entry->unused)
);
TRACE_EVENT(bcache_alloc_fail,
TP_PROTO(struct cache *ca),
TP_ARGS(ca),
DEFINE_EVENT(bcache_gc, bcache_gc_start,
TP_STRUCT__entry(
__field(unsigned, free )
__field(unsigned, free_inc )
__field(unsigned, unused )
__field(unsigned, blocked )
),
TP_PROTO(uint8_t *uuid),
TP_fast_assign(
__entry->free = fifo_used(&ca->free);
__entry->free_inc = fifo_used(&ca->free_inc);
__entry->unused = fifo_used(&ca->unused);
__entry->blocked = atomic_read(&ca->set->prio_blocked);
),
TP_ARGS(uuid)
TP_printk("free %u free_inc %u unused %u blocked %u", __entry->free,
__entry->free_inc, __entry->unused, __entry->blocked)
);
DEFINE_EVENT(bcache_gc, bcache_gc_end,
/* Background writeback */
TP_PROTO(uint8_t *uuid),
DEFINE_EVENT(bkey, bcache_writeback,
TP_PROTO(struct bkey *k),
TP_ARGS(k)
);
TP_ARGS(uuid)
DEFINE_EVENT(bkey, bcache_writeback_collision,
TP_PROTO(struct bkey *k),
TP_ARGS(k)
);
#endif /* _TRACE_BCACHE_H */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment