// SPDX-License-Identifier: GPL-2.0

#include "bcachefs.h"
#include "bkey_methods.h"
#include "bkey_buf.h"
#include "btree_cache.h"
#include "btree_iter.h"
#include "btree_key_cache.h"
#include "btree_locking.h"
#include "btree_update.h"
#include "debug.h"
#include "error.h"
#include "extents.h"
#include "journal.h"
#include "recovery.h"
#include "replicas.h"
#include "subvolume.h"
#include "trace.h"

#include <linux/prefetch.h>
static inline void btree_path_list_remove(struct btree_trans *, struct btree_path *);
static inline void btree_path_list_add(struct btree_trans *, struct btree_path *,
				       struct btree_path *);

static struct btree_path *btree_path_alloc(struct btree_trans *, struct btree_path *);

/*
 * Unlocks before scheduling
 * Note: does not revalidate iterator
 */
static inline int bch2_trans_cond_resched(struct btree_trans *trans)
{
	if (need_resched() || race_fault()) {
		bch2_trans_unlock(trans);
		schedule();
		return bch2_trans_relock(trans) ? 0 : -EINTR;
	} else {
		return 0;
	}
}
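
/*
 * Illustrative sketch (not part of this file): -EINTR here means the
 * transaction was restarted, so callers are expected to retry from the
 * top, e.g. (do_stuff() is a hypothetical placeholder):
 *
 *	do {
 *		bch2_trans_begin(&trans);
 *		ret = do_stuff(&trans) ?:
 *		      bch2_trans_commit(&trans, NULL, NULL, 0);
 *	} while (ret == -EINTR);
 */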

static inline int __btree_path_cmp(const struct btree_path *l,
				   enum btree_id	r_btree_id,
				   bool			r_cached,
				   struct bpos		r_pos,
				   unsigned		r_level)
{
	return   cmp_int(l->btree_id,	r_btree_id) ?:
		 cmp_int((int) l->cached,	(int) r_cached) ?:
		 bpos_cmp(l->pos,	r_pos) ?:
		-cmp_int(l->level,	r_level);
}

static inline int btree_path_cmp(const struct btree_path *l,
				 const struct btree_path *r)
{
	return __btree_path_cmp(l, r->btree_id, r->cached, r->pos, r->level);
}
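
/*
 * Paths thus sort by (btree_id, cached, pos, -level); for equal btree_id,
 * cached and pos, higher levels sort first, e.g. (a sketch, fields other
 * than pos/level assumed equal):
 *
 *	{ .pos = POS(0, 1), .level = 0 }
 *	{ .pos = POS(0, 2), .level = 1 }
 *	{ .pos = POS(0, 2), .level = 0 }
 */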

static inline struct bpos bkey_successor(struct btree_iter *iter, struct bpos p)
{
	/* Are we iterating over keys in all snapshots? */
	if (iter->flags & BTREE_ITER_ALL_SNAPSHOTS) {
		p = bpos_successor(p);
	} else {
		p = bpos_nosnap_successor(p);
		p.snapshot = iter->snapshot;
	}

	return p;
}

static inline struct bpos bkey_predecessor(struct btree_iter *iter, struct bpos p)
{
	/* Are we iterating over keys in all snapshots? */
	if (iter->flags & BTREE_ITER_ALL_SNAPSHOTS) {
		p = bpos_predecessor(p);
	} else {
		p = bpos_nosnap_predecessor(p);
		p.snapshot = iter->snapshot;
	}

	return p;
}
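
/*
 * Note (a restatement, not new behaviour): when not iterating over all
 * snapshots, the successor of SPOS(inode, offset, snap) is
 * SPOS(inode, offset + 1, snap) - the snapshot field stays pinned to
 * iter->snapshot rather than advancing through other snapshots of the
 * same key position.
 */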

static inline bool is_btree_node(struct btree_path *path, unsigned l)
{
	return l < BTREE_MAX_DEPTH &&
		(unsigned long) path->l[l].b >= 128;
}

static inline struct bpos btree_iter_search_key(struct btree_iter *iter)
{
	struct bpos pos = iter->pos;

	if ((iter->flags & BTREE_ITER_IS_EXTENTS) &&
	    bkey_cmp(pos, POS_MAX))
		pos = bkey_successor(iter, pos);
	return pos;
}

static inline bool btree_path_pos_before_node(struct btree_path *path,
					      struct btree *b)
{
	return bpos_cmp(path->pos, b->data->min_key) < 0;
}

static inline bool btree_path_pos_after_node(struct btree_path *path,
					     struct btree *b)
{
	return bpos_cmp(b->key.k.p, path->pos) < 0;
}

static inline bool btree_path_pos_in_node(struct btree_path *path,
					  struct btree *b)
{
	return path->btree_id == b->c.btree_id &&
		!btree_path_pos_before_node(path, b) &&
		!btree_path_pos_after_node(path, b);
}

/* Btree node locking: */

void bch2_btree_node_unlock_write(struct btree_trans *trans,
			struct btree_path *path, struct btree *b)
{
	bch2_btree_node_unlock_write_inlined(trans, path, b);
}

void __bch2_btree_node_lock_write(struct btree_trans *trans, struct btree *b)
{
	struct btree_path *linked;
	unsigned readers = 0;

	trans_for_each_path(trans, linked)
		if (linked->l[b->c.level].b == b &&
		    btree_node_read_locked(linked, b->c.level))
			readers++;

	/*
	 * Must drop our read locks before calling six_lock_write() -
	 * six_unlock() won't do wakeups until the reader count
	 * goes to 0, and it's safe because we have the node intent
	 * locked:
	 */
	if (!b->c.lock.readers)
		atomic64_sub(__SIX_VAL(read_lock, readers),
			     &b->c.lock.state.counter);
	else
		this_cpu_sub(*b->c.lock.readers, readers);

	btree_node_lock_type(trans->c, b, SIX_LOCK_write);

	if (!b->c.lock.readers)
		atomic64_add(__SIX_VAL(read_lock, readers),
			     &b->c.lock.state.counter);
	else
		this_cpu_add(*b->c.lock.readers, readers);
}
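
/*
 * Worked example (hypothetical): if two other paths in this transaction
 * have b read locked, readers == 2; those two reads are subtracted from
 * the lock's read count so six_lock_write() isn't blocked by our own read
 * locks, then added back once the write lock is held. Dropping the count
 * directly is safe because we hold the intent lock, which excludes other
 * writers.
 */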

bool __bch2_btree_node_relock(struct btree_trans *trans,
			      struct btree_path *path, unsigned level)
{
	struct btree *b = btree_path_node(path, level);
	int want = __btree_lock_want(path, level);

	if (!is_btree_node(path, level))
		goto fail;

	if (race_fault())
		goto fail;

	if (six_relock_type(&b->c.lock, want, path->l[level].lock_seq) ||
	    (btree_node_lock_seq_matches(path, b, level) &&
	     btree_node_lock_increment(trans, b, level, want))) {
		mark_btree_node_locked(path, level, want);
		return true;
	}
fail:
	trace_btree_node_relock_fail(trans->fn, _RET_IP_,
				     path->btree_id,
				     &path->pos,
				     (unsigned long) b,
				     path->l[level].lock_seq,
				     is_btree_node(path, level) ? b->c.lock.state.seq : 0);
	return false;
}
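
/*
 * Restating how relocking works: path->l[level].lock_seq records the six
 * lock sequence number from when the node was locked; six_relock_type()
 * succeeds only if the sequence number hasn't changed, i.e. no one write
 * locked the node in the meantime. Otherwise we return false and the path
 * must be re-traversed.
 */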

bool bch2_btree_node_upgrade(struct btree_trans *trans,
			     struct btree_path *path, unsigned level)
{
	struct btree *b = path->l[level].b;

	if (!is_btree_node(path, level))
		return false;

	switch (btree_lock_want(path, level)) {
	case BTREE_NODE_UNLOCKED:
		BUG_ON(btree_node_locked(path, level));
		return true;
	case BTREE_NODE_READ_LOCKED:
		BUG_ON(btree_node_intent_locked(path, level));
		return bch2_btree_node_relock(trans, path, level);
	case BTREE_NODE_INTENT_LOCKED:
		break;
	}

	if (btree_node_intent_locked(path, level))
		return true;

	if (race_fault())
		return false;

	if (btree_node_locked(path, level)
	    ? six_lock_tryupgrade(&b->c.lock)
	    : six_relock_type(&b->c.lock, SIX_LOCK_intent, path->l[level].lock_seq))
		goto success;

	if (btree_node_lock_seq_matches(path, b, level) &&
	    btree_node_lock_increment(trans, b, level, BTREE_NODE_INTENT_LOCKED)) {
		btree_node_unlock(path, level);
		goto success;
	}

	return false;
success:
	mark_btree_node_intent_locked(path, level);
	return true;
}

static inline bool btree_path_get_locks(struct btree_trans *trans,
					struct btree_path *path,
					bool upgrade)
{
	unsigned l = path->level;
	int fail_idx = -1;

	do {
		if (!btree_path_node(path, l))
			break;

		if (!(upgrade
		      ? bch2_btree_node_upgrade(trans, path, l)
		      : bch2_btree_node_relock(trans, path, l)))
			fail_idx = l;

		l++;
	} while (l < path->locks_want);

	/*
	 * When we fail to get a lock, we have to ensure that any child nodes
	 * can't be relocked so bch2_btree_path_traverse has to walk back up to
	 * the node that we failed to relock:
	 */
	if (fail_idx >= 0) {
		__bch2_btree_path_unlock(path);
		btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);

		do {
			path->l[fail_idx].b = BTREE_ITER_NO_NODE_GET_LOCKS;
			--fail_idx;
		} while (fail_idx >= 0);
	}

	if (path->uptodate == BTREE_ITER_NEED_RELOCK)
		path->uptodate = BTREE_ITER_UPTODATE;

	bch2_trans_verify_locks(trans);

	return path->uptodate < BTREE_ITER_NEED_RELOCK;
}

static struct bpos btree_node_pos(struct btree_bkey_cached_common *_b,
				  bool cached)
{
	return !cached
		? container_of(_b, struct btree, c)->key.k.p
		: container_of(_b, struct bkey_cached, c)->key.pos;
}

/* Slowpath: */
bool __bch2_btree_node_lock(struct btree_trans *trans,
			    struct btree_path *path,
			    struct btree *b,
			    struct bpos pos, unsigned level,
			    enum six_lock_type type,
			    six_lock_should_sleep_fn should_sleep_fn, void *p,
			    unsigned long ip)
{
	struct btree_path *linked, *deadlock_path = NULL;
	u64 start_time = local_clock();
	unsigned reason = 9;
	bool ret;

	/* Check if it's safe to block: */
	trans_for_each_path(trans, linked) {
		if (!linked->nodes_locked)
			continue;

		/*
		 * Can't block taking an intent lock if we have _any_ nodes read
		 * locked:
		 *
		 * - Our read lock blocks another thread with an intent lock on
		 *   the same node from getting a write lock, and thus from
		 *   dropping its intent lock
		 *
		 * - And the other thread may have multiple nodes intent locked:
		 *   both the node we want to intent lock, and the node we
		 *   already have read locked - deadlock:
		 */
		if (type == SIX_LOCK_intent &&
		    linked->nodes_locked != linked->nodes_intent_locked) {
			deadlock_path = linked;
			reason = 1;
		}

		if (linked->btree_id != path->btree_id) {
			if (linked->btree_id > path->btree_id) {
				deadlock_path = linked;
				reason = 3;
			}
			continue;
		}

		/*
		 * Within the same btree, cached paths come before non-cached
		 * paths:
		 */
		if (linked->cached != path->cached) {
			if (path->cached) {
				deadlock_path = linked;
				reason = 4;
			}
			continue;
		}

		/*
		 * Interior nodes must be locked before their descendants: if
		 * another path has locked possible descendants of the node
		 * we're about to lock, it must have the ancestors locked too:
		 */
		if (level > __fls(linked->nodes_locked)) {
			deadlock_path = linked;
			reason = 5;
		}

		/* Must lock btree nodes in key order: */
		if (btree_node_locked(linked, level) &&
		    bpos_cmp(pos, btree_node_pos((void *) linked->l[level].b,
						 linked->cached)) <= 0) {
			deadlock_path = linked;
			reason = 7;
		}
	}

	if (unlikely(deadlock_path)) {
		trace_trans_restart_would_deadlock(trans->fn, ip,
				trans->in_traverse_all, reason,
				deadlock_path->btree_id,
				deadlock_path->cached,
				&deadlock_path->pos,
				path->btree_id,
				path->cached,
				&pos);
		btree_trans_restart(trans);
		return false;
	}

	if (six_trylock_type(&b->c.lock, type))
		return true;

	trans->locking_path_idx = path->idx;
	trans->locking_pos	= pos;
	trans->locking_btree_id	= path->btree_id;
	trans->locking_level	= level;
	trans->locking		= b;

	ret = six_lock_type(&b->c.lock, type, should_sleep_fn, p) == 0;

	trans->locking = NULL;

	if (ret)
		bch2_time_stats_update(&trans->c->times[lock_to_time_stat(type)],
				       start_time);
	return ret;
}
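
/*
 * Summary of the lock ordering enforced above (a restatement of the
 * checks, not new rules): btrees in ascending btree_id order; within a
 * btree, cached (key cache) paths before non-cached; interior nodes
 * before their descendants; nodes at the same level in strictly ascending
 * key order; and never block on an intent lock while holding only read
 * locks on any node. A would-be violation restarts the transaction
 * instead of risking deadlock.
 */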

/* Btree iterator locking: */

#ifdef CONFIG_BCACHEFS_DEBUG

static void bch2_btree_path_verify_locks(struct btree_path *path)
{
	unsigned l;

	if (!path->nodes_locked) {
		BUG_ON(path->uptodate == BTREE_ITER_UPTODATE &&
		       btree_path_node(path, path->level));
		return;
	}

	for (l = 0; btree_path_node(path, l); l++)
		BUG_ON(btree_lock_want(path, l) !=
		       btree_node_locked_type(path, l));
}

void bch2_trans_verify_locks(struct btree_trans *trans)
{
	struct btree_path *path;

	trans_for_each_path(trans, path)
		bch2_btree_path_verify_locks(path);
}
#else
static inline void bch2_btree_path_verify_locks(struct btree_path *path) {}
#endif

/* Btree path locking: */

/*
 * Only for btree_cache.c - only relocks intent locks
 */
bool bch2_btree_path_relock_intent(struct btree_trans *trans,
				   struct btree_path *path)
{
	unsigned l;

	for (l = path->level;
	     l < path->locks_want && btree_path_node(path, l);
	     l++) {
		if (!bch2_btree_node_relock(trans, path, l)) {
			__bch2_btree_path_unlock(path);
			btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
			trace_trans_restart_relock_path_intent(trans->fn, _RET_IP_,
						   path->btree_id, &path->pos);
			btree_trans_restart(trans);
			return false;
		}
	}

	return true;
}

__flatten
static bool bch2_btree_path_relock(struct btree_trans *trans,
			struct btree_path *path, unsigned long trace_ip)
{
	bool ret = btree_path_get_locks(trans, path, false);

	if (!ret) {
		trace_trans_restart_relock_path(trans->fn, trace_ip,
						path->btree_id, &path->pos);
		btree_trans_restart(trans);
	}
	return ret;
}

bool __bch2_btree_path_upgrade(struct btree_trans *trans,
			       struct btree_path *path,
			       unsigned new_locks_want)
{
	struct btree_path *linked;

	EBUG_ON(path->locks_want >= new_locks_want);

	path->locks_want = new_locks_want;

	if (btree_path_get_locks(trans, path, true))
		return true;

	/*
	 * XXX: this is ugly - we'd prefer to not be mucking with other
	 * iterators in the btree_trans here.
	 *
	 * On failure to upgrade the iterator, setting iter->locks_want and
	 * calling get_locks() is sufficient to make bch2_btree_path_traverse()
	 * get the locks we want on transaction restart.
	 *
	 * But if this iterator was a clone, on transaction restart what we did
	 * to this iterator isn't going to be preserved.
	 *
	 * Possibly we could add an iterator field for the parent iterator when
	 * an iterator is a copy - for now, we'll just upgrade any other
	 * iterators with the same btree id.
	 *
	 * The code below used to be needed to ensure ancestor nodes get locked
	 * before interior nodes - now that's handled by
	 * bch2_btree_path_traverse_all().
	 */
	trans_for_each_path(trans, linked)
		if (linked != path &&
		    linked->cached == path->cached &&
		    linked->btree_id == path->btree_id &&
		    linked->locks_want < new_locks_want) {
			linked->locks_want = new_locks_want;
			btree_path_get_locks(trans, linked, true);
		}

	return false;
}

void __bch2_btree_path_downgrade(struct btree_path *path,
				 unsigned new_locks_want)
{
	unsigned l;

	EBUG_ON(path->locks_want < new_locks_want);

	path->locks_want = new_locks_want;

	while (path->nodes_locked &&
	       (l = __fls(path->nodes_locked)) >= path->locks_want) {
		if (l > path->level) {
			btree_node_unlock(path, l);
		} else {
			if (btree_node_intent_locked(path, l)) {
				six_lock_downgrade(&path->l[l].b->c.lock);
				path->nodes_intent_locked ^= 1 << l;
			}
			break;
		}
	}

	bch2_btree_path_verify_locks(path);
}

void bch2_trans_downgrade(struct btree_trans *trans)
{
	struct btree_path *path;

	trans_for_each_path(trans, path)
		bch2_btree_path_downgrade(path);
}

/* Btree transaction locking: */

bool bch2_trans_relock(struct btree_trans *trans)
{
	struct btree_path *path;

	if (unlikely(trans->restarted))
		return false;

	trans_for_each_path(trans, path)
		if (path->should_be_locked &&
		    !bch2_btree_path_relock(trans, path, _RET_IP_)) {
			trace_trans_restart_relock(trans->fn, _RET_IP_,
					path->btree_id, &path->pos);
			BUG_ON(!trans->restarted);
			return false;
		}
	return true;
}

void bch2_trans_unlock(struct btree_trans *trans)
{
	struct btree_path *path;

	trans_for_each_path(trans, path)
		__bch2_btree_path_unlock(path);
}

/* Btree iterator: */

#ifdef CONFIG_BCACHEFS_DEBUG

static void bch2_btree_path_verify_cached(struct btree_trans *trans,
					  struct btree_path *path)
{
	struct bkey_cached *ck;
	bool locked = btree_node_locked(path, 0);

	if (!bch2_btree_node_relock(trans, path, 0))
		return;

	ck = (void *) path->l[0].b;
	BUG_ON(ck->key.btree_id != path->btree_id ||
	       bkey_cmp(ck->key.pos, path->pos));

	if (!locked)
		btree_node_unlock(path, 0);
}

static void bch2_btree_path_verify_level(struct btree_trans *trans,
				struct btree_path *path, unsigned level)
{
	struct btree_path_level *l;
	struct btree_node_iter tmp;
	bool locked;
	struct bkey_packed *p, *k;
	char buf1[100], buf2[100], buf3[100];
	const char *msg;

	if (!bch2_debug_check_iterators)
		return;

	l	= &path->l[level];
	tmp	= l->iter;
	locked	= btree_node_locked(path, level);

	if (path->cached) {
		if (!level)
			bch2_btree_path_verify_cached(trans, path);
		return;
	}

	if (!btree_path_node(path, level))
		return;

	if (!bch2_btree_node_relock(trans, path, level))
		return;

	BUG_ON(!btree_path_pos_in_node(path, l->b));

	bch2_btree_node_iter_verify(&l->iter, l->b);

	/*
	 * For interior nodes, the iterator will have skipped past deleted keys:
	 */
	p = level
		? bch2_btree_node_iter_prev(&tmp, l->b)
		: bch2_btree_node_iter_prev_all(&tmp, l->b);
	k = bch2_btree_node_iter_peek_all(&l->iter, l->b);

	if (p && bkey_iter_pos_cmp(l->b, p, &path->pos) >= 0) {
		msg = "before";
		goto err;
	}

	if (k && bkey_iter_pos_cmp(l->b, k, &path->pos) < 0) {
		msg = "after";
		goto err;
	}

	if (!locked)
		btree_node_unlock(path, level);
	return;
err:
	strcpy(buf2, "(none)");
	strcpy(buf3, "(none)");

	bch2_bpos_to_text(&PBUF(buf1), path->pos);

	if (p) {
		struct bkey uk = bkey_unpack_key(l->b, p);
		bch2_bkey_to_text(&PBUF(buf2), &uk);
	}

	if (k) {
		struct bkey uk = bkey_unpack_key(l->b, k);
		bch2_bkey_to_text(&PBUF(buf3), &uk);
	}

	panic("path should be %s key at level %u:\n"
	      "path pos %s\n"
	      "prev key %s\n"
	      "cur  key %s\n",
	      msg, level, buf1, buf2, buf3);
}

static void bch2_btree_path_verify(struct btree_trans *trans,
				   struct btree_path *path)
{
	struct bch_fs *c = trans->c;
	unsigned i;

	EBUG_ON(path->btree_id >= BTREE_ID_NR);

	for (i = 0; i < (!path->cached ? BTREE_MAX_DEPTH : 1); i++) {
		if (!path->l[i].b) {
			BUG_ON(!path->cached &&
			       c->btree_roots[path->btree_id].b->c.level > i);
			break;
		}

		bch2_btree_path_verify_level(trans, path, i);
	}

	bch2_btree_path_verify_locks(path);
}

void bch2_trans_verify_paths(struct btree_trans *trans)
{
	struct btree_path *path;

	trans_for_each_path(trans, path)
		bch2_btree_path_verify(trans, path);
}

static void bch2_btree_iter_verify(struct btree_iter *iter)
{
	struct btree_trans *trans = iter->trans;

	BUG_ON(iter->btree_id >= BTREE_ID_NR);

	BUG_ON(!!(iter->flags & BTREE_ITER_CACHED) != iter->path->cached);

	BUG_ON((iter->flags & BTREE_ITER_IS_EXTENTS) &&
	       (iter->flags & BTREE_ITER_ALL_SNAPSHOTS));

	BUG_ON(!(iter->flags & __BTREE_ITER_ALL_SNAPSHOTS) &&
	       (iter->flags & BTREE_ITER_ALL_SNAPSHOTS) &&
	       !btree_type_has_snapshots(iter->btree_id));

	if (iter->update_path)
		bch2_btree_path_verify(trans, iter->update_path);
	bch2_btree_path_verify(trans, iter->path);
}

static void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter)
{
	BUG_ON((iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) &&
	       !iter->pos.snapshot);

	BUG_ON(!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS) &&
	       iter->pos.snapshot != iter->snapshot);

	BUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&iter->k)) < 0 ||
	       bkey_cmp(iter->pos, iter->k.p) > 0);
}

static int bch2_btree_iter_verify_ret(struct btree_iter *iter, struct bkey_s_c k)
{
	struct btree_trans *trans = iter->trans;
	struct btree_iter copy;
	struct bkey_s_c prev;
	int ret = 0;

	if (!bch2_debug_check_iterators)
		return 0;

	if (!(iter->flags & BTREE_ITER_FILTER_SNAPSHOTS))
		return 0;

	if (bkey_err(k) || !k.k)
		return 0;

	BUG_ON(!bch2_snapshot_is_ancestor(trans->c,
					  iter->snapshot,
					  k.k->p.snapshot));

	bch2_trans_iter_init(trans, &copy, iter->btree_id, iter->pos,
			     BTREE_ITER_NOPRESERVE|
			     BTREE_ITER_ALL_SNAPSHOTS);
	prev = bch2_btree_iter_prev(&copy);
	if (!prev.k)
		goto out;

	ret = bkey_err(prev);
	if (ret)
		goto out;

	if (!bkey_cmp(prev.k->p, k.k->p) &&
	    bch2_snapshot_is_ancestor(trans->c, iter->snapshot,
				      prev.k->p.snapshot) > 0) {
		char buf1[100], buf2[200];

		bch2_bkey_to_text(&PBUF(buf1), k.k);
		bch2_bkey_to_text(&PBUF(buf2), prev.k);

		panic("iter snap %u\n"
		      "k    %s\n"
		      "prev %s\n",
		      iter->snapshot,
		      buf1, buf2);
	}
out:
	bch2_trans_iter_exit(trans, &copy);
	return ret;
}

void bch2_assert_pos_locked(struct btree_trans *trans, enum btree_id id,
			    struct bpos pos, bool key_cache)
{
	struct btree_path *path;
	unsigned idx;
	char buf[100];

	trans_for_each_path_inorder(trans, path, idx) {
		int cmp = cmp_int(path->btree_id, id) ?:
			cmp_int(path->cached, key_cache);

		if (cmp > 0)
			break;
		if (cmp < 0)
			continue;

		if (!(path->nodes_locked & 1) ||
		    !path->should_be_locked)
			continue;

		if (!key_cache) {
			if (bkey_cmp(pos, path->l[0].b->data->min_key) >= 0 &&
			    bkey_cmp(pos, path->l[0].b->key.k.p) <= 0)
				return;
		} else {
			if (!bkey_cmp(pos, path->pos))
				return;
		}
	}

	bch2_dump_trans_paths_updates(trans);
	panic("not locked: %s %s%s\n",
	      bch2_btree_ids[id],
	      (bch2_bpos_to_text(&PBUF(buf), pos), buf),
	      key_cache ? " cached" : "");
}

#else

static inline void bch2_btree_path_verify_level(struct btree_trans *trans,
						struct btree_path *path, unsigned l) {}
static inline void bch2_btree_path_verify(struct btree_trans *trans,
					  struct btree_path *path) {}
static inline void bch2_btree_iter_verify(struct btree_iter *iter) {}
static inline void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter) {}
static inline int bch2_btree_iter_verify_ret(struct btree_iter *iter, struct bkey_s_c k) { return 0; }

#endif

/* Btree path: fixups after btree updates */

static void btree_node_iter_set_set_pos(struct btree_node_iter *iter,
					struct btree *b,
					struct bset_tree *t,
					struct bkey_packed *k)
{
	struct btree_node_iter_set *set;

	btree_node_iter_for_each(iter, set)
		if (set->end == t->end_offset) {
			set->k = __btree_node_key_to_offset(b, k);
			bch2_btree_node_iter_sort(iter, b);
			return;
		}

	bch2_btree_node_iter_push(iter, b, k, btree_bkey_last(b, t));
}

static void __bch2_btree_path_fix_key_modified(struct btree_path *path,
					       struct btree *b,
					       struct bkey_packed *where)
{
	struct btree_path_level *l = &path->l[b->c.level];

	if (where != bch2_btree_node_iter_peek_all(&l->iter, l->b))
		return;

	if (bkey_iter_pos_cmp(l->b, where, &path->pos) < 0)
		bch2_btree_node_iter_advance(&l->iter, l->b);
}

void bch2_btree_path_fix_key_modified(struct btree_trans *trans,
				      struct btree *b,
				      struct bkey_packed *where)
{
	struct btree_path *path;

	trans_for_each_path_with_node(trans, b, path) {
		__bch2_btree_path_fix_key_modified(path, b, where);
		bch2_btree_path_verify_level(trans, path, b->c.level);
	}
}

static void __bch2_btree_node_iter_fix(struct btree_path *path,
				       struct btree *b,
				       struct btree_node_iter *node_iter,
				       struct bset_tree *t,
				       struct bkey_packed *where,
				       unsigned clobber_u64s,
				       unsigned new_u64s)
{
	const struct bkey_packed *end = btree_bkey_last(b, t);
	struct btree_node_iter_set *set;
	unsigned offset = __btree_node_key_to_offset(b, where);
	int shift = new_u64s - clobber_u64s;
	unsigned old_end = t->end_offset - shift;
	unsigned orig_iter_pos = node_iter->data[0].k;
	bool iter_current_key_modified =
		orig_iter_pos >= offset &&
		orig_iter_pos <= offset + clobber_u64s;

	btree_node_iter_for_each(node_iter, set)
		if (set->end == old_end)
			goto found;

	/* didn't find the bset in the iterator - might have to re-add it: */
	if (new_u64s &&
	    bkey_iter_pos_cmp(b, where, &path->pos) >= 0) {
		bch2_btree_node_iter_push(node_iter, b, where, end);
		goto fixup_done;
	} else {
		/* Iterator is after key that changed */
		return;
	}
found:
	set->end = t->end_offset;

	/* Iterator hasn't gotten to the key that changed yet: */
	if (set->k < offset)
		return;

	if (new_u64s &&
	    bkey_iter_pos_cmp(b, where, &path->pos) >= 0) {
		set->k = offset;
	} else if (set->k < offset + clobber_u64s) {
		set->k = offset + new_u64s;
		if (set->k == set->end)
			bch2_btree_node_iter_set_drop(node_iter, set);
	} else {
		/* Iterator is after key that changed */
		set->k = (int) set->k + shift;
		return;
	}

	bch2_btree_node_iter_sort(node_iter, b);
fixup_done:
	if (node_iter->data[0].k != orig_iter_pos)
		iter_current_key_modified = true;

	/*
	 * When a new key is added, and the node iterator now points to that
	 * key, the iterator might have skipped past deleted keys that should
	 * come after the key the iterator now points to. We have to rewind to
	 * before those deleted keys - otherwise
	 * bch2_btree_node_iter_prev_all() breaks:
	 */
	if (!bch2_btree_node_iter_end(node_iter) &&
	    iter_current_key_modified &&
	    b->c.level) {
		struct bset_tree *t;
		struct bkey_packed *k, *k2, *p;

		k = bch2_btree_node_iter_peek_all(node_iter, b);

		for_each_bset(b, t) {
			bool set_pos = false;

			if (node_iter->data[0].end == t->end_offset)
				continue;

			k2 = bch2_btree_node_iter_bset_pos(node_iter, b, t);

			while ((p = bch2_bkey_prev_all(b, t, k2)) &&
			       bkey_iter_cmp(b, k, p) < 0) {
				k2 = p;
				set_pos = true;
			}

			if (set_pos)
				btree_node_iter_set_set_pos(node_iter,
							    b, t, k2);
		}
	}
}
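
/*
 * Worked example with hypothetical numbers: a key inserted at offset 10
 * with clobber_u64s == 0, new_u64s == 3 gives shift == 3; an iterator
 * position set->k of 12 in that bset lands after the insert and becomes
 * 15, positions below 10 are untouched, and positions inside the
 * clobbered range snap to the new key or to just past it.
 */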

void bch2_btree_node_iter_fix(struct btree_trans *trans,
			      struct btree_path *path,
			      struct btree *b,
			      struct btree_node_iter *node_iter,
			      struct bkey_packed *where,
			      unsigned clobber_u64s,
			      unsigned new_u64s)
{
	struct bset_tree *t = bch2_bkey_to_bset_inlined(b, where);
	struct btree_path *linked;

	if (node_iter != &path->l[b->c.level].iter) {
		__bch2_btree_node_iter_fix(path, b, node_iter, t,
					   where, clobber_u64s, new_u64s);

		if (bch2_debug_check_iterators)
			bch2_btree_node_iter_verify(node_iter, b);
	}

	trans_for_each_path_with_node(trans, b, linked) {
		__bch2_btree_node_iter_fix(linked, b,
					   &linked->l[b->c.level].iter, t,
					   where, clobber_u64s, new_u64s);
		bch2_btree_path_verify_level(trans, linked, b->c.level);
	}
}

/* Btree path level: pointer to a particular btree node and node iter */

static inline struct bkey_s_c __btree_iter_unpack(struct bch_fs *c,
						  struct btree_path_level *l,
						  struct bkey *u,
						  struct bkey_packed *k)
{
	struct bkey_s_c ret;

	if (unlikely(!k)) {
		/*
		 * signal to bch2_btree_iter_peek_slot() that we're currently at
		 * a hole
		 */
		u->type = KEY_TYPE_deleted;
		return bkey_s_c_null;
	}

	ret = bkey_disassemble(l->b, k, u);

	/*
	 * XXX: bch2_btree_bset_insert_key() generates invalid keys when we
	 * overwrite extents - it sets k->type = KEY_TYPE_deleted on the key
	 * being overwritten but doesn't change k->size. But this is ok, because
	 * those keys are never written out, we just have to avoid a spurious
	 * assertion here:
	 */
	if (bch2_debug_check_bkeys && !bkey_deleted(ret.k))
		bch2_bkey_debugcheck(c, l->b, ret);

	return ret;
}

static inline struct bkey_s_c btree_path_level_peek_all(struct bch_fs *c,
							struct btree_path_level *l,
							struct bkey *u)
{
	return __btree_iter_unpack(c, l, u,
			bch2_btree_node_iter_peek_all(&l->iter, l->b));
}

static inline struct bkey_s_c btree_path_level_peek(struct btree_trans *trans,
						    struct btree_path *path,
						    struct btree_path_level *l,
						    struct bkey *u)
{
	struct bkey_s_c k = __btree_iter_unpack(trans->c, l, u,
			bch2_btree_node_iter_peek(&l->iter, l->b));

	path->pos = k.k ? k.k->p : l->b->key.k.p;
	trans->paths_sorted = false;
	return k;
}

static inline struct bkey_s_c btree_path_level_prev(struct btree_trans *trans,
						    struct btree_path *path,
						    struct btree_path_level *l,
						    struct bkey *u)
{
	struct bkey_s_c k = __btree_iter_unpack(trans->c, l, u,
			bch2_btree_node_iter_prev(&l->iter, l->b));

	path->pos = k.k ? k.k->p : l->b->data->min_key;
	trans->paths_sorted = false;
	return k;
}

static inline bool btree_path_advance_to_pos(struct btree_path *path,
					     struct btree_path_level *l,
					     int max_advance)
{
	struct bkey_packed *k;
	int nr_advanced = 0;

	while ((k = bch2_btree_node_iter_peek_all(&l->iter, l->b)) &&
	       bkey_iter_pos_cmp(l->b, k, &path->pos) < 0) {
		if (max_advance > 0 && nr_advanced >= max_advance)
			return false;

		bch2_btree_node_iter_advance(&l->iter, l->b);
		nr_advanced++;
	}

	return true;
}

/*
 * Verify that iterator for parent node points to child node:
 */
static void btree_path_verify_new_node(struct btree_trans *trans,
				       struct btree_path *path, struct btree *b)
{
	struct bch_fs *c = trans->c;
	struct btree_path_level *l;
	unsigned plevel;
	bool parent_locked;
	struct bkey_packed *k;

	if (!IS_ENABLED(CONFIG_BCACHEFS_DEBUG))
		return;

	if (trans->journal_replay_not_finished)
		return;

	plevel = b->c.level + 1;
	if (!btree_path_node(path, plevel))
		return;

	parent_locked = btree_node_locked(path, plevel);

	if (!bch2_btree_node_relock(trans, path, plevel))
		return;

	l = &path->l[plevel];
	k = bch2_btree_node_iter_peek_all(&l->iter, l->b);
	if (!k ||
	    bkey_deleted(k) ||
	    bkey_cmp_left_packed(l->b, k, &b->key.k.p)) {
		char buf1[100];
		char buf2[100];
		char buf3[100];
		char buf4[100];
		struct bkey uk = bkey_unpack_key(b, k);

		bch2_dump_btree_node(c, l->b);
		bch2_bpos_to_text(&PBUF(buf1), path->pos);
		bch2_bkey_to_text(&PBUF(buf2), &uk);
		bch2_bpos_to_text(&PBUF(buf3), b->data->min_key);
		bch2_bpos_to_text(&PBUF(buf4), b->data->max_key);
		panic("parent iter doesn't point to new node:\n"
		      "iter pos %s %s\n"
		      "iter key %s\n"
		      "new node %s-%s\n",
		      bch2_btree_ids[path->btree_id], buf1,
		      buf2, buf3, buf4);
	}

	if (!parent_locked)
		btree_node_unlock(path, plevel);
}

static inline void __btree_path_level_init(struct btree_path *path,
					   unsigned level)
{
	struct btree_path_level *l = &path->l[level];

	bch2_btree_node_iter_init(&l->iter, l->b, &path->pos);

	/*
	 * Iterators to interior nodes should always be pointed at the first non
	 * whiteout:
	 */
	if (level)
		bch2_btree_node_iter_peek(&l->iter, l->b);
}

static inline void btree_path_level_init(struct btree_trans *trans,
					 struct btree_path *path,
					 struct btree *b)
{
	BUG_ON(path->cached);

	btree_path_verify_new_node(trans, path, b);

	EBUG_ON(!btree_path_pos_in_node(path, b));
	EBUG_ON(b->c.lock.state.seq & 1);

	path->l[b->c.level].lock_seq = b->c.lock.state.seq;
	path->l[b->c.level].b = b;
	__btree_path_level_init(path, b->c.level);
}

/* Btree path: fixups after btree node updates: */

/*
 * A btree node is being replaced - update the iterator to point to the new
 * node:
 */
void bch2_trans_node_add(struct btree_trans *trans, struct btree *b)
{
	struct btree_path *path;

	trans_for_each_path(trans, path)
		if (!path->cached &&
		    btree_path_pos_in_node(path, b)) {
			enum btree_node_locked_type t =
				btree_lock_want(path, b->c.level);

			if (path->nodes_locked &&
			    t != BTREE_NODE_UNLOCKED) {
				btree_node_unlock(path, b->c.level);
				six_lock_increment(&b->c.lock, (enum six_lock_type) t);
				mark_btree_node_locked(path, b->c.level, (enum six_lock_type) t);
			}

			btree_path_level_init(trans, path, b);
		}
}

/*
 * A btree node has been modified in such a way as to invalidate iterators - fix
 * them:
 */
void bch2_trans_node_reinit_iter(struct btree_trans *trans, struct btree *b)
{
	struct btree_path *path;

	trans_for_each_path_with_node(trans, b, path)
		__btree_path_level_init(path, b->c.level);
}

/* Btree path: traverse, set_pos: */

static int lock_root_check_fn(struct six_lock *lock, void *p)
{
	struct btree *b = container_of(lock, struct btree, c.lock);
	struct btree **rootp = p;

	return b == *rootp ? 0 : -1;
}

static inline int btree_path_lock_root(struct btree_trans *trans,
				       struct btree_path *path,
				       unsigned depth_want,
				       unsigned long trace_ip)
{
	struct bch_fs *c = trans->c;
	struct btree *b, **rootp = &c->btree_roots[path->btree_id].b;
	enum six_lock_type lock_type;
	unsigned i;

	EBUG_ON(path->nodes_locked);

	while (1) {
		b = READ_ONCE(*rootp);
		path->level = READ_ONCE(b->c.level);

		if (unlikely(path->level < depth_want)) {
			/*
			 * the root is at a lower depth than the depth we want:
			 * got to the end of the btree, or we're walking nodes
			 * greater than some depth and there are no nodes >=
			 * that depth
			 */
			path->level = depth_want;
			for (i = path->level; i < BTREE_MAX_DEPTH; i++)
				path->l[i].b = NULL;
			return 1;
		}

		lock_type = __btree_lock_want(path, path->level);
		if (unlikely(!btree_node_lock(trans, path, b, SPOS_MAX,
					      path->level, lock_type,
					      lock_root_check_fn, rootp,
					      trace_ip))) {
			if (trans->restarted)
				return -EINTR;
			continue;
		}

		if (likely(b == READ_ONCE(*rootp) &&
			   b->c.level == path->level &&
			   !race_fault())) {
			for (i = 0; i < path->level; i++)
				path->l[i].b = BTREE_ITER_NO_NODE_LOCK_ROOT;
			path->l[path->level].b = b;
			for (i = path->level + 1; i < BTREE_MAX_DEPTH; i++)
				path->l[i].b = NULL;

			mark_btree_node_locked(path, path->level, lock_type);
			btree_path_level_init(trans, path, b);
			return 0;
		}

		six_unlock_type(&b->c.lock, lock_type);
	}
}
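
/*
 * Note on the retry loop above: the root pointer is read without any locks
 * held, so after locking we recheck that *rootp and the level are unchanged
 * (the root may have been replaced in the meantime); lock_root_check_fn()
 * performs the same check if we had to block inside btree_node_lock().
 */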

noinline
static int btree_path_prefetch(struct btree_trans *trans, struct btree_path *path)
{
	struct bch_fs *c = trans->c;
	struct btree_path_level *l = path_l(path);
	struct btree_node_iter node_iter = l->iter;
	struct bkey_packed *k;
	struct bkey_buf tmp;
	unsigned nr = test_bit(BCH_FS_STARTED, &c->flags)
		? (path->level > 1 ? 0 :  2)
		: (path->level > 1 ? 1 : 16);
	bool was_locked = btree_node_locked(path, path->level);
	int ret = 0;

	bch2_bkey_buf_init(&tmp);

	while (nr-- && !ret) {
		if (!bch2_btree_node_relock(trans, path, path->level))
			break;

		bch2_btree_node_iter_advance(&node_iter, l->b);
		k = bch2_btree_node_iter_peek(&node_iter, l->b);
		if (!k)
			break;

		bch2_bkey_buf_unpack(&tmp, c, l->b, k);
		ret = bch2_btree_node_prefetch(c, trans, path, tmp.k, path->btree_id,
					       path->level - 1);
	}

	if (!was_locked)
		btree_node_unlock(path, path->level);

	bch2_bkey_buf_exit(&tmp, c);
	return ret;
}

static int btree_path_prefetch_j(struct btree_trans *trans, struct btree_path *path,
				 struct btree_and_journal_iter *jiter)
{
	struct bch_fs *c = trans->c;
	struct bkey_s_c k;
	struct bkey_buf tmp;
	unsigned nr = test_bit(BCH_FS_STARTED, &c->flags)
		? (path->level > 1 ? 0 :  2)
		: (path->level > 1 ? 1 : 16);
	bool was_locked = btree_node_locked(path, path->level);
	int ret = 0;

	bch2_bkey_buf_init(&tmp);

	while (nr-- && !ret) {
		if (!bch2_btree_node_relock(trans, path, path->level))
			break;

		bch2_btree_and_journal_iter_advance(jiter);
		k = bch2_btree_and_journal_iter_peek(jiter);
		if (!k.k)
			break;

		bch2_bkey_buf_reassemble(&tmp, c, k);
		ret = bch2_btree_node_prefetch(c, trans, path, tmp.k, path->btree_id,
					       path->level - 1);
	}

	if (!was_locked)
		btree_node_unlock(path, path->level);

	bch2_bkey_buf_exit(&tmp, c);
	return ret;
}

static noinline void btree_node_mem_ptr_set(struct btree_trans *trans,
					    struct btree_path *path,
					    unsigned plevel, struct btree *b)
{
	struct btree_path_level *l = &path->l[plevel];
	bool locked = btree_node_locked(path, plevel);
	struct bkey_packed *k;
	struct bch_btree_ptr_v2 *bp;

	if (!bch2_btree_node_relock(trans, path, plevel))
		return;

	k = bch2_btree_node_iter_peek_all(&l->iter, l->b);
	BUG_ON(k->type != KEY_TYPE_btree_ptr_v2);

	bp = (void *) bkeyp_val(&l->b->format, k);
	bp->mem_ptr = (unsigned long)b;

	if (!locked)
		btree_node_unlock(path, plevel);
}

static noinline int btree_node_iter_and_journal_peek(struct btree_trans *trans,
						     struct btree_path *path,
						     unsigned flags,
						     struct bkey_buf *out)
{
	struct bch_fs *c = trans->c;
	struct btree_path_level *l = path_l(path);
	struct btree_and_journal_iter jiter;
	struct bkey_s_c k;
	int ret = 0;

	__bch2_btree_and_journal_iter_init_node_iter(&jiter, c, l->b, l->iter, path->pos);

	k = bch2_btree_and_journal_iter_peek(&jiter);

	bch2_bkey_buf_reassemble(out, c, k);

	if (flags & BTREE_ITER_PREFETCH)
		ret = btree_path_prefetch_j(trans, path, &jiter);

	bch2_btree_and_journal_iter_exit(&jiter);
	return ret;
}
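
/*
 * While journal replay is still in progress the btree may be missing
 * updates that only exist in the journal, so instead of using the plain
 * node iterator we go through a btree_and_journal_iter, which merges
 * journal keys over the keys in the node.
 */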

static __always_inline int btree_path_down(struct btree_trans *trans,
					   struct btree_path *path,
					   unsigned flags,
					   unsigned long trace_ip)
{
	struct bch_fs *c = trans->c;
	struct btree_path_level *l = path_l(path);
	struct btree *b;
	unsigned level = path->level - 1;
	enum six_lock_type lock_type = __btree_lock_want(path, level);
	struct bkey_buf tmp;
	int ret;

	EBUG_ON(!btree_node_locked(path, path->level));

	bch2_bkey_buf_init(&tmp);

	if (unlikely(trans->journal_replay_not_finished)) {
		ret = btree_node_iter_and_journal_peek(trans, path, flags, &tmp);
		if (ret)
			goto err;
	} else {
		bch2_bkey_buf_unpack(&tmp, c, l->b,
				 bch2_btree_node_iter_peek(&l->iter, l->b));

		if (flags & BTREE_ITER_PREFETCH) {
			ret = btree_path_prefetch(trans, path);
			if (ret)
				goto err;
		}
	}

	b = bch2_btree_node_get(trans, path, tmp.k, level, lock_type, trace_ip);
	ret = PTR_ERR_OR_ZERO(b);
	if (unlikely(ret))
		goto err;

	mark_btree_node_locked(path, level, lock_type);
	btree_path_level_init(trans, path, b);

	if (likely(!trans->journal_replay_not_finished &&
		   tmp.k->k.type == KEY_TYPE_btree_ptr_v2) &&
	    unlikely(b != btree_node_mem_ptr(tmp.k)))
		btree_node_mem_ptr_set(trans, path, level + 1, b);

	if (btree_node_read_locked(path, level + 1))
		btree_node_unlock(path, level + 1);
	path->level = level;

	bch2_btree_path_verify_locks(path);
err:
	bch2_bkey_buf_exit(&tmp, c);
	return ret;
}

static int btree_path_traverse_one(struct btree_trans *, struct btree_path *,
				   unsigned, unsigned long);

static int __btree_path_traverse_all(struct btree_trans *trans, int ret,
				     unsigned long trace_ip)
{
	struct bch_fs *c = trans->c;
	struct btree_path *path, *prev = NULL;
	int i;

	if (trans->in_traverse_all)
		return -EINTR;

	trans->in_traverse_all = true;
retry_all:
	trans->restarted = false;

	trans_for_each_path(trans, path)
		path->should_be_locked = false;

	btree_trans_sort_paths(trans);

	trans_for_each_path_inorder_reverse(trans, path, i) {
		if (prev) {
			if (path->btree_id == prev->btree_id &&
			    path->locks_want < prev->locks_want)
				__bch2_btree_path_upgrade(trans, path, prev->locks_want);
			else if (!path->locks_want && prev->locks_want)
				__bch2_btree_path_upgrade(trans, path, 1);
		}

		prev = path;
	}

	bch2_trans_unlock(trans);
	cond_resched();

	if (unlikely(ret == -ENOMEM)) {
		struct closure cl;

		closure_init_stack(&cl);

		do {
			ret = bch2_btree_cache_cannibalize_lock(c, &cl);
			closure_sync(&cl);
		} while (ret);
	}

	if (unlikely(ret == -EIO))
		goto out;

	BUG_ON(ret && ret != -EINTR);

	/* Now, redo traversals in correct order: */
	i = 0;
	while (i < trans->nr_sorted) {
		path = trans->paths + trans->sorted[i];

		EBUG_ON(!(trans->paths_allocated & (1ULL << path->idx)));

		ret = btree_path_traverse_one(trans, path, 0, _THIS_IP_);
		if (ret)
			goto retry_all;

		EBUG_ON(!(trans->paths_allocated & (1ULL << path->idx)));

		if (path->nodes_locked ||
		    !btree_path_node(path, path->level))
			i++;
	}

	/*
	 * BTREE_ITER_NEED_RELOCK is ok here - if we called bch2_trans_unlock()
	 * and relock(), relock() won't relock since path->should_be_locked
	 * isn't set yet, which is all fine
	 */
	trans_for_each_path(trans, path)
		BUG_ON(path->uptodate >= BTREE_ITER_NEED_TRAVERSE);
out:
	bch2_btree_cache_cannibalize_unlock(c);

	trans->in_traverse_all = false;

	trace_trans_traverse_all(trans->fn, trace_ip);
	return ret;
}

static int bch2_btree_path_traverse_all(struct btree_trans *trans)
{
	return __btree_path_traverse_all(trans, 0, _RET_IP_);
}

static inline bool btree_path_good_node(struct btree_trans *trans,
					struct btree_path *path,
					unsigned l, int check_pos)
{
	if (!is_btree_node(path, l) ||
	    !bch2_btree_node_relock(trans, path, l))
		return false;

	if (check_pos < 0 && btree_path_pos_before_node(path, path->l[l].b))
		return false;
	if (check_pos > 0 && btree_path_pos_after_node(path, path->l[l].b))
		return false;
	return true;
}

static inline unsigned btree_path_up_until_good_node(struct btree_trans *trans,
						     struct btree_path *path,
						     int check_pos)
{
	unsigned i, l = path->level;

	while (btree_path_node(path, l) &&
	       !btree_path_good_node(trans, path, l, check_pos)) {
		btree_node_unlock(path, l);
		path->l[l].b = BTREE_ITER_NO_NODE_UP;
		l++;
	}

	/* If we need intent locks, take them too: */
	for (i = l + 1;
	     i < path->locks_want && btree_path_node(path, i);
	     i++)
		if (!bch2_btree_node_relock(trans, path, i))
			while (l <= i) {
				btree_node_unlock(path, l);
				path->l[l].b = BTREE_ITER_NO_NODE_UP;
				l++;
			}

	return l;
}
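
/*
 * check_pos, restated: callers that have just moved path->pos pass the
 * direction of travel - negative means the position may now precede a
 * node's min_key, positive that it may follow a node's max key - while a
 * plain traverse passes 0 and keeps any node that can still be relocked.
 */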

/*
 * This is the main state machine for walking down the btree - walks down to a
 * specified depth
 *
 * Returns 0 on success, -EIO on error (error reading in a btree node).
 *
 * On error, caller (peek_node()/peek_key()) must return NULL; the error is
 * stashed in the iterator and returned from bch2_trans_exit().
 */
static int btree_path_traverse_one(struct btree_trans *trans,
				   struct btree_path *path,
				   unsigned flags,
				   unsigned long trace_ip)
{
	unsigned depth_want = path->level;
	int ret = 0;

	if (unlikely(trans->restarted)) {
		ret = -EINTR;
		goto out;
	}

	/*
	 * Ensure we obey path->should_be_locked: if it's set, we can't unlock
	 * and re-traverse the path without a transaction restart:
	 */
	if (path->should_be_locked) {
		ret = bch2_btree_path_relock(trans, path, trace_ip) ? 0 : -EINTR;
		goto out;
	}

	if (path->cached) {
		ret = bch2_btree_path_traverse_cached(trans, path, flags);
		goto out;
	}

	if (unlikely(path->level >= BTREE_MAX_DEPTH))
		goto out;

	path->level = btree_path_up_until_good_node(trans, path, 0);

	/*
	 * Note: path->nodes[path->level] may be temporarily NULL here - that
	 * would indicate to other code that we got to the end of the btree,
	 * here it indicates that relocking the root failed - it's critical that
	 * btree_path_lock_root() comes next and that it can't fail
	 */
	while (path->level > depth_want) {
		ret = btree_path_node(path, path->level)
			? btree_path_down(trans, path, flags, trace_ip)
			: btree_path_lock_root(trans, path, depth_want, trace_ip);
		if (unlikely(ret)) {
			if (ret == 1) {
				/*
				 * No nodes at this level - got to the end of
				 * the btree:
				 */
				ret = 0;
				goto out;
			}

			__bch2_btree_path_unlock(path);
			path->level = depth_want;

			if (ret == -EIO)
				path->l[path->level].b =
					BTREE_ITER_NO_NODE_ERROR;
			else
				path->l[path->level].b =
					BTREE_ITER_NO_NODE_DOWN;
			goto out;
		}
	}

	path->uptodate = BTREE_ITER_UPTODATE;
out:
	BUG_ON((ret == -EINTR) != !!trans->restarted);
	bch2_btree_path_verify(trans, path);
	return ret;
}

static int __btree_path_traverse_all(struct btree_trans *, int, unsigned long);

int __must_check bch2_btree_path_traverse(struct btree_trans *trans,
					  struct btree_path *path, unsigned flags)
{
	if (path->uptodate < BTREE_ITER_NEED_RELOCK)
		return 0;

	return  bch2_trans_cond_resched(trans) ?:
		btree_path_traverse_one(trans, path, flags, _RET_IP_);
}

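/*
 * Copy everything in @src from the pos field onwards, so that @dst keeps its
 * own identity and reference counts, then take an extra reference on every
 * node lock @src holds, since both paths now hold it.
 */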
static void btree_path_copy(struct btree_trans *trans, struct btree_path *dst,
			    struct btree_path *src)
{
	unsigned i, offset = offsetof(struct btree_path, pos);

	memcpy((void *) dst + offset,
	       (void *) src + offset,
	       sizeof(struct btree_path) - offset);

	for (i = 0; i < BTREE_MAX_DEPTH; i++)
		if (btree_node_locked(dst, i))
			six_lock_increment(&dst->l[i].b->c.lock,
					   __btree_lock_want(dst, i));

	trans->paths_sorted = false;
}

static struct btree_path *btree_path_clone(struct btree_trans *trans, struct btree_path *src,
					   bool intent)
{
	struct btree_path *new = btree_path_alloc(trans, src);

	btree_path_copy(trans, new, src);
	__btree_path_get(new, intent);
	return new;
}

struct btree_path * __must_check
__bch2_btree_path_make_mut(struct btree_trans *trans,
			   struct btree_path *path, bool intent)
{
	__btree_path_put(path, intent);
	path = btree_path_clone(trans, path, intent);
	path->preserve = false;
#ifdef CONFIG_BCACHEFS_DEBUG
	path->ip_allocated = _RET_IP_;
#endif
	return path;
}

static struct btree_path * __must_check
__bch2_btree_path_set_pos(struct btree_trans *trans,
			  struct btree_path *path, struct bpos new_pos,
			  bool intent, int cmp)
{
	unsigned l = path->level;

	EBUG_ON(trans->restarted);
	EBUG_ON(!path->ref);

	path = bch2_btree_path_make_mut(trans, path, intent);

	path->pos		= new_pos;
	path->should_be_locked	= false;
	trans->paths_sorted	= false;

	if (unlikely(path->cached)) {
		btree_node_unlock(path, 0);
		path->l[0].b = BTREE_ITER_NO_NODE_CACHED;
		btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
		goto out;
	}

	l = btree_path_up_until_good_node(trans, path, cmp);

	if (btree_path_node(path, l)) {
		/*
		 * We might have to skip over many keys, or just a few: try
		 * advancing the node iterator, and if we have to skip over too
		 * many keys just reinit it (or if we're rewinding, since that
		 * is expensive).
		 */
		if (cmp < 0 ||
		    !btree_path_advance_to_pos(path, &path->l[l], 8))
			__btree_path_level_init(path, l);
	}

	if (l != path->level) {
		btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
		__bch2_btree_path_unlock(path);
	}
out:
	bch2_btree_path_verify(trans, path);
	return path;
}

static inline struct btree_path * __must_check
btree_path_set_pos(struct btree_trans *trans,
		   struct btree_path *path, struct bpos new_pos,
		   bool intent)
{
	int cmp = bpos_cmp(new_pos, path->pos);

	return cmp
		? __bch2_btree_path_set_pos(trans, path, new_pos, intent, cmp)
		: path;
}

/* Btree path: main interface: */

static struct btree_path *have_path_at_pos(struct btree_trans *trans, struct btree_path *path)
{
	struct btree_path *next;

	next = prev_btree_path(trans, path);
	if (next && !btree_path_cmp(next, path))
		return next;

	next = next_btree_path(trans, path);
	if (next && !btree_path_cmp(next, path))
		return next;

	return NULL;
}

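/*
 * Unlike have_path_at_pos(), this looks for a neighbouring path that points
 * into the same btree node at the same level, even if its position differs:
 */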
static struct btree_path *have_node_at_pos(struct btree_trans *trans, struct btree_path *path)
{
	struct btree_path *next;

	next = prev_btree_path(trans, path);
	if (next && next->level == path->level && path_l(next)->b == path_l(path)->b)
		return next;

	next = next_btree_path(trans, path);
	if (next && next->level == path->level && path_l(next)->b == path_l(path)->b)
		return next;

	return NULL;
}

static inline void __bch2_path_free(struct btree_trans *trans, struct btree_path *path)
{
	__bch2_btree_path_unlock(path);
	btree_path_list_remove(trans, path);
	trans->paths_allocated &= ~(1ULL << path->idx);
}

void bch2_path_put(struct btree_trans *trans, struct btree_path *path, bool intent)
{
	struct btree_path *dup;

	EBUG_ON(trans->paths + path->idx != path);
	EBUG_ON(!path->ref);

	if (!__btree_path_put(path, intent))
		return;

	/*
	 * Perhaps instead we should check for duplicate paths in traverse_all:
	 */
	if (path->preserve &&
	    (dup = have_path_at_pos(trans, path))) {
		dup->preserve = true;
		path->preserve = false;
		goto free;
	}

	if (!path->preserve &&
	    (dup = have_node_at_pos(trans, path)))
		goto free;
	return;
free:
	if (path->should_be_locked &&
	    !btree_node_locked(dup, path->level))
		return;

	dup->should_be_locked |= path->should_be_locked;
	__bch2_path_free(trans, path);
}

noinline __cold
void bch2_dump_trans_paths_updates(struct btree_trans *trans)
{
	struct btree_path *path;
	struct btree_insert_entry *i;
	unsigned idx;
	char buf1[300], buf2[300];

	btree_trans_sort_paths(trans);

	trans_for_each_path_inorder(trans, path, idx)
		printk(KERN_ERR "path: idx %u ref %u:%u%s%s btree %s pos %s locks %u %pS\n",
		       path->idx, path->ref, path->intent_ref,
		       path->should_be_locked ? " S" : "",
		       path->preserve ? " P" : "",
		       bch2_btree_ids[path->btree_id],
		       (bch2_bpos_to_text(&PBUF(buf1), path->pos), buf1),
		       path->nodes_locked,
#ifdef CONFIG_BCACHEFS_DEBUG
		       (void *) path->ip_allocated
#else
		       NULL
#endif
		       );

	trans_for_each_update(trans, i) {
		struct bkey u;
		struct bkey_s_c old = bch2_btree_path_peek_slot(i->path, &u);

		printk(KERN_ERR "update: btree %s %pS\n  old %s\n  new %s",
		       bch2_btree_ids[i->btree_id],
		       (void *) i->ip_allocated,
		       (bch2_bkey_val_to_text(&PBUF(buf1), trans->c, old), buf1),
		       (bch2_bkey_val_to_text(&PBUF(buf2), trans->c, bkey_i_to_s_c(i->k)), buf2));
	}
}

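/*
 * paths_allocated is a bitmap with one bit per slot in trans->paths, so a
 * free slot is just __ffs64() of its complement - which is also why
 * BTREE_ITER_MAX can't exceed 64:
 */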
static struct btree_path *btree_path_alloc(struct btree_trans *trans,
					   struct btree_path *pos)
{
	struct btree_path *path;
	unsigned idx;

	if (unlikely(trans->paths_allocated ==
		     ~((~0ULL << 1) << (BTREE_ITER_MAX - 1)))) {
		bch2_dump_trans_paths_updates(trans);
		panic("trans path overflow\n");
	}

	idx = __ffs64(~trans->paths_allocated);
	trans->paths_allocated |= 1ULL << idx;

	path = &trans->paths[idx];

	path->idx		= idx;
	path->ref		= 0;
	path->intent_ref	= 0;
	path->nodes_locked	= 0;
	path->nodes_intent_locked = 0;

	btree_path_list_add(trans, pos, path);
	return path;
}

struct btree_path *bch2_path_get(struct btree_trans *trans,
				 enum btree_id btree_id, struct bpos pos,
				 unsigned locks_want, unsigned level,
				 unsigned flags)
{
	struct btree_path *path, *path_pos = NULL;
	bool cached = flags & BTREE_ITER_CACHED;
	bool intent = flags & BTREE_ITER_INTENT;
	int i;

	BUG_ON(trans->restarted);

	btree_trans_sort_paths(trans);

	trans_for_each_path_inorder(trans, path, i) {
		if (__btree_path_cmp(path,
				     btree_id,
				     cached,
				     pos,
				     level) > 0)
			break;

		path_pos = path;
	}

	if (path_pos &&
	    path_pos->cached	== cached &&
	    path_pos->btree_id	== btree_id &&
	    path_pos->level	== level) {
		__btree_path_get(path_pos, intent);
		path = btree_path_set_pos(trans, path_pos, pos, intent);
	} else {
		path = btree_path_alloc(trans, path_pos);
		path_pos = NULL;

		__btree_path_get(path, intent);
		path->pos			= pos;
		path->btree_id			= btree_id;
		path->cached			= cached;
		path->uptodate			= BTREE_ITER_NEED_TRAVERSE;
		path->should_be_locked		= false;
		path->level			= level;
		path->locks_want		= locks_want;
		path->nodes_locked		= 0;
		path->nodes_intent_locked	= 0;
		for (i = 0; i < ARRAY_SIZE(path->l); i++)
			path->l[i].b		= BTREE_ITER_NO_NODE_INIT;
#ifdef CONFIG_BCACHEFS_DEBUG
		path->ip_allocated		= _RET_IP_;
#endif
		trans->paths_sorted		= false;
	}

	if (!(flags & BTREE_ITER_NOPRESERVE))
		path->preserve = true;

	if (path->intent_ref)
		locks_want = max(locks_want, level + 1);

	/*
	 * If the path has locks_want greater than requested, we don't downgrade
	 * it here - on transaction restart because btree node split needs to
	 * upgrade locks, we might be putting/getting the iterator again.
	 * Downgrading iterators only happens via bch2_trans_downgrade(), after
	 * a successful transaction commit.
	 */

	locks_want = min(locks_want, BTREE_MAX_DEPTH);
	if (locks_want > path->locks_want) {
		path->locks_want = locks_want;
		btree_path_get_locks(trans, path, true);
	}

	return path;
}

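/*
 * Sketch of the lifecycle of a path obtained with bch2_path_get() - every
 * get must be paired with a bch2_path_put() with a matching intent flag:
 *
 *	struct btree_path *path =
 *		bch2_path_get(trans, btree_id, pos, 0, 0, BTREE_ITER_INTENT);
 *	int ret = bch2_btree_path_traverse(trans, path, 0);
 *
 *	(if traverse succeeded, use path->l[path->level] here)
 *
 *	bch2_path_put(trans, path, true);
 */
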
inline struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *path, struct bkey *u)
{
	struct bkey_s_c k;

	BUG_ON(path->uptodate != BTREE_ITER_UPTODATE);

	if (!path->cached) {
		struct btree_path_level *l = path_l(path);
		struct bkey_packed *_k =
			bch2_btree_node_iter_peek_all(&l->iter, l->b);

		k = _k ? bkey_disassemble(l->b, _k, u) : bkey_s_c_null;

		EBUG_ON(k.k && bkey_deleted(k.k) && bpos_cmp(k.k->p, path->pos) == 0);

		if (!k.k || bpos_cmp(path->pos, k.k->p))
			goto hole;
	} else {
		struct bkey_cached *ck = (void *) path->l[0].b;

		EBUG_ON(path->btree_id != ck->key.btree_id ||
			bkey_cmp(path->pos, ck->key.pos));

		/* BTREE_ITER_CACHED_NOFILL? */
		if (unlikely(!ck->valid))
			goto hole;

		k = bkey_i_to_s_c(ck->k);
	}

	return k;
hole:
	bkey_init(u);
	u->p = path->pos;
	return (struct bkey_s_c) { u, NULL };
}

/* Btree iterators: */

int __must_check
__bch2_btree_iter_traverse(struct btree_iter *iter)
{
	return bch2_btree_path_traverse(iter->trans, iter->path, iter->flags);
}

int __must_check
bch2_btree_iter_traverse(struct btree_iter *iter)
{
	int ret;

	iter->path = btree_path_set_pos(iter->trans, iter->path,
					btree_iter_search_key(iter),
					iter->flags & BTREE_ITER_INTENT);

	ret = bch2_btree_path_traverse(iter->trans, iter->path, iter->flags);
	if (ret)
		return ret;

	iter->path->should_be_locked = true;
	return 0;
}

/* Iterate across nodes (leaf and interior nodes) */

struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter)
{
	struct btree_trans *trans = iter->trans;
	struct btree *b = NULL;
	int ret;

	EBUG_ON(iter->path->cached);
	bch2_btree_iter_verify(iter);

	ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
	if (ret)
		goto err;

	b = btree_path_node(iter->path, iter->path->level);
	if (!b)
		goto out;

	BUG_ON(bpos_cmp(b->key.k.p, iter->pos) < 0);

	bkey_init(&iter->k);
	iter->k.p = iter->pos = b->key.k.p;

	iter->path = btree_path_set_pos(trans, iter->path, b->key.k.p,
					iter->flags & BTREE_ITER_INTENT);
	iter->path->should_be_locked = true;
	BUG_ON(iter->path->uptodate);
out:
	bch2_btree_iter_verify_entry_exit(iter);
	bch2_btree_iter_verify(iter);

	return b;
err:
	b = ERR_PTR(ret);
	goto out;
}

struct btree *bch2_btree_iter_next_node(struct btree_iter *iter)
{
	struct btree_trans *trans = iter->trans;
	struct btree_path *path = iter->path;
	struct btree *b = NULL;
	unsigned l;
	int ret;

	BUG_ON(trans->restarted);
	EBUG_ON(iter->path->cached);
	bch2_btree_iter_verify(iter);

	/* already at end? */
	if (!btree_path_node(path, path->level))
		return NULL;

	/* got to end? */
	if (!btree_path_node(path, path->level + 1)) {
		btree_node_unlock(path, path->level);
		path->l[path->level].b = BTREE_ITER_NO_NODE_UP;
		path->level++;
		return NULL;
	}

	if (!bch2_btree_node_relock(trans, path, path->level + 1)) {
		__bch2_btree_path_unlock(path);
		path->l[path->level].b = BTREE_ITER_NO_NODE_GET_LOCKS;
		path->l[path->level + 1].b = BTREE_ITER_NO_NODE_GET_LOCKS;
		trace_trans_restart_relock_next_node(trans->fn, _THIS_IP_,
					   path->btree_id, &path->pos);
		btree_trans_restart(trans);
		ret = -EINTR;
		goto err;
	}

	b = btree_path_node(path, path->level + 1);

	if (!bpos_cmp(iter->pos, b->key.k.p)) {
		btree_node_unlock(path, path->level);
		path->l[path->level].b = BTREE_ITER_NO_NODE_UP;
		path->level++;
	} else {
		/*
		 * Haven't gotten to the end of the parent node: go back down to
		 * the next child node
		 */
		path = iter->path =
			btree_path_set_pos(trans, path, bpos_successor(iter->pos),
					   iter->flags & BTREE_ITER_INTENT);

		path->level = iter->min_depth;

		for (l = path->level + 1; l < BTREE_MAX_DEPTH; l++)
			if (btree_lock_want(path, l) == BTREE_NODE_UNLOCKED)
				btree_node_unlock(path, l);

		btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
		bch2_btree_iter_verify(iter);

		ret = bch2_btree_path_traverse(trans, path, iter->flags);
		if (ret)
			goto err;

		b = path->l[path->level].b;
	}

	bkey_init(&iter->k);
	iter->k.p = iter->pos = b->key.k.p;

	iter->path = btree_path_set_pos(trans, iter->path, b->key.k.p,
					iter->flags & BTREE_ITER_INTENT);
	iter->path->should_be_locked = true;
	BUG_ON(iter->path->uptodate);
out:
	bch2_btree_iter_verify_entry_exit(iter);
	bch2_btree_iter_verify(iter);

	return b;
err:
	b = ERR_PTR(ret);
	goto out;
}

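/*
 * Sketch of walking every node in a btree with the two helpers above - the
 * for_each_btree_node() macro in btree_iter.h wraps this same pattern:
 *
 *	struct btree_iter iter;
 *	struct btree *b;
 *
 *	bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0, 0, 0);
 *	for (b = bch2_btree_iter_peek_node(&iter);
 *	     b && !IS_ERR(b);
 *	     b = bch2_btree_iter_next_node(&iter))
 *		...;
 *	bch2_trans_iter_exit(trans, &iter);
 */
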
/* Iterate across keys (in leaf nodes only) */

inline bool bch2_btree_iter_advance(struct btree_iter *iter)
{
	struct bpos pos = iter->k.p;
	bool ret = (iter->flags & BTREE_ITER_ALL_SNAPSHOTS
		    ? bpos_cmp(pos, SPOS_MAX)
		    : bkey_cmp(pos, SPOS_MAX)) != 0;

	if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS))
		pos = bkey_successor(iter, pos);
	bch2_btree_iter_set_pos(iter, pos);
	return ret;
}

inline bool bch2_btree_iter_rewind(struct btree_iter *iter)
{
	struct bpos pos = bkey_start_pos(&iter->k);
	bool ret = (iter->flags & BTREE_ITER_ALL_SNAPSHOTS
		    ? bpos_cmp(pos, POS_MIN)
		    : bkey_cmp(pos, POS_MIN)) != 0;

	if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS))
		pos = bkey_predecessor(iter, pos);
	bch2_btree_iter_set_pos(iter, pos);
	return ret;
}

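/*
 * Scan this transaction's pending updates for the first update to @iter's
 * btree at or after the path's position - this is how peek() sees keys the
 * transaction has written but not yet committed:
 */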
static noinline
struct bkey_i *__bch2_btree_trans_peek_updates(struct btree_iter *iter)
{
	struct btree_insert_entry *i;
	struct bkey_i *ret = NULL;

	trans_for_each_update(iter->trans, i) {
		if (i->btree_id < iter->btree_id)
			continue;
		if (i->btree_id > iter->btree_id)
			break;
		if (bpos_cmp(i->k->k.p, iter->path->pos) < 0)
			continue;
		if (!ret || bpos_cmp(i->k->k.p, ret->k.p) < 0)
			ret = i->k;
	}

	return ret;
}

static inline struct bkey_i *btree_trans_peek_updates(struct btree_iter *iter)
{
	return iter->flags & BTREE_ITER_WITH_UPDATES
		? __bch2_btree_trans_peek_updates(iter)
		: NULL;
}

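/*
 * Until journal replay has finished, iterators also have to return keys that
 * are still sitting in the journal: the helpers below overlay keys from the
 * in-memory journal_keys table on top of what's in the btree.
 */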
static struct bkey_i *__btree_trans_peek_journal(struct btree_trans *trans,
						 struct btree_path *path)
{
	struct journal_keys *keys = &trans->c->journal_keys;
	size_t idx = bch2_journal_key_search(keys, path->btree_id,
					     path->level, path->pos);

	while (idx < keys->nr && keys->d[idx].overwritten)
		idx++;

	return (idx < keys->nr &&
		keys->d[idx].btree_id	== path->btree_id &&
		keys->d[idx].level	== path->level)
		? keys->d[idx].k
		: NULL;
}

static noinline
struct bkey_s_c btree_trans_peek_slot_journal(struct btree_trans *trans,
					      struct btree_iter *iter)
{
	struct bkey_i *k = __btree_trans_peek_journal(trans, iter->path);

	if (k && !bpos_cmp(k->k.p, iter->pos)) {
		iter->k = k->k;
		return bkey_i_to_s_c(k);
	} else {
		return bkey_s_c_null;
	}
}

static noinline
struct bkey_s_c btree_trans_peek_journal(struct btree_trans *trans,
					 struct btree_iter *iter,
					 struct bkey_s_c k)
{
	struct bkey_i *next_journal =
		__btree_trans_peek_journal(trans, iter->path);

	if (next_journal &&
	    bpos_cmp(next_journal->k.p,
		     k.k ? k.k->p : iter->path->l[0].b->key.k.p) <= 0) {
		iter->k = next_journal->k;
		k = bkey_i_to_s_c(next_journal);
	}

	return k;
}

static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bpos search_key)
{
	struct btree_trans *trans = iter->trans;
	struct bkey_i *next_update;
	struct bkey_s_c k;
	int ret;

	EBUG_ON(iter->path->cached || iter->path->level);
	bch2_btree_iter_verify(iter);

	while (1) {
		iter->path = btree_path_set_pos(trans, iter->path, search_key,
				   iter->flags & BTREE_ITER_INTENT);

		ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
		if (unlikely(ret)) {
			/* ensure that iter->k is consistent with iter->pos: */
			bch2_btree_iter_set_pos(iter, iter->pos);
			k = bkey_s_c_err(ret);
			goto out;
		}

		k = btree_path_level_peek_all(trans->c, &iter->path->l[0], &iter->k);

		if (unlikely(iter->flags & BTREE_ITER_WITH_JOURNAL))
			k = btree_trans_peek_journal(trans, iter, k);

		next_update = btree_trans_peek_updates(iter);

		if (next_update &&
		    bpos_cmp(next_update->k.p,
			     k.k ? k.k->p : iter->path->l[0].b->key.k.p) <= 0) {
			iter->k = next_update->k;
			k = bkey_i_to_s_c(next_update);
		}

		if (k.k && bkey_deleted(k.k)) {
			/*
			 * If we've got a whiteout, and it's after the search
			 * key, advance the search key to the whiteout instead
			 * of just after the whiteout - it might be a btree
			 * whiteout, with a real key at the same position, since
			 * in the btree deleted keys sort before non deleted.
			 */
			search_key = bpos_cmp(search_key, k.k->p)
				? k.k->p
				: bpos_successor(k.k->p);
			continue;
		}

		if (likely(k.k)) {
			break;
		} else if (likely(bpos_cmp(iter->path->l[0].b->key.k.p, SPOS_MAX))) {
			/* Advance to next leaf node: */
			search_key = bpos_successor(iter->path->l[0].b->key.k.p);
		} else {
			/* End of btree: */
			bch2_btree_iter_set_pos(iter, SPOS_MAX);
			k = bkey_s_c_null;
			goto out;
		}
	}
out:
	bch2_btree_iter_verify(iter);

	return k;
}

/**
 * bch2_btree_iter_peek: returns first key greater than or equal to iterator's
 * current position
 */
struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
{
	struct btree_trans *trans = iter->trans;
	struct bpos search_key = btree_iter_search_key(iter);
	struct bkey_s_c k;
	int ret;

	if (iter->update_path) {
		bch2_path_put(trans, iter->update_path,
			      iter->flags & BTREE_ITER_INTENT);
		iter->update_path = NULL;
	}

	bch2_btree_iter_verify_entry_exit(iter);

	while (1) {
		k = __bch2_btree_iter_peek(iter, search_key);
		if (!k.k || bkey_err(k))
			goto out;

		if (iter->update_path &&
		    bkey_cmp(iter->update_path->pos, k.k->p)) {
			bch2_path_put(trans, iter->update_path,
				      iter->flags & BTREE_ITER_INTENT);
			iter->update_path = NULL;
		}

		if ((iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) &&
		    (iter->flags & BTREE_ITER_INTENT) &&
		    !(iter->flags & BTREE_ITER_IS_EXTENTS) &&
		    !iter->update_path) {
			struct bpos pos = k.k->p;

			if (pos.snapshot < iter->snapshot) {
				search_key = bpos_successor(k.k->p);
				continue;
			}

			pos.snapshot = iter->snapshot;

			/*
			 * advance, same as on exit for iter->path, but only up
			 * to snapshot
			 */
			__btree_path_get(iter->path, iter->flags & BTREE_ITER_INTENT);
			iter->update_path = iter->path;

			iter->update_path = btree_path_set_pos(trans,
						iter->update_path, pos,
						iter->flags & BTREE_ITER_INTENT);

			BUG_ON(!(iter->update_path->nodes_locked & 1));
			iter->update_path->should_be_locked = true;
		}

		/*
		 * We can never have a key in a leaf node at POS_MAX, so
		 * we don't have to check these successor() calls:
		 */
		if ((iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) &&
		    !bch2_snapshot_is_ancestor(trans->c,
					       iter->snapshot,
					       k.k->p.snapshot)) {
			search_key = bpos_successor(k.k->p);
			continue;
		}

		if (bkey_whiteout(k.k) &&
		    !(iter->flags & BTREE_ITER_ALL_SNAPSHOTS)) {
			search_key = bkey_successor(iter, k.k->p);
			continue;
		}

		break;
	}

	/*
	 * iter->pos should be monotonically increasing, and always be equal to
	 * the key we just returned - except extents can straddle iter->pos:
	 */
	if (!(iter->flags & BTREE_ITER_IS_EXTENTS))
		iter->pos = k.k->p;
	else if (bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0)
		iter->pos = bkey_start_pos(k.k);

	iter->path = btree_path_set_pos(trans, iter->path, k.k->p,
				iter->flags & BTREE_ITER_INTENT);
	BUG_ON(!iter->path->nodes_locked);
out:
	if (iter->update_path) {
		BUG_ON(!(iter->update_path->nodes_locked & 1));
		iter->update_path->should_be_locked = true;
	}
	iter->path->should_be_locked = true;

	if (!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS))
		iter->pos.snapshot = iter->snapshot;

	ret = bch2_btree_iter_verify_ret(iter, k);
	if (unlikely(ret)) {
		bch2_btree_iter_set_pos(iter, iter->pos);
		k = bkey_s_c_err(ret);
	}

	bch2_btree_iter_verify_entry_exit(iter);

	return k;
}

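/*
 * Sketch of iterating over all keys in a btree with peek()/advance() - the
 * for_each_btree_key() macros in btree_iter.h expand to this pattern:
 *
 *	bch2_trans_iter_init(trans, &iter, btree_id, POS_MIN, 0);
 *	while (1) {
 *		k = bch2_btree_iter_peek(&iter);
 *		ret = bkey_err(k);
 *		if (ret || !k.k)
 *			break;
 *		...
 *		bch2_btree_iter_advance(&iter);
 *	}
 *	bch2_trans_iter_exit(trans, &iter);
 */
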
/**
 * bch2_btree_iter_next: returns first key greater than iterator's current
 * position
 */
struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter)
{
	if (!bch2_btree_iter_advance(iter))
		return bkey_s_c_null;

	return bch2_btree_iter_peek(iter);
}

/**
 * bch2_btree_iter_peek_prev: returns first key less than or equal to
 * iterator's current position
 */
struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
{
	struct btree_trans *trans = iter->trans;
	struct bpos search_key = iter->pos;
	struct btree_path *saved_path = NULL;
	struct bkey_s_c k;
	struct bkey saved_k;
	const struct bch_val *saved_v;
	int ret;

	EBUG_ON(iter->path->cached || iter->path->level);
	EBUG_ON(iter->flags & BTREE_ITER_WITH_UPDATES);

	if (iter->flags & BTREE_ITER_WITH_JOURNAL)
		return bkey_s_c_err(-EIO);

	bch2_btree_iter_verify(iter);
	bch2_btree_iter_verify_entry_exit(iter);

	if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS)
		search_key.snapshot = U32_MAX;

	while (1) {
		iter->path = btree_path_set_pos(trans, iter->path, search_key,
						iter->flags & BTREE_ITER_INTENT);

		ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
		if (unlikely(ret)) {
			/* ensure that iter->k is consistent with iter->pos: */
			bch2_btree_iter_set_pos(iter, iter->pos);
			k = bkey_s_c_err(ret);
			goto out;
		}

		k = btree_path_level_peek(trans, iter->path,
					  &iter->path->l[0], &iter->k);
		if (!k.k ||
		    ((iter->flags & BTREE_ITER_IS_EXTENTS)
		     ? bpos_cmp(bkey_start_pos(k.k), search_key) >= 0
		     : bpos_cmp(k.k->p, search_key) > 0))
			k = btree_path_level_prev(trans, iter->path,
						  &iter->path->l[0], &iter->k);

		if (likely(k.k)) {
			if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) {
				if (k.k->p.snapshot == iter->snapshot)
					goto got_key;

				/*
				 * If we have a saved candidate, and we're no
				 * longer at the same _key_ (not pos), return
				 * that candidate
				 */
				if (saved_path && bkey_cmp(k.k->p, saved_k.p)) {
					bch2_path_put(trans, iter->path,
						      iter->flags & BTREE_ITER_INTENT);
					iter->path = saved_path;
					saved_path = NULL;
					iter->k	= saved_k;
					k.v	= saved_v;
					goto got_key;
				}

				if (bch2_snapshot_is_ancestor(iter->trans->c,
							      iter->snapshot,
							      k.k->p.snapshot)) {
					if (saved_path)
						bch2_path_put(trans, saved_path,
						      iter->flags & BTREE_ITER_INTENT);
					saved_path = btree_path_clone(trans, iter->path,
								iter->flags & BTREE_ITER_INTENT);
					saved_k = *k.k;
					saved_v = k.v;
				}

				search_key = bpos_predecessor(k.k->p);
				continue;
			}
got_key:
			if (bkey_whiteout(k.k) &&
			    !(iter->flags & BTREE_ITER_ALL_SNAPSHOTS)) {
				search_key = bkey_predecessor(iter, k.k->p);
				if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS)
					search_key.snapshot = U32_MAX;
				continue;
			}

			break;
		} else if (likely(bpos_cmp(iter->path->l[0].b->data->min_key, POS_MIN))) {
			/* Advance to previous leaf node: */
			search_key = bpos_predecessor(iter->path->l[0].b->data->min_key);
		} else {
			/* Start of btree: */
			bch2_btree_iter_set_pos(iter, POS_MIN);
			k = bkey_s_c_null;
			goto out;
		}
	}

	EBUG_ON(bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0);

	/* Extents can straddle iter->pos: */
	if (bkey_cmp(k.k->p, iter->pos) < 0)
		iter->pos = k.k->p;

	if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS)
		iter->pos.snapshot = iter->snapshot;
out:
	if (saved_path)
		bch2_path_put(trans, saved_path, iter->flags & BTREE_ITER_INTENT);
	iter->path->should_be_locked = true;

	bch2_btree_iter_verify_entry_exit(iter);
	bch2_btree_iter_verify(iter);

	return k;
}

/**
 * bch2_btree_iter_prev: returns first key less than iterator's current
 * position
 */
struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *iter)
{
	if (!bch2_btree_iter_rewind(iter))
		return bkey_s_c_null;

	return bch2_btree_iter_peek_prev(iter);
}

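/*
 * bch2_btree_iter_peek_slot() is like peek(), but returns a key for every
 * position: if there's nothing at the iterator's position it synthesizes a
 * deleted key - for extent btrees, one sized to cover the hole.
 */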
struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
{
	struct btree_trans *trans = iter->trans;
	struct bpos search_key;
	struct bkey_s_c k;
	int ret;

	EBUG_ON(iter->path->level);
	bch2_btree_iter_verify(iter);
	bch2_btree_iter_verify_entry_exit(iter);

	/* extents can't span inode numbers: */
	if ((iter->flags & BTREE_ITER_IS_EXTENTS) &&
	    unlikely(iter->pos.offset == KEY_OFFSET_MAX)) {
		if (iter->pos.inode == KEY_INODE_MAX)
			return bkey_s_c_null;

		bch2_btree_iter_set_pos(iter, bpos_nosnap_successor(iter->pos));
	}

	search_key = btree_iter_search_key(iter);
	iter->path = btree_path_set_pos(trans, iter->path, search_key,
					iter->flags & BTREE_ITER_INTENT);

	ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
	if (unlikely(ret))
		return bkey_s_c_err(ret);

	if ((iter->flags & BTREE_ITER_CACHED) ||
	    !(iter->flags & (BTREE_ITER_IS_EXTENTS|BTREE_ITER_FILTER_SNAPSHOTS))) {
		struct bkey_i *next_update;

		if ((next_update = btree_trans_peek_updates(iter)) &&
		    !bpos_cmp(next_update->k.p, iter->pos)) {
			iter->k = next_update->k;
			k = bkey_i_to_s_c(next_update);
			goto out;
		}

		if (unlikely(iter->flags & BTREE_ITER_WITH_JOURNAL) &&
		    (k = btree_trans_peek_slot_journal(trans, iter)).k)
			goto out;

		k = bch2_btree_path_peek_slot(iter->path, &iter->k);
	} else {
		struct bpos next;

		if (iter->flags & BTREE_ITER_INTENT) {
			struct btree_iter iter2;

			bch2_trans_copy_iter(&iter2, iter);
			k = bch2_btree_iter_peek(&iter2);

			if (k.k && !bkey_err(k)) {
				iter->k = iter2.k;
				k.k = &iter->k;
			}
			bch2_trans_iter_exit(trans, &iter2);
		} else {
			struct bpos pos = iter->pos;

			k = bch2_btree_iter_peek(iter);
			iter->pos = pos;
		}

		if (unlikely(bkey_err(k)))
			return k;

		next = k.k ? bkey_start_pos(k.k) : POS_MAX;

		if (bkey_cmp(iter->pos, next) < 0) {
			bkey_init(&iter->k);
			iter->k.p = iter->pos;

			if (iter->flags & BTREE_ITER_IS_EXTENTS) {
				bch2_key_resize(&iter->k,
						min_t(u64, KEY_SIZE_MAX,
						      (next.inode == iter->pos.inode
						       ? next.offset
						       : KEY_OFFSET_MAX) -
						      iter->pos.offset));
				EBUG_ON(!iter->k.size);
			}

			k = (struct bkey_s_c) { &iter->k, NULL };
		}
	}
out:
	iter->path->should_be_locked = true;

	bch2_btree_iter_verify_entry_exit(iter);
	bch2_btree_iter_verify(iter);
	ret = bch2_btree_iter_verify_ret(iter, k);
	if (unlikely(ret))
		return bkey_s_c_err(ret);

	return k;
}

struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *iter)
{
	if (!bch2_btree_iter_advance(iter))
		return bkey_s_c_null;

	return bch2_btree_iter_peek_slot(iter);
}

struct bkey_s_c bch2_btree_iter_prev_slot(struct btree_iter *iter)
{
	if (!bch2_btree_iter_rewind(iter))
		return bkey_s_c_null;

	return bch2_btree_iter_peek_slot(iter);
}

/* new transactional stuff: */

#ifdef CONFIG_BCACHEFS_DEBUG
static void btree_trans_verify_sorted_refs(struct btree_trans *trans)
{
	struct btree_path *path;
	unsigned i;

	BUG_ON(trans->nr_sorted != hweight64(trans->paths_allocated));

	trans_for_each_path(trans, path) {
		BUG_ON(path->sorted_idx >= trans->nr_sorted);
		BUG_ON(trans->sorted[path->sorted_idx] != path->idx);
	}

	for (i = 0; i < trans->nr_sorted; i++) {
		unsigned idx = trans->sorted[i];

		EBUG_ON(!(trans->paths_allocated & (1ULL << idx)));
		BUG_ON(trans->paths[idx].sorted_idx != i);
	}
}

static void btree_trans_verify_sorted(struct btree_trans *trans)
{
	struct btree_path *path, *prev = NULL;
	unsigned i;

	trans_for_each_path_inorder(trans, path, i) {
		BUG_ON(prev && btree_path_cmp(prev, path) > 0);
		prev = path;
	}
}
#else
static inline void btree_trans_verify_sorted_refs(struct btree_trans *trans) {}
static inline void btree_trans_verify_sorted(struct btree_trans *trans) {}
#endif

void __bch2_btree_trans_sort_paths(struct btree_trans *trans)
{
	int i, l = 0, r = trans->nr_sorted, inc = 1;
	bool swapped;

	btree_trans_verify_sorted_refs(trans);

	if (trans->paths_sorted)
		goto out;

	/*
	 * Cocktail shaker sort: this is efficient because iterators will be
	 * mostly sorted.
	 */
	do {
		swapped = false;

		for (i = inc > 0 ? l : r - 2;
		     i + 1 < r && i >= l;
		     i += inc) {
			if (btree_path_cmp(trans->paths + trans->sorted[i],
					   trans->paths + trans->sorted[i + 1]) > 0) {
				swap(trans->sorted[i], trans->sorted[i + 1]);
				trans->paths[trans->sorted[i]].sorted_idx = i;
				trans->paths[trans->sorted[i + 1]].sorted_idx = i + 1;
				swapped = true;
			}
		}

		if (inc > 0)
			--r;
		else
			l++;
		inc = -inc;
	} while (swapped);

	trans->paths_sorted = true;
out:
	btree_trans_verify_sorted(trans);
}

static inline void btree_path_list_remove(struct btree_trans *trans,
					  struct btree_path *path)
{
	unsigned i;

	EBUG_ON(path->sorted_idx >= trans->nr_sorted);
#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
	trans->nr_sorted--;
	memmove_u64s_down_small(trans->sorted + path->sorted_idx,
				trans->sorted + path->sorted_idx + 1,
				DIV_ROUND_UP(trans->nr_sorted - path->sorted_idx, 8));
#else
	array_remove_item(trans->sorted, trans->nr_sorted, path->sorted_idx);
#endif
	for (i = path->sorted_idx; i < trans->nr_sorted; i++)
		trans->paths[trans->sorted[i]].sorted_idx = i;

	path->sorted_idx = U8_MAX;
}

static inline void btree_path_list_add(struct btree_trans *trans,
				       struct btree_path *pos,
				       struct btree_path *path)
{
	unsigned i;

	path->sorted_idx = pos ? pos->sorted_idx + 1 : trans->nr_sorted;

#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
	memmove_u64s_up_small(trans->sorted + path->sorted_idx + 1,
			      trans->sorted + path->sorted_idx,
			      DIV_ROUND_UP(trans->nr_sorted - path->sorted_idx, 8));
	trans->nr_sorted++;
	trans->sorted[path->sorted_idx] = path->idx;
#else
	array_insert_item(trans->sorted, trans->nr_sorted, path->sorted_idx, path->idx);
#endif

	for (i = path->sorted_idx; i < trans->nr_sorted; i++)
		trans->paths[trans->sorted[i]].sorted_idx = i;

	btree_trans_verify_sorted_refs(trans);
}

void bch2_trans_iter_exit(struct btree_trans *trans, struct btree_iter *iter)
{
	if (iter->path)
		bch2_path_put(trans, iter->path,
			      iter->flags & BTREE_ITER_INTENT);
	if (iter->update_path)
		bch2_path_put(trans, iter->update_path,
			      iter->flags & BTREE_ITER_INTENT);
	iter->path = NULL;
	iter->update_path = NULL;
}

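/*
 * Common iterator initialization: adjust @flags based on the btree type
 * (extents, snapshots) and on whether journal replay has finished, then get
 * a path for the starting position.
 */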
static void __bch2_trans_iter_init(struct btree_trans *trans,
				   struct btree_iter *iter,
				   enum btree_id btree_id, struct bpos pos,
				   unsigned locks_want,
				   unsigned depth,
				   unsigned flags)
{
	EBUG_ON(trans->restarted);

	if (!(flags & (BTREE_ITER_ALL_SNAPSHOTS|BTREE_ITER_NOT_EXTENTS)) &&
	    btree_node_type_is_extents(btree_id))
		flags |= BTREE_ITER_IS_EXTENTS;

	if (!(flags & __BTREE_ITER_ALL_SNAPSHOTS) &&
	    !btree_type_has_snapshots(btree_id))
		flags &= ~BTREE_ITER_ALL_SNAPSHOTS;

	if (!(flags & BTREE_ITER_ALL_SNAPSHOTS) &&
	    btree_type_has_snapshots(btree_id))
		flags |= BTREE_ITER_FILTER_SNAPSHOTS;

	if (trans->journal_replay_not_finished)
		flags |= BTREE_ITER_WITH_JOURNAL;

	iter->trans	= trans;
	iter->path	= NULL;
	iter->update_path = NULL;
	iter->btree_id	= btree_id;
	iter->min_depth	= depth;
	iter->flags	= flags;
	iter->snapshot	= pos.snapshot;
	iter->pos	= pos;
	iter->k.type	= KEY_TYPE_deleted;
	iter->k.p	= pos;
	iter->k.size	= 0;

	iter->path = bch2_path_get(trans, btree_id, iter->pos,
				   locks_want, depth, flags);
}

void bch2_trans_iter_init(struct btree_trans *trans,
			  struct btree_iter *iter,
			  unsigned btree_id, struct bpos pos,
			  unsigned flags)
{
	__bch2_trans_iter_init(trans, iter, btree_id, pos,
			       0, 0, flags);
}

void bch2_trans_node_iter_init(struct btree_trans *trans,
			       struct btree_iter *iter,
			       enum btree_id btree_id,
			       struct bpos pos,
			       unsigned locks_want,
			       unsigned depth,
			       unsigned flags)
{
	__bch2_trans_iter_init(trans, iter, btree_id, pos, locks_want, depth,
			       BTREE_ITER_NOT_EXTENTS|
			       __BTREE_ITER_ALL_SNAPSHOTS|
			       BTREE_ITER_ALL_SNAPSHOTS|
			       flags);
	BUG_ON(iter->path->locks_want	 < min(locks_want, BTREE_MAX_DEPTH));
	BUG_ON(iter->path->level	!= depth);
	BUG_ON(iter->min_depth		!= depth);
}

void bch2_trans_copy_iter(struct btree_iter *dst, struct btree_iter *src)
{
	*dst = *src;
	if (src->path)
		__btree_path_get(src->path, src->flags & BTREE_ITER_INTENT);
	if (src->update_path)
		__btree_path_get(src->update_path, src->flags & BTREE_ITER_INTENT);
}

void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
{
	size_t new_top = trans->mem_top + size;
	void *p;

	if (new_top > trans->mem_bytes) {
		size_t old_bytes = trans->mem_bytes;
		size_t new_bytes = roundup_pow_of_two(new_top);
		void *new_mem;

		WARN_ON_ONCE(new_bytes > BTREE_TRANS_MEM_MAX);

		new_mem = krealloc(trans->mem, new_bytes, GFP_NOFS);
		if (!new_mem && new_bytes <= BTREE_TRANS_MEM_MAX) {
			new_mem = mempool_alloc(&trans->c->btree_trans_mem_pool, GFP_KERNEL);
			new_bytes = BTREE_TRANS_MEM_MAX;
			kfree(trans->mem);
		}

		if (!new_mem)
			return ERR_PTR(-ENOMEM);

		trans->mem = new_mem;
		trans->mem_bytes = new_bytes;

		if (old_bytes) {
			trace_trans_restart_mem_realloced(trans->fn, _RET_IP_, new_bytes);
			btree_trans_restart(trans);
			return ERR_PTR(-EINTR);
		}
	}

	p = trans->mem + trans->mem_top;
	trans->mem_top += size;
	memset(p, 0, size);
	return p;
}

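/*
 * Allocations are bump allocated out of trans->mem, and all freed together
 * when the transaction is reset or exits. Callers must check for errors,
 * since reallocating the buffer mid transaction invalidates pointers into it
 * and forces a restart:
 *
 *	struct bkey_i *k = bch2_trans_kmalloc(trans, sizeof(*k));
 *	if (IS_ERR(k))
 *		return PTR_ERR(k);
 */
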
/**
 * bch2_trans_begin() - reset a transaction after an interrupted attempt
 * @trans: transaction to reset
 *
 * While iterating over nodes or updating nodes, an attempt to lock a btree
 * node may return EINTR when the trylock fails. When this occurs
 * bch2_trans_begin() should be called and the transaction retried.
 */
void bch2_trans_begin(struct btree_trans *trans)
{
	struct btree_insert_entry *i;
	struct btree_path *path;

	trans_for_each_update(trans, i)
		__btree_path_put(i->path, true);

	memset(&trans->journal_res, 0, sizeof(trans->journal_res));
	trans->extra_journal_res	= 0;
	trans->nr_updates		= 0;
	trans->mem_top			= 0;

	trans->hooks			= NULL;
	trans->extra_journal_entries	= NULL;
	trans->extra_journal_entry_u64s	= 0;

	if (trans->fs_usage_deltas) {
		trans->fs_usage_deltas->used = 0;
		memset((void *) trans->fs_usage_deltas +
		       offsetof(struct replicas_delta_list, memset_start), 0,
		       (void *) &trans->fs_usage_deltas->memset_end -
		       (void *) &trans->fs_usage_deltas->memset_start);
	}

	trans_for_each_path(trans, path) {
		/*
		 * XXX: we probably shouldn't be doing this if the transaction
		 * was restarted, but currently we still overflow transaction
		 * iterators if we do that
		 */
		if (!path->ref && !path->preserve)
			__bch2_path_free(trans, path);
		else
			path->preserve = path->should_be_locked = false;
	}

	bch2_trans_cond_resched(trans);

	if (trans->restarted)
		bch2_btree_path_traverse_all(trans);

	trans->restarted = false;
}

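/*
 * Sketch of the retry protocol, assuming the usual transaction helpers
 * (bch2_trans_init() from btree_iter.h, bch2_trans_commit() from
 * btree_update.h), with do_stuff() standing in for the caller's btree work:
 *
 *	bch2_trans_init(&trans, c, 0, 0);
 *	do {
 *		bch2_trans_begin(&trans);
 *		ret = do_stuff(&trans) ?:
 *		      bch2_trans_commit(&trans, NULL, NULL, 0);
 *	} while (ret == -EINTR);
 *	bch2_trans_exit(&trans);
 */
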
static void bch2_trans_alloc_paths(struct btree_trans *trans, struct bch_fs *c)
{
	size_t paths_bytes	= sizeof(struct btree_path) * BTREE_ITER_MAX;
	size_t updates_bytes	= sizeof(struct btree_insert_entry) * BTREE_ITER_MAX;
	void *p = NULL;

	BUG_ON(trans->used_mempool);

#ifdef __KERNEL__
	p = this_cpu_xchg(c->btree_paths_bufs->path , NULL);
#endif
	if (!p)
		p = mempool_alloc(&trans->c->btree_paths_pool, GFP_NOFS);

	trans->paths		= p; p += paths_bytes;
	trans->updates		= p; p += updates_bytes;
}

void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c,
		       unsigned expected_nr_iters,
		       size_t expected_mem_bytes,
		       const char *fn)
	__acquires(&c->btree_trans_barrier)
{
	memset(trans, 0, sizeof(*trans));
	trans->c		= c;
	trans->fn		= fn;
	trans->journal_replay_not_finished =
		!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags);

	bch2_trans_alloc_paths(trans, c);

	if (expected_mem_bytes) {
		expected_mem_bytes = roundup_pow_of_two(expected_mem_bytes);
		trans->mem = kmalloc(expected_mem_bytes, GFP_KERNEL);

		if (unlikely(!trans->mem)) {
			trans->mem = mempool_alloc(&c->btree_trans_mem_pool, GFP_KERNEL);
			trans->mem_bytes = BTREE_TRANS_MEM_MAX;
		} else {
			trans->mem_bytes = expected_mem_bytes;
		}
	}

	trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier);

	if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG_TRANSACTIONS)) {
		trans->pid = current->pid;
		mutex_lock(&c->btree_trans_lock);
		list_add(&trans->list, &c->btree_trans_list);
		mutex_unlock(&c->btree_trans_lock);
	}
}

static void check_btree_paths_leaked(struct btree_trans *trans)
{
#ifdef CONFIG_BCACHEFS_DEBUG
	struct bch_fs *c = trans->c;
	struct btree_path *path;

	trans_for_each_path(trans, path)
		if (path->ref)
			goto leaked;
	return;
leaked:
	bch_err(c, "btree paths leaked from %s!", trans->fn);
	trans_for_each_path(trans, path)
		if (path->ref)
			printk(KERN_ERR "  btree %s %pS\n",
			       bch2_btree_ids[path->btree_id],
			       (void *) path->ip_allocated);
	/* Be noisy about this: */
	bch2_fatal_error(c);
#endif
}

void bch2_trans_exit(struct btree_trans *trans)
	__releases(&c->btree_trans_barrier)
{
	struct btree_insert_entry *i;
	struct bch_fs *c = trans->c;

	bch2_trans_unlock(trans);

	trans_for_each_update(trans, i)
		__btree_path_put(i->path, true);
	trans->nr_updates		= 0;

	check_btree_paths_leaked(trans);

	if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG_TRANSACTIONS)) {
		mutex_lock(&c->btree_trans_lock);
		list_del(&trans->list);
		mutex_unlock(&c->btree_trans_lock);
	}

	srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);

	bch2_journal_preres_put(&c->journal, &trans->journal_preres);

	if (trans->fs_usage_deltas) {
		if (trans->fs_usage_deltas->size + sizeof(trans->fs_usage_deltas) ==
		    REPLICAS_DELTA_LIST_MAX)
			mempool_free(trans->fs_usage_deltas,
				     &c->replicas_delta_pool);
		else
			kfree(trans->fs_usage_deltas);
	}

	if (trans->mem_bytes == BTREE_TRANS_MEM_MAX)
		mempool_free(trans->mem, &c->btree_trans_mem_pool);
	else
		kfree(trans->mem);

#ifdef __KERNEL__
	/*
	 * Userspace doesn't have a real percpu implementation:
	 */
	trans->paths = this_cpu_xchg(c->btree_paths_bufs->path, trans->paths);
#endif

	if (trans->paths)
		mempool_free(trans->paths, &c->btree_paths_pool);

	trans->mem	= (void *) 0x1;
	trans->paths	= (void *) 0x1;
}

static void __maybe_unused
bch2_btree_path_node_to_text(struct printbuf *out,
			     struct btree_bkey_cached_common *_b,
			     bool cached)
{
	pr_buf(out, "    l=%u %s:",
	       _b->level, bch2_btree_ids[_b->btree_id]);
	bch2_bpos_to_text(out, btree_node_pos(_b, cached));
}

#ifdef CONFIG_BCACHEFS_DEBUG_TRANSACTIONS
static bool trans_has_locks(struct btree_trans *trans)
{
	struct btree_path *path;

	trans_for_each_path(trans, path)
		if (path->nodes_locked)
			return true;
	return false;
}
#endif

void bch2_btree_trans_to_text(struct printbuf *out, struct bch_fs *c)
{
#ifdef CONFIG_BCACHEFS_DEBUG_TRANSACTIONS
	struct btree_trans *trans;
	struct btree_path *path;
	struct btree *b;
	unsigned l;

	mutex_lock(&c->btree_trans_lock);
	list_for_each_entry(trans, &c->btree_trans_list, list) {
		if (!trans_has_locks(trans))
			continue;

		pr_buf(out, "%i %s\n", trans->pid, trans->fn);

		trans_for_each_path(trans, path) {
			if (!path->nodes_locked)
				continue;

			pr_buf(out, "  path %u %c l=%u %s:",
			       path->idx,
			       path->cached ? 'c' : 'b',
			       path->level,
			       bch2_btree_ids[path->btree_id]);
			bch2_bpos_to_text(out, path->pos);
			pr_buf(out, "\n");

			for (l = 0; l < BTREE_MAX_DEPTH; l++) {
				if (btree_node_locked(path, l)) {
					pr_buf(out, "    %s l=%u ",
					       btree_node_intent_locked(path, l) ? "i" : "r", l);
					bch2_btree_path_node_to_text(out,
							(void *) path->l[l].b,
							path->cached);
					pr_buf(out, "\n");
				}
			}
		}

		b = READ_ONCE(trans->locking);
		if (b) {
			path = &trans->paths[trans->locking_path_idx];
			pr_buf(out, "  locking path %u %c l=%u %s:",
			       trans->locking_path_idx,
			       path->cached ? 'c' : 'b',
			       trans->locking_level,
			       bch2_btree_ids[trans->locking_btree_id]);
			bch2_bpos_to_text(out, trans->locking_pos);

			pr_buf(out, " node ");
			bch2_btree_path_node_to_text(out,
					(void *) b, path->cached);
			pr_buf(out, "\n");
		}
	}
	mutex_unlock(&c->btree_trans_lock);
#endif
}

void bch2_fs_btree_iter_exit(struct bch_fs *c)
{
	if (c->btree_trans_barrier_initialized)
		cleanup_srcu_struct(&c->btree_trans_barrier);
	mempool_exit(&c->btree_trans_mem_pool);
	mempool_exit(&c->btree_paths_pool);
}

int bch2_fs_btree_iter_init(struct bch_fs *c)
{
	unsigned nr = BTREE_ITER_MAX;
	int ret;

	INIT_LIST_HEAD(&c->btree_trans_list);
	mutex_init(&c->btree_trans_lock);

	ret   = mempool_init_kmalloc_pool(&c->btree_paths_pool, 1,
			sizeof(struct btree_path) * nr +
			sizeof(struct btree_insert_entry) * nr) ?:
		mempool_init_kmalloc_pool(&c->btree_trans_mem_pool, 1,
					  BTREE_TRANS_MEM_MAX) ?:
		init_srcu_struct(&c->btree_trans_barrier);
	if (!ret)
		c->btree_trans_barrier_initialized = true;
	return ret;
}