Commit a3e72262 authored and committed by Kent Overstreet

bcachefs: New varints

The previous varint implementation used by the inode code was not nearly
as fast as it could have been, partly because it attempted to encode
integers up to 96 bits (for timestamps), which meant that encoding and
decoding the length required a table lookup.

Instead, we'll just encode timestamps greater than 64 bits as two
separate varints; this will make decoding/encoding of inodes
significantly faster overall.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent b3d1e6ca
...@@ -57,4 +57,5 @@ bcachefs-y := \ ...@@ -57,4 +57,5 @@ bcachefs-y := \
tests.o \ tests.o \
trace.o \ trace.o \
util.o \ util.o \
varint.o \
xattr.o xattr.o
...@@ -673,10 +673,10 @@ struct bch_inode_generation { ...@@ -673,10 +673,10 @@ struct bch_inode_generation {
} __attribute__((packed, aligned(8))); } __attribute__((packed, aligned(8)));
#define BCH_INODE_FIELDS() \ #define BCH_INODE_FIELDS() \
x(bi_atime, 64) \ x(bi_atime, 96) \
x(bi_ctime, 64) \ x(bi_ctime, 96) \
x(bi_mtime, 64) \ x(bi_mtime, 96) \
x(bi_otime, 64) \ x(bi_otime, 96) \
x(bi_size, 64) \ x(bi_size, 64) \
x(bi_sectors, 64) \ x(bi_sectors, 64) \
x(bi_uid, 32) \ x(bi_uid, 32) \
...@@ -743,7 +743,8 @@ enum { ...@@ -743,7 +743,8 @@ enum {
#define BCH_INODE_UNLINKED (1 << __BCH_INODE_UNLINKED) #define BCH_INODE_UNLINKED (1 << __BCH_INODE_UNLINKED)
LE32_BITMASK(INODE_STR_HASH, struct bch_inode, bi_flags, 20, 24); LE32_BITMASK(INODE_STR_HASH, struct bch_inode, bi_flags, 20, 24);
LE32_BITMASK(INODE_NR_FIELDS, struct bch_inode, bi_flags, 24, 32); LE32_BITMASK(INODE_NR_FIELDS, struct bch_inode, bi_flags, 24, 31);
LE32_BITMASK(INODE_NEW_VARINT, struct bch_inode, bi_flags, 31, 32);
/* Dirents */ /* Dirents */
...@@ -1334,13 +1335,15 @@ LE64_BITMASK(BCH_SB_ERASURE_CODE, struct bch_sb, flags[3], 0, 16); ...@@ -1334,13 +1335,15 @@ LE64_BITMASK(BCH_SB_ERASURE_CODE, struct bch_sb, flags[3], 0, 16);
x(btree_ptr_v2, 11) \ x(btree_ptr_v2, 11) \
x(extents_above_btree_updates, 12) \ x(extents_above_btree_updates, 12) \
x(btree_updates_journalled, 13) \ x(btree_updates_journalled, 13) \
x(reflink_inline_data, 14) x(reflink_inline_data, 14) \
x(new_varint, 15)
#define BCH_SB_FEATURES_ALL \ #define BCH_SB_FEATURES_ALL \
((1ULL << BCH_FEATURE_new_siphash)| \ ((1ULL << BCH_FEATURE_new_siphash)| \
(1ULL << BCH_FEATURE_new_extent_overwrite)| \ (1ULL << BCH_FEATURE_new_extent_overwrite)| \
(1ULL << BCH_FEATURE_btree_ptr_v2)| \ (1ULL << BCH_FEATURE_btree_ptr_v2)| \
(1ULL << BCH_FEATURE_extents_above_btree_updates)) (1ULL << BCH_FEATURE_extents_above_btree_updates)|\
(1ULL << BCH_FEATURE_new_varint))\
enum bch_sb_feature { enum bch_sb_feature {
#define x(f, n) BCH_FEATURE_##f, #define x(f, n) BCH_FEATURE_##f,
......
...@@ -537,7 +537,7 @@ static int check_extents(struct bch_fs *c) ...@@ -537,7 +537,7 @@ static int check_extents(struct bch_fs *c)
bch2_trans_unlock(&trans); bch2_trans_unlock(&trans);
bch2_inode_pack(&p, &w.inode); bch2_inode_pack(c, &p, &w.inode);
ret = bch2_btree_insert(c, BTREE_ID_INODES, ret = bch2_btree_insert(c, BTREE_ID_INODES,
&p.inode.k_i, NULL, NULL, &p.inode.k_i, NULL, NULL,
...@@ -808,7 +808,7 @@ static int check_root(struct bch_fs *c, struct bch_inode_unpacked *root_inode) ...@@ -808,7 +808,7 @@ static int check_root(struct bch_fs *c, struct bch_inode_unpacked *root_inode)
0, NULL); 0, NULL);
root_inode->bi_inum = BCACHEFS_ROOT_INO; root_inode->bi_inum = BCACHEFS_ROOT_INO;
bch2_inode_pack(&packed, root_inode); bch2_inode_pack(c, &packed, root_inode);
return bch2_btree_insert(c, BTREE_ID_INODES, &packed.inode.k_i, return bch2_btree_insert(c, BTREE_ID_INODES, &packed.inode.k_i,
NULL, NULL, NULL, NULL,
...@@ -1326,7 +1326,7 @@ static int check_inode(struct btree_trans *trans, ...@@ -1326,7 +1326,7 @@ static int check_inode(struct btree_trans *trans,
if (do_update) { if (do_update) {
struct bkey_inode_buf p; struct bkey_inode_buf p;
bch2_inode_pack(&p, &u); bch2_inode_pack(c, &p, &u);
ret = __bch2_trans_do(trans, NULL, NULL, ret = __bch2_trans_do(trans, NULL, NULL,
BTREE_INSERT_NOFAIL| BTREE_INSERT_NOFAIL|
......
...@@ -8,6 +8,7 @@ ...@@ -8,6 +8,7 @@
#include "extents.h" #include "extents.h"
#include "inode.h" #include "inode.h"
#include "str_hash.h" #include "str_hash.h"
#include "varint.h"
#include <linux/random.h> #include <linux/random.h>
...@@ -89,22 +90,17 @@ static int inode_decode_field(const u8 *in, const u8 *end, ...@@ -89,22 +90,17 @@ static int inode_decode_field(const u8 *in, const u8 *end,
return bytes; return bytes;
} }
void bch2_inode_pack(struct bkey_inode_buf *packed, static noinline void bch2_inode_pack_v1(struct bkey_inode_buf *packed,
const struct bch_inode_unpacked *inode) const struct bch_inode_unpacked *inode)
{ {
u8 *out = packed->inode.v.fields; struct bkey_i_inode *k = &packed->inode;
u8 *out = k->v.fields;
u8 *end = (void *) &packed[1]; u8 *end = (void *) &packed[1];
u8 *last_nonzero_field = out; u8 *last_nonzero_field = out;
unsigned nr_fields = 0, last_nonzero_fieldnr = 0; unsigned nr_fields = 0, last_nonzero_fieldnr = 0;
unsigned bytes; unsigned bytes;
bkey_inode_init(&packed->inode.k_i); #define x(_name, _bits) \
packed->inode.k.p.offset = inode->bi_inum;
packed->inode.v.bi_hash_seed = inode->bi_hash_seed;
packed->inode.v.bi_flags = cpu_to_le32(inode->bi_flags);
packed->inode.v.bi_mode = cpu_to_le16(inode->bi_mode);
#define x(_name, _bits) \
out += inode_encode_field(out, end, 0, inode->_name); \ out += inode_encode_field(out, end, 0, inode->_name); \
nr_fields++; \ nr_fields++; \
\ \
...@@ -123,7 +119,69 @@ void bch2_inode_pack(struct bkey_inode_buf *packed, ...@@ -123,7 +119,69 @@ void bch2_inode_pack(struct bkey_inode_buf *packed,
set_bkey_val_bytes(&packed->inode.k, bytes); set_bkey_val_bytes(&packed->inode.k, bytes);
memset_u64s_tail(&packed->inode.v, 0, bytes); memset_u64s_tail(&packed->inode.v, 0, bytes);
SET_INODE_NR_FIELDS(&packed->inode.v, nr_fields); SET_INODE_NR_FIELDS(&k->v, nr_fields);
}
/*
 * Pack the BCH_INODE_FIELDS() members of @inode into @packed using the new
 * varint format.
 *
 * Every field is emitted as one varint; a zero field is the single byte 0.
 * Fields declared wider than 64 bits are followed by one extra zero byte
 * (the unpack side reads a second varint for such fields and requires it to
 * be zero).
 *
 * Trailing zero fields are not stored: the value is cut back to the end of
 * the last nonzero field, and INODE_NR_FIELDS records how many fields were
 * kept.
 */
static void bch2_inode_pack_v2(struct bkey_inode_buf *packed,
			       const struct bch_inode_unpacked *inode)
{
	struct bkey_i_inode *k = &packed->inode;
	u8 *const buf_end = (void *) &packed[1];
	u8 *pos = k->v.fields;
	u8 *trim_to = pos;
	unsigned fields_seen = 0, fields_kept = 0;
	unsigned val_bytes;

#define x(_name, _bits)							\
	fields_seen++;							\
									\
	if (!inode->_name) {						\
		*pos++ = 0;						\
	} else {							\
		pos += bch2_varint_encode(pos, inode->_name);		\
	}								\
									\
	if (_bits > 64)							\
		*pos++ = 0;						\
									\
	if (inode->_name) {						\
		trim_to = pos;						\
		fields_kept = fields_seen;				\
	}

	BCH_INODE_FIELDS()
#undef x
	/* Checked only after the fact: the writes above are not bounded. */
	BUG_ON(pos > buf_end);

	/* Drop everything after the last nonzero field. */
	val_bytes = trim_to - (u8 *) &k->v;
	set_bkey_val_bytes(&k->k, val_bytes);
	/* NOTE(review): presumably zeroes the padding up to a u64 boundary - confirm */
	memset_u64s_tail(&k->v, 0, val_bytes);

	SET_INODE_NR_FIELDS(&k->v, fields_kept);
}
void bch2_inode_pack(struct bch_fs *c,
struct bkey_inode_buf *packed,
const struct bch_inode_unpacked *inode)
{
bkey_inode_init(&packed->inode.k_i);
packed->inode.k.p.offset = inode->bi_inum;
packed->inode.v.bi_hash_seed = inode->bi_hash_seed;
packed->inode.v.bi_flags = cpu_to_le32(inode->bi_flags);
packed->inode.v.bi_mode = cpu_to_le16(inode->bi_mode);
if (c->sb.features & (1ULL << BCH_FEATURE_new_varint)) {
SET_INODE_NEW_VARINT(&packed->inode.v, true);
bch2_inode_pack_v2(packed, inode);
} else {
bch2_inode_pack_v1(packed, inode);
}
if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) { if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) {
struct bch_inode_unpacked unpacked; struct bch_inode_unpacked unpacked;
...@@ -135,26 +193,23 @@ void bch2_inode_pack(struct bkey_inode_buf *packed, ...@@ -135,26 +193,23 @@ void bch2_inode_pack(struct bkey_inode_buf *packed,
BUG_ON(unpacked.bi_hash_seed != inode->bi_hash_seed); BUG_ON(unpacked.bi_hash_seed != inode->bi_hash_seed);
BUG_ON(unpacked.bi_mode != inode->bi_mode); BUG_ON(unpacked.bi_mode != inode->bi_mode);
#define x(_name, _bits) BUG_ON(unpacked._name != inode->_name); #define x(_name, _bits) if (unpacked._name != inode->_name) \
panic("unpacked %llu should be %llu", \
(u64) unpacked._name, (u64) inode->_name);
BCH_INODE_FIELDS() BCH_INODE_FIELDS()
#undef x #undef x
} }
} }
int bch2_inode_unpack(struct bkey_s_c_inode inode, static noinline int bch2_inode_unpack_v1(struct bkey_s_c_inode inode,
struct bch_inode_unpacked *unpacked) struct bch_inode_unpacked *unpacked)
{ {
const u8 *in = inode.v->fields; const u8 *in = inode.v->fields;
const u8 *end = (void *) inode.v + bkey_val_bytes(inode.k); const u8 *end = bkey_val_end(inode);
u64 field[2]; u64 field[2];
unsigned fieldnr = 0, field_bits; unsigned fieldnr = 0, field_bits;
int ret; int ret;
unpacked->bi_inum = inode.k->p.offset;
unpacked->bi_hash_seed = inode.v->bi_hash_seed;
unpacked->bi_flags = le32_to_cpu(inode.v->bi_flags);
unpacked->bi_mode = le16_to_cpu(inode.v->bi_mode);
#define x(_name, _bits) \ #define x(_name, _bits) \
if (fieldnr++ == INODE_NR_FIELDS(inode.v)) { \ if (fieldnr++ == INODE_NR_FIELDS(inode.v)) { \
unsigned offset = offsetof(struct bch_inode_unpacked, _name);\ unsigned offset = offsetof(struct bch_inode_unpacked, _name);\
...@@ -177,6 +232,62 @@ int bch2_inode_unpack(struct bkey_s_c_inode inode, ...@@ -177,6 +232,62 @@ int bch2_inode_unpack(struct bkey_s_c_inode inode,
#undef x #undef x
/* XXX: signal if there were more fields than expected? */ /* XXX: signal if there were more fields than expected? */
return 0;
}
/*
 * Unpack the BCH_INODE_FIELDS() members of a new-varint inode into @unpacked.
 *
 * The first INODE_NR_FIELDS() fields are read from the key value, one varint
 * each, plus a second varint for fields declared wider than 64 bits. Fields
 * beyond that count are set to zero.
 *
 * Returns 0 on success. Returns the negative result of bch2_varint_decode()
 * if a varint cannot be decoded, and -1 if a decoded value does not fit in
 * the destination member or has a nonzero second (high) varint.
 */
static int bch2_inode_unpack_v2(struct bkey_s_c_inode inode,
				struct bch_inode_unpacked *unpacked)
{
	const u8 *pos = inode.v->fields;
	const u8 *const val_end = bkey_val_end(inode);
	unsigned idx = 0;
	u64 lo, hi;
	int len;

#define x(_name, _bits)							\
	lo = hi = 0;							\
									\
	if (idx < INODE_NR_FIELDS(inode.v)) {				\
		len = bch2_varint_decode(pos, val_end, &lo);		\
		if (len < 0)						\
			return len;					\
		pos += len;						\
									\
		if (_bits > 64) {					\
			len = bch2_varint_decode(pos, val_end, &hi);	\
			if (len < 0)					\
				return len;				\
			pos += len;					\
		}							\
	}								\
									\
	unpacked->_name = lo;						\
	if (hi || lo != unpacked->_name)				\
		return -1;						\
	idx++;

	BCH_INODE_FIELDS()
#undef x

	/* XXX: signal if there were more fields than expected? */
	return 0;
}
int bch2_inode_unpack(struct bkey_s_c_inode inode,
struct bch_inode_unpacked *unpacked)
{
unpacked->bi_inum = inode.k->p.offset;
unpacked->bi_hash_seed = inode.v->bi_hash_seed;
unpacked->bi_flags = le32_to_cpu(inode.v->bi_flags);
unpacked->bi_mode = le16_to_cpu(inode.v->bi_mode);
if (INODE_NEW_VARINT(inode.v)) {
return bch2_inode_unpack_v2(inode, unpacked);
} else {
return bch2_inode_unpack_v1(inode, unpacked);
}
return 0; return 0;
} }
...@@ -223,7 +334,7 @@ int bch2_inode_write(struct btree_trans *trans, ...@@ -223,7 +334,7 @@ int bch2_inode_write(struct btree_trans *trans,
if (IS_ERR(inode_p)) if (IS_ERR(inode_p))
return PTR_ERR(inode_p); return PTR_ERR(inode_p);
bch2_inode_pack(inode_p, inode); bch2_inode_pack(trans->c, inode_p, inode);
bch2_trans_update(trans, iter, &inode_p->inode.k_i, 0); bch2_trans_update(trans, iter, &inode_p->inode.k_i, 0);
return 0; return 0;
} }
...@@ -426,10 +537,7 @@ int bch2_inode_create(struct btree_trans *trans, ...@@ -426,10 +537,7 @@ int bch2_inode_create(struct btree_trans *trans,
inode_u->bi_inum = k.k->p.offset; inode_u->bi_inum = k.k->p.offset;
inode_u->bi_generation = bkey_generation(k); inode_u->bi_generation = bkey_generation(k);
bch2_inode_pack(inode_p, inode_u); return bch2_inode_write(trans, iter, inode_u);
bch2_trans_update(trans, iter, &inode_p->inode.k_i, 0);
bch2_trans_iter_put(trans, iter);
return 0;
} }
int bch2_inode_rm(struct bch_fs *c, u64 inode_nr) int bch2_inode_rm(struct bch_fs *c, u64 inode_nr)
...@@ -553,32 +661,3 @@ int bch2_inode_find_by_inum(struct bch_fs *c, u64 inode_nr, ...@@ -553,32 +661,3 @@ int bch2_inode_find_by_inum(struct bch_fs *c, u64 inode_nr,
return bch2_trans_do(c, NULL, NULL, 0, return bch2_trans_do(c, NULL, NULL, 0,
bch2_inode_find_by_inum_trans(&trans, inode_nr, inode)); bch2_inode_find_by_inum_trans(&trans, inode_nr, inode));
} }
#ifdef CONFIG_BCACHEFS_DEBUG
/*
 * Debug-only smoke test: pack an inode whose listed fields all hold the
 * maximum value for their width. The result is discarded; the test only
 * exercises bch2_inode_pack() on these inputs.
 */
void bch2_inode_pack_test(void)
{
	struct bch_inode_unpacked test_inodes[] = {
		{
			.bi_atime	= U64_MAX,
			.bi_ctime	= U64_MAX,
			.bi_mtime	= U64_MAX,
			.bi_otime	= U64_MAX,
			.bi_size	= U64_MAX,
			.bi_sectors	= U64_MAX,
			.bi_uid		= U32_MAX,
			.bi_gid		= U32_MAX,
			.bi_nlink	= U32_MAX,
			.bi_generation	= U32_MAX,
			.bi_dev		= U32_MAX,
		},
	};
	unsigned i;

	for (i = 0; i < ARRAY_SIZE(test_inodes); i++) {
		struct bkey_inode_buf p;

		bch2_inode_pack(&p, &test_inodes[i]);
	}
}
#endif
...@@ -24,6 +24,14 @@ void bch2_inode_generation_to_text(struct printbuf *, struct bch_fs *, ...@@ -24,6 +24,14 @@ void bch2_inode_generation_to_text(struct printbuf *, struct bch_fs *,
.val_to_text = bch2_inode_generation_to_text, \ .val_to_text = bch2_inode_generation_to_text, \
} }
#if 0
typedef struct {
u64 lo;
u32 hi;
} __packed __aligned(4) u96;
#endif
typedef u64 u96;
struct bch_inode_unpacked { struct bch_inode_unpacked {
u64 bi_inum; u64 bi_inum;
__le64 bi_hash_seed; __le64 bi_hash_seed;
...@@ -43,7 +51,8 @@ struct bkey_inode_buf { ...@@ -43,7 +51,8 @@ struct bkey_inode_buf {
#undef x #undef x
} __attribute__((packed, aligned(8))); } __attribute__((packed, aligned(8)));
void bch2_inode_pack(struct bkey_inode_buf *, const struct bch_inode_unpacked *); void bch2_inode_pack(struct bch_fs *, struct bkey_inode_buf *,
const struct bch_inode_unpacked *);
int bch2_inode_unpack(struct bkey_s_c_inode, struct bch_inode_unpacked *); int bch2_inode_unpack(struct bkey_s_c_inode, struct bch_inode_unpacked *);
struct btree_iter *bch2_inode_peek(struct btree_trans *, struct btree_iter *bch2_inode_peek(struct btree_trans *,
...@@ -166,10 +175,4 @@ static inline void bch2_inode_nlink_set(struct bch_inode_unpacked *bi, ...@@ -166,10 +175,4 @@ static inline void bch2_inode_nlink_set(struct bch_inode_unpacked *bi,
} }
} }
#ifdef CONFIG_BCACHEFS_DEBUG
void bch2_inode_pack_test(void);
#else
static inline void bch2_inode_pack_test(void) {}
#endif
#endif /* _BCACHEFS_INODE_H */ #endif /* _BCACHEFS_INODE_H */
...@@ -310,7 +310,7 @@ int bch2_extent_update(struct btree_trans *trans, ...@@ -310,7 +310,7 @@ int bch2_extent_update(struct btree_trans *trans,
inode_u.bi_sectors += delta; inode_u.bi_sectors += delta;
if (delta || new_i_size) { if (delta || new_i_size) {
bch2_inode_pack(&inode_p, &inode_u); bch2_inode_pack(trans->c, &inode_p, &inode_u);
bch2_trans_update(trans, inode_iter, bch2_trans_update(trans, inode_iter,
&inode_p.inode.k_i, 0); &inode_p.inode.k_i, 0);
} }
......
...@@ -1320,7 +1320,7 @@ int bch2_fs_initialize(struct bch_fs *c) ...@@ -1320,7 +1320,7 @@ int bch2_fs_initialize(struct bch_fs *c)
bch2_inode_init(c, &root_inode, 0, 0, bch2_inode_init(c, &root_inode, 0, 0,
S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0, NULL); S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0, NULL);
root_inode.bi_inum = BCACHEFS_ROOT_INO; root_inode.bi_inum = BCACHEFS_ROOT_INO;
bch2_inode_pack(&packed_inode, &root_inode); bch2_inode_pack(c, &packed_inode, &root_inode);
err = "error creating root directory"; err = "error creating root directory";
ret = bch2_btree_insert(c, BTREE_ID_INODES, ret = bch2_btree_insert(c, BTREE_ID_INODES,
......
...@@ -2027,7 +2027,6 @@ static void bcachefs_exit(void) ...@@ -2027,7 +2027,6 @@ static void bcachefs_exit(void)
static int __init bcachefs_init(void) static int __init bcachefs_init(void)
{ {
bch2_bkey_pack_test(); bch2_bkey_pack_test();
bch2_inode_pack_test();
if (!(bcachefs_kset = kset_create_and_add("bcachefs", NULL, fs_kobj)) || if (!(bcachefs_kset = kset_create_and_add("bcachefs", NULL, fs_kobj)) ||
bch2_chardev_init() || bch2_chardev_init() ||
......
// SPDX-License-Identifier: GPL-2.0
#include <linux/bitops.h>
#include <linux/math.h>
#include <asm/unaligned.h>
#include "varint.h"
/**
 * bch2_varint_encode - encode a u64 as a variable-length integer
 * @out:	destination buffer
 * @v:		value to encode
 *
 * Format: the number of low-order one bits in the first byte, plus one, is
 * the total encoded length.
 *
 *  - Lengths 1-8 (values of up to 56 bits): the value is shifted left by the
 *    length, the low bits are filled with the length marker, and the result
 *    is stored little endian in exactly that many bytes.
 *  - Length 9: a 0xff byte followed by the full 64-bit value, little endian.
 *
 * Exactly the returned number of bytes is written to @out; nothing past the
 * end of the encoding is touched, so the caller only needs room for the
 * encoding itself (at most 9 bytes).
 *
 * Returns: the encoded length in bytes (1-9).
 */
int bch2_varint_encode(u8 *out, u64 v)
{
	unsigned bits = fls64(v|1);
	unsigned bytes = DIV_ROUND_UP(bits, 7);
	unsigned i;

	if (likely(bytes < 9)) {
		v <<= bytes;
		/* length marker: (bytes - 1) one bits, then a zero bit */
		v |= ~(~0ULL << (bytes - 1));

		/*
		 * Store only the bytes that belong to the encoding. A full
		 * 8-byte store here would scribble past a short encoding and
		 * potentially past the end of the caller's buffer.
		 */
		for (i = 0; i < bytes; i++) {
			out[i] = v & 0xff;
			v >>= 8;
		}
	} else {
		*out++ = 255;
		bytes = 9;
		put_unaligned_le64(v, out);
	}

	return bytes;
}
/**
 * bch2_varint_decode - decode a variable-length integer
 * @in:		start of the encoded integer
 * @end:	end of the buffer @in points into; no byte at or past @end
 *		is read
 * @out:	where to store the decoded value (written only on success)
 *
 * The number of low-order one bits in the first byte, plus one, is the
 * encoded length. Lengths 1-8 carry the value in the bits above the length
 * marker, little endian; length 9 is a 0xff byte followed by a full
 * little-endian u64.
 *
 * Returns: the number of bytes consumed (1-9), or -1 if the encoding would
 * extend past @end (including when @in is already at @end).
 */
int bch2_varint_decode(const u8 *in, const u8 *end, u64 *out)
{
	unsigned bytes, i;
	u64 v = 0;

	/* The first byte is needed to learn the length; make sure it exists. */
	if (unlikely(in >= end))
		return -1;

	bytes = ffz(*in) + 1;

	/* Compare lengths rather than forming a pointer past the buffer. */
	if (unlikely(bytes > (unsigned long) (end - in)))
		return -1;

	if (likely(bytes < 9)) {
		/*
		 * Assemble exactly @bytes bytes, little endian. An
		 * unconditional 8-byte load could read past @end.
		 */
		for (i = 0; i < bytes; i++)
			v |= (u64) in[i] << (8 * i);

		/* drop the length marker; what remains is the value */
		v >>= bytes;
	} else {
		v = get_unaligned_le64(in + 1);
	}

	*out = v;
	return bytes;
}
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_VARINT_H
#define _BCACHEFS_VARINT_H
/*
 * Encode the u64 (second argument) as a varint at the given output pointer.
 * Returns the length of the encoding in bytes, which is between 1 and 9.
 */
int bch2_varint_encode(u8 *, u64);
/*
 * Decode a varint starting at the first pointer; the second pointer marks the
 * end of the input buffer. On success the value is stored through the third
 * argument and the number of bytes consumed is returned; -1 is returned if
 * the encoding would extend past the end pointer.
 */
int bch2_varint_decode(const u8 *, const u8 *, u64 *);
#endif /* _BCACHEFS_VARINT_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment