Commit a3e72262, authored and committed by Kent Overstreet

bcachefs: New varints

The previous varint implementation used by the inode code was not nearly
as fast as it could have been, partly because it attempted to encode
integers of up to 96 bits (for timestamps), which meant that encoding and
decoding the length required a table lookup.

Instead, we'll just encode timestamps greater than 64 bits as two
separate varints; this will make decoding/encoding of inodes
significantly faster overall.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent b3d1e6ca
......@@ -57,4 +57,5 @@ bcachefs-y := \
tests.o \
trace.o \
util.o \
varint.o \
xattr.o
......@@ -673,10 +673,10 @@ struct bch_inode_generation {
} __attribute__((packed, aligned(8)));
#define BCH_INODE_FIELDS() \
x(bi_atime, 64) \
x(bi_ctime, 64) \
x(bi_mtime, 64) \
x(bi_otime, 64) \
x(bi_atime, 96) \
x(bi_ctime, 96) \
x(bi_mtime, 96) \
x(bi_otime, 96) \
x(bi_size, 64) \
x(bi_sectors, 64) \
x(bi_uid, 32) \
......@@ -743,7 +743,8 @@ enum {
#define BCH_INODE_UNLINKED (1 << __BCH_INODE_UNLINKED)
LE32_BITMASK(INODE_STR_HASH, struct bch_inode, bi_flags, 20, 24);
LE32_BITMASK(INODE_NR_FIELDS, struct bch_inode, bi_flags, 24, 32);
LE32_BITMASK(INODE_NR_FIELDS, struct bch_inode, bi_flags, 24, 31);
LE32_BITMASK(INODE_NEW_VARINT, struct bch_inode, bi_flags, 31, 32);
/* Dirents */
......@@ -1334,13 +1335,15 @@ LE64_BITMASK(BCH_SB_ERASURE_CODE, struct bch_sb, flags[3], 0, 16);
x(btree_ptr_v2, 11) \
x(extents_above_btree_updates, 12) \
x(btree_updates_journalled, 13) \
x(reflink_inline_data, 14)
x(reflink_inline_data, 14) \
x(new_varint, 15)
#define BCH_SB_FEATURES_ALL \
((1ULL << BCH_FEATURE_new_siphash)| \
(1ULL << BCH_FEATURE_new_extent_overwrite)| \
(1ULL << BCH_FEATURE_btree_ptr_v2)| \
(1ULL << BCH_FEATURE_extents_above_btree_updates))
(1ULL << BCH_FEATURE_extents_above_btree_updates)|\
(1ULL << BCH_FEATURE_new_varint))\
enum bch_sb_feature {
#define x(f, n) BCH_FEATURE_##f,
......
......@@ -537,7 +537,7 @@ static int check_extents(struct bch_fs *c)
bch2_trans_unlock(&trans);
bch2_inode_pack(&p, &w.inode);
bch2_inode_pack(c, &p, &w.inode);
ret = bch2_btree_insert(c, BTREE_ID_INODES,
&p.inode.k_i, NULL, NULL,
......@@ -808,7 +808,7 @@ static int check_root(struct bch_fs *c, struct bch_inode_unpacked *root_inode)
0, NULL);
root_inode->bi_inum = BCACHEFS_ROOT_INO;
bch2_inode_pack(&packed, root_inode);
bch2_inode_pack(c, &packed, root_inode);
return bch2_btree_insert(c, BTREE_ID_INODES, &packed.inode.k_i,
NULL, NULL,
......@@ -1326,7 +1326,7 @@ static int check_inode(struct btree_trans *trans,
if (do_update) {
struct bkey_inode_buf p;
bch2_inode_pack(&p, &u);
bch2_inode_pack(c, &p, &u);
ret = __bch2_trans_do(trans, NULL, NULL,
BTREE_INSERT_NOFAIL|
......
......@@ -8,6 +8,7 @@
#include "extents.h"
#include "inode.h"
#include "str_hash.h"
#include "varint.h"
#include <linux/random.h>
......@@ -89,22 +90,17 @@ static int inode_decode_field(const u8 *in, const u8 *end,
return bytes;
}
void bch2_inode_pack(struct bkey_inode_buf *packed,
const struct bch_inode_unpacked *inode)
static noinline void bch2_inode_pack_v1(struct bkey_inode_buf *packed,
const struct bch_inode_unpacked *inode)
{
u8 *out = packed->inode.v.fields;
struct bkey_i_inode *k = &packed->inode;
u8 *out = k->v.fields;
u8 *end = (void *) &packed[1];
u8 *last_nonzero_field = out;
unsigned nr_fields = 0, last_nonzero_fieldnr = 0;
unsigned bytes;
bkey_inode_init(&packed->inode.k_i);
packed->inode.k.p.offset = inode->bi_inum;
packed->inode.v.bi_hash_seed = inode->bi_hash_seed;
packed->inode.v.bi_flags = cpu_to_le32(inode->bi_flags);
packed->inode.v.bi_mode = cpu_to_le16(inode->bi_mode);
#define x(_name, _bits) \
#define x(_name, _bits) \
out += inode_encode_field(out, end, 0, inode->_name); \
nr_fields++; \
\
......@@ -123,7 +119,69 @@ void bch2_inode_pack(struct bkey_inode_buf *packed,
set_bkey_val_bytes(&packed->inode.k, bytes);
memset_u64s_tail(&packed->inode.v, 0, bytes);
SET_INODE_NR_FIELDS(&packed->inode.v, nr_fields);
SET_INODE_NR_FIELDS(&k->v, nr_fields);
}
/*
 * Pack the fields of @inode into @packed using bch2_varint_encode().
 *
 * Every field listed in BCH_INODE_FIELDS() is emitted in order: a nonzero
 * field as one varint, a zero field as a single zero byte. Fields declared
 * wider than 64 bits are followed by one additional zero byte.
 *
 * Trailing zero fields are not stored: the value is cut off right after the
 * last nonzero field, and the number of fields up to and including that one
 * is what gets recorded with SET_INODE_NR_FIELDS().
 */
static void bch2_inode_pack_v2(struct bkey_inode_buf *packed,
			       const struct bch_inode_unpacked *inode)
{
	struct bkey_i_inode *k = &packed->inode;
	u8 *const buf_end = (void *) &packed[1];
	u8 *pos = k->v.fields;
	u8 *nonzero_end = pos;
	unsigned fields_seen = 0, nonzero_fields = 0;
	unsigned val_bytes;

#define x(_name, _bits)							\
	fields_seen++;							\
									\
	if (inode->_name)						\
		pos += bch2_varint_encode(pos, inode->_name);		\
	else								\
		*pos++ = 0;						\
									\
	if (_bits > 64)							\
		*pos++ = 0;						\
									\
	if (inode->_name) {						\
		nonzero_end	= pos;					\
		nonzero_fields	= fields_seen;				\
	}

	BCH_INODE_FIELDS()
#undef  x
	/* Overflow of the on-stack buffer is only detected after the fact */
	BUG_ON(pos > buf_end);

	/* Value ends after the last nonzero field */
	val_bytes = nonzero_end - (u8 *) &k->v;
	set_bkey_val_bytes(&k->k, val_bytes);
	memset_u64s_tail(&k->v, 0, val_bytes);

	SET_INODE_NR_FIELDS(&k->v, nonzero_fields);
}
void bch2_inode_pack(struct bch_fs *c,
struct bkey_inode_buf *packed,
const struct bch_inode_unpacked *inode)
{
bkey_inode_init(&packed->inode.k_i);
packed->inode.k.p.offset = inode->bi_inum;
packed->inode.v.bi_hash_seed = inode->bi_hash_seed;
packed->inode.v.bi_flags = cpu_to_le32(inode->bi_flags);
packed->inode.v.bi_mode = cpu_to_le16(inode->bi_mode);
if (c->sb.features & (1ULL << BCH_FEATURE_new_varint)) {
SET_INODE_NEW_VARINT(&packed->inode.v, true);
bch2_inode_pack_v2(packed, inode);
} else {
bch2_inode_pack_v1(packed, inode);
}
if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) {
struct bch_inode_unpacked unpacked;
......@@ -135,26 +193,23 @@ void bch2_inode_pack(struct bkey_inode_buf *packed,
BUG_ON(unpacked.bi_hash_seed != inode->bi_hash_seed);
BUG_ON(unpacked.bi_mode != inode->bi_mode);
#define x(_name, _bits) BUG_ON(unpacked._name != inode->_name);
#define x(_name, _bits) if (unpacked._name != inode->_name) \
panic("unpacked %llu should be %llu", \
(u64) unpacked._name, (u64) inode->_name);
BCH_INODE_FIELDS()
#undef x
}
}
int bch2_inode_unpack(struct bkey_s_c_inode inode,
struct bch_inode_unpacked *unpacked)
static noinline int bch2_inode_unpack_v1(struct bkey_s_c_inode inode,
struct bch_inode_unpacked *unpacked)
{
const u8 *in = inode.v->fields;
const u8 *end = (void *) inode.v + bkey_val_bytes(inode.k);
const u8 *end = bkey_val_end(inode);
u64 field[2];
unsigned fieldnr = 0, field_bits;
int ret;
unpacked->bi_inum = inode.k->p.offset;
unpacked->bi_hash_seed = inode.v->bi_hash_seed;
unpacked->bi_flags = le32_to_cpu(inode.v->bi_flags);
unpacked->bi_mode = le16_to_cpu(inode.v->bi_mode);
#define x(_name, _bits) \
if (fieldnr++ == INODE_NR_FIELDS(inode.v)) { \
unsigned offset = offsetof(struct bch_inode_unpacked, _name);\
......@@ -177,6 +232,62 @@ int bch2_inode_unpack(struct bkey_s_c_inode inode,
#undef x
/* XXX: signal if there were more fields than expected? */
return 0;
}
/*
 * Unpack the varint-encoded fields of @inode into @unpacked.
 *
 * The first INODE_NR_FIELDS(inode.v) fields of BCH_INODE_FIELDS() are decoded
 * with bch2_varint_decode(); fields declared wider than 64 bits are stored as
 * two consecutive varints. Fields beyond the stored count are set to zero.
 *
 * Returns 0 on success, the (negative) error from bch2_varint_decode() if a
 * varint could not be decoded, or -1 if a decoded value does not fit in the
 * corresponding member of struct bch_inode_unpacked (including a nonzero
 * second varint).
 */
static int bch2_inode_unpack_v2(struct bkey_s_c_inode inode,
				struct bch_inode_unpacked *unpacked)
{
	const u8 *pos = inode.v->fields;
	const u8 *val_end = bkey_val_end(inode);
	const unsigned nr_packed = INODE_NR_FIELDS(inode.v);
	unsigned idx = 0;
	u64 lo, hi;
	int len;

#define x(_name, _bits)							\
	lo = hi = 0;							\
									\
	if (idx < nr_packed) {						\
		len = bch2_varint_decode(pos, val_end, &lo);		\
		if (len < 0)						\
			return len;					\
		pos += len;						\
									\
		if (_bits > 64) {					\
			len = bch2_varint_decode(pos, val_end, &hi);	\
			if (len < 0)					\
				return len;				\
			pos += len;					\
		}							\
	}								\
									\
	unpacked->_name = lo;						\
	/* Reject values that were truncated by the assignment above */	\
	if (hi || lo != unpacked->_name)				\
		return -1;						\
	idx++;

	BCH_INODE_FIELDS()
#undef  x

	/* XXX: signal if there were more fields than expected? */

	return 0;
}
/*
 * Unpack an inode key into @unpacked.
 *
 * The fixed members (inode number, hash seed, flags, mode) are copied
 * directly from the key and value; the remaining fields are decoded by
 * bch2_inode_unpack_v2() when the INODE_NEW_VARINT flag is set on the value,
 * and by bch2_inode_unpack_v1() otherwise.
 *
 * Returns the result of the selected field decoder.
 */
int bch2_inode_unpack(struct bkey_s_c_inode inode,
		      struct bch_inode_unpacked *unpacked)
{
	unpacked->bi_inum	= inode.k->p.offset;
	unpacked->bi_hash_seed	= inode.v->bi_hash_seed;
	unpacked->bi_flags	= le32_to_cpu(inode.v->bi_flags);
	unpacked->bi_mode	= le16_to_cpu(inode.v->bi_mode);

	if (INODE_NEW_VARINT(inode.v))
		return bch2_inode_unpack_v2(inode, unpacked);

	return bch2_inode_unpack_v1(inode, unpacked);
}
......@@ -223,7 +334,7 @@ int bch2_inode_write(struct btree_trans *trans,
if (IS_ERR(inode_p))
return PTR_ERR(inode_p);
bch2_inode_pack(inode_p, inode);
bch2_inode_pack(trans->c, inode_p, inode);
bch2_trans_update(trans, iter, &inode_p->inode.k_i, 0);
return 0;
}
......@@ -426,10 +537,7 @@ int bch2_inode_create(struct btree_trans *trans,
inode_u->bi_inum = k.k->p.offset;
inode_u->bi_generation = bkey_generation(k);
bch2_inode_pack(inode_p, inode_u);
bch2_trans_update(trans, iter, &inode_p->inode.k_i, 0);
bch2_trans_iter_put(trans, iter);
return 0;
return bch2_inode_write(trans, iter, inode_u);
}
int bch2_inode_rm(struct bch_fs *c, u64 inode_nr)
......@@ -553,32 +661,3 @@ int bch2_inode_find_by_inum(struct bch_fs *c, u64 inode_nr,
return bch2_trans_do(c, NULL, NULL, 0,
bch2_inode_find_by_inum_trans(&trans, inode_nr, inode));
}
#ifdef CONFIG_BCACHEFS_DEBUG
void bch2_inode_pack_test(void)
{
struct bch_inode_unpacked *u, test_inodes[] = {
{
.bi_atime = U64_MAX,
.bi_ctime = U64_MAX,
.bi_mtime = U64_MAX,
.bi_otime = U64_MAX,
.bi_size = U64_MAX,
.bi_sectors = U64_MAX,
.bi_uid = U32_MAX,
.bi_gid = U32_MAX,
.bi_nlink = U32_MAX,
.bi_generation = U32_MAX,
.bi_dev = U32_MAX,
},
};
for (u = test_inodes;
u < test_inodes + ARRAY_SIZE(test_inodes);
u++) {
struct bkey_inode_buf p;
bch2_inode_pack(&p, u);
}
}
#endif
......@@ -24,6 +24,14 @@ void bch2_inode_generation_to_text(struct printbuf *, struct bch_fs *,
.val_to_text = bch2_inode_generation_to_text, \
}
#if 0
typedef struct {
u64 lo;
u32 hi;
} __packed __aligned(4) u96;
#endif
typedef u64 u96;
struct bch_inode_unpacked {
u64 bi_inum;
__le64 bi_hash_seed;
......@@ -43,7 +51,8 @@ struct bkey_inode_buf {
#undef x
} __attribute__((packed, aligned(8)));
void bch2_inode_pack(struct bkey_inode_buf *, const struct bch_inode_unpacked *);
void bch2_inode_pack(struct bch_fs *, struct bkey_inode_buf *,
const struct bch_inode_unpacked *);
int bch2_inode_unpack(struct bkey_s_c_inode, struct bch_inode_unpacked *);
struct btree_iter *bch2_inode_peek(struct btree_trans *,
......@@ -166,10 +175,4 @@ static inline void bch2_inode_nlink_set(struct bch_inode_unpacked *bi,
}
}
#ifdef CONFIG_BCACHEFS_DEBUG
void bch2_inode_pack_test(void);
#else
static inline void bch2_inode_pack_test(void) {}
#endif
#endif /* _BCACHEFS_INODE_H */
......@@ -310,7 +310,7 @@ int bch2_extent_update(struct btree_trans *trans,
inode_u.bi_sectors += delta;
if (delta || new_i_size) {
bch2_inode_pack(&inode_p, &inode_u);
bch2_inode_pack(trans->c, &inode_p, &inode_u);
bch2_trans_update(trans, inode_iter,
&inode_p.inode.k_i, 0);
}
......
......@@ -1320,7 +1320,7 @@ int bch2_fs_initialize(struct bch_fs *c)
bch2_inode_init(c, &root_inode, 0, 0,
S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0, NULL);
root_inode.bi_inum = BCACHEFS_ROOT_INO;
bch2_inode_pack(&packed_inode, &root_inode);
bch2_inode_pack(c, &packed_inode, &root_inode);
err = "error creating root directory";
ret = bch2_btree_insert(c, BTREE_ID_INODES,
......
......@@ -2027,7 +2027,6 @@ static void bcachefs_exit(void)
static int __init bcachefs_init(void)
{
bch2_bkey_pack_test();
bch2_inode_pack_test();
if (!(bcachefs_kset = kset_create_and_add("bcachefs", NULL, fs_kobj)) ||
bch2_chardev_init() ||
......
// SPDX-License-Identifier: GPL-2.0
#include <linux/bitops.h>
#include <linux/math.h>
#include <asm/unaligned.h>
#include "varint.h"
/**
 * bch2_varint_encode - encode a u64 as a variable length integer
 * @out:	destination buffer
 * @v:		value to encode
 *
 * The length of the encoding is stored in unary in the low bits of the first
 * byte: an n byte encoding (n < 9) has its low n - 1 bits set and bit n - 1
 * clear, with the value stored in the bits above that, little endian. Values
 * that do not fit in 8 bytes this way are stored as a 0xff byte followed by
 * the raw 64 bit value, little endian.
 *
 * Only the bytes that make up the encoding are written, so @out never needs
 * room for more than 9 bytes and nothing past the encoding is clobbered.
 *
 * Returns: the number of bytes written (1 to 9).
 */
int bch2_varint_encode(u8 *out, u64 v)
{
	unsigned bits = fls64(v|1);
	unsigned bytes = DIV_ROUND_UP(bits, 7);
	unsigned i;

	if (likely(bytes < 9)) {
		v <<= bytes;
		/* unsigned 64 bit mask: shifting a negative int is undefined */
		v |= ~(~0ULL << (bytes - 1));

		/* little endian store of exactly @bytes bytes */
		for (i = 0; i < bytes; i++) {
			out[i] = (u8) v;
			v >>= 8;
		}
	} else {
		*out++ = 255;
		bytes = 9;
		put_unaligned_le64(v, out);
	}

	return bytes;
}
/**
 * bch2_varint_decode - decode a variable length integer
 * @in:		start of the encoded integer
 * @end:	end of the input buffer (one past the last valid byte)
 * @out:	on success, the decoded value
 *
 * The length of the encoding is taken from the first byte (the position of
 * its lowest clear bit, plus one; 0xff means a 9 byte encoding). No byte at
 * or past @end is read.
 *
 * Returns: the number of bytes consumed (1 to 9), or -1 if the input is empty
 * or the encoding would extend past @end; @out is not written on failure.
 */
int bch2_varint_decode(const u8 *in, const u8 *end, u64 *out)
{
	unsigned bytes, i;
	u64 v = 0;

	/* Need at least the first byte to learn the length */
	if (unlikely(in >= end))
		return -1;

	bytes = ffz(*in) + 1;

	if (unlikely(in + bytes > end))
		return -1;

	if (likely(bytes < 9)) {
		/* little endian load of exactly @bytes bytes */
		for (i = 0; i < bytes; i++)
			v |= (u64) in[i] << (8 * i);

		/* drop the unary length prefix */
		v >>= bytes;
	} else {
		v = get_unaligned_le64(++in);
	}

	*out = v;
	return bytes;
}
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_VARINT_H
#define _BCACHEFS_VARINT_H
/*
 * Encode the u64 as a varint at the given output pointer; returns the length
 * of the encoding in bytes.
 */
int bch2_varint_encode(u8 *, u64);
/*
 * Decode a varint starting at the first pointer, with the second pointer
 * marking the end of the input. On success the value is stored through the
 * u64 pointer and the encoded length in bytes is returned; returns -1 if the
 * encoding would extend past the end of the input.
 */
int bch2_varint_decode(const u8 *, const u8 *, u64 *);
#endif /* _BCACHEFS_VARINT_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment