Commit 5b191d99 authored by Sage Weil

libceph: decode into cpu-native ceph_pg type

Always decode data into our cpu-native ceph_pg type that has the correct
field widths.  Limit any remaining uses of ceph_pg_v1 to dealing with the
legacy protocol.
Signed-off-by: Sage Weil <sage@inktank.com>
Reviewed-by: Alex Elder <elder@inktank.com>
parent 12979354
...@@ -186,7 +186,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) ...@@ -186,7 +186,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
u64 len = 1, olen; u64 len = 1, olen;
u64 tmp; u64 tmp;
struct ceph_object_layout ol; struct ceph_object_layout ol;
struct ceph_pg_v1 pgid; struct ceph_pg pgid;
int r; int r;
/* copy and validate */ /* copy and validate */
...@@ -212,7 +212,8 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) ...@@ -212,7 +212,8 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
ceph_calc_object_layout(&ol, dl.object_name, &ci->i_layout, ceph_calc_object_layout(&ol, dl.object_name, &ci->i_layout,
osdc->osdmap); osdc->osdmap);
pgid = ol.ol_pgid; pgid.pool = le32_to_cpu(ol.ol_pgid.pool);
pgid.seed = le16_to_cpu(ol.ol_pgid.ps);
dl.osd = ceph_calc_pg_primary(osdc->osdmap, pgid); dl.osd = ceph_calc_pg_primary(osdc->osdmap, pgid);
if (dl.osd >= 0) { if (dl.osd >= 0) {
struct ceph_entity_addr *a = struct ceph_entity_addr *a =
......
...@@ -56,7 +56,7 @@ struct ceph_osd_request { ...@@ -56,7 +56,7 @@ struct ceph_osd_request {
struct list_head r_linger_item; struct list_head r_linger_item;
struct list_head r_linger_osd; struct list_head r_linger_osd;
struct ceph_osd *r_osd; struct ceph_osd *r_osd;
struct ceph_pg_v1 r_pgid; struct ceph_pg r_pgid;
int r_pg_osds[CEPH_PG_MAX_SIZE]; int r_pg_osds[CEPH_PG_MAX_SIZE];
int r_num_pg_osds; int r_num_pg_osds;
......
...@@ -18,6 +18,11 @@ ...@@ -18,6 +18,11 @@
* The map can be updated either via an incremental map (diff) describing * The map can be updated either via an incremental map (diff) describing
* the change between two successive epochs, or as a fully encoded map. * the change between two successive epochs, or as a fully encoded map.
*/ */
/*
 * CPU-native pg identifier (pgid).
 *
 * Both fields hold host-endian values.  The little-endian legacy form
 * (struct ceph_pg_v1) is converted into this type when it is decoded, so
 * code working with a struct ceph_pg needs no le*_to_cpu() calls and can
 * compare or print the fields directly.
 */
struct ceph_pg {
uint64_t pool;	/* pool id; decoded from the 32-bit ceph_pg_v1 pool field */
uint32_t seed;	/* pg seed; decoded from the 16-bit ceph_pg_v1 ps field, or set from an object-name hash */
};
struct ceph_pg_pool_info { struct ceph_pg_pool_info {
struct rb_node node; struct rb_node node;
int id; int id;
...@@ -28,7 +33,7 @@ struct ceph_pg_pool_info { ...@@ -28,7 +33,7 @@ struct ceph_pg_pool_info {
struct ceph_pg_mapping { struct ceph_pg_mapping {
struct rb_node node; struct rb_node node;
struct ceph_pg_v1 pgid; struct ceph_pg pgid;
int len; int len;
int osds[]; int osds[];
}; };
...@@ -119,10 +124,10 @@ extern int ceph_calc_object_layout(struct ceph_object_layout *ol, ...@@ -119,10 +124,10 @@ extern int ceph_calc_object_layout(struct ceph_object_layout *ol,
struct ceph_file_layout *fl, struct ceph_file_layout *fl,
struct ceph_osdmap *osdmap); struct ceph_osdmap *osdmap);
extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap,
struct ceph_pg_v1 pgid, struct ceph_pg pgid,
int *acting); int *acting);
extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap,
struct ceph_pg_v1 pgid); struct ceph_pg pgid);
extern const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id); extern const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id);
extern int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name); extern int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name);
......
...@@ -131,10 +131,9 @@ static int osdc_show(struct seq_file *s, void *pp) ...@@ -131,10 +131,9 @@ static int osdc_show(struct seq_file *s, void *pp)
req = rb_entry(p, struct ceph_osd_request, r_node); req = rb_entry(p, struct ceph_osd_request, r_node);
seq_printf(s, "%lld\tosd%d\t%d.%x\t", req->r_tid, seq_printf(s, "%lld\tosd%d\t%lld.%x\t", req->r_tid,
req->r_osd ? req->r_osd->o_osd : -1, req->r_osd ? req->r_osd->o_osd : -1,
le32_to_cpu(req->r_pgid.pool), req->r_pgid.pool, req->r_pgid.seed);
le16_to_cpu(req->r_pgid.ps));
head = req->r_request->front.iov_base; head = req->r_request->front.iov_base;
op = (void *)(head + 1); op = (void *)(head + 1);
......
...@@ -914,7 +914,7 @@ static int __map_request(struct ceph_osd_client *osdc, ...@@ -914,7 +914,7 @@ static int __map_request(struct ceph_osd_client *osdc,
struct ceph_osd_request *req, int force_resend) struct ceph_osd_request *req, int force_resend)
{ {
struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base; struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base;
struct ceph_pg_v1 pgid; struct ceph_pg pgid;
int acting[CEPH_PG_MAX_SIZE]; int acting[CEPH_PG_MAX_SIZE];
int o = -1, num = 0; int o = -1, num = 0;
int err; int err;
...@@ -926,7 +926,8 @@ static int __map_request(struct ceph_osd_client *osdc, ...@@ -926,7 +926,8 @@ static int __map_request(struct ceph_osd_client *osdc,
list_move(&req->r_req_lru_item, &osdc->req_notarget); list_move(&req->r_req_lru_item, &osdc->req_notarget);
return err; return err;
} }
pgid = reqhead->layout.ol_pgid; pgid.pool = le32_to_cpu(reqhead->layout.ol_pgid.pool);
pgid.seed = le16_to_cpu(reqhead->layout.ol_pgid.ps);
req->r_pgid = pgid; req->r_pgid = pgid;
err = ceph_calc_pg_acting(osdc->osdmap, pgid, acting); err = ceph_calc_pg_acting(osdc->osdmap, pgid, acting);
...@@ -943,8 +944,8 @@ static int __map_request(struct ceph_osd_client *osdc, ...@@ -943,8 +944,8 @@ static int __map_request(struct ceph_osd_client *osdc,
(req->r_osd == NULL && o == -1)) (req->r_osd == NULL && o == -1))
return 0; /* no change */ return 0; /* no change */
dout("map_request tid %llu pgid %d.%x osd%d (was osd%d)\n", dout("map_request tid %llu pgid %lld.%x osd%d (was osd%d)\n",
req->r_tid, le32_to_cpu(pgid.pool), le16_to_cpu(pgid.ps), o, req->r_tid, pgid.pool, pgid.seed, o,
req->r_osd ? req->r_osd->o_osd : -1); req->r_osd ? req->r_osd->o_osd : -1);
/* record full pg acting set */ /* record full pg acting set */
......
...@@ -350,14 +350,15 @@ static struct crush_map *crush_decode(void *pbyval, void *end) ...@@ -350,14 +350,15 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
* rbtree of pg_mapping for handling pg_temp (explicit mapping of pgid * rbtree of pg_mapping for handling pg_temp (explicit mapping of pgid
* to a set of osds) * to a set of osds)
*/ */
static int pgid_cmp(struct ceph_pg_v1 l, struct ceph_pg_v1 r) static int pgid_cmp(struct ceph_pg l, struct ceph_pg r)
{ {
u64 a = *(u64 *)&l; if (l.pool < r.pool)
u64 b = *(u64 *)&r; return -1;
if (l.pool > r.pool)
if (a < b) return 1;
if (l.seed < r.seed)
return -1; return -1;
if (a > b) if (l.seed > r.seed)
return 1; return 1;
return 0; return 0;
} }
...@@ -389,7 +390,7 @@ static int __insert_pg_mapping(struct ceph_pg_mapping *new, ...@@ -389,7 +390,7 @@ static int __insert_pg_mapping(struct ceph_pg_mapping *new,
} }
static struct ceph_pg_mapping *__lookup_pg_mapping(struct rb_root *root, static struct ceph_pg_mapping *__lookup_pg_mapping(struct rb_root *root,
struct ceph_pg_v1 pgid) struct ceph_pg pgid)
{ {
struct rb_node *n = root->rb_node; struct rb_node *n = root->rb_node;
struct ceph_pg_mapping *pg; struct ceph_pg_mapping *pg;
...@@ -403,25 +404,26 @@ static struct ceph_pg_mapping *__lookup_pg_mapping(struct rb_root *root, ...@@ -403,25 +404,26 @@ static struct ceph_pg_mapping *__lookup_pg_mapping(struct rb_root *root,
} else if (c > 0) { } else if (c > 0) {
n = n->rb_right; n = n->rb_right;
} else { } else {
dout("__lookup_pg_mapping %llx got %p\n", dout("__lookup_pg_mapping %lld.%x got %p\n",
*(u64 *)&pgid, pg); pgid.pool, pgid.seed, pg);
return pg; return pg;
} }
} }
return NULL; return NULL;
} }
static int __remove_pg_mapping(struct rb_root *root, struct ceph_pg_v1 pgid) static int __remove_pg_mapping(struct rb_root *root, struct ceph_pg pgid)
{ {
struct ceph_pg_mapping *pg = __lookup_pg_mapping(root, pgid); struct ceph_pg_mapping *pg = __lookup_pg_mapping(root, pgid);
if (pg) { if (pg) {
dout("__remove_pg_mapping %llx %p\n", *(u64 *)&pgid, pg); dout("__remove_pg_mapping %lld.%x %p\n", pgid.pool, pgid.seed,
pg);
rb_erase(&pg->node, root); rb_erase(&pg->node, root);
kfree(pg); kfree(pg);
return 0; return 0;
} }
dout("__remove_pg_mapping %llx dne\n", *(u64 *)&pgid); dout("__remove_pg_mapping %lld.%x dne\n", pgid.pool, pgid.seed);
return -ENOENT; return -ENOENT;
} }
...@@ -721,11 +723,14 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) ...@@ -721,11 +723,14 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
ceph_decode_32_safe(p, end, len, bad); ceph_decode_32_safe(p, end, len, bad);
for (i = 0; i < len; i++) { for (i = 0; i < len; i++) {
int n, j; int n, j;
struct ceph_pg_v1 pgid; struct ceph_pg pgid;
struct ceph_pg_v1 pgid_v1;
struct ceph_pg_mapping *pg; struct ceph_pg_mapping *pg;
ceph_decode_need(p, end, sizeof(u32) + sizeof(u64), bad); ceph_decode_need(p, end, sizeof(u32) + sizeof(u64), bad);
ceph_decode_copy(p, &pgid, sizeof(pgid)); ceph_decode_copy(p, &pgid_v1, sizeof(pgid_v1));
pgid.pool = le32_to_cpu(pgid_v1.pool);
pgid.seed = le16_to_cpu(pgid_v1.ps);
n = ceph_decode_32(p); n = ceph_decode_32(p);
err = -EINVAL; err = -EINVAL;
if (n > (UINT_MAX - sizeof(*pg)) / sizeof(u32)) if (n > (UINT_MAX - sizeof(*pg)) / sizeof(u32))
...@@ -743,7 +748,8 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) ...@@ -743,7 +748,8 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
err = __insert_pg_mapping(pg, &map->pg_temp); err = __insert_pg_mapping(pg, &map->pg_temp);
if (err) if (err)
goto bad; goto bad;
dout(" added pg_temp %llx len %d\n", *(u64 *)&pgid, len); dout(" added pg_temp %lld.%x len %d\n", pgid.pool, pgid.seed,
len);
} }
/* crush */ /* crush */
...@@ -944,10 +950,13 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, ...@@ -944,10 +950,13 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
while (len--) { while (len--) {
struct ceph_pg_mapping *pg; struct ceph_pg_mapping *pg;
int j; int j;
struct ceph_pg_v1 pgid; struct ceph_pg_v1 pgid_v1;
struct ceph_pg pgid;
u32 pglen; u32 pglen;
ceph_decode_need(p, end, sizeof(u64) + sizeof(u32), bad); ceph_decode_need(p, end, sizeof(u64) + sizeof(u32), bad);
ceph_decode_copy(p, &pgid, sizeof(pgid)); ceph_decode_copy(p, &pgid_v1, sizeof(pgid_v1));
pgid.pool = le32_to_cpu(pgid_v1.pool);
pgid.seed = le16_to_cpu(pgid_v1.ps);
pglen = ceph_decode_32(p); pglen = ceph_decode_32(p);
if (pglen) { if (pglen) {
...@@ -973,8 +982,8 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, ...@@ -973,8 +982,8 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
kfree(pg); kfree(pg);
goto bad; goto bad;
} }
dout(" added pg_temp %llx len %d\n", *(u64 *)&pgid, dout(" added pg_temp %lld.%x len %d\n", pgid.pool,
pglen); pgid.seed, pglen);
} else { } else {
/* remove */ /* remove */
__remove_pg_mapping(&map->pg_temp, pgid); __remove_pg_mapping(&map->pg_temp, pgid);
...@@ -1079,26 +1088,25 @@ int ceph_calc_object_layout(struct ceph_object_layout *ol, ...@@ -1079,26 +1088,25 @@ int ceph_calc_object_layout(struct ceph_object_layout *ol,
struct ceph_osdmap *osdmap) struct ceph_osdmap *osdmap)
{ {
unsigned int num, num_mask; unsigned int num, num_mask;
struct ceph_pg_v1 pgid; struct ceph_pg pgid;
int poolid = le32_to_cpu(fl->fl_pg_pool);
struct ceph_pg_pool_info *pool; struct ceph_pg_pool_info *pool;
unsigned int ps;
BUG_ON(!osdmap); BUG_ON(!osdmap);
pool = __lookup_pg_pool(&osdmap->pg_pools, poolid); pgid.pool = le32_to_cpu(fl->fl_pg_pool);
pool = __lookup_pg_pool(&osdmap->pg_pools, pgid.pool);
if (!pool) if (!pool)
return -EIO; return -EIO;
ps = ceph_str_hash(pool->v.object_hash, oid, strlen(oid)); pgid.seed = ceph_str_hash(pool->v.object_hash, oid, strlen(oid));
num = le32_to_cpu(pool->v.pg_num); num = le32_to_cpu(pool->v.pg_num);
num_mask = pool->pg_num_mask; num_mask = pool->pg_num_mask;
pgid.ps = cpu_to_le16(ps); dout("calc_object_layout '%s' pgid %lld.%x\n", oid, pgid.pool,
pgid.preferred = cpu_to_le16(-1); pgid.seed);
pgid.pool = fl->fl_pg_pool;
dout("calc_object_layout '%s' pgid %d.%x\n", oid, poolid, ps);
ol->ol_pgid = pgid; ol->ol_pgid.ps = cpu_to_le16(pgid.seed);
ol->ol_pgid.pool = fl->fl_pg_pool;
ol->ol_pgid.preferred = cpu_to_le16(-1);
ol->ol_stripe_unit = fl->fl_object_stripe_unit; ol->ol_stripe_unit = fl->fl_object_stripe_unit;
return 0; return 0;
} }
...@@ -1108,7 +1116,7 @@ EXPORT_SYMBOL(ceph_calc_object_layout); ...@@ -1108,7 +1116,7 @@ EXPORT_SYMBOL(ceph_calc_object_layout);
* Calculate raw osd vector for the given pgid. Return pointer to osd * Calculate raw osd vector for the given pgid. Return pointer to osd
* array, or NULL on failure. * array, or NULL on failure.
*/ */
static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg_v1 pgid, static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
int *osds, int *num) int *osds, int *num)
{ {
struct ceph_pg_mapping *pg; struct ceph_pg_mapping *pg;
...@@ -1116,8 +1124,8 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg_v1 pgid, ...@@ -1116,8 +1124,8 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg_v1 pgid,
int ruleno; int ruleno;
unsigned int poolid, ps, pps, t, r; unsigned int poolid, ps, pps, t, r;
poolid = le32_to_cpu(pgid.pool); poolid = pgid.pool;
ps = le16_to_cpu(pgid.ps); ps = pgid.seed;
pool = __lookup_pg_pool(&osdmap->pg_pools, poolid); pool = __lookup_pg_pool(&osdmap->pg_pools, poolid);
if (!pool) if (!pool)
...@@ -1126,7 +1134,7 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg_v1 pgid, ...@@ -1126,7 +1134,7 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg_v1 pgid,
/* pg_temp? */ /* pg_temp? */
t = ceph_stable_mod(ps, le32_to_cpu(pool->v.pg_num), t = ceph_stable_mod(ps, le32_to_cpu(pool->v.pg_num),
pool->pgp_num_mask); pool->pgp_num_mask);
pgid.ps = cpu_to_le16(t); pgid.seed = t;
pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid); pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid);
if (pg) { if (pg) {
*num = pg->len; *num = pg->len;
...@@ -1163,7 +1171,7 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg_v1 pgid, ...@@ -1163,7 +1171,7 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg_v1 pgid,
/* /*
* Return acting set for given pgid. * Return acting set for given pgid.
*/ */
int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg_v1 pgid, int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
int *acting) int *acting)
{ {
int rawosds[CEPH_PG_MAX_SIZE], *osds; int rawosds[CEPH_PG_MAX_SIZE], *osds;
...@@ -1184,7 +1192,7 @@ int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg_v1 pgid, ...@@ -1184,7 +1192,7 @@ int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg_v1 pgid,
/* /*
* Return primary osd for given pgid, or -1 if none. * Return primary osd for given pgid, or -1 if none.
*/ */
int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg_v1 pgid) int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid)
{ {
int rawosds[CEPH_PG_MAX_SIZE], *osds; int rawosds[CEPH_PG_MAX_SIZE], *osds;
int i, num = CEPH_PG_MAX_SIZE; int i, num = CEPH_PG_MAX_SIZE;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment