Commit df8e96f3 authored by David Woodhouse's avatar David Woodhouse

[JFFS2] Improve read_inode memory usage, v2.

We originally used to read every node and allocate a jffs2_tmp_dnode_info
structure for each, before processing them in (reverse) version order
and discarding the ones which are obsoleted by later nodes.

With huge logfiles, this behaviour caused memory problems. For example, a
file involved in OLPC trac #1292 has 1822391 nodes, and would cause the XO
machine to run out of memory during the first stage of read_inode().

Instead of just inserting nodes into a tree in version order as we find
them, we now put them into a tree in order of their offset within the
file, which allows us to immediately discard nodes which are completely
obsoleted.

We don't use a full tree with 'fragments' pointing to the real data
structure, as we do in the normal fragtree. We sort only on the start
address, and add an 'overlapped' flag to the tmp_dnode_info to indicate
that the node in question is (partially) overlapped by another.

When the scan is complete, we start at the end of the file, adding each
node to a real fragtree as before. Where the node is non-overlapped, we
just add it (it doesn't matter that it's not the latest version; there is
no overlap). When the node at the end of the tree _is_ overlapped, we sort
it and all its overlapping nodes into version order and then add them to
the fragtree in that order.

This 'early discard' reduces the peak allocation of tmp_dnode_info
structures from 1.8M to a mere 62872 (3.5%) in the degenerate case
referenced above.

This version of the patch also correctly rememembers the highest node
version# seen for an inode when it's scanned.
Signed-off-by: default avatarDavid Woodhouse <dwmw2@infradead.org>
parent 44b998e1
This diff is collapsed.
......@@ -225,7 +225,20 @@ struct jffs2_tmp_dnode_info
uint32_t version;
uint32_t data_crc;
uint32_t partial_crc;
uint32_t csize;
uint16_t csize;
uint16_t overlapped;
};
/* Temporary data structure used during readinode. */
struct jffs2_readinode_info
{
struct rb_root tn_root;
struct jffs2_tmp_dnode_info *mdata_tn;
uint32_t highest_version;
uint32_t latest_mctime;
uint32_t mctime_ver;
struct jffs2_full_dirent *fds;
struct jffs2_raw_node_ref *latest_ref;
};
struct jffs2_full_dirent
......@@ -328,6 +341,15 @@ static inline struct jffs2_node_frag *frag_last(struct rb_root *root)
#define frag_right(frag) rb_entry((frag)->rb.rb_right, struct jffs2_node_frag, rb)
#define frag_erase(frag, list) rb_erase(&frag->rb, list);
#define tn_next(tn) rb_entry(rb_next(&(tn)->rb), struct jffs2_tmp_dnode_info, rb)
#define tn_prev(tn) rb_entry(rb_prev(&(tn)->rb), struct jffs2_tmp_dnode_info, rb)
#define tn_parent(tn) rb_entry(rb_parent(&(tn)->rb), struct jffs2_tmp_dnode_info, rb)
#define tn_left(tn) rb_entry((tn)->rb.rb_left, struct jffs2_tmp_dnode_info, rb)
#define tn_right(tn) rb_entry((tn)->rb.rb_right, struct jffs2_tmp_dnode_info, rb)
#define tn_erase(tn, list) rb_erase(&tn->rb, list);
#define tn_last(list) rb_entry(rb_last(list), struct jffs2_tmp_dnode_info, rb)
#define tn_first(list) rb_entry(rb_first(list), struct jffs2_tmp_dnode_info, rb)
/* nodelist.c */
void jffs2_add_fd_to_list(struct jffs2_sb_info *c, struct jffs2_full_dirent *new, struct jffs2_full_dirent **list);
void jffs2_set_inocache_state(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic, int state);
......@@ -343,7 +365,6 @@ struct rb_node *rb_prev(struct rb_node *);
void rb_replace_node(struct rb_node *victim, struct rb_node *new, struct rb_root *root);
int jffs2_add_full_dnode_to_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f, struct jffs2_full_dnode *fn);
void jffs2_truncate_fragtree (struct jffs2_sb_info *c, struct rb_root *list, uint32_t size);
int jffs2_add_older_frag_to_fragtree(struct jffs2_sb_info *c, struct jffs2_inode_info *f, struct jffs2_tmp_dnode_info *tn);
struct jffs2_raw_node_ref *jffs2_link_node_ref(struct jffs2_sb_info *c,
struct jffs2_eraseblock *jeb,
uint32_t ofs, uint32_t len,
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment