Commit e0724bf6 authored by Linus Torvalds

Merge branch 'linux-next' of git://git.infradead.org/ubifs-2.6

* 'linux-next' of git://git.infradead.org/ubifs-2.6:
  UBIFS: fix recovery bug
  UBIFS: add R/O compatibility
  UBIFS: fix compiler warnings
  UBIFS: fully sort GCed nodes
  UBIFS: fix commentaries
  UBIFS: introduce a helpful variable
  UBIFS: use KERN_CONT
  UBIFS: fix lprops committing bug
  UBIFS: fix bogus assertion
  UBIFS: fix bug where page is marked uptodate when out of space
  UBIFS: amend key_hash return value
  UBIFS: improve find function interface
  UBIFS: list usage cleanup
  UBIFS: fix dbg_chk_lpt_sz()
parents 38d9aefb de097578
...@@ -194,29 +194,26 @@ static int make_free_space(struct ubifs_info *c) ...@@ -194,29 +194,26 @@ static int make_free_space(struct ubifs_info *c)
} }
/** /**
* ubifs_calc_min_idx_lebs - calculate amount of eraseblocks for the index. * ubifs_calc_min_idx_lebs - calculate amount of LEBs for the index.
* @c: UBIFS file-system description object * @c: UBIFS file-system description object
* *
* This function calculates and returns the number of eraseblocks which should * This function calculates and returns the number of LEBs which should be kept
* be kept for index usage. * for index usage.
*/ */
int ubifs_calc_min_idx_lebs(struct ubifs_info *c) int ubifs_calc_min_idx_lebs(struct ubifs_info *c)
{ {
int idx_lebs, eff_leb_size = c->leb_size - c->max_idx_node_sz; int idx_lebs;
long long idx_size; long long idx_size;
idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx; idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx;
/* And make sure we have thrice the index size of space reserved */ /* And make sure we have thrice the index size of space reserved */
idx_size = idx_size + (idx_size << 1); idx_size += idx_size << 1;
/* /*
* We do not maintain 'old_idx_size' as 'old_idx_lebs'/'old_idx_bytes' * We do not maintain 'old_idx_size' as 'old_idx_lebs'/'old_idx_bytes'
* pair, nor similarly the two variables for the new index size, so we * pair, nor similarly the two variables for the new index size, so we
* have to do this costly 64-bit division on fast-path. * have to do this costly 64-bit division on fast-path.
*/ */
idx_size += eff_leb_size - 1; idx_lebs = div_u64(idx_size + c->idx_leb_size - 1, c->idx_leb_size);
idx_lebs = div_u64(idx_size, eff_leb_size);
/* /*
* The index head is not available for the in-the-gaps method, so add an * The index head is not available for the in-the-gaps method, so add an
* extra LEB to compensate. * extra LEB to compensate.
...@@ -310,23 +307,23 @@ static int can_use_rp(struct ubifs_info *c) ...@@ -310,23 +307,23 @@ static int can_use_rp(struct ubifs_info *c)
* do_budget_space - reserve flash space for index and data growth. * do_budget_space - reserve flash space for index and data growth.
* @c: UBIFS file-system description object * @c: UBIFS file-system description object
* *
* This function makes sure UBIFS has enough free eraseblocks for index growth * This function makes sure UBIFS has enough free LEBs for index growth and
* and data. * data.
* *
* When budgeting index space, UBIFS reserves thrice as many LEBs as the index * When budgeting index space, UBIFS reserves thrice as many LEBs as the index
* would take if it was consolidated and written to the flash. This guarantees * would take if it was consolidated and written to the flash. This guarantees
* that the "in-the-gaps" commit method always succeeds and UBIFS will always * that the "in-the-gaps" commit method always succeeds and UBIFS will always
* be able to commit dirty index. So this function basically adds amount of * be able to commit dirty index. So this function basically adds amount of
* budgeted index space to the size of the current index, multiplies this by 3, * budgeted index space to the size of the current index, multiplies this by 3,
* and makes sure this does not exceed the amount of free eraseblocks. * and makes sure this does not exceed the amount of free LEBs.
* *
* Notes about @c->min_idx_lebs and @c->lst.idx_lebs variables: * Notes about @c->min_idx_lebs and @c->lst.idx_lebs variables:
* o @c->lst.idx_lebs is the number of LEBs the index currently uses. It might * o @c->lst.idx_lebs is the number of LEBs the index currently uses. It might
* be large, because UBIFS does not do any index consolidation as long as * be large, because UBIFS does not do any index consolidation as long as
* there is free space. IOW, the index may take a lot of LEBs, but the LEBs * there is free space. IOW, the index may take a lot of LEBs, but the LEBs
* will contain a lot of dirt. * will contain a lot of dirt.
* o @c->min_idx_lebs is the the index presumably takes. IOW, the index may be * o @c->min_idx_lebs is the number of LEBS the index presumably takes. IOW,
* consolidated to take up to @c->min_idx_lebs LEBs. * the index may be consolidated to take up to @c->min_idx_lebs LEBs.
* *
* This function returns zero in case of success, and %-ENOSPC in case of * This function returns zero in case of success, and %-ENOSPC in case of
* failure. * failure.
...@@ -695,12 +692,12 @@ long long ubifs_reported_space(const struct ubifs_info *c, long long free) ...@@ -695,12 +692,12 @@ long long ubifs_reported_space(const struct ubifs_info *c, long long free)
* This function calculates amount of free space to report to user-space. * This function calculates amount of free space to report to user-space.
* *
* Because UBIFS may introduce substantial overhead (the index, node headers, * Because UBIFS may introduce substantial overhead (the index, node headers,
* alignment, wastage at the end of eraseblocks, etc), it cannot report real * alignment, wastage at the end of LEBs, etc), it cannot report real amount of
* amount of free flash space it has (well, because not all dirty space is * free flash space it has (well, because not all dirty space is reclaimable,
* reclaimable, UBIFS does not actually know the real amount). If UBIFS did so, * UBIFS does not actually know the real amount). If UBIFS did so, it would
* it would break user expectations about what free space is. Users seem to be * break user expectations about what free space is. Users seem to be accustomed
* accustomed to assume that if the file-system reports N bytes of free space, * to assume that if the file-system reports N bytes of free space, they would
* they would be able to fit a file of N bytes to the FS. This almost works for * be able to fit a file of N bytes to the FS. This almost works for
* traditional file-systems, because they have way less overhead than UBIFS. * traditional file-systems, because they have way less overhead than UBIFS.
* So, to keep users happy, UBIFS tries to take the overhead into account. * So, to keep users happy, UBIFS tries to take the overhead into account.
*/ */
......
...@@ -479,9 +479,9 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) ...@@ -479,9 +479,9 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
"bad or corrupted node)"); "bad or corrupted node)");
else { else {
for (i = 0; i < nlen && dent->name[i]; i++) for (i = 0; i < nlen && dent->name[i]; i++)
printk("%c", dent->name[i]); printk(KERN_CONT "%c", dent->name[i]);
} }
printk("\n"); printk(KERN_CONT "\n");
break; break;
} }
...@@ -1214,7 +1214,7 @@ static int dbg_check_znode(struct ubifs_info *c, struct ubifs_zbranch *zbr) ...@@ -1214,7 +1214,7 @@ static int dbg_check_znode(struct ubifs_info *c, struct ubifs_zbranch *zbr)
/* /*
* Make sure the last key in our znode is less or * Make sure the last key in our znode is less or
* equivalent than the the key in zbranch which goes * equivalent than the key in the zbranch which goes
* after our pointing zbranch. * after our pointing zbranch.
*/ */
cmp = keys_cmp(c, max, cmp = keys_cmp(c, max,
......
...@@ -430,6 +430,7 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, ...@@ -430,6 +430,7 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
struct ubifs_inode *ui = ubifs_inode(inode); struct ubifs_inode *ui = ubifs_inode(inode);
pgoff_t index = pos >> PAGE_CACHE_SHIFT; pgoff_t index = pos >> PAGE_CACHE_SHIFT;
int uninitialized_var(err), appending = !!(pos + len > inode->i_size); int uninitialized_var(err), appending = !!(pos + len > inode->i_size);
int skipped_read = 0;
struct page *page; struct page *page;
ubifs_assert(ubifs_inode(inode)->ui_size == inode->i_size); ubifs_assert(ubifs_inode(inode)->ui_size == inode->i_size);
...@@ -444,7 +445,7 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, ...@@ -444,7 +445,7 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
if (!PageUptodate(page)) { if (!PageUptodate(page)) {
/* The page is not loaded from the flash */ /* The page is not loaded from the flash */
if (!(pos & ~PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) if (!(pos & ~PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) {
/* /*
* We change whole page so no need to load it. But we * We change whole page so no need to load it. But we
* have to set the @PG_checked flag to make the further * have to set the @PG_checked flag to make the further
...@@ -453,7 +454,8 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, ...@@ -453,7 +454,8 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
* the media. * the media.
*/ */
SetPageChecked(page); SetPageChecked(page);
else { skipped_read = 1;
} else {
err = do_readpage(page); err = do_readpage(page);
if (err) { if (err) {
unlock_page(page); unlock_page(page);
...@@ -469,6 +471,14 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, ...@@ -469,6 +471,14 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
err = allocate_budget(c, page, ui, appending); err = allocate_budget(c, page, ui, appending);
if (unlikely(err)) { if (unlikely(err)) {
ubifs_assert(err == -ENOSPC); ubifs_assert(err == -ENOSPC);
/*
* If we skipped reading the page because we were going to
* write all of it, then it is not up to date.
*/
if (skipped_read) {
ClearPageChecked(page);
ClearPageUptodate(page);
}
/* /*
* Budgeting failed which means it would have to force * Budgeting failed which means it would have to force
* write-back but didn't, because we set the @fast flag in the * write-back but didn't, because we set the @fast flag in the
...@@ -949,7 +959,7 @@ static int do_writepage(struct page *page, int len) ...@@ -949,7 +959,7 @@ static int do_writepage(struct page *page, int len)
* whole index and correct all inode sizes, which is long and unacceptable. * whole index and correct all inode sizes, which is long and unacceptable.
* *
* To prevent situations like this, UBIFS writes pages back only if they are * To prevent situations like this, UBIFS writes pages back only if they are
* within last synchronized inode size, i.e. the the size which has been * within the last synchronized inode size, i.e. the size which has been
* written to the flash media last time. Otherwise, UBIFS forces inode * written to the flash media last time. Otherwise, UBIFS forces inode
* write-back, thus making sure the on-flash inode contains current inode size, * write-back, thus making sure the on-flash inode contains current inode size,
* and then keeps writing pages back. * and then keeps writing pages back.
......
...@@ -478,7 +478,7 @@ const struct ubifs_lprops *do_find_free_space(struct ubifs_info *c, ...@@ -478,7 +478,7 @@ const struct ubifs_lprops *do_find_free_space(struct ubifs_info *c,
* ubifs_find_free_space - find a data LEB with free space. * ubifs_find_free_space - find a data LEB with free space.
* @c: the UBIFS file-system description object * @c: the UBIFS file-system description object
* @min_space: minimum amount of required free space * @min_space: minimum amount of required free space
* @free: contains amount of free space in the LEB on exit * @offs: contains offset of where free space starts on exit
* @squeeze: whether to try to find space in a non-empty LEB first * @squeeze: whether to try to find space in a non-empty LEB first
* *
* This function looks for an LEB with at least @min_space bytes of free space. * This function looks for an LEB with at least @min_space bytes of free space.
...@@ -490,7 +490,7 @@ const struct ubifs_lprops *do_find_free_space(struct ubifs_info *c, ...@@ -490,7 +490,7 @@ const struct ubifs_lprops *do_find_free_space(struct ubifs_info *c,
* failed to find a LEB with @min_space bytes of free space and other negative * failed to find a LEB with @min_space bytes of free space and other negative
* error codes in case of failure. * error codes in case of failure.
*/ */
int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free, int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *offs,
int squeeze) int squeeze)
{ {
const struct ubifs_lprops *lprops; const struct ubifs_lprops *lprops;
...@@ -558,10 +558,10 @@ int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free, ...@@ -558,10 +558,10 @@ int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free,
spin_unlock(&c->space_lock); spin_unlock(&c->space_lock);
} }
*free = lprops->free; *offs = c->leb_size - lprops->free;
ubifs_release_lprops(c); ubifs_release_lprops(c);
if (*free == c->leb_size) { if (*offs == 0) {
/* /*
* Ensure that empty LEBs have been unmapped. They may not have * Ensure that empty LEBs have been unmapped. They may not have
* been, for example, because of an unclean unmount. Also * been, for example, because of an unclean unmount. Also
...@@ -573,8 +573,8 @@ int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free, ...@@ -573,8 +573,8 @@ int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free,
return err; return err;
} }
dbg_find("found LEB %d, free %d", lnum, *free); dbg_find("found LEB %d, free %d", lnum, c->leb_size - *offs);
ubifs_assert(*free >= min_space); ubifs_assert(*offs <= c->leb_size - min_space);
return lnum; return lnum;
out: out:
......
...@@ -47,7 +47,7 @@ ...@@ -47,7 +47,7 @@
* have to waste large pieces of free space at the end of LEB B, because nodes * have to waste large pieces of free space at the end of LEB B, because nodes
* from LEB A would not fit. And the worst situation is when all nodes are of * from LEB A would not fit. And the worst situation is when all nodes are of
* maximum size. So dark watermark is the amount of free + dirty space in LEB * maximum size. So dark watermark is the amount of free + dirty space in LEB
* which are guaranteed to be reclaimable. If LEB has less space, the GC migh * which are guaranteed to be reclaimable. If LEB has less space, the GC might
* be unable to reclaim it. So, LEBs with free + dirty greater than dark * be unable to reclaim it. So, LEBs with free + dirty greater than dark
* watermark are "good" LEBs from GC's point of view. The other LEBs are not so * watermark are "good" LEBs from GC's point of view. The other LEBs are not so
* good, and GC takes extra care when moving them. * good, and GC takes extra care when moving them.
...@@ -56,14 +56,6 @@ ...@@ -56,14 +56,6 @@
#include <linux/pagemap.h> #include <linux/pagemap.h>
#include "ubifs.h" #include "ubifs.h"
/*
* GC tries to optimize the way it fit nodes to available space, and it sorts
* nodes a little. The below constants are watermarks which define "large",
* "medium", and "small" nodes.
*/
#define MEDIUM_NODE_WM (UBIFS_BLOCK_SIZE / 4)
#define SMALL_NODE_WM UBIFS_MAX_DENT_NODE_SZ
/* /*
* GC may need to move more than one LEB to make progress. The below constants * GC may need to move more than one LEB to make progress. The below constants
* define "soft" and "hard" limits on the number of LEBs the garbage collector * define "soft" and "hard" limits on the number of LEBs the garbage collector
...@@ -116,83 +108,222 @@ static int switch_gc_head(struct ubifs_info *c) ...@@ -116,83 +108,222 @@ static int switch_gc_head(struct ubifs_info *c)
} }
/** /**
* joinup - bring data nodes for an inode together. * list_sort - sort a list.
* @c: UBIFS file-system description object * @priv: private data, passed to @cmp
* @sleb: describes scanned LEB * @head: the list to sort
* @inum: inode number * @cmp: the elements comparison function
* @blk: block number *
* @data: list to which to add data nodes * This function has been implemented by Mark J Roberts <mjr@znex.org>. It
* implements "merge sort" which has O(nlog(n)) complexity. The list is sorted
* in ascending order.
* *
* This function looks at the first few nodes in the scanned LEB @sleb and adds * The comparison function @cmp is supposed to return a negative value if @a is
* them to @data if they are data nodes from @inum and have a larger block * less than @b, and a positive value if @a is greater than @b. If @a and @b are
* number than @blk. This function returns %0 on success and a negative error * equivalent, then it does not matter what this function returns.
* code on failure.
*/ */
static int joinup(struct ubifs_info *c, struct ubifs_scan_leb *sleb, ino_t inum, static void list_sort(void *priv, struct list_head *head,
unsigned int blk, struct list_head *data) int (*cmp)(void *priv, struct list_head *a,
struct list_head *b))
{ {
int err, cnt = 6, lnum = sleb->lnum, offs; struct list_head *p, *q, *e, *list, *tail, *oldhead;
struct ubifs_scan_node *snod, *tmp; int insize, nmerges, psize, qsize, i;
union ubifs_key *key;
if (list_empty(head))
return;
list = head->next;
list_del(head);
insize = 1;
for (;;) {
p = oldhead = list;
list = tail = NULL;
nmerges = 0;
while (p) {
nmerges++;
q = p;
psize = 0;
for (i = 0; i < insize; i++) {
psize++;
q = q->next == oldhead ? NULL : q->next;
if (!q)
break;
}
list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) { qsize = insize;
key = &snod->key; while (psize > 0 || (qsize > 0 && q)) {
if (key_inum(c, key) == inum && if (!psize) {
key_type(c, key) == UBIFS_DATA_KEY && e = q;
key_block(c, key) > blk) { q = q->next;
offs = snod->offs; qsize--;
err = ubifs_tnc_has_node(c, key, 0, lnum, offs, 0); if (q == oldhead)
if (err < 0) q = NULL;
return err; } else if (!qsize || !q) {
list_del(&snod->list); e = p;
if (err) { p = p->next;
list_add_tail(&snod->list, data); psize--;
blk = key_block(c, key); if (p == oldhead)
} else p = NULL;
kfree(snod); } else if (cmp(priv, p, q) <= 0) {
cnt = 6; e = p;
} else if (--cnt == 0) p = p->next;
psize--;
if (p == oldhead)
p = NULL;
} else {
e = q;
q = q->next;
qsize--;
if (q == oldhead)
q = NULL;
}
if (tail)
tail->next = e;
else
list = e;
e->prev = tail;
tail = e;
}
p = q;
}
tail->next = list;
list->prev = tail;
if (nmerges <= 1)
break; break;
insize *= 2;
} }
return 0;
head->next = list;
head->prev = list->prev;
list->prev->next = head;
list->prev = head;
} }
/** /**
* move_nodes - move nodes. * data_nodes_cmp - compare 2 data nodes.
* @priv: UBIFS file-system description object
* @a: first data node
* @b: second data node
*
* This function compares data nodes @a and @b. Returns %1 if @a has greater
* inode or block number, and %-1 otherwise.
*/
int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b)
{
	struct ubifs_info *c = priv;
	struct ubifs_scan_node *first, *second;
	ino_t ino_first, ino_second;
	unsigned int blk_first, blk_second;

	/* Comparators may be called many times per sort, so yield the CPU */
	cond_resched();

	first = list_entry(a, struct ubifs_scan_node, list);
	second = list_entry(b, struct ubifs_scan_node, list);
	ubifs_assert(key_type(c, &first->key) == UBIFS_DATA_KEY);
	ubifs_assert(key_type(c, &second->key) == UBIFS_DATA_KEY);

	ino_first = key_inum(c, &first->key);
	ino_second = key_inum(c, &second->key);

	/* Nodes of different inodes are ordered by inode number */
	if (ino_first != ino_second)
		return ino_first < ino_second ? -1 : 1;

	/*
	 * Same inode: order by block number. Equal block numbers yield -1,
	 * i.e. only a strictly greater @a produces a positive result.
	 */
	blk_first = key_block(c, &first->key);
	blk_second = key_block(c, &second->key);

	return blk_first <= blk_second ? -1 : 1;
}
/**
 * nondata_nodes_cmp - compare 2 non-data nodes.
 * @priv: UBIFS file-system description object
 * @a: first node
 * @b: second node
 *
 * This function compares nodes @a and @b so that a sort built on it places
 * inode nodes first, ordered by length in descending order, followed by
 * directory entry nodes ordered by ascending inode number and, within the
 * same inode number, by ascending hash value. Returns a negative value if
 * @a should go before @b and a positive value if @a should go after @b; for
 * two inode nodes of the same length zero is returned.
 */
int nondata_nodes_cmp(void *priv, struct list_head *a, struct list_head *b)
{
	struct ubifs_info *c = priv;
	struct ubifs_scan_node *first, *second;
	int type_first, type_second;
	ino_t ino_first, ino_second;
	uint32_t hash_first, hash_second;

	/* Comparators may be called many times per sort, so yield the CPU */
	cond_resched();

	first = list_entry(a, struct ubifs_scan_node, list);
	second = list_entry(b, struct ubifs_scan_node, list);
	type_first = key_type(c, &first->key);
	type_second = key_type(c, &second->key);
	ubifs_assert(type_first != UBIFS_DATA_KEY && type_second != UBIFS_DATA_KEY);

	/* Two inode nodes: the longer one goes first */
	if (type_first == UBIFS_INO_KEY && type_second == UBIFS_INO_KEY)
		return second->len - first->len;

	/* Exactly one inode node: it goes before the other node */
	if (type_first == UBIFS_INO_KEY)
		return -1;
	if (type_second == UBIFS_INO_KEY)
		return 1;

	ubifs_assert(type_first == UBIFS_DENT_KEY && type_second == UBIFS_DENT_KEY);
	ino_first = key_inum(c, &first->key);
	ino_second = key_inum(c, &second->key);

	/* Entries with different inode numbers are ordered by that number */
	if (ino_first != ino_second)
		return ino_first < ino_second ? -1 : 1;

	/* Same inode number: order by name hash; equal hashes yield -1 */
	hash_first = key_hash(c, &first->key);
	hash_second = key_hash(c, &second->key);

	return hash_first <= hash_second ? -1 : 1;
}
/**
* sort_nodes - sort nodes for GC.
* @c: UBIFS file-system description object * @c: UBIFS file-system description object
* @sleb: describes nodes to move * @sleb: describes nodes to sort and contains the result on exit
* @nondata: contains non-data nodes on exit
* @min: minimum node size is returned here
* *
* This function moves valid nodes from data LEB described by @sleb to the GC * This function sorts the list of nodes to garbage collect. First of all, it
* journal head. The obsolete nodes are dropped. * kills obsolete nodes and separates data and non-data nodes to the
* @sleb->nodes and @nondata lists correspondingly.
* *
* When moving nodes we have to deal with classical bin-packing problem: the * Data nodes are then sorted in block number order - this is important for
* space in the current GC journal head LEB and in @c->gc_lnum are the "bins", * bulk-read; data nodes with lower inode number go before data nodes with
* where the nodes in the @sleb->nodes list are the elements which should be * higher inode number, and data nodes with lower block number go before data
* fit optimally to the bins. This function uses the "first fit decreasing" * nodes with higher block number;
* strategy, although it does not really sort the nodes but just split them on
* 3 classes - large, medium, and small, so they are roughly sorted.
* *
* This function returns zero in case of success, %-EAGAIN if commit is * Non-data nodes are sorted as follows.
* required, and other negative error codes in case of other failures. * o First go inode nodes - they are sorted in descending length order.
* o Then go directory entry nodes - they are sorted in hash order, which
* should supposedly optimize 'readdir()'. Direntry nodes with lower parent
* inode number go before direntry nodes with higher parent inode number,
* and direntry nodes with lower name hash values go before direntry nodes
* with higher name hash values.
*
* This function returns zero in case of success and a negative error code in
* case of failure.
*/ */
static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb) static int sort_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
struct list_head *nondata, int *min)
{ {
struct ubifs_scan_node *snod, *tmp; struct ubifs_scan_node *snod, *tmp;
struct list_head data, large, medium, small;
struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
int avail, err, min = INT_MAX;
unsigned int blk = 0;
ino_t inum = 0;
INIT_LIST_HEAD(&data);
INIT_LIST_HEAD(&large);
INIT_LIST_HEAD(&medium);
INIT_LIST_HEAD(&small);
while (!list_empty(&sleb->nodes)) { *min = INT_MAX;
struct list_head *lst = sleb->nodes.next;
snod = list_entry(lst, struct ubifs_scan_node, list); /* Separate data nodes and non-data nodes */
list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) {
int err;
ubifs_assert(snod->type != UBIFS_IDX_NODE); ubifs_assert(snod->type != UBIFS_IDX_NODE);
ubifs_assert(snod->type != UBIFS_REF_NODE); ubifs_assert(snod->type != UBIFS_REF_NODE);
...@@ -201,53 +332,72 @@ static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb) ...@@ -201,53 +332,72 @@ static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb)
err = ubifs_tnc_has_node(c, &snod->key, 0, sleb->lnum, err = ubifs_tnc_has_node(c, &snod->key, 0, sleb->lnum,
snod->offs, 0); snod->offs, 0);
if (err < 0) if (err < 0)
goto out; return err;
list_del(lst);
if (!err) { if (!err) {
/* The node is obsolete, remove it from the list */ /* The node is obsolete, remove it from the list */
list_del(&snod->list);
kfree(snod); kfree(snod);
continue; continue;
} }
/* if (snod->len < *min)
* Sort the list of nodes so that data nodes go first, large *min = snod->len;
* nodes go second, and small nodes go last.
*/ if (key_type(c, &snod->key) != UBIFS_DATA_KEY)
if (key_type(c, &snod->key) == UBIFS_DATA_KEY) { list_move_tail(&snod->list, nondata);
if (inum != key_inum(c, &snod->key)) { }
if (inum) {
/* /* Sort data and non-data nodes */
* Try to move data nodes from the same list_sort(c, &sleb->nodes, &data_nodes_cmp);
* inode together. list_sort(c, nondata, &nondata_nodes_cmp);
return 0;
}
/**
* move_node - move a node.
* @c: UBIFS file-system description object
* @sleb: describes the LEB to move nodes from
* @snod: the node to move
* @wbuf: write-buffer to move node to
*
* This function moves node @snod to @wbuf, changes TNC correspondingly, and
* destroys @snod. Returns zero in case of success and a negative error code in
* case of failure.
*/ */
err = joinup(c, sleb, inum, blk, &data); static int move_node(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
struct ubifs_scan_node *snod, struct ubifs_wbuf *wbuf)
{
int err, new_lnum = wbuf->lnum, new_offs = wbuf->offs + wbuf->used;
cond_resched();
err = ubifs_wbuf_write_nolock(wbuf, snod->node, snod->len);
if (err) if (err)
goto out; return err;
}
inum = key_inum(c, &snod->key);
blk = key_block(c, &snod->key);
}
list_add_tail(lst, &data);
} else if (snod->len > MEDIUM_NODE_WM)
list_add_tail(lst, &large);
else if (snod->len > SMALL_NODE_WM)
list_add_tail(lst, &medium);
else
list_add_tail(lst, &small);
/* And find the smallest node */ err = ubifs_tnc_replace(c, &snod->key, sleb->lnum,
if (snod->len < min) snod->offs, new_lnum, new_offs,
min = snod->len; snod->len);
} list_del(&snod->list);
kfree(snod);
return err;
}
/* /**
* Join the tree lists so that we'd have one roughly sorted list * move_nodes - move nodes.
* ('large' will be the head of the joined list). * @c: UBIFS file-system description object
* @sleb: describes the LEB to move nodes from
*
* This function moves valid nodes from data LEB described by @sleb to the GC
* journal head. This function returns zero in case of success, %-EAGAIN if
* commit is required, and other negative error codes in case of other
* failures.
*/ */
list_splice(&data, &large); static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb)
list_splice(&medium, large.prev); {
list_splice(&small, large.prev); int err, min;
LIST_HEAD(nondata);
struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
if (wbuf->lnum == -1) { if (wbuf->lnum == -1) {
/* /*
...@@ -256,42 +406,59 @@ static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb) ...@@ -256,42 +406,59 @@ static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb)
*/ */
err = switch_gc_head(c); err = switch_gc_head(c);
if (err) if (err)
goto out; return err;
} }
err = sort_nodes(c, sleb, &nondata, &min);
if (err)
goto out;
/* Write nodes to their new location. Use the first-fit strategy */ /* Write nodes to their new location. Use the first-fit strategy */
while (1) { while (1) {
int avail;
struct ubifs_scan_node *snod, *tmp;
/* Move data nodes */
list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) {
avail = c->leb_size - wbuf->offs - wbuf->used; avail = c->leb_size - wbuf->offs - wbuf->used;
list_for_each_entry_safe(snod, tmp, &large, list) { if (snod->len > avail)
int new_lnum, new_offs; /*
* Do not skip data nodes in order to optimize
* bulk-read.
*/
break;
err = move_node(c, sleb, snod, wbuf);
if (err)
goto out;
}
/* Move non-data nodes */
list_for_each_entry_safe(snod, tmp, &nondata, list) {
avail = c->leb_size - wbuf->offs - wbuf->used;
if (avail < min) if (avail < min)
break; break;
if (snod->len > avail) if (snod->len > avail) {
/* This node does not fit */ /*
* Keep going only if this is an inode with
* some data. Otherwise stop and switch the GC
* head. IOW, we assume that data-less inode
* nodes and direntry nodes are roughly of the
* same size.
*/
if (key_type(c, &snod->key) == UBIFS_DENT_KEY ||
snod->len == UBIFS_INO_NODE_SZ)
break;
continue; continue;
}
cond_resched(); err = move_node(c, sleb, snod, wbuf);
new_lnum = wbuf->lnum;
new_offs = wbuf->offs + wbuf->used;
err = ubifs_wbuf_write_nolock(wbuf, snod->node,
snod->len);
if (err)
goto out;
err = ubifs_tnc_replace(c, &snod->key, sleb->lnum,
snod->offs, new_lnum, new_offs,
snod->len);
if (err) if (err)
goto out; goto out;
avail = c->leb_size - wbuf->offs - wbuf->used;
list_del(&snod->list);
kfree(snod);
} }
if (list_empty(&large)) if (list_empty(&sleb->nodes) && list_empty(&nondata))
break; break;
/* /*
...@@ -306,10 +473,7 @@ static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb) ...@@ -306,10 +473,7 @@ static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb)
return 0; return 0;
out: out:
list_for_each_entry_safe(snod, tmp, &large, list) { list_splice_tail(&nondata, &sleb->nodes);
list_del(&snod->list);
kfree(snod);
}
return err; return err;
} }
......
...@@ -114,7 +114,7 @@ static inline void zero_trun_node_unused(struct ubifs_trun_node *trun) ...@@ -114,7 +114,7 @@ static inline void zero_trun_node_unused(struct ubifs_trun_node *trun)
*/ */
static int reserve_space(struct ubifs_info *c, int jhead, int len) static int reserve_space(struct ubifs_info *c, int jhead, int len)
{ {
int err = 0, err1, retries = 0, avail, lnum, offs, free, squeeze; int err = 0, err1, retries = 0, avail, lnum, offs, squeeze;
struct ubifs_wbuf *wbuf = &c->jheads[jhead].wbuf; struct ubifs_wbuf *wbuf = &c->jheads[jhead].wbuf;
/* /*
...@@ -139,10 +139,9 @@ static int reserve_space(struct ubifs_info *c, int jhead, int len) ...@@ -139,10 +139,9 @@ static int reserve_space(struct ubifs_info *c, int jhead, int len)
* Write buffer wasn't seek'ed or there is not enough space - look for an * Write buffer wasn't seek'ed or there is not enough space - look for an
* LEB with some empty space. * LEB with some empty space.
*/ */
lnum = ubifs_find_free_space(c, len, &free, squeeze); lnum = ubifs_find_free_space(c, len, &offs, squeeze);
if (lnum >= 0) { if (lnum >= 0) {
/* Found an LEB, add it to the journal head */ /* Found an LEB, add it to the journal head */
offs = c->leb_size - free;
err = ubifs_add_bud_to_log(c, jhead, lnum, offs); err = ubifs_add_bud_to_log(c, jhead, lnum, offs);
if (err) if (err)
goto out_return; goto out_return;
...@@ -1366,7 +1365,7 @@ int ubifs_jnl_delete_xattr(struct ubifs_info *c, const struct inode *host, ...@@ -1366,7 +1365,7 @@ int ubifs_jnl_delete_xattr(struct ubifs_info *c, const struct inode *host,
* @host: host inode * @host: host inode
* *
* This function writes the updated version of an extended attribute inode and * This function writes the updated version of an extended attribute inode and
* the host inode tho the journal (to the base head). The host inode is written * the host inode to the journal (to the base head). The host inode is written
* after the extended attribute inode in order to guarantee that the extended * after the extended attribute inode in order to guarantee that the extended
* attribute will be flushed when the inode is synchronized by 'fsync()' and * attribute will be flushed when the inode is synchronized by 'fsync()' and
* consequently, the write-buffer is synchronized. This function returns zero * consequently, the write-buffer is synchronized. This function returns zero
......
...@@ -381,7 +381,7 @@ static inline ino_t key_inum_flash(const struct ubifs_info *c, const void *k) ...@@ -381,7 +381,7 @@ static inline ino_t key_inum_flash(const struct ubifs_info *c, const void *k)
* @c: UBIFS file-system description object * @c: UBIFS file-system description object
* @key: the key to get hash from * @key: the key to get hash from
*/ */
static inline int key_hash(const struct ubifs_info *c, static inline uint32_t key_hash(const struct ubifs_info *c,
const union ubifs_key *key) const union ubifs_key *key)
{ {
return key->u32[1] & UBIFS_S_KEY_HASH_MASK; return key->u32[1] & UBIFS_S_KEY_HASH_MASK;
...@@ -392,7 +392,7 @@ static inline int key_hash(const struct ubifs_info *c, ...@@ -392,7 +392,7 @@ static inline int key_hash(const struct ubifs_info *c,
* @c: UBIFS file-system description object * @c: UBIFS file-system description object
* @k: the key to get hash from * @k: the key to get hash from
*/ */
static inline int key_hash_flash(const struct ubifs_info *c, const void *k) static inline uint32_t key_hash_flash(const struct ubifs_info *c, const void *k)
{ {
const union ubifs_key *key = k; const union ubifs_key *key = k;
......
...@@ -239,7 +239,7 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs) ...@@ -239,7 +239,7 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs)
} }
/* /*
* Make sure the the amount of space in buds will not exceed * Make sure the amount of space in buds will not exceed the
* 'c->max_bud_bytes' limit, because we want to guarantee mount time * 'c->max_bud_bytes' limit, because we want to guarantee mount time
* limits. * limits.
* *
...@@ -367,7 +367,6 @@ static void remove_buds(struct ubifs_info *c) ...@@ -367,7 +367,6 @@ static void remove_buds(struct ubifs_info *c)
bud->jhead, c->leb_size - bud->start, bud->jhead, c->leb_size - bud->start,
c->cmt_bud_bytes); c->cmt_bud_bytes);
rb_erase(p1, &c->buds); rb_erase(p1, &c->buds);
list_del(&bud->list);
/* /*
* If the commit does not finish, the recovery will need * If the commit does not finish, the recovery will need
* to replay the journal, in which case the old buds * to replay the journal, in which case the old buds
...@@ -375,7 +374,7 @@ static void remove_buds(struct ubifs_info *c) ...@@ -375,7 +374,7 @@ static void remove_buds(struct ubifs_info *c)
* commit i.e. do not allow them to be garbage * commit i.e. do not allow them to be garbage
* collected. * collected.
*/ */
list_add(&bud->list, &c->old_buds); list_move(&bud->list, &c->old_buds);
} }
} }
spin_unlock(&c->buds_lock); spin_unlock(&c->buds_lock);
......
...@@ -229,7 +229,7 @@ static int layout_cnodes(struct ubifs_info *c) ...@@ -229,7 +229,7 @@ static int layout_cnodes(struct ubifs_info *c)
while (offs + len > c->leb_size) { while (offs + len > c->leb_size) {
alen = ALIGN(offs, c->min_io_size); alen = ALIGN(offs, c->min_io_size);
upd_ltab(c, lnum, c->leb_size - alen, alen - offs); upd_ltab(c, lnum, c->leb_size - alen, alen - offs);
dbg_chk_lpt_sz(c, 2, alen - offs); dbg_chk_lpt_sz(c, 2, c->leb_size - offs);
err = alloc_lpt_leb(c, &lnum); err = alloc_lpt_leb(c, &lnum);
if (err) if (err)
goto no_space; goto no_space;
...@@ -272,7 +272,7 @@ static int layout_cnodes(struct ubifs_info *c) ...@@ -272,7 +272,7 @@ static int layout_cnodes(struct ubifs_info *c)
if (offs + c->lsave_sz > c->leb_size) { if (offs + c->lsave_sz > c->leb_size) {
alen = ALIGN(offs, c->min_io_size); alen = ALIGN(offs, c->min_io_size);
upd_ltab(c, lnum, c->leb_size - alen, alen - offs); upd_ltab(c, lnum, c->leb_size - alen, alen - offs);
dbg_chk_lpt_sz(c, 2, alen - offs); dbg_chk_lpt_sz(c, 2, c->leb_size - offs);
err = alloc_lpt_leb(c, &lnum); err = alloc_lpt_leb(c, &lnum);
if (err) if (err)
goto no_space; goto no_space;
...@@ -292,7 +292,7 @@ static int layout_cnodes(struct ubifs_info *c) ...@@ -292,7 +292,7 @@ static int layout_cnodes(struct ubifs_info *c)
if (offs + c->ltab_sz > c->leb_size) { if (offs + c->ltab_sz > c->leb_size) {
alen = ALIGN(offs, c->min_io_size); alen = ALIGN(offs, c->min_io_size);
upd_ltab(c, lnum, c->leb_size - alen, alen - offs); upd_ltab(c, lnum, c->leb_size - alen, alen - offs);
dbg_chk_lpt_sz(c, 2, alen - offs); dbg_chk_lpt_sz(c, 2, c->leb_size - offs);
err = alloc_lpt_leb(c, &lnum); err = alloc_lpt_leb(c, &lnum);
if (err) if (err)
goto no_space; goto no_space;
...@@ -416,14 +416,12 @@ static int write_cnodes(struct ubifs_info *c) ...@@ -416,14 +416,12 @@ static int write_cnodes(struct ubifs_info *c)
alen, UBI_SHORTTERM); alen, UBI_SHORTTERM);
if (err) if (err)
return err; return err;
dbg_chk_lpt_sz(c, 4, alen - wlen);
} }
dbg_chk_lpt_sz(c, 2, 0); dbg_chk_lpt_sz(c, 2, c->leb_size - offs);
err = realloc_lpt_leb(c, &lnum); err = realloc_lpt_leb(c, &lnum);
if (err) if (err)
goto no_space; goto no_space;
offs = 0; offs = from = 0;
from = 0;
ubifs_assert(lnum >= c->lpt_first && ubifs_assert(lnum >= c->lpt_first &&
lnum <= c->lpt_last); lnum <= c->lpt_last);
err = ubifs_leb_unmap(c, lnum); err = ubifs_leb_unmap(c, lnum);
...@@ -477,11 +475,11 @@ static int write_cnodes(struct ubifs_info *c) ...@@ -477,11 +475,11 @@ static int write_cnodes(struct ubifs_info *c)
UBI_SHORTTERM); UBI_SHORTTERM);
if (err) if (err)
return err; return err;
dbg_chk_lpt_sz(c, 2, alen - wlen); dbg_chk_lpt_sz(c, 2, c->leb_size - offs);
err = realloc_lpt_leb(c, &lnum); err = realloc_lpt_leb(c, &lnum);
if (err) if (err)
goto no_space; goto no_space;
offs = 0; offs = from = 0;
ubifs_assert(lnum >= c->lpt_first && ubifs_assert(lnum >= c->lpt_first &&
lnum <= c->lpt_last); lnum <= c->lpt_last);
err = ubifs_leb_unmap(c, lnum); err = ubifs_leb_unmap(c, lnum);
...@@ -504,11 +502,11 @@ static int write_cnodes(struct ubifs_info *c) ...@@ -504,11 +502,11 @@ static int write_cnodes(struct ubifs_info *c)
UBI_SHORTTERM); UBI_SHORTTERM);
if (err) if (err)
return err; return err;
dbg_chk_lpt_sz(c, 2, alen - wlen); dbg_chk_lpt_sz(c, 2, c->leb_size - offs);
err = realloc_lpt_leb(c, &lnum); err = realloc_lpt_leb(c, &lnum);
if (err) if (err)
goto no_space; goto no_space;
offs = 0; offs = from = 0;
ubifs_assert(lnum >= c->lpt_first && ubifs_assert(lnum >= c->lpt_first &&
lnum <= c->lpt_last); lnum <= c->lpt_last);
err = ubifs_leb_unmap(c, lnum); err = ubifs_leb_unmap(c, lnum);
...@@ -1756,10 +1754,16 @@ int dbg_chk_lpt_free_spc(struct ubifs_info *c) ...@@ -1756,10 +1754,16 @@ int dbg_chk_lpt_free_spc(struct ubifs_info *c)
/** /**
* dbg_chk_lpt_sz - check LPT does not write more than LPT size. * dbg_chk_lpt_sz - check LPT does not write more than LPT size.
* @c: the UBIFS file-system description object * @c: the UBIFS file-system description object
* @action: action * @action: what to do
* @len: length written * @len: length written
* *
* This function returns %0 on success and a negative error code on failure. * This function returns %0 on success and a negative error code on failure.
* The @action argument may be one of:
* o %0 - LPT debugging checking starts, initialize debugging variables;
* o %1 - wrote an LPT node, increase LPT size by @len bytes;
* o %2 - switched to a different LEB and wasted @len bytes;
* o %3 - check that we've written the right number of bytes;
* o %4 - wasted @len bytes.
*/ */
int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len) int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len)
{ {
...@@ -1917,12 +1921,12 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum) ...@@ -1917,12 +1921,12 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum)
lnum, offs); lnum, offs);
err = ubifs_unpack_nnode(c, buf, &nnode); err = ubifs_unpack_nnode(c, buf, &nnode);
for (i = 0; i < UBIFS_LPT_FANOUT; i++) { for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
printk("%d:%d", nnode.nbranch[i].lnum, printk(KERN_CONT "%d:%d", nnode.nbranch[i].lnum,
nnode.nbranch[i].offs); nnode.nbranch[i].offs);
if (i != UBIFS_LPT_FANOUT - 1) if (i != UBIFS_LPT_FANOUT - 1)
printk(", "); printk(KERN_CONT ", ");
} }
printk("\n"); printk(KERN_CONT "\n");
break; break;
} }
case UBIFS_LPT_LTAB: case UBIFS_LPT_LTAB:
......
...@@ -425,59 +425,35 @@ static void clean_buf(const struct ubifs_info *c, void **buf, int lnum, ...@@ -425,59 +425,35 @@ static void clean_buf(const struct ubifs_info *c, void **buf, int lnum,
* @lnum: LEB number of the LEB from which @buf was read * @lnum: LEB number of the LEB from which @buf was read
* @offs: offset from which @buf was read * @offs: offset from which @buf was read
* *
* This function scans @buf for more nodes and returns %0 is a node is found and * This function ensures that the corrupted node at @offs is the last thing
* %1 if no more nodes are found. * written to a LEB. This function returns %1 if more data is not found and
* %0 if more data is found.
*/ */
static int no_more_nodes(const struct ubifs_info *c, void *buf, int len, static int no_more_nodes(const struct ubifs_info *c, void *buf, int len,
int lnum, int offs) int lnum, int offs)
{ {
int skip, next_offs = 0;
if (len > UBIFS_DATA_NODE_SZ) {
struct ubifs_ch *ch = buf;
int dlen = le32_to_cpu(ch->len);
if (ch->node_type == UBIFS_DATA_NODE && dlen >= UBIFS_CH_SZ &&
dlen <= UBIFS_MAX_DATA_NODE_SZ)
/* The corrupt node looks like a data node */
next_offs = ALIGN(offs + dlen, 8);
}
if (c->min_io_size == 1)
skip = 8;
else
skip = ALIGN(offs + 1, c->min_io_size) - offs;
offs += skip;
buf += skip;
len -= skip;
while (len > 8) {
struct ubifs_ch *ch = buf; struct ubifs_ch *ch = buf;
uint32_t magic = le32_to_cpu(ch->magic); int skip, dlen = le32_to_cpu(ch->len);
int ret;
if (magic == UBIFS_NODE_MAGIC) { /* Check for empty space after the corrupt node's common header */
ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1); skip = ALIGN(offs + UBIFS_CH_SZ, c->min_io_size) - offs;
if (ret == SCANNED_A_NODE || ret > 0) { if (is_empty(buf + skip, len - skip))
return 1;
/* /*
* There is a small chance this is just data in * The area after the common header size is not empty, so the common
* a data node, so check that possibility. e.g. * header must be intact. Check it.
* this is part of a file that itself contains
* a UBIFS image.
*/ */
if (next_offs && offs + le32_to_cpu(ch->len) <= if (ubifs_check_node(c, buf, lnum, offs, 1, 0) != -EUCLEAN) {
next_offs) dbg_rcvry("unexpected bad common header at %d:%d", lnum, offs);
continue;
dbg_rcvry("unexpected node at %d:%d", lnum,
offs);
return 0; return 0;
} }
} /* Now we know the corrupt node's length we can skip over it */
offs += 8; skip = ALIGN(offs + dlen, c->min_io_size) - offs;
buf += 8; /* After which there should be empty space */
len -= 8; if (is_empty(buf + skip, len - skip))
}
return 1; return 1;
dbg_rcvry("unexpected data at %d:%d", lnum, offs + skip);
return 0;
} }
/** /**
......
...@@ -143,7 +143,7 @@ static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r) ...@@ -143,7 +143,7 @@ static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r)
dirty -= c->leb_size - lp->free; dirty -= c->leb_size - lp->free;
/* /*
* If the replay order was perfect the dirty space would now be * If the replay order was perfect the dirty space would now be
* zero. The order is not perfect because the the journal heads * zero. The order is not perfect because the journal heads
* race with each other. This is not a problem but it does mean * race with each other. This is not a problem but it does mean
* that the dirty space may temporarily exceed c->leb_size * that the dirty space may temporarily exceed c->leb_size
* during the replay. * during the replay.
......
...@@ -193,6 +193,7 @@ static int create_default_filesystem(struct ubifs_info *c) ...@@ -193,6 +193,7 @@ static int create_default_filesystem(struct ubifs_info *c)
if (tmp64 > DEFAULT_MAX_RP_SIZE) if (tmp64 > DEFAULT_MAX_RP_SIZE)
tmp64 = DEFAULT_MAX_RP_SIZE; tmp64 = DEFAULT_MAX_RP_SIZE;
sup->rp_size = cpu_to_le64(tmp64); sup->rp_size = cpu_to_le64(tmp64);
sup->ro_compat_version = cpu_to_le32(UBIFS_RO_COMPAT_VERSION);
err = ubifs_write_node(c, sup, UBIFS_SB_NODE_SZ, 0, 0, UBI_LONGTERM); err = ubifs_write_node(c, sup, UBIFS_SB_NODE_SZ, 0, 0, UBI_LONGTERM);
kfree(sup); kfree(sup);
...@@ -532,19 +533,41 @@ int ubifs_read_superblock(struct ubifs_info *c) ...@@ -532,19 +533,41 @@ int ubifs_read_superblock(struct ubifs_info *c)
if (IS_ERR(sup)) if (IS_ERR(sup))
return PTR_ERR(sup); return PTR_ERR(sup);
c->fmt_version = le32_to_cpu(sup->fmt_version);
c->ro_compat_version = le32_to_cpu(sup->ro_compat_version);
/* /*
* The software supports all previous versions but not future versions, * The software supports all previous versions but not future versions,
* due to the unavailability of time-travelling equipment. * due to the unavailability of time-travelling equipment.
*/ */
c->fmt_version = le32_to_cpu(sup->fmt_version);
if (c->fmt_version > UBIFS_FORMAT_VERSION) { if (c->fmt_version > UBIFS_FORMAT_VERSION) {
ubifs_err("on-flash format version is %d, but software only " struct super_block *sb = c->vfs_sb;
"supports up to version %d", c->fmt_version, int mounting_ro = sb->s_flags & MS_RDONLY;
UBIFS_FORMAT_VERSION);
ubifs_assert(!c->ro_media || mounting_ro);
if (!mounting_ro ||
c->ro_compat_version > UBIFS_RO_COMPAT_VERSION) {
ubifs_err("on-flash format version is w%d/r%d, but "
"software only supports up to version "
"w%d/r%d", c->fmt_version,
c->ro_compat_version, UBIFS_FORMAT_VERSION,
UBIFS_RO_COMPAT_VERSION);
if (c->ro_compat_version <= UBIFS_RO_COMPAT_VERSION) {
ubifs_msg("only R/O mounting is possible");
err = -EROFS;
} else
err = -EINVAL; err = -EINVAL;
goto out; goto out;
} }
/*
* The FS is mounted R/O, and the media format is
* R/O-compatible with the UBIFS implementation, so we can
* mount.
*/
c->rw_incompat = 1;
}
if (c->fmt_version < 3) { if (c->fmt_version < 3) {
ubifs_err("on-flash format version %d is not supported", ubifs_err("on-flash format version %d is not supported",
c->fmt_version); c->fmt_version);
...@@ -623,7 +646,6 @@ int ubifs_read_superblock(struct ubifs_info *c) ...@@ -623,7 +646,6 @@ int ubifs_read_superblock(struct ubifs_info *c)
c->main_lebs = c->leb_cnt - UBIFS_SB_LEBS - UBIFS_MST_LEBS; c->main_lebs = c->leb_cnt - UBIFS_SB_LEBS - UBIFS_MST_LEBS;
c->main_lebs -= c->log_lebs + c->lpt_lebs + c->orph_lebs; c->main_lebs -= c->log_lebs + c->lpt_lebs + c->orph_lebs;
c->main_first = c->leb_cnt - c->main_lebs; c->main_first = c->leb_cnt - c->main_lebs;
c->report_rp_size = ubifs_reported_space(c, c->rp_size);
err = validate_sb(c, sup); err = validate_sb(c, sup);
out: out:
......
...@@ -206,8 +206,7 @@ static int shrink_tnc_trees(int nr, int age, int *contention) ...@@ -206,8 +206,7 @@ static int shrink_tnc_trees(int nr, int age, int *contention)
* Move this one to the end of the list to provide some * Move this one to the end of the list to provide some
* fairness. * fairness.
*/ */
list_del(&c->infos_list); list_move_tail(&c->infos_list, &ubifs_infos);
list_add_tail(&c->infos_list, &ubifs_infos);
mutex_unlock(&c->umount_mutex); mutex_unlock(&c->umount_mutex);
if (freed >= nr) if (freed >= nr)
break; break;
...@@ -263,8 +262,7 @@ static int kick_a_thread(void) ...@@ -263,8 +262,7 @@ static int kick_a_thread(void)
} }
if (i == 1) { if (i == 1) {
list_del(&c->infos_list); list_move_tail(&c->infos_list, &ubifs_infos);
list_add_tail(&c->infos_list, &ubifs_infos);
spin_unlock(&ubifs_infos_lock); spin_unlock(&ubifs_infos_lock);
ubifs_request_bg_commit(c); ubifs_request_bg_commit(c);
......
...@@ -421,8 +421,8 @@ static int ubifs_show_options(struct seq_file *s, struct vfsmount *mnt) ...@@ -421,8 +421,8 @@ static int ubifs_show_options(struct seq_file *s, struct vfsmount *mnt)
seq_printf(s, ",no_chk_data_crc"); seq_printf(s, ",no_chk_data_crc");
if (c->mount_opts.override_compr) { if (c->mount_opts.override_compr) {
seq_printf(s, ",compr="); seq_printf(s, ",compr=%s",
seq_printf(s, ubifs_compr_name(c->mount_opts.compr_type)); ubifs_compr_name(c->mount_opts.compr_type));
} }
return 0; return 0;
...@@ -700,6 +700,8 @@ static int init_constants_sb(struct ubifs_info *c) ...@@ -700,6 +700,8 @@ static int init_constants_sb(struct ubifs_info *c)
if (err) if (err)
return err; return err;
/* Initialize effective LEB size used in budgeting calculations */
c->idx_leb_size = c->leb_size - c->max_idx_node_sz;
return 0; return 0;
} }
...@@ -716,6 +718,7 @@ static void init_constants_master(struct ubifs_info *c) ...@@ -716,6 +718,7 @@ static void init_constants_master(struct ubifs_info *c)
long long tmp64; long long tmp64;
c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); c->min_idx_lebs = ubifs_calc_min_idx_lebs(c);
c->report_rp_size = ubifs_reported_space(c, c->rp_size);
/* /*
* Calculate total amount of FS blocks. This number is not used * Calculate total amount of FS blocks. This number is not used
...@@ -1201,7 +1204,7 @@ static int mount_ubifs(struct ubifs_info *c) ...@@ -1201,7 +1204,7 @@ static int mount_ubifs(struct ubifs_info *c)
goto out_cbuf; goto out_cbuf;
/* Create background thread */ /* Create background thread */
c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name); c->bgt = kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name);
if (IS_ERR(c->bgt)) { if (IS_ERR(c->bgt)) {
err = PTR_ERR(c->bgt); err = PTR_ERR(c->bgt);
c->bgt = NULL; c->bgt = NULL;
...@@ -1318,11 +1321,15 @@ static int mount_ubifs(struct ubifs_info *c) ...@@ -1318,11 +1321,15 @@ static int mount_ubifs(struct ubifs_info *c)
else { else {
c->need_recovery = 0; c->need_recovery = 0;
ubifs_msg("recovery completed"); ubifs_msg("recovery completed");
/* GC LEB has to be empty and taken at this point */ /*
ubifs_assert(c->lst.taken_empty_lebs == 1); * GC LEB has to be empty and taken at this point. But
* the journal head LEBs may also be accounted as
* "empty taken" if they are empty.
*/
ubifs_assert(c->lst.taken_empty_lebs > 0);
} }
} else } else
ubifs_assert(c->lst.taken_empty_lebs == 1); ubifs_assert(c->lst.taken_empty_lebs > 0);
err = dbg_check_filesystem(c); err = dbg_check_filesystem(c);
if (err) if (err)
...@@ -1344,8 +1351,9 @@ static int mount_ubifs(struct ubifs_info *c) ...@@ -1344,8 +1351,9 @@ static int mount_ubifs(struct ubifs_info *c)
x = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes; x = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes;
ubifs_msg("journal size: %lld bytes (%lld KiB, %lld MiB, %d " ubifs_msg("journal size: %lld bytes (%lld KiB, %lld MiB, %d "
"LEBs)", x, x >> 10, x >> 20, c->log_lebs + c->max_bud_cnt); "LEBs)", x, x >> 10, x >> 20, c->log_lebs + c->max_bud_cnt);
ubifs_msg("media format: %d (latest is %d)", ubifs_msg("media format: w%d/r%d (latest is w%d/r%d)",
c->fmt_version, UBIFS_FORMAT_VERSION); c->fmt_version, c->ro_compat_version,
UBIFS_FORMAT_VERSION, UBIFS_RO_COMPAT_VERSION);
ubifs_msg("default compressor: %s", ubifs_compr_name(c->default_compr)); ubifs_msg("default compressor: %s", ubifs_compr_name(c->default_compr));
ubifs_msg("reserved for root: %llu bytes (%llu KiB)", ubifs_msg("reserved for root: %llu bytes (%llu KiB)",
c->report_rp_size, c->report_rp_size >> 10); c->report_rp_size, c->report_rp_size >> 10);
...@@ -1485,6 +1493,15 @@ static int ubifs_remount_rw(struct ubifs_info *c) ...@@ -1485,6 +1493,15 @@ static int ubifs_remount_rw(struct ubifs_info *c)
{ {
int err, lnum; int err, lnum;
if (c->rw_incompat) {
ubifs_err("the file-system is not R/W-compatible");
ubifs_msg("on-flash format version is w%d/r%d, but software "
"only supports up to version w%d/r%d", c->fmt_version,
c->ro_compat_version, UBIFS_FORMAT_VERSION,
UBIFS_RO_COMPAT_VERSION);
return -EROFS;
}
mutex_lock(&c->umount_mutex); mutex_lock(&c->umount_mutex);
dbg_save_space_info(c); dbg_save_space_info(c);
c->remounting_rw = 1; c->remounting_rw = 1;
...@@ -1554,7 +1571,7 @@ static int ubifs_remount_rw(struct ubifs_info *c) ...@@ -1554,7 +1571,7 @@ static int ubifs_remount_rw(struct ubifs_info *c)
ubifs_create_buds_lists(c); ubifs_create_buds_lists(c);
/* Create background thread */ /* Create background thread */
c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name); c->bgt = kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name);
if (IS_ERR(c->bgt)) { if (IS_ERR(c->bgt)) {
err = PTR_ERR(c->bgt); err = PTR_ERR(c->bgt);
c->bgt = NULL; c->bgt = NULL;
...@@ -1775,7 +1792,7 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) ...@@ -1775,7 +1792,7 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data)
c->bu.buf = NULL; c->bu.buf = NULL;
} }
ubifs_assert(c->lst.taken_empty_lebs == 1); ubifs_assert(c->lst.taken_empty_lebs > 0);
return 0; return 0;
} }
......
...@@ -1252,7 +1252,7 @@ int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key, ...@@ -1252,7 +1252,7 @@ int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key,
* splitting in the middle of the colliding sequence. Also, when * splitting in the middle of the colliding sequence. Also, when
* removing the leftmost key, we would have to correct the key of the * removing the leftmost key, we would have to correct the key of the
* parent node, which would introduce additional complications. Namely, * parent node, which would introduce additional complications. Namely,
* if we changed the the leftmost key of the parent znode, the garbage * if we changed the leftmost key of the parent znode, the garbage
* collector would be unable to find it (GC is doing this when GC'ing * collector would be unable to find it (GC is doing this when GC'ing
* indexing LEBs). Although we already have an additional RB-tree where * indexing LEBs). Although we already have an additional RB-tree where
* we save such changed znodes (see 'ins_clr_old_idx_znode()') until * we save such changed znodes (see 'ins_clr_old_idx_znode()') until
......
...@@ -36,9 +36,31 @@ ...@@ -36,9 +36,31 @@
/* UBIFS node magic number (must not have the padding byte first or last) */ /* UBIFS node magic number (must not have the padding byte first or last) */
#define UBIFS_NODE_MAGIC 0x06101831 #define UBIFS_NODE_MAGIC 0x06101831
/* UBIFS on-flash format version */ /*
* UBIFS on-flash format version. This version is increased when the on-flash
* format is changing. If this happens, UBIFS will support older versions as
* well. But older UBIFS code will not support newer formats. Format changes
* will be rare and only when absolutely necessary, e.g. to fix a bug or to add
* a new feature.
*
* UBIFS went into mainline kernel with format version 4. The older formats
* were development formats.
*/
#define UBIFS_FORMAT_VERSION 4 #define UBIFS_FORMAT_VERSION 4
/*
* Read-only compatibility version. If the UBIFS format is changed, older UBIFS
* implementations will not be able to mount newer formats in read-write mode.
* However, depending on the change, it may be possible to mount newer formats
* in R/O mode. This is indicated by the R/O compatibility version which is
* stored in the super-block.
*
* This is needed to support boot-loaders which only need R/O mounting. With
* this flag it is possible to do UBIFS format changes without a need to update
* boot-loaders.
*/
#define UBIFS_RO_COMPAT_VERSION 0
/* Minimum logical eraseblock size in bytes */ /* Minimum logical eraseblock size in bytes */
#define UBIFS_MIN_LEB_SZ (15*1024) #define UBIFS_MIN_LEB_SZ (15*1024)
...@@ -53,7 +75,7 @@ ...@@ -53,7 +75,7 @@
/* /*
* If compressed data length is less than %UBIFS_MIN_COMPRESS_DIFF bytes * If compressed data length is less than %UBIFS_MIN_COMPRESS_DIFF bytes
* shorter than uncompressed data length, UBIFS preferes to leave this data * shorter than uncompressed data length, UBIFS prefers to leave this data
* node uncompressed, because it'll be read faster. * node uncompressed, because it'll be read faster.
*/ */
#define UBIFS_MIN_COMPRESS_DIFF 64 #define UBIFS_MIN_COMPRESS_DIFF 64
...@@ -586,6 +608,7 @@ struct ubifs_pad_node { ...@@ -586,6 +608,7 @@ struct ubifs_pad_node {
* @padding2: reserved for future, zeroes * @padding2: reserved for future, zeroes
* @time_gran: time granularity in nanoseconds * @time_gran: time granularity in nanoseconds
* @uuid: UUID generated when the file system image was created * @uuid: UUID generated when the file system image was created
* @ro_compat_version: UBIFS R/O compatibility version
*/ */
struct ubifs_sb_node { struct ubifs_sb_node {
struct ubifs_ch ch; struct ubifs_ch ch;
...@@ -612,7 +635,8 @@ struct ubifs_sb_node { ...@@ -612,7 +635,8 @@ struct ubifs_sb_node {
__le64 rp_size; __le64 rp_size;
__le32 time_gran; __le32 time_gran;
__u8 uuid[16]; __u8 uuid[16];
__u8 padding2[3972]; __le32 ro_compat_version;
__u8 padding2[3968];
} __attribute__ ((packed)); } __attribute__ ((packed));
/** /**
......
...@@ -934,6 +934,7 @@ struct ubifs_debug_info; ...@@ -934,6 +934,7 @@ struct ubifs_debug_info;
* by @commit_sem * by @commit_sem
* @cnt_lock: protects @highest_inum and @max_sqnum counters * @cnt_lock: protects @highest_inum and @max_sqnum counters
* @fmt_version: UBIFS on-flash format version * @fmt_version: UBIFS on-flash format version
* @ro_compat_version: R/O compatibility version
* @uuid: UUID from super block * @uuid: UUID from super block
* *
* @lhead_lnum: log head logical eraseblock number * @lhead_lnum: log head logical eraseblock number
...@@ -966,6 +967,7 @@ struct ubifs_debug_info; ...@@ -966,6 +967,7 @@ struct ubifs_debug_info;
* recovery) * recovery)
* @bulk_read: enable bulk-reads * @bulk_read: enable bulk-reads
* @default_compr: default compression algorithm (%UBIFS_COMPR_LZO, etc) * @default_compr: default compression algorithm (%UBIFS_COMPR_LZO, etc)
* @rw_incompat: the media is not R/W compatible
* *
* @tnc_mutex: protects the Tree Node Cache (TNC), @zroot, @cnext, @enext, and * @tnc_mutex: protects the Tree Node Cache (TNC), @zroot, @cnext, @enext, and
* @calc_idx_sz * @calc_idx_sz
...@@ -1015,6 +1017,8 @@ struct ubifs_debug_info; ...@@ -1015,6 +1017,8 @@ struct ubifs_debug_info;
* @min_io_shift: number of bits in @min_io_size minus one * @min_io_shift: number of bits in @min_io_size minus one
* @leb_size: logical eraseblock size in bytes * @leb_size: logical eraseblock size in bytes
* @half_leb_size: half LEB size * @half_leb_size: half LEB size
* @idx_leb_size: how many bytes of an LEB are effectively available when it is
* used to store indexing nodes (@leb_size - @max_idx_node_sz)
* @leb_cnt: count of logical eraseblocks * @leb_cnt: count of logical eraseblocks
* @max_leb_cnt: maximum count of logical eraseblocks * @max_leb_cnt: maximum count of logical eraseblocks
* @old_leb_cnt: count of logical eraseblocks before re-size * @old_leb_cnt: count of logical eraseblocks before re-size
...@@ -1132,8 +1136,8 @@ struct ubifs_debug_info; ...@@ -1132,8 +1136,8 @@ struct ubifs_debug_info;
* previous commit start * previous commit start
* @uncat_list: list of un-categorized LEBs * @uncat_list: list of un-categorized LEBs
* @empty_list: list of empty LEBs * @empty_list: list of empty LEBs
* @freeable_list: list of freeable non-index LEBs (free + dirty == leb_size) * @freeable_list: list of freeable non-index LEBs (free + dirty == @leb_size)
* @frdi_idx_list: list of freeable index LEBs (free + dirty == leb_size) * @frdi_idx_list: list of freeable index LEBs (free + dirty == @leb_size)
* @freeable_cnt: number of freeable LEBs in @freeable_list * @freeable_cnt: number of freeable LEBs in @freeable_list
* *
* @ltab_lnum: LEB number of LPT's own lprops table * @ltab_lnum: LEB number of LPT's own lprops table
...@@ -1177,6 +1181,7 @@ struct ubifs_info { ...@@ -1177,6 +1181,7 @@ struct ubifs_info {
unsigned long long cmt_no; unsigned long long cmt_no;
spinlock_t cnt_lock; spinlock_t cnt_lock;
int fmt_version; int fmt_version;
int ro_compat_version;
unsigned char uuid[16]; unsigned char uuid[16];
int lhead_lnum; int lhead_lnum;
...@@ -1205,6 +1210,7 @@ struct ubifs_info { ...@@ -1205,6 +1210,7 @@ struct ubifs_info {
unsigned int no_chk_data_crc:1; unsigned int no_chk_data_crc:1;
unsigned int bulk_read:1; unsigned int bulk_read:1;
unsigned int default_compr:2; unsigned int default_compr:2;
unsigned int rw_incompat:1;
struct mutex tnc_mutex; struct mutex tnc_mutex;
struct ubifs_zbranch zroot; struct ubifs_zbranch zroot;
...@@ -1253,6 +1259,7 @@ struct ubifs_info { ...@@ -1253,6 +1259,7 @@ struct ubifs_info {
int min_io_shift; int min_io_shift;
int leb_size; int leb_size;
int half_leb_size; int half_leb_size;
int idx_leb_size;
int leb_cnt; int leb_cnt;
int max_leb_cnt; int max_leb_cnt;
int old_leb_cnt; int old_leb_cnt;
...@@ -1500,7 +1507,7 @@ long long ubifs_reported_space(const struct ubifs_info *c, long long free); ...@@ -1500,7 +1507,7 @@ long long ubifs_reported_space(const struct ubifs_info *c, long long free);
long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs); long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs);
/* find.c */ /* find.c */
int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free, int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *offs,
int squeeze); int squeeze);
int ubifs_find_free_leb_for_idx(struct ubifs_info *c); int ubifs_find_free_leb_for_idx(struct ubifs_info *c);
int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment