Commit 8f627a8a authored by Linus Torvalds

Merge branch 'linux-next' of git://git.infradead.org/ubifs-2.6

* 'linux-next' of git://git.infradead.org/ubifs-2.6: (25 commits)
  UBIFS: clean-up commentaries
  UBIFS: save 128KiB or more RAM
  UBIFS: allocate orphans scan buffer on demand
  UBIFS: allocate lpt dump buffer on demand
  UBIFS: allocate ltab checking buffer on demand
  UBIFS: allocate scanning buffer on demand
  UBIFS: allocate dump buffer on demand
  UBIFS: do not check data crc by default
  UBIFS: simplify UBIFS Kconfig menu
  UBIFS: print max. index node size
  UBIFS: handle allocation failures in UBIFS write path
  UBIFS: use max_write_size during recovery
  UBIFS: use max_write_size for write-buffers
  UBIFS: introduce write-buffer size field
  UBI: incorporate LEB offset information
  UBIFS: incorporate maximum write size
  UBI: provide LEB offset information
  UBI: incorporate maximum write size
  UBIFS: fix LEB number in printk
  UBIFS: restrict world-writable debugfs files
  ...
parents fd57ed02 5d630e43
...@@ -82,12 +82,12 @@ Mount options ...@@ -82,12 +82,12 @@ Mount options
bulk_read read more in one go to take advantage of flash bulk_read read more in one go to take advantage of flash
media that read faster sequentially media that read faster sequentially
no_bulk_read (*) do not bulk-read no_bulk_read (*) do not bulk-read
no_chk_data_crc skip checking of CRCs on data nodes in order to no_chk_data_crc (*) skip checking of CRCs on data nodes in order to
improve read performance. Use this option only improve read performance. Use this option only
if the flash media is highly reliable. The effect if the flash media is highly reliable. The effect
of this option is that corruption of the contents of this option is that corruption of the contents
of a file can go unnoticed. of a file can go unnoticed.
chk_data_crc (*) do not skip checking CRCs on data nodes chk_data_crc do not skip checking CRCs on data nodes
compr=none override default compressor and set it to "none" compr=none override default compressor and set it to "none"
compr=lzo override default compressor and set it to "lzo" compr=lzo override default compressor and set it to "lzo"
compr=zlib override default compressor and set it to "zlib" compr=zlib override default compressor and set it to "zlib"
......
...@@ -690,11 +690,25 @@ static int io_init(struct ubi_device *ubi) ...@@ -690,11 +690,25 @@ static int io_init(struct ubi_device *ubi)
ubi_assert(ubi->hdrs_min_io_size <= ubi->min_io_size); ubi_assert(ubi->hdrs_min_io_size <= ubi->min_io_size);
ubi_assert(ubi->min_io_size % ubi->hdrs_min_io_size == 0); ubi_assert(ubi->min_io_size % ubi->hdrs_min_io_size == 0);
ubi->max_write_size = ubi->mtd->writebufsize;
/*
* Maximum write size has to be greater than or equal to min. I/O
* size, be a multiple of min. I/O size, and be a power of two.
*/
if (ubi->max_write_size < ubi->min_io_size ||
ubi->max_write_size % ubi->min_io_size ||
!is_power_of_2(ubi->max_write_size)) {
ubi_err("bad write buffer size %d for %d min. I/O unit",
ubi->max_write_size, ubi->min_io_size);
return -EINVAL;
}
/* Calculate default aligned sizes of EC and VID headers */ /* Calculate default aligned sizes of EC and VID headers */
ubi->ec_hdr_alsize = ALIGN(UBI_EC_HDR_SIZE, ubi->hdrs_min_io_size); ubi->ec_hdr_alsize = ALIGN(UBI_EC_HDR_SIZE, ubi->hdrs_min_io_size);
ubi->vid_hdr_alsize = ALIGN(UBI_VID_HDR_SIZE, ubi->hdrs_min_io_size); ubi->vid_hdr_alsize = ALIGN(UBI_VID_HDR_SIZE, ubi->hdrs_min_io_size);
dbg_msg("min_io_size %d", ubi->min_io_size); dbg_msg("min_io_size %d", ubi->min_io_size);
dbg_msg("max_write_size %d", ubi->max_write_size);
dbg_msg("hdrs_min_io_size %d", ubi->hdrs_min_io_size); dbg_msg("hdrs_min_io_size %d", ubi->hdrs_min_io_size);
dbg_msg("ec_hdr_alsize %d", ubi->ec_hdr_alsize); dbg_msg("ec_hdr_alsize %d", ubi->ec_hdr_alsize);
dbg_msg("vid_hdr_alsize %d", ubi->vid_hdr_alsize); dbg_msg("vid_hdr_alsize %d", ubi->vid_hdr_alsize);
......
...@@ -40,7 +40,9 @@ void ubi_do_get_device_info(struct ubi_device *ubi, struct ubi_device_info *di) ...@@ -40,7 +40,9 @@ void ubi_do_get_device_info(struct ubi_device *ubi, struct ubi_device_info *di)
{ {
di->ubi_num = ubi->ubi_num; di->ubi_num = ubi->ubi_num;
di->leb_size = ubi->leb_size; di->leb_size = ubi->leb_size;
di->leb_start = ubi->leb_start;
di->min_io_size = ubi->min_io_size; di->min_io_size = ubi->min_io_size;
di->max_write_size = ubi->max_write_size;
di->ro_mode = ubi->ro_mode; di->ro_mode = ubi->ro_mode;
di->cdev = ubi->cdev.dev; di->cdev = ubi->cdev.dev;
} }
......
...@@ -382,6 +382,8 @@ struct ubi_wl_entry; ...@@ -382,6 +382,8 @@ struct ubi_wl_entry;
* @bad_allowed: whether the MTD device admits of bad physical eraseblocks or * @bad_allowed: whether the MTD device admits of bad physical eraseblocks or
* not * not
* @nor_flash: non-zero if working on top of NOR flash * @nor_flash: non-zero if working on top of NOR flash
* @max_write_size: maximum amount of bytes the underlying flash can write at a
* time (MTD write buffer size)
* @mtd: MTD device descriptor * @mtd: MTD device descriptor
* *
* @peb_buf1: a buffer of PEB size used for different purposes * @peb_buf1: a buffer of PEB size used for different purposes
...@@ -463,6 +465,7 @@ struct ubi_device { ...@@ -463,6 +465,7 @@ struct ubi_device {
int vid_hdr_shift; int vid_hdr_shift;
unsigned int bad_allowed:1; unsigned int bad_allowed:1;
unsigned int nor_flash:1; unsigned int nor_flash:1;
int max_write_size;
struct mtd_info *mtd; struct mtd_info *mtd;
void *peb_buf1; void *peb_buf1;
......
...@@ -44,23 +44,20 @@ config UBIFS_FS_ZLIB ...@@ -44,23 +44,20 @@ config UBIFS_FS_ZLIB
# Debugging-related stuff # Debugging-related stuff
config UBIFS_FS_DEBUG config UBIFS_FS_DEBUG
bool "Enable debugging" bool "Enable debugging support"
depends on UBIFS_FS depends on UBIFS_FS
select DEBUG_FS select DEBUG_FS
select KALLSYMS_ALL select KALLSYMS_ALL
help help
This option enables UBIFS debugging. This option enables UBIFS debugging support. It makes sure various
assertions, self-checks, debugging messages and test modes are compiled
config UBIFS_FS_DEBUG_MSG_LVL in (this all is compiled out otherwise). Assertions are light-weight
int "Default message level (0 = no extra messages, 3 = lots)" and this option also enables them. Self-checks, debugging messages and
depends on UBIFS_FS_DEBUG test modes are switched off by default. Thus, it is safe and actually
default "0" recommended to have debugging support enabled, and it should not slow
help down UBIFS. You can then further enable / disable individual debugging
This controls the amount of debugging messages produced by UBIFS. features using UBIFS module parameters and the corresponding sysfs
If reporting bugs, please try to have available a full dump of the interfaces.
messages at level 1 while the misbehaviour was occurring. Level 2
may become necessary if level 1 messages were not enough to find the
bug. Generally Level 3 should be avoided.
config UBIFS_FS_DEBUG_CHKS config UBIFS_FS_DEBUG_CHKS
bool "Enable extra checks" bool "Enable extra checks"
......
...@@ -48,6 +48,56 @@ ...@@ -48,6 +48,56 @@
#include <linux/slab.h> #include <linux/slab.h>
#include "ubifs.h" #include "ubifs.h"
/**
 * nothing_to_commit - tell whether a commit would have no work to do.
 * @c: UBIFS file-system description object
 *
 * Helper used to skip a commit that is not really needed. A commit always
 * implies flash I/O (e.g., writing the commit start node to the log), so when
 * neither the TNC nor the LPT has anything dirty it is cheaper not to start
 * one at all - 'ubifs_sync_fs()' is an example of a path that runs the commit
 * and benefits from this.
 *
 * The caller must hold @c->commit_sem for writing. No TNC/LPT locks are taken
 * here because holding @c->commit_sem for writing guarantees exclusive access
 * to the TNC and LPT data structures.
 *
 * Returns %1 if there is nothing to commit and %0 otherwise.
 */
static int nothing_to_commit(struct ubifs_info *c)
{
	int tnc_dirty, lpt_dirty;

	/*
	 * While mounting, or re-mounting from R/O to R/W mode, a commit may
	 * be needed for various recovery-related reasons, so never skip it.
	 */
	if (c->mounting || c->remounting_rw)
		return 0;

	/* A dirty root TNC node means there is definitely work to commit */
	tnc_dirty = c->zroot.znode &&
		    test_bit(DIRTY_ZNODE, &c->zroot.znode->flags);
	if (tnc_dirty)
		return 0;

	/*
	 * A clean TNC does not imply a clean LPT. For example, the budgeting
	 * subsystem may have invoked GC to make some free space, and GC may
	 * have found an LEB containing only dirty and free space. GC then
	 * just changes the lprops of that LEB (turning all space into free
	 * space) and unmaps it, which dirties the LPT but not the TNC.
	 */
	lpt_dirty = c->nroot && test_bit(DIRTY_CNODE, &c->nroot->flags);
	if (lpt_dirty)
		return 0;

	/* Both roots are clean, so no dirty node may still be accounted for */
	ubifs_assert(atomic_long_read(&c->dirty_zn_cnt) == 0);
	ubifs_assert(c->dirty_pn_cnt == 0);
	ubifs_assert(c->dirty_nn_cnt == 0);

	return 1;
}
/** /**
* do_commit - commit the journal. * do_commit - commit the journal.
* @c: UBIFS file-system description object * @c: UBIFS file-system description object
...@@ -70,6 +120,12 @@ static int do_commit(struct ubifs_info *c) ...@@ -70,6 +120,12 @@ static int do_commit(struct ubifs_info *c)
goto out_up; goto out_up;
} }
if (nothing_to_commit(c)) {
up_write(&c->commit_sem);
err = 0;
goto out_cancel;
}
/* Sync all write buffers (necessary for recovery) */ /* Sync all write buffers (necessary for recovery) */
for (i = 0; i < c->jhead_cnt; i++) { for (i = 0; i < c->jhead_cnt; i++) {
err = ubifs_wbuf_sync(&c->jheads[i].wbuf); err = ubifs_wbuf_sync(&c->jheads[i].wbuf);
...@@ -162,12 +218,12 @@ static int do_commit(struct ubifs_info *c) ...@@ -162,12 +218,12 @@ static int do_commit(struct ubifs_info *c)
if (err) if (err)
goto out; goto out;
out_cancel:
spin_lock(&c->cs_lock); spin_lock(&c->cs_lock);
c->cmt_state = COMMIT_RESTING; c->cmt_state = COMMIT_RESTING;
wake_up(&c->cmt_wq); wake_up(&c->cmt_wq);
dbg_cmt("commit end"); dbg_cmt("commit end");
spin_unlock(&c->cs_lock); spin_unlock(&c->cs_lock);
return 0; return 0;
out_up: out_up:
......
...@@ -43,8 +43,8 @@ DEFINE_SPINLOCK(dbg_lock); ...@@ -43,8 +43,8 @@ DEFINE_SPINLOCK(dbg_lock);
static char dbg_key_buf0[128]; static char dbg_key_buf0[128];
static char dbg_key_buf1[128]; static char dbg_key_buf1[128];
unsigned int ubifs_msg_flags = UBIFS_MSG_FLAGS_DEFAULT; unsigned int ubifs_msg_flags;
unsigned int ubifs_chk_flags = UBIFS_CHK_FLAGS_DEFAULT; unsigned int ubifs_chk_flags;
unsigned int ubifs_tst_flags; unsigned int ubifs_tst_flags;
module_param_named(debug_msgs, ubifs_msg_flags, uint, S_IRUGO | S_IWUSR); module_param_named(debug_msgs, ubifs_msg_flags, uint, S_IRUGO | S_IWUSR);
...@@ -810,16 +810,24 @@ void dbg_dump_leb(const struct ubifs_info *c, int lnum) ...@@ -810,16 +810,24 @@ void dbg_dump_leb(const struct ubifs_info *c, int lnum)
{ {
struct ubifs_scan_leb *sleb; struct ubifs_scan_leb *sleb;
struct ubifs_scan_node *snod; struct ubifs_scan_node *snod;
void *buf;
if (dbg_failure_mode) if (dbg_failure_mode)
return; return;
printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n", printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n",
current->pid, lnum); current->pid, lnum);
sleb = ubifs_scan(c, lnum, 0, c->dbg->buf, 0);
buf = __vmalloc(c->leb_size, GFP_KERNEL | GFP_NOFS, PAGE_KERNEL);
if (!buf) {
ubifs_err("cannot allocate memory for dumping LEB %d", lnum);
return;
}
sleb = ubifs_scan(c, lnum, 0, buf, 0);
if (IS_ERR(sleb)) { if (IS_ERR(sleb)) {
ubifs_err("scan error %d", (int)PTR_ERR(sleb)); ubifs_err("scan error %d", (int)PTR_ERR(sleb));
return; goto out;
} }
printk(KERN_DEBUG "LEB %d has %d nodes ending at %d\n", lnum, printk(KERN_DEBUG "LEB %d has %d nodes ending at %d\n", lnum,
...@@ -835,6 +843,9 @@ void dbg_dump_leb(const struct ubifs_info *c, int lnum) ...@@ -835,6 +843,9 @@ void dbg_dump_leb(const struct ubifs_info *c, int lnum)
printk(KERN_DEBUG "(pid %d) finish dumping LEB %d\n", printk(KERN_DEBUG "(pid %d) finish dumping LEB %d\n",
current->pid, lnum); current->pid, lnum);
ubifs_scan_destroy(sleb); ubifs_scan_destroy(sleb);
out:
vfree(buf);
return; return;
} }
...@@ -2690,16 +2701,8 @@ int ubifs_debugging_init(struct ubifs_info *c) ...@@ -2690,16 +2701,8 @@ int ubifs_debugging_init(struct ubifs_info *c)
if (!c->dbg) if (!c->dbg)
return -ENOMEM; return -ENOMEM;
c->dbg->buf = vmalloc(c->leb_size);
if (!c->dbg->buf)
goto out;
failure_mode_init(c); failure_mode_init(c);
return 0; return 0;
out:
kfree(c->dbg);
return -ENOMEM;
} }
/** /**
...@@ -2709,7 +2712,6 @@ int ubifs_debugging_init(struct ubifs_info *c) ...@@ -2709,7 +2712,6 @@ int ubifs_debugging_init(struct ubifs_info *c)
void ubifs_debugging_exit(struct ubifs_info *c) void ubifs_debugging_exit(struct ubifs_info *c)
{ {
failure_mode_exit(c); failure_mode_exit(c);
vfree(c->dbg->buf);
kfree(c->dbg); kfree(c->dbg);
} }
...@@ -2813,19 +2815,19 @@ int dbg_debugfs_init_fs(struct ubifs_info *c) ...@@ -2813,19 +2815,19 @@ int dbg_debugfs_init_fs(struct ubifs_info *c)
} }
fname = "dump_lprops"; fname = "dump_lprops";
dent = debugfs_create_file(fname, S_IWUGO, d->dfs_dir, c, &dfs_fops); dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops);
if (IS_ERR(dent)) if (IS_ERR(dent))
goto out_remove; goto out_remove;
d->dfs_dump_lprops = dent; d->dfs_dump_lprops = dent;
fname = "dump_budg"; fname = "dump_budg";
dent = debugfs_create_file(fname, S_IWUGO, d->dfs_dir, c, &dfs_fops); dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops);
if (IS_ERR(dent)) if (IS_ERR(dent))
goto out_remove; goto out_remove;
d->dfs_dump_budg = dent; d->dfs_dump_budg = dent;
fname = "dump_tnc"; fname = "dump_tnc";
dent = debugfs_create_file(fname, S_IWUGO, d->dfs_dir, c, &dfs_fops); dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops);
if (IS_ERR(dent)) if (IS_ERR(dent))
goto out_remove; goto out_remove;
d->dfs_dump_tnc = dent; d->dfs_dump_tnc = dent;
......
...@@ -27,7 +27,6 @@ ...@@ -27,7 +27,6 @@
/** /**
* ubifs_debug_info - per-FS debugging information. * ubifs_debug_info - per-FS debugging information.
* @buf: a buffer of LEB size, used for various purposes
* @old_zroot: old index root - used by 'dbg_check_old_index()' * @old_zroot: old index root - used by 'dbg_check_old_index()'
* @old_zroot_level: old index root level - used by 'dbg_check_old_index()' * @old_zroot_level: old index root level - used by 'dbg_check_old_index()'
* @old_zroot_sqnum: old index root sqnum - used by 'dbg_check_old_index()' * @old_zroot_sqnum: old index root sqnum - used by 'dbg_check_old_index()'
...@@ -54,7 +53,6 @@ ...@@ -54,7 +53,6 @@
* dfs_dump_tnc: "dump TNC" debugfs knob * dfs_dump_tnc: "dump TNC" debugfs knob
*/ */
struct ubifs_debug_info { struct ubifs_debug_info {
void *buf;
struct ubifs_zbranch old_zroot; struct ubifs_zbranch old_zroot;
int old_zroot_level; int old_zroot_level;
unsigned long long old_zroot_sqnum; unsigned long long old_zroot_sqnum;
...@@ -173,7 +171,7 @@ const char *dbg_key_str1(const struct ubifs_info *c, ...@@ -173,7 +171,7 @@ const char *dbg_key_str1(const struct ubifs_info *c,
#define dbg_rcvry(fmt, ...) dbg_do_msg(UBIFS_MSG_RCVRY, fmt, ##__VA_ARGS__) #define dbg_rcvry(fmt, ...) dbg_do_msg(UBIFS_MSG_RCVRY, fmt, ##__VA_ARGS__)
/* /*
* Debugging message type flags (must match msg_type_names in debug.c). * Debugging message type flags.
* *
* UBIFS_MSG_GEN: general messages * UBIFS_MSG_GEN: general messages
* UBIFS_MSG_JNL: journal messages * UBIFS_MSG_JNL: journal messages
...@@ -205,14 +203,8 @@ enum { ...@@ -205,14 +203,8 @@ enum {
UBIFS_MSG_RCVRY = 0x1000, UBIFS_MSG_RCVRY = 0x1000,
}; };
/* Debugging message type flags for each default debug message level */
#define UBIFS_MSG_LVL_0 0
#define UBIFS_MSG_LVL_1 0x1
#define UBIFS_MSG_LVL_2 0x7f
#define UBIFS_MSG_LVL_3 0xffff
/* /*
* Debugging check flags (must match chk_names in debug.c). * Debugging check flags.
* *
* UBIFS_CHK_GEN: general checks * UBIFS_CHK_GEN: general checks
* UBIFS_CHK_TNC: check TNC * UBIFS_CHK_TNC: check TNC
...@@ -233,7 +225,7 @@ enum { ...@@ -233,7 +225,7 @@ enum {
}; };
/* /*
* Special testing flags (must match tst_names in debug.c). * Special testing flags.
* *
* UBIFS_TST_FORCE_IN_THE_GAPS: force the use of in-the-gaps method * UBIFS_TST_FORCE_IN_THE_GAPS: force the use of in-the-gaps method
* UBIFS_TST_RCVRY: failure mode for recovery testing * UBIFS_TST_RCVRY: failure mode for recovery testing
...@@ -243,22 +235,6 @@ enum { ...@@ -243,22 +235,6 @@ enum {
UBIFS_TST_RCVRY = 0x4, UBIFS_TST_RCVRY = 0x4,
}; };
#if CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 1
#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_1
#elif CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 2
#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_2
#elif CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 3
#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_3
#else
#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_0
#endif
#ifdef CONFIG_UBIFS_FS_DEBUG_CHKS
#define UBIFS_CHK_FLAGS_DEFAULT 0xffffffff
#else
#define UBIFS_CHK_FLAGS_DEFAULT 0
#endif
extern spinlock_t dbg_lock; extern spinlock_t dbg_lock;
extern unsigned int ubifs_msg_flags; extern unsigned int ubifs_msg_flags;
......
...@@ -31,6 +31,26 @@ ...@@ -31,6 +31,26 @@
* buffer is full or when it is not used for some time (by timer). This is * buffer is full or when it is not used for some time (by timer). This is
* similar to the mechanism is used by JFFS2. * similar to the mechanism is used by JFFS2.
* *
* UBIFS distinguishes between minimum write size (@c->min_io_size) and maximum
* write size (@c->max_write_size). The latter is the maximum amount of bytes
* the underlying flash is able to program at a time, and writing in
* @c->max_write_size units should presumably be faster. Obviously,
* @c->min_io_size <= @c->max_write_size. Write-buffers are of
* @c->max_write_size bytes in size for maximum performance. However, when a
* write-buffer is flushed, only the portion of it (aligned to @c->min_io_size
* boundary) which contains data is written, not the whole write-buffer,
* because this is more space-efficient.
*
* This optimization adds few complications to the code. Indeed, on the one
* hand, we want to write in optimal @c->max_write_size bytes chunks, which
* also means aligning writes at the @c->max_write_size bytes offsets. On the
* other hand, we do not want to waste space when synchronizing the write
* buffer, so during synchronization we write in smaller chunks. And this makes
* the next write offset not aligned to @c->max_write_size bytes. So we
* have to make sure that the write-buffer offset (@wbuf->offs) becomes aligned
* to @c->max_write_size bytes again. We do this by temporarily shrinking
* write-buffer size (@wbuf->size).
*
* Write-buffers are defined by 'struct ubifs_wbuf' objects and protected by * Write-buffers are defined by 'struct ubifs_wbuf' objects and protected by
* mutexes defined inside these objects. Since sometimes upper-level code * mutexes defined inside these objects. Since sometimes upper-level code
* has to lock the write-buffer (e.g. journal space reservation code), many * has to lock the write-buffer (e.g. journal space reservation code), many
...@@ -46,8 +66,8 @@ ...@@ -46,8 +66,8 @@
* UBIFS uses padding when it pads to the next min. I/O unit. In this case it * UBIFS uses padding when it pads to the next min. I/O unit. In this case it
* uses padding nodes or padding bytes, if the padding node does not fit. * uses padding nodes or padding bytes, if the padding node does not fit.
* *
* All UBIFS nodes are protected by CRC checksums and UBIFS checks all nodes * All UBIFS nodes are protected by CRC checksums and UBIFS checks CRC when
* every time they are read from the flash media. * they are read from the flash media.
*/ */
#include <linux/crc32.h> #include <linux/crc32.h>
...@@ -88,8 +108,12 @@ void ubifs_ro_mode(struct ubifs_info *c, int err) ...@@ -88,8 +108,12 @@ void ubifs_ro_mode(struct ubifs_info *c, int err)
* This function may skip data nodes CRC checking if @c->no_chk_data_crc is * This function may skip data nodes CRC checking if @c->no_chk_data_crc is
* true, which is controlled by corresponding UBIFS mount option. However, if * true, which is controlled by corresponding UBIFS mount option. However, if
* @must_chk_crc is true, then @c->no_chk_data_crc is ignored and CRC is * @must_chk_crc is true, then @c->no_chk_data_crc is ignored and CRC is
* checked. Similarly, if @c->always_chk_crc is true, @c->no_chk_data_crc is * checked. Similarly, if @c->mounting or @c->remounting_rw is true (we are
* ignored and CRC is checked. * mounting or re-mounting to R/W mode), @c->no_chk_data_crc is ignored and CRC
* is checked. This is because during mounting or re-mounting from R/O mode to
* R/W mode we may read journal nodes (when replaying the journal or doing the
* recovery) and the journal nodes may potentially be corrupted, so checking is
* required.
* *
* This function returns zero in case of success and %-EUCLEAN in case of bad * This function returns zero in case of success and %-EUCLEAN in case of bad
* CRC or magic. * CRC or magic.
...@@ -131,8 +155,8 @@ int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, ...@@ -131,8 +155,8 @@ int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
node_len > c->ranges[type].max_len) node_len > c->ranges[type].max_len)
goto out_len; goto out_len;
if (!must_chk_crc && type == UBIFS_DATA_NODE && !c->always_chk_crc && if (!must_chk_crc && type == UBIFS_DATA_NODE && !c->mounting &&
c->no_chk_data_crc) !c->remounting_rw && c->no_chk_data_crc)
return 0; return 0;
crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8); crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8);
...@@ -343,11 +367,17 @@ static void cancel_wbuf_timer_nolock(struct ubifs_wbuf *wbuf) ...@@ -343,11 +367,17 @@ static void cancel_wbuf_timer_nolock(struct ubifs_wbuf *wbuf)
* *
* This function synchronizes write-buffer @buf and returns zero in case of * This function synchronizes write-buffer @buf and returns zero in case of
* success or a negative error code in case of failure. * success or a negative error code in case of failure.
*
* Note, although write-buffers are of @c->max_write_size, this function does
* not necessarily write all @c->max_write_size bytes to the flash. Instead,
* if the write-buffer is only partially filled with data, only the used part
* of the write-buffer (aligned on @c->min_io_size boundary) is synchronized.
* This way we waste less space.
*/ */
int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf) int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf)
{ {
struct ubifs_info *c = wbuf->c; struct ubifs_info *c = wbuf->c;
int err, dirt; int err, dirt, sync_len;
cancel_wbuf_timer_nolock(wbuf); cancel_wbuf_timer_nolock(wbuf);
if (!wbuf->used || wbuf->lnum == -1) if (!wbuf->used || wbuf->lnum == -1)
...@@ -357,27 +387,53 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf) ...@@ -357,27 +387,53 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf)
dbg_io("LEB %d:%d, %d bytes, jhead %s", dbg_io("LEB %d:%d, %d bytes, jhead %s",
wbuf->lnum, wbuf->offs, wbuf->used, dbg_jhead(wbuf->jhead)); wbuf->lnum, wbuf->offs, wbuf->used, dbg_jhead(wbuf->jhead));
ubifs_assert(!(wbuf->avail & 7)); ubifs_assert(!(wbuf->avail & 7));
ubifs_assert(wbuf->offs + c->min_io_size <= c->leb_size); ubifs_assert(wbuf->offs + wbuf->size <= c->leb_size);
ubifs_assert(wbuf->size >= c->min_io_size);
ubifs_assert(wbuf->size <= c->max_write_size);
ubifs_assert(wbuf->size % c->min_io_size == 0);
ubifs_assert(!c->ro_media && !c->ro_mount); ubifs_assert(!c->ro_media && !c->ro_mount);
if (c->leb_size - wbuf->offs >= c->max_write_size)
ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size ));
if (c->ro_error) if (c->ro_error)
return -EROFS; return -EROFS;
ubifs_pad(c, wbuf->buf + wbuf->used, wbuf->avail); /*
* Do not write whole write buffer but write only the minimum necessary
* amount of min. I/O units.
*/
sync_len = ALIGN(wbuf->used, c->min_io_size);
dirt = sync_len - wbuf->used;
if (dirt)
ubifs_pad(c, wbuf->buf + wbuf->used, dirt);
err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs, err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs,
c->min_io_size, wbuf->dtype); sync_len, wbuf->dtype);
if (err) { if (err) {
ubifs_err("cannot write %d bytes to LEB %d:%d", ubifs_err("cannot write %d bytes to LEB %d:%d",
c->min_io_size, wbuf->lnum, wbuf->offs); sync_len, wbuf->lnum, wbuf->offs);
dbg_dump_stack(); dbg_dump_stack();
return err; return err;
} }
dirt = wbuf->avail;
spin_lock(&wbuf->lock); spin_lock(&wbuf->lock);
wbuf->offs += c->min_io_size; wbuf->offs += sync_len;
wbuf->avail = c->min_io_size; /*
* Now @wbuf->offs is not necessarily aligned to @c->max_write_size.
* But our goal is to optimize writes and make sure we write in
* @c->max_write_size chunks and to @c->max_write_size-aligned offset.
* Thus, if @wbuf->offs is not aligned to @c->max_write_size now, make
* sure that @wbuf->offs + @wbuf->size is aligned to
* @c->max_write_size. This way we make sure that after next
* write-buffer flush we are again at the optimal offset (aligned to
* @c->max_write_size).
*/
if (c->leb_size - wbuf->offs < c->max_write_size)
wbuf->size = c->leb_size - wbuf->offs;
else if (wbuf->offs & (c->max_write_size - 1))
wbuf->size = ALIGN(wbuf->offs, c->max_write_size) - wbuf->offs;
else
wbuf->size = c->max_write_size;
wbuf->avail = wbuf->size;
wbuf->used = 0; wbuf->used = 0;
wbuf->next_ino = 0; wbuf->next_ino = 0;
spin_unlock(&wbuf->lock); spin_unlock(&wbuf->lock);
...@@ -420,7 +476,13 @@ int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs, ...@@ -420,7 +476,13 @@ int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs,
spin_lock(&wbuf->lock); spin_lock(&wbuf->lock);
wbuf->lnum = lnum; wbuf->lnum = lnum;
wbuf->offs = offs; wbuf->offs = offs;
wbuf->avail = c->min_io_size; if (c->leb_size - wbuf->offs < c->max_write_size)
wbuf->size = c->leb_size - wbuf->offs;
else if (wbuf->offs & (c->max_write_size - 1))
wbuf->size = ALIGN(wbuf->offs, c->max_write_size) - wbuf->offs;
else
wbuf->size = c->max_write_size;
wbuf->avail = wbuf->size;
wbuf->used = 0; wbuf->used = 0;
spin_unlock(&wbuf->lock); spin_unlock(&wbuf->lock);
wbuf->dtype = dtype; wbuf->dtype = dtype;
...@@ -500,8 +562,9 @@ int ubifs_bg_wbufs_sync(struct ubifs_info *c) ...@@ -500,8 +562,9 @@ int ubifs_bg_wbufs_sync(struct ubifs_info *c)
* *
* This function writes data to flash via write-buffer @wbuf. This means that * This function writes data to flash via write-buffer @wbuf. This means that
* the last piece of the node won't reach the flash media immediately if it * the last piece of the node won't reach the flash media immediately if it
* does not take whole minimal I/O unit. Instead, the node will sit in RAM * does not take whole max. write unit (@c->max_write_size). Instead, the node
* until the write-buffer is synchronized (e.g., by timer). * will sit in RAM until the write-buffer is synchronized (e.g., by timer, or
* because more data are appended to the write-buffer).
* *
* This function returns zero in case of success and a negative error code in * This function returns zero in case of success and a negative error code in
* case of failure. If the node cannot be written because there is no more * case of failure. If the node cannot be written because there is no more
...@@ -518,9 +581,14 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) ...@@ -518,9 +581,14 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
ubifs_assert(len > 0 && wbuf->lnum >= 0 && wbuf->lnum < c->leb_cnt); ubifs_assert(len > 0 && wbuf->lnum >= 0 && wbuf->lnum < c->leb_cnt);
ubifs_assert(wbuf->offs >= 0 && wbuf->offs % c->min_io_size == 0); ubifs_assert(wbuf->offs >= 0 && wbuf->offs % c->min_io_size == 0);
ubifs_assert(!(wbuf->offs & 7) && wbuf->offs <= c->leb_size); ubifs_assert(!(wbuf->offs & 7) && wbuf->offs <= c->leb_size);
ubifs_assert(wbuf->avail > 0 && wbuf->avail <= c->min_io_size); ubifs_assert(wbuf->avail > 0 && wbuf->avail <= wbuf->size);
ubifs_assert(wbuf->size >= c->min_io_size);
ubifs_assert(wbuf->size <= c->max_write_size);
ubifs_assert(wbuf->size % c->min_io_size == 0);
ubifs_assert(mutex_is_locked(&wbuf->io_mutex)); ubifs_assert(mutex_is_locked(&wbuf->io_mutex));
ubifs_assert(!c->ro_media && !c->ro_mount); ubifs_assert(!c->ro_media && !c->ro_mount);
if (c->leb_size - wbuf->offs >= c->max_write_size)
ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size ));
if (c->leb_size - wbuf->offs - wbuf->used < aligned_len) { if (c->leb_size - wbuf->offs - wbuf->used < aligned_len) {
err = -ENOSPC; err = -ENOSPC;
...@@ -543,14 +611,18 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) ...@@ -543,14 +611,18 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
dbg_io("flush jhead %s wbuf to LEB %d:%d", dbg_io("flush jhead %s wbuf to LEB %d:%d",
dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs); dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs);
err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf,
wbuf->offs, c->min_io_size, wbuf->offs, wbuf->size,
wbuf->dtype); wbuf->dtype);
if (err) if (err)
goto out; goto out;
spin_lock(&wbuf->lock); spin_lock(&wbuf->lock);
wbuf->offs += c->min_io_size; wbuf->offs += wbuf->size;
wbuf->avail = c->min_io_size; if (c->leb_size - wbuf->offs >= c->max_write_size)
wbuf->size = c->max_write_size;
else
wbuf->size = c->leb_size - wbuf->offs;
wbuf->avail = wbuf->size;
wbuf->used = 0; wbuf->used = 0;
wbuf->next_ino = 0; wbuf->next_ino = 0;
spin_unlock(&wbuf->lock); spin_unlock(&wbuf->lock);
...@@ -564,33 +636,57 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) ...@@ -564,33 +636,57 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
goto exit; goto exit;
} }
offs = wbuf->offs;
written = 0;
if (wbuf->used) {
/* /*
* The node is large enough and does not fit entirely within current * The node is large enough and does not fit entirely within
* minimal I/O unit. We have to fill and flush write-buffer and switch * current available space. We have to fill and flush
* to the next min. I/O unit. * write-buffer and switch to the next max. write unit.
*/ */
dbg_io("flush jhead %s wbuf to LEB %d:%d", dbg_io("flush jhead %s wbuf to LEB %d:%d",
dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs); dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs);
memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail); memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail);
err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs, err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs,
c->min_io_size, wbuf->dtype); wbuf->size, wbuf->dtype);
if (err) if (err)
goto out; goto out;
offs = wbuf->offs + c->min_io_size; offs += wbuf->size;
len -= wbuf->avail; len -= wbuf->avail;
aligned_len -= wbuf->avail; aligned_len -= wbuf->avail;
written = wbuf->avail; written += wbuf->avail;
} else if (wbuf->offs & (c->max_write_size - 1)) {
/*
* The write-buffer offset is not aligned to
* @c->max_write_size and @wbuf->size is less than
* @c->max_write_size. Write @wbuf->size bytes to make sure the
* following writes are done in optimal @c->max_write_size
* chunks.
*/
dbg_io("write %d bytes to LEB %d:%d",
wbuf->size, wbuf->lnum, wbuf->offs);
err = ubi_leb_write(c->ubi, wbuf->lnum, buf, wbuf->offs,
wbuf->size, wbuf->dtype);
if (err)
goto out;
offs += wbuf->size;
len -= wbuf->size;
aligned_len -= wbuf->size;
written += wbuf->size;
}
/* /*
* The remaining data may take more whole min. I/O units, so write the * The remaining data may take more whole max. write units, so write the
* remains multiple to min. I/O unit size directly to the flash media. * remains multiple to max. write unit size directly to the flash media.
* We align node length to 8-byte boundary because we anyway flash wbuf * We align node length to 8-byte boundary because we anyway flash wbuf
* if the remaining space is less than 8 bytes. * if the remaining space is less than 8 bytes.
*/ */
n = aligned_len >> c->min_io_shift; n = aligned_len >> c->max_write_shift;
if (n) { if (n) {
n <<= c->min_io_shift; n <<= c->max_write_shift;
dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, offs); dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, offs);
err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written, offs, n, err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written, offs, n,
wbuf->dtype); wbuf->dtype);
...@@ -606,14 +702,18 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) ...@@ -606,14 +702,18 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
if (aligned_len) if (aligned_len)
/* /*
* And now we have what's left and what does not take whole * And now we have what's left and what does not take whole
* min. I/O unit, so write it to the write-buffer and we are * max. write unit, so write it to the write-buffer and we are
* done. * done.
*/ */
memcpy(wbuf->buf, buf + written, len); memcpy(wbuf->buf, buf + written, len);
wbuf->offs = offs; wbuf->offs = offs;
if (c->leb_size - wbuf->offs >= c->max_write_size)
wbuf->size = c->max_write_size;
else
wbuf->size = c->leb_size - wbuf->offs;
wbuf->avail = wbuf->size - aligned_len;
wbuf->used = aligned_len; wbuf->used = aligned_len;
wbuf->avail = c->min_io_size - aligned_len;
wbuf->next_ino = 0; wbuf->next_ino = 0;
spin_unlock(&wbuf->lock); spin_unlock(&wbuf->lock);
...@@ -837,11 +937,11 @@ int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf) ...@@ -837,11 +937,11 @@ int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf)
{ {
size_t size; size_t size;
wbuf->buf = kmalloc(c->min_io_size, GFP_KERNEL); wbuf->buf = kmalloc(c->max_write_size, GFP_KERNEL);
if (!wbuf->buf) if (!wbuf->buf)
return -ENOMEM; return -ENOMEM;
size = (c->min_io_size / UBIFS_CH_SZ + 1) * sizeof(ino_t); size = (c->max_write_size / UBIFS_CH_SZ + 1) * sizeof(ino_t);
wbuf->inodes = kmalloc(size, GFP_KERNEL); wbuf->inodes = kmalloc(size, GFP_KERNEL);
if (!wbuf->inodes) { if (!wbuf->inodes) {
kfree(wbuf->buf); kfree(wbuf->buf);
...@@ -851,7 +951,14 @@ int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf) ...@@ -851,7 +951,14 @@ int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf)
wbuf->used = 0; wbuf->used = 0;
wbuf->lnum = wbuf->offs = -1; wbuf->lnum = wbuf->offs = -1;
wbuf->avail = c->min_io_size; /*
* If the LEB starts at the max. write size aligned address, then
* write-buffer size has to be set to @c->max_write_size. Otherwise,
* set it to something smaller so that it ends at the closest max.
* write size boundary.
*/
size = c->max_write_size - (c->leb_start % c->max_write_size);
wbuf->avail = wbuf->size = size;
wbuf->dtype = UBI_UNKNOWN; wbuf->dtype = UBI_UNKNOWN;
wbuf->sync_callback = NULL; wbuf->sync_callback = NULL;
mutex_init(&wbuf->io_mutex); mutex_init(&wbuf->io_mutex);
......
...@@ -690,7 +690,7 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode, ...@@ -690,7 +690,7 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
{ {
struct ubifs_data_node *data; struct ubifs_data_node *data;
int err, lnum, offs, compr_type, out_len; int err, lnum, offs, compr_type, out_len;
int dlen = UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE * WORST_COMPR_FACTOR; int dlen = COMPRESSED_DATA_NODE_BUF_SZ, allocated = 1;
struct ubifs_inode *ui = ubifs_inode(inode); struct ubifs_inode *ui = ubifs_inode(inode);
dbg_jnl("ino %lu, blk %u, len %d, key %s", dbg_jnl("ino %lu, blk %u, len %d, key %s",
...@@ -698,9 +698,19 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode, ...@@ -698,9 +698,19 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
DBGKEY(key)); DBGKEY(key));
ubifs_assert(len <= UBIFS_BLOCK_SIZE); ubifs_assert(len <= UBIFS_BLOCK_SIZE);
data = kmalloc(dlen, GFP_NOFS); data = kmalloc(dlen, GFP_NOFS | __GFP_NOWARN);
if (!data) if (!data) {
return -ENOMEM; /*
* Fall-back to the write reserve buffer. Note, we might be
* currently on the memory reclaim path, when the kernel is
* trying to free some memory by writing out dirty pages. The
* write reserve buffer helps us to guarantee that we are
* always able to write the data.
*/
allocated = 0;
mutex_lock(&c->write_reserve_mutex);
data = c->write_reserve_buf;
}
data->ch.node_type = UBIFS_DATA_NODE; data->ch.node_type = UBIFS_DATA_NODE;
key_write(c, key, &data->key); key_write(c, key, &data->key);
...@@ -736,6 +746,9 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode, ...@@ -736,6 +746,9 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
goto out_ro; goto out_ro;
finish_reservation(c); finish_reservation(c);
if (!allocated)
mutex_unlock(&c->write_reserve_mutex);
else
kfree(data); kfree(data);
return 0; return 0;
...@@ -745,6 +758,9 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode, ...@@ -745,6 +758,9 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
ubifs_ro_mode(c, err); ubifs_ro_mode(c, err);
finish_reservation(c); finish_reservation(c);
out_free: out_free:
if (!allocated)
mutex_unlock(&c->write_reserve_mutex);
else
kfree(data); kfree(data);
return err; return err;
} }
......
...@@ -1035,7 +1035,8 @@ static int scan_check_cb(struct ubifs_info *c, ...@@ -1035,7 +1035,8 @@ static int scan_check_cb(struct ubifs_info *c,
struct ubifs_scan_leb *sleb; struct ubifs_scan_leb *sleb;
struct ubifs_scan_node *snod; struct ubifs_scan_node *snod;
struct ubifs_lp_stats *lst = &data->lst; struct ubifs_lp_stats *lst = &data->lst;
int cat, lnum = lp->lnum, is_idx = 0, used = 0, free, dirty; int cat, lnum = lp->lnum, is_idx = 0, used = 0, free, dirty, ret;
void *buf = NULL;
cat = lp->flags & LPROPS_CAT_MASK; cat = lp->flags & LPROPS_CAT_MASK;
if (cat != LPROPS_UNCAT) { if (cat != LPROPS_UNCAT) {
...@@ -1093,7 +1094,13 @@ static int scan_check_cb(struct ubifs_info *c, ...@@ -1093,7 +1094,13 @@ static int scan_check_cb(struct ubifs_info *c,
} }
} }
sleb = ubifs_scan(c, lnum, 0, c->dbg->buf, 0); buf = __vmalloc(c->leb_size, GFP_KERNEL | GFP_NOFS, PAGE_KERNEL);
if (!buf) {
ubifs_err("cannot allocate memory to scan LEB %d", lnum);
goto out;
}
sleb = ubifs_scan(c, lnum, 0, buf, 0);
if (IS_ERR(sleb)) { if (IS_ERR(sleb)) {
/* /*
* After an unclean unmount, empty and freeable LEBs * After an unclean unmount, empty and freeable LEBs
...@@ -1105,7 +1112,8 @@ static int scan_check_cb(struct ubifs_info *c, ...@@ -1105,7 +1112,8 @@ static int scan_check_cb(struct ubifs_info *c,
lst->empty_lebs += 1; lst->empty_lebs += 1;
lst->total_free += c->leb_size; lst->total_free += c->leb_size;
lst->total_dark += ubifs_calc_dark(c, c->leb_size); lst->total_dark += ubifs_calc_dark(c, c->leb_size);
return LPT_SCAN_CONTINUE; ret = LPT_SCAN_CONTINUE;
goto exit;
} }
if (lp->free + lp->dirty == c->leb_size && if (lp->free + lp->dirty == c->leb_size &&
...@@ -1115,10 +1123,12 @@ static int scan_check_cb(struct ubifs_info *c, ...@@ -1115,10 +1123,12 @@ static int scan_check_cb(struct ubifs_info *c,
lst->total_free += lp->free; lst->total_free += lp->free;
lst->total_dirty += lp->dirty; lst->total_dirty += lp->dirty;
lst->total_dark += ubifs_calc_dark(c, c->leb_size); lst->total_dark += ubifs_calc_dark(c, c->leb_size);
return LPT_SCAN_CONTINUE; ret = LPT_SCAN_CONTINUE;
goto exit;
} }
data->err = PTR_ERR(sleb); data->err = PTR_ERR(sleb);
return LPT_SCAN_STOP; ret = LPT_SCAN_STOP;
goto exit;
} }
is_idx = -1; is_idx = -1;
...@@ -1236,7 +1246,10 @@ static int scan_check_cb(struct ubifs_info *c, ...@@ -1236,7 +1246,10 @@ static int scan_check_cb(struct ubifs_info *c,
} }
ubifs_scan_destroy(sleb); ubifs_scan_destroy(sleb);
return LPT_SCAN_CONTINUE; ret = LPT_SCAN_CONTINUE;
exit:
vfree(buf);
return ret;
out_print: out_print:
ubifs_err("bad accounting of LEB %d: free %d, dirty %d flags %#x, " ubifs_err("bad accounting of LEB %d: free %d, dirty %d flags %#x, "
...@@ -1246,6 +1259,7 @@ static int scan_check_cb(struct ubifs_info *c, ...@@ -1246,6 +1259,7 @@ static int scan_check_cb(struct ubifs_info *c,
out_destroy: out_destroy:
ubifs_scan_destroy(sleb); ubifs_scan_destroy(sleb);
out: out:
vfree(buf);
data->err = -EINVAL; data->err = -EINVAL;
return LPT_SCAN_STOP; return LPT_SCAN_STOP;
} }
......
...@@ -1628,29 +1628,35 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum) ...@@ -1628,29 +1628,35 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum)
{ {
int err, len = c->leb_size, dirty = 0, node_type, node_num, node_len; int err, len = c->leb_size, dirty = 0, node_type, node_num, node_len;
int ret; int ret;
void *buf = c->dbg->buf; void *buf, *p;
if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS))
return 0; return 0;
buf = p = __vmalloc(c->leb_size, GFP_KERNEL | GFP_NOFS, PAGE_KERNEL);
if (!buf) {
ubifs_err("cannot allocate memory for ltab checking");
return 0;
}
dbg_lp("LEB %d", lnum); dbg_lp("LEB %d", lnum);
err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size); err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size);
if (err) { if (err) {
dbg_msg("ubi_read failed, LEB %d, error %d", lnum, err); dbg_msg("ubi_read failed, LEB %d, error %d", lnum, err);
return err; goto out;
} }
while (1) { while (1) {
if (!is_a_node(c, buf, len)) { if (!is_a_node(c, p, len)) {
int i, pad_len; int i, pad_len;
pad_len = get_pad_len(c, buf, len); pad_len = get_pad_len(c, p, len);
if (pad_len) { if (pad_len) {
buf += pad_len; p += pad_len;
len -= pad_len; len -= pad_len;
dirty += pad_len; dirty += pad_len;
continue; continue;
} }
if (!dbg_is_all_ff(buf, len)) { if (!dbg_is_all_ff(p, len)) {
dbg_msg("invalid empty space in LEB %d at %d", dbg_msg("invalid empty space in LEB %d at %d",
lnum, c->leb_size - len); lnum, c->leb_size - len);
err = -EINVAL; err = -EINVAL;
...@@ -1668,16 +1674,21 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum) ...@@ -1668,16 +1674,21 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum)
lnum, dirty, c->ltab[i].dirty); lnum, dirty, c->ltab[i].dirty);
err = -EINVAL; err = -EINVAL;
} }
return err; goto out;
} }
node_type = get_lpt_node_type(c, buf, &node_num); node_type = get_lpt_node_type(c, p, &node_num);
node_len = get_lpt_node_len(c, node_type); node_len = get_lpt_node_len(c, node_type);
ret = dbg_is_node_dirty(c, node_type, lnum, c->leb_size - len); ret = dbg_is_node_dirty(c, node_type, lnum, c->leb_size - len);
if (ret == 1) if (ret == 1)
dirty += node_len; dirty += node_len;
buf += node_len; p += node_len;
len -= node_len; len -= node_len;
} }
err = 0;
out:
vfree(buf);
return err;
} }
/** /**
...@@ -1870,25 +1881,31 @@ int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len) ...@@ -1870,25 +1881,31 @@ int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len)
static void dump_lpt_leb(const struct ubifs_info *c, int lnum) static void dump_lpt_leb(const struct ubifs_info *c, int lnum)
{ {
int err, len = c->leb_size, node_type, node_num, node_len, offs; int err, len = c->leb_size, node_type, node_num, node_len, offs;
void *buf = c->dbg->buf; void *buf, *p;
printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n", printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n",
current->pid, lnum); current->pid, lnum);
buf = p = __vmalloc(c->leb_size, GFP_KERNEL | GFP_NOFS, PAGE_KERNEL);
if (!buf) {
ubifs_err("cannot allocate memory to dump LPT");
return;
}
err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size); err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size);
if (err) { if (err) {
ubifs_err("cannot read LEB %d, error %d", lnum, err); ubifs_err("cannot read LEB %d, error %d", lnum, err);
return; goto out;
} }
while (1) { while (1) {
offs = c->leb_size - len; offs = c->leb_size - len;
if (!is_a_node(c, buf, len)) { if (!is_a_node(c, p, len)) {
int pad_len; int pad_len;
pad_len = get_pad_len(c, buf, len); pad_len = get_pad_len(c, p, len);
if (pad_len) { if (pad_len) {
printk(KERN_DEBUG "LEB %d:%d, pad %d bytes\n", printk(KERN_DEBUG "LEB %d:%d, pad %d bytes\n",
lnum, offs, pad_len); lnum, offs, pad_len);
buf += pad_len; p += pad_len;
len -= pad_len; len -= pad_len;
continue; continue;
} }
...@@ -1898,7 +1915,7 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum) ...@@ -1898,7 +1915,7 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum)
break; break;
} }
node_type = get_lpt_node_type(c, buf, &node_num); node_type = get_lpt_node_type(c, p, &node_num);
switch (node_type) { switch (node_type) {
case UBIFS_LPT_PNODE: case UBIFS_LPT_PNODE:
{ {
...@@ -1923,7 +1940,7 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum) ...@@ -1923,7 +1940,7 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum)
else else
printk(KERN_DEBUG "LEB %d:%d, nnode, ", printk(KERN_DEBUG "LEB %d:%d, nnode, ",
lnum, offs); lnum, offs);
err = ubifs_unpack_nnode(c, buf, &nnode); err = ubifs_unpack_nnode(c, p, &nnode);
for (i = 0; i < UBIFS_LPT_FANOUT; i++) { for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
printk(KERN_CONT "%d:%d", nnode.nbranch[i].lnum, printk(KERN_CONT "%d:%d", nnode.nbranch[i].lnum,
nnode.nbranch[i].offs); nnode.nbranch[i].offs);
...@@ -1944,15 +1961,18 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum) ...@@ -1944,15 +1961,18 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum)
break; break;
default: default:
ubifs_err("LPT node type %d not recognized", node_type); ubifs_err("LPT node type %d not recognized", node_type);
return; goto out;
} }
buf += node_len; p += node_len;
len -= node_len; len -= node_len;
} }
printk(KERN_DEBUG "(pid %d) finish dumping LEB %d\n", printk(KERN_DEBUG "(pid %d) finish dumping LEB %d\n",
current->pid, lnum); current->pid, lnum);
out:
vfree(buf);
return;
} }
/** /**
......
...@@ -892,15 +892,22 @@ static int dbg_read_orphans(struct check_info *ci, struct ubifs_scan_leb *sleb) ...@@ -892,15 +892,22 @@ static int dbg_read_orphans(struct check_info *ci, struct ubifs_scan_leb *sleb)
static int dbg_scan_orphans(struct ubifs_info *c, struct check_info *ci) static int dbg_scan_orphans(struct ubifs_info *c, struct check_info *ci)
{ {
int lnum, err = 0; int lnum, err = 0;
void *buf;
/* Check no-orphans flag and skip this if no orphans */ /* Check no-orphans flag and skip this if no orphans */
if (c->no_orphs) if (c->no_orphs)
return 0; return 0;
buf = __vmalloc(c->leb_size, GFP_KERNEL | GFP_NOFS, PAGE_KERNEL);
if (!buf) {
ubifs_err("cannot allocate memory to check orphans");
return 0;
}
for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) { for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) {
struct ubifs_scan_leb *sleb; struct ubifs_scan_leb *sleb;
sleb = ubifs_scan(c, lnum, 0, c->dbg->buf, 0); sleb = ubifs_scan(c, lnum, 0, buf, 0);
if (IS_ERR(sleb)) { if (IS_ERR(sleb)) {
err = PTR_ERR(sleb); err = PTR_ERR(sleb);
break; break;
...@@ -912,6 +919,7 @@ static int dbg_scan_orphans(struct ubifs_info *c, struct check_info *ci) ...@@ -912,6 +919,7 @@ static int dbg_scan_orphans(struct ubifs_info *c, struct check_info *ci)
break; break;
} }
vfree(buf);
return err; return err;
} }
......
...@@ -28,6 +28,23 @@ ...@@ -28,6 +28,23 @@
* UBIFS always cleans away all remnants of an unclean un-mount, so that * UBIFS always cleans away all remnants of an unclean un-mount, so that
* errors do not accumulate. However UBIFS defers recovery if it is mounted * errors do not accumulate. However UBIFS defers recovery if it is mounted
* read-only, and the flash is not modified in that case. * read-only, and the flash is not modified in that case.
*
* The general UBIFS approach to the recovery is that it recovers from
* corruptions which could be caused by power cuts, but it refuses to recover
* from corruption caused by other reasons. And UBIFS tries to distinguish
* between these 2 reasons of corruptions and silently recover in the former
* case and loudly complain in the latter case.
*
* UBIFS writes only to erased LEBs, so it writes only to the flash space
* containing only 0xFFs. UBIFS also always writes strictly from the beginning
* of the LEB to the end. And UBIFS assumes that the underlying flash media
* writes in @c->max_write_size bytes at a time.
*
* Hence, if UBIFS finds a corrupted node at offset X, it expects only the
* @c->max_write_size-sized unit containing offset X to hold corrupted data;
* everything after that unit has to be empty space (all 0xFFs). If this is
* not true, the corruption cannot be the result of a power cut, and UBIFS
* refuses to mount.
*/ */
#include <linux/crc32.h> #include <linux/crc32.h>
...@@ -363,7 +380,8 @@ int ubifs_write_rcvrd_mst_node(struct ubifs_info *c) ...@@ -363,7 +380,8 @@ int ubifs_write_rcvrd_mst_node(struct ubifs_info *c)
* *
* This function returns %1 if @offs was in the last write to the LEB whose data * This function returns %1 if @offs was in the last write to the LEB whose data
* is in @buf, otherwise %0 is returned. The determination is made by checking * is in @buf, otherwise %0 is returned. The determination is made by checking
* for subsequent empty space starting from the next @c->min_io_size boundary. * for subsequent empty space starting from the next @c->max_write_size
* boundary.
*/ */
static int is_last_write(const struct ubifs_info *c, void *buf, int offs) static int is_last_write(const struct ubifs_info *c, void *buf, int offs)
{ {
...@@ -371,10 +389,10 @@ static int is_last_write(const struct ubifs_info *c, void *buf, int offs) ...@@ -371,10 +389,10 @@ static int is_last_write(const struct ubifs_info *c, void *buf, int offs)
uint8_t *p; uint8_t *p;
/* /*
* Round up to the next @c->min_io_size boundary i.e. @offs is in the * Round up to the next @c->max_write_size boundary i.e. @offs is in
* last wbuf written. After that should be empty space. * the last wbuf written. After that should be empty space.
*/ */
empty_offs = ALIGN(offs + 1, c->min_io_size); empty_offs = ALIGN(offs + 1, c->max_write_size);
check_len = c->leb_size - empty_offs; check_len = c->leb_size - empty_offs;
p = buf + empty_offs - offs; p = buf + empty_offs - offs;
return is_empty(p, check_len); return is_empty(p, check_len);
...@@ -429,7 +447,7 @@ static int no_more_nodes(const struct ubifs_info *c, void *buf, int len, ...@@ -429,7 +447,7 @@ static int no_more_nodes(const struct ubifs_info *c, void *buf, int len,
int skip, dlen = le32_to_cpu(ch->len); int skip, dlen = le32_to_cpu(ch->len);
/* Check for empty space after the corrupt node's common header */ /* Check for empty space after the corrupt node's common header */
skip = ALIGN(offs + UBIFS_CH_SZ, c->min_io_size) - offs; skip = ALIGN(offs + UBIFS_CH_SZ, c->max_write_size) - offs;
if (is_empty(buf + skip, len - skip)) if (is_empty(buf + skip, len - skip))
return 1; return 1;
/* /*
...@@ -441,7 +459,7 @@ static int no_more_nodes(const struct ubifs_info *c, void *buf, int len, ...@@ -441,7 +459,7 @@ static int no_more_nodes(const struct ubifs_info *c, void *buf, int len,
return 0; return 0;
} }
/* Now we know the corrupt node's length we can skip over it */ /* Now we know the corrupt node's length we can skip over it */
skip = ALIGN(offs + dlen, c->min_io_size) - offs; skip = ALIGN(offs + dlen, c->max_write_size) - offs;
/* After which there should be empty space */ /* After which there should be empty space */
if (is_empty(buf + skip, len - skip)) if (is_empty(buf + skip, len - skip))
return 1; return 1;
...@@ -671,10 +689,14 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, ...@@ -671,10 +689,14 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
} else { } else {
int corruption = first_non_ff(buf, len); int corruption = first_non_ff(buf, len);
/*
* See header comment for this file for more
* explanations about the reasons we have this check.
*/
ubifs_err("corrupt empty space LEB %d:%d, corruption " ubifs_err("corrupt empty space LEB %d:%d, corruption "
"starts at %d", lnum, offs, corruption); "starts at %d", lnum, offs, corruption);
/* Make sure we dump interesting non-0xFF data */ /* Make sure we dump interesting non-0xFF data */
offs = corruption; offs += corruption;
buf += corruption; buf += corruption;
goto corrupted; goto corrupted;
} }
...@@ -836,12 +858,8 @@ struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, ...@@ -836,12 +858,8 @@ struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum,
static int recover_head(const struct ubifs_info *c, int lnum, int offs, static int recover_head(const struct ubifs_info *c, int lnum, int offs,
void *sbuf) void *sbuf)
{ {
int len, err; int len = c->max_write_size, err;
if (c->min_io_size > 1)
len = c->min_io_size;
else
len = 512;
if (offs + len > c->leb_size) if (offs + len > c->leb_size)
len = c->leb_size - offs; len = c->leb_size - offs;
......
...@@ -328,7 +328,7 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, ...@@ -328,7 +328,7 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum,
if (!quiet) if (!quiet)
ubifs_err("empty space starts at non-aligned offset %d", ubifs_err("empty space starts at non-aligned offset %d",
offs); offs);
goto corrupted;; goto corrupted;
} }
ubifs_end_scan(c, sleb, lnum, offs); ubifs_end_scan(c, sleb, lnum, offs);
......
...@@ -512,9 +512,12 @@ static int init_constants_early(struct ubifs_info *c) ...@@ -512,9 +512,12 @@ static int init_constants_early(struct ubifs_info *c)
c->leb_cnt = c->vi.size; c->leb_cnt = c->vi.size;
c->leb_size = c->vi.usable_leb_size; c->leb_size = c->vi.usable_leb_size;
c->leb_start = c->di.leb_start;
c->half_leb_size = c->leb_size / 2; c->half_leb_size = c->leb_size / 2;
c->min_io_size = c->di.min_io_size; c->min_io_size = c->di.min_io_size;
c->min_io_shift = fls(c->min_io_size) - 1; c->min_io_shift = fls(c->min_io_size) - 1;
c->max_write_size = c->di.max_write_size;
c->max_write_shift = fls(c->max_write_size) - 1;
if (c->leb_size < UBIFS_MIN_LEB_SZ) { if (c->leb_size < UBIFS_MIN_LEB_SZ) {
ubifs_err("too small LEBs (%d bytes), min. is %d bytes", ubifs_err("too small LEBs (%d bytes), min. is %d bytes",
...@@ -533,6 +536,18 @@ static int init_constants_early(struct ubifs_info *c) ...@@ -533,6 +536,18 @@ static int init_constants_early(struct ubifs_info *c)
return -EINVAL; return -EINVAL;
} }
/*
* Maximum write size has to be greater than or equal to the min. I/O
* size, be a multiple of the min. I/O size, and be a power of 2.
*/
if (c->max_write_size < c->min_io_size ||
c->max_write_size % c->min_io_size ||
!is_power_of_2(c->max_write_size)) {
ubifs_err("bad write buffer size %d for %d min. I/O unit",
c->max_write_size, c->min_io_size);
return -EINVAL;
}
/* /*
* UBIFS aligns all node to 8-byte boundary, so to make function in * UBIFS aligns all node to 8-byte boundary, so to make function in
* io.c simpler, assume minimum I/O unit size to be 8 bytes if it is * io.c simpler, assume minimum I/O unit size to be 8 bytes if it is
...@@ -541,6 +556,10 @@ static int init_constants_early(struct ubifs_info *c) ...@@ -541,6 +556,10 @@ static int init_constants_early(struct ubifs_info *c)
if (c->min_io_size < 8) { if (c->min_io_size < 8) {
c->min_io_size = 8; c->min_io_size = 8;
c->min_io_shift = 3; c->min_io_shift = 3;
if (c->max_write_size < c->min_io_size) {
c->max_write_size = c->min_io_size;
c->max_write_shift = c->min_io_shift;
}
} }
c->ref_node_alsz = ALIGN(UBIFS_REF_NODE_SZ, c->min_io_size); c->ref_node_alsz = ALIGN(UBIFS_REF_NODE_SZ, c->min_io_size);
...@@ -1202,11 +1221,14 @@ static int mount_ubifs(struct ubifs_info *c) ...@@ -1202,11 +1221,14 @@ static int mount_ubifs(struct ubifs_info *c)
if (c->bulk_read == 1) if (c->bulk_read == 1)
bu_init(c); bu_init(c);
/* if (!c->ro_mount) {
* We have to check all CRCs, even for data nodes, when we mount the FS c->write_reserve_buf = kmalloc(COMPRESSED_DATA_NODE_BUF_SZ,
* (specifically, when we are replaying). GFP_KERNEL);
*/ if (!c->write_reserve_buf)
c->always_chk_crc = 1; goto out_free;
}
c->mounting = 1;
err = ubifs_read_superblock(c); err = ubifs_read_superblock(c);
if (err) if (err)
...@@ -1382,7 +1404,7 @@ static int mount_ubifs(struct ubifs_info *c) ...@@ -1382,7 +1404,7 @@ static int mount_ubifs(struct ubifs_info *c)
if (err) if (err)
goto out_infos; goto out_infos;
c->always_chk_crc = 0; c->mounting = 0;
ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"", ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"",
c->vi.ubi_num, c->vi.vol_id, c->vi.name); c->vi.ubi_num, c->vi.vol_id, c->vi.name);
...@@ -1403,6 +1425,7 @@ static int mount_ubifs(struct ubifs_info *c) ...@@ -1403,6 +1425,7 @@ static int mount_ubifs(struct ubifs_info *c)
dbg_msg("compiled on: " __DATE__ " at " __TIME__); dbg_msg("compiled on: " __DATE__ " at " __TIME__);
dbg_msg("min. I/O unit size: %d bytes", c->min_io_size); dbg_msg("min. I/O unit size: %d bytes", c->min_io_size);
dbg_msg("max. write size: %d bytes", c->max_write_size);
dbg_msg("LEB size: %d bytes (%d KiB)", dbg_msg("LEB size: %d bytes (%d KiB)",
c->leb_size, c->leb_size >> 10); c->leb_size, c->leb_size >> 10);
dbg_msg("data journal heads: %d", dbg_msg("data journal heads: %d",
...@@ -1432,9 +1455,9 @@ static int mount_ubifs(struct ubifs_info *c) ...@@ -1432,9 +1455,9 @@ static int mount_ubifs(struct ubifs_info *c)
UBIFS_TRUN_NODE_SZ, UBIFS_SB_NODE_SZ, UBIFS_MST_NODE_SZ); UBIFS_TRUN_NODE_SZ, UBIFS_SB_NODE_SZ, UBIFS_MST_NODE_SZ);
dbg_msg("node sizes: ref %zu, cmt. start %zu, orph %zu", dbg_msg("node sizes: ref %zu, cmt. start %zu, orph %zu",
UBIFS_REF_NODE_SZ, UBIFS_CS_NODE_SZ, UBIFS_ORPH_NODE_SZ); UBIFS_REF_NODE_SZ, UBIFS_CS_NODE_SZ, UBIFS_ORPH_NODE_SZ);
dbg_msg("max. node sizes: data %zu, inode %zu dentry %zu", dbg_msg("max. node sizes: data %zu, inode %zu dentry %zu, idx %d",
UBIFS_MAX_DATA_NODE_SZ, UBIFS_MAX_INO_NODE_SZ, UBIFS_MAX_DATA_NODE_SZ, UBIFS_MAX_INO_NODE_SZ,
UBIFS_MAX_DENT_NODE_SZ); UBIFS_MAX_DENT_NODE_SZ, ubifs_idx_node_sz(c, c->fanout));
dbg_msg("dead watermark: %d", c->dead_wm); dbg_msg("dead watermark: %d", c->dead_wm);
dbg_msg("dark watermark: %d", c->dark_wm); dbg_msg("dark watermark: %d", c->dark_wm);
dbg_msg("LEB overhead: %d", c->leb_overhead); dbg_msg("LEB overhead: %d", c->leb_overhead);
...@@ -1474,6 +1497,7 @@ static int mount_ubifs(struct ubifs_info *c) ...@@ -1474,6 +1497,7 @@ static int mount_ubifs(struct ubifs_info *c)
out_cbuf: out_cbuf:
kfree(c->cbuf); kfree(c->cbuf);
out_free: out_free:
kfree(c->write_reserve_buf);
kfree(c->bu.buf); kfree(c->bu.buf);
vfree(c->ileb_buf); vfree(c->ileb_buf);
vfree(c->sbuf); vfree(c->sbuf);
...@@ -1512,6 +1536,7 @@ static void ubifs_umount(struct ubifs_info *c) ...@@ -1512,6 +1536,7 @@ static void ubifs_umount(struct ubifs_info *c)
kfree(c->cbuf); kfree(c->cbuf);
kfree(c->rcvrd_mst_node); kfree(c->rcvrd_mst_node);
kfree(c->mst_node); kfree(c->mst_node);
kfree(c->write_reserve_buf);
kfree(c->bu.buf); kfree(c->bu.buf);
vfree(c->ileb_buf); vfree(c->ileb_buf);
vfree(c->sbuf); vfree(c->sbuf);
...@@ -1543,7 +1568,6 @@ static int ubifs_remount_rw(struct ubifs_info *c) ...@@ -1543,7 +1568,6 @@ static int ubifs_remount_rw(struct ubifs_info *c)
mutex_lock(&c->umount_mutex); mutex_lock(&c->umount_mutex);
dbg_save_space_info(c); dbg_save_space_info(c);
c->remounting_rw = 1; c->remounting_rw = 1;
c->always_chk_crc = 1;
err = check_free_space(c); err = check_free_space(c);
if (err) if (err)
...@@ -1598,6 +1622,10 @@ static int ubifs_remount_rw(struct ubifs_info *c) ...@@ -1598,6 +1622,10 @@ static int ubifs_remount_rw(struct ubifs_info *c)
goto out; goto out;
} }
c->write_reserve_buf = kmalloc(COMPRESSED_DATA_NODE_BUF_SZ, GFP_KERNEL);
if (!c->write_reserve_buf)
goto out;
err = ubifs_lpt_init(c, 0, 1); err = ubifs_lpt_init(c, 0, 1);
if (err) if (err)
goto out; goto out;
...@@ -1650,7 +1678,6 @@ static int ubifs_remount_rw(struct ubifs_info *c) ...@@ -1650,7 +1678,6 @@ static int ubifs_remount_rw(struct ubifs_info *c)
dbg_gen("re-mounted read-write"); dbg_gen("re-mounted read-write");
c->ro_mount = 0; c->ro_mount = 0;
c->remounting_rw = 0; c->remounting_rw = 0;
c->always_chk_crc = 0;
err = dbg_check_space_info(c); err = dbg_check_space_info(c);
mutex_unlock(&c->umount_mutex); mutex_unlock(&c->umount_mutex);
return err; return err;
...@@ -1663,11 +1690,12 @@ static int ubifs_remount_rw(struct ubifs_info *c) ...@@ -1663,11 +1690,12 @@ static int ubifs_remount_rw(struct ubifs_info *c)
c->bgt = NULL; c->bgt = NULL;
} }
free_wbufs(c); free_wbufs(c);
kfree(c->write_reserve_buf);
c->write_reserve_buf = NULL;
vfree(c->ileb_buf); vfree(c->ileb_buf);
c->ileb_buf = NULL; c->ileb_buf = NULL;
ubifs_lpt_free(c, 1); ubifs_lpt_free(c, 1);
c->remounting_rw = 0; c->remounting_rw = 0;
c->always_chk_crc = 0;
mutex_unlock(&c->umount_mutex); mutex_unlock(&c->umount_mutex);
return err; return err;
} }
...@@ -1707,6 +1735,8 @@ static void ubifs_remount_ro(struct ubifs_info *c) ...@@ -1707,6 +1735,8 @@ static void ubifs_remount_ro(struct ubifs_info *c)
free_wbufs(c); free_wbufs(c);
vfree(c->orph_buf); vfree(c->orph_buf);
c->orph_buf = NULL; c->orph_buf = NULL;
kfree(c->write_reserve_buf);
c->write_reserve_buf = NULL;
vfree(c->ileb_buf); vfree(c->ileb_buf);
c->ileb_buf = NULL; c->ileb_buf = NULL;
ubifs_lpt_free(c, 1); ubifs_lpt_free(c, 1);
...@@ -1937,6 +1967,7 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent) ...@@ -1937,6 +1967,7 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
mutex_init(&c->mst_mutex); mutex_init(&c->mst_mutex);
mutex_init(&c->umount_mutex); mutex_init(&c->umount_mutex);
mutex_init(&c->bu_mutex); mutex_init(&c->bu_mutex);
mutex_init(&c->write_reserve_mutex);
init_waitqueue_head(&c->cmt_wq); init_waitqueue_head(&c->cmt_wq);
c->buds = RB_ROOT; c->buds = RB_ROOT;
c->old_idx = RB_ROOT; c->old_idx = RB_ROOT;
...@@ -1954,6 +1985,7 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent) ...@@ -1954,6 +1985,7 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
INIT_LIST_HEAD(&c->old_buds); INIT_LIST_HEAD(&c->old_buds);
INIT_LIST_HEAD(&c->orph_list); INIT_LIST_HEAD(&c->orph_list);
INIT_LIST_HEAD(&c->orph_new); INIT_LIST_HEAD(&c->orph_new);
c->no_chk_data_crc = 1;
c->vfs_sb = sb; c->vfs_sb = sb;
c->highest_inum = UBIFS_FIRST_INO; c->highest_inum = UBIFS_FIRST_INO;
......
...@@ -447,8 +447,11 @@ static int tnc_read_node_nm(struct ubifs_info *c, struct ubifs_zbranch *zbr, ...@@ -447,8 +447,11 @@ static int tnc_read_node_nm(struct ubifs_info *c, struct ubifs_zbranch *zbr,
* *
* Note, this function does not check CRC of data nodes if @c->no_chk_data_crc * Note, this function does not check CRC of data nodes if @c->no_chk_data_crc
* is true (it is controlled by corresponding mount option). However, if * is true (it is controlled by corresponding mount option). However, if
* @c->always_chk_crc is true, @c->no_chk_data_crc is ignored and CRC is always * @c->mounting or @c->remounting_rw is true (we are mounting or re-mounting to
* checked. * R/W mode), @c->no_chk_data_crc is ignored and CRC is checked. This is
* because during mounting or re-mounting from R/O mode to R/W mode we may read
* journal nodes (when replaying the journal or doing the recovery) and the
* journal nodes may potentially be corrupted, so checking is required.
*/ */
static int try_read_node(const struct ubifs_info *c, void *buf, int type, static int try_read_node(const struct ubifs_info *c, void *buf, int type,
int len, int lnum, int offs) int len, int lnum, int offs)
...@@ -476,7 +479,8 @@ static int try_read_node(const struct ubifs_info *c, void *buf, int type, ...@@ -476,7 +479,8 @@ static int try_read_node(const struct ubifs_info *c, void *buf, int type,
if (node_len != len) if (node_len != len)
return 0; return 0;
if (type == UBIFS_DATA_NODE && !c->always_chk_crc && c->no_chk_data_crc) if (type == UBIFS_DATA_NODE && c->no_chk_data_crc && !c->mounting &&
!c->remounting_rw)
return 1; return 1;
crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8); crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8);
......
...@@ -151,6 +151,12 @@ ...@@ -151,6 +151,12 @@
*/ */
#define WORST_COMPR_FACTOR 2 #define WORST_COMPR_FACTOR 2
/*
* How much memory is needed for a buffer where we compress a data node.
*/
#define COMPRESSED_DATA_NODE_BUF_SZ \
(UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE * WORST_COMPR_FACTOR)
/* Maximum expected tree height for use by bottom_up_buf */ /* Maximum expected tree height for use by bottom_up_buf */
#define BOTTOM_UP_HEIGHT 64 #define BOTTOM_UP_HEIGHT 64
...@@ -646,6 +652,7 @@ typedef int (*ubifs_lpt_scan_callback)(struct ubifs_info *c, ...@@ -646,6 +652,7 @@ typedef int (*ubifs_lpt_scan_callback)(struct ubifs_info *c,
* @offs: write-buffer offset in this logical eraseblock * @offs: write-buffer offset in this logical eraseblock
* @avail: number of bytes available in the write-buffer * @avail: number of bytes available in the write-buffer
* @used: number of used bytes in the write-buffer * @used: number of used bytes in the write-buffer
* @size: write-buffer size (in [@c->min_io_size, @c->max_write_size] range)
* @dtype: type of data stored in this LEB (%UBI_LONGTERM, %UBI_SHORTTERM, * @dtype: type of data stored in this LEB (%UBI_LONGTERM, %UBI_SHORTTERM,
* %UBI_UNKNOWN) * %UBI_UNKNOWN)
* @jhead: journal head the mutex belongs to (note, needed only to shut lockdep * @jhead: journal head the mutex belongs to (note, needed only to shut lockdep
...@@ -680,6 +687,7 @@ struct ubifs_wbuf { ...@@ -680,6 +687,7 @@ struct ubifs_wbuf {
int offs; int offs;
int avail; int avail;
int used; int used;
int size;
int dtype; int dtype;
int jhead; int jhead;
int (*sync_callback)(struct ubifs_info *c, int lnum, int free, int pad); int (*sync_callback)(struct ubifs_info *c, int lnum, int free, int pad);
...@@ -1003,6 +1011,11 @@ struct ubifs_debug_info; ...@@ -1003,6 +1011,11 @@ struct ubifs_debug_info;
* @bu_mutex: protects the pre-allocated bulk-read buffer and @c->bu * @bu_mutex: protects the pre-allocated bulk-read buffer and @c->bu
* @bu: pre-allocated bulk-read information * @bu: pre-allocated bulk-read information
* *
* @write_reserve_mutex: protects @write_reserve_buf
* @write_reserve_buf: on the write path we allocate memory, which might
* sometimes be unavailable, in which case we use this
* write reserve buffer
*
* @log_lebs: number of logical eraseblocks in the log * @log_lebs: number of logical eraseblocks in the log
* @log_bytes: log size in bytes * @log_bytes: log size in bytes
* @log_last: last LEB of the log * @log_last: last LEB of the log
...@@ -1024,7 +1037,12 @@ struct ubifs_debug_info; ...@@ -1024,7 +1037,12 @@ struct ubifs_debug_info;
* *
* @min_io_size: minimal input/output unit size * @min_io_size: minimal input/output unit size
* @min_io_shift: number of bits in @min_io_size minus one * @min_io_shift: number of bits in @min_io_size minus one
* @max_write_size: maximum amount of bytes the underlying flash can write at a
* time (MTD write buffer size)
* @max_write_shift: number of bits in @max_write_size minus one
* @leb_size: logical eraseblock size in bytes * @leb_size: logical eraseblock size in bytes
* @leb_start: starting offset of logical eraseblocks within physical
* eraseblocks
* @half_leb_size: half LEB size * @half_leb_size: half LEB size
* @idx_leb_size: how many bytes of an LEB are effectively available when it is * @idx_leb_size: how many bytes of an LEB are effectively available when it is
* used to store indexing nodes (@leb_size - @max_idx_node_sz) * used to store indexing nodes (@leb_size - @max_idx_node_sz)
...@@ -1166,22 +1184,21 @@ struct ubifs_debug_info; ...@@ -1166,22 +1184,21 @@ struct ubifs_debug_info;
* @rp_uid: reserved pool user ID * @rp_uid: reserved pool user ID
* @rp_gid: reserved pool group ID * @rp_gid: reserved pool group ID
* *
* @empty: if the UBI device is empty * @empty: %1 if the UBI device is empty
* @need_recovery: %1 if the file-system needs recovery
* @replaying: %1 during journal replay
* @mounting: %1 while mounting
* @remounting_rw: %1 while re-mounting from R/O mode to R/W mode
* @replay_tree: temporary tree used during journal replay * @replay_tree: temporary tree used during journal replay
* @replay_list: temporary list used during journal replay * @replay_list: temporary list used during journal replay
* @replay_buds: list of buds to replay * @replay_buds: list of buds to replay
* @cs_sqnum: sequence number of first node in the log (commit start node) * @cs_sqnum: sequence number of first node in the log (commit start node)
* @replay_sqnum: sequence number of node currently being replayed * @replay_sqnum: sequence number of node currently being replayed
* @need_recovery: file-system needs recovery
* @replaying: set to %1 during journal replay
* @unclean_leb_list: LEBs to recover when re-mounting R/O mounted FS to R/W * @unclean_leb_list: LEBs to recover when re-mounting R/O mounted FS to R/W
* mode * mode
* @rcvrd_mst_node: recovered master node to write when re-mounting R/O mounted * @rcvrd_mst_node: recovered master node to write when re-mounting R/O mounted
* FS to R/W mode * FS to R/W mode
* @size_tree: inode size information for recovery * @size_tree: inode size information for recovery
* @remounting_rw: set while re-mounting from R/O mode to R/W mode
* @always_chk_crc: always check CRCs (while mounting and remounting to R/W
* mode)
* @mount_opts: UBIFS-specific mount options * @mount_opts: UBIFS-specific mount options
* *
* @dbg: debugging-related information * @dbg: debugging-related information
...@@ -1250,6 +1267,9 @@ struct ubifs_info { ...@@ -1250,6 +1267,9 @@ struct ubifs_info {
struct mutex bu_mutex; struct mutex bu_mutex;
struct bu_info bu; struct bu_info bu;
struct mutex write_reserve_mutex;
void *write_reserve_buf;
int log_lebs; int log_lebs;
long long log_bytes; long long log_bytes;
int log_last; int log_last;
...@@ -1271,7 +1291,10 @@ struct ubifs_info { ...@@ -1271,7 +1291,10 @@ struct ubifs_info {
int min_io_size; int min_io_size;
int min_io_shift; int min_io_shift;
int max_write_size;
int max_write_shift;
int leb_size; int leb_size;
int leb_start;
int half_leb_size; int half_leb_size;
int idx_leb_size; int idx_leb_size;
int leb_cnt; int leb_cnt;
...@@ -1402,19 +1425,19 @@ struct ubifs_info { ...@@ -1402,19 +1425,19 @@ struct ubifs_info {
gid_t rp_gid; gid_t rp_gid;
/* The below fields are used only during mounting and re-mounting */ /* The below fields are used only during mounting and re-mounting */
int empty; unsigned int empty:1;
unsigned int need_recovery:1;
unsigned int replaying:1;
unsigned int mounting:1;
unsigned int remounting_rw:1;
struct rb_root replay_tree; struct rb_root replay_tree;
struct list_head replay_list; struct list_head replay_list;
struct list_head replay_buds; struct list_head replay_buds;
unsigned long long cs_sqnum; unsigned long long cs_sqnum;
unsigned long long replay_sqnum; unsigned long long replay_sqnum;
int need_recovery;
int replaying;
struct list_head unclean_leb_list; struct list_head unclean_leb_list;
struct ubifs_mst_node *rcvrd_mst_node; struct ubifs_mst_node *rcvrd_mst_node;
struct rb_root size_tree; struct rb_root size_tree;
int remounting_rw;
int always_chk_crc;
struct ubifs_mount_opts mount_opts; struct ubifs_mount_opts mount_opts;
#ifdef CONFIG_UBIFS_FS_DEBUG #ifdef CONFIG_UBIFS_FS_DEBUG
......
...@@ -116,18 +116,40 @@ struct ubi_volume_info { ...@@ -116,18 +116,40 @@ struct ubi_volume_info {
* struct ubi_device_info - UBI device description data structure. * struct ubi_device_info - UBI device description data structure.
* @ubi_num: ubi device number * @ubi_num: ubi device number
* @leb_size: logical eraseblock size on this UBI device * @leb_size: logical eraseblock size on this UBI device
* @leb_start: starting offset of logical eraseblocks within physical
* eraseblocks
* @min_io_size: minimal I/O unit size * @min_io_size: minimal I/O unit size
* @max_write_size: maximum amount of bytes the underlying flash can write at a
* time (MTD write buffer size)
* @ro_mode: if this device is in read-only mode * @ro_mode: if this device is in read-only mode
* @cdev: UBI character device major and minor numbers * @cdev: UBI character device major and minor numbers
* *
* Note, @leb_size is the logical eraseblock size offered by the UBI device. * Note, @leb_size is the logical eraseblock size offered by the UBI device.
* Volumes of this UBI device may have smaller logical eraseblock size if their * Volumes of this UBI device may have smaller logical eraseblock size if their
* alignment is not equivalent to %1. * alignment is not equivalent to %1.
*
* The @max_write_size field describes the flash's maximum write unit. For
* example, NOR flash allows for changing individual bytes, so @min_io_size is
* %1. However, it does not mean that NOR flash has to write data byte-by-byte.
* Instead, CFI NOR flashes have a write-buffer of, e.g., 64 bytes, and when
* writing large chunks of data, they write 64-bytes at a time. Obviously, this
* improves write throughput.
*
* Also, the MTD device may have N interleaved (striped) flash chips
* underneath, in which case @min_io_size can be physical min. I/O size of
* single flash chip, while @max_write_size can be N * @min_io_size.
*
* The @max_write_size field is always greater than or equal to @min_io_size.
* E.g., some NOR flashes may have (@min_io_size = 1, @max_write_size = 64). In
* contrast, NAND flashes usually have @min_io_size = @max_write_size = NAND
* page size.
*/ */
struct ubi_device_info { struct ubi_device_info {
int ubi_num; int ubi_num;
int leb_size; int leb_size;
int leb_start;
int min_io_size; int min_io_size;
int max_write_size;
int ro_mode; int ro_mode;
dev_t cdev; dev_t cdev;
}; };
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment