Commit ea814ab9 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 updates from Ted Ts'o:
 "Pretty much all bug fixes and clean ups for 4.3, after a lot of
  features and other churn going into 4.2"

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  Revert "ext4: remove block_device_ejected"
  ext4: ratelimit the file system mounted message
  ext4: silence a format string false positive
  ext4: simplify some code in read_mmp_block()
  ext4: don't manipulate recovery flag when freezing no-journal fs
  jbd2: limit number of reserved credits
  ext4 crypto: remove duplicate header file
  ext4: update c/mtime on truncate up
  jbd2: avoid infinite loop when destroying aborted journal
  ext4, jbd2: add REQ_FUA flag when recording an error in the superblock
  ext4 crypto: fix spelling typo in comment
  ext4 crypto: exit cleanly if ext4_derive_key_aes() fails
  ext4: reject journal options for ext2 mounts
  ext4: implement cgroup writeback support
  ext4: replace ext4_io_submit->io_op with ->io_wbc
  ext4 crypto: check for too-short encrypted file names
  ext4 crypto: use a jbd2 transaction when adding a crypto policy
  jbd2: speedup jbd2_journal_dirty_metadata()
parents e31fb9e0 bdfe0cbd
......@@ -19,7 +19,6 @@
#include <linux/gfp.h>
#include <linux/kernel.h>
#include <linux/key.h>
#include <linux/key.h>
#include <linux/list.h>
#include <linux/mempool.h>
#include <linux/random.h>
......@@ -329,6 +328,10 @@ int _ext4_fname_disk_to_usr(struct inode *inode,
return oname->len;
}
}
if (iname->len < EXT4_CRYPTO_BLOCK_SIZE) {
EXT4_ERROR_INODE(inode, "encrypted inode too small");
return -EUCLEAN;
}
if (EXT4_I(inode)->i_crypt_info)
return ext4_fname_decrypt(inode, iname, oname);
......
......@@ -30,7 +30,7 @@ static void derive_crypt_complete(struct crypto_async_request *req, int rc)
/**
* ext4_derive_key_aes() - Derive a key using AES-128-ECB
* @deriving_key: Encryption key used for derivatio.
* @deriving_key: Encryption key used for derivation.
* @source_key: Source key to which to apply derivation.
* @derived_key: Derived key.
*
......@@ -220,6 +220,8 @@ int _ext4_get_encryption_info(struct inode *inode)
BUG_ON(master_key->size != EXT4_AES_256_XTS_KEY_SIZE);
res = ext4_derive_key_aes(ctx.nonce, master_key->raw,
raw_key);
if (res)
goto out;
got_key:
ctfm = crypto_alloc_ablkcipher(cipher_str, 0, 0);
if (!ctfm || IS_ERR(ctfm)) {
......
......@@ -12,6 +12,7 @@
#include <linux/string.h>
#include <linux/types.h>
#include "ext4_jbd2.h"
#include "ext4.h"
#include "xattr.h"
......@@ -49,7 +50,8 @@ static int ext4_create_encryption_context_from_policy(
struct inode *inode, const struct ext4_encryption_policy *policy)
{
struct ext4_encryption_context ctx;
int res = 0;
handle_t *handle;
int res, res2;
res = ext4_convert_inline_data(inode);
if (res)
......@@ -78,11 +80,22 @@ static int ext4_create_encryption_context_from_policy(
BUILD_BUG_ON(sizeof(ctx.nonce) != EXT4_KEY_DERIVATION_NONCE_SIZE);
get_random_bytes(ctx.nonce, EXT4_KEY_DERIVATION_NONCE_SIZE);
handle = ext4_journal_start(inode, EXT4_HT_MISC,
ext4_jbd2_credits_xattr(inode));
if (IS_ERR(handle))
return PTR_ERR(handle);
res = ext4_xattr_set(inode, EXT4_XATTR_INDEX_ENCRYPTION,
EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx,
sizeof(ctx), 0);
if (!res)
if (!res) {
ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);
res = ext4_mark_inode_dirty(handle, inode);
if (res)
EXT4_ERROR_INODE(inode, "Failed to mark inode dirty");
}
res2 = ext4_journal_stop(handle);
if (!res)
res = res2;
return res;
}
......
......@@ -187,7 +187,7 @@ typedef struct ext4_io_end {
} ext4_io_end_t;
struct ext4_io_submit {
int io_op;
struct writeback_control *io_wbc;
struct bio *io_bio;
ext4_io_end_t *io_end;
sector_t io_next_block;
......
......@@ -4728,6 +4728,14 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
error = ext4_orphan_add(handle, inode);
orphan = 1;
}
/*
* Update c/mtime on truncate up, ext4_truncate() will
* update c/mtime in shrink case below
*/
if (!shrink) {
inode->i_mtime = ext4_current_time(inode);
inode->i_ctime = inode->i_mtime;
}
down_write(&EXT4_I(inode)->i_data_sem);
EXT4_I(inode)->i_disksize = attr->ia_size;
rc = ext4_mark_inode_dirty(handle, inode);
......
......@@ -69,6 +69,7 @@ static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
ext4_fsblk_t mmp_block)
{
struct mmp_struct *mmp;
int ret;
if (*bh)
clear_buffer_uptodate(*bh);
......@@ -76,11 +77,14 @@ static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
/* This would be sb_bread(sb, mmp_block), except we need to be sure
* that the MD RAID device cache has been bypassed, and that the read
* is not blocked in the elevator. */
if (!*bh)
if (!*bh) {
*bh = sb_getblk(sb, mmp_block);
if (!*bh)
return -ENOMEM;
if (*bh) {
if (!*bh) {
ret = -ENOMEM;
goto warn_exit;
}
}
get_bh(*bh);
lock_buffer(*bh);
(*bh)->b_end_io = end_buffer_read_sync;
......@@ -89,20 +93,20 @@ static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
if (!buffer_uptodate(*bh)) {
brelse(*bh);
*bh = NULL;
}
}
if (unlikely(!*bh)) {
ext4_warning(sb, "Error while reading MMP block %llu",
mmp_block);
return -EIO;
ret = -EIO;
goto warn_exit;
}
mmp = (struct mmp_struct *)((*bh)->b_data);
if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC ||
!ext4_mmp_csum_verify(sb, mmp))
return -EINVAL;
if (le32_to_cpu(mmp->mmp_magic) == EXT4_MMP_MAGIC &&
ext4_mmp_csum_verify(sb, mmp))
return 0;
ret = -EINVAL;
warn_exit:
ext4_warning(sb, "Error %d while reading MMP block %llu",
ret, mmp_block);
return ret;
}
/*
......@@ -111,7 +115,7 @@ static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
void __dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp,
const char *function, unsigned int line, const char *msg)
{
__ext4_warning(sb, function, line, msg);
__ext4_warning(sb, function, line, "%s", msg);
__ext4_warning(sb, function, line,
"MMP failure info: last update time: %llu, last update "
"node: %s, last update device: %s\n",
......
......@@ -354,8 +354,10 @@ void ext4_io_submit(struct ext4_io_submit *io)
struct bio *bio = io->io_bio;
if (bio) {
int io_op = io->io_wbc->sync_mode == WB_SYNC_ALL ?
WRITE_SYNC : WRITE;
bio_get(io->io_bio);
submit_bio(io->io_op, io->io_bio);
submit_bio(io_op, io->io_bio);
bio_put(io->io_bio);
}
io->io_bio = NULL;
......@@ -364,7 +366,7 @@ void ext4_io_submit(struct ext4_io_submit *io)
void ext4_io_submit_init(struct ext4_io_submit *io,
struct writeback_control *wbc)
{
io->io_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);
io->io_wbc = wbc;
io->io_bio = NULL;
io->io_end = NULL;
}
......@@ -377,6 +379,7 @@ static int io_submit_init_bio(struct ext4_io_submit *io,
bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
if (!bio)
return -ENOMEM;
wbc_init_bio(io->io_wbc, bio);
bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
bio->bi_bdev = bh->b_bdev;
bio->bi_end_io = ext4_end_bio;
......@@ -405,6 +408,7 @@ static int io_submit_add_bh(struct ext4_io_submit *io,
ret = bio_add_page(io->io_bio, page, bh->b_size, bh_offset(bh));
if (ret != bh->b_size)
goto submit_and_retry;
wbc_account_io(io->io_wbc, page, bh->b_size);
io->io_next_block++;
return 0;
}
......
......@@ -60,6 +60,7 @@ static struct ext4_lazy_init *ext4_li_info;
static struct mutex ext4_li_mtx;
static struct ext4_features *ext4_feat;
static int ext4_mballoc_ready;
static struct ratelimit_state ext4_mount_msg_ratelimit;
static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
unsigned long journal_devnum);
......@@ -321,6 +322,22 @@ static void save_error_info(struct super_block *sb, const char *func,
ext4_commit_super(sb, 1);
}
/*
* The del_gendisk() function uninitializes the disk-specific data
* structures, including the bdi structure, without telling anyone
* else. Once this happens, any attempt to call mark_buffer_dirty()
* (for example, by ext4_commit_super), will cause a kernel OOPS.
* This is a kludge to prevent these oops until we can put in a proper
* hook in del_gendisk() to inform the VFS and file system layers.
*/
static int block_device_ejected(struct super_block *sb)
{
struct inode *bd_inode = sb->s_bdev->bd_inode;
struct backing_dev_info *bdi = inode_to_bdi(bd_inode);
return bdi->dev == NULL;
}
static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
{
struct super_block *sb = journal->j_private;
......@@ -1390,9 +1407,9 @@ static const struct mount_opts {
{Opt_stripe, 0, MOPT_GTE0},
{Opt_resuid, 0, MOPT_GTE0},
{Opt_resgid, 0, MOPT_GTE0},
{Opt_journal_dev, 0, MOPT_GTE0},
{Opt_journal_path, 0, MOPT_STRING},
{Opt_journal_ioprio, 0, MOPT_GTE0},
{Opt_journal_dev, 0, MOPT_NO_EXT2 | MOPT_GTE0},
{Opt_journal_path, 0, MOPT_NO_EXT2 | MOPT_STRING},
{Opt_journal_ioprio, 0, MOPT_NO_EXT2 | MOPT_GTE0},
{Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_NO_EXT2 | MOPT_DATAJ},
{Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_NO_EXT2 | MOPT_DATAJ},
{Opt_data_writeback, EXT4_MOUNT_WRITEBACK_DATA,
......@@ -3639,6 +3656,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
}
if (test_opt(sb, DELALLOC))
clear_opt(sb, DELALLOC);
} else {
sb->s_iflags |= SB_I_CGROUPWB;
}
sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
......@@ -4271,6 +4290,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
"the device does not support discard");
}
if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs mount"))
ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
"Opts: %s%s%s", descr, sbi->s_es->s_mount_opts,
*sbi->s_es->s_mount_opts ? "; " : "", orig_data);
......@@ -4613,7 +4633,7 @@ static int ext4_commit_super(struct super_block *sb, int sync)
struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
int error = 0;
if (!sbh)
if (!sbh || block_device_ejected(sb))
return error;
if (buffer_write_io_error(sbh)) {
/*
......@@ -4661,7 +4681,8 @@ static int ext4_commit_super(struct super_block *sb, int sync)
ext4_superblock_csum_set(sb);
mark_buffer_dirty(sbh);
if (sync) {
error = sync_dirty_buffer(sbh);
error = __sync_dirty_buffer(sbh,
test_opt(sb, BARRIER) ? WRITE_FUA : WRITE_SYNC);
if (error)
return error;
......@@ -4829,10 +4850,11 @@ static int ext4_freeze(struct super_block *sb)
error = jbd2_journal_flush(journal);
if (error < 0)
goto out;
}
/* Journal blocked and flushed, clear needs_recovery flag. */
EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
}
error = ext4_commit_super(sb, 1);
out:
if (journal)
......@@ -4850,8 +4872,11 @@ static int ext4_unfreeze(struct super_block *sb)
if (sb->s_flags & MS_RDONLY)
return 0;
if (EXT4_SB(sb)->s_journal) {
/* Reset the needs_recovery flag before the fs is unlocked. */
EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
}
ext4_commit_super(sb, 1);
return 0;
}
......@@ -5600,6 +5625,7 @@ static int __init ext4_init_fs(void)
{
int i, err;
ratelimit_state_init(&ext4_mount_msg_ratelimit, 30 * HZ, 64);
ext4_li_info = NULL;
mutex_init(&ext4_li_mtx);
......
......@@ -417,12 +417,12 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
* journal_clean_one_cp_list
*
* Find all the written-back checkpoint buffers in the given list and
* release them.
* release them. If 'destroy' is set, clean all buffers unconditionally.
*
* Called with j_list_lock held.
* Returns 1 if we freed the transaction, 0 otherwise.
*/
static int journal_clean_one_cp_list(struct journal_head *jh)
static int journal_clean_one_cp_list(struct journal_head *jh, bool destroy)
{
struct journal_head *last_jh;
struct journal_head *next_jh = jh;
......@@ -436,7 +436,10 @@ static int journal_clean_one_cp_list(struct journal_head *jh)
do {
jh = next_jh;
next_jh = jh->b_cpnext;
if (!destroy)
ret = __try_to_free_cp_buf(jh);
else
ret = __jbd2_journal_remove_checkpoint(jh) + 1;
if (!ret)
return freed;
if (ret == 2)
......@@ -459,10 +462,11 @@ static int journal_clean_one_cp_list(struct journal_head *jh)
* journal_clean_checkpoint_list
*
* Find all the written-back checkpoint buffers in the journal and release them.
* If 'destroy' is set, release all buffers unconditionally.
*
* Called with j_list_lock held.
*/
void __jbd2_journal_clean_checkpoint_list(journal_t *journal)
void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy)
{
transaction_t *transaction, *last_transaction, *next_transaction;
int ret;
......@@ -476,7 +480,8 @@ void __jbd2_journal_clean_checkpoint_list(journal_t *journal)
do {
transaction = next_transaction;
next_transaction = transaction->t_cpnext;
ret = journal_clean_one_cp_list(transaction->t_checkpoint_list);
ret = journal_clean_one_cp_list(transaction->t_checkpoint_list,
destroy);
/*
* This function only frees up some memory if possible so we
* dont have an obligation to finish processing. Bail out if
......@@ -492,7 +497,7 @@ void __jbd2_journal_clean_checkpoint_list(journal_t *journal)
* we can possibly see not yet submitted buffers on io_list
*/
ret = journal_clean_one_cp_list(transaction->
t_checkpoint_io_list);
t_checkpoint_io_list, destroy);
if (need_resched())
return;
/*
......@@ -505,6 +510,28 @@ void __jbd2_journal_clean_checkpoint_list(journal_t *journal)
} while (transaction != last_transaction);
}
/*
* Remove buffers from all checkpoint lists as journal is aborted and we just
* need to free memory
*/
void jbd2_journal_destroy_checkpoint(journal_t *journal)
{
/*
* We loop because __jbd2_journal_clean_checkpoint_list() may abort
* early due to a need of rescheduling.
*/
while (1) {
spin_lock(&journal->j_list_lock);
if (!journal->j_checkpoint_transactions) {
spin_unlock(&journal->j_list_lock);
break;
}
__jbd2_journal_clean_checkpoint_list(journal, true);
spin_unlock(&journal->j_list_lock);
cond_resched();
}
}
/*
* journal_remove_checkpoint: called after a buffer has been committed
* to disk (either by being write-back flushed to disk, or being
......
......@@ -510,7 +510,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
* frees some memory
*/
spin_lock(&journal->j_list_lock);
__jbd2_journal_clean_checkpoint_list(journal);
__jbd2_journal_clean_checkpoint_list(journal, false);
spin_unlock(&journal->j_list_lock);
jbd_debug(3, "JBD2: commit phase 1\n");
......
......@@ -1456,7 +1456,7 @@ void jbd2_journal_update_sb_errno(journal_t *journal)
sb->s_errno = cpu_to_be32(journal->j_errno);
read_unlock(&journal->j_state_lock);
jbd2_write_superblock(journal, WRITE_SYNC);
jbd2_write_superblock(journal, WRITE_FUA);
}
EXPORT_SYMBOL(jbd2_journal_update_sb_errno);
......@@ -1693,8 +1693,17 @@ int jbd2_journal_destroy(journal_t *journal)
while (journal->j_checkpoint_transactions != NULL) {
spin_unlock(&journal->j_list_lock);
mutex_lock(&journal->j_checkpoint_mutex);
jbd2_log_do_checkpoint(journal);
err = jbd2_log_do_checkpoint(journal);
mutex_unlock(&journal->j_checkpoint_mutex);
/*
* If checkpointing failed, just free the buffers to avoid
* looping forever
*/
if (err) {
jbd2_journal_destroy_checkpoint(journal);
spin_lock(&journal->j_list_lock);
break;
}
spin_lock(&journal->j_list_lock);
}
......
......@@ -204,6 +204,20 @@ static int add_transaction_credits(journal_t *journal, int blocks,
* attach this handle to a new transaction.
*/
atomic_sub(total, &t->t_outstanding_credits);
/*
* Is the number of reserved credits in the current transaction too
* big to fit this handle? Wait until reserved credits are freed.
*/
if (atomic_read(&journal->j_reserved_credits) + total >
journal->j_max_transaction_buffers) {
read_unlock(&journal->j_state_lock);
wait_event(journal->j_wait_reserved,
atomic_read(&journal->j_reserved_credits) + total <=
journal->j_max_transaction_buffers);
return 1;
}
wait_transaction_locked(journal);
return 1;
}
......@@ -262,20 +276,24 @@ static int start_this_handle(journal_t *journal, handle_t *handle,
int rsv_blocks = 0;
unsigned long ts = jiffies;
if (handle->h_rsv_handle)
rsv_blocks = handle->h_rsv_handle->h_buffer_credits;
/*
* 1/2 of transaction can be reserved so we can practically handle
* only 1/2 of maximum transaction size per operation
*/
if (WARN_ON(blocks > journal->j_max_transaction_buffers / 2)) {
printk(KERN_ERR "JBD2: %s wants too many credits (%d > %d)\n",
current->comm, blocks,
journal->j_max_transaction_buffers / 2);
* Limit the number of reserved credits to 1/2 of maximum transaction
* size and limit the number of total credits to not exceed maximum
* transaction size per operation.
*/
if ((rsv_blocks > journal->j_max_transaction_buffers / 2) ||
(rsv_blocks + blocks > journal->j_max_transaction_buffers)) {
printk(KERN_ERR "JBD2: %s wants too many credits "
"credits:%d rsv_credits:%d max:%d\n",
current->comm, blocks, rsv_blocks,
journal->j_max_transaction_buffers);
WARN_ON(1);
return -ENOSPC;
}
if (handle->h_rsv_handle)
rsv_blocks = handle->h_rsv_handle->h_buffer_credits;
alloc_transaction:
if (!journal->j_running_transaction) {
/*
......@@ -1280,8 +1298,6 @@ void jbd2_buffer_abort_trigger(struct journal_head *jh,
triggers->t_abort(triggers, jh2bh(jh));
}
/**
* int jbd2_journal_dirty_metadata() - mark a buffer as containing dirty metadata
* @handle: transaction to add buffer to.
......@@ -1314,12 +1330,41 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
if (is_handle_aborted(handle))
return -EROFS;
journal = transaction->t_journal;
jh = jbd2_journal_grab_journal_head(bh);
if (!jh) {
if (!buffer_jbd(bh)) {
ret = -EUCLEAN;
goto out;
}
/*
* We don't grab jh reference here since the buffer must be part
* of the running transaction.
*/
jh = bh2jh(bh);
/*
* This and the following assertions are unreliable since we may see jh
* in inconsistent state unless we grab bh_state lock. But this is
* crucial to catch bugs so let's do a reliable check until the
* lockless handling is fully proven.
*/
if (jh->b_transaction != transaction &&
jh->b_next_transaction != transaction) {
jbd_lock_bh_state(bh);
J_ASSERT_JH(jh, jh->b_transaction == transaction ||
jh->b_next_transaction == transaction);
jbd_unlock_bh_state(bh);
}
if (jh->b_modified == 1) {
/* If it's in our transaction it must be in BJ_Metadata list. */
if (jh->b_transaction == transaction &&
jh->b_jlist != BJ_Metadata) {
jbd_lock_bh_state(bh);
J_ASSERT_JH(jh, jh->b_transaction != transaction ||
jh->b_jlist == BJ_Metadata);
jbd_unlock_bh_state(bh);
}
goto out;
}
journal = transaction->t_journal;
jbd_debug(5, "journal_head %p\n", jh);
JBUFFER_TRACE(jh, "entry");
......@@ -1410,7 +1455,6 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
spin_unlock(&journal->j_list_lock);
out_unlock_bh:
jbd_unlock_bh_state(bh);
jbd2_journal_put_journal_head(jh);
out:
JBUFFER_TRACE(jh, "exit");
return ret;
......
......@@ -1081,8 +1081,9 @@ void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block);
extern void jbd2_journal_commit_transaction(journal_t *);
/* Checkpoint list management */
void __jbd2_journal_clean_checkpoint_list(journal_t *journal);
void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy);
int __jbd2_journal_remove_checkpoint(struct journal_head *);
void jbd2_journal_destroy_checkpoint(journal_t *journal);
void __jbd2_journal_insert_checkpoint(struct journal_head *, transaction_t *);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment