Commit 2ac232f3 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs-2.6

* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs-2.6:
  jbd: change the field "b_cow_tid" of struct journal_head from type unsigned to tid_t
  ext3.txt: update the links in the section "useful links" to the latest ones
  ext3: Fix data corruption in inodes with journalled data
  ext2: check xattr name_len before acquiring xattr_sem in ext2_xattr_get
  ext3: Fix compilation with -DDX_DEBUG
  quota: Remove unused declaration
  jbd: Use WRITE_SYNC in journal checkpoint.
  jbd: Fix oops in journal_remove_journal_head()
  ext3: Return -EINVAL when start is beyond the end of fs in ext3_trim_fs()
  ext3/ioctl.c: silence sparse warnings about different address spaces
  ext3/ext4 Documentation: remove bh/nobh since it has been deprecated
  ext3: Improve truncate error handling
  ext3: use proper little-endian bitops
  ext2: include fs.h into ext2_fs.h
  ext3: Fix oops in ext3_try_to_allocate_with_rsv()
  jbd: fix a bug of leaking jh->b_jcount
  jbd: remove dependency on __GFP_NOFAIL
  ext3: Convert ext3 to new truncate calling convention
  jbd: Add fixed tracepoints
  ext3: Add fixed tracepoints

Resolve conflicts in fs/ext3/fsync.c due to fsync locking push-down and
new fixed tracepoints.
parents fa8f53ac 5cf49d76
......@@ -147,15 +147,6 @@ grpjquota=<file> during journal replay. They replace the above
package for more details
(http://sourceforge.net/projects/linuxquota).
bh (*) ext3 associates buffer heads to data pages to
nobh (a) cache disk block mapping information
(b) link pages into transaction to provide
ordering guarantees.
"bh" option forces use of buffer heads.
"nobh" option tries to avoid associating buffer
heads (supported only for "writeback" mode).
Specification
=============
Ext3 shares all disk implementation with the ext2 filesystem, and adds
......@@ -227,5 +218,5 @@ kernel source: <file:fs/ext3/>
programs: http://e2fsprogs.sourceforge.net/
http://ext2resize.sourceforge.net
useful links: http://www.ibm.com/developerworks/library/l-fs7.html
http://www.ibm.com/developerworks/library/l-fs8.html
useful links: http://www.ibm.com/developerworks/library/l-fs7/index.html
http://www.ibm.com/developerworks/library/l-fs8/index.html
......@@ -68,12 +68,12 @@ Note: More extensive information for getting started with ext4 can be
'-o barriers=[0|1]' mount option for both ext3 and ext4 filesystems
for a fair comparison. When tuning ext3 for best benchmark numbers,
it is often worthwhile to try changing the data journaling mode; '-o
data=writeback,nobh' can be faster for some workloads. (Note
however that running mounted with data=writeback can potentially
leave stale data exposed in recently written files in case of an
unclean shutdown, which could be a security exposure in some
situations.) Configuring the filesystem with a large journal can
also be helpful for metadata-intensive workloads.
data=writeback' can be faster for some workloads. (Note however that
running mounted with data=writeback can potentially leave stale data
exposed in recently written files in case of an unclean shutdown,
which could be a security exposure in some situations.) Configuring
the filesystem with a large journal can also be helpful for
metadata-intensive workloads.
2. Features
===========
......@@ -272,14 +272,6 @@ grpjquota=<file> during journal replay. They replace the above
package for more details
(http://sourceforge.net/projects/linuxquota).
bh (*) ext4 associates buffer heads to data pages to
nobh (a) cache disk block mapping information
(b) link pages into transaction to provide
ordering guarantees.
"bh" option forces use of buffer heads.
"nobh" option tries to avoid associating buffer
heads (supported only for "writeback" mode).
stripe=n Number of filesystem blocks that mballoc will try
to use for allocation size and alignment. For RAID5/6
systems this should be the number of data
......@@ -393,8 +385,7 @@ dioread_nolock locking. If the dioread_nolock option is specified
write and convert the extent to initialized after IO
completes. This approach allows ext4 code to avoid
using inode mutex, which improves scalability on high
speed storages. However this does not work with nobh
option and the mount will fail. Nor does it work with
speed storages. However this does not work with
data journaling and dioread_nolock option will be
ignored with kernel warning. Note that dioread_nolock
code path is only used for extent-based files.
......
......@@ -161,6 +161,10 @@ ext2_xattr_get(struct inode *inode, int name_index, const char *name,
if (name == NULL)
return -EINVAL;
name_len = strlen(name);
if (name_len > 255)
return -ERANGE;
down_read(&EXT2_I(inode)->xattr_sem);
error = -ENODATA;
if (!EXT2_I(inode)->i_file_acl)
......@@ -181,12 +185,8 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_get",
error = -EIO;
goto cleanup;
}
/* find named attribute */
name_len = strlen(name);
error = -ERANGE;
if (name_len > 255)
goto cleanup;
/* find named attribute */
entry = FIRST_ENTRY(bh);
while (!IS_LAST_ENTRY(entry)) {
struct ext2_xattr_entry *next =
......
......@@ -21,6 +21,7 @@
#include <linux/quotaops.h>
#include <linux/buffer_head.h>
#include <linux/blkdev.h>
#include <trace/events/ext3.h>
/*
* balloc.c contains the blocks allocation and deallocation routines
......@@ -161,6 +162,7 @@ read_block_bitmap(struct super_block *sb, unsigned int block_group)
desc = ext3_get_group_desc(sb, block_group, NULL);
if (!desc)
return NULL;
trace_ext3_read_block_bitmap(sb, block_group);
bitmap_blk = le32_to_cpu(desc->bg_block_bitmap);
bh = sb_getblk(sb, bitmap_blk);
if (unlikely(!bh)) {
......@@ -351,6 +353,7 @@ void ext3_rsv_window_add(struct super_block *sb,
struct rb_node * parent = NULL;
struct ext3_reserve_window_node *this;
trace_ext3_rsv_window_add(sb, rsv);
while (*p)
{
parent = *p;
......@@ -476,8 +479,10 @@ void ext3_discard_reservation(struct inode *inode)
rsv = &block_i->rsv_window_node;
if (!rsv_is_empty(&rsv->rsv_window)) {
spin_lock(rsv_lock);
if (!rsv_is_empty(&rsv->rsv_window))
if (!rsv_is_empty(&rsv->rsv_window)) {
trace_ext3_discard_reservation(inode, rsv);
rsv_window_remove(inode->i_sb, rsv);
}
spin_unlock(rsv_lock);
}
}
......@@ -683,14 +688,10 @@ void ext3_free_blocks_sb(handle_t *handle, struct super_block *sb,
void ext3_free_blocks(handle_t *handle, struct inode *inode,
ext3_fsblk_t block, unsigned long count)
{
struct super_block * sb;
struct super_block *sb = inode->i_sb;
unsigned long dquot_freed_blocks;
sb = inode->i_sb;
if (!sb) {
printk ("ext3_free_blocks: nonexistent device");
return;
}
trace_ext3_free_blocks(inode, block, count);
ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks);
if (dquot_freed_blocks)
dquot_free_block(inode, dquot_freed_blocks);
......@@ -1136,6 +1137,7 @@ static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv,
else
start_block = grp_goal + group_first_block;
trace_ext3_alloc_new_reservation(sb, start_block);
size = my_rsv->rsv_goal_size;
if (!rsv_is_empty(&my_rsv->rsv_window)) {
......@@ -1230,8 +1232,11 @@ static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv,
* check if the first free block is within the
* free space we just reserved
*/
if (start_block >= my_rsv->rsv_start && start_block <= my_rsv->rsv_end)
if (start_block >= my_rsv->rsv_start &&
start_block <= my_rsv->rsv_end) {
trace_ext3_reserved(sb, start_block, my_rsv);
return 0; /* success */
}
/*
* if the first free bit we found is out of the reservable space
* continue search for next reservable space,
......@@ -1514,10 +1519,6 @@ ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode,
*errp = -ENOSPC;
sb = inode->i_sb;
if (!sb) {
printk("ext3_new_block: nonexistent device");
return 0;
}
/*
* Check quota for allocation of this block.
......@@ -1528,8 +1529,10 @@ ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode,
return 0;
}
trace_ext3_request_blocks(inode, goal, num);
sbi = EXT3_SB(sb);
es = EXT3_SB(sb)->s_es;
es = sbi->s_es;
ext3_debug("goal=%lu.\n", goal);
/*
* Allocate a block from reservation only when
......@@ -1742,6 +1745,10 @@ ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode,
brelse(bitmap_bh);
dquot_free_block(inode, *count-num);
*count = num;
trace_ext3_allocate_blocks(inode, goal, num,
(unsigned long long)ret_block);
return ret_block;
io_error:
......@@ -1996,6 +2003,7 @@ ext3_grpblk_t ext3_trim_all_free(struct super_block *sb, unsigned int group,
if ((next - start) < minblocks)
goto free_extent;
trace_ext3_discard_blocks(sb, discard_block, next - start);
/* Send the TRIM command down to the device */
err = sb_issue_discard(sb, discard_block, next - start,
GFP_NOFS, 0);
......@@ -2100,7 +2108,7 @@ int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range)
if (unlikely(minlen > EXT3_BLOCKS_PER_GROUP(sb)))
return -EINVAL;
if (start >= max_blks)
goto out;
return -EINVAL;
if (start + len > max_blks)
len = max_blks - start;
......@@ -2148,8 +2156,6 @@ int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range)
if (ret >= 0)
ret = 0;
out:
range->len = trimmed * sb->s_blocksize;
return ret;
......
......@@ -71,7 +71,6 @@ const struct file_operations ext3_file_operations = {
};
const struct inode_operations ext3_file_inode_operations = {
.truncate = ext3_truncate,
.setattr = ext3_setattr,
#ifdef CONFIG_EXT3_FS_XATTR
.setxattr = generic_setxattr,
......
......@@ -30,6 +30,7 @@
#include <linux/jbd.h>
#include <linux/ext3_fs.h>
#include <linux/ext3_jbd.h>
#include <trace/events/ext3.h>
/*
* akpm: A new design for ext3_sync_file().
......@@ -51,12 +52,14 @@ int ext3_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
int ret, needs_barrier = 0;
tid_t commit_tid;
trace_ext3_sync_file_enter(file, datasync);
if (inode->i_sb->s_flags & MS_RDONLY)
return 0;
ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (ret)
return ret;
goto out;
/*
* Taking the mutex here just to keep consistent with how fsync was
......@@ -83,7 +86,8 @@ int ext3_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
*/
if (ext3_should_journal_data(inode)) {
mutex_unlock(&inode->i_mutex);
return ext3_force_commit(inode->i_sb);
ret = ext3_force_commit(inode->i_sb);
goto out;
}
if (datasync)
......@@ -104,6 +108,9 @@ int ext3_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
*/
if (needs_barrier)
blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
mutex_unlock(&inode->i_mutex);
out:
trace_ext3_sync_file_exit(inode, ret);
return ret;
}
......@@ -23,6 +23,7 @@
#include <linux/buffer_head.h>
#include <linux/random.h>
#include <linux/bitops.h>
#include <trace/events/ext3.h>
#include <asm/byteorder.h>
......@@ -118,6 +119,7 @@ void ext3_free_inode (handle_t *handle, struct inode * inode)
ino = inode->i_ino;
ext3_debug ("freeing inode %lu\n", ino);
trace_ext3_free_inode(inode);
is_directory = S_ISDIR(inode->i_mode);
......@@ -426,6 +428,7 @@ struct inode *ext3_new_inode(handle_t *handle, struct inode * dir,
return ERR_PTR(-EPERM);
sb = dir->i_sb;
trace_ext3_request_inode(dir, mode);
inode = new_inode(sb);
if (!inode)
return ERR_PTR(-ENOMEM);
......@@ -601,6 +604,7 @@ struct inode *ext3_new_inode(handle_t *handle, struct inode * dir,
}
ext3_debug("allocating inode %lu\n", inode->i_ino);
trace_ext3_allocate_inode(inode, dir, mode);
goto really_out;
fail:
ext3_std_error(sb, err);
......
......@@ -38,10 +38,12 @@
#include <linux/bio.h>
#include <linux/fiemap.h>
#include <linux/namei.h>
#include <trace/events/ext3.h>
#include "xattr.h"
#include "acl.h"
static int ext3_writepage_trans_blocks(struct inode *inode);
static int ext3_block_truncate_page(struct inode *inode, loff_t from);
/*
* Test whether an inode is a fast symlink.
......@@ -70,6 +72,7 @@ int ext3_forget(handle_t *handle, int is_metadata, struct inode *inode,
might_sleep();
trace_ext3_forget(inode, is_metadata, blocknr);
BUFFER_TRACE(bh, "enter");
jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, "
......@@ -194,20 +197,47 @@ static int truncate_restart_transaction(handle_t *handle, struct inode *inode)
*/
void ext3_evict_inode (struct inode *inode)
{
struct ext3_inode_info *ei = EXT3_I(inode);
struct ext3_block_alloc_info *rsv;
handle_t *handle;
int want_delete = 0;
trace_ext3_evict_inode(inode);
if (!inode->i_nlink && !is_bad_inode(inode)) {
dquot_initialize(inode);
want_delete = 1;
}
/*
* When journalling data dirty buffers are tracked only in the journal.
* So although mm thinks everything is clean and ready for reaping the
* inode might still have some pages to write in the running
* transaction or waiting to be checkpointed. Thus calling
* journal_invalidatepage() (via truncate_inode_pages()) to discard
* these buffers can cause data loss. Also even if we did not discard
* these buffers, we would have no way to find them after the inode
* is reaped and thus user could see stale data if he tries to read
* them before the transaction is checkpointed. So be careful and
* force everything to disk here... We use ei->i_datasync_tid to
* store the newest transaction containing inode's data.
*
* Note that directories do not have this problem because they don't
* use page cache.
*/
if (inode->i_nlink && ext3_should_journal_data(inode) &&
(S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) {
tid_t commit_tid = atomic_read(&ei->i_datasync_tid);
journal_t *journal = EXT3_SB(inode->i_sb)->s_journal;
log_start_commit(journal, commit_tid);
log_wait_commit(journal, commit_tid);
filemap_write_and_wait(&inode->i_data);
}
truncate_inode_pages(&inode->i_data, 0);
ext3_discard_reservation(inode);
rsv = EXT3_I(inode)->i_block_alloc_info;
EXT3_I(inode)->i_block_alloc_info = NULL;
rsv = ei->i_block_alloc_info;
ei->i_block_alloc_info = NULL;
if (unlikely(rsv))
kfree(rsv);
......@@ -231,15 +261,13 @@ void ext3_evict_inode (struct inode *inode)
if (inode->i_blocks)
ext3_truncate(inode);
/*
* Kill off the orphan record which ext3_truncate created.
* AKPM: I think this can be inside the above `if'.
* Note that ext3_orphan_del() has to be able to cope with the
* deletion of a non-existent orphan - this is because we don't
* know if ext3_truncate() actually created an orphan record.
* (Well, we could do this if we need to, but heck - it works)
* Kill off the orphan record created when the inode lost the last
* link. Note that ext3_orphan_del() has to be able to cope with the
* deletion of a non-existent orphan - ext3_truncate() could
* have removed the record.
*/
ext3_orphan_del(handle, inode);
EXT3_I(inode)->i_dtime = get_seconds();
ei->i_dtime = get_seconds();
/*
* One subtle ordering requirement: if anything has gone wrong
......@@ -842,6 +870,7 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
ext3_fsblk_t first_block = 0;
trace_ext3_get_blocks_enter(inode, iblock, maxblocks, create);
J_ASSERT(handle != NULL || create == 0);
depth = ext3_block_to_path(inode,iblock,offsets,&blocks_to_boundary);
......@@ -886,6 +915,9 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
if (!create || err == -EIO)
goto cleanup;
/*
* Block out ext3_truncate while we alter the tree
*/
mutex_lock(&ei->truncate_mutex);
/*
......@@ -934,9 +966,6 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
*/
count = ext3_blks_to_allocate(partial, indirect_blks,
maxblocks, blocks_to_boundary);
/*
* Block out ext3_truncate while we alter the tree
*/
err = ext3_alloc_branch(handle, inode, indirect_blks, &count, goal,
offsets + (partial - chain), partial);
......@@ -970,6 +999,9 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
}
BUFFER_TRACE(bh_result, "returned");
out:
trace_ext3_get_blocks_exit(inode, iblock,
depth ? le32_to_cpu(chain[depth-1].key) : 0,
count, err);
return err;
}
......@@ -1202,6 +1234,16 @@ static void ext3_truncate_failed_write(struct inode *inode)
ext3_truncate(inode);
}
/*
* Truncate blocks that were not used by direct IO write. We have to zero out
* the last file block as well because direct IO might have written to it.
*/
static void ext3_truncate_failed_direct_write(struct inode *inode)
{
ext3_block_truncate_page(inode, inode->i_size);
ext3_truncate(inode);
}
static int ext3_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
......@@ -1217,6 +1259,8 @@ static int ext3_write_begin(struct file *file, struct address_space *mapping,
* we allocate blocks but write fails for some reason */
int needed_blocks = ext3_writepage_trans_blocks(inode) + 1;
trace_ext3_write_begin(inode, pos, len, flags);
index = pos >> PAGE_CACHE_SHIFT;
from = pos & (PAGE_CACHE_SIZE - 1);
to = from + len;
......@@ -1332,6 +1376,7 @@ static int ext3_ordered_write_end(struct file *file,
unsigned from, to;
int ret = 0, ret2;
trace_ext3_ordered_write_end(inode, pos, len, copied);
copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
from = pos & (PAGE_CACHE_SIZE - 1);
......@@ -1367,6 +1412,7 @@ static int ext3_writeback_write_end(struct file *file,
struct inode *inode = file->f_mapping->host;
int ret;
trace_ext3_writeback_write_end(inode, pos, len, copied);
copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
update_file_sizes(inode, pos, copied);
/*
......@@ -1391,10 +1437,12 @@ static int ext3_journalled_write_end(struct file *file,
{
handle_t *handle = ext3_journal_current_handle();
struct inode *inode = mapping->host;
struct ext3_inode_info *ei = EXT3_I(inode);
int ret = 0, ret2;
int partial = 0;
unsigned from, to;
trace_ext3_journalled_write_end(inode, pos, len, copied);
from = pos & (PAGE_CACHE_SIZE - 1);
to = from + len;
......@@ -1419,8 +1467,9 @@ static int ext3_journalled_write_end(struct file *file,
if (pos + len > inode->i_size && ext3_can_truncate(inode))
ext3_orphan_add(handle, inode);
ext3_set_inode_state(inode, EXT3_STATE_JDATA);
if (inode->i_size > EXT3_I(inode)->i_disksize) {
EXT3_I(inode)->i_disksize = inode->i_size;
atomic_set(&ei->i_datasync_tid, handle->h_transaction->t_tid);
if (inode->i_size > ei->i_disksize) {
ei->i_disksize = inode->i_size;
ret2 = ext3_mark_inode_dirty(handle, inode);
if (!ret)
ret = ret2;
......@@ -1577,6 +1626,7 @@ static int ext3_ordered_writepage(struct page *page,
if (ext3_journal_current_handle())
goto out_fail;
trace_ext3_ordered_writepage(page);
if (!page_has_buffers(page)) {
create_empty_buffers(page, inode->i_sb->s_blocksize,
(1 << BH_Dirty)|(1 << BH_Uptodate));
......@@ -1647,6 +1697,7 @@ static int ext3_writeback_writepage(struct page *page,
if (ext3_journal_current_handle())
goto out_fail;
trace_ext3_writeback_writepage(page);
if (page_has_buffers(page)) {
if (!walk_page_buffers(NULL, page_buffers(page), 0,
PAGE_CACHE_SIZE, NULL, buffer_unmapped)) {
......@@ -1689,6 +1740,7 @@ static int ext3_journalled_writepage(struct page *page,
if (ext3_journal_current_handle())
goto no_write;
trace_ext3_journalled_writepage(page);
handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode));
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
......@@ -1715,6 +1767,8 @@ static int ext3_journalled_writepage(struct page *page,
if (ret == 0)
ret = err;
ext3_set_inode_state(inode, EXT3_STATE_JDATA);
atomic_set(&EXT3_I(inode)->i_datasync_tid,
handle->h_transaction->t_tid);
unlock_page(page);
} else {
/*
......@@ -1739,6 +1793,7 @@ static int ext3_journalled_writepage(struct page *page,
static int ext3_readpage(struct file *file, struct page *page)
{
trace_ext3_readpage(page);
return mpage_readpage(page, ext3_get_block);
}
......@@ -1753,6 +1808,8 @@ static void ext3_invalidatepage(struct page *page, unsigned long offset)
{
journal_t *journal = EXT3_JOURNAL(page->mapping->host);
trace_ext3_invalidatepage(page, offset);
/*
* If it's a full truncate we just forget about the pending dirtying
*/
......@@ -1766,6 +1823,7 @@ static int ext3_releasepage(struct page *page, gfp_t wait)
{
journal_t *journal = EXT3_JOURNAL(page->mapping->host);
trace_ext3_releasepage(page);
WARN_ON(PageChecked(page));
if (!page_has_buffers(page))
return 0;
......@@ -1794,6 +1852,8 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
size_t count = iov_length(iov, nr_segs);
int retries = 0;
trace_ext3_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw);
if (rw == WRITE) {
loff_t final_size = offset + count;
......@@ -1827,7 +1887,7 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
loff_t end = offset + iov_length(iov, nr_segs);
if (end > isize)
vmtruncate(inode, isize);
ext3_truncate_failed_direct_write(inode);
}
if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
goto retry;
......@@ -1841,7 +1901,7 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
/* This is really bad luck. We've written the data
* but cannot extend i_size. Truncate allocated blocks
* and pretend the write failed... */
ext3_truncate(inode);
ext3_truncate_failed_direct_write(inode);
ret = PTR_ERR(handle);
goto out;
}
......@@ -1867,6 +1927,8 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
ret = err;
}
out:
trace_ext3_direct_IO_exit(inode, offset,
iov_length(iov, nr_segs), rw, ret);
return ret;
}
......@@ -1949,17 +2011,24 @@ void ext3_set_aops(struct inode *inode)
* This required during truncate. We need to physically zero the tail end
* of that block so it doesn't yield old data if the file is later grown.
*/
static int ext3_block_truncate_page(handle_t *handle, struct page *page,
struct address_space *mapping, loff_t from)
static int ext3_block_truncate_page(struct inode *inode, loff_t from)
{
ext3_fsblk_t index = from >> PAGE_CACHE_SHIFT;
unsigned offset = from & (PAGE_CACHE_SIZE-1);
unsigned offset = from & (PAGE_CACHE_SIZE - 1);
unsigned blocksize, iblock, length, pos;
struct inode *inode = mapping->host;
struct page *page;
handle_t *handle = NULL;
struct buffer_head *bh;
int err = 0;
/* Truncated on block boundary - nothing to do */
blocksize = inode->i_sb->s_blocksize;
if ((from & (blocksize - 1)) == 0)
return 0;
page = grab_cache_page(inode->i_mapping, index);
if (!page)
return -ENOMEM;
length = blocksize - (offset & (blocksize - 1));
iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
......@@ -2004,11 +2073,23 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page,
goto unlock;
}
/* data=writeback mode doesn't need transaction to zero-out data */
if (!ext3_should_writeback_data(inode)) {
/* We journal at most one block */
handle = ext3_journal_start(inode, 1);
if (IS_ERR(handle)) {
clear_highpage(page);
flush_dcache_page(page);
err = PTR_ERR(handle);
goto unlock;
}
}
if (ext3_should_journal_data(inode)) {
BUFFER_TRACE(bh, "get write access");
err = ext3_journal_get_write_access(handle, bh);
if (err)
goto unlock;
goto stop;
}
zero_user(page, offset, length);
......@@ -2022,6 +2103,9 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page,
err = ext3_journal_dirty_data(handle, bh);
mark_buffer_dirty(bh);
}
stop:
if (handle)
ext3_journal_stop(handle);
unlock:
unlock_page(page);
......@@ -2390,8 +2474,6 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode,
int ext3_can_truncate(struct inode *inode)
{
if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
return 0;
if (S_ISREG(inode->i_mode))
return 1;
if (S_ISDIR(inode->i_mode))
......@@ -2435,7 +2517,6 @@ void ext3_truncate(struct inode *inode)
struct ext3_inode_info *ei = EXT3_I(inode);
__le32 *i_data = ei->i_data;
int addr_per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb);
struct address_space *mapping = inode->i_mapping;
int offsets[4];
Indirect chain[4];
Indirect *partial;
......@@ -2443,7 +2524,8 @@ void ext3_truncate(struct inode *inode)
int n;
long last_block;
unsigned blocksize = inode->i_sb->s_blocksize;
struct page *page;
trace_ext3_truncate_enter(inode);
if (!ext3_can_truncate(inode))
goto out_notrans;
......@@ -2451,37 +2533,12 @@ void ext3_truncate(struct inode *inode)
if (inode->i_size == 0 && ext3_should_writeback_data(inode))
ext3_set_inode_state(inode, EXT3_STATE_FLUSH_ON_CLOSE);
/*
* We have to lock the EOF page here, because lock_page() nests
* outside journal_start().
*/
if ((inode->i_size & (blocksize - 1)) == 0) {
/* Block boundary? Nothing to do */
page = NULL;
} else {
page = grab_cache_page(mapping,
inode->i_size >> PAGE_CACHE_SHIFT);
if (!page)
goto out_notrans;
}
handle = start_transaction(inode);
if (IS_ERR(handle)) {
if (page) {
clear_highpage(page);
flush_dcache_page(page);
unlock_page(page);
page_cache_release(page);
}
if (IS_ERR(handle))
goto out_notrans;
}
last_block = (inode->i_size + blocksize-1)
>> EXT3_BLOCK_SIZE_BITS(inode->i_sb);
if (page)
ext3_block_truncate_page(handle, page, mapping, inode->i_size);
n = ext3_block_to_path(inode, last_block, offsets, NULL);
if (n == 0)
goto out_stop; /* error */
......@@ -2596,6 +2653,7 @@ void ext3_truncate(struct inode *inode)
ext3_orphan_del(handle, inode);
ext3_journal_stop(handle);
trace_ext3_truncate_exit(inode);
return;
out_notrans:
/*
......@@ -2604,6 +2662,7 @@ void ext3_truncate(struct inode *inode)
*/
if (inode->i_nlink)
ext3_orphan_del(NULL, inode);
trace_ext3_truncate_exit(inode);
}
static ext3_fsblk_t ext3_get_inode_block(struct super_block *sb,
......@@ -2745,6 +2804,7 @@ static int __ext3_get_inode_loc(struct inode *inode,
* has in-inode xattrs, or we don't have this inode in memory.
* Read the block from disk.
*/
trace_ext3_load_inode(inode);
get_bh(bh);
bh->b_end_io = end_buffer_read_sync;
submit_bh(READ_META, bh);
......@@ -3229,18 +3289,36 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr)
}
error = ext3_orphan_add(handle, inode);
if (error) {
ext3_journal_stop(handle);
goto err_out;
}
EXT3_I(inode)->i_disksize = attr->ia_size;
rc = ext3_mark_inode_dirty(handle, inode);
if (!error)
error = rc;
error = ext3_mark_inode_dirty(handle, inode);
ext3_journal_stop(handle);
if (error) {
/* Some hard fs error must have happened. Bail out. */
ext3_orphan_del(NULL, inode);
goto err_out;
}
rc = ext3_block_truncate_page(inode, attr->ia_size);
if (rc) {
/* Cleanup orphan list and exit */
handle = ext3_journal_start(inode, 3);
if (IS_ERR(handle)) {
ext3_orphan_del(NULL, inode);
goto err_out;
}
ext3_orphan_del(handle, inode);
ext3_journal_stop(handle);
goto err_out;
}
}
if ((attr->ia_valid & ATTR_SIZE) &&
attr->ia_size != i_size_read(inode)) {
rc = vmtruncate(inode, attr->ia_size);
if (rc)
goto err_out;
truncate_setsize(inode, attr->ia_size);
ext3_truncate(inode);
}
setattr_copy(inode, attr);
......@@ -3374,6 +3452,7 @@ int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode)
int err;
might_sleep();
trace_ext3_mark_inode_dirty(inode, _RET_IP_);
err = ext3_reserve_inode_write(handle, inode, &iloc);
if (!err)
err = ext3_mark_iloc_dirty(handle, inode, &iloc);
......
......@@ -285,7 +285,7 @@ long ext3_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (copy_from_user(&range, (struct fstrim_range *)arg,
if (copy_from_user(&range, (struct fstrim_range __user *)arg,
sizeof(range)))
return -EFAULT;
......@@ -293,7 +293,7 @@ long ext3_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
if (ret < 0)
return ret;
if (copy_to_user((struct fstrim_range *)arg, &range,
if (copy_to_user((struct fstrim_range __user *)arg, &range,
sizeof(range)))
return -EFAULT;
......
......@@ -36,6 +36,7 @@
#include <linux/quotaops.h>
#include <linux/buffer_head.h>
#include <linux/bio.h>
#include <trace/events/ext3.h>
#include "namei.h"
#include "xattr.h"
......@@ -287,7 +288,7 @@ static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3_dir_ent
while (len--) printk("%c", *name++);
ext3fs_dirhash(de->name, de->name_len, &h);
printk(":%x.%u ", h.hash,
((char *) de - base));
(unsigned) ((char *) de - base));
}
space += EXT3_DIR_REC_LEN(de->name_len);
names++;
......@@ -1013,7 +1014,7 @@ static struct buffer_head * ext3_dx_find_entry(struct inode *dir,
*err = -ENOENT;
errout:
dxtrace(printk("%s not found\n", name));
dxtrace(printk("%s not found\n", entry->name));
dx_release (frames);
return NULL;
}
......@@ -2140,6 +2141,7 @@ static int ext3_unlink(struct inode * dir, struct dentry *dentry)
struct ext3_dir_entry_2 * de;
handle_t *handle;
trace_ext3_unlink_enter(dir, dentry);
/* Initialize quotas before so that eventual writes go
* in separate transaction */
dquot_initialize(dir);
......@@ -2185,6 +2187,7 @@ static int ext3_unlink(struct inode * dir, struct dentry *dentry)
end_unlink:
ext3_journal_stop(handle);
brelse (bh);
trace_ext3_unlink_exit(dentry, retval);
return retval;
}
......
......@@ -44,6 +44,9 @@
#include "acl.h"
#include "namei.h"
#define CREATE_TRACE_POINTS
#include <trace/events/ext3.h>
#ifdef CONFIG_EXT3_DEFAULTS_TO_ORDERED
#define EXT3_MOUNT_DEFAULT_DATA_MODE EXT3_MOUNT_ORDERED_DATA
#else
......@@ -497,6 +500,14 @@ static struct inode *ext3_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
static int ext3_drop_inode(struct inode *inode)
{
int drop = generic_drop_inode(inode);
trace_ext3_drop_inode(inode, drop);
return drop;
}
static void ext3_i_callback(struct rcu_head *head)
{
struct inode *inode = container_of(head, struct inode, i_rcu);
......@@ -788,6 +799,7 @@ static const struct super_operations ext3_sops = {
.destroy_inode = ext3_destroy_inode,
.write_inode = ext3_write_inode,
.dirty_inode = ext3_dirty_inode,
.drop_inode = ext3_drop_inode,
.evict_inode = ext3_evict_inode,
.put_super = ext3_put_super,
.sync_fs = ext3_sync_fs,
......@@ -2509,6 +2521,7 @@ static int ext3_sync_fs(struct super_block *sb, int wait)
{
tid_t target;
trace_ext3_sync_fs(sb, wait);
if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) {
if (wait)
log_wait_commit(EXT3_SB(sb)->s_journal, target);
......
......@@ -803,8 +803,16 @@ ext3_xattr_block_set(handle_t *handle, struct inode *inode,
/* We need to allocate a new block */
ext3_fsblk_t goal = ext3_group_first_block_no(sb,
EXT3_I(inode)->i_block_group);
ext3_fsblk_t block = ext3_new_block(handle, inode,
goal, &error);
ext3_fsblk_t block;
/*
* Protect us agaist concurrent allocations to the
* same inode from ext3_..._writepage(). Reservation
* code does not expect racing allocations.
*/
mutex_lock(&EXT3_I(inode)->truncate_mutex);
block = ext3_new_block(handle, inode, goal, &error);
mutex_unlock(&EXT3_I(inode)->truncate_mutex);
if (error)
goto cleanup;
ea_idebug(inode, "creating block %d", block);
......
......@@ -22,6 +22,8 @@
#include <linux/jbd.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <trace/events/jbd.h>
/*
* Unlink a buffer from a transaction checkpoint list.
......@@ -95,10 +97,14 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
if (jh->b_jlist == BJ_None && !buffer_locked(bh) &&
!buffer_dirty(bh) && !buffer_write_io_error(bh)) {
/*
* Get our reference so that bh cannot be freed before
* we unlock it
*/
get_bh(bh);
JBUFFER_TRACE(jh, "remove from checkpoint list");
ret = __journal_remove_checkpoint(jh) + 1;
jbd_unlock_bh_state(bh);
journal_remove_journal_head(bh);
BUFFER_TRACE(bh, "release");
__brelse(bh);
} else {
......@@ -220,8 +226,8 @@ static int __wait_cp_io(journal_t *journal, transaction_t *transaction)
spin_lock(&journal->j_list_lock);
goto restart;
}
if (buffer_locked(bh)) {
get_bh(bh);
if (buffer_locked(bh)) {
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
wait_on_buffer(bh);
......@@ -240,7 +246,6 @@ static int __wait_cp_io(journal_t *journal, transaction_t *transaction)
*/
released = __journal_remove_checkpoint(jh);
jbd_unlock_bh_state(bh);
journal_remove_journal_head(bh);
__brelse(bh);
}
......@@ -253,9 +258,12 @@ static void
__flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
{
int i;
struct blk_plug plug;
blk_start_plug(&plug);
for (i = 0; i < *batch_count; i++)
write_dirty_buffer(bhs[i], WRITE);
write_dirty_buffer(bhs[i], WRITE_SYNC);
blk_finish_plug(&plug);
for (i = 0; i < *batch_count; i++) {
struct buffer_head *bh = bhs[i];
......@@ -304,12 +312,12 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
ret = 1;
if (unlikely(buffer_write_io_error(bh)))
ret = -EIO;
get_bh(bh);
J_ASSERT_JH(jh, !buffer_jbddirty(bh));
BUFFER_TRACE(bh, "remove from checkpoint");
__journal_remove_checkpoint(jh);
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
journal_remove_journal_head(bh);
__brelse(bh);
} else {
/*
......@@ -358,6 +366,7 @@ int log_do_checkpoint(journal_t *journal)
* journal straight away.
*/
result = cleanup_journal_tail(journal);
trace_jbd_checkpoint(journal, result);
jbd_debug(1, "cleanup_journal_tail returned %d\n", result);
if (result <= 0)
return result;
......@@ -503,6 +512,7 @@ int cleanup_journal_tail(journal_t *journal)
if (blocknr < journal->j_tail)
freed = freed + journal->j_last - journal->j_first;
trace_jbd_cleanup_journal_tail(journal, first_tid, blocknr, freed);
jbd_debug(1,
"Cleaning journal tail from %d to %d (offset %u), "
"freeing %u\n",
......@@ -523,9 +533,9 @@ int cleanup_journal_tail(journal_t *journal)
/*
* journal_clean_one_cp_list
*
* Find all the written-back checkpoint buffers in the given list and release them.
* Find all the written-back checkpoint buffers in the given list and release
* them.
*
* Called with the journal locked.
* Called with j_list_lock held.
* Returns number of bufers reaped (for debug)
*/
......@@ -632,8 +642,8 @@ int __journal_clean_checkpoint_list(journal_t *journal)
* checkpoint lists.
*
* The function returns 1 if it frees the transaction, 0 otherwise.
* The function can free jh and bh.
*
* This function is called with the journal locked.
* This function is called with j_list_lock held.
* This function is called with jbd_lock_bh_state(jh2bh(jh))
*/
......@@ -652,13 +662,14 @@ int __journal_remove_checkpoint(struct journal_head *jh)
}
journal = transaction->t_journal;
JBUFFER_TRACE(jh, "removing from transaction");
__buffer_unlink(jh);
jh->b_cp_transaction = NULL;
journal_put_journal_head(jh);
if (transaction->t_checkpoint_list != NULL ||
transaction->t_checkpoint_io_list != NULL)
goto out;
JBUFFER_TRACE(jh, "transaction has no more buffers");
/*
* There is one special case to worry about: if we have just pulled the
......@@ -669,10 +680,8 @@ int __journal_remove_checkpoint(struct journal_head *jh)
* The locking here around t_state is a bit sleazy.
* See the comment at the end of journal_commit_transaction().
*/
if (transaction->t_state != T_FINISHED) {
JBUFFER_TRACE(jh, "belongs to running/committing transaction");
if (transaction->t_state != T_FINISHED)
goto out;
}
/* OK, that was the last buffer for the transaction: we can now
safely remove this transaction from the log */
......@@ -684,7 +693,6 @@ int __journal_remove_checkpoint(struct journal_head *jh)
wake_up(&journal->j_wait_logspace);
ret = 1;
out:
JBUFFER_TRACE(jh, "exit");
return ret;
}
......@@ -703,6 +711,8 @@ void __journal_insert_checkpoint(struct journal_head *jh,
J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh)));
J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);
/* Get reference for checkpointing transaction */
journal_grab_journal_head(jh2bh(jh));
jh->b_cp_transaction = transaction;
if (!transaction->t_checkpoint_list) {
......@@ -752,6 +762,7 @@ void __journal_drop_transaction(journal_t *journal, transaction_t *transaction)
J_ASSERT(journal->j_committing_transaction != transaction);
J_ASSERT(journal->j_running_transaction != transaction);
trace_jbd_drop_transaction(journal, transaction);
jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid);
kfree(transaction);
}
......@@ -21,6 +21,7 @@
#include <linux/pagemap.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <trace/events/jbd.h>
/*
* Default IO end handler for temporary BJ_IO buffer_heads.
......@@ -204,6 +205,8 @@ static int journal_submit_data_buffers(journal_t *journal,
if (!trylock_buffer(bh)) {
BUFFER_TRACE(bh, "needs blocking lock");
spin_unlock(&journal->j_list_lock);
trace_jbd_do_submit_data(journal,
commit_transaction);
/* Write out all data to prevent deadlocks */
journal_do_submit_data(wbuf, bufs, write_op);
bufs = 0;
......@@ -236,6 +239,8 @@ static int journal_submit_data_buffers(journal_t *journal,
jbd_unlock_bh_state(bh);
if (bufs == journal->j_wbufsize) {
spin_unlock(&journal->j_list_lock);
trace_jbd_do_submit_data(journal,
commit_transaction);
journal_do_submit_data(wbuf, bufs, write_op);
bufs = 0;
goto write_out_data;
......@@ -253,10 +258,6 @@ static int journal_submit_data_buffers(journal_t *journal,
jbd_unlock_bh_state(bh);
if (locked)
unlock_buffer(bh);
journal_remove_journal_head(bh);
/* One for our safety reference, other for
* journal_remove_journal_head() */
put_bh(bh);
release_data_buffer(bh);
}
......@@ -266,6 +267,7 @@ static int journal_submit_data_buffers(journal_t *journal,
}
}
spin_unlock(&journal->j_list_lock);
trace_jbd_do_submit_data(journal, commit_transaction);
journal_do_submit_data(wbuf, bufs, write_op);
return err;
......@@ -316,12 +318,14 @@ void journal_commit_transaction(journal_t *journal)
commit_transaction = journal->j_running_transaction;
J_ASSERT(commit_transaction->t_state == T_RUNNING);
trace_jbd_start_commit(journal, commit_transaction);
jbd_debug(1, "JBD: starting commit of transaction %d\n",
commit_transaction->t_tid);
spin_lock(&journal->j_state_lock);
commit_transaction->t_state = T_LOCKED;
trace_jbd_commit_locking(journal, commit_transaction);
spin_lock(&commit_transaction->t_handle_lock);
while (commit_transaction->t_updates) {
DEFINE_WAIT(wait);
......@@ -392,6 +396,7 @@ void journal_commit_transaction(journal_t *journal)
*/
journal_switch_revoke_table(journal);
trace_jbd_commit_flushing(journal, commit_transaction);
commit_transaction->t_state = T_FLUSH;
journal->j_committing_transaction = commit_transaction;
journal->j_running_transaction = NULL;
......@@ -446,14 +451,9 @@ void journal_commit_transaction(journal_t *journal)
}
if (buffer_jbd(bh) && bh2jh(bh) == jh &&
jh->b_transaction == commit_transaction &&
jh->b_jlist == BJ_Locked) {
jh->b_jlist == BJ_Locked)
__journal_unfile_buffer(jh);
jbd_unlock_bh_state(bh);
journal_remove_journal_head(bh);
put_bh(bh);
} else {
jbd_unlock_bh_state(bh);
}
release_data_buffer(bh);
cond_resched_lock(&journal->j_list_lock);
}
......@@ -493,6 +493,7 @@ void journal_commit_transaction(journal_t *journal)
commit_transaction->t_state = T_COMMIT;
spin_unlock(&journal->j_state_lock);
trace_jbd_commit_logging(journal, commit_transaction);
J_ASSERT(commit_transaction->t_nr_buffers <=
commit_transaction->t_outstanding_credits);
......@@ -797,10 +798,16 @@ void journal_commit_transaction(journal_t *journal)
while (commit_transaction->t_forget) {
transaction_t *cp_transaction;
struct buffer_head *bh;
int try_to_free = 0;
jh = commit_transaction->t_forget;
spin_unlock(&journal->j_list_lock);
bh = jh2bh(jh);
/*
* Get a reference so that bh cannot be freed before we are
* done with it.
*/
get_bh(bh);
jbd_lock_bh_state(bh);
J_ASSERT_JH(jh, jh->b_transaction == commit_transaction ||
jh->b_transaction == journal->j_running_transaction);
......@@ -858,28 +865,27 @@ void journal_commit_transaction(journal_t *journal)
__journal_insert_checkpoint(jh, commit_transaction);
if (is_journal_aborted(journal))
clear_buffer_jbddirty(bh);
JBUFFER_TRACE(jh, "refile for checkpoint writeback");
__journal_refile_buffer(jh);
jbd_unlock_bh_state(bh);
} else {
J_ASSERT_BH(bh, !buffer_dirty(bh));
/* The buffer on BJ_Forget list and not jbddirty means
/*
* The buffer on BJ_Forget list and not jbddirty means
* it has been freed by this transaction and hence it
* could not have been reallocated until this
* transaction has committed. *BUT* it could be
* reallocated once we have written all the data to
* disk and before we process the buffer on BJ_Forget
* list. */
* list.
*/
if (!jh->b_next_transaction)
try_to_free = 1;
}
JBUFFER_TRACE(jh, "refile or unfile freed buffer");
__journal_refile_buffer(jh);
if (!jh->b_transaction) {
jbd_unlock_bh_state(bh);
/* needs a brelse */
journal_remove_journal_head(bh);
if (try_to_free)
release_buffer_page(bh);
} else
jbd_unlock_bh_state(bh);
}
else
__brelse(bh);
cond_resched_lock(&journal->j_list_lock);
}
spin_unlock(&journal->j_list_lock);
......@@ -946,6 +952,7 @@ void journal_commit_transaction(journal_t *journal)
}
spin_unlock(&journal->j_list_lock);
trace_jbd_end_commit(journal, commit_transaction);
jbd_debug(1, "JBD: commit %d complete, head %d\n",
journal->j_commit_sequence, journal->j_tail_sequence);
......
......@@ -38,6 +38,9 @@
#include <linux/debugfs.h>
#include <linux/ratelimit.h>
#define CREATE_TRACE_POINTS
#include <trace/events/jbd.h>
#include <asm/uaccess.h>
#include <asm/page.h>
......@@ -1065,6 +1068,7 @@ void journal_update_superblock(journal_t *journal, int wait)
} else
write_dirty_buffer(bh, WRITE);
trace_jbd_update_superblock_end(journal, wait);
out:
/* If we have just flushed the log (by marking s_start==0), then
* any future commit will have to be careful to update the
......@@ -1799,10 +1803,9 @@ static void journal_free_journal_head(struct journal_head *jh)
* When a buffer has its BH_JBD bit set it is immune from being released by
* core kernel code, mainly via ->b_count.
*
* A journal_head may be detached from its buffer_head when the journal_head's
* b_transaction, b_cp_transaction and b_next_transaction pointers are NULL.
* Various places in JBD call journal_remove_journal_head() to indicate that the
* journal_head can be dropped if needed.
* A journal_head is detached from its buffer_head when the journal_head's
* b_jcount reaches zero. Running transaction (b_transaction) and checkpoint
* transaction (b_cp_transaction) hold their references to b_jcount.
*
* Various places in the kernel want to attach a journal_head to a buffer_head
* _before_ attaching the journal_head to a transaction. To protect the
......@@ -1815,17 +1818,16 @@ static void journal_free_journal_head(struct journal_head *jh)
* (Attach a journal_head if needed. Increments b_jcount)
* struct journal_head *jh = journal_add_journal_head(bh);
* ...
* (Get another reference for transaction)
* journal_grab_journal_head(bh);
* jh->b_transaction = xxx;
* (Put original reference)
* journal_put_journal_head(jh);
*
* Now, the journal_head's b_jcount is zero, but it is safe from being released
* because it has a non-zero b_transaction.
*/
/*
* Give a buffer_head a journal_head.
*
* Doesn't need the journal lock.
* May sleep.
*/
struct journal_head *journal_add_journal_head(struct buffer_head *bh)
......@@ -1889,61 +1891,29 @@ static void __journal_remove_journal_head(struct buffer_head *bh)
struct journal_head *jh = bh2jh(bh);
J_ASSERT_JH(jh, jh->b_jcount >= 0);
get_bh(bh);
if (jh->b_jcount == 0) {
if (jh->b_transaction == NULL &&
jh->b_next_transaction == NULL &&
jh->b_cp_transaction == NULL) {
J_ASSERT_JH(jh, jh->b_transaction == NULL);
J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);
J_ASSERT_JH(jh, jh->b_jlist == BJ_None);
J_ASSERT_BH(bh, buffer_jbd(bh));
J_ASSERT_BH(bh, jh2bh(jh) == bh);
BUFFER_TRACE(bh, "remove journal_head");
if (jh->b_frozen_data) {
printk(KERN_WARNING "%s: freeing "
"b_frozen_data\n",
__func__);
printk(KERN_WARNING "%s: freeing b_frozen_data\n", __func__);
jbd_free(jh->b_frozen_data, bh->b_size);
}
if (jh->b_committed_data) {
printk(KERN_WARNING "%s: freeing "
"b_committed_data\n",
__func__);
printk(KERN_WARNING "%s: freeing b_committed_data\n", __func__);
jbd_free(jh->b_committed_data, bh->b_size);
}
bh->b_private = NULL;
jh->b_bh = NULL; /* debug, really */
clear_buffer_jbd(bh);
__brelse(bh);
journal_free_journal_head(jh);
} else {
BUFFER_TRACE(bh, "journal_head was locked");
}
}
}
/*
* journal_remove_journal_head(): if the buffer isn't attached to a transaction
* and has a zero b_jcount then remove and release its journal_head. If we did
* see that the buffer is not used by any transaction we also "logically"
* decrement ->b_count.
*
* We in fact take an additional increment on ->b_count as a convenience,
* because the caller usually wants to do additional things with the bh
* after calling here.
* The caller of journal_remove_journal_head() *must* run __brelse(bh) at some
* time. Once the caller has run __brelse(), the buffer is eligible for
* reaping by try_to_free_buffers().
*/
void journal_remove_journal_head(struct buffer_head *bh)
{
jbd_lock_bh_journal_head(bh);
__journal_remove_journal_head(bh);
jbd_unlock_bh_journal_head(bh);
}
/*
* Drop a reference on the passed journal_head. If it fell to zero then try to
* Drop a reference on the passed journal_head. If it fell to zero then
* release the journal_head from the buffer_head.
*/
void journal_put_journal_head(struct journal_head *jh)
......@@ -1953,10 +1923,11 @@ void journal_put_journal_head(struct journal_head *jh)
jbd_lock_bh_journal_head(bh);
J_ASSERT_JH(jh, jh->b_jcount > 0);
--jh->b_jcount;
if (!jh->b_jcount && !jh->b_transaction) {
if (!jh->b_jcount) {
__journal_remove_journal_head(bh);
jbd_unlock_bh_journal_head(bh);
__brelse(bh);
}
} else
jbd_unlock_bh_journal_head(bh);
}
......
......@@ -26,6 +26,7 @@
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/hrtimer.h>
#include <linux/backing-dev.h>
static void __journal_temp_unlink_buffer(struct journal_head *jh);
......@@ -99,11 +100,10 @@ static int start_this_handle(journal_t *journal, handle_t *handle)
alloc_transaction:
if (!journal->j_running_transaction) {
new_transaction = kzalloc(sizeof(*new_transaction),
GFP_NOFS|__GFP_NOFAIL);
new_transaction = kzalloc(sizeof(*new_transaction), GFP_NOFS);
if (!new_transaction) {
ret = -ENOMEM;
goto out;
congestion_wait(BLK_RW_ASYNC, HZ/50);
goto alloc_transaction;
}
}
......@@ -696,7 +696,6 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
if (!jh->b_transaction) {
JBUFFER_TRACE(jh, "no transaction");
J_ASSERT_JH(jh, !jh->b_next_transaction);
jh->b_transaction = transaction;
JBUFFER_TRACE(jh, "file as BJ_Reserved");
spin_lock(&journal->j_list_lock);
__journal_file_buffer(jh, transaction, BJ_Reserved);
......@@ -818,7 +817,6 @@ int journal_get_create_access(handle_t *handle, struct buffer_head *bh)
* committed and so it's safe to clear the dirty bit.
*/
clear_buffer_dirty(jh2bh(jh));
jh->b_transaction = transaction;
/* first access by this transaction */
jh->b_modified = 0;
......@@ -844,8 +842,8 @@ int journal_get_create_access(handle_t *handle, struct buffer_head *bh)
*/
JBUFFER_TRACE(jh, "cancelling revoke");
journal_cancel_revoke(handle, jh);
journal_put_journal_head(jh);
out:
journal_put_journal_head(jh);
return err;
}
......@@ -1069,8 +1067,9 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
ret = -EIO;
goto no_journal;
}
if (jh->b_transaction != NULL) {
/* We might have slept so buffer could be refiled now */
if (jh->b_transaction != NULL &&
jh->b_transaction != handle->h_transaction) {
JBUFFER_TRACE(jh, "unfile from commit");
__journal_temp_unlink_buffer(jh);
/* It still points to the committing
......@@ -1091,8 +1090,6 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
if (jh->b_jlist != BJ_SyncData && jh->b_jlist != BJ_Locked) {
JBUFFER_TRACE(jh, "not on correct data list: unfile");
J_ASSERT_JH(jh, jh->b_jlist != BJ_Shadow);
__journal_temp_unlink_buffer(jh);
jh->b_transaction = handle->h_transaction;
JBUFFER_TRACE(jh, "file as data");
__journal_file_buffer(jh, handle->h_transaction,
BJ_SyncData);
......@@ -1300,8 +1297,6 @@ int journal_forget (handle_t *handle, struct buffer_head *bh)
__journal_file_buffer(jh, transaction, BJ_Forget);
} else {
__journal_unfile_buffer(jh);
journal_remove_journal_head(bh);
__brelse(bh);
if (!buffer_jbd(bh)) {
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
......@@ -1622,19 +1617,32 @@ static void __journal_temp_unlink_buffer(struct journal_head *jh)
mark_buffer_dirty(bh); /* Expose it to the VM */
}
/*
* Remove buffer from all transactions.
*
* Called with bh_state lock and j_list_lock
*
* jh and bh may be already freed when this function returns.
*/
void __journal_unfile_buffer(struct journal_head *jh)
{
__journal_temp_unlink_buffer(jh);
jh->b_transaction = NULL;
journal_put_journal_head(jh);
}
void journal_unfile_buffer(journal_t *journal, struct journal_head *jh)
{
jbd_lock_bh_state(jh2bh(jh));
struct buffer_head *bh = jh2bh(jh);
/* Get reference so that buffer cannot be freed before we unlock it */
get_bh(bh);
jbd_lock_bh_state(bh);
spin_lock(&journal->j_list_lock);
__journal_unfile_buffer(jh);
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(jh2bh(jh));
jbd_unlock_bh_state(bh);
__brelse(bh);
}
/*
......@@ -1661,16 +1669,12 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
/* A written-back ordered data buffer */
JBUFFER_TRACE(jh, "release data");
__journal_unfile_buffer(jh);
journal_remove_journal_head(bh);
__brelse(bh);
}
} else if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) {
/* written-back checkpointed metadata buffer */
if (jh->b_jlist == BJ_None) {
JBUFFER_TRACE(jh, "remove from checkpoint list");
__journal_remove_checkpoint(jh);
journal_remove_journal_head(bh);
__brelse(bh);
}
}
spin_unlock(&journal->j_list_lock);
......@@ -1733,7 +1737,7 @@ int journal_try_to_free_buffers(journal_t *journal,
/*
* We take our own ref against the journal_head here to avoid
* having to add tons of locking around each instance of
* journal_remove_journal_head() and journal_put_journal_head().
* journal_put_journal_head().
*/
jh = journal_grab_journal_head(bh);
if (!jh)
......@@ -1770,10 +1774,9 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
int may_free = 1;
struct buffer_head *bh = jh2bh(jh);
__journal_unfile_buffer(jh);
if (jh->b_cp_transaction) {
JBUFFER_TRACE(jh, "on running+cp transaction");
__journal_temp_unlink_buffer(jh);
/*
* We don't want to write the buffer anymore, clear the
* bit so that we don't confuse checks in
......@@ -1784,8 +1787,7 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
may_free = 0;
} else {
JBUFFER_TRACE(jh, "on running transaction");
journal_remove_journal_head(bh);
__brelse(bh);
__journal_unfile_buffer(jh);
}
return may_free;
}
......@@ -2070,6 +2072,8 @@ void __journal_file_buffer(struct journal_head *jh,
if (jh->b_transaction)
__journal_temp_unlink_buffer(jh);
else
journal_grab_journal_head(bh);
jh->b_transaction = transaction;
switch (jlist) {
......@@ -2127,9 +2131,10 @@ void journal_file_buffer(struct journal_head *jh,
* already started to be used by a subsequent transaction, refile the
* buffer on that transaction's metadata list.
*
* Called under journal->j_list_lock
*
* Called under j_list_lock
* Called under jbd_lock_bh_state(jh2bh(jh))
*
* jh and bh may be already free when this function returns
*/
void __journal_refile_buffer(struct journal_head *jh)
{
......@@ -2153,6 +2158,11 @@ void __journal_refile_buffer(struct journal_head *jh)
was_dirty = test_clear_buffer_jbddirty(bh);
__journal_temp_unlink_buffer(jh);
/*
* We set b_transaction here because b_next_transaction will inherit
* our jh reference and thus __journal_file_buffer() must not take a
* new one.
*/
jh->b_transaction = jh->b_next_transaction;
jh->b_next_transaction = NULL;
if (buffer_freed(bh))
......@@ -2169,30 +2179,21 @@ void __journal_refile_buffer(struct journal_head *jh)
}
/*
* For the unlocked version of this call, also make sure that any
* hanging journal_head is cleaned up if necessary.
* __journal_refile_buffer() with necessary locking added. We take our bh
* reference so that we can safely unlock bh.
*
* __journal_refile_buffer is usually called as part of a single locked
* operation on a buffer_head, in which the caller is probably going to
* be hooking the journal_head onto other lists. In that case it is up
* to the caller to remove the journal_head if necessary. For the
* unlocked journal_refile_buffer call, the caller isn't going to be
* doing anything else to the buffer so we need to do the cleanup
* ourselves to avoid a jh leak.
*
* *** The journal_head may be freed by this call! ***
* The jh and bh may be freed by this call.
*/
void journal_refile_buffer(journal_t *journal, struct journal_head *jh)
{
struct buffer_head *bh = jh2bh(jh);
/* Get reference so that buffer cannot be freed before we unlock it */
get_bh(bh);
jbd_lock_bh_state(bh);
spin_lock(&journal->j_list_lock);
__journal_refile_buffer(jh);
jbd_unlock_bh_state(bh);
journal_remove_journal_head(bh);
spin_unlock(&journal->j_list_lock);
__brelse(bh);
}
......@@ -18,6 +18,7 @@
#include <linux/types.h>
#include <linux/magic.h>
#include <linux/fs.h>
/*
* The second extended filesystem constants/structures
......
......@@ -418,12 +418,11 @@ struct ext3_inode {
#define EXT2_MOUNT_DATA_FLAGS EXT3_MOUNT_DATA_FLAGS
#endif
#define ext3_set_bit __test_and_set_bit_le
#define ext3_set_bit __set_bit_le
#define ext3_set_bit_atomic ext2_set_bit_atomic
#define ext3_clear_bit __test_and_clear_bit_le
#define ext3_clear_bit __clear_bit_le
#define ext3_clear_bit_atomic ext2_clear_bit_atomic
#define ext3_test_bit test_bit_le
#define ext3_find_first_zero_bit find_first_zero_bit_le
#define ext3_find_next_zero_bit find_next_zero_bit_le
/*
......@@ -913,7 +912,7 @@ extern void ext3_dirty_inode(struct inode *, int);
extern int ext3_change_inode_journal_flag(struct inode *, int);
extern int ext3_get_inode_loc(struct inode *, struct ext3_iloc *);
extern int ext3_can_truncate(struct inode *inode);
extern void ext3_truncate (struct inode *);
extern void ext3_truncate(struct inode *inode);
extern void ext3_set_inode_flags(struct inode *);
extern void ext3_get_inode_flags(struct ext3_inode_info *);
extern void ext3_set_aops(struct inode *inode);
......
......@@ -940,7 +940,6 @@ extern int journal_force_commit(journal_t *);
*/
struct journal_head *journal_add_journal_head(struct buffer_head *bh);
struct journal_head *journal_grab_journal_head(struct buffer_head *bh);
void journal_remove_journal_head(struct buffer_head *bh);
void journal_put_journal_head(struct journal_head *jh);
/*
......
......@@ -45,7 +45,7 @@ struct journal_head {
* has been cowed
* [jbd_lock_bh_state()]
*/
unsigned b_cow_tid;
tid_t b_cow_tid;
/*
* Copy of the buffer data frozen for writing to the log.
......
......@@ -415,13 +415,5 @@ struct quota_module_name {
{QFMT_VFS_V0, "quota_v2"},\
{0, NULL}}
#else
# /* nodep */ include <sys/cdefs.h>
__BEGIN_DECLS
long quotactl __P ((unsigned int, const char *, int, caddr_t));
__END_DECLS
#endif /* __KERNEL__ */
#endif /* _QUOTA_ */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM ext3
#if !defined(_TRACE_EXT3_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_EXT3_H
#include <linux/tracepoint.h>
TRACE_EVENT(ext3_free_inode,
TP_PROTO(struct inode *inode),
TP_ARGS(inode),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( umode_t, mode )
__field( uid_t, uid )
__field( gid_t, gid )
__field( blkcnt_t, blocks )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->mode = inode->i_mode;
__entry->uid = inode->i_uid;
__entry->gid = inode->i_gid;
__entry->blocks = inode->i_blocks;
),
TP_printk("dev %d,%d ino %lu mode 0%o uid %u gid %u blocks %lu",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
__entry->mode, __entry->uid, __entry->gid,
(unsigned long) __entry->blocks)
);
TRACE_EVENT(ext3_request_inode,
TP_PROTO(struct inode *dir, int mode),
TP_ARGS(dir, mode),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, dir )
__field( umode_t, mode )
),
TP_fast_assign(
__entry->dev = dir->i_sb->s_dev;
__entry->dir = dir->i_ino;
__entry->mode = mode;
),
TP_printk("dev %d,%d dir %lu mode 0%o",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->dir, __entry->mode)
);
TRACE_EVENT(ext3_allocate_inode,
TP_PROTO(struct inode *inode, struct inode *dir, int mode),
TP_ARGS(inode, dir, mode),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( ino_t, dir )
__field( umode_t, mode )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->dir = dir->i_ino;
__entry->mode = mode;
),
TP_printk("dev %d,%d ino %lu dir %lu mode 0%o",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
(unsigned long) __entry->dir, __entry->mode)
);
TRACE_EVENT(ext3_evict_inode,
TP_PROTO(struct inode *inode),
TP_ARGS(inode),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( int, nlink )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->nlink = inode->i_nlink;
),
TP_printk("dev %d,%d ino %lu nlink %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino, __entry->nlink)
);
TRACE_EVENT(ext3_drop_inode,
TP_PROTO(struct inode *inode, int drop),
TP_ARGS(inode, drop),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( int, drop )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->drop = drop;
),
TP_printk("dev %d,%d ino %lu drop %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino, __entry->drop)
);
TRACE_EVENT(ext3_mark_inode_dirty,
TP_PROTO(struct inode *inode, unsigned long IP),
TP_ARGS(inode, IP),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field(unsigned long, ip )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->ip = IP;
),
TP_printk("dev %d,%d ino %lu caller %pF",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino, (void *)__entry->ip)
);
TRACE_EVENT(ext3_write_begin,
TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
unsigned int flags),
TP_ARGS(inode, pos, len, flags),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( loff_t, pos )
__field( unsigned int, len )
__field( unsigned int, flags )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->pos = pos;
__entry->len = len;
__entry->flags = flags;
),
TP_printk("dev %d,%d ino %lu pos %llu len %u flags %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
(unsigned long long) __entry->pos, __entry->len,
__entry->flags)
);
DECLARE_EVENT_CLASS(ext3__write_end,
TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
unsigned int copied),
TP_ARGS(inode, pos, len, copied),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( loff_t, pos )
__field( unsigned int, len )
__field( unsigned int, copied )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->pos = pos;
__entry->len = len;
__entry->copied = copied;
),
TP_printk("dev %d,%d ino %lu pos %llu len %u copied %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
(unsigned long long) __entry->pos, __entry->len,
__entry->copied)
);
DEFINE_EVENT(ext3__write_end, ext3_ordered_write_end,
TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
unsigned int copied),
TP_ARGS(inode, pos, len, copied)
);
DEFINE_EVENT(ext3__write_end, ext3_writeback_write_end,
TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
unsigned int copied),
TP_ARGS(inode, pos, len, copied)
);
DEFINE_EVENT(ext3__write_end, ext3_journalled_write_end,
TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
unsigned int copied),
TP_ARGS(inode, pos, len, copied)
);
DECLARE_EVENT_CLASS(ext3__page_op,
TP_PROTO(struct page *page),
TP_ARGS(page),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( pgoff_t, index )
),
TP_fast_assign(
__entry->index = page->index;
__entry->ino = page->mapping->host->i_ino;
__entry->dev = page->mapping->host->i_sb->s_dev;
),
TP_printk("dev %d,%d ino %lu page_index %lu",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino, __entry->index)
);
DEFINE_EVENT(ext3__page_op, ext3_ordered_writepage,
TP_PROTO(struct page *page),
TP_ARGS(page)
);
DEFINE_EVENT(ext3__page_op, ext3_writeback_writepage,
TP_PROTO(struct page *page),
TP_ARGS(page)
);
DEFINE_EVENT(ext3__page_op, ext3_journalled_writepage,
TP_PROTO(struct page *page),
TP_ARGS(page)
);
DEFINE_EVENT(ext3__page_op, ext3_readpage,
TP_PROTO(struct page *page),
TP_ARGS(page)
);
DEFINE_EVENT(ext3__page_op, ext3_releasepage,
TP_PROTO(struct page *page),
TP_ARGS(page)
);
TRACE_EVENT(ext3_invalidatepage,
TP_PROTO(struct page *page, unsigned long offset),
TP_ARGS(page, offset),
TP_STRUCT__entry(
__field( pgoff_t, index )
__field( unsigned long, offset )
__field( ino_t, ino )
__field( dev_t, dev )
),
TP_fast_assign(
__entry->index = page->index;
__entry->offset = offset;
__entry->ino = page->mapping->host->i_ino;
__entry->dev = page->mapping->host->i_sb->s_dev;
),
TP_printk("dev %d,%d ino %lu page_index %lu offset %lu",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
__entry->index, __entry->offset)
);
TRACE_EVENT(ext3_discard_blocks,
TP_PROTO(struct super_block *sb, unsigned long blk,
unsigned long count),
TP_ARGS(sb, blk, count),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( unsigned long, blk )
__field( unsigned long, count )
),
TP_fast_assign(
__entry->dev = sb->s_dev;
__entry->blk = blk;
__entry->count = count;
),
TP_printk("dev %d,%d blk %lu count %lu",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->blk, __entry->count)
);
TRACE_EVENT(ext3_request_blocks,
TP_PROTO(struct inode *inode, unsigned long goal,
unsigned long count),
TP_ARGS(inode, goal, count),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( unsigned long, count )
__field( unsigned long, goal )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->count = count;
__entry->goal = goal;
),
TP_printk("dev %d,%d ino %lu count %lu goal %lu ",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
__entry->count, __entry->goal)
);
TRACE_EVENT(ext3_allocate_blocks,
TP_PROTO(struct inode *inode, unsigned long goal,
unsigned long count, unsigned long block),
TP_ARGS(inode, goal, count, block),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( unsigned long, block )
__field( unsigned long, count )
__field( unsigned long, goal )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->block = block;
__entry->count = count;
__entry->goal = goal;
),
TP_printk("dev %d,%d ino %lu count %lu block %lu goal %lu",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
__entry->count, __entry->block,
__entry->goal)
);
TRACE_EVENT(ext3_free_blocks,
TP_PROTO(struct inode *inode, unsigned long block,
unsigned long count),
TP_ARGS(inode, block, count),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( umode_t, mode )
__field( unsigned long, block )
__field( unsigned long, count )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->mode = inode->i_mode;
__entry->block = block;
__entry->count = count;
),
TP_printk("dev %d,%d ino %lu mode 0%o block %lu count %lu",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
__entry->mode, __entry->block, __entry->count)
);
TRACE_EVENT(ext3_sync_file_enter,
TP_PROTO(struct file *file, int datasync),
TP_ARGS(file, datasync),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( ino_t, parent )
__field( int, datasync )
),
TP_fast_assign(
struct dentry *dentry = file->f_path.dentry;
__entry->dev = dentry->d_inode->i_sb->s_dev;
__entry->ino = dentry->d_inode->i_ino;
__entry->datasync = datasync;
__entry->parent = dentry->d_parent->d_inode->i_ino;
),
TP_printk("dev %d,%d ino %lu parent %ld datasync %d ",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
(unsigned long) __entry->parent, __entry->datasync)
);
TRACE_EVENT(ext3_sync_file_exit,
TP_PROTO(struct inode *inode, int ret),
TP_ARGS(inode, ret),
TP_STRUCT__entry(
__field( int, ret )
__field( ino_t, ino )
__field( dev_t, dev )
),
TP_fast_assign(
__entry->ret = ret;
__entry->ino = inode->i_ino;
__entry->dev = inode->i_sb->s_dev;
),
TP_printk("dev %d,%d ino %lu ret %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
__entry->ret)
);
TRACE_EVENT(ext3_sync_fs,
TP_PROTO(struct super_block *sb, int wait),
TP_ARGS(sb, wait),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( int, wait )
),
TP_fast_assign(
__entry->dev = sb->s_dev;
__entry->wait = wait;
),
TP_printk("dev %d,%d wait %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->wait)
);
TRACE_EVENT(ext3_rsv_window_add,
TP_PROTO(struct super_block *sb,
struct ext3_reserve_window_node *rsv_node),
TP_ARGS(sb, rsv_node),
TP_STRUCT__entry(
__field( unsigned long, start )
__field( unsigned long, end )
__field( dev_t, dev )
),
TP_fast_assign(
__entry->dev = sb->s_dev;
__entry->start = rsv_node->rsv_window._rsv_start;
__entry->end = rsv_node->rsv_window._rsv_end;
),
TP_printk("dev %d,%d start %lu end %lu",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->start, __entry->end)
);
TRACE_EVENT(ext3_discard_reservation,
TP_PROTO(struct inode *inode,
struct ext3_reserve_window_node *rsv_node),
TP_ARGS(inode, rsv_node),
TP_STRUCT__entry(
__field( unsigned long, start )
__field( unsigned long, end )
__field( ino_t, ino )
__field( dev_t, dev )
),
TP_fast_assign(
__entry->start = rsv_node->rsv_window._rsv_start;
__entry->end = rsv_node->rsv_window._rsv_end;
__entry->ino = inode->i_ino;
__entry->dev = inode->i_sb->s_dev;
),
TP_printk("dev %d,%d ino %lu start %lu end %lu",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long)__entry->ino, __entry->start,
__entry->end)
);
TRACE_EVENT(ext3_alloc_new_reservation,
TP_PROTO(struct super_block *sb, unsigned long goal),
TP_ARGS(sb, goal),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( unsigned long, goal )
),
TP_fast_assign(
__entry->dev = sb->s_dev;
__entry->goal = goal;
),
TP_printk("dev %d,%d goal %lu",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->goal)
);
TRACE_EVENT(ext3_reserved,
TP_PROTO(struct super_block *sb, unsigned long block,
struct ext3_reserve_window_node *rsv_node),
TP_ARGS(sb, block, rsv_node),
TP_STRUCT__entry(
__field( unsigned long, block )
__field( unsigned long, start )
__field( unsigned long, end )
__field( dev_t, dev )
),
TP_fast_assign(
__entry->block = block;
__entry->start = rsv_node->rsv_window._rsv_start;
__entry->end = rsv_node->rsv_window._rsv_end;
__entry->dev = sb->s_dev;
),
TP_printk("dev %d,%d block %lu, start %lu end %lu",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->block, __entry->start, __entry->end)
);
TRACE_EVENT(ext3_forget,
TP_PROTO(struct inode *inode, int is_metadata, unsigned long block),
TP_ARGS(inode, is_metadata, block),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( umode_t, mode )
__field( int, is_metadata )
__field( unsigned long, block )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->mode = inode->i_mode;
__entry->is_metadata = is_metadata;
__entry->block = block;
),
TP_printk("dev %d,%d ino %lu mode 0%o is_metadata %d block %lu",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
__entry->mode, __entry->is_metadata, __entry->block)
);
TRACE_EVENT(ext3_read_block_bitmap,
TP_PROTO(struct super_block *sb, unsigned int group),
TP_ARGS(sb, group),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( __u32, group )
),
TP_fast_assign(
__entry->dev = sb->s_dev;
__entry->group = group;
),
TP_printk("dev %d,%d group %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->group)
);
TRACE_EVENT(ext3_direct_IO_enter,
TP_PROTO(struct inode *inode, loff_t offset, unsigned long len, int rw),
TP_ARGS(inode, offset, len, rw),
TP_STRUCT__entry(
__field( ino_t, ino )
__field( dev_t, dev )
__field( loff_t, pos )
__field( unsigned long, len )
__field( int, rw )
),
TP_fast_assign(
__entry->ino = inode->i_ino;
__entry->dev = inode->i_sb->s_dev;
__entry->pos = offset;
__entry->len = len;
__entry->rw = rw;
),
TP_printk("dev %d,%d ino %lu pos %llu len %lu rw %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
(unsigned long long) __entry->pos, __entry->len,
__entry->rw)
);
TRACE_EVENT(ext3_direct_IO_exit,
TP_PROTO(struct inode *inode, loff_t offset, unsigned long len,
int rw, int ret),
TP_ARGS(inode, offset, len, rw, ret),
TP_STRUCT__entry(
__field( ino_t, ino )
__field( dev_t, dev )
__field( loff_t, pos )
__field( unsigned long, len )
__field( int, rw )
__field( int, ret )
),
TP_fast_assign(
__entry->ino = inode->i_ino;
__entry->dev = inode->i_sb->s_dev;
__entry->pos = offset;
__entry->len = len;
__entry->rw = rw;
__entry->ret = ret;
),
TP_printk("dev %d,%d ino %lu pos %llu len %lu rw %d ret %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
(unsigned long long) __entry->pos, __entry->len,
__entry->rw, __entry->ret)
);
TRACE_EVENT(ext3_unlink_enter,
TP_PROTO(struct inode *parent, struct dentry *dentry),
TP_ARGS(parent, dentry),
TP_STRUCT__entry(
__field( ino_t, parent )
__field( ino_t, ino )
__field( loff_t, size )
__field( dev_t, dev )
),
TP_fast_assign(
__entry->parent = parent->i_ino;
__entry->ino = dentry->d_inode->i_ino;
__entry->size = dentry->d_inode->i_size;
__entry->dev = dentry->d_inode->i_sb->s_dev;
),
TP_printk("dev %d,%d ino %lu size %lld parent %ld",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
(unsigned long long)__entry->size,
(unsigned long) __entry->parent)
);
TRACE_EVENT(ext3_unlink_exit,
TP_PROTO(struct dentry *dentry, int ret),
TP_ARGS(dentry, ret),
TP_STRUCT__entry(
__field( ino_t, ino )
__field( dev_t, dev )
__field( int, ret )
),
TP_fast_assign(
__entry->ino = dentry->d_inode->i_ino;
__entry->dev = dentry->d_inode->i_sb->s_dev;
__entry->ret = ret;
),
TP_printk("dev %d,%d ino %lu ret %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
__entry->ret)
);
DECLARE_EVENT_CLASS(ext3__truncate,
TP_PROTO(struct inode *inode),
TP_ARGS(inode),
TP_STRUCT__entry(
__field( ino_t, ino )
__field( dev_t, dev )
__field( blkcnt_t, blocks )
),
TP_fast_assign(
__entry->ino = inode->i_ino;
__entry->dev = inode->i_sb->s_dev;
__entry->blocks = inode->i_blocks;
),
TP_printk("dev %d,%d ino %lu blocks %lu",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino, (unsigned long) __entry->blocks)
);
DEFINE_EVENT(ext3__truncate, ext3_truncate_enter,
TP_PROTO(struct inode *inode),
TP_ARGS(inode)
);
DEFINE_EVENT(ext3__truncate, ext3_truncate_exit,
TP_PROTO(struct inode *inode),
TP_ARGS(inode)
);
TRACE_EVENT(ext3_get_blocks_enter,
TP_PROTO(struct inode *inode, unsigned long lblk,
unsigned long len, int create),
TP_ARGS(inode, lblk, len, create),
TP_STRUCT__entry(
__field( ino_t, ino )
__field( dev_t, dev )
__field( unsigned long, lblk )
__field( unsigned long, len )
__field( int, create )
),
TP_fast_assign(
__entry->ino = inode->i_ino;
__entry->dev = inode->i_sb->s_dev;
__entry->lblk = lblk;
__entry->len = len;
__entry->create = create;
),
TP_printk("dev %d,%d ino %lu lblk %lu len %lu create %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
__entry->lblk, __entry->len, __entry->create)
);
TRACE_EVENT(ext3_get_blocks_exit,
TP_PROTO(struct inode *inode, unsigned long lblk,
unsigned long pblk, unsigned long len, int ret),
TP_ARGS(inode, lblk, pblk, len, ret),
TP_STRUCT__entry(
__field( ino_t, ino )
__field( dev_t, dev )
__field( unsigned long, lblk )
__field( unsigned long, pblk )
__field( unsigned long, len )
__field( int, ret )
),
TP_fast_assign(
__entry->ino = inode->i_ino;
__entry->dev = inode->i_sb->s_dev;
__entry->lblk = lblk;
__entry->pblk = pblk;
__entry->len = len;
__entry->ret = ret;
),
TP_printk("dev %d,%d ino %lu lblk %lu pblk %lu len %lu ret %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
__entry->lblk, __entry->pblk,
__entry->len, __entry->ret)
);
TRACE_EVENT(ext3_load_inode,
TP_PROTO(struct inode *inode),
TP_ARGS(inode),
TP_STRUCT__entry(
__field( ino_t, ino )
__field( dev_t, dev )
),
TP_fast_assign(
__entry->ino = inode->i_ino;
__entry->dev = inode->i_sb->s_dev;
),
TP_printk("dev %d,%d ino %lu",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino)
);
#endif /* _TRACE_EXT3_H */
/* This part must be outside protection */
#include <trace/define_trace.h>
#undef TRACE_SYSTEM
#define TRACE_SYSTEM jbd
#if !defined(_TRACE_JBD_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_JBD_H
#include <linux/jbd.h>
#include <linux/tracepoint.h>
TRACE_EVENT(jbd_checkpoint,
TP_PROTO(journal_t *journal, int result),
TP_ARGS(journal, result),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( int, result )
),
TP_fast_assign(
__entry->dev = journal->j_fs_dev->bd_dev;
__entry->result = result;
),
TP_printk("dev %d,%d result %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->result)
);
DECLARE_EVENT_CLASS(jbd_commit,
TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
TP_ARGS(journal, commit_transaction),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( char, sync_commit )
__field( int, transaction )
),
TP_fast_assign(
__entry->dev = journal->j_fs_dev->bd_dev;
__entry->sync_commit = commit_transaction->t_synchronous_commit;
__entry->transaction = commit_transaction->t_tid;
),
TP_printk("dev %d,%d transaction %d sync %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->transaction, __entry->sync_commit)
);
DEFINE_EVENT(jbd_commit, jbd_start_commit,
TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
TP_ARGS(journal, commit_transaction)
);
DEFINE_EVENT(jbd_commit, jbd_commit_locking,
TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
TP_ARGS(journal, commit_transaction)
);
DEFINE_EVENT(jbd_commit, jbd_commit_flushing,
TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
TP_ARGS(journal, commit_transaction)
);
DEFINE_EVENT(jbd_commit, jbd_commit_logging,
TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
TP_ARGS(journal, commit_transaction)
);
TRACE_EVENT(jbd_drop_transaction,
TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
TP_ARGS(journal, commit_transaction),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( char, sync_commit )
__field( int, transaction )
),
TP_fast_assign(
__entry->dev = journal->j_fs_dev->bd_dev;
__entry->sync_commit = commit_transaction->t_synchronous_commit;
__entry->transaction = commit_transaction->t_tid;
),
TP_printk("dev %d,%d transaction %d sync %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->transaction, __entry->sync_commit)
);
TRACE_EVENT(jbd_end_commit,
TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
TP_ARGS(journal, commit_transaction),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( char, sync_commit )
__field( int, transaction )
__field( int, head )
),
TP_fast_assign(
__entry->dev = journal->j_fs_dev->bd_dev;
__entry->sync_commit = commit_transaction->t_synchronous_commit;
__entry->transaction = commit_transaction->t_tid;
__entry->head = journal->j_tail_sequence;
),
TP_printk("dev %d,%d transaction %d sync %d head %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->transaction, __entry->sync_commit, __entry->head)
);
TRACE_EVENT(jbd_do_submit_data,
TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
TP_ARGS(journal, commit_transaction),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( char, sync_commit )
__field( int, transaction )
),
TP_fast_assign(
__entry->dev = journal->j_fs_dev->bd_dev;
__entry->sync_commit = commit_transaction->t_synchronous_commit;
__entry->transaction = commit_transaction->t_tid;
),
TP_printk("dev %d,%d transaction %d sync %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->transaction, __entry->sync_commit)
);
TRACE_EVENT(jbd_cleanup_journal_tail,
TP_PROTO(journal_t *journal, tid_t first_tid,
unsigned long block_nr, unsigned long freed),
TP_ARGS(journal, first_tid, block_nr, freed),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( tid_t, tail_sequence )
__field( tid_t, first_tid )
__field(unsigned long, block_nr )
__field(unsigned long, freed )
),
TP_fast_assign(
__entry->dev = journal->j_fs_dev->bd_dev;
__entry->tail_sequence = journal->j_tail_sequence;
__entry->first_tid = first_tid;
__entry->block_nr = block_nr;
__entry->freed = freed;
),
TP_printk("dev %d,%d from %u to %u offset %lu freed %lu",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->tail_sequence, __entry->first_tid,
__entry->block_nr, __entry->freed)
);
TRACE_EVENT(jbd_update_superblock_end,
TP_PROTO(journal_t *journal, int wait),
TP_ARGS(journal, wait),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( int, wait )
),
TP_fast_assign(
__entry->dev = journal->j_fs_dev->bd_dev;
__entry->wait = wait;
),
TP_printk("dev %d,%d wait %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->wait)
);
#endif /* _TRACE_JBD_H */
/* This part must be outside protection */
#include <trace/define_trace.h>
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment