Commit acd15a83 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2

* 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2: (56 commits)
  ocfs2: Make cached block reads the common case.
  ocfs2: Kill the last naked wait_on_buffer() for cached reads.
  ocfs2: Move ocfs2_bread() into dir.c
  ocfs2: Simplify ocfs2_read_block()
  ocfs2: Require an inode for ocfs2_read_block(s)().
  ocfs2: Separate out sync reads from ocfs2_read_blocks()
  ocfs2: Refactor xattr list and remove ocfs2_xattr_handler().
  ocfs2: Calculate EA hash only by its suffix.
  ocfs2: Move trusted and user attribute support into xattr.c
  ocfs2: Uninline ocfs2_xattr_name_hash()
  ocfs2: Don't check for NULL before brelse()
  ocfs2: use smaller counters in ocfs2_remove_xattr_clusters_from_cache
  ocfs2: Documentation update for user_xattr / nouser_xattr mount options
  ocfs2: make la_debug_mutex static
  ocfs2: Remove pointless !!
  ocfs2: Add empty bucket support in xattr.
  ocfs2/xattr.c: Fix a bug when inserting xattr.
  ocfs2: Add xattr mount option in ocfs2_show_options()
  ocfs2: Switch over to JBD2.
  ocfs2: Add the 'inode64' mount option.
  ...
parents 72f22b1e d4a8c93c
......@@ -76,3 +76,9 @@ localalloc=8(*) Allows custom localalloc size in MB. If the value is too
large, the fs will silently revert it to the default.
Localalloc is not enabled for local mounts.
localflocks This disables cluster aware flock.
inode64 Indicates that Ocfs2 is allowed to create inodes at
any location in the filesystem, including those which
will result in inode numbers occupying more than 32
bits of significance.
user_xattr (*) Enables Extended User Attributes.
nouser_xattr Disables Extended User Attributes.
......@@ -220,17 +220,16 @@ config JBD
tristate
help
This is a generic journalling layer for block devices. It is
currently used by the ext3 and OCFS2 file systems, but it could
also be used to add journal support to other file systems or block
currently used by the ext3 file system, but it could also be
used to add journal support to other file systems or block
devices such as RAID or LVM.
If you are using the ext3 or OCFS2 file systems, you need to
say Y here. If you are not using ext3 OCFS2 then you will probably
want to say N.
If you are using the ext3 file system, you need to say Y here.
If you are not using ext3 then you will probably want to say N.
To compile this device as a module, choose M here: the module will be
called jbd. If you are compiling ext3 or OCFS2 into the kernel,
you cannot compile this code as a module.
called jbd. If you are compiling ext3 into the kernel, you
cannot compile this code as a module.
config JBD_DEBUG
bool "JBD (ext3) debugging support"
......@@ -254,15 +253,16 @@ config JBD2
help
This is a generic journaling layer for block devices that support
both 32-bit and 64-bit block numbers. It is currently used by
the ext4 filesystem, but it could also be used to add
the ext4 and OCFS2 filesystems, but it could also be used to add
journal support to other file systems or block devices such
as RAID or LVM.
If you are using ext4, you need to say Y here. If you are not
using ext4 then you will probably want to say N.
If you are using ext4 or OCFS2, you need to say Y here.
If you are not using ext4 or OCFS2 then you will
probably want to say N.
To compile this device as a module, choose M here. The module will be
called jbd2. If you are compiling ext4 into the kernel,
called jbd2. If you are compiling ext4 or OCFS2 into the kernel,
you cannot compile this code as a module.
config JBD2_DEBUG
......@@ -448,7 +448,7 @@ config OCFS2_FS
tristate "OCFS2 file system support"
depends on NET && SYSFS
select CONFIGFS_FS
select JBD
select JBD2
select CRC32
help
OCFS2 is a general purpose extent based shared disk cluster file
......@@ -519,6 +519,16 @@ config OCFS2_DEBUG_FS
this option for debugging only as it is likely to decrease
performance of the filesystem.
config OCFS2_COMPAT_JBD
bool "Use JBD for compatibility"
depends on OCFS2_FS
default n
select JBD
help
The ocfs2 filesystem now uses JBD2 for its journalling. JBD2
is backwards compatible with JBD. It is safe to say N here.
However, if you really want to use the original JBD, say Y here.
endif # BLOCK
config DNOTIFY
......
......@@ -34,7 +34,8 @@ ocfs2-objs := \
symlink.o \
sysfile.o \
uptodate.o \
ver.o
ver.o \
xattr.o
ocfs2_stackglue-objs := stackglue.o
ocfs2_stack_o2cb-objs := stack_o2cb.o
......
This diff is collapsed.
......@@ -26,30 +26,102 @@
#ifndef OCFS2_ALLOC_H
#define OCFS2_ALLOC_H
/*
* For xattr tree leaf, we limit the leaf byte size to be 64K.
*/
#define OCFS2_MAX_XATTR_TREE_LEAF_SIZE 65536
/*
* ocfs2_extent_tree and ocfs2_extent_tree_operations are used to abstract
* the b-tree operations in ocfs2. B-tree operations are no longer
* limited to ocfs2_dinode: any data that needs to allocate clusters
* for storage can use a b-tree, and it only needs to implement its own
* ocfs2_extent_tree and operations.
*
* ocfs2_extent_tree becomes the first-class object for extent tree
* manipulation. Callers of the alloc.c code need to fill it via one of
* the ocfs2_init_*_extent_tree() operations below.
*
* ocfs2_extent_tree contains info for the root of the b-tree; it must have a
* root ocfs2_extent_list and a root_bh so that they can be used in the b-tree
* functions.
* ocfs2_extent_tree_operations abstracts the normal operations we do for
* the root of the extent b-tree.
*/
struct ocfs2_extent_tree_operations;
/*
 * Describes the root of one extent b-tree for the alloc.c code.  Callers
 * fill it in through one of the ocfs2_init_*_extent_tree() helpers.
 */
struct ocfs2_extent_tree {
/* Operations performed on the root of this tree. */
struct ocfs2_extent_tree_operations *et_ops;
/* Buffer head for the root of the tree. */
struct buffer_head *et_root_bh;
/* Root extent list of the tree. */
struct ocfs2_extent_list *et_root_el;
/* NOTE(review): presumably the object that owns the tree - confirm in alloc.c. */
void *et_object;
/* NOTE(review): looks like a cap on leaf extent size, in clusters - confirm in alloc.c. */
unsigned int et_max_leaf_clusters;
};
/*
* ocfs2_init_*_extent_tree() will fill an ocfs2_extent_tree from the
* specified object buffer.
*/
void ocfs2_init_dinode_extent_tree(struct ocfs2_extent_tree *et,
struct inode *inode,
struct buffer_head *bh);
void ocfs2_init_xattr_tree_extent_tree(struct ocfs2_extent_tree *et,
struct inode *inode,
struct buffer_head *bh);
void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et,
struct inode *inode,
struct buffer_head *bh,
struct ocfs2_xattr_value_root *xv);
struct ocfs2_alloc_context;
int ocfs2_insert_extent(struct ocfs2_super *osb,
handle_t *handle,
struct inode *inode,
struct buffer_head *fe_bh,
struct ocfs2_extent_tree *et,
u32 cpos,
u64 start_blk,
u32 new_clusters,
u8 flags,
struct ocfs2_alloc_context *meta_ac);
/*
 * Type of the reason_ret out-parameter of ocfs2_add_clusters_in_btree().
 * NOTE(review): presumably tells the caller whether, and why, an
 * allocation has to be restarted - confirm against the callers.
 */
enum ocfs2_alloc_restarted {
RESTART_NONE = 0,
RESTART_TRANS,
RESTART_META
};
int ocfs2_add_clusters_in_btree(struct ocfs2_super *osb,
struct inode *inode,
u32 *logical_offset,
u32 clusters_to_add,
int mark_unwritten,
struct ocfs2_extent_tree *et,
handle_t *handle,
struct ocfs2_alloc_context *data_ac,
struct ocfs2_alloc_context *meta_ac,
enum ocfs2_alloc_restarted *reason_ret);
struct ocfs2_cached_dealloc_ctxt;
int ocfs2_mark_extent_written(struct inode *inode, struct buffer_head *di_bh,
int ocfs2_mark_extent_written(struct inode *inode,
struct ocfs2_extent_tree *et,
handle_t *handle, u32 cpos, u32 len, u32 phys,
struct ocfs2_alloc_context *meta_ac,
struct ocfs2_cached_dealloc_ctxt *dealloc);
int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh,
int ocfs2_remove_extent(struct inode *inode,
struct ocfs2_extent_tree *et,
u32 cpos, u32 len, handle_t *handle,
struct ocfs2_alloc_context *meta_ac,
struct ocfs2_cached_dealloc_ctxt *dealloc);
int ocfs2_num_free_extents(struct ocfs2_super *osb,
struct inode *inode,
struct ocfs2_dinode *fe);
/* how many new metadata chunks would an allocation need at maximum? */
static inline int ocfs2_extend_meta_needed(struct ocfs2_dinode *fe)
struct ocfs2_extent_tree *et);
/*
* how many new metadata chunks would an allocation need at maximum?
*
* Please note that the caller must make sure that root_el is the root
* of extent tree. So for an inode, it should be &fe->id2.i_list. Otherwise
* the result may be wrong.
*/
static inline int ocfs2_extend_meta_needed(struct ocfs2_extent_list *root_el)
{
/*
* Rather than do all the work of determining how much we need
......@@ -59,7 +131,7 @@ static inline int ocfs2_extend_meta_needed(struct ocfs2_dinode *fe)
* new tree_depth==0 extent_block, and one block at the new
* top-of-the tree.
*/
return le16_to_cpu(fe->id2.i_list.l_tree_depth) + 2;
return le16_to_cpu(root_el->l_tree_depth) + 2;
}
void ocfs2_dinode_new_extent_list(struct inode *inode, struct ocfs2_dinode *di);
......
......@@ -68,9 +68,7 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
goto bail;
}
status = ocfs2_read_block(OCFS2_SB(inode->i_sb),
OCFS2_I(inode)->ip_blkno,
&bh, OCFS2_BH_CACHED, inode);
status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &bh);
if (status < 0) {
mlog_errno(status);
goto bail;
......@@ -128,7 +126,6 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
err = 0;
bail:
if (bh)
brelse(bh);
mlog_exit(err);
......@@ -261,13 +258,11 @@ static int ocfs2_readpage_inline(struct inode *inode, struct page *page)
{
int ret;
struct buffer_head *di_bh = NULL;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
BUG_ON(!PageLocked(page));
BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL));
ret = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &di_bh,
OCFS2_BH_CACHED, inode);
ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &di_bh);
if (ret) {
mlog_errno(ret);
goto out;
......@@ -485,10 +480,13 @@ handle_t *ocfs2_start_walk_page_trans(struct inode *inode,
}
if (ocfs2_should_order_data(inode)) {
ret = ocfs2_jbd2_file_inode(handle, inode);
#ifdef CONFIG_OCFS2_COMPAT_JBD
ret = walk_page_buffers(handle,
page_buffers(page),
from, to, NULL,
ocfs2_journal_dirty_data);
#endif
if (ret < 0)
mlog_errno(ret);
}
......@@ -669,7 +667,7 @@ static void ocfs2_invalidatepage(struct page *page, unsigned long offset)
{
journal_t *journal = OCFS2_SB(page->mapping->host->i_sb)->journal->j_journal;
journal_invalidatepage(journal, page, offset);
jbd2_journal_invalidatepage(journal, page, offset);
}
static int ocfs2_releasepage(struct page *page, gfp_t wait)
......@@ -678,7 +676,7 @@ static int ocfs2_releasepage(struct page *page, gfp_t wait)
if (!page_has_buffers(page))
return 0;
return journal_try_to_free_buffers(journal, page, wait);
return jbd2_journal_try_to_free_buffers(journal, page, wait);
}
static ssize_t ocfs2_direct_IO(int rw,
......@@ -1074,11 +1072,15 @@ static void ocfs2_write_failure(struct inode *inode,
tmppage = wc->w_pages[i];
if (page_has_buffers(tmppage)) {
if (ocfs2_should_order_data(inode))
if (ocfs2_should_order_data(inode)) {
ocfs2_jbd2_file_inode(wc->w_handle, inode);
#ifdef CONFIG_OCFS2_COMPAT_JBD
walk_page_buffers(wc->w_handle,
page_buffers(tmppage),
from, to, NULL,
ocfs2_journal_dirty_data);
#endif
}
block_commit_write(tmppage, from, to);
}
......@@ -1242,6 +1244,7 @@ static int ocfs2_write_cluster(struct address_space *mapping,
int ret, i, new, should_zero = 0;
u64 v_blkno, p_blkno;
struct inode *inode = mapping->host;
struct ocfs2_extent_tree et;
new = phys == 0 ? 1 : 0;
if (new || unwritten)
......@@ -1255,7 +1258,7 @@ static int ocfs2_write_cluster(struct address_space *mapping,
* any additional semaphores or cluster locks.
*/
tmp_pos = cpos;
ret = ocfs2_do_extend_allocation(OCFS2_SB(inode->i_sb), inode,
ret = ocfs2_add_inode_data(OCFS2_SB(inode->i_sb), inode,
&tmp_pos, 1, 0, wc->w_di_bh,
wc->w_handle, data_ac,
meta_ac, NULL);
......@@ -1276,7 +1279,8 @@ static int ocfs2_write_cluster(struct address_space *mapping,
goto out;
}
} else if (unwritten) {
ret = ocfs2_mark_extent_written(inode, wc->w_di_bh,
ocfs2_init_dinode_extent_tree(&et, inode, wc->w_di_bh);
ret = ocfs2_mark_extent_written(inode, &et,
wc->w_handle, cpos, 1, phys,
meta_ac, &wc->w_dealloc);
if (ret < 0) {
......@@ -1665,6 +1669,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
struct ocfs2_alloc_context *data_ac = NULL;
struct ocfs2_alloc_context *meta_ac = NULL;
handle_t *handle;
struct ocfs2_extent_tree et;
ret = ocfs2_alloc_write_ctxt(&wc, osb, pos, len, di_bh);
if (ret) {
......@@ -1712,14 +1717,23 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
* ocfs2_lock_allocators(). It greatly over-estimates
* the work to be done.
*/
ret = ocfs2_lock_allocators(inode, di, clusters_to_alloc,
extents_to_split, &data_ac, &meta_ac);
mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u,"
" clusters_to_add = %u, extents_to_split = %u\n",
(unsigned long long)OCFS2_I(inode)->ip_blkno,
(long long)i_size_read(inode), le32_to_cpu(di->i_clusters),
clusters_to_alloc, extents_to_split);
ocfs2_init_dinode_extent_tree(&et, inode, wc->w_di_bh);
ret = ocfs2_lock_allocators(inode, &et,
clusters_to_alloc, extents_to_split,
&data_ac, &meta_ac);
if (ret) {
mlog_errno(ret);
goto out;
}
credits = ocfs2_calc_extend_credits(inode->i_sb, di,
credits = ocfs2_calc_extend_credits(inode->i_sb,
&di->id2.i_list,
clusters_to_alloc);
}
......@@ -1905,11 +1919,15 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
}
if (page_has_buffers(tmppage)) {
if (ocfs2_should_order_data(inode))
if (ocfs2_should_order_data(inode)) {
ocfs2_jbd2_file_inode(wc->w_handle, inode);
#ifdef CONFIG_OCFS2_COMPAT_JBD
walk_page_buffers(wc->w_handle,
page_buffers(tmppage),
from, to, NULL,
ocfs2_journal_dirty_data);
#endif
}
block_commit_write(tmppage, from, to);
}
}
......
......@@ -88,22 +88,103 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
return ret;
}
int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
struct buffer_head *bhs[], int flags,
struct inode *inode)
/*
 * Synchronously read up to nr blocks into bhs[].
 *
 * Each NULL slot in bhs[] is filled from sb_getblk(), taking successive
 * block numbers starting at block; slots that already hold a buffer are
 * used as they are.  A read is submitted for every buffer that is neither
 * journal-managed (buffer_jbd) nor dirty, and the function then waits for
 * the submitted reads.
 *
 * Returns 0 on success, or -EIO if a buffer could not be obtained or a
 * buffer is not uptodate after the wait.  A buffer that is not uptodate
 * is dropped with put_bh() and its slot in bhs[] is reset to NULL.
 */
int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
			   unsigned int nr, struct buffer_head *bhs[])
{
	int status = 0;
	unsigned int i;
	struct buffer_head *bh;

	if (!nr) {
		mlog(ML_BH_IO, "No buffers will be read!\n");
		goto bail;
	}

	for (i = 0 ; i < nr ; i++) {
		if (bhs[i] == NULL) {
			bhs[i] = sb_getblk(osb->sb, block++);
			if (bhs[i] == NULL) {
				status = -EIO;
				mlog_errno(status);
				goto bail;
			}
		}
		bh = bhs[i];

		if (buffer_jbd(bh)) {
			/*
			 * The buffer is skipped, not failed, so log it at
			 * ML_BH_IO (as ocfs2_read_blocks() does for the
			 * same condition) rather than as an error.
			 */
			mlog(ML_BH_IO,
			     "trying to sync read a jbd "
			     "managed bh (blocknr = %llu), skipping\n",
			     (unsigned long long)bh->b_blocknr);
			continue;
		}

		if (buffer_dirty(bh)) {
			/* This should probably be a BUG, or
			 * at least return an error. */
			mlog(ML_ERROR,
			     "trying to sync read a dirty "
			     "buffer! (blocknr = %llu), skipping\n",
			     (unsigned long long)bh->b_blocknr);
			continue;
		}

		lock_buffer(bh);
		if (buffer_jbd(bh)) {
			mlog(ML_ERROR,
			     "block %llu had the JBD bit set "
			     "while I was in lock_buffer!\n",
			     (unsigned long long)bh->b_blocknr);
			BUG();
		}

		clear_buffer_uptodate(bh);
		get_bh(bh); /* for end_buffer_read_sync() */
		bh->b_end_io = end_buffer_read_sync;
		submit_bh(READ, bh);
	}

	for (i = nr; i > 0; i--) {
		bh = bhs[i - 1];

		/*
		 * Journal-managed buffers were deliberately skipped by the
		 * submit loop above, so meeting one here is expected and
		 * must not be fatal.  No read was issued for it, hence
		 * there is nothing to wait for.
		 */
		if (!buffer_jbd(bh))
			wait_on_buffer(bh);

		if (!buffer_uptodate(bh)) {
			/* Status won't be cleared from here on out,
			 * so we can safely record this and loop back
			 * to cleanup the other buffers. */
			status = -EIO;
			put_bh(bh);
			bhs[i - 1] = NULL;
		}
	}

bail:
	return status;
}
int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
struct buffer_head *bhs[], int flags)
{
int status = 0;
struct super_block *sb;
int i, ignore_cache = 0;
struct buffer_head *bh;
mlog_entry("(block=(%llu), nr=(%d), flags=%d, inode=%p)\n",
(unsigned long long)block, nr, flags, inode);
mlog_entry("(inode=%p, block=(%llu), nr=(%d), flags=%d)\n",
inode, (unsigned long long)block, nr, flags);
BUG_ON(!inode);
BUG_ON((flags & OCFS2_BH_READAHEAD) &&
(!inode || !(flags & OCFS2_BH_CACHED)));
(flags & OCFS2_BH_IGNORE_CACHE));
if (osb == NULL || osb->sb == NULL || bhs == NULL) {
if (bhs == NULL) {
status = -EINVAL;
mlog_errno(status);
goto bail;
......@@ -122,18 +203,11 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
goto bail;
}
sb = osb->sb;
if (flags & OCFS2_BH_CACHED && !inode)
flags &= ~OCFS2_BH_CACHED;
if (inode)
mutex_lock(&OCFS2_I(inode)->ip_io_mutex);
for (i = 0 ; i < nr ; i++) {
if (bhs[i] == NULL) {
bhs[i] = sb_getblk(sb, block++);
bhs[i] = sb_getblk(inode->i_sb, block++);
if (bhs[i] == NULL) {
if (inode)
mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
status = -EIO;
mlog_errno(status);
......@@ -141,7 +215,7 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
}
}
bh = bhs[i];
ignore_cache = 0;
ignore_cache = (flags & OCFS2_BH_IGNORE_CACHE);
/* There are three read-ahead cases here which we need to
* be concerned with. All three assume a buffer has
......@@ -167,26 +241,27 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
* before our is-it-in-flight check.
*/
if (flags & OCFS2_BH_CACHED &&
!ocfs2_buffer_uptodate(inode, bh)) {
if (!ignore_cache && !ocfs2_buffer_uptodate(inode, bh)) {
mlog(ML_UPTODATE,
"bh (%llu), inode %llu not uptodate\n",
(unsigned long long)bh->b_blocknr,
(unsigned long long)OCFS2_I(inode)->ip_blkno);
/* We're using ignore_cache here to say
* "go to disk" */
ignore_cache = 1;
}
/* XXX: Can we ever get this and *not* have the cached
* flag set? */
if (buffer_jbd(bh)) {
if (!(flags & OCFS2_BH_CACHED) || ignore_cache)
if (ignore_cache)
mlog(ML_BH_IO, "trying to sync read a jbd "
"managed bh (blocknr = %llu)\n",
(unsigned long long)bh->b_blocknr);
continue;
}
if (!(flags & OCFS2_BH_CACHED) || ignore_cache) {
if (ignore_cache) {
if (buffer_dirty(bh)) {
/* This should probably be a BUG, or
* at least return an error. */
......@@ -221,7 +296,7 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
* previously read-ahead buffer may have
* completed I/O while we were waiting for the
* buffer lock. */
if ((flags & OCFS2_BH_CACHED)
if (!(flags & OCFS2_BH_IGNORE_CACHE)
&& !(flags & OCFS2_BH_READAHEAD)
&& ocfs2_buffer_uptodate(inode, bh)) {
unlock_buffer(bh);
......@@ -265,15 +340,14 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
/* Always set the buffer in the cache, even if it was
* a forced read, or read-ahead which hasn't yet
* completed. */
if (inode)
ocfs2_set_buffer_uptodate(inode, bh);
}
if (inode)
mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n",
(unsigned long long)block, nr,
(!(flags & OCFS2_BH_CACHED) || ignore_cache) ? "no" : "yes", flags);
((flags & OCFS2_BH_IGNORE_CACHE) || ignore_cache) ? "no" : "yes",
flags);
bail:
......
......@@ -31,31 +31,29 @@
void ocfs2_end_buffer_io_sync(struct buffer_head *bh,
int uptodate);
static inline int ocfs2_read_block(struct ocfs2_super *osb,
static inline int ocfs2_read_block(struct inode *inode,
u64 off,
struct buffer_head **bh,
int flags,
struct inode *inode);
struct buffer_head **bh);
int ocfs2_write_block(struct ocfs2_super *osb,
struct buffer_head *bh,
struct inode *inode);
int ocfs2_read_blocks(struct ocfs2_super *osb,
int ocfs2_read_blocks(struct inode *inode,
u64 block,
int nr,
struct buffer_head *bhs[],
int flags,
struct inode *inode);
int flags);
int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
unsigned int nr, struct buffer_head *bhs[]);
int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
struct buffer_head *bh);
#define OCFS2_BH_CACHED 1
#define OCFS2_BH_IGNORE_CACHE 1
#define OCFS2_BH_READAHEAD 8
static inline int ocfs2_read_block(struct ocfs2_super * osb, u64 off,
struct buffer_head **bh, int flags,
struct inode *inode)
static inline int ocfs2_read_block(struct inode *inode, u64 off,
struct buffer_head **bh)
{
int status = 0;
......@@ -65,8 +63,7 @@ static inline int ocfs2_read_block(struct ocfs2_super * osb, u64 off,
goto bail;
}
status = ocfs2_read_blocks(osb, off, 1, bh,
flags, inode);
status = ocfs2_read_blocks(inode, off, 1, bh, 0);
bail:
return status;
......
......@@ -109,6 +109,7 @@ static struct mlog_attribute mlog_attrs[MLOG_MAX_BITS] = {
define_mask(CONN),
define_mask(QUORUM),
define_mask(EXPORT),
define_mask(XATTR),
define_mask(ERROR),
define_mask(NOTICE),
define_mask(KTHREAD),
......
......@@ -112,6 +112,7 @@
#define ML_CONN 0x0000000004000000ULL /* net connection management */
#define ML_QUORUM 0x0000000008000000ULL /* net connection quorum */
#define ML_EXPORT 0x0000000010000000ULL /* ocfs2 export operations */
#define ML_XATTR 0x0000000020000000ULL /* ocfs2 extended attributes */
/* bits that are infrequently given and frequently matched in the high word */
#define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */
#define ML_NOTICE 0x0000000200000000ULL /* sent to KERN_NOTICE */
......
......@@ -82,6 +82,49 @@ static int ocfs2_do_extend_dir(struct super_block *sb,
struct ocfs2_alloc_context *meta_ac,
struct buffer_head **new_bh);
static struct buffer_head *ocfs2_bread(struct inode *inode,
int block, int *err, int reada)
{
struct buffer_head *bh = NULL;
int tmperr;
u64 p_blkno;
int readflags = 0;
if (reada)
readflags |= OCFS2_BH_READAHEAD;
if (((u64)block << inode->i_sb->s_blocksize_bits) >=
i_size_read(inode)) {
BUG_ON(!reada);
return NULL;
}
down_read(&OCFS2_I(inode)->ip_alloc_sem);
tmperr = ocfs2_extent_map_get_blocks(inode, block, &p_blkno, NULL,
NULL);
up_read(&OCFS2_I(inode)->ip_alloc_sem);
if (tmperr < 0) {
mlog_errno(tmperr);
goto fail;
}
tmperr = ocfs2_read_blocks(inode, p_blkno, 1, &bh, readflags);
if (tmperr < 0)
goto fail;
tmperr = 0;
*err = 0;
return bh;
fail:
brelse(bh);
bh = NULL;
*err = -EIO;
return NULL;
}
/*
* bh passed here can be an inode block or a dir data block, depending
* on the inode inline data flag.
......@@ -188,8 +231,7 @@ static struct buffer_head *ocfs2_find_entry_id(const char *name,
struct ocfs2_dinode *di;
struct ocfs2_inline_data *data;
ret = ocfs2_read_block(OCFS2_SB(dir->i_sb), OCFS2_I(dir)->ip_blkno,
&di_bh, OCFS2_BH_CACHED, dir);
ret = ocfs2_read_block(dir, OCFS2_I(dir)->ip_blkno, &di_bh);
if (ret) {
mlog_errno(ret);
goto out;
......@@ -260,14 +302,13 @@ static struct buffer_head *ocfs2_find_entry_el(const char *name, int namelen,
}
if ((bh = bh_use[ra_ptr++]) == NULL)
goto next;
wait_on_buffer(bh);
if (!buffer_uptodate(bh)) {
/* read error, skip block & hope for the best */
if (ocfs2_read_block(dir, block, &bh)) {
/* read error, skip block & hope for the best.
* ocfs2_read_block() has released the bh. */
ocfs2_error(dir->i_sb, "reading directory %llu, "
"offset %lu\n",
(unsigned long long)OCFS2_I(dir)->ip_blkno,
block);
brelse(bh);
goto next;
}
i = ocfs2_search_dirblock(bh, dir, name, namelen,
......@@ -417,8 +458,7 @@ static inline int ocfs2_delete_entry_id(handle_t *handle,
struct ocfs2_dinode *di;
struct ocfs2_inline_data *data;
ret = ocfs2_read_block(OCFS2_SB(dir->i_sb), OCFS2_I(dir)->ip_blkno,
&di_bh, OCFS2_BH_CACHED, dir);
ret = ocfs2_read_block(dir, OCFS2_I(dir)->ip_blkno, &di_bh);
if (ret) {
mlog_errno(ret);
goto out;
......@@ -596,8 +636,7 @@ static int ocfs2_dir_foreach_blk_id(struct inode *inode,
struct ocfs2_inline_data *data;
struct ocfs2_dir_entry *de;
ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), OCFS2_I(inode)->ip_blkno,
&di_bh, OCFS2_BH_CACHED, inode);
ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &di_bh);
if (ret) {
mlog(ML_ERROR, "Unable to read inode block for dir %llu\n",
(unsigned long long)OCFS2_I(inode)->ip_blkno);
......@@ -716,7 +755,6 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode,
for (i = ra_sectors >> (sb->s_blocksize_bits - 9);
i > 0; i--) {
tmp = ocfs2_bread(inode, ++blk, &err, 1);
if (tmp)
brelse(tmp);
}
last_ra_blk = blk;
......@@ -899,11 +937,9 @@ int ocfs2_find_files_on_disk(const char *name,
leave:
if (status < 0) {
*dirent = NULL;
if (*dirent_bh) {
brelse(*dirent_bh);
*dirent_bh = NULL;
}
}
mlog_exit(status);
return status;
......@@ -951,7 +987,6 @@ int ocfs2_check_dir_for_entry(struct inode *dir,
ret = 0;
bail:
if (dirent_bh)
brelse(dirent_bh);
mlog_exit(ret);
......@@ -1127,7 +1162,6 @@ static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb,
status = 0;
bail:
if (new_bh)
brelse(new_bh);
mlog_exit(status);
......@@ -1192,6 +1226,9 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
struct buffer_head *dirdata_bh = NULL;
struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
handle_t *handle;
struct ocfs2_extent_tree et;
ocfs2_init_dinode_extent_tree(&et, dir, di_bh);
alloc = ocfs2_clusters_for_bytes(sb, bytes);
......@@ -1305,8 +1342,8 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
* This should never fail as our extent list is empty and all
* related blocks have been journaled already.
*/
ret = ocfs2_insert_extent(osb, handle, dir, di_bh, 0, blkno, len, 0,
NULL);
ret = ocfs2_insert_extent(osb, handle, dir, &et, 0, blkno, len,
0, NULL);
if (ret) {
mlog_errno(ret);
goto out_commit;
......@@ -1337,8 +1374,8 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
}
blkno = ocfs2_clusters_to_blocks(dir->i_sb, bit_off);
ret = ocfs2_insert_extent(osb, handle, dir, di_bh, 1, blkno,
len, 0, NULL);
ret = ocfs2_insert_extent(osb, handle, dir, &et, 1,
blkno, len, 0, NULL);
if (ret) {
mlog_errno(ret);
goto out_commit;
......@@ -1383,7 +1420,7 @@ static int ocfs2_do_extend_dir(struct super_block *sb,
if (extend) {
u32 offset = OCFS2_I(dir)->ip_clusters;
status = ocfs2_do_extend_allocation(OCFS2_SB(sb), dir, &offset,
status = ocfs2_add_inode_data(OCFS2_SB(sb), dir, &offset,
1, 0, parent_fe_bh, handle,
data_ac, meta_ac, NULL);
BUG_ON(status == -EAGAIN);
......@@ -1430,12 +1467,14 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
int credits, num_free_extents, drop_alloc_sem = 0;
loff_t dir_i_size;
struct ocfs2_dinode *fe = (struct ocfs2_dinode *) parent_fe_bh->b_data;
struct ocfs2_extent_list *el = &fe->id2.i_list;
struct ocfs2_alloc_context *data_ac = NULL;
struct ocfs2_alloc_context *meta_ac = NULL;
handle_t *handle = NULL;
struct buffer_head *new_bh = NULL;
struct ocfs2_dir_entry * de;
struct super_block *sb = osb->sb;
struct ocfs2_extent_tree et;
mlog_entry_void();
......@@ -1479,7 +1518,8 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
spin_lock(&OCFS2_I(dir)->ip_lock);
if (dir_i_size == ocfs2_clusters_to_bytes(sb, OCFS2_I(dir)->ip_clusters)) {
spin_unlock(&OCFS2_I(dir)->ip_lock);
num_free_extents = ocfs2_num_free_extents(osb, dir, fe);
ocfs2_init_dinode_extent_tree(&et, dir, parent_fe_bh);
num_free_extents = ocfs2_num_free_extents(osb, dir, &et);
if (num_free_extents < 0) {
status = num_free_extents;
mlog_errno(status);
......@@ -1487,7 +1527,7 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
}
if (!num_free_extents) {
status = ocfs2_reserve_new_metadata(osb, fe, &meta_ac);
status = ocfs2_reserve_new_metadata(osb, el, &meta_ac);
if (status < 0) {
if (status != -ENOSPC)
mlog_errno(status);
......@@ -1502,7 +1542,7 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
goto bail;
}
credits = ocfs2_calc_extend_credits(sb, fe, 1);
credits = ocfs2_calc_extend_credits(sb, el, 1);
} else {
spin_unlock(&OCFS2_I(dir)->ip_lock);
credits = OCFS2_SIMPLE_DIR_EXTEND_CREDITS;
......@@ -1568,7 +1608,6 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
if (meta_ac)
ocfs2_free_alloc_context(meta_ac);
if (new_bh)
brelse(new_bh);
mlog_exit(status);
......@@ -1696,7 +1735,6 @@ static int ocfs2_find_dir_space_el(struct inode *dir, const char *name,
status = 0;
bail:
if (bh)
brelse(bh);
mlog_exit(status);
......@@ -1756,7 +1794,6 @@ int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb,
*ret_de_bh = bh;
bh = NULL;
out:
if (bh)
brelse(bh);
return ret;
}
......@@ -2024,8 +2024,7 @@ static int ocfs2_inode_lock_update(struct inode *inode,
} else {
/* Boo, we have to go to disk. */
/* read bh, cast, ocfs2_refresh_inode */
status = ocfs2_read_block(OCFS2_SB(inode->i_sb), oi->ip_blkno,
bh, OCFS2_BH_CACHED, inode);
status = ocfs2_read_block(inode, oi->ip_blkno, bh);
if (status < 0) {
mlog_errno(status);
goto bail_refresh;
......@@ -2086,11 +2085,7 @@ static int ocfs2_assign_bh(struct inode *inode,
return 0;
}
status = ocfs2_read_block(OCFS2_SB(inode->i_sb),
OCFS2_I(inode)->ip_blkno,
ret_bh,
OCFS2_BH_CACHED,
inode);
status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, ret_bh);
if (status < 0)
mlog_errno(status);
......
......@@ -293,8 +293,7 @@ static int ocfs2_last_eb_is_empty(struct inode *inode,
struct ocfs2_extent_block *eb;
struct ocfs2_extent_list *el;
ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), last_eb_blk,
&eb_bh, OCFS2_BH_CACHED, inode);
ret = ocfs2_read_block(inode, last_eb_blk, &eb_bh);
if (ret) {
mlog_errno(ret);
goto out;
......@@ -382,9 +381,9 @@ static int ocfs2_figure_hole_clusters(struct inode *inode,
if (le64_to_cpu(eb->h_next_leaf_blk) == 0ULL)
goto no_more_extents;
ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
ret = ocfs2_read_block(inode,
le64_to_cpu(eb->h_next_leaf_blk),
&next_eb_bh, OCFS2_BH_CACHED, inode);
&next_eb_bh);
if (ret) {
mlog_errno(ret);
goto out;
......@@ -551,6 +550,66 @@ static void ocfs2_relative_extent_offsets(struct super_block *sb,
*num_clusters = le16_to_cpu(rec->e_leaf_clusters) - coff;
}
/*
 * Look up the physical cluster backing virtual cluster v_cluster in the
 * extent tree rooted at el.
 *
 * If el is not a leaf list (l_tree_depth != 0) the leaf block covering
 * v_cluster is located with ocfs2_find_leaf() first.  On success
 * *p_cluster receives the physical cluster and, when num_clusters is
 * non-NULL, *num_clusters receives the number of clusters left in the
 * matching extent record starting at v_cluster.
 *
 * Returns 0 on success, the error from ocfs2_find_leaf(), or -EROFS when
 * the leaf has a non-zero tree depth, no record covers v_cluster, or the
 * matching record has a zero block number.
 */
int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
			     u32 *p_cluster, u32 *num_clusters,
			     struct ocfs2_extent_list *el)
{
	int ret = 0, i;
	struct buffer_head *eb_bh = NULL;
	struct ocfs2_extent_block *eb;
	struct ocfs2_extent_rec *rec;
	u32 coff;

	if (el->l_tree_depth) {
		ret = ocfs2_find_leaf(inode, el, v_cluster, &eb_bh);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		eb = (struct ocfs2_extent_block *) eb_bh->b_data;
		el = &eb->h_list;

		if (el->l_tree_depth) {
			ocfs2_error(inode->i_sb,
				    "Inode %lu has non zero tree depth in "
				    "xattr leaf block %llu\n", inode->i_ino,
				    (unsigned long long)eb_bh->b_blocknr);
			ret = -EROFS;
			goto out;
		}
	}

	i = ocfs2_search_extent_list(el, v_cluster);
	if (i == -1) {
		ret = -EROFS;
		mlog_errno(ret);
		goto out;
	}

	rec = &el->l_recs[i];
	BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos));

	if (!rec->e_blkno) {
		ocfs2_error(inode->i_sb, "Inode %lu has bad extent "
			    "record (%u, %u, 0) in xattr", inode->i_ino,
			    le32_to_cpu(rec->e_cpos),
			    ocfs2_rec_clusters(el, rec));
		ret = -EROFS;
		goto out;
	}

	/* Offset of v_cluster inside the matching record. */
	coff = v_cluster - le32_to_cpu(rec->e_cpos);
	*p_cluster = ocfs2_blocks_to_clusters(inode->i_sb,
					      le64_to_cpu(rec->e_blkno));
	*p_cluster = *p_cluster + coff;

	if (num_clusters)
		*num_clusters = ocfs2_rec_clusters(el, rec) - coff;

out:
	/* brelse() accepts NULL, so no guard is needed. */
	brelse(eb_bh);
	return ret;
}
int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
u32 *p_cluster, u32 *num_clusters,
unsigned int *extent_flags)
......@@ -571,8 +630,7 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
if (ret == 0)
goto out;
ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), OCFS2_I(inode)->ip_blkno,
&di_bh, OCFS2_BH_CACHED, inode);
ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &di_bh);
if (ret) {
mlog_errno(ret);
goto out;
......
......@@ -53,4 +53,8 @@ int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno,
int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u64 map_start, u64 map_len);
int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
u32 *p_cluster, u32 *num_clusters,
struct ocfs2_extent_list *el);
#endif /* _EXTENT_MAP_H */
This diff is collapsed.
......@@ -28,9 +28,12 @@
extern const struct file_operations ocfs2_fops;
extern const struct file_operations ocfs2_dops;
extern const struct file_operations ocfs2_fops_no_plocks;
extern const struct file_operations ocfs2_dops_no_plocks;
extern const struct inode_operations ocfs2_file_iops;
extern const struct inode_operations ocfs2_special_file_iops;
struct ocfs2_alloc_context;
enum ocfs2_alloc_restarted;
struct ocfs2_file_private {
struct file *fp_file;
......@@ -38,12 +41,7 @@ struct ocfs2_file_private {
struct ocfs2_lock_res fp_flock;
};
enum ocfs2_alloc_restarted {
RESTART_NONE = 0,
RESTART_TRANS,
RESTART_META
};
int ocfs2_do_extend_allocation(struct ocfs2_super *osb,
int ocfs2_add_inode_data(struct ocfs2_super *osb,
struct inode *inode,
u32 *logical_offset,
u32 clusters_to_add,
......@@ -55,10 +53,6 @@ int ocfs2_do_extend_allocation(struct ocfs2_super *osb,
enum ocfs2_alloc_restarted *reason_ret);
int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size,
u64 zero_to);
int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di,
u32 clusters_to_add, u32 extents_to_split,
struct ocfs2_alloc_context **data_ac,
struct ocfs2_alloc_context **meta_ac);
int ocfs2_setattr(struct dentry *dentry, struct iattr *attr);
int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
struct kstat *stat);
......
......@@ -49,6 +49,7 @@
#include "symlink.h"
#include "sysfile.h"
#include "uptodate.h"
#include "xattr.h"
#include "buffer_head_io.h"
......@@ -219,6 +220,7 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
struct super_block *sb;
struct ocfs2_super *osb;
int status = -EINVAL;
int use_plocks = 1;
mlog_entry("(0x%p, size:%llu)\n", inode,
(unsigned long long)le64_to_cpu(fe->i_size));
......@@ -226,6 +228,10 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
sb = inode->i_sb;
osb = OCFS2_SB(sb);
if ((osb->s_mount_opt & OCFS2_MOUNT_LOCALFLOCKS) ||
ocfs2_mount_local(osb) || !ocfs2_stack_supports_plocks())
use_plocks = 0;
/* this means that read_inode cannot create a superblock inode
* today. change if needed. */
if (!OCFS2_IS_VALID_DINODE(fe) ||
......@@ -295,13 +301,19 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
switch (inode->i_mode & S_IFMT) {
case S_IFREG:
if (use_plocks)
inode->i_fop = &ocfs2_fops;
else
inode->i_fop = &ocfs2_fops_no_plocks;
inode->i_op = &ocfs2_file_iops;
i_size_write(inode, le64_to_cpu(fe->i_size));
break;
case S_IFDIR:
inode->i_op = &ocfs2_dir_iops;
if (use_plocks)
inode->i_fop = &ocfs2_dops;
else
inode->i_fop = &ocfs2_dops_no_plocks;
i_size_write(inode, le64_to_cpu(fe->i_size));
break;
case S_IFLNK:
......@@ -448,8 +460,11 @@ static int ocfs2_read_locked_inode(struct inode *inode,
}
}
status = ocfs2_read_block(osb, args->fi_blkno, &bh, 0,
can_lock ? inode : NULL);
if (can_lock)
status = ocfs2_read_blocks(inode, args->fi_blkno, 1, &bh,
OCFS2_BH_IGNORE_CACHE);
else
status = ocfs2_read_blocks_sync(osb, args->fi_blkno, 1, &bh);
if (status < 0) {
mlog_errno(status);
goto bail;
......@@ -522,6 +537,9 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb,
* data and fast symlinks.
*/
if (fe->i_clusters) {
if (ocfs2_should_order_data(inode))
ocfs2_begin_ordered_truncate(inode, 0);
handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
if (IS_ERR(handle)) {
status = PTR_ERR(handle);
......@@ -730,6 +748,13 @@ static int ocfs2_wipe_inode(struct inode *inode,
goto bail_unlock_dir;
}
/*Free extended attribute resources associated with this inode.*/
status = ocfs2_xattr_remove(inode, di_bh);
if (status < 0) {
mlog_errno(status);
goto bail_unlock_dir;
}
status = ocfs2_remove_inode(inode, di_bh, orphan_dir_inode,
orphan_dir_bh);
if (status < 0)
......@@ -1081,6 +1106,8 @@ void ocfs2_clear_inode(struct inode *inode)
oi->ip_last_trans = 0;
oi->ip_dir_start_lookup = 0;
oi->ip_blkno = 0ULL;
jbd2_journal_release_jbd_inode(OCFS2_SB(inode->i_sb)->journal->j_journal,
&oi->ip_jinode);
bail:
mlog_exit_void();
......@@ -1106,58 +1133,6 @@ void ocfs2_drop_inode(struct inode *inode)
mlog_exit_void();
}
/*
 * ocfs2_bread - read one logical block of @inode.
 *
 * @block is a logical block offset inside the inode.  It is translated to a
 * physical block number with ocfs2_extent_map_get_blocks() while holding
 * ip_alloc_sem for reading, and the physical block is then read with
 * ocfs2_read_block() using OCFS2_BH_CACHED.  A non-zero @reada additionally
 * sets OCFS2_BH_READAHEAD in the read flags.
 *
 * Return contract:
 *  - success: the buffer head is returned and *err is set to 0;
 *  - mapping or read failure: NULL is returned and *err is set to -EIO (the
 *    underlying error code is not propagated to the caller);
 *  - @block at or beyond i_size: NULL is returned and *err is left
 *    untouched.  This is only tolerated for readahead; a non-readahead
 *    caller trips the BUG_ON().
 *
 * TODO: this should probably be merged into ocfs2_get_block
 *
 * However, you now need to pay attention to the cont_prepare_write()
 * stuff in ocfs2_get_block (that is, ocfs2_get_block pretty much
 * expects never to extend).
 */
struct buffer_head *ocfs2_bread(struct inode *inode,
				int block, int *err, int reada)
{
	struct buffer_head *bh = NULL;
	int tmperr;
	u64 p_blkno;
	int readflags = OCFS2_BH_CACHED;

	if (reada)
		readflags |= OCFS2_BH_READAHEAD;

	if (((u64)block << inode->i_sb->s_blocksize_bits) >=
	    i_size_read(inode)) {
		BUG_ON(!reada);
		return NULL;
	}

	down_read(&OCFS2_I(inode)->ip_alloc_sem);
	tmperr = ocfs2_extent_map_get_blocks(inode, block, &p_blkno, NULL,
					     NULL);
	up_read(&OCFS2_I(inode)->ip_alloc_sem);
	if (tmperr < 0) {
		mlog_errno(tmperr);
		goto fail;
	}

	tmperr = ocfs2_read_block(OCFS2_SB(inode->i_sb), p_blkno, &bh,
				  readflags, inode);
	if (tmperr < 0)
		goto fail;

	*err = 0;
	return bh;

fail:
	/* brelse() accepts a NULL buffer head, so no guard is needed. */
	brelse(bh);

	*err = -EIO;
	return NULL;
}
/*
* This is called from our getattr.
*/
......
......@@ -40,6 +40,9 @@ struct ocfs2_inode_info
/* protects allocation changes on this inode. */
struct rw_semaphore ip_alloc_sem;
/* protects extended attribute changes on this inode */
struct rw_semaphore ip_xattr_sem;
/* These fields are protected by ip_lock */
spinlock_t ip_lock;
u32 ip_open_count;
......@@ -68,6 +71,7 @@ struct ocfs2_inode_info
struct ocfs2_extent_map ip_extent_map;
struct inode vfs_inode;
struct jbd2_inode ip_jinode;
};
/*
......@@ -113,8 +117,6 @@ extern struct kmem_cache *ocfs2_inode_cache;
extern const struct address_space_operations ocfs2_aops;
struct buffer_head *ocfs2_bread(struct inode *inode, int block,
int *err, int reada);
void ocfs2_clear_inode(struct inode *inode);
void ocfs2_delete_inode(struct inode *inode);
void ocfs2_drop_inode(struct inode *inode);
......
......@@ -102,7 +102,6 @@ static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
bail:
mutex_unlock(&inode->i_mutex);
if (bh)
brelse(bh);
mlog_exit(status);
......
......@@ -215,9 +215,9 @@ static int ocfs2_commit_cache(struct ocfs2_super *osb)
goto finally;
}
journal_lock_updates(journal->j_journal);
status = journal_flush(journal->j_journal);
journal_unlock_updates(journal->j_journal);
jbd2_journal_lock_updates(journal->j_journal);
status = jbd2_journal_flush(journal->j_journal);
jbd2_journal_unlock_updates(journal->j_journal);
if (status < 0) {
up_write(&journal->j_trans_barrier);
mlog_errno(status);
......@@ -264,7 +264,7 @@ handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs)
down_read(&osb->journal->j_trans_barrier);
handle = journal_start(journal, max_buffs);
handle = jbd2_journal_start(journal, max_buffs);
if (IS_ERR(handle)) {
up_read(&osb->journal->j_trans_barrier);
......@@ -290,7 +290,7 @@ int ocfs2_commit_trans(struct ocfs2_super *osb,
BUG_ON(!handle);
ret = journal_stop(handle);
ret = jbd2_journal_stop(handle);
if (ret < 0)
mlog_errno(ret);
......@@ -304,7 +304,7 @@ int ocfs2_commit_trans(struct ocfs2_super *osb,
* transaction. extend_trans will either extend the current handle by
* nblocks, or commit it and start a new one with nblocks credits.
*
* This might call journal_restart() which will commit dirty buffers
* This might call jbd2_journal_restart() which will commit dirty buffers
* and then restart the transaction. Before calling
* ocfs2_extend_trans(), any changed blocks should have been
* dirtied. After calling it, all blocks which need to be changed must
......@@ -332,7 +332,7 @@ int ocfs2_extend_trans(handle_t *handle, int nblocks)
#ifdef CONFIG_OCFS2_DEBUG_FS
status = 1;
#else
status = journal_extend(handle, nblocks);
status = jbd2_journal_extend(handle, nblocks);
if (status < 0) {
mlog_errno(status);
goto bail;
......@@ -340,8 +340,10 @@ int ocfs2_extend_trans(handle_t *handle, int nblocks)
#endif
if (status > 0) {
mlog(0, "journal_extend failed, trying journal_restart\n");
status = journal_restart(handle, nblocks);
mlog(0,
"jbd2_journal_extend failed, trying "
"jbd2_journal_restart\n");
status = jbd2_journal_restart(handle, nblocks);
if (status < 0) {
mlog_errno(status);
goto bail;
......@@ -393,11 +395,11 @@ int ocfs2_journal_access(handle_t *handle,
switch (type) {
case OCFS2_JOURNAL_ACCESS_CREATE:
case OCFS2_JOURNAL_ACCESS_WRITE:
status = journal_get_write_access(handle, bh);
status = jbd2_journal_get_write_access(handle, bh);
break;
case OCFS2_JOURNAL_ACCESS_UNDO:
status = journal_get_undo_access(handle, bh);
status = jbd2_journal_get_undo_access(handle, bh);
break;
default:
......@@ -422,7 +424,7 @@ int ocfs2_journal_dirty(handle_t *handle,
mlog_entry("(bh->b_blocknr=%llu)\n",
(unsigned long long)bh->b_blocknr);
status = journal_dirty_metadata(handle, bh);
status = jbd2_journal_dirty_metadata(handle, bh);
if (status < 0)
mlog(ML_ERROR, "Could not dirty metadata buffer. "
"(bh->b_blocknr=%llu)\n",
......@@ -432,6 +434,7 @@ int ocfs2_journal_dirty(handle_t *handle,
return status;
}
#ifdef CONFIG_OCFS2_COMPAT_JBD
int ocfs2_journal_dirty_data(handle_t *handle,
struct buffer_head *bh)
{
......@@ -443,8 +446,9 @@ int ocfs2_journal_dirty_data(handle_t *handle,
return err;
}
#endif
#define OCFS2_DEFAULT_COMMIT_INTERVAL (HZ * JBD_DEFAULT_MAX_COMMIT_AGE)
#define OCFS2_DEFAULT_COMMIT_INTERVAL (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE)
void ocfs2_set_journal_params(struct ocfs2_super *osb)
{
......@@ -457,9 +461,9 @@ void ocfs2_set_journal_params(struct ocfs2_super *osb)
spin_lock(&journal->j_state_lock);
journal->j_commit_interval = commit_interval;
if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER)
journal->j_flags |= JFS_BARRIER;
journal->j_flags |= JBD2_BARRIER;
else
journal->j_flags &= ~JFS_BARRIER;
journal->j_flags &= ~JBD2_BARRIER;
spin_unlock(&journal->j_state_lock);
}
......@@ -524,14 +528,14 @@ int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty)
mlog(0, "inode->ip_clusters = %u\n", OCFS2_I(inode)->ip_clusters);
/* call the kernels journal init function now */
j_journal = journal_init_inode(inode);
j_journal = jbd2_journal_init_inode(inode);
if (j_journal == NULL) {
mlog(ML_ERROR, "Linux journal layer error\n");
status = -EINVAL;
goto done;
}
mlog(0, "Returned from journal_init_inode\n");
mlog(0, "Returned from jbd2_journal_init_inode\n");
mlog(0, "j_journal->j_maxlen = %u\n", j_journal->j_maxlen);
*dirty = (le32_to_cpu(di->id1.journal1.ij_flags) &
......@@ -550,7 +554,6 @@ int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty)
if (status < 0) {
if (inode_lock)
ocfs2_inode_unlock(inode, 1);
if (bh != NULL)
brelse(bh);
if (inode) {
OCFS2_I(inode)->ip_open_count--;
......@@ -639,7 +642,7 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb)
if (journal->j_state != OCFS2_JOURNAL_LOADED)
goto done;
/* need to inc inode use count as journal_destroy will iput. */
/* need to inc inode use count - jbd2_journal_destroy will iput. */
if (!igrab(inode))
BUG();
......@@ -668,9 +671,9 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb)
BUG_ON(atomic_read(&(osb->journal->j_num_trans)) != 0);
if (ocfs2_mount_local(osb)) {
journal_lock_updates(journal->j_journal);
status = journal_flush(journal->j_journal);
journal_unlock_updates(journal->j_journal);
jbd2_journal_lock_updates(journal->j_journal);
status = jbd2_journal_flush(journal->j_journal);
jbd2_journal_unlock_updates(journal->j_journal);
if (status < 0)
mlog_errno(status);
}
......@@ -686,7 +689,7 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb)
}
/* Shutdown the kernel journal system */
journal_destroy(journal->j_journal);
jbd2_journal_destroy(journal->j_journal);
OCFS2_I(inode)->ip_open_count--;
......@@ -711,15 +714,15 @@ static void ocfs2_clear_journal_error(struct super_block *sb,
{
int olderr;
olderr = journal_errno(journal);
olderr = jbd2_journal_errno(journal);
if (olderr) {
mlog(ML_ERROR, "File system error %d recorded in "
"journal %u.\n", olderr, slot);
mlog(ML_ERROR, "File system on device %s needs checking.\n",
sb->s_id);
journal_ack_err(journal);
journal_clear_err(journal);
jbd2_journal_ack_err(journal);
jbd2_journal_clear_err(journal);
}
}
......@@ -734,7 +737,7 @@ int ocfs2_journal_load(struct ocfs2_journal *journal, int local, int replayed)
osb = journal->j_osb;
status = journal_load(journal->j_journal);
status = jbd2_journal_load(journal->j_journal);
if (status < 0) {
mlog(ML_ERROR, "Failed to load journal!\n");
goto done;
......@@ -778,7 +781,7 @@ int ocfs2_journal_wipe(struct ocfs2_journal *journal, int full)
BUG_ON(!journal);
status = journal_wipe(journal->j_journal, full);
status = jbd2_journal_wipe(journal->j_journal, full);
if (status < 0) {
mlog_errno(status);
goto bail;
......@@ -847,9 +850,8 @@ static int ocfs2_force_read_journal(struct inode *inode)
/* We are reading journal data which should not
* be put in the uptodate cache */
status = ocfs2_read_blocks(OCFS2_SB(inode->i_sb),
p_blkno, p_blocks, bhs, 0,
NULL);
status = ocfs2_read_blocks_sync(OCFS2_SB(inode->i_sb),
p_blkno, p_blocks, bhs);
if (status < 0) {
mlog_errno(status);
goto bail;
......@@ -865,7 +867,6 @@ static int ocfs2_force_read_journal(struct inode *inode)
bail:
for(i = 0; i < CONCURRENT_JOURNAL_FILL; i++)
if (bhs[i])
brelse(bhs[i]);
mlog_exit(status);
return status;
......@@ -1133,7 +1134,8 @@ static int ocfs2_read_journal_inode(struct ocfs2_super *osb,
}
SET_INODE_JOURNAL(inode);
status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, bh, 0, inode);
status = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1, bh,
OCFS2_BH_IGNORE_CACHE);
if (status < 0) {
mlog_errno(status);
goto bail;
......@@ -1229,19 +1231,19 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
}
mlog(0, "calling journal_init_inode\n");
journal = journal_init_inode(inode);
journal = jbd2_journal_init_inode(inode);
if (journal == NULL) {
mlog(ML_ERROR, "Linux journal layer error\n");
status = -EIO;
goto done;
}
status = journal_load(journal);
status = jbd2_journal_load(journal);
if (status < 0) {
mlog_errno(status);
if (!igrab(inode))
BUG();
journal_destroy(journal);
jbd2_journal_destroy(journal);
goto done;
}
......@@ -1249,9 +1251,9 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
/* wipe the journal */
mlog(0, "flushing the journal.\n");
journal_lock_updates(journal);
status = journal_flush(journal);
journal_unlock_updates(journal);
jbd2_journal_lock_updates(journal);
status = jbd2_journal_flush(journal);
jbd2_journal_unlock_updates(journal);
if (status < 0)
mlog_errno(status);
......@@ -1272,7 +1274,7 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
if (!igrab(inode))
BUG();
journal_destroy(journal);
jbd2_journal_destroy(journal);
done:
/* drop the lock on this nodes journal */
......@@ -1282,7 +1284,6 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
if (inode)
iput(inode);
if (bh)
brelse(bh);
mlog_exit(status);
......
......@@ -27,7 +27,12 @@
#define OCFS2_JOURNAL_H
#include <linux/fs.h>
#include <linux/jbd.h>
#ifndef CONFIG_OCFS2_COMPAT_JBD
# include <linux/jbd2.h>
#else
# include <linux/jbd.h>
# include "ocfs2_jbd_compat.h"
#endif
enum ocfs2_journal_state {
OCFS2_JOURNAL_FREE = 0,
......@@ -215,7 +220,7 @@ static inline void ocfs2_checkpoint_inode(struct inode *inode)
* buffer. Will have to call ocfs2_journal_dirty once
* we've actually dirtied it. Type is one of the OCFS2_JOURNAL_ACCESS_* values (CREATE, WRITE or UNDO).
* ocfs2_journal_dirty - Mark a journalled buffer as having dirty data.
* ocfs2_journal_dirty_data - Indicate that a data buffer should go out before
* ocfs2_jbd2_file_inode - Mark an inode so that its data goes out before
* the current handle commits.
*/
......@@ -268,8 +273,10 @@ int ocfs2_journal_access(handle_t *handle,
*/
int ocfs2_journal_dirty(handle_t *handle,
struct buffer_head *bh);
#ifdef CONFIG_OCFS2_COMPAT_JBD
int ocfs2_journal_dirty_data(handle_t *handle,
struct buffer_head *bh);
#endif
/*
* Credit Macros:
......@@ -283,6 +290,9 @@ int ocfs2_journal_dirty_data(handle_t *handle,
/* simple file updates like chmod, etc. */
#define OCFS2_INODE_UPDATE_CREDITS 1
/* extended attribute block update */
#define OCFS2_XATTR_BLOCK_UPDATE_CREDITS 1
/* group extend. inode update and last group update. */
#define OCFS2_GROUP_EXTEND_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1)
......@@ -340,11 +350,23 @@ int ocfs2_journal_dirty_data(handle_t *handle,
#define OCFS2_RENAME_CREDITS (3 * OCFS2_INODE_UPDATE_CREDITS + 3 \
+ OCFS2_UNLINK_CREDITS)
/* global bitmap dinode, group desc., relinked group,
 * suballocator dinode, group desc., relinked group,
 * dinode, xattr block */
#define OCFS2_XATTR_BLOCK_CREATE_CREDITS (OCFS2_SUBALLOC_ALLOC * 2 \
					  + OCFS2_INODE_UPDATE_CREDITS \
					  + OCFS2_XATTR_BLOCK_UPDATE_CREDITS)
/*
* Please note that the caller must make sure that root_el is the root
* of extent tree. So for an inode, it should be &fe->id2.i_list. Otherwise
* the result may be wrong.
*/
static inline int ocfs2_calc_extend_credits(struct super_block *sb,
struct ocfs2_dinode *fe,
struct ocfs2_extent_list *root_el,
u32 bits_wanted)
{
int bitmap_blocks, sysfile_bitmap_blocks, dinode_blocks;
int bitmap_blocks, sysfile_bitmap_blocks, extent_blocks;
/* bitmap dinode, group desc. + relinked group. */
bitmap_blocks = OCFS2_SUBALLOC_ALLOC;
......@@ -355,16 +377,16 @@ static inline int ocfs2_calc_extend_credits(struct super_block *sb,
* however many metadata chunks needed * a remaining suballoc
* alloc. */
sysfile_bitmap_blocks = 1 +
(OCFS2_SUBALLOC_ALLOC - 1) * ocfs2_extend_meta_needed(fe);
(OCFS2_SUBALLOC_ALLOC - 1) * ocfs2_extend_meta_needed(root_el);
/* this does not include *new* metadata blocks, which are
* accounted for in sysfile_bitmap_blocks. fe +
* accounted for in sysfile_bitmap_blocks. root_el +
* prev. last_eb_blk + blocks along edge of tree.
* calc_symlink_credits passes because we just need 1
* credit for the dinode there. */
dinode_blocks = 1 + 1 + le16_to_cpu(fe->id2.i_list.l_tree_depth);
extent_blocks = 1 + 1 + le16_to_cpu(root_el->l_tree_depth);
return bitmap_blocks + sysfile_bitmap_blocks + dinode_blocks;
return bitmap_blocks + sysfile_bitmap_blocks + extent_blocks;
}
static inline int ocfs2_calc_symlink_credits(struct super_block *sb)
......@@ -415,4 +437,16 @@ static inline int ocfs2_calc_tree_trunc_credits(struct super_block *sb,
return credits;
}
/*
 * Hand the inode's embedded jbd2_inode (ip_jinode) to
 * jbd2_journal_file_inode() for @handle and return its result unchanged.
 */
static inline int ocfs2_jbd2_file_inode(handle_t *handle, struct inode *inode)
{
	struct jbd2_inode *jinode = &OCFS2_I(inode)->ip_jinode;

	return jbd2_journal_file_inode(handle, jinode);
}
/*
 * Forward a truncate of @inode down to @new_size to
 * jbd2_journal_begin_ordered_truncate(), using the inode's embedded
 * jbd2_inode (ip_jinode).  The jbd2 return value is passed through.
 */
static inline int ocfs2_begin_ordered_truncate(struct inode *inode,
					       loff_t new_size)
{
	struct jbd2_inode *jinode = &OCFS2_I(inode)->ip_jinode;

	return jbd2_journal_begin_ordered_truncate(jinode, new_size);
}
#endif /* OCFS2_JOURNAL_H */
This diff is collapsed.
......@@ -52,4 +52,8 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
u32 *bit_off,
u32 *num_bits);
void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb,
unsigned int num_clusters);
void ocfs2_la_enable_worker(struct work_struct *work);
#endif /* OCFS2_LOCALALLOC_H */
......@@ -24,6 +24,7 @@
*/
#include <linux/fs.h>
#include <linux/fcntl.h>
#define MLOG_MASK_PREFIX ML_INODE
#include <cluster/masklog.h>
......@@ -32,6 +33,7 @@
#include "dlmglue.h"
#include "file.h"
#include "inode.h"
#include "locks.h"
static int ocfs2_do_flock(struct file *file, struct inode *inode,
......@@ -123,3 +125,16 @@ int ocfs2_flock(struct file *file, int cmd, struct file_lock *fl)
else
return ocfs2_do_flock(file, inode, cmd, fl);
}
/*
 * Lock entry point that forwards POSIX lock requests to ocfs2_plock().
 *
 * Returns -ENOLCK when @fl is not a POSIX lock (FL_POSIX clear) or when
 * __mandatory_lock() reports true for the inode; otherwise
 * returns whatever ocfs2_plock() returns for this inode's block number.
 */
int ocfs2_lock(struct file *file, int cmd, struct file_lock *fl)
{
	struct inode *inode = file->f_mapping->host;

	/* Same check order as before: lock type first, then mandatory locking. */
	if (!(fl->fl_flags & FL_POSIX) || __mandatory_lock(inode))
		return -ENOLCK;

	return ocfs2_plock(OCFS2_SB(inode->i_sb)->cconn,
			   OCFS2_I(inode)->ip_blkno, file, cmd, fl);
}
......@@ -27,5 +27,6 @@
#define OCFS2_LOCKS_H
int ocfs2_flock(struct file *file, int cmd, struct file_lock *fl);
int ocfs2_lock(struct file *file, int cmd, struct file_lock *fl);
#endif /* OCFS2_LOCKS_H */
......@@ -60,6 +60,7 @@
#include "symlink.h"
#include "sysfile.h"
#include "uptodate.h"
#include "xattr.h"
#include "buffer_head_io.h"
......@@ -327,13 +328,8 @@ static int ocfs2_mknod(struct inode *dir,
if (status == -ENOSPC)
mlog(0, "Disk is full\n");
if (new_fe_bh)
brelse(new_fe_bh);
if (de_bh)
brelse(de_bh);
if (parent_fe_bh)
brelse(parent_fe_bh);
if ((status < 0) && inode)
......@@ -647,11 +643,8 @@ static int ocfs2_link(struct dentry *old_dentry,
out:
ocfs2_inode_unlock(dir, 1);
if (de_bh)
brelse(de_bh);
if (fe_bh)
brelse(fe_bh);
if (parent_fe_bh)
brelse(parent_fe_bh);
mlog_exit(err);
......@@ -851,16 +844,9 @@ static int ocfs2_unlink(struct inode *dir,
iput(orphan_dir);
}
if (fe_bh)
brelse(fe_bh);
if (dirent_bh)
brelse(dirent_bh);
if (parent_node_bh)
brelse(parent_node_bh);
if (orphan_entry_bh)
brelse(orphan_entry_bh);
mlog_exit(status);
......@@ -1372,23 +1358,14 @@ static int ocfs2_rename(struct inode *old_dir,
if (new_inode)
iput(new_inode);
if (newfe_bh)
brelse(newfe_bh);
if (old_inode_bh)
brelse(old_inode_bh);
if (old_dir_bh)
brelse(old_dir_bh);
if (new_dir_bh)
brelse(new_dir_bh);
if (new_de_bh)
brelse(new_de_bh);
if (old_de_bh)
brelse(old_de_bh);
if (old_inode_de_bh)
brelse(old_inode_de_bh);
if (orphan_entry_bh)
brelse(orphan_entry_bh);
if (insert_entry_bh)
brelse(insert_entry_bh);
mlog_exit(status);
......@@ -1492,7 +1469,6 @@ static int ocfs2_create_symlink_data(struct ocfs2_super *osb,
if (bhs) {
for(i = 0; i < blocks; i++)
if (bhs[i])
brelse(bhs[i]);
kfree(bhs);
}
......@@ -1598,7 +1574,7 @@ static int ocfs2_symlink(struct inode *dir,
u32 offset = 0;
inode->i_op = &ocfs2_symlink_inode_operations;
status = ocfs2_do_extend_allocation(osb, inode, &offset, 1, 0,
status = ocfs2_add_inode_data(osb, inode, &offset, 1, 0,
new_fe_bh,
handle, data_ac, NULL,
NULL);
......@@ -1659,11 +1635,8 @@ static int ocfs2_symlink(struct inode *dir,
ocfs2_inode_unlock(dir, 1);
if (new_fe_bh)
brelse(new_fe_bh);
if (parent_fe_bh)
brelse(parent_fe_bh);
if (de_bh)
brelse(de_bh);
if (inode_ac)
ocfs2_free_alloc_context(inode_ac);
......@@ -1759,7 +1732,6 @@ static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
iput(orphan_dir_inode);
}
if (orphan_dir_bh)
brelse(orphan_dir_bh);
mlog_exit(status);
......@@ -1780,10 +1752,9 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino);
status = ocfs2_read_block(osb,
status = ocfs2_read_block(orphan_dir_inode,
OCFS2_I(orphan_dir_inode)->ip_blkno,
&orphan_dir_bh, OCFS2_BH_CACHED,
orphan_dir_inode);
&orphan_dir_bh);
if (status < 0) {
mlog_errno(status);
goto leave;
......@@ -1829,7 +1800,6 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
(unsigned long long)OCFS2_I(inode)->ip_blkno, osb->slot_num);
leave:
if (orphan_dir_bh)
brelse(orphan_dir_bh);
mlog_exit(status);
......@@ -1898,7 +1868,6 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
}
leave:
if (target_de_bh)
brelse(target_de_bh);
mlog_exit(status);
......@@ -1918,4 +1887,8 @@ const struct inode_operations ocfs2_dir_iops = {
.setattr = ocfs2_setattr,
.getattr = ocfs2_getattr,
.permission = ocfs2_permission,
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
.listxattr = ocfs2_listxattr,
.removexattr = generic_removexattr,
};
......@@ -34,7 +34,12 @@
#include <linux/workqueue.h>
#include <linux/kref.h>
#include <linux/mutex.h>
#include <linux/jbd.h>
#ifndef CONFIG_OCFS2_COMPAT_JBD
# include <linux/jbd2.h>
#else
# include <linux/jbd.h>
# include "ocfs2_jbd_compat.h"
#endif
/* For union ocfs2_dlm_lksb */
#include "stackglue.h"
......@@ -171,9 +176,13 @@ struct ocfs2_alloc_stats
enum ocfs2_local_alloc_state
{
OCFS2_LA_UNUSED = 0,
OCFS2_LA_ENABLED,
OCFS2_LA_DISABLED
OCFS2_LA_UNUSED = 0, /* Local alloc will never be used for
* this mountpoint. */
OCFS2_LA_ENABLED, /* Local alloc is in use. */
OCFS2_LA_THROTTLED, /* Local alloc is in use, but number
* of bits has been reduced. */
OCFS2_LA_DISABLED /* Local alloc has temporarily been
* disabled. */
};
enum ocfs2_mount_options
......@@ -184,6 +193,8 @@ enum ocfs2_mount_options
OCFS2_MOUNT_ERRORS_PANIC = 1 << 3, /* Panic on errors */
OCFS2_MOUNT_DATA_WRITEBACK = 1 << 4, /* No data ordering */
OCFS2_MOUNT_LOCALFLOCKS = 1 << 5, /* No cluster aware user file locks */
OCFS2_MOUNT_NOUSERXATTR = 1 << 6, /* No user xattr */
OCFS2_MOUNT_INODE64 = 1 << 7, /* Allow inode numbers > 2^32 */
};
#define OCFS2_OSB_SOFT_RO 0x0001
......@@ -214,6 +225,7 @@ struct ocfs2_super
u32 bitmap_cpg;
u8 *uuid;
char *uuid_str;
u32 uuid_hash;
u8 *vol_label;
u64 first_cluster_group_blkno;
u32 fs_generation;
......@@ -241,6 +253,7 @@ struct ocfs2_super
int s_sectsize_bits;
int s_clustersize;
int s_clustersize_bits;
unsigned int s_xattr_inline_size;
atomic_t vol_state;
struct mutex recovery_lock;
......@@ -252,11 +265,27 @@ struct ocfs2_super
struct ocfs2_journal *journal;
unsigned long osb_commit_interval;
int local_alloc_size;
enum ocfs2_local_alloc_state local_alloc_state;
struct delayed_work la_enable_wq;
/*
* Must hold local alloc i_mutex and osb->osb_lock to change
* local_alloc_bits. Reads can be done under either lock.
*/
unsigned int local_alloc_bits;
unsigned int local_alloc_default_bits;
enum ocfs2_local_alloc_state local_alloc_state; /* protected
* by osb_lock */
struct buffer_head *local_alloc_bh;
u64 la_last_gd;
#ifdef CONFIG_OCFS2_FS_STATS
struct dentry *local_alloc_debug;
char *local_alloc_debug_buf;
#endif
/* Next two fields are for local node slot recovery during
* mount. */
int dirty;
......@@ -340,6 +369,13 @@ static inline int ocfs2_supports_inline_data(struct ocfs2_super *osb)
return 0;
}
/*
 * Returns 1 when the superblock's incompat feature mask has
 * OCFS2_FEATURE_INCOMPAT_XATTR set, 0 otherwise.
 */
static inline int ocfs2_supports_xattr(struct ocfs2_super *osb)
{
	return (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_XATTR) ? 1 : 0;
}
/* set / clear functions because cluster events can make these happen
* in parallel so we want the transitions to be atomic. this also
* means that any future flags osb_flags must be protected by spinlock
......@@ -554,6 +590,14 @@ static inline unsigned int ocfs2_pages_per_cluster(struct super_block *sb)
return pages_per_cluster;
}
/*
 * Convert a size in megabytes into a cluster count for this filesystem by
 * shifting @megs left by (20 - s_clustersize_bits).
 *
 * The build-time check rejects any OCFS2_MAX_CLUSTERSIZE larger than 1MB
 * (1048576 bytes).
 */
static inline unsigned int ocfs2_megabytes_to_clusters(struct super_block *sb,
						       unsigned int megs)
{
	int shift;

	BUILD_BUG_ON(OCFS2_MAX_CLUSTERSIZE > 1048576);

	shift = 20 - OCFS2_SB(sb)->s_clustersize_bits;
	return megs << shift;
}
static inline void ocfs2_init_inode_steal_slot(struct ocfs2_super *osb)
{
spin_lock(&osb->osb_lock);
......
......@@ -64,6 +64,7 @@
#define OCFS2_INODE_SIGNATURE "INODE01"
#define OCFS2_EXTENT_BLOCK_SIGNATURE "EXBLK01"
#define OCFS2_GROUP_DESC_SIGNATURE "GROUP01"
#define OCFS2_XATTR_BLOCK_SIGNATURE "XATTR01"
/* Compatibility flags */
#define OCFS2_HAS_COMPAT_FEATURE(sb,mask) \
......@@ -90,7 +91,8 @@
| OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC \
| OCFS2_FEATURE_INCOMPAT_INLINE_DATA \
| OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP \
| OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK)
| OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK \
| OCFS2_FEATURE_INCOMPAT_XATTR)
#define OCFS2_FEATURE_RO_COMPAT_SUPP OCFS2_FEATURE_RO_COMPAT_UNWRITTEN
/*
......@@ -127,10 +129,6 @@
/* Support for data packed into inode blocks */
#define OCFS2_FEATURE_INCOMPAT_INLINE_DATA 0x0040
/* Support for the extended slot map */
#define OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP 0x100
/*
* Support for alternate, userspace cluster stacks. If set, the superblock
* field s_cluster_info contains a tag for the alternate stack in use as
......@@ -142,6 +140,12 @@
*/
#define OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK 0x0080
/* Support for the extended slot map */
#define OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP 0x100
/* Support for extended attributes */
#define OCFS2_FEATURE_INCOMPAT_XATTR 0x0200
/*
* backup superblock flag is used to indicate that this volume
* has backup superblocks.
......@@ -299,6 +303,12 @@ struct ocfs2_new_group_input {
*/
#define OCFS2_DEFAULT_LOCAL_ALLOC_SIZE 8
/*
* Inline extended attribute size (in bytes)
* The value chosen should be aligned to 16 byte boundaries.
*/
#define OCFS2_MIN_XATTR_INLINE_SIZE 256
struct ocfs2_system_inode_info {
char *si_name;
int si_iflags;
......@@ -563,7 +573,7 @@ struct ocfs2_super_block {
/*40*/ __le16 s_max_slots; /* Max number of simultaneous mounts
before tunefs required */
__le16 s_tunefs_flag;
__le32 s_reserved1;
__le32 s_uuid_hash; /* hash value of uuid */
__le64 s_first_cluster_group; /* Block offset of 1st cluster
* group header */
/*50*/ __u8 s_label[OCFS2_MAX_VOL_LABEL_LEN]; /* Label for mounting, etc. */
......@@ -571,7 +581,11 @@ struct ocfs2_super_block {
/*A0*/ struct ocfs2_cluster_info s_cluster_info; /* Selected userspace
stack. Only valid
with INCOMPAT flag. */
/*B8*/ __le64 s_reserved2[17]; /* Fill out superblock */
/*B8*/ __le16 s_xattr_inline_size; /* extended attribute inline size
for this fs*/
__le16 s_reserved0;
__le32 s_reserved1;
/*C0*/ __le64 s_reserved2[16]; /* Fill out superblock */
/*140*/
/*
......@@ -621,7 +635,8 @@ struct ocfs2_dinode {
belongs to */
__le16 i_suballoc_bit; /* Bit offset in suballocator
block group */
/*10*/ __le32 i_reserved0;
/*10*/ __le16 i_reserved0;
__le16 i_xattr_inline_size;
__le32 i_clusters; /* Cluster count */
__le32 i_uid; /* Owner UID */
__le32 i_gid; /* Owning GID */
......@@ -640,11 +655,12 @@ struct ocfs2_dinode {
__le32 i_atime_nsec;
__le32 i_ctime_nsec;
__le32 i_mtime_nsec;
__le32 i_attr;
/*70*/ __le32 i_attr;
__le16 i_orphaned_slot; /* Only valid when OCFS2_ORPHANED_FL
was set in i_flags */
__le16 i_dyn_features;
/*70*/ __le64 i_reserved2[8];
__le64 i_xattr_loc;
/*80*/ __le64 i_reserved2[7];
/*B8*/ union {
__le64 i_pad1; /* Generic way to refer to this
64bit union */
......@@ -715,6 +731,136 @@ struct ocfs2_group_desc
/*40*/ __u8 bg_bitmap[0];
};
/*
* On disk extended attribute structure for OCFS2.
*/
/*
 * ocfs2_xattr_entry describes one extended attribute.
 *
 * Note that it can be stored in an inode, an xattr block or an xattr bucket.
 */
struct ocfs2_xattr_entry {
__le32 xe_name_hash; /* hash value of xattr prefix+suffix. */
__le16 xe_name_offset; /* byte offset from the 1st entry in the
			* local xattr storage (inode, xattr block
			* or xattr bucket). */
__u8 xe_name_len; /* xattr name length, doesn't include the prefix. */
__u8 xe_type; /* the low 7 bits (OCFS2_XATTR_TYPE_MASK) indicate
	       * the name prefix's type and the highest bit
	       * (OCFS2_XATTR_ENTRY_LOCAL) indicates whether
	       * the EA is stored in the local storage. */
__le64 xe_value_size; /* real xattr value length. */
};
/*
 * On disk structure for xattr header.
 *
 * One ocfs2_xattr_header records how many ocfs2_xattr_entry records are in
 * the local xattr storage, followed by the entries themselves.
 */
struct ocfs2_xattr_header {
__le16 xh_count; /* contains the count of how
		  * many records are in the
		  * local xattr storage. */
__le16 xh_free_start; /* current offset for storing
		       * xattr. */
__le16 xh_name_value_len; /* total length of the names and
			   * values in this bucket. */
__le16 xh_num_buckets; /* number of buckets in one extent
			* record, only valid in the
			* first bucket. */
__le64 xh_csum;
struct ocfs2_xattr_entry xh_entries[0]; /* xattr entry list. */
};
/*
 * On disk structure for xattr value root.
 *
 * It is used when an extended attribute's value is large and is saved in
 * outside clusters. The value is then stored in a b-tree, like file content.
 */
struct ocfs2_xattr_value_root {
/*00*/ __le32 xr_clusters; /* clusters covered by xattr value. */
__le32 xr_reserved0;
__le64 xr_last_eb_blk; /* Pointer to last extent block */
/*10*/ struct ocfs2_extent_list xr_list; /* Extent record list */
};
/*
 * On disk structure for xattr tree root.
 *
 * It is used when there are too many extended attributes for one file. These
 * attributes will be organized and stored in an indexed b-tree.
 *
 * The field layout (cluster count, reserved word, last extent block pointer,
 * extent list) matches struct ocfs2_xattr_value_root.
 */
struct ocfs2_xattr_tree_root {
/*00*/ __le32 xt_clusters; /* clusters covered by xattr. */
__le32 xt_reserved0;
__le64 xt_last_eb_blk; /* Pointer to last extent block */
/*10*/ struct ocfs2_extent_list xt_list; /* Extent record list */
};
/* NOTE(review): presumably a value for ocfs2_xattr_block.xb_flags marking a
 * block that holds an xattr tree root -- confirm against the users. */
#define OCFS2_XATTR_INDEXED 0x1
#define OCFS2_HASH_SHIFT 5
/* OCFS2_XATTR_SIZE() rounds a size up to the next multiple of
 * (OCFS2_XATTR_ROUND + 1), i.e. 4 bytes. */
#define OCFS2_XATTR_ROUND 3
#define OCFS2_XATTR_SIZE(size) (((size) + OCFS2_XATTR_ROUND) & \
~(OCFS2_XATTR_ROUND))
/* Size of one xattr bucket in bytes. */
#define OCFS2_XATTR_BUCKET_SIZE 4096
/* Number of blocks a bucket spans when the block size is
 * OCFS2_MIN_BLOCKSIZE. */
#define OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET (OCFS2_XATTR_BUCKET_SIZE \
/ OCFS2_MIN_BLOCKSIZE)
/*
 * On disk structure for xattr block.
 *
 * A fixed header is followed by a union that holds either an xattr header
 * (entries stored in this block) or an xattr tree root.
 */
struct ocfs2_xattr_block {
/*00*/ __u8 xb_signature[8]; /* Signature for verification */
__le16 xb_suballoc_slot; /* Slot suballocator this
			  * block belongs to. */
__le16 xb_suballoc_bit; /* Bit offset in suballocator
			 * block group */
__le32 xb_fs_generation; /* Must match super block */
/*10*/ __le64 xb_blkno; /* Offset on disk, in blocks */
__le64 xb_csum;
/*20*/ __le16 xb_flags; /* Indicates whether this block contains
			 * real xattrs or an xattr tree. */
__le16 xb_reserved0;
__le32 xb_reserved1;
__le64 xb_reserved2;
/*30*/ union {
struct ocfs2_xattr_header xb_header; /* xattr header if this
				      * block contains xattrs */
struct ocfs2_xattr_tree_root xb_root;/* xattr tree root if this
				      * block contains an xattr
				      * tree. */
} xb_attrs;
};
#define OCFS2_XATTR_ENTRY_LOCAL 0x80
#define OCFS2_XATTR_TYPE_MASK 0x7F
/*
 * Set (@local != 0) or clear (@local == 0) the OCFS2_XATTR_ENTRY_LOCAL bit
 * in xe->xe_type.  The remaining bits of xe_type are left as they are.
 */
static inline void ocfs2_xattr_set_local(struct ocfs2_xattr_entry *xe,
					 int local)
{
	if (!local) {
		xe->xe_type &= ~OCFS2_XATTR_ENTRY_LOCAL;
		return;
	}

	xe->xe_type |= OCFS2_XATTR_ENTRY_LOCAL;
}
/*
 * Test the "local" flag of an xattr entry.
 *
 * Returns the masked bit itself: OCFS2_XATTR_ENTRY_LOCAL when the flag is
 * set in xe->xe_type, 0 otherwise. Callers should treat it as a boolean.
 */
static inline int ocfs2_xattr_is_local(struct ocfs2_xattr_entry *xe)
{
	int local_bit = xe->xe_type & OCFS2_XATTR_ENTRY_LOCAL;

	return local_bit;
}
/*
 * Store @type in the type bits (OCFS2_XATTR_TYPE_MASK) of xe->xe_type.
 *
 * @xe:   entry whose xe_type is updated in place.
 * @type: new type value; only the bits inside OCFS2_XATTR_TYPE_MASK are used.
 *
 * The previous type bits are cleared first, so the entry ends up with
 * exactly @type rather than the bitwise union of the old and new types.
 * The OCFS2_XATTR_ENTRY_LOCAL flag is preserved. For an entry whose type
 * bits are still zero the result is the same as a plain OR.
 */
static inline void ocfs2_xattr_set_type(struct ocfs2_xattr_entry *xe, int type)
{
	xe->xe_type = (xe->xe_type & ~OCFS2_XATTR_TYPE_MASK) |
		      (type & OCFS2_XATTR_TYPE_MASK);
}
/*
 * Return the type bits of an xattr entry, i.e. xe->xe_type with the
 * OCFS2_XATTR_ENTRY_LOCAL flag masked off.
 */
static inline int ocfs2_xattr_get_type(struct ocfs2_xattr_entry *xe)
{
	int type_bits = xe->xe_type & OCFS2_XATTR_TYPE_MASK;

	return type_bits;
}
#ifdef __KERNEL__
static inline int ocfs2_fast_symlink_chars(struct super_block *sb)
{
......@@ -728,6 +874,20 @@ static inline int ocfs2_max_inline_data(struct super_block *sb)
offsetof(struct ocfs2_dinode, id2.i_data.id_data);
}
/*
 * Number of bytes available for inline data in an inode block.
 *
 * @sb: super block supplying the block size (s_blocksize).
 * @di: on-disk inode whose dynamic features and inline xattr size are read.
 *
 * Starts from the space between id2.i_data.id_data and the end of the block.
 * When OCFS2_INLINE_XATTR_FL is set in di->i_dyn_features, the inline xattr
 * area (di->i_xattr_inline_size bytes) is subtracted as well.
 */
static inline int ocfs2_max_inline_data_with_xattr(struct super_block *sb,
						   struct ocfs2_dinode *di)
{
	unsigned int xattr_bytes = le16_to_cpu(di->i_xattr_inline_size);
	int avail = sb->s_blocksize -
		    offsetof(struct ocfs2_dinode, id2.i_data.id_data);

	if (le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_XATTR_FL)
		avail -= xattr_bytes;

	return avail;
}
static inline int ocfs2_extent_recs_per_inode(struct super_block *sb)
{
int size;
......@@ -738,6 +898,24 @@ static inline int ocfs2_extent_recs_per_inode(struct super_block *sb)
return size / sizeof(struct ocfs2_extent_rec);
}
/*
 * Number of extent records that fit in an inode block.
 *
 * @sb: super block supplying the block size (s_blocksize).
 * @di: on-disk inode whose dynamic features and inline xattr size are read.
 *
 * The usable area runs from id2.i_list.l_recs to the end of the block; when
 * OCFS2_INLINE_XATTR_FL is set in di->i_dyn_features it is shortened by
 * di->i_xattr_inline_size bytes. The result is that area divided by the size
 * of one struct ocfs2_extent_rec.
 */
static inline int ocfs2_extent_recs_per_inode_with_xattr(
						struct super_block *sb,
						struct ocfs2_dinode *di)
{
	unsigned int xattr_bytes = le16_to_cpu(di->i_xattr_inline_size);
	int size = sb->s_blocksize -
		   offsetof(struct ocfs2_dinode, id2.i_list.l_recs);

	if (le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_XATTR_FL)
		size -= xattr_bytes;

	return size / sizeof(struct ocfs2_extent_rec);
}
static inline int ocfs2_chain_recs_per_inode(struct super_block *sb)
{
int size;
......@@ -801,6 +979,17 @@ static inline u64 ocfs2_backup_super_blkno(struct super_block *sb, int index)
return 0;
}
/*
 * Number of extent records that fit in an xattr block holding a tree root.
 *
 * @sb: super block supplying the block size (s_blocksize).
 *
 * The space from xb_attrs.xb_root.xt_list.l_recs to the end of the block is
 * divided by the size of one struct ocfs2_extent_rec.
 */
static inline u16 ocfs2_xattr_recs_per_xb(struct super_block *sb)
{
	int rec_area = sb->s_blocksize -
		       offsetof(struct ocfs2_xattr_block,
				xb_attrs.xb_root.xt_list.l_recs);

	return rec_area / sizeof(struct ocfs2_extent_rec);
}
#else
static inline int ocfs2_fast_symlink_chars(int blocksize)
{
......@@ -884,6 +1073,17 @@ static inline uint64_t ocfs2_backup_super_blkno(int blocksize, int index)
return 0;
}
/*
 * Non-kernel variant: number of extent records that fit in an xattr block
 * holding a tree root.
 *
 * @blocksize: block size to compute against.
 *
 * The space from xb_attrs.xb_root.xt_list.l_recs to the end of the block is
 * divided by the size of one struct ocfs2_extent_rec.
 */
static inline int ocfs2_xattr_recs_per_xb(int blocksize)
{
	int rec_area = blocksize -
		       offsetof(struct ocfs2_xattr_block,
				xb_attrs.xb_root.xt_list.l_recs);

	return rec_area / sizeof(struct ocfs2_extent_rec);
}
#endif /* __KERNEL__ */
......
/* -*- mode: c; c-basic-offset: 8; -*-
* vim: noexpandtab sw=8 ts=8 sts=0:
*
* ocfs2_jbd_compat.h
*
* Compatibility defines for JBD.
*
* Copyright (C) 2008 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License version 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#ifndef OCFS2_JBD_COMPAT_H
#define OCFS2_JBD_COMPAT_H
#ifndef CONFIG_OCFS2_COMPAT_JBD
# error Should not have been included
#endif
/*
 * Placeholder definition of struct jbd2_inode for the JBD compatibility
 * build. It carries a single unused member; the jbd2_*inode* stubs in this
 * header ignore it entirely.
 */
struct jbd2_inode {
unsigned int dummy;
};
/*
 * Map every JBD2 name used by the including code onto its JBD counterpart,
 * so that code written against the jbd2_ and JBD2_ names builds against JBD.
 */
/* Constants. */
#define JBD2_BARRIER JFS_BARRIER
#define JBD2_DEFAULT_MAX_COMMIT_AGE JBD_DEFAULT_MAX_COMMIT_AGE
/* Functions. */
#define jbd2_journal_ack_err journal_ack_err
#define jbd2_journal_clear_err journal_clear_err
#define jbd2_journal_destroy journal_destroy
#define jbd2_journal_dirty_metadata journal_dirty_metadata
#define jbd2_journal_errno journal_errno
#define jbd2_journal_extend journal_extend
#define jbd2_journal_flush journal_flush
#define jbd2_journal_force_commit journal_force_commit
#define jbd2_journal_get_write_access journal_get_write_access
#define jbd2_journal_get_undo_access journal_get_undo_access
#define jbd2_journal_init_inode journal_init_inode
#define jbd2_journal_invalidatepage journal_invalidatepage
#define jbd2_journal_load journal_load
#define jbd2_journal_lock_updates journal_lock_updates
#define jbd2_journal_restart journal_restart
#define jbd2_journal_start journal_start
#define jbd2_journal_start_commit journal_start_commit
#define jbd2_journal_stop journal_stop
#define jbd2_journal_try_to_free_buffers journal_try_to_free_buffers
#define jbd2_journal_unlock_updates journal_unlock_updates
#define jbd2_journal_wipe journal_wipe
#define jbd2_log_wait_commit log_wait_commit
/*
 * Compatibility stub: ignores @handle and @inode and always reports
 * success (0).
 */
static inline int jbd2_journal_file_inode(handle_t *handle,
					  struct jbd2_inode *inode)
{
	return 0;
}
/*
 * Compatibility stub: ignores @inode and @new_size and always reports
 * success (0).
 */
static inline int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode,
						       loff_t new_size)
{
	return 0;
}
/*
 * Compatibility stub: does nothing; @jinode and @inode are left untouched.
 */
static inline void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode,
					       struct inode *inode)
{
}
/*
 * Compatibility stub: does nothing; @journal and @jinode are left untouched.
 */
static inline void jbd2_journal_release_jbd_inode(journal_t *journal,
						  struct jbd2_inode *jinode)
{
}
#endif /* OCFS2_JBD_COMPAT_H */
......@@ -200,7 +200,7 @@ static int update_backups(struct inode * inode, u32 clusters, char *data)
if (cluster > clusters)
break;
ret = ocfs2_read_block(osb, blkno, &backup, 0, NULL);
ret = ocfs2_read_blocks_sync(osb, blkno, 1, &backup);
if (ret < 0) {
mlog_errno(ret);
break;
......@@ -236,8 +236,8 @@ static void ocfs2_update_super_and_backups(struct inode *inode,
* update the superblock last.
* It doesn't matter if the write failed.
*/
ret = ocfs2_read_block(osb, OCFS2_SUPER_BLOCK_BLKNO,
&super_bh, 0, NULL);
ret = ocfs2_read_blocks_sync(osb, OCFS2_SUPER_BLOCK_BLKNO, 1,
&super_bh);
if (ret < 0) {
mlog_errno(ret);
goto out;
......@@ -332,8 +332,7 @@ int ocfs2_group_extend(struct inode * inode, int new_clusters)
lgd_blkno = ocfs2_which_cluster_group(main_bm_inode,
first_new_cluster - 1);
ret = ocfs2_read_block(osb, lgd_blkno, &group_bh, OCFS2_BH_CACHED,
main_bm_inode);
ret = ocfs2_read_block(main_bm_inode, lgd_blkno, &group_bh);
if (ret < 0) {
mlog_errno(ret);
goto out_unlock;
......@@ -540,7 +539,7 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
goto out_unlock;
}
ret = ocfs2_read_block(osb, input->group, &group_bh, 0, NULL);
ret = ocfs2_read_blocks_sync(osb, input->group, 1, &group_bh);
if (ret < 0) {
mlog(ML_ERROR, "Can't read the group descriptor # %llu "
"from the device.", (unsigned long long)input->group);
......
......@@ -150,8 +150,8 @@ int ocfs2_refresh_slot_info(struct ocfs2_super *osb)
* be !NULL. Thus, ocfs2_read_blocks() will ignore blocknr. If
* this is not true, the read of -1 (UINT64_MAX) will fail.
*/
ret = ocfs2_read_blocks(osb, -1, si->si_blocks, si->si_bh, 0,
si->si_inode);
ret = ocfs2_read_blocks(si->si_inode, -1, si->si_blocks, si->si_bh,
OCFS2_BH_IGNORE_CACHE);
if (ret == 0) {
spin_lock(&osb->osb_lock);
ocfs2_update_slot_info(si);
......@@ -404,7 +404,8 @@ static int ocfs2_map_slot_buffers(struct ocfs2_super *osb,
(unsigned long long)blkno);
bh = NULL; /* Acquire a fresh bh */
status = ocfs2_read_block(osb, blkno, &bh, 0, si->si_inode);
status = ocfs2_read_blocks(si->si_inode, blkno, 1, &bh,
OCFS2_BH_IGNORE_CACHE);
if (status < 0) {
mlog_errno(status);
goto bail;
......
......@@ -28,6 +28,7 @@
#include "ocfs2.h" /* For struct ocfs2_lock_res */
#include "stackglue.h"
#include <linux/dlm_plock.h>
/*
* The control protocol starts with a handshake. Until the handshake
......@@ -746,6 +747,37 @@ static void user_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb)
{
}
/*
 * Dispatch a POSIX lock request to the matching fs/dlm call.
 *
 * @conn: cluster connection; its cc_lockspace is handed to the dlm call.
 * @ino:  inode number the lock applies to.
 * @file: file the request was issued on.
 * @cmd:  lock command (F_CANCELLK is rewritten to F_SETLK, see below).
 * @fl:   lock description; fl_type is overwritten for cancel requests.
 *
 * Returns whatever the selected dlm_posix_*() call returns.
 */
static int user_plock(struct ocfs2_cluster_connection *conn,
		      u64 ino,
		      struct file *file,
		      int cmd,
		      struct file_lock *fl)
{
	/*
	 * This is little more than a demultiplexer onto one of three dlm
	 * calls.
	 *
	 * Per the original author, fs/dlm forwards these requests to a misc
	 * device that a userspace daemon reads from and writes to.
	 *
	 * Cancel requests, which are only generated internally, are handled
	 * as unlocks for now. Most of this function was taken from
	 * gfs2_lock.
	 */
	if (cmd == F_CANCELLK) {
		fl->fl_type = F_UNLCK;
		cmd = F_SETLK;
	}

	if (IS_GETLK(cmd))
		return dlm_posix_get(conn->cc_lockspace, ino, file, fl);

	if (fl->fl_type == F_UNLCK)
		return dlm_posix_unlock(conn->cc_lockspace, ino, file, fl);

	return dlm_posix_lock(conn->cc_lockspace, ino, file, cmd, fl);
}
/*
* Compare a requested locking protocol version against the current one.
*
......@@ -839,6 +871,7 @@ static struct ocfs2_stack_operations ocfs2_user_plugin_ops = {
.dlm_unlock = user_dlm_unlock,
.lock_status = user_dlm_lock_status,
.lock_lvb = user_dlm_lvb,
.plock = user_plock,
.dump_lksb = user_dlm_dump_lksb,
};
......
......@@ -288,6 +288,26 @@ void ocfs2_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb)
}
EXPORT_SYMBOL_GPL(ocfs2_dlm_dump_lksb);
int ocfs2_stack_supports_plocks(void)
{
return active_stack && active_stack->sp_ops->plock;
}
EXPORT_SYMBOL_GPL(ocfs2_stack_supports_plocks);
/*
 * Forward a POSIX lock request to the active stack's plock operation.
 *
 * Callers must first check ocfs2_stack_supports_plocks(); this function
 * dereferences active_stack without testing it. If the hook is missing
 * anyway, a one-time warning is emitted and -EOPNOTSUPP is returned;
 * otherwise the hook's return value is passed through.
 */
int ocfs2_plock(struct ocfs2_cluster_connection *conn, u64 ino,
		struct file *file, int cmd, struct file_lock *fl)
{
	if (WARN_ON_ONCE(active_stack->sp_ops->plock == NULL))
		return -EOPNOTSUPP;

	return active_stack->sp_ops->plock(conn, ino, file, cmd, fl);
}
EXPORT_SYMBOL_GPL(ocfs2_plock);
int ocfs2_cluster_connect(const char *stack_name,
const char *group,
int grouplen,
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -40,6 +40,9 @@ void ocfs2_set_new_buffer_uptodate(struct inode *inode,
struct buffer_head *bh);
void ocfs2_remove_from_cache(struct inode *inode,
struct buffer_head *bh);
void ocfs2_remove_xattr_clusters_from_cache(struct inode *inode,
sector_t block,
u32 c_len);
int ocfs2_buffer_read_ahead(struct inode *inode,
struct buffer_head *bh);
......
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment