Commit acd15a83 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2

* 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2: (56 commits)
  ocfs2: Make cached block reads the common case.
  ocfs2: Kill the last naked wait_on_buffer() for cached reads.
  ocfs2: Move ocfs2_bread() into dir.c
  ocfs2: Simplify ocfs2_read_block()
  ocfs2: Require an inode for ocfs2_read_block(s)().
  ocfs2: Separate out sync reads from ocfs2_read_blocks()
  ocfs2: Refactor xattr list and remove ocfs2_xattr_handler().
  ocfs2: Calculate EA hash only by its suffix.
  ocfs2: Move trusted and user attribute support into xattr.c
  ocfs2: Uninline ocfs2_xattr_name_hash()
  ocfs2: Don't check for NULL before brelse()
  ocfs2: use smaller counters in ocfs2_remove_xattr_clusters_from_cache
  ocfs2: Documentation update for user_xattr / nouser_xattr mount options
  ocfs2: make la_debug_mutex static
  ocfs2: Remove pointless !!
  ocfs2: Add empty bucket support in xattr.
  ocfs2/xattr.c: Fix a bug when inserting xattr.
  ocfs2: Add xattr mount option in ocfs2_show_options()
  ocfs2: Switch over to JBD2.
  ocfs2: Add the 'inode64' mount option.
  ...
parents 72f22b1e d4a8c93c
......@@ -76,3 +76,9 @@ localalloc=8(*) Allows custom localalloc size in MB. If the value is too
large, the fs will silently revert it to the default.
Localalloc is not enabled for local mounts.
localflocks This disables cluster aware flock.
inode64 Indicates that Ocfs2 is allowed to create inodes at
any location in the filesystem, including those which
will result in inode numbers occupying more than 32
bits of significance.
user_xattr (*) Enables Extended User Attributes.
nouser_xattr Disables Extended User Attributes.
......@@ -220,17 +220,16 @@ config JBD
tristate
help
This is a generic journalling layer for block devices. It is
currently used by the ext3 and OCFS2 file systems, but it could
also be used to add journal support to other file systems or block
currently used by the ext3 file system, but it could also be
used to add journal support to other file systems or block
devices such as RAID or LVM.
If you are using the ext3 or OCFS2 file systems, you need to
say Y here. If you are not using ext3 OCFS2 then you will probably
want to say N.
If you are using the ext3 file system, you need to say Y here.
If you are not using ext3 then you will probably want to say N.
To compile this device as a module, choose M here: the module will be
called jbd. If you are compiling ext3 or OCFS2 into the kernel,
you cannot compile this code as a module.
called jbd. If you are compiling ext3 into the kernel, you
cannot compile this code as a module.
config JBD_DEBUG
bool "JBD (ext3) debugging support"
......@@ -254,15 +253,16 @@ config JBD2
help
This is a generic journaling layer for block devices that support
both 32-bit and 64-bit block numbers. It is currently used by
the ext4 filesystem, but it could also be used to add
the ext4 and OCFS2 filesystems, but it could also be used to add
journal support to other file systems or block devices such
as RAID or LVM.
If you are using ext4, you need to say Y here. If you are not
using ext4 then you will probably want to say N.
If you are using ext4 or OCFS2, you need to say Y here.
If you are not using ext4 or OCFS2 then you will
probably want to say N.
To compile this device as a module, choose M here. The module will be
called jbd2. If you are compiling ext4 into the kernel,
called jbd2. If you are compiling ext4 or OCFS2 into the kernel,
you cannot compile this code as a module.
config JBD2_DEBUG
......@@ -448,7 +448,7 @@ config OCFS2_FS
tristate "OCFS2 file system support"
depends on NET && SYSFS
select CONFIGFS_FS
select JBD
select JBD2
select CRC32
help
OCFS2 is a general purpose extent based shared disk cluster file
......@@ -519,6 +519,16 @@ config OCFS2_DEBUG_FS
this option for debugging only as it is likely to decrease
performance of the filesystem.
config OCFS2_COMPAT_JBD
bool "Use JBD for compatibility"
depends on OCFS2_FS
default n
select JBD
help
The ocfs2 filesystem now uses JBD2 for its journalling. JBD2
is backwards compatible with JBD. It is safe to say N here.
However, if you really want to use the original JBD, say Y here.
endif # BLOCK
config DNOTIFY
......
......@@ -34,7 +34,8 @@ ocfs2-objs := \
symlink.o \
sysfile.o \
uptodate.o \
ver.o
ver.o \
xattr.o
ocfs2_stackglue-objs := stackglue.o
ocfs2_stack_o2cb-objs := stack_o2cb.o
......
......@@ -49,6 +49,340 @@
#include "buffer_head_io.h"
/*
* Operations for a specific extent tree type.
*
* To implement an on-disk btree (extent tree) type in ocfs2, add
* an ocfs2_extent_tree_operations structure and the matching
* ocfs2_init_<thingy>_extent_tree() function. That's pretty much it
* for the allocation portion of the extent tree.
*/
struct ocfs2_extent_tree_operations {
/*
* last_eb_blk is the block number of the right most leaf extent
* block. Most on-disk structures containing an extent tree store
* this value for fast access. The ->eo_set_last_eb_blk() and
* ->eo_get_last_eb_blk() operations access this value. They are
* both required.
*/
void (*eo_set_last_eb_blk)(struct ocfs2_extent_tree *et,
u64 blkno);
u64 (*eo_get_last_eb_blk)(struct ocfs2_extent_tree *et);
/*
* The on-disk structure usually keeps track of how many total
* clusters are stored in this extent tree. This function updates
* that value. new_clusters is the delta, and must be
* added to the total. Required.
*/
void (*eo_update_clusters)(struct inode *inode,
struct ocfs2_extent_tree *et,
u32 new_clusters);
/*
* If ->eo_insert_check() exists, it is called before rec is
* inserted into the extent tree. It is optional.
*/
int (*eo_insert_check)(struct inode *inode,
struct ocfs2_extent_tree *et,
struct ocfs2_extent_rec *rec);
int (*eo_sanity_check)(struct inode *inode, struct ocfs2_extent_tree *et);
/*
* --------------------------------------------------------------
* The remaining are internal to ocfs2_extent_tree and don't have
* accessor functions
*/
/*
* ->eo_fill_root_el() takes et->et_object and sets et->et_root_el.
* It is required.
*/
void (*eo_fill_root_el)(struct ocfs2_extent_tree *et);
/*
* ->eo_fill_max_leaf_clusters sets et->et_max_leaf_clusters if
* it exists. If it does not, et->et_max_leaf_clusters is set
* to 0 (unlimited). Optional.
*/
void (*eo_fill_max_leaf_clusters)(struct inode *inode,
struct ocfs2_extent_tree *et);
};
/*
* Pre-declare ocfs2_dinode_et_ops so we can use it as a sanity check
* in the methods.
*/
static u64 ocfs2_dinode_get_last_eb_blk(struct ocfs2_extent_tree *et);
static void ocfs2_dinode_set_last_eb_blk(struct ocfs2_extent_tree *et,
u64 blkno);
static void ocfs2_dinode_update_clusters(struct inode *inode,
struct ocfs2_extent_tree *et,
u32 clusters);
static int ocfs2_dinode_insert_check(struct inode *inode,
struct ocfs2_extent_tree *et,
struct ocfs2_extent_rec *rec);
static int ocfs2_dinode_sanity_check(struct inode *inode,
struct ocfs2_extent_tree *et);
static void ocfs2_dinode_fill_root_el(struct ocfs2_extent_tree *et);
static struct ocfs2_extent_tree_operations ocfs2_dinode_et_ops = {
.eo_set_last_eb_blk = ocfs2_dinode_set_last_eb_blk,
.eo_get_last_eb_blk = ocfs2_dinode_get_last_eb_blk,
.eo_update_clusters = ocfs2_dinode_update_clusters,
.eo_insert_check = ocfs2_dinode_insert_check,
.eo_sanity_check = ocfs2_dinode_sanity_check,
.eo_fill_root_el = ocfs2_dinode_fill_root_el,
};
static void ocfs2_dinode_set_last_eb_blk(struct ocfs2_extent_tree *et,
u64 blkno)
{
struct ocfs2_dinode *di = et->et_object;
BUG_ON(et->et_ops != &ocfs2_dinode_et_ops);
di->i_last_eb_blk = cpu_to_le64(blkno);
}
static u64 ocfs2_dinode_get_last_eb_blk(struct ocfs2_extent_tree *et)
{
struct ocfs2_dinode *di = et->et_object;
BUG_ON(et->et_ops != &ocfs2_dinode_et_ops);
return le64_to_cpu(di->i_last_eb_blk);
}
static void ocfs2_dinode_update_clusters(struct inode *inode,
struct ocfs2_extent_tree *et,
u32 clusters)
{
struct ocfs2_dinode *di = et->et_object;
le32_add_cpu(&di->i_clusters, clusters);
spin_lock(&OCFS2_I(inode)->ip_lock);
OCFS2_I(inode)->ip_clusters = le32_to_cpu(di->i_clusters);
spin_unlock(&OCFS2_I(inode)->ip_lock);
}
static int ocfs2_dinode_insert_check(struct inode *inode,
struct ocfs2_extent_tree *et,
struct ocfs2_extent_rec *rec)
{
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
BUG_ON(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL);
mlog_bug_on_msg(!ocfs2_sparse_alloc(osb) &&
(OCFS2_I(inode)->ip_clusters != rec->e_cpos),
"Device %s, asking for sparse allocation: inode %llu, "
"cpos %u, clusters %u\n",
osb->dev_str,
(unsigned long long)OCFS2_I(inode)->ip_blkno,
rec->e_cpos,
OCFS2_I(inode)->ip_clusters);
return 0;
}
static int ocfs2_dinode_sanity_check(struct inode *inode,
struct ocfs2_extent_tree *et)
{
int ret = 0;
struct ocfs2_dinode *di;
BUG_ON(et->et_ops != &ocfs2_dinode_et_ops);
di = et->et_object;
if (!OCFS2_IS_VALID_DINODE(di)) {
ret = -EIO;
ocfs2_error(inode->i_sb,
"Inode %llu has invalid path root",
(unsigned long long)OCFS2_I(inode)->ip_blkno);
}
return ret;
}
static void ocfs2_dinode_fill_root_el(struct ocfs2_extent_tree *et)
{
struct ocfs2_dinode *di = et->et_object;
et->et_root_el = &di->id2.i_list;
}
static void ocfs2_xattr_value_fill_root_el(struct ocfs2_extent_tree *et)
{
struct ocfs2_xattr_value_root *xv = et->et_object;
et->et_root_el = &xv->xr_list;
}
static void ocfs2_xattr_value_set_last_eb_blk(struct ocfs2_extent_tree *et,
u64 blkno)
{
struct ocfs2_xattr_value_root *xv =
(struct ocfs2_xattr_value_root *)et->et_object;
xv->xr_last_eb_blk = cpu_to_le64(blkno);
}
static u64 ocfs2_xattr_value_get_last_eb_blk(struct ocfs2_extent_tree *et)
{
struct ocfs2_xattr_value_root *xv =
(struct ocfs2_xattr_value_root *) et->et_object;
return le64_to_cpu(xv->xr_last_eb_blk);
}
static void ocfs2_xattr_value_update_clusters(struct inode *inode,
struct ocfs2_extent_tree *et,
u32 clusters)
{
struct ocfs2_xattr_value_root *xv =
(struct ocfs2_xattr_value_root *)et->et_object;
le32_add_cpu(&xv->xr_clusters, clusters);
}
static struct ocfs2_extent_tree_operations ocfs2_xattr_value_et_ops = {
.eo_set_last_eb_blk = ocfs2_xattr_value_set_last_eb_blk,
.eo_get_last_eb_blk = ocfs2_xattr_value_get_last_eb_blk,
.eo_update_clusters = ocfs2_xattr_value_update_clusters,
.eo_fill_root_el = ocfs2_xattr_value_fill_root_el,
};
static void ocfs2_xattr_tree_fill_root_el(struct ocfs2_extent_tree *et)
{
struct ocfs2_xattr_block *xb = et->et_object;
et->et_root_el = &xb->xb_attrs.xb_root.xt_list;
}
static void ocfs2_xattr_tree_fill_max_leaf_clusters(struct inode *inode,
struct ocfs2_extent_tree *et)
{
et->et_max_leaf_clusters =
ocfs2_clusters_for_bytes(inode->i_sb,
OCFS2_MAX_XATTR_TREE_LEAF_SIZE);
}
static void ocfs2_xattr_tree_set_last_eb_blk(struct ocfs2_extent_tree *et,
u64 blkno)
{
struct ocfs2_xattr_block *xb = et->et_object;
struct ocfs2_xattr_tree_root *xt = &xb->xb_attrs.xb_root;
xt->xt_last_eb_blk = cpu_to_le64(blkno);
}
static u64 ocfs2_xattr_tree_get_last_eb_blk(struct ocfs2_extent_tree *et)
{
struct ocfs2_xattr_block *xb = et->et_object;
struct ocfs2_xattr_tree_root *xt = &xb->xb_attrs.xb_root;
return le64_to_cpu(xt->xt_last_eb_blk);
}
static void ocfs2_xattr_tree_update_clusters(struct inode *inode,
struct ocfs2_extent_tree *et,
u32 clusters)
{
struct ocfs2_xattr_block *xb = et->et_object;
le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, clusters);
}
static struct ocfs2_extent_tree_operations ocfs2_xattr_tree_et_ops = {
.eo_set_last_eb_blk = ocfs2_xattr_tree_set_last_eb_blk,
.eo_get_last_eb_blk = ocfs2_xattr_tree_get_last_eb_blk,
.eo_update_clusters = ocfs2_xattr_tree_update_clusters,
.eo_fill_root_el = ocfs2_xattr_tree_fill_root_el,
.eo_fill_max_leaf_clusters = ocfs2_xattr_tree_fill_max_leaf_clusters,
};
static void __ocfs2_init_extent_tree(struct ocfs2_extent_tree *et,
struct inode *inode,
struct buffer_head *bh,
void *obj,
struct ocfs2_extent_tree_operations *ops)
{
et->et_ops = ops;
et->et_root_bh = bh;
if (!obj)
obj = (void *)bh->b_data;
et->et_object = obj;
et->et_ops->eo_fill_root_el(et);
if (!et->et_ops->eo_fill_max_leaf_clusters)
et->et_max_leaf_clusters = 0;
else
et->et_ops->eo_fill_max_leaf_clusters(inode, et);
}
void ocfs2_init_dinode_extent_tree(struct ocfs2_extent_tree *et,
struct inode *inode,
struct buffer_head *bh)
{
__ocfs2_init_extent_tree(et, inode, bh, NULL, &ocfs2_dinode_et_ops);
}
void ocfs2_init_xattr_tree_extent_tree(struct ocfs2_extent_tree *et,
struct inode *inode,
struct buffer_head *bh)
{
__ocfs2_init_extent_tree(et, inode, bh, NULL,
&ocfs2_xattr_tree_et_ops);
}
void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et,
struct inode *inode,
struct buffer_head *bh,
struct ocfs2_xattr_value_root *xv)
{
__ocfs2_init_extent_tree(et, inode, bh, xv,
&ocfs2_xattr_value_et_ops);
}
static inline void ocfs2_et_set_last_eb_blk(struct ocfs2_extent_tree *et,
u64 new_last_eb_blk)
{
et->et_ops->eo_set_last_eb_blk(et, new_last_eb_blk);
}
static inline u64 ocfs2_et_get_last_eb_blk(struct ocfs2_extent_tree *et)
{
return et->et_ops->eo_get_last_eb_blk(et);
}
static inline void ocfs2_et_update_clusters(struct inode *inode,
struct ocfs2_extent_tree *et,
u32 clusters)
{
et->et_ops->eo_update_clusters(inode, et, clusters);
}
static inline int ocfs2_et_insert_check(struct inode *inode,
struct ocfs2_extent_tree *et,
struct ocfs2_extent_rec *rec)
{
int ret = 0;
if (et->et_ops->eo_insert_check)
ret = et->et_ops->eo_insert_check(inode, et, rec);
return ret;
}
static inline int ocfs2_et_sanity_check(struct inode *inode,
struct ocfs2_extent_tree *et)
{
int ret = 0;
if (et->et_ops->eo_sanity_check)
ret = et->et_ops->eo_sanity_check(inode, et);
return ret;
}
static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc);
static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt,
struct ocfs2_extent_block *eb);
......@@ -204,17 +538,6 @@ static struct ocfs2_path *ocfs2_new_path(struct buffer_head *root_bh,
return path;
}
/*
* Allocate and initialize a new path based on a disk inode tree.
*/
static struct ocfs2_path *ocfs2_new_inode_path(struct buffer_head *di_bh)
{
struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
struct ocfs2_extent_list *el = &di->id2.i_list;
return ocfs2_new_path(di_bh, el);
}
/*
* Convenience function to journal all components in a path.
*/
......@@ -368,39 +691,35 @@ struct ocfs2_merge_ctxt {
*/
int ocfs2_num_free_extents(struct ocfs2_super *osb,
struct inode *inode,
struct ocfs2_dinode *fe)
struct ocfs2_extent_tree *et)
{
int retval;
struct ocfs2_extent_list *el;
struct ocfs2_extent_list *el = NULL;
struct ocfs2_extent_block *eb;
struct buffer_head *eb_bh = NULL;
u64 last_eb_blk = 0;
mlog_entry_void();
if (!OCFS2_IS_VALID_DINODE(fe)) {
OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe);
retval = -EIO;
goto bail;
}
el = et->et_root_el;
last_eb_blk = ocfs2_et_get_last_eb_blk(et);
if (fe->i_last_eb_blk) {
retval = ocfs2_read_block(osb, le64_to_cpu(fe->i_last_eb_blk),
&eb_bh, OCFS2_BH_CACHED, inode);
if (last_eb_blk) {
retval = ocfs2_read_block(inode, last_eb_blk,
&eb_bh);
if (retval < 0) {
mlog_errno(retval);
goto bail;
}
eb = (struct ocfs2_extent_block *) eb_bh->b_data;
el = &eb->h_list;
} else
el = &fe->id2.i_list;
}
BUG_ON(el->l_tree_depth != 0);
retval = le16_to_cpu(el->l_count) - le16_to_cpu(el->l_next_free_rec);
bail:
if (eb_bh)
brelse(eb_bh);
brelse(eb_bh);
mlog_exit(retval);
return retval;
......@@ -486,8 +805,7 @@ static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb,
bail:
if (status < 0) {
for(i = 0; i < wanted; i++) {
if (bhs[i])
brelse(bhs[i]);
brelse(bhs[i]);
bhs[i] = NULL;
}
}
......@@ -531,7 +849,7 @@ static inline u32 ocfs2_sum_rightmost_rec(struct ocfs2_extent_list *el)
static int ocfs2_add_branch(struct ocfs2_super *osb,
handle_t *handle,
struct inode *inode,
struct buffer_head *fe_bh,
struct ocfs2_extent_tree *et,
struct buffer_head *eb_bh,
struct buffer_head **last_eb_bh,
struct ocfs2_alloc_context *meta_ac)
......@@ -540,7 +858,6 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
u64 next_blkno, new_last_eb_blk;
struct buffer_head *bh;
struct buffer_head **new_eb_bhs = NULL;
struct ocfs2_dinode *fe;
struct ocfs2_extent_block *eb;
struct ocfs2_extent_list *eb_el;
struct ocfs2_extent_list *el;
......@@ -550,13 +867,11 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
BUG_ON(!last_eb_bh || !*last_eb_bh);
fe = (struct ocfs2_dinode *) fe_bh->b_data;
if (eb_bh) {
eb = (struct ocfs2_extent_block *) eb_bh->b_data;
el = &eb->h_list;
} else
el = &fe->id2.i_list;
el = et->et_root_el;
/* we never add a branch to a leaf. */
BUG_ON(!el->l_tree_depth);
......@@ -646,7 +961,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
mlog_errno(status);
goto bail;
}
status = ocfs2_journal_access(handle, inode, fe_bh,
status = ocfs2_journal_access(handle, inode, et->et_root_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (status < 0) {
mlog_errno(status);
......@@ -662,7 +977,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
}
/* Link the new branch into the rest of the tree (el will
* either be on the fe, or the extent block passed in. */
* either be on the root_bh, or the extent block passed in. */
i = le16_to_cpu(el->l_next_free_rec);
el->l_recs[i].e_blkno = cpu_to_le64(next_blkno);
el->l_recs[i].e_cpos = cpu_to_le32(new_cpos);
......@@ -671,7 +986,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
/* fe needs a new last extent block pointer, as does the
* next_leaf on the previously last-extent-block. */
fe->i_last_eb_blk = cpu_to_le64(new_last_eb_blk);
ocfs2_et_set_last_eb_blk(et, new_last_eb_blk);
eb = (struct ocfs2_extent_block *) (*last_eb_bh)->b_data;
eb->h_next_leaf_blk = cpu_to_le64(new_last_eb_blk);
......@@ -679,7 +994,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
status = ocfs2_journal_dirty(handle, *last_eb_bh);
if (status < 0)
mlog_errno(status);
status = ocfs2_journal_dirty(handle, fe_bh);
status = ocfs2_journal_dirty(handle, et->et_root_bh);
if (status < 0)
mlog_errno(status);
if (eb_bh) {
......@@ -700,8 +1015,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
bail:
if (new_eb_bhs) {
for (i = 0; i < new_blocks; i++)
if (new_eb_bhs[i])
brelse(new_eb_bhs[i]);
brelse(new_eb_bhs[i]);
kfree(new_eb_bhs);
}
......@@ -717,16 +1031,15 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
handle_t *handle,
struct inode *inode,
struct buffer_head *fe_bh,
struct ocfs2_extent_tree *et,
struct ocfs2_alloc_context *meta_ac,
struct buffer_head **ret_new_eb_bh)
{
int status, i;
u32 new_clusters;
struct buffer_head *new_eb_bh = NULL;
struct ocfs2_dinode *fe;
struct ocfs2_extent_block *eb;
struct ocfs2_extent_list *fe_el;
struct ocfs2_extent_list *root_el;
struct ocfs2_extent_list *eb_el;
mlog_entry_void();
......@@ -746,8 +1059,7 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
}
eb_el = &eb->h_list;
fe = (struct ocfs2_dinode *) fe_bh->b_data;
fe_el = &fe->id2.i_list;
root_el = et->et_root_el;
status = ocfs2_journal_access(handle, inode, new_eb_bh,
OCFS2_JOURNAL_ACCESS_CREATE);
......@@ -756,11 +1068,11 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
goto bail;
}
/* copy the fe data into the new extent block */
eb_el->l_tree_depth = fe_el->l_tree_depth;
eb_el->l_next_free_rec = fe_el->l_next_free_rec;
for(i = 0; i < le16_to_cpu(fe_el->l_next_free_rec); i++)
eb_el->l_recs[i] = fe_el->l_recs[i];
/* copy the root extent list data into the new extent block */
eb_el->l_tree_depth = root_el->l_tree_depth;
eb_el->l_next_free_rec = root_el->l_next_free_rec;
for (i = 0; i < le16_to_cpu(root_el->l_next_free_rec); i++)
eb_el->l_recs[i] = root_el->l_recs[i];
status = ocfs2_journal_dirty(handle, new_eb_bh);
if (status < 0) {
......@@ -768,7 +1080,7 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
goto bail;
}
status = ocfs2_journal_access(handle, inode, fe_bh,
status = ocfs2_journal_access(handle, inode, et->et_root_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (status < 0) {
mlog_errno(status);
......@@ -777,21 +1089,21 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
new_clusters = ocfs2_sum_rightmost_rec(eb_el);
/* update fe now */
le16_add_cpu(&fe_el->l_tree_depth, 1);
fe_el->l_recs[0].e_cpos = 0;
fe_el->l_recs[0].e_blkno = eb->h_blkno;
fe_el->l_recs[0].e_int_clusters = cpu_to_le32(new_clusters);
for(i = 1; i < le16_to_cpu(fe_el->l_next_free_rec); i++)
memset(&fe_el->l_recs[i], 0, sizeof(struct ocfs2_extent_rec));
fe_el->l_next_free_rec = cpu_to_le16(1);
/* update root_bh now */
le16_add_cpu(&root_el->l_tree_depth, 1);
root_el->l_recs[0].e_cpos = 0;
root_el->l_recs[0].e_blkno = eb->h_blkno;
root_el->l_recs[0].e_int_clusters = cpu_to_le32(new_clusters);
for (i = 1; i < le16_to_cpu(root_el->l_next_free_rec); i++)
memset(&root_el->l_recs[i], 0, sizeof(struct ocfs2_extent_rec));
root_el->l_next_free_rec = cpu_to_le16(1);
/* If this is our 1st tree depth shift, then last_eb_blk
* becomes the allocated extent block */
if (fe_el->l_tree_depth == cpu_to_le16(1))
fe->i_last_eb_blk = eb->h_blkno;
if (root_el->l_tree_depth == cpu_to_le16(1))
ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno));
status = ocfs2_journal_dirty(handle, fe_bh);
status = ocfs2_journal_dirty(handle, et->et_root_bh);
if (status < 0) {
mlog_errno(status);
goto bail;
......@@ -801,8 +1113,7 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
new_eb_bh = NULL;
status = 0;
bail:
if (new_eb_bh)
brelse(new_eb_bh);
brelse(new_eb_bh);
mlog_exit(status);
return status;
......@@ -817,22 +1128,21 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
* 1) a lowest extent block is found, then we pass it back in
* *lowest_eb_bh and return '0'
*
* 2) the search fails to find anything, but the dinode has room. We
* 2) the search fails to find anything, but the root_el has room. We
* pass NULL back in *lowest_eb_bh, but still return '0'
*
* 3) the search fails to find anything AND the dinode is full, in
* 3) the search fails to find anything AND the root_el is full, in
* which case we return > 0
*
* return status < 0 indicates an error.
*/
static int ocfs2_find_branch_target(struct ocfs2_super *osb,
struct inode *inode,
struct buffer_head *fe_bh,
struct ocfs2_extent_tree *et,
struct buffer_head **target_bh)
{
int status = 0, i;
u64 blkno;
struct ocfs2_dinode *fe;
struct ocfs2_extent_block *eb;
struct ocfs2_extent_list *el;
struct buffer_head *bh = NULL;
......@@ -842,8 +1152,7 @@ static int ocfs2_find_branch_target(struct ocfs2_super *osb,
*target_bh = NULL;
fe = (struct ocfs2_dinode *) fe_bh->b_data;
el = &fe->id2.i_list;
el = et->et_root_el;
while(le16_to_cpu(el->l_tree_depth) > 1) {
if (le16_to_cpu(el->l_next_free_rec) == 0) {
......@@ -864,13 +1173,10 @@ static int ocfs2_find_branch_target(struct ocfs2_super *osb,
goto bail;
}
if (bh) {
brelse(bh);
bh = NULL;
}
brelse(bh);
bh = NULL;
status = ocfs2_read_block(osb, blkno, &bh, OCFS2_BH_CACHED,
inode);
status = ocfs2_read_block(inode, blkno, &bh);
if (status < 0) {
mlog_errno(status);
goto bail;
......@@ -886,8 +1192,7 @@ static int ocfs2_find_branch_target(struct ocfs2_super *osb,
if (le16_to_cpu(el->l_next_free_rec) <
le16_to_cpu(el->l_count)) {
if (lowest_bh)
brelse(lowest_bh);
brelse(lowest_bh);
lowest_bh = bh;
get_bh(lowest_bh);
}
......@@ -895,14 +1200,13 @@ static int ocfs2_find_branch_target(struct ocfs2_super *osb,
/* If we didn't find one and the fe doesn't have any room,
* then return '1' */
if (!lowest_bh
&& (fe->id2.i_list.l_next_free_rec == fe->id2.i_list.l_count))
el = et->et_root_el;
if (!lowest_bh && (el->l_next_free_rec == el->l_count))
status = 1;
*target_bh = lowest_bh;
bail:
if (bh)
brelse(bh);
brelse(bh);
mlog_exit(status);
return status;
......@@ -919,19 +1223,19 @@ static int ocfs2_find_branch_target(struct ocfs2_super *osb,
* *last_eb_bh will be updated by ocfs2_add_branch().
*/
static int ocfs2_grow_tree(struct inode *inode, handle_t *handle,
struct buffer_head *di_bh, int *final_depth,
struct ocfs2_extent_tree *et, int *final_depth,
struct buffer_head **last_eb_bh,
struct ocfs2_alloc_context *meta_ac)
{
int ret, shift;
struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
int depth = le16_to_cpu(di->id2.i_list.l_tree_depth);
struct ocfs2_extent_list *el = et->et_root_el;
int depth = le16_to_cpu(el->l_tree_depth);
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct buffer_head *bh = NULL;
BUG_ON(meta_ac == NULL);
shift = ocfs2_find_branch_target(osb, inode, di_bh, &bh);
shift = ocfs2_find_branch_target(osb, inode, et, &bh);
if (shift < 0) {
ret = shift;
mlog_errno(ret);
......@@ -948,7 +1252,7 @@ static int ocfs2_grow_tree(struct inode *inode, handle_t *handle,
/* ocfs2_shift_tree_depth will return us a buffer with
* the new extent block (so we can pass that to
* ocfs2_add_branch). */
ret = ocfs2_shift_tree_depth(osb, handle, inode, di_bh,
ret = ocfs2_shift_tree_depth(osb, handle, inode, et,
meta_ac, &bh);
if (ret < 0) {
mlog_errno(ret);
......@@ -975,7 +1279,7 @@ static int ocfs2_grow_tree(struct inode *inode, handle_t *handle,
/* call ocfs2_add_branch to add the final part of the tree with
* the new data. */
mlog(0, "add branch. bh = %p\n", bh);
ret = ocfs2_add_branch(osb, handle, inode, di_bh, bh, last_eb_bh,
ret = ocfs2_add_branch(osb, handle, inode, et, bh, last_eb_bh,
meta_ac);
if (ret < 0) {
mlog_errno(ret);
......@@ -1236,8 +1540,7 @@ static int __ocfs2_find_path(struct inode *inode,
brelse(bh);
bh = NULL;
ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), blkno,
&bh, OCFS2_BH_CACHED, inode);
ret = ocfs2_read_block(inode, blkno, &bh);
if (ret) {
mlog_errno(ret);
goto out;
......@@ -2058,11 +2361,11 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
struct ocfs2_path *right_path,
int subtree_index,
struct ocfs2_cached_dealloc_ctxt *dealloc,
int *deleted)
int *deleted,
struct ocfs2_extent_tree *et)
{
int ret, i, del_right_subtree = 0, right_has_empty = 0;
struct buffer_head *root_bh, *di_bh = path_root_bh(right_path);
struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
struct buffer_head *root_bh, *et_root_bh = path_root_bh(right_path);
struct ocfs2_extent_list *right_leaf_el, *left_leaf_el;
struct ocfs2_extent_block *eb;
......@@ -2114,7 +2417,7 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
* We have to update i_last_eb_blk during the meta
* data delete.
*/
ret = ocfs2_journal_access(handle, inode, di_bh,
ret = ocfs2_journal_access(handle, inode, et_root_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (ret) {
mlog_errno(ret);
......@@ -2189,7 +2492,7 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
ocfs2_update_edge_lengths(inode, handle, left_path);
eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data;
di->i_last_eb_blk = eb->h_blkno;
ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno));
/*
* Removal of the extent in the left leaf was skipped
......@@ -2199,7 +2502,7 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
if (right_has_empty)
ocfs2_remove_empty_extent(left_leaf_el);
ret = ocfs2_journal_dirty(handle, di_bh);
ret = ocfs2_journal_dirty(handle, et_root_bh);
if (ret)
mlog_errno(ret);
......@@ -2322,7 +2625,8 @@ static int __ocfs2_rotate_tree_left(struct inode *inode,
handle_t *handle, int orig_credits,
struct ocfs2_path *path,
struct ocfs2_cached_dealloc_ctxt *dealloc,
struct ocfs2_path **empty_extent_path)
struct ocfs2_path **empty_extent_path,
struct ocfs2_extent_tree *et)
{
int ret, subtree_root, deleted;
u32 right_cpos;
......@@ -2395,7 +2699,7 @@ static int __ocfs2_rotate_tree_left(struct inode *inode,
ret = ocfs2_rotate_subtree_left(inode, handle, left_path,
right_path, subtree_root,
dealloc, &deleted);
dealloc, &deleted, et);
if (ret == -EAGAIN) {
/*
* The rotation has to temporarily stop due to
......@@ -2438,29 +2742,20 @@ static int __ocfs2_rotate_tree_left(struct inode *inode,
}
static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
struct ocfs2_path *path,
struct ocfs2_cached_dealloc_ctxt *dealloc)
struct ocfs2_path *path,
struct ocfs2_cached_dealloc_ctxt *dealloc,
struct ocfs2_extent_tree *et)
{
int ret, subtree_index;
u32 cpos;
struct ocfs2_path *left_path = NULL;
struct ocfs2_dinode *di;
struct ocfs2_extent_block *eb;
struct ocfs2_extent_list *el;
/*
* XXX: This code assumes that the root is an inode, which is
* true for now but may change as tree code gets generic.
*/
di = (struct ocfs2_dinode *)path_root_bh(path)->b_data;
if (!OCFS2_IS_VALID_DINODE(di)) {
ret = -EIO;
ocfs2_error(inode->i_sb,
"Inode %llu has invalid path root",
(unsigned long long)OCFS2_I(inode)->ip_blkno);
goto out;
}
ret = ocfs2_et_sanity_check(inode, et);
if (ret)
goto out;
/*
* There's two ways we handle this depending on
* whether path is the only existing one.
......@@ -2517,7 +2812,7 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
ocfs2_update_edge_lengths(inode, handle, left_path);
eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data;
di->i_last_eb_blk = eb->h_blkno;
ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno));
} else {
/*
* 'path' is also the leftmost path which
......@@ -2528,12 +2823,12 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
*/
ocfs2_unlink_path(inode, handle, dealloc, path, 1);
el = &di->id2.i_list;
el = et->et_root_el;
el->l_tree_depth = 0;
el->l_next_free_rec = 0;
memset(&el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec));
di->i_last_eb_blk = 0;
ocfs2_et_set_last_eb_blk(et, 0);
}
ocfs2_journal_dirty(handle, path_root_bh(path));
......@@ -2561,7 +2856,8 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
*/
static int ocfs2_rotate_tree_left(struct inode *inode, handle_t *handle,
struct ocfs2_path *path,
struct ocfs2_cached_dealloc_ctxt *dealloc)
struct ocfs2_cached_dealloc_ctxt *dealloc,
struct ocfs2_extent_tree *et)
{
int ret, orig_credits = handle->h_buffer_credits;
struct ocfs2_path *tmp_path = NULL, *restart_path = NULL;
......@@ -2575,7 +2871,7 @@ static int ocfs2_rotate_tree_left(struct inode *inode, handle_t *handle,
if (path->p_tree_depth == 0) {
rightmost_no_delete:
/*
* In-inode extents. This is trivially handled, so do
* Inline extents. This is trivially handled, so do
* it up front.
*/
ret = ocfs2_rotate_rightmost_leaf_left(inode, handle,
......@@ -2629,7 +2925,7 @@ static int ocfs2_rotate_tree_left(struct inode *inode, handle_t *handle,
*/
ret = ocfs2_remove_rightmost_path(inode, handle, path,
dealloc);
dealloc, et);
if (ret)
mlog_errno(ret);
goto out;
......@@ -2641,7 +2937,7 @@ static int ocfs2_rotate_tree_left(struct inode *inode, handle_t *handle,
*/
try_rotate:
ret = __ocfs2_rotate_tree_left(inode, handle, orig_credits, path,
dealloc, &restart_path);
dealloc, &restart_path, et);
if (ret && ret != -EAGAIN) {
mlog_errno(ret);
goto out;
......@@ -2653,7 +2949,7 @@ static int ocfs2_rotate_tree_left(struct inode *inode, handle_t *handle,
ret = __ocfs2_rotate_tree_left(inode, handle, orig_credits,
tmp_path, dealloc,
&restart_path);
&restart_path, et);
if (ret && ret != -EAGAIN) {
mlog_errno(ret);
goto out;
......@@ -2939,6 +3235,7 @@ static int ocfs2_merge_rec_left(struct inode *inode,
handle_t *handle,
struct ocfs2_extent_rec *split_rec,
struct ocfs2_cached_dealloc_ctxt *dealloc,
struct ocfs2_extent_tree *et,
int index)
{
int ret, i, subtree_index = 0, has_empty_extent = 0;
......@@ -3059,7 +3356,8 @@ static int ocfs2_merge_rec_left(struct inode *inode,
le16_to_cpu(el->l_next_free_rec) == 1) {
ret = ocfs2_remove_rightmost_path(inode, handle,
right_path, dealloc);
right_path,
dealloc, et);
if (ret) {
mlog_errno(ret);
goto out;
......@@ -3086,7 +3384,8 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
int split_index,
struct ocfs2_extent_rec *split_rec,
struct ocfs2_cached_dealloc_ctxt *dealloc,
struct ocfs2_merge_ctxt *ctxt)
struct ocfs2_merge_ctxt *ctxt,
struct ocfs2_extent_tree *et)
{
int ret = 0;
......@@ -3104,7 +3403,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
* illegal.
*/
ret = ocfs2_rotate_tree_left(inode, handle, path,
dealloc);
dealloc, et);
if (ret) {
mlog_errno(ret);
goto out;
......@@ -3147,7 +3446,8 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0]));
/* The merge left us with an empty extent, remove it. */
ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc);
ret = ocfs2_rotate_tree_left(inode, handle, path,
dealloc, et);
if (ret) {
mlog_errno(ret);
goto out;
......@@ -3161,7 +3461,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
*/
ret = ocfs2_merge_rec_left(inode, path,
handle, rec,
dealloc,
dealloc, et,
split_index);
if (ret) {
......@@ -3170,7 +3470,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
}
ret = ocfs2_rotate_tree_left(inode, handle, path,
dealloc);
dealloc, et);
/*
* Error from this last rotate is not critical, so
* print but don't bubble it up.
......@@ -3190,7 +3490,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
ret = ocfs2_merge_rec_left(inode,
path,
handle, split_rec,
dealloc,
dealloc, et,
split_index);
if (ret) {
mlog_errno(ret);
......@@ -3213,7 +3513,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
* our leaf. Try to rotate it away.
*/
ret = ocfs2_rotate_tree_left(inode, handle, path,
dealloc);
dealloc, et);
if (ret)
mlog_errno(ret);
ret = 0;
......@@ -3347,16 +3647,6 @@ static void ocfs2_insert_at_leaf(struct ocfs2_extent_rec *insert_rec,
ocfs2_rotate_leaf(el, insert_rec);
}
static inline void ocfs2_update_dinode_clusters(struct inode *inode,
struct ocfs2_dinode *di,
u32 clusters)
{
le32_add_cpu(&di->i_clusters, clusters);
spin_lock(&OCFS2_I(inode)->ip_lock);
OCFS2_I(inode)->ip_clusters = le32_to_cpu(di->i_clusters);
spin_unlock(&OCFS2_I(inode)->ip_lock);
}
static void ocfs2_adjust_rightmost_records(struct inode *inode,
handle_t *handle,
struct ocfs2_path *path,
......@@ -3558,8 +3848,8 @@ static void ocfs2_split_record(struct inode *inode,
}
/*
* This function only does inserts on an allocation b-tree. For dinode
* lists, ocfs2_insert_at_leaf() is called directly.
* This function only does inserts on an allocation b-tree. For tree
* depth = 0, ocfs2_insert_at_leaf() is called directly.
*
* right_path is the path we want to do the actual insert
* in. left_path should only be passed in if we need to update that
......@@ -3656,7 +3946,7 @@ static int ocfs2_insert_path(struct inode *inode,
static int ocfs2_do_insert_extent(struct inode *inode,
handle_t *handle,
struct buffer_head *di_bh,
struct ocfs2_extent_tree *et,
struct ocfs2_extent_rec *insert_rec,
struct ocfs2_insert_type *type)
{
......@@ -3664,13 +3954,11 @@ static int ocfs2_do_insert_extent(struct inode *inode,
u32 cpos;
struct ocfs2_path *right_path = NULL;
struct ocfs2_path *left_path = NULL;
struct ocfs2_dinode *di;
struct ocfs2_extent_list *el;
di = (struct ocfs2_dinode *) di_bh->b_data;
el = &di->id2.i_list;
el = et->et_root_el;
ret = ocfs2_journal_access(handle, inode, di_bh,
ret = ocfs2_journal_access(handle, inode, et->et_root_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (ret) {
mlog_errno(ret);
......@@ -3682,7 +3970,7 @@ static int ocfs2_do_insert_extent(struct inode *inode,
goto out_update_clusters;
}
right_path = ocfs2_new_inode_path(di_bh);
right_path = ocfs2_new_path(et->et_root_bh, et->et_root_el);
if (!right_path) {
ret = -ENOMEM;
mlog_errno(ret);
......@@ -3732,7 +4020,7 @@ static int ocfs2_do_insert_extent(struct inode *inode,
* ocfs2_rotate_tree_right() might have extended the
* transaction without re-journaling our tree root.
*/
ret = ocfs2_journal_access(handle, inode, di_bh,
ret = ocfs2_journal_access(handle, inode, et->et_root_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (ret) {
mlog_errno(ret);
......@@ -3757,10 +4045,10 @@ static int ocfs2_do_insert_extent(struct inode *inode,
out_update_clusters:
if (type->ins_split == SPLIT_NONE)
ocfs2_update_dinode_clusters(inode, di,
le16_to_cpu(insert_rec->e_leaf_clusters));
ocfs2_et_update_clusters(inode, et,
le16_to_cpu(insert_rec->e_leaf_clusters));
ret = ocfs2_journal_dirty(handle, di_bh);
ret = ocfs2_journal_dirty(handle, et->et_root_bh);
if (ret)
mlog_errno(ret);
......@@ -3890,7 +4178,8 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
static void ocfs2_figure_contig_type(struct inode *inode,
struct ocfs2_insert_type *insert,
struct ocfs2_extent_list *el,
struct ocfs2_extent_rec *insert_rec)
struct ocfs2_extent_rec *insert_rec,
struct ocfs2_extent_tree *et)
{
int i;
enum ocfs2_contig_type contig_type = CONTIG_NONE;
......@@ -3906,6 +4195,21 @@ static void ocfs2_figure_contig_type(struct inode *inode,
}
}
insert->ins_contig = contig_type;
if (insert->ins_contig != CONTIG_NONE) {
struct ocfs2_extent_rec *rec =
&el->l_recs[insert->ins_contig_index];
unsigned int len = le16_to_cpu(rec->e_leaf_clusters) +
le16_to_cpu(insert_rec->e_leaf_clusters);
/*
* Caller might want us to limit the size of extents, don't
* calculate contiguousness if we might exceed that limit.
*/
if (et->et_max_leaf_clusters &&
(len > et->et_max_leaf_clusters))
insert->ins_contig = CONTIG_NONE;
}
}
/*
......@@ -3914,8 +4218,8 @@ static void ocfs2_figure_contig_type(struct inode *inode,
* ocfs2_figure_appending_type() will figure out whether we'll have to
* insert at the tail of the rightmost leaf.
*
* This should also work against the dinode list for tree's with 0
* depth. If we consider the dinode list to be the rightmost leaf node
* This should also work against the root extent list for tree's with 0
* depth. If we consider the root extent list to be the rightmost leaf node
* then the logic here makes sense.
*/
static void ocfs2_figure_appending_type(struct ocfs2_insert_type *insert,
......@@ -3966,14 +4270,13 @@ static void ocfs2_figure_appending_type(struct ocfs2_insert_type *insert,
* structure.
*/
static int ocfs2_figure_insert_type(struct inode *inode,
struct buffer_head *di_bh,
struct ocfs2_extent_tree *et,
struct buffer_head **last_eb_bh,
struct ocfs2_extent_rec *insert_rec,
int *free_records,
struct ocfs2_insert_type *insert)
{
int ret;
struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
struct ocfs2_extent_block *eb;
struct ocfs2_extent_list *el;
struct ocfs2_path *path = NULL;
......@@ -3981,7 +4284,7 @@ static int ocfs2_figure_insert_type(struct inode *inode,
insert->ins_split = SPLIT_NONE;
el = &di->id2.i_list;
el = et->et_root_el;
insert->ins_tree_depth = le16_to_cpu(el->l_tree_depth);
if (el->l_tree_depth) {
......@@ -3991,9 +4294,7 @@ static int ocfs2_figure_insert_type(struct inode *inode,
* ocfs2_figure_insert_type() and ocfs2_add_branch()
* may want it later.
*/
ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
le64_to_cpu(di->i_last_eb_blk), &bh,
OCFS2_BH_CACHED, inode);
ret = ocfs2_read_block(inode, ocfs2_et_get_last_eb_blk(et), &bh);
if (ret) {
mlog_exit(ret);
goto out;
......@@ -4014,12 +4315,12 @@ static int ocfs2_figure_insert_type(struct inode *inode,
le16_to_cpu(el->l_next_free_rec);
if (!insert->ins_tree_depth) {
ocfs2_figure_contig_type(inode, insert, el, insert_rec);
ocfs2_figure_contig_type(inode, insert, el, insert_rec, et);
ocfs2_figure_appending_type(insert, el, insert_rec);
return 0;
}
path = ocfs2_new_inode_path(di_bh);
path = ocfs2_new_path(et->et_root_bh, et->et_root_el);
if (!path) {
ret = -ENOMEM;
mlog_errno(ret);
......@@ -4048,7 +4349,7 @@ static int ocfs2_figure_insert_type(struct inode *inode,
* into two types of appends: simple record append, or a
* rotate inside the tail leaf.
*/
ocfs2_figure_contig_type(inode, insert, el, insert_rec);
ocfs2_figure_contig_type(inode, insert, el, insert_rec, et);
/*
* The insert code isn't quite ready to deal with all cases of
......@@ -4069,7 +4370,8 @@ static int ocfs2_figure_insert_type(struct inode *inode,
* the case that we're doing a tail append, so maybe we can
* take advantage of that information somehow.
*/
if (le64_to_cpu(di->i_last_eb_blk) == path_leaf_bh(path)->b_blocknr) {
if (ocfs2_et_get_last_eb_blk(et) ==
path_leaf_bh(path)->b_blocknr) {
/*
* Ok, ocfs2_find_path() returned us the rightmost
* tree path. This might be an appending insert. There are
......@@ -4099,7 +4401,7 @@ static int ocfs2_figure_insert_type(struct inode *inode,
int ocfs2_insert_extent(struct ocfs2_super *osb,
handle_t *handle,
struct inode *inode,
struct buffer_head *fe_bh,
struct ocfs2_extent_tree *et,
u32 cpos,
u64 start_blk,
u32 new_clusters,
......@@ -4112,26 +4414,21 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
struct ocfs2_insert_type insert = {0, };
struct ocfs2_extent_rec rec;
BUG_ON(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL);
mlog(0, "add %u clusters at position %u to inode %llu\n",
new_clusters, cpos, (unsigned long long)OCFS2_I(inode)->ip_blkno);
mlog_bug_on_msg(!ocfs2_sparse_alloc(osb) &&
(OCFS2_I(inode)->ip_clusters != cpos),
"Device %s, asking for sparse allocation: inode %llu, "
"cpos %u, clusters %u\n",
osb->dev_str,
(unsigned long long)OCFS2_I(inode)->ip_blkno, cpos,
OCFS2_I(inode)->ip_clusters);
memset(&rec, 0, sizeof(rec));
rec.e_cpos = cpu_to_le32(cpos);
rec.e_blkno = cpu_to_le64(start_blk);
rec.e_leaf_clusters = cpu_to_le16(new_clusters);
rec.e_flags = flags;
status = ocfs2_et_insert_check(inode, et, &rec);
if (status) {
mlog_errno(status);
goto bail;
}
status = ocfs2_figure_insert_type(inode, fe_bh, &last_eb_bh, &rec,
status = ocfs2_figure_insert_type(inode, et, &last_eb_bh, &rec,
&free_records, &insert);
if (status < 0) {
mlog_errno(status);
......@@ -4145,7 +4442,7 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
free_records, insert.ins_tree_depth);
if (insert.ins_contig == CONTIG_NONE && free_records == 0) {
status = ocfs2_grow_tree(inode, handle, fe_bh,
status = ocfs2_grow_tree(inode, handle, et,
&insert.ins_tree_depth, &last_eb_bh,
meta_ac);
if (status) {
......@@ -4155,17 +4452,124 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
}
/* Finally, we can add clusters. This might rotate the tree for us. */
status = ocfs2_do_insert_extent(inode, handle, fe_bh, &rec, &insert);
status = ocfs2_do_insert_extent(inode, handle, et, &rec, &insert);
if (status < 0)
mlog_errno(status);
else
else if (et->et_ops == &ocfs2_dinode_et_ops)
ocfs2_extent_map_insert_rec(inode, &rec);
bail:
if (last_eb_bh)
brelse(last_eb_bh);
brelse(last_eb_bh);
mlog_exit(status);
return status;
}
/*
* Allcate and add clusters into the extent b-tree.
* The new clusters(clusters_to_add) will be inserted at logical_offset.
* The extent b-tree's root is specified by et, and
* it is not limited to the file storage. Any extent tree can use this
* function if it implements the proper ocfs2_extent_tree.
*/
int ocfs2_add_clusters_in_btree(struct ocfs2_super *osb,
struct inode *inode,
u32 *logical_offset,
u32 clusters_to_add,
int mark_unwritten,
struct ocfs2_extent_tree *et,
handle_t *handle,
struct ocfs2_alloc_context *data_ac,
struct ocfs2_alloc_context *meta_ac,
enum ocfs2_alloc_restarted *reason_ret)
{
int status = 0;
int free_extents;
enum ocfs2_alloc_restarted reason = RESTART_NONE;
u32 bit_off, num_bits;
u64 block;
u8 flags = 0;
BUG_ON(!clusters_to_add);
if (mark_unwritten)
flags = OCFS2_EXT_UNWRITTEN;
free_extents = ocfs2_num_free_extents(osb, inode, et);
if (free_extents < 0) {
status = free_extents;
mlog_errno(status);
goto leave;
}
/* there are two cases which could cause us to EAGAIN in the
* we-need-more-metadata case:
* 1) we haven't reserved *any*
* 2) we are so fragmented, we've needed to add metadata too
* many times. */
if (!free_extents && !meta_ac) {
mlog(0, "we haven't reserved any metadata!\n");
status = -EAGAIN;
reason = RESTART_META;
goto leave;
} else if ((!free_extents)
&& (ocfs2_alloc_context_bits_left(meta_ac)
< ocfs2_extend_meta_needed(et->et_root_el))) {
mlog(0, "filesystem is really fragmented...\n");
status = -EAGAIN;
reason = RESTART_META;
goto leave;
}
status = __ocfs2_claim_clusters(osb, handle, data_ac, 1,
clusters_to_add, &bit_off, &num_bits);
if (status < 0) {
if (status != -ENOSPC)
mlog_errno(status);
goto leave;
}
BUG_ON(num_bits > clusters_to_add);
/* reserve our write early -- insert_extent may update the inode */
status = ocfs2_journal_access(handle, inode, et->et_root_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (status < 0) {
mlog_errno(status);
goto leave;
}
block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
mlog(0, "Allocating %u clusters at block %u for inode %llu\n",
num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);
status = ocfs2_insert_extent(osb, handle, inode, et,
*logical_offset, block,
num_bits, flags, meta_ac);
if (status < 0) {
mlog_errno(status);
goto leave;
}
status = ocfs2_journal_dirty(handle, et->et_root_bh);
if (status < 0) {
mlog_errno(status);
goto leave;
}
clusters_to_add -= num_bits;
*logical_offset += num_bits;
if (clusters_to_add) {
mlog(0, "need to alloc once more, wanted = %u\n",
clusters_to_add);
status = -EAGAIN;
reason = RESTART_TRANS;
}
leave:
mlog_exit(status);
if (reason_ret)
*reason_ret = reason;
return status;
}
......@@ -4192,7 +4596,7 @@ static void ocfs2_make_right_split_rec(struct super_block *sb,
static int ocfs2_split_and_insert(struct inode *inode,
handle_t *handle,
struct ocfs2_path *path,
struct buffer_head *di_bh,
struct ocfs2_extent_tree *et,
struct buffer_head **last_eb_bh,
int split_index,
struct ocfs2_extent_rec *orig_split_rec,
......@@ -4206,7 +4610,6 @@ static int ocfs2_split_and_insert(struct inode *inode,
struct ocfs2_extent_rec split_rec = *orig_split_rec;
struct ocfs2_insert_type insert;
struct ocfs2_extent_block *eb;
struct ocfs2_dinode *di;
leftright:
/*
......@@ -4215,8 +4618,7 @@ static int ocfs2_split_and_insert(struct inode *inode,
*/
rec = path_leaf_el(path)->l_recs[split_index];
di = (struct ocfs2_dinode *)di_bh->b_data;
rightmost_el = &di->id2.i_list;
rightmost_el = et->et_root_el;
depth = le16_to_cpu(rightmost_el->l_tree_depth);
if (depth) {
......@@ -4227,8 +4629,8 @@ static int ocfs2_split_and_insert(struct inode *inode,
if (le16_to_cpu(rightmost_el->l_next_free_rec) ==
le16_to_cpu(rightmost_el->l_count)) {
ret = ocfs2_grow_tree(inode, handle, di_bh, &depth, last_eb_bh,
meta_ac);
ret = ocfs2_grow_tree(inode, handle, et,
&depth, last_eb_bh, meta_ac);
if (ret) {
mlog_errno(ret);
goto out;
......@@ -4265,8 +4667,7 @@ static int ocfs2_split_and_insert(struct inode *inode,
do_leftright = 1;
}
ret = ocfs2_do_insert_extent(inode, handle, di_bh, &split_rec,
&insert);
ret = ocfs2_do_insert_extent(inode, handle, et, &split_rec, &insert);
if (ret) {
mlog_errno(ret);
goto out;
......@@ -4308,8 +4709,9 @@ static int ocfs2_split_and_insert(struct inode *inode,
* of the tree is required. All other cases will degrade into a less
* optimal tree layout.
*
* last_eb_bh should be the rightmost leaf block for any inode with a
* btree. Since a split may grow the tree or a merge might shrink it, the caller cannot trust the contents of that buffer after this call.
* last_eb_bh should be the rightmost leaf block for any extent
* btree. Since a split may grow the tree or a merge might shrink it,
* the caller cannot trust the contents of that buffer after this call.
*
* This code is optimized for readability - several passes might be
* made over certain portions of the tree. All of those blocks will
......@@ -4317,7 +4719,7 @@ static int ocfs2_split_and_insert(struct inode *inode,
* extra overhead is not expressed in terms of disk reads.
*/
static int __ocfs2_mark_extent_written(struct inode *inode,
struct buffer_head *di_bh,
struct ocfs2_extent_tree *et,
handle_t *handle,
struct ocfs2_path *path,
int split_index,
......@@ -4357,11 +4759,9 @@ static int __ocfs2_mark_extent_written(struct inode *inode,
*/
if (path->p_tree_depth) {
struct ocfs2_extent_block *eb;
struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
le64_to_cpu(di->i_last_eb_blk),
&last_eb_bh, OCFS2_BH_CACHED, inode);
ret = ocfs2_read_block(inode, ocfs2_et_get_last_eb_blk(et),
&last_eb_bh);
if (ret) {
mlog_exit(ret);
goto out;
......@@ -4394,7 +4794,7 @@ static int __ocfs2_mark_extent_written(struct inode *inode,
if (ctxt.c_split_covers_rec)
el->l_recs[split_index] = *split_rec;
else
ret = ocfs2_split_and_insert(inode, handle, path, di_bh,
ret = ocfs2_split_and_insert(inode, handle, path, et,
&last_eb_bh, split_index,
split_rec, meta_ac);
if (ret)
......@@ -4402,7 +4802,7 @@ static int __ocfs2_mark_extent_written(struct inode *inode,
} else {
ret = ocfs2_try_to_merge_extent(inode, handle, path,
split_index, split_rec,
dealloc, &ctxt);
dealloc, &ctxt, et);
if (ret)
mlog_errno(ret);
}
......@@ -4420,7 +4820,8 @@ static int __ocfs2_mark_extent_written(struct inode *inode,
*
* The caller is responsible for passing down meta_ac if we'll need it.
*/
int ocfs2_mark_extent_written(struct inode *inode, struct buffer_head *di_bh,
int ocfs2_mark_extent_written(struct inode *inode,
struct ocfs2_extent_tree *et,
handle_t *handle, u32 cpos, u32 len, u32 phys,
struct ocfs2_alloc_context *meta_ac,
struct ocfs2_cached_dealloc_ctxt *dealloc)
......@@ -4446,10 +4847,14 @@ int ocfs2_mark_extent_written(struct inode *inode, struct buffer_head *di_bh,
/*
* XXX: This should be fixed up so that we just re-insert the
* next extent records.
*
* XXX: This is a hack on the extent tree, maybe it should be
* an op?
*/
ocfs2_extent_map_trunc(inode, 0);
if (et->et_ops == &ocfs2_dinode_et_ops)
ocfs2_extent_map_trunc(inode, 0);
left_path = ocfs2_new_inode_path(di_bh);
left_path = ocfs2_new_path(et->et_root_bh, et->et_root_el);
if (!left_path) {
ret = -ENOMEM;
mlog_errno(ret);
......@@ -4480,8 +4885,9 @@ int ocfs2_mark_extent_written(struct inode *inode, struct buffer_head *di_bh,
split_rec.e_flags = path_leaf_el(left_path)->l_recs[index].e_flags;
split_rec.e_flags &= ~OCFS2_EXT_UNWRITTEN;
ret = __ocfs2_mark_extent_written(inode, di_bh, handle, left_path,
index, &split_rec, meta_ac, dealloc);
ret = __ocfs2_mark_extent_written(inode, et, handle, left_path,
index, &split_rec, meta_ac,
dealloc);
if (ret)
mlog_errno(ret);
......@@ -4490,13 +4896,12 @@ int ocfs2_mark_extent_written(struct inode *inode, struct buffer_head *di_bh,
return ret;
}
static int ocfs2_split_tree(struct inode *inode, struct buffer_head *di_bh,
static int ocfs2_split_tree(struct inode *inode, struct ocfs2_extent_tree *et,
handle_t *handle, struct ocfs2_path *path,
int index, u32 new_range,
struct ocfs2_alloc_context *meta_ac)
{
int ret, depth, credits = handle->h_buffer_credits;
struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
struct buffer_head *last_eb_bh = NULL;
struct ocfs2_extent_block *eb;
struct ocfs2_extent_list *rightmost_el, *el;
......@@ -4513,9 +4918,8 @@ static int ocfs2_split_tree(struct inode *inode, struct buffer_head *di_bh,
depth = path->p_tree_depth;
if (depth > 0) {
ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
le64_to_cpu(di->i_last_eb_blk),
&last_eb_bh, OCFS2_BH_CACHED, inode);
ret = ocfs2_read_block(inode, ocfs2_et_get_last_eb_blk(et),
&last_eb_bh);
if (ret < 0) {
mlog_errno(ret);
goto out;
......@@ -4526,7 +4930,8 @@ static int ocfs2_split_tree(struct inode *inode, struct buffer_head *di_bh,
} else
rightmost_el = path_leaf_el(path);
credits += path->p_tree_depth + ocfs2_extend_meta_needed(di);
credits += path->p_tree_depth +
ocfs2_extend_meta_needed(et->et_root_el);
ret = ocfs2_extend_trans(handle, credits);
if (ret) {
mlog_errno(ret);
......@@ -4535,7 +4940,7 @@ static int ocfs2_split_tree(struct inode *inode, struct buffer_head *di_bh,
if (le16_to_cpu(rightmost_el->l_next_free_rec) ==
le16_to_cpu(rightmost_el->l_count)) {
ret = ocfs2_grow_tree(inode, handle, di_bh, &depth, &last_eb_bh,
ret = ocfs2_grow_tree(inode, handle, et, &depth, &last_eb_bh,
meta_ac);
if (ret) {
mlog_errno(ret);
......@@ -4549,7 +4954,7 @@ static int ocfs2_split_tree(struct inode *inode, struct buffer_head *di_bh,
insert.ins_split = SPLIT_RIGHT;
insert.ins_tree_depth = depth;
ret = ocfs2_do_insert_extent(inode, handle, di_bh, &split_rec, &insert);
ret = ocfs2_do_insert_extent(inode, handle, et, &split_rec, &insert);
if (ret)
mlog_errno(ret);
......@@ -4561,7 +4966,8 @@ static int ocfs2_split_tree(struct inode *inode, struct buffer_head *di_bh,
static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle,
struct ocfs2_path *path, int index,
struct ocfs2_cached_dealloc_ctxt *dealloc,
u32 cpos, u32 len)
u32 cpos, u32 len,
struct ocfs2_extent_tree *et)
{
int ret;
u32 left_cpos, rec_range, trunc_range;
......@@ -4573,7 +4979,7 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle,
struct ocfs2_extent_block *eb;
if (ocfs2_is_empty_extent(&el->l_recs[0]) && index > 0) {
ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc);
ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc, et);
if (ret) {
mlog_errno(ret);
goto out;
......@@ -4704,7 +5110,7 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle,
ocfs2_journal_dirty(handle, path_leaf_bh(path));
ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc);
ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc, et);
if (ret) {
mlog_errno(ret);
goto out;
......@@ -4715,7 +5121,8 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle,
return ret;
}
int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh,
int ocfs2_remove_extent(struct inode *inode,
struct ocfs2_extent_tree *et,
u32 cpos, u32 len, handle_t *handle,
struct ocfs2_alloc_context *meta_ac,
struct ocfs2_cached_dealloc_ctxt *dealloc)
......@@ -4724,11 +5131,11 @@ int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh,
u32 rec_range, trunc_range;
struct ocfs2_extent_rec *rec;
struct ocfs2_extent_list *el;
struct ocfs2_path *path;
struct ocfs2_path *path = NULL;
ocfs2_extent_map_trunc(inode, 0);
path = ocfs2_new_inode_path(di_bh);
path = ocfs2_new_path(et->et_root_bh, et->et_root_el);
if (!path) {
ret = -ENOMEM;
mlog_errno(ret);
......@@ -4781,13 +5188,13 @@ int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh,
if (le32_to_cpu(rec->e_cpos) == cpos || rec_range == trunc_range) {
ret = ocfs2_truncate_rec(inode, handle, path, index, dealloc,
cpos, len);
cpos, len, et);
if (ret) {
mlog_errno(ret);
goto out;
}
} else {
ret = ocfs2_split_tree(inode, di_bh, handle, path, index,
ret = ocfs2_split_tree(inode, et, handle, path, index,
trunc_range, meta_ac);
if (ret) {
mlog_errno(ret);
......@@ -4836,7 +5243,7 @@ int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh,
}
ret = ocfs2_truncate_rec(inode, handle, path, index, dealloc,
cpos, len);
cpos, len, et);
if (ret) {
mlog_errno(ret);
goto out;
......@@ -5179,8 +5586,7 @@ static int ocfs2_get_truncate_log_info(struct ocfs2_super *osb,
goto bail;
}
status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &bh,
OCFS2_BH_CACHED, inode);
status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &bh);
if (status < 0) {
iput(inode);
mlog_errno(status);
......@@ -5255,8 +5661,7 @@ int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb,
bail:
if (tl_inode)
iput(tl_inode);
if (tl_bh)
brelse(tl_bh);
brelse(tl_bh);
if (status < 0 && (*tl_copy)) {
kfree(*tl_copy);
......@@ -5999,20 +6404,13 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb,
return status;
}
static int ocfs2_writeback_zero_func(handle_t *handle, struct buffer_head *bh)
static int ocfs2_zero_func(handle_t *handle, struct buffer_head *bh)
{
set_buffer_uptodate(bh);
mark_buffer_dirty(bh);
return 0;
}
static int ocfs2_ordered_zero_func(handle_t *handle, struct buffer_head *bh)
{
set_buffer_uptodate(bh);
mark_buffer_dirty(bh);
return ocfs2_journal_dirty_data(handle, bh);
}
static void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle,
unsigned int from, unsigned int to,
struct page *page, int zero, u64 *phys)
......@@ -6031,17 +6429,18 @@ static void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle,
* here if they aren't - ocfs2_map_page_blocks()
* might've skipped some
*/
if (ocfs2_should_order_data(inode)) {
ret = walk_page_buffers(handle,
page_buffers(page),
from, to, &partial,
ocfs2_ordered_zero_func);
if (ret < 0)
mlog_errno(ret);
} else {
ret = walk_page_buffers(handle, page_buffers(page),
from, to, &partial,
ocfs2_zero_func);
if (ret < 0)
mlog_errno(ret);
else if (ocfs2_should_order_data(inode)) {
ret = ocfs2_jbd2_file_inode(handle, inode);
#ifdef CONFIG_OCFS2_COMPAT_JBD
ret = walk_page_buffers(handle, page_buffers(page),
from, to, &partial,
ocfs2_writeback_zero_func);
ocfs2_journal_dirty_data);
#endif
if (ret < 0)
mlog_errno(ret);
}
......@@ -6206,20 +6605,29 @@ int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle,
return ret;
}
static void ocfs2_zero_dinode_id2(struct inode *inode, struct ocfs2_dinode *di)
static void ocfs2_zero_dinode_id2_with_xattr(struct inode *inode,
struct ocfs2_dinode *di)
{
unsigned int blocksize = 1 << inode->i_sb->s_blocksize_bits;
unsigned int xattrsize = le16_to_cpu(di->i_xattr_inline_size);
memset(&di->id2, 0, blocksize - offsetof(struct ocfs2_dinode, id2));
if (le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_XATTR_FL)
memset(&di->id2, 0, blocksize -
offsetof(struct ocfs2_dinode, id2) -
xattrsize);
else
memset(&di->id2, 0, blocksize -
offsetof(struct ocfs2_dinode, id2));
}
void ocfs2_dinode_new_extent_list(struct inode *inode,
struct ocfs2_dinode *di)
{
ocfs2_zero_dinode_id2(inode, di);
ocfs2_zero_dinode_id2_with_xattr(inode, di);
di->id2.i_list.l_tree_depth = 0;
di->id2.i_list.l_next_free_rec = 0;
di->id2.i_list.l_count = cpu_to_le16(ocfs2_extent_recs_per_inode(inode->i_sb));
di->id2.i_list.l_count = cpu_to_le16(
ocfs2_extent_recs_per_inode_with_xattr(inode->i_sb, di));
}
void ocfs2_set_inode_data_inline(struct inode *inode, struct ocfs2_dinode *di)
......@@ -6236,9 +6644,10 @@ void ocfs2_set_inode_data_inline(struct inode *inode, struct ocfs2_dinode *di)
* We clear the entire i_data structure here so that all
* fields can be properly initialized.
*/
ocfs2_zero_dinode_id2(inode, di);
ocfs2_zero_dinode_id2_with_xattr(inode, di);
idata->id_count = cpu_to_le16(ocfs2_max_inline_data(inode->i_sb));
idata->id_count = cpu_to_le16(
ocfs2_max_inline_data_with_xattr(inode->i_sb, di));
}
int ocfs2_convert_inline_data_to_extents(struct inode *inode,
......@@ -6253,6 +6662,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
struct ocfs2_alloc_context *data_ac = NULL;
struct page **pages = NULL;
loff_t end = osb->s_clustersize;
struct ocfs2_extent_tree et;
has_data = i_size_read(inode) ? 1 : 0;
......@@ -6352,7 +6762,8 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
* this proves to be false, we could always re-build
* the in-inode data from our pages.
*/
ret = ocfs2_insert_extent(osb, handle, inode, di_bh,
ocfs2_init_dinode_extent_tree(&et, inode, di_bh);
ret = ocfs2_insert_extent(osb, handle, inode, &et,
0, block, 1, 0, NULL);
if (ret) {
mlog_errno(ret);
......@@ -6395,13 +6806,14 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb,
handle_t *handle = NULL;
struct inode *tl_inode = osb->osb_tl_inode;
struct ocfs2_path *path = NULL;
struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data;
mlog_entry_void();
new_highest_cpos = ocfs2_clusters_for_bytes(osb->sb,
i_size_read(inode));
path = ocfs2_new_inode_path(fe_bh);
path = ocfs2_new_path(fe_bh, &di->id2.i_list);
if (!path) {
status = -ENOMEM;
mlog_errno(status);
......@@ -6572,8 +6984,8 @@ int ocfs2_prepare_truncate(struct ocfs2_super *osb,
ocfs2_init_dealloc_ctxt(&(*tc)->tc_dealloc);
if (fe->id2.i_list.l_tree_depth) {
status = ocfs2_read_block(osb, le64_to_cpu(fe->i_last_eb_blk),
&last_eb_bh, OCFS2_BH_CACHED, inode);
status = ocfs2_read_block(inode, le64_to_cpu(fe->i_last_eb_blk),
&last_eb_bh);
if (status < 0) {
mlog_errno(status);
goto bail;
......@@ -6686,8 +7098,7 @@ static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc)
mlog(ML_NOTICE,
"Truncate completion has non-empty dealloc context\n");
if (tc->tc_last_eb_bh)
brelse(tc->tc_last_eb_bh);
brelse(tc->tc_last_eb_bh);
kfree(tc);
}
......@@ -26,30 +26,102 @@
#ifndef OCFS2_ALLOC_H
#define OCFS2_ALLOC_H
/*
* For xattr tree leaf, we limit the leaf byte size to be 64K.
*/
#define OCFS2_MAX_XATTR_TREE_LEAF_SIZE 65536
/*
* ocfs2_extent_tree and ocfs2_extent_tree_operations are used to abstract
* the b-tree operations in ocfs2. Now all the b-tree operations are not
* limited to ocfs2_dinode only. Any data which need to allocate clusters
* to store can use b-tree. And it only needs to implement its ocfs2_extent_tree
* and operation.
*
* ocfs2_extent_tree becomes the first-class object for extent tree
* manipulation. Callers of the alloc.c code need to fill it via one of
* the ocfs2_init_*_extent_tree() operations below.
*
* ocfs2_extent_tree contains info for the root of the b-tree, it must have a
* root ocfs2_extent_list and a root_bh so that they can be used in the b-tree
* functions.
* ocfs2_extent_tree_operations abstract the normal operations we do for
* the root of extent b-tree.
*/
struct ocfs2_extent_tree_operations;
struct ocfs2_extent_tree {
struct ocfs2_extent_tree_operations *et_ops;
struct buffer_head *et_root_bh;
struct ocfs2_extent_list *et_root_el;
void *et_object;
unsigned int et_max_leaf_clusters;
};
/*
* ocfs2_init_*_extent_tree() will fill an ocfs2_extent_tree from the
* specified object buffer.
*/
void ocfs2_init_dinode_extent_tree(struct ocfs2_extent_tree *et,
struct inode *inode,
struct buffer_head *bh);
void ocfs2_init_xattr_tree_extent_tree(struct ocfs2_extent_tree *et,
struct inode *inode,
struct buffer_head *bh);
void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et,
struct inode *inode,
struct buffer_head *bh,
struct ocfs2_xattr_value_root *xv);
struct ocfs2_alloc_context;
int ocfs2_insert_extent(struct ocfs2_super *osb,
handle_t *handle,
struct inode *inode,
struct buffer_head *fe_bh,
struct ocfs2_extent_tree *et,
u32 cpos,
u64 start_blk,
u32 new_clusters,
u8 flags,
struct ocfs2_alloc_context *meta_ac);
enum ocfs2_alloc_restarted {
RESTART_NONE = 0,
RESTART_TRANS,
RESTART_META
};
int ocfs2_add_clusters_in_btree(struct ocfs2_super *osb,
struct inode *inode,
u32 *logical_offset,
u32 clusters_to_add,
int mark_unwritten,
struct ocfs2_extent_tree *et,
handle_t *handle,
struct ocfs2_alloc_context *data_ac,
struct ocfs2_alloc_context *meta_ac,
enum ocfs2_alloc_restarted *reason_ret);
struct ocfs2_cached_dealloc_ctxt;
int ocfs2_mark_extent_written(struct inode *inode, struct buffer_head *di_bh,
int ocfs2_mark_extent_written(struct inode *inode,
struct ocfs2_extent_tree *et,
handle_t *handle, u32 cpos, u32 len, u32 phys,
struct ocfs2_alloc_context *meta_ac,
struct ocfs2_cached_dealloc_ctxt *dealloc);
int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh,
int ocfs2_remove_extent(struct inode *inode,
struct ocfs2_extent_tree *et,
u32 cpos, u32 len, handle_t *handle,
struct ocfs2_alloc_context *meta_ac,
struct ocfs2_cached_dealloc_ctxt *dealloc);
int ocfs2_num_free_extents(struct ocfs2_super *osb,
struct inode *inode,
struct ocfs2_dinode *fe);
/* how many new metadata chunks would an allocation need at maximum? */
static inline int ocfs2_extend_meta_needed(struct ocfs2_dinode *fe)
struct ocfs2_extent_tree *et);
/*
* how many new metadata chunks would an allocation need at maximum?
*
* Please note that the caller must make sure that root_el is the root
* of extent tree. So for an inode, it should be &fe->id2.i_list. Otherwise
* the result may be wrong.
*/
static inline int ocfs2_extend_meta_needed(struct ocfs2_extent_list *root_el)
{
/*
* Rather than do all the work of determining how much we need
......@@ -59,7 +131,7 @@ static inline int ocfs2_extend_meta_needed(struct ocfs2_dinode *fe)
* new tree_depth==0 extent_block, and one block at the new
* top-of-the tree.
*/
return le16_to_cpu(fe->id2.i_list.l_tree_depth) + 2;
return le16_to_cpu(root_el->l_tree_depth) + 2;
}
void ocfs2_dinode_new_extent_list(struct inode *inode, struct ocfs2_dinode *di);
......
......@@ -68,9 +68,7 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
goto bail;
}
status = ocfs2_read_block(OCFS2_SB(inode->i_sb),
OCFS2_I(inode)->ip_blkno,
&bh, OCFS2_BH_CACHED, inode);
status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &bh);
if (status < 0) {
mlog_errno(status);
goto bail;
......@@ -128,8 +126,7 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
err = 0;
bail:
if (bh)
brelse(bh);
brelse(bh);
mlog_exit(err);
return err;
......@@ -261,13 +258,11 @@ static int ocfs2_readpage_inline(struct inode *inode, struct page *page)
{
int ret;
struct buffer_head *di_bh = NULL;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
BUG_ON(!PageLocked(page));
BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL));
ret = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &di_bh,
OCFS2_BH_CACHED, inode);
ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &di_bh);
if (ret) {
mlog_errno(ret);
goto out;
......@@ -485,11 +480,14 @@ handle_t *ocfs2_start_walk_page_trans(struct inode *inode,
}
if (ocfs2_should_order_data(inode)) {
ret = ocfs2_jbd2_file_inode(handle, inode);
#ifdef CONFIG_OCFS2_COMPAT_JBD
ret = walk_page_buffers(handle,
page_buffers(page),
from, to, NULL,
ocfs2_journal_dirty_data);
if (ret < 0)
#endif
if (ret < 0)
mlog_errno(ret);
}
out:
......@@ -669,7 +667,7 @@ static void ocfs2_invalidatepage(struct page *page, unsigned long offset)
{
journal_t *journal = OCFS2_SB(page->mapping->host->i_sb)->journal->j_journal;
journal_invalidatepage(journal, page, offset);
jbd2_journal_invalidatepage(journal, page, offset);
}
static int ocfs2_releasepage(struct page *page, gfp_t wait)
......@@ -678,7 +676,7 @@ static int ocfs2_releasepage(struct page *page, gfp_t wait)
if (!page_has_buffers(page))
return 0;
return journal_try_to_free_buffers(journal, page, wait);
return jbd2_journal_try_to_free_buffers(journal, page, wait);
}
static ssize_t ocfs2_direct_IO(int rw,
......@@ -1074,11 +1072,15 @@ static void ocfs2_write_failure(struct inode *inode,
tmppage = wc->w_pages[i];
if (page_has_buffers(tmppage)) {
if (ocfs2_should_order_data(inode))
if (ocfs2_should_order_data(inode)) {
ocfs2_jbd2_file_inode(wc->w_handle, inode);
#ifdef CONFIG_OCFS2_COMPAT_JBD
walk_page_buffers(wc->w_handle,
page_buffers(tmppage),
from, to, NULL,
ocfs2_journal_dirty_data);
#endif
}
block_commit_write(tmppage, from, to);
}
......@@ -1242,6 +1244,7 @@ static int ocfs2_write_cluster(struct address_space *mapping,
int ret, i, new, should_zero = 0;
u64 v_blkno, p_blkno;
struct inode *inode = mapping->host;
struct ocfs2_extent_tree et;
new = phys == 0 ? 1 : 0;
if (new || unwritten)
......@@ -1255,10 +1258,10 @@ static int ocfs2_write_cluster(struct address_space *mapping,
* any additional semaphores or cluster locks.
*/
tmp_pos = cpos;
ret = ocfs2_do_extend_allocation(OCFS2_SB(inode->i_sb), inode,
&tmp_pos, 1, 0, wc->w_di_bh,
wc->w_handle, data_ac,
meta_ac, NULL);
ret = ocfs2_add_inode_data(OCFS2_SB(inode->i_sb), inode,
&tmp_pos, 1, 0, wc->w_di_bh,
wc->w_handle, data_ac,
meta_ac, NULL);
/*
* This shouldn't happen because we must have already
* calculated the correct meta data allocation required. The
......@@ -1276,7 +1279,8 @@ static int ocfs2_write_cluster(struct address_space *mapping,
goto out;
}
} else if (unwritten) {
ret = ocfs2_mark_extent_written(inode, wc->w_di_bh,
ocfs2_init_dinode_extent_tree(&et, inode, wc->w_di_bh);
ret = ocfs2_mark_extent_written(inode, &et,
wc->w_handle, cpos, 1, phys,
meta_ac, &wc->w_dealloc);
if (ret < 0) {
......@@ -1665,6 +1669,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
struct ocfs2_alloc_context *data_ac = NULL;
struct ocfs2_alloc_context *meta_ac = NULL;
handle_t *handle;
struct ocfs2_extent_tree et;
ret = ocfs2_alloc_write_ctxt(&wc, osb, pos, len, di_bh);
if (ret) {
......@@ -1712,14 +1717,23 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
* ocfs2_lock_allocators(). It greatly over-estimates
* the work to be done.
*/
ret = ocfs2_lock_allocators(inode, di, clusters_to_alloc,
extents_to_split, &data_ac, &meta_ac);
mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u,"
" clusters_to_add = %u, extents_to_split = %u\n",
(unsigned long long)OCFS2_I(inode)->ip_blkno,
(long long)i_size_read(inode), le32_to_cpu(di->i_clusters),
clusters_to_alloc, extents_to_split);
ocfs2_init_dinode_extent_tree(&et, inode, wc->w_di_bh);
ret = ocfs2_lock_allocators(inode, &et,
clusters_to_alloc, extents_to_split,
&data_ac, &meta_ac);
if (ret) {
mlog_errno(ret);
goto out;
}
credits = ocfs2_calc_extend_credits(inode->i_sb, di,
credits = ocfs2_calc_extend_credits(inode->i_sb,
&di->id2.i_list,
clusters_to_alloc);
}
......@@ -1905,11 +1919,15 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
}
if (page_has_buffers(tmppage)) {
if (ocfs2_should_order_data(inode))
if (ocfs2_should_order_data(inode)) {
ocfs2_jbd2_file_inode(wc->w_handle, inode);
#ifdef CONFIG_OCFS2_COMPAT_JBD
walk_page_buffers(wc->w_handle,
page_buffers(tmppage),
from, to, NULL,
ocfs2_journal_dirty_data);
#endif
}
block_commit_write(tmppage, from, to);
}
}
......
......@@ -66,7 +66,7 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
/* remove from dirty list before I/O. */
clear_buffer_dirty(bh);
get_bh(bh); /* for end_buffer_write_sync() */
get_bh(bh); /* for end_buffer_write_sync() */
bh->b_end_io = end_buffer_write_sync;
submit_bh(WRITE, bh);
......@@ -88,22 +88,103 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
return ret;
}
int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
struct buffer_head *bhs[], int flags,
struct inode *inode)
int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
unsigned int nr, struct buffer_head *bhs[])
{
int status = 0;
unsigned int i;
struct buffer_head *bh;
if (!nr) {
mlog(ML_BH_IO, "No buffers will be read!\n");
goto bail;
}
for (i = 0 ; i < nr ; i++) {
if (bhs[i] == NULL) {
bhs[i] = sb_getblk(osb->sb, block++);
if (bhs[i] == NULL) {
status = -EIO;
mlog_errno(status);
goto bail;
}
}
bh = bhs[i];
if (buffer_jbd(bh)) {
mlog(ML_ERROR,
"trying to sync read a jbd "
"managed bh (blocknr = %llu), skipping\n",
(unsigned long long)bh->b_blocknr);
continue;
}
if (buffer_dirty(bh)) {
/* This should probably be a BUG, or
* at least return an error. */
mlog(ML_ERROR,
"trying to sync read a dirty "
"buffer! (blocknr = %llu), skipping\n",
(unsigned long long)bh->b_blocknr);
continue;
}
lock_buffer(bh);
if (buffer_jbd(bh)) {
mlog(ML_ERROR,
"block %llu had the JBD bit set "
"while I was in lock_buffer!",
(unsigned long long)bh->b_blocknr);
BUG();
}
clear_buffer_uptodate(bh);
get_bh(bh); /* for end_buffer_read_sync() */
bh->b_end_io = end_buffer_read_sync;
submit_bh(READ, bh);
}
for (i = nr; i > 0; i--) {
bh = bhs[i - 1];
if (buffer_jbd(bh)) {
mlog(ML_ERROR,
"the journal got the buffer while it was "
"locked for io! (blocknr = %llu)\n",
(unsigned long long)bh->b_blocknr);
BUG();
}
wait_on_buffer(bh);
if (!buffer_uptodate(bh)) {
/* Status won't be cleared from here on out,
* so we can safely record this and loop back
* to cleanup the other buffers. */
status = -EIO;
put_bh(bh);
bhs[i - 1] = NULL;
}
}
bail:
return status;
}
int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
struct buffer_head *bhs[], int flags)
{
int status = 0;
struct super_block *sb;
int i, ignore_cache = 0;
struct buffer_head *bh;
mlog_entry("(block=(%llu), nr=(%d), flags=%d, inode=%p)\n",
(unsigned long long)block, nr, flags, inode);
mlog_entry("(inode=%p, block=(%llu), nr=(%d), flags=%d)\n",
inode, (unsigned long long)block, nr, flags);
BUG_ON(!inode);
BUG_ON((flags & OCFS2_BH_READAHEAD) &&
(!inode || !(flags & OCFS2_BH_CACHED)));
(flags & OCFS2_BH_IGNORE_CACHE));
if (osb == NULL || osb->sb == NULL || bhs == NULL) {
if (bhs == NULL) {
status = -EINVAL;
mlog_errno(status);
goto bail;
......@@ -122,26 +203,19 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
goto bail;
}
sb = osb->sb;
if (flags & OCFS2_BH_CACHED && !inode)
flags &= ~OCFS2_BH_CACHED;
if (inode)
mutex_lock(&OCFS2_I(inode)->ip_io_mutex);
mutex_lock(&OCFS2_I(inode)->ip_io_mutex);
for (i = 0 ; i < nr ; i++) {
if (bhs[i] == NULL) {
bhs[i] = sb_getblk(sb, block++);
bhs[i] = sb_getblk(inode->i_sb, block++);
if (bhs[i] == NULL) {
if (inode)
mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
status = -EIO;
mlog_errno(status);
goto bail;
}
}
bh = bhs[i];
ignore_cache = 0;
ignore_cache = (flags & OCFS2_BH_IGNORE_CACHE);
/* There are three read-ahead cases here which we need to
* be concerned with. All three assume a buffer has
......@@ -167,26 +241,27 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
* before our is-it-in-flight check.
*/
if (flags & OCFS2_BH_CACHED &&
!ocfs2_buffer_uptodate(inode, bh)) {
if (!ignore_cache && !ocfs2_buffer_uptodate(inode, bh)) {
mlog(ML_UPTODATE,
"bh (%llu), inode %llu not uptodate\n",
(unsigned long long)bh->b_blocknr,
(unsigned long long)OCFS2_I(inode)->ip_blkno);
/* We're using ignore_cache here to say
* "go to disk" */
ignore_cache = 1;
}
/* XXX: Can we ever get this and *not* have the cached
* flag set? */
if (buffer_jbd(bh)) {
if (!(flags & OCFS2_BH_CACHED) || ignore_cache)
if (ignore_cache)
mlog(ML_BH_IO, "trying to sync read a jbd "
"managed bh (blocknr = %llu)\n",
(unsigned long long)bh->b_blocknr);
continue;
}
if (!(flags & OCFS2_BH_CACHED) || ignore_cache) {
if (ignore_cache) {
if (buffer_dirty(bh)) {
/* This should probably be a BUG, or
* at least return an error. */
......@@ -221,7 +296,7 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
* previously read-ahead buffer may have
* completed I/O while we were waiting for the
* buffer lock. */
if ((flags & OCFS2_BH_CACHED)
if (!(flags & OCFS2_BH_IGNORE_CACHE)
&& !(flags & OCFS2_BH_READAHEAD)
&& ocfs2_buffer_uptodate(inode, bh)) {
unlock_buffer(bh);
......@@ -265,15 +340,14 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
/* Always set the buffer in the cache, even if it was
* a forced read, or read-ahead which hasn't yet
* completed. */
if (inode)
ocfs2_set_buffer_uptodate(inode, bh);
ocfs2_set_buffer_uptodate(inode, bh);
}
if (inode)
mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n",
(unsigned long long)block, nr,
(!(flags & OCFS2_BH_CACHED) || ignore_cache) ? "no" : "yes", flags);
((flags & OCFS2_BH_IGNORE_CACHE) || ignore_cache) ? "no" : "yes",
flags);
bail:
......
......@@ -31,31 +31,29 @@
void ocfs2_end_buffer_io_sync(struct buffer_head *bh,
int uptodate);
static inline int ocfs2_read_block(struct ocfs2_super *osb,
static inline int ocfs2_read_block(struct inode *inode,
u64 off,
struct buffer_head **bh,
int flags,
struct inode *inode);
struct buffer_head **bh);
int ocfs2_write_block(struct ocfs2_super *osb,
struct buffer_head *bh,
struct inode *inode);
int ocfs2_read_blocks(struct ocfs2_super *osb,
int ocfs2_read_blocks(struct inode *inode,
u64 block,
int nr,
struct buffer_head *bhs[],
int flags,
struct inode *inode);
int flags);
int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
unsigned int nr, struct buffer_head *bhs[]);
int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
struct buffer_head *bh);
#define OCFS2_BH_CACHED 1
#define OCFS2_BH_IGNORE_CACHE 1
#define OCFS2_BH_READAHEAD 8
static inline int ocfs2_read_block(struct ocfs2_super * osb, u64 off,
struct buffer_head **bh, int flags,
struct inode *inode)
static inline int ocfs2_read_block(struct inode *inode, u64 off,
struct buffer_head **bh)
{
int status = 0;
......@@ -65,8 +63,7 @@ static inline int ocfs2_read_block(struct ocfs2_super * osb, u64 off,
goto bail;
}
status = ocfs2_read_blocks(osb, off, 1, bh,
flags, inode);
status = ocfs2_read_blocks(inode, off, 1, bh, 0);
bail:
return status;
......
......@@ -109,6 +109,7 @@ static struct mlog_attribute mlog_attrs[MLOG_MAX_BITS] = {
define_mask(CONN),
define_mask(QUORUM),
define_mask(EXPORT),
define_mask(XATTR),
define_mask(ERROR),
define_mask(NOTICE),
define_mask(KTHREAD),
......
......@@ -112,6 +112,7 @@
#define ML_CONN 0x0000000004000000ULL /* net connection management */
#define ML_QUORUM 0x0000000008000000ULL /* net connection quorum */
#define ML_EXPORT 0x0000000010000000ULL /* ocfs2 export operations */
#define ML_XATTR 0x0000000020000000ULL /* ocfs2 extended attributes */
/* bits that are infrequently given and frequently matched in the high word */
#define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */
#define ML_NOTICE 0x0000000200000000ULL /* setn to KERN_NOTICE */
......
......@@ -82,6 +82,49 @@ static int ocfs2_do_extend_dir(struct super_block *sb,
struct ocfs2_alloc_context *meta_ac,
struct buffer_head **new_bh);
static struct buffer_head *ocfs2_bread(struct inode *inode,
int block, int *err, int reada)
{
struct buffer_head *bh = NULL;
int tmperr;
u64 p_blkno;
int readflags = 0;
if (reada)
readflags |= OCFS2_BH_READAHEAD;
if (((u64)block << inode->i_sb->s_blocksize_bits) >=
i_size_read(inode)) {
BUG_ON(!reada);
return NULL;
}
down_read(&OCFS2_I(inode)->ip_alloc_sem);
tmperr = ocfs2_extent_map_get_blocks(inode, block, &p_blkno, NULL,
NULL);
up_read(&OCFS2_I(inode)->ip_alloc_sem);
if (tmperr < 0) {
mlog_errno(tmperr);
goto fail;
}
tmperr = ocfs2_read_blocks(inode, p_blkno, 1, &bh, readflags);
if (tmperr < 0)
goto fail;
tmperr = 0;
*err = 0;
return bh;
fail:
brelse(bh);
bh = NULL;
*err = -EIO;
return NULL;
}
/*
* bh passed here can be an inode block or a dir data block, depending
* on the inode inline data flag.
......@@ -188,8 +231,7 @@ static struct buffer_head *ocfs2_find_entry_id(const char *name,
struct ocfs2_dinode *di;
struct ocfs2_inline_data *data;
ret = ocfs2_read_block(OCFS2_SB(dir->i_sb), OCFS2_I(dir)->ip_blkno,
&di_bh, OCFS2_BH_CACHED, dir);
ret = ocfs2_read_block(dir, OCFS2_I(dir)->ip_blkno, &di_bh);
if (ret) {
mlog_errno(ret);
goto out;
......@@ -260,14 +302,13 @@ static struct buffer_head *ocfs2_find_entry_el(const char *name, int namelen,
}
if ((bh = bh_use[ra_ptr++]) == NULL)
goto next;
wait_on_buffer(bh);
if (!buffer_uptodate(bh)) {
/* read error, skip block & hope for the best */
if (ocfs2_read_block(dir, block, &bh)) {
/* read error, skip block & hope for the best.
* ocfs2_read_block() has released the bh. */
ocfs2_error(dir->i_sb, "reading directory %llu, "
"offset %lu\n",
(unsigned long long)OCFS2_I(dir)->ip_blkno,
block);
brelse(bh);
goto next;
}
i = ocfs2_search_dirblock(bh, dir, name, namelen,
......@@ -417,8 +458,7 @@ static inline int ocfs2_delete_entry_id(handle_t *handle,
struct ocfs2_dinode *di;
struct ocfs2_inline_data *data;
ret = ocfs2_read_block(OCFS2_SB(dir->i_sb), OCFS2_I(dir)->ip_blkno,
&di_bh, OCFS2_BH_CACHED, dir);
ret = ocfs2_read_block(dir, OCFS2_I(dir)->ip_blkno, &di_bh);
if (ret) {
mlog_errno(ret);
goto out;
......@@ -596,8 +636,7 @@ static int ocfs2_dir_foreach_blk_id(struct inode *inode,
struct ocfs2_inline_data *data;
struct ocfs2_dir_entry *de;
ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), OCFS2_I(inode)->ip_blkno,
&di_bh, OCFS2_BH_CACHED, inode);
ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &di_bh);
if (ret) {
mlog(ML_ERROR, "Unable to read inode block for dir %llu\n",
(unsigned long long)OCFS2_I(inode)->ip_blkno);
......@@ -716,8 +755,7 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode,
for (i = ra_sectors >> (sb->s_blocksize_bits - 9);
i > 0; i--) {
tmp = ocfs2_bread(inode, ++blk, &err, 1);
if (tmp)
brelse(tmp);
brelse(tmp);
}
last_ra_blk = blk;
ra_sectors = 8;
......@@ -899,10 +937,8 @@ int ocfs2_find_files_on_disk(const char *name,
leave:
if (status < 0) {
*dirent = NULL;
if (*dirent_bh) {
brelse(*dirent_bh);
*dirent_bh = NULL;
}
brelse(*dirent_bh);
*dirent_bh = NULL;
}
mlog_exit(status);
......@@ -951,8 +987,7 @@ int ocfs2_check_dir_for_entry(struct inode *dir,
ret = 0;
bail:
if (dirent_bh)
brelse(dirent_bh);
brelse(dirent_bh);
mlog_exit(ret);
return ret;
......@@ -1127,8 +1162,7 @@ static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb,
status = 0;
bail:
if (new_bh)
brelse(new_bh);
brelse(new_bh);
mlog_exit(status);
return status;
......@@ -1192,6 +1226,9 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
struct buffer_head *dirdata_bh = NULL;
struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
handle_t *handle;
struct ocfs2_extent_tree et;
ocfs2_init_dinode_extent_tree(&et, dir, di_bh);
alloc = ocfs2_clusters_for_bytes(sb, bytes);
......@@ -1305,8 +1342,8 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
* This should never fail as our extent list is empty and all
* related blocks have been journaled already.
*/
ret = ocfs2_insert_extent(osb, handle, dir, di_bh, 0, blkno, len, 0,
NULL);
ret = ocfs2_insert_extent(osb, handle, dir, &et, 0, blkno, len,
0, NULL);
if (ret) {
mlog_errno(ret);
goto out_commit;
......@@ -1337,8 +1374,8 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
}
blkno = ocfs2_clusters_to_blocks(dir->i_sb, bit_off);
ret = ocfs2_insert_extent(osb, handle, dir, di_bh, 1, blkno,
len, 0, NULL);
ret = ocfs2_insert_extent(osb, handle, dir, &et, 1,
blkno, len, 0, NULL);
if (ret) {
mlog_errno(ret);
goto out_commit;
......@@ -1383,9 +1420,9 @@ static int ocfs2_do_extend_dir(struct super_block *sb,
if (extend) {
u32 offset = OCFS2_I(dir)->ip_clusters;
status = ocfs2_do_extend_allocation(OCFS2_SB(sb), dir, &offset,
1, 0, parent_fe_bh, handle,
data_ac, meta_ac, NULL);
status = ocfs2_add_inode_data(OCFS2_SB(sb), dir, &offset,
1, 0, parent_fe_bh, handle,
data_ac, meta_ac, NULL);
BUG_ON(status == -EAGAIN);
if (status < 0) {
mlog_errno(status);
......@@ -1430,12 +1467,14 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
int credits, num_free_extents, drop_alloc_sem = 0;
loff_t dir_i_size;
struct ocfs2_dinode *fe = (struct ocfs2_dinode *) parent_fe_bh->b_data;
struct ocfs2_extent_list *el = &fe->id2.i_list;
struct ocfs2_alloc_context *data_ac = NULL;
struct ocfs2_alloc_context *meta_ac = NULL;
handle_t *handle = NULL;
struct buffer_head *new_bh = NULL;
struct ocfs2_dir_entry * de;
struct super_block *sb = osb->sb;
struct ocfs2_extent_tree et;
mlog_entry_void();
......@@ -1479,7 +1518,8 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
spin_lock(&OCFS2_I(dir)->ip_lock);
if (dir_i_size == ocfs2_clusters_to_bytes(sb, OCFS2_I(dir)->ip_clusters)) {
spin_unlock(&OCFS2_I(dir)->ip_lock);
num_free_extents = ocfs2_num_free_extents(osb, dir, fe);
ocfs2_init_dinode_extent_tree(&et, dir, parent_fe_bh);
num_free_extents = ocfs2_num_free_extents(osb, dir, &et);
if (num_free_extents < 0) {
status = num_free_extents;
mlog_errno(status);
......@@ -1487,7 +1527,7 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
}
if (!num_free_extents) {
status = ocfs2_reserve_new_metadata(osb, fe, &meta_ac);
status = ocfs2_reserve_new_metadata(osb, el, &meta_ac);
if (status < 0) {
if (status != -ENOSPC)
mlog_errno(status);
......@@ -1502,7 +1542,7 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
goto bail;
}
credits = ocfs2_calc_extend_credits(sb, fe, 1);
credits = ocfs2_calc_extend_credits(sb, el, 1);
} else {
spin_unlock(&OCFS2_I(dir)->ip_lock);
credits = OCFS2_SIMPLE_DIR_EXTEND_CREDITS;
......@@ -1568,8 +1608,7 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
if (meta_ac)
ocfs2_free_alloc_context(meta_ac);
if (new_bh)
brelse(new_bh);
brelse(new_bh);
mlog_exit(status);
return status;
......@@ -1696,8 +1735,7 @@ static int ocfs2_find_dir_space_el(struct inode *dir, const char *name,
status = 0;
bail:
if (bh)
brelse(bh);
brelse(bh);
mlog_exit(status);
return status;
......@@ -1756,7 +1794,6 @@ int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb,
*ret_de_bh = bh;
bh = NULL;
out:
if (bh)
brelse(bh);
brelse(bh);
return ret;
}
......@@ -2024,8 +2024,7 @@ static int ocfs2_inode_lock_update(struct inode *inode,
} else {
/* Boo, we have to go to disk. */
/* read bh, cast, ocfs2_refresh_inode */
status = ocfs2_read_block(OCFS2_SB(inode->i_sb), oi->ip_blkno,
bh, OCFS2_BH_CACHED, inode);
status = ocfs2_read_block(inode, oi->ip_blkno, bh);
if (status < 0) {
mlog_errno(status);
goto bail_refresh;
......@@ -2086,11 +2085,7 @@ static int ocfs2_assign_bh(struct inode *inode,
return 0;
}
status = ocfs2_read_block(OCFS2_SB(inode->i_sb),
OCFS2_I(inode)->ip_blkno,
ret_bh,
OCFS2_BH_CACHED,
inode);
status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, ret_bh);
if (status < 0)
mlog_errno(status);
......
......@@ -293,8 +293,7 @@ static int ocfs2_last_eb_is_empty(struct inode *inode,
struct ocfs2_extent_block *eb;
struct ocfs2_extent_list *el;
ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), last_eb_blk,
&eb_bh, OCFS2_BH_CACHED, inode);
ret = ocfs2_read_block(inode, last_eb_blk, &eb_bh);
if (ret) {
mlog_errno(ret);
goto out;
......@@ -382,9 +381,9 @@ static int ocfs2_figure_hole_clusters(struct inode *inode,
if (le64_to_cpu(eb->h_next_leaf_blk) == 0ULL)
goto no_more_extents;
ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
ret = ocfs2_read_block(inode,
le64_to_cpu(eb->h_next_leaf_blk),
&next_eb_bh, OCFS2_BH_CACHED, inode);
&next_eb_bh);
if (ret) {
mlog_errno(ret);
goto out;
......@@ -551,6 +550,66 @@ static void ocfs2_relative_extent_offsets(struct super_block *sb,
*num_clusters = le16_to_cpu(rec->e_leaf_clusters) - coff;
}
int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
u32 *p_cluster, u32 *num_clusters,
struct ocfs2_extent_list *el)
{
int ret = 0, i;
struct buffer_head *eb_bh = NULL;
struct ocfs2_extent_block *eb;
struct ocfs2_extent_rec *rec;
u32 coff;
if (el->l_tree_depth) {
ret = ocfs2_find_leaf(inode, el, v_cluster, &eb_bh);
if (ret) {
mlog_errno(ret);
goto out;
}
eb = (struct ocfs2_extent_block *) eb_bh->b_data;
el = &eb->h_list;
if (el->l_tree_depth) {
ocfs2_error(inode->i_sb,
"Inode %lu has non zero tree depth in "
"xattr leaf block %llu\n", inode->i_ino,
(unsigned long long)eb_bh->b_blocknr);
ret = -EROFS;
goto out;
}
}
i = ocfs2_search_extent_list(el, v_cluster);
if (i == -1) {
ret = -EROFS;
mlog_errno(ret);
goto out;
} else {
rec = &el->l_recs[i];
BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos));
if (!rec->e_blkno) {
ocfs2_error(inode->i_sb, "Inode %lu has bad extent "
"record (%u, %u, 0) in xattr", inode->i_ino,
le32_to_cpu(rec->e_cpos),
ocfs2_rec_clusters(el, rec));
ret = -EROFS;
goto out;
}
coff = v_cluster - le32_to_cpu(rec->e_cpos);
*p_cluster = ocfs2_blocks_to_clusters(inode->i_sb,
le64_to_cpu(rec->e_blkno));
*p_cluster = *p_cluster + coff;
if (num_clusters)
*num_clusters = ocfs2_rec_clusters(el, rec) - coff;
}
out:
if (eb_bh)
brelse(eb_bh);
return ret;
}
int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
u32 *p_cluster, u32 *num_clusters,
unsigned int *extent_flags)
......@@ -571,8 +630,7 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
if (ret == 0)
goto out;
ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), OCFS2_I(inode)->ip_blkno,
&di_bh, OCFS2_BH_CACHED, inode);
ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &di_bh);
if (ret) {
mlog_errno(ret);
goto out;
......
......@@ -53,4 +53,8 @@ int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno,
int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u64 map_start, u64 map_len);
int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
u32 *p_cluster, u32 *num_clusters,
struct ocfs2_extent_list *el);
#endif /* _EXTENT_MAP_H */
......@@ -55,6 +55,7 @@
#include "mmap.h"
#include "suballoc.h"
#include "super.h"
#include "xattr.h"
#include "buffer_head_io.h"
......@@ -184,7 +185,7 @@ static int ocfs2_sync_file(struct file *file,
goto bail;
journal = osb->journal->j_journal;
err = journal_force_commit(journal);
err = jbd2_journal_force_commit(journal);
bail:
mlog_exit(err);
......@@ -488,7 +489,7 @@ static int ocfs2_truncate_file(struct inode *inode,
}
/*
* extend allocation only here.
* extend file allocation only here.
* we'll update all the disk stuff, and oip->alloc_size
*
* expect stuff to be locked, a transaction started and enough data /
......@@ -497,189 +498,25 @@ static int ocfs2_truncate_file(struct inode *inode,
* Will return -EAGAIN, and a reason if a restart is needed.
* If passed in, *reason will always be set, even in error.
*/
int ocfs2_do_extend_allocation(struct ocfs2_super *osb,
struct inode *inode,
u32 *logical_offset,
u32 clusters_to_add,
int mark_unwritten,
struct buffer_head *fe_bh,
handle_t *handle,
struct ocfs2_alloc_context *data_ac,
struct ocfs2_alloc_context *meta_ac,
enum ocfs2_alloc_restarted *reason_ret)
int ocfs2_add_inode_data(struct ocfs2_super *osb,
struct inode *inode,
u32 *logical_offset,
u32 clusters_to_add,
int mark_unwritten,
struct buffer_head *fe_bh,
handle_t *handle,
struct ocfs2_alloc_context *data_ac,
struct ocfs2_alloc_context *meta_ac,
enum ocfs2_alloc_restarted *reason_ret)
{
int status = 0;
int free_extents;
struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data;
enum ocfs2_alloc_restarted reason = RESTART_NONE;
u32 bit_off, num_bits;
u64 block;
u8 flags = 0;
BUG_ON(!clusters_to_add);
if (mark_unwritten)
flags = OCFS2_EXT_UNWRITTEN;
free_extents = ocfs2_num_free_extents(osb, inode, fe);
if (free_extents < 0) {
status = free_extents;
mlog_errno(status);
goto leave;
}
/* there are two cases which could cause us to EAGAIN in the
* we-need-more-metadata case:
* 1) we haven't reserved *any*
* 2) we are so fragmented, we've needed to add metadata too
* many times. */
if (!free_extents && !meta_ac) {
mlog(0, "we haven't reserved any metadata!\n");
status = -EAGAIN;
reason = RESTART_META;
goto leave;
} else if ((!free_extents)
&& (ocfs2_alloc_context_bits_left(meta_ac)
< ocfs2_extend_meta_needed(fe))) {
mlog(0, "filesystem is really fragmented...\n");
status = -EAGAIN;
reason = RESTART_META;
goto leave;
}
status = __ocfs2_claim_clusters(osb, handle, data_ac, 1,
clusters_to_add, &bit_off, &num_bits);
if (status < 0) {
if (status != -ENOSPC)
mlog_errno(status);
goto leave;
}
BUG_ON(num_bits > clusters_to_add);
/* reserve our write early -- insert_extent may update the inode */
status = ocfs2_journal_access(handle, inode, fe_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (status < 0) {
mlog_errno(status);
goto leave;
}
block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
mlog(0, "Allocating %u clusters at block %u for inode %llu\n",
num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);
status = ocfs2_insert_extent(osb, handle, inode, fe_bh,
*logical_offset, block, num_bits,
flags, meta_ac);
if (status < 0) {
mlog_errno(status);
goto leave;
}
status = ocfs2_journal_dirty(handle, fe_bh);
if (status < 0) {
mlog_errno(status);
goto leave;
}
clusters_to_add -= num_bits;
*logical_offset += num_bits;
if (clusters_to_add) {
mlog(0, "need to alloc once more, clusters = %u, wanted = "
"%u\n", fe->i_clusters, clusters_to_add);
status = -EAGAIN;
reason = RESTART_TRANS;
}
leave:
mlog_exit(status);
if (reason_ret)
*reason_ret = reason;
return status;
}
/*
* For a given allocation, determine which allocators will need to be
* accessed, and lock them, reserving the appropriate number of bits.
*
* Sparse file systems call this from ocfs2_write_begin_nolock()
* and ocfs2_allocate_unwritten_extents().
*
* File systems which don't support holes call this from
* ocfs2_extend_allocation().
*/
int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di,
u32 clusters_to_add, u32 extents_to_split,
struct ocfs2_alloc_context **data_ac,
struct ocfs2_alloc_context **meta_ac)
{
int ret = 0, num_free_extents;
unsigned int max_recs_needed = clusters_to_add + 2 * extents_to_split;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
*meta_ac = NULL;
if (data_ac)
*data_ac = NULL;
BUG_ON(clusters_to_add != 0 && data_ac == NULL);
mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u, "
"clusters_to_add = %u, extents_to_split = %u\n",
(unsigned long long)OCFS2_I(inode)->ip_blkno, (long long)i_size_read(inode),
le32_to_cpu(di->i_clusters), clusters_to_add, extents_to_split);
num_free_extents = ocfs2_num_free_extents(osb, inode, di);
if (num_free_extents < 0) {
ret = num_free_extents;
mlog_errno(ret);
goto out;
}
/*
* Sparse allocation file systems need to be more conservative
* with reserving room for expansion - the actual allocation
* happens while we've got a journal handle open so re-taking
* a cluster lock (because we ran out of room for another
* extent) will violate ordering rules.
*
* Most of the time we'll only be seeing this 1 cluster at a time
* anyway.
*
* Always lock for any unwritten extents - we might want to
* add blocks during a split.
*/
if (!num_free_extents ||
(ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed)) {
ret = ocfs2_reserve_new_metadata(osb, di, meta_ac);
if (ret < 0) {
if (ret != -ENOSPC)
mlog_errno(ret);
goto out;
}
}
if (clusters_to_add == 0)
goto out;
ret = ocfs2_reserve_clusters(osb, clusters_to_add, data_ac);
if (ret < 0) {
if (ret != -ENOSPC)
mlog_errno(ret);
goto out;
}
out:
if (ret) {
if (*meta_ac) {
ocfs2_free_alloc_context(*meta_ac);
*meta_ac = NULL;
}
int ret;
struct ocfs2_extent_tree et;
/*
* We cannot have an error and a non null *data_ac.
*/
}
ocfs2_init_dinode_extent_tree(&et, inode, fe_bh);
ret = ocfs2_add_clusters_in_btree(osb, inode, logical_offset,
clusters_to_add, mark_unwritten,
&et, handle,
data_ac, meta_ac, reason_ret);
return ret;
}
......@@ -698,6 +535,7 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
struct ocfs2_alloc_context *meta_ac = NULL;
enum ocfs2_alloc_restarted why;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct ocfs2_extent_tree et;
mlog_entry("(clusters_to_add = %u)\n", clusters_to_add);
......@@ -707,8 +545,7 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
*/
BUG_ON(mark_unwritten && !ocfs2_sparse_alloc(osb));
status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &bh,
OCFS2_BH_CACHED, inode);
status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &bh);
if (status < 0) {
mlog_errno(status);
goto leave;
......@@ -724,14 +561,21 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
restart_all:
BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters);
status = ocfs2_lock_allocators(inode, fe, clusters_to_add, 0, &data_ac,
&meta_ac);
mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u, "
"clusters_to_add = %u\n",
(unsigned long long)OCFS2_I(inode)->ip_blkno,
(long long)i_size_read(inode), le32_to_cpu(fe->i_clusters),
clusters_to_add);
ocfs2_init_dinode_extent_tree(&et, inode, bh);
status = ocfs2_lock_allocators(inode, &et, clusters_to_add, 0,
&data_ac, &meta_ac);
if (status) {
mlog_errno(status);
goto leave;
}
credits = ocfs2_calc_extend_credits(osb->sb, fe, clusters_to_add);
credits = ocfs2_calc_extend_credits(osb->sb, &fe->id2.i_list,
clusters_to_add);
handle = ocfs2_start_trans(osb, credits);
if (IS_ERR(handle)) {
status = PTR_ERR(handle);
......@@ -753,16 +597,16 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
prev_clusters = OCFS2_I(inode)->ip_clusters;
status = ocfs2_do_extend_allocation(osb,
inode,
&logical_start,
clusters_to_add,
mark_unwritten,
bh,
handle,
data_ac,
meta_ac,
&why);
status = ocfs2_add_inode_data(osb,
inode,
&logical_start,
clusters_to_add,
mark_unwritten,
bh,
handle,
data_ac,
meta_ac,
&why);
if ((status < 0) && (status != -EAGAIN)) {
if (status != -ENOSPC)
mlog_errno(status);
......@@ -789,7 +633,7 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
mlog(0, "restarting transaction.\n");
/* TODO: This can be more intelligent. */
credits = ocfs2_calc_extend_credits(osb->sb,
fe,
&fe->id2.i_list,
clusters_to_add);
status = ocfs2_extend_trans(handle, credits);
if (status < 0) {
......@@ -826,10 +670,8 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
restart_func = 0;
goto restart_all;
}
if (bh) {
brelse(bh);
bh = NULL;
}
brelse(bh);
bh = NULL;
mlog_exit(status);
return status;
......@@ -1096,9 +938,15 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
goto bail_unlock;
}
if (i_size_read(inode) > attr->ia_size)
if (i_size_read(inode) > attr->ia_size) {
if (ocfs2_should_order_data(inode)) {
status = ocfs2_begin_ordered_truncate(inode,
attr->ia_size);
if (status)
goto bail_unlock;
}
status = ocfs2_truncate_file(inode, bh, attr->ia_size);
else
} else
status = ocfs2_extend_file(inode, bh, attr->ia_size);
if (status < 0) {
if (status != -ENOSPC)
......@@ -1140,8 +988,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
if (size_change)
ocfs2_rw_unlock(inode, 1);
bail:
if (bh)
brelse(bh);
brelse(bh);
mlog_exit(status);
return status;
......@@ -1284,8 +1131,7 @@ static int ocfs2_write_remove_suid(struct inode *inode)
struct buffer_head *bh = NULL;
struct ocfs2_inode_info *oi = OCFS2_I(inode);
ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
oi->ip_blkno, &bh, OCFS2_BH_CACHED, inode);
ret = ocfs2_read_block(inode, oi->ip_blkno, &bh);
if (ret < 0) {
mlog_errno(ret);
goto out;
......@@ -1311,9 +1157,8 @@ static int ocfs2_allocate_unwritten_extents(struct inode *inode,
struct buffer_head *di_bh = NULL;
if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
OCFS2_I(inode)->ip_blkno, &di_bh,
OCFS2_BH_CACHED, inode);
ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno,
&di_bh);
if (ret) {
mlog_errno(ret);
goto out;
......@@ -1394,8 +1239,11 @@ static int __ocfs2_remove_inode_range(struct inode *inode,
handle_t *handle;
struct ocfs2_alloc_context *meta_ac = NULL;
struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
struct ocfs2_extent_tree et;
ret = ocfs2_lock_allocators(inode, di, 0, 1, NULL, &meta_ac);
ocfs2_init_dinode_extent_tree(&et, inode, di_bh);
ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac);
if (ret) {
mlog_errno(ret);
return ret;
......@@ -1425,7 +1273,7 @@ static int __ocfs2_remove_inode_range(struct inode *inode,
goto out;
}
ret = ocfs2_remove_extent(inode, di_bh, cpos, len, handle, meta_ac,
ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, meta_ac,
dealloc);
if (ret) {
mlog_errno(ret);
......@@ -2040,7 +1888,7 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
*/
if (old_size != i_size_read(inode) ||
old_clusters != OCFS2_I(inode)->ip_clusters) {
ret = journal_force_commit(osb->journal->j_journal);
ret = jbd2_journal_force_commit(osb->journal->j_journal);
if (ret < 0)
written = ret;
}
......@@ -2227,6 +2075,10 @@ const struct inode_operations ocfs2_file_iops = {
.setattr = ocfs2_setattr,
.getattr = ocfs2_getattr,
.permission = ocfs2_permission,
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
.listxattr = ocfs2_listxattr,
.removexattr = generic_removexattr,
.fallocate = ocfs2_fallocate,
.fiemap = ocfs2_fiemap,
};
......@@ -2237,6 +2089,10 @@ const struct inode_operations ocfs2_special_file_iops = {
.permission = ocfs2_permission,
};
/*
* Other than ->lock, keep ocfs2_fops and ocfs2_dops in sync with
* ocfs2_fops_no_plocks and ocfs2_dops_no_plocks!
*/
const struct file_operations ocfs2_fops = {
.llseek = generic_file_llseek,
.read = do_sync_read,
......@@ -2251,6 +2107,7 @@ const struct file_operations ocfs2_fops = {
#ifdef CONFIG_COMPAT
.compat_ioctl = ocfs2_compat_ioctl,
#endif
.lock = ocfs2_lock,
.flock = ocfs2_flock,
.splice_read = ocfs2_file_splice_read,
.splice_write = ocfs2_file_splice_write,
......@@ -2266,6 +2123,52 @@ const struct file_operations ocfs2_dops = {
.unlocked_ioctl = ocfs2_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = ocfs2_compat_ioctl,
#endif
.lock = ocfs2_lock,
.flock = ocfs2_flock,
};
/*
* POSIX-lockless variants of our file_operations.
*
* These will be used if the underlying cluster stack does not support
* posix file locking, if the user passes the "localflocks" mount
* option, or if we have a local-only fs.
*
* ocfs2_flock is in here because all stacks handle UNIX file locks,
* so we still want it in the case of no stack support for
* plocks. Internally, it will do the right thing when asked to ignore
* the cluster.
*/
const struct file_operations ocfs2_fops_no_plocks = {
.llseek = generic_file_llseek,
.read = do_sync_read,
.write = do_sync_write,
.mmap = ocfs2_mmap,
.fsync = ocfs2_sync_file,
.release = ocfs2_file_release,
.open = ocfs2_file_open,
.aio_read = ocfs2_file_aio_read,
.aio_write = ocfs2_file_aio_write,
.unlocked_ioctl = ocfs2_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = ocfs2_compat_ioctl,
#endif
.flock = ocfs2_flock,
.splice_read = ocfs2_file_splice_read,
.splice_write = ocfs2_file_splice_write,
};
const struct file_operations ocfs2_dops_no_plocks = {
.llseek = generic_file_llseek,
.read = generic_read_dir,
.readdir = ocfs2_readdir,
.fsync = ocfs2_sync_file,
.release = ocfs2_dir_release,
.open = ocfs2_dir_open,
.unlocked_ioctl = ocfs2_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = ocfs2_compat_ioctl,
#endif
.flock = ocfs2_flock,
};
......@@ -28,9 +28,12 @@
extern const struct file_operations ocfs2_fops;
extern const struct file_operations ocfs2_dops;
extern const struct file_operations ocfs2_fops_no_plocks;
extern const struct file_operations ocfs2_dops_no_plocks;
extern const struct inode_operations ocfs2_file_iops;
extern const struct inode_operations ocfs2_special_file_iops;
struct ocfs2_alloc_context;
enum ocfs2_alloc_restarted;
struct ocfs2_file_private {
struct file *fp_file;
......@@ -38,27 +41,18 @@ struct ocfs2_file_private {
struct ocfs2_lock_res fp_flock;
};
enum ocfs2_alloc_restarted {
RESTART_NONE = 0,
RESTART_TRANS,
RESTART_META
};
int ocfs2_do_extend_allocation(struct ocfs2_super *osb,
struct inode *inode,
u32 *logical_offset,
u32 clusters_to_add,
int mark_unwritten,
struct buffer_head *fe_bh,
handle_t *handle,
struct ocfs2_alloc_context *data_ac,
struct ocfs2_alloc_context *meta_ac,
enum ocfs2_alloc_restarted *reason_ret);
int ocfs2_add_inode_data(struct ocfs2_super *osb,
struct inode *inode,
u32 *logical_offset,
u32 clusters_to_add,
int mark_unwritten,
struct buffer_head *fe_bh,
handle_t *handle,
struct ocfs2_alloc_context *data_ac,
struct ocfs2_alloc_context *meta_ac,
enum ocfs2_alloc_restarted *reason_ret);
int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size,
u64 zero_to);
int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di,
u32 clusters_to_add, u32 extents_to_split,
struct ocfs2_alloc_context **data_ac,
struct ocfs2_alloc_context **meta_ac);
int ocfs2_setattr(struct dentry *dentry, struct iattr *attr);
int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
struct kstat *stat);
......
......@@ -49,6 +49,7 @@
#include "symlink.h"
#include "sysfile.h"
#include "uptodate.h"
#include "xattr.h"
#include "buffer_head_io.h"
......@@ -219,6 +220,7 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
struct super_block *sb;
struct ocfs2_super *osb;
int status = -EINVAL;
int use_plocks = 1;
mlog_entry("(0x%p, size:%llu)\n", inode,
(unsigned long long)le64_to_cpu(fe->i_size));
......@@ -226,6 +228,10 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
sb = inode->i_sb;
osb = OCFS2_SB(sb);
if ((osb->s_mount_opt & OCFS2_MOUNT_LOCALFLOCKS) ||
ocfs2_mount_local(osb) || !ocfs2_stack_supports_plocks())
use_plocks = 0;
/* this means that read_inode cannot create a superblock inode
* today. change if needed. */
if (!OCFS2_IS_VALID_DINODE(fe) ||
......@@ -295,13 +301,19 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
switch (inode->i_mode & S_IFMT) {
case S_IFREG:
inode->i_fop = &ocfs2_fops;
if (use_plocks)
inode->i_fop = &ocfs2_fops;
else
inode->i_fop = &ocfs2_fops_no_plocks;
inode->i_op = &ocfs2_file_iops;
i_size_write(inode, le64_to_cpu(fe->i_size));
break;
case S_IFDIR:
inode->i_op = &ocfs2_dir_iops;
inode->i_fop = &ocfs2_dops;
if (use_plocks)
inode->i_fop = &ocfs2_dops;
else
inode->i_fop = &ocfs2_dops_no_plocks;
i_size_write(inode, le64_to_cpu(fe->i_size));
break;
case S_IFLNK:
......@@ -448,8 +460,11 @@ static int ocfs2_read_locked_inode(struct inode *inode,
}
}
status = ocfs2_read_block(osb, args->fi_blkno, &bh, 0,
can_lock ? inode : NULL);
if (can_lock)
status = ocfs2_read_blocks(inode, args->fi_blkno, 1, &bh,
OCFS2_BH_IGNORE_CACHE);
else
status = ocfs2_read_blocks_sync(osb, args->fi_blkno, 1, &bh);
if (status < 0) {
mlog_errno(status);
goto bail;
......@@ -522,6 +537,9 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb,
* data and fast symlinks.
*/
if (fe->i_clusters) {
if (ocfs2_should_order_data(inode))
ocfs2_begin_ordered_truncate(inode, 0);
handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
if (IS_ERR(handle)) {
status = PTR_ERR(handle);
......@@ -730,6 +748,13 @@ static int ocfs2_wipe_inode(struct inode *inode,
goto bail_unlock_dir;
}
/*Free extended attribute resources associated with this inode.*/
status = ocfs2_xattr_remove(inode, di_bh);
if (status < 0) {
mlog_errno(status);
goto bail_unlock_dir;
}
status = ocfs2_remove_inode(inode, di_bh, orphan_dir_inode,
orphan_dir_bh);
if (status < 0)
......@@ -1081,6 +1106,8 @@ void ocfs2_clear_inode(struct inode *inode)
oi->ip_last_trans = 0;
oi->ip_dir_start_lookup = 0;
oi->ip_blkno = 0ULL;
jbd2_journal_release_jbd_inode(OCFS2_SB(inode->i_sb)->journal->j_journal,
&oi->ip_jinode);
bail:
mlog_exit_void();
......@@ -1106,58 +1133,6 @@ void ocfs2_drop_inode(struct inode *inode)
mlog_exit_void();
}
/*
* TODO: this should probably be merged into ocfs2_get_block
*
* However, you now need to pay attention to the cont_prepare_write()
* stuff in ocfs2_get_block (that is, ocfs2_get_block pretty much
* expects never to extend).
*/
struct buffer_head *ocfs2_bread(struct inode *inode,
int block, int *err, int reada)
{
struct buffer_head *bh = NULL;
int tmperr;
u64 p_blkno;
int readflags = OCFS2_BH_CACHED;
if (reada)
readflags |= OCFS2_BH_READAHEAD;
if (((u64)block << inode->i_sb->s_blocksize_bits) >=
i_size_read(inode)) {
BUG_ON(!reada);
return NULL;
}
down_read(&OCFS2_I(inode)->ip_alloc_sem);
tmperr = ocfs2_extent_map_get_blocks(inode, block, &p_blkno, NULL,
NULL);
up_read(&OCFS2_I(inode)->ip_alloc_sem);
if (tmperr < 0) {
mlog_errno(tmperr);
goto fail;
}
tmperr = ocfs2_read_block(OCFS2_SB(inode->i_sb), p_blkno, &bh,
readflags, inode);
if (tmperr < 0)
goto fail;
tmperr = 0;
*err = 0;
return bh;
fail:
if (bh) {
brelse(bh);
bh = NULL;
}
*err = -EIO;
return NULL;
}
/*
* This is called from our getattr.
*/
......
......@@ -40,6 +40,9 @@ struct ocfs2_inode_info
/* protects allocation changes on this inode. */
struct rw_semaphore ip_alloc_sem;
/* protects extended attribute changes on this inode */
struct rw_semaphore ip_xattr_sem;
/* These fields are protected by ip_lock */
spinlock_t ip_lock;
u32 ip_open_count;
......@@ -68,6 +71,7 @@ struct ocfs2_inode_info
struct ocfs2_extent_map ip_extent_map;
struct inode vfs_inode;
struct jbd2_inode ip_jinode;
};
/*
......@@ -113,8 +117,6 @@ extern struct kmem_cache *ocfs2_inode_cache;
extern const struct address_space_operations ocfs2_aops;
struct buffer_head *ocfs2_bread(struct inode *inode, int block,
int *err, int reada);
void ocfs2_clear_inode(struct inode *inode);
void ocfs2_delete_inode(struct inode *inode);
void ocfs2_drop_inode(struct inode *inode);
......
......@@ -102,8 +102,7 @@ static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
bail:
mutex_unlock(&inode->i_mutex);
if (bh)
brelse(bh);
brelse(bh);
mlog_exit(status);
return status;
......
......@@ -215,9 +215,9 @@ static int ocfs2_commit_cache(struct ocfs2_super *osb)
goto finally;
}
journal_lock_updates(journal->j_journal);
status = journal_flush(journal->j_journal);
journal_unlock_updates(journal->j_journal);
jbd2_journal_lock_updates(journal->j_journal);
status = jbd2_journal_flush(journal->j_journal);
jbd2_journal_unlock_updates(journal->j_journal);
if (status < 0) {
up_write(&journal->j_trans_barrier);
mlog_errno(status);
......@@ -264,7 +264,7 @@ handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs)
down_read(&osb->journal->j_trans_barrier);
handle = journal_start(journal, max_buffs);
handle = jbd2_journal_start(journal, max_buffs);
if (IS_ERR(handle)) {
up_read(&osb->journal->j_trans_barrier);
......@@ -290,7 +290,7 @@ int ocfs2_commit_trans(struct ocfs2_super *osb,
BUG_ON(!handle);
ret = journal_stop(handle);
ret = jbd2_journal_stop(handle);
if (ret < 0)
mlog_errno(ret);
......@@ -304,7 +304,7 @@ int ocfs2_commit_trans(struct ocfs2_super *osb,
* transaction. extend_trans will either extend the current handle by
* nblocks, or commit it and start a new one with nblocks credits.
*
* This might call journal_restart() which will commit dirty buffers
* This might call jbd2_journal_restart() which will commit dirty buffers
* and then restart the transaction. Before calling
* ocfs2_extend_trans(), any changed blocks should have been
* dirtied. After calling it, all blocks which need to be changed must
......@@ -332,7 +332,7 @@ int ocfs2_extend_trans(handle_t *handle, int nblocks)
#ifdef CONFIG_OCFS2_DEBUG_FS
status = 1;
#else
status = journal_extend(handle, nblocks);
status = jbd2_journal_extend(handle, nblocks);
if (status < 0) {
mlog_errno(status);
goto bail;
......@@ -340,8 +340,10 @@ int ocfs2_extend_trans(handle_t *handle, int nblocks)
#endif
if (status > 0) {
mlog(0, "journal_extend failed, trying journal_restart\n");
status = journal_restart(handle, nblocks);
mlog(0,
"jbd2_journal_extend failed, trying "
"jbd2_journal_restart\n");
status = jbd2_journal_restart(handle, nblocks);
if (status < 0) {
mlog_errno(status);
goto bail;
......@@ -393,11 +395,11 @@ int ocfs2_journal_access(handle_t *handle,
switch (type) {
case OCFS2_JOURNAL_ACCESS_CREATE:
case OCFS2_JOURNAL_ACCESS_WRITE:
status = journal_get_write_access(handle, bh);
status = jbd2_journal_get_write_access(handle, bh);
break;
case OCFS2_JOURNAL_ACCESS_UNDO:
status = journal_get_undo_access(handle, bh);
status = jbd2_journal_get_undo_access(handle, bh);
break;
default:
......@@ -422,7 +424,7 @@ int ocfs2_journal_dirty(handle_t *handle,
mlog_entry("(bh->b_blocknr=%llu)\n",
(unsigned long long)bh->b_blocknr);
status = journal_dirty_metadata(handle, bh);
status = jbd2_journal_dirty_metadata(handle, bh);
if (status < 0)
mlog(ML_ERROR, "Could not dirty metadata buffer. "
"(bh->b_blocknr=%llu)\n",
......@@ -432,6 +434,7 @@ int ocfs2_journal_dirty(handle_t *handle,
return status;
}
#ifdef CONFIG_OCFS2_COMPAT_JBD
int ocfs2_journal_dirty_data(handle_t *handle,
struct buffer_head *bh)
{
......@@ -443,8 +446,9 @@ int ocfs2_journal_dirty_data(handle_t *handle,
return err;
}
#endif
#define OCFS2_DEFAULT_COMMIT_INTERVAL (HZ * JBD_DEFAULT_MAX_COMMIT_AGE)
#define OCFS2_DEFAULT_COMMIT_INTERVAL (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE)
void ocfs2_set_journal_params(struct ocfs2_super *osb)
{
......@@ -457,9 +461,9 @@ void ocfs2_set_journal_params(struct ocfs2_super *osb)
spin_lock(&journal->j_state_lock);
journal->j_commit_interval = commit_interval;
if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER)
journal->j_flags |= JFS_BARRIER;
journal->j_flags |= JBD2_BARRIER;
else
journal->j_flags &= ~JFS_BARRIER;
journal->j_flags &= ~JBD2_BARRIER;
spin_unlock(&journal->j_state_lock);
}
......@@ -524,14 +528,14 @@ int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty)
mlog(0, "inode->ip_clusters = %u\n", OCFS2_I(inode)->ip_clusters);
/* call the kernels journal init function now */
j_journal = journal_init_inode(inode);
j_journal = jbd2_journal_init_inode(inode);
if (j_journal == NULL) {
mlog(ML_ERROR, "Linux journal layer error\n");
status = -EINVAL;
goto done;
}
mlog(0, "Returned from journal_init_inode\n");
mlog(0, "Returned from jbd2_journal_init_inode\n");
mlog(0, "j_journal->j_maxlen = %u\n", j_journal->j_maxlen);
*dirty = (le32_to_cpu(di->id1.journal1.ij_flags) &
......@@ -550,8 +554,7 @@ int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty)
if (status < 0) {
if (inode_lock)
ocfs2_inode_unlock(inode, 1);
if (bh != NULL)
brelse(bh);
brelse(bh);
if (inode) {
OCFS2_I(inode)->ip_open_count--;
iput(inode);
......@@ -639,7 +642,7 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb)
if (journal->j_state != OCFS2_JOURNAL_LOADED)
goto done;
/* need to inc inode use count as journal_destroy will iput. */
/* need to inc inode use count - jbd2_journal_destroy will iput. */
if (!igrab(inode))
BUG();
......@@ -668,9 +671,9 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb)
BUG_ON(atomic_read(&(osb->journal->j_num_trans)) != 0);
if (ocfs2_mount_local(osb)) {
journal_lock_updates(journal->j_journal);
status = journal_flush(journal->j_journal);
journal_unlock_updates(journal->j_journal);
jbd2_journal_lock_updates(journal->j_journal);
status = jbd2_journal_flush(journal->j_journal);
jbd2_journal_unlock_updates(journal->j_journal);
if (status < 0)
mlog_errno(status);
}
......@@ -686,7 +689,7 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb)
}
/* Shutdown the kernel journal system */
journal_destroy(journal->j_journal);
jbd2_journal_destroy(journal->j_journal);
OCFS2_I(inode)->ip_open_count--;
......@@ -711,15 +714,15 @@ static void ocfs2_clear_journal_error(struct super_block *sb,
{
int olderr;
olderr = journal_errno(journal);
olderr = jbd2_journal_errno(journal);
if (olderr) {
mlog(ML_ERROR, "File system error %d recorded in "
"journal %u.\n", olderr, slot);
mlog(ML_ERROR, "File system on device %s needs checking.\n",
sb->s_id);
journal_ack_err(journal);
journal_clear_err(journal);
jbd2_journal_ack_err(journal);
jbd2_journal_clear_err(journal);
}
}
......@@ -734,7 +737,7 @@ int ocfs2_journal_load(struct ocfs2_journal *journal, int local, int replayed)
osb = journal->j_osb;
status = journal_load(journal->j_journal);
status = jbd2_journal_load(journal->j_journal);
if (status < 0) {
mlog(ML_ERROR, "Failed to load journal!\n");
goto done;
......@@ -778,7 +781,7 @@ int ocfs2_journal_wipe(struct ocfs2_journal *journal, int full)
BUG_ON(!journal);
status = journal_wipe(journal->j_journal, full);
status = jbd2_journal_wipe(journal->j_journal, full);
if (status < 0) {
mlog_errno(status);
goto bail;
......@@ -847,9 +850,8 @@ static int ocfs2_force_read_journal(struct inode *inode)
/* We are reading journal data which should not
* be put in the uptodate cache */
status = ocfs2_read_blocks(OCFS2_SB(inode->i_sb),
p_blkno, p_blocks, bhs, 0,
NULL);
status = ocfs2_read_blocks_sync(OCFS2_SB(inode->i_sb),
p_blkno, p_blocks, bhs);
if (status < 0) {
mlog_errno(status);
goto bail;
......@@ -865,8 +867,7 @@ static int ocfs2_force_read_journal(struct inode *inode)
bail:
for(i = 0; i < CONCURRENT_JOURNAL_FILL; i++)
if (bhs[i])
brelse(bhs[i]);
brelse(bhs[i]);
mlog_exit(status);
return status;
}
......@@ -1133,7 +1134,8 @@ static int ocfs2_read_journal_inode(struct ocfs2_super *osb,
}
SET_INODE_JOURNAL(inode);
status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, bh, 0, inode);
status = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1, bh,
OCFS2_BH_IGNORE_CACHE);
if (status < 0) {
mlog_errno(status);
goto bail;
......@@ -1229,19 +1231,19 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
}
mlog(0, "calling journal_init_inode\n");
journal = journal_init_inode(inode);
journal = jbd2_journal_init_inode(inode);
if (journal == NULL) {
mlog(ML_ERROR, "Linux journal layer error\n");
status = -EIO;
goto done;
}
status = journal_load(journal);
status = jbd2_journal_load(journal);
if (status < 0) {
mlog_errno(status);
if (!igrab(inode))
BUG();
journal_destroy(journal);
jbd2_journal_destroy(journal);
goto done;
}
......@@ -1249,9 +1251,9 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
/* wipe the journal */
mlog(0, "flushing the journal.\n");
journal_lock_updates(journal);
status = journal_flush(journal);
journal_unlock_updates(journal);
jbd2_journal_lock_updates(journal);
status = jbd2_journal_flush(journal);
jbd2_journal_unlock_updates(journal);
if (status < 0)
mlog_errno(status);
......@@ -1272,7 +1274,7 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
if (!igrab(inode))
BUG();
journal_destroy(journal);
jbd2_journal_destroy(journal);
done:
/* drop the lock on this nodes journal */
......@@ -1282,8 +1284,7 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
if (inode)
iput(inode);
if (bh)
brelse(bh);
brelse(bh);
mlog_exit(status);
return status;
......
......@@ -27,7 +27,12 @@
#define OCFS2_JOURNAL_H
#include <linux/fs.h>
#include <linux/jbd.h>
#ifndef CONFIG_OCFS2_COMPAT_JBD
# include <linux/jbd2.h>
#else
# include <linux/jbd.h>
# include "ocfs2_jbd_compat.h"
#endif
enum ocfs2_journal_state {
OCFS2_JOURNAL_FREE = 0,
......@@ -215,8 +220,8 @@ static inline void ocfs2_checkpoint_inode(struct inode *inode)
* buffer. Will have to call ocfs2_journal_dirty once
* we've actually dirtied it. Type is one of . or .
* ocfs2_journal_dirty - Mark a journalled buffer as having dirty data.
* ocfs2_journal_dirty_data - Indicate that a data buffer should go out before
* the current handle commits.
* ocfs2_jbd2_file_inode - Mark an inode so that its data goes out before
* the current handle commits.
*/
/* You must always start_trans with a number of buffs > 0, but it's
......@@ -268,8 +273,10 @@ int ocfs2_journal_access(handle_t *handle,
*/
int ocfs2_journal_dirty(handle_t *handle,
struct buffer_head *bh);
#ifdef CONFIG_OCFS2_COMPAT_JBD
int ocfs2_journal_dirty_data(handle_t *handle,
struct buffer_head *bh);
#endif
/*
* Credit Macros:
......@@ -283,6 +290,9 @@ int ocfs2_journal_dirty_data(handle_t *handle,
/* simple file updates like chmod, etc. */
#define OCFS2_INODE_UPDATE_CREDITS 1
/* extended attribute block update */
#define OCFS2_XATTR_BLOCK_UPDATE_CREDITS 1
/* group extend. inode update and last group update. */
#define OCFS2_GROUP_EXTEND_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1)
......@@ -340,11 +350,23 @@ int ocfs2_journal_dirty_data(handle_t *handle,
#define OCFS2_RENAME_CREDITS (3 * OCFS2_INODE_UPDATE_CREDITS + 3 \
+ OCFS2_UNLINK_CREDITS)
/* global bitmap dinode, group desc., relinked group,
* suballocator dinode, group desc., relinked group,
* dinode, xattr block */
#define OCFS2_XATTR_BLOCK_CREATE_CREDITS (OCFS2_SUBALLOC_ALLOC * 2 + \
+ OCFS2_INODE_UPDATE_CREDITS \
+ OCFS2_XATTR_BLOCK_UPDATE_CREDITS)
/*
* Please note that the caller must make sure that root_el is the root
* of extent tree. So for an inode, it should be &fe->id2.i_list. Otherwise
* the result may be wrong.
*/
static inline int ocfs2_calc_extend_credits(struct super_block *sb,
struct ocfs2_dinode *fe,
struct ocfs2_extent_list *root_el,
u32 bits_wanted)
{
int bitmap_blocks, sysfile_bitmap_blocks, dinode_blocks;
int bitmap_blocks, sysfile_bitmap_blocks, extent_blocks;
/* bitmap dinode, group desc. + relinked group. */
bitmap_blocks = OCFS2_SUBALLOC_ALLOC;
......@@ -355,16 +377,16 @@ static inline int ocfs2_calc_extend_credits(struct super_block *sb,
* however many metadata chunks needed * a remaining suballoc
* alloc. */
sysfile_bitmap_blocks = 1 +
(OCFS2_SUBALLOC_ALLOC - 1) * ocfs2_extend_meta_needed(fe);
(OCFS2_SUBALLOC_ALLOC - 1) * ocfs2_extend_meta_needed(root_el);
/* this does not include *new* metadata blocks, which are
* accounted for in sysfile_bitmap_blocks. fe +
* accounted for in sysfile_bitmap_blocks. root_el +
* prev. last_eb_blk + blocks along edge of tree.
* calc_symlink_credits passes because we just need 1
* credit for the dinode there. */
dinode_blocks = 1 + 1 + le16_to_cpu(fe->id2.i_list.l_tree_depth);
extent_blocks = 1 + 1 + le16_to_cpu(root_el->l_tree_depth);
return bitmap_blocks + sysfile_bitmap_blocks + dinode_blocks;
return bitmap_blocks + sysfile_bitmap_blocks + extent_blocks;
}
static inline int ocfs2_calc_symlink_credits(struct super_block *sb)
......@@ -415,4 +437,16 @@ static inline int ocfs2_calc_tree_trunc_credits(struct super_block *sb,
return credits;
}
static inline int ocfs2_jbd2_file_inode(handle_t *handle, struct inode *inode)
{
return jbd2_journal_file_inode(handle, &OCFS2_I(inode)->ip_jinode);
}
static inline int ocfs2_begin_ordered_truncate(struct inode *inode,
loff_t new_size)
{
return jbd2_journal_begin_ordered_truncate(&OCFS2_I(inode)->ip_jinode,
new_size);
}
#endif /* OCFS2_JOURNAL_H */
......@@ -28,6 +28,7 @@
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/bitops.h>
#include <linux/debugfs.h>
#define MLOG_MASK_PREFIX ML_DISK_ALLOC
#include <cluster/masklog.h>
......@@ -47,8 +48,6 @@
#define OCFS2_LOCAL_ALLOC(dinode) (&((dinode)->id2.i_lab))
static inline int ocfs2_local_alloc_window_bits(struct ocfs2_super *osb);
static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc);
static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
......@@ -75,24 +74,129 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
struct inode *local_alloc_inode);
static inline int ocfs2_local_alloc_window_bits(struct ocfs2_super *osb)
#ifdef CONFIG_OCFS2_FS_STATS
static int ocfs2_la_debug_open(struct inode *inode, struct file *file)
{
file->private_data = inode->i_private;
return 0;
}
#define LA_DEBUG_BUF_SZ PAGE_CACHE_SIZE
#define LA_DEBUG_VER 1
static ssize_t ocfs2_la_debug_read(struct file *file, char __user *userbuf,
size_t count, loff_t *ppos)
{
static DEFINE_MUTEX(la_debug_mutex);
struct ocfs2_super *osb = file->private_data;
int written, ret;
char *buf = osb->local_alloc_debug_buf;
mutex_lock(&la_debug_mutex);
memset(buf, 0, LA_DEBUG_BUF_SZ);
written = snprintf(buf, LA_DEBUG_BUF_SZ,
"0x%x\t0x%llx\t%u\t%u\t0x%x\n",
LA_DEBUG_VER,
(unsigned long long)osb->la_last_gd,
osb->local_alloc_default_bits,
osb->local_alloc_bits, osb->local_alloc_state);
ret = simple_read_from_buffer(userbuf, count, ppos, buf, written);
mutex_unlock(&la_debug_mutex);
return ret;
}
static const struct file_operations ocfs2_la_debug_fops = {
.open = ocfs2_la_debug_open,
.read = ocfs2_la_debug_read,
};
static void ocfs2_init_la_debug(struct ocfs2_super *osb)
{
osb->local_alloc_debug_buf = kmalloc(LA_DEBUG_BUF_SZ, GFP_NOFS);
if (!osb->local_alloc_debug_buf)
return;
osb->local_alloc_debug = debugfs_create_file("local_alloc_stats",
S_IFREG|S_IRUSR,
osb->osb_debug_root,
osb,
&ocfs2_la_debug_fops);
if (!osb->local_alloc_debug) {
kfree(osb->local_alloc_debug_buf);
osb->local_alloc_debug_buf = NULL;
}
}
static void ocfs2_shutdown_la_debug(struct ocfs2_super *osb)
{
if (osb->local_alloc_debug)
debugfs_remove(osb->local_alloc_debug);
if (osb->local_alloc_debug_buf)
kfree(osb->local_alloc_debug_buf);
osb->local_alloc_debug_buf = NULL;
osb->local_alloc_debug = NULL;
}
#else /* CONFIG_OCFS2_FS_STATS */
static void ocfs2_init_la_debug(struct ocfs2_super *osb)
{
return;
}
static void ocfs2_shutdown_la_debug(struct ocfs2_super *osb)
{
return;
}
#endif
static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb)
{
BUG_ON(osb->s_clustersize_bits > 20);
return (osb->local_alloc_state == OCFS2_LA_THROTTLED ||
osb->local_alloc_state == OCFS2_LA_ENABLED);
}
/* Size local alloc windows by the megabyte */
return osb->local_alloc_size << (20 - osb->s_clustersize_bits);
void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb,
unsigned int num_clusters)
{
spin_lock(&osb->osb_lock);
if (osb->local_alloc_state == OCFS2_LA_DISABLED ||
osb->local_alloc_state == OCFS2_LA_THROTTLED)
if (num_clusters >= osb->local_alloc_default_bits) {
cancel_delayed_work(&osb->la_enable_wq);
osb->local_alloc_state = OCFS2_LA_ENABLED;
}
spin_unlock(&osb->osb_lock);
}
void ocfs2_la_enable_worker(struct work_struct *work)
{
struct ocfs2_super *osb =
container_of(work, struct ocfs2_super,
la_enable_wq.work);
spin_lock(&osb->osb_lock);
osb->local_alloc_state = OCFS2_LA_ENABLED;
spin_unlock(&osb->osb_lock);
}
/*
* Tell us whether a given allocation should use the local alloc
* file. Otherwise, it has to go to the main bitmap.
*
* This function does semi-dirty reads of local alloc size and state!
* This is ok however, as the values are re-checked once under mutex.
*/
int ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits)
{
int la_bits = ocfs2_local_alloc_window_bits(osb);
int ret = 0;
int la_bits;
spin_lock(&osb->osb_lock);
la_bits = osb->local_alloc_bits;
if (osb->local_alloc_state != OCFS2_LA_ENABLED)
if (!ocfs2_la_state_enabled(osb))
goto bail;
/* la_bits should be at least twice the size (in clusters) of
......@@ -106,6 +210,7 @@ int ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits)
bail:
mlog(0, "state=%d, bits=%llu, la_bits=%d, ret=%d\n",
osb->local_alloc_state, (unsigned long long)bits, la_bits, ret);
spin_unlock(&osb->osb_lock);
return ret;
}
......@@ -120,14 +225,18 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb)
mlog_entry_void();
if (osb->local_alloc_size == 0)
ocfs2_init_la_debug(osb);
if (osb->local_alloc_bits == 0)
goto bail;
if (ocfs2_local_alloc_window_bits(osb) >= osb->bitmap_cpg) {
if (osb->local_alloc_bits >= osb->bitmap_cpg) {
mlog(ML_NOTICE, "Requested local alloc window %d is larger "
"than max possible %u. Using defaults.\n",
ocfs2_local_alloc_window_bits(osb), (osb->bitmap_cpg - 1));
osb->local_alloc_size = OCFS2_DEFAULT_LOCAL_ALLOC_SIZE;
osb->local_alloc_bits, (osb->bitmap_cpg - 1));
osb->local_alloc_bits =
ocfs2_megabytes_to_clusters(osb->sb,
OCFS2_DEFAULT_LOCAL_ALLOC_SIZE);
}
/* read the alloc off disk */
......@@ -139,8 +248,8 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb)
goto bail;
}
status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno,
&alloc_bh, 0, inode);
status = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1,
&alloc_bh, OCFS2_BH_IGNORE_CACHE);
if (status < 0) {
mlog_errno(status);
goto bail;
......@@ -185,13 +294,14 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb)
bail:
if (status < 0)
if (alloc_bh)
brelse(alloc_bh);
brelse(alloc_bh);
if (inode)
iput(inode);
mlog(0, "Local alloc window bits = %d\n",
ocfs2_local_alloc_window_bits(osb));
if (status < 0)
ocfs2_shutdown_la_debug(osb);
mlog(0, "Local alloc window bits = %d\n", osb->local_alloc_bits);
mlog_exit(status);
return status;
......@@ -217,6 +327,11 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
mlog_entry_void();
cancel_delayed_work(&osb->la_enable_wq);
flush_workqueue(ocfs2_wq);
ocfs2_shutdown_la_debug(osb);
if (osb->local_alloc_state == OCFS2_LA_UNUSED)
goto out;
......@@ -295,8 +410,7 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
ocfs2_commit_trans(osb, handle);
out_unlock:
if (main_bm_bh)
brelse(main_bm_bh);
brelse(main_bm_bh);
ocfs2_inode_unlock(main_bm_inode, 1);
......@@ -345,8 +459,8 @@ int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb,
mutex_lock(&inode->i_mutex);
status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno,
&alloc_bh, 0, inode);
status = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1,
&alloc_bh, OCFS2_BH_IGNORE_CACHE);
if (status < 0) {
mlog_errno(status);
goto bail;
......@@ -372,8 +486,7 @@ int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb,
*alloc_copy = NULL;
}
if (alloc_bh)
brelse(alloc_bh);
brelse(alloc_bh);
if (inode) {
mutex_unlock(&inode->i_mutex);
......@@ -441,8 +554,7 @@ int ocfs2_complete_local_alloc_recovery(struct ocfs2_super *osb,
out_mutex:
mutex_unlock(&main_bm_inode->i_mutex);
if (main_bm_bh)
brelse(main_bm_bh);
brelse(main_bm_bh);
iput(main_bm_inode);
......@@ -453,8 +565,48 @@ int ocfs2_complete_local_alloc_recovery(struct ocfs2_super *osb,
return status;
}
/* Check to see if the local alloc window is within ac->ac_max_block */
static int ocfs2_local_alloc_in_range(struct inode *inode,
struct ocfs2_alloc_context *ac,
u32 bits_wanted)
{
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct ocfs2_dinode *alloc;
struct ocfs2_local_alloc *la;
int start;
u64 block_off;
if (!ac->ac_max_block)
return 1;
alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
la = OCFS2_LOCAL_ALLOC(alloc);
start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted);
if (start == -1) {
mlog_errno(-ENOSPC);
return 0;
}
/*
* Converting (bm_off + start + bits_wanted) to blocks gives us
* the blkno just past our actual allocation. This is perfect
* to compare with ac_max_block.
*/
block_off = ocfs2_clusters_to_blocks(inode->i_sb,
le32_to_cpu(la->la_bm_off) +
start + bits_wanted);
mlog(0, "Checking %llu against %llu\n",
(unsigned long long)block_off,
(unsigned long long)ac->ac_max_block);
if (block_off > ac->ac_max_block)
return 0;
return 1;
}
/*
* make sure we've got at least bitswanted contiguous bits in the
* make sure we've got at least bits_wanted contiguous bits in the
* local alloc. You lose them when you drop i_mutex.
*
* We will add ourselves to the transaction passed in, but may start
......@@ -485,16 +637,18 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
mutex_lock(&local_alloc_inode->i_mutex);
if (osb->local_alloc_state != OCFS2_LA_ENABLED) {
status = -ENOSPC;
goto bail;
}
if (bits_wanted > ocfs2_local_alloc_window_bits(osb)) {
mlog(0, "Asking for more than my max window size!\n");
/*
* We must double check state and allocator bits because
* another process may have changed them while holding i_mutex.
*/
spin_lock(&osb->osb_lock);
if (!ocfs2_la_state_enabled(osb) ||
(bits_wanted > osb->local_alloc_bits)) {
spin_unlock(&osb->osb_lock);
status = -ENOSPC;
goto bail;
}
spin_unlock(&osb->osb_lock);
alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
......@@ -522,6 +676,36 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
mlog_errno(status);
goto bail;
}
/*
* Under certain conditions, the window slide code
* might have reduced the number of bits available or
* disabled the the local alloc entirely. Re-check
* here and return -ENOSPC if necessary.
*/
status = -ENOSPC;
if (!ocfs2_la_state_enabled(osb))
goto bail;
free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) -
le32_to_cpu(alloc->id1.bitmap1.i_used);
if (bits_wanted > free_bits)
goto bail;
}
if (ac->ac_max_block)
mlog(0, "Calling in_range for max block %llu\n",
(unsigned long long)ac->ac_max_block);
if (!ocfs2_local_alloc_in_range(local_alloc_inode, ac,
bits_wanted)) {
/*
* The window is outside ac->ac_max_block.
* This errno tells the caller to keep localalloc enabled
* but to get the allocation from the main bitmap.
*/
status = -EFBIG;
goto bail;
}
ac->ac_inode = local_alloc_inode;
......@@ -789,6 +973,85 @@ static int ocfs2_sync_local_to_main(struct ocfs2_super *osb,
return status;
}
enum ocfs2_la_event {
OCFS2_LA_EVENT_SLIDE, /* Normal window slide. */
OCFS2_LA_EVENT_FRAGMENTED, /* The global bitmap has
* enough bits theoretically
* free, but a contiguous
* allocation could not be
* found. */
OCFS2_LA_EVENT_ENOSPC, /* Global bitmap doesn't have
* enough bits free to satisfy
* our request. */
};
#define OCFS2_LA_ENABLE_INTERVAL (30 * HZ)
/*
* Given an event, calculate the size of our next local alloc window.
*
* This should always be called under i_mutex of the local alloc inode
* so that local alloc disabling doesn't race with processes trying to
* use the allocator.
*
* Returns the state which the local alloc was left in. This value can
* be ignored by some paths.
*/
static int ocfs2_recalc_la_window(struct ocfs2_super *osb,
enum ocfs2_la_event event)
{
unsigned int bits;
int state;
spin_lock(&osb->osb_lock);
if (osb->local_alloc_state == OCFS2_LA_DISABLED) {
WARN_ON_ONCE(osb->local_alloc_state == OCFS2_LA_DISABLED);
goto out_unlock;
}
/*
* ENOSPC and fragmentation are treated similarly for now.
*/
if (event == OCFS2_LA_EVENT_ENOSPC ||
event == OCFS2_LA_EVENT_FRAGMENTED) {
/*
* We ran out of contiguous space in the primary
* bitmap. Drastically reduce the number of bits used
* by local alloc until we have to disable it.
*/
bits = osb->local_alloc_bits >> 1;
if (bits > ocfs2_megabytes_to_clusters(osb->sb, 1)) {
/*
* By setting state to THROTTLED, we'll keep
* the number of local alloc bits used down
* until an event occurs which would give us
* reason to assume the bitmap situation might
* have changed.
*/
osb->local_alloc_state = OCFS2_LA_THROTTLED;
osb->local_alloc_bits = bits;
} else {
osb->local_alloc_state = OCFS2_LA_DISABLED;
}
queue_delayed_work(ocfs2_wq, &osb->la_enable_wq,
OCFS2_LA_ENABLE_INTERVAL);
goto out_unlock;
}
/*
* Don't increase the size of the local alloc window until we
* know we might be able to fulfill the request. Otherwise, we
* risk bouncing around the global bitmap during periods of
* low space.
*/
if (osb->local_alloc_state != OCFS2_LA_THROTTLED)
osb->local_alloc_bits = osb->local_alloc_default_bits;
out_unlock:
state = osb->local_alloc_state;
spin_unlock(&osb->osb_lock);
return state;
}
static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb,
struct ocfs2_alloc_context **ac,
struct inode **bitmap_inode,
......@@ -803,12 +1066,21 @@ static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb,
goto bail;
}
(*ac)->ac_bits_wanted = ocfs2_local_alloc_window_bits(osb);
retry_enospc:
(*ac)->ac_bits_wanted = osb->local_alloc_bits;
status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac);
if (status == -ENOSPC) {
if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_ENOSPC) ==
OCFS2_LA_DISABLED)
goto bail;
ocfs2_free_ac_resource(*ac);
memset(*ac, 0, sizeof(struct ocfs2_alloc_context));
goto retry_enospc;
}
if (status < 0) {
if (status != -ENOSPC)
mlog_errno(status);
mlog_errno(status);
goto bail;
}
......@@ -849,7 +1121,7 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
"one\n");
mlog(0, "Allocating %u clusters for a new window.\n",
ocfs2_local_alloc_window_bits(osb));
osb->local_alloc_bits);
/* Instruct the allocation code to try the most recently used
* cluster group. We'll re-record the group used this pass
......@@ -859,9 +1131,36 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
/* we used the generic suballoc reserve function, but we set
* everything up nicely, so there's no reason why we can't use
* the more specific cluster api to claim bits. */
status = ocfs2_claim_clusters(osb, handle, ac,
ocfs2_local_alloc_window_bits(osb),
status = ocfs2_claim_clusters(osb, handle, ac, osb->local_alloc_bits,
&cluster_off, &cluster_count);
if (status == -ENOSPC) {
retry_enospc:
/*
* Note: We could also try syncing the journal here to
* allow use of any free bits which the current
* transaction can't give us access to. --Mark
*/
if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_FRAGMENTED) ==
OCFS2_LA_DISABLED)
goto bail;
status = ocfs2_claim_clusters(osb, handle, ac,
osb->local_alloc_bits,
&cluster_off,
&cluster_count);
if (status == -ENOSPC)
goto retry_enospc;
/*
* We only shrunk the *minimum* number of in our
* request - it's entirely possible that the allocator
* might give us more than we asked for.
*/
if (status == 0) {
spin_lock(&osb->osb_lock);
osb->local_alloc_bits = cluster_count;
spin_unlock(&osb->osb_lock);
}
}
if (status < 0) {
if (status != -ENOSPC)
mlog_errno(status);
......@@ -905,6 +1204,8 @@ static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
mlog_entry_void();
ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_SLIDE);
/* This will lock the main bitmap for us. */
status = ocfs2_local_alloc_reserve_for_window(osb,
&ac,
......@@ -976,8 +1277,7 @@ static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
if (handle)
ocfs2_commit_trans(osb, handle);
if (main_bm_bh)
brelse(main_bm_bh);
brelse(main_bm_bh);
if (main_bm_inode)
iput(main_bm_inode);
......
......@@ -52,4 +52,8 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
u32 *bit_off,
u32 *num_bits);
void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb,
unsigned int num_clusters);
void ocfs2_la_enable_worker(struct work_struct *work);
#endif /* OCFS2_LOCALALLOC_H */
......@@ -24,6 +24,7 @@
*/
#include <linux/fs.h>
#include <linux/fcntl.h>
#define MLOG_MASK_PREFIX ML_INODE
#include <cluster/masklog.h>
......@@ -32,6 +33,7 @@
#include "dlmglue.h"
#include "file.h"
#include "inode.h"
#include "locks.h"
static int ocfs2_do_flock(struct file *file, struct inode *inode,
......@@ -123,3 +125,16 @@ int ocfs2_flock(struct file *file, int cmd, struct file_lock *fl)
else
return ocfs2_do_flock(file, inode, cmd, fl);
}
int ocfs2_lock(struct file *file, int cmd, struct file_lock *fl)
{
struct inode *inode = file->f_mapping->host;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
if (!(fl->fl_flags & FL_POSIX))
return -ENOLCK;
if (__mandatory_lock(inode))
return -ENOLCK;
return ocfs2_plock(osb->cconn, OCFS2_I(inode)->ip_blkno, file, cmd, fl);
}
......@@ -27,5 +27,6 @@
#define OCFS2_LOCKS_H
int ocfs2_flock(struct file *file, int cmd, struct file_lock *fl);
int ocfs2_lock(struct file *file, int cmd, struct file_lock *fl);
#endif /* OCFS2_LOCKS_H */
......@@ -60,6 +60,7 @@
#include "symlink.h"
#include "sysfile.h"
#include "uptodate.h"
#include "xattr.h"
#include "buffer_head_io.h"
......@@ -327,14 +328,9 @@ static int ocfs2_mknod(struct inode *dir,
if (status == -ENOSPC)
mlog(0, "Disk is full\n");
if (new_fe_bh)
brelse(new_fe_bh);
if (de_bh)
brelse(de_bh);
if (parent_fe_bh)
brelse(parent_fe_bh);
brelse(new_fe_bh);
brelse(de_bh);
brelse(parent_fe_bh);
if ((status < 0) && inode)
iput(inode);
......@@ -647,12 +643,9 @@ static int ocfs2_link(struct dentry *old_dentry,
out:
ocfs2_inode_unlock(dir, 1);
if (de_bh)
brelse(de_bh);
if (fe_bh)
brelse(fe_bh);
if (parent_fe_bh)
brelse(parent_fe_bh);
brelse(de_bh);
brelse(fe_bh);
brelse(parent_fe_bh);
mlog_exit(err);
......@@ -851,17 +844,10 @@ static int ocfs2_unlink(struct inode *dir,
iput(orphan_dir);
}
if (fe_bh)
brelse(fe_bh);
if (dirent_bh)
brelse(dirent_bh);
if (parent_node_bh)
brelse(parent_node_bh);
if (orphan_entry_bh)
brelse(orphan_entry_bh);
brelse(fe_bh);
brelse(dirent_bh);
brelse(parent_node_bh);
brelse(orphan_entry_bh);
mlog_exit(status);
......@@ -1372,24 +1358,15 @@ static int ocfs2_rename(struct inode *old_dir,
if (new_inode)
iput(new_inode);
if (newfe_bh)
brelse(newfe_bh);
if (old_inode_bh)
brelse(old_inode_bh);
if (old_dir_bh)
brelse(old_dir_bh);
if (new_dir_bh)
brelse(new_dir_bh);
if (new_de_bh)
brelse(new_de_bh);
if (old_de_bh)
brelse(old_de_bh);
if (old_inode_de_bh)
brelse(old_inode_de_bh);
if (orphan_entry_bh)
brelse(orphan_entry_bh);
if (insert_entry_bh)
brelse(insert_entry_bh);
brelse(newfe_bh);
brelse(old_inode_bh);
brelse(old_dir_bh);
brelse(new_dir_bh);
brelse(new_de_bh);
brelse(old_de_bh);
brelse(old_inode_de_bh);
brelse(orphan_entry_bh);
brelse(insert_entry_bh);
mlog_exit(status);
......@@ -1492,8 +1469,7 @@ static int ocfs2_create_symlink_data(struct ocfs2_super *osb,
if (bhs) {
for(i = 0; i < blocks; i++)
if (bhs[i])
brelse(bhs[i]);
brelse(bhs[i]);
kfree(bhs);
}
......@@ -1598,10 +1574,10 @@ static int ocfs2_symlink(struct inode *dir,
u32 offset = 0;
inode->i_op = &ocfs2_symlink_inode_operations;
status = ocfs2_do_extend_allocation(osb, inode, &offset, 1, 0,
new_fe_bh,
handle, data_ac, NULL,
NULL);
status = ocfs2_add_inode_data(osb, inode, &offset, 1, 0,
new_fe_bh,
handle, data_ac, NULL,
NULL);
if (status < 0) {
if (status != -ENOSPC && status != -EINTR) {
mlog(ML_ERROR,
......@@ -1659,12 +1635,9 @@ static int ocfs2_symlink(struct inode *dir,
ocfs2_inode_unlock(dir, 1);
if (new_fe_bh)
brelse(new_fe_bh);
if (parent_fe_bh)
brelse(parent_fe_bh);
if (de_bh)
brelse(de_bh);
brelse(new_fe_bh);
brelse(parent_fe_bh);
brelse(de_bh);
if (inode_ac)
ocfs2_free_alloc_context(inode_ac);
if (data_ac)
......@@ -1759,8 +1732,7 @@ static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
iput(orphan_dir_inode);
}
if (orphan_dir_bh)
brelse(orphan_dir_bh);
brelse(orphan_dir_bh);
mlog_exit(status);
return status;
......@@ -1780,10 +1752,9 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino);
status = ocfs2_read_block(osb,
status = ocfs2_read_block(orphan_dir_inode,
OCFS2_I(orphan_dir_inode)->ip_blkno,
&orphan_dir_bh, OCFS2_BH_CACHED,
orphan_dir_inode);
&orphan_dir_bh);
if (status < 0) {
mlog_errno(status);
goto leave;
......@@ -1829,8 +1800,7 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
(unsigned long long)OCFS2_I(inode)->ip_blkno, osb->slot_num);
leave:
if (orphan_dir_bh)
brelse(orphan_dir_bh);
brelse(orphan_dir_bh);
mlog_exit(status);
return status;
......@@ -1898,8 +1868,7 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
}
leave:
if (target_de_bh)
brelse(target_de_bh);
brelse(target_de_bh);
mlog_exit(status);
return status;
......@@ -1918,4 +1887,8 @@ const struct inode_operations ocfs2_dir_iops = {
.setattr = ocfs2_setattr,
.getattr = ocfs2_getattr,
.permission = ocfs2_permission,
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
.listxattr = ocfs2_listxattr,
.removexattr = generic_removexattr,
};
......@@ -34,7 +34,12 @@
#include <linux/workqueue.h>
#include <linux/kref.h>
#include <linux/mutex.h>
#include <linux/jbd.h>
#ifndef CONFIG_OCFS2_COMPAT_JBD
# include <linux/jbd2.h>
#else
# include <linux/jbd.h>
# include "ocfs2_jbd_compat.h"
#endif
/* For union ocfs2_dlm_lksb */
#include "stackglue.h"
......@@ -171,9 +176,13 @@ struct ocfs2_alloc_stats
enum ocfs2_local_alloc_state
{
OCFS2_LA_UNUSED = 0,
OCFS2_LA_ENABLED,
OCFS2_LA_DISABLED
OCFS2_LA_UNUSED = 0, /* Local alloc will never be used for
* this mountpoint. */
OCFS2_LA_ENABLED, /* Local alloc is in use. */
OCFS2_LA_THROTTLED, /* Local alloc is in use, but number
* of bits has been reduced. */
OCFS2_LA_DISABLED /* Local alloc has temporarily been
* disabled. */
};
enum ocfs2_mount_options
......@@ -184,6 +193,8 @@ enum ocfs2_mount_options
OCFS2_MOUNT_ERRORS_PANIC = 1 << 3, /* Panic on errors */
OCFS2_MOUNT_DATA_WRITEBACK = 1 << 4, /* No data ordering */
OCFS2_MOUNT_LOCALFLOCKS = 1 << 5, /* No cluster aware user file locks */
OCFS2_MOUNT_NOUSERXATTR = 1 << 6, /* No user xattr */
OCFS2_MOUNT_INODE64 = 1 << 7, /* Allow inode numbers > 2^32 */
};
#define OCFS2_OSB_SOFT_RO 0x0001
......@@ -214,6 +225,7 @@ struct ocfs2_super
u32 bitmap_cpg;
u8 *uuid;
char *uuid_str;
u32 uuid_hash;
u8 *vol_label;
u64 first_cluster_group_blkno;
u32 fs_generation;
......@@ -241,6 +253,7 @@ struct ocfs2_super
int s_sectsize_bits;
int s_clustersize;
int s_clustersize_bits;
unsigned int s_xattr_inline_size;
atomic_t vol_state;
struct mutex recovery_lock;
......@@ -252,11 +265,27 @@ struct ocfs2_super
struct ocfs2_journal *journal;
unsigned long osb_commit_interval;
int local_alloc_size;
enum ocfs2_local_alloc_state local_alloc_state;
struct delayed_work la_enable_wq;
/*
* Must hold local alloc i_mutex and osb->osb_lock to change
* local_alloc_bits. Reads can be done under either lock.
*/
unsigned int local_alloc_bits;
unsigned int local_alloc_default_bits;
enum ocfs2_local_alloc_state local_alloc_state; /* protected
* by osb_lock */
struct buffer_head *local_alloc_bh;
u64 la_last_gd;
#ifdef CONFIG_OCFS2_FS_STATS
struct dentry *local_alloc_debug;
char *local_alloc_debug_buf;
#endif
/* Next two fields are for local node slot recovery during
* mount. */
int dirty;
......@@ -340,6 +369,13 @@ static inline int ocfs2_supports_inline_data(struct ocfs2_super *osb)
return 0;
}
static inline int ocfs2_supports_xattr(struct ocfs2_super *osb)
{
if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_XATTR)
return 1;
return 0;
}
/* set / clear functions because cluster events can make these happen
* in parallel so we want the transitions to be atomic. this also
* means that any future flags osb_flags must be protected by spinlock
......@@ -554,6 +590,14 @@ static inline unsigned int ocfs2_pages_per_cluster(struct super_block *sb)
return pages_per_cluster;
}
static inline unsigned int ocfs2_megabytes_to_clusters(struct super_block *sb,
unsigned int megs)
{
BUILD_BUG_ON(OCFS2_MAX_CLUSTERSIZE > 1048576);
return megs << (20 - OCFS2_SB(sb)->s_clustersize_bits);
}
static inline void ocfs2_init_inode_steal_slot(struct ocfs2_super *osb)
{
spin_lock(&osb->osb_lock);
......
......@@ -64,6 +64,7 @@
#define OCFS2_INODE_SIGNATURE "INODE01"
#define OCFS2_EXTENT_BLOCK_SIGNATURE "EXBLK01"
#define OCFS2_GROUP_DESC_SIGNATURE "GROUP01"
#define OCFS2_XATTR_BLOCK_SIGNATURE "XATTR01"
/* Compatibility flags */
#define OCFS2_HAS_COMPAT_FEATURE(sb,mask) \
......@@ -90,7 +91,8 @@
| OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC \
| OCFS2_FEATURE_INCOMPAT_INLINE_DATA \
| OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP \
| OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK)
| OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK \
| OCFS2_FEATURE_INCOMPAT_XATTR)
#define OCFS2_FEATURE_RO_COMPAT_SUPP OCFS2_FEATURE_RO_COMPAT_UNWRITTEN
/*
......@@ -127,10 +129,6 @@
/* Support for data packed into inode blocks */
#define OCFS2_FEATURE_INCOMPAT_INLINE_DATA 0x0040
/* Support for the extended slot map */
#define OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP 0x100
/*
* Support for alternate, userspace cluster stacks. If set, the superblock
* field s_cluster_info contains a tag for the alternate stack in use as
......@@ -142,6 +140,12 @@
*/
#define OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK 0x0080
/* Support for the extended slot map */
#define OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP 0x100
/* Support for extended attributes */
#define OCFS2_FEATURE_INCOMPAT_XATTR 0x0200
/*
* backup superblock flag is used to indicate that this volume
* has backup superblocks.
......@@ -299,6 +303,12 @@ struct ocfs2_new_group_input {
*/
#define OCFS2_DEFAULT_LOCAL_ALLOC_SIZE 8
/*
* Inline extended attribute size (in bytes)
* The value chosen should be aligned to 16 byte boundaries.
*/
#define OCFS2_MIN_XATTR_INLINE_SIZE 256
struct ocfs2_system_inode_info {
char *si_name;
int si_iflags;
......@@ -563,7 +573,7 @@ struct ocfs2_super_block {
/*40*/ __le16 s_max_slots; /* Max number of simultaneous mounts
before tunefs required */
__le16 s_tunefs_flag;
__le32 s_reserved1;
__le32 s_uuid_hash; /* hash value of uuid */
__le64 s_first_cluster_group; /* Block offset of 1st cluster
* group header */
/*50*/ __u8 s_label[OCFS2_MAX_VOL_LABEL_LEN]; /* Label for mounting, etc. */
......@@ -571,7 +581,11 @@ struct ocfs2_super_block {
/*A0*/ struct ocfs2_cluster_info s_cluster_info; /* Selected userspace
stack. Only valid
with INCOMPAT flag. */
/*B8*/ __le64 s_reserved2[17]; /* Fill out superblock */
/*B8*/ __le16 s_xattr_inline_size; /* extended attribute inline size
for this fs*/
__le16 s_reserved0;
__le32 s_reserved1;
/*C0*/ __le64 s_reserved2[16]; /* Fill out superblock */
/*140*/
/*
......@@ -621,7 +635,8 @@ struct ocfs2_dinode {
belongs to */
__le16 i_suballoc_bit; /* Bit offset in suballocator
block group */
/*10*/ __le32 i_reserved0;
/*10*/ __le16 i_reserved0;
__le16 i_xattr_inline_size;
__le32 i_clusters; /* Cluster count */
__le32 i_uid; /* Owner UID */
__le32 i_gid; /* Owning GID */
......@@ -640,11 +655,12 @@ struct ocfs2_dinode {
__le32 i_atime_nsec;
__le32 i_ctime_nsec;
__le32 i_mtime_nsec;
__le32 i_attr;
/*70*/ __le32 i_attr;
__le16 i_orphaned_slot; /* Only valid when OCFS2_ORPHANED_FL
was set in i_flags */
__le16 i_dyn_features;
/*70*/ __le64 i_reserved2[8];
__le64 i_xattr_loc;
/*80*/ __le64 i_reserved2[7];
/*B8*/ union {
__le64 i_pad1; /* Generic way to refer to this
64bit union */
......@@ -715,6 +731,136 @@ struct ocfs2_group_desc
/*40*/ __u8 bg_bitmap[0];
};
/*
* On disk extended attribute structure for OCFS2.
*/
/*
* ocfs2_xattr_entry indicates one extend attribute.
*
* Note that it can be stored in inode, one block or one xattr bucket.
*/
struct ocfs2_xattr_entry {
__le32 xe_name_hash; /* hash value of xattr prefix+suffix. */
__le16 xe_name_offset; /* byte offset from the 1st etnry in the local
local xattr storage(inode, xattr block or
xattr bucket). */
__u8 xe_name_len; /* xattr name len, does't include prefix. */
__u8 xe_type; /* the low 7 bits indicates the name prefix's
* type and the highest 1 bits indicate whether
* the EA is stored in the local storage. */
__le64 xe_value_size; /* real xattr value length. */
};
/*
* On disk structure for xattr header.
*
* One ocfs2_xattr_header describes how many ocfs2_xattr_entry records in
* the local xattr storage.
*/
struct ocfs2_xattr_header {
__le16 xh_count; /* contains the count of how
many records are in the
local xattr storage. */
__le16 xh_free_start; /* current offset for storing
xattr. */
__le16 xh_name_value_len; /* total length of name/value
length in this bucket. */
__le16 xh_num_buckets; /* bucket nums in one extent
record, only valid in the
first bucket. */
__le64 xh_csum;
struct ocfs2_xattr_entry xh_entries[0]; /* xattr entry list. */
};
/*
* On disk structure for xattr value root.
*
* It is used when one extended attribute's size is larger, and we will save it
* in an outside cluster. It will stored in a b-tree like file content.
*/
struct ocfs2_xattr_value_root {
/*00*/ __le32 xr_clusters; /* clusters covered by xattr value. */
__le32 xr_reserved0;
__le64 xr_last_eb_blk; /* Pointer to last extent block */
/*10*/ struct ocfs2_extent_list xr_list; /* Extent record list */
};
/*
* On disk structure for xattr tree root.
*
* It is used when there are too many extended attributes for one file. These
* attributes will be organized and stored in an indexed-btree.
*/
struct ocfs2_xattr_tree_root {
/*00*/ __le32 xt_clusters; /* clusters covered by xattr. */
__le32 xt_reserved0;
__le64 xt_last_eb_blk; /* Pointer to last extent block */
/*10*/ struct ocfs2_extent_list xt_list; /* Extent record list */
};
#define OCFS2_XATTR_INDEXED 0x1
#define OCFS2_HASH_SHIFT 5
#define OCFS2_XATTR_ROUND 3
#define OCFS2_XATTR_SIZE(size) (((size) + OCFS2_XATTR_ROUND) & \
~(OCFS2_XATTR_ROUND))
#define OCFS2_XATTR_BUCKET_SIZE 4096
#define OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET (OCFS2_XATTR_BUCKET_SIZE \
/ OCFS2_MIN_BLOCKSIZE)
/*
* On disk structure for xattr block.
*/
struct ocfs2_xattr_block {
/*00*/ __u8 xb_signature[8]; /* Signature for verification */
__le16 xb_suballoc_slot; /* Slot suballocator this
block belongs to. */
__le16 xb_suballoc_bit; /* Bit offset in suballocator
block group */
__le32 xb_fs_generation; /* Must match super block */
/*10*/ __le64 xb_blkno; /* Offset on disk, in blocks */
__le64 xb_csum;
/*20*/ __le16 xb_flags; /* Indicates whether this block contains
real xattr or a xattr tree. */
__le16 xb_reserved0;
__le32 xb_reserved1;
__le64 xb_reserved2;
/*30*/ union {
struct ocfs2_xattr_header xb_header; /* xattr header if this
block contains xattr */
struct ocfs2_xattr_tree_root xb_root;/* xattr tree root if this
block cotains xattr
tree. */
} xb_attrs;
};
#define OCFS2_XATTR_ENTRY_LOCAL 0x80
#define OCFS2_XATTR_TYPE_MASK 0x7F
static inline void ocfs2_xattr_set_local(struct ocfs2_xattr_entry *xe,
int local)
{
if (local)
xe->xe_type |= OCFS2_XATTR_ENTRY_LOCAL;
else
xe->xe_type &= ~OCFS2_XATTR_ENTRY_LOCAL;
}
static inline int ocfs2_xattr_is_local(struct ocfs2_xattr_entry *xe)
{
return xe->xe_type & OCFS2_XATTR_ENTRY_LOCAL;
}
static inline void ocfs2_xattr_set_type(struct ocfs2_xattr_entry *xe, int type)
{
xe->xe_type |= type & OCFS2_XATTR_TYPE_MASK;
}
static inline int ocfs2_xattr_get_type(struct ocfs2_xattr_entry *xe)
{
return xe->xe_type & OCFS2_XATTR_TYPE_MASK;
}
#ifdef __KERNEL__
static inline int ocfs2_fast_symlink_chars(struct super_block *sb)
{
......@@ -728,6 +874,20 @@ static inline int ocfs2_max_inline_data(struct super_block *sb)
offsetof(struct ocfs2_dinode, id2.i_data.id_data);
}
static inline int ocfs2_max_inline_data_with_xattr(struct super_block *sb,
struct ocfs2_dinode *di)
{
unsigned int xattrsize = le16_to_cpu(di->i_xattr_inline_size);
if (le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_XATTR_FL)
return sb->s_blocksize -
offsetof(struct ocfs2_dinode, id2.i_data.id_data) -
xattrsize;
else
return sb->s_blocksize -
offsetof(struct ocfs2_dinode, id2.i_data.id_data);
}
static inline int ocfs2_extent_recs_per_inode(struct super_block *sb)
{
int size;
......@@ -738,6 +898,24 @@ static inline int ocfs2_extent_recs_per_inode(struct super_block *sb)
return size / sizeof(struct ocfs2_extent_rec);
}
static inline int ocfs2_extent_recs_per_inode_with_xattr(
struct super_block *sb,
struct ocfs2_dinode *di)
{
int size;
unsigned int xattrsize = le16_to_cpu(di->i_xattr_inline_size);
if (le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_XATTR_FL)
size = sb->s_blocksize -
offsetof(struct ocfs2_dinode, id2.i_list.l_recs) -
xattrsize;
else
size = sb->s_blocksize -
offsetof(struct ocfs2_dinode, id2.i_list.l_recs);
return size / sizeof(struct ocfs2_extent_rec);
}
static inline int ocfs2_chain_recs_per_inode(struct super_block *sb)
{
int size;
......@@ -801,6 +979,17 @@ static inline u64 ocfs2_backup_super_blkno(struct super_block *sb, int index)
return 0;
}
static inline u16 ocfs2_xattr_recs_per_xb(struct super_block *sb)
{
int size;
size = sb->s_blocksize -
offsetof(struct ocfs2_xattr_block,
xb_attrs.xb_root.xt_list.l_recs);
return size / sizeof(struct ocfs2_extent_rec);
}
#else
static inline int ocfs2_fast_symlink_chars(int blocksize)
{
......@@ -884,6 +1073,17 @@ static inline uint64_t ocfs2_backup_super_blkno(int blocksize, int index)
return 0;
}
static inline int ocfs2_xattr_recs_per_xb(int blocksize)
{
int size;
size = blocksize -
offsetof(struct ocfs2_xattr_block,
xb_attrs.xb_root.xt_list.l_recs);
return size / sizeof(struct ocfs2_extent_rec);
}
#endif /* __KERNEL__ */
......
/* -*- mode: c; c-basic-offset: 8; -*-
* vim: noexpandtab sw=8 ts=8 sts=0:
*
* ocfs2_jbd_compat.h
*
* Compatibility defines for JBD.
*
* Copyright (C) 2008 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License version 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#ifndef OCFS2_JBD_COMPAT_H
#define OCFS2_JBD_COMPAT_H
#ifndef CONFIG_OCFS2_COMPAT_JBD
# error Should not have been included
#endif
struct jbd2_inode {
unsigned int dummy;
};
#define JBD2_BARRIER JFS_BARRIER
#define JBD2_DEFAULT_MAX_COMMIT_AGE JBD_DEFAULT_MAX_COMMIT_AGE
#define jbd2_journal_ack_err journal_ack_err
#define jbd2_journal_clear_err journal_clear_err
#define jbd2_journal_destroy journal_destroy
#define jbd2_journal_dirty_metadata journal_dirty_metadata
#define jbd2_journal_errno journal_errno
#define jbd2_journal_extend journal_extend
#define jbd2_journal_flush journal_flush
#define jbd2_journal_force_commit journal_force_commit
#define jbd2_journal_get_write_access journal_get_write_access
#define jbd2_journal_get_undo_access journal_get_undo_access
#define jbd2_journal_init_inode journal_init_inode
#define jbd2_journal_invalidatepage journal_invalidatepage
#define jbd2_journal_load journal_load
#define jbd2_journal_lock_updates journal_lock_updates
#define jbd2_journal_restart journal_restart
#define jbd2_journal_start journal_start
#define jbd2_journal_start_commit journal_start_commit
#define jbd2_journal_stop journal_stop
#define jbd2_journal_try_to_free_buffers journal_try_to_free_buffers
#define jbd2_journal_unlock_updates journal_unlock_updates
#define jbd2_journal_wipe journal_wipe
#define jbd2_log_wait_commit log_wait_commit
static inline int jbd2_journal_file_inode(handle_t *handle,
struct jbd2_inode *inode)
{
return 0;
}
static inline int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode,
loff_t new_size)
{
return 0;
}
static inline void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode,
struct inode *inode)
{
return;
}
static inline void jbd2_journal_release_jbd_inode(journal_t *journal,
struct jbd2_inode *jinode)
{
return;
}
#endif /* OCFS2_JBD_COMPAT_H */
......@@ -200,7 +200,7 @@ static int update_backups(struct inode * inode, u32 clusters, char *data)
if (cluster > clusters)
break;
ret = ocfs2_read_block(osb, blkno, &backup, 0, NULL);
ret = ocfs2_read_blocks_sync(osb, blkno, 1, &backup);
if (ret < 0) {
mlog_errno(ret);
break;
......@@ -236,8 +236,8 @@ static void ocfs2_update_super_and_backups(struct inode *inode,
* update the superblock last.
* It doesn't matter if the write failed.
*/
ret = ocfs2_read_block(osb, OCFS2_SUPER_BLOCK_BLKNO,
&super_bh, 0, NULL);
ret = ocfs2_read_blocks_sync(osb, OCFS2_SUPER_BLOCK_BLKNO, 1,
&super_bh);
if (ret < 0) {
mlog_errno(ret);
goto out;
......@@ -332,8 +332,7 @@ int ocfs2_group_extend(struct inode * inode, int new_clusters)
lgd_blkno = ocfs2_which_cluster_group(main_bm_inode,
first_new_cluster - 1);
ret = ocfs2_read_block(osb, lgd_blkno, &group_bh, OCFS2_BH_CACHED,
main_bm_inode);
ret = ocfs2_read_block(main_bm_inode, lgd_blkno, &group_bh);
if (ret < 0) {
mlog_errno(ret);
goto out_unlock;
......@@ -540,7 +539,7 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
goto out_unlock;
}
ret = ocfs2_read_block(osb, input->group, &group_bh, 0, NULL);
ret = ocfs2_read_blocks_sync(osb, input->group, 1, &group_bh);
if (ret < 0) {
mlog(ML_ERROR, "Can't read the group descriptor # %llu "
"from the device.", (unsigned long long)input->group);
......
......@@ -150,8 +150,8 @@ int ocfs2_refresh_slot_info(struct ocfs2_super *osb)
* be !NULL. Thus, ocfs2_read_blocks() will ignore blocknr. If
* this is not true, the read of -1 (UINT64_MAX) will fail.
*/
ret = ocfs2_read_blocks(osb, -1, si->si_blocks, si->si_bh, 0,
si->si_inode);
ret = ocfs2_read_blocks(si->si_inode, -1, si->si_blocks, si->si_bh,
OCFS2_BH_IGNORE_CACHE);
if (ret == 0) {
spin_lock(&osb->osb_lock);
ocfs2_update_slot_info(si);
......@@ -404,7 +404,8 @@ static int ocfs2_map_slot_buffers(struct ocfs2_super *osb,
(unsigned long long)blkno);
bh = NULL; /* Acquire a fresh bh */
status = ocfs2_read_block(osb, blkno, &bh, 0, si->si_inode);
status = ocfs2_read_blocks(si->si_inode, blkno, 1, &bh,
OCFS2_BH_IGNORE_CACHE);
if (status < 0) {
mlog_errno(status);
goto bail;
......
......@@ -28,6 +28,7 @@
#include "ocfs2.h" /* For struct ocfs2_lock_res */
#include "stackglue.h"
#include <linux/dlm_plock.h>
/*
* The control protocol starts with a handshake. Until the handshake
......@@ -746,6 +747,37 @@ static void user_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb)
{
}
static int user_plock(struct ocfs2_cluster_connection *conn,
u64 ino,
struct file *file,
int cmd,
struct file_lock *fl)
{
/*
* This more or less just demuxes the plock request into any
* one of three dlm calls.
*
* Internally, fs/dlm will pass these to a misc device, which
* a userspace daemon will read and write to.
*
* For now, cancel requests (which happen internally only),
* are turned into unlocks. Most of this function taken from
* gfs2_lock.
*/
if (cmd == F_CANCELLK) {
cmd = F_SETLK;
fl->fl_type = F_UNLCK;
}
if (IS_GETLK(cmd))
return dlm_posix_get(conn->cc_lockspace, ino, file, fl);
else if (fl->fl_type == F_UNLCK)
return dlm_posix_unlock(conn->cc_lockspace, ino, file, fl);
else
return dlm_posix_lock(conn->cc_lockspace, ino, file, cmd, fl);
}
/*
* Compare a requested locking protocol version against the current one.
*
......@@ -839,6 +871,7 @@ static struct ocfs2_stack_operations ocfs2_user_plugin_ops = {
.dlm_unlock = user_dlm_unlock,
.lock_status = user_dlm_lock_status,
.lock_lvb = user_dlm_lvb,
.plock = user_plock,
.dump_lksb = user_dlm_dump_lksb,
};
......
......@@ -288,6 +288,26 @@ void ocfs2_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb)
}
EXPORT_SYMBOL_GPL(ocfs2_dlm_dump_lksb);
int ocfs2_stack_supports_plocks(void)
{
return active_stack && active_stack->sp_ops->plock;
}
EXPORT_SYMBOL_GPL(ocfs2_stack_supports_plocks);
/*
* ocfs2_plock() can only be safely called if
* ocfs2_stack_supports_plocks() returned true
*/
int ocfs2_plock(struct ocfs2_cluster_connection *conn, u64 ino,
struct file *file, int cmd, struct file_lock *fl)
{
WARN_ON_ONCE(active_stack->sp_ops->plock == NULL);
if (active_stack->sp_ops->plock)
return active_stack->sp_ops->plock(conn, ino, file, cmd, fl);
return -EOPNOTSUPP;
}
EXPORT_SYMBOL_GPL(ocfs2_plock);
int ocfs2_cluster_connect(const char *stack_name,
const char *group,
int grouplen,
......
......@@ -28,6 +28,10 @@
#include "dlm/dlmapi.h"
#include <linux/dlm.h>
/* Needed for plock-related prototypes */
struct file;
struct file_lock;
/*
* dlmconstants.h does not have a LOCAL flag. We hope to remove it
* some day, but right now we need it. Let's fake it. This value is larger
......@@ -186,6 +190,17 @@ struct ocfs2_stack_operations {
*/
void *(*lock_lvb)(union ocfs2_dlm_lksb *lksb);
/*
* Cluster-aware posix locks
*
* This is NULL for stacks which do not support posix locks.
*/
int (*plock)(struct ocfs2_cluster_connection *conn,
u64 ino,
struct file *file,
int cmd,
struct file_lock *fl);
/*
* This is an optoinal debugging hook. If provided, the
* stack can dump debugging information about this lock.
......@@ -240,6 +255,10 @@ int ocfs2_dlm_lock_status(union ocfs2_dlm_lksb *lksb);
void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb);
void ocfs2_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb);
int ocfs2_stack_supports_plocks(void);
int ocfs2_plock(struct ocfs2_cluster_connection *conn, u64 ino,
struct file *file, int cmd, struct file_lock *fl);
void ocfs2_stack_glue_set_locking_protocol(struct ocfs2_locking_protocol *proto);
......
......@@ -62,15 +62,18 @@ static int ocfs2_block_group_fill(handle_t *handle,
struct ocfs2_chain_list *cl);
static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
struct inode *alloc_inode,
struct buffer_head *bh);
struct buffer_head *bh,
u64 max_block);
static int ocfs2_cluster_group_search(struct inode *inode,
struct buffer_head *group_bh,
u32 bits_wanted, u32 min_bits,
u64 max_block,
u16 *bit_off, u16 *bits_found);
static int ocfs2_block_group_search(struct inode *inode,
struct buffer_head *group_bh,
u32 bits_wanted, u32 min_bits,
u64 max_block,
u16 *bit_off, u16 *bits_found);
static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
struct ocfs2_alloc_context *ac,
......@@ -110,8 +113,11 @@ static inline void ocfs2_block_to_cluster_group(struct inode *inode,
u64 data_blkno,
u64 *bg_blkno,
u16 *bg_bit_off);
static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb,
u32 bits_wanted, u64 max_block,
struct ocfs2_alloc_context **ac);
static void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac)
void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac)
{
struct inode *inode = ac->ac_inode;
......@@ -124,10 +130,8 @@ static void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac)
iput(inode);
ac->ac_inode = NULL;
}
if (ac->ac_bh) {
brelse(ac->ac_bh);
ac->ac_bh = NULL;
}
brelse(ac->ac_bh);
ac->ac_bh = NULL;
}
void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac)
......@@ -276,7 +280,8 @@ static inline u16 ocfs2_find_smallest_chain(struct ocfs2_chain_list *cl)
*/
static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
struct inode *alloc_inode,
struct buffer_head *bh)
struct buffer_head *bh,
u64 max_block)
{
int status, credits;
struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bh->b_data;
......@@ -294,9 +299,9 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
mlog_entry_void();
cl = &fe->id2.i_chain;
status = ocfs2_reserve_clusters(osb,
le16_to_cpu(cl->cl_cpg),
&ac);
status = ocfs2_reserve_clusters_with_limit(osb,
le16_to_cpu(cl->cl_cpg),
max_block, &ac);
if (status < 0) {
if (status != -ENOSPC)
mlog_errno(status);
......@@ -394,8 +399,7 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
if (ac)
ocfs2_free_alloc_context(ac);
if (bg_bh)
brelse(bg_bh);
brelse(bg_bh);
mlog_exit(status);
return status;
......@@ -469,7 +473,8 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
goto bail;
}
status = ocfs2_block_group_alloc(osb, alloc_inode, bh);
status = ocfs2_block_group_alloc(osb, alloc_inode, bh,
ac->ac_max_block);
if (status < 0) {
if (status != -ENOSPC)
mlog_errno(status);
......@@ -486,16 +491,15 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
get_bh(bh);
ac->ac_bh = bh;
bail:
if (bh)
brelse(bh);
brelse(bh);
mlog_exit(status);
return status;
}
int ocfs2_reserve_new_metadata(struct ocfs2_super *osb,
struct ocfs2_dinode *fe,
struct ocfs2_alloc_context **ac)
int ocfs2_reserve_new_metadata_blocks(struct ocfs2_super *osb,
int blocks,
struct ocfs2_alloc_context **ac)
{
int status;
u32 slot;
......@@ -507,7 +511,7 @@ int ocfs2_reserve_new_metadata(struct ocfs2_super *osb,
goto bail;
}
(*ac)->ac_bits_wanted = ocfs2_extend_meta_needed(fe);
(*ac)->ac_bits_wanted = blocks;
(*ac)->ac_which = OCFS2_AC_USE_META;
slot = osb->slot_num;
(*ac)->ac_group_search = ocfs2_block_group_search;
......@@ -532,6 +536,15 @@ int ocfs2_reserve_new_metadata(struct ocfs2_super *osb,
return status;
}
int ocfs2_reserve_new_metadata(struct ocfs2_super *osb,
struct ocfs2_extent_list *root_el,
struct ocfs2_alloc_context **ac)
{
return ocfs2_reserve_new_metadata_blocks(osb,
ocfs2_extend_meta_needed(root_el),
ac);
}
static int ocfs2_steal_inode_from_other_nodes(struct ocfs2_super *osb,
struct ocfs2_alloc_context *ac)
{
......@@ -581,6 +594,14 @@ int ocfs2_reserve_new_inode(struct ocfs2_super *osb,
(*ac)->ac_group_search = ocfs2_block_group_search;
/*
* stat(2) can't handle i_ino > 32bits, so we tell the
* lower levels not to allocate us a block group past that
* limit. The 'inode64' mount option avoids this behavior.
*/
if (!(osb->s_mount_opt & OCFS2_MOUNT_INODE64))
(*ac)->ac_max_block = (u32)~0U;
/*
* slot is set when we successfully steal inode from other nodes.
* It is reset in 3 places:
......@@ -661,9 +682,9 @@ int ocfs2_reserve_cluster_bitmap_bits(struct ocfs2_super *osb,
/* Callers don't need to care which bitmap (local alloc or main) to
* use so we figure it out for them, but unfortunately this clutters
* things a bit. */
int ocfs2_reserve_clusters(struct ocfs2_super *osb,
u32 bits_wanted,
struct ocfs2_alloc_context **ac)
static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb,
u32 bits_wanted, u64 max_block,
struct ocfs2_alloc_context **ac)
{
int status;
......@@ -677,24 +698,20 @@ int ocfs2_reserve_clusters(struct ocfs2_super *osb,
}
(*ac)->ac_bits_wanted = bits_wanted;
(*ac)->ac_max_block = max_block;
status = -ENOSPC;
if (ocfs2_alloc_should_use_local(osb, bits_wanted)) {
status = ocfs2_reserve_local_alloc_bits(osb,
bits_wanted,
*ac);
if ((status < 0) && (status != -ENOSPC)) {
if (status == -EFBIG) {
/* The local alloc window is outside ac_max_block.
* use the main bitmap. */
status = -ENOSPC;
} else if ((status < 0) && (status != -ENOSPC)) {
mlog_errno(status);
goto bail;
} else if (status == -ENOSPC) {
/* reserve_local_bits will return enospc with
* the local alloc inode still locked, so we
* can change this safely here. */
mlog(0, "Disabling local alloc\n");
/* We set to OCFS2_LA_DISABLED so that umount
* can clean up what's left of the local
* allocation */
osb->local_alloc_state = OCFS2_LA_DISABLED;
}
}
......@@ -718,6 +735,13 @@ int ocfs2_reserve_clusters(struct ocfs2_super *osb,
return status;
}
int ocfs2_reserve_clusters(struct ocfs2_super *osb,
u32 bits_wanted,
struct ocfs2_alloc_context **ac)
{
return ocfs2_reserve_clusters_with_limit(osb, bits_wanted, 0, ac);
}
/*
* More or less lifted from ext3. I'll leave their description below:
*
......@@ -1000,11 +1024,14 @@ static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg
static int ocfs2_cluster_group_search(struct inode *inode,
struct buffer_head *group_bh,
u32 bits_wanted, u32 min_bits,
u64 max_block,
u16 *bit_off, u16 *bits_found)
{
int search = -ENOSPC;
int ret;
u64 blkoff;
struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *) group_bh->b_data;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
u16 tmp_off, tmp_found;
unsigned int max_bits, gd_cluster_off;
......@@ -1037,6 +1064,17 @@ static int ocfs2_cluster_group_search(struct inode *inode,
if (ret)
return ret;
if (max_block) {
blkoff = ocfs2_clusters_to_blocks(inode->i_sb,
gd_cluster_off +
tmp_off + tmp_found);
mlog(0, "Checking %llu against %llu\n",
(unsigned long long)blkoff,
(unsigned long long)max_block);
if (blkoff > max_block)
return -ENOSPC;
}
/* ocfs2_block_group_find_clear_bits() might
* return success, but we still want to return
* -ENOSPC unless it found the minimum number
......@@ -1045,6 +1083,12 @@ static int ocfs2_cluster_group_search(struct inode *inode,
*bit_off = tmp_off;
*bits_found = tmp_found;
search = 0; /* success */
} else if (tmp_found) {
/*
* Don't show bits which we'll be returning
* for allocation to the local alloc bitmap.
*/
ocfs2_local_alloc_seen_free_bits(osb, tmp_found);
}
}
......@@ -1054,19 +1098,31 @@ static int ocfs2_cluster_group_search(struct inode *inode,
static int ocfs2_block_group_search(struct inode *inode,
struct buffer_head *group_bh,
u32 bits_wanted, u32 min_bits,
u64 max_block,
u16 *bit_off, u16 *bits_found)
{
int ret = -ENOSPC;
u64 blkoff;
struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) group_bh->b_data;
BUG_ON(min_bits != 1);
BUG_ON(ocfs2_is_cluster_bitmap(inode));
if (bg->bg_free_bits_count)
if (bg->bg_free_bits_count) {
ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb),
group_bh, bits_wanted,
le16_to_cpu(bg->bg_bits),
bit_off, bits_found);
if (!ret && max_block) {
blkoff = le64_to_cpu(bg->bg_blkno) + *bit_off +
*bits_found;
mlog(0, "Checking %llu against %llu\n",
(unsigned long long)blkoff,
(unsigned long long)max_block);
if (blkoff > max_block)
ret = -ENOSPC;
}
}
return ret;
}
......@@ -1116,8 +1172,7 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
struct ocfs2_group_desc *gd;
struct inode *alloc_inode = ac->ac_inode;
ret = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb), gd_blkno,
&group_bh, OCFS2_BH_CACHED, alloc_inode);
ret = ocfs2_read_block(alloc_inode, gd_blkno, &group_bh);
if (ret < 0) {
mlog_errno(ret);
return ret;
......@@ -1131,7 +1186,7 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
}
ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits,
bit_off, &found);
ac->ac_max_block, bit_off, &found);
if (ret < 0) {
if (ret != -ENOSPC)
mlog_errno(ret);
......@@ -1186,9 +1241,9 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
bits_wanted, chain,
(unsigned long long)OCFS2_I(alloc_inode)->ip_blkno);
status = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb),
status = ocfs2_read_block(alloc_inode,
le64_to_cpu(cl->cl_recs[chain].c_blkno),
&group_bh, OCFS2_BH_CACHED, alloc_inode);
&group_bh);
if (status < 0) {
mlog_errno(status);
goto bail;
......@@ -1204,21 +1259,20 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
/* for now, the chain search is a bit simplistic. We just use
* the 1st group with any empty bits. */
while ((status = ac->ac_group_search(alloc_inode, group_bh,
bits_wanted, min_bits, bit_off,
bits_wanted, min_bits,
ac->ac_max_block, bit_off,
&tmp_bits)) == -ENOSPC) {
if (!bg->bg_next_group)
break;
if (prev_group_bh) {
brelse(prev_group_bh);
prev_group_bh = NULL;
}
brelse(prev_group_bh);
prev_group_bh = NULL;
next_group = le64_to_cpu(bg->bg_next_group);
prev_group_bh = group_bh;
group_bh = NULL;
status = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb),
next_group, &group_bh,
OCFS2_BH_CACHED, alloc_inode);
status = ocfs2_read_block(alloc_inode,
next_group, &group_bh);
if (status < 0) {
mlog_errno(status);
goto bail;
......@@ -1307,10 +1361,8 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
*bg_blkno = le64_to_cpu(bg->bg_blkno);
*bits_left = le16_to_cpu(bg->bg_free_bits_count);
bail:
if (group_bh)
brelse(group_bh);
if (prev_group_bh)
brelse(prev_group_bh);
brelse(group_bh);
brelse(prev_group_bh);
mlog_exit(status);
return status;
......@@ -1723,7 +1775,6 @@ int ocfs2_free_suballoc_bits(handle_t *handle,
{
int status = 0;
u32 tmp_used;
struct ocfs2_super *osb = OCFS2_SB(alloc_inode->i_sb);
struct ocfs2_dinode *fe = (struct ocfs2_dinode *) alloc_bh->b_data;
struct ocfs2_chain_list *cl = &fe->id2.i_chain;
struct buffer_head *group_bh = NULL;
......@@ -1742,8 +1793,7 @@ int ocfs2_free_suballoc_bits(handle_t *handle,
(unsigned long long)OCFS2_I(alloc_inode)->ip_blkno, count,
(unsigned long long)bg_blkno, start_bit);
status = ocfs2_read_block(osb, bg_blkno, &group_bh, OCFS2_BH_CACHED,
alloc_inode);
status = ocfs2_read_block(alloc_inode, bg_blkno, &group_bh);
if (status < 0) {
mlog_errno(status);
goto bail;
......@@ -1784,8 +1834,7 @@ int ocfs2_free_suballoc_bits(handle_t *handle,
}
bail:
if (group_bh)
brelse(group_bh);
brelse(group_bh);
mlog_exit(status);
return status;
......@@ -1838,9 +1887,15 @@ int ocfs2_free_clusters(handle_t *handle,
status = ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh,
bg_start_bit, bg_blkno,
num_clusters);
if (status < 0)
if (status < 0) {
mlog_errno(status);
goto out;
}
ocfs2_local_alloc_seen_free_bits(OCFS2_SB(bitmap_inode->i_sb),
num_clusters);
out:
mlog_exit(status);
return status;
}
......@@ -1891,3 +1946,84 @@ static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe)
(unsigned long long)fe->id2.i_chain.cl_recs[i].c_blkno);
}
}
/*
* For a given allocation, determine which allocators will need to be
* accessed, and lock them, reserving the appropriate number of bits.
*
* Sparse file systems call this from ocfs2_write_begin_nolock()
* and ocfs2_allocate_unwritten_extents().
*
* File systems which don't support holes call this from
* ocfs2_extend_allocation().
*/
int ocfs2_lock_allocators(struct inode *inode,
struct ocfs2_extent_tree *et,
u32 clusters_to_add, u32 extents_to_split,
struct ocfs2_alloc_context **data_ac,
struct ocfs2_alloc_context **meta_ac)
{
int ret = 0, num_free_extents;
unsigned int max_recs_needed = clusters_to_add + 2 * extents_to_split;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
*meta_ac = NULL;
if (data_ac)
*data_ac = NULL;
BUG_ON(clusters_to_add != 0 && data_ac == NULL);
num_free_extents = ocfs2_num_free_extents(osb, inode, et);
if (num_free_extents < 0) {
ret = num_free_extents;
mlog_errno(ret);
goto out;
}
/*
* Sparse allocation file systems need to be more conservative
* with reserving room for expansion - the actual allocation
* happens while we've got a journal handle open so re-taking
* a cluster lock (because we ran out of room for another
* extent) will violate ordering rules.
*
* Most of the time we'll only be seeing this 1 cluster at a time
* anyway.
*
* Always lock for any unwritten extents - we might want to
* add blocks during a split.
*/
if (!num_free_extents ||
(ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed)) {
ret = ocfs2_reserve_new_metadata(osb, et->et_root_el, meta_ac);
if (ret < 0) {
if (ret != -ENOSPC)
mlog_errno(ret);
goto out;
}
}
if (clusters_to_add == 0)
goto out;
ret = ocfs2_reserve_clusters(osb, clusters_to_add, data_ac);
if (ret < 0) {
if (ret != -ENOSPC)
mlog_errno(ret);
goto out;
}
out:
if (ret) {
if (*meta_ac) {
ocfs2_free_alloc_context(*meta_ac);
*meta_ac = NULL;
}
/*
* We cannot have an error and a non null *data_ac.
*/
}
return ret;
}
......@@ -28,10 +28,11 @@
typedef int (group_search_t)(struct inode *,
struct buffer_head *,
u32,
u32,
u16 *,
u16 *);
u32, /* bits_wanted */
u32, /* min_bits */
u64, /* max_block */
u16 *, /* *bit_off */
u16 *); /* *bits_found */
struct ocfs2_alloc_context {
struct inode *ac_inode; /* which bitmap are we allocating from? */
......@@ -51,6 +52,8 @@ struct ocfs2_alloc_context {
group_search_t *ac_group_search;
u64 ac_last_group;
u64 ac_max_block; /* Highest block number to allocate. 0 is
is the same as ~0 - unlimited */
};
void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac);
......@@ -59,9 +62,17 @@ static inline int ocfs2_alloc_context_bits_left(struct ocfs2_alloc_context *ac)
return ac->ac_bits_wanted - ac->ac_bits_given;
}
/*
* Please note that the caller must make sure that root_el is the root
* of extent tree. So for an inode, it should be &fe->id2.i_list. Otherwise
* the result may be wrong.
*/
int ocfs2_reserve_new_metadata(struct ocfs2_super *osb,
struct ocfs2_dinode *fe,
struct ocfs2_extent_list *root_el,
struct ocfs2_alloc_context **ac);
int ocfs2_reserve_new_metadata_blocks(struct ocfs2_super *osb,
int blocks,
struct ocfs2_alloc_context **ac);
int ocfs2_reserve_new_inode(struct ocfs2_super *osb,
struct ocfs2_alloc_context **ac);
int ocfs2_reserve_clusters(struct ocfs2_super *osb,
......@@ -147,6 +158,7 @@ static inline int ocfs2_is_cluster_bitmap(struct inode *inode)
* apis above. */
int ocfs2_reserve_cluster_bitmap_bits(struct ocfs2_super *osb,
struct ocfs2_alloc_context *ac);
void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac);
/* given a cluster offset, calculate which block group it belongs to
* and return that block offset. */
......@@ -156,4 +168,8 @@ u64 ocfs2_which_cluster_group(struct inode *inode, u32 cluster);
int ocfs2_check_group_descriptor(struct super_block *sb,
struct ocfs2_dinode *di,
struct ocfs2_group_desc *gd);
int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_extent_tree *et,
u32 clusters_to_add, u32 extents_to_split,
struct ocfs2_alloc_context **data_ac,
struct ocfs2_alloc_context **meta_ac);
#endif /* _CHAINALLOC_H_ */
......@@ -64,6 +64,7 @@
#include "sysfile.h"
#include "uptodate.h"
#include "ver.h"
#include "xattr.h"
#include "buffer_head_io.h"
......@@ -154,6 +155,9 @@ enum {
Opt_localalloc,
Opt_localflocks,
Opt_stack,
Opt_user_xattr,
Opt_nouser_xattr,
Opt_inode64,
Opt_err,
};
......@@ -173,6 +177,9 @@ static const match_table_t tokens = {
{Opt_localalloc, "localalloc=%d"},
{Opt_localflocks, "localflocks"},
{Opt_stack, "cluster_stack=%s"},
{Opt_user_xattr, "user_xattr"},
{Opt_nouser_xattr, "nouser_xattr"},
{Opt_inode64, "inode64"},
{Opt_err, NULL}
};
......@@ -205,10 +212,11 @@ static int ocfs2_sync_fs(struct super_block *sb, int wait)
ocfs2_schedule_truncate_log_flush(osb, 0);
}
if (journal_start_commit(OCFS2_SB(sb)->journal->j_journal, &target)) {
if (jbd2_journal_start_commit(OCFS2_SB(sb)->journal->j_journal,
&target)) {
if (wait)
log_wait_commit(OCFS2_SB(sb)->journal->j_journal,
target);
jbd2_log_wait_commit(OCFS2_SB(sb)->journal->j_journal,
target);
}
return 0;
}
......@@ -325,6 +333,7 @@ static struct inode *ocfs2_alloc_inode(struct super_block *sb)
if (!oi)
return NULL;
jbd2_journal_init_jbd_inode(&oi->ip_jinode, &oi->vfs_inode);
return &oi->vfs_inode;
}
......@@ -406,6 +415,15 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
goto out;
}
/* Probably don't want this on remount; it might
* mess with other nodes */
if (!(osb->s_mount_opt & OCFS2_MOUNT_INODE64) &&
(parsed_options.mount_opt & OCFS2_MOUNT_INODE64)) {
ret = -EINVAL;
mlog(ML_ERROR, "Cannot enable inode64 on remount\n");
goto out;
}
/* We're going to/from readonly mode. */
if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) {
/* Lock here so the check of HARD_RO and the potential
......@@ -637,7 +655,8 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
osb->s_atime_quantum = parsed_options.atime_quantum;
osb->preferred_slot = parsed_options.slot;
osb->osb_commit_interval = parsed_options.commit_interval;
osb->local_alloc_size = parsed_options.localalloc_opt;
osb->local_alloc_default_bits = ocfs2_megabytes_to_clusters(sb, parsed_options.localalloc_opt);
osb->local_alloc_bits = osb->local_alloc_default_bits;
status = ocfs2_verify_userspace_stack(osb, &parsed_options);
if (status)
......@@ -743,8 +762,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
return status;
read_super_error:
if (bh != NULL)
brelse(bh);
brelse(bh);
if (inode)
iput(inode);
......@@ -847,6 +865,12 @@ static int ocfs2_parse_options(struct super_block *sb,
case Opt_data_writeback:
mopt->mount_opt |= OCFS2_MOUNT_DATA_WRITEBACK;
break;
case Opt_user_xattr:
mopt->mount_opt &= ~OCFS2_MOUNT_NOUSERXATTR;
break;
case Opt_nouser_xattr:
mopt->mount_opt |= OCFS2_MOUNT_NOUSERXATTR;
break;
case Opt_atime_quantum:
if (match_int(&args[0], &option)) {
status = 0;
......@@ -873,7 +897,7 @@ static int ocfs2_parse_options(struct super_block *sb,
if (option < 0)
return 0;
if (option == 0)
option = JBD_DEFAULT_MAX_COMMIT_AGE;
option = JBD2_DEFAULT_MAX_COMMIT_AGE;
mopt->commit_interval = HZ * option;
break;
case Opt_localalloc:
......@@ -918,6 +942,9 @@ static int ocfs2_parse_options(struct super_block *sb,
OCFS2_STACK_LABEL_LEN);
mopt->cluster_stack[OCFS2_STACK_LABEL_LEN] = '\0';
break;
case Opt_inode64:
mopt->mount_opt |= OCFS2_MOUNT_INODE64;
break;
default:
mlog(ML_ERROR,
"Unrecognized mount option \"%s\" "
......@@ -938,6 +965,7 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
{
struct ocfs2_super *osb = OCFS2_SB(mnt->mnt_sb);
unsigned long opts = osb->s_mount_opt;
unsigned int local_alloc_megs;
if (opts & OCFS2_MOUNT_HB_LOCAL)
seq_printf(s, ",_netdev,heartbeat=local");
......@@ -970,8 +998,9 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
seq_printf(s, ",commit=%u",
(unsigned) (osb->osb_commit_interval / HZ));
if (osb->local_alloc_size != OCFS2_DEFAULT_LOCAL_ALLOC_SIZE)
seq_printf(s, ",localalloc=%d", osb->local_alloc_size);
local_alloc_megs = osb->local_alloc_bits >> (20 - osb->s_clustersize_bits);
if (local_alloc_megs != OCFS2_DEFAULT_LOCAL_ALLOC_SIZE)
seq_printf(s, ",localalloc=%d", local_alloc_megs);
if (opts & OCFS2_MOUNT_LOCALFLOCKS)
seq_printf(s, ",localflocks,");
......@@ -980,6 +1009,14 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
seq_printf(s, ",cluster_stack=%.*s", OCFS2_STACK_LABEL_LEN,
osb->osb_cluster_stack);
if (opts & OCFS2_MOUNT_NOUSERXATTR)
seq_printf(s, ",nouser_xattr");
else
seq_printf(s, ",user_xattr");
if (opts & OCFS2_MOUNT_INODE64)
seq_printf(s, ",inode64");
return 0;
}
......@@ -1132,6 +1169,7 @@ static void ocfs2_inode_init_once(void *data)
oi->ip_dir_start_lookup = 0;
init_rwsem(&oi->ip_alloc_sem);
init_rwsem(&oi->ip_xattr_sem);
mutex_init(&oi->ip_io_mutex);
oi->ip_blkno = 0ULL;
......@@ -1375,6 +1413,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
sb->s_fs_info = osb;
sb->s_op = &ocfs2_sops;
sb->s_export_op = &ocfs2_export_ops;
sb->s_xattr = ocfs2_xattr_handlers;
sb->s_time_gran = 1;
sb->s_flags |= MS_NOATIME;
/* this is needed to support O_LARGEFILE */
......@@ -1421,8 +1460,12 @@ static int ocfs2_initialize_super(struct super_block *sb,
osb->slot_num = OCFS2_INVALID_SLOT;
osb->s_xattr_inline_size = le16_to_cpu(
di->id2.i_super.s_xattr_inline_size);
osb->local_alloc_state = OCFS2_LA_UNUSED;
osb->local_alloc_bh = NULL;
INIT_DELAYED_WORK(&osb->la_enable_wq, ocfs2_la_enable_worker);
init_waitqueue_head(&osb->osb_mount_event);
......@@ -1568,6 +1611,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
osb->first_cluster_group_blkno =
le64_to_cpu(di->id2.i_super.s_first_cluster_group);
osb->fs_generation = le32_to_cpu(di->i_fs_generation);
osb->uuid_hash = le32_to_cpu(di->id2.i_super.s_uuid_hash);
mlog(0, "vol_label: %s\n", osb->vol_label);
mlog(0, "uuid: %s\n", osb->uuid_str);
mlog(0, "root_blkno=%llu, system_dir_blkno=%llu\n",
......
......@@ -50,6 +50,7 @@
#include "inode.h"
#include "journal.h"
#include "symlink.h"
#include "xattr.h"
#include "buffer_head_io.h"
......@@ -83,11 +84,7 @@ static char *ocfs2_fast_symlink_getlink(struct inode *inode,
mlog_entry_void();
status = ocfs2_read_block(OCFS2_SB(inode->i_sb),
OCFS2_I(inode)->ip_blkno,
bh,
OCFS2_BH_CACHED,
inode);
status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, bh);
if (status < 0) {
mlog_errno(status);
link = ERR_PTR(status);
......@@ -157,8 +154,7 @@ static void *ocfs2_follow_link(struct dentry *dentry,
kunmap(page);
page_cache_release(page);
}
if (bh)
brelse(bh);
brelse(bh);
return ERR_PTR(status);
}
......@@ -168,10 +164,18 @@ const struct inode_operations ocfs2_symlink_inode_operations = {
.follow_link = ocfs2_follow_link,
.getattr = ocfs2_getattr,
.setattr = ocfs2_setattr,
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
.listxattr = ocfs2_listxattr,
.removexattr = generic_removexattr,
};
const struct inode_operations ocfs2_fast_symlink_inode_operations = {
.readlink = ocfs2_readlink,
.follow_link = ocfs2_follow_link,
.getattr = ocfs2_getattr,
.setattr = ocfs2_setattr,
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
.listxattr = ocfs2_listxattr,
.removexattr = generic_removexattr,
};
......@@ -53,7 +53,11 @@
#include <linux/highmem.h>
#include <linux/buffer_head.h>
#include <linux/rbtree.h>
#include <linux/jbd.h>
#ifndef CONFIG_OCFS2_COMPAT_JBD
# include <linux/jbd2.h>
#else
# include <linux/jbd.h>
#endif
#define MLOG_MASK_PREFIX ML_UPTODATE
......@@ -511,14 +515,10 @@ static void ocfs2_remove_metadata_tree(struct ocfs2_caching_info *ci,
ci->ci_num_cached--;
}
/* Called when we remove a chunk of metadata from an inode. We don't
* bother reverting things to an inlined array in the case of a remove
* which moves us back under the limit. */
void ocfs2_remove_from_cache(struct inode *inode,
struct buffer_head *bh)
static void ocfs2_remove_block_from_cache(struct inode *inode,
sector_t block)
{
int index;
sector_t block = bh->b_blocknr;
struct ocfs2_meta_cache_item *item = NULL;
struct ocfs2_inode_info *oi = OCFS2_I(inode);
struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
......@@ -544,6 +544,30 @@ void ocfs2_remove_from_cache(struct inode *inode,
kmem_cache_free(ocfs2_uptodate_cachep, item);
}
/*
* Called when we remove a chunk of metadata from an inode. We don't
* bother reverting things to an inlined array in the case of a remove
* which moves us back under the limit.
*/
void ocfs2_remove_from_cache(struct inode *inode,
struct buffer_head *bh)
{
sector_t block = bh->b_blocknr;
ocfs2_remove_block_from_cache(inode, block);
}
/* Called when we remove xattr clusters from an inode. */
void ocfs2_remove_xattr_clusters_from_cache(struct inode *inode,
sector_t block,
u32 c_len)
{
unsigned int i, b_len = ocfs2_clusters_to_blocks(inode->i_sb, 1) * c_len;
for (i = 0; i < b_len; i++, block++)
ocfs2_remove_block_from_cache(inode, block);
}
int __init init_ocfs2_uptodate_cache(void)
{
ocfs2_uptodate_cachep = kmem_cache_create("ocfs2_uptodate",
......
......@@ -40,6 +40,9 @@ void ocfs2_set_new_buffer_uptodate(struct inode *inode,
struct buffer_head *bh);
void ocfs2_remove_from_cache(struct inode *inode,
struct buffer_head *bh);
void ocfs2_remove_xattr_clusters_from_cache(struct inode *inode,
sector_t block,
u32 c_len);
int ocfs2_buffer_read_ahead(struct inode *inode,
struct buffer_head *bh);
......
This source diff could not be displayed because it is too large. You can view the blob instead.
/* -*- mode: c; c-basic-offset: 8; -*-
* vim: noexpandtab sw=8 ts=8 sts=0:
*
* xattr.h
*
* Function prototypes
*
* Copyright (C) 2008 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#ifndef OCFS2_XATTR_H
#define OCFS2_XATTR_H
#include <linux/init.h>
#include <linux/xattr.h>
enum ocfs2_xattr_type {
OCFS2_XATTR_INDEX_USER = 1,
OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS,
OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT,
OCFS2_XATTR_INDEX_TRUSTED,
OCFS2_XATTR_INDEX_SECURITY,
OCFS2_XATTR_MAX
};
extern struct xattr_handler ocfs2_xattr_user_handler;
extern struct xattr_handler ocfs2_xattr_trusted_handler;
extern ssize_t ocfs2_listxattr(struct dentry *, char *, size_t);
extern int ocfs2_xattr_get(struct inode *, int, const char *, void *, size_t);
extern int ocfs2_xattr_set(struct inode *, int, const char *, const void *,
size_t, int);
extern int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh);
extern struct xattr_handler *ocfs2_xattr_handlers[];
static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb)
{
return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE;
}
static inline u16 ocfs2_blocks_per_xattr_bucket(struct super_block *sb)
{
return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits);
}
static inline u16 ocfs2_xattr_max_xe_in_bucket(struct super_block *sb)
{
u16 len = sb->s_blocksize -
offsetof(struct ocfs2_xattr_header, xh_entries);
return len / sizeof(struct ocfs2_xattr_entry);
}
#endif /* OCFS2_XATTR_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment