Commit 78609a81 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2

* 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2: (32 commits)
  ocfs2: recover orphans in offline slots during recovery and mount
  ocfs2: Pagecache usage optimization on ocfs2
  ocfs2: fix rare stale inode errors when exporting via nfs
  ocfs2/dlm: Tweak mle_state output
  ocfs2/dlm: Do not purge lockres that is being migrated dlm_purge_lockres()
  ocfs2/dlm: Remove struct dlm_lock_name in struct dlm_master_list_entry
  ocfs2/dlm: Show the number of lockres/mles in dlm_state
  ocfs2/dlm: dlm_set_lockres_owner() and dlm_change_lockres_owner() inlined
  ocfs2/dlm: Improve lockres counts
  ocfs2/dlm: Track number of mles
  ocfs2/dlm: Indent dlm_cleanup_master_list()
  ocfs2/dlm: Activate dlm->master_hash for master list entries
  ocfs2/dlm: Create and destroy the dlm->master_hash
  ocfs2/dlm: Refactor dlm_clean_master_list()
  ocfs2/dlm: Clean up struct dlm_lock_name
  ocfs2/dlm: Encapsulate adding and removing of mle from dlm->master_list
  ocfs2: Optimize inode group allocation by recording last used group.
  ocfs2: Allocate inode groups from global_bitmap.
  ocfs2: Optimize inode allocation by remembering last group
  ocfs2: fix leaf start calculation in ocfs2_dx_dir_rebalance()
  ...
parents 133e2a31 9140db04
......@@ -294,6 +294,55 @@ static struct ocfs2_extent_tree_operations ocfs2_xattr_tree_et_ops = {
.eo_fill_max_leaf_clusters = ocfs2_xattr_tree_fill_max_leaf_clusters,
};
static void ocfs2_dx_root_set_last_eb_blk(struct ocfs2_extent_tree *et,
u64 blkno)
{
struct ocfs2_dx_root_block *dx_root = et->et_object;
dx_root->dr_last_eb_blk = cpu_to_le64(blkno);
}
static u64 ocfs2_dx_root_get_last_eb_blk(struct ocfs2_extent_tree *et)
{
struct ocfs2_dx_root_block *dx_root = et->et_object;
return le64_to_cpu(dx_root->dr_last_eb_blk);
}
static void ocfs2_dx_root_update_clusters(struct inode *inode,
struct ocfs2_extent_tree *et,
u32 clusters)
{
struct ocfs2_dx_root_block *dx_root = et->et_object;
le32_add_cpu(&dx_root->dr_clusters, clusters);
}
static int ocfs2_dx_root_sanity_check(struct inode *inode,
struct ocfs2_extent_tree *et)
{
struct ocfs2_dx_root_block *dx_root = et->et_object;
BUG_ON(!OCFS2_IS_VALID_DX_ROOT(dx_root));
return 0;
}
static void ocfs2_dx_root_fill_root_el(struct ocfs2_extent_tree *et)
{
struct ocfs2_dx_root_block *dx_root = et->et_object;
et->et_root_el = &dx_root->dr_list;
}
static struct ocfs2_extent_tree_operations ocfs2_dx_root_et_ops = {
.eo_set_last_eb_blk = ocfs2_dx_root_set_last_eb_blk,
.eo_get_last_eb_blk = ocfs2_dx_root_get_last_eb_blk,
.eo_update_clusters = ocfs2_dx_root_update_clusters,
.eo_sanity_check = ocfs2_dx_root_sanity_check,
.eo_fill_root_el = ocfs2_dx_root_fill_root_el,
};
static void __ocfs2_init_extent_tree(struct ocfs2_extent_tree *et,
struct inode *inode,
struct buffer_head *bh,
......@@ -339,6 +388,14 @@ void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et,
&ocfs2_xattr_value_et_ops);
}
void ocfs2_init_dx_root_extent_tree(struct ocfs2_extent_tree *et,
struct inode *inode,
struct buffer_head *bh)
{
__ocfs2_init_extent_tree(et, inode, bh, ocfs2_journal_access_dr,
NULL, &ocfs2_dx_root_et_ops);
}
static inline void ocfs2_et_set_last_eb_blk(struct ocfs2_extent_tree *et,
u64 new_last_eb_blk)
{
......
......@@ -75,6 +75,9 @@ struct ocfs2_xattr_value_buf;
void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et,
struct inode *inode,
struct ocfs2_xattr_value_buf *vb);
void ocfs2_init_dx_root_extent_tree(struct ocfs2_extent_tree *et,
struct inode *inode,
struct buffer_head *bh);
/*
* Read an extent block into *bh. If *bh is NULL, a bh will be
......
......@@ -1956,15 +1956,16 @@ static int ocfs2_write_end(struct file *file, struct address_space *mapping,
}
const struct address_space_operations ocfs2_aops = {
.readpage = ocfs2_readpage,
.readpages = ocfs2_readpages,
.writepage = ocfs2_writepage,
.write_begin = ocfs2_write_begin,
.write_end = ocfs2_write_end,
.bmap = ocfs2_bmap,
.sync_page = block_sync_page,
.direct_IO = ocfs2_direct_IO,
.invalidatepage = ocfs2_invalidatepage,
.releasepage = ocfs2_releasepage,
.migratepage = buffer_migrate_page,
.readpage = ocfs2_readpage,
.readpages = ocfs2_readpages,
.writepage = ocfs2_writepage,
.write_begin = ocfs2_write_begin,
.write_end = ocfs2_write_end,
.bmap = ocfs2_bmap,
.sync_page = block_sync_page,
.direct_IO = ocfs2_direct_IO,
.invalidatepage = ocfs2_invalidatepage,
.releasepage = ocfs2_releasepage,
.migratepage = buffer_migrate_page,
.is_partially_uptodate = block_is_partially_uptodate,
};
......@@ -33,6 +33,7 @@
#include <linux/random.h>
#include <linux/crc32.h>
#include <linux/time.h>
#include <linux/debugfs.h>
#include "heartbeat.h"
#include "tcp.h"
......@@ -60,6 +61,11 @@ static unsigned long o2hb_live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
static LIST_HEAD(o2hb_node_events);
static DECLARE_WAIT_QUEUE_HEAD(o2hb_steady_queue);
#define O2HB_DEBUG_DIR "o2hb"
#define O2HB_DEBUG_LIVENODES "livenodes"
static struct dentry *o2hb_debug_dir;
static struct dentry *o2hb_debug_livenodes;
static LIST_HEAD(o2hb_all_regions);
static struct o2hb_callback {
......@@ -905,7 +911,77 @@ static int o2hb_thread(void *data)
return 0;
}
void o2hb_init(void)
#ifdef CONFIG_DEBUG_FS
static int o2hb_debug_open(struct inode *inode, struct file *file)
{
unsigned long map[BITS_TO_LONGS(O2NM_MAX_NODES)];
char *buf = NULL;
int i = -1;
int out = 0;
buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
if (!buf)
goto bail;
o2hb_fill_node_map(map, sizeof(map));
while ((i = find_next_bit(map, O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES)
out += snprintf(buf + out, PAGE_SIZE - out, "%d ", i);
out += snprintf(buf + out, PAGE_SIZE - out, "\n");
i_size_write(inode, out);
file->private_data = buf;
return 0;
bail:
return -ENOMEM;
}
static int o2hb_debug_release(struct inode *inode, struct file *file)
{
kfree(file->private_data);
return 0;
}
static ssize_t o2hb_debug_read(struct file *file, char __user *buf,
size_t nbytes, loff_t *ppos)
{
return simple_read_from_buffer(buf, nbytes, ppos, file->private_data,
i_size_read(file->f_mapping->host));
}
#else
static int o2hb_debug_open(struct inode *inode, struct file *file)
{
return 0;
}
static int o2hb_debug_release(struct inode *inode, struct file *file)
{
return 0;
}
static ssize_t o2hb_debug_read(struct file *file, char __user *buf,
size_t nbytes, loff_t *ppos)
{
return 0;
}
#endif /* CONFIG_DEBUG_FS */
static struct file_operations o2hb_debug_fops = {
.open = o2hb_debug_open,
.release = o2hb_debug_release,
.read = o2hb_debug_read,
.llseek = generic_file_llseek,
};
void o2hb_exit(void)
{
if (o2hb_debug_livenodes)
debugfs_remove(o2hb_debug_livenodes);
if (o2hb_debug_dir)
debugfs_remove(o2hb_debug_dir);
}
int o2hb_init(void)
{
int i;
......@@ -918,6 +994,24 @@ void o2hb_init(void)
INIT_LIST_HEAD(&o2hb_node_events);
memset(o2hb_live_node_bitmap, 0, sizeof(o2hb_live_node_bitmap));
o2hb_debug_dir = debugfs_create_dir(O2HB_DEBUG_DIR, NULL);
if (!o2hb_debug_dir) {
mlog_errno(-ENOMEM);
return -ENOMEM;
}
o2hb_debug_livenodes = debugfs_create_file(O2HB_DEBUG_LIVENODES,
S_IFREG|S_IRUSR,
o2hb_debug_dir, NULL,
&o2hb_debug_fops);
if (!o2hb_debug_livenodes) {
mlog_errno(-ENOMEM);
debugfs_remove(o2hb_debug_dir);
return -ENOMEM;
}
return 0;
}
/* if we're already in a callback then we're already serialized by the sem */
......
......@@ -75,7 +75,8 @@ void o2hb_unregister_callback(const char *region_uuid,
struct o2hb_callback_func *hc);
void o2hb_fill_node_map(unsigned long *map,
unsigned bytes);
void o2hb_init(void);
void o2hb_exit(void);
int o2hb_init(void);
int o2hb_check_node_heartbeating(u8 node_num);
int o2hb_check_node_heartbeating_from_callback(u8 node_num);
int o2hb_check_local_node_heartbeating(void);
......
......@@ -881,6 +881,7 @@ static void __exit exit_o2nm(void)
o2cb_sys_shutdown();
o2net_exit();
o2hb_exit();
}
static int __init init_o2nm(void)
......@@ -889,11 +890,13 @@ static int __init init_o2nm(void)
cluster_print_version();
o2hb_init();
ret = o2hb_init();
if (ret)
goto out;
ret = o2net_init();
if (ret)
goto out;
goto out_o2hb;
ret = o2net_register_hb_callbacks();
if (ret)
......@@ -916,6 +919,8 @@ static int __init init_o2nm(void)
o2net_unregister_hb_callbacks();
out_o2net:
o2net_exit();
out_o2hb:
o2hb_exit();
out:
return ret;
}
......
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -26,44 +26,70 @@
#ifndef OCFS2_DIR_H
#define OCFS2_DIR_H
struct buffer_head *ocfs2_find_entry(const char *name,
int namelen,
struct inode *dir,
struct ocfs2_dir_entry **res_dir);
struct ocfs2_dx_hinfo {
u32 major_hash;
u32 minor_hash;
};
struct ocfs2_dir_lookup_result {
struct buffer_head *dl_leaf_bh; /* Unindexed leaf
* block */
struct ocfs2_dir_entry *dl_entry; /* Target dirent in
* unindexed leaf */
struct buffer_head *dl_dx_root_bh; /* Root of indexed
* tree */
struct buffer_head *dl_dx_leaf_bh; /* Indexed leaf block */
struct ocfs2_dx_entry *dl_dx_entry; /* Target dx_entry in
* indexed leaf */
struct ocfs2_dx_hinfo dl_hinfo; /* Name hash results */
struct buffer_head *dl_prev_leaf_bh;/* Previous entry in
* dir free space
* list. NULL if
* previous entry is
* dx root block. */
};
void ocfs2_free_dir_lookup_result(struct ocfs2_dir_lookup_result *res);
int ocfs2_find_entry(const char *name, int namelen,
struct inode *dir,
struct ocfs2_dir_lookup_result *lookup);
int ocfs2_delete_entry(handle_t *handle,
struct inode *dir,
struct ocfs2_dir_entry *de_del,
struct buffer_head *bh);
struct ocfs2_dir_lookup_result *res);
int __ocfs2_add_entry(handle_t *handle,
struct inode *dir,
const char *name, int namelen,
struct inode *inode, u64 blkno,
struct buffer_head *parent_fe_bh,
struct buffer_head *insert_bh);
struct ocfs2_dir_lookup_result *lookup);
static inline int ocfs2_add_entry(handle_t *handle,
struct dentry *dentry,
struct inode *inode, u64 blkno,
struct buffer_head *parent_fe_bh,
struct buffer_head *insert_bh)
struct ocfs2_dir_lookup_result *lookup)
{
return __ocfs2_add_entry(handle, dentry->d_parent->d_inode,
dentry->d_name.name, dentry->d_name.len,
inode, blkno, parent_fe_bh, insert_bh);
inode, blkno, parent_fe_bh, lookup);
}
int ocfs2_update_entry(struct inode *dir, handle_t *handle,
struct buffer_head *de_bh, struct ocfs2_dir_entry *de,
struct ocfs2_dir_lookup_result *res,
struct inode *new_entry_inode);
int ocfs2_check_dir_for_entry(struct inode *dir,
const char *name,
int namelen);
int ocfs2_empty_dir(struct inode *inode);
int ocfs2_find_files_on_disk(const char *name,
int namelen,
u64 *blkno,
struct inode *inode,
struct buffer_head **dirent_bh,
struct ocfs2_dir_entry **dirent);
struct ocfs2_dir_lookup_result *res);
int ocfs2_lookup_ino_from_name(struct inode *dir, const char *name,
int namelen, u64 *blkno);
int ocfs2_readdir(struct file *filp, void *dirent, filldir_t filldir);
......@@ -74,14 +100,17 @@ int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb,
struct buffer_head *parent_fe_bh,
const char *name,
int namelen,
struct buffer_head **ret_de_bh);
struct ocfs2_dir_lookup_result *lookup);
struct ocfs2_alloc_context;
int ocfs2_fill_new_dir(struct ocfs2_super *osb,
handle_t *handle,
struct inode *parent,
struct inode *inode,
struct buffer_head *fe_bh,
struct ocfs2_alloc_context *data_ac);
struct ocfs2_alloc_context *data_ac,
struct ocfs2_alloc_context *meta_ac);
int ocfs2_dx_dir_truncate(struct inode *dir, struct buffer_head *di_bh);
struct ocfs2_dir_block_trailer *ocfs2_dir_trailer_from_size(int blocksize,
void *data);
......
......@@ -52,16 +52,12 @@
enum dlm_mle_type {
DLM_MLE_BLOCK,
DLM_MLE_MASTER,
DLM_MLE_MIGRATION
};
struct dlm_lock_name {
u8 len;
u8 name[DLM_LOCKID_NAME_MAX];
DLM_MLE_MIGRATION,
DLM_MLE_NUM_TYPES
};
struct dlm_master_list_entry {
struct list_head list;
struct hlist_node master_hash_node;
struct list_head hb_events;
struct dlm_ctxt *dlm;
spinlock_t spinlock;
......@@ -78,10 +74,10 @@ struct dlm_master_list_entry {
enum dlm_mle_type type;
struct o2hb_callback_func mle_hb_up;
struct o2hb_callback_func mle_hb_down;
union {
struct dlm_lock_resource *res;
struct dlm_lock_name name;
} u;
struct dlm_lock_resource *mleres;
unsigned char mname[DLM_LOCKID_NAME_MAX];
unsigned int mnamelen;
unsigned int mnamehash;
};
enum dlm_ast_type {
......@@ -151,13 +147,14 @@ struct dlm_ctxt
unsigned long recovery_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
struct dlm_recovery_ctxt reco;
spinlock_t master_lock;
struct list_head master_list;
struct hlist_head **master_hash;
struct list_head mle_hb_events;
/* these give a really vague idea of the system load */
atomic_t local_resources;
atomic_t remote_resources;
atomic_t unknown_resources;
atomic_t mle_tot_count[DLM_MLE_NUM_TYPES];
atomic_t mle_cur_count[DLM_MLE_NUM_TYPES];
atomic_t res_tot_count;
atomic_t res_cur_count;
struct dlm_debug_ctxt *dlm_debug_ctxt;
struct dentry *dlm_debugfs_subroot;
......@@ -195,6 +192,13 @@ static inline struct hlist_head *dlm_lockres_hash(struct dlm_ctxt *dlm, unsigned
return dlm->lockres_hash[(i / DLM_BUCKETS_PER_PAGE) % DLM_HASH_PAGES] + (i % DLM_BUCKETS_PER_PAGE);
}
static inline struct hlist_head *dlm_master_hash(struct dlm_ctxt *dlm,
unsigned i)
{
return dlm->master_hash[(i / DLM_BUCKETS_PER_PAGE) % DLM_HASH_PAGES] +
(i % DLM_BUCKETS_PER_PAGE);
}
/* these keventd work queue items are for less-frequently
* called functions that cannot be directly called from the
* net message handlers for some reason, usually because
......@@ -848,9 +852,7 @@ struct dlm_lock_resource * dlm_lookup_lockres(struct dlm_ctxt *dlm,
unsigned int len);
int dlm_is_host_down(int errno);
void dlm_change_lockres_owner(struct dlm_ctxt *dlm,
struct dlm_lock_resource *res,
u8 owner);
struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm,
const char *lockid,
int namelen,
......@@ -1008,6 +1010,9 @@ static inline void __dlm_wait_on_lockres(struct dlm_lock_resource *res)
DLM_LOCK_RES_MIGRATING));
}
void __dlm_unlink_mle(struct dlm_ctxt *dlm, struct dlm_master_list_entry *mle);
void __dlm_insert_mle(struct dlm_ctxt *dlm, struct dlm_master_list_entry *mle);
/* create/destroy slab caches */
int dlm_init_master_caches(void);
void dlm_destroy_master_caches(void);
......@@ -1110,6 +1115,23 @@ static inline int dlm_node_iter_next(struct dlm_node_iter *iter)
return bit;
}
static inline void dlm_set_lockres_owner(struct dlm_ctxt *dlm,
struct dlm_lock_resource *res,
u8 owner)
{
assert_spin_locked(&res->spinlock);
res->owner = owner;
}
static inline void dlm_change_lockres_owner(struct dlm_ctxt *dlm,
struct dlm_lock_resource *res,
u8 owner)
{
assert_spin_locked(&res->spinlock);
if (owner != res->owner)
dlm_set_lockres_owner(dlm, res, owner);
}
#endif /* DLMCOMMON_H */
......@@ -287,18 +287,8 @@ static int stringify_nodemap(unsigned long *nodemap, int maxnodes,
static int dump_mle(struct dlm_master_list_entry *mle, char *buf, int len)
{
int out = 0;
unsigned int namelen;
const char *name;
char *mle_type;
if (mle->type != DLM_MLE_MASTER) {
namelen = mle->u.name.len;
name = mle->u.name.name;
} else {
namelen = mle->u.res->lockname.len;
name = mle->u.res->lockname.name;
}
if (mle->type == DLM_MLE_BLOCK)
mle_type = "BLK";
else if (mle->type == DLM_MLE_MASTER)
......@@ -306,7 +296,7 @@ static int dump_mle(struct dlm_master_list_entry *mle, char *buf, int len)
else
mle_type = "MIG";
out += stringify_lockname(name, namelen, buf + out, len - out);
out += stringify_lockname(mle->mname, mle->mnamelen, buf + out, len - out);
out += snprintf(buf + out, len - out,
"\t%3s\tmas=%3u\tnew=%3u\tevt=%1d\tuse=%1d\tref=%3d\n",
mle_type, mle->master, mle->new_master,
......@@ -501,23 +491,33 @@ static struct file_operations debug_purgelist_fops = {
static int debug_mle_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
{
struct dlm_master_list_entry *mle;
int out = 0;
unsigned long total = 0;
struct hlist_head *bucket;
struct hlist_node *list;
int i, out = 0;
unsigned long total = 0, longest = 0, bktcnt;
out += snprintf(db->buf + out, db->len - out,
"Dumping MLEs for Domain: %s\n", dlm->name);
spin_lock(&dlm->master_lock);
list_for_each_entry(mle, &dlm->master_list, list) {
++total;
if (db->len - out < 200)
continue;
out += dump_mle(mle, db->buf + out, db->len - out);
for (i = 0; i < DLM_HASH_BUCKETS; i++) {
bucket = dlm_master_hash(dlm, i);
hlist_for_each(list, bucket) {
mle = hlist_entry(list, struct dlm_master_list_entry,
master_hash_node);
++total;
++bktcnt;
if (db->len - out < 200)
continue;
out += dump_mle(mle, db->buf + out, db->len - out);
}
longest = max(longest, bktcnt);
bktcnt = 0;
}
spin_unlock(&dlm->master_lock);
out += snprintf(db->buf + out, db->len - out,
"Total on list: %ld\n", total);
"Total: %ld, Longest: %ld\n", total, longest);
return out;
}
......@@ -756,12 +756,8 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
int out = 0;
struct dlm_reco_node_data *node;
char *state;
int lres, rres, ures, tres;
lres = atomic_read(&dlm->local_resources);
rres = atomic_read(&dlm->remote_resources);
ures = atomic_read(&dlm->unknown_resources);
tres = lres + rres + ures;
int cur_mles = 0, tot_mles = 0;
int i;
spin_lock(&dlm->spinlock);
......@@ -804,21 +800,48 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
db->buf + out, db->len - out);
out += snprintf(db->buf + out, db->len - out, "\n");
/* Mastered Resources Total: xxx Locally: xxx Remotely: ... */
/* Lock Resources: xxx (xxx) */
out += snprintf(db->buf + out, db->len - out,
"Lock Resources: %d (%d)\n",
atomic_read(&dlm->res_cur_count),
atomic_read(&dlm->res_tot_count));
for (i = 0; i < DLM_MLE_NUM_TYPES; ++i)
tot_mles += atomic_read(&dlm->mle_tot_count[i]);
for (i = 0; i < DLM_MLE_NUM_TYPES; ++i)
cur_mles += atomic_read(&dlm->mle_cur_count[i]);
/* MLEs: xxx (xxx) */
out += snprintf(db->buf + out, db->len - out,
"MLEs: %d (%d)\n", cur_mles, tot_mles);
/* Blocking: xxx (xxx) */
out += snprintf(db->buf + out, db->len - out,
" Blocking: %d (%d)\n",
atomic_read(&dlm->mle_cur_count[DLM_MLE_BLOCK]),
atomic_read(&dlm->mle_tot_count[DLM_MLE_BLOCK]));
/* Mastery: xxx (xxx) */
out += snprintf(db->buf + out, db->len - out,
" Mastery: %d (%d)\n",
atomic_read(&dlm->mle_cur_count[DLM_MLE_MASTER]),
atomic_read(&dlm->mle_tot_count[DLM_MLE_MASTER]));
/* Migration: xxx (xxx) */
out += snprintf(db->buf + out, db->len - out,
"Mastered Resources Total: %d Locally: %d "
"Remotely: %d Unknown: %d\n",
tres, lres, rres, ures);
" Migration: %d (%d)\n",
atomic_read(&dlm->mle_cur_count[DLM_MLE_MIGRATION]),
atomic_read(&dlm->mle_tot_count[DLM_MLE_MIGRATION]));
/* Lists: Dirty=Empty Purge=InUse PendingASTs=Empty ... */
out += snprintf(db->buf + out, db->len - out,
"Lists: Dirty=%s Purge=%s PendingASTs=%s "
"PendingBASTs=%s Master=%s\n",
"PendingBASTs=%s\n",
(list_empty(&dlm->dirty_list) ? "Empty" : "InUse"),
(list_empty(&dlm->purge_list) ? "Empty" : "InUse"),
(list_empty(&dlm->pending_asts) ? "Empty" : "InUse"),
(list_empty(&dlm->pending_basts) ? "Empty" : "InUse"),
(list_empty(&dlm->master_list) ? "Empty" : "InUse"));
(list_empty(&dlm->pending_basts) ? "Empty" : "InUse"));
/* Purge Count: xxx Refs: xxx */
out += snprintf(db->buf + out, db->len - out,
......
......@@ -304,6 +304,9 @@ static void dlm_free_ctxt_mem(struct dlm_ctxt *dlm)
if (dlm->lockres_hash)
dlm_free_pagevec((void **)dlm->lockres_hash, DLM_HASH_PAGES);
if (dlm->master_hash)
dlm_free_pagevec((void **)dlm->master_hash, DLM_HASH_PAGES);
if (dlm->name)
kfree(dlm->name);
......@@ -1534,12 +1537,27 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
for (i = 0; i < DLM_HASH_BUCKETS; i++)
INIT_HLIST_HEAD(dlm_lockres_hash(dlm, i));
dlm->master_hash = (struct hlist_head **)
dlm_alloc_pagevec(DLM_HASH_PAGES);
if (!dlm->master_hash) {
mlog_errno(-ENOMEM);
dlm_free_pagevec((void **)dlm->lockres_hash, DLM_HASH_PAGES);
kfree(dlm->name);
kfree(dlm);
dlm = NULL;
goto leave;
}
for (i = 0; i < DLM_HASH_BUCKETS; i++)
INIT_HLIST_HEAD(dlm_master_hash(dlm, i));
strcpy(dlm->name, domain);
dlm->key = key;
dlm->node_num = o2nm_this_node();
ret = dlm_create_debugfs_subroot(dlm);
if (ret < 0) {
dlm_free_pagevec((void **)dlm->master_hash, DLM_HASH_PAGES);
dlm_free_pagevec((void **)dlm->lockres_hash, DLM_HASH_PAGES);
kfree(dlm->name);
kfree(dlm);
......@@ -1579,7 +1597,6 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
init_waitqueue_head(&dlm->reco.event);
init_waitqueue_head(&dlm->ast_wq);
init_waitqueue_head(&dlm->migration_wq);
INIT_LIST_HEAD(&dlm->master_list);
INIT_LIST_HEAD(&dlm->mle_hb_events);
dlm->joining_node = DLM_LOCK_RES_OWNER_UNKNOWN;
......@@ -1587,9 +1604,13 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
dlm->reco.new_master = O2NM_INVALID_NODE_NUM;
dlm->reco.dead_node = O2NM_INVALID_NODE_NUM;
atomic_set(&dlm->local_resources, 0);
atomic_set(&dlm->remote_resources, 0);
atomic_set(&dlm->unknown_resources, 0);
atomic_set(&dlm->res_tot_count, 0);
atomic_set(&dlm->res_cur_count, 0);
for (i = 0; i < DLM_MLE_NUM_TYPES; ++i) {
atomic_set(&dlm->mle_tot_count[i], 0);
atomic_set(&dlm->mle_cur_count[i], 0);
}
spin_lock_init(&dlm->work_lock);
INIT_LIST_HEAD(&dlm->work_list);
......
......@@ -73,22 +73,13 @@ static inline int dlm_mle_equal(struct dlm_ctxt *dlm,
const char *name,
unsigned int namelen)
{
struct dlm_lock_resource *res;
if (dlm != mle->dlm)
return 0;
if (mle->type == DLM_MLE_BLOCK ||
mle->type == DLM_MLE_MIGRATION) {
if (namelen != mle->u.name.len ||
memcmp(name, mle->u.name.name, namelen)!=0)
return 0;
} else {
res = mle->u.res;
if (namelen != res->lockname.len ||
memcmp(res->lockname.name, name, namelen) != 0)
return 0;
}
if (namelen != mle->mnamelen ||
memcmp(name, mle->mname, namelen) != 0)
return 0;
return 1;
}
......@@ -283,7 +274,7 @@ static void dlm_init_mle(struct dlm_master_list_entry *mle,
mle->dlm = dlm;
mle->type = type;
INIT_LIST_HEAD(&mle->list);
INIT_HLIST_NODE(&mle->master_hash_node);
INIT_LIST_HEAD(&mle->hb_events);
memset(mle->maybe_map, 0, sizeof(mle->maybe_map));
spin_lock_init(&mle->spinlock);
......@@ -295,19 +286,27 @@ static void dlm_init_mle(struct dlm_master_list_entry *mle,
mle->new_master = O2NM_MAX_NODES;
mle->inuse = 0;
BUG_ON(mle->type != DLM_MLE_BLOCK &&
mle->type != DLM_MLE_MASTER &&
mle->type != DLM_MLE_MIGRATION);
if (mle->type == DLM_MLE_MASTER) {
BUG_ON(!res);
mle->u.res = res;
} else if (mle->type == DLM_MLE_BLOCK) {
BUG_ON(!name);
memcpy(mle->u.name.name, name, namelen);
mle->u.name.len = namelen;
} else /* DLM_MLE_MIGRATION */ {
mle->mleres = res;
memcpy(mle->mname, res->lockname.name, res->lockname.len);
mle->mnamelen = res->lockname.len;
mle->mnamehash = res->lockname.hash;
} else {
BUG_ON(!name);
memcpy(mle->u.name.name, name, namelen);
mle->u.name.len = namelen;
mle->mleres = NULL;
memcpy(mle->mname, name, namelen);
mle->mnamelen = namelen;
mle->mnamehash = dlm_lockid_hash(name, namelen);
}
atomic_inc(&dlm->mle_tot_count[mle->type]);
atomic_inc(&dlm->mle_cur_count[mle->type]);
/* copy off the node_map and register hb callbacks on our copy */
memcpy(mle->node_map, dlm->domain_map, sizeof(mle->node_map));
memcpy(mle->vote_map, dlm->domain_map, sizeof(mle->vote_map));
......@@ -318,6 +317,24 @@ static void dlm_init_mle(struct dlm_master_list_entry *mle,
__dlm_mle_attach_hb_events(dlm, mle);
}
void __dlm_unlink_mle(struct dlm_ctxt *dlm, struct dlm_master_list_entry *mle)
{
assert_spin_locked(&dlm->spinlock);
assert_spin_locked(&dlm->master_lock);
if (!hlist_unhashed(&mle->master_hash_node))
hlist_del_init(&mle->master_hash_node);
}
void __dlm_insert_mle(struct dlm_ctxt *dlm, struct dlm_master_list_entry *mle)
{
struct hlist_head *bucket;
assert_spin_locked(&dlm->master_lock);
bucket = dlm_master_hash(dlm, mle->mnamehash);
hlist_add_head(&mle->master_hash_node, bucket);
}
/* returns 1 if found, 0 if not */
static int dlm_find_mle(struct dlm_ctxt *dlm,
......@@ -325,10 +342,17 @@ static int dlm_find_mle(struct dlm_ctxt *dlm,
char *name, unsigned int namelen)
{
struct dlm_master_list_entry *tmpmle;
struct hlist_head *bucket;
struct hlist_node *list;
unsigned int hash;
assert_spin_locked(&dlm->master_lock);
list_for_each_entry(tmpmle, &dlm->master_list, list) {
hash = dlm_lockid_hash(name, namelen);
bucket = dlm_master_hash(dlm, hash);
hlist_for_each(list, bucket) {
tmpmle = hlist_entry(list, struct dlm_master_list_entry,
master_hash_node);
if (!dlm_mle_equal(dlm, tmpmle, name, namelen))
continue;
dlm_get_mle(tmpmle);
......@@ -408,24 +432,20 @@ static void dlm_mle_release(struct kref *kref)
mle = container_of(kref, struct dlm_master_list_entry, mle_refs);
dlm = mle->dlm;
if (mle->type != DLM_MLE_MASTER) {
mlog(0, "calling mle_release for %.*s, type %d\n",
mle->u.name.len, mle->u.name.name, mle->type);
} else {
mlog(0, "calling mle_release for %.*s, type %d\n",
mle->u.res->lockname.len,
mle->u.res->lockname.name, mle->type);
}
assert_spin_locked(&dlm->spinlock);
assert_spin_locked(&dlm->master_lock);
mlog(0, "Releasing mle for %.*s, type %d\n", mle->mnamelen, mle->mname,
mle->type);
/* remove from list if not already */
if (!list_empty(&mle->list))
list_del_init(&mle->list);
__dlm_unlink_mle(dlm, mle);
/* detach the mle from the domain node up/down events */
__dlm_mle_detach_hb_events(dlm, mle);
atomic_dec(&dlm->mle_cur_count[mle->type]);
/* NOTE: kfree under spinlock here.
* if this is bad, we can move this to a freelist. */
kmem_cache_free(dlm_mle_cache, mle);
......@@ -465,43 +485,6 @@ void dlm_destroy_master_caches(void)
kmem_cache_destroy(dlm_lockres_cache);
}
static void dlm_set_lockres_owner(struct dlm_ctxt *dlm,
struct dlm_lock_resource *res,
u8 owner)
{
assert_spin_locked(&res->spinlock);
mlog_entry("%.*s, %u\n", res->lockname.len, res->lockname.name, owner);
if (owner == dlm->node_num)
atomic_inc(&dlm->local_resources);
else if (owner == DLM_LOCK_RES_OWNER_UNKNOWN)
atomic_inc(&dlm->unknown_resources);
else
atomic_inc(&dlm->remote_resources);
res->owner = owner;
}
void dlm_change_lockres_owner(struct dlm_ctxt *dlm,
struct dlm_lock_resource *res, u8 owner)
{
assert_spin_locked(&res->spinlock);
if (owner == res->owner)
return;
if (res->owner == dlm->node_num)
atomic_dec(&dlm->local_resources);
else if (res->owner == DLM_LOCK_RES_OWNER_UNKNOWN)
atomic_dec(&dlm->unknown_resources);
else
atomic_dec(&dlm->remote_resources);
dlm_set_lockres_owner(dlm, res, owner);
}
static void dlm_lockres_release(struct kref *kref)
{
struct dlm_lock_resource *res;
......@@ -527,6 +510,8 @@ static void dlm_lockres_release(struct kref *kref)
}
spin_unlock(&dlm->track_lock);
atomic_dec(&dlm->res_cur_count);
dlm_put(dlm);
if (!hlist_unhashed(&res->hash_node) ||
......@@ -607,6 +592,9 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm,
kref_init(&res->refs);
atomic_inc(&dlm->res_tot_count);
atomic_inc(&dlm->res_cur_count);
/* just for consistency */
spin_lock(&res->spinlock);
dlm_set_lockres_owner(dlm, res, DLM_LOCK_RES_OWNER_UNKNOWN);
......@@ -843,7 +831,7 @@ struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm,
alloc_mle = NULL;
dlm_init_mle(mle, DLM_MLE_MASTER, dlm, res, NULL, 0);
set_bit(dlm->node_num, mle->maybe_map);
list_add(&mle->list, &dlm->master_list);
__dlm_insert_mle(dlm, mle);
/* still holding the dlm spinlock, check the recovery map
* to see if there are any nodes that still need to be
......@@ -1270,7 +1258,7 @@ static int dlm_restart_lock_mastery(struct dlm_ctxt *dlm,
res->lockname.len,
res->lockname.name);
mle->type = DLM_MLE_MASTER;
mle->u.res = res;
mle->mleres = res;
}
}
}
......@@ -1315,14 +1303,8 @@ static int dlm_do_master_request(struct dlm_lock_resource *res,
BUG_ON(mle->type == DLM_MLE_MIGRATION);
if (mle->type != DLM_MLE_MASTER) {
request.namelen = mle->u.name.len;
memcpy(request.name, mle->u.name.name, request.namelen);
} else {
request.namelen = mle->u.res->lockname.len;
memcpy(request.name, mle->u.res->lockname.name,
request.namelen);
}
request.namelen = (u8)mle->mnamelen;
memcpy(request.name, mle->mname, request.namelen);
again:
ret = o2net_send_message(DLM_MASTER_REQUEST_MSG, dlm->key, &request,
......@@ -1575,7 +1557,7 @@ int dlm_master_request_handler(struct o2net_msg *msg, u32 len, void *data,
// "add the block.\n");
dlm_init_mle(mle, DLM_MLE_BLOCK, dlm, NULL, name, namelen);
set_bit(request->node_idx, mle->maybe_map);
list_add(&mle->list, &dlm->master_list);
__dlm_insert_mle(dlm, mle);
response = DLM_MASTER_RESP_NO;
} else {
// mlog(0, "mle was found\n");
......@@ -1967,7 +1949,7 @@ int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data,
assert->node_idx, rr, extra_ref, mle->inuse);
dlm_print_one_mle(mle);
}
list_del_init(&mle->list);
__dlm_unlink_mle(dlm, mle);
__dlm_mle_detach_hb_events(dlm, mle);
__dlm_put_mle(mle);
if (extra_ref) {
......@@ -3159,10 +3141,8 @@ static int dlm_add_migration_mle(struct dlm_ctxt *dlm,
tmp->master = master;
atomic_set(&tmp->woken, 1);
wake_up(&tmp->wq);
/* remove it from the list so that only one
* mle will be found */
list_del_init(&tmp->list);
/* this was obviously WRONG. mle is uninited here. should be tmp. */
/* remove it so that only one mle will be found */
__dlm_unlink_mle(dlm, tmp);
__dlm_mle_detach_hb_events(dlm, tmp);
ret = DLM_MIGRATE_RESPONSE_MASTERY_REF;
mlog(0, "%s:%.*s: master=%u, newmaster=%u, "
......@@ -3181,137 +3161,164 @@ static int dlm_add_migration_mle(struct dlm_ctxt *dlm,
mle->master = master;
/* do this for consistency with other mle types */
set_bit(new_master, mle->maybe_map);
list_add(&mle->list, &dlm->master_list);
__dlm_insert_mle(dlm, mle);
return ret;
}
void dlm_clean_master_list(struct dlm_ctxt *dlm, u8 dead_node)
/*
* Sets the owner of the lockres, associated to the mle, to UNKNOWN
*/
static struct dlm_lock_resource *dlm_reset_mleres_owner(struct dlm_ctxt *dlm,
struct dlm_master_list_entry *mle)
{
struct dlm_master_list_entry *mle, *next;
struct dlm_lock_resource *res;
unsigned int hash;
mlog_entry("dlm=%s, dead node=%u\n", dlm->name, dead_node);
top:
assert_spin_locked(&dlm->spinlock);
/* Find the lockres associated to the mle and set its owner to UNK */
res = __dlm_lookup_lockres(dlm, mle->mname, mle->mnamelen,
mle->mnamehash);
if (res) {
spin_unlock(&dlm->master_lock);
/* clean the master list */
spin_lock(&dlm->master_lock);
list_for_each_entry_safe(mle, next, &dlm->master_list, list) {
BUG_ON(mle->type != DLM_MLE_BLOCK &&
mle->type != DLM_MLE_MASTER &&
mle->type != DLM_MLE_MIGRATION);
/* MASTER mles are initiated locally. the waiting
* process will notice the node map change
* shortly. let that happen as normal. */
if (mle->type == DLM_MLE_MASTER)
continue;
/* move lockres onto recovery list */
spin_lock(&res->spinlock);
dlm_set_lockres_owner(dlm, res, DLM_LOCK_RES_OWNER_UNKNOWN);
dlm_move_lockres_to_recovery_list(dlm, res);
spin_unlock(&res->spinlock);
dlm_lockres_put(res);
/* about to get rid of mle, detach from heartbeat */
__dlm_mle_detach_hb_events(dlm, mle);
/* BLOCK mles are initiated by other nodes.
* need to clean up if the dead node would have
* been the master. */
if (mle->type == DLM_MLE_BLOCK) {
int bit;
/* dump the mle */
spin_lock(&dlm->master_lock);
__dlm_put_mle(mle);
spin_unlock(&dlm->master_lock);
}
spin_lock(&mle->spinlock);
bit = find_next_bit(mle->maybe_map, O2NM_MAX_NODES, 0);
if (bit != dead_node) {
mlog(0, "mle found, but dead node %u would "
"not have been master\n", dead_node);
spin_unlock(&mle->spinlock);
} else {
/* must drop the refcount by one since the
* assert_master will never arrive. this
* may result in the mle being unlinked and
* freed, but there may still be a process
* waiting in the dlmlock path which is fine. */
mlog(0, "node %u was expected master\n",
dead_node);
atomic_set(&mle->woken, 1);
spin_unlock(&mle->spinlock);
wake_up(&mle->wq);
/* do not need events any longer, so detach
* from heartbeat */
__dlm_mle_detach_hb_events(dlm, mle);
__dlm_put_mle(mle);
}
continue;
}
return res;
}
/* everything else is a MIGRATION mle */
/* the rule for MIGRATION mles is that the master
* becomes UNKNOWN if *either* the original or
* the new master dies. all UNKNOWN lockreses
* are sent to whichever node becomes the recovery
* master. the new master is responsible for
* determining if there is still a master for
* this lockres, or if he needs to take over
* mastery. either way, this node should expect
* another message to resolve this. */
if (mle->master != dead_node &&
mle->new_master != dead_node)
continue;
static void dlm_clean_migration_mle(struct dlm_ctxt *dlm,
struct dlm_master_list_entry *mle)
{
__dlm_mle_detach_hb_events(dlm, mle);
/* if we have reached this point, this mle needs to
* be removed from the list and freed. */
spin_lock(&mle->spinlock);
__dlm_unlink_mle(dlm, mle);
atomic_set(&mle->woken, 1);
spin_unlock(&mle->spinlock);
/* remove from the list early. NOTE: unlinking
* list_head while in list_for_each_safe */
__dlm_mle_detach_hb_events(dlm, mle);
spin_lock(&mle->spinlock);
list_del_init(&mle->list);
wake_up(&mle->wq);
}
static void dlm_clean_block_mle(struct dlm_ctxt *dlm,
struct dlm_master_list_entry *mle, u8 dead_node)
{
int bit;
BUG_ON(mle->type != DLM_MLE_BLOCK);
spin_lock(&mle->spinlock);
bit = find_next_bit(mle->maybe_map, O2NM_MAX_NODES, 0);
if (bit != dead_node) {
mlog(0, "mle found, but dead node %u would not have been "
"master\n", dead_node);
spin_unlock(&mle->spinlock);
} else {
/* Must drop the refcount by one since the assert_master will
* never arrive. This may result in the mle being unlinked and
* freed, but there may still be a process waiting in the
* dlmlock path which is fine. */
mlog(0, "node %u was expected master\n", dead_node);
atomic_set(&mle->woken, 1);
spin_unlock(&mle->spinlock);
wake_up(&mle->wq);
mlog(0, "%s: node %u died during migration from "
"%u to %u!\n", dlm->name, dead_node,
mle->master, mle->new_master);
/* if there is a lockres associated with this
* mle, find it and set its owner to UNKNOWN */
hash = dlm_lockid_hash(mle->u.name.name, mle->u.name.len);
res = __dlm_lookup_lockres(dlm, mle->u.name.name,
mle->u.name.len, hash);
if (res) {
/* unfortunately if we hit this rare case, our
* lock ordering is messed. we need to drop
* the master lock so that we can take the
* lockres lock, meaning that we will have to
* restart from the head of list. */
spin_unlock(&dlm->master_lock);
/* Do not need events any longer, so detach from heartbeat */
__dlm_mle_detach_hb_events(dlm, mle);
__dlm_put_mle(mle);
}
}
/* move lockres onto recovery list */
spin_lock(&res->spinlock);
dlm_set_lockres_owner(dlm, res,
DLM_LOCK_RES_OWNER_UNKNOWN);
dlm_move_lockres_to_recovery_list(dlm, res);
spin_unlock(&res->spinlock);
dlm_lockres_put(res);
void dlm_clean_master_list(struct dlm_ctxt *dlm, u8 dead_node)
{
struct dlm_master_list_entry *mle;
struct dlm_lock_resource *res;
struct hlist_head *bucket;
struct hlist_node *list;
unsigned int i;
/* about to get rid of mle, detach from heartbeat */
__dlm_mle_detach_hb_events(dlm, mle);
mlog_entry("dlm=%s, dead node=%u\n", dlm->name, dead_node);
top:
assert_spin_locked(&dlm->spinlock);
/* dump the mle */
spin_lock(&dlm->master_lock);
__dlm_put_mle(mle);
spin_unlock(&dlm->master_lock);
/* clean the master list */
spin_lock(&dlm->master_lock);
for (i = 0; i < DLM_HASH_BUCKETS; i++) {
bucket = dlm_master_hash(dlm, i);
hlist_for_each(list, bucket) {
mle = hlist_entry(list, struct dlm_master_list_entry,
master_hash_node);
BUG_ON(mle->type != DLM_MLE_BLOCK &&
mle->type != DLM_MLE_MASTER &&
mle->type != DLM_MLE_MIGRATION);
/* MASTER mles are initiated locally. The waiting
* process will notice the node map change shortly.
* Let that happen as normal. */
if (mle->type == DLM_MLE_MASTER)
continue;
/* BLOCK mles are initiated by other nodes. Need to
* clean up if the dead node would have been the
* master. */
if (mle->type == DLM_MLE_BLOCK) {
dlm_clean_block_mle(dlm, mle, dead_node);
continue;
}
/* restart */
goto top;
}
/* Everything else is a MIGRATION mle */
/* The rule for MIGRATION mles is that the master
* becomes UNKNOWN if *either* the original or the new
* master dies. All UNKNOWN lockres' are sent to
* whichever node becomes the recovery master. The new
* master is responsible for determining if there is
* still a master for this lockres, or if he needs to
* take over mastery. Either way, this node should
* expect another message to resolve this. */
if (mle->master != dead_node &&
mle->new_master != dead_node)
continue;
/* If we have reached this point, this mle needs to be
* removed from the list and freed. */
dlm_clean_migration_mle(dlm, mle);
mlog(0, "%s: node %u died during migration from "
"%u to %u!\n", dlm->name, dead_node, mle->master,
mle->new_master);
/* If we find a lockres associated with the mle, we've
* hit this rare case that messes up our lock ordering.
* If so, we need to drop the master lock so that we can
* take the lockres lock, meaning that we will have to
* restart from the head of list. */
res = dlm_reset_mleres_owner(dlm, mle);
if (res)
/* restart */
goto top;
/* this may be the last reference */
__dlm_put_mle(mle);
/* This may be the last reference */
__dlm_put_mle(mle);
}
}
spin_unlock(&dlm->master_lock);
}
int dlm_finish_migration(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
u8 old_master)
{
......
......@@ -162,12 +162,28 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm,
spin_lock(&res->spinlock);
if (!__dlm_lockres_unused(res)) {
spin_unlock(&res->spinlock);
mlog(0, "%s:%.*s: tried to purge but not unused\n",
dlm->name, res->lockname.len, res->lockname.name);
return -ENOTEMPTY;
__dlm_print_one_lock_resource(res);
spin_unlock(&res->spinlock);
BUG();
}
if (res->state & DLM_LOCK_RES_MIGRATING) {
mlog(0, "%s:%.*s: Delay dropref as this lockres is "
"being remastered\n", dlm->name, res->lockname.len,
res->lockname.name);
/* Re-add the lockres to the end of the purge list */
if (!list_empty(&res->purge)) {
list_del_init(&res->purge);
list_add_tail(&res->purge, &dlm->purge_list);
}
spin_unlock(&res->spinlock);
return 0;
}
master = (res->owner == dlm->node_num);
if (!master)
res->state |= DLM_LOCK_RES_DROPPING_REF;
spin_unlock(&res->spinlock);
......
......@@ -244,6 +244,10 @@ static struct ocfs2_lock_res_ops ocfs2_rename_lops = {
.flags = 0,
};
static struct ocfs2_lock_res_ops ocfs2_nfs_sync_lops = {
.flags = 0,
};
static struct ocfs2_lock_res_ops ocfs2_dentry_lops = {
.get_osb = ocfs2_get_dentry_osb,
.post_unlock = ocfs2_dentry_post_unlock,
......@@ -622,6 +626,17 @@ static void ocfs2_rename_lock_res_init(struct ocfs2_lock_res *res,
&ocfs2_rename_lops, osb);
}
static void ocfs2_nfs_sync_lock_res_init(struct ocfs2_lock_res *res,
struct ocfs2_super *osb)
{
/* nfs_sync lockres doesn't come from a slab so we call init
* once on it manually. */
ocfs2_lock_res_init_once(res);
ocfs2_build_lock_name(OCFS2_LOCK_TYPE_NFS_SYNC, 0, 0, res->l_name);
ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_NFS_SYNC,
&ocfs2_nfs_sync_lops, osb);
}
void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres,
struct ocfs2_file_private *fp)
{
......@@ -2417,6 +2432,34 @@ void ocfs2_rename_unlock(struct ocfs2_super *osb)
ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX);
}
int ocfs2_nfs_sync_lock(struct ocfs2_super *osb, int ex)
{
int status;
struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres;
if (ocfs2_is_hard_readonly(osb))
return -EROFS;
if (ocfs2_mount_local(osb))
return 0;
status = ocfs2_cluster_lock(osb, lockres, ex ? LKM_EXMODE : LKM_PRMODE,
0, 0);
if (status < 0)
mlog(ML_ERROR, "lock on nfs sync lock failed %d\n", status);
return status;
}
void ocfs2_nfs_sync_unlock(struct ocfs2_super *osb, int ex)
{
struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres;
if (!ocfs2_mount_local(osb))
ocfs2_cluster_unlock(osb, lockres,
ex ? LKM_EXMODE : LKM_PRMODE);
}
int ocfs2_dentry_lock(struct dentry *dentry, int ex)
{
int ret;
......@@ -2798,6 +2841,7 @@ int ocfs2_dlm_init(struct ocfs2_super *osb)
local:
ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb);
ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb);
ocfs2_nfs_sync_lock_res_init(&osb->osb_nfs_sync_lockres, osb);
osb->cconn = conn;
......@@ -2833,6 +2877,7 @@ void ocfs2_dlm_shutdown(struct ocfs2_super *osb,
ocfs2_lock_res_free(&osb->osb_super_lockres);
ocfs2_lock_res_free(&osb->osb_rename_lockres);
ocfs2_lock_res_free(&osb->osb_nfs_sync_lockres);
ocfs2_cluster_disconnect(osb->cconn, hangup_pending);
osb->cconn = NULL;
......@@ -3015,6 +3060,7 @@ static void ocfs2_drop_osb_locks(struct ocfs2_super *osb)
{
ocfs2_simple_drop_lockres(osb, &osb->osb_super_lockres);
ocfs2_simple_drop_lockres(osb, &osb->osb_rename_lockres);
ocfs2_simple_drop_lockres(osb, &osb->osb_nfs_sync_lockres);
}
int ocfs2_drop_inode_locks(struct inode *inode)
......
......@@ -115,6 +115,8 @@ void ocfs2_super_unlock(struct ocfs2_super *osb,
int ex);
int ocfs2_rename_lock(struct ocfs2_super *osb);
void ocfs2_rename_unlock(struct ocfs2_super *osb);
int ocfs2_nfs_sync_lock(struct ocfs2_super *osb, int ex);
void ocfs2_nfs_sync_unlock(struct ocfs2_super *osb, int ex);
int ocfs2_dentry_lock(struct dentry *dentry, int ex);
void ocfs2_dentry_unlock(struct dentry *dentry, int ex);
int ocfs2_file_lock(struct file *file, int ex, int trylock);
......
......@@ -31,6 +31,7 @@
#include "ocfs2.h"
#include "alloc.h"
#include "dir.h"
#include "dlmglue.h"
#include "dcache.h"
......@@ -38,6 +39,7 @@
#include "inode.h"
#include "buffer_head_io.h"
#include "suballoc.h"
struct ocfs2_inode_handle
{
......@@ -49,29 +51,97 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb,
struct ocfs2_inode_handle *handle)
{
struct inode *inode;
struct ocfs2_super *osb = OCFS2_SB(sb);
u64 blkno = handle->ih_blkno;
int status, set;
struct dentry *result;
mlog_entry("(0x%p, 0x%p)\n", sb, handle);
if (handle->ih_blkno == 0) {
mlog_errno(-ESTALE);
return ERR_PTR(-ESTALE);
if (blkno == 0) {
mlog(0, "nfs wants inode with blkno: 0\n");
result = ERR_PTR(-ESTALE);
goto bail;
}
inode = ocfs2_ilookup(sb, blkno);
/*
* If the inode exists in memory, we only need to check it's
* generation number
*/
if (inode)
goto check_gen;
/*
* This will synchronize us against ocfs2_delete_inode() on
* all nodes
*/
status = ocfs2_nfs_sync_lock(osb, 1);
if (status < 0) {
mlog(ML_ERROR, "getting nfs sync lock(EX) failed %d\n", status);
goto check_err;
}
status = ocfs2_test_inode_bit(osb, blkno, &set);
if (status < 0) {
if (status == -EINVAL) {
/*
* The blkno NFS gave us doesn't even show up
* as an inode, we return -ESTALE to be
* nice
*/
mlog(0, "test inode bit failed %d\n", status);
status = -ESTALE;
} else {
mlog(ML_ERROR, "test inode bit failed %d\n", status);
}
goto unlock_nfs_sync;
}
/* If the inode allocator bit is clear, this inode must be stale */
if (!set) {
mlog(0, "inode %llu suballoc bit is clear\n", blkno);
status = -ESTALE;
goto unlock_nfs_sync;
}
inode = ocfs2_iget(OCFS2_SB(sb), handle->ih_blkno, 0, 0);
inode = ocfs2_iget(osb, blkno, 0, 0);
if (IS_ERR(inode))
return (void *)inode;
unlock_nfs_sync:
ocfs2_nfs_sync_unlock(osb, 1);
check_err:
if (status < 0) {
if (status == -ESTALE) {
mlog(0, "stale inode ino: %llu generation: %u\n",
blkno, handle->ih_generation);
}
result = ERR_PTR(status);
goto bail;
}
if (IS_ERR(inode)) {
mlog_errno(PTR_ERR(inode));
result = (void *)inode;
goto bail;
}
check_gen:
if (handle->ih_generation != inode->i_generation) {
iput(inode);
return ERR_PTR(-ESTALE);
mlog(0, "stale inode ino: %llu generation: %u\n", blkno,
handle->ih_generation);
result = ERR_PTR(-ESTALE);
goto bail;
}
result = d_obtain_alias(inode);
if (!IS_ERR(result))
result->d_op = &ocfs2_dentry_ops;
else
mlog_errno(PTR_ERR(result));
bail:
mlog_exit_ptr(result);
return result;
}
......
......@@ -38,6 +38,7 @@
#include "ocfs2.h"
#include "alloc.h"
#include "dir.h"
#include "blockcheck.h"
#include "dlmglue.h"
#include "extent_map.h"
......@@ -112,6 +113,17 @@ void ocfs2_get_inode_flags(struct ocfs2_inode_info *oi)
oi->ip_attr |= OCFS2_DIRSYNC_FL;
}
struct inode *ocfs2_ilookup(struct super_block *sb, u64 blkno)
{
struct ocfs2_find_inode_args args;
args.fi_blkno = blkno;
args.fi_flags = 0;
args.fi_ino = ino_from_blkno(sb, blkno);
args.fi_sysfile_type = 0;
return ilookup5(sb, blkno, ocfs2_find_actor, &args);
}
struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags,
int sysfile_type)
{
......@@ -275,7 +287,7 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
(unsigned long long)OCFS2_I(inode)->ip_blkno,
(unsigned long long)le64_to_cpu(fe->i_blkno));
inode->i_nlink = le16_to_cpu(fe->i_links_count);
inode->i_nlink = ocfs2_read_links_count(fe);
if (fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) {
OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SYSTEM_FILE;
......@@ -351,6 +363,8 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
ocfs2_set_inode_flags(inode);
OCFS2_I(inode)->ip_last_used_slot = 0;
OCFS2_I(inode)->ip_last_used_group = 0;
mlog_exit_void();
}
......@@ -606,7 +620,7 @@ static int ocfs2_remove_inode(struct inode *inode,
}
handle = ocfs2_start_trans(osb, OCFS2_DELETE_INODE_CREDITS +
ocfs2_quota_trans_credits(inode->i_sb));
ocfs2_quota_trans_credits(inode->i_sb));
if (IS_ERR(handle)) {
status = PTR_ERR(handle);
mlog_errno(status);
......@@ -740,6 +754,15 @@ static int ocfs2_wipe_inode(struct inode *inode,
goto bail_unlock_dir;
}
/* Remove any dir index tree */
if (S_ISDIR(inode->i_mode)) {
status = ocfs2_dx_dir_truncate(inode, di_bh);
if (status) {
mlog_errno(status);
goto bail_unlock_dir;
}
}
/*Free extended attribute resources associated with this inode.*/
status = ocfs2_xattr_remove(inode, di_bh);
if (status < 0) {
......@@ -949,6 +972,17 @@ void ocfs2_delete_inode(struct inode *inode)
goto bail;
}
/*
* Synchronize us against ocfs2_get_dentry. We take this in
* shared mode so that all nodes can still concurrently
* process deletes.
*/
status = ocfs2_nfs_sync_lock(OCFS2_SB(inode->i_sb), 0);
if (status < 0) {
mlog(ML_ERROR, "getting nfs sync lock(PR) failed %d\n", status);
ocfs2_cleanup_delete_inode(inode, 0);
goto bail_unblock;
}
/* Lock down the inode. This gives us an up to date view of
* it's metadata (for verification), and allows us to
* serialize delete_inode on multiple nodes.
......@@ -962,7 +996,7 @@ void ocfs2_delete_inode(struct inode *inode)
if (status != -ENOENT)
mlog_errno(status);
ocfs2_cleanup_delete_inode(inode, 0);
goto bail_unblock;
goto bail_unlock_nfs_sync;
}
/* Query the cluster. This will be the final decision made
......@@ -1005,6 +1039,10 @@ void ocfs2_delete_inode(struct inode *inode)
bail_unlock_inode:
ocfs2_inode_unlock(inode, 1);
brelse(di_bh);
bail_unlock_nfs_sync:
ocfs2_nfs_sync_unlock(OCFS2_SB(inode->i_sb), 0);
bail_unblock:
status = sigprocmask(SIG_SETMASK, &oldset, NULL);
if (status < 0)
......@@ -1205,7 +1243,7 @@ int ocfs2_mark_inode_dirty(handle_t *handle,
spin_unlock(&OCFS2_I(inode)->ip_lock);
fe->i_size = cpu_to_le64(i_size_read(inode));
fe->i_links_count = cpu_to_le16(inode->i_nlink);
ocfs2_set_links_count(fe, inode->i_nlink);
fe->i_uid = cpu_to_le32(inode->i_uid);
fe->i_gid = cpu_to_le32(inode->i_gid);
fe->i_mode = cpu_to_le16(inode->i_mode);
......@@ -1242,7 +1280,7 @@ void ocfs2_refresh_inode(struct inode *inode,
OCFS2_I(inode)->ip_dyn_features = le16_to_cpu(fe->i_dyn_features);
ocfs2_set_inode_flags(inode);
i_size_write(inode, le64_to_cpu(fe->i_size));
inode->i_nlink = le16_to_cpu(fe->i_links_count);
inode->i_nlink = ocfs2_read_links_count(fe);
inode->i_uid = le32_to_cpu(fe->i_uid);
inode->i_gid = le32_to_cpu(fe->i_gid);
inode->i_mode = le16_to_cpu(fe->i_mode);
......
......@@ -72,6 +72,10 @@ struct ocfs2_inode_info
struct inode vfs_inode;
struct jbd2_inode ip_jinode;
/* Only valid if the inode is the dir. */
u32 ip_last_used_slot;
u64 ip_last_used_group;
};
/*
......@@ -124,6 +128,7 @@ void ocfs2_drop_inode(struct inode *inode);
/* Flags for ocfs2_iget() */
#define OCFS2_FI_FLAG_SYSFILE 0x1
#define OCFS2_FI_FLAG_ORPHAN_RECOVERY 0x2
struct inode *ocfs2_ilookup(struct super_block *sb, u64 feoff);
struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 feoff, unsigned flags,
int sysfile_type);
int ocfs2_inode_init_private(struct inode *inode);
......
......@@ -65,6 +65,11 @@ static int ocfs2_trylock_journal(struct ocfs2_super *osb,
static int ocfs2_recover_orphans(struct ocfs2_super *osb,
int slot);
static int ocfs2_commit_thread(void *arg);
static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
int slot_num,
struct ocfs2_dinode *la_dinode,
struct ocfs2_dinode *tl_dinode,
struct ocfs2_quota_recovery *qrec);
static inline int ocfs2_wait_on_mount(struct ocfs2_super *osb)
{
......@@ -76,18 +81,97 @@ static inline int ocfs2_wait_on_quotas(struct ocfs2_super *osb)
return __ocfs2_wait_on_mount(osb, 1);
}
/*
* The recovery_list is a simple linked list of node numbers to recover.
* It is protected by the recovery_lock.
* This replay_map is to track online/offline slots, so we could recover
* offline slots during recovery and mount
*/
struct ocfs2_recovery_map {
unsigned int rm_used;
unsigned int *rm_entries;
enum ocfs2_replay_state {
REPLAY_UNNEEDED = 0, /* Replay is not needed, so ignore this map */
REPLAY_NEEDED, /* Replay slots marked in rm_replay_slots */
REPLAY_DONE /* Replay was already queued */
};
struct ocfs2_replay_map {
unsigned int rm_slots;
enum ocfs2_replay_state rm_state;
unsigned char rm_replay_slots[0];
};
void ocfs2_replay_map_set_state(struct ocfs2_super *osb, int state)
{
if (!osb->replay_map)
return;
/* If we've already queued the replay, we don't have any more to do */
if (osb->replay_map->rm_state == REPLAY_DONE)
return;
osb->replay_map->rm_state = state;
}
int ocfs2_compute_replay_slots(struct ocfs2_super *osb)
{
struct ocfs2_replay_map *replay_map;
int i, node_num;
/* If replay map is already set, we don't do it again */
if (osb->replay_map)
return 0;
replay_map = kzalloc(sizeof(struct ocfs2_replay_map) +
(osb->max_slots * sizeof(char)), GFP_KERNEL);
if (!replay_map) {
mlog_errno(-ENOMEM);
return -ENOMEM;
}
spin_lock(&osb->osb_lock);
replay_map->rm_slots = osb->max_slots;
replay_map->rm_state = REPLAY_UNNEEDED;
/* set rm_replay_slots for offline slot(s) */
for (i = 0; i < replay_map->rm_slots; i++) {
if (ocfs2_slot_to_node_num_locked(osb, i, &node_num) == -ENOENT)
replay_map->rm_replay_slots[i] = 1;
}
osb->replay_map = replay_map;
spin_unlock(&osb->osb_lock);
return 0;
}
void ocfs2_queue_replay_slots(struct ocfs2_super *osb)
{
struct ocfs2_replay_map *replay_map = osb->replay_map;
int i;
if (!replay_map)
return;
if (replay_map->rm_state != REPLAY_NEEDED)
return;
for (i = 0; i < replay_map->rm_slots; i++)
if (replay_map->rm_replay_slots[i])
ocfs2_queue_recovery_completion(osb->journal, i, NULL,
NULL, NULL);
replay_map->rm_state = REPLAY_DONE;
}
void ocfs2_free_replay_slots(struct ocfs2_super *osb)
{
struct ocfs2_replay_map *replay_map = osb->replay_map;
if (!osb->replay_map)
return;
kfree(replay_map);
osb->replay_map = NULL;
}
int ocfs2_recovery_init(struct ocfs2_super *osb)
{
struct ocfs2_recovery_map *rm;
......@@ -496,6 +580,22 @@ static struct ocfs2_triggers dq_triggers = {
},
};
static struct ocfs2_triggers dr_triggers = {
.ot_triggers = {
.t_commit = ocfs2_commit_trigger,
.t_abort = ocfs2_abort_trigger,
},
.ot_offset = offsetof(struct ocfs2_dx_root_block, dr_check),
};
static struct ocfs2_triggers dl_triggers = {
.ot_triggers = {
.t_commit = ocfs2_commit_trigger,
.t_abort = ocfs2_abort_trigger,
},
.ot_offset = offsetof(struct ocfs2_dx_leaf, dl_check),
};
static int __ocfs2_journal_access(handle_t *handle,
struct inode *inode,
struct buffer_head *bh,
......@@ -600,6 +700,20 @@ int ocfs2_journal_access_dq(handle_t *handle, struct inode *inode,
type);
}
int ocfs2_journal_access_dr(handle_t *handle, struct inode *inode,
struct buffer_head *bh, int type)
{
return __ocfs2_journal_access(handle, inode, bh, &dr_triggers,
type);
}
int ocfs2_journal_access_dl(handle_t *handle, struct inode *inode,
struct buffer_head *bh, int type)
{
return __ocfs2_journal_access(handle, inode, bh, &dl_triggers,
type);
}
int ocfs2_journal_access(handle_t *handle, struct inode *inode,
struct buffer_head *bh, int type)
{
......@@ -1176,24 +1290,24 @@ static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
}
/* Called by the mount code to queue recovery the last part of
* recovery for it's own slot. */
* recovery for it's own and offline slot(s). */
void ocfs2_complete_mount_recovery(struct ocfs2_super *osb)
{
struct ocfs2_journal *journal = osb->journal;
if (osb->dirty) {
/* No need to queue up our truncate_log as regular
* cleanup will catch that. */
ocfs2_queue_recovery_completion(journal,
osb->slot_num,
osb->local_alloc_copy,
NULL,
NULL);
ocfs2_schedule_truncate_log_flush(osb, 0);
/* No need to queue up our truncate_log as regular cleanup will catch
* that */
ocfs2_queue_recovery_completion(journal, osb->slot_num,
osb->local_alloc_copy, NULL, NULL);
ocfs2_schedule_truncate_log_flush(osb, 0);
osb->local_alloc_copy = NULL;
osb->dirty = 0;
}
osb->local_alloc_copy = NULL;
osb->dirty = 0;
/* queue to recover orphan slots for all offline slots */
ocfs2_replay_map_set_state(osb, REPLAY_NEEDED);
ocfs2_queue_replay_slots(osb);
ocfs2_free_replay_slots(osb);
}
void ocfs2_complete_quota_recovery(struct ocfs2_super *osb)
......@@ -1236,6 +1350,14 @@ static int __ocfs2_recovery_thread(void *arg)
goto bail;
}
status = ocfs2_compute_replay_slots(osb);
if (status < 0)
mlog_errno(status);
/* queue recovery for our own slot */
ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL,
NULL, NULL);
spin_lock(&osb->osb_lock);
while (rm->rm_used) {
/* It's always safe to remove entry zero, as we won't
......@@ -1301,11 +1423,8 @@ static int __ocfs2_recovery_thread(void *arg)
ocfs2_super_unlock(osb, 1);
/* We always run recovery on our own orphan dir - the dead
* node(s) may have disallowd a previos inode delete. Re-processing
* is therefore required. */
ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL,
NULL, NULL);
/* queue recovery for offline slots */
ocfs2_queue_replay_slots(osb);
bail:
mutex_lock(&osb->recovery_lock);
......@@ -1314,6 +1433,7 @@ static int __ocfs2_recovery_thread(void *arg)
goto restart;
}
ocfs2_free_replay_slots(osb);
osb->recovery_thread_task = NULL;
mb(); /* sync with ocfs2_recovery_thread_running */
wake_up(&osb->recovery_event);
......@@ -1465,6 +1585,9 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
goto done;
}
/* we need to run complete recovery for offline orphan slots */
ocfs2_replay_map_set_state(osb, REPLAY_NEEDED);
mlog(ML_NOTICE, "Recovering node %d from slot %d on device (%u,%u)\n",
node_num, slot_num,
MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev));
......
......@@ -38,6 +38,17 @@ enum ocfs2_journal_state {
struct ocfs2_super;
struct ocfs2_dinode;
/*
* The recovery_list is a simple linked list of node numbers to recover.
* It is protected by the recovery_lock.
*/
struct ocfs2_recovery_map {
unsigned int rm_used;
unsigned int *rm_entries;
};
struct ocfs2_journal {
enum ocfs2_journal_state j_state; /* Journals current state */
......@@ -139,6 +150,7 @@ void ocfs2_wait_for_recovery(struct ocfs2_super *osb);
int ocfs2_recovery_init(struct ocfs2_super *osb);
void ocfs2_recovery_exit(struct ocfs2_super *osb);
int ocfs2_compute_replay_slots(struct ocfs2_super *osb);
/*
* Journal Control:
* Initialize, Load, Shutdown, Wipe a journal.
......@@ -266,6 +278,12 @@ int ocfs2_journal_access_dq(handle_t *handle, struct inode *inode,
/* dirblock */
int ocfs2_journal_access_db(handle_t *handle, struct inode *inode,
struct buffer_head *bh, int type);
/* ocfs2_dx_root_block */
int ocfs2_journal_access_dr(handle_t *handle, struct inode *inode,
struct buffer_head *bh, int type);
/* ocfs2_dx_leaf */
int ocfs2_journal_access_dl(handle_t *handle, struct inode *inode,
struct buffer_head *bh, int type);
/* Anything that has no ecc */
int ocfs2_journal_access(handle_t *handle, struct inode *inode,
struct buffer_head *bh, int type);
......@@ -368,14 +386,29 @@ static inline int ocfs2_remove_extent_credits(struct super_block *sb)
}
/* data block for new dir/symlink, 2 for bitmap updates (bitmap fe +
* bitmap block for the new bit) */
#define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + 2)
* bitmap block for the new bit) dx_root update for free list */
#define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + 2 + 1)
static inline int ocfs2_add_dir_index_credits(struct super_block *sb)
{
/* 1 block for index, 2 allocs (data, metadata), 1 clusters
* worth of blocks for initial extent. */
return 1 + 2 * OCFS2_SUBALLOC_ALLOC +
ocfs2_clusters_to_blocks(sb, 1);
}
/* parent fe, parent block, new file entry, inode alloc fe, inode alloc
* group descriptor + mkdir/symlink blocks + quota update */
static inline int ocfs2_mknod_credits(struct super_block *sb)
/* parent fe, parent block, new file entry, index leaf, inode alloc fe, inode
* alloc group descriptor + mkdir/symlink blocks + dir blocks + xattr
* blocks + quota update */
static inline int ocfs2_mknod_credits(struct super_block *sb, int is_dir,
int xattr_credits)
{
return 3 + OCFS2_SUBALLOC_ALLOC + OCFS2_DIR_LINK_ADDITIONAL_CREDITS +
int dir_credits = OCFS2_DIR_LINK_ADDITIONAL_CREDITS;
if (is_dir)
dir_credits += ocfs2_add_dir_index_credits(sb);
return 4 + OCFS2_SUBALLOC_ALLOC + dir_credits + xattr_credits +
ocfs2_quota_trans_credits(sb);
}
......@@ -388,31 +421,31 @@ static inline int ocfs2_mknod_credits(struct super_block *sb)
#define OCFS2_SIMPLE_DIR_EXTEND_CREDITS (2)
/* file update (nlink, etc) + directory mtime/ctime + dir entry block + quota
* update on dir */
* update on dir + index leaf + dx root update for free list */
static inline int ocfs2_link_credits(struct super_block *sb)
{
return 2*OCFS2_INODE_UPDATE_CREDITS + 1 +
return 2*OCFS2_INODE_UPDATE_CREDITS + 3 +
ocfs2_quota_trans_credits(sb);
}
/* inode + dir inode (if we unlink a dir), + dir entry block + orphan
* dir inode link */
* dir inode link + dir inode index leaf + dir index root */
static inline int ocfs2_unlink_credits(struct super_block *sb)
{
/* The quota update from ocfs2_link_credits is unused here... */
return 2 * OCFS2_INODE_UPDATE_CREDITS + 1 + ocfs2_link_credits(sb);
return 2 * OCFS2_INODE_UPDATE_CREDITS + 3 + ocfs2_link_credits(sb);
}
/* dinode + orphan dir dinode + inode alloc dinode + orphan dir entry +
* inode alloc group descriptor */
#define OCFS2_DELETE_INODE_CREDITS (3 * OCFS2_INODE_UPDATE_CREDITS + 1 + 1)
* inode alloc group descriptor + orphan dir index leaf */
#define OCFS2_DELETE_INODE_CREDITS (3 * OCFS2_INODE_UPDATE_CREDITS + 3)
/* dinode update, old dir dinode update, new dir dinode update, old
* dir dir entry, new dir dir entry, dir entry update for renaming
* directory + target unlink */
* directory + target unlink + 3 x dir index leaves */
static inline int ocfs2_rename_credits(struct super_block *sb)
{
return 3 * OCFS2_INODE_UPDATE_CREDITS + 3 + ocfs2_unlink_credits(sb);
return 3 * OCFS2_INODE_UPDATE_CREDITS + 6 + ocfs2_unlink_credits(sb);
}
/* global bitmap dinode, group desc., relinked group,
......@@ -422,6 +455,20 @@ static inline int ocfs2_rename_credits(struct super_block *sb)
+ OCFS2_INODE_UPDATE_CREDITS \
+ OCFS2_XATTR_BLOCK_UPDATE_CREDITS)
/* inode update, removal of dx root block from allocator */
#define OCFS2_DX_ROOT_REMOVE_CREDITS (OCFS2_INODE_UPDATE_CREDITS + \
OCFS2_SUBALLOC_FREE)
static inline int ocfs2_calc_dxi_expand_credits(struct super_block *sb)
{
int credits = 1 + OCFS2_SUBALLOC_ALLOC;
credits += ocfs2_clusters_to_blocks(sb, 1);
credits += ocfs2_quota_trans_credits(sb);
return credits;
}
/*
* Please note that the caller must make sure that root_el is the root
* of extent tree. So for an inode, it should be &fe->id2.i_list. Otherwise
......@@ -457,7 +504,7 @@ static inline int ocfs2_calc_extend_credits(struct super_block *sb,
static inline int ocfs2_calc_symlink_credits(struct super_block *sb)
{
int blocks = ocfs2_mknod_credits(sb);
int blocks = ocfs2_mknod_credits(sb, 0, 0);
/* links can be longer than one block so we may update many
* within our single allocated extent. */
......
......@@ -28,7 +28,6 @@
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/bitops.h>
#include <linux/debugfs.h>
#define MLOG_MASK_PREFIX ML_DISK_ALLOC
#include <cluster/masklog.h>
......@@ -75,84 +74,6 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
struct inode *local_alloc_inode);
#ifdef CONFIG_OCFS2_FS_STATS
static int ocfs2_la_debug_open(struct inode *inode, struct file *file)
{
file->private_data = inode->i_private;
return 0;
}
#define LA_DEBUG_BUF_SZ PAGE_CACHE_SIZE
#define LA_DEBUG_VER 1
static ssize_t ocfs2_la_debug_read(struct file *file, char __user *userbuf,
size_t count, loff_t *ppos)
{
static DEFINE_MUTEX(la_debug_mutex);
struct ocfs2_super *osb = file->private_data;
int written, ret;
char *buf = osb->local_alloc_debug_buf;
mutex_lock(&la_debug_mutex);
memset(buf, 0, LA_DEBUG_BUF_SZ);
written = snprintf(buf, LA_DEBUG_BUF_SZ,
"0x%x\t0x%llx\t%u\t%u\t0x%x\n",
LA_DEBUG_VER,
(unsigned long long)osb->la_last_gd,
osb->local_alloc_default_bits,
osb->local_alloc_bits, osb->local_alloc_state);
ret = simple_read_from_buffer(userbuf, count, ppos, buf, written);
mutex_unlock(&la_debug_mutex);
return ret;
}
static const struct file_operations ocfs2_la_debug_fops = {
.open = ocfs2_la_debug_open,
.read = ocfs2_la_debug_read,
};
static void ocfs2_init_la_debug(struct ocfs2_super *osb)
{
osb->local_alloc_debug_buf = kmalloc(LA_DEBUG_BUF_SZ, GFP_NOFS);
if (!osb->local_alloc_debug_buf)
return;
osb->local_alloc_debug = debugfs_create_file("local_alloc_stats",
S_IFREG|S_IRUSR,
osb->osb_debug_root,
osb,
&ocfs2_la_debug_fops);
if (!osb->local_alloc_debug) {
kfree(osb->local_alloc_debug_buf);
osb->local_alloc_debug_buf = NULL;
}
}
static void ocfs2_shutdown_la_debug(struct ocfs2_super *osb)
{
if (osb->local_alloc_debug)
debugfs_remove(osb->local_alloc_debug);
if (osb->local_alloc_debug_buf)
kfree(osb->local_alloc_debug_buf);
osb->local_alloc_debug_buf = NULL;
osb->local_alloc_debug = NULL;
}
#else /* CONFIG_OCFS2_FS_STATS */
static void ocfs2_init_la_debug(struct ocfs2_super *osb)
{
return;
}
static void ocfs2_shutdown_la_debug(struct ocfs2_super *osb)
{
return;
}
#endif
static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb)
{
return (osb->local_alloc_state == OCFS2_LA_THROTTLED ||
......@@ -226,8 +147,6 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb)
mlog_entry_void();
ocfs2_init_la_debug(osb);
if (osb->local_alloc_bits == 0)
goto bail;
......@@ -299,9 +218,6 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb)
if (inode)
iput(inode);
if (status < 0)
ocfs2_shutdown_la_debug(osb);
mlog(0, "Local alloc window bits = %d\n", osb->local_alloc_bits);
mlog_exit(status);
......@@ -331,8 +247,6 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
cancel_delayed_work(&osb->la_enable_wq);
flush_workqueue(ocfs2_wq);
ocfs2_shutdown_la_debug(osb);
if (osb->local_alloc_state == OCFS2_LA_UNUSED)
goto out;
......
......@@ -80,14 +80,14 @@ static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
struct inode **ret_orphan_dir,
struct inode *inode,
char *name,
struct buffer_head **de_bh);
struct ocfs2_dir_lookup_result *lookup);
static int ocfs2_orphan_add(struct ocfs2_super *osb,
handle_t *handle,
struct inode *inode,
struct ocfs2_dinode *fe,
char *name,
struct buffer_head *de_bh,
struct ocfs2_dir_lookup_result *lookup,
struct inode *orphan_dir_inode);
static int ocfs2_create_symlink_data(struct ocfs2_super *osb,
......@@ -228,17 +228,18 @@ static int ocfs2_mknod(struct inode *dir,
struct ocfs2_super *osb;
struct ocfs2_dinode *dirfe;
struct buffer_head *new_fe_bh = NULL;
struct buffer_head *de_bh = NULL;
struct inode *inode = NULL;
struct ocfs2_alloc_context *inode_ac = NULL;
struct ocfs2_alloc_context *data_ac = NULL;
struct ocfs2_alloc_context *xattr_ac = NULL;
struct ocfs2_alloc_context *meta_ac = NULL;
int want_clusters = 0;
int want_meta = 0;
int xattr_credits = 0;
struct ocfs2_security_xattr_info si = {
.enable = 1,
};
int did_quota_inode = 0;
struct ocfs2_dir_lookup_result lookup = { NULL, };
mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, mode,
(unsigned long)dev, dentry->d_name.len,
......@@ -254,13 +255,13 @@ static int ocfs2_mknod(struct inode *dir,
return status;
}
if (S_ISDIR(mode) && (dir->i_nlink >= OCFS2_LINK_MAX)) {
if (S_ISDIR(mode) && (dir->i_nlink >= ocfs2_link_max(osb))) {
status = -EMLINK;
goto leave;
}
dirfe = (struct ocfs2_dinode *) parent_fe_bh->b_data;
if (!dirfe->i_links_count) {
if (!ocfs2_read_links_count(dirfe)) {
/* can't make a file in a deleted directory. */
status = -ENOENT;
goto leave;
......@@ -274,7 +275,7 @@ static int ocfs2_mknod(struct inode *dir,
/* get a spot inside the dir. */
status = ocfs2_prepare_dir_for_insert(osb, dir, parent_fe_bh,
dentry->d_name.name,
dentry->d_name.len, &de_bh);
dentry->d_name.len, &lookup);
if (status < 0) {
mlog_errno(status);
goto leave;
......@@ -308,17 +309,29 @@ static int ocfs2_mknod(struct inode *dir,
/* calculate meta data/clusters for setting security and acl xattr */
status = ocfs2_calc_xattr_init(dir, parent_fe_bh, mode,
&si, &want_clusters,
&xattr_credits, &xattr_ac);
&si, &want_clusters,
&xattr_credits, &want_meta);
if (status < 0) {
mlog_errno(status);
goto leave;
}
/* Reserve a cluster if creating an extent based directory. */
if (S_ISDIR(mode) && !ocfs2_supports_inline_data(osb))
if (S_ISDIR(mode) && !ocfs2_supports_inline_data(osb)) {
want_clusters += 1;
/* Dir indexing requires extra space as well */
if (ocfs2_supports_indexed_dirs(osb))
want_meta++;
}
status = ocfs2_reserve_new_metadata_blocks(osb, want_meta, &meta_ac);
if (status < 0) {
if (status != -ENOSPC)
mlog_errno(status);
goto leave;
}
status = ocfs2_reserve_clusters(osb, want_clusters, &data_ac);
if (status < 0) {
if (status != -ENOSPC)
......@@ -326,8 +339,9 @@ static int ocfs2_mknod(struct inode *dir,
goto leave;
}
handle = ocfs2_start_trans(osb, ocfs2_mknod_credits(osb->sb) +
xattr_credits);
handle = ocfs2_start_trans(osb, ocfs2_mknod_credits(osb->sb,
S_ISDIR(mode),
xattr_credits));
if (IS_ERR(handle)) {
status = PTR_ERR(handle);
handle = NULL;
......@@ -355,7 +369,7 @@ static int ocfs2_mknod(struct inode *dir,
if (S_ISDIR(mode)) {
status = ocfs2_fill_new_dir(osb, handle, dir, inode,
new_fe_bh, data_ac);
new_fe_bh, data_ac, meta_ac);
if (status < 0) {
mlog_errno(status);
goto leave;
......@@ -367,7 +381,7 @@ static int ocfs2_mknod(struct inode *dir,
mlog_errno(status);
goto leave;
}
le16_add_cpu(&dirfe->i_links_count, 1);
ocfs2_add_links_count(dirfe, 1);
status = ocfs2_journal_dirty(handle, parent_fe_bh);
if (status < 0) {
mlog_errno(status);
......@@ -377,7 +391,7 @@ static int ocfs2_mknod(struct inode *dir,
}
status = ocfs2_init_acl(handle, inode, dir, new_fe_bh, parent_fe_bh,
xattr_ac, data_ac);
meta_ac, data_ac);
if (status < 0) {
mlog_errno(status);
goto leave;
......@@ -385,7 +399,7 @@ static int ocfs2_mknod(struct inode *dir,
if (si.enable) {
status = ocfs2_init_security_set(handle, inode, new_fe_bh, &si,
xattr_ac, data_ac);
meta_ac, data_ac);
if (status < 0) {
mlog_errno(status);
goto leave;
......@@ -394,7 +408,7 @@ static int ocfs2_mknod(struct inode *dir,
status = ocfs2_add_entry(handle, dentry, inode,
OCFS2_I(inode)->ip_blkno, parent_fe_bh,
de_bh);
&lookup);
if (status < 0) {
mlog_errno(status);
goto leave;
......@@ -423,11 +437,12 @@ static int ocfs2_mknod(struct inode *dir,
mlog(0, "Disk is full\n");
brelse(new_fe_bh);
brelse(de_bh);
brelse(parent_fe_bh);
kfree(si.name);
kfree(si.value);
ocfs2_free_dir_lookup_result(&lookup);
if ((status < 0) && inode) {
clear_nlink(inode);
iput(inode);
......@@ -439,8 +454,8 @@ static int ocfs2_mknod(struct inode *dir,
if (data_ac)
ocfs2_free_alloc_context(data_ac);
if (xattr_ac)
ocfs2_free_alloc_context(xattr_ac);
if (meta_ac)
ocfs2_free_alloc_context(meta_ac);
mlog_exit(status);
......@@ -462,6 +477,7 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
struct ocfs2_extent_list *fel;
u64 fe_blkno = 0;
u16 suballoc_bit;
u16 feat;
mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry,
inode->i_mode, (unsigned long)dev, dentry->d_name.len,
......@@ -469,8 +485,8 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
*new_fe_bh = NULL;
status = ocfs2_claim_new_inode(osb, handle, inode_ac, &suballoc_bit,
&fe_blkno);
status = ocfs2_claim_new_inode(osb, handle, dir, parent_fe_bh,
inode_ac, &suballoc_bit, &fe_blkno);
if (status < 0) {
mlog_errno(status);
goto leave;
......@@ -513,7 +529,8 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
fe->i_mode = cpu_to_le16(inode->i_mode);
if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
fe->id1.dev1.i_rdev = cpu_to_le64(huge_encode_dev(dev));
fe->i_links_count = cpu_to_le16(inode->i_nlink);
ocfs2_set_links_count(fe, inode->i_nlink);
fe->i_last_eb_blk = 0;
strcpy(fe->i_signature, OCFS2_INODE_SIGNATURE);
......@@ -525,11 +542,11 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
fe->i_dtime = 0;
/*
* If supported, directories start with inline data.
* If supported, directories start with inline data. If inline
* isn't supported, but indexing is, we start them as indexed.
*/
feat = le16_to_cpu(fe->i_dyn_features);
if (S_ISDIR(inode->i_mode) && ocfs2_supports_inline_data(osb)) {
u16 feat = le16_to_cpu(fe->i_dyn_features);
fe->i_dyn_features = cpu_to_le16(feat | OCFS2_INLINE_DATA_FL);
fe->id2.i_data.id_count = cpu_to_le16(
......@@ -608,9 +625,9 @@ static int ocfs2_link(struct dentry *old_dentry,
int err;
struct buffer_head *fe_bh = NULL;
struct buffer_head *parent_fe_bh = NULL;
struct buffer_head *de_bh = NULL;
struct ocfs2_dinode *fe = NULL;
struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
struct ocfs2_dir_lookup_result lookup = { NULL, };
mlog_entry("(inode=%lu, old='%.*s' new='%.*s')\n", inode->i_ino,
old_dentry->d_name.len, old_dentry->d_name.name,
......@@ -638,7 +655,7 @@ static int ocfs2_link(struct dentry *old_dentry,
err = ocfs2_prepare_dir_for_insert(osb, dir, parent_fe_bh,
dentry->d_name.name,
dentry->d_name.len, &de_bh);
dentry->d_name.len, &lookup);
if (err < 0) {
mlog_errno(err);
goto out;
......@@ -652,7 +669,7 @@ static int ocfs2_link(struct dentry *old_dentry,
}
fe = (struct ocfs2_dinode *) fe_bh->b_data;
if (le16_to_cpu(fe->i_links_count) >= OCFS2_LINK_MAX) {
if (ocfs2_read_links_count(fe) >= ocfs2_link_max(osb)) {
err = -EMLINK;
goto out_unlock_inode;
}
......@@ -674,13 +691,13 @@ static int ocfs2_link(struct dentry *old_dentry,
inc_nlink(inode);
inode->i_ctime = CURRENT_TIME;
fe->i_links_count = cpu_to_le16(inode->i_nlink);
ocfs2_set_links_count(fe, inode->i_nlink);
fe->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
fe->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
err = ocfs2_journal_dirty(handle, fe_bh);
if (err < 0) {
le16_add_cpu(&fe->i_links_count, -1);
ocfs2_add_links_count(fe, -1);
drop_nlink(inode);
mlog_errno(err);
goto out_commit;
......@@ -688,9 +705,9 @@ static int ocfs2_link(struct dentry *old_dentry,
err = ocfs2_add_entry(handle, dentry, inode,
OCFS2_I(inode)->ip_blkno,
parent_fe_bh, de_bh);
parent_fe_bh, &lookup);
if (err) {
le16_add_cpu(&fe->i_links_count, -1);
ocfs2_add_links_count(fe, -1);
drop_nlink(inode);
mlog_errno(err);
goto out_commit;
......@@ -714,10 +731,11 @@ static int ocfs2_link(struct dentry *old_dentry,
out:
ocfs2_inode_unlock(dir, 1);
brelse(de_bh);
brelse(fe_bh);
brelse(parent_fe_bh);
ocfs2_free_dir_lookup_result(&lookup);
mlog_exit(err);
return err;
......@@ -766,10 +784,9 @@ static int ocfs2_unlink(struct inode *dir,
struct buffer_head *fe_bh = NULL;
struct buffer_head *parent_node_bh = NULL;
handle_t *handle = NULL;
struct ocfs2_dir_entry *dirent = NULL;
struct buffer_head *dirent_bh = NULL;
char orphan_name[OCFS2_ORPHAN_NAMELEN + 1];
struct buffer_head *orphan_entry_bh = NULL;
struct ocfs2_dir_lookup_result lookup = { NULL, };
struct ocfs2_dir_lookup_result orphan_insert = { NULL, };
mlog_entry("(0x%p, 0x%p, '%.*s')\n", dir, dentry,
dentry->d_name.len, dentry->d_name.name);
......@@ -791,8 +808,8 @@ static int ocfs2_unlink(struct inode *dir,
}
status = ocfs2_find_files_on_disk(dentry->d_name.name,
dentry->d_name.len, &blkno,
dir, &dirent_bh, &dirent);
dentry->d_name.len, &blkno, dir,
&lookup);
if (status < 0) {
if (status != -ENOENT)
mlog_errno(status);
......@@ -817,10 +834,7 @@ static int ocfs2_unlink(struct inode *dir,
child_locked = 1;
if (S_ISDIR(inode->i_mode)) {
if (!ocfs2_empty_dir(inode)) {
status = -ENOTEMPTY;
goto leave;
} else if (inode->i_nlink != 2) {
if (inode->i_nlink != 2 || !ocfs2_empty_dir(inode)) {
status = -ENOTEMPTY;
goto leave;
}
......@@ -836,8 +850,7 @@ static int ocfs2_unlink(struct inode *dir,
if (inode_is_unlinkable(inode)) {
status = ocfs2_prepare_orphan_dir(osb, &orphan_dir, inode,
orphan_name,
&orphan_entry_bh);
orphan_name, &orphan_insert);
if (status < 0) {
mlog_errno(status);
goto leave;
......@@ -863,7 +876,7 @@ static int ocfs2_unlink(struct inode *dir,
if (inode_is_unlinkable(inode)) {
status = ocfs2_orphan_add(osb, handle, inode, fe, orphan_name,
orphan_entry_bh, orphan_dir);
&orphan_insert, orphan_dir);
if (status < 0) {
mlog_errno(status);
goto leave;
......@@ -871,7 +884,7 @@ static int ocfs2_unlink(struct inode *dir,
}
/* delete the name from the parent dir */
status = ocfs2_delete_entry(handle, dir, dirent, dirent_bh);
status = ocfs2_delete_entry(handle, dir, &lookup);
if (status < 0) {
mlog_errno(status);
goto leave;
......@@ -880,7 +893,7 @@ static int ocfs2_unlink(struct inode *dir,
if (S_ISDIR(inode->i_mode))
drop_nlink(inode);
drop_nlink(inode);
fe->i_links_count = cpu_to_le16(inode->i_nlink);
ocfs2_set_links_count(fe, inode->i_nlink);
status = ocfs2_journal_dirty(handle, fe_bh);
if (status < 0) {
......@@ -916,9 +929,10 @@ static int ocfs2_unlink(struct inode *dir,
}
brelse(fe_bh);
brelse(dirent_bh);
brelse(parent_node_bh);
brelse(orphan_entry_bh);
ocfs2_free_dir_lookup_result(&orphan_insert);
ocfs2_free_dir_lookup_result(&lookup);
mlog_exit(status);
......@@ -1004,8 +1018,8 @@ static int ocfs2_rename(struct inode *old_dir,
struct inode *new_dir,
struct dentry *new_dentry)
{
int status = 0, rename_lock = 0, parents_locked = 0;
int old_child_locked = 0, new_child_locked = 0;
int status = 0, rename_lock = 0, parents_locked = 0, target_exists = 0;
int old_child_locked = 0, new_child_locked = 0, update_dot_dot = 0;
struct inode *old_inode = old_dentry->d_inode;
struct inode *new_inode = new_dentry->d_inode;
struct inode *orphan_dir = NULL;
......@@ -1020,13 +1034,13 @@ static int ocfs2_rename(struct inode *old_dir,
handle_t *handle = NULL;
struct buffer_head *old_dir_bh = NULL;
struct buffer_head *new_dir_bh = NULL;
struct ocfs2_dir_entry *old_inode_dot_dot_de = NULL, *old_de = NULL,
*new_de = NULL;
struct buffer_head *new_de_bh = NULL, *old_de_bh = NULL; // bhs for above
struct buffer_head *old_inode_de_bh = NULL; // if old_dentry is a dir,
// this is the 1st dirent bh
nlink_t old_dir_nlink = old_dir->i_nlink;
struct ocfs2_dinode *old_di;
struct ocfs2_dir_lookup_result old_inode_dot_dot_res = { NULL, };
struct ocfs2_dir_lookup_result target_lookup_res = { NULL, };
struct ocfs2_dir_lookup_result old_entry_lookup = { NULL, };
struct ocfs2_dir_lookup_result orphan_insert = { NULL, };
struct ocfs2_dir_lookup_result target_insert = { NULL, };
/* At some point it might be nice to break this function up a
* bit. */
......@@ -1108,9 +1122,10 @@ static int ocfs2_rename(struct inode *old_dir,
if (S_ISDIR(old_inode->i_mode)) {
u64 old_inode_parent;
update_dot_dot = 1;
status = ocfs2_find_files_on_disk("..", 2, &old_inode_parent,
old_inode, &old_inode_de_bh,
&old_inode_dot_dot_de);
old_inode,
&old_inode_dot_dot_res);
if (status) {
status = -EIO;
goto bail;
......@@ -1122,7 +1137,7 @@ static int ocfs2_rename(struct inode *old_dir,
}
if (!new_inode && new_dir != old_dir &&
new_dir->i_nlink >= OCFS2_LINK_MAX) {
new_dir->i_nlink >= ocfs2_link_max(osb)) {
status = -EMLINK;
goto bail;
}
......@@ -1151,8 +1166,8 @@ static int ocfs2_rename(struct inode *old_dir,
* to delete it */
status = ocfs2_find_files_on_disk(new_dentry->d_name.name,
new_dentry->d_name.len,
&newfe_blkno, new_dir, &new_de_bh,
&new_de);
&newfe_blkno, new_dir,
&target_lookup_res);
/* The only error we allow here is -ENOENT because the new
* file not existing is perfectly valid. */
if ((status < 0) && (status != -ENOENT)) {
......@@ -1161,8 +1176,10 @@ static int ocfs2_rename(struct inode *old_dir,
mlog_errno(status);
goto bail;
}
if (status == 0)
target_exists = 1;
if (!new_de && new_inode) {
if (!target_exists && new_inode) {
/*
* Target was unlinked by another node while we were
* waiting to get to ocfs2_rename(). There isn't
......@@ -1175,7 +1192,7 @@ static int ocfs2_rename(struct inode *old_dir,
/* In case we need to overwrite an existing file, we blow it
* away first */
if (new_de) {
if (target_exists) {
/* VFS didn't think there existed an inode here, but
* someone else in the cluster must have raced our
* rename to create one. Today we error cleanly, in
......@@ -1216,8 +1233,8 @@ static int ocfs2_rename(struct inode *old_dir,
newfe = (struct ocfs2_dinode *) newfe_bh->b_data;
mlog(0, "aha rename over existing... new_de=%p new_blkno=%llu "
"newfebh=%p bhblocknr=%llu\n", new_de,
mlog(0, "aha rename over existing... new_blkno=%llu "
"newfebh=%p bhblocknr=%llu\n",
(unsigned long long)newfe_blkno, newfe_bh, newfe_bh ?
(unsigned long long)newfe_bh->b_blocknr : 0ULL);
......@@ -1225,7 +1242,7 @@ static int ocfs2_rename(struct inode *old_dir,
status = ocfs2_prepare_orphan_dir(osb, &orphan_dir,
new_inode,
orphan_name,
&orphan_entry_bh);
&orphan_insert);
if (status < 0) {
mlog_errno(status);
goto bail;
......@@ -1243,7 +1260,7 @@ static int ocfs2_rename(struct inode *old_dir,
status = ocfs2_prepare_dir_for_insert(osb, new_dir, new_dir_bh,
new_dentry->d_name.name,
new_dentry->d_name.len,
&insert_entry_bh);
&target_insert);
if (status < 0) {
mlog_errno(status);
goto bail;
......@@ -1258,10 +1275,10 @@ static int ocfs2_rename(struct inode *old_dir,
goto bail;
}
if (new_de) {
if (target_exists) {
if (S_ISDIR(new_inode->i_mode)) {
if (!ocfs2_empty_dir(new_inode) ||
new_inode->i_nlink != 2) {
if (new_inode->i_nlink != 2 ||
!ocfs2_empty_dir(new_inode)) {
status = -ENOTEMPTY;
goto bail;
}
......@@ -1274,10 +1291,10 @@ static int ocfs2_rename(struct inode *old_dir,
}
if (S_ISDIR(new_inode->i_mode) ||
(newfe->i_links_count == cpu_to_le16(1))){
(ocfs2_read_links_count(newfe) == 1)) {
status = ocfs2_orphan_add(osb, handle, new_inode,
newfe, orphan_name,
orphan_entry_bh, orphan_dir);
&orphan_insert, orphan_dir);
if (status < 0) {
mlog_errno(status);
goto bail;
......@@ -1285,8 +1302,8 @@ static int ocfs2_rename(struct inode *old_dir,
}
/* change the dirent to point to the correct inode */
status = ocfs2_update_entry(new_dir, handle, new_de_bh,
new_de, old_inode);
status = ocfs2_update_entry(new_dir, handle, &target_lookup_res,
old_inode);
if (status < 0) {
mlog_errno(status);
goto bail;
......@@ -1294,9 +1311,9 @@ static int ocfs2_rename(struct inode *old_dir,
new_dir->i_version++;
if (S_ISDIR(new_inode->i_mode))
newfe->i_links_count = 0;
ocfs2_set_links_count(newfe, 0);
else
le16_add_cpu(&newfe->i_links_count, -1);
ocfs2_add_links_count(newfe, -1);
status = ocfs2_journal_dirty(handle, newfe_bh);
if (status < 0) {
......@@ -1307,7 +1324,7 @@ static int ocfs2_rename(struct inode *old_dir,
/* if the name was not found in new_dir, add it now */
status = ocfs2_add_entry(handle, new_dentry, old_inode,
OCFS2_I(old_inode)->ip_blkno,
new_dir_bh, insert_entry_bh);
new_dir_bh, &target_insert);
}
old_inode->i_ctime = CURRENT_TIME;
......@@ -1334,15 +1351,13 @@ static int ocfs2_rename(struct inode *old_dir,
* because the insert might have changed the type of directory
* we're dealing with.
*/
old_de_bh = ocfs2_find_entry(old_dentry->d_name.name,
old_dentry->d_name.len,
old_dir, &old_de);
if (!old_de_bh) {
status = -EIO;
status = ocfs2_find_entry(old_dentry->d_name.name,
old_dentry->d_name.len, old_dir,
&old_entry_lookup);
if (status)
goto bail;
}
status = ocfs2_delete_entry(handle, old_dir, old_de, old_de_bh);
status = ocfs2_delete_entry(handle, old_dir, &old_entry_lookup);
if (status < 0) {
mlog_errno(status);
goto bail;
......@@ -1353,9 +1368,10 @@ static int ocfs2_rename(struct inode *old_dir,
new_inode->i_ctime = CURRENT_TIME;
}
old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME;
if (old_inode_de_bh) {
status = ocfs2_update_entry(old_inode, handle, old_inode_de_bh,
old_inode_dot_dot_de, new_dir);
if (update_dot_dot) {
status = ocfs2_update_entry(old_inode, handle,
&old_inode_dot_dot_res, new_dir);
old_dir->i_nlink--;
if (new_inode) {
new_inode->i_nlink--;
......@@ -1391,14 +1407,13 @@ static int ocfs2_rename(struct inode *old_dir,
} else {
struct ocfs2_dinode *fe;
status = ocfs2_journal_access_di(handle, old_dir,
old_dir_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
old_dir_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
fe = (struct ocfs2_dinode *) old_dir_bh->b_data;
fe->i_links_count = cpu_to_le16(old_dir->i_nlink);
ocfs2_set_links_count(fe, old_dir->i_nlink);
status = ocfs2_journal_dirty(handle, old_dir_bh);
}
}
ocfs2_dentry_move(old_dentry, new_dentry, old_dir, new_dir);
status = 0;
bail:
......@@ -1429,13 +1444,17 @@ static int ocfs2_rename(struct inode *old_dir,
if (new_inode)
iput(new_inode);
ocfs2_free_dir_lookup_result(&target_lookup_res);
ocfs2_free_dir_lookup_result(&old_entry_lookup);
ocfs2_free_dir_lookup_result(&old_inode_dot_dot_res);
ocfs2_free_dir_lookup_result(&orphan_insert);
ocfs2_free_dir_lookup_result(&target_insert);
brelse(newfe_bh);
brelse(old_inode_bh);
brelse(old_dir_bh);
brelse(new_dir_bh);
brelse(new_de_bh);
brelse(old_de_bh);
brelse(old_inode_de_bh);
brelse(orphan_entry_bh);
brelse(insert_entry_bh);
......@@ -1558,7 +1577,6 @@ static int ocfs2_symlink(struct inode *dir,
struct inode *inode = NULL;
struct super_block *sb;
struct buffer_head *new_fe_bh = NULL;
struct buffer_head *de_bh = NULL;
struct buffer_head *parent_fe_bh = NULL;
struct ocfs2_dinode *fe = NULL;
struct ocfs2_dinode *dirfe;
......@@ -1572,6 +1590,7 @@ static int ocfs2_symlink(struct inode *dir,
.enable = 1,
};
int did_quota = 0, did_quota_inode = 0;
struct ocfs2_dir_lookup_result lookup = { NULL, };
mlog_entry("(0x%p, 0x%p, symname='%s' actual='%.*s')\n", dir,
dentry, symname, dentry->d_name.len, dentry->d_name.name);
......@@ -1592,7 +1611,7 @@ static int ocfs2_symlink(struct inode *dir,
}
dirfe = (struct ocfs2_dinode *) parent_fe_bh->b_data;
if (!dirfe->i_links_count) {
if (!ocfs2_read_links_count(dirfe)) {
/* can't make a file in a deleted directory. */
status = -ENOENT;
goto bail;
......@@ -1605,7 +1624,7 @@ static int ocfs2_symlink(struct inode *dir,
status = ocfs2_prepare_dir_for_insert(osb, dir, parent_fe_bh,
dentry->d_name.name,
dentry->d_name.len, &de_bh);
dentry->d_name.len, &lookup);
if (status < 0) {
mlog_errno(status);
goto bail;
......@@ -1744,7 +1763,7 @@ static int ocfs2_symlink(struct inode *dir,
status = ocfs2_add_entry(handle, dentry, inode,
le64_to_cpu(fe->i_blkno), parent_fe_bh,
de_bh);
&lookup);
if (status < 0) {
mlog_errno(status);
goto bail;
......@@ -1772,9 +1791,9 @@ static int ocfs2_symlink(struct inode *dir,
brelse(new_fe_bh);
brelse(parent_fe_bh);
brelse(de_bh);
kfree(si.name);
kfree(si.value);
ocfs2_free_dir_lookup_result(&lookup);
if (inode_ac)
ocfs2_free_alloc_context(inode_ac);
if (data_ac)
......@@ -1826,7 +1845,7 @@ static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
struct inode **ret_orphan_dir,
struct inode *inode,
char *name,
struct buffer_head **de_bh)
struct ocfs2_dir_lookup_result *lookup)
{
struct inode *orphan_dir_inode;
struct buffer_head *orphan_dir_bh = NULL;
......@@ -1857,7 +1876,7 @@ static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
status = ocfs2_prepare_dir_for_insert(osb, orphan_dir_inode,
orphan_dir_bh, name,
OCFS2_ORPHAN_NAMELEN, de_bh);
OCFS2_ORPHAN_NAMELEN, lookup);
if (status < 0) {
ocfs2_inode_unlock(orphan_dir_inode, 1);
......@@ -1884,7 +1903,7 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
struct inode *inode,
struct ocfs2_dinode *fe,
char *name,
struct buffer_head *de_bh,
struct ocfs2_dir_lookup_result *lookup,
struct inode *orphan_dir_inode)
{
struct buffer_head *orphan_dir_bh = NULL;
......@@ -1910,8 +1929,8 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
* underneath us... */
orphan_fe = (struct ocfs2_dinode *) orphan_dir_bh->b_data;
if (S_ISDIR(inode->i_mode))
le16_add_cpu(&orphan_fe->i_links_count, 1);
orphan_dir_inode->i_nlink = le16_to_cpu(orphan_fe->i_links_count);
ocfs2_add_links_count(orphan_fe, 1);
orphan_dir_inode->i_nlink = ocfs2_read_links_count(orphan_fe);
status = ocfs2_journal_dirty(handle, orphan_dir_bh);
if (status < 0) {
......@@ -1922,7 +1941,7 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
status = __ocfs2_add_entry(handle, orphan_dir_inode, name,
OCFS2_ORPHAN_NAMELEN, inode,
OCFS2_I(inode)->ip_blkno,
orphan_dir_bh, de_bh);
orphan_dir_bh, lookup);
if (status < 0) {
mlog_errno(status);
goto leave;
......@@ -1955,8 +1974,7 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
char name[OCFS2_ORPHAN_NAMELEN + 1];
struct ocfs2_dinode *orphan_fe;
int status = 0;
struct buffer_head *target_de_bh = NULL;
struct ocfs2_dir_entry *target_de = NULL;
struct ocfs2_dir_lookup_result lookup = { NULL, };
mlog_entry_void();
......@@ -1971,17 +1989,15 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
OCFS2_ORPHAN_NAMELEN);
/* find it's spot in the orphan directory */
target_de_bh = ocfs2_find_entry(name, OCFS2_ORPHAN_NAMELEN,
orphan_dir_inode, &target_de);
if (!target_de_bh) {
status = -ENOENT;
status = ocfs2_find_entry(name, OCFS2_ORPHAN_NAMELEN, orphan_dir_inode,
&lookup);
if (status) {
mlog_errno(status);
goto leave;
}
/* remove it from the orphan directory */
status = ocfs2_delete_entry(handle, orphan_dir_inode, target_de,
target_de_bh);
status = ocfs2_delete_entry(handle, orphan_dir_inode, &lookup);
if (status < 0) {
mlog_errno(status);
goto leave;
......@@ -1997,8 +2013,8 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
/* do the i_nlink dance! :) */
orphan_fe = (struct ocfs2_dinode *) orphan_dir_bh->b_data;
if (S_ISDIR(inode->i_mode))
le16_add_cpu(&orphan_fe->i_links_count, -1);
orphan_dir_inode->i_nlink = le16_to_cpu(orphan_fe->i_links_count);
ocfs2_add_links_count(orphan_fe, -1);
orphan_dir_inode->i_nlink = ocfs2_read_links_count(orphan_fe);
status = ocfs2_journal_dirty(handle, orphan_dir_bh);
if (status < 0) {
......@@ -2007,7 +2023,7 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
}
leave:
brelse(target_de_bh);
ocfs2_free_dir_lookup_result(&lookup);
mlog_exit(status);
return status;
......
......@@ -209,6 +209,7 @@ enum ocfs2_mount_options
struct ocfs2_journal;
struct ocfs2_slot_info;
struct ocfs2_recovery_map;
struct ocfs2_replay_map;
struct ocfs2_quota_recovery;
struct ocfs2_dentry_lock;
struct ocfs2_super
......@@ -264,6 +265,7 @@ struct ocfs2_super
atomic_t vol_state;
struct mutex recovery_lock;
struct ocfs2_recovery_map *recovery_map;
struct ocfs2_replay_map *replay_map;
struct task_struct *recovery_thread_task;
int disable_recovery;
wait_queue_head_t checkpoint_event;
......@@ -287,11 +289,6 @@ struct ocfs2_super
u64 la_last_gd;
#ifdef CONFIG_OCFS2_FS_STATS
struct dentry *local_alloc_debug;
char *local_alloc_debug_buf;
#endif
/* Next three fields are for local node slot recovery during
* mount. */
int dirty;
......@@ -305,9 +302,11 @@ struct ocfs2_super
struct ocfs2_cluster_connection *cconn;
struct ocfs2_lock_res osb_super_lockres;
struct ocfs2_lock_res osb_rename_lockres;
struct ocfs2_lock_res osb_nfs_sync_lockres;
struct ocfs2_dlm_debug *osb_dlm_debug;
struct dentry *osb_debug_root;
struct dentry *osb_ctxt;
wait_queue_head_t recovery_event;
......@@ -344,6 +343,12 @@ struct ocfs2_super
/* used to protect metaecc calculation check of xattr. */
spinlock_t osb_xattr_lock;
unsigned int osb_dx_mask;
u32 osb_dx_seed[4];
/* the group we used to allocate inodes. */
u64 osb_inode_alloc_group;
};
#define OCFS2_SB(sb) ((struct ocfs2_super *)(sb)->s_fs_info)
......@@ -402,6 +407,51 @@ static inline int ocfs2_meta_ecc(struct ocfs2_super *osb)
return 0;
}
static inline int ocfs2_supports_indexed_dirs(struct ocfs2_super *osb)
{
if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS)
return 1;
return 0;
}
static inline unsigned int ocfs2_link_max(struct ocfs2_super *osb)
{
if (ocfs2_supports_indexed_dirs(osb))
return OCFS2_DX_LINK_MAX;
return OCFS2_LINK_MAX;
}
static inline unsigned int ocfs2_read_links_count(struct ocfs2_dinode *di)
{
u32 nlink = le16_to_cpu(di->i_links_count);
u32 hi = le16_to_cpu(di->i_links_count_hi);
if (di->i_dyn_features & cpu_to_le16(OCFS2_INDEXED_DIR_FL))
nlink |= (hi << OCFS2_LINKS_HI_SHIFT);
return nlink;
}
static inline void ocfs2_set_links_count(struct ocfs2_dinode *di, u32 nlink)
{
u16 lo, hi;
lo = nlink;
hi = nlink >> OCFS2_LINKS_HI_SHIFT;
di->i_links_count = cpu_to_le16(lo);
di->i_links_count_hi = cpu_to_le16(hi);
}
static inline void ocfs2_add_links_count(struct ocfs2_dinode *di, int n)
{
u32 links = ocfs2_read_links_count(di);
links += n;
ocfs2_set_links_count(di, links);
}
/* set / clear functions because cluster events can make these happen
* in parallel so we want the transitions to be atomic. this also
* means that any future flags osb_flags must be protected by spinlock
......@@ -482,6 +532,12 @@ static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb)
#define OCFS2_IS_VALID_DIR_TRAILER(ptr) \
(!strcmp((ptr)->db_signature, OCFS2_DIR_TRAILER_SIGNATURE))
#define OCFS2_IS_VALID_DX_ROOT(ptr) \
(!strcmp((ptr)->dr_signature, OCFS2_DX_ROOT_SIGNATURE))
#define OCFS2_IS_VALID_DX_LEAF(ptr) \
(!strcmp((ptr)->dl_signature, OCFS2_DX_LEAF_SIGNATURE))
static inline unsigned long ino_from_blkno(struct super_block *sb,
u64 blkno)
{
......@@ -532,6 +588,16 @@ static inline u64 ocfs2_clusters_to_bytes(struct super_block *sb,
return (u64)clusters << OCFS2_SB(sb)->s_clustersize_bits;
}
static inline u64 ocfs2_block_to_cluster_start(struct super_block *sb,
u64 blocks)
{
int bits = OCFS2_SB(sb)->s_clustersize_bits - sb->s_blocksize_bits;
unsigned int clusters;
clusters = ocfs2_blocks_to_clusters(sb, blocks);
return (u64)clusters << bits;
}
static inline u64 ocfs2_align_bytes_to_clusters(struct super_block *sb,
u64 bytes)
{
......
......@@ -66,6 +66,8 @@
#define OCFS2_GROUP_DESC_SIGNATURE "GROUP01"
#define OCFS2_XATTR_BLOCK_SIGNATURE "XATTR01"
#define OCFS2_DIR_TRAILER_SIGNATURE "DIRTRL1"
#define OCFS2_DX_ROOT_SIGNATURE "DXDIR01"
#define OCFS2_DX_LEAF_SIGNATURE "DXLEAF1"
/* Compatibility flags */
#define OCFS2_HAS_COMPAT_FEATURE(sb,mask) \
......@@ -95,7 +97,8 @@
| OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP \
| OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK \
| OCFS2_FEATURE_INCOMPAT_XATTR \
| OCFS2_FEATURE_INCOMPAT_META_ECC)
| OCFS2_FEATURE_INCOMPAT_META_ECC \
| OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS)
#define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \
| OCFS2_FEATURE_RO_COMPAT_USRQUOTA \
| OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)
......@@ -151,6 +154,9 @@
/* Support for extended attributes */
#define OCFS2_FEATURE_INCOMPAT_XATTR 0x0200
/* Support for indexed directores */
#define OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS 0x0400
/* Metadata checksum and error correction */
#define OCFS2_FEATURE_INCOMPAT_META_ECC 0x0800
......@@ -411,8 +417,12 @@ static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = {
#define OCFS2_DIR_REC_LEN(name_len) (((name_len) + OCFS2_DIR_MEMBER_LEN + \
OCFS2_DIR_ROUND) & \
~OCFS2_DIR_ROUND)
#define OCFS2_DIR_MIN_REC_LEN OCFS2_DIR_REC_LEN(1)
#define OCFS2_LINK_MAX 32000
#define OCFS2_DX_LINK_MAX ((1U << 31) - 1U)
#define OCFS2_LINKS_HI_SHIFT 16
#define OCFS2_DX_ENTRIES_MAX (0xffffffffU)
#define S_SHIFT 12
static unsigned char ocfs2_type_by_mode[S_IFMT >> S_SHIFT] = {
......@@ -628,8 +638,9 @@ struct ocfs2_super_block {
/*B8*/ __le16 s_xattr_inline_size; /* extended attribute inline size
for this fs*/
__le16 s_reserved0;
__le32 s_reserved1;
/*C0*/ __le64 s_reserved2[16]; /* Fill out superblock */
__le32 s_dx_seed[3]; /* seed[0-2] for dx dir hash.
* s_uuid_hash serves as seed[3]. */
/*C0*/ __le64 s_reserved2[15]; /* Fill out superblock */
/*140*/
/*
......@@ -679,7 +690,7 @@ struct ocfs2_dinode {
belongs to */
__le16 i_suballoc_bit; /* Bit offset in suballocator
block group */
/*10*/ __le16 i_reserved0;
/*10*/ __le16 i_links_count_hi; /* High 16 bits of links count */
__le16 i_xattr_inline_size;
__le32 i_clusters; /* Cluster count */
__le32 i_uid; /* Owner UID */
......@@ -705,7 +716,8 @@ struct ocfs2_dinode {
__le16 i_dyn_features;
__le64 i_xattr_loc;
/*80*/ struct ocfs2_block_check i_check; /* Error checking */
/*88*/ __le64 i_reserved2[6];
/*88*/ __le64 i_dx_root; /* Pointer to dir index root block */
__le64 i_reserved2[5];
/*B8*/ union {
__le64 i_pad1; /* Generic way to refer to this
64bit union */
......@@ -781,6 +793,90 @@ struct ocfs2_dir_block_trailer {
/*40*/
};
/*
* A directory entry in the indexed tree. We don't store the full name here,
* but instead provide a pointer to the full dirent in the unindexed tree.
*
* We also store name_len here so as to reduce the number of leaf blocks we
* need to search in case of collisions.
*/
struct ocfs2_dx_entry {
__le32 dx_major_hash; /* Used to find logical
* cluster in index */
__le32 dx_minor_hash; /* Lower bits used to find
* block in cluster */
__le64 dx_dirent_blk; /* Physical block in unindexed
* tree holding this dirent. */
};
struct ocfs2_dx_entry_list {
__le32 de_reserved;
__le16 de_count; /* Maximum number of entries
* possible in de_entries */
__le16 de_num_used; /* Current number of
* de_entries entries */
struct ocfs2_dx_entry de_entries[0]; /* Indexed dir entries
* in a packed array of
* length de_num_used */
};
#define OCFS2_DX_FLAG_INLINE 0x01
/*
* A directory indexing block. Each indexed directory has one of these,
* pointed to by ocfs2_dinode.
*
* This block stores an indexed btree root, and a set of free space
* start-of-list pointers.
*/
struct ocfs2_dx_root_block {
__u8 dr_signature[8]; /* Signature for verification */
struct ocfs2_block_check dr_check; /* Error checking */
__le16 dr_suballoc_slot; /* Slot suballocator this
* block belongs to. */
__le16 dr_suballoc_bit; /* Bit offset in suballocator
* block group */
__le32 dr_fs_generation; /* Must match super block */
__le64 dr_blkno; /* Offset on disk, in blocks */
__le64 dr_last_eb_blk; /* Pointer to last
* extent block */
__le32 dr_clusters; /* Clusters allocated
* to the indexed tree. */
__u8 dr_flags; /* OCFS2_DX_FLAG_* flags */
__u8 dr_reserved0;
__le16 dr_reserved1;
__le64 dr_dir_blkno; /* Pointer to parent inode */
__le32 dr_num_entries; /* Total number of
* names stored in
* this directory.*/
__le32 dr_reserved2;
__le64 dr_free_blk; /* Pointer to head of free
* unindexed block list. */
__le64 dr_reserved3[15];
union {
struct ocfs2_extent_list dr_list; /* Keep this aligned to 128
* bits for maximum space
* efficiency. */
struct ocfs2_dx_entry_list dr_entries; /* In-root-block list of
* entries. We grow out
* to extents if this
* gets too big. */
};
};
/*
* The header of a leaf block in the indexed tree.
*/
struct ocfs2_dx_leaf {
__u8 dl_signature[8];/* Signature for verification */
struct ocfs2_block_check dl_check; /* Error checking */
__le64 dl_blkno; /* Offset on disk, in blocks */
__le32 dl_fs_generation;/* Must match super block */
__le32 dl_reserved0;
__le64 dl_reserved1;
struct ocfs2_dx_entry_list dl_list;
};
/*
* On disk allocator group structure for OCFS2
*/
......@@ -1112,6 +1208,16 @@ static inline int ocfs2_extent_recs_per_inode_with_xattr(
return size / sizeof(struct ocfs2_extent_rec);
}
static inline int ocfs2_extent_recs_per_dx_root(struct super_block *sb)
{
int size;
size = sb->s_blocksize -
offsetof(struct ocfs2_dx_root_block, dr_list.l_recs);
return size / sizeof(struct ocfs2_extent_rec);
}
static inline int ocfs2_chain_recs_per_inode(struct super_block *sb)
{
int size;
......@@ -1132,6 +1238,26 @@ static inline u16 ocfs2_extent_recs_per_eb(struct super_block *sb)
return size / sizeof(struct ocfs2_extent_rec);
}
static inline int ocfs2_dx_entries_per_leaf(struct super_block *sb)
{
int size;
size = sb->s_blocksize -
offsetof(struct ocfs2_dx_leaf, dl_list.de_entries);
return size / sizeof(struct ocfs2_dx_entry);
}
static inline int ocfs2_dx_entries_per_root(struct super_block *sb)
{
int size;
size = sb->s_blocksize -
offsetof(struct ocfs2_dx_root_block, dr_entries.de_entries);
return size / sizeof(struct ocfs2_dx_entry);
}
static inline u16 ocfs2_local_alloc_size(struct super_block *sb)
{
u16 size;
......
......@@ -47,6 +47,7 @@ enum ocfs2_lock_type {
OCFS2_LOCK_TYPE_OPEN,
OCFS2_LOCK_TYPE_FLOCK,
OCFS2_LOCK_TYPE_QINFO,
OCFS2_LOCK_TYPE_NFS_SYNC,
OCFS2_NUM_LOCK_TYPES
};
......@@ -81,6 +82,9 @@ static inline char ocfs2_lock_type_char(enum ocfs2_lock_type type)
case OCFS2_LOCK_TYPE_QINFO:
c = 'Q';
break;
case OCFS2_LOCK_TYPE_NFS_SYNC:
c = 'Y';
break;
default:
c = '\0';
}
......
......@@ -48,7 +48,8 @@
#include "buffer_head_io.h"
#define NOT_ALLOC_NEW_GROUP 0
#define ALLOC_NEW_GROUP 1
#define ALLOC_NEW_GROUP 0x1
#define ALLOC_GROUPS_FROM_GLOBAL 0x2
#define OCFS2_MAX_INODES_TO_STEAL 1024
......@@ -64,7 +65,9 @@ static int ocfs2_block_group_fill(handle_t *handle,
static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
struct inode *alloc_inode,
struct buffer_head *bh,
u64 max_block);
u64 max_block,
u64 *last_alloc_group,
int flags);
static int ocfs2_cluster_group_search(struct inode *inode,
struct buffer_head *group_bh,
......@@ -116,6 +119,7 @@ static inline void ocfs2_block_to_cluster_group(struct inode *inode,
u16 *bg_bit_off);
static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb,
u32 bits_wanted, u64 max_block,
int flags,
struct ocfs2_alloc_context **ac);
void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac)
......@@ -403,7 +407,9 @@ static inline u16 ocfs2_find_smallest_chain(struct ocfs2_chain_list *cl)
static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
struct inode *alloc_inode,
struct buffer_head *bh,
u64 max_block)
u64 max_block,
u64 *last_alloc_group,
int flags)
{
int status, credits;
struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bh->b_data;
......@@ -423,7 +429,7 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
cl = &fe->id2.i_chain;
status = ocfs2_reserve_clusters_with_limit(osb,
le16_to_cpu(cl->cl_cpg),
max_block, &ac);
max_block, flags, &ac);
if (status < 0) {
if (status != -ENOSPC)
mlog_errno(status);
......@@ -440,6 +446,11 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
goto bail;
}
if (last_alloc_group && *last_alloc_group != 0) {
mlog(0, "use old allocation group %llu for block group alloc\n",
(unsigned long long)*last_alloc_group);
ac->ac_last_group = *last_alloc_group;
}
status = ocfs2_claim_clusters(osb,
handle,
ac,
......@@ -514,6 +525,11 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
alloc_inode->i_blocks = ocfs2_inode_sector_count(alloc_inode);
status = 0;
/* save the new last alloc group so that the caller can cache it. */
if (last_alloc_group)
*last_alloc_group = ac->ac_last_group;
bail:
if (handle)
ocfs2_commit_trans(osb, handle);
......@@ -531,7 +547,8 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
struct ocfs2_alloc_context *ac,
int type,
u32 slot,
int alloc_new_group)
u64 *last_alloc_group,
int flags)
{
int status;
u32 bits_wanted = ac->ac_bits_wanted;
......@@ -587,7 +604,7 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
goto bail;
}
if (alloc_new_group != ALLOC_NEW_GROUP) {
if (!(flags & ALLOC_NEW_GROUP)) {
mlog(0, "Alloc File %u Full: wanted=%u, free_bits=%u, "
"and we don't alloc a new group for it.\n",
slot, bits_wanted, free_bits);
......@@ -596,7 +613,8 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
}
status = ocfs2_block_group_alloc(osb, alloc_inode, bh,
ac->ac_max_block);
ac->ac_max_block,
last_alloc_group, flags);
if (status < 0) {
if (status != -ENOSPC)
mlog_errno(status);
......@@ -640,7 +658,7 @@ int ocfs2_reserve_new_metadata_blocks(struct ocfs2_super *osb,
status = ocfs2_reserve_suballoc_bits(osb, (*ac),
EXTENT_ALLOC_SYSTEM_INODE,
slot, ALLOC_NEW_GROUP);
slot, NULL, ALLOC_NEW_GROUP);
if (status < 0) {
if (status != -ENOSPC)
mlog_errno(status);
......@@ -686,7 +704,8 @@ static int ocfs2_steal_inode_from_other_nodes(struct ocfs2_super *osb,
status = ocfs2_reserve_suballoc_bits(osb, ac,
INODE_ALLOC_SYSTEM_INODE,
slot, NOT_ALLOC_NEW_GROUP);
slot, NULL,
NOT_ALLOC_NEW_GROUP);
if (status >= 0) {
ocfs2_set_inode_steal_slot(osb, slot);
break;
......@@ -703,6 +722,7 @@ int ocfs2_reserve_new_inode(struct ocfs2_super *osb,
{
int status;
s16 slot = ocfs2_get_inode_steal_slot(osb);
u64 alloc_group;
*ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
if (!(*ac)) {
......@@ -738,12 +758,22 @@ int ocfs2_reserve_new_inode(struct ocfs2_super *osb,
goto inode_steal;
atomic_set(&osb->s_num_inodes_stolen, 0);
alloc_group = osb->osb_inode_alloc_group;
status = ocfs2_reserve_suballoc_bits(osb, *ac,
INODE_ALLOC_SYSTEM_INODE,
osb->slot_num, ALLOC_NEW_GROUP);
osb->slot_num,
&alloc_group,
ALLOC_NEW_GROUP |
ALLOC_GROUPS_FROM_GLOBAL);
if (status >= 0) {
status = 0;
spin_lock(&osb->osb_lock);
osb->osb_inode_alloc_group = alloc_group;
spin_unlock(&osb->osb_lock);
mlog(0, "after reservation, new allocation group is "
"%llu\n", (unsigned long long)alloc_group);
/*
* Some inodes must be freed by us, so try to allocate
* from our own next time.
......@@ -790,7 +820,7 @@ int ocfs2_reserve_cluster_bitmap_bits(struct ocfs2_super *osb,
status = ocfs2_reserve_suballoc_bits(osb, ac,
GLOBAL_BITMAP_SYSTEM_INODE,
OCFS2_INVALID_SLOT,
OCFS2_INVALID_SLOT, NULL,
ALLOC_NEW_GROUP);
if (status < 0 && status != -ENOSPC) {
mlog_errno(status);
......@@ -806,6 +836,7 @@ int ocfs2_reserve_cluster_bitmap_bits(struct ocfs2_super *osb,
* things a bit. */
static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb,
u32 bits_wanted, u64 max_block,
int flags,
struct ocfs2_alloc_context **ac)
{
int status;
......@@ -823,7 +854,8 @@ static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb,
(*ac)->ac_max_block = max_block;
status = -ENOSPC;
if (ocfs2_alloc_should_use_local(osb, bits_wanted)) {
if (!(flags & ALLOC_GROUPS_FROM_GLOBAL) &&
ocfs2_alloc_should_use_local(osb, bits_wanted)) {
status = ocfs2_reserve_local_alloc_bits(osb,
bits_wanted,
*ac);
......@@ -861,7 +893,8 @@ int ocfs2_reserve_clusters(struct ocfs2_super *osb,
u32 bits_wanted,
struct ocfs2_alloc_context **ac)
{
return ocfs2_reserve_clusters_with_limit(osb, bits_wanted, 0, ac);
return ocfs2_reserve_clusters_with_limit(osb, bits_wanted, 0,
ALLOC_NEW_GROUP, ac);
}
/*
......@@ -1618,8 +1651,41 @@ int ocfs2_claim_metadata(struct ocfs2_super *osb,
return status;
}
static void ocfs2_init_inode_ac_group(struct inode *dir,
struct buffer_head *parent_fe_bh,
struct ocfs2_alloc_context *ac)
{
struct ocfs2_dinode *fe = (struct ocfs2_dinode *)parent_fe_bh->b_data;
/*
* Try to allocate inodes from some specific group.
*
* If the parent dir has recorded the last group used in allocation,
* cool, use it. Otherwise if we try to allocate new inode from the
* same slot the parent dir belongs to, use the same chunk.
*
* We are very careful here to avoid the mistake of setting
* ac_last_group to a group descriptor from a different (unlocked) slot.
*/
if (OCFS2_I(dir)->ip_last_used_group &&
OCFS2_I(dir)->ip_last_used_slot == ac->ac_alloc_slot)
ac->ac_last_group = OCFS2_I(dir)->ip_last_used_group;
else if (le16_to_cpu(fe->i_suballoc_slot) == ac->ac_alloc_slot)
ac->ac_last_group = ocfs2_which_suballoc_group(
le64_to_cpu(fe->i_blkno),
le16_to_cpu(fe->i_suballoc_bit));
}
static inline void ocfs2_save_inode_ac_group(struct inode *dir,
struct ocfs2_alloc_context *ac)
{
OCFS2_I(dir)->ip_last_used_group = ac->ac_last_group;
OCFS2_I(dir)->ip_last_used_slot = ac->ac_alloc_slot;
}
int ocfs2_claim_new_inode(struct ocfs2_super *osb,
handle_t *handle,
struct inode *dir,
struct buffer_head *parent_fe_bh,
struct ocfs2_alloc_context *ac,
u16 *suballoc_bit,
u64 *fe_blkno)
......@@ -1635,6 +1701,8 @@ int ocfs2_claim_new_inode(struct ocfs2_super *osb,
BUG_ON(ac->ac_bits_wanted != 1);
BUG_ON(ac->ac_which != OCFS2_AC_USE_INODE);
ocfs2_init_inode_ac_group(dir, parent_fe_bh, ac);
status = ocfs2_claim_suballoc_bits(osb,
ac,
handle,
......@@ -1653,6 +1721,7 @@ int ocfs2_claim_new_inode(struct ocfs2_super *osb,
*fe_blkno = bg_blkno + (u64) (*suballoc_bit);
ac->ac_bits_given++;
ocfs2_save_inode_ac_group(dir, ac);
status = 0;
bail:
mlog_exit(status);
......@@ -2116,3 +2185,162 @@ int ocfs2_lock_allocators(struct inode *inode,
return ret;
}
/*
* Read the inode specified by blkno to get suballoc_slot and
* suballoc_bit.
*/
static int ocfs2_get_suballoc_slot_bit(struct ocfs2_super *osb, u64 blkno,
u16 *suballoc_slot, u16 *suballoc_bit)
{
int status;
struct buffer_head *inode_bh = NULL;
struct ocfs2_dinode *inode_fe;
mlog_entry("blkno: %llu\n", blkno);
/* dirty read disk */
status = ocfs2_read_blocks_sync(osb, blkno, 1, &inode_bh);
if (status < 0) {
mlog(ML_ERROR, "read block %llu failed %d\n", blkno, status);
goto bail;
}
inode_fe = (struct ocfs2_dinode *) inode_bh->b_data;
if (!OCFS2_IS_VALID_DINODE(inode_fe)) {
mlog(ML_ERROR, "invalid inode %llu requested\n", blkno);
status = -EINVAL;
goto bail;
}
if (le16_to_cpu(inode_fe->i_suballoc_slot) != OCFS2_INVALID_SLOT &&
(u32)le16_to_cpu(inode_fe->i_suballoc_slot) > osb->max_slots - 1) {
mlog(ML_ERROR, "inode %llu has invalid suballoc slot %u\n",
blkno, (u32)le16_to_cpu(inode_fe->i_suballoc_slot));
status = -EINVAL;
goto bail;
}
if (suballoc_slot)
*suballoc_slot = le16_to_cpu(inode_fe->i_suballoc_slot);
if (suballoc_bit)
*suballoc_bit = le16_to_cpu(inode_fe->i_suballoc_bit);
bail:
brelse(inode_bh);
mlog_exit(status);
return status;
}
/*
* test whether bit is SET in allocator bitmap or not. on success, 0
* is returned and *res is 1 for SET; 0 otherwise. when fails, errno
* is returned and *res is meaningless. Call this after you have
* cluster locked against suballoc, or you may get a result based on
* non-up2date contents
*/
static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
struct inode *suballoc,
struct buffer_head *alloc_bh, u64 blkno,
u16 bit, int *res)
{
struct ocfs2_dinode *alloc_fe;
struct ocfs2_group_desc *group;
struct buffer_head *group_bh = NULL;
u64 bg_blkno;
int status;
mlog_entry("blkno: %llu bit: %u\n", blkno, (unsigned int)bit);
alloc_fe = (struct ocfs2_dinode *)alloc_bh->b_data;
if ((bit + 1) > ocfs2_bits_per_group(&alloc_fe->id2.i_chain)) {
mlog(ML_ERROR, "suballoc bit %u out of range of %u\n",
(unsigned int)bit,
ocfs2_bits_per_group(&alloc_fe->id2.i_chain));
status = -EINVAL;
goto bail;
}
bg_blkno = ocfs2_which_suballoc_group(blkno, bit);
status = ocfs2_read_group_descriptor(suballoc, alloc_fe, bg_blkno,
&group_bh);
if (status < 0) {
mlog(ML_ERROR, "read group %llu failed %d\n", bg_blkno, status);
goto bail;
}
group = (struct ocfs2_group_desc *) group_bh->b_data;
*res = ocfs2_test_bit(bit, (unsigned long *)group->bg_bitmap);
bail:
brelse(group_bh);
mlog_exit(status);
return status;
}
/*
* Test if the bit representing this inode (blkno) is set in the
* suballocator.
*
* On success, 0 is returned and *res is 1 for SET; 0 otherwise.
*
* In the event of failure, a negative value is returned and *res is
* meaningless.
*
* Callers must make sure to hold nfs_sync_lock to prevent
* ocfs2_delete_inode() on another node from accessing the same
* suballocator concurrently.
*/
int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
{
int status;
u16 suballoc_bit = 0, suballoc_slot = 0;
struct inode *inode_alloc_inode;
struct buffer_head *alloc_bh = NULL;
mlog_entry("blkno: %llu", blkno);
status = ocfs2_get_suballoc_slot_bit(osb, blkno, &suballoc_slot,
&suballoc_bit);
if (status < 0) {
mlog(ML_ERROR, "get alloc slot and bit failed %d\n", status);
goto bail;
}
inode_alloc_inode =
ocfs2_get_system_file_inode(osb, INODE_ALLOC_SYSTEM_INODE,
suballoc_slot);
if (!inode_alloc_inode) {
/* the error code could be inaccurate, but we are not able to
* get the correct one. */
status = -EINVAL;
mlog(ML_ERROR, "unable to get alloc inode in slot %u\n",
(u32)suballoc_slot);
goto bail;
}
mutex_lock(&inode_alloc_inode->i_mutex);
status = ocfs2_inode_lock(inode_alloc_inode, &alloc_bh, 0);
if (status < 0) {
mutex_unlock(&inode_alloc_inode->i_mutex);
mlog(ML_ERROR, "lock on alloc inode on slot %u failed %d\n",
(u32)suballoc_slot, status);
goto bail;
}
status = ocfs2_test_suballoc_bit(osb, inode_alloc_inode, alloc_bh,
blkno, suballoc_bit, res);
if (status < 0)
mlog(ML_ERROR, "test suballoc bit failed %d\n", status);
ocfs2_inode_unlock(inode_alloc_inode, 0);
mutex_unlock(&inode_alloc_inode->i_mutex);
iput(inode_alloc_inode);
brelse(alloc_bh);
bail:
mlog_exit(status);
return status;
}
......@@ -88,6 +88,8 @@ int ocfs2_claim_metadata(struct ocfs2_super *osb,
u64 *blkno_start);
int ocfs2_claim_new_inode(struct ocfs2_super *osb,
handle_t *handle,
struct inode *dir,
struct buffer_head *parent_fe_bh,
struct ocfs2_alloc_context *ac,
u16 *suballoc_bit,
u64 *fe_blkno);
......@@ -186,4 +188,6 @@ int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_extent_tree *et,
u32 clusters_to_add, u32 extents_to_split,
struct ocfs2_alloc_context **data_ac,
struct ocfs2_alloc_context **meta_ac);
int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res);
#endif /* _CHAINALLOC_H_ */
......@@ -201,6 +201,170 @@ static const match_table_t tokens = {
{Opt_err, NULL}
};
#ifdef CONFIG_DEBUG_FS
static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len)
{
int out = 0;
int i;
struct ocfs2_cluster_connection *cconn = osb->cconn;
struct ocfs2_recovery_map *rm = osb->recovery_map;
out += snprintf(buf + out, len - out,
"%10s => Id: %-s Uuid: %-s Gen: 0x%X Label: %-s\n",
"Device", osb->dev_str, osb->uuid_str,
osb->fs_generation, osb->vol_label);
out += snprintf(buf + out, len - out,
"%10s => State: %d Flags: 0x%lX\n", "Volume",
atomic_read(&osb->vol_state), osb->osb_flags);
out += snprintf(buf + out, len - out,
"%10s => Block: %lu Cluster: %d\n", "Sizes",
osb->sb->s_blocksize, osb->s_clustersize);
out += snprintf(buf + out, len - out,
"%10s => Compat: 0x%X Incompat: 0x%X "
"ROcompat: 0x%X\n",
"Features", osb->s_feature_compat,
osb->s_feature_incompat, osb->s_feature_ro_compat);
out += snprintf(buf + out, len - out,
"%10s => Opts: 0x%lX AtimeQuanta: %u\n", "Mount",
osb->s_mount_opt, osb->s_atime_quantum);
out += snprintf(buf + out, len - out,
"%10s => Stack: %s Name: %*s Version: %d.%d\n",
"Cluster",
(*osb->osb_cluster_stack == '\0' ?
"o2cb" : osb->osb_cluster_stack),
cconn->cc_namelen, cconn->cc_name,
cconn->cc_version.pv_major, cconn->cc_version.pv_minor);
spin_lock(&osb->dc_task_lock);
out += snprintf(buf + out, len - out,
"%10s => Pid: %d Count: %lu WakeSeq: %lu "
"WorkSeq: %lu\n", "DownCnvt",
task_pid_nr(osb->dc_task), osb->blocked_lock_count,
osb->dc_wake_sequence, osb->dc_work_sequence);
spin_unlock(&osb->dc_task_lock);
spin_lock(&osb->osb_lock);
out += snprintf(buf + out, len - out, "%10s => Pid: %d Nodes:",
"Recovery",
(osb->recovery_thread_task ?
task_pid_nr(osb->recovery_thread_task) : -1));
if (rm->rm_used == 0)
out += snprintf(buf + out, len - out, " None\n");
else {
for (i = 0; i < rm->rm_used; i++)
out += snprintf(buf + out, len - out, " %d",
rm->rm_entries[i]);
out += snprintf(buf + out, len - out, "\n");
}
spin_unlock(&osb->osb_lock);
out += snprintf(buf + out, len - out,
"%10s => Pid: %d Interval: %lu Needs: %d\n", "Commit",
task_pid_nr(osb->commit_task), osb->osb_commit_interval,
atomic_read(&osb->needs_checkpoint));
out += snprintf(buf + out, len - out,
"%10s => State: %d NumTxns: %d TxnId: %lu\n",
"Journal", osb->journal->j_state,
atomic_read(&osb->journal->j_num_trans),
osb->journal->j_trans_id);
out += snprintf(buf + out, len - out,
"%10s => GlobalAllocs: %d LocalAllocs: %d "
"SubAllocs: %d LAWinMoves: %d SAExtends: %d\n",
"Stats",
atomic_read(&osb->alloc_stats.bitmap_data),
atomic_read(&osb->alloc_stats.local_data),
atomic_read(&osb->alloc_stats.bg_allocs),
atomic_read(&osb->alloc_stats.moves),
atomic_read(&osb->alloc_stats.bg_extends));
out += snprintf(buf + out, len - out,
"%10s => State: %u Descriptor: %llu Size: %u bits "
"Default: %u bits\n",
"LocalAlloc", osb->local_alloc_state,
(unsigned long long)osb->la_last_gd,
osb->local_alloc_bits, osb->local_alloc_default_bits);
spin_lock(&osb->osb_lock);
out += snprintf(buf + out, len - out,
"%10s => Slot: %d NumStolen: %d\n", "Steal",
osb->s_inode_steal_slot,
atomic_read(&osb->s_num_inodes_stolen));
spin_unlock(&osb->osb_lock);
out += snprintf(buf + out, len - out, "%10s => %3s %10s\n",
"Slots", "Num", "RecoGen");
for (i = 0; i < osb->max_slots; ++i) {
out += snprintf(buf + out, len - out,
"%10s %c %3d %10d\n",
" ",
(i == osb->slot_num ? '*' : ' '),
i, osb->slot_recovery_generations[i]);
}
return out;
}
static int ocfs2_osb_debug_open(struct inode *inode, struct file *file)
{
struct ocfs2_super *osb = inode->i_private;
char *buf = NULL;
buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
if (!buf)
goto bail;
i_size_write(inode, ocfs2_osb_dump(osb, buf, PAGE_SIZE));
file->private_data = buf;
return 0;
bail:
return -ENOMEM;
}
static int ocfs2_debug_release(struct inode *inode, struct file *file)
{
kfree(file->private_data);
return 0;
}
static ssize_t ocfs2_debug_read(struct file *file, char __user *buf,
size_t nbytes, loff_t *ppos)
{
return simple_read_from_buffer(buf, nbytes, ppos, file->private_data,
i_size_read(file->f_mapping->host));
}
#else
static int ocfs2_osb_debug_open(struct inode *inode, struct file *file)
{
return 0;
}
static int ocfs2_debug_release(struct inode *inode, struct file *file)
{
return 0;
}
static ssize_t ocfs2_debug_read(struct file *file, char __user *buf,
size_t nbytes, loff_t *ppos)
{
return 0;
}
#endif /* CONFIG_DEBUG_FS */
static struct file_operations ocfs2_osb_debug_fops = {
.open = ocfs2_osb_debug_open,
.release = ocfs2_debug_release,
.read = ocfs2_debug_read,
.llseek = generic_file_llseek,
};
/*
* write_super and sync_fs ripped right out of ext3.
*/
......@@ -926,6 +1090,16 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
goto read_super_error;
}
osb->osb_ctxt = debugfs_create_file("fs_state", S_IFREG|S_IRUSR,
osb->osb_debug_root,
osb,
&ocfs2_osb_debug_fops);
if (!osb->osb_ctxt) {
status = -EINVAL;
mlog_errno(status);
goto read_super_error;
}
status = ocfs2_mount_volume(sb);
if (osb->root_inode)
inode = igrab(osb->root_inode);
......@@ -1620,6 +1794,8 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
osb = OCFS2_SB(sb);
BUG_ON(!osb);
debugfs_remove(osb->osb_ctxt);
ocfs2_disable_quotas(osb);
ocfs2_shutdown_local_alloc(osb);
......@@ -1742,6 +1918,12 @@ static int ocfs2_initialize_super(struct super_block *sb,
bbits = le32_to_cpu(di->id2.i_super.s_blocksize_bits);
sb->s_maxbytes = ocfs2_max_file_offset(bbits, cbits);
osb->osb_dx_mask = (1 << (cbits - bbits)) - 1;
for (i = 0; i < 3; i++)
osb->osb_dx_seed[i] = le32_to_cpu(di->id2.i_super.s_dx_seed[i]);
osb->osb_dx_seed[3] = le32_to_cpu(di->id2.i_super.s_uuid_hash);
osb->sb = sb;
/* Save off for ocfs2_rw_direct */
osb->s_sectsize_bits = blksize_bits(sector_size);
......@@ -2130,6 +2312,12 @@ static int ocfs2_check_volume(struct ocfs2_super *osb)
* lock, and it's marked as dirty, set the bit in the recover
* map and launch a recovery thread for it. */
status = ocfs2_mark_dead_nodes(osb);
if (status < 0) {
mlog_errno(status);
goto finally;
}
status = ocfs2_compute_replay_slots(osb);
if (status < 0)
mlog_errno(status);
......
......@@ -512,7 +512,7 @@ int ocfs2_calc_xattr_init(struct inode *dir,
struct ocfs2_security_xattr_info *si,
int *want_clusters,
int *xattr_credits,
struct ocfs2_alloc_context **xattr_ac)
int *want_meta)
{
int ret = 0;
struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
......@@ -554,11 +554,7 @@ int ocfs2_calc_xattr_init(struct inode *dir,
if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
(S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) ||
(s_size + a_size) > OCFS2_XATTR_FREE_IN_IBODY) {
ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac);
if (ret) {
mlog_errno(ret);
return ret;
}
*want_meta = *want_meta + 1;
*xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
}
......
......@@ -68,7 +68,7 @@ int ocfs2_calc_security_init(struct inode *,
int *, int *, struct ocfs2_alloc_context **);
int ocfs2_calc_xattr_init(struct inode *, struct buffer_head *,
int, struct ocfs2_security_xattr_info *,
int *, int *, struct ocfs2_alloc_context **);
int *, int *, int *);
/*
* xattrs can live inside an inode, as part of an external xattr block,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment