Commit d3f8fd76 authored by Linus Torvalds

Merge git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-2.6-nmw

* git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-2.6-nmw: (57 commits)
  [GFS2] make gfs2_writepages() static
  [GFS2] Unlock page on prepare_write try lock failure
  [GFS2] nfsd readdirplus assertion failure
  [DLM] fix softlockup in dlm_recv
  [DLM] zero new user lvbs
  [DLM/GFS2] indent help text
  [GFS2] Fix unlink deadlocks
  [GFS2] Put back semaphore to avoid umount problem
  [GFS2] more CURRENT_TIME_SEC
  [GFS2/DLM] fix GFS2 circular dependency
  [GFS2/DLM] use sysfs
  [GFS2] make lock_dlm drop_count tunable in sysfs
  [GFS2] increase default lock limit
  [GFS2] Fix list corruption in lops.c
  [GFS2] Fix recursive locking attempt with NFS
  [DLM] can miss clearing resend flag
  [DLM] saved dlm message can be dropped
  [DLM] Make sock_sem into a mutex
  [GFS2] Fix typo in glock.c
  [GFS2] use CURRENT_TIME_SEC instead of get_seconds in gfs2
  ...
parents 0670afdf a2cf8222
......@@ -3,21 +3,21 @@ menu "Distributed Lock Manager"
config DLM
tristate "Distributed Lock Manager (DLM)"
depends on IPV6 || IPV6=n
depends on SYSFS && (IPV6 || IPV6=n)
select CONFIGFS_FS
select IP_SCTP if DLM_SCTP
help
A general purpose distributed lock manager for kernel or userspace
applications.
choice
prompt "Select DLM communications protocol"
depends on DLM
default DLM_TCP
help
The DLM can use TCP or SCTP for its network communications.
SCTP supports multi-homed operations whereas TCP doesn't.
However, SCTP seems to have stability problems at the moment.
config DLM_TCP
bool "TCP/IP"
......@@ -31,8 +31,8 @@ config DLM_DEBUG
bool "DLM debugging"
depends on DLM
help
Under the debugfs mount point, the name of each lockspace will
appear as a file in the "dlm" directory. The output is the
list of resources and locks the local node knows about.
endmenu
......@@ -54,6 +54,11 @@ static struct config_item *make_node(struct config_group *, const char *);
static void drop_node(struct config_group *, struct config_item *);
static void release_node(struct config_item *);
static ssize_t show_cluster(struct config_item *i, struct configfs_attribute *a,
char *buf);
static ssize_t store_cluster(struct config_item *i,
struct configfs_attribute *a,
const char *buf, size_t len);
static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a,
char *buf);
static ssize_t store_comm(struct config_item *i, struct configfs_attribute *a,
......@@ -73,6 +78,101 @@ static ssize_t node_nodeid_write(struct node *nd, const char *buf, size_t len);
static ssize_t node_weight_read(struct node *nd, char *buf);
static ssize_t node_weight_write(struct node *nd, const char *buf, size_t len);
struct cluster {
struct config_group group;
unsigned int cl_tcp_port;
unsigned int cl_buffer_size;
unsigned int cl_rsbtbl_size;
unsigned int cl_lkbtbl_size;
unsigned int cl_dirtbl_size;
unsigned int cl_recover_timer;
unsigned int cl_toss_secs;
unsigned int cl_scan_secs;
unsigned int cl_log_debug;
};
enum {
CLUSTER_ATTR_TCP_PORT = 0,
CLUSTER_ATTR_BUFFER_SIZE,
CLUSTER_ATTR_RSBTBL_SIZE,
CLUSTER_ATTR_LKBTBL_SIZE,
CLUSTER_ATTR_DIRTBL_SIZE,
CLUSTER_ATTR_RECOVER_TIMER,
CLUSTER_ATTR_TOSS_SECS,
CLUSTER_ATTR_SCAN_SECS,
CLUSTER_ATTR_LOG_DEBUG,
};
struct cluster_attribute {
struct configfs_attribute attr;
ssize_t (*show)(struct cluster *, char *);
ssize_t (*store)(struct cluster *, const char *, size_t);
};
static ssize_t cluster_set(struct cluster *cl, unsigned int *cl_field,
unsigned int *info_field, int check_zero,
const char *buf, size_t len)
{
unsigned int x;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
x = simple_strtoul(buf, NULL, 0);
if (check_zero && !x)
return -EINVAL;
*cl_field = x;
*info_field = x;
return len;
}
#define __CONFIGFS_ATTR(_name,_mode,_read,_write) { \
.attr = { .ca_name = __stringify(_name), \
.ca_mode = _mode, \
.ca_owner = THIS_MODULE }, \
.show = _read, \
.store = _write, \
}
#define CLUSTER_ATTR(name, check_zero) \
static ssize_t name##_write(struct cluster *cl, const char *buf, size_t len) \
{ \
return cluster_set(cl, &cl->cl_##name, &dlm_config.ci_##name, \
check_zero, buf, len); \
} \
static ssize_t name##_read(struct cluster *cl, char *buf) \
{ \
return snprintf(buf, PAGE_SIZE, "%u\n", cl->cl_##name); \
} \
static struct cluster_attribute cluster_attr_##name = \
__CONFIGFS_ATTR(name, 0644, name##_read, name##_write)
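For orientation, expanding CLUSTER_ATTR(tcp_port, 1) through __CONFIGFS_ATTR
by hand yields roughly the following (a sketch of the preprocessor output,
not literal patch code):

	static ssize_t tcp_port_write(struct cluster *cl, const char *buf,
				      size_t len)
	{
		return cluster_set(cl, &cl->cl_tcp_port,
				   &dlm_config.ci_tcp_port, 1, buf, len);
	}
	static ssize_t tcp_port_read(struct cluster *cl, char *buf)
	{
		return snprintf(buf, PAGE_SIZE, "%u\n", cl->cl_tcp_port);
	}
	static struct cluster_attribute cluster_attr_tcp_port = {
		.attr = { .ca_name = "tcp_port",
			  .ca_mode = 0644,
			  .ca_owner = THIS_MODULE },
		.show = tcp_port_read,
		.store = tcp_port_write,
	};

Writing to the resulting configfs file (typically
/sys/kernel/config/dlm/cluster/tcp_port once a cluster group exists; path
assumed) lands in tcp_port_write() via store_cluster() later in this file,
and reads go through tcp_port_read().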
CLUSTER_ATTR(tcp_port, 1);
CLUSTER_ATTR(buffer_size, 1);
CLUSTER_ATTR(rsbtbl_size, 1);
CLUSTER_ATTR(lkbtbl_size, 1);
CLUSTER_ATTR(dirtbl_size, 1);
CLUSTER_ATTR(recover_timer, 1);
CLUSTER_ATTR(toss_secs, 1);
CLUSTER_ATTR(scan_secs, 1);
CLUSTER_ATTR(log_debug, 0);
static struct configfs_attribute *cluster_attrs[] = {
[CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr,
[CLUSTER_ATTR_BUFFER_SIZE] = &cluster_attr_buffer_size.attr,
[CLUSTER_ATTR_RSBTBL_SIZE] = &cluster_attr_rsbtbl_size.attr,
[CLUSTER_ATTR_LKBTBL_SIZE] = &cluster_attr_lkbtbl_size.attr,
[CLUSTER_ATTR_DIRTBL_SIZE] = &cluster_attr_dirtbl_size.attr,
[CLUSTER_ATTR_RECOVER_TIMER] = &cluster_attr_recover_timer.attr,
[CLUSTER_ATTR_TOSS_SECS] = &cluster_attr_toss_secs.attr,
[CLUSTER_ATTR_SCAN_SECS] = &cluster_attr_scan_secs.attr,
[CLUSTER_ATTR_LOG_DEBUG] = &cluster_attr_log_debug.attr,
NULL,
};
enum {
COMM_ATTR_NODEID = 0,
COMM_ATTR_LOCAL,
......@@ -152,10 +252,6 @@ struct clusters {
struct configfs_subsystem subsys;
};
struct cluster {
struct config_group group;
};
struct spaces {
struct config_group ss_group;
};
......@@ -197,6 +293,8 @@ static struct configfs_group_operations clusters_ops = {
static struct configfs_item_operations cluster_ops = {
.release = release_cluster,
.show_attribute = show_cluster,
.store_attribute = store_cluster,
};
static struct configfs_group_operations spaces_ops = {
......@@ -237,6 +335,7 @@ static struct config_item_type clusters_type = {
static struct config_item_type cluster_type = {
.ct_item_ops = &cluster_ops,
.ct_attrs = cluster_attrs,
.ct_owner = THIS_MODULE,
};
......@@ -317,6 +416,16 @@ static struct config_group *make_cluster(struct config_group *g,
cl->group.default_groups[1] = &cms->cs_group;
cl->group.default_groups[2] = NULL;
cl->cl_tcp_port = dlm_config.ci_tcp_port;
cl->cl_buffer_size = dlm_config.ci_buffer_size;
cl->cl_rsbtbl_size = dlm_config.ci_rsbtbl_size;
cl->cl_lkbtbl_size = dlm_config.ci_lkbtbl_size;
cl->cl_dirtbl_size = dlm_config.ci_dirtbl_size;
cl->cl_recover_timer = dlm_config.ci_recover_timer;
cl->cl_toss_secs = dlm_config.ci_toss_secs;
cl->cl_scan_secs = dlm_config.ci_scan_secs;
cl->cl_log_debug = dlm_config.ci_log_debug;
space_list = &sps->ss_group;
comm_list = &cms->cs_group;
return &cl->group;
......@@ -509,6 +618,25 @@ void dlm_config_exit(void)
* Functions for user space to read/write attributes
*/
static ssize_t show_cluster(struct config_item *i, struct configfs_attribute *a,
char *buf)
{
struct cluster *cl = to_cluster(i);
struct cluster_attribute *cla =
container_of(a, struct cluster_attribute, attr);
return cla->show ? cla->show(cl, buf) : 0;
}
static ssize_t store_cluster(struct config_item *i,
struct configfs_attribute *a,
const char *buf, size_t len)
{
struct cluster *cl = to_cluster(i);
struct cluster_attribute *cla =
container_of(a, struct cluster_attribute, attr);
return cla->store ? cla->store(cl, buf, len) : -EINVAL;
}
static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a,
char *buf)
{
......@@ -775,15 +903,17 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num)
#define DEFAULT_RECOVER_TIMER 5
#define DEFAULT_TOSS_SECS 10
#define DEFAULT_SCAN_SECS 5
#define DEFAULT_LOG_DEBUG 0
struct dlm_config_info dlm_config = {
.tcp_port = DEFAULT_TCP_PORT,
.buffer_size = DEFAULT_BUFFER_SIZE,
.rsbtbl_size = DEFAULT_RSBTBL_SIZE,
.lkbtbl_size = DEFAULT_LKBTBL_SIZE,
.dirtbl_size = DEFAULT_DIRTBL_SIZE,
.recover_timer = DEFAULT_RECOVER_TIMER,
.toss_secs = DEFAULT_TOSS_SECS,
.scan_secs = DEFAULT_SCAN_SECS
.ci_tcp_port = DEFAULT_TCP_PORT,
.ci_buffer_size = DEFAULT_BUFFER_SIZE,
.ci_rsbtbl_size = DEFAULT_RSBTBL_SIZE,
.ci_lkbtbl_size = DEFAULT_LKBTBL_SIZE,
.ci_dirtbl_size = DEFAULT_DIRTBL_SIZE,
.ci_recover_timer = DEFAULT_RECOVER_TIMER,
.ci_toss_secs = DEFAULT_TOSS_SECS,
.ci_scan_secs = DEFAULT_SCAN_SECS,
.ci_log_debug = DEFAULT_LOG_DEBUG
};
......@@ -17,14 +17,15 @@
#define DLM_MAX_ADDR_COUNT 3
struct dlm_config_info {
int tcp_port;
int buffer_size;
int rsbtbl_size;
int lkbtbl_size;
int dirtbl_size;
int recover_timer;
int toss_secs;
int scan_secs;
int ci_tcp_port;
int ci_buffer_size;
int ci_rsbtbl_size;
int ci_lkbtbl_size;
int ci_dirtbl_size;
int ci_recover_timer;
int ci_toss_secs;
int ci_scan_secs;
int ci_log_debug;
};
extern struct dlm_config_info dlm_config;
......
......@@ -41,6 +41,7 @@
#include <asm/uaccess.h>
#include <linux/dlm.h>
#include "config.h"
#define DLM_LOCKSPACE_LEN 64
......@@ -69,12 +70,12 @@ struct dlm_mhandle;
#define log_error(ls, fmt, args...) \
printk(KERN_ERR "dlm: %s: " fmt "\n", (ls)->ls_name , ##args)
#define DLM_LOG_DEBUG
#ifdef DLM_LOG_DEBUG
#define log_debug(ls, fmt, args...) log_error(ls, fmt, ##args)
#else
#define log_debug(ls, fmt, args...)
#endif
#define log_debug(ls, fmt, args...) \
do { \
if (dlm_config.ci_log_debug) \
printk(KERN_DEBUG "dlm: %s: " fmt "\n", \
(ls)->ls_name , ##args); \
} while (0)
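The debug macro is thus compiled in unconditionally but gated at run time by
the new ci_log_debug tunable (exposed as the log_debug cluster attribute
above). A typical call site elsewhere in this patch:

	log_debug(ls, "remote node %d not ready", nodeid);

prints "dlm: <lockspace>: remote node N not ready" at KERN_DEBUG only while
log_debug is set non-zero.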
#define DLM_ASSERT(x, do) \
{ \
......@@ -309,8 +310,8 @@ static inline int rsb_flag(struct dlm_rsb *r, enum rsb_flags flag)
/* dlm_header is first element of all structs sent between nodes */
#define DLM_HEADER_MAJOR 0x00020000
#define DLM_HEADER_MINOR 0x00000001
#define DLM_HEADER_MAJOR 0x00030000
#define DLM_HEADER_MINOR 0x00000000
#define DLM_MSG 1
#define DLM_RCOM 2
......@@ -386,6 +387,8 @@ struct dlm_rcom {
uint32_t rc_type; /* DLM_RCOM_ */
int rc_result; /* multi-purpose */
uint64_t rc_id; /* match reply with request */
uint64_t rc_seq; /* sender's ls_recover_seq */
uint64_t rc_seq_reply; /* remote ls_recover_seq */
char rc_buf[0];
};
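rc_buf[0] is the pre-C99 zero-length-array idiom for a variable-length
trailing payload: the struct is allocated oversized and the payload length is
recovered from the transport header rather than stored separately.
receive_rcom_names() later in this patch does exactly that:

	inlen = rc_in->rc_header.h_length - sizeof(struct dlm_rcom);
	outlen = dlm_config.ci_buffer_size - sizeof(struct dlm_rcom);

so rc_buf can carry names, lock data, or config depending on rc_type.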
......@@ -523,6 +526,7 @@ struct dlm_user_proc {
spinlock_t asts_spin;
struct list_head locks;
spinlock_t locks_spin;
struct list_head unlocking;
wait_queue_head_t wait;
};
......
......@@ -754,6 +754,11 @@ static void add_to_waiters(struct dlm_lkb *lkb, int mstype)
mutex_unlock(&ls->ls_waiters_mutex);
}
/* We clear the RESEND flag because we might be taking an lkb off the waiters
list as part of process_requestqueue (e.g. a lookup that has an optimized
request reply on the requestqueue) between dlm_recover_waiters_pre() which
set RESEND and dlm_recover_waiters_post() */
static int _remove_from_waiters(struct dlm_lkb *lkb)
{
int error = 0;
......@@ -764,6 +769,7 @@ static int _remove_from_waiters(struct dlm_lkb *lkb)
goto out;
}
lkb->lkb_wait_type = 0;
lkb->lkb_flags &= ~DLM_IFL_RESEND;
list_del(&lkb->lkb_wait_reply);
unhold_lkb(lkb);
out:
......@@ -810,7 +816,7 @@ static int shrink_bucket(struct dlm_ls *ls, int b)
list_for_each_entry_reverse(r, &ls->ls_rsbtbl[b].toss,
res_hashchain) {
if (!time_after_eq(jiffies, r->res_toss_time +
dlm_config.toss_secs * HZ))
dlm_config.ci_toss_secs * HZ))
continue;
found = 1;
break;
......@@ -2144,12 +2150,24 @@ static void send_args(struct dlm_rsb *r, struct dlm_lkb *lkb,
if (lkb->lkb_astaddr)
ms->m_asts |= AST_COMP;
if (ms->m_type == DLM_MSG_REQUEST || ms->m_type == DLM_MSG_LOOKUP)
memcpy(ms->m_extra, r->res_name, r->res_length);
/* compare with switch in create_message; send_remove() doesn't
use send_args() */
else if (lkb->lkb_lvbptr)
switch (ms->m_type) {
case DLM_MSG_REQUEST:
case DLM_MSG_LOOKUP:
memcpy(ms->m_extra, r->res_name, r->res_length);
break;
case DLM_MSG_CONVERT:
case DLM_MSG_UNLOCK:
case DLM_MSG_REQUEST_REPLY:
case DLM_MSG_CONVERT_REPLY:
case DLM_MSG_GRANT:
if (!lkb->lkb_lvbptr)
break;
memcpy(ms->m_extra, lkb->lkb_lvbptr, r->res_ls->ls_lvblen);
break;
}
}
static int send_common(struct dlm_rsb *r, struct dlm_lkb *lkb, int mstype)
......@@ -2418,8 +2436,12 @@ static int receive_request_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
DLM_ASSERT(is_master_copy(lkb), dlm_print_lkb(lkb););
if (receive_lvb(ls, lkb, ms))
return -ENOMEM;
if (lkb->lkb_exflags & DLM_LKF_VALBLK) {
/* lkb was just created so there won't be an lvb yet */
lkb->lkb_lvbptr = allocate_lvb(ls);
if (!lkb->lkb_lvbptr)
return -ENOMEM;
}
return 0;
}
......@@ -3002,7 +3024,7 @@ int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery)
{
struct dlm_message *ms = (struct dlm_message *) hd;
struct dlm_ls *ls;
int error;
int error = 0;
if (!recovery)
dlm_message_in(ms);
......@@ -3119,7 +3141,7 @@ int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery)
out:
dlm_put_lockspace(ls);
dlm_astd_wake();
return 0;
return error;
}
......@@ -3132,6 +3154,7 @@ static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb)
if (middle_conversion(lkb)) {
hold_lkb(lkb);
ls->ls_stub_ms.m_result = -EINPROGRESS;
ls->ls_stub_ms.m_flags = lkb->lkb_flags;
_remove_from_waiters(lkb);
_receive_convert_reply(lkb, &ls->ls_stub_ms);
......@@ -3205,6 +3228,7 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls)
case DLM_MSG_UNLOCK:
hold_lkb(lkb);
ls->ls_stub_ms.m_result = -DLM_EUNLOCK;
ls->ls_stub_ms.m_flags = lkb->lkb_flags;
_remove_from_waiters(lkb);
_receive_unlock_reply(lkb, &ls->ls_stub_ms);
dlm_put_lkb(lkb);
......@@ -3213,6 +3237,7 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls)
case DLM_MSG_CANCEL:
hold_lkb(lkb);
ls->ls_stub_ms.m_result = -DLM_ECANCEL;
ls->ls_stub_ms.m_flags = lkb->lkb_flags;
_remove_from_waiters(lkb);
_receive_cancel_reply(lkb, &ls->ls_stub_ms);
dlm_put_lkb(lkb);
......@@ -3571,6 +3596,14 @@ int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc)
lock_rsb(r);
switch (error) {
case -EBADR:
/* There's a chance the new master received our lock before
dlm_recover_master_reply(); this wouldn't happen if we did
a barrier between recover_masters and recover_locks. */
log_debug(ls, "master copy not ready %x r %lx %s", lkb->lkb_id,
(unsigned long)r, r->res_name);
dlm_send_rcom_lock(r, lkb);
goto out;
case -EEXIST:
log_debug(ls, "master copy exists %x", lkb->lkb_id);
/* fall through */
......@@ -3585,7 +3618,7 @@ int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc)
/* an ack for dlm_recover_locks() which waits for replies from
all the locks it sends to new masters */
dlm_recovered_lock(r);
out:
unlock_rsb(r);
put_rsb(r);
dlm_put_lkb(lkb);
......@@ -3610,7 +3643,7 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
}
if (flags & DLM_LKF_VALBLK) {
ua->lksb.sb_lvbptr = kmalloc(DLM_USER_LVB_LEN, GFP_KERNEL);
ua->lksb.sb_lvbptr = kzalloc(DLM_USER_LVB_LEN, GFP_KERNEL);
if (!ua->lksb.sb_lvbptr) {
kfree(ua);
__put_lkb(ls, lkb);
......@@ -3679,7 +3712,7 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
ua = (struct dlm_user_args *)lkb->lkb_astparam;
if (flags & DLM_LKF_VALBLK && !ua->lksb.sb_lvbptr) {
ua->lksb.sb_lvbptr = kmalloc(DLM_USER_LVB_LEN, GFP_KERNEL);
ua->lksb.sb_lvbptr = kzalloc(DLM_USER_LVB_LEN, GFP_KERNEL);
if (!ua->lksb.sb_lvbptr) {
error = -ENOMEM;
goto out_put;
......@@ -3745,12 +3778,10 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
goto out_put;
spin_lock(&ua->proc->locks_spin);
list_del_init(&lkb->lkb_ownqueue);
/* dlm_user_add_ast() may have already taken lkb off the proc list */
if (!list_empty(&lkb->lkb_ownqueue))
list_move(&lkb->lkb_ownqueue, &ua->proc->unlocking);
spin_unlock(&ua->proc->locks_spin);
/* this removes the reference for the proc->locks list added by
dlm_user_request */
unhold_lkb(lkb);
out_put:
dlm_put_lkb(lkb);
out:
......@@ -3790,9 +3821,8 @@ int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
/* this lkb was removed from the WAITING queue */
if (lkb->lkb_grmode == DLM_LOCK_IV) {
spin_lock(&ua->proc->locks_spin);
list_del_init(&lkb->lkb_ownqueue);
list_move(&lkb->lkb_ownqueue, &ua->proc->unlocking);
spin_unlock(&ua->proc->locks_spin);
unhold_lkb(lkb);
}
out_put:
dlm_put_lkb(lkb);
......@@ -3853,11 +3883,6 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
mutex_lock(&ls->ls_clear_proc_locks);
list_for_each_entry_safe(lkb, safe, &proc->locks, lkb_ownqueue) {
if (lkb->lkb_ast_type) {
list_del(&lkb->lkb_astqueue);
unhold_lkb(lkb);
}
list_del_init(&lkb->lkb_ownqueue);
if (lkb->lkb_exflags & DLM_LKF_PERSISTENT) {
......@@ -3874,6 +3899,20 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
dlm_put_lkb(lkb);
}
/* in-progress unlocks */
list_for_each_entry_safe(lkb, safe, &proc->unlocking, lkb_ownqueue) {
list_del_init(&lkb->lkb_ownqueue);
lkb->lkb_flags |= DLM_IFL_DEAD;
dlm_put_lkb(lkb);
}
list_for_each_entry_safe(lkb, safe, &proc->asts, lkb_astqueue) {
list_del(&lkb->lkb_astqueue);
dlm_put_lkb(lkb);
}
mutex_unlock(&ls->ls_clear_proc_locks);
unlock_recovery(ls);
}
......@@ -236,7 +236,7 @@ static int dlm_scand(void *data)
while (!kthread_should_stop()) {
list_for_each_entry(ls, &lslist, ls_list)
dlm_scan_rsbs(ls);
schedule_timeout_interruptible(dlm_config.scan_secs * HZ);
schedule_timeout_interruptible(dlm_config.ci_scan_secs * HZ);
}
return 0;
}
......@@ -422,7 +422,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
ls->ls_count = 0;
ls->ls_flags = 0;
size = dlm_config.rsbtbl_size;
size = dlm_config.ci_rsbtbl_size;
ls->ls_rsbtbl_size = size;
ls->ls_rsbtbl = kmalloc(sizeof(struct dlm_rsbtable) * size, GFP_KERNEL);
......@@ -434,7 +434,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
rwlock_init(&ls->ls_rsbtbl[i].lock);
}
size = dlm_config.lkbtbl_size;
size = dlm_config.ci_lkbtbl_size;
ls->ls_lkbtbl_size = size;
ls->ls_lkbtbl = kmalloc(sizeof(struct dlm_lkbtable) * size, GFP_KERNEL);
......@@ -446,7 +446,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
ls->ls_lkbtbl[i].counter = 1;
}
size = dlm_config.dirtbl_size;
size = dlm_config.ci_dirtbl_size;
ls->ls_dirtbl_size = size;
ls->ls_dirtbl = kmalloc(sizeof(struct dlm_dirtable) * size, GFP_KERNEL);
......@@ -489,7 +489,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
mutex_init(&ls->ls_requestqueue_mutex);
mutex_init(&ls->ls_clear_proc_locks);
ls->ls_recover_buf = kmalloc(dlm_config.buffer_size, GFP_KERNEL);
ls->ls_recover_buf = kmalloc(dlm_config.ci_buffer_size, GFP_KERNEL);
if (!ls->ls_recover_buf)
goto out_dirfree;
......
......@@ -72,6 +72,8 @@ struct nodeinfo {
struct list_head writequeue; /* outgoing writequeue_entries */
spinlock_t writequeue_lock;
int nodeid;
struct work_struct swork; /* Send workqueue */
struct work_struct lwork; /* Locking workqueue */
};
static DEFINE_IDR(nodeinfo_idr);
......@@ -96,6 +98,7 @@ struct connection {
atomic_t waiting_requests;
struct cbuf cb;
int eagain_flag;
struct work_struct work; /* Send workqueue */
};
/* An entry waiting to be sent */
......@@ -137,19 +140,23 @@ static void cbuf_eat(struct cbuf *cb, int n)
static LIST_HEAD(write_nodes);
static DEFINE_SPINLOCK(write_nodes_lock);
/* Maximum number of incoming messages to process before
* doing a schedule()
*/
#define MAX_RX_MSG_COUNT 25
/* Manage daemons */
static struct task_struct *recv_task;
static struct task_struct *send_task;
static DECLARE_WAIT_QUEUE_HEAD(lowcomms_recv_wait);
/* Work queues */
static struct workqueue_struct *recv_workqueue;
static struct workqueue_struct *send_workqueue;
static struct workqueue_struct *lock_workqueue;
/* The SCTP connection */
static struct connection sctp_con;
static void process_send_sockets(struct work_struct *work);
static void process_recv_sockets(struct work_struct *work);
static void process_lock_request(struct work_struct *work);
static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr)
{
......@@ -222,6 +229,8 @@ static struct nodeinfo *nodeid2nodeinfo(int nodeid, gfp_t alloc)
spin_lock_init(&ni->lock);
INIT_LIST_HEAD(&ni->writequeue);
spin_lock_init(&ni->writequeue_lock);
INIT_WORK(&ni->lwork, process_lock_request);
INIT_WORK(&ni->swork, process_send_sockets);
ni->nodeid = nodeid;
if (nodeid > max_nodeid)
......@@ -249,11 +258,8 @@ static struct nodeinfo *assoc2nodeinfo(sctp_assoc_t assoc)
/* Data or notification available on socket */
static void lowcomms_data_ready(struct sock *sk, int count_unused)
{
atomic_inc(&sctp_con.waiting_requests);
if (test_and_set_bit(CF_READ_PENDING, &sctp_con.flags))
return;
wake_up_interruptible(&lowcomms_recv_wait);
queue_work(recv_workqueue, &sctp_con.work);
}
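lowcomms_data_ready() now runs in softirq context and only queues work; the
actual receive path runs later in process_recv_sockets(). A minimal,
self-contained sketch of the pattern this hunk adopts (names hypothetical,
2.6.20-era workqueue API):

	#include <linux/workqueue.h>

	struct conn {
		struct socket *sock;
		struct work_struct rwork;	/* queued from ->sk_data_ready */
	};

	static void conn_recv_work(struct work_struct *work)
	{
		/* recover the enclosing object from the embedded work item */
		struct conn *c = container_of(work, struct conn, rwork);
		/* ... drain c->sock until the socket would block ... */
	}

	/* at init time:      INIT_WORK(&c->rwork, conn_recv_work);
	 * in the callback:   queue_work(recv_wq, &c->rwork);  */

queue_work() is a no-op for an item that is already pending, so the softirq
callback can call it unconditionally.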
......@@ -361,10 +367,10 @@ static void init_failed(void)
spin_lock_bh(&write_nodes_lock);
list_add_tail(&ni->write_list, &write_nodes);
spin_unlock_bh(&write_nodes_lock);
queue_work(send_workqueue, &ni->swork);
}
}
}
wake_up_process(send_task);
}
/* Something happened to an association */
......@@ -446,8 +452,8 @@ static void process_sctp_notification(struct msghdr *msg, char *buf)
spin_lock_bh(&write_nodes_lock);
list_add_tail(&ni->write_list, &write_nodes);
spin_unlock_bh(&write_nodes_lock);
queue_work(send_workqueue, &ni->swork);
}
wake_up_process(send_task);
}
break;
......@@ -580,8 +586,8 @@ static int receive_from_sock(void)
spin_lock_bh(&write_nodes_lock);
list_add_tail(&ni->write_list, &write_nodes);
spin_unlock_bh(&write_nodes_lock);
queue_work(send_workqueue, &ni->swork);
}
wake_up_process(send_task);
}
}
......@@ -590,6 +596,7 @@ static int receive_from_sock(void)
return 0;
cbuf_add(&sctp_con.cb, ret);
// PJC: TODO: Add to node's workqueue....can we ??
ret = dlm_process_incoming_buffer(cpu_to_le32(sinfo->sinfo_ppid),
page_address(sctp_con.rx_page),
sctp_con.cb.base, sctp_con.cb.len,
......@@ -635,7 +642,7 @@ static int add_bind_addr(struct sockaddr_storage *addr, int addr_len, int num)
if (result < 0)
log_print("Can't bind to port %d addr number %d",
dlm_config.tcp_port, num);
dlm_config.ci_tcp_port, num);
return result;
}
......@@ -711,7 +718,7 @@ static int init_sock(void)
/* Bind to all interfaces. */
for (i = 0; i < dlm_local_count; i++) {
memcpy(&localaddr, dlm_local_addr[i], sizeof(localaddr));
make_sockaddr(&localaddr, dlm_config.tcp_port, &addr_len);
make_sockaddr(&localaddr, dlm_config.ci_tcp_port, &addr_len);
result = add_bind_addr(&localaddr, addr_len, num);
if (result)
......@@ -820,7 +827,8 @@ void dlm_lowcomms_commit_buffer(void *arg)
spin_lock_bh(&write_nodes_lock);
list_add_tail(&ni->write_list, &write_nodes);
spin_unlock_bh(&write_nodes_lock);
wake_up_process(send_task);
queue_work(send_workqueue, &ni->swork);
}
return;
......@@ -863,7 +871,7 @@ static void initiate_association(int nodeid)
return;
}
make_sockaddr(&rem_addr, dlm_config.tcp_port, &addrlen);
make_sockaddr(&rem_addr, dlm_config.ci_tcp_port, &addrlen);
outmessage.msg_name = &rem_addr;
outmessage.msg_namelen = addrlen;
......@@ -1088,101 +1096,75 @@ int dlm_lowcomms_close(int nodeid)
return 0;
}
static int write_list_empty(void)
// PJC: The work queue function for receiving.
static void process_recv_sockets(struct work_struct *work)
{
int status;
spin_lock_bh(&write_nodes_lock);
status = list_empty(&write_nodes);
spin_unlock_bh(&write_nodes_lock);
return status;
}
static int dlm_recvd(void *data)
{
DECLARE_WAITQUEUE(wait, current);
while (!kthread_should_stop()) {
if (test_and_clear_bit(CF_READ_PENDING, &sctp_con.flags)) {
int ret;
int count = 0;
set_current_state(TASK_INTERRUPTIBLE);
add_wait_queue(&lowcomms_recv_wait, &wait);
if (!test_bit(CF_READ_PENDING, &sctp_con.flags))
cond_resched();
remove_wait_queue(&lowcomms_recv_wait, &wait);
set_current_state(TASK_RUNNING);
if (test_and_clear_bit(CF_READ_PENDING, &sctp_con.flags)) {
int ret;
do {
ret = receive_from_sock();
do {
ret = receive_from_sock();
/* Don't starve out everyone else */
if (++count >= MAX_RX_MSG_COUNT) {
cond_resched();
count = 0;
}
} while (!kthread_should_stop() && ret >=0);
}
cond_resched();
/* Don't starve out everyone else */
if (++count >= MAX_RX_MSG_COUNT) {
cond_resched();
count = 0;
}
} while (!kthread_should_stop() && ret >=0);
}
return 0;
cond_resched();
}
static int dlm_sendd(void *data)
// PJC: the work queue function for sending
static void process_send_sockets(struct work_struct *work)
{
DECLARE_WAITQUEUE(wait, current);
add_wait_queue(sctp_con.sock->sk->sk_sleep, &wait);
while (!kthread_should_stop()) {
set_current_state(TASK_INTERRUPTIBLE);
if (write_list_empty())
cond_resched();
set_current_state(TASK_RUNNING);
if (sctp_con.eagain_flag) {
sctp_con.eagain_flag = 0;
refill_write_queue();
}
process_output_queue();
if (sctp_con.eagain_flag) {
sctp_con.eagain_flag = 0;
refill_write_queue();
}
process_output_queue();
}
remove_wait_queue(sctp_con.sock->sk->sk_sleep, &wait);
return 0;
// PJC: Process lock requests from a particular node.
// TODO: can we optimise this out on UP ??
static void process_lock_request(struct work_struct *work)
{
}
static void daemons_stop(void)
{
kthread_stop(recv_task);
kthread_stop(send_task);
destroy_workqueue(recv_workqueue);
destroy_workqueue(send_workqueue);
destroy_workqueue(lock_workqueue);
}
static int daemons_start(void)
{
struct task_struct *p;
int error;
recv_workqueue = create_workqueue("dlm_recv");
error = IS_ERR(recv_workqueue);
if (error) {
log_print("can't start dlm_recv %d", error);
return error;
}
p = kthread_run(dlm_recvd, NULL, "dlm_recvd");
error = IS_ERR(p);
send_workqueue = create_singlethread_workqueue("dlm_send");
error = IS_ERR(send_workqueue);
if (error) {
log_print("can't start dlm_recvd %d", error);
log_print("can't start dlm_send %d", error);
destroy_workqueue(recv_workqueue);
return error;
}
recv_task = p;
p = kthread_run(dlm_sendd, NULL, "dlm_sendd");
error = IS_ERR(p);
lock_workqueue = create_workqueue("dlm_rlock");
error = IS_ERR(lock_workqueue);
if (error) {
log_print("can't start dlm_sendd %d", error);
kthread_stop(recv_task);
log_print("can't start dlm_rlock %d", error);
destroy_workqueue(send_workqueue);
destroy_workqueue(recv_workqueue);
return error;
}
send_task = p;
return 0;
}
......@@ -1194,6 +1176,8 @@ int dlm_lowcomms_start(void)
{
int error;
INIT_WORK(&sctp_con.work, process_recv_sockets);
error = init_sock();
if (error)
goto fail_sock;
......@@ -1224,4 +1208,3 @@ void dlm_lowcomms_stop(void)
for (i = 0; i < dlm_local_count; i++)
kfree(dlm_local_addr[i]);
}
......@@ -82,7 +82,7 @@ int dlm_process_incoming_buffer(int nodeid, const void *base,
if (msglen < sizeof(struct dlm_header))
break;
err = -E2BIG;
if (msglen > dlm_config.buffer_size) {
if (msglen > dlm_config.ci_buffer_size) {
log_print("message size %d from %d too big, buf len %d",
msglen, nodeid, len);
break;
......@@ -103,7 +103,7 @@ int dlm_process_incoming_buffer(int nodeid, const void *base,
if (msglen > sizeof(__tmp) &&
msg == (struct dlm_header *) __tmp) {
msg = kmalloc(dlm_config.buffer_size, GFP_KERNEL);
msg = kmalloc(dlm_config.ci_buffer_size, GFP_KERNEL);
if (msg == NULL)
return ret;
}
......
......@@ -56,6 +56,10 @@ static int create_rcom(struct dlm_ls *ls, int to_nodeid, int type, int len,
rc->rc_type = type;
spin_lock(&ls->ls_recover_lock);
rc->rc_seq = ls->ls_recover_seq;
spin_unlock(&ls->ls_recover_lock);
*mh_ret = mh;
*rc_ret = rc;
return 0;
......@@ -78,8 +82,17 @@ static void make_config(struct dlm_ls *ls, struct rcom_config *rf)
rf->rf_lsflags = ls->ls_exflags;
}
static int check_config(struct dlm_ls *ls, struct rcom_config *rf, int nodeid)
static int check_config(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid)
{
struct rcom_config *rf = (struct rcom_config *) rc->rc_buf;
if ((rc->rc_header.h_version & 0xFFFF0000) != DLM_HEADER_MAJOR) {
log_error(ls, "version mismatch: %x nodeid %d: %x",
DLM_HEADER_MAJOR | DLM_HEADER_MINOR, nodeid,
rc->rc_header.h_version);
return -EINVAL;
}
if (rf->rf_lvblen != ls->ls_lvblen ||
rf->rf_lsflags != ls->ls_exflags) {
log_error(ls, "config mismatch: %d,%x nodeid %d: %d,%x",
......@@ -125,7 +138,7 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid)
goto out;
allow_sync_reply(ls, &rc->rc_id);
memset(ls->ls_recover_buf, 0, dlm_config.buffer_size);
memset(ls->ls_recover_buf, 0, dlm_config.ci_buffer_size);
send_rcom(ls, mh, rc);
......@@ -141,8 +154,7 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid)
log_debug(ls, "remote node %d not ready", nodeid);
rc->rc_result = 0;
} else
error = check_config(ls, (struct rcom_config *) rc->rc_buf,
nodeid);
error = check_config(ls, rc, nodeid);
/* the caller looks at rc_result for the remote recovery status */
out:
return error;
......@@ -159,6 +171,7 @@ static void receive_rcom_status(struct dlm_ls *ls, struct dlm_rcom *rc_in)
if (error)
return;
rc->rc_id = rc_in->rc_id;
rc->rc_seq_reply = rc_in->rc_seq;
rc->rc_result = dlm_recover_status(ls);
make_config(ls, (struct rcom_config *) rc->rc_buf);
......@@ -200,7 +213,7 @@ int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len)
if (nodeid == dlm_our_nodeid()) {
dlm_copy_master_names(ls, last_name, last_len,
ls->ls_recover_buf + len,
dlm_config.buffer_size - len, nodeid);
dlm_config.ci_buffer_size - len, nodeid);
goto out;
}
......@@ -210,7 +223,7 @@ int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len)
memcpy(rc->rc_buf, last_name, last_len);
allow_sync_reply(ls, &rc->rc_id);
memset(ls->ls_recover_buf, 0, dlm_config.buffer_size);
memset(ls->ls_recover_buf, 0, dlm_config.ci_buffer_size);
send_rcom(ls, mh, rc);
......@@ -224,30 +237,17 @@ static void receive_rcom_names(struct dlm_ls *ls, struct dlm_rcom *rc_in)
{
struct dlm_rcom *rc;
struct dlm_mhandle *mh;
int error, inlen, outlen;
int nodeid = rc_in->rc_header.h_nodeid;
uint32_t status = dlm_recover_status(ls);
/*
* We can't run dlm_dir_rebuild_send (which uses ls_nodes) while
* dlm_recoverd is running ls_nodes_reconfig (which changes ls_nodes).
* It could only happen in rare cases where we get a late NAMES
* message from a previous instance of recovery.
*/
if (!(status & DLM_RS_NODES)) {
log_debug(ls, "ignoring RCOM_NAMES from %u", nodeid);
return;
}
int error, inlen, outlen, nodeid;
nodeid = rc_in->rc_header.h_nodeid;
inlen = rc_in->rc_header.h_length - sizeof(struct dlm_rcom);
outlen = dlm_config.buffer_size - sizeof(struct dlm_rcom);
outlen = dlm_config.ci_buffer_size - sizeof(struct dlm_rcom);
error = create_rcom(ls, nodeid, DLM_RCOM_NAMES_REPLY, outlen, &rc, &mh);
if (error)
return;
rc->rc_id = rc_in->rc_id;
rc->rc_seq_reply = rc_in->rc_seq;
dlm_copy_master_names(ls, rc_in->rc_buf, inlen, rc->rc_buf, outlen,
nodeid);
......@@ -294,6 +294,7 @@ static void receive_rcom_lookup(struct dlm_ls *ls, struct dlm_rcom *rc_in)
ret_nodeid = error;
rc->rc_result = ret_nodeid;
rc->rc_id = rc_in->rc_id;
rc->rc_seq_reply = rc_in->rc_seq;
send_rcom(ls, mh, rc);
}
......@@ -375,20 +376,13 @@ static void receive_rcom_lock(struct dlm_ls *ls, struct dlm_rcom *rc_in)
memcpy(rc->rc_buf, rc_in->rc_buf, sizeof(struct rcom_lock));
rc->rc_id = rc_in->rc_id;
rc->rc_seq_reply = rc_in->rc_seq;
send_rcom(ls, mh, rc);
}
static void receive_rcom_lock_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
{
uint32_t status = dlm_recover_status(ls);
if (!(status & DLM_RS_DIR)) {
log_debug(ls, "ignoring RCOM_LOCK_REPLY from %u",
rc_in->rc_header.h_nodeid);
return;
}
dlm_recover_process_copy(ls, rc_in);
}
......@@ -415,6 +409,7 @@ static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in)
rc->rc_type = DLM_RCOM_STATUS_REPLY;
rc->rc_id = rc_in->rc_id;
rc->rc_seq_reply = rc_in->rc_seq;
rc->rc_result = -ESRCH;
rf = (struct rcom_config *) rc->rc_buf;
......@@ -426,6 +421,31 @@ static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in)
return 0;
}
static int is_old_reply(struct dlm_ls *ls, struct dlm_rcom *rc)
{
uint64_t seq;
int rv = 0;
switch (rc->rc_type) {
case DLM_RCOM_STATUS_REPLY:
case DLM_RCOM_NAMES_REPLY:
case DLM_RCOM_LOOKUP_REPLY:
case DLM_RCOM_LOCK_REPLY:
spin_lock(&ls->ls_recover_lock);
seq = ls->ls_recover_seq;
spin_unlock(&ls->ls_recover_lock);
if (rc->rc_seq_reply != seq) {
log_debug(ls, "ignoring old reply %x from %d "
"seq_reply %llx expect %llx",
rc->rc_type, rc->rc_header.h_nodeid,
(unsigned long long)rc->rc_seq_reply,
(unsigned long long)seq);
rv = 1;
}
}
return rv;
}
/* Called by dlm_recvd; corresponds to dlm_receive_message() but special
recovery-only comms are sent through here. */
......@@ -449,11 +469,14 @@ void dlm_receive_rcom(struct dlm_header *hd, int nodeid)
}
if (dlm_recovery_stopped(ls) && (rc->rc_type != DLM_RCOM_STATUS)) {
log_error(ls, "ignoring recovery message %x from %d",
log_debug(ls, "ignoring recovery message %x from %d",
rc->rc_type, nodeid);
goto out;
}
if (is_old_reply(ls, rc))
goto out;
if (nodeid != rc->rc_header.h_nodeid) {
log_error(ls, "bad rcom nodeid %d from %d",
rc->rc_header.h_nodeid, nodeid);
......
......@@ -44,7 +44,7 @@
static void dlm_wait_timer_fn(unsigned long data)
{
struct dlm_ls *ls = (struct dlm_ls *) data;
mod_timer(&ls->ls_timer, jiffies + (dlm_config.recover_timer * HZ));
mod_timer(&ls->ls_timer, jiffies + (dlm_config.ci_recover_timer * HZ));
wake_up(&ls->ls_wait_general);
}
......@@ -55,7 +55,7 @@ int dlm_wait_function(struct dlm_ls *ls, int (*testfn) (struct dlm_ls *ls))
init_timer(&ls->ls_timer);
ls->ls_timer.function = dlm_wait_timer_fn;
ls->ls_timer.data = (long) ls;
ls->ls_timer.expires = jiffies + (dlm_config.recover_timer * HZ);
ls->ls_timer.expires = jiffies + (dlm_config.ci_recover_timer * HZ);
add_timer(&ls->ls_timer);
wait_event(ls->ls_wait_general, testfn(ls) || dlm_recovery_stopped(ls));
......@@ -397,7 +397,9 @@ int dlm_recover_masters(struct dlm_ls *ls)
if (dlm_no_directory(ls))
count += recover_master_static(r);
else if (!is_master(r) && dlm_is_removed(ls, r->res_nodeid)) {
else if (!is_master(r) &&
(dlm_is_removed(ls, r->res_nodeid) ||
rsb_flag(r, RSB_NEW_MASTER))) {
recover_master(r);
count++;
}
......
......@@ -77,7 +77,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
error = dlm_recover_members(ls, rv, &neg);
if (error) {
log_error(ls, "recover_members failed %d", error);
log_debug(ls, "recover_members failed %d", error);
goto fail;
}
start = jiffies;
......@@ -89,7 +89,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
error = dlm_recover_directory(ls);
if (error) {
log_error(ls, "recover_directory failed %d", error);
log_debug(ls, "recover_directory failed %d", error);
goto fail;
}
......@@ -99,7 +99,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
error = dlm_recover_directory_wait(ls);
if (error) {
log_error(ls, "recover_directory_wait failed %d", error);
log_debug(ls, "recover_directory_wait failed %d", error);
goto fail;
}
......@@ -129,7 +129,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
error = dlm_recover_masters(ls);
if (error) {
log_error(ls, "recover_masters failed %d", error);
log_debug(ls, "recover_masters failed %d", error);
goto fail;
}
......@@ -139,13 +139,13 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
error = dlm_recover_locks(ls);
if (error) {
log_error(ls, "recover_locks failed %d", error);
log_debug(ls, "recover_locks failed %d", error);
goto fail;
}
error = dlm_recover_locks_wait(ls);
if (error) {
log_error(ls, "recover_locks_wait failed %d", error);
log_debug(ls, "recover_locks_wait failed %d", error);
goto fail;
}
......@@ -166,7 +166,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
error = dlm_recover_locks_wait(ls);
if (error) {
log_error(ls, "recover_locks_wait failed %d", error);
log_debug(ls, "recover_locks_wait failed %d", error);
goto fail;
}
}
......@@ -184,7 +184,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
dlm_set_recover_status(ls, DLM_RS_DONE);
error = dlm_recover_done_wait(ls);
if (error) {
log_error(ls, "recover_done_wait failed %d", error);
log_debug(ls, "recover_done_wait failed %d", error);
goto fail;
}
......@@ -192,19 +192,19 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
error = enable_locking(ls, rv->seq);
if (error) {
log_error(ls, "enable_locking failed %d", error);
log_debug(ls, "enable_locking failed %d", error);
goto fail;
}
error = dlm_process_requestqueue(ls);
if (error) {
log_error(ls, "process_requestqueue failed %d", error);
log_debug(ls, "process_requestqueue failed %d", error);
goto fail;
}
error = dlm_recover_waiters_post(ls);
if (error) {
log_error(ls, "recover_waiters_post failed %d", error);
log_debug(ls, "recover_waiters_post failed %d", error);
goto fail;
}
......
......@@ -180,6 +180,14 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, int type)
ua->lksb.sb_status == -EAGAIN && !list_empty(&lkb->lkb_ownqueue))
remove_ownqueue = 1;
/* unlocks or cancels of waiting requests need to be removed from the
proc's unlocking list, again there must be a better way... */
if (ua->lksb.sb_status == -DLM_EUNLOCK ||
(ua->lksb.sb_status == -DLM_ECANCEL &&
lkb->lkb_grmode == DLM_LOCK_IV))
remove_ownqueue = 1;
/* We want to copy the lvb to userspace when the completion
ast is read if the status is 0, the lock has an lvb and
lvb_ops says we should. We could probably have set_lvb_lock()
......@@ -523,6 +531,7 @@ static int device_open(struct inode *inode, struct file *file)
proc->lockspace = ls->ls_local_handle;
INIT_LIST_HEAD(&proc->asts);
INIT_LIST_HEAD(&proc->locks);
INIT_LIST_HEAD(&proc->unlocking);
spin_lock_init(&proc->asts_spin);
spin_lock_init(&proc->locks_spin);
init_waitqueue_head(&proc->wait);
......
......@@ -134,6 +134,8 @@ void dlm_rcom_out(struct dlm_rcom *rc)
rc->rc_type = cpu_to_le32(rc->rc_type);
rc->rc_result = cpu_to_le32(rc->rc_result);
rc->rc_id = cpu_to_le64(rc->rc_id);
rc->rc_seq = cpu_to_le64(rc->rc_seq);
rc->rc_seq_reply = cpu_to_le64(rc->rc_seq_reply);
if (type == DLM_RCOM_LOCK)
rcom_lock_out((struct rcom_lock *) rc->rc_buf);
......@@ -151,6 +153,8 @@ void dlm_rcom_in(struct dlm_rcom *rc)
rc->rc_type = le32_to_cpu(rc->rc_type);
rc->rc_result = le32_to_cpu(rc->rc_result);
rc->rc_id = le64_to_cpu(rc->rc_id);
rc->rc_seq = le64_to_cpu(rc->rc_seq);
rc->rc_seq_reply = le64_to_cpu(rc->rc_seq_reply);
if (rc->rc_type == DLM_RCOM_LOCK)
rcom_lock_in((struct rcom_lock *) rc->rc_buf);
......
......@@ -4,44 +4,43 @@ config GFS2_FS
select FS_POSIX_ACL
select CRC32
help
A cluster filesystem.

Allows a cluster of computers to simultaneously use a block device
that is shared between them (with FC, iSCSI, NBD, etc...). GFS reads
and writes to the block device like a local filesystem, but also uses
a lock module to allow the computers to coordinate their I/O so
filesystem consistency is maintained. One of the nifty features of
GFS is perfect consistency -- changes made to the filesystem on one
machine show up immediately on all other machines in the cluster.

To use the GFS2 filesystem, you will need to enable one or more of
the below locking modules. Documentation and utilities for GFS2 can
be found here: http://sources.redhat.com/cluster
config GFS2_FS_LOCKING_NOLOCK
tristate "GFS2 \"nolock\" locking module"
depends on GFS2_FS
help
Single node locking module for GFS2.

Use this module if you want to use GFS2 on a single node without
its clustering features. You can still take advantage of the
large file support, and upgrade to running a full cluster later on
if required.

If you will only be using GFS2 in cluster mode, you do not need this
module.
config GFS2_FS_LOCKING_DLM
tristate "GFS2 DLM locking module"
depends on GFS2_FS && NET && INET && (IPV6 || IPV6=n)
depends on GFS2_FS && SYSFS && NET && INET && (IPV6 || IPV6=n)
select IP_SCTP if DLM_SCTP
select CONFIGFS_FS
select DLM
help
Multiple node locking module for GFS2

Most users of GFS2 will require this module. It provides the locking
interface between GFS2 and the DLM, which is required to use GFS2
in a cluster environment.
......@@ -773,7 +773,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
gfs2_free_data(ip, bstart, blen);
}
ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds();
ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
gfs2_dinode_out(ip, dibh->b_data);
......@@ -848,7 +848,7 @@ static int do_grow(struct gfs2_inode *ip, u64 size)
}
ip->i_di.di_size = size;
ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds();
ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
error = gfs2_meta_inode_buffer(ip, &dibh);
if (error)
......@@ -963,7 +963,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size)
if (gfs2_is_stuffed(ip)) {
ip->i_di.di_size = size;
ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds();
ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
gfs2_dinode_out(ip, dibh->b_data);
gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + size);
......@@ -975,7 +975,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size)
if (!error) {
ip->i_di.di_size = size;
ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds();
ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
ip->i_di.di_flags |= GFS2_DIF_TRUNC_IN_PROG;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
gfs2_dinode_out(ip, dibh->b_data);
......@@ -1048,7 +1048,7 @@ static int trunc_end(struct gfs2_inode *ip)
ip->i_num.no_addr;
gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
}
ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds();
ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
ip->i_di.di_flags &= ~GFS2_DIF_TRUNC_IN_PROG;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
......
......@@ -131,7 +131,7 @@ static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf,
memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size);
if (ip->i_di.di_size < offset + size)
ip->i_di.di_size = offset + size;
ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds();
ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
gfs2_dinode_out(ip, dibh->b_data);
brelse(dibh);
......@@ -229,7 +229,7 @@ static int gfs2_dir_write_data(struct gfs2_inode *ip, const char *buf,
if (ip->i_di.di_size < offset + copied)
ip->i_di.di_size = offset + copied;
ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds();
ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
gfs2_dinode_out(ip, dibh->b_data);
......@@ -1198,12 +1198,11 @@ static int compare_dents(const void *a, const void *b)
*/
static int do_filldir_main(struct gfs2_inode *dip, u64 *offset,
void *opaque, gfs2_filldir_t filldir,
void *opaque, filldir_t filldir,
const struct gfs2_dirent **darr, u32 entries,
int *copied)
{
const struct gfs2_dirent *dent, *dent_next;
struct gfs2_inum_host inum;
u64 off, off_next;
unsigned int x, y;
int run = 0;
......@@ -1240,11 +1239,9 @@ static int do_filldir_main(struct gfs2_inode *dip, u64 *offset,
*offset = off;
}
gfs2_inum_in(&inum, (char *)&dent->de_inum);
error = filldir(opaque, (const char *)(dent + 1),
be16_to_cpu(dent->de_name_len),
off, &inum,
off, be64_to_cpu(dent->de_inum.no_addr),
be16_to_cpu(dent->de_type));
if (error)
return 1;
......@@ -1262,8 +1259,8 @@ static int do_filldir_main(struct gfs2_inode *dip, u64 *offset,
}
static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque,
gfs2_filldir_t filldir, int *copied,
unsigned *depth, u64 leaf_no)
filldir_t filldir, int *copied, unsigned *depth,
u64 leaf_no)
{
struct gfs2_inode *ip = GFS2_I(inode);
struct buffer_head *bh;
......@@ -1343,7 +1340,7 @@ static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque,
*/
static int dir_e_read(struct inode *inode, u64 *offset, void *opaque,
gfs2_filldir_t filldir)
filldir_t filldir)
{
struct gfs2_inode *dip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);
......@@ -1402,7 +1399,7 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque,
}
int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque,
gfs2_filldir_t filldir)
filldir_t filldir)
{
struct gfs2_inode *dip = GFS2_I(inode);
struct dirent_gather g;
......@@ -1568,7 +1565,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
break;
gfs2_trans_add_bh(ip->i_gl, bh, 1);
ip->i_di.di_entries++;
ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds();
ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
gfs2_dinode_out(ip, bh->b_data);
brelse(bh);
error = 0;
......@@ -1654,7 +1651,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name)
gfs2_consist_inode(dip);
gfs2_trans_add_bh(dip->i_gl, bh, 1);
dip->i_di.di_entries--;
dip->i_inode.i_mtime.tv_sec = dip->i_inode.i_ctime.tv_sec = get_seconds();
dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME_SEC;
gfs2_dinode_out(dip, bh->b_data);
brelse(bh);
mark_inode_dirty(&dip->i_inode);
......@@ -1702,7 +1699,7 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
gfs2_trans_add_bh(dip->i_gl, bh, 1);
}
dip->i_inode.i_mtime.tv_sec = dip->i_inode.i_ctime.tv_sec = get_seconds();
dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME_SEC;
gfs2_dinode_out(dip, bh->b_data);
brelse(bh);
return 0;
......
......@@ -16,30 +16,13 @@ struct inode;
struct gfs2_inode;
struct gfs2_inum;
/**
* gfs2_filldir_t - Report a directory entry to the caller of gfs2_dir_read()
* @opaque: opaque data used by the function
* @name: the name of the directory entry
* @length: the length of the name
* @offset: the entry's offset in the directory
* @inum: the inode number the entry points to
* @type: the type of inode the entry points to
*
* Returns: 0 on success, 1 if buffer full
*/
typedef int (*gfs2_filldir_t) (void *opaque,
const char *name, unsigned int length,
u64 offset,
struct gfs2_inum_host *inum, unsigned int type);
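The private callback type gives way to the VFS's own filldir_t, so ->readdir
can feed entries straight through without the gfs2_inum_host translation
(see do_filldir_main above, which now passes the raw no_addr). For reference,
the generic callback is declared in include/linux/fs.h in this era roughly as
(quoted from memory, not from this patch):

	typedef int (*filldir_t)(void *buf, const char *name, int namlen,
				 loff_t offset, u64 ino, unsigned int d_type);

The u64 inode number argument is what makes dropping the inum copy-in
possible.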
int gfs2_dir_search(struct inode *dir, const struct qstr *filename,
struct gfs2_inum_host *inum, unsigned int *type);
int gfs2_dir_add(struct inode *inode, const struct qstr *filename,
const struct gfs2_inum_host *inum, unsigned int type);
int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *filename);
int gfs2_dir_read(struct inode *inode, u64 * offset, void *opaque,
gfs2_filldir_t filldir);
int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque,
filldir_t filldir);
int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
struct gfs2_inum_host *new_inum, unsigned int new_type);
......
......@@ -301,7 +301,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
error = gfs2_meta_inode_buffer(ip, &dibh);
if (!error) {
ip->i_inode.i_ctime.tv_sec = get_seconds();
ip->i_inode.i_ctime = CURRENT_TIME_SEC;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
gfs2_dinode_out(ip, dibh->b_data);
brelse(dibh);
......@@ -718,7 +718,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
(er->er_mode & S_IFMT));
ip->i_inode.i_mode = er->er_mode;
}
ip->i_inode.i_ctime.tv_sec = get_seconds();
ip->i_inode.i_ctime = CURRENT_TIME_SEC;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
gfs2_dinode_out(ip, dibh->b_data);
brelse(dibh);
......@@ -853,7 +853,7 @@ static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh,
(ip->i_inode.i_mode & S_IFMT) == (er->er_mode & S_IFMT));
ip->i_inode.i_mode = er->er_mode;
}
ip->i_inode.i_ctime.tv_sec = get_seconds();
ip->i_inode.i_ctime = CURRENT_TIME_SEC;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
gfs2_dinode_out(ip, dibh->b_data);
brelse(dibh);
......@@ -1134,7 +1134,7 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
error = gfs2_meta_inode_buffer(ip, &dibh);
if (!error) {
ip->i_inode.i_ctime.tv_sec = get_seconds();
ip->i_inode.i_ctime = CURRENT_TIME_SEC;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
gfs2_dinode_out(ip, dibh->b_data);
brelse(dibh);
......
......@@ -20,7 +20,6 @@
#define LM_FLAG_ANY 0x00000008
#define LM_FLAG_PRIORITY 0x00000010 */
#define GL_LOCAL_EXCL 0x00000020
#define GL_ASYNC 0x00000040
#define GL_EXACT 0x00000080
#define GL_SKIP 0x00000100
......@@ -83,17 +82,11 @@ void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags,
void gfs2_holder_reinit(unsigned int state, unsigned flags,
struct gfs2_holder *gh);
void gfs2_holder_uninit(struct gfs2_holder *gh);
void gfs2_glock_xmote_th(struct gfs2_glock *gl, unsigned int state, int flags);
void gfs2_glock_drop_th(struct gfs2_glock *gl);
int gfs2_glock_nq(struct gfs2_holder *gh);
int gfs2_glock_poll(struct gfs2_holder *gh);
int gfs2_glock_wait(struct gfs2_holder *gh);
void gfs2_glock_dq(struct gfs2_holder *gh);
int gfs2_glock_be_greedy(struct gfs2_glock *gl, unsigned int time);
void gfs2_glock_dq_uninit(struct gfs2_holder *gh);
int gfs2_glock_nq_num(struct gfs2_sbd *sdp,
u64 number, const struct gfs2_glock_operations *glops,
......@@ -103,10 +96,6 @@ int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs);
void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs);
void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs);
void gfs2_glock_prefetch_num(struct gfs2_sbd *sdp, u64 number,
const struct gfs2_glock_operations *glops,
unsigned int state, int flags);
/**
* gfs2_glock_nq_init - initialize a holder and enqueue it on a glock
* @gl: the glock
......
......@@ -117,12 +117,14 @@ static void gfs2_pte_inval(struct gfs2_glock *gl)
static void meta_go_sync(struct gfs2_glock *gl)
{
if (gl->gl_state != LM_ST_EXCLUSIVE)
return;
if (test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) {
gfs2_log_flush(gl->gl_sbd, gl);
gfs2_meta_sync(gl);
gfs2_ail_empty_gl(gl);
}
}
/**
......@@ -141,6 +143,37 @@ static void meta_go_inval(struct gfs2_glock *gl, int flags)
gl->gl_vn++;
}
/**
* inode_go_sync - Sync the dirty data and/or metadata for an inode glock
* @gl: the glock protecting the inode
*
*/
static void inode_go_sync(struct gfs2_glock *gl)
{
struct gfs2_inode *ip = gl->gl_object;
if (ip && !S_ISREG(ip->i_inode.i_mode))
ip = NULL;
if (test_bit(GLF_DIRTY, &gl->gl_flags)) {
gfs2_log_flush(gl->gl_sbd, gl);
if (ip)
filemap_fdatawrite(ip->i_inode.i_mapping);
gfs2_meta_sync(gl);
if (ip) {
struct address_space *mapping = ip->i_inode.i_mapping;
int error = filemap_fdatawait(mapping);
if (error == -ENOSPC)
set_bit(AS_ENOSPC, &mapping->flags);
else if (error)
set_bit(AS_EIO, &mapping->flags);
}
clear_bit(GLF_DIRTY, &gl->gl_flags);
gfs2_ail_empty_gl(gl);
}
}
/**
* inode_go_xmote_th - promote/demote a glock
* @gl: the glock
......@@ -149,12 +182,12 @@ static void meta_go_inval(struct gfs2_glock *gl, int flags)
*
*/
static void inode_go_xmote_th(struct gfs2_glock *gl, unsigned int state,
int flags)
static void inode_go_xmote_th(struct gfs2_glock *gl)
{
if (gl->gl_state != LM_ST_UNLOCKED)
gfs2_pte_inval(gl);
gfs2_glock_xmote_th(gl, state, flags);
if (gl->gl_state == LM_ST_EXCLUSIVE)
inode_go_sync(gl);
}
/**
......@@ -189,38 +222,8 @@ static void inode_go_xmote_bh(struct gfs2_glock *gl)
static void inode_go_drop_th(struct gfs2_glock *gl)
{
gfs2_pte_inval(gl);
gfs2_glock_drop_th(gl);
}
/**
* inode_go_sync - Sync the dirty data and/or metadata for an inode glock
* @gl: the glock protecting the inode
*
*/
static void inode_go_sync(struct gfs2_glock *gl)
{
struct gfs2_inode *ip = gl->gl_object;
if (ip && !S_ISREG(ip->i_inode.i_mode))
ip = NULL;
if (test_bit(GLF_DIRTY, &gl->gl_flags)) {
gfs2_log_flush(gl->gl_sbd, gl);
if (ip)
filemap_fdatawrite(ip->i_inode.i_mapping);
gfs2_meta_sync(gl);
if (ip) {
struct address_space *mapping = ip->i_inode.i_mapping;
int error = filemap_fdatawait(mapping);
if (error == -ENOSPC)
set_bit(AS_ENOSPC, &mapping->flags);
else if (error)
set_bit(AS_EIO, &mapping->flags);
}
clear_bit(GLF_DIRTY, &gl->gl_flags);
gfs2_ail_empty_gl(gl);
}
if (gl->gl_state == LM_ST_EXCLUSIVE)
inode_go_sync(gl);
}
/**
......@@ -295,7 +298,7 @@ static int inode_go_lock(struct gfs2_holder *gh)
if ((ip->i_di.di_flags & GFS2_DIF_TRUNC_IN_PROG) &&
(gl->gl_state == LM_ST_EXCLUSIVE) &&
(gh->gh_flags & GL_LOCAL_EXCL))
(gh->gh_state == LM_ST_EXCLUSIVE))
error = gfs2_truncatei_resume(ip);
return error;
......@@ -318,39 +321,6 @@ static void inode_go_unlock(struct gfs2_holder *gh)
gfs2_meta_cache_flush(ip);
}
/**
* inode_greedy -
* @gl: the glock
*
*/
static void inode_greedy(struct gfs2_glock *gl)
{
struct gfs2_sbd *sdp = gl->gl_sbd;
struct gfs2_inode *ip = gl->gl_object;
unsigned int quantum = gfs2_tune_get(sdp, gt_greedy_quantum);
unsigned int max = gfs2_tune_get(sdp, gt_greedy_max);
unsigned int new_time;
spin_lock(&ip->i_spin);
if (time_after(ip->i_last_pfault + quantum, jiffies)) {
new_time = ip->i_greedy + quantum;
if (new_time > max)
new_time = max;
} else {
new_time = ip->i_greedy - quantum;
if (!new_time || new_time > max)
new_time = 1;
}
ip->i_greedy = new_time;
spin_unlock(&ip->i_spin);
iput(&ip->i_inode);
}
/**
* rgrp_go_demote_ok - Check to see if it's ok to unlock a RG's glock
* @gl: the glock
......@@ -398,8 +368,7 @@ static void rgrp_go_unlock(struct gfs2_holder *gh)
*
*/
static void trans_go_xmote_th(struct gfs2_glock *gl, unsigned int state,
int flags)
static void trans_go_xmote_th(struct gfs2_glock *gl)
{
struct gfs2_sbd *sdp = gl->gl_sbd;
......@@ -408,8 +377,6 @@ static void trans_go_xmote_th(struct gfs2_glock *gl, unsigned int state,
gfs2_meta_syncfs(sdp);
gfs2_log_shutdown(sdp);
}
gfs2_glock_xmote_th(gl, state, flags);
}
/**
......@@ -461,8 +428,6 @@ static void trans_go_drop_th(struct gfs2_glock *gl)
gfs2_meta_syncfs(sdp);
gfs2_log_shutdown(sdp);
}
gfs2_glock_drop_th(gl);
}
/**
......@@ -478,8 +443,8 @@ static int quota_go_demote_ok(struct gfs2_glock *gl)
}
const struct gfs2_glock_operations gfs2_meta_glops = {
.go_xmote_th = gfs2_glock_xmote_th,
.go_drop_th = gfs2_glock_drop_th,
.go_xmote_th = meta_go_sync,
.go_drop_th = meta_go_sync,
.go_type = LM_TYPE_META,
};
......@@ -487,19 +452,14 @@ const struct gfs2_glock_operations gfs2_inode_glops = {
.go_xmote_th = inode_go_xmote_th,
.go_xmote_bh = inode_go_xmote_bh,
.go_drop_th = inode_go_drop_th,
.go_sync = inode_go_sync,
.go_inval = inode_go_inval,
.go_demote_ok = inode_go_demote_ok,
.go_lock = inode_go_lock,
.go_unlock = inode_go_unlock,
.go_greedy = inode_greedy,
.go_type = LM_TYPE_INODE,
};
const struct gfs2_glock_operations gfs2_rgrp_glops = {
.go_xmote_th = gfs2_glock_xmote_th,
.go_drop_th = gfs2_glock_drop_th,
.go_sync = meta_go_sync,
.go_inval = meta_go_inval,
.go_demote_ok = rgrp_go_demote_ok,
.go_lock = rgrp_go_lock,
......@@ -515,33 +475,23 @@ const struct gfs2_glock_operations gfs2_trans_glops = {
};
const struct gfs2_glock_operations gfs2_iopen_glops = {
.go_xmote_th = gfs2_glock_xmote_th,
.go_drop_th = gfs2_glock_drop_th,
.go_type = LM_TYPE_IOPEN,
};
const struct gfs2_glock_operations gfs2_flock_glops = {
.go_xmote_th = gfs2_glock_xmote_th,
.go_drop_th = gfs2_glock_drop_th,
.go_type = LM_TYPE_FLOCK,
};
const struct gfs2_glock_operations gfs2_nondisk_glops = {
.go_xmote_th = gfs2_glock_xmote_th,
.go_drop_th = gfs2_glock_drop_th,
.go_type = LM_TYPE_NONDISK,
};
const struct gfs2_glock_operations gfs2_quota_glops = {
.go_xmote_th = gfs2_glock_xmote_th,
.go_drop_th = gfs2_glock_drop_th,
.go_demote_ok = quota_go_demote_ok,
.go_type = LM_TYPE_QUOTA,
};
const struct gfs2_glock_operations gfs2_journal_glops = {
.go_xmote_th = gfs2_glock_xmote_th,
.go_drop_th = gfs2_glock_drop_th,
.go_type = LM_TYPE_JOURNAL,
};
......@@ -101,17 +101,14 @@ struct gfs2_bufdata {
};
struct gfs2_glock_operations {
void (*go_xmote_th) (struct gfs2_glock *gl, unsigned int state, int flags);
void (*go_xmote_th) (struct gfs2_glock *gl);
void (*go_xmote_bh) (struct gfs2_glock *gl);
void (*go_drop_th) (struct gfs2_glock *gl);
void (*go_drop_bh) (struct gfs2_glock *gl);
void (*go_sync) (struct gfs2_glock *gl);
void (*go_inval) (struct gfs2_glock *gl, int flags);
int (*go_demote_ok) (struct gfs2_glock *gl);
int (*go_lock) (struct gfs2_holder *gh);
void (*go_unlock) (struct gfs2_holder *gh);
void (*go_callback) (struct gfs2_glock *gl, unsigned int state);
void (*go_greedy) (struct gfs2_glock *gl);
const int go_type;
};
......@@ -120,7 +117,6 @@ enum {
HIF_MUTEX = 0,
HIF_PROMOTE = 1,
HIF_DEMOTE = 2,
HIF_GREEDY = 3,
/* States */
HIF_ALLOCED = 4,
......@@ -128,6 +124,7 @@ enum {
HIF_HOLDER = 6,
HIF_FIRST = 7,
HIF_ABORTED = 9,
HIF_WAIT = 10,
};
struct gfs2_holder {
......@@ -140,17 +137,14 @@ struct gfs2_holder {
int gh_error;
unsigned long gh_iflags;
struct completion gh_wait;
unsigned long gh_ip;
};
enum {
GLF_LOCK = 1,
GLF_STICKY = 2,
GLF_PREFETCH = 3,
GLF_DIRTY = 5,
GLF_SKIP_WAITERS2 = 6,
GLF_GREEDY = 7,
};
struct gfs2_glock {
......@@ -167,7 +161,7 @@ struct gfs2_glock {
unsigned long gl_ip;
struct list_head gl_holders;
struct list_head gl_waiters1; /* HIF_MUTEX */
struct list_head gl_waiters2; /* HIF_DEMOTE, HIF_GREEDY */
struct list_head gl_waiters2; /* HIF_DEMOTE */
struct list_head gl_waiters3; /* HIF_PROMOTE */
const struct gfs2_glock_operations *gl_ops;
......@@ -236,7 +230,6 @@ struct gfs2_inode {
spinlock_t i_spin;
struct rw_semaphore i_rw_mutex;
unsigned int i_greedy;
unsigned long i_last_pfault;
struct buffer_head *i_cache[GFS2_MAX_META_HEIGHT];
......@@ -418,17 +411,12 @@ struct gfs2_tune {
unsigned int gt_atime_quantum; /* Min secs between atime updates */
unsigned int gt_new_files_jdata;
unsigned int gt_new_files_directio;
unsigned int gt_max_atomic_write; /* Split big writes into this size */
unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */
unsigned int gt_lockdump_size;
unsigned int gt_stall_secs; /* Detects trouble! */
unsigned int gt_complain_secs;
unsigned int gt_reclaim_limit; /* Max num of glocks in reclaim list */
unsigned int gt_entries_per_readdir;
unsigned int gt_prefetch_secs; /* Usage window for prefetched glocks */
unsigned int gt_greedy_default;
unsigned int gt_greedy_quantum;
unsigned int gt_greedy_max;
unsigned int gt_statfs_quantum;
unsigned int gt_statfs_slow;
};
......
......@@ -287,10 +287,8 @@ int gfs2_dinode_dealloc(struct gfs2_inode *ip)
*
* Returns: errno
*/
int gfs2_change_nlink(struct gfs2_inode *ip, int diff)
{
struct gfs2_sbd *sdp = ip->i_inode.i_sb->s_fs_info;
struct buffer_head *dibh;
u32 nlink;
int error;
......@@ -315,42 +313,34 @@ int gfs2_change_nlink(struct gfs2_inode *ip, int diff)
else
drop_nlink(&ip->i_inode);
ip->i_inode.i_ctime.tv_sec = get_seconds();
ip->i_inode.i_ctime = CURRENT_TIME_SEC;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
gfs2_dinode_out(ip, dibh->b_data);
brelse(dibh);
mark_inode_dirty(&ip->i_inode);
if (ip->i_inode.i_nlink == 0) {
struct gfs2_rgrpd *rgd;
struct gfs2_holder ri_gh, rg_gh;
error = gfs2_rindex_hold(sdp, &ri_gh);
if (error)
goto out;
error = -EIO;
rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr);
if (!rgd)
goto out_norgrp;
error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &rg_gh);
if (error)
goto out_norgrp;
if (ip->i_inode.i_nlink == 0)
gfs2_unlink_di(&ip->i_inode); /* mark inode unlinked */
gfs2_glock_dq_uninit(&rg_gh);
out_norgrp:
gfs2_glock_dq_uninit(&ri_gh);
}
out:
return error;
}
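/*
 * Note on the hunk above: marking an inode unlinked needs the rindex and
 * resource group glocks, and taking them here, while callers already hold
 * directory and inode glocks, is where the unlink deadlock came from.
 * The callers (unlink, rmdir, rename, seen below) now look up the
 * resource group themselves and acquire every glock up front.
 */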
struct inode *gfs2_lookup_simple(struct inode *dip, const char *name)
{
struct qstr qstr;
struct inode *inode;
gfs2_str2qstr(&qstr, name);
return gfs2_lookupi(dip, &qstr, 1, NULL);
inode = gfs2_lookupi(dip, &qstr, 1, NULL);
/* gfs2_lookupi has inconsistent callers: VFS-related
* routines expect NULL when no entry is found, while
* gfs2_lookup_simple callers expect -ENOENT
* and do not check for NULL.
*/
if (inode == NULL)
return ERR_PTR(-ENOENT);
else
return inode;
}
......@@ -361,8 +351,10 @@ struct inode *gfs2_lookup_simple(struct inode *dip, const char *name)
* @is_root: If 1, ignore the caller's permissions
* @i_gh: An uninitialized holder for the new inode glock
*
* There will always be a vnode (Linux VFS inode) for the d_gh inode unless
* @is_root is true.
* This can be called via the VFS filldir function when NFS is doing
* a readdirplus and the inode it's intending to stat isn't
* already in cache. In this case we must not take the directory glock
* again, since the readdir call will have already taken that lock.
*
* Returns: errno
*/
......@@ -375,8 +367,9 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
struct gfs2_holder d_gh;
struct gfs2_inum_host inum;
unsigned int type;
int error = 0;
int error;
struct inode *inode = NULL;
int unlock = 0;
if (!name->len || name->len > GFS2_FNAMESIZE)
return ERR_PTR(-ENAMETOOLONG);
......@@ -388,9 +381,12 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
return dir;
}
error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
if (error)
return ERR_PTR(error);
if (gfs2_glock_is_locked_by_me(dip->i_gl) == 0) {
error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
if (error)
return ERR_PTR(error);
unlock = 1;
}
if (!is_root) {
error = permission(dir, MAY_EXEC, NULL);
......@@ -405,10 +401,11 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
inode = gfs2_inode_lookup(sb, &inum, type);
out:
gfs2_glock_dq_uninit(&d_gh);
if (unlock)
gfs2_glock_dq_uninit(&d_gh);
if (error == -ENOENT)
return NULL;
return inode;
return inode ? inode : ERR_PTR(error);
}
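/*
 * The gfs2_glock_is_locked_by_me()/unlock-flag pattern above: only take
 * the directory glock when this task does not already hold it, and only
 * drop it on exit if we were the ones who took it.  That is what lets
 * gfs2_lookupi() be re-entered from the NFS readdirplus path while the
 * readdir code is still holding the directory glock.
 */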
static int pick_formal_ino_1(struct gfs2_sbd *sdp, u64 *formal_ino)
......
......@@ -104,15 +104,9 @@ int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...)
vprintk(fmt, args);
va_end(args);
fs_err(sdp, "about to withdraw from the cluster\n");
fs_err(sdp, "about to withdraw this file system\n");
BUG_ON(sdp->sd_args.ar_debug);
fs_err(sdp, "waiting for outstanding I/O\n");
/* FIXME: suspend dm device so outstanding bios complete
and all further io requests fail */
fs_err(sdp, "telling LM to withdraw\n");
gfs2_withdraw_lockproto(&sdp->sd_lockstruct);
fs_err(sdp, "withdrawn\n");
......
......@@ -36,7 +36,7 @@
#define GDLM_STRNAME_BYTES 24
#define GDLM_LVB_SIZE 32
#define GDLM_DROP_COUNT 50000
#define GDLM_DROP_COUNT 200000
#define GDLM_DROP_PERIOD 60
#define GDLM_NAME_LEN 128
......
......@@ -11,9 +11,6 @@
#include "lock_dlm.h"
extern int gdlm_drop_count;
extern int gdlm_drop_period;
extern struct lm_lockops gdlm_ops;
static int __init init_lock_dlm(void)
......@@ -40,9 +37,6 @@ static int __init init_lock_dlm(void)
return error;
}
gdlm_drop_count = GDLM_DROP_COUNT;
gdlm_drop_period = GDLM_DROP_PERIOD;
printk(KERN_INFO
"Lock_DLM (built %s %s) installed\n", __DATE__, __TIME__);
return 0;
......
......@@ -9,8 +9,6 @@
#include "lock_dlm.h"
int gdlm_drop_count;
int gdlm_drop_period;
const struct lm_lockops gdlm_ops;
......@@ -24,8 +22,8 @@ static struct gdlm_ls *init_gdlm(lm_callback_t cb, struct gfs2_sbd *sdp,
if (!ls)
return NULL;
ls->drop_locks_count = gdlm_drop_count;
ls->drop_locks_period = gdlm_drop_period;
ls->drop_locks_count = GDLM_DROP_COUNT;
ls->drop_locks_period = GDLM_DROP_PERIOD;
ls->fscb = cb;
ls->sdp = sdp;
ls->fsflags = flags;
......
......@@ -116,6 +116,17 @@ static ssize_t recover_status_show(struct gdlm_ls *ls, char *buf)
return sprintf(buf, "%d\n", ls->recover_jid_status);
}
static ssize_t drop_count_show(struct gdlm_ls *ls, char *buf)
{
return sprintf(buf, "%d\n", ls->drop_locks_count);
}
static ssize_t drop_count_store(struct gdlm_ls *ls, const char *buf, size_t len)
{
ls->drop_locks_count = simple_strtol(buf, NULL, 0);
return len;
}
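/*
 * With the show/store pair above wired to a sysfs attribute (see the
 * GDLM_ATTR(drop_count, 0644, ...) line below), the limit becomes
 * runtime-tunable.  As an illustration only (the exact sysfs path is an
 * assumption, not something this patch shows), something like
 *
 *     echo 200000 > /sys/fs/gfs2/<fsname>/lock_module/drop_count
 *
 * would update ls->drop_locks_count; simple_strtol(buf, NULL, 0) accepts
 * plain decimal as well as 0x-prefixed hex.
 */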
struct gdlm_attr {
struct attribute attr;
ssize_t (*show)(struct gdlm_ls *, char *);
......@@ -135,6 +146,7 @@ GDLM_ATTR(first_done, 0444, first_done_show, NULL);
GDLM_ATTR(recover, 0644, recover_show, recover_store);
GDLM_ATTR(recover_done, 0444, recover_done_show, NULL);
GDLM_ATTR(recover_status, 0444, recover_status_show, NULL);
GDLM_ATTR(drop_count, 0644, drop_count_show, drop_count_store);
static struct attribute *gdlm_attrs[] = {
&gdlm_attr_proto_name.attr,
......@@ -147,6 +159,7 @@ static struct attribute *gdlm_attrs[] = {
&gdlm_attr_recover.attr,
&gdlm_attr_recover_done.attr,
&gdlm_attr_recover_status.attr,
&gdlm_attr_drop_count.attr,
NULL,
};
......
......@@ -69,13 +69,16 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
struct gfs2_trans *tr;
if (!list_empty(&bd->bd_list_tr))
gfs2_log_lock(sdp);
if (!list_empty(&bd->bd_list_tr)) {
gfs2_log_unlock(sdp);
return;
}
tr = current->journal_info;
tr->tr_touched = 1;
tr->tr_num_buf++;
list_add(&bd->bd_list_tr, &tr->tr_list_buf);
gfs2_log_unlock(sdp);
if (!list_empty(&le->le_list))
return;
......@@ -84,7 +87,6 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
gfs2_meta_check(sdp, bd->bd_bh);
gfs2_pin(sdp, bd->bd_bh);
gfs2_log_lock(sdp);
sdp->sd_log_num_buf++;
list_add(&le->le_list, &sdp->sd_log_le_buf);
......@@ -98,11 +100,13 @@ static void buf_lo_incore_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
struct list_head *head = &tr->tr_list_buf;
struct gfs2_bufdata *bd;
gfs2_log_lock(sdp);
while (!list_empty(head)) {
bd = list_entry(head->next, struct gfs2_bufdata, bd_list_tr);
list_del_init(&bd->bd_list_tr);
tr->tr_num_buf--;
}
gfs2_log_unlock(sdp);
gfs2_assert_warn(sdp, !tr->tr_num_buf);
}
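/*
 * The gfs2_log_lock()/gfs2_log_unlock() pairs added in the two hunks
 * above close a race on bd_list_tr: the list_empty() test and the
 * list_add()/list_del_init() now happen under the same lock, so two
 * contexts can no longer interleave on tr_list_buf and corrupt it.
 */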
......@@ -462,13 +466,17 @@ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
struct address_space *mapping = bd->bd_bh->b_page->mapping;
struct gfs2_inode *ip = GFS2_I(mapping->host);
gfs2_log_lock(sdp);
tr->tr_touched = 1;
if (list_empty(&bd->bd_list_tr) &&
(ip->i_di.di_flags & GFS2_DIF_JDATA)) {
tr->tr_num_buf++;
list_add(&bd->bd_list_tr, &tr->tr_list_buf);
gfs2_log_unlock(sdp);
gfs2_pin(sdp, bd->bd_bh);
tr->tr_num_buf_new++;
} else {
gfs2_log_unlock(sdp);
}
gfs2_trans_add_gl(bd->bd_gl);
gfs2_log_lock(sdp);
......
......@@ -16,6 +16,7 @@
#include <linux/pagevec.h>
#include <linux/mpage.h>
#include <linux/fs.h>
#include <linux/writeback.h>
#include <linux/gfs2_ondisk.h>
#include <linux/lm_interface.h>
......@@ -156,6 +157,32 @@ static int gfs2_writepage(struct page *page, struct writeback_control *wbc)
return 0;
}
/**
* gfs2_writepages - Write a bunch of dirty pages back to disk
* @mapping: The mapping to write
* @wbc: Write-back control
*
* For journaled files and/or ordered writes this just falls back to the
* kernel's default writepages path for now. We will probably want to change
* that eventually (i.e. when we look at allocate on flush).
*
* For the data=writeback case, though, we can already ignore buffer heads
* and write whole extents at once, which greatly reduces both the number
* of I/O requests we send and the number of bmap calls we make.
*/
static int gfs2_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
struct inode *inode = mapping->host;
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);
if (sdp->sd_args.ar_data == GFS2_DATA_WRITEBACK && !gfs2_is_jdata(ip))
return mpage_writepages(mapping, wbc, gfs2_get_block_noalloc);
return generic_writepages(mapping, wbc);
}
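/*
 * Put differently: the mpage_writepages() fast path is taken only when
 * the filesystem is mounted with data=writeback (sd_args.ar_data ==
 * GFS2_DATA_WRITEBACK) and the inode is not journaled data; every other
 * combination still goes through generic_writepages() and the
 * per-buffer-head gfs2_writepage() above.
 */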
/**
* stuffed_readpage - Fill in a Linux page with stuffed file data
* @ip: the inode
......@@ -256,7 +283,7 @@ static int gfs2_readpage(struct file *file, struct page *page)
* the page lock and the glock) and return having done no I/O. It's
* obviously not something we'd want to do on too regular a basis.
* Any I/O we ignore at this time will be done via readpage later.
* 2. We have to handle stuffed files here too.
* 2. We don't handle stuffed files here; we let readpage do the honours.
* 3. mpage_readpages() does most of the heavy lifting in the common case.
* 4. gfs2_get_block() is relied upon to set BH_Boundary in the right places.
* 5. We use LM_FLAG_TRY_1CB here, effectively we then have lock-ahead as
......@@ -269,8 +296,7 @@ static int gfs2_readpages(struct file *file, struct address_space *mapping,
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);
struct gfs2_holder gh;
unsigned page_idx;
int ret;
int ret = 0;
int do_unlock = 0;
if (likely(file != &gfs2_internal_file_sentinel)) {
......@@ -289,29 +315,8 @@ static int gfs2_readpages(struct file *file, struct address_space *mapping,
goto out_unlock;
}
skip_lock:
if (gfs2_is_stuffed(ip)) {
struct pagevec lru_pvec;
pagevec_init(&lru_pvec, 0);
for (page_idx = 0; page_idx < nr_pages; page_idx++) {
struct page *page = list_entry(pages->prev, struct page, lru);
prefetchw(&page->flags);
list_del(&page->lru);
if (!add_to_page_cache(page, mapping,
page->index, GFP_KERNEL)) {
ret = stuffed_readpage(ip, page);
unlock_page(page);
if (!pagevec_add(&lru_pvec, page))
__pagevec_lru_add(&lru_pvec);
} else {
page_cache_release(page);
}
}
pagevec_lru_add(&lru_pvec);
ret = 0;
} else {
/* What we really want to do .... */
if (!gfs2_is_stuffed(ip))
ret = mpage_readpages(mapping, pages, nr_pages, gfs2_get_block);
}
if (do_unlock) {
gfs2_glock_dq_m(1, &gh);
......@@ -356,8 +361,10 @@ static int gfs2_prepare_write(struct file *file, struct page *page,
gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME|LM_FLAG_TRY_1CB, &ip->i_gh);
error = gfs2_glock_nq_atime(&ip->i_gh);
if (unlikely(error)) {
if (error == GLR_TRYFAILED)
if (error == GLR_TRYFAILED) {
unlock_page(page);
error = AOP_TRUNCATED_PAGE;
}
goto out_uninit;
}
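/*
 * AOP_TRUNCATED_PAGE tells the caller that the page has been unlocked
 * and must be looked up and locked again before the operation is
 * retried, which is why the unlock_page() added above has to come
 * first; returning with the page still locked would deadlock the retry.
 */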
......@@ -594,6 +601,36 @@ static void gfs2_invalidatepage(struct page *page, unsigned long offset)
return;
}
/**
* gfs2_ok_for_dio - check that dio is valid on this file
* @ip: The inode
* @rw: READ or WRITE
* @offset: The offset at which we are reading or writing
*
* Returns: 0 (to ignore the i/o request and thus fall back to buffered i/o)
* 1 (to accept the i/o request)
*/
static int gfs2_ok_for_dio(struct gfs2_inode *ip, int rw, loff_t offset)
{
/*
* Should we return an error here? I can't see that O_DIRECT for
* a journaled file makes any sense. For now we'll silently fall
* back to buffered I/O; likewise for stuffed
* files, since they are (a) small and (b) unaligned.
*/
if (gfs2_is_jdata(ip))
return 0;
if (gfs2_is_stuffed(ip))
return 0;
if (offset > i_size_read(&ip->i_inode))
return 0;
return 1;
}
static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
const struct iovec *iov, loff_t offset,
unsigned long nr_segs)
......@@ -604,42 +641,28 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
struct gfs2_holder gh;
int rv;
if (rw == READ)
mutex_lock(&inode->i_mutex);
/*
* Shared lock, even if it's a write, since we do no allocation
* on this path. All we need to change is atime.
* Deferred lock, even if it's a write, since we do no allocation
* on this path. All we need to change is atime, and this lock mode
* ensures that other nodes have flushed their buffered read caches
* (i.e. their page cache entries for this inode). We do not,
* unfortunately, have the option of only flushing a range like
* the VFS does.
*/
gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, GL_ATIME, &gh);
rv = gfs2_glock_nq_atime(&gh);
if (rv)
goto out;
if (offset > i_size_read(inode))
goto out;
/*
* Should we return an error here? I can't see that O_DIRECT for
* a journaled file makes any sense. For now we'll silently fall
* back to buffered I/O; likewise for stuffed
* files, since they are (a) small and (b) unaligned.
*/
if (gfs2_is_jdata(ip))
goto out;
if (gfs2_is_stuffed(ip))
goto out;
rv = blockdev_direct_IO_own_locking(rw, iocb, inode,
inode->i_sb->s_bdev,
iov, offset, nr_segs,
gfs2_get_block_direct, NULL);
return rv;
rv = gfs2_ok_for_dio(ip, rw, offset);
if (rv != 1)
goto out; /* dio not valid, fall back to buffered i/o */
rv = blockdev_direct_IO_no_locking(rw, iocb, inode, inode->i_sb->s_bdev,
iov, offset, nr_segs,
gfs2_get_block_direct, NULL);
out:
gfs2_glock_dq_m(1, &gh);
gfs2_holder_uninit(&gh);
if (rw == READ)
mutex_unlock(&inode->i_mutex);
return rv;
}
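/*
 * blockdev_direct_IO_no_locking() is the variant that leaves i_mutex
 * alone, which is why the explicit mutex_lock/mutex_unlock for reads
 * could go: exclusion is now provided by the LM_ST_DEFERRED glock taken
 * above rather than by the VFS-level mutex.
 */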
......@@ -763,6 +786,7 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
const struct address_space_operations gfs2_file_aops = {
.writepage = gfs2_writepage,
.writepages = gfs2_writepages,
.readpage = gfs2_readpage,
.readpages = gfs2_readpages,
.sync_page = block_sync_page,
......
......@@ -46,6 +46,7 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
struct gfs2_inum_host inum;
unsigned int type;
int error;
int had_lock = 0;
if (inode && is_bad_inode(inode))
goto invalid;
......@@ -53,9 +54,12 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
if (sdp->sd_args.ar_localcaching)
goto valid;
error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
if (error)
goto fail;
had_lock = gfs2_glock_is_locked_by_me(dip->i_gl);
if (!had_lock) {
error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
if (error)
goto fail;
}
error = gfs2_dir_search(parent->d_inode, &dentry->d_name, &inum, &type);
switch (error) {
......@@ -82,13 +86,15 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
}
valid_gunlock:
gfs2_glock_dq_uninit(&d_gh);
if (!had_lock)
gfs2_glock_dq_uninit(&d_gh);
valid:
dput(parent);
return 1;
invalid_gunlock:
gfs2_glock_dq_uninit(&d_gh);
if (!had_lock)
gfs2_glock_dq_uninit(&d_gh);
invalid:
if (inode && S_ISDIR(inode->i_mode)) {
if (have_submounts(dentry))
......
......@@ -22,6 +22,7 @@
#include "glock.h"
#include "glops.h"
#include "inode.h"
#include "ops_dentry.h"
#include "ops_export.h"
#include "rgrp.h"
#include "util.h"
......@@ -112,13 +113,12 @@ struct get_name_filldir {
char *name;
};
static int get_name_filldir(void *opaque, const char *name, unsigned int length,
u64 offset, struct gfs2_inum_host *inum,
unsigned int type)
static int get_name_filldir(void *opaque, const char *name, int length,
loff_t offset, u64 inum, unsigned int type)
{
struct get_name_filldir *gnfd = (struct get_name_filldir *)opaque;
struct get_name_filldir *gnfd = opaque;
if (!gfs2_inum_equal(inum, &gnfd->inum))
if (inum != gnfd->inum.no_addr)
return 0;
memcpy(gnfd->name, name, length);
......@@ -189,6 +189,7 @@ static struct dentry *gfs2_get_parent(struct dentry *child)
return ERR_PTR(-ENOMEM);
}
dentry->d_op = &gfs2_dops;
return dentry;
}
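/*
 * Assigning dentry->d_op here (and again in gfs2_get_dentry() below)
 * gives dentries built on the NFS export path the same gfs2_dops as
 * those from ordinary lookups, so revalidation behaves identically on
 * both paths.
 */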
......@@ -215,8 +216,7 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj)
}
error = gfs2_glock_nq_num(sdp, inum->no_addr, &gfs2_inode_glops,
LM_ST_SHARED, LM_FLAG_ANY | GL_LOCAL_EXCL,
&i_gh);
LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
if (error)
return ERR_PTR(error);
......@@ -269,6 +269,7 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj)
return ERR_PTR(-ENOMEM);
}
dentry->d_op = &gfs2_dops;
return dentry;
fail_rgd:
......
......@@ -43,15 +43,6 @@
#include "util.h"
#include "eaops.h"
/* For regular, non-NFS */
struct filldir_reg {
struct gfs2_sbd *fdr_sbd;
int fdr_prefetch;
filldir_t fdr_filldir;
void *fdr_opaque;
};
/*
* Most fields left uninitialised to catch anybody who tries to
* use them. f_flags set to prevent file_accessed() from touching
......@@ -127,41 +118,6 @@ static loff_t gfs2_llseek(struct file *file, loff_t offset, int origin)
return error;
}
/**
* filldir_func - Report a directory entry to the caller of gfs2_dir_read()
* @opaque: opaque data used by the function
* @name: the name of the directory entry
* @length: the length of the name
* @offset: the entry's offset in the directory
* @inum: the inode number the entry points to
* @type: the type of inode the entry points to
*
* Returns: 0 on success, 1 if buffer full
*/
static int filldir_func(void *opaque, const char *name, unsigned int length,
u64 offset, struct gfs2_inum_host *inum,
unsigned int type)
{
struct filldir_reg *fdr = (struct filldir_reg *)opaque;
struct gfs2_sbd *sdp = fdr->fdr_sbd;
int error;
error = fdr->fdr_filldir(fdr->fdr_opaque, name, length, offset,
inum->no_addr, type);
if (error)
return 1;
if (fdr->fdr_prefetch && !(length == 1 && *name == '.')) {
gfs2_glock_prefetch_num(sdp, inum->no_addr, &gfs2_inode_glops,
LM_ST_SHARED, LM_FLAG_TRY | LM_FLAG_ANY);
gfs2_glock_prefetch_num(sdp, inum->no_addr, &gfs2_iopen_glops,
LM_ST_SHARED, LM_FLAG_TRY);
}
return 0;
}
/**
* gfs2_readdir - Read directory entries from a directory
* @file: The directory to read from
......@@ -175,16 +131,10 @@ static int gfs2_readdir(struct file *file, void *dirent, filldir_t filldir)
{
struct inode *dir = file->f_mapping->host;
struct gfs2_inode *dip = GFS2_I(dir);
struct filldir_reg fdr;
struct gfs2_holder d_gh;
u64 offset = file->f_pos;
int error;
fdr.fdr_sbd = GFS2_SB(dir);
fdr.fdr_prefetch = 1;
fdr.fdr_filldir = filldir;
fdr.fdr_opaque = dirent;
gfs2_holder_init(dip->i_gl, LM_ST_SHARED, GL_ATIME, &d_gh);
error = gfs2_glock_nq_atime(&d_gh);
if (error) {
......@@ -192,7 +142,7 @@ static int gfs2_readdir(struct file *file, void *dirent, filldir_t filldir)
return error;
}
error = gfs2_dir_read(dir, &offset, &fdr, filldir_func);
error = gfs2_dir_read(dir, &offset, dirent, filldir);
gfs2_glock_dq_uninit(&d_gh);
......
......@@ -264,13 +264,23 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
struct gfs2_inode *dip = GFS2_I(dir);
struct gfs2_sbd *sdp = GFS2_SB(dir);
struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
struct gfs2_holder ghs[2];
struct gfs2_holder ghs[3];
struct gfs2_rgrpd *rgd;
struct gfs2_holder ri_gh;
int error;
error = gfs2_rindex_hold(sdp, &ri_gh);
if (error)
return error;
gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
error = gfs2_glock_nq_m(2, ghs);
rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr);
gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
error = gfs2_glock_nq_m(3, ghs);
if (error)
goto out;
......@@ -291,10 +301,12 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
out_end_trans:
gfs2_trans_end(sdp);
out_gunlock:
gfs2_glock_dq_m(2, ghs);
gfs2_glock_dq_m(3, ghs);
out:
gfs2_holder_uninit(ghs);
gfs2_holder_uninit(ghs + 1);
gfs2_holder_uninit(ghs + 2);
gfs2_glock_dq_uninit(&ri_gh);
return error;
}
......@@ -449,13 +461,22 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry)
struct gfs2_inode *dip = GFS2_I(dir);
struct gfs2_sbd *sdp = GFS2_SB(dir);
struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
struct gfs2_holder ghs[2];
struct gfs2_holder ghs[3];
struct gfs2_rgrpd *rgd;
struct gfs2_holder ri_gh;
int error;
error = gfs2_rindex_hold(sdp, &ri_gh);
if (error)
return error;
gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
error = gfs2_glock_nq_m(2, ghs);
rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr);
gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
error = gfs2_glock_nq_m(3, ghs);
if (error)
goto out;
......@@ -483,10 +504,12 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry)
gfs2_trans_end(sdp);
out_gunlock:
gfs2_glock_dq_m(2, ghs);
gfs2_glock_dq_m(3, ghs);
out:
gfs2_holder_uninit(ghs);
gfs2_holder_uninit(ghs + 1);
gfs2_holder_uninit(ghs + 2);
gfs2_glock_dq_uninit(&ri_gh);
return error;
}
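/*
 * In both hunks above all three glocks (directory, inode, resource
 * group) are handed to one gfs2_glock_nq_m() call instead of being
 * acquired piecemeal, so the multi-lock path can take them in a single,
 * consistent order; that, rather than any per-lock change, is what
 * removes the unlink/rmdir deadlock.
 */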
......@@ -547,7 +570,8 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
struct gfs2_inode *ip = GFS2_I(odentry->d_inode);
struct gfs2_inode *nip = NULL;
struct gfs2_sbd *sdp = GFS2_SB(odir);
struct gfs2_holder ghs[4], r_gh;
struct gfs2_holder ghs[5], r_gh;
struct gfs2_rgrpd *nrgd;
unsigned int num_gh;
int dir_rename = 0;
int alloc_required;
......@@ -587,6 +611,13 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
if (nip) {
gfs2_holder_init(nip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
num_gh++;
/* Grab the resource group lock for unlink flag twiddling.
* This is the case where the target file already exists,
* so we unlink it before doing the rename.
*/
nrgd = gfs2_blk2rgrpd(sdp, nip->i_num.no_addr);
if (nrgd)
gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++);
}
error = gfs2_glock_nq_m(num_gh, ghs);
......@@ -684,12 +715,12 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
al->al_rgd->rd_ri.ri_length +
4 * RES_DINODE + 4 * RES_LEAF +
RES_STATFS + RES_QUOTA, 0);
RES_STATFS + RES_QUOTA + 4, 0);
if (error)
goto out_ipreserv;
} else {
error = gfs2_trans_begin(sdp, 4 * RES_DINODE +
5 * RES_LEAF, 0);
5 * RES_LEAF + 4, 0);
if (error)
goto out_gunlock;
}
......@@ -728,7 +759,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
error = gfs2_meta_inode_buffer(ip, &dibh);
if (error)
goto out_end_trans;
ip->i_inode.i_ctime.tv_sec = get_seconds();
ip->i_inode.i_ctime = CURRENT_TIME_SEC;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
gfs2_dinode_out(ip, dibh->b_data);
brelse(dibh);
......@@ -1018,7 +1049,7 @@ static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
}
generic_fillattr(inode, stat);
if (unlock);
if (unlock)
gfs2_glock_dq_uninit(&gh);
return 0;
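/*
 * The stray semicolon removed above made "if (unlock);" an empty
 * statement, so gfs2_glock_dq_uninit() ran even when the glock was
 * already held and no holder had been initialised here; dropping the
 * semicolon makes the release properly conditional.
 */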
......
......@@ -173,6 +173,9 @@ static void gfs2_write_super_lockfs(struct super_block *sb)
struct gfs2_sbd *sdp = sb->s_fs_info;
int error;
if (test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
return;
for (;;) {
error = gfs2_freeze_fs(sdp);
if (!error)
......@@ -426,6 +429,12 @@ static void gfs2_delete_inode(struct inode *inode)
}
error = gfs2_dinode_dealloc(ip);
/*
* Must do this before unlock to avoid trying to write back
* potentially dirty data now that the inode no longer exists
* on disk.
*/
truncate_inode_pages(&inode->i_data, 0);
out_unlock:
gfs2_glock_dq(&ip->i_iopen_gh);
......@@ -443,14 +452,12 @@ static void gfs2_delete_inode(struct inode *inode)
static struct inode *gfs2_alloc_inode(struct super_block *sb)
{
struct gfs2_sbd *sdp = sb->s_fs_info;
struct gfs2_inode *ip;
ip = kmem_cache_alloc(gfs2_inode_cachep, GFP_KERNEL);
if (ip) {
ip->i_flags = 0;
ip->i_gl = NULL;
ip->i_greedy = gfs2_tune_get(sdp, gt_greedy_default);
ip->i_last_pfault = jiffies;
}
return &ip->i_inode;
......
......@@ -28,34 +28,13 @@
#include "trans.h"
#include "util.h"
static void pfault_be_greedy(struct gfs2_inode *ip)
{
unsigned int time;
spin_lock(&ip->i_spin);
time = ip->i_greedy;
ip->i_last_pfault = jiffies;
spin_unlock(&ip->i_spin);
igrab(&ip->i_inode);
if (gfs2_glock_be_greedy(ip->i_gl, time))
iput(&ip->i_inode);
}
static struct page *gfs2_private_nopage(struct vm_area_struct *area,
unsigned long address, int *type)
{
struct gfs2_inode *ip = GFS2_I(area->vm_file->f_mapping->host);
struct page *result;
set_bit(GIF_PAGED, &ip->i_flags);
result = filemap_nopage(area, address, type);
if (result && result != NOPAGE_OOM)
pfault_be_greedy(ip);
return result;
return filemap_nopage(area, address, type);
}
static int alloc_page_backing(struct gfs2_inode *ip, struct page *page)
......@@ -167,7 +146,6 @@ static struct page *gfs2_sharewrite_nopage(struct vm_area_struct *area,
set_page_dirty(result);
}
pfault_be_greedy(ip);
out:
gfs2_glock_dq_uninit(&i_gh);
......
......@@ -71,17 +71,12 @@ void gfs2_tune_init(struct gfs2_tune *gt)
gt->gt_atime_quantum = 3600;
gt->gt_new_files_jdata = 0;
gt->gt_new_files_directio = 0;
gt->gt_max_atomic_write = 4 << 20;
gt->gt_max_readahead = 1 << 18;
gt->gt_lockdump_size = 131072;
gt->gt_stall_secs = 600;
gt->gt_complain_secs = 10;
gt->gt_reclaim_limit = 5000;
gt->gt_entries_per_readdir = 32;
gt->gt_prefetch_secs = 10;
gt->gt_greedy_default = HZ / 10;
gt->gt_greedy_quantum = HZ / 40;
gt->gt_greedy_max = HZ / 4;
gt->gt_statfs_quantum = 30;
gt->gt_statfs_slow = 0;
}
......@@ -359,8 +354,7 @@ int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh)
mutex_lock(&sdp->sd_jindex_mutex);
for (;;) {
error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED,
GL_LOCAL_EXCL, ji_gh);
error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, ji_gh);
if (error)
break;
......@@ -529,8 +523,7 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp)
struct gfs2_log_header_host head;
int error;
error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED,
GL_LOCAL_EXCL, &t_gh);
error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, 0, &t_gh);
if (error)
return error;
......@@ -583,9 +576,8 @@ int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
gfs2_quota_sync(sdp);
gfs2_statfs_sync(sdp);
error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED,
GL_LOCAL_EXCL | GL_NOCACHE,
&t_gh);
error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE,
&t_gh);
if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
return error;
......
......@@ -436,17 +436,12 @@ TUNE_ATTR(atime_quantum, 0);
TUNE_ATTR(max_readahead, 0);
TUNE_ATTR(complain_secs, 0);
TUNE_ATTR(reclaim_limit, 0);
TUNE_ATTR(prefetch_secs, 0);
TUNE_ATTR(statfs_slow, 0);
TUNE_ATTR(new_files_jdata, 0);
TUNE_ATTR(new_files_directio, 0);
TUNE_ATTR(quota_simul_sync, 1);
TUNE_ATTR(quota_cache_secs, 1);
TUNE_ATTR(max_atomic_write, 1);
TUNE_ATTR(stall_secs, 1);
TUNE_ATTR(greedy_default, 1);
TUNE_ATTR(greedy_quantum, 1);
TUNE_ATTR(greedy_max, 1);
TUNE_ATTR(statfs_quantum, 1);
TUNE_ATTR_DAEMON(scand_secs, scand_process);
TUNE_ATTR_DAEMON(recoverd_secs, recoverd_process);
......@@ -465,15 +460,10 @@ static struct attribute *tune_attrs[] = {
&tune_attr_max_readahead.attr,
&tune_attr_complain_secs.attr,
&tune_attr_reclaim_limit.attr,
&tune_attr_prefetch_secs.attr,
&tune_attr_statfs_slow.attr,
&tune_attr_quota_simul_sync.attr,
&tune_attr_quota_cache_secs.attr,
&tune_attr_max_atomic_write.attr,
&tune_attr_stall_secs.attr,
&tune_attr_greedy_default.attr,
&tune_attr_greedy_quantum.attr,
&tune_attr_greedy_max.attr,
&tune_attr_statfs_quantum.attr,
&tune_attr_scand_secs.attr,
&tune_attr_recoverd_secs.attr,
......