Commit f26e51f6 authored by Linus Torvalds

Merge git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-2.6-nmw

* git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-2.6-nmw: (51 commits)
  [DLM] block dlm_recv in recovery transition
  [DLM] don't overwrite castparam if it's NULL
  [GFS2] Get superblock a different way
  [GFS2] Don't try to remove buffers that don't exist
  [GFS2] Alternate gfs2_iget to avoid looking up inodes being freed
  [GFS2] Data corruption fix
  [GFS2] Clean up journaled data writing
  [GFS2] GFS2: chmod hung - fix race in thread creation
  [DLM] Make dlm_sendd cond_resched more
  [GFS2] Move inode deletion out of blocking_cb
  [GFS2] flocks from same process trip kernel BUG at fs/gfs2/glock.c:1118!
  [GFS2] Clean up gfs2_trans_add_revoke()
  [GFS2] Use slab operations for all gfs2_bufdata allocations
  [GFS2] Replace revoke structure with bufdata structure
  [GFS2] Fix ordering of dirty/journal for ordered buffer unstuffing
  [GFS2] Clean up ordered write code
  [GFS2] Move pin/unpin into lops.c, clean up locking
  [GFS2] Don't mark jdata dirty in gfs2_unstuffer_page()
  [GFS2] Introduce gfs2_remove_from_ail
  [GFS2] Correct lock ordering in unlink
  ...
parents 1462222b c36258b5
......@@ -491,6 +491,7 @@ struct dlm_ls {
uint64_t ls_recover_seq;
struct dlm_recover *ls_recover_args;
struct rw_semaphore ls_in_recovery; /* block local requests */
struct rw_semaphore ls_recv_active; /* block dlm_recv */
struct list_head ls_requestqueue;/* queue remote requests */
struct mutex ls_requestqueue_mutex;
char *ls_recover_buf;
......
......@@ -3638,55 +3638,8 @@ static void receive_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms)
dlm_put_lkb(lkb);
}
int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery)
static void _receive_message(struct dlm_ls *ls, struct dlm_message *ms)
{
struct dlm_message *ms = (struct dlm_message *) hd;
struct dlm_ls *ls;
int error = 0;
if (!recovery)
dlm_message_in(ms);
ls = dlm_find_lockspace_global(hd->h_lockspace);
if (!ls) {
log_print("drop message %d from %d for unknown lockspace %d",
ms->m_type, nodeid, hd->h_lockspace);
return -EINVAL;
}
/* recovery may have just ended leaving a bunch of backed-up requests
in the requestqueue; wait while dlm_recoverd clears them */
if (!recovery)
dlm_wait_requestqueue(ls);
/* recovery may have just started while there were a bunch of
in-flight requests -- save them in requestqueue to be processed
after recovery. we can't let dlm_recvd block on the recovery
lock. if dlm_recoverd is calling this function to clear the
requestqueue, it needs to be interrupted (-EINTR) if another
recovery operation is starting. */
while (1) {
if (dlm_locking_stopped(ls)) {
if (recovery) {
error = -EINTR;
goto out;
}
error = dlm_add_requestqueue(ls, nodeid, hd);
if (error == -EAGAIN)
continue;
else {
error = -EINTR;
goto out;
}
}
if (dlm_lock_recovery_try(ls))
break;
schedule();
}
switch (ms->m_type) {
/* messages sent to a master node */
......@@ -3761,17 +3714,90 @@ int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery)
log_error(ls, "unknown message type %d", ms->m_type);
}
dlm_unlock_recovery(ls);
out:
dlm_put_lockspace(ls);
dlm_astd_wake();
return error;
}
/* If the lockspace is in recovery mode (locking stopped), then normal
messages are saved on the requestqueue for processing after recovery is
done. When not in recovery mode, we wait for dlm_recoverd to drain saved
messages off the requestqueue before we process new ones. This occurs right
after recovery completes when we transition from saving all messages on
requestqueue, to processing all the saved messages, to processing new
messages as they arrive. */
/*
* Recovery related
*/
/* Route one normal locking message received for lockspace @ls from @nodeid.
   If locking is currently stopped the message is queued on the requestqueue;
   otherwise we first wait for the requestqueue to empty and then process the
   message directly. */
static void dlm_receive_message(struct dlm_ls *ls, struct dlm_message *ms,
				int nodeid)
{
	struct dlm_header *hd = (struct dlm_header *) ms;

	if (!dlm_locking_stopped(ls)) {
		/* previously saved messages are handled before this one */
		dlm_wait_requestqueue(ls);
		_receive_message(ls, ms);
		return;
	}

	/* locking stopped: save the message instead of processing it */
	dlm_add_requestqueue(ls, nodeid, hd);
}
/* This is called by dlm_recoverd to process messages that were saved on
the requestqueue. Unlike dlm_receive_message(), it passes the message
straight to _receive_message() without checking dlm_locking_stopped() and
without waiting on the requestqueue. */
void dlm_receive_message_saved(struct dlm_ls *ls, struct dlm_message *ms)
{
_receive_message(ls, ms);
}
/* This is called by the midcomms layer when something is received for
the lockspace. It could be either a MSG (normal message sent as part of
standard locking activity) or an RCOM (recovery message sent as part of
lockspace recovery).

hd: header of the received buffer; viewed as a dlm_message or a dlm_rcom
depending on h_cmd.
nodeid: node the buffer was received from; must equal hd->h_nodeid.

Buffers with an unknown h_cmd, a mismatched h_nodeid or an unknown
lockspace are logged and dropped. */
void dlm_receive_buffer(struct dlm_header *hd, int nodeid)
{
struct dlm_message *ms = (struct dlm_message *) hd;
struct dlm_rcom *rc = (struct dlm_rcom *) hd;
struct dlm_ls *ls;
int type = 0;
/* Run the per-kind *_in() routine first; the type field is only read
after it has been applied to the buffer. */
switch (hd->h_cmd) {
case DLM_MSG:
dlm_message_in(ms);
type = ms->m_type;
break;
case DLM_RCOM:
dlm_rcom_in(rc);
type = rc->rc_type;
break;
default:
/* NOTE(review): nodeid is an int but is printed with %u here, while the
other messages in this function use %d for it. */
log_print("invalid h_cmd %d from %u", hd->h_cmd, nodeid);
return;
}
/* the node id claimed in the header must match the actual sender */
if (hd->h_nodeid != nodeid) {
log_print("invalid h_nodeid %d from %d lockspace %x",
hd->h_nodeid, nodeid, hd->h_lockspace);
return;
}
/* lockspace lookup; released by dlm_put_lockspace() at the end */
ls = dlm_find_lockspace_global(hd->h_lockspace);
if (!ls) {
log_print("invalid h_lockspace %x from %d cmd %d type %d",
hd->h_lockspace, nodeid, hd->h_cmd, type);
/* an RCOM status request is still answered even though the lockspace
was not found */
if (hd->h_cmd == DLM_RCOM && type == DLM_RCOM_STATUS)
dlm_send_ls_not_ready(nodeid, rc);
return;
}
/* this rwsem allows dlm_ls_stop() to wait for all dlm_recv threads to
be inactive (in this ls) before transitioning to recovery mode */
down_read(&ls->ls_recv_active);
/* h_cmd was validated by the switch above, so anything that is not
DLM_MSG here is DLM_RCOM */
if (hd->h_cmd == DLM_MSG)
dlm_receive_message(ls, ms, nodeid);
else
dlm_receive_rcom(ls, rc, nodeid);
up_read(&ls->ls_recv_active);
dlm_put_lockspace(ls);
}
static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb)
{
......@@ -4429,7 +4455,8 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
if (lvb_in && ua->lksb.sb_lvbptr)
memcpy(ua->lksb.sb_lvbptr, lvb_in, DLM_USER_LVB_LEN);
ua->castparam = ua_tmp->castparam;
if (ua_tmp->castparam)
ua->castparam = ua_tmp->castparam;
ua->user_lksb = ua_tmp->user_lksb;
error = set_unlock_args(flags, ua, &args);
......@@ -4474,7 +4501,8 @@ int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
goto out;
ua = (struct dlm_user_args *)lkb->lkb_astparam;
ua->castparam = ua_tmp->castparam;
if (ua_tmp->castparam)
ua->castparam = ua_tmp->castparam;
ua->user_lksb = ua_tmp->user_lksb;
error = set_unlock_args(flags, ua, &args);
......
......@@ -16,7 +16,8 @@
void dlm_print_rsb(struct dlm_rsb *r);
void dlm_dump_rsb(struct dlm_rsb *r);
void dlm_print_lkb(struct dlm_lkb *lkb);
int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery);
void dlm_receive_message_saved(struct dlm_ls *ls, struct dlm_message *ms);
void dlm_receive_buffer(struct dlm_header *hd, int nodeid);
int dlm_modes_compat(int mode1, int mode2);
int dlm_find_rsb(struct dlm_ls *ls, char *name, int namelen,
unsigned int flags, struct dlm_rsb **r_ret);
......
......@@ -519,6 +519,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
ls->ls_recover_seq = 0;
ls->ls_recover_args = NULL;
init_rwsem(&ls->ls_in_recovery);
init_rwsem(&ls->ls_recv_active);
INIT_LIST_HEAD(&ls->ls_requestqueue);
mutex_init(&ls->ls_requestqueue_mutex);
mutex_init(&ls->ls_clear_proc_locks);
......
......@@ -334,18 +334,8 @@ static void close_connection(struct connection *con, bool and_other)
con->rx_page = NULL;
}
/* If we are an 'othercon' then NULL the pointer to us
from the parent and tidy ourself up */
if (test_bit(CF_IS_OTHERCON, &con->flags)) {
struct connection *parent = __nodeid2con(con->nodeid, 0);
parent->othercon = NULL;
kmem_cache_free(con_cache, con);
}
else {
/* Parent connections get reused */
con->retries = 0;
mutex_unlock(&con->sock_mutex);
}
con->retries = 0;
mutex_unlock(&con->sock_mutex);
}
/* We only send shutdown messages to nodes that are not part of the cluster */
......@@ -731,6 +721,8 @@ static int tcp_accept_from_sock(struct connection *con)
INIT_WORK(&othercon->swork, process_send_sockets);
INIT_WORK(&othercon->rwork, process_recv_sockets);
set_bit(CF_IS_OTHERCON, &othercon->flags);
}
if (!othercon->sock) {
newcon->othercon = othercon;
othercon->sock = newsock;
newsock->sk->sk_user_data = othercon;
......@@ -1272,14 +1264,15 @@ static void send_to_sock(struct connection *con)
if (len) {
ret = sendpage(con->sock, e->page, offset, len,
msg_flags);
if (ret == -EAGAIN || ret == 0)
if (ret == -EAGAIN || ret == 0) {
cond_resched();
goto out;
}
if (ret <= 0)
goto send_error;
} else {
}
/* Don't starve people filling buffers */
cond_resched();
}
spin_lock(&con->writequeue_lock);
e->offset += ret;
......
......@@ -18,10 +18,6 @@
#include "rcom.h"
#include "config.h"
/*
* Following called by dlm_recoverd thread
*/
static void add_ordered_member(struct dlm_ls *ls, struct dlm_member *new)
{
struct dlm_member *memb = NULL;
......@@ -250,18 +246,30 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
return error;
}
/*
* Following called from lockspace.c
*/
/* Userspace guarantees that dlm_ls_stop() has completed on all nodes before
dlm_ls_start() is called on any of them to start the new recovery. */
int dlm_ls_stop(struct dlm_ls *ls)
{
int new;
/*
* A stop cancels any recovery that's in progress (see RECOVERY_STOP,
* dlm_recovery_stopped()) and prevents any new locks from being
* processed (see RUNNING, dlm_locking_stopped()).
* Prevent dlm_recv from being in the middle of something when we do
* the stop. This includes ensuring dlm_recv isn't processing a
* recovery message (rcom), while dlm_recoverd is aborting and
* resetting things from an in-progress recovery. i.e. we want
* dlm_recoverd to abort its recovery without worrying about dlm_recv
* processing an rcom at the same time. Stopping dlm_recv also makes
* it easy for dlm_receive_message() to check locking stopped and add a
* message to the requestqueue without races.
*/
down_write(&ls->ls_recv_active);
/*
* Abort any recovery that's in progress (see RECOVERY_STOP,
* dlm_recovery_stopped()) and tell any other threads running in the
* dlm to quit any processing (see RUNNING, dlm_locking_stopped()).
*/
spin_lock(&ls->ls_recover_lock);
......@@ -270,9 +278,15 @@ int dlm_ls_stop(struct dlm_ls *ls)
ls->ls_recover_seq++;
spin_unlock(&ls->ls_recover_lock);
/*
* Let dlm_recv run again, now any normal messages will be saved on the
* requestqueue for later.
*/
up_write(&ls->ls_recv_active);
/*
* This in_recovery lock does two things:
*
* 1) Keeps this function from returning until all threads are out
* of locking routines and locking is truly stopped.
* 2) Keeps any new requests from being processed until it's unlocked
......@@ -284,9 +298,8 @@ int dlm_ls_stop(struct dlm_ls *ls)
/*
* The recoverd suspend/resume makes sure that dlm_recoverd (if
* running) has noticed the clearing of RUNNING above and quit
* processing the previous recovery. This will be true for all nodes
* before any nodes start the new recovery.
* running) has noticed RECOVERY_STOP above and quit processing the
* previous recovery.
*/
dlm_recoverd_suspend(ls);
......
......@@ -2,7 +2,7 @@
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
......@@ -27,7 +27,6 @@
#include "dlm_internal.h"
#include "lowcomms.h"
#include "config.h"
#include "rcom.h"
#include "lock.h"
#include "midcomms.h"
......@@ -117,19 +116,7 @@ int dlm_process_incoming_buffer(int nodeid, const void *base,
offset &= (limit - 1);
len -= msglen;
switch (msg->h_cmd) {
case DLM_MSG:
dlm_receive_message(msg, nodeid, 0);
break;
case DLM_RCOM:
dlm_receive_rcom(msg, nodeid);
break;
default:
log_print("unknown msg type %x from %u: %u %u %u %u",
msg->h_cmd, nodeid, msglen, len, offset, ret);
}
dlm_receive_buffer(msg, nodeid);
}
if (msg != (struct dlm_header *) __tmp)
......
......@@ -2,7 +2,7 @@
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
** Copyright (C) 2005 Red Hat, Inc. All rights reserved.
** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
......@@ -386,7 +386,10 @@ static void receive_rcom_lock_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
dlm_recover_process_copy(ls, rc_in);
}
static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in)
/* If the lockspace doesn't exist then still send a status message
back; it's possible that it just doesn't have its global_id yet. */
int dlm_send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in)
{
struct dlm_rcom *rc;
struct rcom_config *rf;
......@@ -446,28 +449,11 @@ static int is_old_reply(struct dlm_ls *ls, struct dlm_rcom *rc)
return rv;
}
/* Called by dlm_recvd; corresponds to dlm_receive_message() but special
/* Called by dlm_recv; corresponds to dlm_receive_message() but special
recovery-only comms are sent through here. */
void dlm_receive_rcom(struct dlm_header *hd, int nodeid)
void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid)
{
struct dlm_rcom *rc = (struct dlm_rcom *) hd;
struct dlm_ls *ls;
dlm_rcom_in(rc);
/* If the lockspace doesn't exist then still send a status message
back; it's possible that it just doesn't have its global_id yet. */
ls = dlm_find_lockspace_global(hd->h_lockspace);
if (!ls) {
log_print("lockspace %x from %d type %x not found",
hd->h_lockspace, nodeid, rc->rc_type);
if (rc->rc_type == DLM_RCOM_STATUS)
send_ls_not_ready(nodeid, rc);
return;
}
if (dlm_recovery_stopped(ls) && (rc->rc_type != DLM_RCOM_STATUS)) {
log_debug(ls, "ignoring recovery message %x from %d",
rc->rc_type, nodeid);
......@@ -477,12 +463,6 @@ void dlm_receive_rcom(struct dlm_header *hd, int nodeid)
if (is_old_reply(ls, rc))
goto out;
if (nodeid != rc->rc_header.h_nodeid) {
log_error(ls, "bad rcom nodeid %d from %d",
rc->rc_header.h_nodeid, nodeid);
goto out;
}
switch (rc->rc_type) {
case DLM_RCOM_STATUS:
receive_rcom_status(ls, rc);
......@@ -520,6 +500,6 @@ void dlm_receive_rcom(struct dlm_header *hd, int nodeid)
DLM_ASSERT(0, printk("rc_type=%x\n", rc->rc_type););
}
out:
dlm_put_lockspace(ls);
return;
}
......@@ -2,7 +2,7 @@
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
** Copyright (C) 2005 Red Hat, Inc. All rights reserved.
** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
......@@ -18,7 +18,8 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid);
int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name,int last_len);
int dlm_send_rcom_lookup(struct dlm_rsb *r, int dir_nodeid);
int dlm_send_rcom_lock(struct dlm_rsb *r, struct dlm_lkb *lkb);
void dlm_receive_rcom(struct dlm_header *hd, int nodeid);
void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid);
int dlm_send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in);
#endif
......@@ -24,19 +24,28 @@
/* If the start for which we're re-enabling locking (seq) has been superseded
by a newer stop (ls_recover_seq), we need to leave locking disabled. */
by a newer stop (ls_recover_seq), we need to leave locking disabled.
We suspend dlm_recv threads here to avoid the race where dlm_recv a) sees
locking stopped and b) adds a message to the requestqueue, but dlm_recoverd
enables locking and clears the requestqueue between a and b. */
/* Re-enable locking for the recovery identified by seq.
Returns 0 if locking was enabled, or -EINTR if ls_recover_seq no longer
equals seq, in which case nothing is changed. */
static int enable_locking(struct dlm_ls *ls, uint64_t seq)
{
int error = -EINTR;
/* held for write across the whole state change; receivers take this
rwsem for read around message processing */
down_write(&ls->ls_recv_active);
/* ls_recover_seq is compared under ls_recover_lock */
spin_lock(&ls->ls_recover_lock);
if (ls->ls_recover_seq == seq) {
set_bit(LSFL_RUNNING, &ls->ls_flags);
/* unblocks processes waiting to enter the dlm */
up_write(&ls->ls_in_recovery);
error = 0;
}
spin_unlock(&ls->ls_recover_lock);
up_write(&ls->ls_recv_active);
return error;
}
......
/******************************************************************************
*******************************************************************************
**
** Copyright (C) 2005 Red Hat, Inc. All rights reserved.
** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
......@@ -20,7 +20,7 @@
struct rq_entry {
struct list_head list;
int nodeid;
char request[1];
char request[0];
};
/*
......@@ -30,42 +30,39 @@ struct rq_entry {
* lockspace is enabled on some while still suspended on others.
*/
int dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd)
void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd)
{
struct rq_entry *e;
int length = hd->h_length;
int rv = 0;
e = kmalloc(sizeof(struct rq_entry) + length, GFP_KERNEL);
if (!e) {
log_print("dlm_add_requestqueue: out of memory\n");
return 0;
log_print("dlm_add_requestqueue: out of memory len %d", length);
return;
}
e->nodeid = nodeid;
memcpy(e->request, hd, length);
/* We need to check dlm_locking_stopped() after taking the mutex to
avoid a race where dlm_recoverd enables locking and runs
process_requestqueue between our earlier dlm_locking_stopped check
and this addition to the requestqueue. */
mutex_lock(&ls->ls_requestqueue_mutex);
if (dlm_locking_stopped(ls))
list_add_tail(&e->list, &ls->ls_requestqueue);
else {
log_debug(ls, "dlm_add_requestqueue skip from %d", nodeid);
kfree(e);
rv = -EAGAIN;
}
list_add_tail(&e->list, &ls->ls_requestqueue);
mutex_unlock(&ls->ls_requestqueue_mutex);
return rv;
}
/*
* Called by dlm_recoverd to process normal messages saved while recovery was
* happening. Normal locking has been enabled before this is called. dlm_recv
* upon receiving a message, will wait for all saved messages to be drained
* here before processing the message it got. If a new dlm_ls_stop() arrives
* while we're processing these saved messages, it may block trying to suspend
* dlm_recv if dlm_recv is waiting for us in dlm_wait_requestqueue. In that
* case, we don't abort since locking_stopped is still 0. If dlm_recv is not
* waiting for us, then this processing may be aborted due to locking_stopped.
*/
int dlm_process_requestqueue(struct dlm_ls *ls)
{
struct rq_entry *e;
struct dlm_header *hd;
int error = 0;
mutex_lock(&ls->ls_requestqueue_mutex);
......@@ -79,14 +76,7 @@ int dlm_process_requestqueue(struct dlm_ls *ls)
e = list_entry(ls->ls_requestqueue.next, struct rq_entry, list);
mutex_unlock(&ls->ls_requestqueue_mutex);
hd = (struct dlm_header *) e->request;
error = dlm_receive_message(hd, e->nodeid, 1);
if (error == -EINTR) {
/* entry is left on requestqueue */
log_debug(ls, "process_requestqueue abort eintr");
break;
}
dlm_receive_message_saved(ls, (struct dlm_message *)e->request);
mutex_lock(&ls->ls_requestqueue_mutex);
list_del(&e->list);
......@@ -106,10 +96,12 @@ int dlm_process_requestqueue(struct dlm_ls *ls)
/*
* After recovery is done, locking is resumed and dlm_recoverd takes all the
* saved requests and processes them as they would have been by dlm_recvd. At
* the same time, dlm_recvd will start receiving new requests from remote
* nodes. We want to delay dlm_recvd processing new requests until
* dlm_recoverd has finished processing the old saved requests.
* saved requests and processes them as they would have been by dlm_recv. At
* the same time, dlm_recv will start receiving new requests from remote nodes.
* We want to delay dlm_recv processing new requests until dlm_recoverd has
* finished processing the old saved requests. We don't check for locking
* stopped here because dlm_ls_stop won't stop locking until it's suspended us
* (dlm_recv).
*/
void dlm_wait_requestqueue(struct dlm_ls *ls)
......@@ -118,8 +110,6 @@ void dlm_wait_requestqueue(struct dlm_ls *ls)
mutex_lock(&ls->ls_requestqueue_mutex);
if (list_empty(&ls->ls_requestqueue))
break;
if (dlm_locking_stopped(ls))
break;
mutex_unlock(&ls->ls_requestqueue_mutex);
schedule();
}
......
/******************************************************************************
*******************************************************************************
**
** Copyright (C) 2005 Red Hat, Inc. All rights reserved.
** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
......@@ -13,7 +13,7 @@
#ifndef __REQUESTQUEUE_DOT_H__
#define __REQUESTQUEUE_DOT_H__
int dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd);
void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd);
int dlm_process_requestqueue(struct dlm_ls *ls);
void dlm_wait_requestqueue(struct dlm_ls *ls);
void dlm_purge_requestqueue(struct dlm_ls *ls);
......
......@@ -93,9 +93,10 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
map_bh(bh, inode->i_sb, block);
set_buffer_uptodate(bh);
if (!gfs2_is_jdata(ip))
mark_buffer_dirty(bh);
if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip))
gfs2_trans_add_bh(ip->i_gl, bh, 0);
mark_buffer_dirty(bh);
if (release) {
unlock_page(page);
......@@ -1085,6 +1086,33 @@ static int do_shrink(struct gfs2_inode *ip, u64 size)
return error;
}
/*
 * do_touch - update an inode's time stamps without changing its size
 * @ip: the inode
 * @size: not used by this function
 *
 * Inside a RES_DINODE transaction and with i_rw_mutex held for write, sets
 * mtime and ctime to the current time and writes the dinode out to its
 * buffer.
 *
 * Returns: 0 on success, or the error from gfs2_trans_begin() or
 * gfs2_meta_inode_buffer()
 */
static int do_touch(struct gfs2_inode *ip, u64 size)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct buffer_head *dinode_bh;
	int ret;

	ret = gfs2_trans_begin(sdp, RES_DINODE, 0);
	if (ret)
		return ret;

	down_write(&ip->i_rw_mutex);

	ret = gfs2_meta_inode_buffer(ip, &dinode_bh);
	if (!ret) {
		ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
		gfs2_trans_add_bh(ip->i_gl, dinode_bh, 1);
		gfs2_dinode_out(ip, dinode_bh->b_data);
		brelse(dinode_bh);
	}

	/* the lock and the transaction are released on success and on error */
	up_write(&ip->i_rw_mutex);
	gfs2_trans_end(sdp);
	return ret;
}
/**
* gfs2_truncatei - make a file a given size
* @ip: the inode
......@@ -1105,8 +1133,11 @@ int gfs2_truncatei(struct gfs2_inode *ip, u64 size)
if (size > ip->i_di.di_size)
error = do_grow(ip, size);
else
else if (size < ip->i_di.di_size)
error = do_shrink(ip, size);
else
/* update time stamps */
error = do_touch(ip, size);
return error;
}
......
......@@ -34,30 +34,6 @@
The kthread functions used to start these daemons block and flush signals. */
/**
 * gfs2_scand - Look for cached glocks and inodes to toss from memory
 * @data: the struct gfs2_sbd this daemon works on, passed as void *
 *
 * Thread body: calls gfs2_scand_internal(), then sleeps interruptibly for
 * the gt_scand_secs tunable (in seconds), repeating until
 * kthread_should_stop() is true. See gfs2_glockd().
 *
 * Returns: 0
 */

int gfs2_scand(void *data)
{
	struct gfs2_sbd *sdp = data;

	for (;;) {
		unsigned long timeout;

		if (kthread_should_stop())
			break;

		gfs2_scand_internal(sdp);
		timeout = gfs2_tune_get(sdp, gt_scand_secs) * HZ;
		if (freezing(current))
			refrigerator();
		schedule_timeout_interruptible(timeout);
	}

	return 0;
}
/**
* gfs2_glockd - Reclaim unused glock structures
* @sdp: Pointer to GFS2 superblock
......
......@@ -10,7 +10,6 @@
#ifndef __DAEMON_DOT_H__
#define __DAEMON_DOT_H__
int gfs2_scand(void *data);
int gfs2_glockd(void *data);
int gfs2_recoverd(void *data);
int gfs2_logd(void *data);
......
......@@ -1043,6 +1043,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name)
error = gfs2_meta_inode_buffer(dip, &dibh);
if (!gfs2_assert_withdraw(GFS2_SB(&dip->i_inode), !error)) {
gfs2_trans_add_bh(dip->i_gl, dibh, 1);
dip->i_di.di_blocks++;
gfs2_set_inode_blocks(&dip->i_inode);
gfs2_dinode_out(dip, dibh->b_data);
......@@ -1501,7 +1502,7 @@ struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *name)
inode = gfs2_inode_lookup(dir->i_sb,
be16_to_cpu(dent->de_type),
be64_to_cpu(dent->de_inum.no_addr),
be64_to_cpu(dent->de_inum.no_formal_ino));
be64_to_cpu(dent->de_inum.no_formal_ino), 0);
brelse(bh);
return inode;
}
......
......@@ -200,28 +200,28 @@ static int security_eo_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er)
return gfs2_ea_remove_i(ip, er);
}
static struct gfs2_eattr_operations gfs2_user_eaops = {
static const struct gfs2_eattr_operations gfs2_user_eaops = {
.eo_get = user_eo_get,
.eo_set = user_eo_set,
.eo_remove = user_eo_remove,
.eo_name = "user",
};
struct gfs2_eattr_operations gfs2_system_eaops = {
const struct gfs2_eattr_operations gfs2_system_eaops = {
.eo_get = system_eo_get,
.eo_set = system_eo_set,
.eo_remove = system_eo_remove,
.eo_name = "system",
};
static struct gfs2_eattr_operations gfs2_security_eaops = {
static const struct gfs2_eattr_operations gfs2_security_eaops = {
.eo_get = security_eo_get,
.eo_set = security_eo_set,
.eo_remove = security_eo_remove,
.eo_name = "security",
};
struct gfs2_eattr_operations *gfs2_ea_ops[] = {
const struct gfs2_eattr_operations *gfs2_ea_ops[] = {
NULL,
&gfs2_user_eaops,
&gfs2_system_eaops,
......
......@@ -22,9 +22,9 @@ struct gfs2_eattr_operations {
unsigned int gfs2_ea_name2type(const char *name, const char **truncated_name);
extern struct gfs2_eattr_operations gfs2_system_eaops;
extern const struct gfs2_eattr_operations gfs2_system_eaops;
extern struct gfs2_eattr_operations *gfs2_ea_ops[];
extern const struct gfs2_eattr_operations *gfs2_ea_ops[];
#endif /* __EAOPS_DOT_H__ */
......@@ -25,8 +25,10 @@
#include <asm/uaccess.h>
#include <linux/seq_file.h>
#include <linux/debugfs.h>
#include <linux/module.h>
#include <linux/kallsyms.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/workqueue.h>
#include <linux/jiffies.h>
#include "gfs2.h"
#include "incore.h"
......@@ -48,7 +50,6 @@ struct glock_iter {
int hash; /* hash bucket index */
struct gfs2_sbd *sdp; /* incore superblock */
struct gfs2_glock *gl; /* current glock struct */
struct hlist_head *hb_list; /* current hash bucket ptr */
struct seq_file *seq; /* sequence file for debugfs */
char string[512]; /* scratch space */
};
......@@ -59,8 +60,13 @@ static int gfs2_dump_lockstate(struct gfs2_sbd *sdp);
static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl);
static void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh);
static void gfs2_glock_drop_th(struct gfs2_glock *gl);
static void run_queue(struct gfs2_glock *gl);
static DECLARE_RWSEM(gfs2_umount_flush_sem);
static struct dentry *gfs2_root;
static struct task_struct *scand_process;
static unsigned int scand_secs = 5;
static struct workqueue_struct *glock_workqueue;
#define GFS2_GL_HASH_SHIFT 15
#define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT)
......@@ -276,6 +282,18 @@ static struct gfs2_glock *gfs2_glock_find(const struct gfs2_sbd *sdp,
return gl;
}
/* Work handler for a glock's gl_work item (installed with
INIT_DELAYED_WORK in gfs2_glock_get()). Turns a pending demote request
into an active one, runs the glock's queue, and then drops one glock
reference. */
static void glock_work_func(struct work_struct *work)
{
/* gl_work is a delayed work item; its embedded work_struct is what the
handler is given */
struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work);
spin_lock(&gl->gl_spin);
/* promote GLF_PENDING_DEMOTE to GLF_DEMOTE */
if (test_and_clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags))
set_bit(GLF_DEMOTE, &gl->gl_flags);
run_queue(gl);
spin_unlock(&gl->gl_spin);
/* presumably balances the reference held by whoever queued this work
(callers drop their own reference only when queue_delayed_work()
returns 0) - confirm against all queueing sites */
gfs2_glock_put(gl);
}
/**
* gfs2_glock_get() - Get a glock, or create one if one doesn't exist
* @sdp: The GFS2 superblock
......@@ -315,6 +333,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
gl->gl_name = name;
atomic_set(&gl->gl_ref, 1);
gl->gl_state = LM_ST_UNLOCKED;
gl->gl_demote_state = LM_ST_EXCLUSIVE;
gl->gl_hash = hash;
gl->gl_owner_pid = 0;
gl->gl_ip = 0;
......@@ -323,10 +342,12 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
gl->gl_req_bh = NULL;
gl->gl_vn = 0;
gl->gl_stamp = jiffies;
gl->gl_tchange = jiffies;
gl->gl_object = NULL;
gl->gl_sbd = sdp;
gl->gl_aspace = NULL;
lops_init_le(&gl->gl_le, &gfs2_glock_lops);
INIT_DELAYED_WORK(&gl->gl_work, glock_work_func);
/* If this glock protects actual on-disk data or metadata blocks,
create a VFS inode to manage the pages/buffers holding them. */
......@@ -440,6 +461,8 @@ static void wait_on_holder(struct gfs2_holder *gh)
static void gfs2_demote_wake(struct gfs2_glock *gl)
{
BUG_ON(!spin_is_locked(&gl->gl_spin));
gl->gl_demote_state = LM_ST_EXCLUSIVE;
clear_bit(GLF_DEMOTE, &gl->gl_flags);
smp_mb__after_clear_bit();
wake_up_bit(&gl->gl_flags, GLF_DEMOTE);
......@@ -545,12 +568,14 @@ static int rq_demote(struct gfs2_glock *gl)
return 0;
}
set_bit(GLF_LOCK, &gl->gl_flags);
spin_unlock(&gl->gl_spin);
if (gl->gl_demote_state == LM_ST_UNLOCKED ||
gl->gl_state != LM_ST_EXCLUSIVE)
gl->gl_state != LM_ST_EXCLUSIVE) {
spin_unlock(&gl->gl_spin);
gfs2_glock_drop_th(gl);
else
} else {
spin_unlock(&gl->gl_spin);
gfs2_glock_xmote_th(gl, NULL);
}
spin_lock(&gl->gl_spin);
return 0;
......@@ -679,24 +704,25 @@ static void gfs2_glmutex_unlock(struct gfs2_glock *gl)
* practice: LM_ST_SHARED and LM_ST_UNLOCKED
*/
static void handle_callback(struct gfs2_glock *gl, unsigned int state, int remote)
static void handle_callback(struct gfs2_glock *gl, unsigned int state,
int remote, unsigned long delay)
{
int bit = delay ? GLF_PENDING_DEMOTE : GLF_DEMOTE;
spin_lock(&gl->gl_spin);
if (test_and_set_bit(GLF_DEMOTE, &gl->gl_flags) == 0) {
set_bit(bit, &gl->gl_flags);
if (gl->gl_demote_state == LM_ST_EXCLUSIVE) {
gl->gl_demote_state = state;
gl->gl_demote_time = jiffies;
if (remote && gl->gl_ops->go_type == LM_TYPE_IOPEN &&
gl->gl_object) {
struct inode *inode = igrab(gl->gl_object);
gfs2_glock_schedule_for_reclaim(gl);
spin_unlock(&gl->gl_spin);
if (inode) {
d_prune_aliases(inode);
iput(inode);
}
return;
}
} else if (gl->gl_demote_state != LM_ST_UNLOCKED) {
gl->gl_demote_state = state;
} else if (gl->gl_demote_state != LM_ST_UNLOCKED &&
gl->gl_demote_state != state) {
gl->gl_demote_state = LM_ST_UNLOCKED;
}
spin_unlock(&gl->gl_spin);
}
......@@ -723,6 +749,7 @@ static void state_change(struct gfs2_glock *gl, unsigned int new_state)
}
gl->gl_state = new_state;
gl->gl_tchange = jiffies;
}
/**
......@@ -760,10 +787,20 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret)
if (!gh) {
gl->gl_stamp = jiffies;
if (ret & LM_OUT_CANCELED)
if (ret & LM_OUT_CANCELED) {
op_done = 0;
else
} else {
spin_lock(&gl->gl_spin);
if (gl->gl_state != gl->gl_demote_state) {
gl->gl_req_bh = NULL;
spin_unlock(&gl->gl_spin);
gfs2_glock_drop_th(gl);
gfs2_glock_put(gl);
return;
}
gfs2_demote_wake(gl);
spin_unlock(&gl->gl_spin);
}
} else {
spin_lock(&gl->gl_spin);
list_del_init(&gh->gh_list);
......@@ -799,7 +836,6 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret)
gl->gl_req_gh = NULL;
gl->gl_req_bh = NULL;
clear_bit(GLF_LOCK, &gl->gl_flags);
run_queue(gl);
spin_unlock(&gl->gl_spin);
}
......@@ -817,7 +853,7 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret)
*
*/
void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh)
static void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh)
{
struct gfs2_sbd *sdp = gl->gl_sbd;
int flags = gh ? gh->gh_flags : 0;
......@@ -871,7 +907,6 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret)
gfs2_assert_warn(sdp, !ret);
state_change(gl, LM_ST_UNLOCKED);
gfs2_demote_wake(gl);
if (glops->go_inval)
glops->go_inval(gl, DIO_METADATA);
......@@ -884,10 +919,10 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret)
}
spin_lock(&gl->gl_spin);
gfs2_demote_wake(gl);
gl->gl_req_gh = NULL;
gl->gl_req_bh = NULL;
clear_bit(GLF_LOCK, &gl->gl_flags);
run_queue(gl);
spin_unlock(&gl->gl_spin);
gfs2_glock_put(gl);
......@@ -1067,24 +1102,31 @@ static void add_to_queue(struct gfs2_holder *gh)
if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags))
BUG();
existing = find_holder_by_owner(&gl->gl_holders, gh->gh_owner_pid);
if (existing) {
print_symbol(KERN_WARNING "original: %s\n", existing->gh_ip);
printk(KERN_INFO "pid : %d\n", existing->gh_owner_pid);
printk(KERN_INFO "lock type : %d lock state : %d\n",
existing->gh_gl->gl_name.ln_type, existing->gh_gl->gl_state);
print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip);
printk(KERN_INFO "pid : %d\n", gh->gh_owner_pid);
printk(KERN_INFO "lock type : %d lock state : %d\n",
gl->gl_name.ln_type, gl->gl_state);
BUG();
}
existing = find_holder_by_owner(&gl->gl_waiters3, gh->gh_owner_pid);
if (existing) {
print_symbol(KERN_WARNING "original: %s\n", existing->gh_ip);
print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip);
BUG();
if (!(gh->gh_flags & GL_FLOCK)) {
existing = find_holder_by_owner(&gl->gl_holders,
gh->gh_owner_pid);
if (existing) {
print_symbol(KERN_WARNING "original: %s\n",
existing->gh_ip);
printk(KERN_INFO "pid : %d\n", existing->gh_owner_pid);
printk(KERN_INFO "lock type : %d lock state : %d\n",
existing->gh_gl->gl_name.ln_type,
existing->gh_gl->gl_state);
print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip);
printk(KERN_INFO "pid : %d\n", gh->gh_owner_pid);
printk(KERN_INFO "lock type : %d lock state : %d\n",
gl->gl_name.ln_type, gl->gl_state);
BUG();
}
existing = find_holder_by_owner(&gl->gl_waiters3,
gh->gh_owner_pid);
if (existing) {
print_symbol(KERN_WARNING "original: %s\n",
existing->gh_ip);
print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip);
BUG();
}
}
if (gh->gh_flags & LM_FLAG_PRIORITY)
......@@ -1195,9 +1237,10 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
{
struct gfs2_glock *gl = gh->gh_gl;
const struct gfs2_glock_operations *glops = gl->gl_ops;
unsigned delay = 0;
if (gh->gh_flags & GL_NOCACHE)
handle_callback(gl, LM_ST_UNLOCKED, 0);
handle_callback(gl, LM_ST_UNLOCKED, 0, 0);
gfs2_glmutex_lock(gl);
......@@ -1215,8 +1258,14 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
}
clear_bit(GLF_LOCK, &gl->gl_flags);
run_queue(gl);
spin_unlock(&gl->gl_spin);
gfs2_glock_hold(gl);
if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
!test_bit(GLF_DEMOTE, &gl->gl_flags))
delay = gl->gl_ops->go_min_hold_time;
if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0)
gfs2_glock_put(gl);
}
void gfs2_glock_dq_wait(struct gfs2_holder *gh)
......@@ -1443,18 +1492,21 @@ static void blocking_cb(struct gfs2_sbd *sdp, struct lm_lockname *name,
unsigned int state)
{
struct gfs2_glock *gl;
unsigned long delay = 0;
unsigned long holdtime;
unsigned long now = jiffies;
gl = gfs2_glock_find(sdp, name);
if (!gl)
return;
handle_callback(gl, state, 1);
spin_lock(&gl->gl_spin);
run_queue(gl);
spin_unlock(&gl->gl_spin);
holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time;
if (time_before(now, holdtime))
delay = holdtime - now;
gfs2_glock_put(gl);
handle_callback(gl, state, 1, delay);
if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0)
gfs2_glock_put(gl);
}
/**
......@@ -1495,7 +1547,8 @@ void gfs2_glock_cb(void *cb_data, unsigned int type, void *data)
return;
if (!gfs2_assert_warn(sdp, gl->gl_req_bh))
gl->gl_req_bh(gl, async->lc_ret);
gfs2_glock_put(gl);
if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
gfs2_glock_put(gl);
up_read(&gfs2_umount_flush_sem);
return;
}
......@@ -1588,7 +1641,7 @@ void gfs2_reclaim_glock(struct gfs2_sbd *sdp)
if (gfs2_glmutex_trylock(gl)) {
if (list_empty(&gl->gl_holders) &&
gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl))
handle_callback(gl, LM_ST_UNLOCKED, 0);
handle_callback(gl, LM_ST_UNLOCKED, 0, 0);
gfs2_glmutex_unlock(gl);
}
......@@ -1617,7 +1670,7 @@ static int examine_bucket(glock_examiner examiner, struct gfs2_sbd *sdp,
goto out;
gl = list_entry(head->first, struct gfs2_glock, gl_list);
while(1) {
if (gl->gl_sbd == sdp) {
if (!sdp || gl->gl_sbd == sdp) {
gfs2_glock_hold(gl);
read_unlock(gl_lock_addr(hash));
if (prev)
......@@ -1635,6 +1688,7 @@ static int examine_bucket(glock_examiner examiner, struct gfs2_sbd *sdp,
read_unlock(gl_lock_addr(hash));
if (prev)
gfs2_glock_put(prev);
cond_resched();
return has_entries;
}
......@@ -1662,20 +1716,6 @@ static void scan_glock(struct gfs2_glock *gl)
gfs2_glock_schedule_for_reclaim(gl);
}
/**
* gfs2_scand_internal - Look for glocks and inodes to toss from memory
* @sdp: the filesystem
*
*/
void gfs2_scand_internal(struct gfs2_sbd *sdp)
{
unsigned int x;
for (x = 0; x < GFS2_GL_HASH_SIZE; x++)
examine_bucket(scan_glock, sdp, x);
}
/**
* clear_glock - look at a glock and see if we can free it from glock cache
* @gl: the glock to look at
......@@ -1701,7 +1741,7 @@ static void clear_glock(struct gfs2_glock *gl)
if (gfs2_glmutex_trylock(gl)) {
if (list_empty(&gl->gl_holders) &&
gl->gl_state != LM_ST_UNLOCKED)
handle_callback(gl, LM_ST_UNLOCKED, 0);
handle_callback(gl, LM_ST_UNLOCKED, 0, 0);
gfs2_glmutex_unlock(gl);
}
}
......@@ -1843,7 +1883,7 @@ static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl)
spin_lock(&gl->gl_spin);
print_dbg(gi, "Glock 0x%p (%u, %llu)\n", gl, gl->gl_name.ln_type,
print_dbg(gi, "Glock 0x%p (%u, 0x%llx)\n", gl, gl->gl_name.ln_type,
(unsigned long long)gl->gl_name.ln_number);
print_dbg(gi, " gl_flags =");
for (x = 0; x < 32; x++) {
......@@ -1963,6 +2003,35 @@ static int gfs2_dump_lockstate(struct gfs2_sbd *sdp)
return error;
}
/**
 * gfs2_scand - Kernel thread that periodically scans the glock hash table
 * @data: thread argument; not used by this function
 *
 * Until the thread is asked to stop, every hash bucket is handed to
 * examine_bucket() with scan_glock() as the examiner and a NULL superblock.
 * After each full pass the thread sleeps for scand_secs seconds, never
 * less than one second.
 *
 * One of these daemons runs, finding candidates to add to sd_reclaim_list.
 * See gfs2_glockd()
 *
 * Returns: 0 once the thread has been told to stop
 */
static int gfs2_scand(void *data)
{
	unsigned bucket;
	unsigned secs;

	for (;;) {
		if (kthread_should_stop())
			break;

		bucket = 0;
		while (bucket < GFS2_GL_HASH_SIZE) {
			examine_bucket(scan_glock, NULL, bucket);
			bucket++;
		}

		if (freezing(current))
			refrigerator();

		/* Sample the tunable once; a value of 0 is treated as 1 second */
		secs = scand_secs;
		if (secs == 0)
			secs = 1;
		schedule_timeout_interruptible(secs * HZ);
	}

	return 0;
}
int __init gfs2_glock_init(void)
{
unsigned i;
......@@ -1974,52 +2043,69 @@ int __init gfs2_glock_init(void)
rwlock_init(&gl_hash_locks[i]);
}
#endif
scand_process = kthread_run(gfs2_scand, NULL, "gfs2_scand");
if (IS_ERR(scand_process))
return PTR_ERR(scand_process);
glock_workqueue = create_workqueue("glock_workqueue");
if (IS_ERR(glock_workqueue)) {
kthread_stop(scand_process);
return PTR_ERR(glock_workqueue);
}
return 0;
}
/**
 * gfs2_glock_exit - Tear down what gfs2_glock_init() set up
 *
 * Destroys glock_workqueue first and then stops the scand_process thread.
 */
void gfs2_glock_exit(void)
{
destroy_workqueue(glock_workqueue);
kthread_stop(scand_process);
}
module_param(scand_secs, uint, S_IRUGO|S_IWUSR);
MODULE_PARM_DESC(scand_secs, "The number of seconds between scand runs");
static int gfs2_glock_iter_next(struct glock_iter *gi)
{
struct gfs2_glock *gl;
restart:
read_lock(gl_lock_addr(gi->hash));
while (1) {
if (!gi->hb_list) { /* If we don't have a hash bucket yet */
gi->hb_list = &gl_hash_table[gi->hash].hb_list;
if (hlist_empty(gi->hb_list)) {
read_unlock(gl_lock_addr(gi->hash));
gi->hash++;
read_lock(gl_lock_addr(gi->hash));
gi->hb_list = NULL;
if (gi->hash >= GFS2_GL_HASH_SIZE) {
read_unlock(gl_lock_addr(gi->hash));
return 1;
}
else
continue;
}
if (!hlist_empty(gi->hb_list)) {
gi->gl = list_entry(gi->hb_list->first,
struct gfs2_glock,
gl_list);
}
} else {
if (gi->gl->gl_list.next == NULL) {
read_unlock(gl_lock_addr(gi->hash));
gi->hash++;
read_lock(gl_lock_addr(gi->hash));
gi->hb_list = NULL;
continue;
}
gi->gl = list_entry(gi->gl->gl_list.next,
struct gfs2_glock, gl_list);
}
gl = gi->gl;
if (gl) {
gi->gl = hlist_entry(gl->gl_list.next,
struct gfs2_glock, gl_list);
if (gi->gl)
break;
gfs2_glock_hold(gi->gl);
}
read_unlock(gl_lock_addr(gi->hash));
if (gl)
gfs2_glock_put(gl);
if (gl && gi->gl == NULL)
gi->hash++;
while(gi->gl == NULL) {
if (gi->hash >= GFS2_GL_HASH_SIZE)
return 1;
read_lock(gl_lock_addr(gi->hash));
gi->gl = hlist_entry(gl_hash_table[gi->hash].hb_list.first,
struct gfs2_glock, gl_list);
if (gi->gl)
gfs2_glock_hold(gi->gl);
read_unlock(gl_lock_addr(gi->hash));
gi->hash++;
}
if (gi->sdp != gi->gl->gl_sbd)
goto restart;
return 0;
}
static void gfs2_glock_iter_free(struct glock_iter *gi)
{
if (gi->gl)
gfs2_glock_put(gi->gl);
kfree(gi);
}
......@@ -2033,9 +2119,8 @@ static struct glock_iter *gfs2_glock_iter_init(struct gfs2_sbd *sdp)
gi->sdp = sdp;
gi->hash = 0;
gi->gl = NULL;
gi->hb_list = NULL;
gi->seq = NULL;
gi->gl = NULL;
memset(gi->string, 0, sizeof(gi->string));
if (gfs2_glock_iter_next(gi)) {
......@@ -2055,7 +2140,7 @@ static void *gfs2_glock_seq_start(struct seq_file *file, loff_t *pos)
if (!gi)
return NULL;
while (n--) {
while(n--) {
if (gfs2_glock_iter_next(gi)) {
gfs2_glock_iter_free(gi);
return NULL;
......@@ -2082,7 +2167,9 @@ static void *gfs2_glock_seq_next(struct seq_file *file, void *iter_ptr,
static void gfs2_glock_seq_stop(struct seq_file *file, void *iter_ptr)
{
/* nothing for now */
struct glock_iter *gi = iter_ptr;
if (gi)
gfs2_glock_iter_free(gi);
}
static int gfs2_glock_seq_show(struct seq_file *file, void *iter_ptr)
......@@ -2095,7 +2182,7 @@ static int gfs2_glock_seq_show(struct seq_file *file, void *iter_ptr)
return 0;
}
static struct seq_operations gfs2_glock_seq_ops = {
static const struct seq_operations gfs2_glock_seq_ops = {
.start = gfs2_glock_seq_start,
.next = gfs2_glock_seq_next,
.stop = gfs2_glock_seq_stop,
......
......@@ -26,6 +26,7 @@
#define GL_SKIP 0x00000100
#define GL_ATIME 0x00000200
#define GL_NOCACHE 0x00000400
#define GL_FLOCK 0x00000800
#define GL_NOCANCEL 0x00001000
#define GLR_TRYFAILED 13
......@@ -132,11 +133,11 @@ void gfs2_glock_cb(void *cb_data, unsigned int type, void *data);
void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl);
void gfs2_reclaim_glock(struct gfs2_sbd *sdp);
void gfs2_scand_internal(struct gfs2_sbd *sdp);
void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait);
int __init gfs2_glock_init(void);
void gfs2_glock_exit(void);
int gfs2_create_debugfs_file(struct gfs2_sbd *sdp);
void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp);
int gfs2_register_debugfs(void);
......
......@@ -41,7 +41,6 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
struct list_head *head = &gl->gl_ail_list;
struct gfs2_bufdata *bd;
struct buffer_head *bh;
u64 blkno;
int error;
blocks = atomic_read(&gl->gl_ail_count);
......@@ -57,19 +56,12 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
bd = list_entry(head->next, struct gfs2_bufdata,
bd_ail_gl_list);
bh = bd->bd_bh;
blkno = bh->b_blocknr;
gfs2_remove_from_ail(NULL, bd);
bd->bd_bh = NULL;
bh->b_private = NULL;
bd->bd_blkno = bh->b_blocknr;
gfs2_assert_withdraw(sdp, !buffer_busy(bh));
bd->bd_ail = NULL;
list_del(&bd->bd_ail_st_list);
list_del(&bd->bd_ail_gl_list);
atomic_dec(&gl->gl_ail_count);
brelse(bh);
gfs2_log_unlock(sdp);
gfs2_trans_add_revoke(sdp, blkno);
gfs2_log_lock(sdp);
gfs2_trans_add_revoke(sdp, bd);
}
gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count));
gfs2_log_unlock(sdp);
......@@ -156,9 +148,11 @@ static void inode_go_sync(struct gfs2_glock *gl)
ip = NULL;
if (test_bit(GLF_DIRTY, &gl->gl_flags)) {
if (ip)
if (ip && !gfs2_is_jdata(ip))
filemap_fdatawrite(ip->i_inode.i_mapping);
gfs2_log_flush(gl->gl_sbd, gl);
if (ip && gfs2_is_jdata(ip))
filemap_fdatawrite(ip->i_inode.i_mapping);
gfs2_meta_sync(gl);
if (ip) {
struct address_space *mapping = ip->i_inode.i_mapping;
......@@ -452,6 +446,7 @@ const struct gfs2_glock_operations gfs2_inode_glops = {
.go_lock = inode_go_lock,
.go_unlock = inode_go_unlock,
.go_type = LM_TYPE_INODE,
.go_min_hold_time = HZ / 10,
};
const struct gfs2_glock_operations gfs2_rgrp_glops = {
......@@ -462,6 +457,7 @@ const struct gfs2_glock_operations gfs2_rgrp_glops = {
.go_lock = rgrp_go_lock,
.go_unlock = rgrp_go_unlock,
.go_type = LM_TYPE_RGRP,
.go_min_hold_time = HZ / 10,
};
const struct gfs2_glock_operations gfs2_trans_glops = {
......
......@@ -11,6 +11,7 @@
#define __INCORE_DOT_H__
#include <linux/fs.h>
#include <linux/workqueue.h>
#define DIO_WAIT 0x00000010
#define DIO_METADATA 0x00000020
......@@ -113,7 +114,13 @@ struct gfs2_bufdata {
struct buffer_head *bd_bh;
struct gfs2_glock *bd_gl;
struct list_head bd_list_tr;
union {
struct list_head list_tr;
u64 blkno;
} u;
#define bd_list_tr u.list_tr
#define bd_blkno u.blkno
struct gfs2_log_element bd_le;
struct gfs2_ail *bd_ail;
......@@ -130,6 +137,7 @@ struct gfs2_glock_operations {
int (*go_lock) (struct gfs2_holder *gh);
void (*go_unlock) (struct gfs2_holder *gh);
const int go_type;
const unsigned long go_min_hold_time;
};
enum {
......@@ -161,6 +169,7 @@ enum {
GLF_LOCK = 1,
GLF_STICKY = 2,
GLF_DEMOTE = 3,
GLF_PENDING_DEMOTE = 4,
GLF_DIRTY = 5,
};
......@@ -193,6 +202,7 @@ struct gfs2_glock {
u64 gl_vn;
unsigned long gl_stamp;
unsigned long gl_tchange;
void *gl_object;
struct list_head gl_reclaim;
......@@ -203,6 +213,7 @@ struct gfs2_glock {
struct gfs2_log_element gl_le;
struct list_head gl_ail_list;
atomic_t gl_ail_count;
struct delayed_work gl_work;
};
struct gfs2_alloc {
......@@ -293,11 +304,6 @@ struct gfs2_file {
struct gfs2_holder f_fl_gh;
};
struct gfs2_revoke {
struct gfs2_log_element rv_le;
u64 rv_blkno;
};
struct gfs2_revoke_replay {
struct list_head rr_list;
u64 rr_blkno;
......@@ -335,12 +341,6 @@ struct gfs2_quota_data {
unsigned long qd_last_touched;
};
struct gfs2_log_buf {
struct list_head lb_list;
struct buffer_head *lb_bh;
struct buffer_head *lb_real;
};
struct gfs2_trans {
unsigned long tr_ip;
......@@ -429,7 +429,6 @@ struct gfs2_tune {
unsigned int gt_log_flush_secs;
unsigned int gt_jindex_refresh_secs; /* Check for new journal index */
unsigned int gt_scand_secs;
unsigned int gt_recoverd_secs;
unsigned int gt_logd_secs;
unsigned int gt_quotad_secs;
......@@ -574,7 +573,6 @@ struct gfs2_sbd {
/* Daemon stuff */
struct task_struct *sd_scand_process;
struct task_struct *sd_recoverd_process;
struct task_struct *sd_logd_process;
struct task_struct *sd_quotad_process;
......@@ -609,13 +607,13 @@ struct gfs2_sbd {
unsigned int sd_log_num_revoke;
unsigned int sd_log_num_rg;
unsigned int sd_log_num_databuf;
unsigned int sd_log_num_jdata;
struct list_head sd_log_le_gl;
struct list_head sd_log_le_buf;
struct list_head sd_log_le_revoke;
struct list_head sd_log_le_rg;
struct list_head sd_log_le_databuf;
struct list_head sd_log_le_ordered;
unsigned int sd_log_blks_free;
struct mutex sd_log_reserve_mutex;
......@@ -627,7 +625,8 @@ struct gfs2_sbd {
unsigned long sd_log_flush_time;
struct rw_semaphore sd_log_flush_lock;
struct list_head sd_log_flush_list;
atomic_t sd_log_in_flight;
wait_queue_head_t sd_log_flush_wait;
unsigned int sd_log_flush_head;
u64 sd_log_flush_wrapped;
......
......@@ -77,6 +77,49 @@ static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr)
return iget5_locked(sb, hash, iget_test, iget_set, &no_addr);
}
/*
 * Opaque argument passed through iget5_locked() to iget_skip_test() and
 * iget_skip_set() by gfs2_iget_skip().
 */
struct gfs2_skip_data {
/* Inode address being looked up; compared against ip->i_no_addr */
u64 no_addr;
/*
 * Set to 1 by iget_skip_test() when a matching inode was found with
 * I_FREEING, I_CLEAR or I_WILL_FREE set; iget_skip_set() then returns 1.
 */
int skipped;
};
/**
 * iget_skip_test - Match callback that refuses inodes which are being freed
 * @inode: candidate inode from the inode hash
 * @opaque: pointer to a struct gfs2_skip_data describing the lookup
 *
 * Returns: 1 if @inode has the requested address, has non-NULL i_private
 *          and is not in a freeing state; 0 otherwise. When the address
 *          matches but the inode is being freed, data->skipped is set to 1.
 */
static int iget_skip_test(struct inode *inode, void *opaque)
{
	struct gfs2_skip_data *skip = opaque;
	struct gfs2_inode *ip = GFS2_I(inode);

	/* Not the inode we are looking for */
	if (ip->i_no_addr != skip->no_addr || inode->i_private == NULL)
		return 0;

	/* Right inode, but it is on its way out: record that and reject it */
	if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) {
		skip->skipped = 1;
		return 0;
	}

	return 1;
}
/**
 * iget_skip_set - Initialise callback paired with iget_skip_test()
 * @inode: the newly allocated inode
 * @opaque: pointer to a struct gfs2_skip_data describing the lookup
 *
 * Returns: 1, leaving @inode untouched, if iget_skip_test() flagged the
 *          lookup as skipped; otherwise stores the address in both the VFS
 *          inode number and the GFS2 inode and returns 0.
 */
static int iget_skip_set(struct inode *inode, void *opaque)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_skip_data *skip = opaque;

	if (!skip->skipped) {
		inode->i_ino = (unsigned long)(skip->no_addr);
		ip->i_no_addr = skip->no_addr;
		return 0;
	}

	return 1;
}
/**
 * gfs2_iget_skip - Look up an inode, ignoring one that is being freed
 * @sb: the super block
 * @no_addr: the inode address, also used (truncated to unsigned long) as
 *           the hash value
 *
 * Returns: whatever iget5_locked() returns when driven by iget_skip_test()
 *          and iget_skip_set()
 */
static struct inode *gfs2_iget_skip(struct super_block *sb,
				    u64 no_addr)
{
	struct gfs2_skip_data skip = {
		.no_addr = no_addr,
		.skipped = 0,
	};
	unsigned long hashval = (unsigned long)no_addr;

	return iget5_locked(sb, hashval, iget_skip_test, iget_skip_set, &skip);
}
/**
* GFS2 lookup code fills in vfs inode contents based on info obtained
* from directory entry inside gfs2_inode_lookup(). This has caused issues
......@@ -112,6 +155,7 @@ void gfs2_set_iop(struct inode *inode)
* @sb: The super block
* @no_addr: The inode number
* @type: The type of the inode
* @skip_freeing: set this to avoid returning an inode that is currently being freed.
*
* Returns: A VFS inode, or an error
*/
......@@ -119,13 +163,19 @@ void gfs2_set_iop(struct inode *inode)
struct inode *gfs2_inode_lookup(struct super_block *sb,
unsigned int type,
u64 no_addr,
u64 no_formal_ino)
u64 no_formal_ino, int skip_freeing)
{
struct inode *inode = gfs2_iget(sb, no_addr);
struct gfs2_inode *ip = GFS2_I(inode);
struct inode *inode;
struct gfs2_inode *ip;
struct gfs2_glock *io_gl;
int error;
if (skip_freeing)
inode = gfs2_iget_skip(sb, no_addr);
else
inode = gfs2_iget(sb, no_addr);
ip = GFS2_I(inode);
if (!inode)
return ERR_PTR(-ENOBUFS);
......@@ -244,6 +294,11 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
return 0;
}
/**
 * gfs2_inode_bh - Record a buffer head in the inode's buffer cache
 * @ip: the GFS2 inode
 * @bh: the buffer head to store
 *
 * Stores @bh in slot 0 of ip->i_cache. No reference count is changed here.
 */
static void gfs2_inode_bh(struct gfs2_inode *ip, struct buffer_head *bh)
{
ip->i_cache[0] = bh;
}
/**
* gfs2_inode_refresh - Refresh the incore copy of the dinode
* @ip: The GFS2 inode
......@@ -688,7 +743,7 @@ static int alloc_dinode(struct gfs2_inode *dip, u64 *no_addr, u64 *generation)
static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
const struct gfs2_inum_host *inum, unsigned int mode,
unsigned int uid, unsigned int gid,
const u64 *generation, dev_t dev)
const u64 *generation, dev_t dev, struct buffer_head **bhp)
{
struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
struct gfs2_dinode *di;
......@@ -743,13 +798,15 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
di->di_mtime_nsec = cpu_to_be32(tv.tv_nsec);
di->di_ctime_nsec = cpu_to_be32(tv.tv_nsec);
memset(&di->di_reserved, 0, sizeof(di->di_reserved));
set_buffer_uptodate(dibh);
brelse(dibh);
*bhp = dibh;
}
static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
unsigned int mode, const struct gfs2_inum_host *inum,
const u64 *generation, dev_t dev)
const u64 *generation, dev_t dev, struct buffer_head **bhp)
{
struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
unsigned int uid, gid;
......@@ -770,7 +827,7 @@ static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
if (error)
goto out_quota;
init_dinode(dip, gl, inum, mode, uid, gid, generation, dev);
init_dinode(dip, gl, inum, mode, uid, gid, generation, dev, bhp);
gfs2_quota_change(dip, +1, uid, gid);
gfs2_trans_end(sdp);
......@@ -909,6 +966,7 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 };
int error;
u64 generation;
struct buffer_head *bh=NULL;
if (!name->len || name->len > GFS2_FNAMESIZE)
return ERR_PTR(-ENAMETOOLONG);
......@@ -935,16 +993,18 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
if (error)
goto fail_gunlock;
error = make_dinode(dip, ghs[1].gh_gl, mode, &inum, &generation, dev);
error = make_dinode(dip, ghs[1].gh_gl, mode, &inum, &generation, dev, &bh);
if (error)
goto fail_gunlock2;
inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode),
inum.no_addr,
inum.no_formal_ino);
inum.no_formal_ino, 0);
if (IS_ERR(inode))
goto fail_gunlock2;
gfs2_inode_bh(GFS2_I(inode), bh);
error = gfs2_inode_refresh(GFS2_I(inode));
if (error)
goto fail_gunlock2;
......
......@@ -49,7 +49,8 @@ static inline void gfs2_inum_out(const struct gfs2_inode *ip,
void gfs2_inode_attr_in(struct gfs2_inode *ip);
void gfs2_set_iop(struct inode *inode);
struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type,
u64 no_addr, u64 no_formal_ino);
u64 no_addr, u64 no_formal_ino,
int skip_freeing);
struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr);
int gfs2_inode_refresh(struct gfs2_inode *ip);
......
......@@ -13,7 +13,6 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/string.h>
#include <linux/list.h>
......
......@@ -346,15 +346,16 @@ static ssize_t dev_write(struct file *file, const char __user *u, size_t count,
static unsigned int dev_poll(struct file *file, poll_table *wait)
{
unsigned int mask = 0;
poll_wait(file, &send_wq, wait);
spin_lock(&ops_lock);
if (!list_empty(&send_list)) {
spin_unlock(&ops_lock);
return POLLIN | POLLRDNORM;
}
if (!list_empty(&send_list))
mask = POLLIN | POLLRDNORM;
spin_unlock(&ops_lock);
return 0;
return mask;
}
static const struct file_operations dev_fops = {
......
......@@ -268,20 +268,16 @@ static inline int check_drop(struct gdlm_ls *ls)
return 0;
}
static int gdlm_thread(void *data)
static int gdlm_thread(void *data, int blist)
{
struct gdlm_ls *ls = (struct gdlm_ls *) data;
struct gdlm_lock *lp = NULL;
int blist = 0;
uint8_t complete, blocking, submit, drop;
DECLARE_WAITQUEUE(wait, current);
/* Only thread1 is allowed to do blocking callbacks since gfs
may wait for a completion callback within a blocking cb. */
if (current == ls->thread1)
blist = 1;
while (!kthread_should_stop()) {
set_current_state(TASK_INTERRUPTIBLE);
add_wait_queue(&ls->thread_wait, &wait);
......@@ -333,12 +329,22 @@ static int gdlm_thread(void *data)
return 0;
}
/*
 * Entry point for the "lock_dlm1" thread: runs gdlm_thread() with
 * blist = 1. Per the comment in gdlm_thread(), only thread1 is allowed
 * to do blocking callbacks.
 */
static int gdlm_thread1(void *data)
{
return gdlm_thread(data, 1);
}
/*
 * Entry point for the "lock_dlm2" thread: runs gdlm_thread() with
 * blist = 0.
 */
static int gdlm_thread2(void *data)
{
return gdlm_thread(data, 0);
}
int gdlm_init_threads(struct gdlm_ls *ls)
{
struct task_struct *p;
int error;
p = kthread_run(gdlm_thread, ls, "lock_dlm1");
p = kthread_run(gdlm_thread1, ls, "lock_dlm1");
error = IS_ERR(p);
if (error) {
log_error("can't start lock_dlm1 thread %d", error);
......@@ -346,7 +352,7 @@ int gdlm_init_threads(struct gdlm_ls *ls)
}
ls->thread1 = p;
p = kthread_run(gdlm_thread, ls, "lock_dlm2");
p = kthread_run(gdlm_thread2, ls, "lock_dlm2");
error = IS_ERR(p);
if (error) {
log_error("can't start lock_dlm2 thread %d", error);
......
......@@ -9,7 +9,6 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/fs.h>
......
......@@ -59,6 +59,26 @@ unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
return blks;
}
/**
 * gfs2_remove_from_ail - Remove an entry from the ail lists, updating counters
 * @mapping: The associated mapping (may be NULL)
 * @bd: The gfs2_bufdata to remove
 *
 * Clears bd->bd_ail, unlinks @bd from both its bd_ail_st_list and
 * bd_ail_gl_list, decrements the owning glock's gl_ail_count and finally
 * calls brelse() on the attached buffer head.
 *
 * The log lock _must_ be held when calling this function
 *
 */
void gfs2_remove_from_ail(struct address_space *mapping, struct gfs2_bufdata *bd)
{
bd->bd_ail = NULL;
/* list_del_init() leaves both list heads empty and safe to test later */
list_del_init(&bd->bd_ail_st_list);
list_del_init(&bd->bd_ail_gl_list);
atomic_dec(&bd->bd_gl->gl_ail_count);
/* The metadata cache is only flushed when the caller supplied a mapping */
if (mapping)
gfs2_meta_cache_flush(GFS2_I(mapping->host));
/* NOTE(review): presumably the reference held while on the AIL - confirm */
brelse(bd->bd_bh);
}
/**
* gfs2_ail1_start_one - Start I/O on a part of the AIL
* @sdp: the filesystem
......@@ -83,17 +103,9 @@ static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
gfs2_assert(sdp, bd->bd_ail == ai);
if (!bh){
list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
continue;
}
if (!buffer_busy(bh)) {
if (!buffer_uptodate(bh)) {
gfs2_log_unlock(sdp);
if (!buffer_uptodate(bh))
gfs2_io_error_bh(sdp, bh);
gfs2_log_lock(sdp);
}
list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
continue;
}
......@@ -103,9 +115,16 @@ static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
list_move(&bd->bd_ail_st_list, &ai->ai_ail1_list);
get_bh(bh);
gfs2_log_unlock(sdp);
wait_on_buffer(bh);
ll_rw_block(WRITE, 1, &bh);
lock_buffer(bh);
if (test_clear_buffer_dirty(bh)) {
bh->b_end_io = end_buffer_write_sync;
submit_bh(WRITE, bh);
} else {
unlock_buffer(bh);
brelse(bh);
}
gfs2_log_lock(sdp);
retry = 1;
......@@ -130,11 +149,6 @@ static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int fl
bd_ail_st_list) {
bh = bd->bd_bh;
if (!bh){
list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
continue;
}
gfs2_assert(sdp, bd->bd_ail == ai);
if (buffer_busy(bh)) {
......@@ -155,13 +169,14 @@ static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int fl
static void gfs2_ail1_start(struct gfs2_sbd *sdp, int flags)
{
struct list_head *head = &sdp->sd_ail1_list;
struct list_head *head;
u64 sync_gen;
struct list_head *first;
struct gfs2_ail *first_ai, *ai, *tmp;
int done = 0;
gfs2_log_lock(sdp);
head = &sdp->sd_ail1_list;
if (list_empty(head)) {
gfs2_log_unlock(sdp);
return;
......@@ -233,11 +248,7 @@ static void gfs2_ail2_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
bd = list_entry(head->prev, struct gfs2_bufdata,
bd_ail_st_list);
gfs2_assert(sdp, bd->bd_ail == ai);
bd->bd_ail = NULL;
list_del(&bd->bd_ail_st_list);
list_del(&bd->bd_ail_gl_list);
atomic_dec(&bd->bd_gl->gl_ail_count);
brelse(bd->bd_bh);
gfs2_remove_from_ail(bd->bd_bh->b_page->mapping, bd);
}
}
......@@ -439,10 +450,10 @@ static unsigned int current_tail(struct gfs2_sbd *sdp)
return tail;
}
static inline void log_incr_head(struct gfs2_sbd *sdp)
void gfs2_log_incr_head(struct gfs2_sbd *sdp)
{
if (sdp->sd_log_flush_head == sdp->sd_log_tail)
gfs2_assert_withdraw(sdp, sdp->sd_log_flush_head == sdp->sd_log_head);
BUG_ON(sdp->sd_log_flush_head != sdp->sd_log_head);
if (++sdp->sd_log_flush_head == sdp->sd_jdesc->jd_blocks) {
sdp->sd_log_flush_head = 0;
......@@ -450,6 +461,23 @@ static inline void log_incr_head(struct gfs2_sbd *sdp)
}
}
/**
 * gfs2_log_write_endio - End of I/O for a log buffer
 * @bh: The buffer head
 * @uptodate: I/O Status
 *
 * Installed as b_end_io by gfs2_log_get_buf(), which also stores the
 * superblock in bh->b_private. Finishes the write via
 * end_buffer_write_sync(), then decrements sd_log_in_flight and wakes
 * sd_log_flush_wait when the count reaches zero.
 */
static void gfs2_log_write_endio(struct buffer_head *bh, int uptodate)
{
/* Fetch the superblock before b_private is cleared below */
struct gfs2_sbd *sdp = bh->b_private;
bh->b_private = NULL;
end_buffer_write_sync(bh, uptodate);
/* Last in-flight log buffer: wake anyone waiting on the flush */
if (atomic_dec_and_test(&sdp->sd_log_in_flight))
wake_up(&sdp->sd_log_flush_wait);
}
/**
* gfs2_log_get_buf - Get and initialize a buffer to use for log control data
* @sdp: The GFS2 superblock
......@@ -460,24 +488,42 @@ static inline void log_incr_head(struct gfs2_sbd *sdp)
struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp)
{
u64 blkno = log_bmap(sdp, sdp->sd_log_flush_head);
struct gfs2_log_buf *lb;
struct buffer_head *bh;
lb = kzalloc(sizeof(struct gfs2_log_buf), GFP_NOFS | __GFP_NOFAIL);
list_add(&lb->lb_list, &sdp->sd_log_flush_list);
bh = lb->lb_bh = sb_getblk(sdp->sd_vfs, blkno);
bh = sb_getblk(sdp->sd_vfs, blkno);
lock_buffer(bh);
memset(bh->b_data, 0, bh->b_size);
set_buffer_uptodate(bh);
clear_buffer_dirty(bh);
unlock_buffer(bh);
log_incr_head(sdp);
gfs2_log_incr_head(sdp);
atomic_inc(&sdp->sd_log_in_flight);
bh->b_private = sdp;
bh->b_end_io = gfs2_log_write_endio;
return bh;
}
/**
 * gfs2_fake_write_endio - End of I/O for a fake log buffer head
 * @bh: The buffer head
 * @uptodate: The I/O Status
 *
 * Installed as b_end_io by gfs2_log_fake_buf(), which stores the real
 * buffer head in bh->b_private. Finishes the write on the fake buffer
 * head and frees it, unlocks and releases the real buffer head, then
 * decrements sd_log_in_flight and wakes sd_log_flush_wait when the count
 * reaches zero.
 */
static void gfs2_fake_write_endio(struct buffer_head *bh, int uptodate)
{
/* Everything needed is looked up here, before bh is freed below */
struct buffer_head *real_bh = bh->b_private;
struct gfs2_bufdata *bd = real_bh->b_private;
struct gfs2_sbd *sdp = bd->bd_gl->gl_sbd;
end_buffer_write_sync(bh, uptodate);
/* bh must not be touched after this point */
free_buffer_head(bh);
unlock_buffer(real_bh);
brelse(real_bh);
/* Last in-flight log buffer: wake anyone waiting on the flush */
if (atomic_dec_and_test(&sdp->sd_log_in_flight))
wake_up(&sdp->sd_log_flush_wait);
}
/**
* gfs2_log_fake_buf - Build a fake buffer head to write metadata buffer to log
* @sdp: the filesystem
......@@ -490,22 +536,20 @@ struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp,
struct buffer_head *real)
{
u64 blkno = log_bmap(sdp, sdp->sd_log_flush_head);
struct gfs2_log_buf *lb;
struct buffer_head *bh;
lb = kzalloc(sizeof(struct gfs2_log_buf), GFP_NOFS | __GFP_NOFAIL);
list_add(&lb->lb_list, &sdp->sd_log_flush_list);
lb->lb_real = real;
bh = lb->lb_bh = alloc_buffer_head(GFP_NOFS | __GFP_NOFAIL);
bh = alloc_buffer_head(GFP_NOFS | __GFP_NOFAIL);
atomic_set(&bh->b_count, 1);
bh->b_state = (1 << BH_Mapped) | (1 << BH_Uptodate);
bh->b_state = (1 << BH_Mapped) | (1 << BH_Uptodate) | (1 << BH_Lock);
set_bh_page(bh, real->b_page, bh_offset(real));
bh->b_blocknr = blkno;
bh->b_size = sdp->sd_sb.sb_bsize;
bh->b_bdev = sdp->sd_vfs->s_bdev;
bh->b_private = real;
bh->b_end_io = gfs2_fake_write_endio;
log_incr_head(sdp);
gfs2_log_incr_head(sdp);
atomic_inc(&sdp->sd_log_in_flight);
return bh;
}
......@@ -572,45 +616,75 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
gfs2_assert_withdraw(sdp, !pull);
sdp->sd_log_idle = (tail == sdp->sd_log_flush_head);
log_incr_head(sdp);
gfs2_log_incr_head(sdp);
}
static void log_flush_commit(struct gfs2_sbd *sdp)
{
struct list_head *head = &sdp->sd_log_flush_list;
struct gfs2_log_buf *lb;
struct buffer_head *bh;
int flushcount = 0;
DEFINE_WAIT(wait);
if (atomic_read(&sdp->sd_log_in_flight)) {
do {
prepare_to_wait(&sdp->sd_log_flush_wait, &wait,
TASK_UNINTERRUPTIBLE);
if (atomic_read(&sdp->sd_log_in_flight))
io_schedule();
} while(atomic_read(&sdp->sd_log_in_flight));
finish_wait(&sdp->sd_log_flush_wait, &wait);
}
while (!list_empty(head)) {
lb = list_entry(head->next, struct gfs2_log_buf, lb_list);
list_del(&lb->lb_list);
bh = lb->lb_bh;
log_write_header(sdp, 0, 0);
}
wait_on_buffer(bh);
if (!buffer_uptodate(bh))
gfs2_io_error_bh(sdp, bh);
if (lb->lb_real) {
while (atomic_read(&bh->b_count) != 1) /* Grrrr... */
schedule();
free_buffer_head(bh);
} else
static void gfs2_ordered_write(struct gfs2_sbd *sdp)
{
struct gfs2_bufdata *bd;
struct buffer_head *bh;
LIST_HEAD(written);
gfs2_log_lock(sdp);
while (!list_empty(&sdp->sd_log_le_ordered)) {
bd = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_bufdata, bd_le.le_list);
list_move(&bd->bd_le.le_list, &written);
bh = bd->bd_bh;
if (!buffer_dirty(bh))
continue;
get_bh(bh);
gfs2_log_unlock(sdp);
lock_buffer(bh);
if (test_clear_buffer_dirty(bh)) {
bh->b_end_io = end_buffer_write_sync;
submit_bh(WRITE, bh);
} else {
unlock_buffer(bh);
brelse(bh);
kfree(lb);
flushcount++;
}
gfs2_log_lock(sdp);
}
list_splice(&written, &sdp->sd_log_le_ordered);
gfs2_log_unlock(sdp);
}
/* If nothing was journaled, the header is unplanned and unwanted. */
if (flushcount) {
log_write_header(sdp, 0, 0);
} else {
unsigned int tail;
tail = current_tail(sdp);
static void gfs2_ordered_wait(struct gfs2_sbd *sdp)
{
struct gfs2_bufdata *bd;
struct buffer_head *bh;
gfs2_ail1_empty(sdp, 0);
if (sdp->sd_log_tail != tail)
log_pull_tail(sdp, tail);
gfs2_log_lock(sdp);
while (!list_empty(&sdp->sd_log_le_ordered)) {
bd = list_entry(sdp->sd_log_le_ordered.prev, struct gfs2_bufdata, bd_le.le_list);
bh = bd->bd_bh;
if (buffer_locked(bh)) {
get_bh(bh);
gfs2_log_unlock(sdp);
wait_on_buffer(bh);
brelse(bh);
gfs2_log_lock(sdp);
continue;
}
list_del_init(&bd->bd_le.le_list);
}
gfs2_log_unlock(sdp);
}
/**
......@@ -640,10 +714,16 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
INIT_LIST_HEAD(&ai->ai_ail1_list);
INIT_LIST_HEAD(&ai->ai_ail2_list);
gfs2_assert_withdraw(sdp,
sdp->sd_log_num_buf + sdp->sd_log_num_jdata ==
sdp->sd_log_commited_buf +
sdp->sd_log_commited_databuf);
if (sdp->sd_log_num_buf != sdp->sd_log_commited_buf) {
printk(KERN_INFO "GFS2: log buf %u %u\n", sdp->sd_log_num_buf,
sdp->sd_log_commited_buf);
gfs2_assert_withdraw(sdp, 0);
}
if (sdp->sd_log_num_databuf != sdp->sd_log_commited_databuf) {
printk(KERN_INFO "GFS2: log databuf %u %u\n",
sdp->sd_log_num_databuf, sdp->sd_log_commited_databuf);
gfs2_assert_withdraw(sdp, 0);
}
gfs2_assert_withdraw(sdp,
sdp->sd_log_num_revoke == sdp->sd_log_commited_revoke);
......@@ -651,8 +731,11 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
sdp->sd_log_flush_wrapped = 0;
ai->ai_first = sdp->sd_log_flush_head;
gfs2_ordered_write(sdp);
lops_before_commit(sdp);
if (!list_empty(&sdp->sd_log_flush_list))
gfs2_ordered_wait(sdp);
if (sdp->sd_log_head != sdp->sd_log_flush_head)
log_flush_commit(sdp);
else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){
gfs2_log_lock(sdp);
......@@ -744,7 +827,6 @@ void gfs2_log_shutdown(struct gfs2_sbd *sdp)
gfs2_assert_withdraw(sdp, !sdp->sd_log_blks_reserved);
gfs2_assert_withdraw(sdp, !sdp->sd_log_num_gl);
gfs2_assert_withdraw(sdp, !sdp->sd_log_num_buf);
gfs2_assert_withdraw(sdp, !sdp->sd_log_num_jdata);
gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
gfs2_assert_withdraw(sdp, !sdp->sd_log_num_rg);
gfs2_assert_withdraw(sdp, !sdp->sd_log_num_databuf);
......
......@@ -52,12 +52,14 @@ int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags);
int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks);
void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks);
void gfs2_log_incr_head(struct gfs2_sbd *sdp);
struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp);
struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp,
struct buffer_head *real);
void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl);
void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans);
void gfs2_remove_from_ail(struct address_space *mapping, struct gfs2_bufdata *bd);
void gfs2_log_shutdown(struct gfs2_sbd *sdp);
void gfs2_meta_syncfs(struct gfs2_sbd *sdp);
......
......@@ -27,7 +27,104 @@
#include "trans.h"
#include "util.h"
static void glock_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
/**
 * gfs2_pin - Pin a buffer in memory
 * @sdp: The superblock
 * @bh: The buffer to be pinned
 *
 * Clears the buffer's dirty bit, sets its pinned bit and takes an extra
 * reference on it with get_bh(). Asserts (withdrawing on failure) that the
 * journal is live and that the buffer was not already pinned, and reports
 * an I/O error if the buffer is not up to date.
 *
 * The log lock must be held when calling this function
 */
static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
{
struct gfs2_bufdata *bd;
gfs2_assert_withdraw(sdp, test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags));
clear_buffer_dirty(bh);
/* A buffer that is already pinned is treated as a fatal inconsistency */
if (test_set_buffer_pinned(bh))
gfs2_assert_withdraw(sdp, 0);
if (!buffer_uptodate(bh))
gfs2_io_error_bh(sdp, bh);
bd = bh->b_private;
/* If this buffer is in the AIL and it has already been written
 * to its in-place disk block, move it to that AIL entry's
 * ai_ail2_list.
 */
if (bd->bd_ail)
list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list);
get_bh(bh);
}
/**
 * gfs2_unpin - Unpin a buffer
 * @sdp: the filesystem the buffer belongs to
 * @bh: The buffer to unpin
 * @ai: the AIL entry the buffer is attached to; it is stored in
 *      bd->bd_ail and the buffer is added to its ai_ail1_list
 *
 * With the buffer locked, marks it dirty and clears its pinned bit, then
 * (under the log lock) attaches its gfs2_bufdata to @ai.
 */
static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
struct gfs2_ail *ai)
{
struct gfs2_bufdata *bd = bh->b_private;
gfs2_assert_withdraw(sdp, buffer_uptodate(bh));
if (!buffer_pinned(bh))
gfs2_assert_withdraw(sdp, 0);
lock_buffer(bh);
mark_buffer_dirty(bh);
clear_buffer_pinned(bh);
gfs2_log_lock(sdp);
if (bd->bd_ail) {
/* Already on an older AIL entry: unlink from it and drop a reference */
list_del(&bd->bd_ail_st_list);
brelse(bh);
} else {
/* First time on the AIL: account for it on the owning glock */
struct gfs2_glock *gl = bd->bd_gl;
list_add(&bd->bd_ail_gl_list, &gl->gl_ail_list);
atomic_inc(&gl->gl_ail_count);
}
bd->bd_ail = ai;
list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list);
gfs2_log_unlock(sdp);
unlock_buffer(bh);
}
/* View the start of a buffer's data area as a GFS2 log descriptor. */
static inline struct gfs2_log_descriptor *bh_log_desc(struct buffer_head *bh)
{
	struct gfs2_log_descriptor *desc;

	desc = (struct gfs2_log_descriptor *)bh->b_data;
	return desc;
}
static inline __be64 *bh_log_ptr(struct buffer_head *bh)
{
struct gfs2_log_descriptor *ld = bh_log_desc(bh);
return (__force __be64 *)(ld + 1);
}
/* Return a pointer one past the last byte of the buffer's data area. */
static inline __be64 *bh_ptr_end(struct buffer_head *bh)
{
	return (__force __be64 *)&bh->b_data[bh->b_size];
}
/*
 * gfs2_get_log_desc - get a log buffer initialised as a log descriptor
 * @sdp: the filesystem
 * @ld_type: descriptor type, stored big-endian in ld_type
 *
 * Obtains a buffer from gfs2_log_get_buf() and fills in the metadata header
 * and descriptor type. The length and data fields and the reserved area are
 * zeroed; the callers in this file set ld_length and ld_data1 afterwards.
 *
 * Returns: the buffer holding the descriptor
 */
static struct buffer_head *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type)
{
	struct buffer_head *log_bh = gfs2_log_get_buf(sdp);
	struct gfs2_log_descriptor *desc = bh_log_desc(log_bh);

	/* Clear everything that is not part of the block's identity */
	memset(desc->ld_reserved, 0, sizeof(desc->ld_reserved));
	desc->ld_length = desc->ld_data1 = desc->ld_data2 = 0;

	/* Metadata header marking this block as a log descriptor */
	desc->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
	desc->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD);
	desc->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD);
	desc->ld_type = cpu_to_be32(ld_type);

	return log_bh;
}
static void __glock_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
{
struct gfs2_glock *gl;
struct gfs2_trans *tr = current->journal_info;
......@@ -38,15 +135,19 @@ static void glock_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(gl)))
return;
gfs2_log_lock(sdp);
if (!list_empty(&le->le_list)){
gfs2_log_unlock(sdp);
if (!list_empty(&le->le_list))
return;
}
gfs2_glock_hold(gl);
set_bit(GLF_DIRTY, &gl->gl_flags);
sdp->sd_log_num_gl++;
list_add(&le->le_list, &sdp->sd_log_le_gl);
}
/*
* glock_lo_add - locked wrapper around __glock_lo_add()
* @sdp: the filesystem
* @le: the log element to add
*
* Takes the log lock, calls __glock_lo_add() and releases the lock again.
* Code that already holds the log lock calls __glock_lo_add() directly.
*/
static void glock_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
{
gfs2_log_lock(sdp);
__glock_lo_add(sdp, le);
gfs2_log_unlock(sdp);
}
......@@ -71,30 +172,25 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
struct gfs2_trans *tr;
lock_buffer(bd->bd_bh);
gfs2_log_lock(sdp);
if (!list_empty(&bd->bd_list_tr)) {
gfs2_log_unlock(sdp);
return;
}
if (!list_empty(&bd->bd_list_tr))
goto out;
tr = current->journal_info;
tr->tr_touched = 1;
tr->tr_num_buf++;
list_add(&bd->bd_list_tr, &tr->tr_list_buf);
gfs2_log_unlock(sdp);
if (!list_empty(&le->le_list))
return;
gfs2_trans_add_gl(bd->bd_gl);
goto out;
__glock_lo_add(sdp, &bd->bd_gl->gl_le);
gfs2_meta_check(sdp, bd->bd_bh);
gfs2_pin(sdp, bd->bd_bh);
gfs2_log_lock(sdp);
sdp->sd_log_num_buf++;
list_add(&le->le_list, &sdp->sd_log_le_buf);
gfs2_log_unlock(sdp);
tr->tr_num_buf_new++;
out:
gfs2_log_unlock(sdp);
unlock_buffer(bd->bd_bh);
}
static void buf_lo_incore_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
......@@ -117,8 +213,7 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp)
struct buffer_head *bh;
struct gfs2_log_descriptor *ld;
struct gfs2_bufdata *bd1 = NULL, *bd2;
unsigned int total = sdp->sd_log_num_buf;
unsigned int offset = BUF_OFFSET;
unsigned int total;
unsigned int limit;
unsigned int num;
unsigned n;
......@@ -127,22 +222,20 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp)
limit = buf_limit(sdp);
/* for 4k blocks, limit = 503 */
gfs2_log_lock(sdp);
total = sdp->sd_log_num_buf;
bd1 = bd2 = list_prepare_entry(bd1, &sdp->sd_log_le_buf, bd_le.le_list);
while(total) {
num = total;
if (total > limit)
num = limit;
bh = gfs2_log_get_buf(sdp);
ld = (struct gfs2_log_descriptor *)bh->b_data;
ptr = (__be64 *)(bh->b_data + offset);
ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
ld->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD);
ld->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD);
ld->ld_type = cpu_to_be32(GFS2_LOG_DESC_METADATA);
gfs2_log_unlock(sdp);
bh = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_METADATA);
gfs2_log_lock(sdp);
ld = bh_log_desc(bh);
ptr = bh_log_ptr(bh);
ld->ld_length = cpu_to_be32(num + 1);
ld->ld_data1 = cpu_to_be32(num);
ld->ld_data2 = cpu_to_be32(0);
memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved));
n = 0;
list_for_each_entry_continue(bd1, &sdp->sd_log_le_buf,
......@@ -152,21 +245,27 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp)
break;
}
set_buffer_dirty(bh);
ll_rw_block(WRITE, 1, &bh);
gfs2_log_unlock(sdp);
submit_bh(WRITE, bh);
gfs2_log_lock(sdp);
n = 0;
list_for_each_entry_continue(bd2, &sdp->sd_log_le_buf,
bd_le.le_list) {
get_bh(bd2->bd_bh);
gfs2_log_unlock(sdp);
lock_buffer(bd2->bd_bh);
bh = gfs2_log_fake_buf(sdp, bd2->bd_bh);
set_buffer_dirty(bh);
ll_rw_block(WRITE, 1, &bh);
submit_bh(WRITE, bh);
gfs2_log_lock(sdp);
if (++n >= num)
break;
}
BUG_ON(total < num);
total -= num;
}
gfs2_log_unlock(sdp);
}
static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
......@@ -270,11 +369,8 @@ static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
tr = current->journal_info;
tr->tr_touched = 1;
tr->tr_num_revoke++;
gfs2_log_lock(sdp);
sdp->sd_log_num_revoke++;
list_add(&le->le_list, &sdp->sd_log_le_revoke);
gfs2_log_unlock(sdp);
}
static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
......@@ -284,32 +380,25 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
struct buffer_head *bh;
unsigned int offset;
struct list_head *head = &sdp->sd_log_le_revoke;
struct gfs2_revoke *rv;
struct gfs2_bufdata *bd;
if (!sdp->sd_log_num_revoke)
return;
bh = gfs2_log_get_buf(sdp);
ld = (struct gfs2_log_descriptor *)bh->b_data;
ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
ld->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD);
ld->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD);
ld->ld_type = cpu_to_be32(GFS2_LOG_DESC_REVOKE);
bh = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_REVOKE);
ld = bh_log_desc(bh);
ld->ld_length = cpu_to_be32(gfs2_struct2blk(sdp, sdp->sd_log_num_revoke,
sizeof(u64)));
ld->ld_data1 = cpu_to_be32(sdp->sd_log_num_revoke);
ld->ld_data2 = cpu_to_be32(0);
memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved));
offset = sizeof(struct gfs2_log_descriptor);
while (!list_empty(head)) {
rv = list_entry(head->next, struct gfs2_revoke, rv_le.le_list);
list_del_init(&rv->rv_le.le_list);
bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
list_del_init(&bd->bd_le.le_list);
sdp->sd_log_num_revoke--;
if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) {
set_buffer_dirty(bh);
ll_rw_block(WRITE, 1, &bh);
submit_bh(WRITE, bh);
bh = gfs2_log_get_buf(sdp);
mh = (struct gfs2_meta_header *)bh->b_data;
......@@ -319,15 +408,14 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
offset = sizeof(struct gfs2_meta_header);
}
*(__be64 *)(bh->b_data + offset) = cpu_to_be64(rv->rv_blkno);
kfree(rv);
*(__be64 *)(bh->b_data + offset) = cpu_to_be64(bd->bd_blkno);
kmem_cache_free(gfs2_bufdata_cachep, bd);
offset += sizeof(u64);
}
gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
set_buffer_dirty(bh);
ll_rw_block(WRITE, 1, &bh);
submit_bh(WRITE, bh);
}
static void revoke_lo_before_scan(struct gfs2_jdesc *jd,
......@@ -466,222 +554,136 @@ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
struct address_space *mapping = bd->bd_bh->b_page->mapping;
struct gfs2_inode *ip = GFS2_I(mapping->host);
lock_buffer(bd->bd_bh);
gfs2_log_lock(sdp);
if (!list_empty(&bd->bd_list_tr)) {
gfs2_log_unlock(sdp);
return;
}
if (!list_empty(&bd->bd_list_tr))
goto out;
tr->tr_touched = 1;
if (gfs2_is_jdata(ip)) {
tr->tr_num_buf++;
list_add(&bd->bd_list_tr, &tr->tr_list_buf);
}
gfs2_log_unlock(sdp);
if (!list_empty(&le->le_list))
return;
goto out;
gfs2_trans_add_gl(bd->bd_gl);
__glock_lo_add(sdp, &bd->bd_gl->gl_le);
if (gfs2_is_jdata(ip)) {
sdp->sd_log_num_jdata++;
gfs2_pin(sdp, bd->bd_bh);
tr->tr_num_databuf_new++;
sdp->sd_log_num_databuf++;
list_add(&le->le_list, &sdp->sd_log_le_databuf);
} else {
list_add(&le->le_list, &sdp->sd_log_le_ordered);
}
gfs2_log_lock(sdp);
sdp->sd_log_num_databuf++;
list_add(&le->le_list, &sdp->sd_log_le_databuf);
out:
gfs2_log_unlock(sdp);
unlock_buffer(bd->bd_bh);
}
static int gfs2_check_magic(struct buffer_head *bh)
static void gfs2_check_magic(struct buffer_head *bh)
{
struct page *page = bh->b_page;
void *kaddr;
__be32 *ptr;
int rv = 0;
kaddr = kmap_atomic(page, KM_USER0);
clear_buffer_escaped(bh);
kaddr = kmap_atomic(bh->b_page, KM_USER0);
ptr = kaddr + bh_offset(bh);
if (*ptr == cpu_to_be32(GFS2_MAGIC))
rv = 1;
set_buffer_escaped(bh);
kunmap_atomic(kaddr, KM_USER0);
return rv;
}
/**
* databuf_lo_before_commit - Scan the data buffers, writing as we go
*
* Here we scan through the lists of buffers and make the assumption
* that any buffer thats been pinned is being journaled, and that
* any unpinned buffer is an ordered write data buffer and therefore
* will be written back rather than journaled.
*/
static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
static void gfs2_write_blocks(struct gfs2_sbd *sdp, struct buffer_head *bh,
struct list_head *list, struct list_head *done,
unsigned int n)
{
LIST_HEAD(started);
struct gfs2_bufdata *bd1 = NULL, *bd2, *bdt;
struct buffer_head *bh = NULL,*bh1 = NULL;
struct buffer_head *bh1;
struct gfs2_log_descriptor *ld;
unsigned int limit;
unsigned int total_dbuf;
unsigned int total_jdata = sdp->sd_log_num_jdata;
unsigned int num, n;
__be64 *ptr = NULL;
struct gfs2_bufdata *bd;
__be64 *ptr;
if (!bh)
return;
limit = databuf_limit(sdp);
ld = bh_log_desc(bh);
ld->ld_length = cpu_to_be32(n + 1);
ld->ld_data1 = cpu_to_be32(n);
/*
* Start writing ordered buffers, write journaled buffers
* into the log along with a header
*/
ptr = bh_log_ptr(bh);
get_bh(bh);
submit_bh(WRITE, bh);
gfs2_log_lock(sdp);
total_dbuf = sdp->sd_log_num_databuf;
bd2 = bd1 = list_prepare_entry(bd1, &sdp->sd_log_le_databuf,
bd_le.le_list);
while(total_dbuf) {
num = total_jdata;
if (num > limit)
num = limit;
n = 0;
list_for_each_entry_safe_continue(bd1, bdt,
&sdp->sd_log_le_databuf,
bd_le.le_list) {
/* store off the buffer head in a local ptr since
* gfs2_bufdata might change when we drop the log lock
*/
bh1 = bd1->bd_bh;
/* An ordered write buffer */
if (bh1 && !buffer_pinned(bh1)) {
list_move(&bd1->bd_le.le_list, &started);
if (bd1 == bd2) {
bd2 = NULL;
bd2 = list_prepare_entry(bd2,
&sdp->sd_log_le_databuf,
bd_le.le_list);
}
total_dbuf--;
if (bh1) {
if (buffer_dirty(bh1)) {
get_bh(bh1);
gfs2_log_unlock(sdp);
ll_rw_block(SWRITE, 1, &bh1);
brelse(bh1);
gfs2_log_lock(sdp);
}
continue;
}
continue;
} else if (bh1) { /* A journaled buffer */
int magic;
gfs2_log_unlock(sdp);
if (!bh) {
bh = gfs2_log_get_buf(sdp);
ld = (struct gfs2_log_descriptor *)
bh->b_data;
ptr = (__be64 *)(bh->b_data +
DATABUF_OFFSET);
ld->ld_header.mh_magic =
cpu_to_be32(GFS2_MAGIC);
ld->ld_header.mh_type =
cpu_to_be32(GFS2_METATYPE_LD);
ld->ld_header.mh_format =
cpu_to_be32(GFS2_FORMAT_LD);
ld->ld_type =
cpu_to_be32(GFS2_LOG_DESC_JDATA);
ld->ld_length = cpu_to_be32(num + 1);
ld->ld_data1 = cpu_to_be32(num);
ld->ld_data2 = cpu_to_be32(0);
memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved));
}
magic = gfs2_check_magic(bh1);
*ptr++ = cpu_to_be64(bh1->b_blocknr);
*ptr++ = cpu_to_be64((__u64)magic);
clear_buffer_escaped(bh1);
if (unlikely(magic != 0))
set_buffer_escaped(bh1);
gfs2_log_lock(sdp);
if (++n >= num)
break;
} else if (!bh1) {
total_dbuf--;
sdp->sd_log_num_databuf--;
list_del_init(&bd1->bd_le.le_list);
if (bd1 == bd2) {
bd2 = NULL;
bd2 = list_prepare_entry(bd2,
&sdp->sd_log_le_databuf,
bd_le.le_list);
}
kmem_cache_free(gfs2_bufdata_cachep, bd1);
}
while(!list_empty(list)) {
bd = list_entry(list->next, struct gfs2_bufdata, bd_le.le_list);
list_move_tail(&bd->bd_le.le_list, done);
get_bh(bd->bd_bh);
while (be64_to_cpu(*ptr) != bd->bd_bh->b_blocknr) {
gfs2_log_incr_head(sdp);
ptr += 2;
}
gfs2_log_unlock(sdp);
if (bh) {
set_buffer_mapped(bh);
set_buffer_dirty(bh);
ll_rw_block(WRITE, 1, &bh);
bh = NULL;
lock_buffer(bd->bd_bh);
if (buffer_escaped(bd->bd_bh)) {
void *kaddr;
bh1 = gfs2_log_get_buf(sdp);
kaddr = kmap_atomic(bd->bd_bh->b_page, KM_USER0);
memcpy(bh1->b_data, kaddr + bh_offset(bd->bd_bh),
bh1->b_size);
kunmap_atomic(kaddr, KM_USER0);
*(__be32 *)bh1->b_data = 0;
clear_buffer_escaped(bd->bd_bh);
unlock_buffer(bd->bd_bh);
brelse(bd->bd_bh);
} else {
bh1 = gfs2_log_fake_buf(sdp, bd->bd_bh);
}
n = 0;
submit_bh(WRITE, bh1);
gfs2_log_lock(sdp);
list_for_each_entry_continue(bd2, &sdp->sd_log_le_databuf,
bd_le.le_list) {
if (!bd2->bd_bh)
continue;
/* copy buffer if it needs escaping */
gfs2_log_unlock(sdp);
if (unlikely(buffer_escaped(bd2->bd_bh))) {
void *kaddr;
struct page *page = bd2->bd_bh->b_page;
bh = gfs2_log_get_buf(sdp);
kaddr = kmap_atomic(page, KM_USER0);
memcpy(bh->b_data,
kaddr + bh_offset(bd2->bd_bh),
sdp->sd_sb.sb_bsize);
kunmap_atomic(kaddr, KM_USER0);
*(__be32 *)bh->b_data = 0;
} else {
bh = gfs2_log_fake_buf(sdp, bd2->bd_bh);
}
set_buffer_dirty(bh);
ll_rw_block(WRITE, 1, &bh);
gfs2_log_lock(sdp);
if (++n >= num)
break;
}
bh = NULL;
BUG_ON(total_dbuf < num);
total_dbuf -= num;
total_jdata -= num;
ptr += 2;
}
gfs2_log_unlock(sdp);
brelse(bh);
}
/* Wait on all ordered buffers */
while (!list_empty(&started)) {
gfs2_log_lock(sdp);
bd1 = list_entry(started.next, struct gfs2_bufdata,
bd_le.le_list);
list_del_init(&bd1->bd_le.le_list);
sdp->sd_log_num_databuf--;
bh = bd1->bd_bh;
if (bh) {
bh->b_private = NULL;
get_bh(bh);
gfs2_log_unlock(sdp);
wait_on_buffer(bh);
brelse(bh);
} else
gfs2_log_unlock(sdp);
/**
* databuf_lo_before_commit - Scan the data buffers, writing as we go
*
*/
kmem_cache_free(gfs2_bufdata_cachep, bd1);
}
/*
 * Write every buffer queued on sd_log_le_databuf into the log.
 *
 * Buffers are taken off sd_log_le_databuf one at a time and batched on a
 * local in_progress list. For each one, two __be64 values are appended to
 * the current log descriptor: the buffer's block number and a flag that is
 * 1 when the buffer needs escaping (its first word equals GFS2_MAGIC, as
 * determined by gfs2_check_magic()). When the descriptor is full, or the
 * queue is empty, the batch is handed to gfs2_write_blocks(), which moves
 * the entries to the processed list. Finally the processed entries are
 * spliced back onto sd_log_le_databuf.
 *
 * The log lock is held while the lists are manipulated and dropped around
 * gfs2_write_blocks() and gfs2_get_log_desc().
 */
static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
{
	struct gfs2_bufdata *bd = NULL;
	struct buffer_head *bh = NULL;
	unsigned int n = 0;
	__be64 *ptr = NULL, *end = NULL;
	LIST_HEAD(processed);
	LIST_HEAD(in_progress);

	/* We've removed all the ordered write bufs here, so only jdata left */
	gfs2_assert_warn(sdp, sdp->sd_log_num_databuf == sdp->sd_log_num_jdata);

	gfs2_log_lock(sdp);
	while (!list_empty(&sdp->sd_log_le_databuf)) {
		/*
		 * ptr == end both on the first pass (NULL == NULL) and when
		 * the current descriptor has no room for another pair: flush
		 * the current batch (a no-op while bh is NULL) and start a
		 * fresh descriptor.
		 */
		if (ptr == end) {
			gfs2_log_unlock(sdp);
			gfs2_write_blocks(sdp, bh, &in_progress, &processed, n);
			n = 0;
			bh = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_JDATA);
			ptr = bh_log_ptr(bh);
			/* Each entry uses two slots, so stop one slot early */
			end = bh_ptr_end(bh) - 1;
			gfs2_log_lock(sdp);
			continue;
		}
		bd = list_entry(sdp->sd_log_le_databuf.next,
				struct gfs2_bufdata, bd_le.le_list);
		list_move_tail(&bd->bd_le.le_list, &in_progress);
		gfs2_check_magic(bd->bd_bh);
		*ptr++ = cpu_to_be64(bd->bd_bh->b_blocknr);
		/*
		 * The escape flag belongs to the data buffer that
		 * gfs2_check_magic() just examined, not to the descriptor
		 * buffer bh; gfs2_write_blocks() tests the same flag on
		 * bd->bd_bh when it copies the block.
		 */
		*ptr++ = cpu_to_be64(buffer_escaped(bd->bd_bh) ? 1 : 0);
		n++;
	}
	gfs2_log_unlock(sdp);

	/* Flush the final, possibly partial, batch */
	gfs2_write_blocks(sdp, bh, &in_progress, &processed, n);

	gfs2_log_lock(sdp);
	list_splice(&processed, &sdp->sd_log_le_databuf);
	gfs2_log_unlock(sdp);
}
static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
......@@ -765,11 +767,9 @@ static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
list_del_init(&bd->bd_le.le_list);
sdp->sd_log_num_databuf--;
sdp->sd_log_num_jdata--;
gfs2_unpin(sdp, bd->bd_bh, ai);
}
gfs2_assert_warn(sdp, !sdp->sd_log_num_databuf);
gfs2_assert_warn(sdp, !sdp->sd_log_num_jdata);
}
......@@ -817,10 +817,10 @@ const struct gfs2_log_operations gfs2_databuf_lops = {
const struct gfs2_log_operations *gfs2_log_ops[] = {
&gfs2_glock_lops,
&gfs2_databuf_lops,
&gfs2_buf_lops,
&gfs2_revoke_lops,
&gfs2_rg_lops,
&gfs2_databuf_lops,
&gfs2_revoke_lops,
NULL,
};
......@@ -107,6 +107,8 @@ static int __init init_gfs2_fs(void)
fail_unregister:
unregister_filesystem(&gfs2_fs_type);
fail:
gfs2_glock_exit();
if (gfs2_bufdata_cachep)
kmem_cache_destroy(gfs2_bufdata_cachep);
......@@ -127,6 +129,7 @@ static int __init init_gfs2_fs(void)
static void __exit exit_gfs2_fs(void)
{
gfs2_glock_exit();
gfs2_unregister_debugfs();
unregister_filesystem(&gfs2_fs_type);
unregister_filesystem(&gfs2meta_fs_type);
......
......@@ -297,74 +297,35 @@ void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh,
unlock_page(bh->b_page);
}
/**
* gfs2_pin - Pin a buffer in memory
* @sdp: the filesystem the buffer belongs to
* @bh: The buffer to be pinned
*
*/
void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int meta)
{
struct gfs2_sbd *sdp = GFS2_SB(bh->b_page->mapping->host);
struct gfs2_bufdata *bd = bh->b_private;
gfs2_assert_withdraw(sdp, test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags));
if (test_set_buffer_pinned(bh))
gfs2_assert_withdraw(sdp, 0);
wait_on_buffer(bh);
/* If this buffer is in the AIL and it has already been written
to in-place disk block, remove it from the AIL. */
gfs2_log_lock(sdp);
if (bd->bd_ail && !buffer_in_io(bh))
list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list);
gfs2_log_unlock(sdp);
clear_buffer_dirty(bh);
wait_on_buffer(bh);
if (!buffer_uptodate(bh))
gfs2_io_error_bh(sdp, bh);
get_bh(bh);
}
/**
* gfs2_unpin - Unpin a buffer
* @sdp: the filesystem the buffer belongs to
* @bh: The buffer to unpin
* @ai:
*
*/
void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
struct gfs2_ail *ai)
{
struct gfs2_bufdata *bd = bh->b_private;
gfs2_assert_withdraw(sdp, buffer_uptodate(bh));
if (!buffer_pinned(bh))
gfs2_assert_withdraw(sdp, 0);
mark_buffer_dirty(bh);
clear_buffer_pinned(bh);
gfs2_log_lock(sdp);
if (bd->bd_ail) {
list_del(&bd->bd_ail_st_list);
if (test_clear_buffer_pinned(bh)) {
list_del_init(&bd->bd_le.le_list);
if (meta) {
gfs2_assert_warn(sdp, sdp->sd_log_num_buf);
sdp->sd_log_num_buf--;
tr->tr_num_buf_rm++;
} else {
gfs2_assert_warn(sdp, sdp->sd_log_num_databuf);
sdp->sd_log_num_databuf--;
tr->tr_num_databuf_rm++;
}
tr->tr_touched = 1;
brelse(bh);
} else {
struct gfs2_glock *gl = bd->bd_gl;
list_add(&bd->bd_ail_gl_list, &gl->gl_ail_list);
atomic_inc(&gl->gl_ail_count);
}
bd->bd_ail = ai;
list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list);
gfs2_log_unlock(sdp);
if (bd) {
if (bd->bd_ail) {
gfs2_remove_from_ail(NULL, bd);
bh->b_private = NULL;
bd->bd_bh = NULL;
bd->bd_blkno = bh->b_blocknr;
gfs2_trans_add_revoke(sdp, bd);
}
}
clear_buffer_dirty(bh);
clear_buffer_uptodate(bh);
}
/**
......@@ -383,44 +344,11 @@ void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen)
while (blen) {
bh = getbuf(ip->i_gl, bstart, NO_CREATE);
if (bh) {
struct gfs2_bufdata *bd = bh->b_private;
if (test_clear_buffer_pinned(bh)) {
struct gfs2_trans *tr = current->journal_info;
struct gfs2_inode *bh_ip =
GFS2_I(bh->b_page->mapping->host);
gfs2_log_lock(sdp);
list_del_init(&bd->bd_le.le_list);
gfs2_assert_warn(sdp, sdp->sd_log_num_buf);
sdp->sd_log_num_buf--;
gfs2_log_unlock(sdp);
if (bh_ip->i_inode.i_private != NULL)
tr->tr_num_databuf_rm++;
else
tr->tr_num_buf_rm++;
brelse(bh);
}
if (bd) {
gfs2_log_lock(sdp);
if (bd->bd_ail) {
u64 blkno = bh->b_blocknr;
bd->bd_ail = NULL;
list_del(&bd->bd_ail_st_list);
list_del(&bd->bd_ail_gl_list);
atomic_dec(&bd->bd_gl->gl_ail_count);
brelse(bh);
gfs2_log_unlock(sdp);
gfs2_trans_add_revoke(sdp, blkno);
} else
gfs2_log_unlock(sdp);
}
lock_buffer(bh);
clear_buffer_dirty(bh);
clear_buffer_uptodate(bh);
gfs2_log_lock(sdp);
gfs2_remove_from_journal(bh, current->journal_info, 1);
gfs2_log_unlock(sdp);
unlock_buffer(bh);
brelse(bh);
}
......@@ -446,10 +374,10 @@ void gfs2_meta_cache_flush(struct gfs2_inode *ip)
for (x = 0; x < GFS2_MAX_META_HEIGHT; x++) {
bh_slot = &ip->i_cache[x];
if (!*bh_slot)
break;
brelse(*bh_slot);
*bh_slot = NULL;
if (*bh_slot) {
brelse(*bh_slot);
*bh_slot = NULL;
}
}
spin_unlock(&ip->i_spin);
......
......@@ -50,9 +50,9 @@ int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh);
void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh,
int meta);
void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh);
void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
struct gfs2_ail *ai);
void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr,
int meta);
void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen);
......
......@@ -42,6 +42,7 @@ enum {
Opt_nosuiddir,
Opt_data_writeback,
Opt_data_ordered,
Opt_err,
};
static match_table_t tokens = {
......@@ -64,7 +65,8 @@ static match_table_t tokens = {
{Opt_suiddir, "suiddir"},
{Opt_nosuiddir, "nosuiddir"},
{Opt_data_writeback, "data=writeback"},
{Opt_data_ordered, "data=ordered"}
{Opt_data_ordered, "data=ordered"},
{Opt_err, NULL}
};
/**
......@@ -237,6 +239,7 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount)
case Opt_data_ordered:
args->ar_data = GFS2_DATA_ORDERED;
break;
case Opt_err:
default:
fs_info(sdp, "unknown option: %s\n", o);
error = -EINVAL;
......
......@@ -90,7 +90,7 @@ static int gfs2_get_block_noalloc(struct inode *inode, sector_t lblock,
error = gfs2_block_map(inode, lblock, 0, bh_result);
if (error)
return error;
if (bh_result->b_blocknr == 0)
if (!buffer_mapped(bh_result))
return -EIO;
return 0;
}
......@@ -414,7 +414,8 @@ static int gfs2_prepare_write(struct file *file, struct page *page,
if (ind_blocks || data_blocks)
rblocks += RES_STATFS + RES_QUOTA;
error = gfs2_trans_begin(sdp, rblocks, 0);
error = gfs2_trans_begin(sdp, rblocks,
PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize);
if (error)
goto out_trans_fail;
......@@ -616,58 +617,50 @@ static sector_t gfs2_bmap(struct address_space *mapping, sector_t lblock)
return dblock;
}
static void discard_buffer(struct gfs2_sbd *sdp, struct buffer_head *bh)
static void gfs2_discard(struct gfs2_sbd *sdp, struct buffer_head *bh)
{
struct gfs2_bufdata *bd;
lock_buffer(bh);
gfs2_log_lock(sdp);
clear_buffer_dirty(bh);
bd = bh->b_private;
if (bd) {
bd->bd_bh = NULL;
bh->b_private = NULL;
if (!bd->bd_ail && list_empty(&bd->bd_le.le_list))
kmem_cache_free(gfs2_bufdata_cachep, bd);
if (!list_empty(&bd->bd_le.le_list) && !buffer_pinned(bh))
list_del_init(&bd->bd_le.le_list);
else
gfs2_remove_from_journal(bh, current->journal_info, 0);
}
gfs2_log_unlock(sdp);
lock_buffer(bh);
clear_buffer_dirty(bh);
bh->b_bdev = NULL;
clear_buffer_mapped(bh);
clear_buffer_req(bh);
clear_buffer_new(bh);
clear_buffer_delay(bh);
gfs2_log_unlock(sdp);
unlock_buffer(bh);
}
static void gfs2_invalidatepage(struct page *page, unsigned long offset)
{
struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
struct buffer_head *head, *bh, *next;
unsigned int curr_off = 0;
struct buffer_head *bh, *head;
unsigned long pos = 0;
BUG_ON(!PageLocked(page));
if (offset == 0)
ClearPageChecked(page);
if (!page_has_buffers(page))
return;
goto out;
bh = head = page_buffers(page);
do {
unsigned int next_off = curr_off + bh->b_size;
next = bh->b_this_page;
if (offset <= curr_off)
discard_buffer(sdp, bh);
curr_off = next_off;
bh = next;
if (offset <= pos)
gfs2_discard(sdp, bh);
pos += bh->b_size;
bh = bh->b_this_page;
} while (bh != head);
if (!offset)
out:
if (offset == 0)
try_to_release_page(page, 0);
return;
}
/**
......@@ -735,59 +728,6 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
return rv;
}
/**
* stuck_releasepage - We're stuck in gfs2_releasepage(). Print stuff out.
* @bh: the buffer we're stuck on
*
*/
static void stuck_releasepage(struct buffer_head *bh)
{
struct inode *inode = bh->b_page->mapping->host;
struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
struct gfs2_bufdata *bd = bh->b_private;
struct gfs2_glock *gl;
static unsigned limit = 0;
if (limit > 3)
return;
limit++;
fs_warn(sdp, "stuck in gfs2_releasepage() %p\n", inode);
fs_warn(sdp, "blkno = %llu, bh->b_count = %d\n",
(unsigned long long)bh->b_blocknr, atomic_read(&bh->b_count));
fs_warn(sdp, "pinned = %u\n", buffer_pinned(bh));
fs_warn(sdp, "bh->b_private = %s\n", (bd) ? "!NULL" : "NULL");
if (!bd)
return;
gl = bd->bd_gl;
fs_warn(sdp, "gl = (%u, %llu)\n",
gl->gl_name.ln_type, (unsigned long long)gl->gl_name.ln_number);
fs_warn(sdp, "bd_list_tr = %s, bd_le.le_list = %s\n",
(list_empty(&bd->bd_list_tr)) ? "no" : "yes",
(list_empty(&bd->bd_le.le_list)) ? "no" : "yes");
if (gl->gl_ops == &gfs2_inode_glops) {
struct gfs2_inode *ip = gl->gl_object;
unsigned int x;
if (!ip)
return;
fs_warn(sdp, "ip = %llu %llu\n",
(unsigned long long)ip->i_no_formal_ino,
(unsigned long long)ip->i_no_addr);
for (x = 0; x < GFS2_MAX_META_HEIGHT; x++)
fs_warn(sdp, "ip->i_cache[%u] = %s\n",
x, (ip->i_cache[x]) ? "!NULL" : "NULL");
}
}
/**
* gfs2_releasepage - free the metadata associated with a page
* @page: the page that's being released
......@@ -805,41 +745,39 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
struct gfs2_sbd *sdp = aspace->i_sb->s_fs_info;
struct buffer_head *bh, *head;
struct gfs2_bufdata *bd;
unsigned long t = jiffies + gfs2_tune_get(sdp, gt_stall_secs) * HZ;
if (!page_has_buffers(page))
goto out;
return 0;
gfs2_log_lock(sdp);
head = bh = page_buffers(page);
do {
while (atomic_read(&bh->b_count)) {
if (!atomic_read(&aspace->i_writecount))
return 0;
if (!(gfp_mask & __GFP_WAIT))
return 0;
if (time_after_eq(jiffies, t)) {
stuck_releasepage(bh);
/* should we withdraw here? */
return 0;
}
yield();
}
if (atomic_read(&bh->b_count))
goto cannot_release;
bd = bh->b_private;
if (bd && bd->bd_ail)
goto cannot_release;
gfs2_assert_warn(sdp, !buffer_pinned(bh));
gfs2_assert_warn(sdp, !buffer_dirty(bh));
bh = bh->b_this_page;
} while(bh != head);
gfs2_log_unlock(sdp);
head = bh = page_buffers(page);
do {
gfs2_log_lock(sdp);
bd = bh->b_private;
if (bd) {
gfs2_assert_warn(sdp, bd->bd_bh == bh);
gfs2_assert_warn(sdp, list_empty(&bd->bd_list_tr));
gfs2_assert_warn(sdp, !bd->bd_ail);
bd->bd_bh = NULL;
if (!list_empty(&bd->bd_le.le_list))
bd = NULL;
if (!list_empty(&bd->bd_le.le_list)) {
if (!buffer_pinned(bh))
list_del_init(&bd->bd_le.le_list);
else
bd = NULL;
}
if (bd)
bd->bd_bh = NULL;
bh->b_private = NULL;
}
gfs2_log_unlock(sdp);
......@@ -849,8 +787,10 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
bh = bh->b_this_page;
} while (bh != head);
out:
return try_to_free_buffers(page);
cannot_release:
gfs2_log_unlock(sdp);
return 0;
}
const struct address_space_operations gfs2_file_aops = {
......
......@@ -237,7 +237,7 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj)
inode = gfs2_inode_lookup(sb, DT_UNKNOWN,
inum->no_addr,
0);
0, 0);
if (!inode)
goto fail;
if (IS_ERR(inode)) {
......
......@@ -571,7 +571,8 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl)
int error = 0;
state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED;
flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY) | GL_EXACT | GL_NOCACHE;
flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY) | GL_EXACT | GL_NOCACHE
| GL_FLOCK;
mutex_lock(&fp->f_fl_mutex);
......@@ -579,21 +580,19 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl)
if (gl) {
if (fl_gh->gh_state == state)
goto out;
gfs2_glock_hold(gl);
flock_lock_file_wait(file,
&(struct file_lock){.fl_type = F_UNLCK});
gfs2_glock_dq_uninit(fl_gh);
gfs2_glock_dq_wait(fl_gh);
gfs2_holder_reinit(state, flags, fl_gh);
} else {
error = gfs2_glock_get(GFS2_SB(&ip->i_inode),
ip->i_no_addr, &gfs2_flock_glops,
CREATE, &gl);
if (error)
goto out;
gfs2_holder_init(gl, state, flags, fl_gh);
gfs2_glock_put(gl);
}
gfs2_holder_init(gl, state, flags, fl_gh);
gfs2_glock_put(gl);
error = gfs2_glock_nq(fl_gh);
if (error) {
gfs2_holder_uninit(fl_gh);
......
......@@ -28,18 +28,18 @@
#include "lm.h"
#include "mount.h"
#include "ops_fstype.h"
#include "ops_dentry.h"
#include "ops_super.h"
#include "recovery.h"
#include "rgrp.h"
#include "super.h"
#include "sys.h"
#include "util.h"
#include "log.h"
#define DO 0
#define UNDO 1
extern struct dentry_operations gfs2_dops;
static struct gfs2_sbd *init_sbd(struct super_block *sb)
{
struct gfs2_sbd *sdp;
......@@ -82,13 +82,15 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
INIT_LIST_HEAD(&sdp->sd_log_le_revoke);
INIT_LIST_HEAD(&sdp->sd_log_le_rg);
INIT_LIST_HEAD(&sdp->sd_log_le_databuf);
INIT_LIST_HEAD(&sdp->sd_log_le_ordered);
mutex_init(&sdp->sd_log_reserve_mutex);
INIT_LIST_HEAD(&sdp->sd_ail1_list);
INIT_LIST_HEAD(&sdp->sd_ail2_list);
init_rwsem(&sdp->sd_log_flush_lock);
INIT_LIST_HEAD(&sdp->sd_log_flush_list);
atomic_set(&sdp->sd_log_in_flight, 0);
init_waitqueue_head(&sdp->sd_log_flush_wait);
INIT_LIST_HEAD(&sdp->sd_revoke_list);
......@@ -145,7 +147,8 @@ static int init_names(struct gfs2_sbd *sdp, int silent)
snprintf(sdp->sd_proto_name, GFS2_FSNAME_LEN, "%s", proto);
snprintf(sdp->sd_table_name, GFS2_FSNAME_LEN, "%s", table);
while ((table = strchr(sdp->sd_table_name, '/')))
table = sdp->sd_table_name;
while ((table = strchr(table, '/')))
*table = '_';
out:
......@@ -161,14 +164,6 @@ static int init_locking(struct gfs2_sbd *sdp, struct gfs2_holder *mount_gh,
if (undo)
goto fail_trans;
p = kthread_run(gfs2_scand, sdp, "gfs2_scand");
error = IS_ERR(p);
if (error) {
fs_err(sdp, "can't start scand thread: %d\n", error);
return error;
}
sdp->sd_scand_process = p;
for (sdp->sd_glockd_num = 0;
sdp->sd_glockd_num < sdp->sd_args.ar_num_glockd;
sdp->sd_glockd_num++) {
......@@ -229,14 +224,13 @@ static int init_locking(struct gfs2_sbd *sdp, struct gfs2_holder *mount_gh,
while (sdp->sd_glockd_num--)
kthread_stop(sdp->sd_glockd_process[sdp->sd_glockd_num]);
kthread_stop(sdp->sd_scand_process);
return error;
}
static inline struct inode *gfs2_lookup_root(struct super_block *sb,
u64 no_addr)
{
return gfs2_inode_lookup(sb, DT_DIR, no_addr, 0);
return gfs2_inode_lookup(sb, DT_DIR, no_addr, 0, 0);
}
static int init_sb(struct gfs2_sbd *sdp, int silent, int undo)
......@@ -301,8 +295,9 @@ static int init_sb(struct gfs2_sbd *sdp, int silent, int undo)
fs_err(sdp, "can't get root dentry\n");
error = -ENOMEM;
iput(inode);
}
sb->s_root->d_op = &gfs2_dops;
} else
sb->s_root->d_op = &gfs2_dops;
out:
gfs2_glock_dq_uninit(&sb_gh);
return error;
......@@ -368,7 +363,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
ip = GFS2_I(sdp->sd_jdesc->jd_inode);
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED,
LM_FLAG_NOEXP | GL_EXACT,
LM_FLAG_NOEXP | GL_EXACT | GL_NOCACHE,
&sdp->sd_jinode_gh);
if (error) {
fs_err(sdp, "can't acquire journal inode glock: %d\n",
......@@ -818,7 +813,6 @@ static struct super_block* get_gfs2_sb(const char *dev_name)
struct nameidata nd;
struct file_system_type *fstype;
struct super_block *sb = NULL, *s;
struct list_head *l;
int error;
error = path_lookup(dev_name, LOOKUP_FOLLOW, &nd);
......@@ -830,8 +824,7 @@ static struct super_block* get_gfs2_sb(const char *dev_name)
error = vfs_getattr(nd.mnt, nd.dentry, &stat);
fstype = get_fs_type("gfs2");
list_for_each(l, &fstype->fs_supers) {
s = list_entry(l, struct super_block, s_instances);
list_for_each_entry(s, &fstype->fs_supers, s_instances) {
if ((S_ISBLK(stat.mode) && s->s_dev == stat.rdev) ||
(S_ISDIR(stat.mode) && s == nd.dentry->d_inode->i_sb)) {
sb = s;
......@@ -861,7 +854,7 @@ static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags,
error = -ENOENT;
goto error;
}
sdp = (struct gfs2_sbd*) sb->s_fs_info;
sdp = sb->s_fs_info;
if (sdp->sd_vfs_meta) {
printk(KERN_WARNING "GFS2: gfs2meta mount already exists\n");
error = -EBUSY;
......@@ -896,7 +889,10 @@ static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags,
static void gfs2_kill_sb(struct super_block *sb)
{
gfs2_delete_debugfs_file(sb->s_fs_info);
if (sb->s_fs_info) {
gfs2_delete_debugfs_file(sb->s_fs_info);
gfs2_meta_syncfs(sb->s_fs_info);
}
kill_block_super(sb);
}
......
......@@ -69,7 +69,7 @@ static int gfs2_create(struct inode *dir, struct dentry *dentry,
mark_inode_dirty(inode);
break;
} else if (PTR_ERR(inode) != -EEXIST ||
(nd->intent.open.flags & O_EXCL)) {
(nd && (nd->intent.open.flags & O_EXCL))) {
gfs2_holder_uninit(ghs);
return PTR_ERR(inode);
}
......@@ -278,17 +278,25 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
error = gfs2_glock_nq_m(3, ghs);
error = gfs2_glock_nq(ghs); /* parent */
if (error)
goto out;
goto out_parent;
error = gfs2_glock_nq(ghs + 1); /* child */
if (error)
goto out_child;
error = gfs2_glock_nq(ghs + 2); /* rgrp */
if (error)
goto out_rgrp;
error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
if (error)
goto out_gunlock;
goto out_rgrp;
error = gfs2_trans_begin(sdp, 2*RES_DINODE + RES_LEAF + RES_RG_BIT, 0);
if (error)
goto out_gunlock;
goto out_rgrp;
error = gfs2_dir_del(dip, &dentry->d_name);
if (error)
......@@ -298,12 +306,15 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
out_end_trans:
gfs2_trans_end(sdp);
out_gunlock:
gfs2_glock_dq_m(3, ghs);
out:
gfs2_holder_uninit(ghs);
gfs2_holder_uninit(ghs + 1);
gfs2_glock_dq(ghs + 2);
out_rgrp:
gfs2_holder_uninit(ghs + 2);
gfs2_glock_dq(ghs + 1);
out_child:
gfs2_holder_uninit(ghs + 1);
gfs2_glock_dq(ghs);
out_parent:
gfs2_holder_uninit(ghs);
gfs2_glock_dq_uninit(&ri_gh);
return error;
}
......@@ -894,12 +905,17 @@ static int gfs2_permission(struct inode *inode, int mask, struct nameidata *nd)
static int setattr_size(struct inode *inode, struct iattr *attr)
{
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);
int error;
if (attr->ia_size != ip->i_di.di_size) {
error = vmtruncate(inode, attr->ia_size);
error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks);
if (error)
return error;
error = vmtruncate(inode, attr->ia_size);
gfs2_trans_end(sdp);
if (error)
return error;
}
error = gfs2_truncatei(ip, attr->ia_size);
......
......@@ -92,7 +92,6 @@ static void gfs2_put_super(struct super_block *sb)
kthread_stop(sdp->sd_recoverd_process);
while (sdp->sd_glockd_num--)
kthread_stop(sdp->sd_glockd_process[sdp->sd_glockd_num]);
kthread_stop(sdp->sd_scand_process);
if (!(sb->s_flags & MS_RDONLY)) {
error = gfs2_make_fs_ro(sdp);
......@@ -456,12 +455,15 @@ static void gfs2_delete_inode(struct inode *inode)
}
error = gfs2_dinode_dealloc(ip);
/*
* Must do this before unlock to avoid trying to write back
* potentially dirty data now that inode no longer exists
* on disk.
*/
if (error)
goto out_unlock;
error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks);
if (error)
goto out_unlock;
/* Needs to be done before glock release & also in a transaction */
truncate_inode_pages(&inode->i_data, 0);
gfs2_trans_end(sdp);
out_unlock:
gfs2_glock_dq(&ip->i_iopen_gh);
......
......@@ -70,6 +70,7 @@ struct gfs2_quota_host {
u64 qu_limit;
u64 qu_warn;
s64 qu_value;
u32 qu_ll_next;
};
struct gfs2_quota_change_host {
......@@ -580,6 +581,7 @@ static void gfs2_quota_in(struct gfs2_quota_host *qu, const void *buf)
qu->qu_limit = be64_to_cpu(str->qu_limit);
qu->qu_warn = be64_to_cpu(str->qu_warn);
qu->qu_value = be64_to_cpu(str->qu_value);
qu->qu_ll_next = be32_to_cpu(str->qu_ll_next);
}
static void gfs2_quota_out(const struct gfs2_quota_host *qu, void *buf)
......@@ -589,6 +591,7 @@ static void gfs2_quota_out(const struct gfs2_quota_host *qu, void *buf)
str->qu_limit = cpu_to_be64(qu->qu_limit);
str->qu_warn = cpu_to_be64(qu->qu_warn);
str->qu_value = cpu_to_be64(qu->qu_value);
str->qu_ll_next = cpu_to_be32(qu->qu_ll_next);
memset(&str->qu_reserved, 0, sizeof(str->qu_reserved));
}
......@@ -614,6 +617,16 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
s64 value;
int err = -EIO;
if (gfs2_is_stuffed(ip)) {
struct gfs2_alloc *al = NULL;
al = gfs2_alloc_get(ip);
/* just request 1 blk */
al->al_requested = 1;
gfs2_inplace_reserve(ip);
gfs2_unstuff_dinode(ip, NULL);
gfs2_inplace_release(ip);
gfs2_alloc_put(ip);
}
page = grab_cache_page(mapping, index);
if (!page)
return -ENOMEM;
......
......@@ -469,7 +469,7 @@ int gfs2_recover_journal(struct gfs2_jdesc *jd)
};
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED,
LM_FLAG_NOEXP, &ji_gh);
LM_FLAG_NOEXP | GL_NOCACHE, &ji_gh);
if (error)
goto fail_gunlock_j;
} else {
......
......@@ -31,6 +31,7 @@
#include "inode.h"
#define BFITNOENT ((u32)~0)
#define NO_BLOCK ((u64)~0)
/*
* These routines are used by the resource group routines (rgrp.c)
......@@ -116,8 +117,7 @@ static unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd, unsigned char *buffer,
* @buffer: the buffer that holds the bitmaps
* @buflen: the length (in bytes) of the buffer
* @goal: start search at this block's bit-pair (within @buffer)
* @old_state: GFS2_BLKST_XXX the state of the block we're looking for;
* bit 0 = alloc(1)/free(0), bit 1 = meta(1)/data(0)
* @old_state: GFS2_BLKST_XXX the state of the block we're looking for.
*
* Scope of @goal and returned block number is only within this bitmap buffer,
* not entire rgrp or filesystem. @buffer will be offset from the actual
......@@ -137,9 +137,13 @@ static u32 gfs2_bitfit(struct gfs2_rgrpd *rgd, unsigned char *buffer,
byte = buffer + (goal / GFS2_NBBY);
bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE;
end = buffer + buflen;
alloc = (old_state & 1) ? 0 : 0x55;
alloc = (old_state == GFS2_BLKST_FREE) ? 0x55 : 0;
while (byte < end) {
/* If we're looking for a free block we can eliminate all
bitmap settings with 0x55, which represents four data
blocks in a row. If we're looking for a data block, we can
eliminate 0x00 which corresponds to four free blocks. */
if ((*byte & 0x55) == alloc) {
blk += (8 - bit) >> 1;
......@@ -859,23 +863,28 @@ static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_alloc *al)
static struct inode *try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked)
{
struct inode *inode;
u32 goal = 0;
u32 goal = 0, block;
u64 no_addr;
struct gfs2_sbd *sdp = rgd->rd_sbd;
for(;;) {
if (goal >= rgd->rd_data)
break;
goal = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED,
GFS2_BLKST_UNLINKED);
if (goal == BFITNOENT)
down_write(&sdp->sd_log_flush_lock);
block = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED,
GFS2_BLKST_UNLINKED);
up_write(&sdp->sd_log_flush_lock);
if (block == BFITNOENT)
break;
no_addr = goal + rgd->rd_data0;
/* rgblk_search can return a block < goal, so we need to
keep it marching forward. */
no_addr = block + rgd->rd_data0;
goal++;
if (no_addr < *last_unlinked)
if (*last_unlinked != NO_BLOCK && no_addr <= *last_unlinked)
continue;
*last_unlinked = no_addr;
inode = gfs2_inode_lookup(rgd->rd_sbd->sd_vfs, DT_UNKNOWN,
no_addr, -1);
no_addr, -1, 1);
if (!IS_ERR(inode))
return inode;
}
......@@ -1152,7 +1161,7 @@ int gfs2_inplace_reserve_i(struct gfs2_inode *ip, char *file, unsigned int line)
struct gfs2_alloc *al = &ip->i_alloc;
struct inode *inode;
int error = 0;
u64 last_unlinked = 0;
u64 last_unlinked = NO_BLOCK;
if (gfs2_assert_warn(sdp, al->al_requested))
return -EINVAL;
......@@ -1289,7 +1298,9 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
allocatable block anywhere else, we want to be able to wrap around and
search in the first part of our first-searched bit block. */
for (x = 0; x <= length; x++) {
if (bi->bi_clone)
/* The GFS2_BLKST_UNLINKED state doesn't apply to the clone
bitmaps, so we must search the originals for that. */
if (old_state != GFS2_BLKST_UNLINKED && bi->bi_clone)
blk = gfs2_bitfit(rgd, bi->bi_clone + bi->bi_offset,
bi->bi_len, goal, old_state);
else
......@@ -1305,9 +1316,7 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
goal = 0;
}
if (old_state != new_state) {
gfs2_assert_withdraw(rgd->rd_sbd, blk != BFITNOENT);
if (blk != BFITNOENT && old_state != new_state) {
gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
gfs2_setbit(rgd, bi->bi_bh->b_data + bi->bi_offset,
bi->bi_len, blk, new_state);
......
......@@ -58,7 +58,6 @@ void gfs2_tune_init(struct gfs2_tune *gt)
gt->gt_incore_log_blocks = 1024;
gt->gt_log_flush_secs = 60;
gt->gt_jindex_refresh_secs = 60;
gt->gt_scand_secs = 15;
gt->gt_recoverd_secs = 60;
gt->gt_logd_secs = 1;
gt->gt_quotad_secs = 5;
......
......@@ -442,7 +442,6 @@ TUNE_ATTR(quota_simul_sync, 1);
TUNE_ATTR(quota_cache_secs, 1);
TUNE_ATTR(stall_secs, 1);
TUNE_ATTR(statfs_quantum, 1);
TUNE_ATTR_DAEMON(scand_secs, scand_process);
TUNE_ATTR_DAEMON(recoverd_secs, recoverd_process);
TUNE_ATTR_DAEMON(logd_secs, logd_process);
TUNE_ATTR_DAEMON(quotad_secs, quotad_process);
......@@ -464,7 +463,6 @@ static struct attribute *tune_attrs[] = {
&tune_attr_quota_cache_secs.attr,
&tune_attr_stall_secs.attr,
&tune_attr_statfs_quantum.attr,
&tune_attr_scand_secs.attr,
&tune_attr_recoverd_secs.attr,
&tune_attr_logd_secs.attr,
&tune_attr_quotad_secs.attr,
......
......@@ -142,25 +142,25 @@ void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta)
lops_add(sdp, &bd->bd_le);
}
void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, u64 blkno)
void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
{
struct gfs2_revoke *rv = kmalloc(sizeof(struct gfs2_revoke),
GFP_NOFS | __GFP_NOFAIL);
lops_init_le(&rv->rv_le, &gfs2_revoke_lops);
rv->rv_blkno = blkno;
lops_add(sdp, &rv->rv_le);
BUG_ON(!list_empty(&bd->bd_le.le_list));
BUG_ON(!list_empty(&bd->bd_ail_st_list));
BUG_ON(!list_empty(&bd->bd_ail_gl_list));
lops_init_le(&bd->bd_le, &gfs2_revoke_lops);
lops_add(sdp, &bd->bd_le);
}
void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno)
{
struct gfs2_revoke *rv;
struct gfs2_bufdata *bd;
int found = 0;
gfs2_log_lock(sdp);
list_for_each_entry(rv, &sdp->sd_log_le_revoke, rv_le.le_list) {
if (rv->rv_blkno == blkno) {
list_del(&rv->rv_le.le_list);
list_for_each_entry(bd, &sdp->sd_log_le_revoke, bd_le.le_list) {
if (bd->bd_blkno == blkno) {
list_del_init(&bd->bd_le.le_list);
gfs2_assert_withdraw(sdp, sdp->sd_log_num_revoke);
sdp->sd_log_num_revoke--;
found = 1;
......@@ -172,7 +172,7 @@ void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno)
if (found) {
struct gfs2_trans *tr = current->journal_info;
kfree(rv);
kmem_cache_free(gfs2_bufdata_cachep, bd);
tr->tr_num_revoke_rm++;
}
}
......
......@@ -32,7 +32,7 @@ void gfs2_trans_end(struct gfs2_sbd *sdp);
void gfs2_trans_add_gl(struct gfs2_glock *gl);
void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta);
void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, u64 blkno);
void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd);
void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno);
void gfs2_trans_add_rg(struct gfs2_rgrpd *rgd);
......
......@@ -169,6 +169,33 @@ struct gfs2_rgrp {
__u8 rg_reserved[80]; /* Several fields from gfs1 now reserved */
};
/*
* quota linked list: user quotas and group quotas form two separate
* singly linked lists. ll_next stores uids or gids of next quotas in the
* linked list.
Given a uid/gid, the quota file offset of the corresponding gfs2_quota
structure on disk is calculated as follows (user and group entries are
interleaved in the quota file, as the diagram below shows):
for user quotas, given uid,
offset = (2 * uid) * sizeof(struct gfs2_quota);
for group quotas, given gid,
offset = ((2 * gid) + 1) * sizeof(struct gfs2_quota);
uid:0 gid:0 uid:12 gid:12 uid:17 gid:17 uid:5142 gid:5142
+-------+-------+ +-------+-------+ +-------+- - - -+ +- - - -+-------+
| valid | valid | :: | valid | valid | :: | valid | inval | :: | inval | valid |
+-------+-------+ +-------+-------+ +-------+- - - -+ +- - - -+-------+
next:12 next:12 next:17 next:5142 next:NULL next:NULL
| | | | |<-- user quota list |
\______|___________/ \______|___________/ group quota list -->|
| | |
\__________________/ \_______________________________________/
*/
/*
* quota structure
*/
......@@ -177,7 +204,8 @@ struct gfs2_quota {
__be64 qu_limit;
__be64 qu_warn;
__be64 qu_value;
__u8 qu_reserved[64];
__be32 qu_ll_next; /* location of next quota in list */
__u8 qu_reserved[60];
};
/*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment