Commit affbc19a, authored by Yan, Zheng; committed by Ilya Dryomov

ceph: make sure syncfs flushes all cap snaps

Signed-off-by: Yan, Zheng <zyan@redhat.com>
parent 622f3e25
...@@ -1259,14 +1259,14 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, ...@@ -1259,14 +1259,14 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
* asynchronously back to the MDS once sync writes complete and dirty * asynchronously back to the MDS once sync writes complete and dirty
* data is written out. * data is written out.
* *
* Unless @again is true, skip cap_snaps that were already sent to * Unless @kick is true, skip cap_snaps that were already sent to
* the MDS (i.e., during this session). * the MDS (i.e., during this session).
* *
* Called under i_ceph_lock. Takes s_mutex as needed. * Called under i_ceph_lock. Takes s_mutex as needed.
*/ */
void __ceph_flush_snaps(struct ceph_inode_info *ci, void __ceph_flush_snaps(struct ceph_inode_info *ci,
struct ceph_mds_session **psession, struct ceph_mds_session **psession,
int again) int kick)
__releases(ci->i_ceph_lock) __releases(ci->i_ceph_lock)
__acquires(ci->i_ceph_lock) __acquires(ci->i_ceph_lock)
{ {
...@@ -1307,7 +1307,7 @@ void __ceph_flush_snaps(struct ceph_inode_info *ci, ...@@ -1307,7 +1307,7 @@ void __ceph_flush_snaps(struct ceph_inode_info *ci,
} }
/* only flush each capsnap once */ /* only flush each capsnap once */
if (!again && !list_empty(&capsnap->flushing_item)) { if (!kick && !list_empty(&capsnap->flushing_item)) {
dout("already flushed %p, skipping\n", capsnap); dout("already flushed %p, skipping\n", capsnap);
continue; continue;
} }
...@@ -1317,6 +1317,9 @@ void __ceph_flush_snaps(struct ceph_inode_info *ci, ...@@ -1317,6 +1317,9 @@ void __ceph_flush_snaps(struct ceph_inode_info *ci,
if (session && session->s_mds != mds) { if (session && session->s_mds != mds) {
dout("oops, wrong session %p mutex\n", session); dout("oops, wrong session %p mutex\n", session);
if (kick)
goto out;
mutex_unlock(&session->s_mutex); mutex_unlock(&session->s_mutex);
ceph_put_mds_session(session); ceph_put_mds_session(session);
session = NULL; session = NULL;
...@@ -1342,8 +1345,7 @@ void __ceph_flush_snaps(struct ceph_inode_info *ci, ...@@ -1342,8 +1345,7 @@ void __ceph_flush_snaps(struct ceph_inode_info *ci,
capsnap->flush_tid = ++ci->i_cap_flush_last_tid; capsnap->flush_tid = ++ci->i_cap_flush_last_tid;
atomic_inc(&capsnap->nref); atomic_inc(&capsnap->nref);
if (!list_empty(&capsnap->flushing_item)) if (list_empty(&capsnap->flushing_item))
list_del_init(&capsnap->flushing_item);
list_add_tail(&capsnap->flushing_item, list_add_tail(&capsnap->flushing_item,
&session->s_cap_snaps_flushing); &session->s_cap_snaps_flushing);
spin_unlock(&ci->i_ceph_lock); spin_unlock(&ci->i_ceph_lock);
...@@ -2876,6 +2878,7 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid, ...@@ -2876,6 +2878,7 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
struct ceph_mds_session *session) struct ceph_mds_session *session)
{ {
struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
u64 follows = le64_to_cpu(m->snap_follows); u64 follows = le64_to_cpu(m->snap_follows);
struct ceph_cap_snap *capsnap; struct ceph_cap_snap *capsnap;
int drop = 0; int drop = 0;
...@@ -2899,6 +2902,7 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid, ...@@ -2899,6 +2902,7 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
list_del(&capsnap->ci_item); list_del(&capsnap->ci_item);
list_del(&capsnap->flushing_item); list_del(&capsnap->flushing_item);
ceph_put_cap_snap(capsnap); ceph_put_cap_snap(capsnap);
wake_up_all(&mdsc->cap_flushing_wq);
drop = 1; drop = 1;
break; break;
} else { } else {
......
...@@ -1488,17 +1488,22 @@ int ceph_add_cap_releases(struct ceph_mds_client *mdsc, ...@@ -1488,17 +1488,22 @@ int ceph_add_cap_releases(struct ceph_mds_client *mdsc,
return err; return err;
} }
static int check_cap_flush(struct inode *inode, u64 want_flush_seq) static int check_cap_flush(struct ceph_inode_info *ci,
u64 want_flush_seq, u64 want_snap_seq)
{ {
struct ceph_inode_info *ci = ceph_inode(inode); int ret1 = 1, ret2 = 1;
int ret;
spin_lock(&ci->i_ceph_lock); spin_lock(&ci->i_ceph_lock);
if (ci->i_flushing_caps) if (want_flush_seq > 0 && ci->i_flushing_caps)
ret = ci->i_cap_flush_seq >= want_flush_seq; ret1 = ci->i_cap_flush_seq >= want_flush_seq;
else
ret = 1; if (want_snap_seq > 0 && !list_empty(&ci->i_cap_snaps)) {
struct ceph_cap_snap *capsnap =
list_first_entry(&ci->i_cap_snaps,
struct ceph_cap_snap, ci_item);
ret2 = capsnap->follows >= want_snap_seq;
}
spin_unlock(&ci->i_ceph_lock); spin_unlock(&ci->i_ceph_lock);
return ret; return ret1 && ret2;
} }
/* /*
...@@ -1506,45 +1511,72 @@ static int check_cap_flush(struct inode *inode, u64 want_flush_seq) ...@@ -1506,45 +1511,72 @@ static int check_cap_flush(struct inode *inode, u64 want_flush_seq)
* *
* returns true if we've flushed through want_flush_seq * returns true if we've flushed through want_flush_seq
*/ */
static void wait_caps_flush(struct ceph_mds_client *mdsc, u64 want_flush_seq) static void wait_caps_flush(struct ceph_mds_client *mdsc,
u64 want_flush_seq, u64 want_snap_seq)
{ {
int mds; int mds;
dout("check_cap_flush want %lld\n", want_flush_seq); dout("check_cap_flush want %lld\n", want_flush_seq);
mutex_lock(&mdsc->mutex); mutex_lock(&mdsc->mutex);
for (mds = 0; mds < mdsc->max_sessions; mds++) { for (mds = 0; mds < mdsc->max_sessions; ) {
struct ceph_mds_session *session = mdsc->sessions[mds]; struct ceph_mds_session *session = mdsc->sessions[mds];
struct inode *inode = NULL; struct inode *inode1 = NULL, *inode2 = NULL;
if (!session) if (!session) {
mds++;
continue; continue;
}
get_session(session); get_session(session);
mutex_unlock(&mdsc->mutex); mutex_unlock(&mdsc->mutex);
mutex_lock(&session->s_mutex); mutex_lock(&session->s_mutex);
if (!list_empty(&session->s_cap_flushing)) { if (!list_empty(&session->s_cap_flushing)) {
struct ceph_inode_info *ci = struct ceph_inode_info *ci =
list_entry(session->s_cap_flushing.next, list_first_entry(&session->s_cap_flushing,
struct ceph_inode_info, struct ceph_inode_info,
i_flushing_item); i_flushing_item);
if (!check_cap_flush(&ci->vfs_inode, want_flush_seq)) { if (!check_cap_flush(ci, want_flush_seq, 0)) {
dout("check_cap_flush still flushing %p " dout("check_cap_flush still flushing %p "
"seq %lld <= %lld to mds%d\n", "seq %lld <= %lld to mds%d\n",
&ci->vfs_inode, ci->i_cap_flush_seq, &ci->vfs_inode, ci->i_cap_flush_seq,
want_flush_seq, session->s_mds); want_flush_seq, mds);
inode = igrab(&ci->vfs_inode); inode1 = igrab(&ci->vfs_inode);
}
}
if (!list_empty(&session->s_cap_snaps_flushing)) {
struct ceph_cap_snap *capsnap =
list_first_entry(&session->s_cap_snaps_flushing,
struct ceph_cap_snap,
flushing_item);
struct ceph_inode_info *ci = capsnap->ci;
if (!check_cap_flush(ci, 0, want_snap_seq)) {
dout("check_cap_flush still flushing snap %p "
"follows %lld <= %lld to mds%d\n",
&ci->vfs_inode, capsnap->follows,
want_snap_seq, mds);
inode2 = igrab(&ci->vfs_inode);
} }
} }
mutex_unlock(&session->s_mutex); mutex_unlock(&session->s_mutex);
ceph_put_mds_session(session); ceph_put_mds_session(session);
if (inode) { if (inode1) {
wait_event(mdsc->cap_flushing_wq, wait_event(mdsc->cap_flushing_wq,
check_cap_flush(inode, want_flush_seq)); check_cap_flush(ceph_inode(inode1),
iput(inode); want_flush_seq, 0));
iput(inode1);
}
if (inode2) {
wait_event(mdsc->cap_flushing_wq,
check_cap_flush(ceph_inode(inode2),
0, want_snap_seq));
iput(inode2);
} }
if (!inode1 && !inode2)
mds++;
mutex_lock(&mdsc->mutex); mutex_lock(&mdsc->mutex);
} }
...@@ -3391,6 +3423,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) ...@@ -3391,6 +3423,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
atomic_set(&mdsc->num_sessions, 0); atomic_set(&mdsc->num_sessions, 0);
mdsc->max_sessions = 0; mdsc->max_sessions = 0;
mdsc->stopping = 0; mdsc->stopping = 0;
mdsc->last_snap_seq = 0;
init_rwsem(&mdsc->snap_rwsem); init_rwsem(&mdsc->snap_rwsem);
mdsc->snap_realms = RB_ROOT; mdsc->snap_realms = RB_ROOT;
INIT_LIST_HEAD(&mdsc->snap_empty); INIT_LIST_HEAD(&mdsc->snap_empty);
...@@ -3517,7 +3550,7 @@ static void wait_unsafe_requests(struct ceph_mds_client *mdsc, u64 want_tid) ...@@ -3517,7 +3550,7 @@ static void wait_unsafe_requests(struct ceph_mds_client *mdsc, u64 want_tid)
void ceph_mdsc_sync(struct ceph_mds_client *mdsc) void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
{ {
u64 want_tid, want_flush; u64 want_tid, want_flush, want_snap;
if (mdsc->fsc->mount_state == CEPH_MOUNT_SHUTDOWN) if (mdsc->fsc->mount_state == CEPH_MOUNT_SHUTDOWN)
return; return;
...@@ -3532,10 +3565,15 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc) ...@@ -3532,10 +3565,15 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
want_flush = mdsc->cap_flush_seq; want_flush = mdsc->cap_flush_seq;
spin_unlock(&mdsc->cap_dirty_lock); spin_unlock(&mdsc->cap_dirty_lock);
dout("sync want tid %lld flush_seq %lld\n", want_tid, want_flush); down_read(&mdsc->snap_rwsem);
want_snap = mdsc->last_snap_seq;
up_read(&mdsc->snap_rwsem);
dout("sync want tid %lld flush_seq %lld snap_seq %lld\n",
want_tid, want_flush, want_snap);
wait_unsafe_requests(mdsc, want_tid); wait_unsafe_requests(mdsc, want_tid);
wait_caps_flush(mdsc, want_flush); wait_caps_flush(mdsc, want_flush, want_snap);
} }
/* /*
......
...@@ -290,6 +290,7 @@ struct ceph_mds_client { ...@@ -290,6 +290,7 @@ struct ceph_mds_client {
* references (implying they contain no inodes with caps) that * references (implying they contain no inodes with caps) that
* should be destroyed. * should be destroyed.
*/ */
u64 last_snap_seq;
struct rw_semaphore snap_rwsem; struct rw_semaphore snap_rwsem;
struct rb_root snap_realms; struct rb_root snap_realms;
struct list_head snap_empty; struct list_head snap_empty;
......
...@@ -730,6 +730,8 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc, ...@@ -730,6 +730,8 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc,
/* queue realm for cap_snap creation */ /* queue realm for cap_snap creation */
list_add(&realm->dirty_item, &dirty_realms); list_add(&realm->dirty_item, &dirty_realms);
if (realm->seq > mdsc->last_snap_seq)
mdsc->last_snap_seq = realm->seq;
invalidate = 1; invalidate = 1;
} else if (!realm->cached_context) { } else if (!realm->cached_context) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment