Commit 75f19a40 authored by Andrew Morton, committed by Jaroslav Kysela

[PATCH] Add a sync_fs super_block operation

This is infrastructure for fixing the journalled-data ext3 unmount data
loss problem. It was sent for comment to linux-fsdevel a week ago; there
was none.

Add a `sync_fs' superblock operation whose mandate is to perform
filesystem-specific operations to ensure a successful sync.

It is called in two places:

1: fsync_super() - for umount.

2: sys_sync() - for global sync.

In the sys_sync() case we call all the ->write_super() methods first.
write_super() is an async flushing operation.  It should not block.

After that, we call all the ->sync_fs functions.  This is independent
of the state of s_dirt!  That was all confused up before, and in this
patch ->write_super() and ->sync_fs() are quite separate.

With ext3 as an example, the initial ->write_super() will start a
transaction, but will not wait on it.  (But only if s_dirt was set!)

The first ->sync_fs() call will get the IO underway.

The second ->sync_fs() call will wait on the IO.

And we really do need to be this elaborate, because all the testing of
s_dirt in there makes ->write_super() an unreliable way of detecting
when the VFS is trying to sync the filesystem.
parent 7404e32c
...@@ -92,6 +92,7 @@ prototypes: ...@@ -92,6 +92,7 @@ prototypes:
void (*delete_inode) (struct inode *); void (*delete_inode) (struct inode *);
void (*put_super) (struct super_block *); void (*put_super) (struct super_block *);
void (*write_super) (struct super_block *); void (*write_super) (struct super_block *);
int (*sync_fs) (struct super_block *sb, int wait);
int (*statfs) (struct super_block *, struct statfs *); int (*statfs) (struct super_block *, struct statfs *);
int (*remount_fs) (struct super_block *, int *, char *); int (*remount_fs) (struct super_block *, int *, char *);
void (*clear_inode) (struct inode *); void (*clear_inode) (struct inode *);
...@@ -108,6 +109,7 @@ delete_inode: no ...@@ -108,6 +109,7 @@ delete_inode: no
clear_inode: no clear_inode: no
put_super: yes yes maybe (see below) put_super: yes yes maybe (see below)
write_super: no yes maybe (see below) write_super: no yes maybe (see below)
sync_fs: no no maybe (see below)
statfs: no no no statfs: no no no
remount_fs: yes yes maybe (see below) remount_fs: yes yes maybe (see below)
umount_begin: yes no maybe (see below) umount_begin: yes no maybe (see below)
......
...@@ -221,6 +221,9 @@ int fsync_super(struct super_block *sb) ...@@ -221,6 +221,9 @@ int fsync_super(struct super_block *sb)
lock_super(sb); lock_super(sb);
if (sb->s_dirt && sb->s_op && sb->s_op->write_super) if (sb->s_dirt && sb->s_op && sb->s_op->write_super)
sb->s_op->write_super(sb); sb->s_op->write_super(sb);
if (sb->s_op && sb->s_op->sync_fs) {
sb->s_op->sync_fs(sb, 1);
}
unlock_super(sb); unlock_super(sb);
sync_blockdev(sb->s_bdev); sync_blockdev(sb->s_bdev);
sync_inodes_sb(sb, 1); sync_inodes_sb(sb, 1);
...@@ -251,10 +254,12 @@ int fsync_bdev(struct block_device *bdev) ...@@ -251,10 +254,12 @@ int fsync_bdev(struct block_device *bdev)
asmlinkage long sys_sync(void) asmlinkage long sys_sync(void)
{ {
wakeup_bdflush(0); wakeup_bdflush(0);
sync_inodes(0); /* All mappings and inodes, including block devices */ sync_inodes(0); /* All mappings, inodes and their blockdevs */
DQUOT_SYNC(NULL); DQUOT_SYNC(NULL);
sync_supers(); /* Write the superblocks */ sync_supers(); /* Write the superblocks */
sync_inodes(1); /* All the mappings and inodes, again. */ sync_filesystems(0); /* Start syncing the filesystems */
sync_filesystems(1); /* Waitingly sync the filesystems */
sync_inodes(1); /* Mappings, inodes and blockdevs, again. */
return 0; return 0;
} }
......
...@@ -189,6 +189,8 @@ void generic_shutdown_super(struct super_block *sb) ...@@ -189,6 +189,8 @@ void generic_shutdown_super(struct super_block *sb)
if (sop) { if (sop) {
if (sop->write_super && sb->s_dirt) if (sop->write_super && sb->s_dirt)
sop->write_super(sb); sop->write_super(sb);
if (sop->sync_fs)
sop->sync_fs(sb, 1);
if (sop->put_super) if (sop->put_super)
sop->put_super(sb); sop->put_super(sb);
} }
...@@ -266,8 +268,8 @@ void drop_super(struct super_block *sb) ...@@ -266,8 +268,8 @@ void drop_super(struct super_block *sb)
static inline void write_super(struct super_block *sb) static inline void write_super(struct super_block *sb)
{ {
lock_super(sb); lock_super(sb);
if (sb->s_root && sb->s_dirt) if (sb->s_op && sb->s_root && sb->s_dirt)
if (sb->s_op && sb->s_op->write_super) if (sb->s_op->write_super)
sb->s_op->write_super(sb); sb->s_op->write_super(sb);
unlock_super(sb); unlock_super(sb);
} }
...@@ -296,6 +298,46 @@ void sync_supers(void) ...@@ -296,6 +298,46 @@ void sync_supers(void)
spin_unlock(&sb_lock); spin_unlock(&sb_lock);
} }
/*
 * Call the ->sync_fs super_op against all filesystems which are r/w and
 * which implement it.
 *
 * @wait is passed through unchanged to each ->sync_fs call.  sys_sync()
 * calls this twice: first with wait == 0, then with wait == 1.
 *
 * The work is done in two passes over the super_blocks list:
 *
 *  1. Under sb_lock, flag every eligible superblock by setting
 *     s_need_sync_fs.
 *  2. Repeatedly scan for a flagged superblock, clear its flag, drop
 *     sb_lock and call ->sync_fs on it.  Because sb_lock is released
 *     around the call, the scan restarts from the head of the list each
 *     time; clearing the flag first guarantees that every restart makes
 *     progress.
 */
void sync_filesystems(int wait)
{
	struct super_block *sb;

	/* Pass 1: mark the superblocks which need ->sync_fs called. */
	spin_lock(&sb_lock);
	for (sb = sb_entry(super_blocks.next); sb != sb_entry(&super_blocks);
			sb = sb_entry(sb->s_list.next)) {
		if (!sb->s_op)
			continue;
		/*
		 * Note: there must be no ';' after this condition.  With a
		 * stray semicolon the `continue' below runs unconditionally
		 * and no superblock is ever marked.
		 */
		if (!sb->s_op->sync_fs)
			continue;
		if (sb->s_flags & MS_RDONLY)
			continue;
		sb->s_need_sync_fs = 1;
	}
	spin_unlock(&sb_lock);

	/* Pass 2: call ->sync_fs on each marked superblock, one at a time. */
restart:
	spin_lock(&sb_lock);
	for (sb = sb_entry(super_blocks.next); sb != sb_entry(&super_blocks);
			sb = sb_entry(sb->s_list.next)) {
		if (!sb->s_need_sync_fs)
			continue;
		sb->s_need_sync_fs = 0;
		if (sb->s_flags & MS_RDONLY)
			continue;	/* hm.  Was remounted r/o meanwhile */
		/* Take a reference before sb_lock is dropped. */
		sb->s_count++;
		spin_unlock(&sb_lock);
		down_read(&sb->s_umount);
		sb->s_op->sync_fs(sb, wait);
		/*
		 * drop_super() is expected to undo both the s_umount read
		 * lock and the s_count reference taken above (its body is
		 * not shown here -- confirm).
		 */
		drop_super(sb);
		goto restart;
	}
	spin_unlock(&sb_lock);
}
/** /**
* get_super - get the superblock of a device * get_super - get the superblock of a device
* @dev: device to get the superblock for * @dev: device to get the superblock for
......
...@@ -631,6 +631,7 @@ struct super_block { ...@@ -631,6 +631,7 @@ struct super_block {
struct semaphore s_lock; struct semaphore s_lock;
int s_count; int s_count;
int s_syncing; int s_syncing;
int s_need_sync_fs;
atomic_t s_active; atomic_t s_active;
void *s_security; void *s_security;
...@@ -810,6 +811,7 @@ struct super_operations { ...@@ -810,6 +811,7 @@ struct super_operations {
void (*delete_inode) (struct inode *); void (*delete_inode) (struct inode *);
void (*put_super) (struct super_block *); void (*put_super) (struct super_block *);
void (*write_super) (struct super_block *); void (*write_super) (struct super_block *);
int (*sync_fs)(struct super_block *sb, int wait);
void (*write_super_lockfs) (struct super_block *); void (*write_super_lockfs) (struct super_block *);
void (*unlockfs) (struct super_block *); void (*unlockfs) (struct super_block *);
int (*statfs) (struct super_block *, struct statfs *); int (*statfs) (struct super_block *, struct statfs *);
...@@ -1143,6 +1145,7 @@ extern void write_inode_now(struct inode *, int); ...@@ -1143,6 +1145,7 @@ extern void write_inode_now(struct inode *, int);
extern int filemap_fdatawrite(struct address_space *); extern int filemap_fdatawrite(struct address_space *);
extern int filemap_fdatawait(struct address_space *); extern int filemap_fdatawait(struct address_space *);
extern void sync_supers(void); extern void sync_supers(void);
extern void sync_filesystems(int wait);
extern sector_t bmap(struct inode *, sector_t); extern sector_t bmap(struct inode *, sector_t);
extern int setattr_mask(unsigned int); extern int setattr_mask(unsigned int);
extern int notify_change(struct dentry *, struct iattr *); extern int notify_change(struct dentry *, struct iattr *);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment