Commit 152b734a authored by Linus Torvalds

Merge tag 'gfs2-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-3.0-fixes

Pull GFS2 fixes from Steven Whitehouse:
 "Here is a set of small fixes for GFS2.  There is a fix to drop
  s_umount which is copied in from the core vfs, two patches relate to a
  hard to hit "use after free" and memory leak.  Two patches related to
  using DIO and buffered I/O on the same file to ensure correct
  operation in relation to glock state changes.  The final patch adds an
  RCU read lock to ensure correct locking on an error path"

* tag 'gfs2-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-3.0-fixes:
  GFS2: Fix unsafe dereference in dump_holder()
  GFS2: Wait for async DIO in glock state changes
  GFS2: Fix incorrect invalidation for DIO/buffered I/O
  GFS2: Fix slab memory leak in gfs2_bufdata
  GFS2: Fix use-after-free race when calling gfs2_remove_from_ail
  GFS2: don't hold s_umount over blkdev_put
parents b4796679 0b3a2c99
@@ -986,6 +986,7 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
 {
         struct file *file = iocb->ki_filp;
         struct inode *inode = file->f_mapping->host;
+        struct address_space *mapping = inode->i_mapping;
         struct gfs2_inode *ip = GFS2_I(inode);
         struct gfs2_holder gh;
         int rv;
@@ -1006,6 +1007,35 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
         if (rv != 1)
                 goto out; /* dio not valid, fall back to buffered i/o */
 
+        /*
+         * Now since we are holding a deferred (CW) lock at this point, you
+         * might be wondering why this is ever needed. There is a case however
+         * where we've granted a deferred local lock against a cached exclusive
+         * glock. That is ok provided all granted local locks are deferred, but
+         * it also means that it is possible to encounter pages which are
+         * cached and possibly also mapped. So here we check for that and sort
+         * them out ahead of the dio. The glock state machine will take care of
+         * everything else.
+         *
+         * If in fact the cached glock state (gl->gl_state) is deferred (CW) in
+         * the first place, mapping->nrpages will always be zero.
+         */
+        if (mapping->nrpages) {
+                loff_t lstart = offset & (PAGE_CACHE_SIZE - 1);
+                loff_t len = iov_length(iov, nr_segs);
+                loff_t end = PAGE_ALIGN(offset + len) - 1;
+
+                rv = 0;
+                if (len == 0)
+                        goto out;
+                if (test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags))
+                        unmap_shared_mapping_range(ip->i_inode.i_mapping, offset, len);
+                rv = filemap_write_and_wait_range(mapping, lstart, end);
+                if (rv)
+                        return rv;
+                truncate_inode_pages_range(mapping, lstart, end);
+        }
+
         rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
                                   offset, nr_segs, gfs2_get_block_direct,
                                   NULL, NULL, 0);
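
The flush-and-invalidate sequence above is the usual pattern when cached pages may overlap a direct I/O range. A minimal standalone sketch (hypothetical helper, not part of the patch; the start offset here is explicitly rounded down to a page boundary):

        #include <linux/fs.h>
        #include <linux/mm.h>
        #include <linux/pagemap.h>

        /* Hypothetical helper: write back, then drop, every page cache
         * page overlapping [offset, offset + len). */
        static int flush_and_invalidate(struct address_space *mapping,
                                        loff_t offset, loff_t len)
        {
                loff_t lstart = offset & ~((loff_t)PAGE_CACHE_SIZE - 1);
                loff_t lend = PAGE_ALIGN(offset + len) - 1;
                int ret;

                ret = filemap_write_and_wait_range(mapping, lstart, lend);
                if (ret)
                        return ret;
                truncate_inode_pages_range(mapping, lstart, lend);
                return 0;
        }

Writing back before truncating matters: dirty pages must reach disk first, so the direct I/O that follows sees current data rather than losing the cached updates.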
...
@@ -1655,6 +1655,7 @@ static int dump_holder(struct seq_file *seq, const struct gfs2_holder *gh)
         struct task_struct *gh_owner = NULL;
         char flags_buf[32];
 
+        rcu_read_lock();
         if (gh->gh_owner_pid)
                 gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID);
         gfs2_print_dbg(seq, " H: s:%s f:%s e:%d p:%ld [%s] %pS\n",
@@ -1664,6 +1665,7 @@ static int dump_holder(struct seq_file *seq, const struct gfs2_holder *gh)
                gh->gh_owner_pid ? (long)pid_nr(gh->gh_owner_pid) : -1,
                gh_owner ? gh_owner->comm : "(ended)",
                (void *)gh->gh_ip);
+        rcu_read_unlock();
         return 0;
 }
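
The fix above enforces a standard rule: pid_task() must be called under rcu_read_lock(), and the task_struct it returns may only be dereferenced inside the same read-side critical section, since the task can exit and be freed at any moment. In miniature (hypothetical helper, not from the patch):

        #include <linux/pid.h>
        #include <linux/printk.h>
        #include <linux/rcupdate.h>
        #include <linux/sched.h>

        static void print_task_comm(struct pid *pid)
        {
                struct task_struct *task;

                rcu_read_lock();
                task = pid_task(pid, PIDTYPE_PID);
                pr_info("comm: %s\n", task ? task->comm : "(ended)");
                rcu_read_unlock();      /* task must not be used past here */
        }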
...
@@ -192,8 +192,11 @@ static void inode_go_sync(struct gfs2_glock *gl)
 
         if (ip && !S_ISREG(ip->i_inode.i_mode))
                 ip = NULL;
-        if (ip && test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags))
-                unmap_shared_mapping_range(ip->i_inode.i_mapping, 0, 0);
+        if (ip) {
+                if (test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags))
+                        unmap_shared_mapping_range(ip->i_inode.i_mapping, 0, 0);
+                inode_dio_wait(&ip->i_inode);
+        }
 
         if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags))
                 return;
@@ -410,6 +413,9 @@ static int inode_go_lock(struct gfs2_holder *gh)
                         return error;
         }
 
+        if (gh->gh_state != LM_ST_DEFERRED)
+                inode_dio_wait(&ip->i_inode);
+
         if ((ip->i_diskflags & GFS2_DIF_TRUNC_IN_PROG) &&
             (gl->gl_state == LM_ST_EXCLUSIVE) &&
             (gh->gh_state == LM_ST_EXCLUSIVE)) {
...
@@ -551,10 +551,10 @@ void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
         struct buffer_head *bh = bd->bd_bh;
         struct gfs2_glock *gl = bd->bd_gl;
 
-        gfs2_remove_from_ail(bd);
-        bd->bd_bh = NULL;
         bh->b_private = NULL;
         bd->bd_blkno = bh->b_blocknr;
+        gfs2_remove_from_ail(bd); /* drops ref on bh */
+        bd->bd_bh = NULL;
         bd->bd_ops = &gfs2_revoke_lops;
         sdp->sd_log_num_revoke++;
         atomic_inc(&gl->gl_revokes);
...
@@ -258,6 +258,7 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int
         struct address_space *mapping = bh->b_page->mapping;
         struct gfs2_sbd *sdp = gfs2_mapping2sbd(mapping);
         struct gfs2_bufdata *bd = bh->b_private;
+        int was_pinned = 0;
 
         if (test_clear_buffer_pinned(bh)) {
                 trace_gfs2_pin(bd, 0);
@@ -273,12 +274,16 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int
                         tr->tr_num_databuf_rm++;
                 }
                 tr->tr_touched = 1;
+                was_pinned = 1;
                 brelse(bh);
         }
         if (bd) {
                 spin_lock(&sdp->sd_ail_lock);
                 if (bd->bd_tr) {
                         gfs2_trans_add_revoke(sdp, bd);
+                } else if (was_pinned) {
+                        bh->b_private = NULL;
+                        kmem_cache_free(gfs2_bufdata_cachep, bd);
                 }
                 spin_unlock(&sdp->sd_ail_lock);
         }
...
@@ -1366,8 +1366,18 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags,
         if (IS_ERR(s))
                 goto error_bdev;
 
-        if (s->s_root)
+        if (s->s_root) {
+                /*
+                 * s_umount nests inside bd_mutex during
+                 * __invalidate_device(). blkdev_put() acquires
+                 * bd_mutex and can't be called under s_umount. Drop
+                 * s_umount temporarily. This is safe as we're
+                 * holding an active reference.
+                 */
+                up_write(&s->s_umount);
                 blkdev_put(bdev, mode);
+                down_write(&s->s_umount);
+        }
 
         memset(&args, 0, sizeof(args));
         args.ar_quota = GFS2_QUOTA_DEFAULT;
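
The comment above describes a lock-ordering inversion: the established order is bd_mutex before s_umount (s_umount is taken under bd_mutex during __invalidate_device()), so calling blkdev_put(), which takes bd_mutex, while holding s_umount would invert that order and risk deadlock. The generic shape of the workaround (illustrative sketch, not part of the patch):

        #include <linux/rwsem.h>

        /* Drop the outer lock around a call that takes a lock ordered
         * before it, then retake it. Only valid while something else
         * (here, an active superblock reference) keeps the protected
         * object alive across the window. */
        static void call_with_lock_dropped(struct rw_semaphore *sem,
                                           void (*fn)(void *), void *arg)
        {
                up_write(sem);
                fn(arg);
                down_write(sem);
        }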
...