Commit f6753df3, authored by Bob Peterson and committed by Andreas Gruenbacher

GFS2: rgrp free blocks used incorrectly

Before this patch, several functions in rgrp.c checked the value of
rgd->rd_free_clone. That does not take into account blocks that were
reserved by a multi-block reservation. This causes a problem when
space gets tight in the file system. For example, when function
gfs2_inplace_reserve checks to see if an rgrp has enough blocks to
satisfy the request, it can accept an rgrp that it should reject
because, although there are enough blocks to satisfy the request
_now_, those blocks may be reserved for another running process.

A second problem with this occurs when we've reserved the remaining
blocks in an rgrp: function rg_mblk_search() can reject an rgrp
improperly because it calculates:

   u32 free_blocks = rgd->rd_free_clone - rgd->rd_reserved;

But rd_reserved includes blocks that the current process just
reserved in its own call to inplace_reserve. For example, it can
reserve the last 128 blocks of an rgrp, then reject that same rgrp
because the above calculates out to free_blocks = 0;

Consequences include, but are not limited to, (1) leaving holes,
and thus increasing file system fragmentation, and (2) reporting
that the file system is full long before it actually is.

This patch introduces a new function, rgd_free, which returns the
number of clone-free blocks (blocks that are truly free as opposed
to blocks that are still being used because an unlinked file is
still open) minus the number of blocks reserved by processes, but
not counting the blocks we ourselves reserved (because obviously
we need to allocate them).
Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
parent d1b0cb93
...@@ -1489,6 +1489,34 @@ static void rs_insert(struct gfs2_inode *ip) ...@@ -1489,6 +1489,34 @@ static void rs_insert(struct gfs2_inode *ip)
trace_gfs2_rs(rs, TRACE_RS_INSERT); trace_gfs2_rs(rs, TRACE_RS_INSERT);
} }
/**
 * rgd_free - number of blocks in an rgrp that the caller may allocate
 * @rgd: the resource group
 * @rs: the caller's own block reservation
 *
 * The starting point is the rgrp's clone-free count, i.e. blocks that are
 * free and not still held by unlinked files that have yet to be deleted.
 * From that we subtract the blocks reserved by everybody else, which is
 * rd_reserved less our own rs_free: blocks remaining in our reservation
 * are not held against us, since we are the ones who will allocate them.
 *
 * Return: 0 (after a one-time warning) if rd_reserved is smaller than our
 * rs_free; the clone-free count unreduced if the others' reservations
 * exceed it; otherwise the clone-free count minus the others' reservations.
 */
static inline u32 rgd_free(struct gfs2_rgrpd *rgd, struct gfs2_blkreserv *rs)
{
	u32 others_reserved;

	/* Our reservation can never legitimately exceed the rgrp total. */
	if (WARN_ON_ONCE(rgd->rd_reserved < rs->rs_free))
		return 0;

	others_reserved = rgd->rd_reserved - rs->rs_free;

	/*
	 * More reserved by others than is clone-free: the reservation
	 * count is disregarded rather than letting the subtraction wrap.
	 */
	if (others_reserved > rgd->rd_free_clone)
		return rgd->rd_free_clone;

	return rgd->rd_free_clone - others_reserved;
}
/** /**
* rg_mblk_search - find a group of multiple free blocks to form a reservation * rg_mblk_search - find a group of multiple free blocks to form a reservation
* @rgd: the resource group descriptor * @rgd: the resource group descriptor
...@@ -1504,7 +1532,7 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, ...@@ -1504,7 +1532,7 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip,
u64 goal; u64 goal;
struct gfs2_blkreserv *rs = &ip->i_res; struct gfs2_blkreserv *rs = &ip->i_res;
u32 extlen; u32 extlen;
u32 free_blocks = rgd->rd_free_clone - rgd->rd_reserved; u32 free_blocks = rgd_free(rgd, rs);
int ret; int ret;
struct inode *inode = &ip->i_inode; struct inode *inode = &ip->i_inode;
...@@ -1985,7 +2013,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) ...@@ -1985,7 +2013,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
int error = 0, rg_locked, flags = 0; int error = 0, rg_locked, flags = 0;
u64 last_unlinked = NO_BLOCK; u64 last_unlinked = NO_BLOCK;
int loops = 0; int loops = 0;
u32 skip = 0; u32 free_blocks, skip = 0;
if (sdp->sd_args.ar_rgrplvb) if (sdp->sd_args.ar_rgrplvb)
flags |= GL_SKIP; flags |= GL_SKIP;
...@@ -2056,10 +2084,11 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) ...@@ -2056,10 +2084,11 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
goto check_rgrp; goto check_rgrp;
/* If rgrp has enough free space, use it */ /* If rgrp has enough free space, use it */
if (rs->rs_rbm.rgd->rd_free_clone >= ap->target || free_blocks = rgd_free(rs->rs_rbm.rgd, rs);
if (free_blocks >= ap->target ||
(loops == 2 && ap->min_target && (loops == 2 && ap->min_target &&
rs->rs_rbm.rgd->rd_free_clone >= ap->min_target)) { free_blocks >= ap->min_target)) {
ap->allowed = rs->rs_rbm.rgd->rd_free_clone; ap->allowed = free_blocks;
return 0; return 0;
} }
check_rgrp: check_rgrp:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment