Commit 04da0c81 authored by Christoph Hellwig, committed by Ben Myers

xfs: use a normal shrinker for the dquot freelist

Stop reusing dquots from the freelist when allocating new ones directly, and
implement a shrinker that actually follows the specifications for the
interface.  The shrinker implementation is still highly suboptimal at this
point, but we can gradually work on it.

This also fixes a bug in the previous lock ordering, where we would take
the hash and dqlist locks inside of the freelist lock against the normal
lock ordering.  This is only solvable by introducing the dispose list,
and thus not when using direct reclaim of unused dquots for new allocations.

As a side effect, the quota upper bound and used-to-free ratio values in
/proc/fs/xfs/xqm are now always reported as 0, as these values don't make
any sense in the new world order.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ben Myers <bpm@sgi.com>
parent 45053603
...@@ -110,10 +110,4 @@ kmem_zone_destroy(kmem_zone_t *zone) ...@@ -110,10 +110,4 @@ kmem_zone_destroy(kmem_zone_t *zone)
extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast); extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast);
extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast); extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast);
/*
 * Decide whether a shake request may proceed for the given allocation mask.
 *
 * Returns 1 only when gfp_mask has both __GFP_WAIT and __GFP_FS set,
 * and 0 in every other case.
 */
static inline int
kmem_shake_allow(gfp_t gfp_mask)
{
	/* Either flag missing is enough to refuse. */
	if (!(gfp_mask & __GFP_WAIT))
		return 0;
	if (!(gfp_mask & __GFP_FS))
		return 0;
	return 1;
}
#endif /* __XFS_SUPPORT_KMEM_H__ */ #endif /* __XFS_SUPPORT_KMEM_H__ */
...@@ -62,82 +62,6 @@ int xfs_dqerror_mod = 33; ...@@ -62,82 +62,6 @@ int xfs_dqerror_mod = 33;
static struct lock_class_key xfs_dquot_other_class; static struct lock_class_key xfs_dquot_other_class;
/*
 * Obtain an incore dquot for (mp, id, type) and bring it into a usable
 * initial state.
 *
 * The dquot comes from xfs_qm_dqalloc_incore(), which reports whether it
 * handed back freshly allocated memory or a dquot recycled from the
 * freelist.  Only the identifying fields (dq_flags, q_core.d_id, q_mount)
 * are filled in here; the remainder of q_core is expected to be populated
 * when the on-disk dquot is read in.  The log item is initialized later.
 *
 * Returns the initialized dquot.
 */
STATIC xfs_dquot_t *
xfs_qm_dqinit(
	xfs_mount_t	*mp,
	xfs_dqid_t	id,
	uint		type)
{
	xfs_dquot_t	*dq;
	boolean_t	fresh;

	fresh = xfs_qm_dqalloc_incore(&dq);

	/* Identity fields are set the same way for new and recycled dquots. */
	dq->dq_flags = type;
	dq->q_core.d_id = cpu_to_be32(id);
	dq->q_mount = mp;

	if (!fresh) {
		/*
		 * A recycled dquot only had its q_core zeroed, so every
		 * other piece of per-use state has to be reset by hand.
		 * The locks, wait queue and freelist linkage are left
		 * as they are.
		 */
		dq->q_nrefs = 0;
		dq->q_blkno = 0;
		INIT_LIST_HEAD(&dq->q_mplist);
		INIT_LIST_HEAD(&dq->q_hashlist);
		dq->q_bufoffset = 0;
		dq->q_fileoffset = 0;
		dq->q_transp = NULL;
		dq->q_gdquot = NULL;
		dq->q_res_bcount = 0;
		dq->q_res_icount = 0;
		dq->q_res_rtbcount = 0;
		atomic_set(&dq->q_pincount, 0);
		dq->q_hash = NULL;
		ASSERT(list_empty(&dq->q_freelist));

		trace_xfs_dqreuse(dq);
	} else {
		/* Brand new memory: set up the embedded primitives once. */
		INIT_LIST_HEAD(&dq->q_freelist);
		mutex_init(&dq->q_qlock);
		init_waitqueue_head(&dq->q_pinwait);

		/*
		 * q_flush is used as a counting completion.  Completing it
		 * once up front lets the first access to the flush
		 * completion go through without blocking.
		 */
		init_completion(&dq->q_flush);
		complete(&dq->q_flush);

		trace_xfs_dqinit(dq);
	}

	/*
	 * New or recycled, non-user dquots get their own lock class so
	 * that lockdep knows we can take one lock of each class at the
	 * same time.
	 */
	if (!(type & XFS_DQ_USER))
		lockdep_set_class(&dq->q_qlock, &xfs_dquot_other_class);

	return dq;
}
/* /*
* This is called to free all the memory associated with a dquot * This is called to free all the memory associated with a dquot
*/ */
...@@ -567,7 +491,32 @@ xfs_qm_dqread( ...@@ -567,7 +491,32 @@ xfs_qm_dqread(
int error; int error;
int cancelflags = 0; int cancelflags = 0;
dqp = xfs_qm_dqinit(mp, id, type);
dqp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP);
dqp->dq_flags = type;
dqp->q_core.d_id = cpu_to_be32(id);
dqp->q_mount = mp;
INIT_LIST_HEAD(&dqp->q_freelist);
mutex_init(&dqp->q_qlock);
init_waitqueue_head(&dqp->q_pinwait);
/*
* Because we want to use a counting completion, complete
* the flush completion once to allow a single access to
* the flush completion without blocking.
*/
init_completion(&dqp->q_flush);
complete(&dqp->q_flush);
/*
* Make sure group quotas have a different lock class than user
* quotas.
*/
if (!(type & XFS_DQ_USER))
lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class);
atomic_inc(&xfs_Gqm->qm_totaldquots);
trace_xfs_dqread(dqp); trace_xfs_dqread(dqp);
......
...@@ -50,7 +50,6 @@ ...@@ -50,7 +50,6 @@
*/ */
struct mutex xfs_Gqm_lock; struct mutex xfs_Gqm_lock;
struct xfs_qm *xfs_Gqm; struct xfs_qm *xfs_Gqm;
uint ndquot;
kmem_zone_t *qm_dqzone; kmem_zone_t *qm_dqzone;
kmem_zone_t *qm_dqtrxzone; kmem_zone_t *qm_dqtrxzone;
...@@ -93,7 +92,6 @@ xfs_Gqm_init(void) ...@@ -93,7 +92,6 @@ xfs_Gqm_init(void)
goto out_free_udqhash; goto out_free_udqhash;
hsize /= sizeof(xfs_dqhash_t); hsize /= sizeof(xfs_dqhash_t);
ndquot = hsize << 8;
xqm = kmem_zalloc(sizeof(xfs_qm_t), KM_SLEEP); xqm = kmem_zalloc(sizeof(xfs_qm_t), KM_SLEEP);
xqm->qm_dqhashmask = hsize - 1; xqm->qm_dqhashmask = hsize - 1;
...@@ -137,7 +135,6 @@ xfs_Gqm_init(void) ...@@ -137,7 +135,6 @@ xfs_Gqm_init(void)
xqm->qm_dqtrxzone = qm_dqtrxzone; xqm->qm_dqtrxzone = qm_dqtrxzone;
atomic_set(&xqm->qm_totaldquots, 0); atomic_set(&xqm->qm_totaldquots, 0);
xqm->qm_dqfree_ratio = XFS_QM_DQFREE_RATIO;
xqm->qm_nrefs = 0; xqm->qm_nrefs = 0;
return xqm; return xqm;
...@@ -1600,216 +1597,150 @@ xfs_qm_init_quotainos( ...@@ -1600,216 +1597,150 @@ xfs_qm_init_quotainos(
return 0; return 0;
} }
STATIC void
xfs_qm_dqfree_one(
struct xfs_dquot *dqp)
{
struct xfs_mount *mp = dqp->q_mount;
struct xfs_quotainfo *qi = mp->m_quotainfo;
mutex_lock(&dqp->q_hash->qh_lock);
list_del_init(&dqp->q_hashlist);
dqp->q_hash->qh_version++;
mutex_unlock(&dqp->q_hash->qh_lock);
/* mutex_lock(&qi->qi_dqlist_lock);
* Pop the least recently used dquot off the freelist and recycle it. list_del_init(&dqp->q_mplist);
*/ qi->qi_dquots--;
STATIC struct xfs_dquot * qi->qi_dqreclaims++;
xfs_qm_dqreclaim_one(void) mutex_unlock(&qi->qi_dqlist_lock);
xfs_qm_dqdestroy(dqp);
}
STATIC void
xfs_qm_dqreclaim_one(
struct xfs_dquot *dqp,
struct list_head *dispose_list)
{ {
struct xfs_dquot *dqp; struct xfs_mount *mp = dqp->q_mount;
int restarts = 0; int error;
mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); if (!xfs_dqlock_nowait(dqp))
restart: goto out_busy;
list_for_each_entry(dqp, &xfs_Gqm->qm_dqfrlist, q_freelist) {
struct xfs_mount *mp = dqp->q_mount;
if (!xfs_dqlock_nowait(dqp)) /*
continue; * This dquot has acquired a reference in the meantime remove it from
* the freelist and try again.
*/
if (dqp->q_nrefs) {
xfs_dqunlock(dqp);
/* trace_xfs_dqreclaim_want(dqp);
* This dquot has already been grabbed by dqlookup. XQM_STATS_INC(xqmstats.xs_qm_dqwants);
* Remove it from the freelist and try again.
*/
if (dqp->q_nrefs) {
trace_xfs_dqreclaim_want(dqp);
XQM_STATS_INC(xqmstats.xs_qm_dqwants);
list_del_init(&dqp->q_freelist);
xfs_Gqm->qm_dqfrlist_cnt--;
restarts++;
goto dqunlock;
}
ASSERT(dqp->q_hash); list_del_init(&dqp->q_freelist);
ASSERT(!list_empty(&dqp->q_mplist)); xfs_Gqm->qm_dqfrlist_cnt--;
return;
}
/* ASSERT(dqp->q_hash);
* Try to grab the flush lock. If this dquot is in the process ASSERT(!list_empty(&dqp->q_mplist));
* of getting flushed to disk, we don't want to reclaim it.
*/
if (!xfs_dqflock_nowait(dqp))
goto dqunlock;
/* /*
* We have the flush lock so we know that this is not in the * Try to grab the flush lock. If this dquot is in the process of
* process of being flushed. So, if this is dirty, flush it * getting flushed to disk, we don't want to reclaim it.
* DELWRI so that we don't get a freelist infested with */
* dirty dquots. if (!xfs_dqflock_nowait(dqp))
*/ goto out_busy;
if (XFS_DQ_IS_DIRTY(dqp)) {
int error;
trace_xfs_dqreclaim_dirty(dqp); /*
* We have the flush lock so we know that this is not in the
* process of being flushed. So, if this is dirty, flush it
* DELWRI so that we don't get a freelist infested with
* dirty dquots.
*/
if (XFS_DQ_IS_DIRTY(dqp)) {
trace_xfs_dqreclaim_dirty(dqp);
/* /*
* We flush it delayed write, so don't bother * We flush it delayed write, so don't bother releasing the
* releasing the freelist lock. * freelist lock.
*/ */
error = xfs_qm_dqflush(dqp, SYNC_TRYLOCK); error = xfs_qm_dqflush(dqp, 0);
if (error) { if (error) {
xfs_warn(mp, "%s: dquot %p flush failed", xfs_warn(mp, "%s: dquot %p flush failed",
__func__, dqp); __func__, dqp);
}
goto dqunlock;
} }
xfs_dqfunlock(dqp);
/* /*
* Prevent lookup now that we are going to reclaim the dquot. * Give the dquot another try on the freelist, as the
* Once XFS_DQ_FREEING is set lookup won't touch the dquot, * flushing will take some time.
* thus we can drop the lock now.
*/ */
dqp->dq_flags |= XFS_DQ_FREEING; goto out_busy;
xfs_dqunlock(dqp); }
xfs_dqfunlock(dqp);
mutex_lock(&dqp->q_hash->qh_lock);
list_del_init(&dqp->q_hashlist);
dqp->q_hash->qh_version++;
mutex_unlock(&dqp->q_hash->qh_lock);
mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);
list_del_init(&dqp->q_mplist);
mp->m_quotainfo->qi_dquots--;
mp->m_quotainfo->qi_dqreclaims++;
mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
ASSERT(dqp->q_nrefs == 0); /*
list_del_init(&dqp->q_freelist); * Prevent lookups now that we are past the point of no return.
xfs_Gqm->qm_dqfrlist_cnt--; */
dqp->dq_flags |= XFS_DQ_FREEING;
xfs_dqunlock(dqp);
mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); ASSERT(dqp->q_nrefs == 0);
return dqp; list_move_tail(&dqp->q_freelist, dispose_list);
dqunlock: xfs_Gqm->qm_dqfrlist_cnt--;
xfs_dqunlock(dqp);
if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
break;
goto restart;
}
mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); trace_xfs_dqreclaim_done(dqp);
return NULL; XQM_STATS_INC(xqmstats.xs_qm_dqreclaims);
} return;
/* out_busy:
* Traverse the freelist of dquots and attempt to reclaim a maximum of xfs_dqunlock(dqp);
* 'howmany' dquots. This operation races with dqlookup(), and attempts to
* favor the lookup function ...
*/
STATIC int
xfs_qm_shake_freelist(
int howmany)
{
int nreclaimed = 0;
xfs_dquot_t *dqp;
if (howmany <= 0) /*
return 0; * Move the dquot to the tail of the list so that we don't spin on it.
*/
list_move_tail(&dqp->q_freelist, &xfs_Gqm->qm_dqfrlist);
while (nreclaimed < howmany) { trace_xfs_dqreclaim_busy(dqp);
dqp = xfs_qm_dqreclaim_one(); XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses);
if (!dqp)
return nreclaimed;
xfs_qm_dqdestroy(dqp);
nreclaimed++;
}
return nreclaimed;
} }
/*
* The kmem_shake interface is invoked when memory is running low.
*/
/* ARGSUSED */
STATIC int STATIC int
xfs_qm_shake( xfs_qm_shake(
struct shrinker *shrink, struct shrinker *shrink,
struct shrink_control *sc) struct shrink_control *sc)
{ {
int ndqused, nfree, n; int nr_to_scan = sc->nr_to_scan;
gfp_t gfp_mask = sc->gfp_mask; LIST_HEAD (dispose_list);
struct xfs_dquot *dqp;
if (!kmem_shake_allow(gfp_mask))
return 0;
if (!xfs_Gqm)
return 0;
nfree = xfs_Gqm->qm_dqfrlist_cnt; /* free dquots */
/* incore dquots in all f/s's */
ndqused = atomic_read(&xfs_Gqm->qm_totaldquots) - nfree;
ASSERT(ndqused >= 0);
if (nfree <= ndqused && nfree < ndquot) if ((sc->gfp_mask & (__GFP_FS|__GFP_WAIT)) != (__GFP_FS|__GFP_WAIT))
return 0; return 0;
if (!nr_to_scan)
goto out;
ndqused *= xfs_Gqm->qm_dqfree_ratio; /* target # of free dquots */ mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
n = nfree - ndqused - ndquot; /* # over target */ while (!list_empty(&xfs_Gqm->qm_dqfrlist)) {
if (nr_to_scan-- <= 0)
return xfs_qm_shake_freelist(MAX(nfree, n)); break;
} dqp = list_first_entry(&xfs_Gqm->qm_dqfrlist, struct xfs_dquot,
q_freelist);
xfs_qm_dqreclaim_one(dqp, &dispose_list);
/*------------------------------------------------------------------*/
/*
* Return a new incore dquot. Depending on the number of
* dquots in the system, we either allocate a new one on the kernel heap,
* or reclaim a free one.
* Return value is B_TRUE if we allocated a new dquot, B_FALSE if we managed
* to reclaim an existing one from the freelist.
*/
boolean_t
xfs_qm_dqalloc_incore(
xfs_dquot_t **O_dqpp)
{
xfs_dquot_t *dqp;
/*
* Check against high water mark to see if we want to pop
* a nincompoop dquot off the freelist.
*/
if (atomic_read(&xfs_Gqm->qm_totaldquots) >= ndquot) {
/*
* Try to recycle a dquot from the freelist.
*/
if ((dqp = xfs_qm_dqreclaim_one())) {
XQM_STATS_INC(xqmstats.xs_qm_dqreclaims);
/*
* Just zero the core here. The rest will get
* reinitialized by caller. XXX we shouldn't even
* do this zero ...
*/
memset(&dqp->q_core, 0, sizeof(dqp->q_core));
*O_dqpp = dqp;
return B_FALSE;
}
XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses);
} }
mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
/* while (!list_empty(&dispose_list)) {
* Allocate a brand new dquot on the kernel heap and return it dqp = list_first_entry(&dispose_list, struct xfs_dquot,
* to the caller to initialize. q_freelist);
*/ list_del_init(&dqp->q_freelist);
ASSERT(xfs_Gqm->qm_dqzone != NULL); xfs_qm_dqfree_one(dqp);
*O_dqpp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP); }
atomic_inc(&xfs_Gqm->qm_totaldquots); out:
return (xfs_Gqm->qm_dqfrlist_cnt / 100) * sysctl_vfs_cache_pressure;
return B_TRUE;
} }
/* /*
* Start a transaction and write the incore superblock changes to * Start a transaction and write the incore superblock changes to
* disk. flags parameter indicates which fields have changed. * disk. flags parameter indicates which fields have changed.
......
...@@ -26,23 +26,11 @@ ...@@ -26,23 +26,11 @@
struct xfs_qm; struct xfs_qm;
struct xfs_inode; struct xfs_inode;
extern uint ndquot;
extern struct mutex xfs_Gqm_lock; extern struct mutex xfs_Gqm_lock;
extern struct xfs_qm *xfs_Gqm; extern struct xfs_qm *xfs_Gqm;
extern kmem_zone_t *qm_dqzone; extern kmem_zone_t *qm_dqzone;
extern kmem_zone_t *qm_dqtrxzone; extern kmem_zone_t *qm_dqtrxzone;
/*
* Ditto, for xfs_qm_dqreclaim_one.
*/
#define XFS_QM_RECLAIM_MAX_RESTARTS 4
/*
* Ideal ratio of free to in use dquots. Quota manager makes an attempt
* to keep this balance.
*/
#define XFS_QM_DQFREE_RATIO 2
/* /*
* Dquot hashtable constants/threshold values. * Dquot hashtable constants/threshold values.
*/ */
...@@ -74,7 +62,6 @@ typedef struct xfs_qm { ...@@ -74,7 +62,6 @@ typedef struct xfs_qm {
int qm_dqfrlist_cnt; int qm_dqfrlist_cnt;
atomic_t qm_totaldquots; /* total incore dquots */ atomic_t qm_totaldquots; /* total incore dquots */
uint qm_nrefs; /* file systems with quota on */ uint qm_nrefs; /* file systems with quota on */
int qm_dqfree_ratio;/* ratio of free to inuse dquots */
kmem_zone_t *qm_dqzone; /* dquot mem-alloc zone */ kmem_zone_t *qm_dqzone; /* dquot mem-alloc zone */
kmem_zone_t *qm_dqtrxzone; /* t_dqinfo of transactions */ kmem_zone_t *qm_dqtrxzone; /* t_dqinfo of transactions */
} xfs_qm_t; } xfs_qm_t;
...@@ -143,7 +130,6 @@ extern int xfs_qm_quotacheck(xfs_mount_t *); ...@@ -143,7 +130,6 @@ extern int xfs_qm_quotacheck(xfs_mount_t *);
extern int xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t); extern int xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t);
/* dquot stuff */ /* dquot stuff */
extern boolean_t xfs_qm_dqalloc_incore(xfs_dquot_t **);
extern int xfs_qm_dqpurge_all(xfs_mount_t *, uint); extern int xfs_qm_dqpurge_all(xfs_mount_t *, uint);
extern void xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint); extern void xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint);
......
...@@ -42,9 +42,9 @@ static int xqm_proc_show(struct seq_file *m, void *v) ...@@ -42,9 +42,9 @@ static int xqm_proc_show(struct seq_file *m, void *v)
{ {
/* maximum; incore; ratio free to inuse; freelist */ /* maximum; incore; ratio free to inuse; freelist */
seq_printf(m, "%d\t%d\t%d\t%u\n", seq_printf(m, "%d\t%d\t%d\t%u\n",
ndquot, 0,
xfs_Gqm? atomic_read(&xfs_Gqm->qm_totaldquots) : 0, xfs_Gqm? atomic_read(&xfs_Gqm->qm_totaldquots) : 0,
xfs_Gqm? xfs_Gqm->qm_dqfree_ratio : 0, 0,
xfs_Gqm? xfs_Gqm->qm_dqfrlist_cnt : 0); xfs_Gqm? xfs_Gqm->qm_dqfrlist_cnt : 0);
return 0; return 0;
} }
......
...@@ -733,11 +733,10 @@ DEFINE_EVENT(xfs_dquot_class, name, \ ...@@ -733,11 +733,10 @@ DEFINE_EVENT(xfs_dquot_class, name, \
DEFINE_DQUOT_EVENT(xfs_dqadjust); DEFINE_DQUOT_EVENT(xfs_dqadjust);
DEFINE_DQUOT_EVENT(xfs_dqreclaim_want); DEFINE_DQUOT_EVENT(xfs_dqreclaim_want);
DEFINE_DQUOT_EVENT(xfs_dqreclaim_dirty); DEFINE_DQUOT_EVENT(xfs_dqreclaim_dirty);
DEFINE_DQUOT_EVENT(xfs_dqreclaim_unlink); DEFINE_DQUOT_EVENT(xfs_dqreclaim_busy);
DEFINE_DQUOT_EVENT(xfs_dqreclaim_done);
DEFINE_DQUOT_EVENT(xfs_dqattach_found); DEFINE_DQUOT_EVENT(xfs_dqattach_found);
DEFINE_DQUOT_EVENT(xfs_dqattach_get); DEFINE_DQUOT_EVENT(xfs_dqattach_get);
DEFINE_DQUOT_EVENT(xfs_dqinit);
DEFINE_DQUOT_EVENT(xfs_dqreuse);
DEFINE_DQUOT_EVENT(xfs_dqalloc); DEFINE_DQUOT_EVENT(xfs_dqalloc);
DEFINE_DQUOT_EVENT(xfs_dqtobp_read); DEFINE_DQUOT_EVENT(xfs_dqtobp_read);
DEFINE_DQUOT_EVENT(xfs_dqread); DEFINE_DQUOT_EVENT(xfs_dqread);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment