nfsd: dynamically allocate DRC entries

The existing code keeps a fixed-size cache of 1024 entries. This is much too small for a busy server, and wastes memory on an idle one. This patch changes the code to dynamically allocate and free these cache entries.

A cap on the number of entries is retained, but it's much larger than the existing value and now scales with the amount of low memory in the machine.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>

nfsd: dynamically allocate DRC entries
The existing code keeps a fixed-size cache of 1024 entries. This is much too small for a busy server, and wastes memory on an idle one. This patch changes the code to dynamically allocate and free these cache entries.

A cap on the number of entries is retained, but it's much larger than the existing value and now scales with the amount of low memory in the machine.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
0338dd15 · Jeff Layton · J. Bruce Fields · 0ee0bf7e · 0338dd15
Commit 0338dd15 authored Feb 04, 2013 by Jeff Layton Committed by J. Bruce Fields Feb 04, 2013
Show whitespace changes
Inline Side-by-side

Showing with 75 additions and 50 deletions

fs/nfsd/nfscache.c fs/nfsd/nfscache.c +75 -50

No files found.
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -10,17 +10,13 @@

 #include <linux/slab.h>
 #include <linux/sunrpc/clnt.h>
+#include <linux/highmem.h>

 #include "nfsd.h"
 #include "cache.h"

-/* Size of reply cache. Common values are:
- * 4.3BSD:	128
- * 4.4BSD:	256
- * Solaris2:	1024
- * DEC Unix:	512-4096
- */
-#define CACHESIZE		1024
+#define NFSDDBG_FACILITY	NFSDDBG_REPCACHE
+
 #define HASHSIZE		64

 static struct hlist_head *	cache_hash;
@@ -28,6 +24,7 @@ static struct list_head 	lru_head;
 static int			cache_disabled = 1;
 static struct kmem_cache	*drc_slab;
 static unsigned int		num_drc_entries;
+static unsigned int		max_drc_entries;

 /*
 * Calculate the hash index from an XID.
@@ -48,6 +45,34 @@ static int	nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec);
 */
 static DEFINE_SPINLOCK(cache_lock);

+/*
+ * Put a cap on the size of the DRC based on the amount of available
+ * low memory in the machine.
+ *
+ *  64MB:    8192
+ * 128MB:   11585
+ * 256MB:   16384
+ * 512MB:   23170
+ *   1GB:   32768
+ *   2GB:   46340
+ *   4GB:   65536
+ *   8GB:   92681
+ *  16GB:  131072
+ *
+ * ...with a hard cap of 256k entries. In the worst case, each entry will be
+ * ~1KB, so the entry counts above also give a rough maximum for the amount
+ * of memory used, in kilobytes.
+ */
+static unsigned int
+nfsd_cache_size_limit(void)
+{
+	unsigned int limit;
+	unsigned long low_pages = totalram_pages - totalhigh_pages;
+
+	limit = (16 * int_sqrt(low_pages)) << (PAGE_SHIFT-10);
+	return min_t(unsigned int, limit, 256*1024);
+}
+
 static struct svc_cacherep *
 nfsd_reply_cache_alloc(void)
 {
@@ -68,6 +93,7 @@ nfsd_reply_cache_free_locked(struct svc_cacherep *rp)
 {
 	if (rp->c_type == RC_REPLBUFF)
 		kfree(rp->c_replvec.iov_base);
+	hlist_del(&rp->c_hash);
 	list_del(&rp->c_lru);
 	--num_drc_entries;
 	kmem_cache_free(drc_slab, rp);
@@ -75,30 +101,18 @@ nfsd_reply_cache_free_locked(struct svc_cacherep *rp)

 int nfsd_reply_cache_init(void)
 {
-	int			i;
-	struct svc_cacherep	*rp;
-
 	drc_slab = kmem_cache_create("nfsd_drc", sizeof(struct svc_cacherep),
 					0, 0, NULL);
 	if (!drc_slab)
 		goto out_nomem;

-	INIT_LIST_HEAD(&lru_head);
-	i = CACHESIZE;
-	num_drc_entries = 0;
-	while (i) {
-		rp = nfsd_reply_cache_alloc();
-		if (!rp)
-			goto out_nomem;
-		++num_drc_entries;
-		list_add(&rp->c_lru, &lru_head);
-		i--;
-	}
-
-	cache_hash = kcalloc (HASHSIZE, sizeof(struct hlist_head), GFP_KERNEL);
+	cache_hash = kcalloc(HASHSIZE, sizeof(struct hlist_head), GFP_KERNEL);
 	if (!cache_hash)
 		goto out_nomem;

+	INIT_LIST_HEAD(&lru_head);
+	max_drc_entries = nfsd_cache_size_limit();
+	num_drc_entries = 0;
 	cache_disabled = 0;
 	return 0;
 out_nomem:
@@ -191,7 +205,7 @@ nfsd_cache_search(struct svc_rqst *rqstp)
 int
 nfsd_cache_lookup(struct svc_rqst *rqstp)
 {
-	struct svc_cacherep	*rp;
+	struct svc_cacherep	*rp, *found;
 	__be32			xid = rqstp->rq_xid;
 	u32			proto =  rqstp->rq_prot,
 				vers = rqstp->rq_vers,
@@ -210,38 +224,48 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
 	rtn = RC_DOIT;

 	rp = nfsd_cache_search(rqstp);
-	if (rp) {
-		nfsdstats.rchits++;
+	if (rp)
 		goto found_entry;
-	}
-	nfsdstats.rcmisses++;

-	/* This loop shouldn't take more than a few iterations normally */
-	{
-	int	safe = 0;
-	list_for_each_entry(rp, &lru_head, c_lru) {
-		if (rp->c_state != RC_INPROG)
-			break;
-		if (safe++ > CACHESIZE) {
-			printk("nfsd: loop in repcache LRU list\n");
-			cache_disabled = 1;
-			goto out;
-		}
-	}
+	/* Try to use the first entry on the LRU */
+	if (!list_empty(&lru_head)) {
+		rp = list_first_entry(&lru_head, struct svc_cacherep, c_lru);
+		if (nfsd_cache_entry_expired(rp) ||
+		    num_drc_entries >= max_drc_entries)
+			goto setup_entry;
 	}

-	/* All entries on the LRU are in-progress. This should not happen */
-	if (&rp->c_lru == &lru_head) {
-		static int	complaints;
-
-		printk(KERN_WARNING "nfsd: all repcache entries locked!\n");
-		if (++complaints > 5) {
-			printk(KERN_WARNING "nfsd: disabling repcache.\n");
-			cache_disabled = 1;
+	spin_unlock(&cache_lock);
+	rp = nfsd_reply_cache_alloc();
+	if (!rp) {
+		dprintk("nfsd: unable to allocate DRC entry!\n");
+		return RC_DOIT;
 	}
-		goto out;
+	spin_lock(&cache_lock);
+	++num_drc_entries;
+
+	/*
+	 * Must search again just in case someone inserted one
+	 * after we dropped the lock above.
+	 */
+	found = nfsd_cache_search(rqstp);
+	if (found) {
+		nfsd_reply_cache_free_locked(rp);
+		rp = found;
+		goto found_entry;
 	}

+	/*
+	 * We're keeping the one we just allocated. Are we now over the
+	 * limit? Prune one off the tip of the LRU in trade for the one we
+	 * just allocated if so.
+	 */
+	if (num_drc_entries >= max_drc_entries)
+		nfsd_reply_cache_free_locked(list_first_entry(&lru_head,
+						struct svc_cacherep, c_lru));
+
+setup_entry:
+	nfsdstats.rcmisses++;
 	rqstp->rq_cacherep = rp;
 	rp->c_state = RC_INPROG;
 	rp->c_xid = xid;
@@ -265,6 +289,7 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
 	return rtn;

 found_entry:
+	nfsdstats.rchits++;
 	/* We found a matching entry which is either in progress or done. */
 	age = jiffies - rp->c_timestamp;
 	lru_put_end(rp);
@@ -295,7 +320,7 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
 		break;
 	default:
 		printk(KERN_WARNING "nfsd: bad repcache type %d\n", rp->c_type);
-		rp->c_state = RC_UNUSED;
+		nfsd_reply_cache_free_locked(rp);
 	}

 	goto out;