Commit 344d9004 authored by Patrick McHardy's avatar Patrick McHardy Committed by David S. Miller

[IPV{4,6}]: lru queue for ip_fragment evictor.

The current ip_fragment evictor kills the oldest entry of each hash bucket
starting with 0 instead of killing the oldest entry of all buckets. This 
leads
to unfair behaviour if one of the higher hash slots carries a lot of 
fragments.
This patch holds the frag heads in an LRU queue so we can kill the least
recently used entry first. Each arriving fragment counts as usage.
parent f802b699
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
* Bill Hawes : Frag accounting and evictor fixes. * Bill Hawes : Frag accounting and evictor fixes.
* John McDonald : 0 length frag bug. * John McDonald : 0 length frag bug.
* Alexey Kuznetsov: SMP races, threading, cleanup. * Alexey Kuznetsov: SMP races, threading, cleanup.
* Patrick McHardy : LRU queue of frag heads for evictor.
*/ */
#include <linux/config.h> #include <linux/config.h>
...@@ -26,6 +27,7 @@ ...@@ -26,6 +27,7 @@
#include <linux/mm.h> #include <linux/mm.h>
#include <linux/jiffies.h> #include <linux/jiffies.h>
#include <linux/skbuff.h> #include <linux/skbuff.h>
#include <linux/list.h>
#include <linux/ip.h> #include <linux/ip.h>
#include <linux/icmp.h> #include <linux/icmp.h>
#include <linux/netdevice.h> #include <linux/netdevice.h>
...@@ -67,6 +69,7 @@ struct ipfrag_skb_cb ...@@ -67,6 +69,7 @@ struct ipfrag_skb_cb
/* Describe an entry in the "incomplete datagrams" queue. */ /* Describe an entry in the "incomplete datagrams" queue. */
struct ipq { struct ipq {
struct ipq *next; /* linked list pointers */ struct ipq *next; /* linked list pointers */
struct list_head lru_list; /* lru list member */
u32 saddr; u32 saddr;
u32 daddr; u32 daddr;
u16 id; u16 id;
...@@ -94,6 +97,7 @@ struct ipq { ...@@ -94,6 +97,7 @@ struct ipq {
/* Per-bucket lock is easy to add now. */ /* Per-bucket lock is easy to add now. */
static struct ipq *ipq_hash[IPQ_HASHSZ]; static struct ipq *ipq_hash[IPQ_HASHSZ];
static rwlock_t ipfrag_lock = RW_LOCK_UNLOCKED; static rwlock_t ipfrag_lock = RW_LOCK_UNLOCKED;
static LIST_HEAD(ipq_lru_list);
int ip_frag_nqueues = 0; int ip_frag_nqueues = 0;
static __inline__ void __ipq_unlink(struct ipq *qp) static __inline__ void __ipq_unlink(struct ipq *qp)
...@@ -101,6 +105,7 @@ static __inline__ void __ipq_unlink(struct ipq *qp) ...@@ -101,6 +105,7 @@ static __inline__ void __ipq_unlink(struct ipq *qp)
if(qp->next) if(qp->next)
qp->next->pprev = qp->pprev; qp->next->pprev = qp->pprev;
*qp->pprev = qp->next; *qp->pprev = qp->next;
list_del(&qp->lru_list);
ip_frag_nqueues--; ip_frag_nqueues--;
} }
...@@ -202,23 +207,19 @@ static void ipq_kill(struct ipq *ipq) ...@@ -202,23 +207,19 @@ static void ipq_kill(struct ipq *ipq)
*/ */
static void ip_evictor(void) static void ip_evictor(void)
{ {
int i, progress; struct ipq *qp;
struct list_head *tmp;
do { for(;;) {
if (atomic_read(&ip_frag_mem) <= sysctl_ipfrag_low_thresh) if (atomic_read(&ip_frag_mem) <= sysctl_ipfrag_low_thresh)
return; return;
progress = 0;
/* FIXME: Make LRU queue of frag heads. -DaveM */
for (i = 0; i < IPQ_HASHSZ; i++) {
struct ipq *qp;
if (ipq_hash[i] == NULL)
continue;
read_lock(&ipfrag_lock); read_lock(&ipfrag_lock);
if ((qp = ipq_hash[i]) != NULL) { if (list_empty(&ipq_lru_list)) {
/* find the oldest queue for this hash bucket */ read_unlock(&ipfrag_lock);
while (qp->next) return;
qp = qp->next; }
tmp = ipq_lru_list.next;
qp = list_entry(tmp, struct ipq, lru_list);
atomic_inc(&qp->refcnt); atomic_inc(&qp->refcnt);
read_unlock(&ipfrag_lock); read_unlock(&ipfrag_lock);
...@@ -229,12 +230,7 @@ static void ip_evictor(void) ...@@ -229,12 +230,7 @@ static void ip_evictor(void)
ipq_put(qp); ipq_put(qp);
IP_INC_STATS_BH(IpReasmFails); IP_INC_STATS_BH(IpReasmFails);
progress = 1;
continue;
} }
read_unlock(&ipfrag_lock);
}
} while (progress);
} }
/* /*
...@@ -302,6 +298,8 @@ static struct ipq *ip_frag_intern(unsigned int hash, struct ipq *qp_in) ...@@ -302,6 +298,8 @@ static struct ipq *ip_frag_intern(unsigned int hash, struct ipq *qp_in)
qp->next->pprev = &qp->next; qp->next->pprev = &qp->next;
ipq_hash[hash] = qp; ipq_hash[hash] = qp;
qp->pprev = &ipq_hash[hash]; qp->pprev = &ipq_hash[hash];
INIT_LIST_HEAD(&qp->lru_list);
list_add_tail(&qp->lru_list, &ipq_lru_list);
ip_frag_nqueues++; ip_frag_nqueues++;
write_unlock(&ipfrag_lock); write_unlock(&ipfrag_lock);
return qp; return qp;
...@@ -496,6 +494,10 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) ...@@ -496,6 +494,10 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
if (offset == 0) if (offset == 0)
qp->last_in |= FIRST_IN; qp->last_in |= FIRST_IN;
write_lock(&ipfrag_lock);
list_move_tail(&qp->lru_list, &ipq_lru_list);
write_unlock(&ipfrag_lock);
return; return;
err: err:
......
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
* *
* Horst von Brand Add missing #include <linux/string.h> * Horst von Brand Add missing #include <linux/string.h>
* Alexey Kuznetsov SMP races, threading, cleanup. * Alexey Kuznetsov SMP races, threading, cleanup.
* Patrick McHardy LRU queue of frag heads for evictor.
*/ */
#include <linux/config.h> #include <linux/config.h>
#include <linux/errno.h> #include <linux/errno.h>
...@@ -31,6 +32,7 @@ ...@@ -31,6 +32,7 @@
#include <linux/sockios.h> #include <linux/sockios.h>
#include <linux/jiffies.h> #include <linux/jiffies.h>
#include <linux/net.h> #include <linux/net.h>
#include <linux/list.h>
#include <linux/netdevice.h> #include <linux/netdevice.h>
#include <linux/in6.h> #include <linux/in6.h>
#include <linux/ipv6.h> #include <linux/ipv6.h>
...@@ -67,6 +69,7 @@ struct ip6frag_skb_cb ...@@ -67,6 +69,7 @@ struct ip6frag_skb_cb
struct frag_queue struct frag_queue
{ {
struct frag_queue *next; struct frag_queue *next;
struct list_head lru_list; /* lru list member */
__u32 id; /* fragment id */ __u32 id; /* fragment id */
struct in6_addr saddr; struct in6_addr saddr;
...@@ -95,6 +98,7 @@ struct frag_queue ...@@ -95,6 +98,7 @@ struct frag_queue
static struct frag_queue *ip6_frag_hash[IP6Q_HASHSZ]; static struct frag_queue *ip6_frag_hash[IP6Q_HASHSZ];
static rwlock_t ip6_frag_lock = RW_LOCK_UNLOCKED; static rwlock_t ip6_frag_lock = RW_LOCK_UNLOCKED;
static LIST_HEAD(ip6_frag_lru_list);
int ip6_frag_nqueues = 0; int ip6_frag_nqueues = 0;
static __inline__ void __fq_unlink(struct frag_queue *fq) static __inline__ void __fq_unlink(struct frag_queue *fq)
...@@ -102,6 +106,7 @@ static __inline__ void __fq_unlink(struct frag_queue *fq) ...@@ -102,6 +106,7 @@ static __inline__ void __fq_unlink(struct frag_queue *fq)
if(fq->next) if(fq->next)
fq->next->pprev = fq->pprev; fq->next->pprev = fq->pprev;
*fq->pprev = fq->next; *fq->pprev = fq->next;
list_del(&fq->lru_list);
ip6_frag_nqueues--; ip6_frag_nqueues--;
} }
...@@ -193,22 +198,19 @@ static __inline__ void fq_kill(struct frag_queue *fq) ...@@ -193,22 +198,19 @@ static __inline__ void fq_kill(struct frag_queue *fq)
static void ip6_evictor(void) static void ip6_evictor(void)
{ {
int i, progress; struct frag_queue *fq;
struct list_head *tmp;
do { for(;;) {
if (atomic_read(&ip6_frag_mem) <= sysctl_ip6frag_low_thresh) if (atomic_read(&ip6_frag_mem) <= sysctl_ip6frag_low_thresh)
return; return;
progress = 0;
for (i = 0; i < IP6Q_HASHSZ; i++) {
struct frag_queue *fq;
if (ip6_frag_hash[i] == NULL)
continue;
read_lock(&ip6_frag_lock); read_lock(&ip6_frag_lock);
if ((fq = ip6_frag_hash[i]) != NULL) { if (list_empty(&ip6_frag_lru_list)) {
/* find the oldest queue for this hash bucket */ read_unlock(&ip6_frag_lock);
while (fq->next) return;
fq = fq->next; }
tmp = ip6_frag_lru_list.next;
fq = list_entry(tmp, struct frag_queue, lru_list);
atomic_inc(&fq->refcnt); atomic_inc(&fq->refcnt);
read_unlock(&ip6_frag_lock); read_unlock(&ip6_frag_lock);
...@@ -219,12 +221,7 @@ static void ip6_evictor(void) ...@@ -219,12 +221,7 @@ static void ip6_evictor(void)
fq_put(fq); fq_put(fq);
IP6_INC_STATS_BH(Ip6ReasmFails); IP6_INC_STATS_BH(Ip6ReasmFails);
progress = 1;
continue;
}
read_unlock(&ip6_frag_lock);
} }
} while (progress);
} }
static void ip6_frag_expire(unsigned long data) static void ip6_frag_expire(unsigned long data)
...@@ -294,6 +291,8 @@ static struct frag_queue *ip6_frag_intern(unsigned int hash, ...@@ -294,6 +291,8 @@ static struct frag_queue *ip6_frag_intern(unsigned int hash,
fq->next->pprev = &fq->next; fq->next->pprev = &fq->next;
ip6_frag_hash[hash] = fq; ip6_frag_hash[hash] = fq;
fq->pprev = &ip6_frag_hash[hash]; fq->pprev = &ip6_frag_hash[hash];
INIT_LIST_HEAD(&fq->lru_list);
list_add_tail(&fq->lru_list, &ip6_frag_lru_list);
ip6_frag_nqueues++; ip6_frag_nqueues++;
write_unlock(&ip6_frag_lock); write_unlock(&ip6_frag_lock);
return fq; return fq;
...@@ -501,6 +500,9 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, ...@@ -501,6 +500,9 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
fq->nhoffset = nhoff; fq->nhoffset = nhoff;
fq->last_in |= FIRST_IN; fq->last_in |= FIRST_IN;
} }
write_lock(&ip6_frag_lock);
list_move_tail(&fq->lru_list, &ip6_frag_lru_list);
write_unlock(&ip6_frag_lock);
return; return;
err: err:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment