[NETNS][FRAGS]: Make the LRU list per namespace.
The inet_frags.lru_list is used for eviction only, so we have
to make it per-namespace, to evict only those fragments whose
namespace exceeded its high threshold, and not the whole hash.
Besides, this helps to avoid long loops in the evictor.
The spinlock is not per-namespace because it protects the
hash table as well, which is global.
Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 1917fbe..3695ff4 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -4,6 +4,7 @@
struct netns_frags {
int nqueues;
atomic_t mem;
+ struct list_head lru_list;
/* sysctls */
int timeout;
@@ -32,7 +33,6 @@
#define INETFRAGS_HASHSZ 64
struct inet_frags {
- struct list_head lru_list;
struct hlist_head hash[INETFRAGS_HASHSZ];
rwlock_t lock;
u32 rnd;
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index fcf52521..f1b95e1 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -57,7 +57,6 @@
for (i = 0; i < INETFRAGS_HASHSZ; i++)
INIT_HLIST_HEAD(&f->hash[i]);
- INIT_LIST_HEAD(&f->lru_list);
rwlock_init(&f->lock);
f->rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
@@ -74,6 +73,7 @@
{
nf->nqueues = 0;
atomic_set(&nf->mem, 0);
+ INIT_LIST_HEAD(&nf->lru_list);
}
EXPORT_SYMBOL(inet_frags_init_net);
@@ -156,12 +156,12 @@
work = atomic_read(&nf->mem) - nf->low_thresh;
while (work > 0) {
read_lock(&f->lock);
- if (list_empty(&f->lru_list)) {
+ if (list_empty(&nf->lru_list)) {
read_unlock(&f->lock);
break;
}
- q = list_first_entry(&f->lru_list,
+ q = list_first_entry(&nf->lru_list,
struct inet_frag_queue, lru_list);
atomic_inc(&q->refcnt);
read_unlock(&f->lock);
@@ -211,7 +211,7 @@
atomic_inc(&qp->refcnt);
hlist_add_head(&qp->list, &f->hash[hash]);
- list_add_tail(&qp->lru_list, &f->lru_list);
+ list_add_tail(&qp->lru_list, &nf->lru_list);
nf->nqueues++;
write_unlock(&f->lock);
return qp;
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 00646ed..29b4b09 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -441,7 +441,7 @@
return ip_frag_reasm(qp, prev, dev);
write_lock(&ip4_frags.lock);
- list_move_tail(&qp->q.lru_list, &ip4_frags.lru_list);
+ list_move_tail(&qp->q.lru_list, &qp->q.net->lru_list);
write_unlock(&ip4_frags.lock);
return -EINPROGRESS;
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 6eed991..022da6c 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -385,7 +385,7 @@
fq->q.last_in |= FIRST_IN;
}
write_lock(&nf_frags.lock);
- list_move_tail(&fq->q.lru_list, &nf_frags.lru_list);
+ list_move_tail(&fq->q.lru_list, &nf_init_frags.lru_list);
write_unlock(&nf_frags.lock);
return 0;
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 8520700..0c4bc46 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -424,7 +424,7 @@
return ip6_frag_reasm(fq, prev, dev);
write_lock(&ip6_frags.lock);
- list_move_tail(&fq->q.lru_list, &ip6_frags.lru_list);
+ list_move_tail(&fq->q.lru_list, &fq->q.net->lru_list);
write_unlock(&ip6_frags.lock);
return -1;