[NETFILTER]: reduce netfilter sk_buff enlargement
As discussed at netconf'05, we're trying to save every bit in sk_buff.
The patch below makes sk_buff 8 bytes smaller. I did some basic
testing on my notebook and it seems to work.
The only real in-tree user of nfcache was IPVS, which only needs a
single bit. Unfortunately I couldn't find any other free bit in
sk_buff to stuff that bit into, so I introduced a separate field for
it. Maybe the IPVS guys can resolve that to further save space.
Initially I wanted to shrink pkt_type to three bits (only 6 values,
PACKET_HOST and the like, are defined), but unfortunately the bluetooth
code overloads pkt_type :(
The conntrack-event-api (out-of-tree) uses nfcache, but Rusty just
came up with a way to do it without any skb fields, so it's safe
to remove it.
- remove all never-implemented 'nfcache' code
- don't have ipvs code abuse 'nfcache' field. it currently gets its own
compile-conditional skb->ipvs_property field. IPVS maintainers can
decide to move this bit elsewhere, but nfcache needs to die.
- remove skb->nfcache field to save 4 bytes
- move skb->nfctinfo into three unused bits to save further 4 bytes
Signed-off-by: Harald Welte <laforge@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 2e20454..ec60856 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -21,10 +21,13 @@
#define NF_STOP 5
#define NF_MAX_VERDICT NF_STOP
+/* only for userspace compatibility */
+#ifndef __KERNEL__
/* Generic cache responses from hook functions.
<= 0x2000 is used for protocol-flags. */
#define NFC_UNKNOWN 0x4000
#define NFC_ALTERED 0x8000
+#endif
#ifdef __KERNEL__
#include <linux/config.h>
diff --git a/include/linux/netfilter_decnet.h b/include/linux/netfilter_decnet.h
index 3064eec..0189794 100644
--- a/include/linux/netfilter_decnet.h
+++ b/include/linux/netfilter_decnet.h
@@ -9,6 +9,8 @@
#include <linux/netfilter.h>
+/* only for userspace compatibility */
+#ifndef __KERNEL__
/* IP Cache bits. */
/* Src IP address. */
#define NFC_DN_SRC 0x0001
@@ -18,6 +20,7 @@
#define NFC_DN_IF_IN 0x0004
/* Output device. */
#define NFC_DN_IF_OUT 0x0008
+#endif /* ! __KERNEL__ */
/* DECnet Hooks */
/* After promisc drops, checksum checks. */
diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h
index 3ebc36a..552815b 100644
--- a/include/linux/netfilter_ipv4.h
+++ b/include/linux/netfilter_ipv4.h
@@ -8,6 +8,8 @@
#include <linux/config.h>
#include <linux/netfilter.h>
+/* only for userspace compatibility */
+#ifndef __KERNEL__
/* IP Cache bits. */
/* Src IP address. */
#define NFC_IP_SRC 0x0001
@@ -35,6 +37,7 @@
#define NFC_IP_DST_PT 0x0400
/* Something else about the proto */
#define NFC_IP_PROTO_UNKNOWN 0x2000
+#endif /* ! __KERNEL__ */
/* IP Hooks */
/* After promisc drops, checksum checks. */
diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h
index bee7a5e..20c069a 100644
--- a/include/linux/netfilter_ipv6.h
+++ b/include/linux/netfilter_ipv6.h
@@ -10,6 +10,8 @@
#include <linux/netfilter.h>
+/* only for userspace compatibility */
+#ifndef __KERNEL__
/* IP Cache bits. */
/* Src IP address. */
#define NFC_IP6_SRC 0x0001
@@ -38,6 +40,7 @@
#define NFC_IP6_DST_PT 0x0400
/* Something else about the proto */
#define NFC_IP6_PROTO_UNKNOWN 0x2000
+#endif /* ! __KERNEL__ */
/* IP6 Hooks */
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 2e40f4c..4b929c3 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -190,7 +190,6 @@
* @end: End pointer
* @destructor: Destruct function
* @nfmark: Can be used for communication between hooks
- * @nfcache: Cache info
* @nfct: Associated connection, if any
* @nfctinfo: Relationship of this skb to the connection
* @nf_bridge: Saved data about a bridged frame - see br_netfilter.c
@@ -252,17 +251,18 @@
__u8 local_df:1,
cloned:1,
ip_summed:2,
- nohdr:1;
- /* 3 bits spare */
+ nohdr:1,
+ nfctinfo:3;
__u8 pkt_type;
__be16 protocol;
void (*destructor)(struct sk_buff *skb);
#ifdef CONFIG_NETFILTER
__u32 nfmark;
- __u32 nfcache;
- __u32 nfctinfo;
struct nf_conntrack *nfct;
+#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
+ __u8 ipvs_property:1;
+#endif
#ifdef CONFIG_BRIDGE_NETFILTER
struct nf_bridge_info *nf_bridge;
#endif
diff --git a/net/bridge/netfilter/ebt_mark.c b/net/bridge/netfilter/ebt_mark.c
index 02c632b..c93d35a 100644
--- a/net/bridge/netfilter/ebt_mark.c
+++ b/net/bridge/netfilter/ebt_mark.c
@@ -23,10 +23,9 @@
{
struct ebt_mark_t_info *info = (struct ebt_mark_t_info *)data;
- if ((*pskb)->nfmark != info->mark) {
+ if ((*pskb)->nfmark != info->mark)
(*pskb)->nfmark = info->mark;
- (*pskb)->nfcache |= NFC_ALTERED;
- }
+
return info->target;
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 7eab867..096991c 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -361,7 +361,6 @@
n->destructor = NULL;
#ifdef CONFIG_NETFILTER
C(nfmark);
- C(nfcache);
C(nfct);
nf_conntrack_get(skb->nfct);
C(nfctinfo);
@@ -424,7 +423,6 @@
new->destructor = NULL;
#ifdef CONFIG_NETFILTER
new->nfmark = old->nfmark;
- new->nfcache = old->nfcache;
new->nfct = old->nfct;
nf_conntrack_get(old->nfct);
new->nfctinfo = old->nfctinfo;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 80d1310..766564c 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -392,7 +392,6 @@
#endif
#ifdef CONFIG_NETFILTER
to->nfmark = from->nfmark;
- to->nfcache = from->nfcache;
/* Connection association is same as pre-frag packet */
nf_conntrack_put(to->nfct);
to->nfct = from->nfct;
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index 5fb257d..3ac7eec 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -22,6 +22,7 @@
*
* Changes:
* Paul `Rusty' Russell properly handle non-linear skbs
+ * Harald Welte don't use nfcache
*
*/
@@ -529,7 +530,7 @@
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- if (!((*pskb)->nfcache & NFC_IPVS_PROPERTY))
+ if (!((*pskb)->ipvs_property))
return NF_ACCEPT;
/* The packet was sent from IPVS, exit this chain */
@@ -701,7 +702,7 @@
/* do the statistics and put it back */
ip_vs_out_stats(cp, skb);
- skb->nfcache |= NFC_IPVS_PROPERTY;
+ skb->ipvs_property = 1;
verdict = NF_ACCEPT;
out:
@@ -739,7 +740,7 @@
EnterFunction(11);
- if (skb->nfcache & NFC_IPVS_PROPERTY)
+ if (skb->ipvs_property)
return NF_ACCEPT;
iph = skb->nh.iph;
@@ -821,7 +822,7 @@
ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp);
ip_vs_conn_put(cp);
- skb->nfcache |= NFC_IPVS_PROPERTY;
+ skb->ipvs_property = 1;
LeaveFunction(11);
return NF_ACCEPT;
diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c
index a8512a3..3b87482 100644
--- a/net/ipv4/ipvs/ip_vs_xmit.c
+++ b/net/ipv4/ipvs/ip_vs_xmit.c
@@ -127,7 +127,7 @@
#define IP_VS_XMIT(skb, rt) \
do { \
- (skb)->nfcache |= NFC_IPVS_PROPERTY; \
+ (skb)->ipvs_property = 1; \
(skb)->ip_summed = CHECKSUM_NONE; \
NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, (skb), NULL, \
(rt)->u.dst.dev, dst_output); \
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index a7f0c82..04c3414 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -625,9 +625,6 @@
return NF_DROP;
}
- /* FIXME: Do this right please. --RR */
- (*pskb)->nfcache |= NFC_UNKNOWN;
-
/* Doesn't cover locally-generated broadcast, so not worth it. */
#if 0
/* Ignore broadcast: no `connection'. */
@@ -943,10 +940,8 @@
skb = ip_defrag(skb, user);
local_bh_enable();
- if (skb) {
+ if (skb)
ip_send_check(skb->nh.iph);
- skb->nfcache |= NFC_ALTERED;
- }
return skb;
}
diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c
index 739b6dd..ed4d731 100644
--- a/net/ipv4/netfilter/ip_nat_core.c
+++ b/net/ipv4/netfilter/ip_nat_core.c
@@ -321,7 +321,6 @@
{
struct iphdr *iph;
- (*pskb)->nfcache |= NFC_ALTERED;
if (!skb_ip_make_writable(pskb, iphdroff + sizeof(*iph)))
return 0;
diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c
index 91d5ea1..9ecba97 100644
--- a/net/ipv4/netfilter/ip_nat_standalone.c
+++ b/net/ipv4/netfilter/ip_nat_standalone.c
@@ -73,8 +73,6 @@
IP_NF_ASSERT(!((*pskb)->nh.iph->frag_off
& htons(IP_MF|IP_OFFSET)));
- (*pskb)->nfcache |= NFC_UNKNOWN;
-
/* If we had a hardware checksum before, it's now invalid */
if ((*pskb)->ip_summed == CHECKSUM_HW)
if (skb_checksum_help(*pskb, (out == NULL)))
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index c6baa81..bc0af8d 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -392,7 +392,6 @@
return -ENOMEM;
memcpy(e->skb->data, v->payload, v->data_len);
e->skb->ip_summed = CHECKSUM_NONE;
- e->skb->nfcache |= NFC_ALTERED;
/*
* Extra routing may needed on local out, as the QUEUE target never
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index c88dfcd..ff8d85d 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -312,7 +312,6 @@
do {
IP_NF_ASSERT(e);
IP_NF_ASSERT(back);
- (*pskb)->nfcache |= e->nfcache;
if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) {
struct ipt_entry_target *t;
diff --git a/net/ipv4/netfilter/ipt_CLASSIFY.c b/net/ipv4/netfilter/ipt_CLASSIFY.c
index 9842e6e..dab78d8 100644
--- a/net/ipv4/netfilter/ipt_CLASSIFY.c
+++ b/net/ipv4/netfilter/ipt_CLASSIFY.c
@@ -32,10 +32,8 @@
{
const struct ipt_classify_target_info *clinfo = targinfo;
- if((*pskb)->priority != clinfo->priority) {
+ if((*pskb)->priority != clinfo->priority)
(*pskb)->priority = clinfo->priority;
- (*pskb)->nfcache |= NFC_ALTERED;
- }
return IPT_CONTINUE;
}
diff --git a/net/ipv4/netfilter/ipt_CONNMARK.c b/net/ipv4/netfilter/ipt_CONNMARK.c
index 8ed7441..1346380 100644
--- a/net/ipv4/netfilter/ipt_CONNMARK.c
+++ b/net/ipv4/netfilter/ipt_CONNMARK.c
@@ -61,10 +61,8 @@
case IPT_CONNMARK_RESTORE:
nfmark = (*pskb)->nfmark;
diff = (ct->mark ^ nfmark) & markinfo->mask;
- if (diff != 0) {
+ if (diff != 0)
(*pskb)->nfmark = nfmark ^ diff;
- (*pskb)->nfcache |= NFC_ALTERED;
- }
break;
}
}
diff --git a/net/ipv4/netfilter/ipt_DSCP.c b/net/ipv4/netfilter/ipt_DSCP.c
index 3ea4509..975476f 100644
--- a/net/ipv4/netfilter/ipt_DSCP.c
+++ b/net/ipv4/netfilter/ipt_DSCP.c
@@ -51,7 +51,6 @@
sizeof(diffs),
(*pskb)->nh.iph->check
^ 0xFFFF));
- (*pskb)->nfcache |= NFC_ALTERED;
}
return IPT_CONTINUE;
}
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index 94a0ce1..f63a9bc 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -43,7 +43,6 @@
sizeof(diffs),
(*pskb)->nh.iph->check
^0xFFFF));
- (*pskb)->nfcache |= NFC_ALTERED;
}
return 1;
}
@@ -87,7 +86,6 @@
tcph->check = csum_fold(csum_partial((char *)diffs,
sizeof(diffs),
tcph->check^0xFFFF));
- (*pskb)->nfcache |= NFC_ALTERED;
return 1;
}
diff --git a/net/ipv4/netfilter/ipt_MARK.c b/net/ipv4/netfilter/ipt_MARK.c
index 8526398..52b4f2c 100644
--- a/net/ipv4/netfilter/ipt_MARK.c
+++ b/net/ipv4/netfilter/ipt_MARK.c
@@ -29,10 +29,9 @@
{
const struct ipt_mark_target_info *markinfo = targinfo;
- if((*pskb)->nfmark != markinfo->mark) {
+ if((*pskb)->nfmark != markinfo->mark)
(*pskb)->nfmark = markinfo->mark;
- (*pskb)->nfcache |= NFC_ALTERED;
- }
+
return IPT_CONTINUE;
}
@@ -61,10 +60,9 @@
break;
}
- if((*pskb)->nfmark != mark) {
+ if((*pskb)->nfmark != mark)
(*pskb)->nfmark = mark;
- (*pskb)->nfcache |= NFC_ALTERED;
- }
+
return IPT_CONTINUE;
}
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index 9156964..f115a84 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -156,7 +156,6 @@
/* This packet will not be the same as the other: clear nf fields */
nf_reset(nskb);
- nskb->nfcache = 0;
nskb->nfmark = 0;
#ifdef CONFIG_BRIDGE_NETFILTER
nf_bridge_put(nskb->nf_bridge);
diff --git a/net/ipv4/netfilter/ipt_TCPMSS.c b/net/ipv4/netfilter/ipt_TCPMSS.c
index 7b84a25..9492883 100644
--- a/net/ipv4/netfilter/ipt_TCPMSS.c
+++ b/net/ipv4/netfilter/ipt_TCPMSS.c
@@ -190,7 +190,6 @@
newmss);
retmodified:
- (*pskb)->nfcache |= NFC_UNKNOWN | NFC_ALTERED;
return IPT_CONTINUE;
}
diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c
index 85c70d2..49abb7e 100644
--- a/net/ipv4/netfilter/ipt_TOS.c
+++ b/net/ipv4/netfilter/ipt_TOS.c
@@ -46,7 +46,6 @@
sizeof(diffs),
(*pskb)->nh.iph->check
^0xFFFF));
- (*pskb)->nfcache |= NFC_ALTERED;
}
return IPT_CONTINUE;
}
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index ae652ca..590d2b7 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -185,19 +185,6 @@
}
#endif
-static inline int ip6_maybe_reroute(struct sk_buff *skb)
-{
-#ifdef CONFIG_NETFILTER
- if (skb->nfcache & NFC_ALTERED){
- if (ip6_route_me_harder(skb) != 0){
- kfree_skb(skb);
- return -EINVAL;
- }
- }
-#endif /* CONFIG_NETFILTER */
- return dst_output(skb);
-}
-
/*
* xmit an sk_buff (used by TCP)
*/
@@ -266,7 +253,8 @@
mtu = dst_mtu(dst);
if ((skb->len <= mtu) || ipfragok) {
IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
- return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, ip6_maybe_reroute);
+ return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev,
+ dst_output);
}
if (net_ratelimit())
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index a16df5b..83ccedc 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -388,7 +388,6 @@
return -ENOMEM;
memcpy(e->skb->data, v->payload, v->data_len);
e->skb->ip_summed = CHECKSUM_NONE;
- e->skb->nfcache |= NFC_ALTERED;
/*
* Extra routing may needed on local out, as the QUEUE target never
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 7303451..41a67cf 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -401,7 +401,6 @@
do {
IP_NF_ASSERT(e);
IP_NF_ASSERT(back);
- (*pskb)->nfcache |= e->nfcache;
if (ip6_packet_match(*pskb, indev, outdev, &e->ipv6,
&protoff, &offset)) {
struct ip6t_entry_target *t;
diff --git a/net/ipv6/netfilter/ip6t_MARK.c b/net/ipv6/netfilter/ip6t_MARK.c
index d09ceb0..81924fc 100644
--- a/net/ipv6/netfilter/ip6t_MARK.c
+++ b/net/ipv6/netfilter/ip6t_MARK.c
@@ -28,10 +28,9 @@
{
const struct ip6t_mark_target_info *markinfo = targinfo;
- if((*pskb)->nfmark != markinfo->mark) {
+ if((*pskb)->nfmark != markinfo->mark)
(*pskb)->nfmark = markinfo->mark;
- (*pskb)->nfcache |= NFC_ALTERED;
- }
+
return IP6T_CONTINUE;
}