[TCP]: Add IPv6 support to TCP SYN cookies

Updated to incorporate Eric's suggestion of using a per cpu buffer
rather than allocating on the stack.  Just a two line change, but will
resend in it's entirety.

Signed-off-by: Glenn Griffin <ggriffin.kernel@gmail.com>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 177da14..4704f27 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -10,8 +10,6 @@
  *      2 of the License, or (at your option) any later version.
  *
  *  $Id: syncookies.c,v 1.18 2002/02/01 22:01:04 davem Exp $
- *
- *  Missing: IPv6 support.
  */
 
 #include <linux/tcp.h>
@@ -23,14 +21,15 @@
 
 extern int sysctl_tcp_syncookies;
 
-static __u32 syncookie_secret[2][16-3+SHA_DIGEST_WORDS];
+__u32 syncookie_secret[2][16-3+SHA_DIGEST_WORDS];
+EXPORT_SYMBOL(syncookie_secret);
 
 static __init int init_syncookies(void)
 {
 	get_random_bytes(syncookie_secret, sizeof(syncookie_secret));
 	return 0;
 }
-module_init(init_syncookies);
+__initcall(init_syncookies);
 
 #define COOKIEBITS 24	/* Upper bits store count */
 #define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 19c449f..93e128c 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5326,6 +5326,7 @@
 
 EXPORT_SYMBOL(sysctl_tcp_ecn);
 EXPORT_SYMBOL(sysctl_tcp_reordering);
+EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
 EXPORT_SYMBOL(tcp_parse_options);
 EXPORT_SYMBOL(tcp_rcv_established);
 EXPORT_SYMBOL(tcp_rcv_state_process);
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 0fdd1db..8245247 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -35,6 +35,8 @@
 #endif
 
 int sysctl_tcp_syncookies __read_mostly = SYNC_INIT;
+EXPORT_SYMBOL(sysctl_tcp_syncookies);
+
 int sysctl_tcp_abort_on_overflow __read_mostly;
 
 struct inet_timewait_death_row tcp_death_row = {
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index ed750f9..cbfef8b 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2560,6 +2560,7 @@
 	}
 }
 
+EXPORT_SYMBOL(tcp_select_initial_window);
 EXPORT_SYMBOL(tcp_connect);
 EXPORT_SYMBOL(tcp_make_synack);
 EXPORT_SYMBOL(tcp_simple_retransmit);
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 24f3aa0..ae14617 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -16,6 +16,7 @@
 ipv6-$(CONFIG_NETFILTER) += netfilter.o
 ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o
 ipv6-$(CONFIG_PROC_FS) += proc.o
+ipv6-$(CONFIG_SYN_COOKIES) += syncookies.o
 
 ipv6-objs += $(ipv6-y)
 
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
new file mode 100644
index 0000000..827c5aa
--- /dev/null
+++ b/net/ipv6/syncookies.c
@@ -0,0 +1,267 @@
+/*
+ *  IPv6 Syncookies implementation for the Linux kernel
+ *
+ *  Authors:
+ *  Glenn Griffin	<ggriffin.kernel@gmail.com>
+ *
+ *  Based on IPv4 implementation by Andi Kleen
+ *  linux/net/ipv4/syncookies.c
+ *
+ *	This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/tcp.h>
+#include <linux/random.h>
+#include <linux/cryptohash.h>
+#include <linux/kernel.h>
+#include <net/ipv6.h>
+#include <net/tcp.h>
+
+extern int sysctl_tcp_syncookies;
+extern __u32 syncookie_secret[2][16-3+SHA_DIGEST_WORDS];
+
+#define COOKIEBITS 24	/* Upper bits store count */
+#define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1)
+
+/*
+ * This table has to be sorted and terminated with (__u16)-1.
+ * XXX generate a better table.
+ * Unresolved Issues: HIPPI with a 64k MSS is not well supported.
+ *
+ * Taken directly from ipv4 implementation.
+ * Should this list be modified for ipv6 use or is it close enough?
+ * rfc 2460 8.3 suggests mss values 20 bytes less than ipv4 counterpart
+ */
+static __u16 const msstab[] = {
+	64 - 1,
+	256 - 1,
+	512 - 1,
+	536 - 1,
+	1024 - 1,
+	1440 - 1,
+	1460 - 1,
+	4312 - 1,
+	(__u16)-1
+};
+/* The number doesn't include the -1 terminator */
+#define NUM_MSS (ARRAY_SIZE(msstab) - 1)
+
+/*
+ * This (misnamed) value is the age of syncookie which is permitted.
+ * Its ideal value should be dependent on TCP_TIMEOUT_INIT and
+ * sysctl_tcp_retries1. It's a rather complicated formula (exponential
+ * backoff) to compute at runtime so it's currently hardcoded here.
+ */
+#define COUNTER_TRIES 4
+
+static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
+					   struct request_sock *req,
+					   struct dst_entry *dst)
+{
+	struct inet_connection_sock *icsk = inet_csk(sk);
+	struct sock *child;
+
+	child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst);
+	if (child)
+		inet_csk_reqsk_queue_add(sk, req, child);
+	else
+		reqsk_free(req);
+
+	return child;
+}
+
+static DEFINE_PER_CPU(__u32, cookie_scratch)[16 + 5 + SHA_WORKSPACE_WORDS];
+
+static u32 cookie_hash(struct in6_addr *saddr, struct in6_addr *daddr,
+		       __be16 sport, __be16 dport, u32 count, int c)
+{
+	__u32 *tmp = __get_cpu_var(cookie_scratch);
+
+	/*
+	 * we have 320 bits of information to hash, copy in the remaining
+	 * 192 bits required for sha_transform, from the syncookie_secret
+	 * and overwrite the digest with the secret
+	 */
+	memcpy(tmp + 10, syncookie_secret[c], 44);
+	memcpy(tmp, saddr, 16);
+	memcpy(tmp + 4, daddr, 16);
+	tmp[8] = ((__force u32)sport << 16) + (__force u32)dport;
+	tmp[9] = count;
+	sha_transform(tmp + 16, (__u8 *)tmp, tmp + 16 + 5);
+
+	return tmp[17];
+}
+
+static __u32 secure_tcp_syn_cookie(struct in6_addr *saddr, struct in6_addr *daddr,
+				   __be16 sport, __be16 dport, __u32 sseq,
+				   __u32 count, __u32 data)
+{
+	return (cookie_hash(saddr, daddr, sport, dport, 0, 0) +
+		sseq + (count << COOKIEBITS) +
+		((cookie_hash(saddr, daddr, sport, dport, count, 1) + data)
+		& COOKIEMASK));
+}
+
+static __u32 check_tcp_syn_cookie(__u32 cookie, struct in6_addr *saddr,
+				  struct in6_addr *daddr, __be16 sport,
+				  __be16 dport, __u32 sseq, __u32 count,
+				  __u32 maxdiff)
+{
+	__u32 diff;
+
+	cookie -= cookie_hash(saddr, daddr, sport, dport, 0, 0) + sseq;
+
+	diff = (count - (cookie >> COOKIEBITS)) & ((__u32) -1 >> COOKIEBITS);
+	if (diff >= maxdiff)
+		return (__u32)-1;
+
+	return (cookie -
+		cookie_hash(saddr, daddr, sport, dport, count - diff, 1))
+		& COOKIEMASK;
+}
+
+__u32 cookie_v6_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
+{
+	struct ipv6hdr *iph = ipv6_hdr(skb);
+	const struct tcphdr *th = tcp_hdr(skb);
+	int mssind;
+	const __u16 mss = *mssp;
+
+	tcp_sk(sk)->last_synq_overflow = jiffies;
+
+	for (mssind = 0; mss > msstab[mssind + 1]; mssind++)
+		;
+	*mssp = msstab[mssind] + 1;
+
+	NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESSENT);
+
+	return secure_tcp_syn_cookie(&iph->saddr, &iph->daddr, th->source,
+				     th->dest, ntohl(th->seq),
+				     jiffies / (HZ * 60), mssind);
+}
+
+static inline int cookie_check(struct sk_buff *skb, __u32 cookie)
+{
+	struct ipv6hdr *iph = ipv6_hdr(skb);
+	const struct tcphdr *th = tcp_hdr(skb);
+	__u32 seq = ntohl(th->seq) - 1;
+	__u32 mssind = check_tcp_syn_cookie(cookie, &iph->saddr, &iph->daddr,
+					    th->source, th->dest, seq,
+					    jiffies / (HZ * 60), COUNTER_TRIES);
+
+	return mssind < NUM_MSS ? msstab[mssind] + 1 : 0;
+}
+
+struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
+{
+	struct inet_request_sock *ireq;
+	struct inet6_request_sock *ireq6;
+	struct tcp_request_sock *treq;
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct tcp_sock *tp = tcp_sk(sk);
+	const struct tcphdr *th = tcp_hdr(skb);
+	__u32 cookie = ntohl(th->ack_seq) - 1;
+	struct sock *ret = sk;
+	struct request_sock *req;
+	int mss;
+	struct dst_entry *dst;
+	__u8 rcv_wscale;
+
+	if (!sysctl_tcp_syncookies || !th->ack)
+		goto out;
+
+	if (time_after(jiffies, tp->last_synq_overflow + TCP_TIMEOUT_INIT) ||
+		(mss = cookie_check(skb, cookie)) == 0) {
+		NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESFAILED);
+		goto out;
+	}
+
+	NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESRECV);
+
+	ret = NULL;
+	req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
+	if (!req)
+		goto out;
+
+	ireq = inet_rsk(req);
+	ireq6 = inet6_rsk(req);
+	treq = tcp_rsk(req);
+	ireq6->pktopts = NULL;
+
+	if (security_inet_conn_request(sk, skb, req)) {
+		reqsk_free(req);
+		goto out;
+	}
+
+	req->mss = mss;
+	ireq->rmt_port = th->source;
+	ipv6_addr_copy(&ireq6->rmt_addr, &ipv6_hdr(skb)->saddr);
+	ipv6_addr_copy(&ireq6->loc_addr, &ipv6_hdr(skb)->daddr);
+	if (ipv6_opt_accepted(sk, skb) ||
+	    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
+	    np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
+		atomic_inc(&skb->users);
+		ireq6->pktopts = skb;
+	}
+
+	ireq6->iif = sk->sk_bound_dev_if;
+	/* So that link locals have meaning */
+	if (!sk->sk_bound_dev_if &&
+	    ipv6_addr_type(&ireq6->rmt_addr) & IPV6_ADDR_LINKLOCAL)
+		ireq6->iif = inet6_iif(skb);
+
+	req->expires = 0UL;
+	req->retrans = 0;
+	ireq->snd_wscale = ireq->rcv_wscale = ireq->tstamp_ok = 0;
+	ireq->wscale_ok = ireq->sack_ok = 0;
+	treq->rcv_isn = ntohl(th->seq) - 1;
+	treq->snt_isn = cookie;
+
+	/*
+	 * We need to lookup the dst_entry to get the correct window size.
+	 * This is taken from tcp_v6_syn_recv_sock.  Somebody please enlighten
+	 * me if there is a preferred way.
+	 */
+	{
+		struct in6_addr *final_p = NULL, final;
+		struct flowi fl;
+		memset(&fl, 0, sizeof(fl));
+		fl.proto = IPPROTO_TCP;
+		ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr);
+		if (np->opt && np->opt->srcrt) {
+			struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
+			ipv6_addr_copy(&final, &fl.fl6_dst);
+			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
+			final_p = &final;
+		}
+		ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr);
+		fl.oif = sk->sk_bound_dev_if;
+		fl.fl_ip_dport = inet_rsk(req)->rmt_port;
+		fl.fl_ip_sport = inet_sk(sk)->sport;
+		security_req_classify_flow(req, &fl);
+		if (ip6_dst_lookup(sk, &dst, &fl)) {
+			reqsk_free(req);
+			goto out;
+		}
+		if (final_p)
+			ipv6_addr_copy(&fl.fl6_dst, final_p);
+		if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
+			goto out;
+	}
+
+	req->window_clamp = dst_metric(dst, RTAX_WINDOW);
+	tcp_select_initial_window(tcp_full_space(sk), req->mss,
+				  &req->rcv_wnd, &req->window_clamp,
+				  0, &rcv_wscale);
+
+	ireq->rcv_wscale = rcv_wscale;
+
+	ret = get_cookie_sock(sk, skb, req, dst);
+
+out:	return ret;
+}
+
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 1cbbb87..fd773ac 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -512,6 +512,20 @@
 	return err;
 }
 
+static inline void syn_flood_warning(struct sk_buff *skb)
+{
+#ifdef CONFIG_SYN_COOKIES
+	if (sysctl_tcp_syncookies)
+		printk(KERN_INFO
+		       "TCPv6: Possible SYN flooding on port %d. "
+		       "Sending cookies.\n", ntohs(tcp_hdr(skb)->dest));
+	else
+#endif
+		printk(KERN_INFO
+		       "TCPv6: Possible SYN flooding on port %d. "
+		       "Dropping request.\n", ntohs(tcp_hdr(skb)->dest));
+}
+
 static void tcp_v6_reqsk_destructor(struct request_sock *req)
 {
 	if (inet6_rsk(req)->pktopts)
@@ -915,7 +929,7 @@
 }
 #endif
 
-static struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
+struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
 	.family		=	AF_INET6,
 	.obj_size	=	sizeof(struct tcp6_request_sock),
 	.rtx_syn_ack	=	tcp_v6_send_synack,
@@ -1213,9 +1227,9 @@
 		return NULL;
 	}
 
-#if 0 /*def CONFIG_SYN_COOKIES*/
+#ifdef CONFIG_SYN_COOKIES
 	if (!th->rst && !th->syn && th->ack)
-		sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
+		sk = cookie_v6_check(sk, skb);
 #endif
 	return sk;
 }
@@ -1231,6 +1245,11 @@
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct request_sock *req = NULL;
 	__u32 isn = TCP_SKB_CB(skb)->when;
+#ifdef CONFIG_SYN_COOKIES
+	int want_cookie = 0;
+#else
+#define want_cookie 0
+#endif
 
 	if (skb->protocol == htons(ETH_P_IP))
 		return tcp_v4_conn_request(sk, skb);
@@ -1238,12 +1257,14 @@
 	if (!ipv6_unicast_destination(skb))
 		goto drop;
 
-	/*
-	 *	There are no SYN attacks on IPv6, yet...
-	 */
 	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
 		if (net_ratelimit())
-			printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
+			syn_flood_warning(skb);
+#ifdef CONFIG_SYN_COOKIES
+		if (sysctl_tcp_syncookies)
+			want_cookie = 1;
+		else
+#endif
 		goto drop;
 	}
 
@@ -1264,29 +1285,39 @@
 
 	tcp_parse_options(skb, &tmp_opt, 0);
 
+	if (want_cookie) {
+		tcp_clear_options(&tmp_opt);
+		tmp_opt.saw_tstamp = 0;
+	}
+
 	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
 	tcp_openreq_init(req, &tmp_opt, skb);
 
 	treq = inet6_rsk(req);
 	ipv6_addr_copy(&treq->rmt_addr, &ipv6_hdr(skb)->saddr);
 	ipv6_addr_copy(&treq->loc_addr, &ipv6_hdr(skb)->daddr);
-	TCP_ECN_create_request(req, tcp_hdr(skb));
 	treq->pktopts = NULL;
-	if (ipv6_opt_accepted(sk, skb) ||
-	    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
-	    np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
-		atomic_inc(&skb->users);
-		treq->pktopts = skb;
-	}
-	treq->iif = sk->sk_bound_dev_if;
+	if (!want_cookie)
+		TCP_ECN_create_request(req, tcp_hdr(skb));
 
-	/* So that link locals have meaning */
-	if (!sk->sk_bound_dev_if &&
-	    ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
-		treq->iif = inet6_iif(skb);
+	if (want_cookie) {
+		isn = cookie_v6_init_sequence(sk, skb, &req->mss);
+	} else if (!isn) {
+		if (ipv6_opt_accepted(sk, skb) ||
+		    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
+		    np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
+			atomic_inc(&skb->users);
+			treq->pktopts = skb;
+		}
+		treq->iif = sk->sk_bound_dev_if;
 
-	if (isn == 0)
+		/* So that link locals have meaning */
+		if (!sk->sk_bound_dev_if &&
+		    ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
+			treq->iif = inet6_iif(skb);
+
 		isn = tcp_v6_init_sequence(skb);
+	}
 
 	tcp_rsk(req)->snt_isn = isn;
 
@@ -1295,8 +1326,10 @@
 	if (tcp_v6_send_synack(sk, req))
 		goto drop;
 
-	inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
-	return 0;
+	if (!want_cookie) {
+		inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
+		return 0;
+	}
 
 drop:
 	if (req)