blob: a82a22d8f77fdca5f496e9d7cb45f40b70d194ca [file] [log] [blame]
/*
 * Copyright (c) 2013 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */
18
19#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21#include <linux/capability.h>
22#include <linux/module.h>
23#include <linux/types.h>
24#include <linux/kernel.h>
25#include <linux/slab.h>
26#include <linux/uaccess.h>
27#include <linux/skbuff.h>
28#include <linux/netdevice.h>
29#include <linux/in.h>
30#include <linux/tcp.h>
31#include <linux/udp.h>
32#include <linux/if_arp.h>
33#include <linux/mroute.h>
34#include <linux/init.h>
35#include <linux/in6.h>
36#include <linux/inetdevice.h>
37#include <linux/igmp.h>
38#include <linux/netfilter_ipv4.h>
39#include <linux/etherdevice.h>
40#include <linux/if_ether.h>
41#include <linux/if_vlan.h>
42#include <linux/rculist.h>
Sachin Kamat27d79f32014-01-27 12:13:57 +053043#include <linux/err.h>
Pravin B Shelarc5441932013-03-25 14:49:35 +000044
45#include <net/sock.h>
46#include <net/ip.h>
47#include <net/icmp.h>
48#include <net/protocol.h>
49#include <net/ip_tunnels.h>
50#include <net/arp.h>
51#include <net/checksum.h>
52#include <net/dsfield.h>
53#include <net/inet_ecn.h>
54#include <net/xfrm.h>
55#include <net/net_namespace.h>
56#include <net/netns/generic.h>
57#include <net/rtnetlink.h>
58
59#if IS_ENABLED(CONFIG_IPV6)
60#include <net/ipv6.h>
61#include <net/ip6_fib.h>
62#include <net/ip6_route.h>
63#endif
64
Duan Jiong967680e2014-01-19 16:43:42 +080065static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
Pravin B Shelarc5441932013-03-25 14:49:35 +000066{
67 return hash_32((__force u32)key ^ (__force u32)remote,
68 IP_TNL_HASH_BITS);
69}
70
Eric Dumazet6c7e7612014-01-16 16:41:19 -080071static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
72 struct dst_entry *dst)
Tom Herbert7d442fa2014-01-02 11:48:26 -080073{
74 struct dst_entry *old_dst;
75
Eric Dumazet6c7e7612014-01-16 16:41:19 -080076 if (dst) {
77 if (dst->flags & DST_NOCACHE)
78 dst = NULL;
79 else
80 dst_clone(dst);
81 }
82 old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
Tom Herbert7d442fa2014-01-02 11:48:26 -080083 dst_release(old_dst);
Tom Herbert7d442fa2014-01-02 11:48:26 -080084}
85
Eric Dumazet6c7e7612014-01-16 16:41:19 -080086static void tunnel_dst_set(struct ip_tunnel *t, struct dst_entry *dst)
Tom Herbert7d442fa2014-01-02 11:48:26 -080087{
Tom Herbert9a4aa9a2014-01-02 11:48:33 -080088 __tunnel_dst_set(this_cpu_ptr(t->dst_cache), dst);
Tom Herbert7d442fa2014-01-02 11:48:26 -080089}
90
Eric Dumazet6c7e7612014-01-16 16:41:19 -080091static void tunnel_dst_reset(struct ip_tunnel *t)
Tom Herbert7d442fa2014-01-02 11:48:26 -080092{
93 tunnel_dst_set(t, NULL);
94}
95
Nicolas Dichtelcf71d2bc2014-02-20 10:19:31 +010096void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
Tom Herbert9a4aa9a2014-01-02 11:48:33 -080097{
98 int i;
99
100 for_each_possible_cpu(i)
101 __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL);
102}
Nicolas Dichtelcf71d2bc2014-02-20 10:19:31 +0100103EXPORT_SYMBOL(ip_tunnel_dst_reset_all);
Tom Herbert9a4aa9a2014-01-02 11:48:33 -0800104
Eric Dumazetb045d372014-02-03 12:52:14 -0800105static struct rtable *tunnel_rtable_get(struct ip_tunnel *t, u32 cookie)
Tom Herbert7d442fa2014-01-02 11:48:26 -0800106{
107 struct dst_entry *dst;
108
109 rcu_read_lock();
Tom Herbert9a4aa9a2014-01-02 11:48:33 -0800110 dst = rcu_dereference(this_cpu_ptr(t->dst_cache)->dst);
Eric Dumazetb045d372014-02-03 12:52:14 -0800111 if (dst) {
112 if (dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
113 rcu_read_unlock();
114 tunnel_dst_reset(t);
115 return NULL;
116 }
Tom Herbert7d442fa2014-01-02 11:48:26 -0800117 dst_hold(dst);
Tom Herbert7d442fa2014-01-02 11:48:26 -0800118 }
Eric Dumazetb045d372014-02-03 12:52:14 -0800119 rcu_read_unlock();
120 return (struct rtable *)dst;
Tom Herbert7d442fa2014-01-02 11:48:26 -0800121}
122
Pravin B Shelarc5441932013-03-25 14:49:35 +0000123static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
124 __be16 flags, __be32 key)
125{
126 if (p->i_flags & TUNNEL_KEY) {
127 if (flags & TUNNEL_KEY)
128 return key == p->i_key;
129 else
130 /* key expected, none present */
131 return false;
132 } else
133 return !(flags & TUNNEL_KEY);
134}
135
136/* Fallback tunnel: no source, no destination, no key, no options
137
138 Tunnel hash table:
139 We require exact key match i.e. if a key is present in packet
140 it will match only tunnel with the same key; if it is not present,
141 it will match only keyless tunnel.
142
143 All keysless packets, if not matched configured keyless tunnels
144 will match fallback tunnel.
145 Given src, dst and key, find appropriate for input tunnel.
146*/
147struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
148 int link, __be16 flags,
149 __be32 remote, __be32 local,
150 __be32 key)
151{
152 unsigned int hash;
153 struct ip_tunnel *t, *cand = NULL;
154 struct hlist_head *head;
155
Duan Jiong967680e2014-01-19 16:43:42 +0800156 hash = ip_tunnel_hash(key, remote);
Pravin B Shelarc5441932013-03-25 14:49:35 +0000157 head = &itn->tunnels[hash];
158
159 hlist_for_each_entry_rcu(t, head, hash_node) {
160 if (local != t->parms.iph.saddr ||
161 remote != t->parms.iph.daddr ||
162 !(t->dev->flags & IFF_UP))
163 continue;
164
165 if (!ip_tunnel_key_match(&t->parms, flags, key))
166 continue;
167
168 if (t->parms.link == link)
169 return t;
170 else
171 cand = t;
172 }
173
174 hlist_for_each_entry_rcu(t, head, hash_node) {
175 if (remote != t->parms.iph.daddr ||
176 !(t->dev->flags & IFF_UP))
177 continue;
178
179 if (!ip_tunnel_key_match(&t->parms, flags, key))
180 continue;
181
182 if (t->parms.link == link)
183 return t;
184 else if (!cand)
185 cand = t;
186 }
187
Duan Jiong967680e2014-01-19 16:43:42 +0800188 hash = ip_tunnel_hash(key, 0);
Pravin B Shelarc5441932013-03-25 14:49:35 +0000189 head = &itn->tunnels[hash];
190
191 hlist_for_each_entry_rcu(t, head, hash_node) {
192 if ((local != t->parms.iph.saddr &&
193 (local != t->parms.iph.daddr ||
194 !ipv4_is_multicast(local))) ||
195 !(t->dev->flags & IFF_UP))
196 continue;
197
198 if (!ip_tunnel_key_match(&t->parms, flags, key))
199 continue;
200
201 if (t->parms.link == link)
202 return t;
203 else if (!cand)
204 cand = t;
205 }
206
207 if (flags & TUNNEL_NO_KEY)
208 goto skip_key_lookup;
209
210 hlist_for_each_entry_rcu(t, head, hash_node) {
211 if (t->parms.i_key != key ||
212 !(t->dev->flags & IFF_UP))
213 continue;
214
215 if (t->parms.link == link)
216 return t;
217 else if (!cand)
218 cand = t;
219 }
220
221skip_key_lookup:
222 if (cand)
223 return cand;
224
225 if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
226 return netdev_priv(itn->fb_tunnel_dev);
227
228
229 return NULL;
230}
231EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
232
233static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
234 struct ip_tunnel_parm *parms)
235{
236 unsigned int h;
237 __be32 remote;
238
239 if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
240 remote = parms->iph.daddr;
241 else
242 remote = 0;
243
Duan Jiong967680e2014-01-19 16:43:42 +0800244 h = ip_tunnel_hash(parms->i_key, remote);
Pravin B Shelarc5441932013-03-25 14:49:35 +0000245 return &itn->tunnels[h];
246}
247
248static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
249{
250 struct hlist_head *head = ip_bucket(itn, &t->parms);
251
252 hlist_add_head_rcu(&t->hash_node, head);
253}
254
255static void ip_tunnel_del(struct ip_tunnel *t)
256{
257 hlist_del_init_rcu(&t->hash_node);
258}
259
260static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
261 struct ip_tunnel_parm *parms,
262 int type)
263{
264 __be32 remote = parms->iph.daddr;
265 __be32 local = parms->iph.saddr;
266 __be32 key = parms->i_key;
267 int link = parms->link;
268 struct ip_tunnel *t = NULL;
269 struct hlist_head *head = ip_bucket(itn, parms);
270
271 hlist_for_each_entry_rcu(t, head, hash_node) {
272 if (local == t->parms.iph.saddr &&
273 remote == t->parms.iph.daddr &&
274 key == t->parms.i_key &&
275 link == t->parms.link &&
276 type == t->dev->type)
277 break;
278 }
279 return t;
280}
281
282static struct net_device *__ip_tunnel_create(struct net *net,
283 const struct rtnl_link_ops *ops,
284 struct ip_tunnel_parm *parms)
285{
286 int err;
287 struct ip_tunnel *tunnel;
288 struct net_device *dev;
289 char name[IFNAMSIZ];
290
291 if (parms->name[0])
292 strlcpy(name, parms->name, IFNAMSIZ);
293 else {
Pravin B Shelar54a5d382013-03-28 08:21:46 +0000294 if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
Pravin B Shelarc5441932013-03-25 14:49:35 +0000295 err = -E2BIG;
296 goto failed;
297 }
298 strlcpy(name, ops->kind, IFNAMSIZ);
299 strncat(name, "%d", 2);
300 }
301
302 ASSERT_RTNL();
303 dev = alloc_netdev(ops->priv_size, name, ops->setup);
304 if (!dev) {
305 err = -ENOMEM;
306 goto failed;
307 }
308 dev_net_set(dev, net);
309
310 dev->rtnl_link_ops = ops;
311
312 tunnel = netdev_priv(dev);
313 tunnel->parms = *parms;
Nicolas Dichtel5e6700b2013-06-26 16:11:28 +0200314 tunnel->net = net;
Pravin B Shelarc5441932013-03-25 14:49:35 +0000315
316 err = register_netdevice(dev);
317 if (err)
318 goto failed_free;
319
320 return dev;
321
322failed_free:
323 free_netdev(dev);
324failed:
325 return ERR_PTR(err);
326}
327
Tom Herbert7d442fa2014-01-02 11:48:26 -0800328static inline void init_tunnel_flow(struct flowi4 *fl4,
329 int proto,
330 __be32 daddr, __be32 saddr,
331 __be32 key, __u8 tos, int oif)
Pravin B Shelarc5441932013-03-25 14:49:35 +0000332{
333 memset(fl4, 0, sizeof(*fl4));
334 fl4->flowi4_oif = oif;
335 fl4->daddr = daddr;
336 fl4->saddr = saddr;
337 fl4->flowi4_tos = tos;
338 fl4->flowi4_proto = proto;
339 fl4->fl4_gre_key = key;
Pravin B Shelarc5441932013-03-25 14:49:35 +0000340}
341
342static int ip_tunnel_bind_dev(struct net_device *dev)
343{
344 struct net_device *tdev = NULL;
345 struct ip_tunnel *tunnel = netdev_priv(dev);
346 const struct iphdr *iph;
347 int hlen = LL_MAX_HEADER;
348 int mtu = ETH_DATA_LEN;
349 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
350
351 iph = &tunnel->parms.iph;
352
353 /* Guess output device to choose reasonable mtu and needed_headroom */
354 if (iph->daddr) {
355 struct flowi4 fl4;
356 struct rtable *rt;
357
Tom Herbert7d442fa2014-01-02 11:48:26 -0800358 init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
359 iph->saddr, tunnel->parms.o_key,
360 RT_TOS(iph->tos), tunnel->parms.link);
361 rt = ip_route_output_key(tunnel->net, &fl4);
362
Pravin B Shelarc5441932013-03-25 14:49:35 +0000363 if (!IS_ERR(rt)) {
364 tdev = rt->dst.dev;
Eric Dumazet6c7e7612014-01-16 16:41:19 -0800365 tunnel_dst_set(tunnel, &rt->dst);
Pravin B Shelarc5441932013-03-25 14:49:35 +0000366 ip_rt_put(rt);
367 }
368 if (dev->type != ARPHRD_ETHER)
369 dev->flags |= IFF_POINTOPOINT;
370 }
371
372 if (!tdev && tunnel->parms.link)
Nicolas Dichtel6c742e72013-08-13 17:51:11 +0200373 tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
Pravin B Shelarc5441932013-03-25 14:49:35 +0000374
375 if (tdev) {
376 hlen = tdev->hard_header_len + tdev->needed_headroom;
377 mtu = tdev->mtu;
378 }
379 dev->iflink = tunnel->parms.link;
380
381 dev->needed_headroom = t_hlen + hlen;
382 mtu -= (dev->hard_header_len + t_hlen);
383
384 if (mtu < 68)
385 mtu = 68;
386
387 return mtu;
388}
389
390static struct ip_tunnel *ip_tunnel_create(struct net *net,
391 struct ip_tunnel_net *itn,
392 struct ip_tunnel_parm *parms)
393{
394 struct ip_tunnel *nt, *fbt;
395 struct net_device *dev;
396
397 BUG_ON(!itn->fb_tunnel_dev);
398 fbt = netdev_priv(itn->fb_tunnel_dev);
399 dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
400 if (IS_ERR(dev))
401 return NULL;
402
403 dev->mtu = ip_tunnel_bind_dev(dev);
404
405 nt = netdev_priv(dev);
406 ip_tunnel_add(itn, nt);
407 return nt;
408}
409
410int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
411 const struct tnl_ptk_info *tpi, bool log_ecn_error)
412{
Li RongQing8f849852014-01-04 13:57:59 +0800413 struct pcpu_sw_netstats *tstats;
Pravin B Shelarc5441932013-03-25 14:49:35 +0000414 const struct iphdr *iph = ip_hdr(skb);
415 int err;
416
Pravin B Shelarc5441932013-03-25 14:49:35 +0000417#ifdef CONFIG_NET_IPGRE_BROADCAST
418 if (ipv4_is_multicast(iph->daddr)) {
Pravin B Shelarc5441932013-03-25 14:49:35 +0000419 tunnel->dev->stats.multicast++;
420 skb->pkt_type = PACKET_BROADCAST;
421 }
422#endif
423
424 if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
425 ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
426 tunnel->dev->stats.rx_crc_errors++;
427 tunnel->dev->stats.rx_errors++;
428 goto drop;
429 }
430
431 if (tunnel->parms.i_flags&TUNNEL_SEQ) {
432 if (!(tpi->flags&TUNNEL_SEQ) ||
433 (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
434 tunnel->dev->stats.rx_fifo_errors++;
435 tunnel->dev->stats.rx_errors++;
436 goto drop;
437 }
438 tunnel->i_seqno = ntohl(tpi->seq) + 1;
439 }
440
Pravin B Shelarc5441932013-03-25 14:49:35 +0000441 err = IP_ECN_decapsulate(iph, skb);
442 if (unlikely(err)) {
443 if (log_ecn_error)
444 net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
445 &iph->saddr, iph->tos);
446 if (err > 1) {
447 ++tunnel->dev->stats.rx_frame_errors;
448 ++tunnel->dev->stats.rx_errors;
449 goto drop;
450 }
451 }
452
453 tstats = this_cpu_ptr(tunnel->dev->tstats);
454 u64_stats_update_begin(&tstats->syncp);
455 tstats->rx_packets++;
456 tstats->rx_bytes += skb->len;
457 u64_stats_update_end(&tstats->syncp);
458
Alexei Starovoitov81b9eab2013-11-12 14:39:13 -0800459 skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
460
Pravin B Shelar3d7b46c2013-06-17 17:50:02 -0700461 if (tunnel->dev->type == ARPHRD_ETHER) {
462 skb->protocol = eth_type_trans(skb, tunnel->dev);
463 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
464 } else {
465 skb->dev = tunnel->dev;
466 }
Nicolas Dichtel64261f22013-08-13 17:51:09 +0200467
Pravin B Shelarc5441932013-03-25 14:49:35 +0000468 gro_cells_receive(&tunnel->gro_cells, skb);
469 return 0;
470
471drop:
472 kfree_skb(skb);
473 return 0;
474}
475EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
476
Pravin B Shelar23a36472013-07-02 10:57:33 -0700477static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
478 struct rtable *rt, __be16 df)
479{
480 struct ip_tunnel *tunnel = netdev_priv(dev);
Alexander Duyck8c91e162013-07-11 13:12:22 -0700481 int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
Pravin B Shelar23a36472013-07-02 10:57:33 -0700482 int mtu;
483
484 if (df)
485 mtu = dst_mtu(&rt->dst) - dev->hard_header_len
486 - sizeof(struct iphdr) - tunnel->hlen;
487 else
488 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
489
490 if (skb_dst(skb))
491 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
492
493 if (skb->protocol == htons(ETH_P_IP)) {
494 if (!skb_is_gso(skb) &&
495 (df & htons(IP_DF)) && mtu < pkt_size) {
496 memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
497 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
498 return -E2BIG;
499 }
500 }
501#if IS_ENABLED(CONFIG_IPV6)
502 else if (skb->protocol == htons(ETH_P_IPV6)) {
503 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
504
505 if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
506 mtu >= IPV6_MIN_MTU) {
507 if ((tunnel->parms.iph.daddr &&
508 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
509 rt6->rt6i_dst.plen == 128) {
510 rt6->rt6i_flags |= RTF_MODIFIED;
511 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
512 }
513 }
514
515 if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
516 mtu < pkt_size) {
517 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
518 return -E2BIG;
519 }
520 }
521#endif
522 return 0;
523}
524
Pravin B Shelarc5441932013-03-25 14:49:35 +0000525void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
Nicolas Dichtelbf3d6a82013-05-27 23:48:15 +0000526 const struct iphdr *tnl_params, const u8 protocol)
Pravin B Shelarc5441932013-03-25 14:49:35 +0000527{
528 struct ip_tunnel *tunnel = netdev_priv(dev);
529 const struct iphdr *inner_iph;
Pravin B Shelarc5441932013-03-25 14:49:35 +0000530 struct flowi4 fl4;
531 u8 tos, ttl;
532 __be16 df;
Eric Dumazetb045d372014-02-03 12:52:14 -0800533 struct rtable *rt; /* Route to the other host */
Pravin B Shelarc5441932013-03-25 14:49:35 +0000534 unsigned int max_headroom; /* The extra header space needed */
535 __be32 dst;
Pravin B Shelar0e6fbc52013-06-17 17:49:56 -0700536 int err;
Tom Herbert7d442fa2014-01-02 11:48:26 -0800537 bool connected = true;
Pravin B Shelarc5441932013-03-25 14:49:35 +0000538
539 inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
540
541 dst = tnl_params->daddr;
542 if (dst == 0) {
543 /* NBMA tunnel */
544
545 if (skb_dst(skb) == NULL) {
546 dev->stats.tx_fifo_errors++;
547 goto tx_error;
548 }
549
550 if (skb->protocol == htons(ETH_P_IP)) {
551 rt = skb_rtable(skb);
552 dst = rt_nexthop(rt, inner_iph->daddr);
553 }
554#if IS_ENABLED(CONFIG_IPV6)
555 else if (skb->protocol == htons(ETH_P_IPV6)) {
556 const struct in6_addr *addr6;
557 struct neighbour *neigh;
558 bool do_tx_error_icmp;
559 int addr_type;
560
561 neigh = dst_neigh_lookup(skb_dst(skb),
562 &ipv6_hdr(skb)->daddr);
563 if (neigh == NULL)
564 goto tx_error;
565
566 addr6 = (const struct in6_addr *)&neigh->primary_key;
567 addr_type = ipv6_addr_type(addr6);
568
569 if (addr_type == IPV6_ADDR_ANY) {
570 addr6 = &ipv6_hdr(skb)->daddr;
571 addr_type = ipv6_addr_type(addr6);
572 }
573
574 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
575 do_tx_error_icmp = true;
576 else {
577 do_tx_error_icmp = false;
578 dst = addr6->s6_addr32[3];
579 }
580 neigh_release(neigh);
581 if (do_tx_error_icmp)
582 goto tx_error_icmp;
583 }
584#endif
585 else
586 goto tx_error;
Tom Herbert7d442fa2014-01-02 11:48:26 -0800587
588 connected = false;
Pravin B Shelarc5441932013-03-25 14:49:35 +0000589 }
590
591 tos = tnl_params->tos;
592 if (tos & 0x1) {
593 tos &= ~0x1;
Tom Herbert7d442fa2014-01-02 11:48:26 -0800594 if (skb->protocol == htons(ETH_P_IP)) {
Pravin B Shelarc5441932013-03-25 14:49:35 +0000595 tos = inner_iph->tos;
Tom Herbert7d442fa2014-01-02 11:48:26 -0800596 connected = false;
597 } else if (skb->protocol == htons(ETH_P_IPV6)) {
Pravin B Shelarc5441932013-03-25 14:49:35 +0000598 tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
Tom Herbert7d442fa2014-01-02 11:48:26 -0800599 connected = false;
600 }
Pravin B Shelarc5441932013-03-25 14:49:35 +0000601 }
602
Tom Herbert7d442fa2014-01-02 11:48:26 -0800603 init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
604 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);
605
Eric Dumazetb045d372014-02-03 12:52:14 -0800606 rt = connected ? tunnel_rtable_get(tunnel, 0) : NULL;
Tom Herbert7d442fa2014-01-02 11:48:26 -0800607
608 if (!rt) {
609 rt = ip_route_output_key(tunnel->net, &fl4);
610
611 if (IS_ERR(rt)) {
612 dev->stats.tx_carrier_errors++;
613 goto tx_error;
614 }
615 if (connected)
Eric Dumazet6c7e7612014-01-16 16:41:19 -0800616 tunnel_dst_set(tunnel, &rt->dst);
Pravin B Shelarc5441932013-03-25 14:49:35 +0000617 }
Tom Herbert7d442fa2014-01-02 11:48:26 -0800618
Pravin B Shelar0e6fbc52013-06-17 17:49:56 -0700619 if (rt->dst.dev == dev) {
Pravin B Shelarc5441932013-03-25 14:49:35 +0000620 ip_rt_put(rt);
621 dev->stats.collisions++;
622 goto tx_error;
623 }
Pravin B Shelarc5441932013-03-25 14:49:35 +0000624
Pravin B Shelar23a36472013-07-02 10:57:33 -0700625 if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) {
626 ip_rt_put(rt);
627 goto tx_error;
Pravin B Shelarc5441932013-03-25 14:49:35 +0000628 }
Pravin B Shelarc5441932013-03-25 14:49:35 +0000629
630 if (tunnel->err_count > 0) {
631 if (time_before(jiffies,
632 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
633 tunnel->err_count--;
634
Duan Jiong11c21a32014-01-23 14:00:25 +0800635 memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
Pravin B Shelarc5441932013-03-25 14:49:35 +0000636 dst_link_failure(skb);
637 } else
638 tunnel->err_count = 0;
639 }
640
Pravin B Shelard4a71b12013-09-25 09:57:47 -0700641 tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
Pravin B Shelarc5441932013-03-25 14:49:35 +0000642 ttl = tnl_params->ttl;
643 if (ttl == 0) {
644 if (skb->protocol == htons(ETH_P_IP))
645 ttl = inner_iph->ttl;
646#if IS_ENABLED(CONFIG_IPV6)
647 else if (skb->protocol == htons(ETH_P_IPV6))
648 ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
649#endif
650 else
651 ttl = ip4_dst_hoplimit(&rt->dst);
652 }
653
Pravin B Shelar23a36472013-07-02 10:57:33 -0700654 df = tnl_params->frag_off;
655 if (skb->protocol == htons(ETH_P_IP))
656 df |= (inner_iph->frag_off&htons(IP_DF));
657
Pravin B Shelar0e6fbc52013-06-17 17:49:56 -0700658 max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
659 + rt->dst.header_len;
Steffen Klassert3e08f4a2013-10-01 11:33:59 +0200660 if (max_headroom > dev->needed_headroom)
Pravin B Shelarc5441932013-03-25 14:49:35 +0000661 dev->needed_headroom = max_headroom;
Steffen Klassert3e08f4a2013-10-01 11:33:59 +0200662
663 if (skb_cow_head(skb, dev->needed_headroom)) {
664 dev->stats.tx_dropped++;
Eric Dumazet3acfa1e2014-01-18 18:27:49 -0800665 kfree_skb(skb);
Steffen Klassert3e08f4a2013-10-01 11:33:59 +0200666 return;
Pravin B Shelarc5441932013-03-25 14:49:35 +0000667 }
668
Nicolas Dichtel8b7ed2d2013-09-02 15:34:54 +0200669 err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, protocol,
Pravin B Shelard4a71b12013-09-25 09:57:47 -0700670 tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
Pravin B Shelar0e6fbc52013-06-17 17:49:56 -0700671 iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
Pravin B Shelarc5441932013-03-25 14:49:35 +0000672
Pravin B Shelarc5441932013-03-25 14:49:35 +0000673 return;
674
675#if IS_ENABLED(CONFIG_IPV6)
676tx_error_icmp:
677 dst_link_failure(skb);
678#endif
679tx_error:
680 dev->stats.tx_errors++;
Eric Dumazet3acfa1e2014-01-18 18:27:49 -0800681 kfree_skb(skb);
Pravin B Shelarc5441932013-03-25 14:49:35 +0000682}
683EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
684
685static void ip_tunnel_update(struct ip_tunnel_net *itn,
686 struct ip_tunnel *t,
687 struct net_device *dev,
688 struct ip_tunnel_parm *p,
689 bool set_mtu)
690{
691 ip_tunnel_del(t);
692 t->parms.iph.saddr = p->iph.saddr;
693 t->parms.iph.daddr = p->iph.daddr;
694 t->parms.i_key = p->i_key;
695 t->parms.o_key = p->o_key;
696 if (dev->type != ARPHRD_ETHER) {
697 memcpy(dev->dev_addr, &p->iph.saddr, 4);
698 memcpy(dev->broadcast, &p->iph.daddr, 4);
699 }
700 ip_tunnel_add(itn, t);
701
702 t->parms.iph.ttl = p->iph.ttl;
703 t->parms.iph.tos = p->iph.tos;
704 t->parms.iph.frag_off = p->iph.frag_off;
705
706 if (t->parms.link != p->link) {
707 int mtu;
708
709 t->parms.link = p->link;
710 mtu = ip_tunnel_bind_dev(dev);
711 if (set_mtu)
712 dev->mtu = mtu;
713 }
Nicolas Dichtelcf71d2bc2014-02-20 10:19:31 +0100714 ip_tunnel_dst_reset_all(t);
Pravin B Shelarc5441932013-03-25 14:49:35 +0000715 netdev_state_change(dev);
716}
717
718int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
719{
720 int err = 0;
721 struct ip_tunnel *t;
722 struct net *net = dev_net(dev);
723 struct ip_tunnel *tunnel = netdev_priv(dev);
724 struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
725
726 BUG_ON(!itn->fb_tunnel_dev);
727 switch (cmd) {
728 case SIOCGETTUNNEL:
729 t = NULL;
730 if (dev == itn->fb_tunnel_dev)
731 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
732 if (t == NULL)
733 t = netdev_priv(dev);
734 memcpy(p, &t->parms, sizeof(*p));
735 break;
736
737 case SIOCADDTUNNEL:
738 case SIOCCHGTUNNEL:
739 err = -EPERM;
740 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
741 goto done;
742 if (p->iph.ttl)
743 p->iph.frag_off |= htons(IP_DF);
744 if (!(p->i_flags&TUNNEL_KEY))
745 p->i_key = 0;
746 if (!(p->o_flags&TUNNEL_KEY))
747 p->o_key = 0;
748
749 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
750
751 if (!t && (cmd == SIOCADDTUNNEL))
752 t = ip_tunnel_create(net, itn, p);
753
754 if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
755 if (t != NULL) {
756 if (t->dev != dev) {
757 err = -EEXIST;
758 break;
759 }
760 } else {
761 unsigned int nflags = 0;
762
763 if (ipv4_is_multicast(p->iph.daddr))
764 nflags = IFF_BROADCAST;
765 else if (p->iph.daddr)
766 nflags = IFF_POINTOPOINT;
767
768 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
769 err = -EINVAL;
770 break;
771 }
772
773 t = netdev_priv(dev);
774 }
775 }
776
777 if (t) {
778 err = 0;
779 ip_tunnel_update(itn, t, dev, p, true);
780 } else
781 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
782 break;
783
784 case SIOCDELTUNNEL:
785 err = -EPERM;
786 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
787 goto done;
788
789 if (dev == itn->fb_tunnel_dev) {
790 err = -ENOENT;
791 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
792 if (t == NULL)
793 goto done;
794 err = -EPERM;
795 if (t == netdev_priv(itn->fb_tunnel_dev))
796 goto done;
797 dev = t->dev;
798 }
799 unregister_netdevice(dev);
800 err = 0;
801 break;
802
803 default:
804 err = -EINVAL;
805 }
806
807done:
808 return err;
809}
810EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
811
812int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
813{
814 struct ip_tunnel *tunnel = netdev_priv(dev);
815 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
816
817 if (new_mtu < 68 ||
818 new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
819 return -EINVAL;
820 dev->mtu = new_mtu;
821 return 0;
822}
823EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
824
825static void ip_tunnel_dev_free(struct net_device *dev)
826{
827 struct ip_tunnel *tunnel = netdev_priv(dev);
828
829 gro_cells_destroy(&tunnel->gro_cells);
Tom Herbert9a4aa9a2014-01-02 11:48:33 -0800830 free_percpu(tunnel->dst_cache);
Pravin B Shelarc5441932013-03-25 14:49:35 +0000831 free_percpu(dev->tstats);
832 free_netdev(dev);
833}
834
835void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
836{
Pravin B Shelarc5441932013-03-25 14:49:35 +0000837 struct ip_tunnel *tunnel = netdev_priv(dev);
838 struct ip_tunnel_net *itn;
839
Nicolas Dichtel6c742e72013-08-13 17:51:11 +0200840 itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
Pravin B Shelarc5441932013-03-25 14:49:35 +0000841
842 if (itn->fb_tunnel_dev != dev) {
843 ip_tunnel_del(netdev_priv(dev));
844 unregister_netdevice_queue(dev, head);
845 }
846}
847EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
848
Eric Dumazetd3b6f612013-06-07 13:26:05 -0700849int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
Pravin B Shelarc5441932013-03-25 14:49:35 +0000850 struct rtnl_link_ops *ops, char *devname)
851{
852 struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
853 struct ip_tunnel_parm parms;
stephen hemminger6261d982013-08-05 22:51:37 -0700854 unsigned int i;
Pravin B Shelarc5441932013-03-25 14:49:35 +0000855
stephen hemminger6261d982013-08-05 22:51:37 -0700856 for (i = 0; i < IP_TNL_HASH_SIZE; i++)
857 INIT_HLIST_HEAD(&itn->tunnels[i]);
Pravin B Shelarc5441932013-03-25 14:49:35 +0000858
859 if (!ops) {
860 itn->fb_tunnel_dev = NULL;
861 return 0;
862 }
stephen hemminger6261d982013-08-05 22:51:37 -0700863
Pravin B Shelarc5441932013-03-25 14:49:35 +0000864 memset(&parms, 0, sizeof(parms));
865 if (devname)
866 strlcpy(parms.name, devname, IFNAMSIZ);
867
868 rtnl_lock();
869 itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
Dan Carpenterea857f22013-08-19 10:05:10 +0300870 /* FB netdevice is special: we have one, and only one per netns.
871 * Allowing to move it to another netns is clearly unsafe.
872 */
Steffen Klassert67013282013-10-01 11:34:48 +0200873 if (!IS_ERR(itn->fb_tunnel_dev)) {
Dan Carpenterb4de77a2013-08-23 11:15:37 +0300874 itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
Steffen Klassert67013282013-10-01 11:34:48 +0200875 ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
876 }
Dan Carpenterb4de77a2013-08-23 11:15:37 +0300877 rtnl_unlock();
Pravin B Shelarc5441932013-03-25 14:49:35 +0000878
Sachin Kamat27d79f32014-01-27 12:13:57 +0530879 return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
Pravin B Shelarc5441932013-03-25 14:49:35 +0000880}
881EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
882
Nicolas Dichtel6c742e72013-08-13 17:51:11 +0200883static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
884 struct rtnl_link_ops *ops)
Pravin B Shelarc5441932013-03-25 14:49:35 +0000885{
Nicolas Dichtel6c742e72013-08-13 17:51:11 +0200886 struct net *net = dev_net(itn->fb_tunnel_dev);
887 struct net_device *dev, *aux;
Pravin B Shelarc5441932013-03-25 14:49:35 +0000888 int h;
889
Nicolas Dichtel6c742e72013-08-13 17:51:11 +0200890 for_each_netdev_safe(net, dev, aux)
891 if (dev->rtnl_link_ops == ops)
892 unregister_netdevice_queue(dev, head);
893
Pravin B Shelarc5441932013-03-25 14:49:35 +0000894 for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
895 struct ip_tunnel *t;
896 struct hlist_node *n;
897 struct hlist_head *thead = &itn->tunnels[h];
898
899 hlist_for_each_entry_safe(t, n, thead, hash_node)
Nicolas Dichtel6c742e72013-08-13 17:51:11 +0200900 /* If dev is in the same netns, it has already
901 * been added to the list by the previous loop.
902 */
903 if (!net_eq(dev_net(t->dev), net))
904 unregister_netdevice_queue(t->dev, head);
Pravin B Shelarc5441932013-03-25 14:49:35 +0000905 }
Pravin B Shelarc5441932013-03-25 14:49:35 +0000906}
907
Nicolas Dichtel6c742e72013-08-13 17:51:11 +0200908void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
Pravin B Shelarc5441932013-03-25 14:49:35 +0000909{
910 LIST_HEAD(list);
911
912 rtnl_lock();
Nicolas Dichtel6c742e72013-08-13 17:51:11 +0200913 ip_tunnel_destroy(itn, &list, ops);
Pravin B Shelarc5441932013-03-25 14:49:35 +0000914 unregister_netdevice_many(&list);
915 rtnl_unlock();
Pravin B Shelarc5441932013-03-25 14:49:35 +0000916}
917EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
918
919int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
920 struct ip_tunnel_parm *p)
921{
922 struct ip_tunnel *nt;
923 struct net *net = dev_net(dev);
924 struct ip_tunnel_net *itn;
925 int mtu;
926 int err;
927
928 nt = netdev_priv(dev);
929 itn = net_generic(net, nt->ip_tnl_net_id);
930
931 if (ip_tunnel_find(itn, p, dev->type))
932 return -EEXIST;
933
Nicolas Dichtel5e6700b2013-06-26 16:11:28 +0200934 nt->net = net;
Pravin B Shelarc5441932013-03-25 14:49:35 +0000935 nt->parms = *p;
936 err = register_netdevice(dev);
937 if (err)
938 goto out;
939
940 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
941 eth_hw_addr_random(dev);
942
943 mtu = ip_tunnel_bind_dev(dev);
944 if (!tb[IFLA_MTU])
945 dev->mtu = mtu;
946
947 ip_tunnel_add(itn, nt);
948
949out:
950 return err;
951}
952EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
953
954int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
955 struct ip_tunnel_parm *p)
956{
Nicolas Dichtel6c742e72013-08-13 17:51:11 +0200957 struct ip_tunnel *t;
Pravin B Shelarc5441932013-03-25 14:49:35 +0000958 struct ip_tunnel *tunnel = netdev_priv(dev);
Nicolas Dichtel6c742e72013-08-13 17:51:11 +0200959 struct net *net = tunnel->net;
Pravin B Shelarc5441932013-03-25 14:49:35 +0000960 struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
961
962 if (dev == itn->fb_tunnel_dev)
963 return -EINVAL;
964
Pravin B Shelarc5441932013-03-25 14:49:35 +0000965 t = ip_tunnel_find(itn, p, dev->type);
966
967 if (t) {
968 if (t->dev != dev)
969 return -EEXIST;
970 } else {
Nicolas Dichtel6c742e72013-08-13 17:51:11 +0200971 t = tunnel;
Pravin B Shelarc5441932013-03-25 14:49:35 +0000972
973 if (dev->type != ARPHRD_ETHER) {
974 unsigned int nflags = 0;
975
976 if (ipv4_is_multicast(p->iph.daddr))
977 nflags = IFF_BROADCAST;
978 else if (p->iph.daddr)
979 nflags = IFF_POINTOPOINT;
980
981 if ((dev->flags ^ nflags) &
982 (IFF_POINTOPOINT | IFF_BROADCAST))
983 return -EINVAL;
984 }
985 }
986
987 ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
988 return 0;
989}
990EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
991
992int ip_tunnel_init(struct net_device *dev)
993{
994 struct ip_tunnel *tunnel = netdev_priv(dev);
995 struct iphdr *iph = &tunnel->parms.iph;
John Stultz827da442013-10-07 15:51:58 -0700996 int i, err;
Pravin B Shelarc5441932013-03-25 14:49:35 +0000997
998 dev->destructor = ip_tunnel_dev_free;
Li RongQing8f849852014-01-04 13:57:59 +0800999 dev->tstats = alloc_percpu(struct pcpu_sw_netstats);
Pravin B Shelarc5441932013-03-25 14:49:35 +00001000 if (!dev->tstats)
1001 return -ENOMEM;
1002
John Stultz827da442013-10-07 15:51:58 -07001003 for_each_possible_cpu(i) {
Li RongQing8f849852014-01-04 13:57:59 +08001004 struct pcpu_sw_netstats *ipt_stats;
John Stultz827da442013-10-07 15:51:58 -07001005 ipt_stats = per_cpu_ptr(dev->tstats, i);
1006 u64_stats_init(&ipt_stats->syncp);
1007 }
1008
Tom Herbert9a4aa9a2014-01-02 11:48:33 -08001009 tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
1010 if (!tunnel->dst_cache) {
1011 free_percpu(dev->tstats);
1012 return -ENOMEM;
1013 }
1014
Pravin B Shelarc5441932013-03-25 14:49:35 +00001015 err = gro_cells_init(&tunnel->gro_cells, dev);
1016 if (err) {
Tom Herbert9a4aa9a2014-01-02 11:48:33 -08001017 free_percpu(tunnel->dst_cache);
Pravin B Shelarc5441932013-03-25 14:49:35 +00001018 free_percpu(dev->tstats);
1019 return err;
1020 }
1021
1022 tunnel->dev = dev;
Nicolas Dichtel6c742e72013-08-13 17:51:11 +02001023 tunnel->net = dev_net(dev);
Pravin B Shelarc5441932013-03-25 14:49:35 +00001024 strcpy(tunnel->parms.name, dev->name);
1025 iph->version = 4;
1026 iph->ihl = 5;
1027
1028 return 0;
1029}
1030EXPORT_SYMBOL_GPL(ip_tunnel_init);
1031
1032void ip_tunnel_uninit(struct net_device *dev)
1033{
Pravin B Shelarc5441932013-03-25 14:49:35 +00001034 struct ip_tunnel *tunnel = netdev_priv(dev);
Nicolas Dichtel6c742e72013-08-13 17:51:11 +02001035 struct net *net = tunnel->net;
Pravin B Shelarc5441932013-03-25 14:49:35 +00001036 struct ip_tunnel_net *itn;
1037
1038 itn = net_generic(net, tunnel->ip_tnl_net_id);
1039 /* fb_tunnel_dev will be unregisted in net-exit call. */
1040 if (itn->fb_tunnel_dev != dev)
1041 ip_tunnel_del(netdev_priv(dev));
Tom Herbert7d442fa2014-01-02 11:48:26 -08001042
Nicolas Dichtelcf71d2bc2014-02-20 10:19:31 +01001043 ip_tunnel_dst_reset_all(tunnel);
Pravin B Shelarc5441932013-03-25 14:49:35 +00001044}
1045EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1046
1047/* Do least required initialization, rest of init is done in tunnel_init call */
1048void ip_tunnel_setup(struct net_device *dev, int net_id)
1049{
1050 struct ip_tunnel *tunnel = netdev_priv(dev);
1051 tunnel->ip_tnl_net_id = net_id;
1052}
1053EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1054
1055MODULE_LICENSE("GPL");