]> git.kernelconcepts.de Git - karo-tx-linux.git/blob - net/ipv6/route.c
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/signal
[karo-tx-linux.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13
14 /*      Changes:
15  *
16  *      YOSHIFUJI Hideaki @USAGI
17  *              reworked default router selection.
18  *              - respect outgoing interface
19  *              - select from (probably) reachable routers (i.e.
20  *              routers in REACHABLE, STALE, DELAY or PROBE states).
21  *              - always select the same router if it is (probably)
22  *              reachable.  otherwise, round-robin the list.
23  *      Ville Nuorvala
24  *              Fixed routing subtrees.
25  */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60
61 #include <asm/uaccess.h>
62
63 #ifdef CONFIG_SYSCTL
64 #include <linux/sysctl.h>
65 #endif
66
/*
 * Forward declarations of file-local helpers and dst callbacks.  Several of
 * them are referenced by the dst_ops tables and rt6_info templates below
 * before their definitions appear.
 */
67 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
68                                     const struct in6_addr *dest);
69 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
70 static unsigned int      ip6_default_advmss(const struct dst_entry *dst);
71 static unsigned int      ip6_mtu(const struct dst_entry *dst);
72 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
73 static void             ip6_dst_destroy(struct dst_entry *);
74 static void             ip6_dst_ifdown(struct dst_entry *,
75                                        struct net_device *dev, int how);
76 static int               ip6_dst_gc(struct dst_ops *ops);
77
/* Packet handlers and PMTU/redirect callbacks. */
78 static int              ip6_pkt_discard(struct sk_buff *skb);
79 static int              ip6_pkt_discard_out(struct sk_buff *skb);
80 static void             ip6_link_failure(struct sk_buff *skb);
81 static void             ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
82                                            struct sk_buff *skb, u32 mtu);
83 static void             rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
84                                         struct sk_buff *skb);
85
/* Route-information helpers, only built with CONFIG_IPV6_ROUTE_INFO. */
86 #ifdef CONFIG_IPV6_ROUTE_INFO
87 static struct rt6_info *rt6_add_route_info(struct net *net,
88                                            const struct in6_addr *prefix, int prefixlen,
89                                            const struct in6_addr *gwaddr, int ifindex,
90                                            unsigned int pref);
91 static struct rt6_info *rt6_get_route_info(struct net *net,
92                                            const struct in6_addr *prefix, int prefixlen,
93                                            const struct in6_addr *gwaddr, int ifindex);
94 #endif
95
96 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
97 {
98         struct rt6_info *rt = (struct rt6_info *) dst;
99         struct inet_peer *peer;
100         u32 *p = NULL;
101
102         if (!(rt->dst.flags & DST_HOST))
103                 return NULL;
104
105         peer = rt6_get_peer_create(rt);
106         if (peer) {
107                 u32 *old_p = __DST_METRICS_PTR(old);
108                 unsigned long prev, new;
109
110                 p = peer->metrics;
111                 if (inet_metrics_new(peer))
112                         memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
113
114                 new = (unsigned long) p;
115                 prev = cmpxchg(&dst->_metrics, old, new);
116
117                 if (prev != old) {
118                         p = __DST_METRICS_PTR(prev);
119                         if (prev & DST_METRICS_READ_ONLY)
120                                 p = NULL;
121                 }
122         }
123         return p;
124 }
125
126 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
127                                              struct sk_buff *skb,
128                                              const void *daddr)
129 {
130         struct in6_addr *p = &rt->rt6i_gateway;
131
132         if (!ipv6_addr_any(p))
133                 return (const void *) p;
134         else if (skb)
135                 return &ipv6_hdr(skb)->daddr;
136         return daddr;
137 }
138
139 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
140                                           struct sk_buff *skb,
141                                           const void *daddr)
142 {
143         struct rt6_info *rt = (struct rt6_info *) dst;
144         struct neighbour *n;
145
146         daddr = choose_neigh_daddr(rt, skb, daddr);
147         n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
148         if (n)
149                 return n;
150         return neigh_create(&nd_tbl, daddr, dst->dev);
151 }
152
153 static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
154 {
155         struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
156         if (!n) {
157                 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
158                 if (IS_ERR(n))
159                         return PTR_ERR(n);
160         }
161         rt->n = n;
162
163         return 0;
164 }
165
166 static struct dst_ops ip6_dst_ops_template = {
167         .family                 =       AF_INET6,
168         .protocol               =       cpu_to_be16(ETH_P_IPV6),
169         .gc                     =       ip6_dst_gc,
170         .gc_thresh              =       1024,
171         .check                  =       ip6_dst_check,
172         .default_advmss         =       ip6_default_advmss,
173         .mtu                    =       ip6_mtu,
174         .cow_metrics            =       ipv6_cow_metrics,
175         .destroy                =       ip6_dst_destroy,
176         .ifdown                 =       ip6_dst_ifdown,
177         .negative_advice        =       ip6_negative_advice,
178         .link_failure           =       ip6_link_failure,
179         .update_pmtu            =       ip6_rt_update_pmtu,
180         .redirect               =       rt6_do_redirect,
181         .local_out              =       __ip6_local_out,
182         .neigh_lookup           =       ip6_neigh_lookup,
183 };
184
185 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
186 {
187         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
188
189         return mtu ? : dst->dev->mtu;
190 }
191
/* update_pmtu callback for blackhole dsts: intentionally does nothing. */
192 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
193                                          struct sk_buff *skb, u32 mtu)
194 {
195 }
196
/* redirect callback for blackhole dsts: intentionally does nothing. */
197 static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
198                                       struct sk_buff *skb)
199 {
200 }
201
/*
 * cow_metrics callback for blackhole dsts: always returns NULL, i.e. never
 * hands out a writable metrics array.
 */
202 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
203                                          unsigned long old)
204 {
205         return NULL;
206 }
207
208 static struct dst_ops ip6_dst_blackhole_ops = {
209         .family                 =       AF_INET6,
210         .protocol               =       cpu_to_be16(ETH_P_IPV6),
211         .destroy                =       ip6_dst_destroy,
212         .check                  =       ip6_dst_check,
213         .mtu                    =       ip6_blackhole_mtu,
214         .default_advmss         =       ip6_default_advmss,
215         .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
216         .redirect               =       ip6_rt_blackhole_redirect,
217         .cow_metrics            =       ip6_rt_blackhole_cow_metrics,
218         .neigh_lookup           =       ip6_neigh_lookup,
219 };
220
/*
 * Constant metrics array with RTAX_MAX slots; slot RTAX_HOPLIMIT - 1 is
 * preset to 255 and every other slot is zero-initialized.
 */
221 static const u32 ip6_template_metrics[RTAX_MAX] = {
222         [RTAX_HOPLIMIT - 1] = 255,
223 };
224
225 static const struct rt6_info ip6_null_entry_template = {
226         .dst = {
227                 .__refcnt       = ATOMIC_INIT(1),
228                 .__use          = 1,
229                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
230                 .error          = -ENETUNREACH,
231                 .input          = ip6_pkt_discard,
232                 .output         = ip6_pkt_discard_out,
233         },
234         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
235         .rt6i_protocol  = RTPROT_KERNEL,
236         .rt6i_metric    = ~(u32) 0,
237         .rt6i_ref       = ATOMIC_INIT(1),
238 };
239
240 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
241
242 static int ip6_pkt_prohibit(struct sk_buff *skb);
243 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
244
245 static const struct rt6_info ip6_prohibit_entry_template = {
246         .dst = {
247                 .__refcnt       = ATOMIC_INIT(1),
248                 .__use          = 1,
249                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
250                 .error          = -EACCES,
251                 .input          = ip6_pkt_prohibit,
252                 .output         = ip6_pkt_prohibit_out,
253         },
254         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
255         .rt6i_protocol  = RTPROT_KERNEL,
256         .rt6i_metric    = ~(u32) 0,
257         .rt6i_ref       = ATOMIC_INIT(1),
258 };
259
260 static const struct rt6_info ip6_blk_hole_entry_template = {
261         .dst = {
262                 .__refcnt       = ATOMIC_INIT(1),
263                 .__use          = 1,
264                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
265                 .error          = -EINVAL,
266                 .input          = dst_discard,
267                 .output         = dst_discard,
268         },
269         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
270         .rt6i_protocol  = RTPROT_KERNEL,
271         .rt6i_metric    = ~(u32) 0,
272         .rt6i_ref       = ATOMIC_INIT(1),
273 };
274
275 #endif
276
277 /* allocate dst with ip6_dst_ops */
278 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
279                                              struct net_device *dev,
280                                              int flags,
281                                              struct fib6_table *table)
282 {
283         struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
284                                         0, DST_OBSOLETE_FORCE_CHK, flags);
285
286         if (rt) {
287                 struct dst_entry *dst = &rt->dst;
288
289                 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
290                 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
291                 rt->rt6i_genid = rt_genid(net);
292         }
293         return rt;
294 }
295
296 static void ip6_dst_destroy(struct dst_entry *dst)
297 {
298         struct rt6_info *rt = (struct rt6_info *)dst;
299         struct inet6_dev *idev = rt->rt6i_idev;
300
301         if (rt->n)
302                 neigh_release(rt->n);
303
304         if (!(rt->dst.flags & DST_HOST))
305                 dst_destroy_metrics_generic(dst);
306
307         if (idev) {
308                 rt->rt6i_idev = NULL;
309                 in6_dev_put(idev);
310         }
311
312         if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
313                 dst_release(dst->from);
314
315         if (rt6_has_peer(rt)) {
316                 struct inet_peer *peer = rt6_peer_ptr(rt);
317                 inet_putpeer(peer);
318         }
319 }
320
/* File-wide generation counter recorded in routes when a peer is bound. */
321 static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
322
/* Return the current value of the peer generation counter. */
323 static u32 rt6_peer_genid(void)
324 {
325         return atomic_read(&__rt6_peer_genid);
326 }
327
328 void rt6_bind_peer(struct rt6_info *rt, int create)
329 {
330         struct inet_peer_base *base;
331         struct inet_peer *peer;
332
333         base = inetpeer_base_ptr(rt->_rt6i_peer);
334         if (!base)
335                 return;
336
337         peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
338         if (peer) {
339                 if (!rt6_set_peer(rt, peer))
340                         inet_putpeer(peer);
341                 else
342                         rt->rt6i_peer_genid = rt6_peer_genid();
343         }
344 }
345
346 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
347                            int how)
348 {
349         struct rt6_info *rt = (struct rt6_info *)dst;
350         struct inet6_dev *idev = rt->rt6i_idev;
351         struct net_device *loopback_dev =
352                 dev_net(dev)->loopback_dev;
353
354         if (dev != loopback_dev) {
355                 if (idev && idev->dev == dev) {
356                         struct inet6_dev *loopback_idev =
357                                 in6_dev_get(loopback_dev);
358                         if (loopback_idev) {
359                                 rt->rt6i_idev = loopback_idev;
360                                 in6_dev_put(idev);
361                         }
362                 }
363                 if (rt->n && rt->n->dev == dev) {
364                         rt->n->dev = loopback_dev;
365                         dev_hold(loopback_dev);
366                         dev_put(dev);
367                 }
368         }
369 }
370
371 static bool rt6_check_expired(const struct rt6_info *rt)
372 {
373         if (rt->rt6i_flags & RTF_EXPIRES) {
374                 if (time_after(jiffies, rt->dst.expires))
375                         return true;
376         } else if (rt->dst.from) {
377                 return rt6_check_expired((struct rt6_info *) rt->dst.from);
378         }
379         return false;
380 }
381
382 static bool rt6_need_strict(const struct in6_addr *daddr)
383 {
384         return ipv6_addr_type(daddr) &
385                 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
386 }
387
388 /*
389  *      Route lookup. Any table->tb6_lock is implied.
390  */
391
/*
 * Narrow the sibling list starting at @rt to the entry that fits the
 * requested output interface @oif or, when no @oif is given, the source
 * address @saddr.
 *
 * With @oif set, returns the first entry whose device has that ifindex;
 * failing that, a remembered entry on a loopback device ("local"); failing
 * that, the namespace's null entry if RT6_LOOKUP_F_IFACE is set in @flags.
 * Without @oif, returns the first entry for which ipv6_chk_addr() accepts
 * @saddr on the entry's device.  In every other case the list head @rt is
 * returned unchanged.
 */
392 static inline struct rt6_info *rt6_device_match(struct net *net,
393                                                     struct rt6_info *rt,
394                                                     const struct in6_addr *saddr,
395                                                     int oif,
396                                                     int flags)
397 {
398         struct rt6_info *local = NULL;
399         struct rt6_info *sprt;
400
            /* Nothing to match against: keep the head of the list. */
401         if (!oif && ipv6_addr_any(saddr))
402                 goto out;
403
404         for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
405                 struct net_device *dev = sprt->dst.dev;
406
407                 if (oif) {
408                         if (dev->ifindex == oif)
409                                 return sprt;
410                         if (dev->flags & IFF_LOOPBACK) {
411                                 if (!sprt->rt6i_idev ||
412                                     sprt->rt6i_idev->dev->ifindex != oif) {
                                            /*
                                             * oif is non-zero in this branch, so the
                                             * "&& oif" and "!oif" tests below do not
                                             * change the outcome.
                                             */
413                                         if (flags & RT6_LOOKUP_F_IFACE && oif)
414                                                 continue;
415                                         if (local && (!oif ||
416                                                       local->rt6i_idev->dev->ifindex == oif))
417                                                 continue;
418                                 }
                                    /* Remember this loopback entry as a fallback. */
419                                 local = sprt;
420                         }
421                 } else {
422                         if (ipv6_chk_addr(net, saddr, dev,
423                                           flags & RT6_LOOKUP_F_IFACE))
424                                 return sprt;
425                 }
426         }
427
428         if (oif) {
429                 if (local)
430                         return local;
431
432                 if (flags & RT6_LOOKUP_F_IFACE)
433                         return net->ipv6.ip6_null_entry;
434         }
435 out:
436         return rt;
437 }
438
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: if the neighbour bound to @rt is not in a
 * NUD_VALID state and the per-device rtr_probe_interval has elapsed since
 * the neighbour was last updated, send a Neighbour Solicitation for it to
 * its solicited-node multicast address.
 *
 * @rt may be NULL, in which case nothing is done.
 *
 * neigh->updated is modified here to enforce the rate limit, so the
 * neighbour lock is taken for writing.  Holding only the read side would
 * let several CPUs pass the time_after() check concurrently and each send
 * a probe.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	neigh = rt ? rt->n : NULL;
	/* Unlocked fast path; the state is re-checked under the lock. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;
	write_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		neigh->updated = jiffies;
		write_unlock_bh(&neigh->lock);

		/* Send the solicitation only after dropping the lock. */
		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
	} else {
		write_unlock_bh(&neigh->lock);
	}
}
#else
/* Without CONFIG_IPV6_ROUTER_PREF router probing is compiled out. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
475
476 /*
477  * Default Router Selection (RFC 2461 6.3.6)
478  */
479 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
480 {
481         struct net_device *dev = rt->dst.dev;
482         if (!oif || dev->ifindex == oif)
483                 return 2;
484         if ((dev->flags & IFF_LOOPBACK) &&
485             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
486                 return 1;
487         return 0;
488 }
489
490 static inline int rt6_check_neigh(struct rt6_info *rt)
491 {
492         struct neighbour *neigh;
493         int m;
494
495         neigh = rt->n;
496         if (rt->rt6i_flags & RTF_NONEXTHOP ||
497             !(rt->rt6i_flags & RTF_GATEWAY))
498                 m = 1;
499         else if (neigh) {
500                 read_lock_bh(&neigh->lock);
501                 if (neigh->nud_state & NUD_VALID)
502                         m = 2;
503 #ifdef CONFIG_IPV6_ROUTER_PREF
504                 else if (neigh->nud_state & NUD_FAILED)
505                         m = 0;
506 #endif
507                 else
508                         m = 1;
509                 read_unlock_bh(&neigh->lock);
510         } else
511                 m = 0;
512         return m;
513 }
514
515 static int rt6_score_route(struct rt6_info *rt, int oif,
516                            int strict)
517 {
518         int m, n;
519
520         m = rt6_check_dev(rt, oif);
521         if (!m && (strict & RT6_LOOKUP_F_IFACE))
522                 return -1;
523 #ifdef CONFIG_IPV6_ROUTER_PREF
524         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
525 #endif
526         n = rt6_check_neigh(rt);
527         if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
528                 return -1;
529         return m;
530 }
531
532 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
533                                    int *mpri, struct rt6_info *match)
534 {
535         int m;
536
537         if (rt6_check_expired(rt))
538                 goto out;
539
540         m = rt6_score_route(rt, oif, strict);
541         if (m < 0)
542                 goto out;
543
544         if (m > *mpri) {
545                 if (strict & RT6_LOOKUP_F_REACHABLE)
546                         rt6_probe(match);
547                 *mpri = m;
548                 match = rt;
549         } else if (strict & RT6_LOOKUP_F_REACHABLE) {
550                 rt6_probe(rt);
551         }
552
553 out:
554         return match;
555 }
556
557 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
558                                      struct rt6_info *rr_head,
559                                      u32 metric, int oif, int strict)
560 {
561         struct rt6_info *rt, *match;
562         int mpri = -1;
563
564         match = NULL;
565         for (rt = rr_head; rt && rt->rt6i_metric == metric;
566              rt = rt->dst.rt6_next)
567                 match = find_match(rt, oif, strict, &mpri, match);
568         for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
569              rt = rt->dst.rt6_next)
570                 match = find_match(rt, oif, strict, &mpri, match);
571
572         return match;
573 }
574
575 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
576 {
577         struct rt6_info *match, *rt0;
578         struct net *net;
579
580         rt0 = fn->rr_ptr;
581         if (!rt0)
582                 fn->rr_ptr = rt0 = fn->leaf;
583
584         match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
585
586         if (!match &&
587             (strict & RT6_LOOKUP_F_REACHABLE)) {
588                 struct rt6_info *next = rt0->dst.rt6_next;
589
590                 /* no entries matched; do round-robin */
591                 if (!next || next->rt6i_metric != rt0->rt6i_metric)
592                         next = fn->leaf;
593
594                 if (next != rt0)
595                         fn->rr_ptr = next;
596         }
597
598         net = dev_net(rt0->dst.dev);
599         return match ? match : net->ipv6.ip6_null_entry;
600 }
601
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a route information option received on @dev from router @gwaddr.
 *
 * @opt points at the option and @len is its size in bytes.  The option is
 * rejected with -EINVAL when it is shorter than struct route_info, when its
 * length field exceeds 3, when the prefix length exceeds 128 or does not fit
 * the option length, or when the preference is ICMPV6_ROUTER_PREF_INVALID.
 *
 * An existing route for the prefix is deleted when the lifetime is zero and
 * otherwise has its preference flags refreshed; a missing route is added
 * when the lifetime is non-zero.  The resulting route's expiry is set from
 * the lifetime (or cleared for a non-finite lifetime) and the reference
 * held on it is released.  Returns 0 once the option has been accepted.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		/* Full 128-bit prefix is present in the option. */
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* A zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (rt) {
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
	} else if (lifetime) {
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev->ifindex, pref);
	}

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime))
		rt6_set_expires(rt, jiffies + HZ * lifetime);
	else
		rt6_clean_expires(rt);

	dst_release(&rt->dst);
	return 0;
}
#endif
674
/*
 * BACKTRACK(__net, saddr) - retry a lookup at a less specific fib6 node.
 *
 * Only acts when the local variable "rt" equals __net's null entry.  It then
 * walks upwards from the local variable "fn": at each step it moves to the
 * parent, or, when the parent has a subtree that is not the current node,
 * looks @saddr up in that subtree.  As soon as a node flagged RTN_RTINFO is
 * found it jumps to the label "restart"; when the node it starts a step from
 * is flagged RTN_TL_ROOT it jumps to the label "out".
 *
 * The expansion site must therefore provide the variables "rt" and "fn" and
 * the labels "restart" and "out".
 */
675 #define BACKTRACK(__net, saddr)                 \
676 do { \
677         if (rt == __net->ipv6.ip6_null_entry) { \
678                 struct fib6_node *pn; \
679                 while (1) { \
680                         if (fn->fn_flags & RTN_TL_ROOT) \
681                                 goto out; \
682                         pn = fn->parent; \
683                         if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
684                                 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
685                         else \
686                                 fn = pn; \
687                         if (fn->fn_flags & RTN_RTINFO) \
688                                 goto restart; \
689                 } \
690         } \
691 } while (0)
692
/*
 * Lookup-only policy routing worker: find the fib6 node for @fl6 in @table,
 * pick an entry with rt6_device_match() and, through BACKTRACK(), retry at
 * less specific nodes while the result is the null entry.
 *
 * The whole lookup runs under the read side of table->tb6_lock, and
 * dst_use() is called on the chosen route before the lock is dropped
 * (presumably taking the reference that callers such as rt6_lookup() later
 * release -- confirm against dst_use()).
 */
693 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
694                                              struct fib6_table *table,
695                                              struct flowi6 *fl6, int flags)
696 {
697         struct fib6_node *fn;
698         struct rt6_info *rt;
699
700         read_lock_bh(&table->tb6_lock);
701         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
            /* "restart" and "out" are jump targets of BACKTRACK(). */
702 restart:
703         rt = fn->leaf;
704         rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
705         BACKTRACK(net, &fl6->saddr);
706 out:
707         dst_use(&rt->dst, jiffies);
708         read_unlock_bh(&table->tb6_lock);
709         return rt;
710
711 }
712
/*
 * Exported lookup entry point: run the fib6 rule lookup for @fl6 in @net
 * with ip6_pol_route_lookup() as the per-table worker and return its result.
 */
713 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
714                                     int flags)
715 {
716         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
717 }
718 EXPORT_SYMBOL_GPL(ip6_route_lookup);
719
720 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
721                             const struct in6_addr *saddr, int oif, int strict)
722 {
723         struct flowi6 fl6 = {
724                 .flowi6_oif = oif,
725                 .daddr = *daddr,
726         };
727         struct dst_entry *dst;
728         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
729
730         if (saddr) {
731                 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
732                 flags |= RT6_LOOKUP_F_HAS_SADDR;
733         }
734
735         dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
736         if (dst->error == 0)
737                 return (struct rt6_info *) dst;
738
739         dst_release(dst);
740
741         return NULL;
742 }
743
744 EXPORT_SYMBOL(rt6_lookup);
745
746 /* ip6_ins_rt is called with FREE table->tb6_lock.
747    It takes new route entry, the addition fails by any reason the
748    route is freed. In any case, if caller does not hold it, it may
749    be destroyed.
750  */
751
752 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
753 {
754         int err;
755         struct fib6_table *table;
756
757         table = rt->rt6i_table;
758         write_lock_bh(&table->tb6_lock);
759         err = fib6_add(&table->tb6_root, rt, info);
760         write_unlock_bh(&table->tb6_lock);
761
762         return err;
763 }
764
/*
 * Insert @rt into its table using an nl_info that carries only the network
 * namespace of the route's device.  Returns the __ip6_ins_rt() result.
 */
765 int ip6_ins_rt(struct rt6_info *rt)
766 {
767         struct nl_info info = {
768                 .nl_net = dev_net(rt->dst.dev),
769         };
770         return __ip6_ins_rt(rt, &info);
771 }
772
/*
 * Create an RTF_CACHE copy of @ort for destination @daddr and bind a
 * neighbour entry to it.
 *
 * For non-gateway routes the destination itself becomes the gateway, and
 * RTF_ANYCAST is set when @daddr equals the address of a non-/128 origin
 * route.  With CONFIG_IPV6_SUBTREES, a route that has a source prefix gets
 * @saddr (if given) as a /128 source.
 *
 * If binding the neighbour fails and we are not in softirq context, the dst
 * garbage collector is run once with aggressive settings and the bind is
 * retried.  Returns the new route, or NULL when the copy could not be made
 * or no neighbour could be bound (the copy is freed in that case).
 */
773 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
774                                       const struct in6_addr *daddr,
775                                       const struct in6_addr *saddr)
776 {
777         struct rt6_info *rt;
778
779         /*
780          *      Clone the route.
781          */
782
783         rt = ip6_rt_copy(ort, daddr);
784
785         if (rt) {
                    /* One retry is allowed, but only outside softirq context. */
786                 int attempts = !in_softirq();
787
788                 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
789                         if (ort->rt6i_dst.plen != 128 &&
790                             ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
791                                 rt->rt6i_flags |= RTF_ANYCAST;
792                         rt->rt6i_gateway = *daddr;
793                 }
794
795                 rt->rt6i_flags |= RTF_CACHE;
796
797 #ifdef CONFIG_IPV6_SUBTREES
798                 if (rt->rt6i_src.plen && saddr) {
799                         rt->rt6i_src.addr = *saddr;
800                         rt->rt6i_src.plen = 128;
801                 }
802 #endif
803
804         retry:
805                 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
806                         struct net *net = dev_net(rt->dst.dev);
807                         int saved_rt_min_interval =
808                                 net->ipv6.sysctl.ip6_rt_gc_min_interval;
809                         int saved_rt_elasticity =
810                                 net->ipv6.sysctl.ip6_rt_gc_elasticity;
811
812                         if (attempts-- > 0) {
                                    /*
                                     * Temporarily force the most aggressive GC
                                     * settings, run the collector, then restore
                                     * the saved values and try the bind again.
                                     * NOTE(review): no lock is visible here around
                                     * the sysctl override -- confirm concurrent
                                     * callers cannot restore a stale value.
                                     */
813                                 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
814                                 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
815
816                                 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
817
818                                 net->ipv6.sysctl.ip6_rt_gc_elasticity =
819                                         saved_rt_elasticity;
820                                 net->ipv6.sysctl.ip6_rt_gc_min_interval =
821                                         saved_rt_min_interval;
822                                 goto retry;
823                         }
824
825                         net_warn_ratelimited("Neighbour table overflow\n");
826                         dst_free(&rt->dst);
827                         return NULL;
828                 }
829         }
830
831         return rt;
832 }
833
834 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
835                                         const struct in6_addr *daddr)
836 {
837         struct rt6_info *rt = ip6_rt_copy(ort, daddr);
838
839         if (rt) {
840                 rt->rt6i_flags |= RTF_CACHE;
841                 rt->n = neigh_clone(ort->n);
842         }
843         return rt;
844 }
845
/*
 * Policy routing worker: find the route for @fl6 in @table, restricted to
 * interface @oif, and return it with a reference held.
 *
 * Unless forwarding is enabled in devconf_all, the first pass only accepts
 * reachable routers (RT6_LOOKUP_F_REACHABLE).  Whenever that pass ends at
 * the "out" label, the flag is cleared and the lookup is repeated once.
 *
 * When the selected route is neither the null entry nor an RTF_CACHE entry,
 * a per-destination copy is created outside the table lock (rt6_alloc_cow()
 * or rt6_alloc_clone()) and inserted with ip6_ins_rt().  If the insertion
 * fails, the whole lookup is retried, up to three attempts in total.
 * DST_HOST routes that need no copy are returned as they are.
 */
846 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
847                                       struct flowi6 *fl6, int flags)
848 {
849         struct fib6_node *fn;
850         struct rt6_info *rt, *nrt;
851         int strict = 0;
852         int attempts = 3;
853         int err;
854         int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
855
856         strict |= flags & RT6_LOOKUP_F_IFACE;
857
858 relookup:
859         read_lock_bh(&table->tb6_lock);
860
861 restart_2:
862         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
863
            /* "restart" and "out" are also jump targets of BACKTRACK(). */
864 restart:
865         rt = rt6_select(fn, oif, strict | reachable);
866
867         BACKTRACK(net, &fl6->saddr);
868         if (rt == net->ipv6.ip6_null_entry ||
869             rt->rt6i_flags & RTF_CACHE)
870                 goto out;
871
            /* Pin the route before dropping the lock to build the copy. */
872         dst_hold(&rt->dst);
873         read_unlock_bh(&table->tb6_lock);
874
875         if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP))
876                 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
877         else if (!(rt->dst.flags & DST_HOST))
878                 nrt = rt6_alloc_clone(rt, &fl6->daddr);
879         else
880                 goto out2;
881
            /* From here on rt is the copy, or the null entry if copying failed. */
882         dst_release(&rt->dst);
883         rt = nrt ? : net->ipv6.ip6_null_entry;
884
885         dst_hold(&rt->dst);
886         if (nrt) {
887                 err = ip6_ins_rt(nrt);
888                 if (!err)
889                         goto out2;
890         }
891
892         if (--attempts <= 0)
893                 goto out2;
894
895         /*
896          * Race condition! In the gap, when table->tb6_lock was
897          * released someone could insert this route.  Relookup.
898          */
899         dst_release(&rt->dst);
900         goto relookup;
901
902 out:
            /* First arrival with "reachable" set: retry once without it. */
903         if (reachable) {
904                 reachable = 0;
905                 goto restart_2;
906         }
907         dst_hold(&rt->dst);
908         read_unlock_bh(&table->tb6_lock);
909 out2:
910         rt->dst.lastuse = jiffies;
911         rt->dst.__use++;
912
913         return rt;
914 }
915
916 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
917                                             struct flowi6 *fl6, int flags)
918 {
919         return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
920 }
921
922 static struct dst_entry *ip6_route_input_lookup(struct net *net,
923                                                 struct net_device *dev,
924                                                 struct flowi6 *fl6, int flags)
925 {
926         if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
927                 flags |= RT6_LOOKUP_F_IFACE;
928
929         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
930 }
931
932 void ip6_route_input(struct sk_buff *skb)
933 {
934         const struct ipv6hdr *iph = ipv6_hdr(skb);
935         struct net *net = dev_net(skb->dev);
936         int flags = RT6_LOOKUP_F_HAS_SADDR;
937         struct flowi6 fl6 = {
938                 .flowi6_iif = skb->dev->ifindex,
939                 .daddr = iph->daddr,
940                 .saddr = iph->saddr,
941                 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
942                 .flowi6_mark = skb->mark,
943                 .flowi6_proto = iph->nexthdr,
944         };
945
946         skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
947 }
948
949 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
950                                              struct flowi6 *fl6, int flags)
951 {
952         return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
953 }
954
955 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
956                                     struct flowi6 *fl6)
957 {
958         int flags = 0;
959
960         fl6->flowi6_iif = LOOPBACK_IFINDEX;
961
962         if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
963                 flags |= RT6_LOOKUP_F_IFACE;
964
965         if (!ipv6_addr_any(&fl6->saddr))
966                 flags |= RT6_LOOKUP_F_HAS_SADDR;
967         else if (sk)
968                 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
969
970         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
971 }
972
973 EXPORT_SYMBOL(ip6_route_output);
974
975 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
976 {
977         struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
978         struct dst_entry *new = NULL;
979
980         rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
981         if (rt) {
982                 new = &rt->dst;
983
984                 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
985                 rt6_init_peer(rt, net->ipv6.peers);
986
987                 new->__use = 1;
988                 new->input = dst_discard;
989                 new->output = dst_discard;
990
991                 if (dst_metrics_read_only(&ort->dst))
992                         new->_metrics = ort->dst._metrics;
993                 else
994                         dst_copy_metrics(new, &ort->dst);
995                 rt->rt6i_idev = ort->rt6i_idev;
996                 if (rt->rt6i_idev)
997                         in6_dev_hold(rt->rt6i_idev);
998
999                 rt->rt6i_gateway = ort->rt6i_gateway;
1000                 rt->rt6i_flags = ort->rt6i_flags;
1001                 rt6_clean_expires(rt);
1002                 rt->rt6i_metric = 0;
1003
1004                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1005 #ifdef CONFIG_IPV6_SUBTREES
1006                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1007 #endif
1008
1009                 dst_free(new);
1010         }
1011
1012         dst_release(dst_orig);
1013         return new ? new : ERR_PTR(-ENOMEM);
1014 }
1015
1016 /*
1017  *      Destination cache support functions
1018  */
1019
1020 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1021 {
1022         struct rt6_info *rt;
1023
1024         rt = (struct rt6_info *) dst;
1025
1026         /* All IPV6 dsts are created with ->obsolete set to the value
1027          * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1028          * into this function always.
1029          */
1030         if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
1031                 return NULL;
1032
1033         if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
1034                 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
1035                         if (!rt6_has_peer(rt))
1036                                 rt6_bind_peer(rt, 0);
1037                         rt->rt6i_peer_genid = rt6_peer_genid();
1038                 }
1039                 return dst;
1040         }
1041         return NULL;
1042 }
1043
1044 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1045 {
1046         struct rt6_info *rt = (struct rt6_info *) dst;
1047
1048         if (rt) {
1049                 if (rt->rt6i_flags & RTF_CACHE) {
1050                         if (rt6_check_expired(rt)) {
1051                                 ip6_del_rt(rt);
1052                                 dst = NULL;
1053                         }
1054                 } else {
1055                         dst_release(dst);
1056                         dst = NULL;
1057                 }
1058         }
1059         return dst;
1060 }
1061
1062 static void ip6_link_failure(struct sk_buff *skb)
1063 {
1064         struct rt6_info *rt;
1065
1066         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1067
1068         rt = (struct rt6_info *) skb_dst(skb);
1069         if (rt) {
1070                 if (rt->rt6i_flags & RTF_CACHE)
1071                         rt6_update_expires(rt, 0);
1072                 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1073                         rt->rt6i_node->fn_sernum = -1;
1074         }
1075 }
1076
1077 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1078                                struct sk_buff *skb, u32 mtu)
1079 {
1080         struct rt6_info *rt6 = (struct rt6_info*)dst;
1081
1082         dst_confirm(dst);
1083         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1084                 struct net *net = dev_net(dst->dev);
1085
1086                 rt6->rt6i_flags |= RTF_MODIFIED;
1087                 if (mtu < IPV6_MIN_MTU) {
1088                         u32 features = dst_metric(dst, RTAX_FEATURES);
1089                         mtu = IPV6_MIN_MTU;
1090                         features |= RTAX_FEATURE_ALLFRAG;
1091                         dst_metric_set(dst, RTAX_FEATURES, features);
1092                 }
1093                 dst_metric_set(dst, RTAX_MTU, mtu);
1094                 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1095         }
1096 }
1097
1098 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1099                      int oif, u32 mark)
1100 {
1101         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1102         struct dst_entry *dst;
1103         struct flowi6 fl6;
1104
1105         memset(&fl6, 0, sizeof(fl6));
1106         fl6.flowi6_oif = oif;
1107         fl6.flowi6_mark = mark;
1108         fl6.flowi6_flags = 0;
1109         fl6.daddr = iph->daddr;
1110         fl6.saddr = iph->saddr;
1111         fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1112
1113         dst = ip6_route_output(net, NULL, &fl6);
1114         if (!dst->error)
1115                 ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1116         dst_release(dst);
1117 }
1118 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1119
1120 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1121 {
1122         ip6_update_pmtu(skb, sock_net(sk), mtu,
1123                         sk->sk_bound_dev_if, sk->sk_mark);
1124 }
1125 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1126
1127 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1128 {
1129         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1130         struct dst_entry *dst;
1131         struct flowi6 fl6;
1132
1133         memset(&fl6, 0, sizeof(fl6));
1134         fl6.flowi6_oif = oif;
1135         fl6.flowi6_mark = mark;
1136         fl6.flowi6_flags = 0;
1137         fl6.daddr = iph->daddr;
1138         fl6.saddr = iph->saddr;
1139         fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1140
1141         dst = ip6_route_output(net, NULL, &fl6);
1142         if (!dst->error)
1143                 rt6_do_redirect(dst, NULL, skb);
1144         dst_release(dst);
1145 }
1146 EXPORT_SYMBOL_GPL(ip6_redirect);
1147
1148 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1149 {
1150         ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1151 }
1152 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1153
1154 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1155 {
1156         struct net_device *dev = dst->dev;
1157         unsigned int mtu = dst_mtu(dst);
1158         struct net *net = dev_net(dev);
1159
1160         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1161
1162         if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1163                 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1164
1165         /*
1166          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1167          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1168          * IPV6_MAXPLEN is also valid and means: "any MSS,
1169          * rely only on pmtu discovery"
1170          */
1171         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1172                 mtu = IPV6_MAXPLEN;
1173         return mtu;
1174 }
1175
1176 static unsigned int ip6_mtu(const struct dst_entry *dst)
1177 {
1178         struct inet6_dev *idev;
1179         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1180
1181         if (mtu)
1182                 return mtu;
1183
1184         mtu = IPV6_MIN_MTU;
1185
1186         rcu_read_lock();
1187         idev = __in6_dev_get(dst->dev);
1188         if (idev)
1189                 mtu = idev->cnf.mtu6;
1190         rcu_read_unlock();
1191
1192         return mtu;
1193 }
1194
/*
 * Head of a singly linked list (chained through dst->next) of the dst
 * entries created by icmp6_dst_alloc(); walked by icmp6_dst_gc() and
 * icmp6_clean_all().
 */
1195 static struct dst_entry *icmp6_dst_gc_list;
/* Guards icmp6_dst_gc_list; every visible user takes it with spin_lock_bh(). */
1196 static DEFINE_SPINLOCK(icmp6_dst_lock);
1197
1198 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1199                                   struct neighbour *neigh,
1200                                   struct flowi6 *fl6)
1201 {
1202         struct dst_entry *dst;
1203         struct rt6_info *rt;
1204         struct inet6_dev *idev = in6_dev_get(dev);
1205         struct net *net = dev_net(dev);
1206
1207         if (unlikely(!idev))
1208                 return ERR_PTR(-ENODEV);
1209
1210         rt = ip6_dst_alloc(net, dev, 0, NULL);
1211         if (unlikely(!rt)) {
1212                 in6_dev_put(idev);
1213                 dst = ERR_PTR(-ENOMEM);
1214                 goto out;
1215         }
1216
1217         if (neigh)
1218                 neigh_hold(neigh);
1219         else {
1220                 neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr);
1221                 if (IS_ERR(neigh)) {
1222                         in6_dev_put(idev);
1223                         dst_free(&rt->dst);
1224                         return ERR_CAST(neigh);
1225                 }
1226         }
1227
1228         rt->dst.flags |= DST_HOST;
1229         rt->dst.output  = ip6_output;
1230         rt->n = neigh;
1231         atomic_set(&rt->dst.__refcnt, 1);
1232         rt->rt6i_dst.addr = fl6->daddr;
1233         rt->rt6i_dst.plen = 128;
1234         rt->rt6i_idev     = idev;
1235         dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1236
1237         spin_lock_bh(&icmp6_dst_lock);
1238         rt->dst.next = icmp6_dst_gc_list;
1239         icmp6_dst_gc_list = &rt->dst;
1240         spin_unlock_bh(&icmp6_dst_lock);
1241
1242         fib6_force_start_gc(net);
1243
1244         dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1245
1246 out:
1247         return dst;
1248 }
1249
1250 int icmp6_dst_gc(void)
1251 {
1252         struct dst_entry *dst, **pprev;
1253         int more = 0;
1254
1255         spin_lock_bh(&icmp6_dst_lock);
1256         pprev = &icmp6_dst_gc_list;
1257
1258         while ((dst = *pprev) != NULL) {
1259                 if (!atomic_read(&dst->__refcnt)) {
1260                         *pprev = dst->next;
1261                         dst_free(dst);
1262                 } else {
1263                         pprev = &dst->next;
1264                         ++more;
1265                 }
1266         }
1267
1268         spin_unlock_bh(&icmp6_dst_lock);
1269
1270         return more;
1271 }
1272
1273 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1274                             void *arg)
1275 {
1276         struct dst_entry *dst, **pprev;
1277
1278         spin_lock_bh(&icmp6_dst_lock);
1279         pprev = &icmp6_dst_gc_list;
1280         while ((dst = *pprev) != NULL) {
1281                 struct rt6_info *rt = (struct rt6_info *) dst;
1282                 if (func(rt, arg)) {
1283                         *pprev = dst->next;
1284                         dst_free(dst);
1285                 } else {
1286                         pprev = &dst->next;
1287                 }
1288         }
1289         spin_unlock_bh(&icmp6_dst_lock);
1290 }
1291
1292 static int ip6_dst_gc(struct dst_ops *ops)
1293 {
1294         unsigned long now = jiffies;
1295         struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1296         int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1297         int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1298         int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1299         int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1300         unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1301         int entries;
1302
1303         entries = dst_entries_get_fast(ops);
1304         if (time_after(rt_last_gc + rt_min_interval, now) &&
1305             entries <= rt_max_size)
1306                 goto out;
1307
1308         net->ipv6.ip6_rt_gc_expire++;
1309         fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1310         net->ipv6.ip6_rt_last_gc = now;
1311         entries = dst_entries_get_slow(ops);
1312         if (entries < ops->gc_thresh)
1313                 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1314 out:
1315         net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1316         return entries > rt_max_size;
1317 }
1318
1319 /* Clean host part of a prefix. Not necessary in radix tree,
1320    but results in cleaner routing tables.
1321
1322    Remove it only when all the things will work!
1323  */
1324
1325 int ip6_dst_hoplimit(struct dst_entry *dst)
1326 {
1327         int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1328         if (hoplimit == 0) {
1329                 struct net_device *dev = dst->dev;
1330                 struct inet6_dev *idev;
1331
1332                 rcu_read_lock();
1333                 idev = __in6_dev_get(dev);
1334                 if (idev)
1335                         hoplimit = idev->cnf.hop_limit;
1336                 else
1337                         hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1338                 rcu_read_unlock();
1339         }
1340         return hoplimit;
1341 }
1342 EXPORT_SYMBOL(ip6_dst_hoplimit);
1343
1344 /*
1345  *
1346  */
1347
/*
 * Build a route from the configuration @cfg and insert it into the FIB
 * table selected by cfg->fc_table.
 *
 * Side effects on @cfg: fc_metric defaults to IP6_RT_PRIO_USER when 0,
 * fc_protocol defaults to RTPROT_BOOT when RTPROT_UNSPEC, and
 * fc_nlinfo.nl_net is overwritten with the namespace of the chosen device.
 *
 * Returns the result of __ip6_ins_rt() once the route has been assembled,
 * or a negative errno set on one of the failure paths below:
 *   -EINVAL        bad prefix length; source prefix without
 *                  CONFIG_IPV6_SUBTREES; non-unicast gateway; gateway route
 *                  with no device or a loopback device; prefsrc not
 *                  accepted by ipv6_chk_addr(); metric type > RTAX_MAX
 *   -ENODEV        interface or its inet6_dev not found, or no device
 *   -ENOBUFS       no FIB table could be obtained
 *   -ENOMEM        route or metrics allocation failed
 *   -EHOSTUNREACH  non-link-local gateway is not suitably reachable
 * rt6_bind_neighbour() errors are passed through unchanged.
 *
 * On every 'out' path the dev/idev references held here are dropped and the
 * partially built route is released with dst_free().
 */
1348 int ip6_route_add(struct fib6_config *cfg)
1349 {
1350         int err;
1351         struct net *net = cfg->fc_nlinfo.nl_net;
1352         struct rt6_info *rt = NULL;
1353         struct net_device *dev = NULL;
1354         struct inet6_dev *idev = NULL;
1355         struct fib6_table *table;
1356         int addr_type;
1357
1358         if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1359                 return -EINVAL;
1360 #ifndef CONFIG_IPV6_SUBTREES
1361         if (cfg->fc_src_len)
1362                 return -EINVAL;
1363 #endif
             /* An explicit interface: take references on the device and its inet6_dev. */
1364         if (cfg->fc_ifindex) {
1365                 err = -ENODEV;
1366                 dev = dev_get_by_index(net, cfg->fc_ifindex);
1367                 if (!dev)
1368                         goto out;
1369                 idev = in6_dev_get(dev);
1370                 if (!idev)
1371                         goto out;
1372         }
1373
1374         if (cfg->fc_metric == 0)
1375                 cfg->fc_metric = IP6_RT_PRIO_USER;
1376
             /*
              * A netlink request without NLM_F_CREATE first tries an
              * existing table; if there is none, a warning is printed and
              * the table is created anyway.
              */
1377         err = -ENOBUFS;
1378         if (cfg->fc_nlinfo.nlh &&
1379             !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1380                 table = fib6_get_table(net, cfg->fc_table);
1381                 if (!table) {
1382                         pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1383                         table = fib6_new_table(net, cfg->fc_table);
1384                 }
1385         } else {
1386                 table = fib6_new_table(net, cfg->fc_table);
1387         }
1388
1389         if (!table)
1390                 goto out;
1391
1392         rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1393
1394         if (!rt) {
1395                 err = -ENOMEM;
1396                 goto out;
1397         }
1398
1399         if (cfg->fc_flags & RTF_EXPIRES)
1400                 rt6_set_expires(rt, jiffies +
1401                                 clock_t_to_jiffies(cfg->fc_expires));
1402         else
1403                 rt6_clean_expires(rt);
1404
1405         if (cfg->fc_protocol == RTPROT_UNSPEC)
1406                 cfg->fc_protocol = RTPROT_BOOT;
1407         rt->rt6i_protocol = cfg->fc_protocol;
1408
1409         addr_type = ipv6_addr_type(&cfg->fc_dst);
1410
             /* Input handler: multicast, local delivery, or forwarding. */
1411         if (addr_type & IPV6_ADDR_MULTICAST)
1412                 rt->dst.input = ip6_mc_input;
1413         else if (cfg->fc_flags & RTF_LOCAL)
1414                 rt->dst.input = ip6_input;
1415         else
1416                 rt->dst.input = ip6_forward;
1417
1418         rt->dst.output = ip6_output;
1419
1420         ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1421         rt->rt6i_dst.plen = cfg->fc_dst_len;
1422         if (rt->rt6i_dst.plen == 128)
1423                rt->dst.flags |= DST_HOST;
1424
             /* Non-host routes carrying metrics get a private, zeroed metrics array. */
1425         if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1426                 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1427                 if (!metrics) {
1428                         err = -ENOMEM;
1429                         goto out;
1430                 }
1431                 dst_init_metrics(&rt->dst, metrics, 0);
1432         }
1433 #ifdef CONFIG_IPV6_SUBTREES
1434         ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1435         rt->rt6i_src.plen = cfg->fc_src_len;
1436 #endif
1437
1438         rt->rt6i_metric = cfg->fc_metric;
1439
1440         /* We cannot add true routes via loopback here,
1441            they would result in kernel looping; promote them to reject routes
1442          */
1443         if ((cfg->fc_flags & RTF_REJECT) ||
1444             (dev && (dev->flags & IFF_LOOPBACK) &&
1445              !(addr_type & IPV6_ADDR_LOOPBACK) &&
1446              !(cfg->fc_flags & RTF_LOCAL))) {
1447                 /* hold loopback dev/idev if we haven't done so. */
1448                 if (dev != net->loopback_dev) {
1449                         if (dev) {
1450                                 dev_put(dev);
1451                                 in6_dev_put(idev);
1452                         }
1453                         dev = net->loopback_dev;
1454                         dev_hold(dev);
1455                         idev = in6_dev_get(dev);
1456                         if (!idev) {
1457                                 err = -ENODEV;
1458                                 goto out;
1459                         }
1460                 }
                     /* Reject routes discard in both directions; dst.error depends on fc_type. */
1461                 rt->dst.output = ip6_pkt_discard_out;
1462                 rt->dst.input = ip6_pkt_discard;
1463                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1464                 switch (cfg->fc_type) {
1465                 case RTN_BLACKHOLE:
1466                         rt->dst.error = -EINVAL;
1467                         break;
1468                 case RTN_PROHIBIT:
1469                         rt->dst.error = -EACCES;
1470                         break;
1471                 case RTN_THROW:
1472                         rt->dst.error = -EAGAIN;
1473                         break;
1474                 default:
1475                         rt->dst.error = -ENETUNREACH;
1476                         break;
1477                 }
1478                 goto install_route;
1479         }
1480
1481         if (cfg->fc_flags & RTF_GATEWAY) {
1482                 const struct in6_addr *gw_addr;
1483                 int gwa_type;
1484
1485                 gw_addr = &cfg->fc_gateway;
1486                 rt->rt6i_gateway = *gw_addr;
1487                 gwa_type = ipv6_addr_type(gw_addr);
1488
1489                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1490                         struct rt6_info *grt;
1491
1492                         /* IPv6 strictly inhibits using not link-local
1493                            addresses as nexthop address.
1494                            Otherwise, router will not able to send redirects.
1495                            It is very good, but in some (rare!) circumstances
1496                            (SIT, PtP, NBMA NOARP links) it is handy to allow
1497                            some exceptions. --ANK
1498                          */
1499                         err = -EINVAL;
1500                         if (!(gwa_type & IPV6_ADDR_UNICAST))
1501                                 goto out;
1502
1503                         grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1504
1505                         err = -EHOSTUNREACH;
1506                         if (!grt)
1507                                 goto out;
                             /*
                              * With an explicit device the gateway's route
                              * must use the same one; otherwise the device
                              * and idev are borrowed from that route, with
                              * references taken.
                              */
1508                         if (dev) {
1509                                 if (dev != grt->dst.dev) {
1510                                         dst_release(&grt->dst);
1511                                         goto out;
1512                                 }
1513                         } else {
1514                                 dev = grt->dst.dev;
1515                                 idev = grt->rt6i_idev;
1516                                 dev_hold(dev);
1517                                 in6_dev_hold(grt->rt6i_idev);
1518                         }
                             /* Accepted only if the route to the gateway is not itself a gateway route. */
1519                         if (!(grt->rt6i_flags & RTF_GATEWAY))
1520                                 err = 0;
1521                         dst_release(&grt->dst);
1522
1523                         if (err)
1524                                 goto out;
1525                 }
1526                 err = -EINVAL;
1527                 if (!dev || (dev->flags & IFF_LOOPBACK))
1528                         goto out;
1529         }
1530
1531         err = -ENODEV;
1532         if (!dev)
1533                 goto out;
1534
             /* A preferred source address must pass ipv6_chk_addr() for this device. */
1535         if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1536                 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1537                         err = -EINVAL;
1538                         goto out;
1539                 }
1540                 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1541                 rt->rt6i_prefsrc.plen = 128;
1542         } else
1543                 rt->rt6i_prefsrc.plen = 0;
1544
1545         if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1546                 err = rt6_bind_neighbour(rt, dev);
1547                 if (err)
1548                         goto out;
1549         }
1550
1551         rt->rt6i_flags = cfg->fc_flags;
1552
1553 install_route:
             /* Copy each metric attribute with a non-zero type; a type above RTAX_MAX is rejected. */
1554         if (cfg->fc_mx) {
1555                 struct nlattr *nla;
1556                 int remaining;
1557
1558                 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1559                         int type = nla_type(nla);
1560
1561                         if (type) {
1562                                 if (type > RTAX_MAX) {
1563                                         err = -EINVAL;
1564                                         goto out;
1565                                 }
1566
1567                                 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1568                         }
1569                 }
1570         }
1571
             /* The dev/idev references held so far are stored in the route, not dropped. */
1572         rt->dst.dev = dev;
1573         rt->rt6i_idev = idev;
1574         rt->rt6i_table = table;
1575
1576         cfg->fc_nlinfo.nl_net = dev_net(dev);
1577
1578         return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1579
1580 out:
1581         if (dev)
1582                 dev_put(dev);
1583         if (idev)
1584                 in6_dev_put(idev);
1585         if (rt)
1586                 dst_free(&rt->dst);
1587         return err;
1588 }
1589
1590 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1591 {
1592         int err;
1593         struct fib6_table *table;
1594         struct net *net = dev_net(rt->dst.dev);
1595
1596         if (rt == net->ipv6.ip6_null_entry) {
1597                 err = -ENOENT;
1598                 goto out;
1599         }
1600
1601         table = rt->rt6i_table;
1602         write_lock_bh(&table->tb6_lock);
1603         err = fib6_del(rt, info);
1604         write_unlock_bh(&table->tb6_lock);
1605
1606 out:
1607         dst_release(&rt->dst);
1608         return err;
1609 }
1610
1611 int ip6_del_rt(struct rt6_info *rt)
1612 {
1613         struct nl_info info = {
1614                 .nl_net = dev_net(rt->dst.dev),
1615         };
1616         return __ip6_del_rt(rt, &info);
1617 }
1618
1619 static int ip6_route_del(struct fib6_config *cfg)
1620 {
1621         struct fib6_table *table;
1622         struct fib6_node *fn;
1623         struct rt6_info *rt;
1624         int err = -ESRCH;
1625
1626         table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1627         if (!table)
1628                 return err;
1629
1630         read_lock_bh(&table->tb6_lock);
1631
1632         fn = fib6_locate(&table->tb6_root,
1633                          &cfg->fc_dst, cfg->fc_dst_len,
1634                          &cfg->fc_src, cfg->fc_src_len);
1635
1636         if (fn) {
1637                 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1638                         if (cfg->fc_ifindex &&
1639                             (!rt->dst.dev ||
1640                              rt->dst.dev->ifindex != cfg->fc_ifindex))
1641                                 continue;
1642                         if (cfg->fc_flags & RTF_GATEWAY &&
1643                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1644                                 continue;
1645                         if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1646                                 continue;
1647                         dst_hold(&rt->dst);
1648                         read_unlock_bh(&table->tb6_lock);
1649
1650                         return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1651                 }
1652         }
1653         read_unlock_bh(&table->tb6_lock);
1654
1655         return err;
1656 }
1657
/*
 * Handle the ICMPv6 Redirect message in @skb against the route @dst.
 *
 * @dst: route currently used for the redirected destination
 * @sk:  not used in this function
 * @skb: the Redirect; read as an icmp6hdr followed by the target address,
 *       the destination address and then ND options
 *
 * After validation the neighbour entry for the target is updated and a
 * route copied from @dst is installed for the destination, flagged
 * RTF_UP|RTF_DYNAMIC|RTF_CACHE (plus RTF_GATEWAY unless the target equals
 * the destination) with the target as gateway.  NETEVENT_REDIRECT notifiers
 * are called, and the old route is deleted if it was itself a cache entry.
 *
 * Every rejection path returns silently, apart from a rate-limited debug
 * message.
 */
1658 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1659 {
1660         struct net *net = dev_net(skb->dev);
1661         struct netevent_redirect netevent;
1662         struct rt6_info *rt, *nrt = NULL;
1663         const struct in6_addr *target;
1664         struct ndisc_options ndopts;
1665         const struct in6_addr *dest;
1666         struct neighbour *old_neigh;
1667         struct inet6_dev *in6_dev;
1668         struct neighbour *neigh;
1669         struct icmp6hdr *icmph;
1670         int optlen, on_link;
1671         u8 *lladdr;
1672
             /* Bytes left for options after the ICMPv6 header and the two addresses. */
1673         optlen = skb->tail - skb->transport_header;
1674         optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
1675
1676         if (optlen < 0) {
1677                 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1678                 return;
1679         }
1680
1681         icmph = icmp6_hdr(skb);
1682         target = (const struct in6_addr *) (icmph + 1);
1683         dest = target + 1;
1684
1685         if (ipv6_addr_is_multicast(dest)) {
1686                 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1687                 return;
1688         }
1689
             /*
              * target == dest means the destination itself is the next hop
              * (on-link).  Otherwise the target must be a link-local
              * unicast address.
              */
1690         on_link = 0;
1691         if (ipv6_addr_equal(dest, target)) {
1692                 on_link = 1;
1693         } else if (ipv6_addr_type(target) !=
1694                    (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1695                 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1696                 return;
1697         }
1698
             /* Ignored when the receiving interface forwards or has accept_redirects off. */
1699         in6_dev = __in6_dev_get(skb->dev);
1700         if (!in6_dev)
1701                 return;
1702         if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1703                 return;
1704
1705         /* RFC2461 8.1:
1706          *      The IP source address of the Redirect MUST be the same as the current
1707          *      first-hop router for the specified ICMP Destination Address.
1708          */
1709
1710         if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
1711                 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1712                 return;
1713         }
1714
             /* The target link-layer address option is optional; lladdr stays NULL without it. */
1715         lladdr = NULL;
1716         if (ndopts.nd_opts_tgt_lladdr) {
1717                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1718                                              skb->dev);
1719                 if (!lladdr) {
1720                         net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1721                         return;
1722                 }
1723         }
1724
1725         rt = (struct rt6_info *) dst;
1726         if (rt == net->ipv6.ip6_null_entry) {
1727                 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1728                 return;
1729         }
1730
1731         /* Redirect received -> path was valid.
1732          * Look, redirects are sent only in response to data packets,
1733          * so that this nexthop apparently is reachable. --ANK
1734          */
1735         dst_confirm(&rt->dst);
1736
             /*
              * NOTE(review): the final argument 1 presumably asks
              * __neigh_lookup() to create a missing entry - confirm.  The
              * reference obtained here is dropped at 'out'.
              */
1737         neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
1738         if (!neigh)
1739                 return;
1740
1741         /* Duplicate redirect: silently ignore. */
1742         old_neigh = rt->n;
1743         if (neigh == old_neigh)
1744                 goto out;
1745
1746         /*
1747          *      We have finally decided to accept it.
1748          */
1749
1750         neigh_update(neigh, lladdr, NUD_STALE,
1751                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1752                      NEIGH_UPDATE_F_OVERRIDE|
1753                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1754                                      NEIGH_UPDATE_F_ISROUTER))
1755                      );
1756
1757         nrt = ip6_rt_copy(rt, dest);
1758         if (!nrt)
1759                 goto out;
1760
1761         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1762         if (on_link)
1763                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1764
1765         nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1766         nrt->n = neigh_clone(neigh);
1767
             /*
              * NOTE(review): nrt is not released here when ip6_ins_rt()
              * fails; presumably ip6_ins_rt() disposes of it on error -
              * confirm.
              */
1768         if (ip6_ins_rt(nrt))
1769                 goto out;
1770
1771         netevent.old = &rt->dst;
1772         netevent.old_neigh = old_neigh;
1773         netevent.new = &nrt->dst;
1774         netevent.new_neigh = neigh;
1775         netevent.daddr = dest;
1776         call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1777
             /*
              * A superseded cache entry is removed.  ip6_del_rt() ends in
              * a dst_release(), so an extra reference is taken first.
              */
1778         if (rt->rt6i_flags & RTF_CACHE) {
1779                 rt = (struct rt6_info *) dst_clone(&rt->dst);
1780                 ip6_del_rt(rt);
1781         }
1782
1783 out:
1784         neigh_release(neigh);
1785 }
1786
1787 /*
1788  *      Misc support functions
1789  */
1790
1791 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1792                                     const struct in6_addr *dest)
1793 {
1794         struct net *net = dev_net(ort->dst.dev);
1795         struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1796                                             ort->rt6i_table);
1797
1798         if (rt) {
1799                 rt->dst.input = ort->dst.input;
1800                 rt->dst.output = ort->dst.output;
1801                 rt->dst.flags |= DST_HOST;
1802
1803                 rt->rt6i_dst.addr = *dest;
1804                 rt->rt6i_dst.plen = 128;
1805                 dst_copy_metrics(&rt->dst, &ort->dst);
1806                 rt->dst.error = ort->dst.error;
1807                 rt->rt6i_idev = ort->rt6i_idev;
1808                 if (rt->rt6i_idev)
1809                         in6_dev_hold(rt->rt6i_idev);
1810                 rt->dst.lastuse = jiffies;
1811
1812                 rt->rt6i_gateway = ort->rt6i_gateway;
1813                 rt->rt6i_flags = ort->rt6i_flags;
1814                 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1815                     (RTF_DEFAULT | RTF_ADDRCONF))
1816                         rt6_set_from(rt, ort);
1817                 else
1818                         rt6_clean_expires(rt);
1819                 rt->rt6i_metric = 0;
1820
1821 #ifdef CONFIG_IPV6_SUBTREES
1822                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1823 #endif
1824                 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1825                 rt->rt6i_table = ort->rt6i_table;
1826         }
1827         return rt;
1828 }
1829
1830 #ifdef CONFIG_IPV6_ROUTE_INFO
1831 static struct rt6_info *rt6_get_route_info(struct net *net,
1832                                            const struct in6_addr *prefix, int prefixlen,
1833                                            const struct in6_addr *gwaddr, int ifindex)
1834 {
1835         struct fib6_node *fn;
1836         struct rt6_info *rt = NULL;
1837         struct fib6_table *table;
1838
1839         table = fib6_get_table(net, RT6_TABLE_INFO);
1840         if (!table)
1841                 return NULL;
1842
1843         read_lock_bh(&table->tb6_lock);
1844         fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1845         if (!fn)
1846                 goto out;
1847
1848         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1849                 if (rt->dst.dev->ifindex != ifindex)
1850                         continue;
1851                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1852                         continue;
1853                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1854                         continue;
1855                 dst_hold(&rt->dst);
1856                 break;
1857         }
1858 out:
1859         read_unlock_bh(&table->tb6_lock);
1860         return rt;
1861 }
1862
1863 static struct rt6_info *rt6_add_route_info(struct net *net,
1864                                            const struct in6_addr *prefix, int prefixlen,
1865                                            const struct in6_addr *gwaddr, int ifindex,
1866                                            unsigned int pref)
1867 {
1868         struct fib6_config cfg = {
1869                 .fc_table       = RT6_TABLE_INFO,
1870                 .fc_metric      = IP6_RT_PRIO_USER,
1871                 .fc_ifindex     = ifindex,
1872                 .fc_dst_len     = prefixlen,
1873                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1874                                   RTF_UP | RTF_PREF(pref),
1875                 .fc_nlinfo.portid = 0,
1876                 .fc_nlinfo.nlh = NULL,
1877                 .fc_nlinfo.nl_net = net,
1878         };
1879
1880         cfg.fc_dst = *prefix;
1881         cfg.fc_gateway = *gwaddr;
1882
1883         /* We should treat it as a default route if prefix length is 0. */
1884         if (!prefixlen)
1885                 cfg.fc_flags |= RTF_DEFAULT;
1886
1887         ip6_route_add(&cfg);
1888
1889         return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1890 }
1891 #endif
1892
1893 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1894 {
1895         struct rt6_info *rt;
1896         struct fib6_table *table;
1897
1898         table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1899         if (!table)
1900                 return NULL;
1901
1902         read_lock_bh(&table->tb6_lock);
1903         for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1904                 if (dev == rt->dst.dev &&
1905                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1906                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1907                         break;
1908         }
1909         if (rt)
1910                 dst_hold(&rt->dst);
1911         read_unlock_bh(&table->tb6_lock);
1912         return rt;
1913 }
1914
1915 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1916                                      struct net_device *dev,
1917                                      unsigned int pref)
1918 {
1919         struct fib6_config cfg = {
1920                 .fc_table       = RT6_TABLE_DFLT,
1921                 .fc_metric      = IP6_RT_PRIO_USER,
1922                 .fc_ifindex     = dev->ifindex,
1923                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1924                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1925                 .fc_nlinfo.portid = 0,
1926                 .fc_nlinfo.nlh = NULL,
1927                 .fc_nlinfo.nl_net = dev_net(dev),
1928         };
1929
1930         cfg.fc_gateway = *gwaddr;
1931
1932         ip6_route_add(&cfg);
1933
1934         return rt6_get_dflt_router(gwaddr, dev);
1935 }
1936
1937 void rt6_purge_dflt_routers(struct net *net)
1938 {
1939         struct rt6_info *rt;
1940         struct fib6_table *table;
1941
1942         /* NOTE: Keep consistent with rt6_get_dflt_router */
1943         table = fib6_get_table(net, RT6_TABLE_DFLT);
1944         if (!table)
1945                 return;
1946
1947 restart:
1948         read_lock_bh(&table->tb6_lock);
1949         for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1950                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1951                         dst_hold(&rt->dst);
1952                         read_unlock_bh(&table->tb6_lock);
1953                         ip6_del_rt(rt);
1954                         goto restart;
1955                 }
1956         }
1957         read_unlock_bh(&table->tb6_lock);
1958 }
1959
1960 static void rtmsg_to_fib6_config(struct net *net,
1961                                  struct in6_rtmsg *rtmsg,
1962                                  struct fib6_config *cfg)
1963 {
1964         memset(cfg, 0, sizeof(*cfg));
1965
1966         cfg->fc_table = RT6_TABLE_MAIN;
1967         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1968         cfg->fc_metric = rtmsg->rtmsg_metric;
1969         cfg->fc_expires = rtmsg->rtmsg_info;
1970         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1971         cfg->fc_src_len = rtmsg->rtmsg_src_len;
1972         cfg->fc_flags = rtmsg->rtmsg_flags;
1973
1974         cfg->fc_nlinfo.nl_net = net;
1975
1976         cfg->fc_dst = rtmsg->rtmsg_dst;
1977         cfg->fc_src = rtmsg->rtmsg_src;
1978         cfg->fc_gateway = rtmsg->rtmsg_gateway;
1979 }
1980
1981 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1982 {
1983         struct fib6_config cfg;
1984         struct in6_rtmsg rtmsg;
1985         int err;
1986
1987         switch(cmd) {
1988         case SIOCADDRT:         /* Add a route */
1989         case SIOCDELRT:         /* Delete a route */
1990                 if (!capable(CAP_NET_ADMIN))
1991                         return -EPERM;
1992                 err = copy_from_user(&rtmsg, arg,
1993                                      sizeof(struct in6_rtmsg));
1994                 if (err)
1995                         return -EFAULT;
1996
1997                 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1998
1999                 rtnl_lock();
2000                 switch (cmd) {
2001                 case SIOCADDRT:
2002                         err = ip6_route_add(&cfg);
2003                         break;
2004                 case SIOCDELRT:
2005                         err = ip6_route_del(&cfg);
2006                         break;
2007                 default:
2008                         err = -EINVAL;
2009                 }
2010                 rtnl_unlock();
2011
2012                 return err;
2013         }
2014
2015         return -EINVAL;
2016 }
2017
2018 /*
2019  *      Drop the packet on the floor
2020  */
2021
2022 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2023 {
2024         int type;
2025         struct dst_entry *dst = skb_dst(skb);
2026         switch (ipstats_mib_noroutes) {
2027         case IPSTATS_MIB_INNOROUTES:
2028                 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2029                 if (type == IPV6_ADDR_ANY) {
2030                         IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2031                                       IPSTATS_MIB_INADDRERRORS);
2032                         break;
2033                 }
2034                 /* FALLTHROUGH */
2035         case IPSTATS_MIB_OUTNOROUTES:
2036                 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2037                               ipstats_mib_noroutes);
2038                 break;
2039         }
2040         icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2041         kfree_skb(skb);
2042         return 0;
2043 }
2044
2045 static int ip6_pkt_discard(struct sk_buff *skb)
2046 {
2047         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2048 }
2049
2050 static int ip6_pkt_discard_out(struct sk_buff *skb)
2051 {
2052         skb->dev = skb_dst(skb)->dev;
2053         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2054 }
2055
2056 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2057
2058 static int ip6_pkt_prohibit(struct sk_buff *skb)
2059 {
2060         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2061 }
2062
2063 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2064 {
2065         skb->dev = skb_dst(skb)->dev;
2066         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2067 }
2068
2069 #endif
2070
2071 /*
2072  *      Allocate a dst for local (unicast / anycast) address.
2073  */
2074
2075 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2076                                     const struct in6_addr *addr,
2077                                     bool anycast)
2078 {
2079         struct net *net = dev_net(idev->dev);
2080         struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
2081         int err;
2082
2083         if (!rt) {
2084                 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
2085                 return ERR_PTR(-ENOMEM);
2086         }
2087
2088         in6_dev_hold(idev);
2089
2090         rt->dst.flags |= DST_HOST;
2091         rt->dst.input = ip6_input;
2092         rt->dst.output = ip6_output;
2093         rt->rt6i_idev = idev;
2094
2095         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2096         if (anycast)
2097                 rt->rt6i_flags |= RTF_ANYCAST;
2098         else
2099                 rt->rt6i_flags |= RTF_LOCAL;
2100         err = rt6_bind_neighbour(rt, rt->dst.dev);
2101         if (err) {
2102                 dst_free(&rt->dst);
2103                 return ERR_PTR(err);
2104         }
2105
2106         rt->rt6i_dst.addr = *addr;
2107         rt->rt6i_dst.plen = 128;
2108         rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2109
2110         atomic_set(&rt->dst.__refcnt, 1);
2111
2112         return rt;
2113 }
2114
2115 int ip6_route_get_saddr(struct net *net,
2116                         struct rt6_info *rt,
2117                         const struct in6_addr *daddr,
2118                         unsigned int prefs,
2119                         struct in6_addr *saddr)
2120 {
2121         struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2122         int err = 0;
2123         if (rt->rt6i_prefsrc.plen)
2124                 *saddr = rt->rt6i_prefsrc.addr;
2125         else
2126                 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2127                                          daddr, prefs, saddr);
2128         return err;
2129 }
2130
/*
 * remove deleted ip from prefsrc entries
 *
 * Argument bundle handed to fib6_remove_prefsrc() through fib6_clean_all().
 */
struct arg_dev_net_ip {
	struct net_device *dev;		/* only routes on this device; NULL matches any */
	struct net *net;		/* namespace whose ip6_null_entry is skipped */
	struct in6_addr *addr;		/* address being removed */
};
2137
2138 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2139 {
2140         struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2141         struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2142         struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2143
2144         if (((void *)rt->dst.dev == dev || !dev) &&
2145             rt != net->ipv6.ip6_null_entry &&
2146             ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2147                 /* remove prefsrc entry */
2148                 rt->rt6i_prefsrc.plen = 0;
2149         }
2150         return 0;
2151 }
2152
2153 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2154 {
2155         struct net *net = dev_net(ifp->idev->dev);
2156         struct arg_dev_net_ip adni = {
2157                 .dev = ifp->idev->dev,
2158                 .net = net,
2159                 .addr = &ifp->addr,
2160         };
2161         fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2162 }
2163
/* Argument bundle handed to fib6_ifdown() by rt6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;		/* device to match; NULL matches any */
	struct net *net;		/* namespace whose ip6_null_entry is skipped */
};
2168
2169 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2170 {
2171         const struct arg_dev_net *adn = arg;
2172         const struct net_device *dev = adn->dev;
2173
2174         if ((rt->dst.dev == dev || !dev) &&
2175             rt != adn->net->ipv6.ip6_null_entry)
2176                 return -1;
2177
2178         return 0;
2179 }
2180
2181 void rt6_ifdown(struct net *net, struct net_device *dev)
2182 {
2183         struct arg_dev_net adn = {
2184                 .dev = dev,
2185                 .net = net,
2186         };
2187
2188         fib6_clean_all(net, fib6_ifdown, 0, &adn);
2189         icmp6_clean_all(fib6_ifdown, &adn);
2190 }
2191
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU */
};
2196
2197 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2198 {
2199         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2200         struct inet6_dev *idev;
2201
2202         /* In IPv6 pmtu discovery is not optional,
2203            so that RTAX_MTU lock cannot disable it.
2204            We still use this lock to block changes
2205            caused by addrconf/ndisc.
2206         */
2207
2208         idev = __in6_dev_get(arg->dev);
2209         if (!idev)
2210                 return 0;
2211
2212         /* For administrative MTU increase, there is no way to discover
2213            IPv6 PMTU increase, so PMTU increase should be updated here.
2214            Since RFC 1981 doesn't include administrative MTU increase
2215            update PMTU increase is a MUST. (i.e. jumbo frame)
2216          */
2217         /*
2218            If new MTU is less than route PMTU, this new MTU will be the
2219            lowest MTU in the path, update the route PMTU to reflect PMTU
2220            decreases; if new MTU is greater than route PMTU, and the
2221            old MTU is the lowest MTU in the path, update the route PMTU
2222            to reflect the increase. In this case if the other nodes' MTU
2223            also have the lowest MTU, TOO BIG MESSAGE will be lead to
2224            PMTU discouvery.
2225          */
2226         if (rt->dst.dev == arg->dev &&
2227             !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2228             (dst_mtu(&rt->dst) >= arg->mtu ||
2229              (dst_mtu(&rt->dst) < arg->mtu &&
2230               dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2231                 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2232         }
2233         return 0;
2234 }
2235
2236 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2237 {
2238         struct rt6_mtu_change_arg arg = {
2239                 .dev = dev,
2240                 .mtu = mtu,
2241         };
2242
2243         fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2244 }
2245
2246 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2247         [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2248         [RTA_OIF]               = { .type = NLA_U32 },
2249         [RTA_IIF]               = { .type = NLA_U32 },
2250         [RTA_PRIORITY]          = { .type = NLA_U32 },
2251         [RTA_METRICS]           = { .type = NLA_NESTED },
2252 };
2253
2254 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2255                               struct fib6_config *cfg)
2256 {
2257         struct rtmsg *rtm;
2258         struct nlattr *tb[RTA_MAX+1];
2259         int err;
2260
2261         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2262         if (err < 0)
2263                 goto errout;
2264
2265         err = -EINVAL;
2266         rtm = nlmsg_data(nlh);
2267         memset(cfg, 0, sizeof(*cfg));
2268
2269         cfg->fc_table = rtm->rtm_table;
2270         cfg->fc_dst_len = rtm->rtm_dst_len;
2271         cfg->fc_src_len = rtm->rtm_src_len;
2272         cfg->fc_flags = RTF_UP;
2273         cfg->fc_protocol = rtm->rtm_protocol;
2274         cfg->fc_type = rtm->rtm_type;
2275
2276         if (rtm->rtm_type == RTN_UNREACHABLE ||
2277             rtm->rtm_type == RTN_BLACKHOLE ||
2278             rtm->rtm_type == RTN_PROHIBIT ||
2279             rtm->rtm_type == RTN_THROW)
2280                 cfg->fc_flags |= RTF_REJECT;
2281
2282         if (rtm->rtm_type == RTN_LOCAL)
2283                 cfg->fc_flags |= RTF_LOCAL;
2284
2285         cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2286         cfg->fc_nlinfo.nlh = nlh;
2287         cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2288
2289         if (tb[RTA_GATEWAY]) {
2290                 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2291                 cfg->fc_flags |= RTF_GATEWAY;
2292         }
2293
2294         if (tb[RTA_DST]) {
2295                 int plen = (rtm->rtm_dst_len + 7) >> 3;
2296
2297                 if (nla_len(tb[RTA_DST]) < plen)
2298                         goto errout;
2299
2300                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2301         }
2302
2303         if (tb[RTA_SRC]) {
2304                 int plen = (rtm->rtm_src_len + 7) >> 3;
2305
2306                 if (nla_len(tb[RTA_SRC]) < plen)
2307                         goto errout;
2308
2309                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2310         }
2311
2312         if (tb[RTA_PREFSRC])
2313                 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2314
2315         if (tb[RTA_OIF])
2316                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2317
2318         if (tb[RTA_PRIORITY])
2319                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2320
2321         if (tb[RTA_METRICS]) {
2322                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2323                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2324         }
2325
2326         if (tb[RTA_TABLE])
2327                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2328
2329         err = 0;
2330 errout:
2331         return err;
2332 }
2333
2334 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2335 {
2336         struct fib6_config cfg;
2337         int err;
2338
2339         err = rtm_to_fib6_config(skb, nlh, &cfg);
2340         if (err < 0)
2341                 return err;
2342
2343         return ip6_route_del(&cfg);
2344 }
2345
2346 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2347 {
2348         struct fib6_config cfg;
2349         int err;
2350
2351         err = rtm_to_fib6_config(skb, nlh, &cfg);
2352         if (err < 0)
2353                 return err;
2354
2355         return ip6_route_add(&cfg);
2356 }
2357
2358 static inline size_t rt6_nlmsg_size(void)
2359 {
2360         return NLMSG_ALIGN(sizeof(struct rtmsg))
2361                + nla_total_size(16) /* RTA_SRC */
2362                + nla_total_size(16) /* RTA_DST */
2363                + nla_total_size(16) /* RTA_GATEWAY */
2364                + nla_total_size(16) /* RTA_PREFSRC */
2365                + nla_total_size(4) /* RTA_TABLE */
2366                + nla_total_size(4) /* RTA_IIF */
2367                + nla_total_size(4) /* RTA_OIF */
2368                + nla_total_size(4) /* RTA_PRIORITY */
2369                + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2370                + nla_total_size(sizeof(struct rta_cacheinfo));
2371 }
2372
2373 static int rt6_fill_node(struct net *net,
2374                          struct sk_buff *skb, struct rt6_info *rt,
2375                          struct in6_addr *dst, struct in6_addr *src,
2376                          int iif, int type, u32 portid, u32 seq,
2377                          int prefix, int nowait, unsigned int flags)
2378 {
2379         struct rtmsg *rtm;
2380         struct nlmsghdr *nlh;
2381         long expires;
2382         u32 table;
2383         struct neighbour *n;
2384
2385         if (prefix) {   /* user wants prefix routes only */
2386                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2387                         /* success since this is not a prefix route */
2388                         return 1;
2389                 }
2390         }
2391
2392         nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
2393         if (!nlh)
2394                 return -EMSGSIZE;
2395
2396         rtm = nlmsg_data(nlh);
2397         rtm->rtm_family = AF_INET6;
2398         rtm->rtm_dst_len = rt->rt6i_dst.plen;
2399         rtm->rtm_src_len = rt->rt6i_src.plen;
2400         rtm->rtm_tos = 0;
2401         if (rt->rt6i_table)
2402                 table = rt->rt6i_table->tb6_id;
2403         else
2404                 table = RT6_TABLE_UNSPEC;
2405         rtm->rtm_table = table;
2406         if (nla_put_u32(skb, RTA_TABLE, table))
2407                 goto nla_put_failure;
2408         if (rt->rt6i_flags & RTF_REJECT) {
2409                 switch (rt->dst.error) {
2410                 case -EINVAL:
2411                         rtm->rtm_type = RTN_BLACKHOLE;
2412                         break;
2413                 case -EACCES:
2414                         rtm->rtm_type = RTN_PROHIBIT;
2415                         break;
2416                 case -EAGAIN:
2417                         rtm->rtm_type = RTN_THROW;
2418                         break;
2419                 default:
2420                         rtm->rtm_type = RTN_UNREACHABLE;
2421                         break;
2422                 }
2423         }
2424         else if (rt->rt6i_flags & RTF_LOCAL)
2425                 rtm->rtm_type = RTN_LOCAL;
2426         else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2427                 rtm->rtm_type = RTN_LOCAL;
2428         else
2429                 rtm->rtm_type = RTN_UNICAST;
2430         rtm->rtm_flags = 0;
2431         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2432         rtm->rtm_protocol = rt->rt6i_protocol;
2433         if (rt->rt6i_flags & RTF_DYNAMIC)
2434                 rtm->rtm_protocol = RTPROT_REDIRECT;
2435         else if (rt->rt6i_flags & RTF_ADDRCONF) {
2436                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2437                         rtm->rtm_protocol = RTPROT_RA;
2438                 else
2439                         rtm->rtm_protocol = RTPROT_KERNEL;
2440         }
2441
2442         if (rt->rt6i_flags & RTF_CACHE)
2443                 rtm->rtm_flags |= RTM_F_CLONED;
2444
2445         if (dst) {
2446                 if (nla_put(skb, RTA_DST, 16, dst))
2447                         goto nla_put_failure;
2448                 rtm->rtm_dst_len = 128;
2449         } else if (rtm->rtm_dst_len)
2450                 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2451                         goto nla_put_failure;
2452 #ifdef CONFIG_IPV6_SUBTREES
2453         if (src) {
2454                 if (nla_put(skb, RTA_SRC, 16, src))
2455                         goto nla_put_failure;
2456                 rtm->rtm_src_len = 128;
2457         } else if (rtm->rtm_src_len &&
2458                    nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2459                 goto nla_put_failure;
2460 #endif
2461         if (iif) {
2462 #ifdef CONFIG_IPV6_MROUTE
2463                 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2464                         int err = ip6mr_get_route(net, skb, rtm, nowait);
2465                         if (err <= 0) {
2466                                 if (!nowait) {
2467                                         if (err == 0)
2468                                                 return 0;
2469                                         goto nla_put_failure;
2470                                 } else {
2471                                         if (err == -EMSGSIZE)
2472                                                 goto nla_put_failure;
2473                                 }
2474                         }
2475                 } else
2476 #endif
2477                         if (nla_put_u32(skb, RTA_IIF, iif))
2478                                 goto nla_put_failure;
2479         } else if (dst) {
2480                 struct in6_addr saddr_buf;
2481                 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2482                     nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2483                         goto nla_put_failure;
2484         }
2485
2486         if (rt->rt6i_prefsrc.plen) {
2487                 struct in6_addr saddr_buf;
2488                 saddr_buf = rt->rt6i_prefsrc.addr;
2489                 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2490                         goto nla_put_failure;
2491         }
2492
2493         if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2494                 goto nla_put_failure;
2495
2496         n = rt->n;
2497         if (n) {
2498                 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0)
2499                         goto nla_put_failure;
2500         }
2501
2502         if (rt->dst.dev &&
2503             nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2504                 goto nla_put_failure;
2505         if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2506                 goto nla_put_failure;
2507
2508         expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2509
2510         if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2511                 goto nla_put_failure;
2512
2513         return nlmsg_end(skb, nlh);
2514
2515 nla_put_failure:
2516         nlmsg_cancel(skb, nlh);
2517         return -EMSGSIZE;
2518 }
2519
2520 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2521 {
2522         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2523         int prefix;
2524
2525         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2526                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2527                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2528         } else
2529                 prefix = 0;
2530
2531         return rt6_fill_node(arg->net,
2532                      arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2533                      NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2534                      prefix, 0, NLM_F_MULTI);
2535 }
2536
2537 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2538 {
2539         struct net *net = sock_net(in_skb->sk);
2540         struct nlattr *tb[RTA_MAX+1];
2541         struct rt6_info *rt;
2542         struct sk_buff *skb;
2543         struct rtmsg *rtm;
2544         struct flowi6 fl6;
2545         int err, iif = 0, oif = 0;
2546
2547         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2548         if (err < 0)
2549                 goto errout;
2550
2551         err = -EINVAL;
2552         memset(&fl6, 0, sizeof(fl6));
2553
2554         if (tb[RTA_SRC]) {
2555                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2556                         goto errout;
2557
2558                 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2559         }
2560
2561         if (tb[RTA_DST]) {
2562                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2563                         goto errout;
2564
2565                 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2566         }
2567
2568         if (tb[RTA_IIF])
2569                 iif = nla_get_u32(tb[RTA_IIF]);
2570
2571         if (tb[RTA_OIF])
2572                 oif = nla_get_u32(tb[RTA_OIF]);
2573
2574         if (iif) {
2575                 struct net_device *dev;
2576                 int flags = 0;
2577
2578                 dev = __dev_get_by_index(net, iif);
2579                 if (!dev) {
2580                         err = -ENODEV;
2581                         goto errout;
2582                 }
2583
2584                 fl6.flowi6_iif = iif;
2585
2586                 if (!ipv6_addr_any(&fl6.saddr))
2587                         flags |= RT6_LOOKUP_F_HAS_SADDR;
2588
2589                 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2590                                                                flags);
2591         } else {
2592                 fl6.flowi6_oif = oif;
2593
2594                 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2595         }
2596
2597         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2598         if (!skb) {
2599                 dst_release(&rt->dst);
2600                 err = -ENOBUFS;
2601                 goto errout;
2602         }
2603
2604         /* Reserve room for dummy headers, this skb can pass
2605            through good chunk of routing engine.
2606          */
2607         skb_reset_mac_header(skb);
2608         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2609
2610         skb_dst_set(skb, &rt->dst);
2611
2612         err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2613                             RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2614                             nlh->nlmsg_seq, 0, 0, 0);
2615         if (err < 0) {
2616                 kfree_skb(skb);
2617                 goto errout;
2618         }
2619
2620         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2621 errout:
2622         return err;
2623 }
2624
2625 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2626 {
2627         struct sk_buff *skb;
2628         struct net *net = info->nl_net;
2629         u32 seq;
2630         int err;
2631
2632         err = -ENOBUFS;
2633         seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2634
2635         skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2636         if (!skb)
2637                 goto errout;
2638
2639         err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2640                                 event, info->portid, seq, 0, 0, 0);
2641         if (err < 0) {
2642                 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2643                 WARN_ON(err == -EMSGSIZE);
2644                 kfree_skb(skb);
2645                 goto errout;
2646         }
2647         rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2648                     info->nlh, gfp_any());
2649         return;
2650 errout:
2651         if (err < 0)
2652                 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2653 }
2654
2655 static int ip6_route_dev_notify(struct notifier_block *this,
2656                                 unsigned long event, void *data)
2657 {
2658         struct net_device *dev = (struct net_device *)data;
2659         struct net *net = dev_net(dev);
2660
2661         if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2662                 net->ipv6.ip6_null_entry->dst.dev = dev;
2663                 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2664 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2665                 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2666                 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2667                 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2668                 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2669 #endif
2670         }
2671
2672         return NOTIFY_OK;
2673 }
2674
2675 /*
2676  *      /proc
2677  */
2678
2679 #ifdef CONFIG_PROC_FS
2680
/*
 * Buffer bookkeeping record for /proc output.
 *
 * NOTE(review): none of the seq_file based handlers visible in this part
 * of the file reference this structure; confirm whether it still has users
 * before relying on the field meanings, which are not established here.
 */
struct rt6_proc_arg {
        char *buffer;
        int offset;
        int length;
        int skip;
        int len;
};
2689
2690 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2691 {
2692         struct seq_file *m = p_arg;
2693         struct neighbour *n;
2694
2695         seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2696
2697 #ifdef CONFIG_IPV6_SUBTREES
2698         seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2699 #else
2700         seq_puts(m, "00000000000000000000000000000000 00 ");
2701 #endif
2702         n = rt->n;
2703         if (n) {
2704                 seq_printf(m, "%pi6", n->primary_key);
2705         } else {
2706                 seq_puts(m, "00000000000000000000000000000000");
2707         }
2708         seq_printf(m, " %08x %08x %08x %08x %8s\n",
2709                    rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2710                    rt->dst.__use, rt->rt6i_flags,
2711                    rt->dst.dev ? rt->dst.dev->name : "");
2712         return 0;
2713 }
2714
2715 static int ipv6_route_show(struct seq_file *m, void *v)
2716 {
2717         struct net *net = (struct net *)m->private;
2718         fib6_clean_all_ro(net, rt6_info_route, 0, m);
2719         return 0;
2720 }
2721
2722 static int ipv6_route_open(struct inode *inode, struct file *file)
2723 {
2724         return single_open_net(inode, file, ipv6_route_show);
2725 }
2726
2727 static const struct file_operations ipv6_route_proc_fops = {
2728         .owner          = THIS_MODULE,
2729         .open           = ipv6_route_open,
2730         .read           = seq_read,
2731         .llseek         = seq_lseek,
2732         .release        = single_release_net,
2733 };
2734
2735 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2736 {
2737         struct net *net = (struct net *)seq->private;
2738         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2739                    net->ipv6.rt6_stats->fib_nodes,
2740                    net->ipv6.rt6_stats->fib_route_nodes,
2741                    net->ipv6.rt6_stats->fib_rt_alloc,
2742                    net->ipv6.rt6_stats->fib_rt_entries,
2743                    net->ipv6.rt6_stats->fib_rt_cache,
2744                    dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2745                    net->ipv6.rt6_stats->fib_discarded_routes);
2746
2747         return 0;
2748 }
2749
2750 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2751 {
2752         return single_open_net(inode, file, rt6_stats_seq_show);
2753 }
2754
2755 static const struct file_operations rt6_stats_seq_fops = {
2756         .owner   = THIS_MODULE,
2757         .open    = rt6_stats_seq_open,
2758         .read    = seq_read,
2759         .llseek  = seq_lseek,
2760         .release = single_release_net,
2761 };
2762 #endif  /* CONFIG_PROC_FS */
2763
2764 #ifdef CONFIG_SYSCTL
2765
2766 static
2767 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2768                               void __user *buffer, size_t *lenp, loff_t *ppos)
2769 {
2770         struct net *net;
2771         int delay;
2772         if (!write)
2773                 return -EINVAL;
2774
2775         net = (struct net *)ctl->extra1;
2776         delay = net->ipv6.sysctl.flush_delay;
2777         proc_dointvec(ctl, write, buffer, lenp, ppos);
2778         fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2779         return 0;
2780 }
2781
2782 ctl_table ipv6_route_table_template[] = {
2783         {
2784                 .procname       =       "flush",
2785                 .data           =       &init_net.ipv6.sysctl.flush_delay,
2786                 .maxlen         =       sizeof(int),
2787                 .mode           =       0200,
2788                 .proc_handler   =       ipv6_sysctl_rtcache_flush
2789         },
2790         {
2791                 .procname       =       "gc_thresh",
2792                 .data           =       &ip6_dst_ops_template.gc_thresh,
2793                 .maxlen         =       sizeof(int),
2794                 .mode           =       0644,
2795                 .proc_handler   =       proc_dointvec,
2796         },
2797         {
2798                 .procname       =       "max_size",
2799                 .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
2800                 .maxlen         =       sizeof(int),
2801                 .mode           =       0644,
2802                 .proc_handler   =       proc_dointvec,
2803         },
2804         {
2805                 .procname       =       "gc_min_interval",
2806                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2807                 .maxlen         =       sizeof(int),
2808                 .mode           =       0644,
2809                 .proc_handler   =       proc_dointvec_jiffies,
2810         },
2811         {
2812                 .procname       =       "gc_timeout",
2813                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2814                 .maxlen         =       sizeof(int),
2815                 .mode           =       0644,
2816                 .proc_handler   =       proc_dointvec_jiffies,
2817         },
2818         {
2819                 .procname       =       "gc_interval",
2820                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2821                 .maxlen         =       sizeof(int),
2822                 .mode           =       0644,
2823                 .proc_handler   =       proc_dointvec_jiffies,
2824         },
2825         {
2826                 .procname       =       "gc_elasticity",
2827                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2828                 .maxlen         =       sizeof(int),
2829                 .mode           =       0644,
2830                 .proc_handler   =       proc_dointvec,
2831         },
2832         {
2833                 .procname       =       "mtu_expires",
2834                 .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2835                 .maxlen         =       sizeof(int),
2836                 .mode           =       0644,
2837                 .proc_handler   =       proc_dointvec_jiffies,
2838         },
2839         {
2840                 .procname       =       "min_adv_mss",
2841                 .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2842                 .maxlen         =       sizeof(int),
2843                 .mode           =       0644,
2844                 .proc_handler   =       proc_dointvec,
2845         },
2846         {
2847                 .procname       =       "gc_min_interval_ms",
2848                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2849                 .maxlen         =       sizeof(int),
2850                 .mode           =       0644,
2851                 .proc_handler   =       proc_dointvec_ms_jiffies,
2852         },
2853         { }
2854 };
2855
2856 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2857 {
2858         struct ctl_table *table;
2859
2860         table = kmemdup(ipv6_route_table_template,
2861                         sizeof(ipv6_route_table_template),
2862                         GFP_KERNEL);
2863
2864         if (table) {
2865                 table[0].data = &net->ipv6.sysctl.flush_delay;
2866                 table[0].extra1 = net;
2867                 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2868                 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2869                 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2870                 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2871                 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2872                 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2873                 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2874                 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2875                 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2876         }
2877
2878         return table;
2879 }
2880 #endif
2881
2882 static int __net_init ip6_route_net_init(struct net *net)
2883 {
2884         int ret = -ENOMEM;
2885
2886         memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2887                sizeof(net->ipv6.ip6_dst_ops));
2888
2889         if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2890                 goto out_ip6_dst_ops;
2891
2892         net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2893                                            sizeof(*net->ipv6.ip6_null_entry),
2894                                            GFP_KERNEL);
2895         if (!net->ipv6.ip6_null_entry)
2896                 goto out_ip6_dst_entries;
2897         net->ipv6.ip6_null_entry->dst.path =
2898                 (struct dst_entry *)net->ipv6.ip6_null_entry;
2899         net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2900         dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2901                          ip6_template_metrics, true);
2902
2903 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2904         net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2905                                                sizeof(*net->ipv6.ip6_prohibit_entry),
2906                                                GFP_KERNEL);
2907         if (!net->ipv6.ip6_prohibit_entry)
2908                 goto out_ip6_null_entry;
2909         net->ipv6.ip6_prohibit_entry->dst.path =
2910                 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2911         net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2912         dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2913                          ip6_template_metrics, true);
2914
2915         net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2916                                                sizeof(*net->ipv6.ip6_blk_hole_entry),
2917                                                GFP_KERNEL);
2918         if (!net->ipv6.ip6_blk_hole_entry)
2919                 goto out_ip6_prohibit_entry;
2920         net->ipv6.ip6_blk_hole_entry->dst.path =
2921                 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2922         net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2923         dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2924                          ip6_template_metrics, true);
2925 #endif
2926
2927         net->ipv6.sysctl.flush_delay = 0;
2928         net->ipv6.sysctl.ip6_rt_max_size = 4096;
2929         net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2930         net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2931         net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2932         net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2933         net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2934         net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2935
2936         net->ipv6.ip6_rt_gc_expire = 30*HZ;
2937
2938         ret = 0;
2939 out:
2940         return ret;
2941
2942 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2943 out_ip6_prohibit_entry:
2944         kfree(net->ipv6.ip6_prohibit_entry);
2945 out_ip6_null_entry:
2946         kfree(net->ipv6.ip6_null_entry);
2947 #endif
2948 out_ip6_dst_entries:
2949         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2950 out_ip6_dst_ops:
2951         goto out;
2952 }
2953
2954 static void __net_exit ip6_route_net_exit(struct net *net)
2955 {
2956         kfree(net->ipv6.ip6_null_entry);
2957 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2958         kfree(net->ipv6.ip6_prohibit_entry);
2959         kfree(net->ipv6.ip6_blk_hole_entry);
2960 #endif
2961         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2962 }
2963
2964 static int __net_init ip6_route_net_init_late(struct net *net)
2965 {
2966 #ifdef CONFIG_PROC_FS
2967         proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2968         proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2969 #endif
2970         return 0;
2971 }
2972
2973 static void __net_exit ip6_route_net_exit_late(struct net *net)
2974 {
2975 #ifdef CONFIG_PROC_FS
2976         proc_net_remove(net, "ipv6_route");
2977         proc_net_remove(net, "rt6_stats");
2978 #endif
2979 }
2980
2981 static struct pernet_operations ip6_route_net_ops = {
2982         .init = ip6_route_net_init,
2983         .exit = ip6_route_net_exit,
2984 };
2985
2986 static int __net_init ipv6_inetpeer_init(struct net *net)
2987 {
2988         struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
2989
2990         if (!bp)
2991                 return -ENOMEM;
2992         inet_peer_base_init(bp);
2993         net->ipv6.peers = bp;
2994         return 0;
2995 }
2996
2997 static void __net_exit ipv6_inetpeer_exit(struct net *net)
2998 {
2999         struct inet_peer_base *bp = net->ipv6.peers;
3000
3001         net->ipv6.peers = NULL;
3002         inetpeer_invalidate_tree(bp);
3003         kfree(bp);
3004 }
3005
3006 static struct pernet_operations ipv6_inetpeer_ops = {
3007         .init   =       ipv6_inetpeer_init,
3008         .exit   =       ipv6_inetpeer_exit,
3009 };
3010
3011 static struct pernet_operations ip6_route_net_late_ops = {
3012         .init = ip6_route_net_init_late,
3013         .exit = ip6_route_net_exit_late,
3014 };
3015
3016 static struct notifier_block ip6_route_dev_notifier = {
3017         .notifier_call = ip6_route_dev_notify,
3018         .priority = 0,
3019 };
3020
3021 int __init ip6_route_init(void)
3022 {
3023         int ret;
3024
3025         ret = -ENOMEM;
3026         ip6_dst_ops_template.kmem_cachep =
3027                 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3028                                   SLAB_HWCACHE_ALIGN, NULL);
3029         if (!ip6_dst_ops_template.kmem_cachep)
3030                 goto out;
3031
3032         ret = dst_entries_init(&ip6_dst_blackhole_ops);
3033         if (ret)
3034                 goto out_kmem_cache;
3035
3036         ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3037         if (ret)
3038                 goto out_dst_entries;
3039
3040         ret = register_pernet_subsys(&ip6_route_net_ops);
3041         if (ret)
3042                 goto out_register_inetpeer;
3043
3044         ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3045
3046         /* Registering of the loopback is done before this portion of code,
3047          * the loopback reference in rt6_info will not be taken, do it
3048          * manually for init_net */
3049         init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3050         init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3051   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3052         init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3053         init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3054         init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3055         init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3056   #endif
3057         ret = fib6_init();
3058         if (ret)
3059                 goto out_register_subsys;
3060
3061         ret = xfrm6_init();
3062         if (ret)
3063                 goto out_fib6_init;
3064
3065         ret = fib6_rules_init();
3066         if (ret)
3067                 goto xfrm6_init;
3068
3069         ret = register_pernet_subsys(&ip6_route_net_late_ops);
3070         if (ret)
3071                 goto fib6_rules_init;
3072
3073         ret = -ENOBUFS;
3074         if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3075             __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3076             __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3077                 goto out_register_late_subsys;
3078
3079         ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3080         if (ret)
3081                 goto out_register_late_subsys;
3082
3083 out:
3084         return ret;
3085
3086 out_register_late_subsys:
3087         unregister_pernet_subsys(&ip6_route_net_late_ops);
3088 fib6_rules_init:
3089         fib6_rules_cleanup();
3090 xfrm6_init:
3091         xfrm6_fini();
3092 out_fib6_init:
3093         fib6_gc_cleanup();
3094 out_register_subsys:
3095         unregister_pernet_subsys(&ip6_route_net_ops);
3096 out_register_inetpeer:
3097         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3098 out_dst_entries:
3099         dst_entries_destroy(&ip6_dst_blackhole_ops);
3100 out_kmem_cache:
3101         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3102         goto out;
3103 }
3104
3105 void ip6_route_cleanup(void)
3106 {
3107         unregister_netdevice_notifier(&ip6_route_dev_notifier);
3108         unregister_pernet_subsys(&ip6_route_net_late_ops);
3109         fib6_rules_cleanup();
3110         xfrm6_fini();
3111         fib6_gc_cleanup();
3112         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3113         unregister_pernet_subsys(&ip6_route_net_ops);
3114         dst_entries_destroy(&ip6_dst_blackhole_ops);
3115         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3116 }