git.kernelconcepts.de Git - karo-tx-linux.git/blob - net/ipv6/route.c
ipv6: move ip6_dst_hoplimit() into core kernel
[karo-tx-linux.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13
14 /*      Changes:
15  *
16  *      YOSHIFUJI Hideaki @USAGI
17  *              reworked default router selection.
18  *              - respect outgoing interface
19  *              - select from (probably) reachable routers (i.e.
20  *              routers in REACHABLE, STALE, DELAY or PROBE states).
21  *              - always select the same router if it is (probably)
22  *              reachable.  otherwise, round-robin the list.
23  *      Ville Nuorvala
24  *              Fixed routing subtrees.
25  */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60 #include <net/nexthop.h>
61
62 #include <asm/uaccess.h>
63
64 #ifdef CONFIG_SYSCTL
65 #include <linux/sysctl.h>
66 #endif
67
/*
 * Result of the next-hop neighbour check done by rt6_check_neigh().
 * Failures are negative so that callers can reject a route with "< 0".
 */
enum rt6_nud_state {
	RT6_NUD_SUCCEED   =  1,
	RT6_NUD_FAIL_SOFT = -1,
	RT6_NUD_FAIL_HARD = -2,
};
73
74 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
75                                     const struct in6_addr *dest);
76 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
77 static unsigned int      ip6_default_advmss(const struct dst_entry *dst);
78 static unsigned int      ip6_mtu(const struct dst_entry *dst);
79 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80 static void             ip6_dst_destroy(struct dst_entry *);
81 static void             ip6_dst_ifdown(struct dst_entry *,
82                                        struct net_device *dev, int how);
83 static int               ip6_dst_gc(struct dst_ops *ops);
84
85 static int              ip6_pkt_discard(struct sk_buff *skb);
86 static int              ip6_pkt_discard_out(struct sk_buff *skb);
87 static void             ip6_link_failure(struct sk_buff *skb);
88 static void             ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
89                                            struct sk_buff *skb, u32 mtu);
90 static void             rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
91                                         struct sk_buff *skb);
92 static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
93
#ifdef CONFIG_IPV6_ROUTE_INFO
/* Route-info helpers used by rt6_route_rcv() before they are defined. */
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix,
					   int prefixlen,
					   const struct in6_addr *gwaddr,
					   int ifindex,
					   unsigned int pref);
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix,
					   int prefixlen,
					   const struct in6_addr *gwaddr,
					   int ifindex);
#endif
103
104 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
105 {
106         struct rt6_info *rt = (struct rt6_info *) dst;
107         struct inet_peer *peer;
108         u32 *p = NULL;
109
110         if (!(rt->dst.flags & DST_HOST))
111                 return NULL;
112
113         peer = rt6_get_peer_create(rt);
114         if (peer) {
115                 u32 *old_p = __DST_METRICS_PTR(old);
116                 unsigned long prev, new;
117
118                 p = peer->metrics;
119                 if (inet_metrics_new(peer))
120                         memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
121
122                 new = (unsigned long) p;
123                 prev = cmpxchg(&dst->_metrics, old, new);
124
125                 if (prev != old) {
126                         p = __DST_METRICS_PTR(prev);
127                         if (prev & DST_METRICS_READ_ONLY)
128                                 p = NULL;
129                 }
130         }
131         return p;
132 }
133
134 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
135                                              struct sk_buff *skb,
136                                              const void *daddr)
137 {
138         struct in6_addr *p = &rt->rt6i_gateway;
139
140         if (!ipv6_addr_any(p))
141                 return (const void *) p;
142         else if (skb)
143                 return &ipv6_hdr(skb)->daddr;
144         return daddr;
145 }
146
147 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
148                                           struct sk_buff *skb,
149                                           const void *daddr)
150 {
151         struct rt6_info *rt = (struct rt6_info *) dst;
152         struct neighbour *n;
153
154         daddr = choose_neigh_daddr(rt, skb, daddr);
155         n = __ipv6_neigh_lookup(dst->dev, daddr);
156         if (n)
157                 return n;
158         return neigh_create(&nd_tbl, daddr, dst->dev);
159 }
160
161 static struct dst_ops ip6_dst_ops_template = {
162         .family                 =       AF_INET6,
163         .protocol               =       cpu_to_be16(ETH_P_IPV6),
164         .gc                     =       ip6_dst_gc,
165         .gc_thresh              =       1024,
166         .check                  =       ip6_dst_check,
167         .default_advmss         =       ip6_default_advmss,
168         .mtu                    =       ip6_mtu,
169         .cow_metrics            =       ipv6_cow_metrics,
170         .destroy                =       ip6_dst_destroy,
171         .ifdown                 =       ip6_dst_ifdown,
172         .negative_advice        =       ip6_negative_advice,
173         .link_failure           =       ip6_link_failure,
174         .update_pmtu            =       ip6_rt_update_pmtu,
175         .redirect               =       rt6_do_redirect,
176         .local_out              =       __ip6_local_out,
177         .neigh_lookup           =       ip6_neigh_lookup,
178 };
179
180 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
181 {
182         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
183
184         return mtu ? : dst->dev->mtu;
185 }
186
187 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
188                                          struct sk_buff *skb, u32 mtu)
189 {
190 }
191
/* dst_ops->redirect for blackhole routes: deliberately does nothing. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
	/* intentionally empty */
}
196
197 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
198                                          unsigned long old)
199 {
200         return NULL;
201 }
202
203 static struct dst_ops ip6_dst_blackhole_ops = {
204         .family                 =       AF_INET6,
205         .protocol               =       cpu_to_be16(ETH_P_IPV6),
206         .destroy                =       ip6_dst_destroy,
207         .check                  =       ip6_dst_check,
208         .mtu                    =       ip6_blackhole_mtu,
209         .default_advmss         =       ip6_default_advmss,
210         .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
211         .redirect               =       ip6_rt_blackhole_redirect,
212         .cow_metrics            =       ip6_rt_blackhole_cow_metrics,
213         .neigh_lookup           =       ip6_neigh_lookup,
214 };
215
216 static const u32 ip6_template_metrics[RTAX_MAX] = {
217         [RTAX_HOPLIMIT - 1] = 0,
218 };
219
220 static const struct rt6_info ip6_null_entry_template = {
221         .dst = {
222                 .__refcnt       = ATOMIC_INIT(1),
223                 .__use          = 1,
224                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
225                 .error          = -ENETUNREACH,
226                 .input          = ip6_pkt_discard,
227                 .output         = ip6_pkt_discard_out,
228         },
229         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
230         .rt6i_protocol  = RTPROT_KERNEL,
231         .rt6i_metric    = ~(u32) 0,
232         .rt6i_ref       = ATOMIC_INIT(1),
233 };
234
235 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
236
237 static int ip6_pkt_prohibit(struct sk_buff *skb);
238 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
239
240 static const struct rt6_info ip6_prohibit_entry_template = {
241         .dst = {
242                 .__refcnt       = ATOMIC_INIT(1),
243                 .__use          = 1,
244                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
245                 .error          = -EACCES,
246                 .input          = ip6_pkt_prohibit,
247                 .output         = ip6_pkt_prohibit_out,
248         },
249         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
250         .rt6i_protocol  = RTPROT_KERNEL,
251         .rt6i_metric    = ~(u32) 0,
252         .rt6i_ref       = ATOMIC_INIT(1),
253 };
254
255 static const struct rt6_info ip6_blk_hole_entry_template = {
256         .dst = {
257                 .__refcnt       = ATOMIC_INIT(1),
258                 .__use          = 1,
259                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
260                 .error          = -EINVAL,
261                 .input          = dst_discard,
262                 .output         = dst_discard,
263         },
264         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
265         .rt6i_protocol  = RTPROT_KERNEL,
266         .rt6i_metric    = ~(u32) 0,
267         .rt6i_ref       = ATOMIC_INIT(1),
268 };
269
270 #endif
271
272 /* allocate dst with ip6_dst_ops */
273 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
274                                              struct net_device *dev,
275                                              int flags,
276                                              struct fib6_table *table)
277 {
278         struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
279                                         0, DST_OBSOLETE_FORCE_CHK, flags);
280
281         if (rt) {
282                 struct dst_entry *dst = &rt->dst;
283
284                 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
285                 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
286                 rt->rt6i_genid = rt_genid_ipv6(net);
287                 INIT_LIST_HEAD(&rt->rt6i_siblings);
288         }
289         return rt;
290 }
291
292 static void ip6_dst_destroy(struct dst_entry *dst)
293 {
294         struct rt6_info *rt = (struct rt6_info *)dst;
295         struct inet6_dev *idev = rt->rt6i_idev;
296         struct dst_entry *from = dst->from;
297
298         if (!(rt->dst.flags & DST_HOST))
299                 dst_destroy_metrics_generic(dst);
300
301         if (idev) {
302                 rt->rt6i_idev = NULL;
303                 in6_dev_put(idev);
304         }
305
306         dst->from = NULL;
307         dst_release(from);
308
309         if (rt6_has_peer(rt)) {
310                 struct inet_peer *peer = rt6_peer_ptr(rt);
311                 inet_putpeer(peer);
312         }
313 }
314
315 void rt6_bind_peer(struct rt6_info *rt, int create)
316 {
317         struct inet_peer_base *base;
318         struct inet_peer *peer;
319
320         base = inetpeer_base_ptr(rt->_rt6i_peer);
321         if (!base)
322                 return;
323
324         peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
325         if (peer) {
326                 if (!rt6_set_peer(rt, peer))
327                         inet_putpeer(peer);
328         }
329 }
330
331 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
332                            int how)
333 {
334         struct rt6_info *rt = (struct rt6_info *)dst;
335         struct inet6_dev *idev = rt->rt6i_idev;
336         struct net_device *loopback_dev =
337                 dev_net(dev)->loopback_dev;
338
339         if (dev != loopback_dev) {
340                 if (idev && idev->dev == dev) {
341                         struct inet6_dev *loopback_idev =
342                                 in6_dev_get(loopback_dev);
343                         if (loopback_idev) {
344                                 rt->rt6i_idev = loopback_idev;
345                                 in6_dev_put(idev);
346                         }
347                 }
348         }
349 }
350
351 static bool rt6_check_expired(const struct rt6_info *rt)
352 {
353         if (rt->rt6i_flags & RTF_EXPIRES) {
354                 if (time_after(jiffies, rt->dst.expires))
355                         return true;
356         } else if (rt->dst.from) {
357                 return rt6_check_expired((struct rt6_info *) rt->dst.from);
358         }
359         return false;
360 }
361
362 static bool rt6_need_strict(const struct in6_addr *daddr)
363 {
364         return ipv6_addr_type(daddr) &
365                 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
366 }
367
368 /* Multipath route selection:
369  *   Hash based function using packet header and flowlabel.
370  * Adapted from fib_info_hashfn()
371  */
372 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
373                                const struct flowi6 *fl6)
374 {
375         unsigned int val = fl6->flowi6_proto;
376
377         val ^= ipv6_addr_hash(&fl6->daddr);
378         val ^= ipv6_addr_hash(&fl6->saddr);
379
380         /* Work only if this not encapsulated */
381         switch (fl6->flowi6_proto) {
382         case IPPROTO_UDP:
383         case IPPROTO_TCP:
384         case IPPROTO_SCTP:
385                 val ^= (__force u16)fl6->fl6_sport;
386                 val ^= (__force u16)fl6->fl6_dport;
387                 break;
388
389         case IPPROTO_ICMPV6:
390                 val ^= (__force u16)fl6->fl6_icmp_type;
391                 val ^= (__force u16)fl6->fl6_icmp_code;
392                 break;
393         }
394         /* RFC6438 recommands to use flowlabel */
395         val ^= (__force u32)fl6->flowlabel;
396
397         /* Perhaps, we need to tune, this function? */
398         val = val ^ (val >> 7) ^ (val >> 12);
399         return val % candidate_count;
400 }
401
402 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
403                                              struct flowi6 *fl6, int oif,
404                                              int strict)
405 {
406         struct rt6_info *sibling, *next_sibling;
407         int route_choosen;
408
409         route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
410         /* Don't change the route, if route_choosen == 0
411          * (siblings does not include ourself)
412          */
413         if (route_choosen)
414                 list_for_each_entry_safe(sibling, next_sibling,
415                                 &match->rt6i_siblings, rt6i_siblings) {
416                         route_choosen--;
417                         if (route_choosen == 0) {
418                                 if (rt6_score_route(sibling, oif, strict) < 0)
419                                         break;
420                                 match = sibling;
421                                 break;
422                         }
423                 }
424         return match;
425 }
426
427 /*
428  *      Route lookup. Any table->tb6_lock is implied.
429  */
430
431 static inline struct rt6_info *rt6_device_match(struct net *net,
432                                                     struct rt6_info *rt,
433                                                     const struct in6_addr *saddr,
434                                                     int oif,
435                                                     int flags)
436 {
437         struct rt6_info *local = NULL;
438         struct rt6_info *sprt;
439
440         if (!oif && ipv6_addr_any(saddr))
441                 goto out;
442
443         for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
444                 struct net_device *dev = sprt->dst.dev;
445
446                 if (oif) {
447                         if (dev->ifindex == oif)
448                                 return sprt;
449                         if (dev->flags & IFF_LOOPBACK) {
450                                 if (!sprt->rt6i_idev ||
451                                     sprt->rt6i_idev->dev->ifindex != oif) {
452                                         if (flags & RT6_LOOKUP_F_IFACE && oif)
453                                                 continue;
454                                         if (local && (!oif ||
455                                                       local->rt6i_idev->dev->ifindex == oif))
456                                                 continue;
457                                 }
458                                 local = sprt;
459                         }
460                 } else {
461                         if (ipv6_chk_addr(net, saddr, dev,
462                                           flags & RT6_LOOKUP_F_IFACE))
463                                 return sprt;
464                 }
465         }
466
467         if (oif) {
468                 if (local)
469                         return local;
470
471                 if (flags & RT6_LOOKUP_F_IFACE)
472                         return net->ipv6.ip6_null_entry;
473         }
474 out:
475         return rt;
476 }
477
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: send a neighbour solicitation to the
 * gateway of @rt if its reachability is unknown.
 *
 * Nothing is sent when @rt is NULL or not a gateway route, when the
 * gateway's neighbour entry is in a NUD_VALID state, or when the entry
 * was updated within the interface's rtr_probe_interval (Router
 * Reachability Probes MUST be rate-limited).  When a probe is sent for
 * an existing entry, its "updated" stamp is refreshed first.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct in6_addr mcaddr;
	struct in6_addr *target;
	struct neighbour *neigh;

	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
		return;

	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		bool skip;

		write_lock(&neigh->lock);
		skip = (neigh->nud_state & NUD_VALID) ||
		       !time_after(jiffies,
				   neigh->updated +
				   rt->rt6i_idev->cnf.rtr_probe_interval);
		if (!skip)
			neigh->updated = jiffies;
		write_unlock(&neigh->lock);

		if (skip)
			goto unlock;
	}

	/* Solicit the gateway via its solicited-node multicast address. */
	target = (struct in6_addr *)&rt->rt6i_gateway;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);

unlock:
	rcu_read_unlock_bh();
}
#else
/* Router preference support is compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
524
525 /*
526  * Default Router Selection (RFC 2461 6.3.6)
527  */
528 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
529 {
530         struct net_device *dev = rt->dst.dev;
531         if (!oif || dev->ifindex == oif)
532                 return 2;
533         if ((dev->flags & IFF_LOOPBACK) &&
534             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
535                 return 1;
536         return 0;
537 }
538
539 static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
540 {
541         struct neighbour *neigh;
542         enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
543
544         if (rt->rt6i_flags & RTF_NONEXTHOP ||
545             !(rt->rt6i_flags & RTF_GATEWAY))
546                 return RT6_NUD_SUCCEED;
547
548         rcu_read_lock_bh();
549         neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
550         if (neigh) {
551                 read_lock(&neigh->lock);
552                 if (neigh->nud_state & NUD_VALID)
553                         ret = RT6_NUD_SUCCEED;
554 #ifdef CONFIG_IPV6_ROUTER_PREF
555                 else if (!(neigh->nud_state & NUD_FAILED))
556                         ret = RT6_NUD_SUCCEED;
557 #endif
558                 read_unlock(&neigh->lock);
559         } else {
560                 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
561                       RT6_NUD_SUCCEED : RT6_NUD_FAIL_SOFT;
562         }
563         rcu_read_unlock_bh();
564
565         return ret;
566 }
567
568 static int rt6_score_route(struct rt6_info *rt, int oif,
569                            int strict)
570 {
571         int m;
572
573         m = rt6_check_dev(rt, oif);
574         if (!m && (strict & RT6_LOOKUP_F_IFACE))
575                 return RT6_NUD_FAIL_HARD;
576 #ifdef CONFIG_IPV6_ROUTER_PREF
577         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
578 #endif
579         if (strict & RT6_LOOKUP_F_REACHABLE) {
580                 int n = rt6_check_neigh(rt);
581                 if (n < 0)
582                         return n;
583         }
584         return m;
585 }
586
587 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
588                                    int *mpri, struct rt6_info *match,
589                                    bool *do_rr)
590 {
591         int m;
592         bool match_do_rr = false;
593
594         if (rt6_check_expired(rt))
595                 goto out;
596
597         m = rt6_score_route(rt, oif, strict);
598         if (m == RT6_NUD_FAIL_SOFT && !IS_ENABLED(CONFIG_IPV6_ROUTER_PREF)) {
599                 match_do_rr = true;
600                 m = 0; /* lowest valid score */
601         } else if (m < 0) {
602                 goto out;
603         }
604
605         if (strict & RT6_LOOKUP_F_REACHABLE)
606                 rt6_probe(rt);
607
608         if (m > *mpri) {
609                 *do_rr = match_do_rr;
610                 *mpri = m;
611                 match = rt;
612         }
613 out:
614         return match;
615 }
616
617 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
618                                      struct rt6_info *rr_head,
619                                      u32 metric, int oif, int strict,
620                                      bool *do_rr)
621 {
622         struct rt6_info *rt, *match;
623         int mpri = -1;
624
625         match = NULL;
626         for (rt = rr_head; rt && rt->rt6i_metric == metric;
627              rt = rt->dst.rt6_next)
628                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
629         for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
630              rt = rt->dst.rt6_next)
631                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
632
633         return match;
634 }
635
636 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
637 {
638         struct rt6_info *match, *rt0;
639         struct net *net;
640         bool do_rr = false;
641
642         rt0 = fn->rr_ptr;
643         if (!rt0)
644                 fn->rr_ptr = rt0 = fn->leaf;
645
646         match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
647                              &do_rr);
648
649         if (do_rr) {
650                 struct rt6_info *next = rt0->dst.rt6_next;
651
652                 /* no entries matched; do round-robin */
653                 if (!next || next->rt6i_metric != rt0->rt6i_metric)
654                         next = fn->leaf;
655
656                 if (next != rt0)
657                         fn->rr_ptr = next;
658         }
659
660         net = dev_net(rt0->dst.dev);
661         return match ? match : net->ipv6.ip6_null_entry;
662 }
663
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a Route Information option (RFC 4191).
 *
 * @dev:    device the route is bound to (its netns and ifindex are used)
 * @opt:    start of the option, interpreted as struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: gateway address for the route
 *
 * Looks up the matching route-info route.  A zero lifetime deletes an
 * existing route; a non-zero lifetime creates the route if it is
 * missing, or otherwise refreshes its preference.  The expiry is then
 * set from the lifetime, or cleared if the lifetime is infinite.
 *
 * Returns 0 on success, -EINVAL if the option is malformed.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/*
	 * Sanity check for prefix_len and length (RFC 4191, section 2.3).
	 * Length is in units of 8 octets including the 8-octet header, so
	 * the Prefix field holds 0, 8 or 16 octets for Length 1, 2 or 3:
	 *   Prefix Length > 64  requires Length == 3
	 *   Prefix Length > 0   requires Length >= 2
	 * Anything else would make us read prefix bits that are not part
	 * of the option.
	 */
	if (rinfo->length > 3)
		return -EINVAL;
	if (rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 3)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 2)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/*
		 * Shorter option: build a full address from the leading
		 * prefix_len bits.  The checks above guarantee those bits
		 * lie inside the option's Prefix field.
		 */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* A zero lifetime withdraws the route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime))
			rt6_clean_expires(rt);
		else
			rt6_set_expires(rt, jiffies + HZ * lifetime);

		ip6_rt_put(rt);
	}
	return 0;
}
#endif
736
/*
 * BACKTRACK - retry a failed lookup further up the FIB tree.
 *
 * This macro is not hygienic.  It must be expanded in a function that
 * provides the locals "rt" (struct rt6_info *) and "fn"
 * (struct fib6_node *) and the labels "out" and "restart".
 *
 * If rt is the namespace's null entry, walk upwards from fn:
 *   - at a node flagged RTN_TL_ROOT, give up and jump to "out";
 *   - otherwise move to the parent, or, if the parent has a subtree
 *     that is not the current node, look @saddr up in that subtree;
 *   - as soon as the new node is flagged RTN_RTINFO, jump to "restart".
 */
#define BACKTRACK(__net, saddr)			\
do { \
	if (rt == (__net)->ipv6.ip6_null_entry) { \
		struct fib6_node *up; \
		for (;;) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			up = fn->parent; \
			if (FIB6_SUBTREE(up) && FIB6_SUBTREE(up) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(up), NULL, (saddr)); \
			else \
				fn = up; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
754
755 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
756                                              struct fib6_table *table,
757                                              struct flowi6 *fl6, int flags)
758 {
759         struct fib6_node *fn;
760         struct rt6_info *rt;
761
762         read_lock_bh(&table->tb6_lock);
763         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
764 restart:
765         rt = fn->leaf;
766         rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
767         if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
768                 rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
769         BACKTRACK(net, &fl6->saddr);
770 out:
771         dst_use(&rt->dst, jiffies);
772         read_unlock_bh(&table->tb6_lock);
773         return rt;
774
775 }
776
777 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
778                                     int flags)
779 {
780         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
781 }
782 EXPORT_SYMBOL_GPL(ip6_route_lookup);
783
784 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
785                             const struct in6_addr *saddr, int oif, int strict)
786 {
787         struct flowi6 fl6 = {
788                 .flowi6_oif = oif,
789                 .daddr = *daddr,
790         };
791         struct dst_entry *dst;
792         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
793
794         if (saddr) {
795                 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
796                 flags |= RT6_LOOKUP_F_HAS_SADDR;
797         }
798
799         dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
800         if (dst->error == 0)
801                 return (struct rt6_info *) dst;
802
803         dst_release(dst);
804
805         return NULL;
806 }
807
808 EXPORT_SYMBOL(rt6_lookup);
809
810 /* ip6_ins_rt is called with FREE table->tb6_lock.
811    It takes new route entry, the addition fails by any reason the
812    route is freed. In any case, if caller does not hold it, it may
813    be destroyed.
814  */
815
816 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
817 {
818         int err;
819         struct fib6_table *table;
820
821         table = rt->rt6i_table;
822         write_lock_bh(&table->tb6_lock);
823         err = fib6_add(&table->tb6_root, rt, info);
824         write_unlock_bh(&table->tb6_lock);
825
826         return err;
827 }
828
829 int ip6_ins_rt(struct rt6_info *rt)
830 {
831         struct nl_info info = {
832                 .nl_net = dev_net(rt->dst.dev),
833         };
834         return __ip6_ins_rt(rt, &info);
835 }
836
837 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
838                                       const struct in6_addr *daddr,
839                                       const struct in6_addr *saddr)
840 {
841         struct rt6_info *rt;
842
843         /*
844          *      Clone the route.
845          */
846
847         rt = ip6_rt_copy(ort, daddr);
848
849         if (rt) {
850                 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
851                         if (ort->rt6i_dst.plen != 128 &&
852                             ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
853                                 rt->rt6i_flags |= RTF_ANYCAST;
854                         rt->rt6i_gateway = *daddr;
855                 }
856
857                 rt->rt6i_flags |= RTF_CACHE;
858
859 #ifdef CONFIG_IPV6_SUBTREES
860                 if (rt->rt6i_src.plen && saddr) {
861                         rt->rt6i_src.addr = *saddr;
862                         rt->rt6i_src.plen = 128;
863                 }
864 #endif
865         }
866
867         return rt;
868 }
869
870 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
871                                         const struct in6_addr *daddr)
872 {
873         struct rt6_info *rt = ip6_rt_copy(ort, daddr);
874
875         if (rt)
876                 rt->rt6i_flags |= RTF_CACHE;
877         return rt;
878 }
879
880 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
881                                       struct flowi6 *fl6, int flags)
882 {
883         struct fib6_node *fn;
884         struct rt6_info *rt, *nrt;
885         int strict = 0;
886         int attempts = 3;
887         int err;
888         int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
889
890         strict |= flags & RT6_LOOKUP_F_IFACE;
891
892 relookup:
893         read_lock_bh(&table->tb6_lock);
894
895 restart_2:
896         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
897
898 restart:
899         rt = rt6_select(fn, oif, strict | reachable);
900         if (rt->rt6i_nsiblings)
901                 rt = rt6_multipath_select(rt, fl6, oif, strict | reachable);
902         BACKTRACK(net, &fl6->saddr);
903         if (rt == net->ipv6.ip6_null_entry ||
904             rt->rt6i_flags & RTF_CACHE)
905                 goto out;
906
907         dst_hold(&rt->dst);
908         read_unlock_bh(&table->tb6_lock);
909
910         if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)))
911                 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
912         else if (!(rt->dst.flags & DST_HOST))
913                 nrt = rt6_alloc_clone(rt, &fl6->daddr);
914         else
915                 goto out2;
916
917         ip6_rt_put(rt);
918         rt = nrt ? : net->ipv6.ip6_null_entry;
919
920         dst_hold(&rt->dst);
921         if (nrt) {
922                 err = ip6_ins_rt(nrt);
923                 if (!err)
924                         goto out2;
925         }
926
927         if (--attempts <= 0)
928                 goto out2;
929
930         /*
931          * Race condition! In the gap, when table->tb6_lock was
932          * released someone could insert this route.  Relookup.
933          */
934         ip6_rt_put(rt);
935         goto relookup;
936
937 out:
938         if (reachable) {
939                 reachable = 0;
940                 goto restart_2;
941         }
942         dst_hold(&rt->dst);
943         read_unlock_bh(&table->tb6_lock);
944 out2:
945         rt->dst.lastuse = jiffies;
946         rt->dst.__use++;
947
948         return rt;
949 }
950
951 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
952                                             struct flowi6 *fl6, int flags)
953 {
954         return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
955 }
956
957 static struct dst_entry *ip6_route_input_lookup(struct net *net,
958                                                 struct net_device *dev,
959                                                 struct flowi6 *fl6, int flags)
960 {
961         if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
962                 flags |= RT6_LOOKUP_F_IFACE;
963
964         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
965 }
966
967 void ip6_route_input(struct sk_buff *skb)
968 {
969         const struct ipv6hdr *iph = ipv6_hdr(skb);
970         struct net *net = dev_net(skb->dev);
971         int flags = RT6_LOOKUP_F_HAS_SADDR;
972         struct flowi6 fl6 = {
973                 .flowi6_iif = skb->dev->ifindex,
974                 .daddr = iph->daddr,
975                 .saddr = iph->saddr,
976                 .flowlabel = ip6_flowinfo(iph),
977                 .flowi6_mark = skb->mark,
978                 .flowi6_proto = iph->nexthdr,
979         };
980
981         skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
982 }
983
984 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
985                                              struct flowi6 *fl6, int flags)
986 {
987         return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
988 }
989
990 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
991                                     struct flowi6 *fl6)
992 {
993         int flags = 0;
994
995         fl6->flowi6_iif = LOOPBACK_IFINDEX;
996
997         if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
998                 flags |= RT6_LOOKUP_F_IFACE;
999
1000         if (!ipv6_addr_any(&fl6->saddr))
1001                 flags |= RT6_LOOKUP_F_HAS_SADDR;
1002         else if (sk)
1003                 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1004
1005         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1006 }
1007
1008 EXPORT_SYMBOL(ip6_route_output);
1009
1010 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1011 {
1012         struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1013         struct dst_entry *new = NULL;
1014
1015         rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1016         if (rt) {
1017                 new = &rt->dst;
1018
1019                 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
1020                 rt6_init_peer(rt, net->ipv6.peers);
1021
1022                 new->__use = 1;
1023                 new->input = dst_discard;
1024                 new->output = dst_discard;
1025
1026                 if (dst_metrics_read_only(&ort->dst))
1027                         new->_metrics = ort->dst._metrics;
1028                 else
1029                         dst_copy_metrics(new, &ort->dst);
1030                 rt->rt6i_idev = ort->rt6i_idev;
1031                 if (rt->rt6i_idev)
1032                         in6_dev_hold(rt->rt6i_idev);
1033
1034                 rt->rt6i_gateway = ort->rt6i_gateway;
1035                 rt->rt6i_flags = ort->rt6i_flags;
1036                 rt->rt6i_metric = 0;
1037
1038                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1039 #ifdef CONFIG_IPV6_SUBTREES
1040                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1041 #endif
1042
1043                 dst_free(new);
1044         }
1045
1046         dst_release(dst_orig);
1047         return new ? new : ERR_PTR(-ENOMEM);
1048 }
1049
1050 /*
1051  *      Destination cache support functions
1052  */
1053
1054 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1055 {
1056         struct rt6_info *rt;
1057
1058         rt = (struct rt6_info *) dst;
1059
1060         /* All IPV6 dsts are created with ->obsolete set to the value
1061          * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1062          * into this function always.
1063          */
1064         if (rt->rt6i_genid != rt_genid_ipv6(dev_net(rt->dst.dev)))
1065                 return NULL;
1066
1067         if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
1068                 return dst;
1069
1070         return NULL;
1071 }
1072
1073 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1074 {
1075         struct rt6_info *rt = (struct rt6_info *) dst;
1076
1077         if (rt) {
1078                 if (rt->rt6i_flags & RTF_CACHE) {
1079                         if (rt6_check_expired(rt)) {
1080                                 ip6_del_rt(rt);
1081                                 dst = NULL;
1082                         }
1083                 } else {
1084                         dst_release(dst);
1085                         dst = NULL;
1086                 }
1087         }
1088         return dst;
1089 }
1090
1091 static void ip6_link_failure(struct sk_buff *skb)
1092 {
1093         struct rt6_info *rt;
1094
1095         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1096
1097         rt = (struct rt6_info *) skb_dst(skb);
1098         if (rt) {
1099                 if (rt->rt6i_flags & RTF_CACHE) {
1100                         dst_hold(&rt->dst);
1101                         if (ip6_del_rt(rt))
1102                                 dst_free(&rt->dst);
1103                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1104                         rt->rt6i_node->fn_sernum = -1;
1105                 }
1106         }
1107 }
1108
1109 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1110                                struct sk_buff *skb, u32 mtu)
1111 {
1112         struct rt6_info *rt6 = (struct rt6_info*)dst;
1113
1114         dst_confirm(dst);
1115         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1116                 struct net *net = dev_net(dst->dev);
1117
1118                 rt6->rt6i_flags |= RTF_MODIFIED;
1119                 if (mtu < IPV6_MIN_MTU) {
1120                         u32 features = dst_metric(dst, RTAX_FEATURES);
1121                         mtu = IPV6_MIN_MTU;
1122                         features |= RTAX_FEATURE_ALLFRAG;
1123                         dst_metric_set(dst, RTAX_FEATURES, features);
1124                 }
1125                 dst_metric_set(dst, RTAX_MTU, mtu);
1126                 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1127         }
1128 }
1129
1130 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1131                      int oif, u32 mark)
1132 {
1133         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1134         struct dst_entry *dst;
1135         struct flowi6 fl6;
1136
1137         memset(&fl6, 0, sizeof(fl6));
1138         fl6.flowi6_oif = oif;
1139         fl6.flowi6_mark = mark;
1140         fl6.flowi6_flags = 0;
1141         fl6.daddr = iph->daddr;
1142         fl6.saddr = iph->saddr;
1143         fl6.flowlabel = ip6_flowinfo(iph);
1144
1145         dst = ip6_route_output(net, NULL, &fl6);
1146         if (!dst->error)
1147                 ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1148         dst_release(dst);
1149 }
1150 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1151
1152 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1153 {
1154         ip6_update_pmtu(skb, sock_net(sk), mtu,
1155                         sk->sk_bound_dev_if, sk->sk_mark);
1156 }
1157 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1158
1159 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1160 {
1161         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1162         struct dst_entry *dst;
1163         struct flowi6 fl6;
1164
1165         memset(&fl6, 0, sizeof(fl6));
1166         fl6.flowi6_oif = oif;
1167         fl6.flowi6_mark = mark;
1168         fl6.flowi6_flags = 0;
1169         fl6.daddr = iph->daddr;
1170         fl6.saddr = iph->saddr;
1171         fl6.flowlabel = ip6_flowinfo(iph);
1172
1173         dst = ip6_route_output(net, NULL, &fl6);
1174         if (!dst->error)
1175                 rt6_do_redirect(dst, NULL, skb);
1176         dst_release(dst);
1177 }
1178 EXPORT_SYMBOL_GPL(ip6_redirect);
1179
1180 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1181                             u32 mark)
1182 {
1183         const struct ipv6hdr *iph = ipv6_hdr(skb);
1184         const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1185         struct dst_entry *dst;
1186         struct flowi6 fl6;
1187
1188         memset(&fl6, 0, sizeof(fl6));
1189         fl6.flowi6_oif = oif;
1190         fl6.flowi6_mark = mark;
1191         fl6.flowi6_flags = 0;
1192         fl6.daddr = msg->dest;
1193         fl6.saddr = iph->daddr;
1194
1195         dst = ip6_route_output(net, NULL, &fl6);
1196         if (!dst->error)
1197                 rt6_do_redirect(dst, NULL, skb);
1198         dst_release(dst);
1199 }
1200
1201 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1202 {
1203         ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1204 }
1205 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1206
1207 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1208 {
1209         struct net_device *dev = dst->dev;
1210         unsigned int mtu = dst_mtu(dst);
1211         struct net *net = dev_net(dev);
1212
1213         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1214
1215         if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1216                 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1217
1218         /*
1219          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1220          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1221          * IPV6_MAXPLEN is also valid and means: "any MSS,
1222          * rely only on pmtu discovery"
1223          */
1224         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1225                 mtu = IPV6_MAXPLEN;
1226         return mtu;
1227 }
1228
1229 static unsigned int ip6_mtu(const struct dst_entry *dst)
1230 {
1231         struct inet6_dev *idev;
1232         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1233
1234         if (mtu)
1235                 return mtu;
1236
1237         mtu = IPV6_MIN_MTU;
1238
1239         rcu_read_lock();
1240         idev = __in6_dev_get(dst->dev);
1241         if (idev)
1242                 mtu = idev->cnf.mtu6;
1243         rcu_read_unlock();
1244
1245         return mtu;
1246 }
1247
1248 static struct dst_entry *icmp6_dst_gc_list;
1249 static DEFINE_SPINLOCK(icmp6_dst_lock);
1250
1251 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1252                                   struct flowi6 *fl6)
1253 {
1254         struct dst_entry *dst;
1255         struct rt6_info *rt;
1256         struct inet6_dev *idev = in6_dev_get(dev);
1257         struct net *net = dev_net(dev);
1258
1259         if (unlikely(!idev))
1260                 return ERR_PTR(-ENODEV);
1261
1262         rt = ip6_dst_alloc(net, dev, 0, NULL);
1263         if (unlikely(!rt)) {
1264                 in6_dev_put(idev);
1265                 dst = ERR_PTR(-ENOMEM);
1266                 goto out;
1267         }
1268
1269         rt->dst.flags |= DST_HOST;
1270         rt->dst.output  = ip6_output;
1271         atomic_set(&rt->dst.__refcnt, 1);
1272         rt->rt6i_dst.addr = fl6->daddr;
1273         rt->rt6i_dst.plen = 128;
1274         rt->rt6i_idev     = idev;
1275         dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1276
1277         spin_lock_bh(&icmp6_dst_lock);
1278         rt->dst.next = icmp6_dst_gc_list;
1279         icmp6_dst_gc_list = &rt->dst;
1280         spin_unlock_bh(&icmp6_dst_lock);
1281
1282         fib6_force_start_gc(net);
1283
1284         dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1285
1286 out:
1287         return dst;
1288 }
1289
1290 int icmp6_dst_gc(void)
1291 {
1292         struct dst_entry *dst, **pprev;
1293         int more = 0;
1294
1295         spin_lock_bh(&icmp6_dst_lock);
1296         pprev = &icmp6_dst_gc_list;
1297
1298         while ((dst = *pprev) != NULL) {
1299                 if (!atomic_read(&dst->__refcnt)) {
1300                         *pprev = dst->next;
1301                         dst_free(dst);
1302                 } else {
1303                         pprev = &dst->next;
1304                         ++more;
1305                 }
1306         }
1307
1308         spin_unlock_bh(&icmp6_dst_lock);
1309
1310         return more;
1311 }
1312
1313 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1314                             void *arg)
1315 {
1316         struct dst_entry *dst, **pprev;
1317
1318         spin_lock_bh(&icmp6_dst_lock);
1319         pprev = &icmp6_dst_gc_list;
1320         while ((dst = *pprev) != NULL) {
1321                 struct rt6_info *rt = (struct rt6_info *) dst;
1322                 if (func(rt, arg)) {
1323                         *pprev = dst->next;
1324                         dst_free(dst);
1325                 } else {
1326                         pprev = &dst->next;
1327                 }
1328         }
1329         spin_unlock_bh(&icmp6_dst_lock);
1330 }
1331
1332 static int ip6_dst_gc(struct dst_ops *ops)
1333 {
1334         struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1335         int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1336         int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1337         int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1338         int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1339         unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1340         int entries;
1341
1342         entries = dst_entries_get_fast(ops);
1343         if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1344             entries <= rt_max_size)
1345                 goto out;
1346
1347         net->ipv6.ip6_rt_gc_expire++;
1348         fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, entries > rt_max_size);
1349         entries = dst_entries_get_slow(ops);
1350         if (entries < ops->gc_thresh)
1351                 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1352 out:
1353         net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1354         return entries > rt_max_size;
1355 }
1356
1357 /*
1358  *
1359  */
1360
1361 int ip6_route_add(struct fib6_config *cfg)
1362 {
1363         int err;
1364         struct net *net = cfg->fc_nlinfo.nl_net;
1365         struct rt6_info *rt = NULL;
1366         struct net_device *dev = NULL;
1367         struct inet6_dev *idev = NULL;
1368         struct fib6_table *table;
1369         int addr_type;
1370
1371         if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1372                 return -EINVAL;
1373 #ifndef CONFIG_IPV6_SUBTREES
1374         if (cfg->fc_src_len)
1375                 return -EINVAL;
1376 #endif
1377         if (cfg->fc_ifindex) {
1378                 err = -ENODEV;
1379                 dev = dev_get_by_index(net, cfg->fc_ifindex);
1380                 if (!dev)
1381                         goto out;
1382                 idev = in6_dev_get(dev);
1383                 if (!idev)
1384                         goto out;
1385         }
1386
1387         if (cfg->fc_metric == 0)
1388                 cfg->fc_metric = IP6_RT_PRIO_USER;
1389
1390         err = -ENOBUFS;
1391         if (cfg->fc_nlinfo.nlh &&
1392             !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1393                 table = fib6_get_table(net, cfg->fc_table);
1394                 if (!table) {
1395                         pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1396                         table = fib6_new_table(net, cfg->fc_table);
1397                 }
1398         } else {
1399                 table = fib6_new_table(net, cfg->fc_table);
1400         }
1401
1402         if (!table)
1403                 goto out;
1404
1405         rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1406
1407         if (!rt) {
1408                 err = -ENOMEM;
1409                 goto out;
1410         }
1411
1412         if (cfg->fc_flags & RTF_EXPIRES)
1413                 rt6_set_expires(rt, jiffies +
1414                                 clock_t_to_jiffies(cfg->fc_expires));
1415         else
1416                 rt6_clean_expires(rt);
1417
1418         if (cfg->fc_protocol == RTPROT_UNSPEC)
1419                 cfg->fc_protocol = RTPROT_BOOT;
1420         rt->rt6i_protocol = cfg->fc_protocol;
1421
1422         addr_type = ipv6_addr_type(&cfg->fc_dst);
1423
1424         if (addr_type & IPV6_ADDR_MULTICAST)
1425                 rt->dst.input = ip6_mc_input;
1426         else if (cfg->fc_flags & RTF_LOCAL)
1427                 rt->dst.input = ip6_input;
1428         else
1429                 rt->dst.input = ip6_forward;
1430
1431         rt->dst.output = ip6_output;
1432
1433         ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1434         rt->rt6i_dst.plen = cfg->fc_dst_len;
1435         if (rt->rt6i_dst.plen == 128)
1436                rt->dst.flags |= DST_HOST;
1437
1438         if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1439                 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1440                 if (!metrics) {
1441                         err = -ENOMEM;
1442                         goto out;
1443                 }
1444                 dst_init_metrics(&rt->dst, metrics, 0);
1445         }
1446 #ifdef CONFIG_IPV6_SUBTREES
1447         ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1448         rt->rt6i_src.plen = cfg->fc_src_len;
1449 #endif
1450
1451         rt->rt6i_metric = cfg->fc_metric;
1452
1453         /* We cannot add true routes via loopback here,
1454            they would result in kernel looping; promote them to reject routes
1455          */
1456         if ((cfg->fc_flags & RTF_REJECT) ||
1457             (dev && (dev->flags & IFF_LOOPBACK) &&
1458              !(addr_type & IPV6_ADDR_LOOPBACK) &&
1459              !(cfg->fc_flags & RTF_LOCAL))) {
1460                 /* hold loopback dev/idev if we haven't done so. */
1461                 if (dev != net->loopback_dev) {
1462                         if (dev) {
1463                                 dev_put(dev);
1464                                 in6_dev_put(idev);
1465                         }
1466                         dev = net->loopback_dev;
1467                         dev_hold(dev);
1468                         idev = in6_dev_get(dev);
1469                         if (!idev) {
1470                                 err = -ENODEV;
1471                                 goto out;
1472                         }
1473                 }
1474                 rt->dst.output = ip6_pkt_discard_out;
1475                 rt->dst.input = ip6_pkt_discard;
1476                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1477                 switch (cfg->fc_type) {
1478                 case RTN_BLACKHOLE:
1479                         rt->dst.error = -EINVAL;
1480                         break;
1481                 case RTN_PROHIBIT:
1482                         rt->dst.error = -EACCES;
1483                         break;
1484                 case RTN_THROW:
1485                         rt->dst.error = -EAGAIN;
1486                         break;
1487                 default:
1488                         rt->dst.error = -ENETUNREACH;
1489                         break;
1490                 }
1491                 goto install_route;
1492         }
1493
1494         if (cfg->fc_flags & RTF_GATEWAY) {
1495                 const struct in6_addr *gw_addr;
1496                 int gwa_type;
1497
1498                 gw_addr = &cfg->fc_gateway;
1499                 rt->rt6i_gateway = *gw_addr;
1500                 gwa_type = ipv6_addr_type(gw_addr);
1501
1502                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1503                         struct rt6_info *grt;
1504
1505                         /* IPv6 strictly inhibits using not link-local
1506                            addresses as nexthop address.
1507                            Otherwise, router will not able to send redirects.
1508                            It is very good, but in some (rare!) circumstances
1509                            (SIT, PtP, NBMA NOARP links) it is handy to allow
1510                            some exceptions. --ANK
1511                          */
1512                         err = -EINVAL;
1513                         if (!(gwa_type & IPV6_ADDR_UNICAST))
1514                                 goto out;
1515
1516                         grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1517
1518                         err = -EHOSTUNREACH;
1519                         if (!grt)
1520                                 goto out;
1521                         if (dev) {
1522                                 if (dev != grt->dst.dev) {
1523                                         ip6_rt_put(grt);
1524                                         goto out;
1525                                 }
1526                         } else {
1527                                 dev = grt->dst.dev;
1528                                 idev = grt->rt6i_idev;
1529                                 dev_hold(dev);
1530                                 in6_dev_hold(grt->rt6i_idev);
1531                         }
1532                         if (!(grt->rt6i_flags & RTF_GATEWAY))
1533                                 err = 0;
1534                         ip6_rt_put(grt);
1535
1536                         if (err)
1537                                 goto out;
1538                 }
1539                 err = -EINVAL;
1540                 if (!dev || (dev->flags & IFF_LOOPBACK))
1541                         goto out;
1542         }
1543
1544         err = -ENODEV;
1545         if (!dev)
1546                 goto out;
1547
1548         if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1549                 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1550                         err = -EINVAL;
1551                         goto out;
1552                 }
1553                 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1554                 rt->rt6i_prefsrc.plen = 128;
1555         } else
1556                 rt->rt6i_prefsrc.plen = 0;
1557
1558         rt->rt6i_flags = cfg->fc_flags;
1559
1560 install_route:
1561         if (cfg->fc_mx) {
1562                 struct nlattr *nla;
1563                 int remaining;
1564
1565                 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1566                         int type = nla_type(nla);
1567
1568                         if (type) {
1569                                 if (type > RTAX_MAX) {
1570                                         err = -EINVAL;
1571                                         goto out;
1572                                 }
1573
1574                                 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1575                         }
1576                 }
1577         }
1578
1579         rt->dst.dev = dev;
1580         rt->rt6i_idev = idev;
1581         rt->rt6i_table = table;
1582
1583         cfg->fc_nlinfo.nl_net = dev_net(dev);
1584
1585         return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1586
1587 out:
1588         if (dev)
1589                 dev_put(dev);
1590         if (idev)
1591                 in6_dev_put(idev);
1592         if (rt)
1593                 dst_free(&rt->dst);
1594         return err;
1595 }
1596
1597 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1598 {
1599         int err;
1600         struct fib6_table *table;
1601         struct net *net = dev_net(rt->dst.dev);
1602
1603         if (rt == net->ipv6.ip6_null_entry) {
1604                 err = -ENOENT;
1605                 goto out;
1606         }
1607
1608         table = rt->rt6i_table;
1609         write_lock_bh(&table->tb6_lock);
1610         err = fib6_del(rt, info);
1611         write_unlock_bh(&table->tb6_lock);
1612
1613 out:
1614         ip6_rt_put(rt);
1615         return err;
1616 }
1617
1618 int ip6_del_rt(struct rt6_info *rt)
1619 {
1620         struct nl_info info = {
1621                 .nl_net = dev_net(rt->dst.dev),
1622         };
1623         return __ip6_del_rt(rt, &info);
1624 }
1625
1626 static int ip6_route_del(struct fib6_config *cfg)
1627 {
1628         struct fib6_table *table;
1629         struct fib6_node *fn;
1630         struct rt6_info *rt;
1631         int err = -ESRCH;
1632
1633         table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1634         if (!table)
1635                 return err;
1636
1637         read_lock_bh(&table->tb6_lock);
1638
1639         fn = fib6_locate(&table->tb6_root,
1640                          &cfg->fc_dst, cfg->fc_dst_len,
1641                          &cfg->fc_src, cfg->fc_src_len);
1642
1643         if (fn) {
1644                 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1645                         if (cfg->fc_ifindex &&
1646                             (!rt->dst.dev ||
1647                              rt->dst.dev->ifindex != cfg->fc_ifindex))
1648                                 continue;
1649                         if (cfg->fc_flags & RTF_GATEWAY &&
1650                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1651                                 continue;
1652                         if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1653                                 continue;
1654                         dst_hold(&rt->dst);
1655                         read_unlock_bh(&table->tb6_lock);
1656
1657                         return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1658                 }
1659         }
1660         read_unlock_bh(&table->tb6_lock);
1661
1662         return err;
1663 }
1664
1665 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1666 {
1667         struct net *net = dev_net(skb->dev);
1668         struct netevent_redirect netevent;
1669         struct rt6_info *rt, *nrt = NULL;
1670         struct ndisc_options ndopts;
1671         struct inet6_dev *in6_dev;
1672         struct neighbour *neigh;
1673         struct rd_msg *msg;
1674         int optlen, on_link;
1675         u8 *lladdr;
1676
1677         optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
1678         optlen -= sizeof(*msg);
1679
1680         if (optlen < 0) {
1681                 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1682                 return;
1683         }
1684
1685         msg = (struct rd_msg *)icmp6_hdr(skb);
1686
1687         if (ipv6_addr_is_multicast(&msg->dest)) {
1688                 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1689                 return;
1690         }
1691
1692         on_link = 0;
1693         if (ipv6_addr_equal(&msg->dest, &msg->target)) {
1694                 on_link = 1;
1695         } else if (ipv6_addr_type(&msg->target) !=
1696                    (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1697                 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1698                 return;
1699         }
1700
1701         in6_dev = __in6_dev_get(skb->dev);
1702         if (!in6_dev)
1703                 return;
1704         if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1705                 return;
1706
1707         /* RFC2461 8.1:
1708          *      The IP source address of the Redirect MUST be the same as the current
1709          *      first-hop router for the specified ICMP Destination Address.
1710          */
1711
1712         if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
1713                 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1714                 return;
1715         }
1716
1717         lladdr = NULL;
1718         if (ndopts.nd_opts_tgt_lladdr) {
1719                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1720                                              skb->dev);
1721                 if (!lladdr) {
1722                         net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1723                         return;
1724                 }
1725         }
1726
1727         rt = (struct rt6_info *) dst;
1728         if (rt == net->ipv6.ip6_null_entry) {
1729                 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1730                 return;
1731         }
1732
1733         /* Redirect received -> path was valid.
1734          * Look, redirects are sent only in response to data packets,
1735          * so that this nexthop apparently is reachable. --ANK
1736          */
1737         dst_confirm(&rt->dst);
1738
1739         neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
1740         if (!neigh)
1741                 return;
1742
1743         /*
1744          *      We have finally decided to accept it.
1745          */
1746
1747         neigh_update(neigh, lladdr, NUD_STALE,
1748                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1749                      NEIGH_UPDATE_F_OVERRIDE|
1750                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1751                                      NEIGH_UPDATE_F_ISROUTER))
1752                      );
1753
1754         nrt = ip6_rt_copy(rt, &msg->dest);
1755         if (!nrt)
1756                 goto out;
1757
1758         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1759         if (on_link)
1760                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1761
1762         nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1763
1764         if (ip6_ins_rt(nrt))
1765                 goto out;
1766
1767         netevent.old = &rt->dst;
1768         netevent.new = &nrt->dst;
1769         netevent.daddr = &msg->dest;
1770         netevent.neigh = neigh;
1771         call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1772
1773         if (rt->rt6i_flags & RTF_CACHE) {
1774                 rt = (struct rt6_info *) dst_clone(&rt->dst);
1775                 ip6_del_rt(rt);
1776         }
1777
1778 out:
1779         neigh_release(neigh);
1780 }
1781
1782 /*
1783  *      Misc support functions
1784  */
1785
1786 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1787                                     const struct in6_addr *dest)
1788 {
1789         struct net *net = dev_net(ort->dst.dev);
1790         struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1791                                             ort->rt6i_table);
1792
1793         if (rt) {
1794                 rt->dst.input = ort->dst.input;
1795                 rt->dst.output = ort->dst.output;
1796                 rt->dst.flags |= DST_HOST;
1797
1798                 rt->rt6i_dst.addr = *dest;
1799                 rt->rt6i_dst.plen = 128;
1800                 dst_copy_metrics(&rt->dst, &ort->dst);
1801                 rt->dst.error = ort->dst.error;
1802                 rt->rt6i_idev = ort->rt6i_idev;
1803                 if (rt->rt6i_idev)
1804                         in6_dev_hold(rt->rt6i_idev);
1805                 rt->dst.lastuse = jiffies;
1806
1807                 rt->rt6i_gateway = ort->rt6i_gateway;
1808                 rt->rt6i_flags = ort->rt6i_flags;
1809                 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1810                     (RTF_DEFAULT | RTF_ADDRCONF))
1811                         rt6_set_from(rt, ort);
1812                 rt->rt6i_metric = 0;
1813
1814 #ifdef CONFIG_IPV6_SUBTREES
1815                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1816 #endif
1817                 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1818                 rt->rt6i_table = ort->rt6i_table;
1819         }
1820         return rt;
1821 }
1822
1823 #ifdef CONFIG_IPV6_ROUTE_INFO
1824 static struct rt6_info *rt6_get_route_info(struct net *net,
1825                                            const struct in6_addr *prefix, int prefixlen,
1826                                            const struct in6_addr *gwaddr, int ifindex)
1827 {
1828         struct fib6_node *fn;
1829         struct rt6_info *rt = NULL;
1830         struct fib6_table *table;
1831
1832         table = fib6_get_table(net, RT6_TABLE_INFO);
1833         if (!table)
1834                 return NULL;
1835
1836         read_lock_bh(&table->tb6_lock);
1837         fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1838         if (!fn)
1839                 goto out;
1840
1841         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1842                 if (rt->dst.dev->ifindex != ifindex)
1843                         continue;
1844                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1845                         continue;
1846                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1847                         continue;
1848                 dst_hold(&rt->dst);
1849                 break;
1850         }
1851 out:
1852         read_unlock_bh(&table->tb6_lock);
1853         return rt;
1854 }
1855
1856 static struct rt6_info *rt6_add_route_info(struct net *net,
1857                                            const struct in6_addr *prefix, int prefixlen,
1858                                            const struct in6_addr *gwaddr, int ifindex,
1859                                            unsigned int pref)
1860 {
1861         struct fib6_config cfg = {
1862                 .fc_table       = RT6_TABLE_INFO,
1863                 .fc_metric      = IP6_RT_PRIO_USER,
1864                 .fc_ifindex     = ifindex,
1865                 .fc_dst_len     = prefixlen,
1866                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1867                                   RTF_UP | RTF_PREF(pref),
1868                 .fc_nlinfo.portid = 0,
1869                 .fc_nlinfo.nlh = NULL,
1870                 .fc_nlinfo.nl_net = net,
1871         };
1872
1873         cfg.fc_dst = *prefix;
1874         cfg.fc_gateway = *gwaddr;
1875
1876         /* We should treat it as a default route if prefix length is 0. */
1877         if (!prefixlen)
1878                 cfg.fc_flags |= RTF_DEFAULT;
1879
1880         ip6_route_add(&cfg);
1881
1882         return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1883 }
1884 #endif
1885
1886 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1887 {
1888         struct rt6_info *rt;
1889         struct fib6_table *table;
1890
1891         table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1892         if (!table)
1893                 return NULL;
1894
1895         read_lock_bh(&table->tb6_lock);
1896         for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1897                 if (dev == rt->dst.dev &&
1898                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1899                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1900                         break;
1901         }
1902         if (rt)
1903                 dst_hold(&rt->dst);
1904         read_unlock_bh(&table->tb6_lock);
1905         return rt;
1906 }
1907
1908 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1909                                      struct net_device *dev,
1910                                      unsigned int pref)
1911 {
1912         struct fib6_config cfg = {
1913                 .fc_table       = RT6_TABLE_DFLT,
1914                 .fc_metric      = IP6_RT_PRIO_USER,
1915                 .fc_ifindex     = dev->ifindex,
1916                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1917                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1918                 .fc_nlinfo.portid = 0,
1919                 .fc_nlinfo.nlh = NULL,
1920                 .fc_nlinfo.nl_net = dev_net(dev),
1921         };
1922
1923         cfg.fc_gateway = *gwaddr;
1924
1925         ip6_route_add(&cfg);
1926
1927         return rt6_get_dflt_router(gwaddr, dev);
1928 }
1929
1930 void rt6_purge_dflt_routers(struct net *net)
1931 {
1932         struct rt6_info *rt;
1933         struct fib6_table *table;
1934
1935         /* NOTE: Keep consistent with rt6_get_dflt_router */
1936         table = fib6_get_table(net, RT6_TABLE_DFLT);
1937         if (!table)
1938                 return;
1939
1940 restart:
1941         read_lock_bh(&table->tb6_lock);
1942         for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1943                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
1944                     (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
1945                         dst_hold(&rt->dst);
1946                         read_unlock_bh(&table->tb6_lock);
1947                         ip6_del_rt(rt);
1948                         goto restart;
1949                 }
1950         }
1951         read_unlock_bh(&table->tb6_lock);
1952 }
1953
1954 static void rtmsg_to_fib6_config(struct net *net,
1955                                  struct in6_rtmsg *rtmsg,
1956                                  struct fib6_config *cfg)
1957 {
1958         memset(cfg, 0, sizeof(*cfg));
1959
1960         cfg->fc_table = RT6_TABLE_MAIN;
1961         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1962         cfg->fc_metric = rtmsg->rtmsg_metric;
1963         cfg->fc_expires = rtmsg->rtmsg_info;
1964         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1965         cfg->fc_src_len = rtmsg->rtmsg_src_len;
1966         cfg->fc_flags = rtmsg->rtmsg_flags;
1967
1968         cfg->fc_nlinfo.nl_net = net;
1969
1970         cfg->fc_dst = rtmsg->rtmsg_dst;
1971         cfg->fc_src = rtmsg->rtmsg_src;
1972         cfg->fc_gateway = rtmsg->rtmsg_gateway;
1973 }
1974
1975 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1976 {
1977         struct fib6_config cfg;
1978         struct in6_rtmsg rtmsg;
1979         int err;
1980
1981         switch(cmd) {
1982         case SIOCADDRT:         /* Add a route */
1983         case SIOCDELRT:         /* Delete a route */
1984                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1985                         return -EPERM;
1986                 err = copy_from_user(&rtmsg, arg,
1987                                      sizeof(struct in6_rtmsg));
1988                 if (err)
1989                         return -EFAULT;
1990
1991                 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1992
1993                 rtnl_lock();
1994                 switch (cmd) {
1995                 case SIOCADDRT:
1996                         err = ip6_route_add(&cfg);
1997                         break;
1998                 case SIOCDELRT:
1999                         err = ip6_route_del(&cfg);
2000                         break;
2001                 default:
2002                         err = -EINVAL;
2003                 }
2004                 rtnl_unlock();
2005
2006                 return err;
2007         }
2008
2009         return -EINVAL;
2010 }
2011
2012 /*
2013  *      Drop the packet on the floor
2014  */
2015
2016 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2017 {
2018         int type;
2019         struct dst_entry *dst = skb_dst(skb);
2020         switch (ipstats_mib_noroutes) {
2021         case IPSTATS_MIB_INNOROUTES:
2022                 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2023                 if (type == IPV6_ADDR_ANY) {
2024                         IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2025                                       IPSTATS_MIB_INADDRERRORS);
2026                         break;
2027                 }
2028                 /* FALLTHROUGH */
2029         case IPSTATS_MIB_OUTNOROUTES:
2030                 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2031                               ipstats_mib_noroutes);
2032                 break;
2033         }
2034         icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2035         kfree_skb(skb);
2036         return 0;
2037 }
2038
2039 static int ip6_pkt_discard(struct sk_buff *skb)
2040 {
2041         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2042 }
2043
2044 static int ip6_pkt_discard_out(struct sk_buff *skb)
2045 {
2046         skb->dev = skb_dst(skb)->dev;
2047         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2048 }
2049
2050 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2051
2052 static int ip6_pkt_prohibit(struct sk_buff *skb)
2053 {
2054         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2055 }
2056
2057 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2058 {
2059         skb->dev = skb_dst(skb)->dev;
2060         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2061 }
2062
2063 #endif
2064
2065 /*
2066  *      Allocate a dst for local (unicast / anycast) address.
2067  */
2068
2069 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2070                                     const struct in6_addr *addr,
2071                                     bool anycast)
2072 {
2073         struct net *net = dev_net(idev->dev);
2074         struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
2075
2076         if (!rt) {
2077                 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
2078                 return ERR_PTR(-ENOMEM);
2079         }
2080
2081         in6_dev_hold(idev);
2082
2083         rt->dst.flags |= DST_HOST;
2084         rt->dst.input = ip6_input;
2085         rt->dst.output = ip6_output;
2086         rt->rt6i_idev = idev;
2087
2088         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2089         if (anycast)
2090                 rt->rt6i_flags |= RTF_ANYCAST;
2091         else
2092                 rt->rt6i_flags |= RTF_LOCAL;
2093
2094         rt->rt6i_dst.addr = *addr;
2095         rt->rt6i_dst.plen = 128;
2096         rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2097
2098         atomic_set(&rt->dst.__refcnt, 1);
2099
2100         return rt;
2101 }
2102
2103 int ip6_route_get_saddr(struct net *net,
2104                         struct rt6_info *rt,
2105                         const struct in6_addr *daddr,
2106                         unsigned int prefs,
2107                         struct in6_addr *saddr)
2108 {
2109         struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2110         int err = 0;
2111         if (rt->rt6i_prefsrc.plen)
2112                 *saddr = rt->rt6i_prefsrc.addr;
2113         else
2114                 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2115                                          daddr, prefs, saddr);
2116         return err;
2117 }
2118
2119 /* remove deleted ip from prefsrc entries */
/* Argument bundle handed to fib6_remove_prefsrc() by fib6_clean_all(). */
struct arg_dev_net_ip {
	struct net_device *dev;	/* device to match; NULL matches any */
	struct net *net;	/* namespace whose null entry is skipped */
	struct in6_addr *addr;	/* address compared with the route prefsrc */
};
2125
2126 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2127 {
2128         struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2129         struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2130         struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2131
2132         if (((void *)rt->dst.dev == dev || !dev) &&
2133             rt != net->ipv6.ip6_null_entry &&
2134             ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2135                 /* remove prefsrc entry */
2136                 rt->rt6i_prefsrc.plen = 0;
2137         }
2138         return 0;
2139 }
2140
2141 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2142 {
2143         struct net *net = dev_net(ifp->idev->dev);
2144         struct arg_dev_net_ip adni = {
2145                 .dev = ifp->idev->dev,
2146                 .net = net,
2147                 .addr = &ifp->addr,
2148         };
2149         fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2150 }
2151
/* Argument bundle handed to fib6_ifdown() by the route walkers. */
struct arg_dev_net {
	struct net_device *dev;	/* device to match; NULL matches any */
	struct net *net;	/* namespace whose null entry is skipped */
};
2156
2157 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2158 {
2159         const struct arg_dev_net *adn = arg;
2160         const struct net_device *dev = adn->dev;
2161
2162         if ((rt->dst.dev == dev || !dev) &&
2163             rt != adn->net->ipv6.ip6_null_entry)
2164                 return -1;
2165
2166         return 0;
2167 }
2168
2169 void rt6_ifdown(struct net *net, struct net_device *dev)
2170 {
2171         struct arg_dev_net adn = {
2172                 .dev = dev,
2173                 .net = net,
2174         };
2175
2176         fib6_clean_all(net, fib6_ifdown, 0, &adn);
2177         icmp6_clean_all(fib6_ifdown, &adn);
2178 }
2179
/* Argument bundle handed to rt6_mtu_change_route() by fib6_clean_all(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
};
2184
2185 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2186 {
2187         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2188         struct inet6_dev *idev;
2189
2190         /* In IPv6 pmtu discovery is not optional,
2191            so that RTAX_MTU lock cannot disable it.
2192            We still use this lock to block changes
2193            caused by addrconf/ndisc.
2194         */
2195
2196         idev = __in6_dev_get(arg->dev);
2197         if (!idev)
2198                 return 0;
2199
2200         /* For administrative MTU increase, there is no way to discover
2201            IPv6 PMTU increase, so PMTU increase should be updated here.
2202            Since RFC 1981 doesn't include administrative MTU increase
2203            update PMTU increase is a MUST. (i.e. jumbo frame)
2204          */
2205         /*
2206            If new MTU is less than route PMTU, this new MTU will be the
2207            lowest MTU in the path, update the route PMTU to reflect PMTU
2208            decreases; if new MTU is greater than route PMTU, and the
2209            old MTU is the lowest MTU in the path, update the route PMTU
2210            to reflect the increase. In this case if the other nodes' MTU
2211            also have the lowest MTU, TOO BIG MESSAGE will be lead to
2212            PMTU discouvery.
2213          */
2214         if (rt->dst.dev == arg->dev &&
2215             !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2216             (dst_mtu(&rt->dst) >= arg->mtu ||
2217              (dst_mtu(&rt->dst) < arg->mtu &&
2218               dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2219                 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2220         }
2221         return 0;
2222 }
2223
2224 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2225 {
2226         struct rt6_mtu_change_arg arg = {
2227                 .dev = dev,
2228                 .mtu = mtu,
2229         };
2230
2231         fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2232 }
2233
2234 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2235         [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2236         [RTA_OIF]               = { .type = NLA_U32 },
2237         [RTA_IIF]               = { .type = NLA_U32 },
2238         [RTA_PRIORITY]          = { .type = NLA_U32 },
2239         [RTA_METRICS]           = { .type = NLA_NESTED },
2240         [RTA_MULTIPATH]         = { .len = sizeof(struct rtnexthop) },
2241 };
2242
2243 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2244                               struct fib6_config *cfg)
2245 {
2246         struct rtmsg *rtm;
2247         struct nlattr *tb[RTA_MAX+1];
2248         int err;
2249
2250         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2251         if (err < 0)
2252                 goto errout;
2253
2254         err = -EINVAL;
2255         rtm = nlmsg_data(nlh);
2256         memset(cfg, 0, sizeof(*cfg));
2257
2258         cfg->fc_table = rtm->rtm_table;
2259         cfg->fc_dst_len = rtm->rtm_dst_len;
2260         cfg->fc_src_len = rtm->rtm_src_len;
2261         cfg->fc_flags = RTF_UP;
2262         cfg->fc_protocol = rtm->rtm_protocol;
2263         cfg->fc_type = rtm->rtm_type;
2264
2265         if (rtm->rtm_type == RTN_UNREACHABLE ||
2266             rtm->rtm_type == RTN_BLACKHOLE ||
2267             rtm->rtm_type == RTN_PROHIBIT ||
2268             rtm->rtm_type == RTN_THROW)
2269                 cfg->fc_flags |= RTF_REJECT;
2270
2271         if (rtm->rtm_type == RTN_LOCAL)
2272                 cfg->fc_flags |= RTF_LOCAL;
2273
2274         cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2275         cfg->fc_nlinfo.nlh = nlh;
2276         cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2277
2278         if (tb[RTA_GATEWAY]) {
2279                 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2280                 cfg->fc_flags |= RTF_GATEWAY;
2281         }
2282
2283         if (tb[RTA_DST]) {
2284                 int plen = (rtm->rtm_dst_len + 7) >> 3;
2285
2286                 if (nla_len(tb[RTA_DST]) < plen)
2287                         goto errout;
2288
2289                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2290         }
2291
2292         if (tb[RTA_SRC]) {
2293                 int plen = (rtm->rtm_src_len + 7) >> 3;
2294
2295                 if (nla_len(tb[RTA_SRC]) < plen)
2296                         goto errout;
2297
2298                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2299         }
2300
2301         if (tb[RTA_PREFSRC])
2302                 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2303
2304         if (tb[RTA_OIF])
2305                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2306
2307         if (tb[RTA_PRIORITY])
2308                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2309
2310         if (tb[RTA_METRICS]) {
2311                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2312                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2313         }
2314
2315         if (tb[RTA_TABLE])
2316                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2317
2318         if (tb[RTA_MULTIPATH]) {
2319                 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2320                 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2321         }
2322
2323         err = 0;
2324 errout:
2325         return err;
2326 }
2327
2328 static int ip6_route_multipath(struct fib6_config *cfg, int add)
2329 {
2330         struct fib6_config r_cfg;
2331         struct rtnexthop *rtnh;
2332         int remaining;
2333         int attrlen;
2334         int err = 0, last_err = 0;
2335
2336 beginning:
2337         rtnh = (struct rtnexthop *)cfg->fc_mp;
2338         remaining = cfg->fc_mp_len;
2339
2340         /* Parse a Multipath Entry */
2341         while (rtnh_ok(rtnh, remaining)) {
2342                 memcpy(&r_cfg, cfg, sizeof(*cfg));
2343                 if (rtnh->rtnh_ifindex)
2344                         r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2345
2346                 attrlen = rtnh_attrlen(rtnh);
2347                 if (attrlen > 0) {
2348                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2349
2350                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2351                         if (nla) {
2352                                 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2353                                 r_cfg.fc_flags |= RTF_GATEWAY;
2354                         }
2355                 }
2356                 err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
2357                 if (err) {
2358                         last_err = err;
2359                         /* If we are trying to remove a route, do not stop the
2360                          * loop when ip6_route_del() fails (because next hop is
2361                          * already gone), we should try to remove all next hops.
2362                          */
2363                         if (add) {
2364                                 /* If add fails, we should try to delete all
2365                                  * next hops that have been already added.
2366                                  */
2367                                 add = 0;
2368                                 goto beginning;
2369                         }
2370                 }
2371                 /* Because each route is added like a single route we remove
2372                  * this flag after the first nexthop (if there is a collision,
2373                  * we have already fail to add the first nexthop:
2374                  * fib6_add_rt2node() has reject it).
2375                  */
2376                 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
2377                 rtnh = rtnh_next(rtnh, &remaining);
2378         }
2379
2380         return last_err;
2381 }
2382
2383 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh)
2384 {
2385         struct fib6_config cfg;
2386         int err;
2387
2388         err = rtm_to_fib6_config(skb, nlh, &cfg);
2389         if (err < 0)
2390                 return err;
2391
2392         if (cfg.fc_mp)
2393                 return ip6_route_multipath(&cfg, 0);
2394         else
2395                 return ip6_route_del(&cfg);
2396 }
2397
2398 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh)
2399 {
2400         struct fib6_config cfg;
2401         int err;
2402
2403         err = rtm_to_fib6_config(skb, nlh, &cfg);
2404         if (err < 0)
2405                 return err;
2406
2407         if (cfg.fc_mp)
2408                 return ip6_route_multipath(&cfg, 1);
2409         else
2410                 return ip6_route_add(&cfg);
2411 }
2412
2413 static inline size_t rt6_nlmsg_size(void)
2414 {
2415         return NLMSG_ALIGN(sizeof(struct rtmsg))
2416                + nla_total_size(16) /* RTA_SRC */
2417                + nla_total_size(16) /* RTA_DST */
2418                + nla_total_size(16) /* RTA_GATEWAY */
2419                + nla_total_size(16) /* RTA_PREFSRC */
2420                + nla_total_size(4) /* RTA_TABLE */
2421                + nla_total_size(4) /* RTA_IIF */
2422                + nla_total_size(4) /* RTA_OIF */
2423                + nla_total_size(4) /* RTA_PRIORITY */
2424                + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2425                + nla_total_size(sizeof(struct rta_cacheinfo));
2426 }
2427
2428 static int rt6_fill_node(struct net *net,
2429                          struct sk_buff *skb, struct rt6_info *rt,
2430                          struct in6_addr *dst, struct in6_addr *src,
2431                          int iif, int type, u32 portid, u32 seq,
2432                          int prefix, int nowait, unsigned int flags)
2433 {
2434         struct rtmsg *rtm;
2435         struct nlmsghdr *nlh;
2436         long expires;
2437         u32 table;
2438
2439         if (prefix) {   /* user wants prefix routes only */
2440                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2441                         /* success since this is not a prefix route */
2442                         return 1;
2443                 }
2444         }
2445
2446         nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
2447         if (!nlh)
2448                 return -EMSGSIZE;
2449
2450         rtm = nlmsg_data(nlh);
2451         rtm->rtm_family = AF_INET6;
2452         rtm->rtm_dst_len = rt->rt6i_dst.plen;
2453         rtm->rtm_src_len = rt->rt6i_src.plen;
2454         rtm->rtm_tos = 0;
2455         if (rt->rt6i_table)
2456                 table = rt->rt6i_table->tb6_id;
2457         else
2458                 table = RT6_TABLE_UNSPEC;
2459         rtm->rtm_table = table;
2460         if (nla_put_u32(skb, RTA_TABLE, table))
2461                 goto nla_put_failure;
2462         if (rt->rt6i_flags & RTF_REJECT) {
2463                 switch (rt->dst.error) {
2464                 case -EINVAL:
2465                         rtm->rtm_type = RTN_BLACKHOLE;
2466                         break;
2467                 case -EACCES:
2468                         rtm->rtm_type = RTN_PROHIBIT;
2469                         break;
2470                 case -EAGAIN:
2471                         rtm->rtm_type = RTN_THROW;
2472                         break;
2473                 default:
2474                         rtm->rtm_type = RTN_UNREACHABLE;
2475                         break;
2476                 }
2477         }
2478         else if (rt->rt6i_flags & RTF_LOCAL)
2479                 rtm->rtm_type = RTN_LOCAL;
2480         else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2481                 rtm->rtm_type = RTN_LOCAL;
2482         else
2483                 rtm->rtm_type = RTN_UNICAST;
2484         rtm->rtm_flags = 0;
2485         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2486         rtm->rtm_protocol = rt->rt6i_protocol;
2487         if (rt->rt6i_flags & RTF_DYNAMIC)
2488                 rtm->rtm_protocol = RTPROT_REDIRECT;
2489         else if (rt->rt6i_flags & RTF_ADDRCONF) {
2490                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2491                         rtm->rtm_protocol = RTPROT_RA;
2492                 else
2493                         rtm->rtm_protocol = RTPROT_KERNEL;
2494         }
2495
2496         if (rt->rt6i_flags & RTF_CACHE)
2497                 rtm->rtm_flags |= RTM_F_CLONED;
2498
2499         if (dst) {
2500                 if (nla_put(skb, RTA_DST, 16, dst))
2501                         goto nla_put_failure;
2502                 rtm->rtm_dst_len = 128;
2503         } else if (rtm->rtm_dst_len)
2504                 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2505                         goto nla_put_failure;
2506 #ifdef CONFIG_IPV6_SUBTREES
2507         if (src) {
2508                 if (nla_put(skb, RTA_SRC, 16, src))
2509                         goto nla_put_failure;
2510                 rtm->rtm_src_len = 128;
2511         } else if (rtm->rtm_src_len &&
2512                    nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2513                 goto nla_put_failure;
2514 #endif
2515         if (iif) {
2516 #ifdef CONFIG_IPV6_MROUTE
2517                 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2518                         int err = ip6mr_get_route(net, skb, rtm, nowait);
2519                         if (err <= 0) {
2520                                 if (!nowait) {
2521                                         if (err == 0)
2522                                                 return 0;
2523                                         goto nla_put_failure;
2524                                 } else {
2525                                         if (err == -EMSGSIZE)
2526                                                 goto nla_put_failure;
2527                                 }
2528                         }
2529                 } else
2530 #endif
2531                         if (nla_put_u32(skb, RTA_IIF, iif))
2532                                 goto nla_put_failure;
2533         } else if (dst) {
2534                 struct in6_addr saddr_buf;
2535                 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2536                     nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2537                         goto nla_put_failure;
2538         }
2539
2540         if (rt->rt6i_prefsrc.plen) {
2541                 struct in6_addr saddr_buf;
2542                 saddr_buf = rt->rt6i_prefsrc.addr;
2543                 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2544                         goto nla_put_failure;
2545         }
2546
2547         if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2548                 goto nla_put_failure;
2549
2550         if (rt->rt6i_flags & RTF_GATEWAY) {
2551                 if (nla_put(skb, RTA_GATEWAY, 16, &rt->rt6i_gateway) < 0)
2552                         goto nla_put_failure;
2553         }
2554
2555         if (rt->dst.dev &&
2556             nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2557                 goto nla_put_failure;
2558         if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2559                 goto nla_put_failure;
2560
2561         expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2562
2563         if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2564                 goto nla_put_failure;
2565
2566         return nlmsg_end(skb, nlh);
2567
2568 nla_put_failure:
2569         nlmsg_cancel(skb, nlh);
2570         return -EMSGSIZE;
2571 }
2572
2573 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2574 {
2575         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2576         int prefix;
2577
2578         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2579                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2580                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2581         } else
2582                 prefix = 0;
2583
2584         return rt6_fill_node(arg->net,
2585                      arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2586                      NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2587                      prefix, 0, NLM_F_MULTI);
2588 }
2589
2590 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh)
2591 {
2592         struct net *net = sock_net(in_skb->sk);
2593         struct nlattr *tb[RTA_MAX+1];
2594         struct rt6_info *rt;
2595         struct sk_buff *skb;
2596         struct rtmsg *rtm;
2597         struct flowi6 fl6;
2598         int err, iif = 0, oif = 0;
2599
2600         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2601         if (err < 0)
2602                 goto errout;
2603
2604         err = -EINVAL;
2605         memset(&fl6, 0, sizeof(fl6));
2606
2607         if (tb[RTA_SRC]) {
2608                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2609                         goto errout;
2610
2611                 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2612         }
2613
2614         if (tb[RTA_DST]) {
2615                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2616                         goto errout;
2617
2618                 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2619         }
2620
2621         if (tb[RTA_IIF])
2622                 iif = nla_get_u32(tb[RTA_IIF]);
2623
2624         if (tb[RTA_OIF])
2625                 oif = nla_get_u32(tb[RTA_OIF]);
2626
2627         if (iif) {
2628                 struct net_device *dev;
2629                 int flags = 0;
2630
2631                 dev = __dev_get_by_index(net, iif);
2632                 if (!dev) {
2633                         err = -ENODEV;
2634                         goto errout;
2635                 }
2636
2637                 fl6.flowi6_iif = iif;
2638
2639                 if (!ipv6_addr_any(&fl6.saddr))
2640                         flags |= RT6_LOOKUP_F_HAS_SADDR;
2641
2642                 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2643                                                                flags);
2644         } else {
2645                 fl6.flowi6_oif = oif;
2646
2647                 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2648         }
2649
2650         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2651         if (!skb) {
2652                 ip6_rt_put(rt);
2653                 err = -ENOBUFS;
2654                 goto errout;
2655         }
2656
2657         /* Reserve room for dummy headers, this skb can pass
2658            through good chunk of routing engine.
2659          */
2660         skb_reset_mac_header(skb);
2661         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2662
2663         skb_dst_set(skb, &rt->dst);
2664
2665         err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2666                             RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2667                             nlh->nlmsg_seq, 0, 0, 0);
2668         if (err < 0) {
2669                 kfree_skb(skb);
2670                 goto errout;
2671         }
2672
2673         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2674 errout:
2675         return err;
2676 }
2677
2678 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2679 {
2680         struct sk_buff *skb;
2681         struct net *net = info->nl_net;
2682         u32 seq;
2683         int err;
2684
2685         err = -ENOBUFS;
2686         seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2687
2688         skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2689         if (!skb)
2690                 goto errout;
2691
2692         err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2693                                 event, info->portid, seq, 0, 0, 0);
2694         if (err < 0) {
2695                 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2696                 WARN_ON(err == -EMSGSIZE);
2697                 kfree_skb(skb);
2698                 goto errout;
2699         }
2700         rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2701                     info->nlh, gfp_any());
2702         return;
2703 errout:
2704         if (err < 0)
2705                 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2706 }
2707
2708 static int ip6_route_dev_notify(struct notifier_block *this,
2709                                 unsigned long event, void *ptr)
2710 {
2711         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
2712         struct net *net = dev_net(dev);
2713
2714         if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2715                 net->ipv6.ip6_null_entry->dst.dev = dev;
2716                 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2717 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2718                 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2719                 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2720                 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2721                 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2722 #endif
2723         }
2724
2725         return NOTIFY_OK;
2726 }
2727
2728 /*
2729  *      /proc
2730  */
2731
2732 #ifdef CONFIG_PROC_FS
2733
/*
 * Buffer/cursor bookkeeping, presumably for a read handler in this /proc
 * section.
 * NOTE(review): nothing in the visible part of this file references this
 * struct (the handlers below all go through seq_file) - confirm whether it
 * is dead code before relying on it.
 */
2734 struct rt6_proc_arg
2735 {
2736         char *buffer;
2737         int offset;
2738         int length;
2739         int skip;
2740         int len;
2741 };
2742
2743 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2744 {
2745         struct seq_file *m = p_arg;
2746
2747         seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2748
2749 #ifdef CONFIG_IPV6_SUBTREES
2750         seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2751 #else
2752         seq_puts(m, "00000000000000000000000000000000 00 ");
2753 #endif
2754         if (rt->rt6i_flags & RTF_GATEWAY) {
2755                 seq_printf(m, "%pi6", &rt->rt6i_gateway);
2756         } else {
2757                 seq_puts(m, "00000000000000000000000000000000");
2758         }
2759         seq_printf(m, " %08x %08x %08x %08x %8s\n",
2760                    rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2761                    rt->dst.__use, rt->rt6i_flags,
2762                    rt->dst.dev ? rt->dst.dev->name : "");
2763         return 0;
2764 }
2765
2766 static int ipv6_route_show(struct seq_file *m, void *v)
2767 {
2768         struct net *net = (struct net *)m->private;
2769         fib6_clean_all_ro(net, rt6_info_route, 0, m);
2770         return 0;
2771 }
2772
2773 static int ipv6_route_open(struct inode *inode, struct file *file)
2774 {
2775         return single_open_net(inode, file, ipv6_route_show);
2776 }
2777
/*
 * File operations for the "ipv6_route" proc entry (registered in
 * ip6_route_net_init_late()).  Opening goes through ipv6_route_open(),
 * which uses single_open_net(); .release is the matching
 * single_release_net().
 */
2778 static const struct file_operations ipv6_route_proc_fops = {
2779         .owner          = THIS_MODULE,
2780         .open           = ipv6_route_open,
2781         .read           = seq_read,
2782         .llseek         = seq_lseek,
2783         .release        = single_release_net,
2784 };
2785
2786 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2787 {
2788         struct net *net = (struct net *)seq->private;
2789         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2790                    net->ipv6.rt6_stats->fib_nodes,
2791                    net->ipv6.rt6_stats->fib_route_nodes,
2792                    net->ipv6.rt6_stats->fib_rt_alloc,
2793                    net->ipv6.rt6_stats->fib_rt_entries,
2794                    net->ipv6.rt6_stats->fib_rt_cache,
2795                    dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2796                    net->ipv6.rt6_stats->fib_discarded_routes);
2797
2798         return 0;
2799 }
2800
2801 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2802 {
2803         return single_open_net(inode, file, rt6_stats_seq_show);
2804 }
2805
/*
 * File operations for the "rt6_stats" proc entry (registered in
 * ip6_route_net_init_late()).  Opening goes through rt6_stats_seq_open(),
 * which uses single_open_net(); .release is the matching
 * single_release_net().
 */
2806 static const struct file_operations rt6_stats_seq_fops = {
2807         .owner   = THIS_MODULE,
2808         .open    = rt6_stats_seq_open,
2809         .read    = seq_read,
2810         .llseek  = seq_lseek,
2811         .release = single_release_net,
2812 };
2813 #endif  /* CONFIG_PROC_FS */
2814
2815 #ifdef CONFIG_SYSCTL
2816
2817 static
2818 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
2819                               void __user *buffer, size_t *lenp, loff_t *ppos)
2820 {
2821         struct net *net;
2822         int delay;
2823         if (!write)
2824                 return -EINVAL;
2825
2826         net = (struct net *)ctl->extra1;
2827         delay = net->ipv6.sysctl.flush_delay;
2828         proc_dointvec(ctl, write, buffer, lenp, ppos);
2829         fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
2830         return 0;
2831 }
2832
/*
 * Template for the per-namespace IPv6 route sysctl table.
 *
 * The .data pointers here reference init_net (or ip6_dst_ops_template);
 * ipv6_route_sysctl_init() duplicates the table and re-points .data of
 * entries [0]..[9] BY INDEX, so the order of the entries below must stay
 * in sync with that function.
 */
2833 struct ctl_table ipv6_route_table_template[] = {
        /* [0] write-only (0200); handled by ipv6_sysctl_rtcache_flush() */
2834         {
2835                 .procname       =       "flush",
2836                 .data           =       &init_net.ipv6.sysctl.flush_delay,
2837                 .maxlen         =       sizeof(int),
2838                 .mode           =       0200,
2839                 .proc_handler   =       ipv6_sysctl_rtcache_flush
2840         },
        /* [1] */
2841         {
2842                 .procname       =       "gc_thresh",
2843                 .data           =       &ip6_dst_ops_template.gc_thresh,
2844                 .maxlen         =       sizeof(int),
2845                 .mode           =       0644,
2846                 .proc_handler   =       proc_dointvec,
2847         },
        /* [2] */
2848         {
2849                 .procname       =       "max_size",
2850                 .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
2851                 .maxlen         =       sizeof(int),
2852                 .mode           =       0644,
2853                 .proc_handler   =       proc_dointvec,
2854         },
        /* [3] same variable as [9], exposed via proc_dointvec_jiffies */
2855         {
2856                 .procname       =       "gc_min_interval",
2857                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2858                 .maxlen         =       sizeof(int),
2859                 .mode           =       0644,
2860                 .proc_handler   =       proc_dointvec_jiffies,
2861         },
        /* [4] */
2862         {
2863                 .procname       =       "gc_timeout",
2864                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2865                 .maxlen         =       sizeof(int),
2866                 .mode           =       0644,
2867                 .proc_handler   =       proc_dointvec_jiffies,
2868         },
        /* [5] */
2869         {
2870                 .procname       =       "gc_interval",
2871                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2872                 .maxlen         =       sizeof(int),
2873                 .mode           =       0644,
2874                 .proc_handler   =       proc_dointvec_jiffies,
2875         },
        /* [6] */
2876         {
2877                 .procname       =       "gc_elasticity",
2878                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2879                 .maxlen         =       sizeof(int),
2880                 .mode           =       0644,
2881                 .proc_handler   =       proc_dointvec,
2882         },
        /* [7] */
2883         {
2884                 .procname       =       "mtu_expires",
2885                 .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2886                 .maxlen         =       sizeof(int),
2887                 .mode           =       0644,
2888                 .proc_handler   =       proc_dointvec_jiffies,
2889         },
        /* [8] */
2890         {
2891                 .procname       =       "min_adv_mss",
2892                 .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2893                 .maxlen         =       sizeof(int),
2894                 .mode           =       0644,
2895                 .proc_handler   =       proc_dointvec,
2896         },
        /* [9] same variable as [3], exposed via proc_dointvec_ms_jiffies */
2897         {
2898                 .procname       =       "gc_min_interval_ms",
2899                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2900                 .maxlen         =       sizeof(int),
2901                 .mode           =       0644,
2902                 .proc_handler   =       proc_dointvec_ms_jiffies,
2903         },
        /* empty terminator entry */
2904         { }
2905 };
2906
2907 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2908 {
2909         struct ctl_table *table;
2910
2911         table = kmemdup(ipv6_route_table_template,
2912                         sizeof(ipv6_route_table_template),
2913                         GFP_KERNEL);
2914
2915         if (table) {
2916                 table[0].data = &net->ipv6.sysctl.flush_delay;
2917                 table[0].extra1 = net;
2918                 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2919                 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2920                 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2921                 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2922                 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2923                 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2924                 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2925                 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2926                 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2927
2928                 /* Don't export sysctls to unprivileged users */
2929                 if (net->user_ns != &init_user_ns)
2930                         table[0].procname = NULL;
2931         }
2932
2933         return table;
2934 }
2935 #endif
2936
/*
 * Per-namespace init of the IPv6 routing state.
 *
 * Copies ip6_dst_ops_template into @net, allocates per-namespace copies of
 * the null entry template (and, with CONFIG_IPV6_MULTIPLE_TABLES, the
 * prohibit and blackhole templates), and sets the route sysctl defaults.
 *
 * Returns 0 on success.  Every failure path returns -ENOMEM (ret is never
 * overwritten with the callee's error) after freeing what was set up.
 */
2937 static int __net_init ip6_route_net_init(struct net *net)
2938 {
2939         int ret = -ENOMEM;
2940
2941         memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2942                sizeof(net->ipv6.ip6_dst_ops));
2943
2944         if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2945                 goto out_ip6_dst_ops;
2946
        /* Each special entry is its own dst.path and uses this net's dst ops. */
2947         net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2948                                            sizeof(*net->ipv6.ip6_null_entry),
2949                                            GFP_KERNEL);
2950         if (!net->ipv6.ip6_null_entry)
2951                 goto out_ip6_dst_entries;
2952         net->ipv6.ip6_null_entry->dst.path =
2953                 (struct dst_entry *)net->ipv6.ip6_null_entry;
2954         net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2955         dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2956                          ip6_template_metrics, true);
2957
2958 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2959         net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2960                                                sizeof(*net->ipv6.ip6_prohibit_entry),
2961                                                GFP_KERNEL);
2962         if (!net->ipv6.ip6_prohibit_entry)
2963                 goto out_ip6_null_entry;
2964         net->ipv6.ip6_prohibit_entry->dst.path =
2965                 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2966         net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2967         dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2968                          ip6_template_metrics, true);
2969
2970         net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2971                                                sizeof(*net->ipv6.ip6_blk_hole_entry),
2972                                                GFP_KERNEL);
2973         if (!net->ipv6.ip6_blk_hole_entry)
2974                 goto out_ip6_prohibit_entry;
2975         net->ipv6.ip6_blk_hole_entry->dst.path =
2976                 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2977         net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2978         dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2979                          ip6_template_metrics, true);
2980 #endif
2981
        /* Sysctl defaults; the interval/timeout/expires values are HZ multiples. */
2982         net->ipv6.sysctl.flush_delay = 0;
2983         net->ipv6.sysctl.ip6_rt_max_size = 4096;
2984         net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2985         net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2986         net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2987         net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2988         net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2989         net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2990
2991         net->ipv6.ip6_rt_gc_expire = 30*HZ;
2992
2993         ret = 0;
2994 out:
2995         return ret;
2996
        /*
         * Error unwinding: each label releases what was set up before the
         * failing step, then falls through to the earlier ones.
         */
2997 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2998 out_ip6_prohibit_entry:
2999         kfree(net->ipv6.ip6_prohibit_entry);
3000 out_ip6_null_entry:
3001         kfree(net->ipv6.ip6_null_entry);
3002 #endif
3003 out_ip6_dst_entries:
3004         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3005 out_ip6_dst_ops:
3006         goto out;
3007 }
3008
3009 static void __net_exit ip6_route_net_exit(struct net *net)
3010 {
3011         kfree(net->ipv6.ip6_null_entry);
3012 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3013         kfree(net->ipv6.ip6_prohibit_entry);
3014         kfree(net->ipv6.ip6_blk_hole_entry);
3015 #endif
3016         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3017 }
3018
3019 static int __net_init ip6_route_net_init_late(struct net *net)
3020 {
3021 #ifdef CONFIG_PROC_FS
3022         proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3023         proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3024 #endif
3025         return 0;
3026 }
3027
3028 static void __net_exit ip6_route_net_exit_late(struct net *net)
3029 {
3030 #ifdef CONFIG_PROC_FS
3031         remove_proc_entry("ipv6_route", net->proc_net);
3032         remove_proc_entry("rt6_stats", net->proc_net);
3033 #endif
3034 }
3035
/*
 * Per-namespace hooks for the core routing state (special route entries,
 * dst ops, sysctl defaults).  Registered in ip6_route_init().
 */
3036 static struct pernet_operations ip6_route_net_ops = {
3037         .init = ip6_route_net_init,
3038         .exit = ip6_route_net_exit,
3039 };
3040
3041 static int __net_init ipv6_inetpeer_init(struct net *net)
3042 {
3043         struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3044
3045         if (!bp)
3046                 return -ENOMEM;
3047         inet_peer_base_init(bp);
3048         net->ipv6.peers = bp;
3049         return 0;
3050 }
3051
3052 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3053 {
3054         struct inet_peer_base *bp = net->ipv6.peers;
3055
3056         net->ipv6.peers = NULL;
3057         inetpeer_invalidate_tree(bp);
3058         kfree(bp);
3059 }
3060
/*
 * Per-namespace hooks managing net->ipv6.peers.  Registered in
 * ip6_route_init() before ip6_route_net_ops.
 */
3061 static struct pernet_operations ipv6_inetpeer_ops = {
3062         .init   =       ipv6_inetpeer_init,
3063         .exit   =       ipv6_inetpeer_exit,
3064 };
3065
/*
 * Per-namespace hooks that create/remove the route proc entries.
 * Registered in ip6_route_init() after fib6, xfrm6 and the fib6 rules
 * have been initialised.
 */
3066 static struct pernet_operations ip6_route_net_late_ops = {
3067         .init = ip6_route_net_init_late,
3068         .exit = ip6_route_net_exit_late,
3069 };
3070
/*
 * Netdevice notifier that lets ip6_route_dev_notify() attach a newly
 * registered loopback device to the namespace's special route entries.
 * Registered as the last step of ip6_route_init().
 */
3071 static struct notifier_block ip6_route_dev_notifier = {
3072         .notifier_call = ip6_route_dev_notify,
3073         .priority = 0,
3074 };
3075
/*
 * One-time init of the IPv6 routing layer.
 *
 * In order: creates the "ip6_dst_cache" slab, initialises the blackhole
 * dst ops counter, registers the inetpeer and route pernet subsystems,
 * wires init_net's special route entries to the loopback device, then
 * initialises fib6, xfrm6 and the fib6 rules, registers the late pernet
 * subsystem, the RTM_{NEW,DEL,GET}ROUTE rtnetlink handlers and finally
 * the netdevice notifier.
 *
 * Returns 0 on success or a negative errno; on failure the labels at the
 * bottom undo the completed steps in reverse order.
 *
 * NOTE(review): the unwind labels do not undo the __rtnl_register() calls
 * (relevant if register_netdevice_notifier() fails) and do not drop what
 * in6_dev_get() returned for the loopback device - confirm whether either
 * needs handling.
 */
3076 int __init ip6_route_init(void)
3077 {
3078         int ret;
3079
3080         ret = -ENOMEM;
3081         ip6_dst_ops_template.kmem_cachep =
3082                 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3083                                   SLAB_HWCACHE_ALIGN, NULL);
3084         if (!ip6_dst_ops_template.kmem_cachep)
3085                 goto out;
3086
3087         ret = dst_entries_init(&ip6_dst_blackhole_ops);
3088         if (ret)
3089                 goto out_kmem_cache;
3090
3091         ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3092         if (ret)
3093                 goto out_dst_entries;
3094
3095         ret = register_pernet_subsys(&ip6_route_net_ops);
3096         if (ret)
3097                 goto out_register_inetpeer;
3098
        /* The blackhole ops share the slab cache created above. */
3099         ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3100
3101         /* Registering of the loopback is done before this portion of code,
3102          * the loopback reference in rt6_info will not be taken, do it
3103          * manually for init_net */
3104         init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3105         init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3106   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3107         init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3108         init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3109         init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3110         init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3111   #endif
3112         ret = fib6_init();
3113         if (ret)
3114                 goto out_register_subsys;
3115
3116         ret = xfrm6_init();
3117         if (ret)
3118                 goto out_fib6_init;
3119
3120         ret = fib6_rules_init();
3121         if (ret)
3122                 goto xfrm6_init;
3123
3124         ret = register_pernet_subsys(&ip6_route_net_late_ops);
3125         if (ret)
3126                 goto fib6_rules_init;
3127
        /* Any failing rtnetlink registration is reported as -ENOBUFS. */
3128         ret = -ENOBUFS;
3129         if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3130             __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3131             __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3132                 goto out_register_late_subsys;
3133
3134         ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3135         if (ret)
3136                 goto out_register_late_subsys;
3137
        /* Success falls through to here with ret == 0. */
3138 out:
3139         return ret;
3140
        /*
         * Error unwinding, in reverse order of setup.  Each label is named
         * after the step that is being undone at that point.
         */
3141 out_register_late_subsys:
3142         unregister_pernet_subsys(&ip6_route_net_late_ops);
3143 fib6_rules_init:
3144         fib6_rules_cleanup();
3145 xfrm6_init:
3146         xfrm6_fini();
3147 out_fib6_init:
3148         fib6_gc_cleanup();
3149 out_register_subsys:
3150         unregister_pernet_subsys(&ip6_route_net_ops);
3151 out_register_inetpeer:
3152         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3153 out_dst_entries:
3154         dst_entries_destroy(&ip6_dst_blackhole_ops);
3155 out_kmem_cache:
3156         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3157         goto out;
3158 }
3159
/*
 * Tear down everything ip6_route_init() set up: notifier, late pernet
 * ops, fib6 rules, xfrm6, fib6, the two pernet subsystems, the blackhole
 * dst counter and finally the dst slab cache.
 *
 * NOTE(review): ipv6_inetpeer_ops is unregistered BEFORE ip6_route_net_ops
 * here, i.e. in registration order rather than the reverse order used by
 * the error path of ip6_route_init() - confirm this is intended.
 */
3160 void ip6_route_cleanup(void)
3161 {
3162         unregister_netdevice_notifier(&ip6_route_dev_notifier);
3163         unregister_pernet_subsys(&ip6_route_net_late_ops);
3164         fib6_rules_cleanup();
3165         xfrm6_fini();
3166         fib6_gc_cleanup();
3167         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3168         unregister_pernet_subsys(&ip6_route_net_ops);
3169         dst_entries_destroy(&ip6_dst_blackhole_ops);
3170         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3171 }