]> git.kernelconcepts.de Git - karo-tx-linux.git/blob - net/ipv6/route.c
ipv6: fix ecmp lookup when oif is specified
[karo-tx-linux.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13
14 /*      Changes:
15  *
16  *      YOSHIFUJI Hideaki @USAGI
17  *              reworked default router selection.
18  *              - respect outgoing interface
19  *              - select from (probably) reachable routers (i.e.
20  *              routers in REACHABLE, STALE, DELAY or PROBE states).
21  *              - always select the same router if it is (probably)
22  *              reachable.  otherwise, round-robin the list.
23  *      Ville Nuorvala
24  *              Fixed routing subtrees.
25  */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60 #include <net/nexthop.h>
61
62 #include <asm/uaccess.h>
63
64 #ifdef CONFIG_SYSCTL
65 #include <linux/sysctl.h>
66 #endif
67
68 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
69                                     const struct in6_addr *dest);
70 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
71 static unsigned int      ip6_default_advmss(const struct dst_entry *dst);
72 static unsigned int      ip6_mtu(const struct dst_entry *dst);
73 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
74 static void             ip6_dst_destroy(struct dst_entry *);
75 static void             ip6_dst_ifdown(struct dst_entry *,
76                                        struct net_device *dev, int how);
77 static int               ip6_dst_gc(struct dst_ops *ops);
78
79 static int              ip6_pkt_discard(struct sk_buff *skb);
80 static int              ip6_pkt_discard_out(struct sk_buff *skb);
81 static void             ip6_link_failure(struct sk_buff *skb);
82 static void             ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
83                                            struct sk_buff *skb, u32 mtu);
84 static void             rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
85                                         struct sk_buff *skb);
86 static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
87
88 #ifdef CONFIG_IPV6_ROUTE_INFO
89 static struct rt6_info *rt6_add_route_info(struct net *net,
90                                            const struct in6_addr *prefix, int prefixlen,
91                                            const struct in6_addr *gwaddr, int ifindex,
92                                            unsigned int pref);
93 static struct rt6_info *rt6_get_route_info(struct net *net,
94                                            const struct in6_addr *prefix, int prefixlen,
95                                            const struct in6_addr *gwaddr, int ifindex);
96 #endif
97
98 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
99 {
100         struct rt6_info *rt = (struct rt6_info *) dst;
101         struct inet_peer *peer;
102         u32 *p = NULL;
103
104         if (!(rt->dst.flags & DST_HOST))
105                 return NULL;
106
107         peer = rt6_get_peer_create(rt);
108         if (peer) {
109                 u32 *old_p = __DST_METRICS_PTR(old);
110                 unsigned long prev, new;
111
112                 p = peer->metrics;
113                 if (inet_metrics_new(peer))
114                         memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
115
116                 new = (unsigned long) p;
117                 prev = cmpxchg(&dst->_metrics, old, new);
118
119                 if (prev != old) {
120                         p = __DST_METRICS_PTR(prev);
121                         if (prev & DST_METRICS_READ_ONLY)
122                                 p = NULL;
123                 }
124         }
125         return p;
126 }
127
128 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
129                                              struct sk_buff *skb,
130                                              const void *daddr)
131 {
132         struct in6_addr *p = &rt->rt6i_gateway;
133
134         if (!ipv6_addr_any(p))
135                 return (const void *) p;
136         else if (skb)
137                 return &ipv6_hdr(skb)->daddr;
138         return daddr;
139 }
140
141 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
142                                           struct sk_buff *skb,
143                                           const void *daddr)
144 {
145         struct rt6_info *rt = (struct rt6_info *) dst;
146         struct neighbour *n;
147
148         daddr = choose_neigh_daddr(rt, skb, daddr);
149         n = __ipv6_neigh_lookup(dst->dev, daddr);
150         if (n)
151                 return n;
152         return neigh_create(&nd_tbl, daddr, dst->dev);
153 }
154
155 static struct dst_ops ip6_dst_ops_template = {
156         .family                 =       AF_INET6,
157         .protocol               =       cpu_to_be16(ETH_P_IPV6),
158         .gc                     =       ip6_dst_gc,
159         .gc_thresh              =       1024,
160         .check                  =       ip6_dst_check,
161         .default_advmss         =       ip6_default_advmss,
162         .mtu                    =       ip6_mtu,
163         .cow_metrics            =       ipv6_cow_metrics,
164         .destroy                =       ip6_dst_destroy,
165         .ifdown                 =       ip6_dst_ifdown,
166         .negative_advice        =       ip6_negative_advice,
167         .link_failure           =       ip6_link_failure,
168         .update_pmtu            =       ip6_rt_update_pmtu,
169         .redirect               =       rt6_do_redirect,
170         .local_out              =       __ip6_local_out,
171         .neigh_lookup           =       ip6_neigh_lookup,
172 };
173
174 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
175 {
176         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
177
178         return mtu ? : dst->dev->mtu;
179 }
180
181 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
182                                          struct sk_buff *skb, u32 mtu)
183 {
184 }
185
/* redirect callback for blackhole dsts: intentionally does nothing. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
	/* no-op */
}
190
191 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
192                                          unsigned long old)
193 {
194         return NULL;
195 }
196
197 static struct dst_ops ip6_dst_blackhole_ops = {
198         .family                 =       AF_INET6,
199         .protocol               =       cpu_to_be16(ETH_P_IPV6),
200         .destroy                =       ip6_dst_destroy,
201         .check                  =       ip6_dst_check,
202         .mtu                    =       ip6_blackhole_mtu,
203         .default_advmss         =       ip6_default_advmss,
204         .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
205         .redirect               =       ip6_rt_blackhole_redirect,
206         .cow_metrics            =       ip6_rt_blackhole_cow_metrics,
207         .neigh_lookup           =       ip6_neigh_lookup,
208 };
209
210 static const u32 ip6_template_metrics[RTAX_MAX] = {
211         [RTAX_HOPLIMIT - 1] = 0,
212 };
213
214 static const struct rt6_info ip6_null_entry_template = {
215         .dst = {
216                 .__refcnt       = ATOMIC_INIT(1),
217                 .__use          = 1,
218                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
219                 .error          = -ENETUNREACH,
220                 .input          = ip6_pkt_discard,
221                 .output         = ip6_pkt_discard_out,
222         },
223         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
224         .rt6i_protocol  = RTPROT_KERNEL,
225         .rt6i_metric    = ~(u32) 0,
226         .rt6i_ref       = ATOMIC_INIT(1),
227 };
228
229 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
230
231 static int ip6_pkt_prohibit(struct sk_buff *skb);
232 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
233
234 static const struct rt6_info ip6_prohibit_entry_template = {
235         .dst = {
236                 .__refcnt       = ATOMIC_INIT(1),
237                 .__use          = 1,
238                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
239                 .error          = -EACCES,
240                 .input          = ip6_pkt_prohibit,
241                 .output         = ip6_pkt_prohibit_out,
242         },
243         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
244         .rt6i_protocol  = RTPROT_KERNEL,
245         .rt6i_metric    = ~(u32) 0,
246         .rt6i_ref       = ATOMIC_INIT(1),
247 };
248
249 static const struct rt6_info ip6_blk_hole_entry_template = {
250         .dst = {
251                 .__refcnt       = ATOMIC_INIT(1),
252                 .__use          = 1,
253                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
254                 .error          = -EINVAL,
255                 .input          = dst_discard,
256                 .output         = dst_discard,
257         },
258         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
259         .rt6i_protocol  = RTPROT_KERNEL,
260         .rt6i_metric    = ~(u32) 0,
261         .rt6i_ref       = ATOMIC_INIT(1),
262 };
263
264 #endif
265
266 /* allocate dst with ip6_dst_ops */
267 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
268                                              struct net_device *dev,
269                                              int flags,
270                                              struct fib6_table *table)
271 {
272         struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
273                                         0, DST_OBSOLETE_FORCE_CHK, flags);
274
275         if (rt) {
276                 struct dst_entry *dst = &rt->dst;
277
278                 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
279                 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
280                 rt->rt6i_genid = rt_genid(net);
281                 INIT_LIST_HEAD(&rt->rt6i_siblings);
282                 rt->rt6i_nsiblings = 0;
283         }
284         return rt;
285 }
286
287 static void ip6_dst_destroy(struct dst_entry *dst)
288 {
289         struct rt6_info *rt = (struct rt6_info *)dst;
290         struct inet6_dev *idev = rt->rt6i_idev;
291         struct dst_entry *from = dst->from;
292
293         if (!(rt->dst.flags & DST_HOST))
294                 dst_destroy_metrics_generic(dst);
295
296         if (idev) {
297                 rt->rt6i_idev = NULL;
298                 in6_dev_put(idev);
299         }
300
301         dst->from = NULL;
302         dst_release(from);
303
304         if (rt6_has_peer(rt)) {
305                 struct inet_peer *peer = rt6_peer_ptr(rt);
306                 inet_putpeer(peer);
307         }
308 }
309
310 void rt6_bind_peer(struct rt6_info *rt, int create)
311 {
312         struct inet_peer_base *base;
313         struct inet_peer *peer;
314
315         base = inetpeer_base_ptr(rt->_rt6i_peer);
316         if (!base)
317                 return;
318
319         peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
320         if (peer) {
321                 if (!rt6_set_peer(rt, peer))
322                         inet_putpeer(peer);
323         }
324 }
325
326 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
327                            int how)
328 {
329         struct rt6_info *rt = (struct rt6_info *)dst;
330         struct inet6_dev *idev = rt->rt6i_idev;
331         struct net_device *loopback_dev =
332                 dev_net(dev)->loopback_dev;
333
334         if (dev != loopback_dev) {
335                 if (idev && idev->dev == dev) {
336                         struct inet6_dev *loopback_idev =
337                                 in6_dev_get(loopback_dev);
338                         if (loopback_idev) {
339                                 rt->rt6i_idev = loopback_idev;
340                                 in6_dev_put(idev);
341                         }
342                 }
343         }
344 }
345
346 static bool rt6_check_expired(const struct rt6_info *rt)
347 {
348         if (rt->rt6i_flags & RTF_EXPIRES) {
349                 if (time_after(jiffies, rt->dst.expires))
350                         return true;
351         } else if (rt->dst.from) {
352                 return rt6_check_expired((struct rt6_info *) rt->dst.from);
353         }
354         return false;
355 }
356
357 static bool rt6_need_strict(const struct in6_addr *daddr)
358 {
359         return ipv6_addr_type(daddr) &
360                 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
361 }
362
363 /* Multipath route selection:
364  *   Hash based function using packet header and flowlabel.
365  * Adapted from fib_info_hashfn()
366  */
367 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
368                                const struct flowi6 *fl6)
369 {
370         unsigned int val = fl6->flowi6_proto;
371
372         val ^= ipv6_addr_hash(&fl6->daddr);
373         val ^= ipv6_addr_hash(&fl6->saddr);
374
375         /* Work only if this not encapsulated */
376         switch (fl6->flowi6_proto) {
377         case IPPROTO_UDP:
378         case IPPROTO_TCP:
379         case IPPROTO_SCTP:
380                 val ^= (__force u16)fl6->fl6_sport;
381                 val ^= (__force u16)fl6->fl6_dport;
382                 break;
383
384         case IPPROTO_ICMPV6:
385                 val ^= (__force u16)fl6->fl6_icmp_type;
386                 val ^= (__force u16)fl6->fl6_icmp_code;
387                 break;
388         }
389         /* RFC6438 recommands to use flowlabel */
390         val ^= (__force u32)fl6->flowlabel;
391
392         /* Perhaps, we need to tune, this function? */
393         val = val ^ (val >> 7) ^ (val >> 12);
394         return val % candidate_count;
395 }
396
397 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
398                                              struct flowi6 *fl6, int oif,
399                                              int strict)
400 {
401         struct rt6_info *sibling, *next_sibling;
402         int route_choosen;
403
404         route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
405         /* Don't change the route, if route_choosen == 0
406          * (siblings does not include ourself)
407          */
408         if (route_choosen)
409                 list_for_each_entry_safe(sibling, next_sibling,
410                                 &match->rt6i_siblings, rt6i_siblings) {
411                         route_choosen--;
412                         if (route_choosen == 0) {
413                                 if (rt6_score_route(sibling, oif, strict) < 0)
414                                         break;
415                                 match = sibling;
416                                 break;
417                         }
418                 }
419         return match;
420 }
421
422 /*
423  *      Route lookup. Any table->tb6_lock is implied.
424  */
425
426 static inline struct rt6_info *rt6_device_match(struct net *net,
427                                                     struct rt6_info *rt,
428                                                     const struct in6_addr *saddr,
429                                                     int oif,
430                                                     int flags)
431 {
432         struct rt6_info *local = NULL;
433         struct rt6_info *sprt;
434
435         if (!oif && ipv6_addr_any(saddr))
436                 goto out;
437
438         for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
439                 struct net_device *dev = sprt->dst.dev;
440
441                 if (oif) {
442                         if (dev->ifindex == oif)
443                                 return sprt;
444                         if (dev->flags & IFF_LOOPBACK) {
445                                 if (!sprt->rt6i_idev ||
446                                     sprt->rt6i_idev->dev->ifindex != oif) {
447                                         if (flags & RT6_LOOKUP_F_IFACE && oif)
448                                                 continue;
449                                         if (local && (!oif ||
450                                                       local->rt6i_idev->dev->ifindex == oif))
451                                                 continue;
452                                 }
453                                 local = sprt;
454                         }
455                 } else {
456                         if (ipv6_chk_addr(net, saddr, dev,
457                                           flags & RT6_LOOKUP_F_IFACE))
458                                 return sprt;
459                 }
460         }
461
462         if (oif) {
463                 if (local)
464                         return local;
465
466                 if (flags & RT6_LOOKUP_F_IFACE)
467                         return net->ipv6.ip6_null_entry;
468         }
469 out:
470         return rt;
471 }
472
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing.
 *
 * Okay, this does not seem to be appropriate for now, however, we need
 * to check if it is really so.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one
 * per minute.
 *
 * Only gateway routes are probed.  A neighbour solicitation is sent to
 * the gateway when there is no neighbour entry at all, or when the
 * entry is not NUD_VALID and was last updated more than
 * rtr_probe_interval ago (in which case its timestamp is refreshed).
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	bool send_probe = true;

	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
		return;

	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		write_lock(&neigh->lock);
		if ((neigh->nud_state & NUD_VALID) ||
		    !time_after(jiffies,
				neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval))
			send_probe = false;
		else
			neigh->updated = jiffies;
		write_unlock(&neigh->lock);
	}

	if (send_probe) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		target = (struct in6_addr *)&rt->rt6i_gateway;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
	}
	rcu_read_unlock_bh();
}
#else
/* Router preference support is compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
519
520 /*
521  * Default Router Selection (RFC 2461 6.3.6)
522  */
523 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
524 {
525         struct net_device *dev = rt->dst.dev;
526         if (!oif || dev->ifindex == oif)
527                 return 2;
528         if ((dev->flags & IFF_LOOPBACK) &&
529             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
530                 return 1;
531         return 0;
532 }
533
534 static inline bool rt6_check_neigh(struct rt6_info *rt)
535 {
536         struct neighbour *neigh;
537         bool ret = false;
538
539         if (rt->rt6i_flags & RTF_NONEXTHOP ||
540             !(rt->rt6i_flags & RTF_GATEWAY))
541                 return true;
542
543         rcu_read_lock_bh();
544         neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
545         if (neigh) {
546                 read_lock(&neigh->lock);
547                 if (neigh->nud_state & NUD_VALID)
548                         ret = true;
549 #ifdef CONFIG_IPV6_ROUTER_PREF
550                 else if (!(neigh->nud_state & NUD_FAILED))
551                         ret = true;
552 #endif
553                 read_unlock(&neigh->lock);
554         }
555         rcu_read_unlock_bh();
556
557         return ret;
558 }
559
560 static int rt6_score_route(struct rt6_info *rt, int oif,
561                            int strict)
562 {
563         int m;
564
565         m = rt6_check_dev(rt, oif);
566         if (!m && (strict & RT6_LOOKUP_F_IFACE))
567                 return -1;
568 #ifdef CONFIG_IPV6_ROUTER_PREF
569         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
570 #endif
571         if (!rt6_check_neigh(rt) && (strict & RT6_LOOKUP_F_REACHABLE))
572                 return -1;
573         return m;
574 }
575
576 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
577                                    int *mpri, struct rt6_info *match)
578 {
579         int m;
580
581         if (rt6_check_expired(rt))
582                 goto out;
583
584         m = rt6_score_route(rt, oif, strict);
585         if (m < 0)
586                 goto out;
587
588         if (m > *mpri) {
589                 if (strict & RT6_LOOKUP_F_REACHABLE)
590                         rt6_probe(match);
591                 *mpri = m;
592                 match = rt;
593         } else if (strict & RT6_LOOKUP_F_REACHABLE) {
594                 rt6_probe(rt);
595         }
596
597 out:
598         return match;
599 }
600
601 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
602                                      struct rt6_info *rr_head,
603                                      u32 metric, int oif, int strict)
604 {
605         struct rt6_info *rt, *match;
606         int mpri = -1;
607
608         match = NULL;
609         for (rt = rr_head; rt && rt->rt6i_metric == metric;
610              rt = rt->dst.rt6_next)
611                 match = find_match(rt, oif, strict, &mpri, match);
612         for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
613              rt = rt->dst.rt6_next)
614                 match = find_match(rt, oif, strict, &mpri, match);
615
616         return match;
617 }
618
619 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
620 {
621         struct rt6_info *match, *rt0;
622         struct net *net;
623
624         rt0 = fn->rr_ptr;
625         if (!rt0)
626                 fn->rr_ptr = rt0 = fn->leaf;
627
628         match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
629
630         if (!match &&
631             (strict & RT6_LOOKUP_F_REACHABLE)) {
632                 struct rt6_info *next = rt0->dst.rt6_next;
633
634                 /* no entries matched; do round-robin */
635                 if (!next || next->rt6i_metric != rt0->rt6i_metric)
636                         next = fn->leaf;
637
638                 if (next != rt0)
639                         fn->rr_ptr = next;
640         }
641
642         net = dev_net(rt0->dst.dev);
643         return match ? match : net->ipv6.ip6_null_entry;
644 }
645
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a route information option (@opt, @len bytes) received on
 * @dev from gateway @gwaddr.
 *
 * After validating the option, the matching route is looked up.  A zero
 * lifetime removes an existing route; a non-zero lifetime adds the
 * route if it is missing, or refreshes the preference bits of an
 * existing one.  The expiry of the resulting route is then cleared
 * (infinite lifetime) or set relative to jiffies.
 *
 * Returns 0 on success, -EINVAL if the option is malformed or carries
 * the invalid preference value.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_copy, *prefix;
	unsigned long lifetime;
	unsigned int pref;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		/* The option carries the prefix in full; use it in place. */
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_copy,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_copy;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (rt) {
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
	} else if (lifetime) {
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev->ifindex, pref);
	}

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime))
		rt6_set_expires(rt, jiffies + HZ * lifetime);
	else
		rt6_clean_expires(rt);

	ip6_rt_put(rt);
	return 0;
}
#endif
718
/*
 * Backtrack in the FIB tree when the lookup ended on the null entry.
 *
 * This macro is not self-contained: it reads and writes the caller's
 * local variables `rt` and `fn` and jumps to the caller's labels `out`
 * (top-level root reached) and `restart` (a node carrying route
 * information was found, selection should be retried on it).
 *
 * On each step the parent is visited; if the parent has a subtree that
 * is not the node we came from, the subtree is searched for @saddr
 * instead.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == (__net)->ipv6.ip6_null_entry) {			\
		struct fib6_node *pn;					\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			fn = (FIB6_SUBTREE(pn) &&			\
			      FIB6_SUBTREE(pn) != fn) ?			\
				fib6_lookup(FIB6_SUBTREE(pn), NULL, (saddr)) : \
				pn;					\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
736
737 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
738                                              struct fib6_table *table,
739                                              struct flowi6 *fl6, int flags)
740 {
741         struct fib6_node *fn;
742         struct rt6_info *rt;
743
744         read_lock_bh(&table->tb6_lock);
745         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
746 restart:
747         rt = fn->leaf;
748         rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
749         if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
750                 rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
751         BACKTRACK(net, &fl6->saddr);
752 out:
753         dst_use(&rt->dst, jiffies);
754         read_unlock_bh(&table->tb6_lock);
755         return rt;
756
757 }
758
759 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
760                                     int flags)
761 {
762         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
763 }
764 EXPORT_SYMBOL_GPL(ip6_route_lookup);
765
766 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
767                             const struct in6_addr *saddr, int oif, int strict)
768 {
769         struct flowi6 fl6 = {
770                 .flowi6_oif = oif,
771                 .daddr = *daddr,
772         };
773         struct dst_entry *dst;
774         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
775
776         if (saddr) {
777                 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
778                 flags |= RT6_LOOKUP_F_HAS_SADDR;
779         }
780
781         dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
782         if (dst->error == 0)
783                 return (struct rt6_info *) dst;
784
785         dst_release(dst);
786
787         return NULL;
788 }
789
790 EXPORT_SYMBOL(rt6_lookup);
791
792 /* ip6_ins_rt is called with FREE table->tb6_lock.
793    It takes new route entry, the addition fails by any reason the
794    route is freed. In any case, if caller does not hold it, it may
795    be destroyed.
796  */
797
798 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
799 {
800         int err;
801         struct fib6_table *table;
802
803         table = rt->rt6i_table;
804         write_lock_bh(&table->tb6_lock);
805         err = fib6_add(&table->tb6_root, rt, info);
806         write_unlock_bh(&table->tb6_lock);
807
808         return err;
809 }
810
811 int ip6_ins_rt(struct rt6_info *rt)
812 {
813         struct nl_info info = {
814                 .nl_net = dev_net(rt->dst.dev),
815         };
816         return __ip6_ins_rt(rt, &info);
817 }
818
819 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
820                                       const struct in6_addr *daddr,
821                                       const struct in6_addr *saddr)
822 {
823         struct rt6_info *rt;
824
825         /*
826          *      Clone the route.
827          */
828
829         rt = ip6_rt_copy(ort, daddr);
830
831         if (rt) {
832                 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
833                         if (ort->rt6i_dst.plen != 128 &&
834                             ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
835                                 rt->rt6i_flags |= RTF_ANYCAST;
836                         rt->rt6i_gateway = *daddr;
837                 }
838
839                 rt->rt6i_flags |= RTF_CACHE;
840
841 #ifdef CONFIG_IPV6_SUBTREES
842                 if (rt->rt6i_src.plen && saddr) {
843                         rt->rt6i_src.addr = *saddr;
844                         rt->rt6i_src.plen = 128;
845                 }
846 #endif
847         }
848
849         return rt;
850 }
851
852 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
853                                         const struct in6_addr *daddr)
854 {
855         struct rt6_info *rt = ip6_rt_copy(ort, daddr);
856
857         if (rt)
858                 rt->rt6i_flags |= RTF_CACHE;
859         return rt;
860 }
861
/*
 *      Lookup worker shared by ip6_pol_route_input() and
 *      ip6_pol_route_output().
 *
 *      @net:   namespace supplying devconf_all and ip6_null_entry
 *      @table: FIB table to search; its tb6_lock is read-held while
 *              the tree is walked
 *      @oif:   interface index handed to rt6_select() and
 *              rt6_multipath_select()
 *      @fl6:   flow; daddr/saddr drive the lookup and any copy made
 *      @flags: only RT6_LOOKUP_F_IFACE is consulted here
 *
 *      The first pass asks for RT6_LOOKUP_F_REACHABLE unless forwarding
 *      is enabled in devconf_all; arriving at "out" with that bit still
 *      set drops it and repeats the lookup once.
 *
 *      When the selected route is neither ip6_null_entry nor already
 *      RTF_CACHE, the table lock is dropped and a per-destination copy
 *      is made (rt6_alloc_cow() or rt6_alloc_clone()) and inserted with
 *      ip6_ins_rt().  A failed insert is retried through "relookup"
 *      until "attempts" runs out.
 *
 *      Every return path has taken a dst_hold() on the returned route,
 *      and updates its lastuse/__use counters.
 */
862 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
863                                       struct flowi6 *fl6, int flags)
864 {
865         struct fib6_node *fn;
866         struct rt6_info *rt, *nrt;
867         int strict = 0;
868         int attempts = 3;
869         int err;
870         int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
871
872         strict |= flags & RT6_LOOKUP_F_IFACE;
873
874 relookup:
875         read_lock_bh(&table->tb6_lock);
876
877 restart_2:
878         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
879
            /* No goto in this function targets "restart"; BACKTRACK is a
             * macro defined outside this chunk and presumably jumps here
             * and/or to "out" -- TODO confirm against its definition.
             */
880 restart:
881         rt = rt6_select(fn, oif, strict | reachable);
882         if (rt->rt6i_nsiblings)
883                 rt = rt6_multipath_select(rt, fl6, oif, strict | reachable);
884         BACKTRACK(net, &fl6->saddr);
885         if (rt == net->ipv6.ip6_null_entry ||
886             rt->rt6i_flags & RTF_CACHE)
887                 goto out;
888
            /* Pin the route before dropping the lock: the copy helpers
             * below run without tb6_lock held.
             */
889         dst_hold(&rt->dst);
890         read_unlock_bh(&table->tb6_lock);
891
892         if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)))
893                 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
894         else if (!(rt->dst.flags & DST_HOST))
895                 nrt = rt6_alloc_clone(rt, &fl6->daddr);
896         else
                    /* Host route with a gateway/nonexthop flag: return it
                     * as is, still holding the reference taken above.
                     */
897                 goto out2;
898
899         ip6_rt_put(rt);
            /* Allocation failure falls back to the null entry. */
900         rt = nrt ? : net->ipv6.ip6_null_entry;
901
902         dst_hold(&rt->dst);
903         if (nrt) {
904                 err = ip6_ins_rt(nrt);
905                 if (!err)
906                         goto out2;
907         }
908
909         if (--attempts <= 0)
910                 goto out2;
911
912         /*
913          * Race condition! In the gap, when table->tb6_lock was
914          * released someone could insert this route.  Relookup.
915          */
916         ip6_rt_put(rt);
917         goto relookup;
918
919 out:
            /* Reached with tb6_lock still read-held. */
920         if (reachable) {
921                 reachable = 0;
922                 goto restart_2;
923         }
924         dst_hold(&rt->dst);
925         read_unlock_bh(&table->tb6_lock);
926 out2:
927         rt->dst.lastuse = jiffies;
928         rt->dst.__use++;
929
930         return rt;
931 }
932
933 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
934                                             struct flowi6 *fl6, int flags)
935 {
936         return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
937 }
938
939 static struct dst_entry *ip6_route_input_lookup(struct net *net,
940                                                 struct net_device *dev,
941                                                 struct flowi6 *fl6, int flags)
942 {
943         if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
944                 flags |= RT6_LOOKUP_F_IFACE;
945
946         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
947 }
948
949 void ip6_route_input(struct sk_buff *skb)
950 {
951         const struct ipv6hdr *iph = ipv6_hdr(skb);
952         struct net *net = dev_net(skb->dev);
953         int flags = RT6_LOOKUP_F_HAS_SADDR;
954         struct flowi6 fl6 = {
955                 .flowi6_iif = skb->dev->ifindex,
956                 .daddr = iph->daddr,
957                 .saddr = iph->saddr,
958                 .flowlabel = ip6_flowinfo(iph),
959                 .flowi6_mark = skb->mark,
960                 .flowi6_proto = iph->nexthdr,
961         };
962
963         skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
964 }
965
966 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
967                                              struct flowi6 *fl6, int flags)
968 {
969         return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
970 }
971
972 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
973                                     struct flowi6 *fl6)
974 {
975         int flags = 0;
976
977         fl6->flowi6_iif = LOOPBACK_IFINDEX;
978
979         if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
980                 flags |= RT6_LOOKUP_F_IFACE;
981
982         if (!ipv6_addr_any(&fl6->saddr))
983                 flags |= RT6_LOOKUP_F_HAS_SADDR;
984         else if (sk)
985                 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
986
987         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
988 }
989
990 EXPORT_SYMBOL(ip6_route_output);
991
992 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
993 {
994         struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
995         struct dst_entry *new = NULL;
996
997         rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
998         if (rt) {
999                 new = &rt->dst;
1000
1001                 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
1002                 rt6_init_peer(rt, net->ipv6.peers);
1003
1004                 new->__use = 1;
1005                 new->input = dst_discard;
1006                 new->output = dst_discard;
1007
1008                 if (dst_metrics_read_only(&ort->dst))
1009                         new->_metrics = ort->dst._metrics;
1010                 else
1011                         dst_copy_metrics(new, &ort->dst);
1012                 rt->rt6i_idev = ort->rt6i_idev;
1013                 if (rt->rt6i_idev)
1014                         in6_dev_hold(rt->rt6i_idev);
1015
1016                 rt->rt6i_gateway = ort->rt6i_gateway;
1017                 rt->rt6i_flags = ort->rt6i_flags;
1018                 rt->rt6i_metric = 0;
1019
1020                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1021 #ifdef CONFIG_IPV6_SUBTREES
1022                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1023 #endif
1024
1025                 dst_free(new);
1026         }
1027
1028         dst_release(dst_orig);
1029         return new ? new : ERR_PTR(-ENOMEM);
1030 }
1031
1032 /*
1033  *      Destination cache support functions
1034  */
1035
1036 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1037 {
1038         struct rt6_info *rt;
1039
1040         rt = (struct rt6_info *) dst;
1041
1042         /* All IPV6 dsts are created with ->obsolete set to the value
1043          * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1044          * into this function always.
1045          */
1046         if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
1047                 return NULL;
1048
1049         if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
1050                 return dst;
1051
1052         return NULL;
1053 }
1054
1055 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1056 {
1057         struct rt6_info *rt = (struct rt6_info *) dst;
1058
1059         if (rt) {
1060                 if (rt->rt6i_flags & RTF_CACHE) {
1061                         if (rt6_check_expired(rt)) {
1062                                 ip6_del_rt(rt);
1063                                 dst = NULL;
1064                         }
1065                 } else {
1066                         dst_release(dst);
1067                         dst = NULL;
1068                 }
1069         }
1070         return dst;
1071 }
1072
1073 static void ip6_link_failure(struct sk_buff *skb)
1074 {
1075         struct rt6_info *rt;
1076
1077         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1078
1079         rt = (struct rt6_info *) skb_dst(skb);
1080         if (rt) {
1081                 if (rt->rt6i_flags & RTF_CACHE)
1082                         rt6_update_expires(rt, 0);
1083                 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1084                         rt->rt6i_node->fn_sernum = -1;
1085         }
1086 }
1087
1088 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1089                                struct sk_buff *skb, u32 mtu)
1090 {
1091         struct rt6_info *rt6 = (struct rt6_info*)dst;
1092
1093         dst_confirm(dst);
1094         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1095                 struct net *net = dev_net(dst->dev);
1096
1097                 rt6->rt6i_flags |= RTF_MODIFIED;
1098                 if (mtu < IPV6_MIN_MTU) {
1099                         u32 features = dst_metric(dst, RTAX_FEATURES);
1100                         mtu = IPV6_MIN_MTU;
1101                         features |= RTAX_FEATURE_ALLFRAG;
1102                         dst_metric_set(dst, RTAX_FEATURES, features);
1103                 }
1104                 dst_metric_set(dst, RTAX_MTU, mtu);
1105                 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1106         }
1107 }
1108
1109 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1110                      int oif, u32 mark)
1111 {
1112         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1113         struct dst_entry *dst;
1114         struct flowi6 fl6;
1115
1116         memset(&fl6, 0, sizeof(fl6));
1117         fl6.flowi6_oif = oif;
1118         fl6.flowi6_mark = mark;
1119         fl6.flowi6_flags = 0;
1120         fl6.daddr = iph->daddr;
1121         fl6.saddr = iph->saddr;
1122         fl6.flowlabel = ip6_flowinfo(iph);
1123
1124         dst = ip6_route_output(net, NULL, &fl6);
1125         if (!dst->error)
1126                 ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1127         dst_release(dst);
1128 }
1129 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1130
1131 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1132 {
1133         ip6_update_pmtu(skb, sock_net(sk), mtu,
1134                         sk->sk_bound_dev_if, sk->sk_mark);
1135 }
1136 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1137
1138 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1139 {
1140         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1141         struct dst_entry *dst;
1142         struct flowi6 fl6;
1143
1144         memset(&fl6, 0, sizeof(fl6));
1145         fl6.flowi6_oif = oif;
1146         fl6.flowi6_mark = mark;
1147         fl6.flowi6_flags = 0;
1148         fl6.daddr = iph->daddr;
1149         fl6.saddr = iph->saddr;
1150         fl6.flowlabel = ip6_flowinfo(iph);
1151
1152         dst = ip6_route_output(net, NULL, &fl6);
1153         if (!dst->error)
1154                 rt6_do_redirect(dst, NULL, skb);
1155         dst_release(dst);
1156 }
1157 EXPORT_SYMBOL_GPL(ip6_redirect);
1158
1159 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1160 {
1161         ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1162 }
1163 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1164
1165 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1166 {
1167         struct net_device *dev = dst->dev;
1168         unsigned int mtu = dst_mtu(dst);
1169         struct net *net = dev_net(dev);
1170
1171         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1172
1173         if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1174                 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1175
1176         /*
1177          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1178          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1179          * IPV6_MAXPLEN is also valid and means: "any MSS,
1180          * rely only on pmtu discovery"
1181          */
1182         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1183                 mtu = IPV6_MAXPLEN;
1184         return mtu;
1185 }
1186
1187 static unsigned int ip6_mtu(const struct dst_entry *dst)
1188 {
1189         struct inet6_dev *idev;
1190         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1191
1192         if (mtu)
1193                 return mtu;
1194
1195         mtu = IPV6_MIN_MTU;
1196
1197         rcu_read_lock();
1198         idev = __in6_dev_get(dst->dev);
1199         if (idev)
1200                 mtu = idev->cnf.mtu6;
1201         rcu_read_unlock();
1202
1203         return mtu;
1204 }
1205
/*
 * Head of the list (linked through dst->next) of dsts created by
 * icmp6_dst_alloc(); entries are unlinked and freed by icmp6_dst_gc() and
 * icmp6_clean_all().  icmp6_dst_lock is taken around every access to the
 * list.
 */
1206 static struct dst_entry *icmp6_dst_gc_list;
1207 static DEFINE_SPINLOCK(icmp6_dst_lock);
1208
1209 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1210                                   struct flowi6 *fl6)
1211 {
1212         struct dst_entry *dst;
1213         struct rt6_info *rt;
1214         struct inet6_dev *idev = in6_dev_get(dev);
1215         struct net *net = dev_net(dev);
1216
1217         if (unlikely(!idev))
1218                 return ERR_PTR(-ENODEV);
1219
1220         rt = ip6_dst_alloc(net, dev, 0, NULL);
1221         if (unlikely(!rt)) {
1222                 in6_dev_put(idev);
1223                 dst = ERR_PTR(-ENOMEM);
1224                 goto out;
1225         }
1226
1227         rt->dst.flags |= DST_HOST;
1228         rt->dst.output  = ip6_output;
1229         atomic_set(&rt->dst.__refcnt, 1);
1230         rt->rt6i_dst.addr = fl6->daddr;
1231         rt->rt6i_dst.plen = 128;
1232         rt->rt6i_idev     = idev;
1233         dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1234
1235         spin_lock_bh(&icmp6_dst_lock);
1236         rt->dst.next = icmp6_dst_gc_list;
1237         icmp6_dst_gc_list = &rt->dst;
1238         spin_unlock_bh(&icmp6_dst_lock);
1239
1240         fib6_force_start_gc(net);
1241
1242         dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1243
1244 out:
1245         return dst;
1246 }
1247
1248 int icmp6_dst_gc(void)
1249 {
1250         struct dst_entry *dst, **pprev;
1251         int more = 0;
1252
1253         spin_lock_bh(&icmp6_dst_lock);
1254         pprev = &icmp6_dst_gc_list;
1255
1256         while ((dst = *pprev) != NULL) {
1257                 if (!atomic_read(&dst->__refcnt)) {
1258                         *pprev = dst->next;
1259                         dst_free(dst);
1260                 } else {
1261                         pprev = &dst->next;
1262                         ++more;
1263                 }
1264         }
1265
1266         spin_unlock_bh(&icmp6_dst_lock);
1267
1268         return more;
1269 }
1270
1271 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1272                             void *arg)
1273 {
1274         struct dst_entry *dst, **pprev;
1275
1276         spin_lock_bh(&icmp6_dst_lock);
1277         pprev = &icmp6_dst_gc_list;
1278         while ((dst = *pprev) != NULL) {
1279                 struct rt6_info *rt = (struct rt6_info *) dst;
1280                 if (func(rt, arg)) {
1281                         *pprev = dst->next;
1282                         dst_free(dst);
1283                 } else {
1284                         pprev = &dst->next;
1285                 }
1286         }
1287         spin_unlock_bh(&icmp6_dst_lock);
1288 }
1289
1290 static int ip6_dst_gc(struct dst_ops *ops)
1291 {
1292         unsigned long now = jiffies;
1293         struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1294         int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1295         int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1296         int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1297         int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1298         unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1299         int entries;
1300
1301         entries = dst_entries_get_fast(ops);
1302         if (time_after(rt_last_gc + rt_min_interval, now) &&
1303             entries <= rt_max_size)
1304                 goto out;
1305
1306         net->ipv6.ip6_rt_gc_expire++;
1307         fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1308         net->ipv6.ip6_rt_last_gc = now;
1309         entries = dst_entries_get_slow(ops);
1310         if (entries < ops->gc_thresh)
1311                 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1312 out:
1313         net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1314         return entries > rt_max_size;
1315 }
1316
1317 int ip6_dst_hoplimit(struct dst_entry *dst)
1318 {
1319         int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1320         if (hoplimit == 0) {
1321                 struct net_device *dev = dst->dev;
1322                 struct inet6_dev *idev;
1323
1324                 rcu_read_lock();
1325                 idev = __in6_dev_get(dev);
1326                 if (idev)
1327                         hoplimit = idev->cnf.hop_limit;
1328                 else
1329                         hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1330                 rcu_read_unlock();
1331         }
1332         return hoplimit;
1333 }
1334 EXPORT_SYMBOL(ip6_dst_hoplimit);
1335
1336 /*
1337  *
1338  */
1339
1340 int ip6_route_add(struct fib6_config *cfg)
1341 {
1342         int err;
1343         struct net *net = cfg->fc_nlinfo.nl_net;
1344         struct rt6_info *rt = NULL;
1345         struct net_device *dev = NULL;
1346         struct inet6_dev *idev = NULL;
1347         struct fib6_table *table;
1348         int addr_type;
1349
1350         if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1351                 return -EINVAL;
1352 #ifndef CONFIG_IPV6_SUBTREES
1353         if (cfg->fc_src_len)
1354                 return -EINVAL;
1355 #endif
1356         if (cfg->fc_ifindex) {
1357                 err = -ENODEV;
1358                 dev = dev_get_by_index(net, cfg->fc_ifindex);
1359                 if (!dev)
1360                         goto out;
1361                 idev = in6_dev_get(dev);
1362                 if (!idev)
1363                         goto out;
1364         }
1365
1366         if (cfg->fc_metric == 0)
1367                 cfg->fc_metric = IP6_RT_PRIO_USER;
1368
1369         err = -ENOBUFS;
1370         if (cfg->fc_nlinfo.nlh &&
1371             !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1372                 table = fib6_get_table(net, cfg->fc_table);
1373                 if (!table) {
1374                         pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1375                         table = fib6_new_table(net, cfg->fc_table);
1376                 }
1377         } else {
1378                 table = fib6_new_table(net, cfg->fc_table);
1379         }
1380
1381         if (!table)
1382                 goto out;
1383
1384         rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1385
1386         if (!rt) {
1387                 err = -ENOMEM;
1388                 goto out;
1389         }
1390
1391         if (cfg->fc_flags & RTF_EXPIRES)
1392                 rt6_set_expires(rt, jiffies +
1393                                 clock_t_to_jiffies(cfg->fc_expires));
1394         else
1395                 rt6_clean_expires(rt);
1396
1397         if (cfg->fc_protocol == RTPROT_UNSPEC)
1398                 cfg->fc_protocol = RTPROT_BOOT;
1399         rt->rt6i_protocol = cfg->fc_protocol;
1400
1401         addr_type = ipv6_addr_type(&cfg->fc_dst);
1402
1403         if (addr_type & IPV6_ADDR_MULTICAST)
1404                 rt->dst.input = ip6_mc_input;
1405         else if (cfg->fc_flags & RTF_LOCAL)
1406                 rt->dst.input = ip6_input;
1407         else
1408                 rt->dst.input = ip6_forward;
1409
1410         rt->dst.output = ip6_output;
1411
1412         ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1413         rt->rt6i_dst.plen = cfg->fc_dst_len;
1414         if (rt->rt6i_dst.plen == 128)
1415                rt->dst.flags |= DST_HOST;
1416
1417         if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1418                 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1419                 if (!metrics) {
1420                         err = -ENOMEM;
1421                         goto out;
1422                 }
1423                 dst_init_metrics(&rt->dst, metrics, 0);
1424         }
1425 #ifdef CONFIG_IPV6_SUBTREES
1426         ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1427         rt->rt6i_src.plen = cfg->fc_src_len;
1428 #endif
1429
1430         rt->rt6i_metric = cfg->fc_metric;
1431
1432         /* We cannot add true routes via loopback here,
1433            they would result in kernel looping; promote them to reject routes
1434          */
1435         if ((cfg->fc_flags & RTF_REJECT) ||
1436             (dev && (dev->flags & IFF_LOOPBACK) &&
1437              !(addr_type & IPV6_ADDR_LOOPBACK) &&
1438              !(cfg->fc_flags & RTF_LOCAL))) {
1439                 /* hold loopback dev/idev if we haven't done so. */
1440                 if (dev != net->loopback_dev) {
1441                         if (dev) {
1442                                 dev_put(dev);
1443                                 in6_dev_put(idev);
1444                         }
1445                         dev = net->loopback_dev;
1446                         dev_hold(dev);
1447                         idev = in6_dev_get(dev);
1448                         if (!idev) {
1449                                 err = -ENODEV;
1450                                 goto out;
1451                         }
1452                 }
1453                 rt->dst.output = ip6_pkt_discard_out;
1454                 rt->dst.input = ip6_pkt_discard;
1455                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1456                 switch (cfg->fc_type) {
1457                 case RTN_BLACKHOLE:
1458                         rt->dst.error = -EINVAL;
1459                         break;
1460                 case RTN_PROHIBIT:
1461                         rt->dst.error = -EACCES;
1462                         break;
1463                 case RTN_THROW:
1464                         rt->dst.error = -EAGAIN;
1465                         break;
1466                 default:
1467                         rt->dst.error = -ENETUNREACH;
1468                         break;
1469                 }
1470                 goto install_route;
1471         }
1472
1473         if (cfg->fc_flags & RTF_GATEWAY) {
1474                 const struct in6_addr *gw_addr;
1475                 int gwa_type;
1476
1477                 gw_addr = &cfg->fc_gateway;
1478                 rt->rt6i_gateway = *gw_addr;
1479                 gwa_type = ipv6_addr_type(gw_addr);
1480
1481                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1482                         struct rt6_info *grt;
1483
1484                         /* IPv6 strictly inhibits using not link-local
1485                            addresses as nexthop address.
1486                            Otherwise, router will not able to send redirects.
1487                            It is very good, but in some (rare!) circumstances
1488                            (SIT, PtP, NBMA NOARP links) it is handy to allow
1489                            some exceptions. --ANK
1490                          */
1491                         err = -EINVAL;
1492                         if (!(gwa_type & IPV6_ADDR_UNICAST))
1493                                 goto out;
1494
1495                         grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1496
1497                         err = -EHOSTUNREACH;
1498                         if (!grt)
1499                                 goto out;
1500                         if (dev) {
1501                                 if (dev != grt->dst.dev) {
1502                                         ip6_rt_put(grt);
1503                                         goto out;
1504                                 }
1505                         } else {
1506                                 dev = grt->dst.dev;
1507                                 idev = grt->rt6i_idev;
1508                                 dev_hold(dev);
1509                                 in6_dev_hold(grt->rt6i_idev);
1510                         }
1511                         if (!(grt->rt6i_flags & RTF_GATEWAY))
1512                                 err = 0;
1513                         ip6_rt_put(grt);
1514
1515                         if (err)
1516                                 goto out;
1517                 }
1518                 err = -EINVAL;
1519                 if (!dev || (dev->flags & IFF_LOOPBACK))
1520                         goto out;
1521         }
1522
1523         err = -ENODEV;
1524         if (!dev)
1525                 goto out;
1526
1527         if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1528                 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1529                         err = -EINVAL;
1530                         goto out;
1531                 }
1532                 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1533                 rt->rt6i_prefsrc.plen = 128;
1534         } else
1535                 rt->rt6i_prefsrc.plen = 0;
1536
1537         rt->rt6i_flags = cfg->fc_flags;
1538
1539 install_route:
1540         if (cfg->fc_mx) {
1541                 struct nlattr *nla;
1542                 int remaining;
1543
1544                 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1545                         int type = nla_type(nla);
1546
1547                         if (type) {
1548                                 if (type > RTAX_MAX) {
1549                                         err = -EINVAL;
1550                                         goto out;
1551                                 }
1552
1553                                 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1554                         }
1555                 }
1556         }
1557
1558         rt->dst.dev = dev;
1559         rt->rt6i_idev = idev;
1560         rt->rt6i_table = table;
1561
1562         cfg->fc_nlinfo.nl_net = dev_net(dev);
1563
1564         return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1565
1566 out:
1567         if (dev)
1568                 dev_put(dev);
1569         if (idev)
1570                 in6_dev_put(idev);
1571         if (rt)
1572                 dst_free(&rt->dst);
1573         return err;
1574 }
1575
1576 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1577 {
1578         int err;
1579         struct fib6_table *table;
1580         struct net *net = dev_net(rt->dst.dev);
1581
1582         if (rt == net->ipv6.ip6_null_entry) {
1583                 err = -ENOENT;
1584                 goto out;
1585         }
1586
1587         table = rt->rt6i_table;
1588         write_lock_bh(&table->tb6_lock);
1589         err = fib6_del(rt, info);
1590         write_unlock_bh(&table->tb6_lock);
1591
1592 out:
1593         ip6_rt_put(rt);
1594         return err;
1595 }
1596
1597 int ip6_del_rt(struct rt6_info *rt)
1598 {
1599         struct nl_info info = {
1600                 .nl_net = dev_net(rt->dst.dev),
1601         };
1602         return __ip6_del_rt(rt, &info);
1603 }
1604
1605 static int ip6_route_del(struct fib6_config *cfg)
1606 {
1607         struct fib6_table *table;
1608         struct fib6_node *fn;
1609         struct rt6_info *rt;
1610         int err = -ESRCH;
1611
1612         table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1613         if (!table)
1614                 return err;
1615
1616         read_lock_bh(&table->tb6_lock);
1617
1618         fn = fib6_locate(&table->tb6_root,
1619                          &cfg->fc_dst, cfg->fc_dst_len,
1620                          &cfg->fc_src, cfg->fc_src_len);
1621
1622         if (fn) {
1623                 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1624                         if (cfg->fc_ifindex &&
1625                             (!rt->dst.dev ||
1626                              rt->dst.dev->ifindex != cfg->fc_ifindex))
1627                                 continue;
1628                         if (cfg->fc_flags & RTF_GATEWAY &&
1629                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1630                                 continue;
1631                         if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1632                                 continue;
1633                         dst_hold(&rt->dst);
1634                         read_unlock_bh(&table->tb6_lock);
1635
1636                         return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1637                 }
1638         }
1639         read_unlock_bh(&table->tb6_lock);
1640
1641         return err;
1642 }
1643
/*
 *      Process the ICMPv6 Redirect carried in @skb against route @dst.
 *
 *      @dst: route the redirect is checked against and confirmed on
 *      @sk:  not referenced in this function
 *      @skb: the received redirect; its ICMPv6 header is read as a
 *            struct rd_msg
 *
 *      After validation the neighbour entry for the redirect target is
 *      updated, a host route to msg->dest flagged RTF_CACHE|RTF_DYNAMIC
 *      (plus RTF_GATEWAY unless the target is on-link) is copied from
 *      @dst and inserted, NETEVENT_REDIRECT is raised, and if @dst itself
 *      was a cache route it is deleted.
 *
 *      Every rejection is silent apart from a rate-limited debug message.
 *      NOTE(review): the later messages say "rt6_redirect:" while the
 *      earlier ones say "rt6_do_redirect:".
 */
1644 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1645 {
1646         struct net *net = dev_net(skb->dev);
1647         struct netevent_redirect netevent;
1648         struct rt6_info *rt, *nrt = NULL;
1649         struct ndisc_options ndopts;
1650         struct inet6_dev *in6_dev;
1651         struct neighbour *neigh;
1652         struct rd_msg *msg;
1653         int optlen, on_link;
1654         u8 *lladdr;
1655
             /* Bytes left for ND options once the fixed rd_msg is removed. */
1656         optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
1657         optlen -= sizeof(*msg);
1658
1659         if (optlen < 0) {
1660                 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1661                 return;
1662         }
1663
1664         msg = (struct rd_msg *)icmp6_hdr(skb);
1665
1666         if (ipv6_addr_is_multicast(&msg->dest)) {
1667                 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1668                 return;
1669         }
1670
             /* target == dest means the destination is a neighbour
              * (on-link); otherwise the target must be a link-local
              * unicast address.
              */
1671         on_link = 0;
1672         if (ipv6_addr_equal(&msg->dest, &msg->target)) {
1673                 on_link = 1;
1674         } else if (ipv6_addr_type(&msg->target) !=
1675                    (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1676                 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1677                 return;
1678         }
1679
1680         in6_dev = __in6_dev_get(skb->dev);
1681         if (!in6_dev)
1682                 return;
             /* Ignored when the receiving device forwards or has
              * accept_redirects turned off.
              */
1683         if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1684                 return;
1685
1686         /* RFC2461 8.1:
1687          *      The IP source address of the Redirect MUST be the same as the current
1688          *      first-hop router for the specified ICMP Destination Address.
1689          */
1690
1691         if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
1692                 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1693                 return;
1694         }
1695
             /* The target link-layer address option is optional; lladdr
              * stays NULL without it.
              */
1696         lladdr = NULL;
1697         if (ndopts.nd_opts_tgt_lladdr) {
1698                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1699                                              skb->dev);
1700                 if (!lladdr) {
1701                         net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1702                         return;
1703                 }
1704         }
1705
1706         rt = (struct rt6_info *) dst;
1707         if (rt == net->ipv6.ip6_null_entry) {
1708                 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1709                 return;
1710         }
1711
1712         /* Redirect received -> path was valid.
1713          * Look, redirects are sent only in response to data packets,
1714          * so that this nexthop apparently is reachable. --ANK
1715          */
1716         dst_confirm(&rt->dst);
1717
             /* The reference returned here is dropped at "out". */
1718         neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
1719         if (!neigh)
1720                 return;
1721
1722         /*
1723          *      We have finally decided to accept it.
1724          */
1725
1726         neigh_update(neigh, lladdr, NUD_STALE,
1727                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1728                      NEIGH_UPDATE_F_OVERRIDE|
1729                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1730                                      NEIGH_UPDATE_F_ISROUTER))
1731                      );
1732
1733         nrt = ip6_rt_copy(rt, &msg->dest);
1734         if (!nrt)
1735                 goto out;
1736
1737         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1738         if (on_link)
1739                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1740
             /* The neighbour's key (the redirect target) is the gateway. */
1741         nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1742
1743         if (ip6_ins_rt(nrt))
1744                 goto out;
1745
1746         netevent.old = &rt->dst;
1747         netevent.new = &nrt->dst;
1748         netevent.daddr = &msg->dest;
1749         netevent.neigh = neigh;
1750         call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1751
             /* A cache route that was redirected away from is removed.
              * dst_clone() supplies the reference that the deletion path
              * (__ip6_del_rt(), via ip6_rt_put()) drops again.
              */
1752         if (rt->rt6i_flags & RTF_CACHE) {
1753                 rt = (struct rt6_info *) dst_clone(&rt->dst);
1754                 ip6_del_rt(rt);
1755         }
1756
1757 out:
1758         neigh_release(neigh);
1759 }
1760
1761 /*
1762  *      Misc support functions
1763  */
1764
1765 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1766                                     const struct in6_addr *dest)
1767 {
1768         struct net *net = dev_net(ort->dst.dev);
1769         struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1770                                             ort->rt6i_table);
1771
1772         if (rt) {
1773                 rt->dst.input = ort->dst.input;
1774                 rt->dst.output = ort->dst.output;
1775                 rt->dst.flags |= DST_HOST;
1776
1777                 rt->rt6i_dst.addr = *dest;
1778                 rt->rt6i_dst.plen = 128;
1779                 dst_copy_metrics(&rt->dst, &ort->dst);
1780                 rt->dst.error = ort->dst.error;
1781                 rt->rt6i_idev = ort->rt6i_idev;
1782                 if (rt->rt6i_idev)
1783                         in6_dev_hold(rt->rt6i_idev);
1784                 rt->dst.lastuse = jiffies;
1785
1786                 rt->rt6i_gateway = ort->rt6i_gateway;
1787                 rt->rt6i_flags = ort->rt6i_flags;
1788                 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1789                     (RTF_DEFAULT | RTF_ADDRCONF))
1790                         rt6_set_from(rt, ort);
1791                 rt->rt6i_metric = 0;
1792
1793 #ifdef CONFIG_IPV6_SUBTREES
1794                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1795 #endif
1796                 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1797                 rt->rt6i_table = ort->rt6i_table;
1798         }
1799         return rt;
1800 }
1801
1802 #ifdef CONFIG_IPV6_ROUTE_INFO
1803 static struct rt6_info *rt6_get_route_info(struct net *net,
1804                                            const struct in6_addr *prefix, int prefixlen,
1805                                            const struct in6_addr *gwaddr, int ifindex)
1806 {
1807         struct fib6_node *fn;
1808         struct rt6_info *rt = NULL;
1809         struct fib6_table *table;
1810
1811         table = fib6_get_table(net, RT6_TABLE_INFO);
1812         if (!table)
1813                 return NULL;
1814
1815         read_lock_bh(&table->tb6_lock);
1816         fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1817         if (!fn)
1818                 goto out;
1819
1820         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1821                 if (rt->dst.dev->ifindex != ifindex)
1822                         continue;
1823                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1824                         continue;
1825                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1826                         continue;
1827                 dst_hold(&rt->dst);
1828                 break;
1829         }
1830 out:
1831         read_unlock_bh(&table->tb6_lock);
1832         return rt;
1833 }
1834
1835 static struct rt6_info *rt6_add_route_info(struct net *net,
1836                                            const struct in6_addr *prefix, int prefixlen,
1837                                            const struct in6_addr *gwaddr, int ifindex,
1838                                            unsigned int pref)
1839 {
1840         struct fib6_config cfg = {
1841                 .fc_table       = RT6_TABLE_INFO,
1842                 .fc_metric      = IP6_RT_PRIO_USER,
1843                 .fc_ifindex     = ifindex,
1844                 .fc_dst_len     = prefixlen,
1845                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1846                                   RTF_UP | RTF_PREF(pref),
1847                 .fc_nlinfo.portid = 0,
1848                 .fc_nlinfo.nlh = NULL,
1849                 .fc_nlinfo.nl_net = net,
1850         };
1851
1852         cfg.fc_dst = *prefix;
1853         cfg.fc_gateway = *gwaddr;
1854
1855         /* We should treat it as a default route if prefix length is 0. */
1856         if (!prefixlen)
1857                 cfg.fc_flags |= RTF_DEFAULT;
1858
1859         ip6_route_add(&cfg);
1860
1861         return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1862 }
1863 #endif
1864
1865 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1866 {
1867         struct rt6_info *rt;
1868         struct fib6_table *table;
1869
1870         table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1871         if (!table)
1872                 return NULL;
1873
1874         read_lock_bh(&table->tb6_lock);
1875         for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1876                 if (dev == rt->dst.dev &&
1877                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1878                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1879                         break;
1880         }
1881         if (rt)
1882                 dst_hold(&rt->dst);
1883         read_unlock_bh(&table->tb6_lock);
1884         return rt;
1885 }
1886
1887 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1888                                      struct net_device *dev,
1889                                      unsigned int pref)
1890 {
1891         struct fib6_config cfg = {
1892                 .fc_table       = RT6_TABLE_DFLT,
1893                 .fc_metric      = IP6_RT_PRIO_USER,
1894                 .fc_ifindex     = dev->ifindex,
1895                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1896                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1897                 .fc_nlinfo.portid = 0,
1898                 .fc_nlinfo.nlh = NULL,
1899                 .fc_nlinfo.nl_net = dev_net(dev),
1900         };
1901
1902         cfg.fc_gateway = *gwaddr;
1903
1904         ip6_route_add(&cfg);
1905
1906         return rt6_get_dflt_router(gwaddr, dev);
1907 }
1908
1909 void rt6_purge_dflt_routers(struct net *net)
1910 {
1911         struct rt6_info *rt;
1912         struct fib6_table *table;
1913
1914         /* NOTE: Keep consistent with rt6_get_dflt_router */
1915         table = fib6_get_table(net, RT6_TABLE_DFLT);
1916         if (!table)
1917                 return;
1918
1919 restart:
1920         read_lock_bh(&table->tb6_lock);
1921         for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1922                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
1923                     (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
1924                         dst_hold(&rt->dst);
1925                         read_unlock_bh(&table->tb6_lock);
1926                         ip6_del_rt(rt);
1927                         goto restart;
1928                 }
1929         }
1930         read_unlock_bh(&table->tb6_lock);
1931 }
1932
1933 static void rtmsg_to_fib6_config(struct net *net,
1934                                  struct in6_rtmsg *rtmsg,
1935                                  struct fib6_config *cfg)
1936 {
1937         memset(cfg, 0, sizeof(*cfg));
1938
1939         cfg->fc_table = RT6_TABLE_MAIN;
1940         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1941         cfg->fc_metric = rtmsg->rtmsg_metric;
1942         cfg->fc_expires = rtmsg->rtmsg_info;
1943         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1944         cfg->fc_src_len = rtmsg->rtmsg_src_len;
1945         cfg->fc_flags = rtmsg->rtmsg_flags;
1946
1947         cfg->fc_nlinfo.nl_net = net;
1948
1949         cfg->fc_dst = rtmsg->rtmsg_dst;
1950         cfg->fc_src = rtmsg->rtmsg_src;
1951         cfg->fc_gateway = rtmsg->rtmsg_gateway;
1952 }
1953
1954 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1955 {
1956         struct fib6_config cfg;
1957         struct in6_rtmsg rtmsg;
1958         int err;
1959
1960         switch(cmd) {
1961         case SIOCADDRT:         /* Add a route */
1962         case SIOCDELRT:         /* Delete a route */
1963                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1964                         return -EPERM;
1965                 err = copy_from_user(&rtmsg, arg,
1966                                      sizeof(struct in6_rtmsg));
1967                 if (err)
1968                         return -EFAULT;
1969
1970                 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1971
1972                 rtnl_lock();
1973                 switch (cmd) {
1974                 case SIOCADDRT:
1975                         err = ip6_route_add(&cfg);
1976                         break;
1977                 case SIOCDELRT:
1978                         err = ip6_route_del(&cfg);
1979                         break;
1980                 default:
1981                         err = -EINVAL;
1982                 }
1983                 rtnl_unlock();
1984
1985                 return err;
1986         }
1987
1988         return -EINVAL;
1989 }
1990
1991 /*
1992  *      Drop the packet on the floor
1993  */
1994
1995 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1996 {
1997         int type;
1998         struct dst_entry *dst = skb_dst(skb);
1999         switch (ipstats_mib_noroutes) {
2000         case IPSTATS_MIB_INNOROUTES:
2001                 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2002                 if (type == IPV6_ADDR_ANY) {
2003                         IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2004                                       IPSTATS_MIB_INADDRERRORS);
2005                         break;
2006                 }
2007                 /* FALLTHROUGH */
2008         case IPSTATS_MIB_OUTNOROUTES:
2009                 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2010                               ipstats_mib_noroutes);
2011                 break;
2012         }
2013         icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2014         kfree_skb(skb);
2015         return 0;
2016 }
2017
2018 static int ip6_pkt_discard(struct sk_buff *skb)
2019 {
2020         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2021 }
2022
2023 static int ip6_pkt_discard_out(struct sk_buff *skb)
2024 {
2025         skb->dev = skb_dst(skb)->dev;
2026         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2027 }
2028
2029 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2030
2031 static int ip6_pkt_prohibit(struct sk_buff *skb)
2032 {
2033         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2034 }
2035
2036 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2037 {
2038         skb->dev = skb_dst(skb)->dev;
2039         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2040 }
2041
2042 #endif
2043
2044 /*
2045  *      Allocate a dst for local (unicast / anycast) address.
2046  */
2047
2048 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2049                                     const struct in6_addr *addr,
2050                                     bool anycast)
2051 {
2052         struct net *net = dev_net(idev->dev);
2053         struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
2054
2055         if (!rt) {
2056                 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
2057                 return ERR_PTR(-ENOMEM);
2058         }
2059
2060         in6_dev_hold(idev);
2061
2062         rt->dst.flags |= DST_HOST;
2063         rt->dst.input = ip6_input;
2064         rt->dst.output = ip6_output;
2065         rt->rt6i_idev = idev;
2066
2067         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2068         if (anycast)
2069                 rt->rt6i_flags |= RTF_ANYCAST;
2070         else
2071                 rt->rt6i_flags |= RTF_LOCAL;
2072
2073         rt->rt6i_dst.addr = *addr;
2074         rt->rt6i_dst.plen = 128;
2075         rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2076
2077         atomic_set(&rt->dst.__refcnt, 1);
2078
2079         return rt;
2080 }
2081
2082 int ip6_route_get_saddr(struct net *net,
2083                         struct rt6_info *rt,
2084                         const struct in6_addr *daddr,
2085                         unsigned int prefs,
2086                         struct in6_addr *saddr)
2087 {
2088         struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2089         int err = 0;
2090         if (rt->rt6i_prefsrc.plen)
2091                 *saddr = rt->rt6i_prefsrc.addr;
2092         else
2093                 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2094                                          daddr, prefs, saddr);
2095         return err;
2096 }
2097
2098 /* remove deleted ip from prefsrc entries */
2099 struct arg_dev_net_ip {	/* walker argument consumed by fib6_remove_prefsrc() */
2100         struct net_device *dev;	/* only routes on this device match; NULL matches every device */
2101         struct net *net;	/* namespace whose ip6_null_entry is never touched */
2102         struct in6_addr *addr;	/* address compared against each route's rt6i_prefsrc.addr */
2103 };
2104
2105 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2106 {
2107         struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2108         struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2109         struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2110
2111         if (((void *)rt->dst.dev == dev || !dev) &&
2112             rt != net->ipv6.ip6_null_entry &&
2113             ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2114                 /* remove prefsrc entry */
2115                 rt->rt6i_prefsrc.plen = 0;
2116         }
2117         return 0;
2118 }
2119
2120 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2121 {
2122         struct net *net = dev_net(ifp->idev->dev);
2123         struct arg_dev_net_ip adni = {
2124                 .dev = ifp->idev->dev,
2125                 .net = net,
2126                 .addr = &ifp->addr,
2127         };
2128         fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2129 }
2130
2131 struct arg_dev_net {	/* walker argument consumed by fib6_ifdown() */
2132         struct net_device *dev;	/* routes on this device are selected; NULL selects every device */
2133         struct net *net;	/* namespace whose ip6_null_entry is never selected */
2134 };
2135
2136 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2137 {
2138         const struct arg_dev_net *adn = arg;
2139         const struct net_device *dev = adn->dev;
2140
2141         if ((rt->dst.dev == dev || !dev) &&
2142             rt != adn->net->ipv6.ip6_null_entry)
2143                 return -1;
2144
2145         return 0;
2146 }
2147
2148 void rt6_ifdown(struct net *net, struct net_device *dev)
2149 {
2150         struct arg_dev_net adn = {
2151                 .dev = dev,
2152                 .net = net,
2153         };
2154
2155         fib6_clean_all(net, fib6_ifdown, 0, &adn);
2156         icmp6_clean_all(fib6_ifdown, &adn);
2157 }
2158
2159 struct rt6_mtu_change_arg {	/* walker argument consumed by rt6_mtu_change_route() */
2160         struct net_device *dev;	/* only routes whose dst.dev equals this device are considered */
2161         unsigned int mtu;	/* new MTU, stored into the route's RTAX_MTU metric */
2162 };
2163
2164 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2165 {
2166         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2167         struct inet6_dev *idev;
2168
2169         /* In IPv6 pmtu discovery is not optional,
2170            so that RTAX_MTU lock cannot disable it.
2171            We still use this lock to block changes
2172            caused by addrconf/ndisc.
2173         */
2174
2175         idev = __in6_dev_get(arg->dev);
2176         if (!idev)
2177                 return 0;
2178
2179         /* For administrative MTU increase, there is no way to discover
2180            IPv6 PMTU increase, so PMTU increase should be updated here.
2181            Since RFC 1981 doesn't include administrative MTU increase
2182            update PMTU increase is a MUST. (i.e. jumbo frame)
2183          */
2184         /*
2185            If new MTU is less than route PMTU, this new MTU will be the
2186            lowest MTU in the path, update the route PMTU to reflect PMTU
2187            decreases; if new MTU is greater than route PMTU, and the
2188            old MTU is the lowest MTU in the path, update the route PMTU
2189            to reflect the increase. In this case if the other nodes' MTU
2190            also have the lowest MTU, TOO BIG MESSAGE will be lead to
2191            PMTU discouvery.
2192          */
2193         if (rt->dst.dev == arg->dev &&
2194             !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2195             (dst_mtu(&rt->dst) >= arg->mtu ||
2196              (dst_mtu(&rt->dst) < arg->mtu &&
2197               dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2198                 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2199         }
2200         return 0;
2201 }
2202
2203 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2204 {
2205         struct rt6_mtu_change_arg arg = {
2206                 .dev = dev,
2207                 .mtu = mtu,
2208         };
2209
2210         fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2211 }
2212
2213 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2214         [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2215         [RTA_OIF]               = { .type = NLA_U32 },
2216         [RTA_IIF]               = { .type = NLA_U32 },
2217         [RTA_PRIORITY]          = { .type = NLA_U32 },
2218         [RTA_METRICS]           = { .type = NLA_NESTED },
2219         [RTA_MULTIPATH]         = { .len = sizeof(struct rtnexthop) },
2220 };
2221
2222 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2223                               struct fib6_config *cfg)
2224 {
2225         struct rtmsg *rtm;
2226         struct nlattr *tb[RTA_MAX+1];
2227         int err;
2228
2229         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2230         if (err < 0)
2231                 goto errout;
2232
2233         err = -EINVAL;
2234         rtm = nlmsg_data(nlh);
2235         memset(cfg, 0, sizeof(*cfg));
2236
2237         cfg->fc_table = rtm->rtm_table;
2238         cfg->fc_dst_len = rtm->rtm_dst_len;
2239         cfg->fc_src_len = rtm->rtm_src_len;
2240         cfg->fc_flags = RTF_UP;
2241         cfg->fc_protocol = rtm->rtm_protocol;
2242         cfg->fc_type = rtm->rtm_type;
2243
2244         if (rtm->rtm_type == RTN_UNREACHABLE ||
2245             rtm->rtm_type == RTN_BLACKHOLE ||
2246             rtm->rtm_type == RTN_PROHIBIT ||
2247             rtm->rtm_type == RTN_THROW)
2248                 cfg->fc_flags |= RTF_REJECT;
2249
2250         if (rtm->rtm_type == RTN_LOCAL)
2251                 cfg->fc_flags |= RTF_LOCAL;
2252
2253         cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2254         cfg->fc_nlinfo.nlh = nlh;
2255         cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2256
2257         if (tb[RTA_GATEWAY]) {
2258                 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2259                 cfg->fc_flags |= RTF_GATEWAY;
2260         }
2261
2262         if (tb[RTA_DST]) {
2263                 int plen = (rtm->rtm_dst_len + 7) >> 3;
2264
2265                 if (nla_len(tb[RTA_DST]) < plen)
2266                         goto errout;
2267
2268                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2269         }
2270
2271         if (tb[RTA_SRC]) {
2272                 int plen = (rtm->rtm_src_len + 7) >> 3;
2273
2274                 if (nla_len(tb[RTA_SRC]) < plen)
2275                         goto errout;
2276
2277                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2278         }
2279
2280         if (tb[RTA_PREFSRC])
2281                 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2282
2283         if (tb[RTA_OIF])
2284                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2285
2286         if (tb[RTA_PRIORITY])
2287                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2288
2289         if (tb[RTA_METRICS]) {
2290                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2291                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2292         }
2293
2294         if (tb[RTA_TABLE])
2295                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2296
2297         if (tb[RTA_MULTIPATH]) {
2298                 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2299                 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2300         }
2301
2302         err = 0;
2303 errout:
2304         return err;
2305 }
2306
2307 static int ip6_route_multipath(struct fib6_config *cfg, int add)
2308 {
2309         struct fib6_config r_cfg;
2310         struct rtnexthop *rtnh;
2311         int remaining;
2312         int attrlen;
2313         int err = 0, last_err = 0;
2314
2315 beginning:
2316         rtnh = (struct rtnexthop *)cfg->fc_mp;
2317         remaining = cfg->fc_mp_len;
2318
2319         /* Parse a Multipath Entry */
2320         while (rtnh_ok(rtnh, remaining)) {
2321                 memcpy(&r_cfg, cfg, sizeof(*cfg));
2322                 if (rtnh->rtnh_ifindex)
2323                         r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2324
2325                 attrlen = rtnh_attrlen(rtnh);
2326                 if (attrlen > 0) {
2327                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2328
2329                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2330                         if (nla) {
2331                                 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2332                                 r_cfg.fc_flags |= RTF_GATEWAY;
2333                         }
2334                 }
2335                 err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
2336                 if (err) {
2337                         last_err = err;
2338                         /* If we are trying to remove a route, do not stop the
2339                          * loop when ip6_route_del() fails (because next hop is
2340                          * already gone), we should try to remove all next hops.
2341                          */
2342                         if (add) {
2343                                 /* If add fails, we should try to delete all
2344                                  * next hops that have been already added.
2345                                  */
2346                                 add = 0;
2347                                 goto beginning;
2348                         }
2349                 }
2350                 /* Because each route is added like a single route we remove
2351                  * this flag after the first nexthop (if there is a collision,
2352                  * we have already fail to add the first nexthop:
2353                  * fib6_add_rt2node() has reject it).
2354                  */
2355                 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
2356                 rtnh = rtnh_next(rtnh, &remaining);
2357         }
2358
2359         return last_err;
2360 }
2361
2362 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh)
2363 {
2364         struct fib6_config cfg;
2365         int err;
2366
2367         err = rtm_to_fib6_config(skb, nlh, &cfg);
2368         if (err < 0)
2369                 return err;
2370
2371         if (cfg.fc_mp)
2372                 return ip6_route_multipath(&cfg, 0);
2373         else
2374                 return ip6_route_del(&cfg);
2375 }
2376
2377 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh)
2378 {
2379         struct fib6_config cfg;
2380         int err;
2381
2382         err = rtm_to_fib6_config(skb, nlh, &cfg);
2383         if (err < 0)
2384                 return err;
2385
2386         if (cfg.fc_mp)
2387                 return ip6_route_multipath(&cfg, 1);
2388         else
2389                 return ip6_route_add(&cfg);
2390 }
2391
2392 static inline size_t rt6_nlmsg_size(void)
2393 {
2394         return NLMSG_ALIGN(sizeof(struct rtmsg))
2395                + nla_total_size(16) /* RTA_SRC */
2396                + nla_total_size(16) /* RTA_DST */
2397                + nla_total_size(16) /* RTA_GATEWAY */
2398                + nla_total_size(16) /* RTA_PREFSRC */
2399                + nla_total_size(4) /* RTA_TABLE */
2400                + nla_total_size(4) /* RTA_IIF */
2401                + nla_total_size(4) /* RTA_OIF */
2402                + nla_total_size(4) /* RTA_PRIORITY */
2403                + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2404                + nla_total_size(sizeof(struct rta_cacheinfo));
2405 }
2406
2407 static int rt6_fill_node(struct net *net,
2408                          struct sk_buff *skb, struct rt6_info *rt,
2409                          struct in6_addr *dst, struct in6_addr *src,
2410                          int iif, int type, u32 portid, u32 seq,
2411                          int prefix, int nowait, unsigned int flags)
2412 {
2413         struct rtmsg *rtm;
2414         struct nlmsghdr *nlh;
2415         long expires;
2416         u32 table;
2417
2418         if (prefix) {   /* user wants prefix routes only */
2419                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2420                         /* success since this is not a prefix route */
2421                         return 1;
2422                 }
2423         }
2424
2425         nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
2426         if (!nlh)
2427                 return -EMSGSIZE;
2428
2429         rtm = nlmsg_data(nlh);
2430         rtm->rtm_family = AF_INET6;
2431         rtm->rtm_dst_len = rt->rt6i_dst.plen;
2432         rtm->rtm_src_len = rt->rt6i_src.plen;
2433         rtm->rtm_tos = 0;
2434         if (rt->rt6i_table)
2435                 table = rt->rt6i_table->tb6_id;
2436         else
2437                 table = RT6_TABLE_UNSPEC;
2438         rtm->rtm_table = table;
2439         if (nla_put_u32(skb, RTA_TABLE, table))
2440                 goto nla_put_failure;
2441         if (rt->rt6i_flags & RTF_REJECT) {
2442                 switch (rt->dst.error) {
2443                 case -EINVAL:
2444                         rtm->rtm_type = RTN_BLACKHOLE;
2445                         break;
2446                 case -EACCES:
2447                         rtm->rtm_type = RTN_PROHIBIT;
2448                         break;
2449                 case -EAGAIN:
2450                         rtm->rtm_type = RTN_THROW;
2451                         break;
2452                 default:
2453                         rtm->rtm_type = RTN_UNREACHABLE;
2454                         break;
2455                 }
2456         }
2457         else if (rt->rt6i_flags & RTF_LOCAL)
2458                 rtm->rtm_type = RTN_LOCAL;
2459         else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2460                 rtm->rtm_type = RTN_LOCAL;
2461         else
2462                 rtm->rtm_type = RTN_UNICAST;
2463         rtm->rtm_flags = 0;
2464         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2465         rtm->rtm_protocol = rt->rt6i_protocol;
2466         if (rt->rt6i_flags & RTF_DYNAMIC)
2467                 rtm->rtm_protocol = RTPROT_REDIRECT;
2468         else if (rt->rt6i_flags & RTF_ADDRCONF) {
2469                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2470                         rtm->rtm_protocol = RTPROT_RA;
2471                 else
2472                         rtm->rtm_protocol = RTPROT_KERNEL;
2473         }
2474
2475         if (rt->rt6i_flags & RTF_CACHE)
2476                 rtm->rtm_flags |= RTM_F_CLONED;
2477
2478         if (dst) {
2479                 if (nla_put(skb, RTA_DST, 16, dst))
2480                         goto nla_put_failure;
2481                 rtm->rtm_dst_len = 128;
2482         } else if (rtm->rtm_dst_len)
2483                 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2484                         goto nla_put_failure;
2485 #ifdef CONFIG_IPV6_SUBTREES
2486         if (src) {
2487                 if (nla_put(skb, RTA_SRC, 16, src))
2488                         goto nla_put_failure;
2489                 rtm->rtm_src_len = 128;
2490         } else if (rtm->rtm_src_len &&
2491                    nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2492                 goto nla_put_failure;
2493 #endif
2494         if (iif) {
2495 #ifdef CONFIG_IPV6_MROUTE
2496                 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2497                         int err = ip6mr_get_route(net, skb, rtm, nowait);
2498                         if (err <= 0) {
2499                                 if (!nowait) {
2500                                         if (err == 0)
2501                                                 return 0;
2502                                         goto nla_put_failure;
2503                                 } else {
2504                                         if (err == -EMSGSIZE)
2505                                                 goto nla_put_failure;
2506                                 }
2507                         }
2508                 } else
2509 #endif
2510                         if (nla_put_u32(skb, RTA_IIF, iif))
2511                                 goto nla_put_failure;
2512         } else if (dst) {
2513                 struct in6_addr saddr_buf;
2514                 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2515                     nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2516                         goto nla_put_failure;
2517         }
2518
2519         if (rt->rt6i_prefsrc.plen) {
2520                 struct in6_addr saddr_buf;
2521                 saddr_buf = rt->rt6i_prefsrc.addr;
2522                 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2523                         goto nla_put_failure;
2524         }
2525
2526         if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2527                 goto nla_put_failure;
2528
2529         if (rt->rt6i_flags & RTF_GATEWAY) {
2530                 if (nla_put(skb, RTA_GATEWAY, 16, &rt->rt6i_gateway) < 0)
2531                         goto nla_put_failure;
2532         }
2533
2534         if (rt->dst.dev &&
2535             nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2536                 goto nla_put_failure;
2537         if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2538                 goto nla_put_failure;
2539
2540         expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2541
2542         if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2543                 goto nla_put_failure;
2544
2545         return nlmsg_end(skb, nlh);
2546
2547 nla_put_failure:
2548         nlmsg_cancel(skb, nlh);
2549         return -EMSGSIZE;
2550 }
2551
2552 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2553 {
2554         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2555         int prefix;
2556
2557         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2558                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2559                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2560         } else
2561                 prefix = 0;
2562
2563         return rt6_fill_node(arg->net,
2564                      arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2565                      NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2566                      prefix, 0, NLM_F_MULTI);
2567 }
2568
2569 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh)
2570 {
2571         struct net *net = sock_net(in_skb->sk);
2572         struct nlattr *tb[RTA_MAX+1];
2573         struct rt6_info *rt;
2574         struct sk_buff *skb;
2575         struct rtmsg *rtm;
2576         struct flowi6 fl6;
2577         int err, iif = 0, oif = 0;
2578
2579         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2580         if (err < 0)
2581                 goto errout;
2582
2583         err = -EINVAL;
2584         memset(&fl6, 0, sizeof(fl6));
2585
2586         if (tb[RTA_SRC]) {
2587                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2588                         goto errout;
2589
2590                 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2591         }
2592
2593         if (tb[RTA_DST]) {
2594                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2595                         goto errout;
2596
2597                 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2598         }
2599
2600         if (tb[RTA_IIF])
2601                 iif = nla_get_u32(tb[RTA_IIF]);
2602
2603         if (tb[RTA_OIF])
2604                 oif = nla_get_u32(tb[RTA_OIF]);
2605
2606         if (iif) {
2607                 struct net_device *dev;
2608                 int flags = 0;
2609
2610                 dev = __dev_get_by_index(net, iif);
2611                 if (!dev) {
2612                         err = -ENODEV;
2613                         goto errout;
2614                 }
2615
2616                 fl6.flowi6_iif = iif;
2617
2618                 if (!ipv6_addr_any(&fl6.saddr))
2619                         flags |= RT6_LOOKUP_F_HAS_SADDR;
2620
2621                 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2622                                                                flags);
2623         } else {
2624                 fl6.flowi6_oif = oif;
2625
2626                 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2627         }
2628
2629         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2630         if (!skb) {
2631                 ip6_rt_put(rt);
2632                 err = -ENOBUFS;
2633                 goto errout;
2634         }
2635
2636         /* Reserve room for dummy headers, this skb can pass
2637            through good chunk of routing engine.
2638          */
2639         skb_reset_mac_header(skb);
2640         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2641
2642         skb_dst_set(skb, &rt->dst);
2643
2644         err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2645                             RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2646                             nlh->nlmsg_seq, 0, 0, 0);
2647         if (err < 0) {
2648                 kfree_skb(skb);
2649                 goto errout;
2650         }
2651
2652         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2653 errout:
2654         return err;
2655 }
2656
2657 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2658 {
2659         struct sk_buff *skb;
2660         struct net *net = info->nl_net;
2661         u32 seq;
2662         int err;
2663
2664         err = -ENOBUFS;
2665         seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2666
2667         skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2668         if (!skb)
2669                 goto errout;
2670
2671         err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2672                                 event, info->portid, seq, 0, 0, 0);
2673         if (err < 0) {
2674                 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2675                 WARN_ON(err == -EMSGSIZE);
2676                 kfree_skb(skb);
2677                 goto errout;
2678         }
2679         rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2680                     info->nlh, gfp_any());
2681         return;
2682 errout:
2683         if (err < 0)
2684                 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2685 }
2686
2687 static int ip6_route_dev_notify(struct notifier_block *this,
2688                                 unsigned long event, void *ptr)
2689 {
2690         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
2691         struct net *net = dev_net(dev);
2692
2693         if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2694                 net->ipv6.ip6_null_entry->dst.dev = dev;
2695                 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2696 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2697                 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2698                 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2699                 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2700                 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2701 #endif
2702         }
2703
2704         return NOTIFY_OK;
2705 }
2706
2707 /*
2708  *      /proc
2709  */
2710
2711 #ifdef CONFIG_PROC_FS
2712
/*
 * Bookkeeping for a buffer-based /proc reader.
 *
 * NOTE(review): nothing in the visible part of this file uses this
 * structure; field meanings are not established here -- confirm before
 * relying on them.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2721
2722 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2723 {
2724         struct seq_file *m = p_arg;
2725
2726         seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2727
2728 #ifdef CONFIG_IPV6_SUBTREES
2729         seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2730 #else
2731         seq_puts(m, "00000000000000000000000000000000 00 ");
2732 #endif
2733         if (rt->rt6i_flags & RTF_GATEWAY) {
2734                 seq_printf(m, "%pi6", &rt->rt6i_gateway);
2735         } else {
2736                 seq_puts(m, "00000000000000000000000000000000");
2737         }
2738         seq_printf(m, " %08x %08x %08x %08x %8s\n",
2739                    rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2740                    rt->dst.__use, rt->rt6i_flags,
2741                    rt->dst.dev ? rt->dst.dev->name : "");
2742         return 0;
2743 }
2744
2745 static int ipv6_route_show(struct seq_file *m, void *v)
2746 {
2747         struct net *net = (struct net *)m->private;
2748         fib6_clean_all_ro(net, rt6_info_route, 0, m);
2749         return 0;
2750 }
2751
2752 static int ipv6_route_open(struct inode *inode, struct file *file)
2753 {
2754         return single_open_net(inode, file, ipv6_route_show);
2755 }
2756
2757 static const struct file_operations ipv6_route_proc_fops = {
2758         .owner          = THIS_MODULE,
2759         .open           = ipv6_route_open,
2760         .read           = seq_read,
2761         .llseek         = seq_lseek,
2762         .release        = single_release_net,
2763 };
2764
2765 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2766 {
2767         struct net *net = (struct net *)seq->private;
2768         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2769                    net->ipv6.rt6_stats->fib_nodes,
2770                    net->ipv6.rt6_stats->fib_route_nodes,
2771                    net->ipv6.rt6_stats->fib_rt_alloc,
2772                    net->ipv6.rt6_stats->fib_rt_entries,
2773                    net->ipv6.rt6_stats->fib_rt_cache,
2774                    dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2775                    net->ipv6.rt6_stats->fib_discarded_routes);
2776
2777         return 0;
2778 }
2779
2780 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2781 {
2782         return single_open_net(inode, file, rt6_stats_seq_show);
2783 }
2784
2785 static const struct file_operations rt6_stats_seq_fops = {
2786         .owner   = THIS_MODULE,
2787         .open    = rt6_stats_seq_open,
2788         .read    = seq_read,
2789         .llseek  = seq_lseek,
2790         .release = single_release_net,
2791 };
2792 #endif  /* CONFIG_PROC_FS */
2793
2794 #ifdef CONFIG_SYSCTL
2795
2796 static
2797 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
2798                               void __user *buffer, size_t *lenp, loff_t *ppos)
2799 {
2800         struct net *net;
2801         int delay;
2802         if (!write)
2803                 return -EINVAL;
2804
2805         net = (struct net *)ctl->extra1;
2806         delay = net->ipv6.sysctl.flush_delay;
2807         proc_dointvec(ctl, write, buffer, lenp, ppos);
2808         fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2809         return 0;
2810 }
2811
2812 struct ctl_table ipv6_route_table_template[] = {
2813         {
2814                 .procname       =       "flush",
2815                 .data           =       &init_net.ipv6.sysctl.flush_delay,
2816                 .maxlen         =       sizeof(int),
2817                 .mode           =       0200,
2818                 .proc_handler   =       ipv6_sysctl_rtcache_flush
2819         },
2820         {
2821                 .procname       =       "gc_thresh",
2822                 .data           =       &ip6_dst_ops_template.gc_thresh,
2823                 .maxlen         =       sizeof(int),
2824                 .mode           =       0644,
2825                 .proc_handler   =       proc_dointvec,
2826         },
2827         {
2828                 .procname       =       "max_size",
2829                 .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
2830                 .maxlen         =       sizeof(int),
2831                 .mode           =       0644,
2832                 .proc_handler   =       proc_dointvec,
2833         },
2834         {
2835                 .procname       =       "gc_min_interval",
2836                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2837                 .maxlen         =       sizeof(int),
2838                 .mode           =       0644,
2839                 .proc_handler   =       proc_dointvec_jiffies,
2840         },
2841         {
2842                 .procname       =       "gc_timeout",
2843                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2844                 .maxlen         =       sizeof(int),
2845                 .mode           =       0644,
2846                 .proc_handler   =       proc_dointvec_jiffies,
2847         },
2848         {
2849                 .procname       =       "gc_interval",
2850                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2851                 .maxlen         =       sizeof(int),
2852                 .mode           =       0644,
2853                 .proc_handler   =       proc_dointvec_jiffies,
2854         },
2855         {
2856                 .procname       =       "gc_elasticity",
2857                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2858                 .maxlen         =       sizeof(int),
2859                 .mode           =       0644,
2860                 .proc_handler   =       proc_dointvec,
2861         },
2862         {
2863                 .procname       =       "mtu_expires",
2864                 .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2865                 .maxlen         =       sizeof(int),
2866                 .mode           =       0644,
2867                 .proc_handler   =       proc_dointvec_jiffies,
2868         },
2869         {
2870                 .procname       =       "min_adv_mss",
2871                 .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2872                 .maxlen         =       sizeof(int),
2873                 .mode           =       0644,
2874                 .proc_handler   =       proc_dointvec,
2875         },
2876         {
2877                 .procname       =       "gc_min_interval_ms",
2878                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2879                 .maxlen         =       sizeof(int),
2880                 .mode           =       0644,
2881                 .proc_handler   =       proc_dointvec_ms_jiffies,
2882         },
2883         { }
2884 };
2885
2886 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2887 {
2888         struct ctl_table *table;
2889
2890         table = kmemdup(ipv6_route_table_template,
2891                         sizeof(ipv6_route_table_template),
2892                         GFP_KERNEL);
2893
2894         if (table) {
2895                 table[0].data = &net->ipv6.sysctl.flush_delay;
2896                 table[0].extra1 = net;
2897                 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2898                 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2899                 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2900                 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2901                 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2902                 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2903                 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2904                 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2905                 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2906
2907                 /* Don't export sysctls to unprivileged users */
2908                 if (net->user_ns != &init_user_ns)
2909                         table[0].procname = NULL;
2910         }
2911
2912         return table;
2913 }
2914 #endif
2915
2916 static int __net_init ip6_route_net_init(struct net *net)
2917 {
2918         int ret = -ENOMEM;
2919
2920         memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2921                sizeof(net->ipv6.ip6_dst_ops));
2922
2923         if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2924                 goto out_ip6_dst_ops;
2925
2926         net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2927                                            sizeof(*net->ipv6.ip6_null_entry),
2928                                            GFP_KERNEL);
2929         if (!net->ipv6.ip6_null_entry)
2930                 goto out_ip6_dst_entries;
2931         net->ipv6.ip6_null_entry->dst.path =
2932                 (struct dst_entry *)net->ipv6.ip6_null_entry;
2933         net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2934         dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2935                          ip6_template_metrics, true);
2936
2937 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2938         net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2939                                                sizeof(*net->ipv6.ip6_prohibit_entry),
2940                                                GFP_KERNEL);
2941         if (!net->ipv6.ip6_prohibit_entry)
2942                 goto out_ip6_null_entry;
2943         net->ipv6.ip6_prohibit_entry->dst.path =
2944                 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2945         net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2946         dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2947                          ip6_template_metrics, true);
2948
2949         net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2950                                                sizeof(*net->ipv6.ip6_blk_hole_entry),
2951                                                GFP_KERNEL);
2952         if (!net->ipv6.ip6_blk_hole_entry)
2953                 goto out_ip6_prohibit_entry;
2954         net->ipv6.ip6_blk_hole_entry->dst.path =
2955                 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2956         net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2957         dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2958                          ip6_template_metrics, true);
2959 #endif
2960
2961         net->ipv6.sysctl.flush_delay = 0;
2962         net->ipv6.sysctl.ip6_rt_max_size = 4096;
2963         net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2964         net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2965         net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2966         net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2967         net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2968         net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2969
2970         net->ipv6.ip6_rt_gc_expire = 30*HZ;
2971
2972         ret = 0;
2973 out:
2974         return ret;
2975
2976 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2977 out_ip6_prohibit_entry:
2978         kfree(net->ipv6.ip6_prohibit_entry);
2979 out_ip6_null_entry:
2980         kfree(net->ipv6.ip6_null_entry);
2981 #endif
2982 out_ip6_dst_entries:
2983         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2984 out_ip6_dst_ops:
2985         goto out;
2986 }
2987
2988 static void __net_exit ip6_route_net_exit(struct net *net)
2989 {
2990         kfree(net->ipv6.ip6_null_entry);
2991 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2992         kfree(net->ipv6.ip6_prohibit_entry);
2993         kfree(net->ipv6.ip6_blk_hole_entry);
2994 #endif
2995         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2996 }
2997
2998 static int __net_init ip6_route_net_init_late(struct net *net)
2999 {
3000 #ifdef CONFIG_PROC_FS
3001         proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3002         proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3003 #endif
3004         return 0;
3005 }
3006
3007 static void __net_exit ip6_route_net_exit_late(struct net *net)
3008 {
3009 #ifdef CONFIG_PROC_FS
3010         remove_proc_entry("ipv6_route", net->proc_net);
3011         remove_proc_entry("rt6_stats", net->proc_net);
3012 #endif
3013 }
3014
3015 static struct pernet_operations ip6_route_net_ops = {
3016         .init = ip6_route_net_init,
3017         .exit = ip6_route_net_exit,
3018 };
3019
3020 static int __net_init ipv6_inetpeer_init(struct net *net)
3021 {
3022         struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3023
3024         if (!bp)
3025                 return -ENOMEM;
3026         inet_peer_base_init(bp);
3027         net->ipv6.peers = bp;
3028         return 0;
3029 }
3030
3031 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3032 {
3033         struct inet_peer_base *bp = net->ipv6.peers;
3034
3035         net->ipv6.peers = NULL;
3036         inetpeer_invalidate_tree(bp);
3037         kfree(bp);
3038 }
3039
3040 static struct pernet_operations ipv6_inetpeer_ops = {
3041         .init   =       ipv6_inetpeer_init,
3042         .exit   =       ipv6_inetpeer_exit,
3043 };
3044
3045 static struct pernet_operations ip6_route_net_late_ops = {
3046         .init = ip6_route_net_init_late,
3047         .exit = ip6_route_net_exit_late,
3048 };
3049
3050 static struct notifier_block ip6_route_dev_notifier = {
3051         .notifier_call = ip6_route_dev_notify,
3052         .priority = 0,
3053 };
3054
3055 int __init ip6_route_init(void)
3056 {
3057         int ret;
3058
3059         ret = -ENOMEM;
3060         ip6_dst_ops_template.kmem_cachep =
3061                 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3062                                   SLAB_HWCACHE_ALIGN, NULL);
3063         if (!ip6_dst_ops_template.kmem_cachep)
3064                 goto out;
3065
3066         ret = dst_entries_init(&ip6_dst_blackhole_ops);
3067         if (ret)
3068                 goto out_kmem_cache;
3069
3070         ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3071         if (ret)
3072                 goto out_dst_entries;
3073
3074         ret = register_pernet_subsys(&ip6_route_net_ops);
3075         if (ret)
3076                 goto out_register_inetpeer;
3077
3078         ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3079
3080         /* Registering of the loopback is done before this portion of code,
3081          * the loopback reference in rt6_info will not be taken, do it
3082          * manually for init_net */
3083         init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3084         init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3085   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3086         init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3087         init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3088         init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3089         init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3090   #endif
3091         ret = fib6_init();
3092         if (ret)
3093                 goto out_register_subsys;
3094
3095         ret = xfrm6_init();
3096         if (ret)
3097                 goto out_fib6_init;
3098
3099         ret = fib6_rules_init();
3100         if (ret)
3101                 goto xfrm6_init;
3102
3103         ret = register_pernet_subsys(&ip6_route_net_late_ops);
3104         if (ret)
3105                 goto fib6_rules_init;
3106
3107         ret = -ENOBUFS;
3108         if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3109             __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3110             __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3111                 goto out_register_late_subsys;
3112
3113         ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3114         if (ret)
3115                 goto out_register_late_subsys;
3116
3117 out:
3118         return ret;
3119
3120 out_register_late_subsys:
3121         unregister_pernet_subsys(&ip6_route_net_late_ops);
3122 fib6_rules_init:
3123         fib6_rules_cleanup();
3124 xfrm6_init:
3125         xfrm6_fini();
3126 out_fib6_init:
3127         fib6_gc_cleanup();
3128 out_register_subsys:
3129         unregister_pernet_subsys(&ip6_route_net_ops);
3130 out_register_inetpeer:
3131         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3132 out_dst_entries:
3133         dst_entries_destroy(&ip6_dst_blackhole_ops);
3134 out_kmem_cache:
3135         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3136         goto out;
3137 }
3138
/*
 * Tear down what ip6_route_init() set up: the netdevice notifier, the
 * late pernet subsystem, FIB rules, xfrm6, the FIB GC, the inetpeer and
 * route pernet subsystems, the blackhole dst counter and the rt6_info
 * slab cache.
 *
 * The rtnetlink handlers registered by ip6_route_init() are not
 * unregistered here.
 *
 * NOTE(review): ipv6_inetpeer_ops is unregistered before
 * ip6_route_net_ops here, while the error path of ip6_route_init()
 * unregisters them in the opposite order -- confirm which is intended.
 */
3139 void ip6_route_cleanup(void)
3140 {
3141         unregister_netdevice_notifier(&ip6_route_dev_notifier);
3142         unregister_pernet_subsys(&ip6_route_net_late_ops);
3143         fib6_rules_cleanup();
3144         xfrm6_fini();
3145         fib6_gc_cleanup();
3146         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3147         unregister_pernet_subsys(&ip6_route_net_ops);
3148         dst_entries_destroy(&ip6_dst_blackhole_ops);
3149         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3150 }