]> git.kernelconcepts.de Git - karo-tx-linux.git/blob - net/ipv6/route.c
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
[karo-tx-linux.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13
14 /*      Changes:
15  *
16  *      YOSHIFUJI Hideaki @USAGI
17  *              reworked default router selection.
18  *              - respect outgoing interface
19  *              - select from (probably) reachable routers (i.e.
20  *              routers in REACHABLE, STALE, DELAY or PROBE states).
21  *              - always select the same router if it is (probably)
22  *              reachable.  otherwise, round-robin the list.
23  *      Ville Nuorvala
24  *              Fixed routing subtrees.
25  */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/dst_metadata.h>
58 #include <net/xfrm.h>
59 #include <net/netevent.h>
60 #include <net/netlink.h>
61 #include <net/nexthop.h>
62 #include <net/lwtunnel.h>
63 #include <net/ip_tunnels.h>
64 #include <net/l3mdev.h>
65
66 #include <asm/uaccess.h>
67
68 #ifdef CONFIG_SYSCTL
69 #include <linux/sysctl.h>
70 #endif
71
/* Result of the neighbour check used while scoring a route.
 * Negative values are failures; find_match() decides how each is treated.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD	= -3,	/* route is skipped by find_match() */
	RT6_NUD_FAIL_PROBE	= -2,	/* gateway neighbour is in NUD_FAILED */
	RT6_NUD_FAIL_DO_RR	= -1,	/* scored as 0 and requests round-robin */
	RT6_NUD_SUCCEED		= 1,
};
78
79 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
80 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
81 static unsigned int      ip6_default_advmss(const struct dst_entry *dst);
82 static unsigned int      ip6_mtu(const struct dst_entry *dst);
83 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
84 static void             ip6_dst_destroy(struct dst_entry *);
85 static void             ip6_dst_ifdown(struct dst_entry *,
86                                        struct net_device *dev, int how);
87 static int               ip6_dst_gc(struct dst_ops *ops);
88
89 static int              ip6_pkt_discard(struct sk_buff *skb);
90 static int              ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
91 static int              ip6_pkt_prohibit(struct sk_buff *skb);
92 static int              ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
93 static void             ip6_link_failure(struct sk_buff *skb);
94 static void             ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
95                                            struct sk_buff *skb, u32 mtu);
96 static void             rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
97                                         struct sk_buff *skb);
98 static void             rt6_dst_from_metrics_check(struct rt6_info *rt);
99 static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
100
101 #ifdef CONFIG_IPV6_ROUTE_INFO
102 static struct rt6_info *rt6_add_route_info(struct net *net,
103                                            const struct in6_addr *prefix, int prefixlen,
104                                            const struct in6_addr *gwaddr, int ifindex,
105                                            unsigned int pref);
106 static struct rt6_info *rt6_get_route_info(struct net *net,
107                                            const struct in6_addr *prefix, int prefixlen,
108                                            const struct in6_addr *gwaddr, int ifindex);
109 #endif
110
111 struct uncached_list {
112         spinlock_t              lock;
113         struct list_head        head;
114 };
115
116 static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
117
118 static void rt6_uncached_list_add(struct rt6_info *rt)
119 {
120         struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
121
122         rt->dst.flags |= DST_NOCACHE;
123         rt->rt6i_uncached_list = ul;
124
125         spin_lock_bh(&ul->lock);
126         list_add_tail(&rt->rt6i_uncached, &ul->head);
127         spin_unlock_bh(&ul->lock);
128 }
129
130 static void rt6_uncached_list_del(struct rt6_info *rt)
131 {
132         if (!list_empty(&rt->rt6i_uncached)) {
133                 struct uncached_list *ul = rt->rt6i_uncached_list;
134
135                 spin_lock_bh(&ul->lock);
136                 list_del(&rt->rt6i_uncached);
137                 spin_unlock_bh(&ul->lock);
138         }
139 }
140
141 static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
142 {
143         struct net_device *loopback_dev = net->loopback_dev;
144         int cpu;
145
146         if (dev == loopback_dev)
147                 return;
148
149         for_each_possible_cpu(cpu) {
150                 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
151                 struct rt6_info *rt;
152
153                 spin_lock_bh(&ul->lock);
154                 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
155                         struct inet6_dev *rt_idev = rt->rt6i_idev;
156                         struct net_device *rt_dev = rt->dst.dev;
157
158                         if (rt_idev->dev == dev) {
159                                 rt->rt6i_idev = in6_dev_get(loopback_dev);
160                                 in6_dev_put(rt_idev);
161                         }
162
163                         if (rt_dev == dev) {
164                                 rt->dst.dev = loopback_dev;
165                                 dev_hold(rt->dst.dev);
166                                 dev_put(rt_dev);
167                         }
168                 }
169                 spin_unlock_bh(&ul->lock);
170         }
171 }
172
173 static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
174 {
175         return dst_metrics_write_ptr(rt->dst.from);
176 }
177
178 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
179 {
180         struct rt6_info *rt = (struct rt6_info *)dst;
181
182         if (rt->rt6i_flags & RTF_PCPU)
183                 return rt6_pcpu_cow_metrics(rt);
184         else if (rt->rt6i_flags & RTF_CACHE)
185                 return NULL;
186         else
187                 return dst_cow_metrics_generic(dst, old);
188 }
189
190 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
191                                              struct sk_buff *skb,
192                                              const void *daddr)
193 {
194         struct in6_addr *p = &rt->rt6i_gateway;
195
196         if (!ipv6_addr_any(p))
197                 return (const void *) p;
198         else if (skb)
199                 return &ipv6_hdr(skb)->daddr;
200         return daddr;
201 }
202
203 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
204                                           struct sk_buff *skb,
205                                           const void *daddr)
206 {
207         struct rt6_info *rt = (struct rt6_info *) dst;
208         struct neighbour *n;
209
210         daddr = choose_neigh_daddr(rt, skb, daddr);
211         n = __ipv6_neigh_lookup(dst->dev, daddr);
212         if (n)
213                 return n;
214         return neigh_create(&nd_tbl, daddr, dst->dev);
215 }
216
217 static struct dst_ops ip6_dst_ops_template = {
218         .family                 =       AF_INET6,
219         .gc                     =       ip6_dst_gc,
220         .gc_thresh              =       1024,
221         .check                  =       ip6_dst_check,
222         .default_advmss         =       ip6_default_advmss,
223         .mtu                    =       ip6_mtu,
224         .cow_metrics            =       ipv6_cow_metrics,
225         .destroy                =       ip6_dst_destroy,
226         .ifdown                 =       ip6_dst_ifdown,
227         .negative_advice        =       ip6_negative_advice,
228         .link_failure           =       ip6_link_failure,
229         .update_pmtu            =       ip6_rt_update_pmtu,
230         .redirect               =       rt6_do_redirect,
231         .local_out              =       __ip6_local_out,
232         .neigh_lookup           =       ip6_neigh_lookup,
233 };
234
235 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
236 {
237         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
238
239         return mtu ? : dst->dev->mtu;
240 }
241
242 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
243                                          struct sk_buff *skb, u32 mtu)
244 {
245 }
246
/* redirect handler for blackhole dsts. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
	/* intentionally empty */
}
251
252 static struct dst_ops ip6_dst_blackhole_ops = {
253         .family                 =       AF_INET6,
254         .destroy                =       ip6_dst_destroy,
255         .check                  =       ip6_dst_check,
256         .mtu                    =       ip6_blackhole_mtu,
257         .default_advmss         =       ip6_default_advmss,
258         .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
259         .redirect               =       ip6_rt_blackhole_redirect,
260         .cow_metrics            =       dst_cow_metrics_generic,
261         .neigh_lookup           =       ip6_neigh_lookup,
262 };
263
264 static const u32 ip6_template_metrics[RTAX_MAX] = {
265         [RTAX_HOPLIMIT - 1] = 0,
266 };
267
268 static const struct rt6_info ip6_null_entry_template = {
269         .dst = {
270                 .__refcnt       = ATOMIC_INIT(1),
271                 .__use          = 1,
272                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
273                 .error          = -ENETUNREACH,
274                 .input          = ip6_pkt_discard,
275                 .output         = ip6_pkt_discard_out,
276         },
277         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
278         .rt6i_protocol  = RTPROT_KERNEL,
279         .rt6i_metric    = ~(u32) 0,
280         .rt6i_ref       = ATOMIC_INIT(1),
281 };
282
283 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
284
285 static const struct rt6_info ip6_prohibit_entry_template = {
286         .dst = {
287                 .__refcnt       = ATOMIC_INIT(1),
288                 .__use          = 1,
289                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
290                 .error          = -EACCES,
291                 .input          = ip6_pkt_prohibit,
292                 .output         = ip6_pkt_prohibit_out,
293         },
294         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
295         .rt6i_protocol  = RTPROT_KERNEL,
296         .rt6i_metric    = ~(u32) 0,
297         .rt6i_ref       = ATOMIC_INIT(1),
298 };
299
300 static const struct rt6_info ip6_blk_hole_entry_template = {
301         .dst = {
302                 .__refcnt       = ATOMIC_INIT(1),
303                 .__use          = 1,
304                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
305                 .error          = -EINVAL,
306                 .input          = dst_discard,
307                 .output         = dst_discard_out,
308         },
309         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
310         .rt6i_protocol  = RTPROT_KERNEL,
311         .rt6i_metric    = ~(u32) 0,
312         .rt6i_ref       = ATOMIC_INIT(1),
313 };
314
315 #endif
316
317 static void rt6_info_init(struct rt6_info *rt)
318 {
319         struct dst_entry *dst = &rt->dst;
320
321         memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
322         INIT_LIST_HEAD(&rt->rt6i_siblings);
323         INIT_LIST_HEAD(&rt->rt6i_uncached);
324 }
325
326 /* allocate dst with ip6_dst_ops */
327 static struct rt6_info *__ip6_dst_alloc(struct net *net,
328                                         struct net_device *dev,
329                                         int flags)
330 {
331         struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
332                                         0, DST_OBSOLETE_FORCE_CHK, flags);
333
334         if (rt)
335                 rt6_info_init(rt);
336
337         return rt;
338 }
339
340 static struct rt6_info *ip6_dst_alloc(struct net *net,
341                                       struct net_device *dev,
342                                       int flags)
343 {
344         struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
345
346         if (rt) {
347                 rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
348                 if (rt->rt6i_pcpu) {
349                         int cpu;
350
351                         for_each_possible_cpu(cpu) {
352                                 struct rt6_info **p;
353
354                                 p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
355                                 /* no one shares rt */
356                                 *p =  NULL;
357                         }
358                 } else {
359                         dst_destroy((struct dst_entry *)rt);
360                         return NULL;
361                 }
362         }
363
364         return rt;
365 }
366
367 static void ip6_dst_destroy(struct dst_entry *dst)
368 {
369         struct rt6_info *rt = (struct rt6_info *)dst;
370         struct dst_entry *from = dst->from;
371         struct inet6_dev *idev;
372
373         dst_destroy_metrics_generic(dst);
374         free_percpu(rt->rt6i_pcpu);
375         rt6_uncached_list_del(rt);
376
377         idev = rt->rt6i_idev;
378         if (idev) {
379                 rt->rt6i_idev = NULL;
380                 in6_dev_put(idev);
381         }
382
383         dst->from = NULL;
384         dst_release(from);
385 }
386
387 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
388                            int how)
389 {
390         struct rt6_info *rt = (struct rt6_info *)dst;
391         struct inet6_dev *idev = rt->rt6i_idev;
392         struct net_device *loopback_dev =
393                 dev_net(dev)->loopback_dev;
394
395         if (dev != loopback_dev) {
396                 if (idev && idev->dev == dev) {
397                         struct inet6_dev *loopback_idev =
398                                 in6_dev_get(loopback_dev);
399                         if (loopback_idev) {
400                                 rt->rt6i_idev = loopback_idev;
401                                 in6_dev_put(idev);
402                         }
403                 }
404         }
405 }
406
407 static bool rt6_check_expired(const struct rt6_info *rt)
408 {
409         if (rt->rt6i_flags & RTF_EXPIRES) {
410                 if (time_after(jiffies, rt->dst.expires))
411                         return true;
412         } else if (rt->dst.from) {
413                 return rt6_check_expired((struct rt6_info *) rt->dst.from);
414         }
415         return false;
416 }
417
/* Multipath route selection:
 *   Map the flow hash of @fl6 onto an index in [0, candidate_count).
 * Adapted from fib_info_hashfn()
 */
static int rt6_info_hash_nhsfn(unsigned int candidate_count,
			       const struct flowi6 *fl6)
{
	unsigned int hash = get_hash_from_flowi6(fl6);

	return hash % candidate_count;
}
427
428 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
429                                              struct flowi6 *fl6, int oif,
430                                              int strict)
431 {
432         struct rt6_info *sibling, *next_sibling;
433         int route_choosen;
434
435         route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
436         /* Don't change the route, if route_choosen == 0
437          * (siblings does not include ourself)
438          */
439         if (route_choosen)
440                 list_for_each_entry_safe(sibling, next_sibling,
441                                 &match->rt6i_siblings, rt6i_siblings) {
442                         route_choosen--;
443                         if (route_choosen == 0) {
444                                 if (rt6_score_route(sibling, oif, strict) < 0)
445                                         break;
446                                 match = sibling;
447                                 break;
448                         }
449                 }
450         return match;
451 }
452
453 /*
454  *      Route lookup. Any table->tb6_lock is implied.
455  */
456
457 static inline struct rt6_info *rt6_device_match(struct net *net,
458                                                     struct rt6_info *rt,
459                                                     const struct in6_addr *saddr,
460                                                     int oif,
461                                                     int flags)
462 {
463         struct rt6_info *local = NULL;
464         struct rt6_info *sprt;
465
466         if (!oif && ipv6_addr_any(saddr))
467                 goto out;
468
469         for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
470                 struct net_device *dev = sprt->dst.dev;
471
472                 if (oif) {
473                         if (dev->ifindex == oif)
474                                 return sprt;
475                         if (dev->flags & IFF_LOOPBACK) {
476                                 if (!sprt->rt6i_idev ||
477                                     sprt->rt6i_idev->dev->ifindex != oif) {
478                                         if (flags & RT6_LOOKUP_F_IFACE)
479                                                 continue;
480                                         if (local &&
481                                             local->rt6i_idev->dev->ifindex == oif)
482                                                 continue;
483                                 }
484                                 local = sprt;
485                         }
486                 } else {
487                         if (ipv6_chk_addr(net, saddr, dev,
488                                           flags & RT6_LOOKUP_F_IFACE))
489                                 return sprt;
490                 }
491         }
492
493         if (oif) {
494                 if (local)
495                         return local;
496
497                 if (flags & RT6_LOOKUP_F_IFACE)
498                         return net->ipv6.ip6_null_entry;
499         }
500 out:
501         return rt;
502 }
503
504 #ifdef CONFIG_IPV6_ROUTER_PREF
505 struct __rt6_probe_work {
506         struct work_struct work;
507         struct in6_addr target;
508         struct net_device *dev;
509 };
510
511 static void rt6_probe_deferred(struct work_struct *w)
512 {
513         struct in6_addr mcaddr;
514         struct __rt6_probe_work *work =
515                 container_of(w, struct __rt6_probe_work, work);
516
517         addrconf_addr_solict_mult(&work->target, &mcaddr);
518         ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, NULL);
519         dev_put(work->dev);
520         kfree(work);
521 }
522
523 static void rt6_probe(struct rt6_info *rt)
524 {
525         struct __rt6_probe_work *work;
526         struct neighbour *neigh;
527         /*
528          * Okay, this does not seem to be appropriate
529          * for now, however, we need to check if it
530          * is really so; aka Router Reachability Probing.
531          *
532          * Router Reachability Probe MUST be rate-limited
533          * to no more than one per minute.
534          */
535         if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
536                 return;
537         rcu_read_lock_bh();
538         neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
539         if (neigh) {
540                 if (neigh->nud_state & NUD_VALID)
541                         goto out;
542
543                 work = NULL;
544                 write_lock(&neigh->lock);
545                 if (!(neigh->nud_state & NUD_VALID) &&
546                     time_after(jiffies,
547                                neigh->updated +
548                                rt->rt6i_idev->cnf.rtr_probe_interval)) {
549                         work = kmalloc(sizeof(*work), GFP_ATOMIC);
550                         if (work)
551                                 __neigh_set_probe_once(neigh);
552                 }
553                 write_unlock(&neigh->lock);
554         } else {
555                 work = kmalloc(sizeof(*work), GFP_ATOMIC);
556         }
557
558         if (work) {
559                 INIT_WORK(&work->work, rt6_probe_deferred);
560                 work->target = rt->rt6i_gateway;
561                 dev_hold(rt->dst.dev);
562                 work->dev = rt->dst.dev;
563                 schedule_work(&work->work);
564         }
565
566 out:
567         rcu_read_unlock_bh();
568 }
569 #else
/* Variant used when CONFIG_IPV6_ROUTER_PREF is not set. */
static inline void rt6_probe(struct rt6_info *rt)
{
	/* intentionally empty */
}
573 #endif
574
575 /*
576  * Default Router Selection (RFC 2461 6.3.6)
577  */
578 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
579 {
580         struct net_device *dev = rt->dst.dev;
581         if (!oif || dev->ifindex == oif)
582                 return 2;
583         if ((dev->flags & IFF_LOOPBACK) &&
584             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
585                 return 1;
586         return 0;
587 }
588
589 static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
590 {
591         struct neighbour *neigh;
592         enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
593
594         if (rt->rt6i_flags & RTF_NONEXTHOP ||
595             !(rt->rt6i_flags & RTF_GATEWAY))
596                 return RT6_NUD_SUCCEED;
597
598         rcu_read_lock_bh();
599         neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
600         if (neigh) {
601                 read_lock(&neigh->lock);
602                 if (neigh->nud_state & NUD_VALID)
603                         ret = RT6_NUD_SUCCEED;
604 #ifdef CONFIG_IPV6_ROUTER_PREF
605                 else if (!(neigh->nud_state & NUD_FAILED))
606                         ret = RT6_NUD_SUCCEED;
607                 else
608                         ret = RT6_NUD_FAIL_PROBE;
609 #endif
610                 read_unlock(&neigh->lock);
611         } else {
612                 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
613                       RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
614         }
615         rcu_read_unlock_bh();
616
617         return ret;
618 }
619
620 static int rt6_score_route(struct rt6_info *rt, int oif,
621                            int strict)
622 {
623         int m;
624
625         m = rt6_check_dev(rt, oif);
626         if (!m && (strict & RT6_LOOKUP_F_IFACE))
627                 return RT6_NUD_FAIL_HARD;
628 #ifdef CONFIG_IPV6_ROUTER_PREF
629         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
630 #endif
631         if (strict & RT6_LOOKUP_F_REACHABLE) {
632                 int n = rt6_check_neigh(rt);
633                 if (n < 0)
634                         return n;
635         }
636         return m;
637 }
638
639 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
640                                    int *mpri, struct rt6_info *match,
641                                    bool *do_rr)
642 {
643         int m;
644         bool match_do_rr = false;
645         struct inet6_dev *idev = rt->rt6i_idev;
646         struct net_device *dev = rt->dst.dev;
647
648         if (dev && !netif_carrier_ok(dev) &&
649             idev->cnf.ignore_routes_with_linkdown)
650                 goto out;
651
652         if (rt6_check_expired(rt))
653                 goto out;
654
655         m = rt6_score_route(rt, oif, strict);
656         if (m == RT6_NUD_FAIL_DO_RR) {
657                 match_do_rr = true;
658                 m = 0; /* lowest valid score */
659         } else if (m == RT6_NUD_FAIL_HARD) {
660                 goto out;
661         }
662
663         if (strict & RT6_LOOKUP_F_REACHABLE)
664                 rt6_probe(rt);
665
666         /* note that m can be RT6_NUD_FAIL_PROBE at this point */
667         if (m > *mpri) {
668                 *do_rr = match_do_rr;
669                 *mpri = m;
670                 match = rt;
671         }
672 out:
673         return match;
674 }
675
676 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
677                                      struct rt6_info *rr_head,
678                                      u32 metric, int oif, int strict,
679                                      bool *do_rr)
680 {
681         struct rt6_info *rt, *match, *cont;
682         int mpri = -1;
683
684         match = NULL;
685         cont = NULL;
686         for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
687                 if (rt->rt6i_metric != metric) {
688                         cont = rt;
689                         break;
690                 }
691
692                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
693         }
694
695         for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
696                 if (rt->rt6i_metric != metric) {
697                         cont = rt;
698                         break;
699                 }
700
701                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
702         }
703
704         if (match || !cont)
705                 return match;
706
707         for (rt = cont; rt; rt = rt->dst.rt6_next)
708                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
709
710         return match;
711 }
712
713 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
714 {
715         struct rt6_info *match, *rt0;
716         struct net *net;
717         bool do_rr = false;
718
719         rt0 = fn->rr_ptr;
720         if (!rt0)
721                 fn->rr_ptr = rt0 = fn->leaf;
722
723         match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
724                              &do_rr);
725
726         if (do_rr) {
727                 struct rt6_info *next = rt0->dst.rt6_next;
728
729                 /* no entries matched; do round-robin */
730                 if (!next || next->rt6i_metric != rt0->rt6i_metric)
731                         next = fn->leaf;
732
733                 if (next != rt0)
734                         fn->rr_ptr = next;
735         }
736
737         net = dev_net(rt0->dst.dev);
738         return match ? match : net->ipv6.ip6_null_entry;
739 }
740
741 static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
742 {
743         return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
744 }
745
746 #ifdef CONFIG_IPV6_ROUTE_INFO
747 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
748                   const struct in6_addr *gwaddr)
749 {
750         struct net *net = dev_net(dev);
751         struct route_info *rinfo = (struct route_info *) opt;
752         struct in6_addr prefix_buf, *prefix;
753         unsigned int pref;
754         unsigned long lifetime;
755         struct rt6_info *rt;
756
757         if (len < sizeof(struct route_info)) {
758                 return -EINVAL;
759         }
760
761         /* Sanity check for prefix_len and length */
762         if (rinfo->length > 3) {
763                 return -EINVAL;
764         } else if (rinfo->prefix_len > 128) {
765                 return -EINVAL;
766         } else if (rinfo->prefix_len > 64) {
767                 if (rinfo->length < 2) {
768                         return -EINVAL;
769                 }
770         } else if (rinfo->prefix_len > 0) {
771                 if (rinfo->length < 1) {
772                         return -EINVAL;
773                 }
774         }
775
776         pref = rinfo->route_pref;
777         if (pref == ICMPV6_ROUTER_PREF_INVALID)
778                 return -EINVAL;
779
780         lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
781
782         if (rinfo->length == 3)
783                 prefix = (struct in6_addr *)rinfo->prefix;
784         else {
785                 /* this function is safe */
786                 ipv6_addr_prefix(&prefix_buf,
787                                  (struct in6_addr *)rinfo->prefix,
788                                  rinfo->prefix_len);
789                 prefix = &prefix_buf;
790         }
791
792         if (rinfo->prefix_len == 0)
793                 rt = rt6_get_dflt_router(gwaddr, dev);
794         else
795                 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
796                                         gwaddr, dev->ifindex);
797
798         if (rt && !lifetime) {
799                 ip6_del_rt(rt);
800                 rt = NULL;
801         }
802
803         if (!rt && lifetime)
804                 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
805                                         pref);
806         else if (rt)
807                 rt->rt6i_flags = RTF_ROUTEINFO |
808                                  (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
809
810         if (rt) {
811                 if (!addrconf_finite_timeout(lifetime))
812                         rt6_clean_expires(rt);
813                 else
814                         rt6_set_expires(rt, jiffies + HZ * lifetime);
815
816                 ip6_rt_put(rt);
817         }
818         return 0;
819 }
820 #endif
821
822 static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
823                                         struct in6_addr *saddr)
824 {
825         struct fib6_node *pn;
826         while (1) {
827                 if (fn->fn_flags & RTN_TL_ROOT)
828                         return NULL;
829                 pn = fn->parent;
830                 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
831                         fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
832                 else
833                         fn = pn;
834                 if (fn->fn_flags & RTN_RTINFO)
835                         return fn;
836         }
837 }
838
839 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
840                                              struct fib6_table *table,
841                                              struct flowi6 *fl6, int flags)
842 {
843         struct fib6_node *fn;
844         struct rt6_info *rt;
845
846         read_lock_bh(&table->tb6_lock);
847         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
848 restart:
849         rt = fn->leaf;
850         rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
851         if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
852                 rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
853         if (rt == net->ipv6.ip6_null_entry) {
854                 fn = fib6_backtrack(fn, &fl6->saddr);
855                 if (fn)
856                         goto restart;
857         }
858         dst_use(&rt->dst, jiffies);
859         read_unlock_bh(&table->tb6_lock);
860         return rt;
861
862 }
863
864 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
865                                     int flags)
866 {
867         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
868 }
869 EXPORT_SYMBOL_GPL(ip6_route_lookup);
870
871 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
872                             const struct in6_addr *saddr, int oif, int strict)
873 {
874         struct flowi6 fl6 = {
875                 .flowi6_oif = oif,
876                 .daddr = *daddr,
877         };
878         struct dst_entry *dst;
879         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
880
881         if (saddr) {
882                 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
883                 flags |= RT6_LOOKUP_F_HAS_SADDR;
884         }
885
886         dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
887         if (dst->error == 0)
888                 return (struct rt6_info *) dst;
889
890         dst_release(dst);
891
892         return NULL;
893 }
894 EXPORT_SYMBOL(rt6_lookup);
895
/* ip6_ins_rt() must be called with table->tb6_lock NOT held.
 * It takes a new route entry; if the addition fails for any reason, the
 * route is freed.  In either case, if the caller does not hold it, it may
 * be destroyed.
 */
901
902 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
903                         struct mx6_config *mxc)
904 {
905         int err;
906         struct fib6_table *table;
907
908         table = rt->rt6i_table;
909         write_lock_bh(&table->tb6_lock);
910         err = fib6_add(&table->tb6_root, rt, info, mxc);
911         write_unlock_bh(&table->tb6_lock);
912
913         return err;
914 }
915
916 int ip6_ins_rt(struct rt6_info *rt)
917 {
918         struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
919         struct mx6_config mxc = { .mx = NULL, };
920
921         return __ip6_ins_rt(rt, &info, &mxc);
922 }
923
924 static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
925                                            const struct in6_addr *daddr,
926                                            const struct in6_addr *saddr)
927 {
928         struct rt6_info *rt;
929
930         /*
931          *      Clone the route.
932          */
933
934         if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
935                 ort = (struct rt6_info *)ort->dst.from;
936
937         rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);
938
939         if (!rt)
940                 return NULL;
941
942         ip6_rt_copy_init(rt, ort);
943         rt->rt6i_flags |= RTF_CACHE;
944         rt->rt6i_metric = 0;
945         rt->dst.flags |= DST_HOST;
946         rt->rt6i_dst.addr = *daddr;
947         rt->rt6i_dst.plen = 128;
948
949         if (!rt6_is_gw_or_nonexthop(ort)) {
950                 if (ort->rt6i_dst.plen != 128 &&
951                     ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
952                         rt->rt6i_flags |= RTF_ANYCAST;
953 #ifdef CONFIG_IPV6_SUBTREES
954                 if (rt->rt6i_src.plen && saddr) {
955                         rt->rt6i_src.addr = *saddr;
956                         rt->rt6i_src.plen = 128;
957                 }
958 #endif
959         }
960
961         return rt;
962 }
963
964 static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
965 {
966         struct rt6_info *pcpu_rt;
967
968         pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
969                                   rt->dst.dev, rt->dst.flags);
970
971         if (!pcpu_rt)
972                 return NULL;
973         ip6_rt_copy_init(pcpu_rt, rt);
974         pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
975         pcpu_rt->rt6i_flags |= RTF_PCPU;
976         return pcpu_rt;
977 }
978
979 /* It should be called with read_lock_bh(&tb6_lock) acquired */
980 static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
981 {
982         struct rt6_info *pcpu_rt, **p;
983
984         p = this_cpu_ptr(rt->rt6i_pcpu);
985         pcpu_rt = *p;
986
987         if (pcpu_rt) {
988                 dst_hold(&pcpu_rt->dst);
989                 rt6_dst_from_metrics_check(pcpu_rt);
990         }
991         return pcpu_rt;
992 }
993
994 static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
995 {
996         struct fib6_table *table = rt->rt6i_table;
997         struct rt6_info *pcpu_rt, *prev, **p;
998
999         pcpu_rt = ip6_rt_pcpu_alloc(rt);
1000         if (!pcpu_rt) {
1001                 struct net *net = dev_net(rt->dst.dev);
1002
1003                 dst_hold(&net->ipv6.ip6_null_entry->dst);
1004                 return net->ipv6.ip6_null_entry;
1005         }
1006
1007         read_lock_bh(&table->tb6_lock);
1008         if (rt->rt6i_pcpu) {
1009                 p = this_cpu_ptr(rt->rt6i_pcpu);
1010                 prev = cmpxchg(p, NULL, pcpu_rt);
1011                 if (prev) {
1012                         /* If someone did it before us, return prev instead */
1013                         dst_destroy(&pcpu_rt->dst);
1014                         pcpu_rt = prev;
1015                 }
1016         } else {
1017                 /* rt has been removed from the fib6 tree
1018                  * before we have a chance to acquire the read_lock.
1019                  * In this case, don't brother to create a pcpu rt
1020                  * since rt is going away anyway.  The next
1021                  * dst_check() will trigger a re-lookup.
1022                  */
1023                 dst_destroy(&pcpu_rt->dst);
1024                 pcpu_rt = rt;
1025         }
1026         dst_hold(&pcpu_rt->dst);
1027         rt6_dst_from_metrics_check(pcpu_rt);
1028         read_unlock_bh(&table->tb6_lock);
1029         return pcpu_rt;
1030 }
1031
1032 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
1033                                       struct flowi6 *fl6, int flags)
1034 {
1035         struct fib6_node *fn, *saved_fn;
1036         struct rt6_info *rt;
1037         int strict = 0;
1038
1039         strict |= flags & RT6_LOOKUP_F_IFACE;
1040         if (net->ipv6.devconf_all->forwarding == 0)
1041                 strict |= RT6_LOOKUP_F_REACHABLE;
1042
1043         read_lock_bh(&table->tb6_lock);
1044
1045         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1046         saved_fn = fn;
1047
1048         if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1049                 oif = 0;
1050
1051 redo_rt6_select:
1052         rt = rt6_select(fn, oif, strict);
1053         if (rt->rt6i_nsiblings)
1054                 rt = rt6_multipath_select(rt, fl6, oif, strict);
1055         if (rt == net->ipv6.ip6_null_entry) {
1056                 fn = fib6_backtrack(fn, &fl6->saddr);
1057                 if (fn)
1058                         goto redo_rt6_select;
1059                 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1060                         /* also consider unreachable route */
1061                         strict &= ~RT6_LOOKUP_F_REACHABLE;
1062                         fn = saved_fn;
1063                         goto redo_rt6_select;
1064                 }
1065         }
1066
1067
1068         if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
1069                 dst_use(&rt->dst, jiffies);
1070                 read_unlock_bh(&table->tb6_lock);
1071
1072                 rt6_dst_from_metrics_check(rt);
1073                 return rt;
1074         } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1075                             !(rt->rt6i_flags & RTF_GATEWAY))) {
1076                 /* Create a RTF_CACHE clone which will not be
1077                  * owned by the fib6 tree.  It is for the special case where
1078                  * the daddr in the skb during the neighbor look-up is different
1079                  * from the fl6->daddr used to look-up route here.
1080                  */
1081
1082                 struct rt6_info *uncached_rt;
1083
1084                 dst_use(&rt->dst, jiffies);
1085                 read_unlock_bh(&table->tb6_lock);
1086
1087                 uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1088                 dst_release(&rt->dst);
1089
1090                 if (uncached_rt)
1091                         rt6_uncached_list_add(uncached_rt);
1092                 else
1093                         uncached_rt = net->ipv6.ip6_null_entry;
1094
1095                 dst_hold(&uncached_rt->dst);
1096                 return uncached_rt;
1097
1098         } else {
1099                 /* Get a percpu copy */
1100
1101                 struct rt6_info *pcpu_rt;
1102
1103                 rt->dst.lastuse = jiffies;
1104                 rt->dst.__use++;
1105                 pcpu_rt = rt6_get_pcpu_route(rt);
1106
1107                 if (pcpu_rt) {
1108                         read_unlock_bh(&table->tb6_lock);
1109                 } else {
1110                         /* We have to do the read_unlock first
1111                          * because rt6_make_pcpu_route() may trigger
1112                          * ip6_dst_gc() which will take the write_lock.
1113                          */
1114                         dst_hold(&rt->dst);
1115                         read_unlock_bh(&table->tb6_lock);
1116                         pcpu_rt = rt6_make_pcpu_route(rt);
1117                         dst_release(&rt->dst);
1118                 }
1119
1120                 return pcpu_rt;
1121
1122         }
1123 }
1124
1125 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
1126                                             struct flowi6 *fl6, int flags)
1127 {
1128         return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
1129 }
1130
1131 static struct dst_entry *ip6_route_input_lookup(struct net *net,
1132                                                 struct net_device *dev,
1133                                                 struct flowi6 *fl6, int flags)
1134 {
1135         if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1136                 flags |= RT6_LOOKUP_F_IFACE;
1137
1138         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1139 }
1140
1141 void ip6_route_input(struct sk_buff *skb)
1142 {
1143         const struct ipv6hdr *iph = ipv6_hdr(skb);
1144         struct net *net = dev_net(skb->dev);
1145         int flags = RT6_LOOKUP_F_HAS_SADDR;
1146         struct ip_tunnel_info *tun_info;
1147         struct flowi6 fl6 = {
1148                 .flowi6_iif = l3mdev_fib_oif(skb->dev),
1149                 .daddr = iph->daddr,
1150                 .saddr = iph->saddr,
1151                 .flowlabel = ip6_flowinfo(iph),
1152                 .flowi6_mark = skb->mark,
1153                 .flowi6_proto = iph->nexthdr,
1154         };
1155
1156         tun_info = skb_tunnel_info(skb);
1157         if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
1158                 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
1159         skb_dst_drop(skb);
1160         skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1161 }
1162
1163 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1164                                              struct flowi6 *fl6, int flags)
1165 {
1166         return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1167 }
1168
1169 struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
1170                                     struct flowi6 *fl6)
1171 {
1172         struct dst_entry *dst;
1173         int flags = 0;
1174         bool any_src;
1175
1176         dst = l3mdev_rt6_dst_by_oif(net, fl6);
1177         if (dst)
1178                 return dst;
1179
1180         fl6->flowi6_iif = LOOPBACK_IFINDEX;
1181
1182         any_src = ipv6_addr_any(&fl6->saddr);
1183         if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
1184             (fl6->flowi6_oif && any_src))
1185                 flags |= RT6_LOOKUP_F_IFACE;
1186
1187         if (!any_src)
1188                 flags |= RT6_LOOKUP_F_HAS_SADDR;
1189         else if (sk)
1190                 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1191
1192         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1193 }
1194 EXPORT_SYMBOL(ip6_route_output);
1195
1196 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1197 {
1198         struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1199         struct dst_entry *new = NULL;
1200
1201         rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1202         if (rt) {
1203                 rt6_info_init(rt);
1204
1205                 new = &rt->dst;
1206                 new->__use = 1;
1207                 new->input = dst_discard;
1208                 new->output = dst_discard_out;
1209
1210                 dst_copy_metrics(new, &ort->dst);
1211                 rt->rt6i_idev = ort->rt6i_idev;
1212                 if (rt->rt6i_idev)
1213                         in6_dev_hold(rt->rt6i_idev);
1214
1215                 rt->rt6i_gateway = ort->rt6i_gateway;
1216                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
1217                 rt->rt6i_metric = 0;
1218
1219                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1220 #ifdef CONFIG_IPV6_SUBTREES
1221                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1222 #endif
1223
1224                 dst_free(new);
1225         }
1226
1227         dst_release(dst_orig);
1228         return new ? new : ERR_PTR(-ENOMEM);
1229 }
1230
1231 /*
1232  *      Destination cache support functions
1233  */
1234
1235 static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1236 {
1237         if (rt->dst.from &&
1238             dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1239                 dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1240 }
1241
1242 static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1243 {
1244         if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1245                 return NULL;
1246
1247         if (rt6_check_expired(rt))
1248                 return NULL;
1249
1250         return &rt->dst;
1251 }
1252
1253 static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1254 {
1255         if (rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1256             rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1257                 return &rt->dst;
1258         else
1259                 return NULL;
1260 }
1261
1262 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1263 {
1264         struct rt6_info *rt;
1265
1266         rt = (struct rt6_info *) dst;
1267
1268         /* All IPV6 dsts are created with ->obsolete set to the value
1269          * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1270          * into this function always.
1271          */
1272
1273         rt6_dst_from_metrics_check(rt);
1274
1275         if ((rt->rt6i_flags & RTF_PCPU) || unlikely(dst->flags & DST_NOCACHE))
1276                 return rt6_dst_from_check(rt, cookie);
1277         else
1278                 return rt6_check(rt, cookie);
1279 }
1280
1281 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1282 {
1283         struct rt6_info *rt = (struct rt6_info *) dst;
1284
1285         if (rt) {
1286                 if (rt->rt6i_flags & RTF_CACHE) {
1287                         if (rt6_check_expired(rt)) {
1288                                 ip6_del_rt(rt);
1289                                 dst = NULL;
1290                         }
1291                 } else {
1292                         dst_release(dst);
1293                         dst = NULL;
1294                 }
1295         }
1296         return dst;
1297 }
1298
1299 static void ip6_link_failure(struct sk_buff *skb)
1300 {
1301         struct rt6_info *rt;
1302
1303         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1304
1305         rt = (struct rt6_info *) skb_dst(skb);
1306         if (rt) {
1307                 if (rt->rt6i_flags & RTF_CACHE) {
1308                         dst_hold(&rt->dst);
1309                         ip6_del_rt(rt);
1310                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1311                         rt->rt6i_node->fn_sernum = -1;
1312                 }
1313         }
1314 }
1315
1316 static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1317 {
1318         struct net *net = dev_net(rt->dst.dev);
1319
1320         rt->rt6i_flags |= RTF_MODIFIED;
1321         rt->rt6i_pmtu = mtu;
1322         rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1323 }
1324
1325 static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1326                                  const struct ipv6hdr *iph, u32 mtu)
1327 {
1328         struct rt6_info *rt6 = (struct rt6_info *)dst;
1329
1330         if (rt6->rt6i_flags & RTF_LOCAL)
1331                 return;
1332
1333         dst_confirm(dst);
1334         mtu = max_t(u32, mtu, IPV6_MIN_MTU);
1335         if (mtu >= dst_mtu(dst))
1336                 return;
1337
1338         if (rt6->rt6i_flags & RTF_CACHE) {
1339                 rt6_do_update_pmtu(rt6, mtu);
1340         } else {
1341                 const struct in6_addr *daddr, *saddr;
1342                 struct rt6_info *nrt6;
1343
1344                 if (iph) {
1345                         daddr = &iph->daddr;
1346                         saddr = &iph->saddr;
1347                 } else if (sk) {
1348                         daddr = &sk->sk_v6_daddr;
1349                         saddr = &inet6_sk(sk)->saddr;
1350                 } else {
1351                         return;
1352                 }
1353                 nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
1354                 if (nrt6) {
1355                         rt6_do_update_pmtu(nrt6, mtu);
1356
1357                         /* ip6_ins_rt(nrt6) will bump the
1358                          * rt6->rt6i_node->fn_sernum
1359                          * which will fail the next rt6_check() and
1360                          * invalidate the sk->sk_dst_cache.
1361                          */
1362                         ip6_ins_rt(nrt6);
1363                 }
1364         }
1365 }
1366
1367 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1368                                struct sk_buff *skb, u32 mtu)
1369 {
1370         __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1371 }
1372
1373 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1374                      int oif, u32 mark)
1375 {
1376         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1377         struct dst_entry *dst;
1378         struct flowi6 fl6;
1379
1380         memset(&fl6, 0, sizeof(fl6));
1381         fl6.flowi6_oif = oif;
1382         fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
1383         fl6.daddr = iph->daddr;
1384         fl6.saddr = iph->saddr;
1385         fl6.flowlabel = ip6_flowinfo(iph);
1386
1387         dst = ip6_route_output(net, NULL, &fl6);
1388         if (!dst->error)
1389                 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
1390         dst_release(dst);
1391 }
1392 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1393
1394 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1395 {
1396         ip6_update_pmtu(skb, sock_net(sk), mtu,
1397                         sk->sk_bound_dev_if, sk->sk_mark);
1398 }
1399 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1400
/* Handle redirects */

/*
 * Flow key used for redirect lookups: a regular flowi6 with the address of
 * the redirecting gateway appended.  fl6 must stay the first member,
 * because __ip6_route_redirect() casts the struct flowi6 pointer it
 * receives back to a struct ip6rd_flowi pointer.
 */
struct ip6rd_flowi {
	struct flowi6 fl6;		/* ordinary lookup key */
	struct in6_addr gateway;	/* compared against rt6i_gateway */
};
1406
1407 static struct rt6_info *__ip6_route_redirect(struct net *net,
1408                                              struct fib6_table *table,
1409                                              struct flowi6 *fl6,
1410                                              int flags)
1411 {
1412         struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1413         struct rt6_info *rt;
1414         struct fib6_node *fn;
1415
1416         /* Get the "current" route for this destination and
1417          * check if the redirect has come from approriate router.
1418          *
1419          * RFC 4861 specifies that redirects should only be
1420          * accepted if they come from the nexthop to the target.
1421          * Due to the way the routes are chosen, this notion
1422          * is a bit fuzzy and one might need to check all possible
1423          * routes.
1424          */
1425
1426         read_lock_bh(&table->tb6_lock);
1427         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1428 restart:
1429         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1430                 if (rt6_check_expired(rt))
1431                         continue;
1432                 if (rt->dst.error)
1433                         break;
1434                 if (!(rt->rt6i_flags & RTF_GATEWAY))
1435                         continue;
1436                 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1437                         continue;
1438                 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1439                         continue;
1440                 break;
1441         }
1442
1443         if (!rt)
1444                 rt = net->ipv6.ip6_null_entry;
1445         else if (rt->dst.error) {
1446                 rt = net->ipv6.ip6_null_entry;
1447                 goto out;
1448         }
1449
1450         if (rt == net->ipv6.ip6_null_entry) {
1451                 fn = fib6_backtrack(fn, &fl6->saddr);
1452                 if (fn)
1453                         goto restart;
1454         }
1455
1456 out:
1457         dst_hold(&rt->dst);
1458
1459         read_unlock_bh(&table->tb6_lock);
1460
1461         return rt;
1462 };
1463
1464 static struct dst_entry *ip6_route_redirect(struct net *net,
1465                                         const struct flowi6 *fl6,
1466                                         const struct in6_addr *gateway)
1467 {
1468         int flags = RT6_LOOKUP_F_HAS_SADDR;
1469         struct ip6rd_flowi rdfl;
1470
1471         rdfl.fl6 = *fl6;
1472         rdfl.gateway = *gateway;
1473
1474         return fib6_rule_lookup(net, &rdfl.fl6,
1475                                 flags, __ip6_route_redirect);
1476 }
1477
1478 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1479 {
1480         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1481         struct dst_entry *dst;
1482         struct flowi6 fl6;
1483
1484         memset(&fl6, 0, sizeof(fl6));
1485         fl6.flowi6_iif = LOOPBACK_IFINDEX;
1486         fl6.flowi6_oif = oif;
1487         fl6.flowi6_mark = mark;
1488         fl6.daddr = iph->daddr;
1489         fl6.saddr = iph->saddr;
1490         fl6.flowlabel = ip6_flowinfo(iph);
1491
1492         dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1493         rt6_do_redirect(dst, NULL, skb);
1494         dst_release(dst);
1495 }
1496 EXPORT_SYMBOL_GPL(ip6_redirect);
1497
1498 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1499                             u32 mark)
1500 {
1501         const struct ipv6hdr *iph = ipv6_hdr(skb);
1502         const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1503         struct dst_entry *dst;
1504         struct flowi6 fl6;
1505
1506         memset(&fl6, 0, sizeof(fl6));
1507         fl6.flowi6_iif = LOOPBACK_IFINDEX;
1508         fl6.flowi6_oif = oif;
1509         fl6.flowi6_mark = mark;
1510         fl6.daddr = msg->dest;
1511         fl6.saddr = iph->daddr;
1512
1513         dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1514         rt6_do_redirect(dst, NULL, skb);
1515         dst_release(dst);
1516 }
1517
1518 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1519 {
1520         ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1521 }
1522 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1523
1524 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1525 {
1526         struct net_device *dev = dst->dev;
1527         unsigned int mtu = dst_mtu(dst);
1528         struct net *net = dev_net(dev);
1529
1530         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1531
1532         if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1533                 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1534
1535         /*
1536          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1537          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1538          * IPV6_MAXPLEN is also valid and means: "any MSS,
1539          * rely only on pmtu discovery"
1540          */
1541         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1542                 mtu = IPV6_MAXPLEN;
1543         return mtu;
1544 }
1545
1546 static unsigned int ip6_mtu(const struct dst_entry *dst)
1547 {
1548         const struct rt6_info *rt = (const struct rt6_info *)dst;
1549         unsigned int mtu = rt->rt6i_pmtu;
1550         struct inet6_dev *idev;
1551
1552         if (mtu)
1553                 goto out;
1554
1555         mtu = dst_metric_raw(dst, RTAX_MTU);
1556         if (mtu)
1557                 goto out;
1558
1559         mtu = IPV6_MIN_MTU;
1560
1561         rcu_read_lock();
1562         idev = __in6_dev_get(dst->dev);
1563         if (idev)
1564                 mtu = idev->cnf.mtu6;
1565         rcu_read_unlock();
1566
1567 out:
1568         return min_t(unsigned int, mtu, IP6_MAX_MTU);
1569 }
1570
/* Head of the singly linked list (chained through dst->next) onto which
 * icmp6_dst_alloc() pushes the dsts it creates; entries are unlinked by
 * icmp6_dst_gc() and icmp6_clean_all().
 */
static struct dst_entry *icmp6_dst_gc_list;
/* Held, with BHs disabled, by those functions while they touch the list. */
static DEFINE_SPINLOCK(icmp6_dst_lock);
1573
1574 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1575                                   struct flowi6 *fl6)
1576 {
1577         struct dst_entry *dst;
1578         struct rt6_info *rt;
1579         struct inet6_dev *idev = in6_dev_get(dev);
1580         struct net *net = dev_net(dev);
1581
1582         if (unlikely(!idev))
1583                 return ERR_PTR(-ENODEV);
1584
1585         rt = ip6_dst_alloc(net, dev, 0);
1586         if (unlikely(!rt)) {
1587                 in6_dev_put(idev);
1588                 dst = ERR_PTR(-ENOMEM);
1589                 goto out;
1590         }
1591
1592         rt->dst.flags |= DST_HOST;
1593         rt->dst.output  = ip6_output;
1594         atomic_set(&rt->dst.__refcnt, 1);
1595         rt->rt6i_gateway  = fl6->daddr;
1596         rt->rt6i_dst.addr = fl6->daddr;
1597         rt->rt6i_dst.plen = 128;
1598         rt->rt6i_idev     = idev;
1599         dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1600
1601         spin_lock_bh(&icmp6_dst_lock);
1602         rt->dst.next = icmp6_dst_gc_list;
1603         icmp6_dst_gc_list = &rt->dst;
1604         spin_unlock_bh(&icmp6_dst_lock);
1605
1606         fib6_force_start_gc(net);
1607
1608         dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1609
1610 out:
1611         return dst;
1612 }
1613
1614 int icmp6_dst_gc(void)
1615 {
1616         struct dst_entry *dst, **pprev;
1617         int more = 0;
1618
1619         spin_lock_bh(&icmp6_dst_lock);
1620         pprev = &icmp6_dst_gc_list;
1621
1622         while ((dst = *pprev) != NULL) {
1623                 if (!atomic_read(&dst->__refcnt)) {
1624                         *pprev = dst->next;
1625                         dst_free(dst);
1626                 } else {
1627                         pprev = &dst->next;
1628                         ++more;
1629                 }
1630         }
1631
1632         spin_unlock_bh(&icmp6_dst_lock);
1633
1634         return more;
1635 }
1636
1637 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1638                             void *arg)
1639 {
1640         struct dst_entry *dst, **pprev;
1641
1642         spin_lock_bh(&icmp6_dst_lock);
1643         pprev = &icmp6_dst_gc_list;
1644         while ((dst = *pprev) != NULL) {
1645                 struct rt6_info *rt = (struct rt6_info *) dst;
1646                 if (func(rt, arg)) {
1647                         *pprev = dst->next;
1648                         dst_free(dst);
1649                 } else {
1650                         pprev = &dst->next;
1651                 }
1652         }
1653         spin_unlock_bh(&icmp6_dst_lock);
1654 }
1655
1656 static int ip6_dst_gc(struct dst_ops *ops)
1657 {
1658         struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1659         int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1660         int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1661         int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1662         int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1663         unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1664         int entries;
1665
1666         entries = dst_entries_get_fast(ops);
1667         if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1668             entries <= rt_max_size)
1669                 goto out;
1670
1671         net->ipv6.ip6_rt_gc_expire++;
1672         fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
1673         entries = dst_entries_get_slow(ops);
1674         if (entries < ops->gc_thresh)
1675                 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1676 out:
1677         net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1678         return entries > rt_max_size;
1679 }
1680
1681 static int ip6_convert_metrics(struct mx6_config *mxc,
1682                                const struct fib6_config *cfg)
1683 {
1684         bool ecn_ca = false;
1685         struct nlattr *nla;
1686         int remaining;
1687         u32 *mp;
1688
1689         if (!cfg->fc_mx)
1690                 return 0;
1691
1692         mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1693         if (unlikely(!mp))
1694                 return -ENOMEM;
1695
1696         nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1697                 int type = nla_type(nla);
1698                 u32 val;
1699
1700                 if (!type)
1701                         continue;
1702                 if (unlikely(type > RTAX_MAX))
1703                         goto err;
1704
1705                 if (type == RTAX_CC_ALGO) {
1706                         char tmp[TCP_CA_NAME_MAX];
1707
1708                         nla_strlcpy(tmp, nla, sizeof(tmp));
1709                         val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
1710                         if (val == TCP_CA_UNSPEC)
1711                                 goto err;
1712                 } else {
1713                         val = nla_get_u32(nla);
1714                 }
1715                 if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
1716                         goto err;
1717
1718                 mp[type - 1] = val;
1719                 __set_bit(type - 1, mxc->mx_valid);
1720         }
1721
1722         if (ecn_ca) {
1723                 __set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
1724                 mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
1725         }
1726
1727         mxc->mx = mp;
1728         return 0;
1729  err:
1730         kfree(mp);
1731         return -EINVAL;
1732 }
1733
/*
 *      Build a new rt6_info from @cfg without inserting it into a table.
 *
 *      Validates the prefix lengths, resolves the output device and its
 *      inet6_dev (from fc_ifindex, from the loopback device for reject
 *      routes, or from the route towards the gateway), picks or creates the
 *      fib6 table, and fills in the dst handlers, destination/source
 *      prefixes, metric, gateway and preferred source address.
 *
 *      Side effects on @cfg: a zero fc_metric becomes IP6_RT_PRIO_USER, an
 *      RTPROT_UNSPEC fc_protocol becomes RTPROT_BOOT, and on success
 *      fc_nlinfo.nl_net is set to the namespace of the chosen device.
 *
 *      Returns the new route on success; the dev and idev references taken
 *      here are stored in it.  On failure returns ERR_PTR() of a negative
 *      errno after dropping those references and freeing the route.
 */
1734 static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
1735 {
1736         struct net *net = cfg->fc_nlinfo.nl_net;
1737         struct rt6_info *rt = NULL;
1738         struct net_device *dev = NULL;
1739         struct inet6_dev *idev = NULL;
1740         struct fib6_table *table;
1741         int addr_type;
1742         int err = -EINVAL;
1743
1744         if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1745                 goto out;
1746 #ifndef CONFIG_IPV6_SUBTREES
1747         if (cfg->fc_src_len)
1748                 goto out;
1749 #endif
             /* An explicit interface index pins both the device and its inet6_dev. */
1750         if (cfg->fc_ifindex) {
1751                 err = -ENODEV;
1752                 dev = dev_get_by_index(net, cfg->fc_ifindex);
1753                 if (!dev)
1754                         goto out;
1755                 idev = in6_dev_get(dev);
1756                 if (!idev)
1757                         goto out;
1758         }
1759
1760         if (cfg->fc_metric == 0)
1761                 cfg->fc_metric = IP6_RT_PRIO_USER;
1762
             /*
              * For a netlink request without NLM_F_CREATE a missing table only
              * triggers a warning; the table is created in either case.
              * -ENOBUFS is returned if no table could be obtained.
              */
1763         err = -ENOBUFS;
1764         if (cfg->fc_nlinfo.nlh &&
1765             !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1766                 table = fib6_get_table(net, cfg->fc_table);
1767                 if (!table) {
1768                         pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1769                         table = fib6_new_table(net, cfg->fc_table);
1770                 }
1771         } else {
1772                 table = fib6_new_table(net, cfg->fc_table);
1773         }
1774
1775         if (!table)
1776                 goto out;
1777
             /* Every route except RTF_ADDRCONF ones is allocated with DST_NOCOUNT. */
1778         rt = ip6_dst_alloc(net, NULL,
1779                            (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
1780
1781         if (!rt) {
1782                 err = -ENOMEM;
1783                 goto out;
1784         }
1785
1786         if (cfg->fc_flags & RTF_EXPIRES)
1787                 rt6_set_expires(rt, jiffies +
1788                                 clock_t_to_jiffies(cfg->fc_expires));
1789         else
1790                 rt6_clean_expires(rt);
1791
1792         if (cfg->fc_protocol == RTPROT_UNSPEC)
1793                 cfg->fc_protocol = RTPROT_BOOT;
1794         rt->rt6i_protocol = cfg->fc_protocol;
1795
1796         addr_type = ipv6_addr_type(&cfg->fc_dst);
1797
             /*
              * Default handlers, chosen from the destination type; they may be
              * wrapped by lwtunnel or replaced for reject routes further down.
              */
1798         if (addr_type & IPV6_ADDR_MULTICAST)
1799                 rt->dst.input = ip6_mc_input;
1800         else if (cfg->fc_flags & RTF_LOCAL)
1801                 rt->dst.input = ip6_input;
1802         else
1803                 rt->dst.input = ip6_forward;
1804
1805         rt->dst.output = ip6_output;
1806
1807         if (cfg->fc_encap) {
1808                 struct lwtunnel_state *lwtstate;
1809
1810                 err = lwtunnel_build_state(dev, cfg->fc_encap_type,
1811                                            cfg->fc_encap, AF_INET6, cfg,
1812                                            &lwtstate);
1813                 if (err)
1814                         goto out;
1815                 rt->dst.lwtstate = lwtstate_get(lwtstate);
                     /* Keep the original handlers so the tunnel code can chain to them. */
1816                 if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
1817                         rt->dst.lwtstate->orig_output = rt->dst.output;
1818                         rt->dst.output = lwtunnel_output;
1819                 }
1820                 if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
1821                         rt->dst.lwtstate->orig_input = rt->dst.input;
1822                         rt->dst.input = lwtunnel_input;
1823                 }
1824         }
1825
1826         ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1827         rt->rt6i_dst.plen = cfg->fc_dst_len;
1828         if (rt->rt6i_dst.plen == 128)
1829                 rt->dst.flags |= DST_HOST;
1830
1831 #ifdef CONFIG_IPV6_SUBTREES
1832         ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1833         rt->rt6i_src.plen = cfg->fc_src_len;
1834 #endif
1835
1836         rt->rt6i_metric = cfg->fc_metric;
1837
1838         /* We cannot add true routes via loopback here,
1839            they would result in kernel looping; promote them to reject routes
1840          */
1841         if ((cfg->fc_flags & RTF_REJECT) ||
1842             (dev && (dev->flags & IFF_LOOPBACK) &&
1843              !(addr_type & IPV6_ADDR_LOOPBACK) &&
1844              !(cfg->fc_flags & RTF_LOCAL))) {
1845                 /* hold loopback dev/idev if we haven't done so. */
1846                 if (dev != net->loopback_dev) {
1847                         if (dev) {
1848                                 dev_put(dev);
1849                                 in6_dev_put(idev);
1850                         }
1851                         dev = net->loopback_dev;
1852                         dev_hold(dev);
1853                         idev = in6_dev_get(dev);
1854                         if (!idev) {
1855                                 err = -ENODEV;
1856                                 goto out;
1857                         }
1858                 }
1859                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
                     /* The route type selects the error code and the drop handlers. */
1860                 switch (cfg->fc_type) {
1861                 case RTN_BLACKHOLE:
1862                         rt->dst.error = -EINVAL;
1863                         rt->dst.output = dst_discard_out;
1864                         rt->dst.input = dst_discard;
1865                         break;
1866                 case RTN_PROHIBIT:
1867                         rt->dst.error = -EACCES;
1868                         rt->dst.output = ip6_pkt_prohibit_out;
1869                         rt->dst.input = ip6_pkt_prohibit;
1870                         break;
1871                 case RTN_THROW:
1872                 case RTN_UNREACHABLE:
1873                 default:
1874                         rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
1875                                         : (cfg->fc_type == RTN_UNREACHABLE)
1876                                         ? -EHOSTUNREACH : -ENETUNREACH;
1877                         rt->dst.output = ip6_pkt_discard_out;
1878                         rt->dst.input = ip6_pkt_discard;
1879                         break;
1880                 }
                     /* Reject routes skip the gateway and prefsrc handling below. */
1881                 goto install_route;
1882         }
1883
1884         if (cfg->fc_flags & RTF_GATEWAY) {
1885                 const struct in6_addr *gw_addr;
1886                 int gwa_type;
1887
1888                 gw_addr = &cfg->fc_gateway;
1889                 gwa_type = ipv6_addr_type(gw_addr);
1890
1891                 /* if gw_addr is local we will fail to detect this in case
1892                  * address is still TENTATIVE (DAD in progress). rt6_lookup()
1893                  * will return already-added prefix route via interface that
1894                  * prefix route was assigned to, which might be non-loopback.
1895                  */
1896                 err = -EINVAL;
1897                 if (ipv6_chk_addr_and_flags(net, gw_addr,
1898                                             gwa_type & IPV6_ADDR_LINKLOCAL ?
1899                                             dev : NULL, 0, 0))
1900                         goto out;
1901
1902                 rt->rt6i_gateway = *gw_addr;
1903
1904                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1905                         struct rt6_info *grt;
1906
1907                         /* IPv6 strictly inhibits using not link-local
1908                            addresses as nexthop address.
1909                            Otherwise, router will not able to send redirects.
1910                            It is very good, but in some (rare!) circumstances
1911                            (SIT, PtP, NBMA NOARP links) it is handy to allow
1912                            some exceptions. --ANK
1913                          */
1914                         if (!(gwa_type & IPV6_ADDR_UNICAST))
1915                                 goto out;
1916
1917                         grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1918
1919                         err = -EHOSTUNREACH;
1920                         if (!grt)
1921                                 goto out;
1922                         if (dev) {
1923                                 if (dev != grt->dst.dev) {
1924                                         ip6_rt_put(grt);
1925                                         goto out;
1926                                 }
1927                         } else {
                                     /* No interface given: adopt the one that reaches the gateway. */
1928                                 dev = grt->dst.dev;
1929                                 idev = grt->rt6i_idev;
1930                                 dev_hold(dev);
1931                                 in6_dev_hold(grt->rt6i_idev);
1932                         }
                             /*
                              * The gateway must be reached without another gateway;
                              * otherwise err stays at -EHOSTUNREACH.
                              */
1933                         if (!(grt->rt6i_flags & RTF_GATEWAY))
1934                                 err = 0;
1935                         ip6_rt_put(grt);
1936
1937                         if (err)
1938                                 goto out;
1939                 }
                     /* A gateway route needs a known, non-loopback output device. */
1940                 err = -EINVAL;
1941                 if (!dev || (dev->flags & IFF_LOOPBACK))
1942                         goto out;
1943         }
1944
1945         err = -ENODEV;
1946         if (!dev)
1947                 goto out;
1948
             /* A preferred source address is accepted only if ipv6_chk_addr() finds it on dev. */
1949         if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1950                 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1951                         err = -EINVAL;
1952                         goto out;
1953                 }
1954                 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1955                 rt->rt6i_prefsrc.plen = 128;
1956         } else
1957                 rt->rt6i_prefsrc.plen = 0;
1958
1959         rt->rt6i_flags = cfg->fc_flags;
1960
1961 install_route:
             /*
              * The dev/idev references acquired above are not dropped on this
              * path; they are stored in the route.
              */
1962         rt->dst.dev = dev;
1963         rt->rt6i_idev = idev;
1964         rt->rt6i_table = table;
1965
1966         cfg->fc_nlinfo.nl_net = dev_net(dev);
1967
1968         return rt;
1969 out:
             /* rt->dst.dev and rt->rt6i_idev are only set at install_route, so release dev/idev here. */
1970         if (dev)
1971                 dev_put(dev);
1972         if (idev)
1973                 in6_dev_put(idev);
1974         if (rt)
1975                 dst_free(&rt->dst);
1976
1977         return ERR_PTR(err);
1978 }
1979
1980 int ip6_route_add(struct fib6_config *cfg)
1981 {
1982         struct mx6_config mxc = { .mx = NULL, };
1983         struct rt6_info *rt;
1984         int err;
1985
1986         rt = ip6_route_info_create(cfg);
1987         if (IS_ERR(rt)) {
1988                 err = PTR_ERR(rt);
1989                 rt = NULL;
1990                 goto out;
1991         }
1992
1993         err = ip6_convert_metrics(&mxc, cfg);
1994         if (err)
1995                 goto out;
1996
1997         err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
1998
1999         kfree(mxc.mx);
2000
2001         return err;
2002 out:
2003         if (rt)
2004                 dst_free(&rt->dst);
2005
2006         return err;
2007 }
2008
2009 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
2010 {
2011         int err;
2012         struct fib6_table *table;
2013         struct net *net = dev_net(rt->dst.dev);
2014
2015         if (rt == net->ipv6.ip6_null_entry ||
2016             rt->dst.flags & DST_NOCACHE) {
2017                 err = -ENOENT;
2018                 goto out;
2019         }
2020
2021         table = rt->rt6i_table;
2022         write_lock_bh(&table->tb6_lock);
2023         err = fib6_del(rt, info);
2024         write_unlock_bh(&table->tb6_lock);
2025
2026 out:
2027         ip6_rt_put(rt);
2028         return err;
2029 }
2030
2031 int ip6_del_rt(struct rt6_info *rt)
2032 {
2033         struct nl_info info = {
2034                 .nl_net = dev_net(rt->dst.dev),
2035         };
2036         return __ip6_del_rt(rt, &info);
2037 }
2038
2039 static int ip6_route_del(struct fib6_config *cfg)
2040 {
2041         struct fib6_table *table;
2042         struct fib6_node *fn;
2043         struct rt6_info *rt;
2044         int err = -ESRCH;
2045
2046         table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
2047         if (!table)
2048                 return err;
2049
2050         read_lock_bh(&table->tb6_lock);
2051
2052         fn = fib6_locate(&table->tb6_root,
2053                          &cfg->fc_dst, cfg->fc_dst_len,
2054                          &cfg->fc_src, cfg->fc_src_len);
2055
2056         if (fn) {
2057                 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2058                         if ((rt->rt6i_flags & RTF_CACHE) &&
2059                             !(cfg->fc_flags & RTF_CACHE))
2060                                 continue;
2061                         if (cfg->fc_ifindex &&
2062                             (!rt->dst.dev ||
2063                              rt->dst.dev->ifindex != cfg->fc_ifindex))
2064                                 continue;
2065                         if (cfg->fc_flags & RTF_GATEWAY &&
2066                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
2067                                 continue;
2068                         if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
2069                                 continue;
2070                         dst_hold(&rt->dst);
2071                         read_unlock_bh(&table->tb6_lock);
2072
2073                         return __ip6_del_rt(rt, &cfg->fc_nlinfo);
2074                 }
2075         }
2076         read_unlock_bh(&table->tb6_lock);
2077
2078         return err;
2079 }
2080
/*
 *      Handle a received ICMPv6 Redirect for the route @dst.
 *
 *      @skb holds the redirect message (a struct rd_msg at the ICMPv6
 *      header); @sk is not used here.  The message is dropped when it is too
 *      short, names a multicast destination, has a target that is neither
 *      the destination itself nor a link-local unicast address, arrives on a
 *      device that is forwarding or has accept_redirects off, carries
 *      invalid ND options, or when @dst is a reject route.
 *
 *      Otherwise the neighbour entry for the target is updated, an RTF_CACHE
 *      clone of the route for msg->dest via the target is inserted,
 *      NETEVENT_REDIRECT is raised, and @dst itself is deleted if it was a
 *      cached route.
 */
2081 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
2082 {
2083         struct netevent_redirect netevent;
2084         struct rt6_info *rt, *nrt = NULL;
2085         struct ndisc_options ndopts;
2086         struct inet6_dev *in6_dev;
2087         struct neighbour *neigh;
2088         struct rd_msg *msg;
2089         int optlen, on_link;
2090         u8 *lladdr;
2091
             /* Bytes left for ND options after the fixed redirect header. */
2092         optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
2093         optlen -= sizeof(*msg);
2094
2095         if (optlen < 0) {
2096                 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
2097                 return;
2098         }
2099
2100         msg = (struct rd_msg *)icmp6_hdr(skb);
2101
2102         if (ipv6_addr_is_multicast(&msg->dest)) {
2103                 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
2104                 return;
2105         }
2106
             /*
              * target == dest means the destination itself is the next hop
              * (on-link); any other target must be a link-local unicast address.
              */
2107         on_link = 0;
2108         if (ipv6_addr_equal(&msg->dest, &msg->target)) {
2109                 on_link = 1;
2110         } else if (ipv6_addr_type(&msg->target) !=
2111                    (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
2112                 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
2113                 return;
2114         }
2115
2116         in6_dev = __in6_dev_get(skb->dev);
2117         if (!in6_dev)
2118                 return;
             /* Redirects are ignored when forwarding or when accept_redirects is off. */
2119         if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
2120                 return;
2121
2122         /* RFC2461 8.1:
2123          *      The IP source address of the Redirect MUST be the same as the current
2124          *      first-hop router for the specified ICMP Destination Address.
2125          */
2126
2127         if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
2128                 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
2129                 return;
2130         }
2131
             /* The target link-layer address option is optional; lladdr stays NULL without it. */
2132         lladdr = NULL;
2133         if (ndopts.nd_opts_tgt_lladdr) {
2134                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
2135                                              skb->dev);
2136                 if (!lladdr) {
2137                         net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
2138                         return;
2139                 }
2140         }
2141
2142         rt = (struct rt6_info *) dst;
2143         if (rt->rt6i_flags & RTF_REJECT) {
2144                 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
2145                 return;
2146         }
2147
2148         /* Redirect received -> path was valid.
2149          * Look, redirects are sent only in response to data packets,
2150          * so that this nexthop apparently is reachable. --ANK
2151          */
2152         dst_confirm(&rt->dst);
2153
2154         neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
2155         if (!neigh)
2156                 return;
2157
2158         /*
2159          *      We have finally decided to accept it.
2160          */
2161
             /* For an off-link redirect the target is additionally flagged as a router. */
2162         neigh_update(neigh, lladdr, NUD_STALE,
2163                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
2164                      NEIGH_UPDATE_F_OVERRIDE|
2165                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
2166                                      NEIGH_UPDATE_F_ISROUTER))
2167                      );
2168
             /* Clone the current route into a cache entry for the redirected destination. */
2169         nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
2170         if (!nrt)
2171                 goto out;
2172
2173         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
2174         if (on_link)
2175                 nrt->rt6i_flags &= ~RTF_GATEWAY;
2176
2177         nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
2178
2179         if (ip6_ins_rt(nrt))
2180                 goto out;
2181
2182         netevent.old = &rt->dst;
2183         netevent.new = &nrt->dst;
2184         netevent.daddr = &msg->dest;
2185         netevent.neigh = neigh;
2186         call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
2187
             /*
              * A cached route that was redirected is removed.  __ip6_del_rt()
              * ends with ip6_rt_put(), so hand it a reference of its own.
              */
2188         if (rt->rt6i_flags & RTF_CACHE) {
2189                 rt = (struct rt6_info *) dst_clone(&rt->dst);
2190                 ip6_del_rt(rt);
2191         }
2192
2193 out:
2194         neigh_release(neigh);
2195 }
2196
2197 /*
2198  *      Misc support functions
2199  */
2200
2201 static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
2202 {
2203         BUG_ON(from->dst.from);
2204
2205         rt->rt6i_flags &= ~RTF_EXPIRES;
2206         dst_hold(&from->dst);
2207         rt->dst.from = &from->dst;
2208         dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
2209 }
2210
2211 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
2212 {
2213         rt->dst.input = ort->dst.input;
2214         rt->dst.output = ort->dst.output;
2215         rt->rt6i_dst = ort->rt6i_dst;
2216         rt->dst.error = ort->dst.error;
2217         rt->rt6i_idev = ort->rt6i_idev;
2218         if (rt->rt6i_idev)
2219                 in6_dev_hold(rt->rt6i_idev);
2220         rt->dst.lastuse = jiffies;
2221         rt->rt6i_gateway = ort->rt6i_gateway;
2222         rt->rt6i_flags = ort->rt6i_flags;
2223         rt6_set_from(rt, ort);
2224         rt->rt6i_metric = ort->rt6i_metric;
2225 #ifdef CONFIG_IPV6_SUBTREES
2226         rt->rt6i_src = ort->rt6i_src;
2227 #endif
2228         rt->rt6i_prefsrc = ort->rt6i_prefsrc;
2229         rt->rt6i_table = ort->rt6i_table;
2230         rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
2231 }
2232
2233 #ifdef CONFIG_IPV6_ROUTE_INFO
2234 static struct rt6_info *rt6_get_route_info(struct net *net,
2235                                            const struct in6_addr *prefix, int prefixlen,
2236                                            const struct in6_addr *gwaddr, int ifindex)
2237 {
2238         struct fib6_node *fn;
2239         struct rt6_info *rt = NULL;
2240         struct fib6_table *table;
2241
2242         table = fib6_get_table(net, RT6_TABLE_INFO);
2243         if (!table)
2244                 return NULL;
2245
2246         read_lock_bh(&table->tb6_lock);
2247         fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
2248         if (!fn)
2249                 goto out;
2250
2251         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2252                 if (rt->dst.dev->ifindex != ifindex)
2253                         continue;
2254                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2255                         continue;
2256                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2257                         continue;
2258                 dst_hold(&rt->dst);
2259                 break;
2260         }
2261 out:
2262         read_unlock_bh(&table->tb6_lock);
2263         return rt;
2264 }
2265
2266 static struct rt6_info *rt6_add_route_info(struct net *net,
2267                                            const struct in6_addr *prefix, int prefixlen,
2268                                            const struct in6_addr *gwaddr, int ifindex,
2269                                            unsigned int pref)
2270 {
2271         struct fib6_config cfg = {
2272                 .fc_metric      = IP6_RT_PRIO_USER,
2273                 .fc_ifindex     = ifindex,
2274                 .fc_dst_len     = prefixlen,
2275                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2276                                   RTF_UP | RTF_PREF(pref),
2277                 .fc_nlinfo.portid = 0,
2278                 .fc_nlinfo.nlh = NULL,
2279                 .fc_nlinfo.nl_net = net,
2280         };
2281
2282         cfg.fc_table = l3mdev_fib_table_by_index(net, ifindex) ? : RT6_TABLE_INFO;
2283         cfg.fc_dst = *prefix;
2284         cfg.fc_gateway = *gwaddr;
2285
2286         /* We should treat it as a default route if prefix length is 0. */
2287         if (!prefixlen)
2288                 cfg.fc_flags |= RTF_DEFAULT;
2289
2290         ip6_route_add(&cfg);
2291
2292         return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
2293 }
2294 #endif
2295
2296 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
2297 {
2298         struct rt6_info *rt;
2299         struct fib6_table *table;
2300
2301         table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
2302         if (!table)
2303                 return NULL;
2304
2305         read_lock_bh(&table->tb6_lock);
2306         for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2307                 if (dev == rt->dst.dev &&
2308                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
2309                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
2310                         break;
2311         }
2312         if (rt)
2313                 dst_hold(&rt->dst);
2314         read_unlock_bh(&table->tb6_lock);
2315         return rt;
2316 }
2317
2318 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2319                                      struct net_device *dev,
2320                                      unsigned int pref)
2321 {
2322         struct fib6_config cfg = {
2323                 .fc_table       = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
2324                 .fc_metric      = IP6_RT_PRIO_USER,
2325                 .fc_ifindex     = dev->ifindex,
2326                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2327                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2328                 .fc_nlinfo.portid = 0,
2329                 .fc_nlinfo.nlh = NULL,
2330                 .fc_nlinfo.nl_net = dev_net(dev),
2331         };
2332
2333         cfg.fc_gateway = *gwaddr;
2334
2335         ip6_route_add(&cfg);
2336
2337         return rt6_get_dflt_router(gwaddr, dev);
2338 }
2339
2340 void rt6_purge_dflt_routers(struct net *net)
2341 {
2342         struct rt6_info *rt;
2343         struct fib6_table *table;
2344
2345         /* NOTE: Keep consistent with rt6_get_dflt_router */
2346         table = fib6_get_table(net, RT6_TABLE_DFLT);
2347         if (!table)
2348                 return;
2349
2350 restart:
2351         read_lock_bh(&table->tb6_lock);
2352         for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2353                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2354                     (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
2355                         dst_hold(&rt->dst);
2356                         read_unlock_bh(&table->tb6_lock);
2357                         ip6_del_rt(rt);
2358                         goto restart;
2359                 }
2360         }
2361         read_unlock_bh(&table->tb6_lock);
2362 }
2363
2364 static void rtmsg_to_fib6_config(struct net *net,
2365                                  struct in6_rtmsg *rtmsg,
2366                                  struct fib6_config *cfg)
2367 {
2368         memset(cfg, 0, sizeof(*cfg));
2369
2370         cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
2371                          : RT6_TABLE_MAIN;
2372         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2373         cfg->fc_metric = rtmsg->rtmsg_metric;
2374         cfg->fc_expires = rtmsg->rtmsg_info;
2375         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2376         cfg->fc_src_len = rtmsg->rtmsg_src_len;
2377         cfg->fc_flags = rtmsg->rtmsg_flags;
2378
2379         cfg->fc_nlinfo.nl_net = net;
2380
2381         cfg->fc_dst = rtmsg->rtmsg_dst;
2382         cfg->fc_src = rtmsg->rtmsg_src;
2383         cfg->fc_gateway = rtmsg->rtmsg_gateway;
2384 }
2385
2386 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2387 {
2388         struct fib6_config cfg;
2389         struct in6_rtmsg rtmsg;
2390         int err;
2391
2392         switch (cmd) {
2393         case SIOCADDRT:         /* Add a route */
2394         case SIOCDELRT:         /* Delete a route */
2395                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2396                         return -EPERM;
2397                 err = copy_from_user(&rtmsg, arg,
2398                                      sizeof(struct in6_rtmsg));
2399                 if (err)
2400                         return -EFAULT;
2401
2402                 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2403
2404                 rtnl_lock();
2405                 switch (cmd) {
2406                 case SIOCADDRT:
2407                         err = ip6_route_add(&cfg);
2408                         break;
2409                 case SIOCDELRT:
2410                         err = ip6_route_del(&cfg);
2411                         break;
2412                 default:
2413                         err = -EINVAL;
2414                 }
2415                 rtnl_unlock();
2416
2417                 return err;
2418         }
2419
2420         return -EINVAL;
2421 }
2422
2423 /*
2424  *      Drop the packet on the floor
2425  */
2426
2427 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2428 {
2429         int type;
2430         struct dst_entry *dst = skb_dst(skb);
2431         switch (ipstats_mib_noroutes) {
2432         case IPSTATS_MIB_INNOROUTES:
2433                 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2434                 if (type == IPV6_ADDR_ANY) {
2435                         IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2436                                       IPSTATS_MIB_INADDRERRORS);
2437                         break;
2438                 }
2439                 /* FALLTHROUGH */
2440         case IPSTATS_MIB_OUTNOROUTES:
2441                 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2442                               ipstats_mib_noroutes);
2443                 break;
2444         }
2445         icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2446         kfree_skb(skb);
2447         return 0;
2448 }
2449
2450 static int ip6_pkt_discard(struct sk_buff *skb)
2451 {
2452         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2453 }
2454
2455 static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
2456 {
2457         skb->dev = skb_dst(skb)->dev;
2458         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2459 }
2460
2461 static int ip6_pkt_prohibit(struct sk_buff *skb)
2462 {
2463         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2464 }
2465
2466 static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
2467 {
2468         skb->dev = skb_dst(skb)->dev;
2469         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2470 }
2471
2472 /*
2473  *      Allocate a dst for local (unicast / anycast) address.
2474  */
2475
2476 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2477                                     const struct in6_addr *addr,
2478                                     bool anycast)
2479 {
2480         u32 tb_id;
2481         struct net *net = dev_net(idev->dev);
2482         struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
2483                                             DST_NOCOUNT);
2484         if (!rt)
2485                 return ERR_PTR(-ENOMEM);
2486
2487         in6_dev_hold(idev);
2488
2489         rt->dst.flags |= DST_HOST;
2490         rt->dst.input = ip6_input;
2491         rt->dst.output = ip6_output;
2492         rt->rt6i_idev = idev;
2493
2494         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2495         if (anycast)
2496                 rt->rt6i_flags |= RTF_ANYCAST;
2497         else
2498                 rt->rt6i_flags |= RTF_LOCAL;
2499
2500         rt->rt6i_gateway  = *addr;
2501         rt->rt6i_dst.addr = *addr;
2502         rt->rt6i_dst.plen = 128;
2503         tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
2504         rt->rt6i_table = fib6_get_table(net, tb_id);
2505         rt->dst.flags |= DST_NOCACHE;
2506
2507         atomic_set(&rt->dst.__refcnt, 1);
2508
2509         return rt;
2510 }
2511
2512 int ip6_route_get_saddr(struct net *net,
2513                         struct rt6_info *rt,
2514                         const struct in6_addr *daddr,
2515                         unsigned int prefs,
2516                         struct in6_addr *saddr)
2517 {
2518         struct inet6_dev *idev =
2519                 rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
2520         int err = 0;
2521         if (rt && rt->rt6i_prefsrc.plen)
2522                 *saddr = rt->rt6i_prefsrc.addr;
2523         else
2524                 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2525                                          daddr, prefs, saddr);
2526         return err;
2527 }
2528
2529 /* remove deleted ip from prefsrc entries */
/* Argument block passed through fib6_clean_all() to fib6_remove_prefsrc(). */
2530 struct arg_dev_net_ip {
             /* only routes on this device are touched; NULL matches any device */
2531         struct net_device *dev;
             /* namespace whose ip6_null_entry is left alone */
2532         struct net *net;
             /* address compared against each route's rt6i_prefsrc.addr */
2533         struct in6_addr *addr;
2534 };
2535
2536 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2537 {
2538         struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2539         struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2540         struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2541
2542         if (((void *)rt->dst.dev == dev || !dev) &&
2543             rt != net->ipv6.ip6_null_entry &&
2544             ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2545                 /* remove prefsrc entry */
2546                 rt->rt6i_prefsrc.plen = 0;
2547         }
2548         return 0;
2549 }
2550
2551 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2552 {
2553         struct net *net = dev_net(ifp->idev->dev);
2554         struct arg_dev_net_ip adni = {
2555                 .dev = ifp->idev->dev,
2556                 .net = net,
2557                 .addr = &ifp->addr,
2558         };
2559         fib6_clean_all(net, fib6_remove_prefsrc, &adni);
2560 }
2561
/*
 * Flag combinations tested by fib6_clean_tohost(); a route matches only if
 * every flag of a combination is set.
 */
2562 #define RTF_RA_ROUTER           (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2563 #define RTF_CACHE_GATEWAY       (RTF_GATEWAY | RTF_CACHE)
2564
2565 /* Remove routers and update dst entries when gateway turn into host. */
2566 static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2567 {
2568         struct in6_addr *gateway = (struct in6_addr *)arg;
2569
2570         if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2571              ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2572              ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2573                 return -1;
2574         }
2575         return 0;
2576 }
2577
2578 void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2579 {
2580         fib6_clean_all(net, fib6_clean_tohost, gateway);
2581 }
2582
/* Argument block handed to fib6_ifdown() by rt6_ifdown(). */
2583 struct arg_dev_net {
             /* routes on this device match; NULL matches routes on any device */
2584         struct net_device *dev;
             /* namespace whose ip6_null_entry never matches */
2585         struct net *net;
2586 };
2587
2588 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2589 {
2590         const struct arg_dev_net *adn = arg;
2591         const struct net_device *dev = adn->dev;
2592
2593         if ((rt->dst.dev == dev || !dev) &&
2594             rt != adn->net->ipv6.ip6_null_entry)
2595                 return -1;
2596
2597         return 0;
2598 }
2599
2600 void rt6_ifdown(struct net *net, struct net_device *dev)
2601 {
2602         struct arg_dev_net adn = {
2603                 .dev = dev,
2604                 .net = net,
2605         };
2606
2607         fib6_clean_all(net, fib6_ifdown, &adn);
2608         icmp6_clean_all(fib6_ifdown, &adn);
2609         if (dev)
2610                 rt6_uncached_list_flush_dev(net, dev);
2611 }
2612
2613 struct rt6_mtu_change_arg {
2614         struct net_device *dev;
2615         unsigned int mtu;
2616 };
2617
2618 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2619 {
2620         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2621         struct inet6_dev *idev;
2622
2623         /* In IPv6 pmtu discovery is not optional,
2624            so that RTAX_MTU lock cannot disable it.
2625            We still use this lock to block changes
2626            caused by addrconf/ndisc.
2627         */
2628
2629         idev = __in6_dev_get(arg->dev);
2630         if (!idev)
2631                 return 0;
2632
2633         /* For administrative MTU increase, there is no way to discover
2634            IPv6 PMTU increase, so PMTU increase should be updated here.
2635            Since RFC 1981 doesn't include administrative MTU increase
2636            update PMTU increase is a MUST. (i.e. jumbo frame)
2637          */
2638         /*
2639            If new MTU is less than route PMTU, this new MTU will be the
2640            lowest MTU in the path, update the route PMTU to reflect PMTU
2641            decreases; if new MTU is greater than route PMTU, and the
2642            old MTU is the lowest MTU in the path, update the route PMTU
2643            to reflect the increase. In this case if the other nodes' MTU
2644            also have the lowest MTU, TOO BIG MESSAGE will be lead to
2645            PMTU discouvery.
2646          */
2647         if (rt->dst.dev == arg->dev &&
2648             !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2649                 if (rt->rt6i_flags & RTF_CACHE) {
2650                         /* For RTF_CACHE with rt6i_pmtu == 0
2651                          * (i.e. a redirected route),
2652                          * the metrics of its rt->dst.from has already
2653                          * been updated.
2654                          */
2655                         if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2656                                 rt->rt6i_pmtu = arg->mtu;
2657                 } else if (dst_mtu(&rt->dst) >= arg->mtu ||
2658                            (dst_mtu(&rt->dst) < arg->mtu &&
2659                             dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2660                         dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2661                 }
2662         }
2663         return 0;
2664 }
2665
2666 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2667 {
2668         struct rt6_mtu_change_arg arg = {
2669                 .dev = dev,
2670                 .mtu = mtu,
2671         };
2672
2673         fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
2674 }
2675
2676 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2677         [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2678         [RTA_OIF]               = { .type = NLA_U32 },
2679         [RTA_IIF]               = { .type = NLA_U32 },
2680         [RTA_PRIORITY]          = { .type = NLA_U32 },
2681         [RTA_METRICS]           = { .type = NLA_NESTED },
2682         [RTA_MULTIPATH]         = { .len = sizeof(struct rtnexthop) },
2683         [RTA_PREF]              = { .type = NLA_U8 },
2684         [RTA_ENCAP_TYPE]        = { .type = NLA_U16 },
2685         [RTA_ENCAP]             = { .type = NLA_NESTED },
2686 };
2687
2688 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2689                               struct fib6_config *cfg)
2690 {
2691         struct rtmsg *rtm;
2692         struct nlattr *tb[RTA_MAX+1];
2693         unsigned int pref;
2694         int err;
2695
2696         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2697         if (err < 0)
2698                 goto errout;
2699
2700         err = -EINVAL;
2701         rtm = nlmsg_data(nlh);
2702         memset(cfg, 0, sizeof(*cfg));
2703
2704         cfg->fc_table = rtm->rtm_table;
2705         cfg->fc_dst_len = rtm->rtm_dst_len;
2706         cfg->fc_src_len = rtm->rtm_src_len;
2707         cfg->fc_flags = RTF_UP;
2708         cfg->fc_protocol = rtm->rtm_protocol;
2709         cfg->fc_type = rtm->rtm_type;
2710
2711         if (rtm->rtm_type == RTN_UNREACHABLE ||
2712             rtm->rtm_type == RTN_BLACKHOLE ||
2713             rtm->rtm_type == RTN_PROHIBIT ||
2714             rtm->rtm_type == RTN_THROW)
2715                 cfg->fc_flags |= RTF_REJECT;
2716
2717         if (rtm->rtm_type == RTN_LOCAL)
2718                 cfg->fc_flags |= RTF_LOCAL;
2719
2720         if (rtm->rtm_flags & RTM_F_CLONED)
2721                 cfg->fc_flags |= RTF_CACHE;
2722
2723         cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2724         cfg->fc_nlinfo.nlh = nlh;
2725         cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2726
2727         if (tb[RTA_GATEWAY]) {
2728                 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
2729                 cfg->fc_flags |= RTF_GATEWAY;
2730         }
2731
2732         if (tb[RTA_DST]) {
2733                 int plen = (rtm->rtm_dst_len + 7) >> 3;
2734
2735                 if (nla_len(tb[RTA_DST]) < plen)
2736                         goto errout;
2737
2738                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2739         }
2740
2741         if (tb[RTA_SRC]) {
2742                 int plen = (rtm->rtm_src_len + 7) >> 3;
2743
2744                 if (nla_len(tb[RTA_SRC]) < plen)
2745                         goto errout;
2746
2747                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2748         }
2749
2750         if (tb[RTA_PREFSRC])
2751                 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
2752
2753         if (tb[RTA_OIF])
2754                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2755
2756         if (tb[RTA_PRIORITY])
2757                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2758
2759         if (tb[RTA_METRICS]) {
2760                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2761                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2762         }
2763
2764         if (tb[RTA_TABLE])
2765                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2766
2767         if (tb[RTA_MULTIPATH]) {
2768                 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2769                 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2770         }
2771
2772         if (tb[RTA_PREF]) {
2773                 pref = nla_get_u8(tb[RTA_PREF]);
2774                 if (pref != ICMPV6_ROUTER_PREF_LOW &&
2775                     pref != ICMPV6_ROUTER_PREF_HIGH)
2776                         pref = ICMPV6_ROUTER_PREF_MEDIUM;
2777                 cfg->fc_flags |= RTF_PREF(pref);
2778         }
2779
2780         if (tb[RTA_ENCAP])
2781                 cfg->fc_encap = tb[RTA_ENCAP];
2782
2783         if (tb[RTA_ENCAP_TYPE])
2784                 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
2785
2786         err = 0;
2787 errout:
2788         return err;
2789 }
2790
2791 struct rt6_nh {
2792         struct rt6_info *rt6_info;
2793         struct fib6_config r_cfg;
2794         struct mx6_config mxc;
2795         struct list_head next;
2796 };
2797
2798 static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
2799 {
2800         struct rt6_nh *nh;
2801
2802         list_for_each_entry(nh, rt6_nh_list, next) {
2803                 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n",
2804                         &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
2805                         nh->r_cfg.fc_ifindex);
2806         }
2807 }
2808
2809 static int ip6_route_info_append(struct list_head *rt6_nh_list,
2810                                  struct rt6_info *rt, struct fib6_config *r_cfg)
2811 {
2812         struct rt6_nh *nh;
2813         struct rt6_info *rtnh;
2814         int err = -EEXIST;
2815
2816         list_for_each_entry(nh, rt6_nh_list, next) {
2817                 /* check if rt6_info already exists */
2818                 rtnh = nh->rt6_info;
2819
2820                 if (rtnh->dst.dev == rt->dst.dev &&
2821                     rtnh->rt6i_idev == rt->rt6i_idev &&
2822                     ipv6_addr_equal(&rtnh->rt6i_gateway,
2823                                     &rt->rt6i_gateway))
2824                         return err;
2825         }
2826
2827         nh = kzalloc(sizeof(*nh), GFP_KERNEL);
2828         if (!nh)
2829                 return -ENOMEM;
2830         nh->rt6_info = rt;
2831         err = ip6_convert_metrics(&nh->mxc, r_cfg);
2832         if (err) {
2833                 kfree(nh);
2834                 return err;
2835         }
2836         memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
2837         list_add_tail(&nh->next, rt6_nh_list);
2838
2839         return 0;
2840 }
2841
2842 static int ip6_route_multipath_add(struct fib6_config *cfg)
2843 {
2844         struct fib6_config r_cfg;
2845         struct rtnexthop *rtnh;
2846         struct rt6_info *rt;
2847         struct rt6_nh *err_nh;
2848         struct rt6_nh *nh, *nh_safe;
2849         int remaining;
2850         int attrlen;
2851         int err = 1;
2852         int nhn = 0;
2853         int replace = (cfg->fc_nlinfo.nlh &&
2854                        (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
2855         LIST_HEAD(rt6_nh_list);
2856
2857         remaining = cfg->fc_mp_len;
2858         rtnh = (struct rtnexthop *)cfg->fc_mp;
2859
2860         /* Parse a Multipath Entry and build a list (rt6_nh_list) of
2861          * rt6_info structs per nexthop
2862          */
2863         while (rtnh_ok(rtnh, remaining)) {
2864                 memcpy(&r_cfg, cfg, sizeof(*cfg));
2865                 if (rtnh->rtnh_ifindex)
2866                         r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2867
2868                 attrlen = rtnh_attrlen(rtnh);
2869                 if (attrlen > 0) {
2870                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2871
2872                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2873                         if (nla) {
2874                                 r_cfg.fc_gateway = nla_get_in6_addr(nla);
2875                                 r_cfg.fc_flags |= RTF_GATEWAY;
2876                         }
2877                         r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
2878                         nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
2879                         if (nla)
2880                                 r_cfg.fc_encap_type = nla_get_u16(nla);
2881                 }
2882
2883                 rt = ip6_route_info_create(&r_cfg);
2884                 if (IS_ERR(rt)) {
2885                         err = PTR_ERR(rt);
2886                         rt = NULL;
2887                         goto cleanup;
2888                 }
2889
2890                 err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
2891                 if (err) {
2892                         dst_free(&rt->dst);
2893                         goto cleanup;
2894                 }
2895
2896                 rtnh = rtnh_next(rtnh, &remaining);
2897         }
2898
2899         err_nh = NULL;
2900         list_for_each_entry(nh, &rt6_nh_list, next) {
2901                 err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc);
2902                 /* nh->rt6_info is used or freed at this point, reset to NULL*/
2903                 nh->rt6_info = NULL;
2904                 if (err) {
2905                         if (replace && nhn)
2906                                 ip6_print_replace_route_err(&rt6_nh_list);
2907                         err_nh = nh;
2908                         goto add_errout;
2909                 }
2910
2911                 /* Because each route is added like a single route we remove
2912                  * these flags after the first nexthop: if there is a collision,
2913                  * we have already failed to add the first nexthop:
2914                  * fib6_add_rt2node() has rejected it; when replacing, old
2915                  * nexthops have been replaced by first new, the rest should
2916                  * be added to it.
2917                  */
2918                 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
2919                                                      NLM_F_REPLACE);
2920                 nhn++;
2921         }
2922
2923         goto cleanup;
2924
2925 add_errout:
2926         /* Delete routes that were already added */
2927         list_for_each_entry(nh, &rt6_nh_list, next) {
2928                 if (err_nh == nh)
2929                         break;
2930                 ip6_route_del(&nh->r_cfg);
2931         }
2932
2933 cleanup:
2934         list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
2935                 if (nh->rt6_info)
2936                         dst_free(&nh->rt6_info->dst);
2937                 kfree(nh->mxc.mx);
2938                 list_del(&nh->next);
2939                 kfree(nh);
2940         }
2941
2942         return err;
2943 }
2944
2945 static int ip6_route_multipath_del(struct fib6_config *cfg)
2946 {
2947         struct fib6_config r_cfg;
2948         struct rtnexthop *rtnh;
2949         int remaining;
2950         int attrlen;
2951         int err = 1, last_err = 0;
2952
2953         remaining = cfg->fc_mp_len;
2954         rtnh = (struct rtnexthop *)cfg->fc_mp;
2955
2956         /* Parse a Multipath Entry */
2957         while (rtnh_ok(rtnh, remaining)) {
2958                 memcpy(&r_cfg, cfg, sizeof(*cfg));
2959                 if (rtnh->rtnh_ifindex)
2960                         r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2961
2962                 attrlen = rtnh_attrlen(rtnh);
2963                 if (attrlen > 0) {
2964                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2965
2966                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2967                         if (nla) {
2968                                 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2969                                 r_cfg.fc_flags |= RTF_GATEWAY;
2970                         }
2971                 }
2972                 err = ip6_route_del(&r_cfg);
2973                 if (err)
2974                         last_err = err;
2975
2976                 rtnh = rtnh_next(rtnh, &remaining);
2977         }
2978
2979         return last_err;
2980 }
2981
2982 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
2983 {
2984         struct fib6_config cfg;
2985         int err;
2986
2987         err = rtm_to_fib6_config(skb, nlh, &cfg);
2988         if (err < 0)
2989                 return err;
2990
2991         if (cfg.fc_mp)
2992                 return ip6_route_multipath_del(&cfg);
2993         else
2994                 return ip6_route_del(&cfg);
2995 }
2996
2997 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
2998 {
2999         struct fib6_config cfg;
3000         int err;
3001
3002         err = rtm_to_fib6_config(skb, nlh, &cfg);
3003         if (err < 0)
3004                 return err;
3005
3006         if (cfg.fc_mp)
3007                 return ip6_route_multipath_add(&cfg);
3008         else
3009                 return ip6_route_add(&cfg);
3010 }
3011
3012 static inline size_t rt6_nlmsg_size(struct rt6_info *rt)
3013 {
3014         return NLMSG_ALIGN(sizeof(struct rtmsg))
3015                + nla_total_size(16) /* RTA_SRC */
3016                + nla_total_size(16) /* RTA_DST */
3017                + nla_total_size(16) /* RTA_GATEWAY */
3018                + nla_total_size(16) /* RTA_PREFSRC */
3019                + nla_total_size(4) /* RTA_TABLE */
3020                + nla_total_size(4) /* RTA_IIF */
3021                + nla_total_size(4) /* RTA_OIF */
3022                + nla_total_size(4) /* RTA_PRIORITY */
3023                + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
3024                + nla_total_size(sizeof(struct rta_cacheinfo))
3025                + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
3026                + nla_total_size(1) /* RTA_PREF */
3027                + lwtunnel_get_encap_size(rt->dst.lwtstate);
3028 }
3029
3030 static int rt6_fill_node(struct net *net,
3031                          struct sk_buff *skb, struct rt6_info *rt,
3032                          struct in6_addr *dst, struct in6_addr *src,
3033                          int iif, int type, u32 portid, u32 seq,
3034                          int prefix, int nowait, unsigned int flags)
3035 {
3036         u32 metrics[RTAX_MAX];
3037         struct rtmsg *rtm;
3038         struct nlmsghdr *nlh;
3039         long expires;
3040         u32 table;
3041
3042         if (prefix) {   /* user wants prefix routes only */
3043                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
3044                         /* success since this is not a prefix route */
3045                         return 1;
3046                 }
3047         }
3048
3049         nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
3050         if (!nlh)
3051                 return -EMSGSIZE;
3052
3053         rtm = nlmsg_data(nlh);
3054         rtm->rtm_family = AF_INET6;
3055         rtm->rtm_dst_len = rt->rt6i_dst.plen;
3056         rtm->rtm_src_len = rt->rt6i_src.plen;
3057         rtm->rtm_tos = 0;
3058         if (rt->rt6i_table)
3059                 table = rt->rt6i_table->tb6_id;
3060         else
3061                 table = RT6_TABLE_UNSPEC;
3062         rtm->rtm_table = table;
3063         if (nla_put_u32(skb, RTA_TABLE, table))
3064                 goto nla_put_failure;
3065         if (rt->rt6i_flags & RTF_REJECT) {
3066                 switch (rt->dst.error) {
3067                 case -EINVAL:
3068                         rtm->rtm_type = RTN_BLACKHOLE;
3069                         break;
3070                 case -EACCES:
3071                         rtm->rtm_type = RTN_PROHIBIT;
3072                         break;
3073                 case -EAGAIN:
3074                         rtm->rtm_type = RTN_THROW;
3075                         break;
3076                 default:
3077                         rtm->rtm_type = RTN_UNREACHABLE;
3078                         break;
3079                 }
3080         }
3081         else if (rt->rt6i_flags & RTF_LOCAL)
3082                 rtm->rtm_type = RTN_LOCAL;
3083         else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
3084                 rtm->rtm_type = RTN_LOCAL;
3085         else
3086                 rtm->rtm_type = RTN_UNICAST;
3087         rtm->rtm_flags = 0;
3088         if (!netif_carrier_ok(rt->dst.dev)) {
3089                 rtm->rtm_flags |= RTNH_F_LINKDOWN;
3090                 if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
3091                         rtm->rtm_flags |= RTNH_F_DEAD;
3092         }
3093         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
3094         rtm->rtm_protocol = rt->rt6i_protocol;
3095         if (rt->rt6i_flags & RTF_DYNAMIC)
3096                 rtm->rtm_protocol = RTPROT_REDIRECT;
3097         else if (rt->rt6i_flags & RTF_ADDRCONF) {
3098                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
3099                         rtm->rtm_protocol = RTPROT_RA;
3100                 else
3101                         rtm->rtm_protocol = RTPROT_KERNEL;
3102         }
3103
3104         if (rt->rt6i_flags & RTF_CACHE)
3105                 rtm->rtm_flags |= RTM_F_CLONED;
3106
3107         if (dst) {
3108                 if (nla_put_in6_addr(skb, RTA_DST, dst))
3109                         goto nla_put_failure;
3110                 rtm->rtm_dst_len = 128;
3111         } else if (rtm->rtm_dst_len)
3112                 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
3113                         goto nla_put_failure;
3114 #ifdef CONFIG_IPV6_SUBTREES
3115         if (src) {
3116                 if (nla_put_in6_addr(skb, RTA_SRC, src))
3117                         goto nla_put_failure;
3118                 rtm->rtm_src_len = 128;
3119         } else if (rtm->rtm_src_len &&
3120                    nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
3121                 goto nla_put_failure;
3122 #endif
3123         if (iif) {
3124 #ifdef CONFIG_IPV6_MROUTE
3125                 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
3126                         int err = ip6mr_get_route(net, skb, rtm, nowait);
3127                         if (err <= 0) {
3128                                 if (!nowait) {
3129                                         if (err == 0)
3130                                                 return 0;
3131                                         goto nla_put_failure;
3132                                 } else {
3133                                         if (err == -EMSGSIZE)
3134                                                 goto nla_put_failure;
3135                                 }
3136                         }
3137                 } else
3138 #endif
3139                         if (nla_put_u32(skb, RTA_IIF, iif))
3140                                 goto nla_put_failure;
3141         } else if (dst) {
3142                 struct in6_addr saddr_buf;
3143                 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
3144                     nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3145                         goto nla_put_failure;
3146         }
3147
3148         if (rt->rt6i_prefsrc.plen) {
3149                 struct in6_addr saddr_buf;
3150                 saddr_buf = rt->rt6i_prefsrc.addr;
3151                 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3152                         goto nla_put_failure;
3153         }
3154
3155         memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
3156         if (rt->rt6i_pmtu)
3157                 metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
3158         if (rtnetlink_put_metrics(skb, metrics) < 0)
3159                 goto nla_put_failure;
3160
3161         if (rt->rt6i_flags & RTF_GATEWAY) {
3162                 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
3163                         goto nla_put_failure;
3164         }
3165
3166         if (rt->dst.dev &&
3167             nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
3168                 goto nla_put_failure;
3169         if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
3170                 goto nla_put_failure;
3171
3172         expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
3173
3174         if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
3175                 goto nla_put_failure;
3176
3177         if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
3178                 goto nla_put_failure;
3179
3180         lwtunnel_fill_encap(skb, rt->dst.lwtstate);
3181
3182         nlmsg_end(skb, nlh);
3183         return 0;
3184
3185 nla_put_failure:
3186         nlmsg_cancel(skb, nlh);
3187         return -EMSGSIZE;
3188 }
3189
3190 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
3191 {
3192         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
3193         int prefix;
3194
3195         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3196                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
3197                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
3198         } else
3199                 prefix = 0;
3200
3201         return rt6_fill_node(arg->net,
3202                      arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
3203                      NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
3204                      prefix, 0, NLM_F_MULTI);
3205 }
3206
3207 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
3208 {
3209         struct net *net = sock_net(in_skb->sk);
3210         struct nlattr *tb[RTA_MAX+1];
3211         struct rt6_info *rt;
3212         struct sk_buff *skb;
3213         struct rtmsg *rtm;
3214         struct flowi6 fl6;
3215         int err, iif = 0, oif = 0;
3216
3217         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
3218         if (err < 0)
3219                 goto errout;
3220
3221         err = -EINVAL;
3222         memset(&fl6, 0, sizeof(fl6));
3223
3224         if (tb[RTA_SRC]) {
3225                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
3226                         goto errout;
3227
3228                 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
3229         }
3230
3231         if (tb[RTA_DST]) {
3232                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
3233                         goto errout;
3234
3235                 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
3236         }
3237
3238         if (tb[RTA_IIF])
3239                 iif = nla_get_u32(tb[RTA_IIF]);
3240
3241         if (tb[RTA_OIF])
3242                 oif = nla_get_u32(tb[RTA_OIF]);
3243
3244         if (tb[RTA_MARK])
3245                 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
3246
3247         if (iif) {
3248                 struct net_device *dev;
3249                 int flags = 0;
3250
3251                 dev = __dev_get_by_index(net, iif);
3252                 if (!dev) {
3253                         err = -ENODEV;
3254                         goto errout;
3255                 }
3256
3257                 fl6.flowi6_iif = iif;
3258
3259                 if (!ipv6_addr_any(&fl6.saddr))
3260                         flags |= RT6_LOOKUP_F_HAS_SADDR;
3261
3262                 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
3263                                                                flags);
3264         } else {
3265                 fl6.flowi6_oif = oif;
3266
3267                 if (netif_index_is_l3_master(net, oif)) {
3268                         fl6.flowi6_flags = FLOWI_FLAG_L3MDEV_SRC |
3269                                            FLOWI_FLAG_SKIP_NH_OIF;
3270                 }
3271
3272                 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
3273         }
3274
3275         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
3276         if (!skb) {
3277                 ip6_rt_put(rt);
3278                 err = -ENOBUFS;
3279                 goto errout;
3280         }
3281
3282         /* Reserve room for dummy headers, this skb can pass
3283            through good chunk of routing engine.
3284          */
3285         skb_reset_mac_header(skb);
3286         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
3287
3288         skb_dst_set(skb, &rt->dst);
3289
3290         err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
3291                             RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
3292                             nlh->nlmsg_seq, 0, 0, 0);
3293         if (err < 0) {
3294                 kfree_skb(skb);
3295                 goto errout;
3296         }
3297
3298         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
3299 errout:
3300         return err;
3301 }
3302
3303 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
3304                      unsigned int nlm_flags)
3305 {
3306         struct sk_buff *skb;
3307         struct net *net = info->nl_net;
3308         u32 seq;
3309         int err;
3310
3311         err = -ENOBUFS;
3312         seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3313
3314         skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3315         if (!skb)
3316                 goto errout;
3317
3318         err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
3319                                 event, info->portid, seq, 0, 0, nlm_flags);
3320         if (err < 0) {
3321                 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
3322                 WARN_ON(err == -EMSGSIZE);
3323                 kfree_skb(skb);
3324                 goto errout;
3325         }
3326         rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
3327                     info->nlh, gfp_any());
3328         return;
3329 errout:
3330         if (err < 0)
3331                 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
3332 }
3333
3334 static int ip6_route_dev_notify(struct notifier_block *this,
3335                                 unsigned long event, void *ptr)
3336 {
3337         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
3338         struct net *net = dev_net(dev);
3339
3340         if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
3341                 net->ipv6.ip6_null_entry->dst.dev = dev;
3342                 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
3343 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3344                 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
3345                 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
3346                 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
3347                 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
3348 #endif
3349         }
3350
3351         return NOTIFY_OK;
3352 }
3353
3354 /*
3355  *      /proc
3356  */
3357
3358 #ifdef CONFIG_PROC_FS
3359
3360 static const struct file_operations ipv6_route_proc_fops = {
3361         .owner          = THIS_MODULE,
3362         .open           = ipv6_route_open,
3363         .read           = seq_read,
3364         .llseek         = seq_lseek,
3365         .release        = seq_release_net,
3366 };
3367
3368 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3369 {
3370         struct net *net = (struct net *)seq->private;
3371         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
3372                    net->ipv6.rt6_stats->fib_nodes,
3373                    net->ipv6.rt6_stats->fib_route_nodes,
3374                    net->ipv6.rt6_stats->fib_rt_alloc,
3375                    net->ipv6.rt6_stats->fib_rt_entries,
3376                    net->ipv6.rt6_stats->fib_rt_cache,
3377                    dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
3378                    net->ipv6.rt6_stats->fib_discarded_routes);
3379
3380         return 0;
3381 }
3382
3383 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
3384 {
3385         return single_open_net(inode, file, rt6_stats_seq_show);
3386 }
3387
3388 static const struct file_operations rt6_stats_seq_fops = {
3389         .owner   = THIS_MODULE,
3390         .open    = rt6_stats_seq_open,
3391         .read    = seq_read,
3392         .llseek  = seq_lseek,
3393         .release = single_release_net,
3394 };
3395 #endif  /* CONFIG_PROC_FS */
3396
3397 #ifdef CONFIG_SYSCTL
3398
3399 static
3400 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
3401                               void __user *buffer, size_t *lenp, loff_t *ppos)
3402 {
3403         struct net *net;
3404         int delay;
3405         if (!write)
3406                 return -EINVAL;
3407
3408         net = (struct net *)ctl->extra1;
3409         delay = net->ipv6.sysctl.flush_delay;
3410         proc_dointvec(ctl, write, buffer, lenp, ppos);
3411         fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
3412         return 0;
3413 }
3414
3415 struct ctl_table ipv6_route_table_template[] = {
3416         {
3417                 .procname       =       "flush",
3418                 .data           =       &init_net.ipv6.sysctl.flush_delay,
3419                 .maxlen         =       sizeof(int),
3420                 .mode           =       0200,
3421                 .proc_handler   =       ipv6_sysctl_rtcache_flush
3422         },
3423         {
3424                 .procname       =       "gc_thresh",
3425                 .data           =       &ip6_dst_ops_template.gc_thresh,
3426                 .maxlen         =       sizeof(int),
3427                 .mode           =       0644,
3428                 .proc_handler   =       proc_dointvec,
3429         },
3430         {
3431                 .procname       =       "max_size",
3432                 .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
3433                 .maxlen         =       sizeof(int),
3434                 .mode           =       0644,
3435                 .proc_handler   =       proc_dointvec,
3436         },
3437         {
3438                 .procname       =       "gc_min_interval",
3439                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3440                 .maxlen         =       sizeof(int),
3441                 .mode           =       0644,
3442                 .proc_handler   =       proc_dointvec_jiffies,
3443         },
3444         {
3445                 .procname       =       "gc_timeout",
3446                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
3447                 .maxlen         =       sizeof(int),
3448                 .mode           =       0644,
3449                 .proc_handler   =       proc_dointvec_jiffies,
3450         },
3451         {
3452                 .procname       =       "gc_interval",
3453                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
3454                 .maxlen         =       sizeof(int),
3455                 .mode           =       0644,
3456                 .proc_handler   =       proc_dointvec_jiffies,
3457         },
3458         {
3459                 .procname       =       "gc_elasticity",
3460                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
3461                 .maxlen         =       sizeof(int),
3462                 .mode           =       0644,
3463                 .proc_handler   =       proc_dointvec,
3464         },
3465         {
3466                 .procname       =       "mtu_expires",
3467                 .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
3468                 .maxlen         =       sizeof(int),
3469                 .mode           =       0644,
3470                 .proc_handler   =       proc_dointvec_jiffies,
3471         },
3472         {
3473                 .procname       =       "min_adv_mss",
3474                 .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
3475                 .maxlen         =       sizeof(int),
3476                 .mode           =       0644,
3477                 .proc_handler   =       proc_dointvec,
3478         },
3479         {
3480                 .procname       =       "gc_min_interval_ms",
3481                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3482                 .maxlen         =       sizeof(int),
3483                 .mode           =       0644,
3484                 .proc_handler   =       proc_dointvec_ms_jiffies,
3485         },
3486         { }
3487 };
3488
3489 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
3490 {
3491         struct ctl_table *table;
3492
3493         table = kmemdup(ipv6_route_table_template,
3494                         sizeof(ipv6_route_table_template),
3495                         GFP_KERNEL);
3496
3497         if (table) {
3498                 table[0].data = &net->ipv6.sysctl.flush_delay;
3499                 table[0].extra1 = net;
3500                 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
3501                 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3502                 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3503                 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3504                 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3505                 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3506                 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3507                 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
3508                 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3509
3510                 /* Don't export sysctls to unprivileged users */
3511                 if (net->user_ns != &init_user_ns)
3512                         table[0].procname = NULL;
3513         }
3514
3515         return table;
3516 }
3517 #endif
3518
3519 static int __net_init ip6_route_net_init(struct net *net)
3520 {
3521         int ret = -ENOMEM;
3522
3523         memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3524                sizeof(net->ipv6.ip6_dst_ops));
3525
3526         if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3527                 goto out_ip6_dst_ops;
3528
3529         net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3530                                            sizeof(*net->ipv6.ip6_null_entry),
3531                                            GFP_KERNEL);
3532         if (!net->ipv6.ip6_null_entry)
3533                 goto out_ip6_dst_entries;
3534         net->ipv6.ip6_null_entry->dst.path =
3535                 (struct dst_entry *)net->ipv6.ip6_null_entry;
3536         net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3537         dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3538                          ip6_template_metrics, true);
3539
3540 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3541         net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3542                                                sizeof(*net->ipv6.ip6_prohibit_entry),
3543                                                GFP_KERNEL);
3544         if (!net->ipv6.ip6_prohibit_entry)
3545                 goto out_ip6_null_entry;
3546         net->ipv6.ip6_prohibit_entry->dst.path =
3547                 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3548         net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3549         dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3550                          ip6_template_metrics, true);
3551
3552         net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3553                                                sizeof(*net->ipv6.ip6_blk_hole_entry),
3554                                                GFP_KERNEL);
3555         if (!net->ipv6.ip6_blk_hole_entry)
3556                 goto out_ip6_prohibit_entry;
3557         net->ipv6.ip6_blk_hole_entry->dst.path =
3558                 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3559         net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3560         dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3561                          ip6_template_metrics, true);
3562 #endif
3563
3564         net->ipv6.sysctl.flush_delay = 0;
3565         net->ipv6.sysctl.ip6_rt_max_size = 4096;
3566         net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3567         net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3568         net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3569         net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3570         net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3571         net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3572
3573         net->ipv6.ip6_rt_gc_expire = 30*HZ;
3574
3575         ret = 0;
3576 out:
3577         return ret;
3578
3579 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3580 out_ip6_prohibit_entry:
3581         kfree(net->ipv6.ip6_prohibit_entry);
3582 out_ip6_null_entry:
3583         kfree(net->ipv6.ip6_null_entry);
3584 #endif
3585 out_ip6_dst_entries:
3586         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3587 out_ip6_dst_ops:
3588         goto out;
3589 }
3590
3591 static void __net_exit ip6_route_net_exit(struct net *net)
3592 {
3593         kfree(net->ipv6.ip6_null_entry);
3594 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3595         kfree(net->ipv6.ip6_prohibit_entry);
3596         kfree(net->ipv6.ip6_blk_hole_entry);
3597 #endif
3598         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3599 }
3600
3601 static int __net_init ip6_route_net_init_late(struct net *net)
3602 {
3603 #ifdef CONFIG_PROC_FS
3604         proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3605         proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3606 #endif
3607         return 0;
3608 }
3609
3610 static void __net_exit ip6_route_net_exit_late(struct net *net)
3611 {
3612 #ifdef CONFIG_PROC_FS
3613         remove_proc_entry("ipv6_route", net->proc_net);
3614         remove_proc_entry("rt6_stats", net->proc_net);
3615 #endif
3616 }
3617
3618 static struct pernet_operations ip6_route_net_ops = {
3619         .init = ip6_route_net_init,
3620         .exit = ip6_route_net_exit,
3621 };
3622
3623 static int __net_init ipv6_inetpeer_init(struct net *net)
3624 {
3625         struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3626
3627         if (!bp)
3628                 return -ENOMEM;
3629         inet_peer_base_init(bp);
3630         net->ipv6.peers = bp;
3631         return 0;
3632 }
3633
3634 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3635 {
3636         struct inet_peer_base *bp = net->ipv6.peers;
3637
3638         net->ipv6.peers = NULL;
3639         inetpeer_invalidate_tree(bp);
3640         kfree(bp);
3641 }
3642
3643 static struct pernet_operations ipv6_inetpeer_ops = {
3644         .init   =       ipv6_inetpeer_init,
3645         .exit   =       ipv6_inetpeer_exit,
3646 };
3647
3648 static struct pernet_operations ip6_route_net_late_ops = {
3649         .init = ip6_route_net_init_late,
3650         .exit = ip6_route_net_exit_late,
3651 };
3652
3653 static struct notifier_block ip6_route_dev_notifier = {
3654         .notifier_call = ip6_route_dev_notify,
3655         .priority = 0,
3656 };
3657
3658 int __init ip6_route_init(void)
3659 {
3660         int ret;
3661         int cpu;
3662
3663         ret = -ENOMEM;
3664         ip6_dst_ops_template.kmem_cachep =
3665                 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3666                                   SLAB_HWCACHE_ALIGN, NULL);
3667         if (!ip6_dst_ops_template.kmem_cachep)
3668                 goto out;
3669
3670         ret = dst_entries_init(&ip6_dst_blackhole_ops);
3671         if (ret)
3672                 goto out_kmem_cache;
3673
3674         ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3675         if (ret)
3676                 goto out_dst_entries;
3677
3678         ret = register_pernet_subsys(&ip6_route_net_ops);
3679         if (ret)
3680                 goto out_register_inetpeer;
3681
3682         ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3683
3684         /* Registering of the loopback is done before this portion of code,
3685          * the loopback reference in rt6_info will not be taken, do it
3686          * manually for init_net */
3687         init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3688         init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3689   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3690         init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3691         init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3692         init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3693         init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3694   #endif
3695         ret = fib6_init();
3696         if (ret)
3697                 goto out_register_subsys;
3698
3699         ret = xfrm6_init();
3700         if (ret)
3701                 goto out_fib6_init;
3702
3703         ret = fib6_rules_init();
3704         if (ret)
3705                 goto xfrm6_init;
3706
3707         ret = register_pernet_subsys(&ip6_route_net_late_ops);
3708         if (ret)
3709                 goto fib6_rules_init;
3710
3711         ret = -ENOBUFS;
3712         if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3713             __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3714             __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3715                 goto out_register_late_subsys;
3716
3717         ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3718         if (ret)
3719                 goto out_register_late_subsys;
3720
3721         for_each_possible_cpu(cpu) {
3722                 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
3723
3724                 INIT_LIST_HEAD(&ul->head);
3725                 spin_lock_init(&ul->lock);
3726         }
3727
3728 out:
3729         return ret;
3730
3731 out_register_late_subsys:
3732         unregister_pernet_subsys(&ip6_route_net_late_ops);
3733 fib6_rules_init:
3734         fib6_rules_cleanup();
3735 xfrm6_init:
3736         xfrm6_fini();
3737 out_fib6_init:
3738         fib6_gc_cleanup();
3739 out_register_subsys:
3740         unregister_pernet_subsys(&ip6_route_net_ops);
3741 out_register_inetpeer:
3742         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3743 out_dst_entries:
3744         dst_entries_destroy(&ip6_dst_blackhole_ops);
3745 out_kmem_cache:
3746         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3747         goto out;
3748 }
3749
3750 void ip6_route_cleanup(void)
3751 {
3752         unregister_netdevice_notifier(&ip6_route_dev_notifier);
3753         unregister_pernet_subsys(&ip6_route_net_late_ops);
3754         fib6_rules_cleanup();
3755         xfrm6_fini();
3756         fib6_gc_cleanup();
3757         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3758         unregister_pernet_subsys(&ip6_route_net_ops);
3759         dst_entries_destroy(&ip6_dst_blackhole_ops);
3760         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3761 }