Source: net/ipv6/route.c, from karo-tx-linux.git (git.kernelconcepts.de)
Commit context: "net: fix percpu memory leaks"
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13
14 /*      Changes:
15  *
16  *      YOSHIFUJI Hideaki @USAGI
17  *              reworked default router selection.
18  *              - respect outgoing interface
19  *              - select from (probably) reachable routers (i.e.
20  *              routers in REACHABLE, STALE, DELAY or PROBE states).
21  *              - always select the same router if it is (probably)
22  *              reachable.  otherwise, round-robin the list.
23  *      Ville Nuorvala
24  *              Fixed routing subtrees.
25  */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/dst_metadata.h>
58 #include <net/xfrm.h>
59 #include <net/netevent.h>
60 #include <net/netlink.h>
61 #include <net/nexthop.h>
62 #include <net/lwtunnel.h>
63 #include <net/ip_tunnels.h>
64 #include <net/l3mdev.h>
65
66 #include <asm/uaccess.h>
67
68 #ifdef CONFIG_SYSCTL
69 #include <linux/sysctl.h>
70 #endif
71
/*
 * Neighbour-unreachability (NUD) verdicts used by route scoring.
 * Negative values mean the route should not be used as-is; see
 * rt6_check_neigh() for where each is produced and rt6_score_route()
 * / find_match() for how they are consumed.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD = -3,		/* route unusable, skip entirely */
	RT6_NUD_FAIL_PROBE = -2,	/* neighbour in NUD_FAILED state */
	RT6_NUD_FAIL_DO_RR = -1,	/* no neighbour entry; round-robin */
	RT6_NUD_SUCCEED = 1		/* neighbour (probably) reachable */
};
78
/* Forward declarations: dst_ops callbacks, packet-drop handlers and
 * route-scoring helpers defined later in this file.
 */
static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
static unsigned int	 ip6_mtu(const struct dst_entry *dst);
static struct dst_entry *ip6_negative_advice(struct dst_entry *);
static void		ip6_dst_destroy(struct dst_entry *);
static void		ip6_dst_ifdown(struct dst_entry *,
				       struct net_device *dev, int how);
static int		 ip6_dst_gc(struct dst_ops *ops);

static int		ip6_pkt_discard(struct sk_buff *skb);
static int		ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
static int		ip6_pkt_prohibit(struct sk_buff *skb);
static int		ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
static void		ip6_link_failure(struct sk_buff *skb);
static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
					   struct sk_buff *skb, u32 mtu);
static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
					struct sk_buff *skb);
static void		rt6_dst_from_metrics_check(struct rt6_info *rt);
static int rt6_score_route(struct rt6_info *rt, int oif, int strict);

/* Helpers for routes learned from RA Route Information Options. */
#ifdef CONFIG_IPV6_ROUTE_INFO
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex,
					   unsigned int pref);
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex);
#endif
110
/*
 * Per-CPU list of "uncached" (DST_NOCACHE) routes, kept so their device
 * references can be rewired to loopback when a device goes away; see
 * rt6_uncached_list_flush_dev().
 */
struct uncached_list {
	spinlock_t		lock;	/* protects head */
	struct list_head	head;
};

static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
117
118 static void rt6_uncached_list_add(struct rt6_info *rt)
119 {
120         struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
121
122         rt->dst.flags |= DST_NOCACHE;
123         rt->rt6i_uncached_list = ul;
124
125         spin_lock_bh(&ul->lock);
126         list_add_tail(&rt->rt6i_uncached, &ul->head);
127         spin_unlock_bh(&ul->lock);
128 }
129
130 static void rt6_uncached_list_del(struct rt6_info *rt)
131 {
132         if (!list_empty(&rt->rt6i_uncached)) {
133                 struct uncached_list *ul = rt->rt6i_uncached_list;
134
135                 spin_lock_bh(&ul->lock);
136                 list_del(&rt->rt6i_uncached);
137                 spin_unlock_bh(&ul->lock);
138         }
139 }
140
/*
 * @dev is going away: walk every CPU's uncached list and move routes
 * still referencing @dev over to the namespace's loopback device, both
 * the inet6_dev reference and the dst's device reference.  Nothing to
 * do when the loopback device itself is being removed.
 */
static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
{
	struct net_device *loopback_dev = net->loopback_dev;
	int cpu;

	if (dev == loopback_dev)
		return;

	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
		struct rt6_info *rt;

		spin_lock_bh(&ul->lock);
		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
			struct inet6_dev *rt_idev = rt->rt6i_idev;
			struct net_device *rt_dev = rt->dst.dev;

			if (rt_idev->dev == dev) {
				rt->rt6i_idev = in6_dev_get(loopback_dev);
				in6_dev_put(rt_idev);
			}

			if (rt_dev == dev) {
				rt->dst.dev = loopback_dev;
				/* hold loopback before dropping the old dev */
				dev_hold(rt->dst.dev);
				dev_put(rt_dev);
			}
		}
		spin_unlock_bh(&ul->lock);
	}
}
172
/* Per-cpu clones (RTF_PCPU) write metrics through to their parent
 * route (dst.from) rather than keeping a private copy.
 */
static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
{
	return dst_metrics_write_ptr(rt->dst.from);
}
177
178 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
179 {
180         struct rt6_info *rt = (struct rt6_info *)dst;
181
182         if (rt->rt6i_flags & RTF_PCPU)
183                 return rt6_pcpu_cow_metrics(rt);
184         else if (rt->rt6i_flags & RTF_CACHE)
185                 return NULL;
186         else
187                 return dst_cow_metrics_generic(dst, old);
188 }
189
190 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
191                                              struct sk_buff *skb,
192                                              const void *daddr)
193 {
194         struct in6_addr *p = &rt->rt6i_gateway;
195
196         if (!ipv6_addr_any(p))
197                 return (const void *) p;
198         else if (skb)
199                 return &ipv6_hdr(skb)->daddr;
200         return daddr;
201 }
202
203 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
204                                           struct sk_buff *skb,
205                                           const void *daddr)
206 {
207         struct rt6_info *rt = (struct rt6_info *) dst;
208         struct neighbour *n;
209
210         daddr = choose_neigh_daddr(rt, skb, daddr);
211         n = __ipv6_neigh_lookup(dst->dev, daddr);
212         if (n)
213                 return n;
214         return neigh_create(&nd_tbl, daddr, dst->dev);
215 }
216
/*
 * Template for the IPv6 dst_ops used when allocating routes
 * (net->ipv6.ip6_dst_ops; see __ip6_dst_alloc()).
 */
static struct dst_ops ip6_dst_ops_template = {
	.family			=	AF_INET6,
	.gc			=	ip6_dst_gc,
	.gc_thresh		=	1024,
	.check			=	ip6_dst_check,
	.default_advmss		=	ip6_default_advmss,
	.mtu			=	ip6_mtu,
	.cow_metrics		=	ipv6_cow_metrics,
	.destroy		=	ip6_dst_destroy,
	.ifdown			=	ip6_dst_ifdown,
	.negative_advice	=	ip6_negative_advice,
	.link_failure		=	ip6_link_failure,
	.update_pmtu		=	ip6_rt_update_pmtu,
	.redirect		=	rt6_do_redirect,
	.local_out		=	__ip6_local_out,
	.neigh_lookup		=	ip6_neigh_lookup,
};
234
235 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
236 {
237         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
238
239         return mtu ? : dst->dev->mtu;
240 }
241
/* Blackhole dsts deliberately ignore PMTU updates. */
static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
					 struct sk_buff *skb, u32 mtu)
{
}
246
/* Blackhole dsts deliberately ignore redirects. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
}
251
/* dst_ops for blackhole entries: PMTU and redirect events are no-ops. */
static struct dst_ops ip6_dst_blackhole_ops = {
	.family			=	AF_INET6,
	.destroy		=	ip6_dst_destroy,
	.check			=	ip6_dst_check,
	.mtu			=	ip6_blackhole_mtu,
	.default_advmss		=	ip6_default_advmss,
	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
	.redirect		=	ip6_rt_blackhole_redirect,
	.cow_metrics		=	dst_cow_metrics_generic,
	.neigh_lookup		=	ip6_neigh_lookup,
};
263
/* Metrics for the template routes below: hop limit explicitly unset. */
static const u32 ip6_template_metrics[RTAX_MAX] = {
	[RTAX_HOPLIMIT - 1] = 0,
};
267
/*
 * Template for the "null" route: rejects all traffic with -ENETUNREACH.
 * Returned by lookups that find no usable route.
 */
static const struct rt6_info ip6_null_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -ENETUNREACH,
		.input		= ip6_pkt_discard,
		.output		= ip6_pkt_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,	/* worst possible metric */
	.rt6i_ref	= ATOMIC_INIT(1),
};
282
283 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
284
/* Template for the "prohibit" route: rejects traffic with -EACCES. */
static const struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,	/* worst possible metric */
	.rt6i_ref	= ATOMIC_INIT(1),
};
299
/* Template for the "blackhole" route: silently discards via dst_discard. */
static const struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,	/* worst possible metric */
	.rt6i_ref	= ATOMIC_INIT(1),
};
314
315 #endif
316
/*
 * Zero the IPv6-private part of *rt — everything past the embedded
 * dst_entry (dst + 1), which the dst allocator already set up — and
 * reset the list heads so list_empty() checks work before insertion.
 */
static void rt6_info_init(struct rt6_info *rt)
{
	struct dst_entry *dst = &rt->dst;

	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
	INIT_LIST_HEAD(&rt->rt6i_siblings);
	INIT_LIST_HEAD(&rt->rt6i_uncached);
}
325
326 /* allocate dst with ip6_dst_ops */
327 static struct rt6_info *__ip6_dst_alloc(struct net *net,
328                                         struct net_device *dev,
329                                         int flags)
330 {
331         struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
332                                         0, DST_OBSOLETE_FORCE_CHK, flags);
333
334         if (rt)
335                 rt6_info_init(rt);
336
337         return rt;
338 }
339
340 static struct rt6_info *ip6_dst_alloc(struct net *net,
341                                       struct net_device *dev,
342                                       int flags)
343 {
344         struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
345
346         if (rt) {
347                 rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
348                 if (rt->rt6i_pcpu) {
349                         int cpu;
350
351                         for_each_possible_cpu(cpu) {
352                                 struct rt6_info **p;
353
354                                 p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
355                                 /* no one shares rt */
356                                 *p =  NULL;
357                         }
358                 } else {
359                         dst_destroy((struct dst_entry *)rt);
360                         return NULL;
361                 }
362         }
363
364         return rt;
365 }
366
/*
 * dst_ops->destroy: release everything a rt6_info owns — generic
 * metrics, the per-cpu clone array, its slot on the uncached list, the
 * inet6_dev reference, and the reference on the parent (from) dst.
 */
static void ip6_dst_destroy(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct dst_entry *from = dst->from;
	struct inet6_dev *idev;

	dst_destroy_metrics_generic(dst);
	free_percpu(rt->rt6i_pcpu);
	rt6_uncached_list_del(rt);

	idev = rt->rt6i_idev;
	if (idev) {
		rt->rt6i_idev = NULL;
		in6_dev_put(idev);
	}

	/* clear the pointer before dropping the reference */
	dst->from = NULL;
	dst_release(from);
}
386
387 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
388                            int how)
389 {
390         struct rt6_info *rt = (struct rt6_info *)dst;
391         struct inet6_dev *idev = rt->rt6i_idev;
392         struct net_device *loopback_dev =
393                 dev_net(dev)->loopback_dev;
394
395         if (dev != loopback_dev) {
396                 if (idev && idev->dev == dev) {
397                         struct inet6_dev *loopback_idev =
398                                 in6_dev_get(loopback_dev);
399                         if (loopback_idev) {
400                                 rt->rt6i_idev = loopback_idev;
401                                 in6_dev_put(idev);
402                         }
403                 }
404         }
405 }
406
407 static bool rt6_check_expired(const struct rt6_info *rt)
408 {
409         if (rt->rt6i_flags & RTF_EXPIRES) {
410                 if (time_after(jiffies, rt->dst.expires))
411                         return true;
412         } else if (rt->dst.from) {
413                 return rt6_check_expired((struct rt6_info *) rt->dst.from);
414         }
415         return false;
416 }
417
/* Multipath route selection:
 *   Hash based function using packet header and flowlabel.
 * Adapted from fib_info_hashfn()
 */
static int rt6_info_hash_nhsfn(unsigned int candidate_count,
			       const struct flowi6 *fl6)
{
	/* map the flow hash onto [0, candidate_count) */
	return get_hash_from_flowi6(fl6) % candidate_count;
}
427
/*
 * Multipath selection: hash the flow over @match and its siblings and
 * walk the sibling list to the chosen entry.  Falls back to @match if
 * the chosen sibling scores unusable (rt6_score_route() < 0).
 */
static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
					     struct flowi6 *fl6, int oif,
					     int strict)
{
	struct rt6_info *sibling, *next_sibling;
	int route_choosen;

	route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
	/* Don't change the route, if route_choosen == 0
	 * (siblings does not include ourself)
	 */
	if (route_choosen)
		list_for_each_entry_safe(sibling, next_sibling,
				&match->rt6i_siblings, rt6i_siblings) {
			/* count down to the route_choosen-th sibling */
			route_choosen--;
			if (route_choosen == 0) {
				if (rt6_score_route(sibling, oif, strict) < 0)
					break;
				match = sibling;
				break;
			}
		}
	return match;
}
452
453 /*
454  *      Route lookup. Any table->tb6_lock is implied.
455  */
456
/*
 * Narrow @rt (head of a leaf's route list) to the entry matching the
 * requested output interface @oif, or owning the source address @saddr
 * when no oif is given.  A loopback route may stand in for the
 * requested interface unless RT6_LOOKUP_F_IFACE demands a strict match,
 * in which case a total miss yields the null entry.  With no
 * constraints at all, the head route is returned unchanged.
 */
static inline struct rt6_info *rt6_device_match(struct net *net,
						    struct rt6_info *rt,
						    const struct in6_addr *saddr,
						    int oif,
						    int flags)
{
	struct rt6_info *local = NULL;
	struct rt6_info *sprt;

	if (!oif && ipv6_addr_any(saddr))
		goto out;

	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
		struct net_device *dev = sprt->dst.dev;

		if (oif) {
			if (dev->ifindex == oif)
				return sprt;
			if (dev->flags & IFF_LOOPBACK) {
				if (!sprt->rt6i_idev ||
				    sprt->rt6i_idev->dev->ifindex != oif) {
					if (flags & RT6_LOOKUP_F_IFACE)
						continue;
					/* prefer a loopback candidate whose
					 * idev does match oif
					 */
					if (local &&
					    local->rt6i_idev->dev->ifindex == oif)
						continue;
				}
				local = sprt;
			}
		} else {
			/* no oif: match by source address ownership */
			if (ipv6_chk_addr(net, saddr, dev,
					  flags & RT6_LOOKUP_F_IFACE))
				return sprt;
		}
	}

	if (oif) {
		if (local)
			return local;

		if (flags & RT6_LOOKUP_F_IFACE)
			return net->ipv6.ip6_null_entry;
	}
out:
	return rt;
}
503
504 #ifdef CONFIG_IPV6_ROUTER_PREF
/* Deferred router-probe request; freed by rt6_probe_deferred(). */
struct __rt6_probe_work {
	struct work_struct work;
	struct in6_addr target;		/* gateway address to solicit */
	struct net_device *dev;		/* held; released by the worker */
};
510
511 static void rt6_probe_deferred(struct work_struct *w)
512 {
513         struct in6_addr mcaddr;
514         struct __rt6_probe_work *work =
515                 container_of(w, struct __rt6_probe_work, work);
516
517         addrconf_addr_solict_mult(&work->target, &mcaddr);
518         ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, NULL);
519         dev_put(work->dev);
520         kfree(work);
521 }
522
/*
 * Router Reachability Probing: if @rt's gateway has no neighbour entry,
 * or has an invalid one that was last updated more than
 * rtr_probe_interval ago, schedule a deferred neighbour solicitation.
 * The NS is sent from a workqueue since we are under rcu_read_lock_bh
 * here; rate limiting uses __neigh_set_probe_once().
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct __rt6_probe_work *work;
	struct neighbour *neigh;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
		return;
	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		if (neigh->nud_state & NUD_VALID)
			goto out;

		work = NULL;
		write_lock(&neigh->lock);
		if (!(neigh->nud_state & NUD_VALID) &&
		    time_after(jiffies,
			       neigh->updated +
			       rt->rt6i_idev->cnf.rtr_probe_interval)) {
			work = kmalloc(sizeof(*work), GFP_ATOMIC);
			if (work)
				__neigh_set_probe_once(neigh);
		}
		write_unlock(&neigh->lock);
	} else {
		/* no neighbour entry at all: always worth a probe */
		work = kmalloc(sizeof(*work), GFP_ATOMIC);
	}

	if (work) {
		INIT_WORK(&work->work, rt6_probe_deferred);
		work->target = rt->rt6i_gateway;
		/* reference dropped by rt6_probe_deferred() */
		dev_hold(rt->dst.dev);
		work->dev = rt->dst.dev;
		schedule_work(&work->work);
	}

out:
	rcu_read_unlock_bh();
}
569 #else
/* Router probing is only built with CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
573 #endif
574
575 /*
576  * Default Router Selection (RFC 2461 6.3.6)
577  */
578 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
579 {
580         struct net_device *dev = rt->dst.dev;
581         if (!oif || dev->ifindex == oif)
582                 return 2;
583         if ((dev->flags & IFF_LOOPBACK) &&
584             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
585                 return 1;
586         return 0;
587 }
588
/*
 * Classify reachability of @rt's gateway neighbour.  Routes without a
 * gateway (or marked RTF_NONEXTHOP) always succeed.  Without
 * CONFIG_IPV6_ROUTER_PREF, a missing neighbour entry asks the caller to
 * round-robin (RT6_NUD_FAIL_DO_RR) and an invalid entry fails hard.
 */
static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
{
	struct neighbour *neigh;
	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;

	if (rt->rt6i_flags & RTF_NONEXTHOP ||
	    !(rt->rt6i_flags & RTF_GATEWAY))
		return RT6_NUD_SUCCEED;

	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		read_lock(&neigh->lock);
		if (neigh->nud_state & NUD_VALID)
			ret = RT6_NUD_SUCCEED;
#ifdef CONFIG_IPV6_ROUTER_PREF
		/* with router preference, tolerate anything but NUD_FAILED */
		else if (!(neigh->nud_state & NUD_FAILED))
			ret = RT6_NUD_SUCCEED;
		else
			ret = RT6_NUD_FAIL_PROBE;
#endif
		read_unlock(&neigh->lock);
	} else {
		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
	}
	rcu_read_unlock_bh();

	return ret;
}
619
/*
 * Score @rt for route selection: device match in the low bits, router
 * preference (when configured) shifted above them.  Returns a negative
 * rt6_nud_state value when the route must be skipped or round-robined
 * (only possible under RT6_LOOKUP_F_REACHABLE or a strict iface miss).
 */
static int rt6_score_route(struct rt6_info *rt, int oif,
			   int strict)
{
	int m;

	m = rt6_check_dev(rt, oif);
	if (!m && (strict & RT6_LOOKUP_F_IFACE))
		return RT6_NUD_FAIL_HARD;
#ifdef CONFIG_IPV6_ROUTER_PREF
	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
#endif
	if (strict & RT6_LOOKUP_F_REACHABLE) {
		int n = rt6_check_neigh(rt);
		if (n < 0)
			return n;
	}
	return m;
}
638
/*
 * Compare @rt against the current best @match: score it and take it
 * when it beats *@mpri.  Expired routes and (when configured) routes on
 * carrier-less devices are skipped.  RT6_NUD_FAIL_DO_RR demotes the
 * score to 0 and records the round-robin request in *@do_rr.
 */
static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
				   int *mpri, struct rt6_info *match,
				   bool *do_rr)
{
	int m;
	bool match_do_rr = false;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct net_device *dev = rt->dst.dev;

	if (dev && !netif_carrier_ok(dev) &&
	    idev->cnf.ignore_routes_with_linkdown)
		goto out;

	if (rt6_check_expired(rt))
		goto out;

	m = rt6_score_route(rt, oif, strict);
	if (m == RT6_NUD_FAIL_DO_RR) {
		match_do_rr = true;
		m = 0; /* lowest valid score */
	} else if (m == RT6_NUD_FAIL_HARD) {
		goto out;
	}

	if (strict & RT6_LOOKUP_F_REACHABLE)
		rt6_probe(rt);

	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
	if (m > *mpri) {
		*do_rr = match_do_rr;
		*mpri = m;
		match = rt;
	}
out:
	return match;
}
675
/*
 * Scan @fn's routes with metric @metric, starting at @rr_head and
 * wrapping around via fn->leaf, and return the best-scoring one.  When
 * nothing at that metric matched but routes with other metrics exist
 * (first one remembered in @cont), score those as a fallback.
 */
static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
				     struct rt6_info *rr_head,
				     u32 metric, int oif, int strict,
				     bool *do_rr)
{
	struct rt6_info *rt, *match, *cont;
	int mpri = -1;

	match = NULL;
	cont = NULL;
	/* first pass: from rr_head to the end of the same-metric run */
	for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
		if (rt->rt6i_metric != metric) {
			cont = rt;
			break;
		}

		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	}

	/* second pass: wrap around from the leaf head up to rr_head */
	for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
		if (rt->rt6i_metric != metric) {
			cont = rt;
			break;
		}

		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	}

	if (match || !cont)
		return match;

	/* fallback: consider the other-metric routes */
	for (rt = cont; rt; rt = rt->dst.rt6_next)
		match = find_match(rt, oif, strict, &mpri, match, do_rr);

	return match;
}
712
/*
 * Pick the route to use from @fn's leaf list, round-robining among
 * equal-metric entries: when find_rr_leaf() asks for it (do_rr),
 * advance fn->rr_ptr to the next same-metric entry.  Returns the null
 * entry when nothing matched.
 */
static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
{
	struct rt6_info *match, *rt0;
	struct net *net;
	bool do_rr = false;

	rt0 = fn->rr_ptr;
	if (!rt0)
		fn->rr_ptr = rt0 = fn->leaf;

	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
			     &do_rr);

	if (do_rr) {
		struct rt6_info *next = rt0->dst.rt6_next;

		/* no entries matched; do round-robin */
		if (!next || next->rt6i_metric != rt0->rt6i_metric)
			next = fn->leaf;

		if (next != rt0)
			fn->rr_ptr = next;
	}

	net = dev_net(rt0->dst.dev);
	return match ? match : net->ipv6.ip6_null_entry;
}
740
/* True when @rt has a gateway or needs no next-hop resolution. */
static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
{
	return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
}
745
746 #ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Handle a Route Information Option received in a Router Advertisement
 * from gateway @gwaddr on @dev: validate the option, then add, refresh
 * or (on zero lifetime) delete the corresponding RTF_ROUTEINFO route.
 * Returns 0 on success, -EINVAL for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		/* prefixes longer than 64 bits need at least 2 length units */
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	/* look up an existing route for this prefix/gateway pair */
	if (rinfo->prefix_len == 0)
		rt = rt6_get_dflt_router(gwaddr, dev);
	else
		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev->ifindex);

	/* zero lifetime withdraws the route */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		/* refresh the advertised preference on the existing route */
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime))
			rt6_clean_expires(rt);
		else
			rt6_set_expires(rt, jiffies + HZ * lifetime);

		ip6_rt_put(rt);
	}
	return 0;
}
820 #endif
821
/*
 * Walk back up the fib tree from @fn until a node carrying route
 * information (RTN_RTINFO) is found, descending into source-address
 * subtrees on the way up.  Returns NULL once the tree root is reached
 * without a match.
 */
static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
					struct in6_addr *saddr)
{
	struct fib6_node *pn;
	while (1) {
		if (fn->fn_flags & RTN_TL_ROOT)
			return NULL;
		pn = fn->parent;
		/* coming out of a subtree: re-lookup by source address */
		if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
			fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
		else
			fn = pn;
		if (fn->fn_flags & RTN_RTINFO)
			return fn;
	}
}
838
/*
 * Table lookup under tb6_lock: find the longest-prefix node for the
 * flow, narrow the candidates by device/source, apply multipath
 * selection, and backtrack up the tree when only the null entry was
 * found.  Takes a reference on the result via dst_use().
 */
static struct rt6_info *ip6_pol_route_lookup(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	rt = fn->leaf;
	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
		rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
	if (rt == net->ipv6.ip6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto restart;
	}
	dst_use(&rt->dst, jiffies);
	read_unlock_bh(&table->tb6_lock);
	return rt;

}
863
864 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
865                                     int flags)
866 {
867         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
868 }
869 EXPORT_SYMBOL_GPL(ip6_route_lookup);
870
871 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
872                             const struct in6_addr *saddr, int oif, int strict)
873 {
874         struct flowi6 fl6 = {
875                 .flowi6_oif = oif,
876                 .daddr = *daddr,
877         };
878         struct dst_entry *dst;
879         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
880
881         if (saddr) {
882                 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
883                 flags |= RT6_LOOKUP_F_HAS_SADDR;
884         }
885
886         dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
887         if (dst->error == 0)
888                 return (struct rt6_info *) dst;
889
890         dst_release(dst);
891
892         return NULL;
893 }
894 EXPORT_SYMBOL(rt6_lookup);
895
/* ip6_ins_rt is called with FREE table->tb6_lock.
   It takes a new route entry; if the addition fails for any reason,
   the route is freed.  In any case, if the caller does not hold a
   reference, the route may be destroyed.
 */
901
/* Insert @rt into its fib6 table under the table write lock, passing
 * netlink notification info and pre-built metrics to fib6_add().
 * Returns the fib6_add() result.
 */
static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
			struct mx6_config *mxc)
{
	int err;
	struct fib6_table *table;

	table = rt->rt6i_table;
	write_lock_bh(&table->tb6_lock);
	err = fib6_add(&table->tb6_root, rt, info, mxc);
	write_unlock_bh(&table->tb6_lock);

	return err;
}
915
/* Insert @rt with default netlink info (the route's own netns) and no
 * extra metrics.
 */
int ip6_ins_rt(struct rt6_info *rt)
{
	struct nl_info info = {	.nl_net = dev_net(rt->dst.dev), };
	struct mx6_config mxc = { .mx = NULL, };

	return __ip6_ins_rt(rt, &info, &mxc);
}
923
/* Allocate an RTF_CACHE clone of @ort for host @daddr (and @saddr
 * under CONFIG_IPV6_SUBTREES).
 *
 * If @ort is itself a cache or per-cpu copy, clone from the route it
 * was derived from (dst.from) instead.  The clone is a /128 host
 * route; for non-gateway routes whose prefix is wider than /128 but
 * whose prefix address equals @daddr, RTF_ANYCAST is set.  Returns
 * NULL on allocation failure.
 */
static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
					   const struct in6_addr *daddr,
					   const struct in6_addr *saddr)
{
	struct rt6_info *rt;

	/*
	 *	Clone the route.
	 */

	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
		ort = (struct rt6_info *)ort->dst.from;

	rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);

	if (!rt)
		return NULL;

	ip6_rt_copy_init(rt, ort);
	rt->rt6i_flags |= RTF_CACHE;
	rt->rt6i_metric = 0;
	rt->dst.flags |= DST_HOST;
	rt->rt6i_dst.addr = *daddr;
	rt->rt6i_dst.plen = 128;

	if (!rt6_is_gw_or_nonexthop(ort)) {
		if (ort->rt6i_dst.plen != 128 &&
		    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
			rt->rt6i_flags |= RTF_ANYCAST;
#ifdef CONFIG_IPV6_SUBTREES
		if (rt->rt6i_src.plen && saddr) {
			rt->rt6i_src.addr = *saddr;
			rt->rt6i_src.plen = 128;
		}
#endif
	}

	return rt;
}
963
/* Allocate a per-cpu (RTF_PCPU) copy of @rt, sharing its device and
 * dst flags.  Returns NULL on allocation failure.
 */
static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
{
	struct rt6_info *pcpu_rt;

	pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
				  rt->dst.dev, rt->dst.flags);

	if (!pcpu_rt)
		return NULL;
	ip6_rt_copy_init(pcpu_rt, rt);
	pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
	pcpu_rt->rt6i_flags |= RTF_PCPU;
	return pcpu_rt;
}
978
/* It should be called with read_lock_bh(&tb6_lock) acquired.
 * Returns this cpu's cached copy of @rt with a reference held and
 * metrics synced from the parent, or NULL if none exists yet.
 */
static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
{
	struct rt6_info *pcpu_rt, **p;

	p = this_cpu_ptr(rt->rt6i_pcpu);
	pcpu_rt = *p;

	if (pcpu_rt) {
		dst_hold(&pcpu_rt->dst);
		rt6_dst_from_metrics_check(pcpu_rt);
	}
	return pcpu_rt;
}
993
/* Create and publish this cpu's copy of @rt.
 *
 * On allocation failure the (referenced) null entry is returned.  The
 * cmpxchg() under tb6_lock loses gracefully to a concurrent
 * publisher: our copy is destroyed and the winner's is used.  If @rt
 * lost its percpu array (removed from the tree meanwhile), @rt itself
 * is returned.  Every return path hands back a referenced dst.
 */
static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
{
	struct fib6_table *table = rt->rt6i_table;
	struct rt6_info *pcpu_rt, *prev, **p;

	pcpu_rt = ip6_rt_pcpu_alloc(rt);
	if (!pcpu_rt) {
		struct net *net = dev_net(rt->dst.dev);

		dst_hold(&net->ipv6.ip6_null_entry->dst);
		return net->ipv6.ip6_null_entry;
	}

	read_lock_bh(&table->tb6_lock);
	if (rt->rt6i_pcpu) {
		p = this_cpu_ptr(rt->rt6i_pcpu);
		prev = cmpxchg(p, NULL, pcpu_rt);
		if (prev) {
			/* If someone did it before us, return prev instead */
			dst_destroy(&pcpu_rt->dst);
			pcpu_rt = prev;
		}
	} else {
		/* rt has been removed from the fib6 tree
		 * before we have a chance to acquire the read_lock.
		 * In this case, don't bother to create a pcpu rt
		 * since rt is going away anyway.  The next
		 * dst_check() will trigger a re-lookup.
		 */
		dst_destroy(&pcpu_rt->dst);
		pcpu_rt = rt;
	}
	dst_hold(&pcpu_rt->dst);
	rt6_dst_from_metrics_check(pcpu_rt);
	read_unlock_bh(&table->tb6_lock);
	return pcpu_rt;
}
1031
/* Core caching policy lookup shared by the input and output paths.
 *
 * Selects the best route for @fl6 in @table via rt6_select(),
 * backtracking to less specific prefixes and, if a pass restricted to
 * reachable routers found nothing, retrying without
 * RT6_LOOKUP_F_REACHABLE.  The result is then one of:
 *  - the route itself (null entry or RTF_CACHE entries),
 *  - an uncached RTF_CACHE clone for FLOWI_FLAG_KNOWN_NH without a
 *    gateway (the skb daddr seen by neighbor lookup may differ from
 *    the fl6->daddr used here),
 *  - otherwise this cpu's per-cpu copy, created on demand.
 * Every return path hands back a referenced dst.
 */
static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
				      struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn, *saved_fn;
	struct rt6_info *rt;
	int strict = 0;

	strict |= flags & RT6_LOOKUP_F_IFACE;
	/* Hosts (forwarding disabled) must prefer reachable routers. */
	if (net->ipv6.devconf_all->forwarding == 0)
		strict |= RT6_LOOKUP_F_REACHABLE;

	read_lock_bh(&table->tb6_lock);

	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
	saved_fn = fn;

	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
		oif = 0;

redo_rt6_select:
	rt = rt6_select(fn, oif, strict);
	if (rt->rt6i_nsiblings)
		rt = rt6_multipath_select(rt, fl6, oif, strict);
	if (rt == net->ipv6.ip6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto redo_rt6_select;
		else if (strict & RT6_LOOKUP_F_REACHABLE) {
			/* also consider unreachable route */
			strict &= ~RT6_LOOKUP_F_REACHABLE;
			fn = saved_fn;
			goto redo_rt6_select;
		}
	}


	if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
		dst_use(&rt->dst, jiffies);
		read_unlock_bh(&table->tb6_lock);

		rt6_dst_from_metrics_check(rt);
		return rt;
	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
			    !(rt->rt6i_flags & RTF_GATEWAY))) {
		/* Create a RTF_CACHE clone which will not be
		 * owned by the fib6 tree.  It is for the special case where
		 * the daddr in the skb during the neighbor look-up is different
		 * from the fl6->daddr used to look-up route here.
		 */

		struct rt6_info *uncached_rt;

		dst_use(&rt->dst, jiffies);
		read_unlock_bh(&table->tb6_lock);

		uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
		dst_release(&rt->dst);

		if (uncached_rt)
			rt6_uncached_list_add(uncached_rt);
		else
			uncached_rt = net->ipv6.ip6_null_entry;

		dst_hold(&uncached_rt->dst);
		return uncached_rt;

	} else {
		/* Get a percpu copy */

		struct rt6_info *pcpu_rt;

		rt->dst.lastuse = jiffies;
		rt->dst.__use++;
		pcpu_rt = rt6_get_pcpu_route(rt);

		if (pcpu_rt) {
			read_unlock_bh(&table->tb6_lock);
		} else {
			/* We have to do the read_unlock first
			 * because rt6_make_pcpu_route() may trigger
			 * ip6_dst_gc() which will take the write_lock.
			 */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);
			pcpu_rt = rt6_make_pcpu_route(rt);
			dst_release(&rt->dst);
		}

		return pcpu_rt;

	}
}
1124
1125 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
1126                                             struct flowi6 *fl6, int flags)
1127 {
1128         return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
1129 }
1130
1131 static struct dst_entry *ip6_route_input_lookup(struct net *net,
1132                                                 struct net_device *dev,
1133                                                 struct flowi6 *fl6, int flags)
1134 {
1135         if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1136                 flags |= RT6_LOOKUP_F_IFACE;
1137
1138         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1139 }
1140
/* Attach a route to @skb for a received IPv6 packet.
 *
 * Builds the flow from the IPv6 header (addresses, flow label, mark,
 * next header) with the iif resolved through l3mdev, carries a
 * collected tunnel key when present, and replaces the skb's dst with
 * the lookup result.
 */
void ip6_route_input(struct sk_buff *skb)
{
	const struct ipv6hdr *iph = ipv6_hdr(skb);
	struct net *net = dev_net(skb->dev);
	int flags = RT6_LOOKUP_F_HAS_SADDR;
	struct ip_tunnel_info *tun_info;
	struct flowi6 fl6 = {
		.flowi6_iif = l3mdev_fib_oif(skb->dev),
		.daddr = iph->daddr,
		.saddr = iph->saddr,
		.flowlabel = ip6_flowinfo(iph),
		.flowi6_mark = skb->mark,
		.flowi6_proto = iph->nexthdr,
	};

	tun_info = skb_tunnel_info(skb);
	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
		fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
	skb_dst_drop(skb);
	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
}
1162
1163 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1164                                              struct flowi6 *fl6, int flags)
1165 {
1166         return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1167 }
1168
/* Output route lookup.
 *
 * An L3 master device may supply the dst directly.  Otherwise the iif
 * is forced to loopback and lookup flags derive from the socket and
 * flow: a bound device, a scoped daddr, or an oif with an unspecified
 * saddr all require an interface match; a set saddr enables source
 * matching, else the socket's source-address preferences apply.
 */
struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
				    struct flowi6 *fl6)
{
	struct dst_entry *dst;
	int flags = 0;
	bool any_src;

	dst = l3mdev_rt6_dst_by_oif(net, fl6);
	if (dst)
		return dst;

	fl6->flowi6_iif = LOOPBACK_IFINDEX;

	any_src = ipv6_addr_any(&fl6->saddr);
	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
	    (fl6->flowi6_oif && any_src))
		flags |= RT6_LOOKUP_F_IFACE;

	if (!any_src)
		flags |= RT6_LOOKUP_F_HAS_SADDR;
	else if (sk)
		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);

	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
}
EXPORT_SYMBOL(ip6_route_output);
1195
/* Build a blackhole dst from @dst_orig: addressing and metrics are
 * copied but input/output discard all packets.  Consumes the caller's
 * reference on @dst_orig.  Returns the new dst or ERR_PTR(-ENOMEM).
 */
struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
	struct dst_entry *new = NULL;

	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
	if (rt) {
		rt6_info_init(rt);

		new = &rt->dst;
		new->__use = 1;
		new->input = dst_discard;
		new->output = dst_discard_out;

		dst_copy_metrics(new, &ort->dst);
		rt->rt6i_idev = ort->rt6i_idev;
		if (rt->rt6i_idev)
			in6_dev_hold(rt->rt6i_idev);

		rt->rt6i_gateway = ort->rt6i_gateway;
		rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
		rt->rt6i_metric = 0;

		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
#ifdef CONFIG_IPV6_SUBTREES
		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif

		/* NOTE(review): dst_free() on the dst we are about to
		 * return looks surprising but appears deliberate -
		 * blackhole dsts live in no table; confirm against dst
		 * gc/refcount semantics before changing.
		 */
		dst_free(new);
	}

	dst_release(dst_orig);
	return new ? new : ERR_PTR(-ENOMEM);
}
1230
1231 /*
1232  *      Destination cache support functions
1233  */
1234
/* If @rt inherits metrics from its parent (dst.from) and the parent's
 * metrics pointer changed, re-point @rt's metrics at the parent's
 * (marked read-only).
 */
static void rt6_dst_from_metrics_check(struct rt6_info *rt)
{
	if (rt->dst.from &&
	    dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
		dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
}
1241
/* Validate a tree-owned route: its fib6 node must still exist, the
 * node's serial number must match @cookie, and the route must not be
 * expired.  Returns the dst if still valid, NULL if stale.
 */
static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
{
	if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
		return NULL;

	if (rt6_check_expired(rt))
		return NULL;

	return &rt->dst;
}
1252
/* Validate a derived route (per-cpu or uncached clone) by checking
 * the route it was cloned from (dst.from) against @cookie.
 */
static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
{
	if (rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
	    rt6_check((struct rt6_info *)(rt->dst.from), cookie))
		return &rt->dst;
	else
		return NULL;
}
1261
/* dst_ops->check hook: revalidate a cached dst against @cookie after
 * syncing inherited metrics.
 */
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
{
	struct rt6_info *rt;

	rt = (struct rt6_info *) dst;

	/* All IPV6 dsts are created with ->obsolete set to the value
	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
	 * into this function always.
	 */

	rt6_dst_from_metrics_check(rt);

	/* Derived copies (per-cpu or uncached) validate through their
	 * parent; tree-owned routes are checked directly.
	 */
	if ((rt->rt6i_flags & RTF_PCPU) || unlikely(dst->flags & DST_NOCACHE))
		return rt6_dst_from_check(rt, cookie);
	else
		return rt6_check(rt, cookie);
}
1280
/* dst_ops->negative_advice hook: discard an unusable dst.  Expired
 * cache entries are deleted from the table; non-cache entries just
 * lose the caller's reference.  Returns the dst if still usable,
 * otherwise NULL.
 */
static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *) dst;

	if (rt) {
		if (rt->rt6i_flags & RTF_CACHE) {
			if (rt6_check_expired(rt)) {
				ip6_del_rt(rt);
				dst = NULL;
			}
		} else {
			dst_release(dst);
			dst = NULL;
		}
	}
	return dst;
}
1298
/* dst_ops->link_failure hook: report unreachability to the sender and
 * invalidate the failing route - delete cache entries, or poison the
 * fib6 node's serial number for default routes so cached dsts fail
 * their next rt6_check().
 */
static void ip6_link_failure(struct sk_buff *skb)
{
	struct rt6_info *rt;

	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);

	rt = (struct rt6_info *) skb_dst(skb);
	if (rt) {
		if (rt->rt6i_flags & RTF_CACHE) {
			dst_hold(&rt->dst);
			ip6_del_rt(rt);
		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
			rt->rt6i_node->fn_sernum = -1;
		}
	}
}
1315
/* Record a new path MTU on @rt and (re)arm its expiry using the
 * ip6_rt_mtu_expires sysctl interval.
 */
static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
{
	struct net *net = dev_net(rt->dst.dev);

	rt->rt6i_flags |= RTF_MODIFIED;
	rt->rt6i_pmtu = mtu;
	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
}
1324
/* Apply a PMTU update to @dst for the flow identified by @iph or @sk.
 *
 * Local routes are ignored, as are MTUs not below the current one
 * (after clamping up to IPV6_MIN_MTU).  Cache routes are updated in
 * place; otherwise an RTF_CACHE clone is created, updated and
 * inserted into the tree.
 */
static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
				 const struct ipv6hdr *iph, u32 mtu)
{
	struct rt6_info *rt6 = (struct rt6_info *)dst;

	if (rt6->rt6i_flags & RTF_LOCAL)
		return;

	dst_confirm(dst);
	mtu = max_t(u32, mtu, IPV6_MIN_MTU);
	if (mtu >= dst_mtu(dst))
		return;

	if (rt6->rt6i_flags & RTF_CACHE) {
		rt6_do_update_pmtu(rt6, mtu);
	} else {
		const struct in6_addr *daddr, *saddr;
		struct rt6_info *nrt6;

		/* Take the flow addresses from the header if given,
		 * else from the socket; without either there is no
		 * flow to attach the new MTU to.
		 */
		if (iph) {
			daddr = &iph->daddr;
			saddr = &iph->saddr;
		} else if (sk) {
			daddr = &sk->sk_v6_daddr;
			saddr = &inet6_sk(sk)->saddr;
		} else {
			return;
		}
		nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
		if (nrt6) {
			rt6_do_update_pmtu(nrt6, mtu);

			/* ip6_ins_rt(nrt6) will bump the
			 * rt6->rt6i_node->fn_sernum
			 * which will fail the next rt6_check() and
			 * invalidate the sk->sk_dst_cache.
			 */
			ip6_ins_rt(nrt6);
		}
	}
}
1366
1367 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1368                                struct sk_buff *skb, u32 mtu)
1369 {
1370         __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1371 }
1372
/* Update the PMTU for the flow described by the IPv6 header at
 * @skb->data.  @mtu is in network byte order; a zero @mark falls back
 * to the netns reply mark for the skb.
 */
void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
		     int oif, u32 mark)
{
	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
	struct dst_entry *dst;
	struct flowi6 fl6;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_oif = oif;
	fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
	fl6.daddr = iph->daddr;
	fl6.saddr = iph->saddr;
	fl6.flowlabel = ip6_flowinfo(iph);

	dst = ip6_route_output(net, NULL, &fl6);
	if (!dst->error)
		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
	dst_release(dst);
}
EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1393
1394 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1395 {
1396         ip6_update_pmtu(skb, sock_net(sk), mtu,
1397                         sk->sk_bound_dev_if, sk->sk_mark);
1398 }
1399 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1400
/* Handle redirects */

/* Flow key for redirect validation.  The flowi6 member must stay
 * first: the struct is passed to fib6_rule_lookup() as a flowi6 and
 * cast back in __ip6_route_redirect().
 */
struct ip6rd_flowi {
	struct flowi6 fl6;
	struct in6_addr gateway;
};
1406
/* Lookup callback for redirect processing: find the route a received
 * redirect should apply to, holding a reference on the result.
 */
static struct rt6_info *__ip6_route_redirect(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6,
					     int flags)
{
	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
	struct rt6_info *rt;
	struct fib6_node *fn;

	/* Get the "current" route for this destination and
	 * check if the redirect has come from appropriate router.
	 *
	 * RFC 4861 specifies that redirects should only be
	 * accepted if they come from the nexthop to the target.
	 * Due to the way the routes are chosen, this notion
	 * is a bit fuzzy and one might need to check all possible
	 * routes.
	 */

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
		if (rt6_check_expired(rt))
			continue;
		if (rt->dst.error)
			break;
		if (!(rt->rt6i_flags & RTF_GATEWAY))
			continue;
		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
			continue;
		/* Only a route whose gateway is the redirecting router
		 * may be updated.
		 */
		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
			continue;
		break;
	}

	if (!rt)
		rt = net->ipv6.ip6_null_entry;
	else if (rt->dst.error) {
		rt = net->ipv6.ip6_null_entry;
		goto out;
	}

	if (rt == net->ipv6.ip6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto restart;
	}

out:
	dst_hold(&rt->dst);

	read_unlock_bh(&table->tb6_lock);

	return rt;
};
1463
1464 static struct dst_entry *ip6_route_redirect(struct net *net,
1465                                         const struct flowi6 *fl6,
1466                                         const struct in6_addr *gateway)
1467 {
1468         int flags = RT6_LOOKUP_F_HAS_SADDR;
1469         struct ip6rd_flowi rdfl;
1470
1471         rdfl.fl6 = *fl6;
1472         rdfl.gateway = *gateway;
1473
1474         return fib6_rule_lookup(net, &rdfl.fl6,
1475                                 flags, __ip6_route_redirect);
1476 }
1477
/* Process an ICMPv6 redirect for the flow described by the IPv6
 * header at @skb->data; the redirecting router is taken from the
 * outer header's source address.
 */
void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
{
	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
	struct dst_entry *dst;
	struct flowi6 fl6;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_iif = LOOPBACK_IFINDEX;
	fl6.flowi6_oif = oif;
	fl6.flowi6_mark = mark;
	fl6.daddr = iph->daddr;
	fl6.saddr = iph->saddr;
	fl6.flowlabel = ip6_flowinfo(iph);

	dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
	rt6_do_redirect(dst, NULL, skb);
	dst_release(dst);
}
EXPORT_SYMBOL_GPL(ip6_redirect);
1497
/* Redirect handling when only the rd_msg is usable: the flow's
 * destination comes from the redirect message's target, and the
 * original header's daddr serves as the flow saddr.
 */
void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
			    u32 mark)
{
	const struct ipv6hdr *iph = ipv6_hdr(skb);
	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
	struct dst_entry *dst;
	struct flowi6 fl6;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_iif = LOOPBACK_IFINDEX;
	fl6.flowi6_oif = oif;
	fl6.flowi6_mark = mark;
	fl6.daddr = msg->dest;
	fl6.saddr = iph->daddr;

	dst = ip6_route_redirect(net, &fl6, &iph->saddr);
	rt6_do_redirect(dst, NULL, skb);
	dst_release(dst);
}
1517
1518 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1519 {
1520         ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1521 }
1522 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1523
/* dst_ops->default_advmss hook: advertised MSS is the path MTU minus
 * IPv6 and TCP headers, clamped below by the ip6_rt_min_advmss sysctl
 * and above by IPV6_MAXPLEN (see comment below).
 */
static unsigned int ip6_default_advmss(const struct dst_entry *dst)
{
	struct net_device *dev = dst->dev;
	unsigned int mtu = dst_mtu(dst);
	struct net *net = dev_net(dev);

	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);

	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;

	/*
	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
	 * IPV6_MAXPLEN is also valid and means: "any MSS,
	 * rely only on pmtu discovery"
	 */
	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
		mtu = IPV6_MAXPLEN;
	return mtu;
}
1545
/* dst_ops->mtu hook: prefer a learned PMTU, then the RTAX_MTU metric,
 * then the device's IPv6 MTU, defaulting to IPV6_MIN_MTU; the result
 * is capped at IP6_MAX_MTU.
 */
static unsigned int ip6_mtu(const struct dst_entry *dst)
{
	const struct rt6_info *rt = (const struct rt6_info *)dst;
	unsigned int mtu = rt->rt6i_pmtu;
	struct inet6_dev *idev;

	if (mtu)
		goto out;

	mtu = dst_metric_raw(dst, RTAX_MTU);
	if (mtu)
		goto out;

	mtu = IPV6_MIN_MTU;

	rcu_read_lock();
	idev = __in6_dev_get(dst->dev);
	if (idev)
		mtu = idev->cnf.mtu6;
	rcu_read_unlock();

out:
	return min_t(unsigned int, mtu, IP6_MAX_MTU);
}
1570
/* ICMPv6 dsts live on this private list rather than in a fib table;
 * reaped by icmp6_dst_gc()/icmp6_clean_all() under icmp6_dst_lock.
 */
static struct dst_entry *icmp6_dst_gc_list;
static DEFINE_SPINLOCK(icmp6_dst_lock);
1573
/* Allocate a host route for sending an ICMPv6 packet along @fl6.
 *
 * The dst starts with one reference, is chained onto the private
 * icmp6 gc list instead of a fib table, and is finally passed through
 * xfrm_lookup().  Returns an ERR_PTR on failure.
 */
struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
				  struct flowi6 *fl6)
{
	struct dst_entry *dst;
	struct rt6_info *rt;
	struct inet6_dev *idev = in6_dev_get(dev);
	struct net *net = dev_net(dev);

	if (unlikely(!idev))
		return ERR_PTR(-ENODEV);

	rt = ip6_dst_alloc(net, dev, 0);
	if (unlikely(!rt)) {
		in6_dev_put(idev);
		dst = ERR_PTR(-ENOMEM);
		goto out;
	}

	rt->dst.flags |= DST_HOST;
	rt->dst.output  = ip6_output;
	atomic_set(&rt->dst.__refcnt, 1);
	rt->rt6i_gateway  = fl6->daddr;
	rt->rt6i_dst.addr = fl6->daddr;
	rt->rt6i_dst.plen = 128;
	rt->rt6i_idev     = idev;
	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);

	spin_lock_bh(&icmp6_dst_lock);
	rt->dst.next = icmp6_dst_gc_list;
	icmp6_dst_gc_list = &rt->dst;
	spin_unlock_bh(&icmp6_dst_lock);

	fib6_force_start_gc(net);

	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);

out:
	return dst;
}
1613
/* Reap unreferenced entries from the icmp6 dst list.  Returns the
 * number of entries still referenced (non-zero means more gc work
 * remains).
 */
int icmp6_dst_gc(void)
{
	struct dst_entry *dst, **pprev;
	int more = 0;

	spin_lock_bh(&icmp6_dst_lock);
	pprev = &icmp6_dst_gc_list;

	while ((dst = *pprev) != NULL) {
		if (!atomic_read(&dst->__refcnt)) {
			/* Unlink and free; *pprev now skips this entry. */
			*pprev = dst->next;
			dst_free(dst);
		} else {
			pprev = &dst->next;
			++more;
		}
	}

	spin_unlock_bh(&icmp6_dst_lock);

	return more;
}
1636
/* Remove and free every icmp6 list entry for which @func(rt, @arg)
 * returns non-zero, regardless of reference count.
 */
static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
			    void *arg)
{
	struct dst_entry *dst, **pprev;

	spin_lock_bh(&icmp6_dst_lock);
	pprev = &icmp6_dst_gc_list;
	while ((dst = *pprev) != NULL) {
		struct rt6_info *rt = (struct rt6_info *) dst;
		if (func(rt, arg)) {
			*pprev = dst->next;
			dst_free(dst);
		} else {
			pprev = &dst->next;
		}
	}
	spin_unlock_bh(&icmp6_dst_lock);
}
1655
/* dst_ops->gc hook.
 *
 * Skips the run when the minimum gc interval has not elapsed and the
 * entry count is within ip6_rt_max_size; otherwise runs fib6 gc with
 * an expiry that grows each pass, resets to half the gc timeout when
 * the table drops below gc_thresh, and decays by the elasticity
 * sysctl.  Returns non-zero while still over rt_max_size.
 */
static int ip6_dst_gc(struct dst_ops *ops)
{
	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
	int entries;

	entries = dst_entries_get_fast(ops);
	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
	    entries <= rt_max_size)
		goto out;

	net->ipv6.ip6_rt_gc_expire++;
	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
	entries = dst_entries_get_slow(ops);
	if (entries < ops->gc_thresh)
		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
out:
	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
	return entries > rt_max_size;
}
1680
1681 static int ip6_convert_metrics(struct mx6_config *mxc,
1682                                const struct fib6_config *cfg)
1683 {
1684         bool ecn_ca = false;
1685         struct nlattr *nla;
1686         int remaining;
1687         u32 *mp;
1688
1689         if (!cfg->fc_mx)
1690                 return 0;
1691
1692         mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1693         if (unlikely(!mp))
1694                 return -ENOMEM;
1695
1696         nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1697                 int type = nla_type(nla);
1698                 u32 val;
1699
1700                 if (!type)
1701                         continue;
1702                 if (unlikely(type > RTAX_MAX))
1703                         goto err;
1704
1705                 if (type == RTAX_CC_ALGO) {
1706                         char tmp[TCP_CA_NAME_MAX];
1707
1708                         nla_strlcpy(tmp, nla, sizeof(tmp));
1709                         val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
1710                         if (val == TCP_CA_UNSPEC)
1711                                 goto err;
1712                 } else {
1713                         val = nla_get_u32(nla);
1714                 }
1715                 if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
1716                         goto err;
1717
1718                 mp[type - 1] = val;
1719                 __set_bit(type - 1, mxc->mx_valid);
1720         }
1721
1722         if (ecn_ca) {
1723                 __set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
1724                 mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
1725         }
1726
1727         mxc->mx = mp;
1728         return 0;
1729  err:
1730         kfree(mp);
1731         return -EINVAL;
1732 }
1733
/* Allocate and initialise a new rt6_info from a fib6_config, without
 * inserting it into the FIB.  On success the returned route holds
 * references on its device, inet6_dev and table; on failure every
 * temporary reference is dropped and an ERR_PTR() is returned.
 *
 * Note: may rewrite cfg->fc_metric, cfg->fc_protocol and
 * cfg->fc_nlinfo.nl_net as a side effect.
 */
static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
{
	struct net *net = cfg->fc_nlinfo.nl_net;
	struct rt6_info *rt = NULL;
	struct net_device *dev = NULL;
	struct inet6_dev *idev = NULL;
	struct fib6_table *table;
	int addr_type;
	int err = -EINVAL;

	/* IPv6 prefixes cannot be longer than 128 bits. */
	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
		goto out;
#ifndef CONFIG_IPV6_SUBTREES
	/* Source-prefix routing requires subtree support. */
	if (cfg->fc_src_len)
		goto out;
#endif
	if (cfg->fc_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(net, cfg->fc_ifindex);
		if (!dev)
			goto out;
		idev = in6_dev_get(dev);
		if (!idev)
			goto out;
	}

	if (cfg->fc_metric == 0)
		cfg->fc_metric = IP6_RT_PRIO_USER;

	err = -ENOBUFS;
	/* Without NLM_F_CREATE only an existing table should be used;
	 * warn (but still create one) if it does not exist yet.
	 */
	if (cfg->fc_nlinfo.nlh &&
	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
		table = fib6_get_table(net, cfg->fc_table);
		if (!table) {
			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
			table = fib6_new_table(net, cfg->fc_table);
		}
	} else {
		table = fib6_new_table(net, cfg->fc_table);
	}

	if (!table)
		goto out;

	rt = ip6_dst_alloc(net, NULL,
			   (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);

	if (!rt) {
		err = -ENOMEM;
		goto out;
	}

	if (cfg->fc_flags & RTF_EXPIRES)
		rt6_set_expires(rt, jiffies +
				clock_t_to_jiffies(cfg->fc_expires));
	else
		rt6_clean_expires(rt);

	if (cfg->fc_protocol == RTPROT_UNSPEC)
		cfg->fc_protocol = RTPROT_BOOT;
	rt->rt6i_protocol = cfg->fc_protocol;

	addr_type = ipv6_addr_type(&cfg->fc_dst);

	/* Pick the input handler from the destination type. */
	if (addr_type & IPV6_ADDR_MULTICAST)
		rt->dst.input = ip6_mc_input;
	else if (cfg->fc_flags & RTF_LOCAL)
		rt->dst.input = ip6_input;
	else
		rt->dst.input = ip6_forward;

	rt->dst.output = ip6_output;

	if (cfg->fc_encap) {
		struct lwtunnel_state *lwtstate;

		err = lwtunnel_build_state(dev, cfg->fc_encap_type,
					   cfg->fc_encap, AF_INET6, cfg,
					   &lwtstate);
		if (err)
			goto out;
		rt->dst.lwtstate = lwtstate_get(lwtstate);
		/* Let the lightweight tunnel wrap output/input if it
		 * requested redirection.
		 */
		if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
			rt->dst.lwtstate->orig_output = rt->dst.output;
			rt->dst.output = lwtunnel_output;
		}
		if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
			rt->dst.lwtstate->orig_input = rt->dst.input;
			rt->dst.input = lwtunnel_input;
		}
	}

	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
	rt->rt6i_dst.plen = cfg->fc_dst_len;
	if (rt->rt6i_dst.plen == 128)
		rt->dst.flags |= DST_HOST;

#ifdef CONFIG_IPV6_SUBTREES
	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
	rt->rt6i_src.plen = cfg->fc_src_len;
#endif

	rt->rt6i_metric = cfg->fc_metric;

	/* We cannot add true routes via loopback here,
	   they would result in kernel looping; promote them to reject routes
	 */
	if ((cfg->fc_flags & RTF_REJECT) ||
	    (dev && (dev->flags & IFF_LOOPBACK) &&
	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
	     !(cfg->fc_flags & RTF_LOCAL))) {
		/* hold loopback dev/idev if we haven't done so. */
		if (dev != net->loopback_dev) {
			if (dev) {
				dev_put(dev);
				in6_dev_put(idev);
			}
			dev = net->loopback_dev;
			dev_hold(dev);
			idev = in6_dev_get(dev);
			if (!idev) {
				err = -ENODEV;
				goto out;
			}
		}
		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
		switch (cfg->fc_type) {
		case RTN_BLACKHOLE:
			rt->dst.error = -EINVAL;
			rt->dst.output = dst_discard_out;
			rt->dst.input = dst_discard;
			break;
		case RTN_PROHIBIT:
			rt->dst.error = -EACCES;
			rt->dst.output = ip6_pkt_prohibit_out;
			rt->dst.input = ip6_pkt_prohibit;
			break;
		case RTN_THROW:
		case RTN_UNREACHABLE:
		default:
			rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
					: (cfg->fc_type == RTN_UNREACHABLE)
					? -EHOSTUNREACH : -ENETUNREACH;
			rt->dst.output = ip6_pkt_discard_out;
			rt->dst.input = ip6_pkt_discard;
			break;
		}
		goto install_route;
	}

	if (cfg->fc_flags & RTF_GATEWAY) {
		const struct in6_addr *gw_addr;
		int gwa_type;

		gw_addr = &cfg->fc_gateway;
		gwa_type = ipv6_addr_type(gw_addr);

		/* if gw_addr is local we will fail to detect this in case
		 * address is still TENTATIVE (DAD in progress). rt6_lookup()
		 * will return already-added prefix route via interface that
		 * prefix route was assigned to, which might be non-loopback.
		 */
		err = -EINVAL;
		if (ipv6_chk_addr_and_flags(net, gw_addr,
					    gwa_type & IPV6_ADDR_LINKLOCAL ?
					    dev : NULL, 0, 0))
			goto out;

		rt->rt6i_gateway = *gw_addr;

		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
			struct rt6_info *grt;

			/* IPv6 strictly inhibits using not link-local
			   addresses as nexthop address.
			   Otherwise, router will not able to send redirects.
			   It is very good, but in some (rare!) circumstances
			   (SIT, PtP, NBMA NOARP links) it is handy to allow
			   some exceptions. --ANK
			 */
			if (!(gwa_type & IPV6_ADDR_UNICAST))
				goto out;

			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);

			err = -EHOSTUNREACH;
			if (!grt)
				goto out;
			if (dev) {
				if (dev != grt->dst.dev) {
					ip6_rt_put(grt);
					goto out;
				}
			} else {
				/* Inherit dev/idev from the route that
				 * reaches the gateway.
				 */
				dev = grt->dst.dev;
				idev = grt->rt6i_idev;
				dev_hold(dev);
				in6_dev_hold(grt->rt6i_idev);
			}
			/* The gateway itself must be reachable on-link. */
			if (!(grt->rt6i_flags & RTF_GATEWAY))
				err = 0;
			ip6_rt_put(grt);

			if (err)
				goto out;
		}
		err = -EINVAL;
		if (!dev || (dev->flags & IFF_LOOPBACK))
			goto out;
	}

	err = -ENODEV;
	if (!dev)
		goto out;

	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
		/* The preferred source must be configured on the device. */
		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
			err = -EINVAL;
			goto out;
		}
		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
		rt->rt6i_prefsrc.plen = 128;
	} else
		rt->rt6i_prefsrc.plen = 0;

	rt->rt6i_flags = cfg->fc_flags;

install_route:
	/* Transfer the dev/idev references taken above to the route. */
	rt->dst.dev = dev;
	rt->rt6i_idev = idev;
	rt->rt6i_table = table;

	cfg->fc_nlinfo.nl_net = dev_net(dev);

	return rt;
out:
	if (dev)
		dev_put(dev);
	if (idev)
		in6_dev_put(idev);
	if (rt)
		dst_free(&rt->dst);

	return ERR_PTR(err);
}
1979
1980 int ip6_route_add(struct fib6_config *cfg)
1981 {
1982         struct mx6_config mxc = { .mx = NULL, };
1983         struct rt6_info *rt;
1984         int err;
1985
1986         rt = ip6_route_info_create(cfg);
1987         if (IS_ERR(rt)) {
1988                 err = PTR_ERR(rt);
1989                 rt = NULL;
1990                 goto out;
1991         }
1992
1993         err = ip6_convert_metrics(&mxc, cfg);
1994         if (err)
1995                 goto out;
1996
1997         err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
1998
1999         kfree(mxc.mx);
2000
2001         return err;
2002 out:
2003         if (rt)
2004                 dst_free(&rt->dst);
2005
2006         return err;
2007 }
2008
2009 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
2010 {
2011         int err;
2012         struct fib6_table *table;
2013         struct net *net = dev_net(rt->dst.dev);
2014
2015         if (rt == net->ipv6.ip6_null_entry ||
2016             rt->dst.flags & DST_NOCACHE) {
2017                 err = -ENOENT;
2018                 goto out;
2019         }
2020
2021         table = rt->rt6i_table;
2022         write_lock_bh(&table->tb6_lock);
2023         err = fib6_del(rt, info);
2024         write_unlock_bh(&table->tb6_lock);
2025
2026 out:
2027         ip6_rt_put(rt);
2028         return err;
2029 }
2030
2031 int ip6_del_rt(struct rt6_info *rt)
2032 {
2033         struct nl_info info = {
2034                 .nl_net = dev_net(rt->dst.dev),
2035         };
2036         return __ip6_del_rt(rt, &info);
2037 }
2038
/* Delete the first FIB entry matching @cfg (prefix, optional device,
 * gateway and metric constraints).  Returns -ESRCH when no route
 * matches.
 */
static int ip6_route_del(struct fib6_config *cfg)
{
	struct fib6_table *table;
	struct fib6_node *fn;
	struct rt6_info *rt;
	int err = -ESRCH;

	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
	if (!table)
		return err;

	read_lock_bh(&table->tb6_lock);

	fn = fib6_locate(&table->tb6_root,
			 &cfg->fc_dst, cfg->fc_dst_len,
			 &cfg->fc_src, cfg->fc_src_len);

	if (fn) {
		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
			/* Cached clones are only removed when explicitly
			 * requested via RTF_CACHE.
			 */
			if ((rt->rt6i_flags & RTF_CACHE) &&
			    !(cfg->fc_flags & RTF_CACHE))
				continue;
			if (cfg->fc_ifindex &&
			    (!rt->dst.dev ||
			     rt->dst.dev->ifindex != cfg->fc_ifindex))
				continue;
			if (cfg->fc_flags & RTF_GATEWAY &&
			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
				continue;
			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
				continue;
			/* Hold the route across the unlock; __ip6_del_rt()
			 * consumes this reference.
			 */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);

			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
		}
	}
	read_unlock_bh(&table->tb6_lock);

	return err;
}
2080
/* Handle a received NDISC Redirect for @dst: validate the message,
 * update the neighbour cache for the new first hop, and install a
 * cached route towards the redirected destination (RFC 4861, sec. 8).
 */
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
{
	struct net *net = dev_net(skb->dev);
	struct netevent_redirect netevent;
	struct rt6_info *rt, *nrt = NULL;
	struct ndisc_options ndopts;
	struct inet6_dev *in6_dev;
	struct neighbour *neigh;
	struct rd_msg *msg;
	int optlen, on_link;
	u8 *lladdr;

	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
	optlen -= sizeof(*msg);

	if (optlen < 0) {
		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
		return;
	}

	msg = (struct rd_msg *)icmp6_hdr(skb);

	if (ipv6_addr_is_multicast(&msg->dest)) {
		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
		return;
	}

	/* dest == target means the destination is directly on-link;
	 * otherwise the target must be a link-local unicast router.
	 */
	on_link = 0;
	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
		on_link = 1;
	} else if (ipv6_addr_type(&msg->target) !=
		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
		return;
	}

	in6_dev = __in6_dev_get(skb->dev);
	if (!in6_dev)
		return;
	/* Forwarding nodes and hosts that opted out ignore redirects. */
	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
		return;

	/* RFC2461 8.1:
	 *	The IP source address of the Redirect MUST be the same as the current
	 *	first-hop router for the specified ICMP Destination Address.
	 */

	if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
		return;
	}

	lladdr = NULL;
	if (ndopts.nd_opts_tgt_lladdr) {
		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
					     skb->dev);
		if (!lladdr) {
			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
			return;
		}
	}

	rt = (struct rt6_info *) dst;
	if (rt == net->ipv6.ip6_null_entry) {
		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
		return;
	}

	/* Redirect received -> path was valid.
	 * Look, redirects are sent only in response to data packets,
	 * so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->dst);

	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
	if (!neigh)
		return;

	/*
	 *	We have finally decided to accept it.
	 */

	neigh_update(neigh, lladdr, NUD_STALE,
		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
		     NEIGH_UPDATE_F_OVERRIDE|
		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
				     NEIGH_UPDATE_F_ISROUTER))
		     );

	/* Clone a cached route for the redirected destination. */
	nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
	if (!nrt)
		goto out;

	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
	if (on_link)
		nrt->rt6i_flags &= ~RTF_GATEWAY;

	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;

	if (ip6_ins_rt(nrt))
		goto out;

	netevent.old = &rt->dst;
	netevent.new = &nrt->dst;
	netevent.daddr = &msg->dest;
	netevent.neigh = neigh;
	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);

	/* The old cached entry has been superseded; remove it. */
	if (rt->rt6i_flags & RTF_CACHE) {
		rt = (struct rt6_info *) dst_clone(&rt->dst);
		ip6_del_rt(rt);
	}

out:
	neigh_release(neigh);
}
2197
2198 /*
2199  *      Misc support functions
2200  */
2201
/* Record @from as the origin of @rt: take a reference on it and share
 * its metrics read-only.  @from must not itself be derived from
 * another route (chains of ->from are not allowed).
 */
static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
{
	BUG_ON(from->dst.from);

	/* Expiry is inherited through the "from" route, not tracked here. */
	rt->rt6i_flags &= ~RTF_EXPIRES;
	dst_hold(&from->dst);
	rt->dst.from = &from->dst;
	dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
}
2211
/* Initialise @rt as a clone of @ort: copy its handlers and addressing
 * state, share metrics/lwtunnel state, and take the needed references
 * (inet6_dev, origin route, lwtstate).
 */
static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
{
	rt->dst.input = ort->dst.input;
	rt->dst.output = ort->dst.output;
	rt->rt6i_dst = ort->rt6i_dst;
	rt->dst.error = ort->dst.error;
	rt->rt6i_idev = ort->rt6i_idev;
	if (rt->rt6i_idev)
		in6_dev_hold(rt->rt6i_idev);
	rt->dst.lastuse = jiffies;
	rt->rt6i_gateway = ort->rt6i_gateway;
	rt->rt6i_flags = ort->rt6i_flags;
	rt6_set_from(rt, ort);	/* share metrics with the origin route */
	rt->rt6i_metric = ort->rt6i_metric;
#ifdef CONFIG_IPV6_SUBTREES
	rt->rt6i_src = ort->rt6i_src;
#endif
	rt->rt6i_prefsrc = ort->rt6i_prefsrc;
	rt->rt6i_table = ort->rt6i_table;
	rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
}
2233
2234 #ifdef CONFIG_IPV6_ROUTE_INFO
2235 static struct rt6_info *rt6_get_route_info(struct net *net,
2236                                            const struct in6_addr *prefix, int prefixlen,
2237                                            const struct in6_addr *gwaddr, int ifindex)
2238 {
2239         struct fib6_node *fn;
2240         struct rt6_info *rt = NULL;
2241         struct fib6_table *table;
2242
2243         table = fib6_get_table(net, RT6_TABLE_INFO);
2244         if (!table)
2245                 return NULL;
2246
2247         read_lock_bh(&table->tb6_lock);
2248         fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
2249         if (!fn)
2250                 goto out;
2251
2252         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2253                 if (rt->dst.dev->ifindex != ifindex)
2254                         continue;
2255                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2256                         continue;
2257                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2258                         continue;
2259                 dst_hold(&rt->dst);
2260                 break;
2261         }
2262 out:
2263         read_unlock_bh(&table->tb6_lock);
2264         return rt;
2265 }
2266
2267 static struct rt6_info *rt6_add_route_info(struct net *net,
2268                                            const struct in6_addr *prefix, int prefixlen,
2269                                            const struct in6_addr *gwaddr, int ifindex,
2270                                            unsigned int pref)
2271 {
2272         struct fib6_config cfg = {
2273                 .fc_metric      = IP6_RT_PRIO_USER,
2274                 .fc_ifindex     = ifindex,
2275                 .fc_dst_len     = prefixlen,
2276                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2277                                   RTF_UP | RTF_PREF(pref),
2278                 .fc_nlinfo.portid = 0,
2279                 .fc_nlinfo.nlh = NULL,
2280                 .fc_nlinfo.nl_net = net,
2281         };
2282
2283         cfg.fc_table = l3mdev_fib_table_by_index(net, ifindex) ? : RT6_TABLE_INFO;
2284         cfg.fc_dst = *prefix;
2285         cfg.fc_gateway = *gwaddr;
2286
2287         /* We should treat it as a default route if prefix length is 0. */
2288         if (!prefixlen)
2289                 cfg.fc_flags |= RTF_DEFAULT;
2290
2291         ip6_route_add(&cfg);
2292
2293         return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
2294 }
2295 #endif
2296
2297 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
2298 {
2299         struct rt6_info *rt;
2300         struct fib6_table *table;
2301
2302         table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
2303         if (!table)
2304                 return NULL;
2305
2306         read_lock_bh(&table->tb6_lock);
2307         for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2308                 if (dev == rt->dst.dev &&
2309                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
2310                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
2311                         break;
2312         }
2313         if (rt)
2314                 dst_hold(&rt->dst);
2315         read_unlock_bh(&table->tb6_lock);
2316         return rt;
2317 }
2318
/* Install an RA-learned (expiring) default route via @gwaddr on @dev
 * with router preference @pref, then return a held reference to the
 * installed route (or NULL if insertion failed).
 */
struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
				     struct net_device *dev,
				     unsigned int pref)
{
	struct fib6_config cfg = {
		.fc_table	= l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
		.fc_metric	= IP6_RT_PRIO_USER,
		.fc_ifindex	= dev->ifindex,
		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
		.fc_nlinfo.portid = 0,
		.fc_nlinfo.nlh = NULL,
		.fc_nlinfo.nl_net = dev_net(dev),
	};

	cfg.fc_gateway = *gwaddr;

	ip6_route_add(&cfg);

	return rt6_get_dflt_router(gwaddr, dev);
}
2340
/* Remove every RA-learned default router entry, except on interfaces
 * with accept_ra == 2 (accept RAs even while forwarding).
 */
void rt6_purge_dflt_routers(struct net *net)
{
	struct rt6_info *rt;
	struct fib6_table *table;

	/* NOTE: Keep consistent with rt6_get_dflt_router */
	table = fib6_get_table(net, RT6_TABLE_DFLT);
	if (!table)
		return;

restart:
	read_lock_bh(&table->tb6_lock);
	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
		    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
			/* ip6_del_rt() takes the table write lock, so
			 * drop our read lock and rescan from the top.
			 */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);
			ip6_del_rt(rt);
			goto restart;
		}
	}
	read_unlock_bh(&table->tb6_lock);
}
2364
2365 static void rtmsg_to_fib6_config(struct net *net,
2366                                  struct in6_rtmsg *rtmsg,
2367                                  struct fib6_config *cfg)
2368 {
2369         memset(cfg, 0, sizeof(*cfg));
2370
2371         cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
2372                          : RT6_TABLE_MAIN;
2373         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2374         cfg->fc_metric = rtmsg->rtmsg_metric;
2375         cfg->fc_expires = rtmsg->rtmsg_info;
2376         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2377         cfg->fc_src_len = rtmsg->rtmsg_src_len;
2378         cfg->fc_flags = rtmsg->rtmsg_flags;
2379
2380         cfg->fc_nlinfo.nl_net = net;
2381
2382         cfg->fc_dst = rtmsg->rtmsg_dst;
2383         cfg->fc_src = rtmsg->rtmsg_src;
2384         cfg->fc_gateway = rtmsg->rtmsg_gateway;
2385 }
2386
2387 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2388 {
2389         struct fib6_config cfg;
2390         struct in6_rtmsg rtmsg;
2391         int err;
2392
2393         switch (cmd) {
2394         case SIOCADDRT:         /* Add a route */
2395         case SIOCDELRT:         /* Delete a route */
2396                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2397                         return -EPERM;
2398                 err = copy_from_user(&rtmsg, arg,
2399                                      sizeof(struct in6_rtmsg));
2400                 if (err)
2401                         return -EFAULT;
2402
2403                 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2404
2405                 rtnl_lock();
2406                 switch (cmd) {
2407                 case SIOCADDRT:
2408                         err = ip6_route_add(&cfg);
2409                         break;
2410                 case SIOCDELRT:
2411                         err = ip6_route_del(&cfg);
2412                         break;
2413                 default:
2414                         err = -EINVAL;
2415                 }
2416                 rtnl_unlock();
2417
2418                 return err;
2419         }
2420
2421         return -EINVAL;
2422 }
2423
2424 /*
2425  *      Drop the packet on the floor
2426  */
2427
/* Drop @skb with an ICMPv6 destination-unreachable of @code, bumping
 * the matching in/out "no route" SNMP counter first.
 */
static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
{
	int type;
	struct dst_entry *dst = skb_dst(skb);
	switch (ipstats_mib_noroutes) {
	case IPSTATS_MIB_INNOROUTES:
		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
		if (type == IPV6_ADDR_ANY) {
			/* An unspecified destination is an address error,
			 * not a routing failure.
			 */
			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
				      IPSTATS_MIB_INADDRERRORS);
			break;
		}
		/* FALLTHROUGH */
	case IPSTATS_MIB_OUTNOROUTES:
		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
			      ipstats_mib_noroutes);
		break;
	}
	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
	kfree_skb(skb);
	return 0;
}
2450
/* dst.input handler for unreachable routes: drop with "no route". */
static int ip6_pkt_discard(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
}
2455
/* dst.output handler for unreachable routes: drop with "no route". */
static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
}
2461
/* dst.input handler for prohibit routes: drop as administratively
 * prohibited.
 */
static int ip6_pkt_prohibit(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
}
2466
/* dst.output handler for prohibit routes: drop as administratively
 * prohibited.
 */
static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
}
2472
2473 /*
2474  *      Allocate a dst for local (unicast / anycast) address.
2475  */
2476
/* Build a host route (plen 128) for a local or anycast address on
 * @idev, attached to the loopback device.  Returns the route with one
 * reference, or ERR_PTR(-ENOMEM); takes a hold on @idev.
 */
struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
				    const struct in6_addr *addr,
				    bool anycast)
{
	u32 tb_id;
	struct net *net = dev_net(idev->dev);
	struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
					    DST_NOCOUNT);
	if (!rt)
		return ERR_PTR(-ENOMEM);

	in6_dev_hold(idev);

	rt->dst.flags |= DST_HOST;
	rt->dst.input = ip6_input;
	rt->dst.output = ip6_output;
	rt->rt6i_idev = idev;

	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
	if (anycast)
		rt->rt6i_flags |= RTF_ANYCAST;
	else
		rt->rt6i_flags |= RTF_LOCAL;

	rt->rt6i_gateway  = *addr;
	rt->rt6i_dst.addr = *addr;
	rt->rt6i_dst.plen = 128;
	/* Local routes go in the L3 master device's table if present. */
	tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
	rt->rt6i_table = fib6_get_table(net, tb_id);
	rt->dst.flags |= DST_NOCACHE;

	atomic_set(&rt->dst.__refcnt, 1);

	return rt;
}
2512
2513 int ip6_route_get_saddr(struct net *net,
2514                         struct rt6_info *rt,
2515                         const struct in6_addr *daddr,
2516                         unsigned int prefs,
2517                         struct in6_addr *saddr)
2518 {
2519         struct inet6_dev *idev =
2520                 rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
2521         int err = 0;
2522         if (rt && rt->rt6i_prefsrc.plen)
2523                 *saddr = rt->rt6i_prefsrc.addr;
2524         else
2525                 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2526                                          daddr, prefs, saddr);
2527         return err;
2528 }
2529
2530 /* remove deleted ip from prefsrc entries */
struct arg_dev_net_ip {
	struct net_device *dev;	/* restrict to this device (NULL: any) */
	struct net *net;	/* namespace being walked */
	struct in6_addr *addr;	/* address being removed */
};
2536
2537 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2538 {
2539         struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2540         struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2541         struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2542
2543         if (((void *)rt->dst.dev == dev || !dev) &&
2544             rt != net->ipv6.ip6_null_entry &&
2545             ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2546                 /* remove prefsrc entry */
2547                 rt->rt6i_prefsrc.plen = 0;
2548         }
2549         return 0;
2550 }
2551
/* Purge @ifp's address from the prefsrc field of every route in its
 * namespace (called when the address goes away).
 */
void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
{
	struct net *net = dev_net(ifp->idev->dev);
	struct arg_dev_net_ip adni = {
		.dev = ifp->idev->dev,
		.net = net,
		.addr = &ifp->addr,
	};
	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
}
2562
2563 #define RTF_RA_ROUTER           (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2564 #define RTF_CACHE_GATEWAY       (RTF_GATEWAY | RTF_CACHE)
2565
2566 /* Remove routers and update dst entries when gateway turn into host. */
2567 static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2568 {
2569         struct in6_addr *gateway = (struct in6_addr *)arg;
2570
2571         if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2572              ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2573              ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2574                 return -1;
2575         }
2576         return 0;
2577 }
2578
/* Drop router/gateway routes through @gateway once it stops being a
 * router (turned into a plain host).
 */
void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
{
	fib6_clean_all(net, fib6_clean_tohost, gateway);
}
2583
/* Walker context for fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;	/* device going down (NULL: all devices) */
	struct net *net;	/* namespace being walked */
};
2588
2589 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2590 {
2591         const struct arg_dev_net *adn = arg;
2592         const struct net_device *dev = adn->dev;
2593
2594         if ((rt->dst.dev == dev || !dev) &&
2595             rt != adn->net->ipv6.ip6_null_entry)
2596                 return -1;
2597
2598         return 0;
2599 }
2600
/* Device @dev is going down (or, with dev == NULL, the netns is being
 * torn down): purge matching routes from the FIB, from ICMPv6 sockets'
 * cached dsts, and finally from the per-cpu uncached list.
 */
void rt6_ifdown(struct net *net, struct net_device *dev)
{
	struct arg_dev_net adn = {
		.dev = dev,
		.net = net,
	};

	fib6_clean_all(net, fib6_ifdown, &adn);
	icmp6_clean_all(fib6_ifdown, &adn);
	/* Only flush the uncached list for a real device; a NULL dev
	 * means whole-netns cleanup handled by the two calls above.
	 */
	if (dev)
		rt6_uncached_list_flush_dev(net, dev);
}
2613
/* Argument bundle for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* its new MTU */
};
2618
/* fib6_clean_all() callback for rt6_mtu_change(): propagate a device MTU
 * change into the PMTU/MTU metric of each route using that device.
 * Always returns 0 (the walk continues; routes are updated in place).
 */
static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
	struct inet6_dev *idev;

	/* In IPv6 pmtu discovery is not optional,
	   so that RTAX_MTU lock cannot disable it.
	   We still use this lock to block changes
	   caused by addrconf/ndisc.
	*/

	idev = __in6_dev_get(arg->dev);
	if (!idev)
		return 0;

	/* For administrative MTU increase, there is no way to discover
	   IPv6 PMTU increase, so PMTU increase should be updated here.
	   Since RFC 1981 doesn't include administrative MTU increase
	   update PMTU increase is a MUST. (i.e. jumbo frame)
	 */
	/*
	   If the new MTU is less than the route PMTU, this new MTU will be the
	   lowest MTU in the path; update the route PMTU to reflect PMTU
	   decreases.  If the new MTU is greater than the route PMTU, and the
	   old MTU is the lowest MTU in the path, update the route PMTU
	   to reflect the increase.  In this case, if the other nodes' MTU
	   also is the lowest MTU, a TOO BIG MESSAGE will lead to
	   PMTU discovery.
	 */
	if (rt->dst.dev == arg->dev &&
	    !dst_metric_locked(&rt->dst, RTAX_MTU)) {
		if (rt->rt6i_flags & RTF_CACHE) {
			/* For RTF_CACHE with rt6i_pmtu == 0
			 * (i.e. a redirected route),
			 * the metrics of its rt->dst.from has already
			 * been updated.
			 */
			/* Cached routes only ever shrink their PMTU here. */
			if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
				rt->rt6i_pmtu = arg->mtu;
		} else if (dst_mtu(&rt->dst) >= arg->mtu ||
			   (dst_mtu(&rt->dst) < arg->mtu &&
			    dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
			/* Shrink, or grow back an MTU previously clamped
			 * to the old device MTU (idev->cnf.mtu6).
			 */
			dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
		}
	}
	return 0;
}
2666
2667 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2668 {
2669         struct rt6_mtu_change_arg arg = {
2670                 .dev = dev,
2671                 .mtu = mtu,
2672         };
2673
2674         fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
2675 }
2676
/* Netlink attribute policy for IPv6 RTM_{NEW,DEL,GET}ROUTE requests.
 * Attributes absent from this table (e.g. RTA_DST, RTA_SRC, RTA_TABLE,
 * RTA_PREFSRC, RTA_MARK) are still delivered by nlmsg_parse(); their
 * lengths are validated by hand at the point of use.
 */
static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
	[RTA_OIF]               = { .type = NLA_U32 },
	[RTA_IIF]               = { .type = NLA_U32 },
	[RTA_PRIORITY]          = { .type = NLA_U32 },
	[RTA_METRICS]           = { .type = NLA_NESTED },
	[RTA_MULTIPATH]         = { .len = sizeof(struct rtnexthop) },
	[RTA_PREF]              = { .type = NLA_U8 },
	[RTA_ENCAP_TYPE]        = { .type = NLA_U16 },
	[RTA_ENCAP]             = { .type = NLA_NESTED },
};
2688
/* Translate an RTM_NEWROUTE/RTM_DELROUTE netlink request into a
 * struct fib6_config.
 *
 * @skb: the request skb (source of portid and netns)
 * @nlh: netlink header; the payload is a struct rtmsg plus attributes
 * @cfg: output, zeroed here and then filled in
 *
 * Note that cfg->fc_mx, cfg->fc_mp and cfg->fc_encap point INTO the
 * netlink message payload; @cfg must not outlive the message.
 *
 * Returns 0 on success or a negative errno (-EINVAL for malformed
 * attributes).
 */
static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
			      struct fib6_config *cfg)
{
	struct rtmsg *rtm;
	struct nlattr *tb[RTA_MAX+1];
	unsigned int pref;
	int err;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	rtm = nlmsg_data(nlh);
	memset(cfg, 0, sizeof(*cfg));

	cfg->fc_table = rtm->rtm_table;
	cfg->fc_dst_len = rtm->rtm_dst_len;
	cfg->fc_src_len = rtm->rtm_src_len;
	cfg->fc_flags = RTF_UP;
	cfg->fc_protocol = rtm->rtm_protocol;
	cfg->fc_type = rtm->rtm_type;

	/* All reject-style route types share RTF_REJECT; the precise
	 * type is kept in fc_type and reported back via dst.error.
	 */
	if (rtm->rtm_type == RTN_UNREACHABLE ||
	    rtm->rtm_type == RTN_BLACKHOLE ||
	    rtm->rtm_type == RTN_PROHIBIT ||
	    rtm->rtm_type == RTN_THROW)
		cfg->fc_flags |= RTF_REJECT;

	if (rtm->rtm_type == RTN_LOCAL)
		cfg->fc_flags |= RTF_LOCAL;

	if (rtm->rtm_flags & RTM_F_CLONED)
		cfg->fc_flags |= RTF_CACHE;

	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
	cfg->fc_nlinfo.nlh = nlh;
	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);

	if (tb[RTA_GATEWAY]) {
		cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
		cfg->fc_flags |= RTF_GATEWAY;
	}

	/* Dst/src prefixes may be sent truncated to the prefix length;
	 * only require (and copy) the bytes the prefix length implies.
	 */
	if (tb[RTA_DST]) {
		int plen = (rtm->rtm_dst_len + 7) >> 3;

		if (nla_len(tb[RTA_DST]) < plen)
			goto errout;

		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
	}

	if (tb[RTA_SRC]) {
		int plen = (rtm->rtm_src_len + 7) >> 3;

		if (nla_len(tb[RTA_SRC]) < plen)
			goto errout;

		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
	}

	if (tb[RTA_PREFSRC])
		cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);

	if (tb[RTA_OIF])
		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);

	if (tb[RTA_PRIORITY])
		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);

	if (tb[RTA_METRICS]) {
		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
	}

	/* RTA_TABLE overrides the (8-bit) rtm_table field. */
	if (tb[RTA_TABLE])
		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);

	if (tb[RTA_MULTIPATH]) {
		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
	}

	/* Unknown router preference values fall back to medium. */
	if (tb[RTA_PREF]) {
		pref = nla_get_u8(tb[RTA_PREF]);
		if (pref != ICMPV6_ROUTER_PREF_LOW &&
		    pref != ICMPV6_ROUTER_PREF_HIGH)
			pref = ICMPV6_ROUTER_PREF_MEDIUM;
		cfg->fc_flags |= RTF_PREF(pref);
	}

	if (tb[RTA_ENCAP])
		cfg->fc_encap = tb[RTA_ENCAP];

	if (tb[RTA_ENCAP_TYPE])
		cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);

	err = 0;
errout:
	return err;
}
2791
/* Per-nexthop bookkeeping while building/inserting a multipath route. */
struct rt6_nh {
	struct rt6_info *rt6_info;	/* route to insert; NULLed once consumed */
	struct fib6_config r_cfg;	/* per-nexthop copy of the request */
	struct mx6_config mxc;		/* converted metrics; mxc.mx is kfree'd */
	struct list_head next;		/* link in the rt6_nh_list */
};
2798
2799 static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
2800 {
2801         struct rt6_nh *nh;
2802
2803         list_for_each_entry(nh, rt6_nh_list, next) {
2804                 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n",
2805                         &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
2806                         nh->r_cfg.fc_ifindex);
2807         }
2808 }
2809
2810 static int ip6_route_info_append(struct list_head *rt6_nh_list,
2811                                  struct rt6_info *rt, struct fib6_config *r_cfg)
2812 {
2813         struct rt6_nh *nh;
2814         struct rt6_info *rtnh;
2815         int err = -EEXIST;
2816
2817         list_for_each_entry(nh, rt6_nh_list, next) {
2818                 /* check if rt6_info already exists */
2819                 rtnh = nh->rt6_info;
2820
2821                 if (rtnh->dst.dev == rt->dst.dev &&
2822                     rtnh->rt6i_idev == rt->rt6i_idev &&
2823                     ipv6_addr_equal(&rtnh->rt6i_gateway,
2824                                     &rt->rt6i_gateway))
2825                         return err;
2826         }
2827
2828         nh = kzalloc(sizeof(*nh), GFP_KERNEL);
2829         if (!nh)
2830                 return -ENOMEM;
2831         nh->rt6_info = rt;
2832         err = ip6_convert_metrics(&nh->mxc, r_cfg);
2833         if (err) {
2834                 kfree(nh);
2835                 return err;
2836         }
2837         memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
2838         list_add_tail(&nh->next, rt6_nh_list);
2839
2840         return 0;
2841 }
2842
/* Handle RTM_NEWROUTE carrying an RTA_MULTIPATH attribute.
 *
 * Two phases: (1) parse every struct rtnexthop and build one rt6_info
 * per nexthop on rt6_nh_list; (2) insert them one by one.  On insert
 * failure, the nexthops installed so far by this request are deleted
 * again (best effort), and the whole request fails with the error.
 *
 * Ownership: after __ip6_ins_rt() the rt6_info belongs to the tree (or
 * was freed), so nh->rt6_info is reset to NULL; the cleanup loop frees
 * only routes never handed over.
 */
static int ip6_route_multipath_add(struct fib6_config *cfg)
{
	struct fib6_config r_cfg;
	struct rtnexthop *rtnh;
	struct rt6_info *rt;
	struct rt6_nh *err_nh;
	struct rt6_nh *nh, *nh_safe;
	int remaining;
	int attrlen;
	int err = 1;
	int nhn = 0;
	int replace = (cfg->fc_nlinfo.nlh &&
		       (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
	LIST_HEAD(rt6_nh_list);

	remaining = cfg->fc_mp_len;
	rtnh = (struct rtnexthop *)cfg->fc_mp;

	/* Parse a Multipath Entry and build a list (rt6_nh_list) of
	 * rt6_info structs per nexthop
	 */
	while (rtnh_ok(rtnh, remaining)) {
		memcpy(&r_cfg, cfg, sizeof(*cfg));
		if (rtnh->rtnh_ifindex)
			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;

		attrlen = rtnh_attrlen(rtnh);
		if (attrlen > 0) {
			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);

			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
			if (nla) {
				r_cfg.fc_gateway = nla_get_in6_addr(nla);
				r_cfg.fc_flags |= RTF_GATEWAY;
			}
			/* Per-nexthop encap overrides the top-level one. */
			r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
			nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
			if (nla)
				r_cfg.fc_encap_type = nla_get_u16(nla);
		}

		rt = ip6_route_info_create(&r_cfg);
		if (IS_ERR(rt)) {
			err = PTR_ERR(rt);
			rt = NULL;
			goto cleanup;
		}

		err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
		if (err) {
			dst_free(&rt->dst);
			goto cleanup;
		}

		rtnh = rtnh_next(rtnh, &remaining);
	}

	err_nh = NULL;
	list_for_each_entry(nh, &rt6_nh_list, next) {
		err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc);
		/* nh->rt6_info is used or freed at this point, reset to NULL*/
		nh->rt6_info = NULL;
		if (err) {
			if (replace && nhn)
				ip6_print_replace_route_err(&rt6_nh_list);
			err_nh = nh;
			goto add_errout;
		}

		/* Because each route is added like a single route we remove
		 * these flags after the first nexthop: if there is a collision,
		 * we have already failed to add the first nexthop:
		 * fib6_add_rt2node() has rejected it; when replacing, old
		 * nexthops have been replaced by first new, the rest should
		 * be added to it.
		 */
		/* NOTE(review): cfg->fc_nlinfo.nlh is dereferenced without a
		 * NULL check here; presumably every caller reaches this via
		 * rtnetlink and always has an nlh - confirm.
		 */
		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
						     NLM_F_REPLACE);
		nhn++;
	}

	goto cleanup;

add_errout:
	/* Delete routes that were already added */
	list_for_each_entry(nh, &rt6_nh_list, next) {
		if (err_nh == nh)
			break;
		ip6_route_del(&nh->r_cfg);
	}

cleanup:
	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
		if (nh->rt6_info)
			dst_free(&nh->rt6_info->dst);
		kfree(nh->mxc.mx);
		list_del(&nh->next);
		kfree(nh);
	}

	return err;
}
2945
2946 static int ip6_route_multipath_del(struct fib6_config *cfg)
2947 {
2948         struct fib6_config r_cfg;
2949         struct rtnexthop *rtnh;
2950         int remaining;
2951         int attrlen;
2952         int err = 1, last_err = 0;
2953
2954         remaining = cfg->fc_mp_len;
2955         rtnh = (struct rtnexthop *)cfg->fc_mp;
2956
2957         /* Parse a Multipath Entry */
2958         while (rtnh_ok(rtnh, remaining)) {
2959                 memcpy(&r_cfg, cfg, sizeof(*cfg));
2960                 if (rtnh->rtnh_ifindex)
2961                         r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2962
2963                 attrlen = rtnh_attrlen(rtnh);
2964                 if (attrlen > 0) {
2965                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2966
2967                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2968                         if (nla) {
2969                                 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2970                                 r_cfg.fc_flags |= RTF_GATEWAY;
2971                         }
2972                 }
2973                 err = ip6_route_del(&r_cfg);
2974                 if (err)
2975                         last_err = err;
2976
2977                 rtnh = rtnh_next(rtnh, &remaining);
2978         }
2979
2980         return last_err;
2981 }
2982
2983 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
2984 {
2985         struct fib6_config cfg;
2986         int err;
2987
2988         err = rtm_to_fib6_config(skb, nlh, &cfg);
2989         if (err < 0)
2990                 return err;
2991
2992         if (cfg.fc_mp)
2993                 return ip6_route_multipath_del(&cfg);
2994         else
2995                 return ip6_route_del(&cfg);
2996 }
2997
2998 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
2999 {
3000         struct fib6_config cfg;
3001         int err;
3002
3003         err = rtm_to_fib6_config(skb, nlh, &cfg);
3004         if (err < 0)
3005                 return err;
3006
3007         if (cfg.fc_mp)
3008                 return ip6_route_multipath_add(&cfg);
3009         else
3010                 return ip6_route_add(&cfg);
3011 }
3012
/* Worst-case payload size of an RTM_NEWROUTE/RTM_DELROUTE notification
 * for @rt; must stay in sync with what rt6_fill_node() can emit, or
 * inet6_rt_notify() will WARN on -EMSGSIZE.
 */
static inline size_t rt6_nlmsg_size(struct rt6_info *rt)
{
	return NLMSG_ALIGN(sizeof(struct rtmsg))
	       + nla_total_size(16) /* RTA_SRC */
	       + nla_total_size(16) /* RTA_DST */
	       + nla_total_size(16) /* RTA_GATEWAY */
	       + nla_total_size(16) /* RTA_PREFSRC */
	       + nla_total_size(4) /* RTA_TABLE */
	       + nla_total_size(4) /* RTA_IIF */
	       + nla_total_size(4) /* RTA_OIF */
	       + nla_total_size(4) /* RTA_PRIORITY */
	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
	       + nla_total_size(sizeof(struct rta_cacheinfo))
	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
	       + nla_total_size(1) /* RTA_PREF */
	       + lwtunnel_get_encap_size(rt->dst.lwtstate);
}
3030
/* Serialize @rt into an rtnetlink message on @skb.
 *
 * @dst/@src: when non-NULL (route-get replies), report these exact
 *	addresses with /128 lengths instead of the route's own prefixes.
 * @iif:      input interface for route-get via an incoming device.
 * @prefix:   dump only RTF_PREFIX_RT routes (returns 1, "skipped", for
 *	others).
 * @nowait:   passed through to ip6mr_get_route() for multicast.
 *
 * Returns 0 on success, 1 if the route was intentionally skipped, or
 * -EMSGSIZE when @skb has no room (message is cancelled).
 */
static int rt6_fill_node(struct net *net,
			 struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 portid, u32 seq,
			 int prefix, int nowait, unsigned int flags)
{
	u32 metrics[RTAX_MAX];
	struct rtmsg *rtm;
	struct nlmsghdr *nlh;
	long expires;
	u32 table;

	if (prefix) {	/* user wants prefix routes only */
		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
			/* success since this is not a prefix route */
			return 1;
		}
	}

	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt->rt6i_dst.plen;
	rtm->rtm_src_len = rt->rt6i_src.plen;
	rtm->rtm_tos = 0;
	if (rt->rt6i_table)
		table = rt->rt6i_table->tb6_id;
	else
		table = RT6_TABLE_UNSPEC;
	rtm->rtm_table = table;
	if (nla_put_u32(skb, RTA_TABLE, table))
		goto nla_put_failure;
	/* Map a reject route's stored error back to the route type the
	 * user originally configured (see rtm_to_fib6_config()).
	 */
	if (rt->rt6i_flags & RTF_REJECT) {
		switch (rt->dst.error) {
		case -EINVAL:
			rtm->rtm_type = RTN_BLACKHOLE;
			break;
		case -EACCES:
			rtm->rtm_type = RTN_PROHIBIT;
			break;
		case -EAGAIN:
			rtm->rtm_type = RTN_THROW;
			break;
		default:
			rtm->rtm_type = RTN_UNREACHABLE;
			break;
		}
	}
	else if (rt->rt6i_flags & RTF_LOCAL)
		rtm->rtm_type = RTN_LOCAL;
	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
		rtm->rtm_type = RTN_LOCAL;
	else
		rtm->rtm_type = RTN_UNICAST;
	rtm->rtm_flags = 0;
	/* NOTE(review): rt->dst.dev is NULL-checked just above but not
	 * here before netif_carrier_ok() - presumably every route that
	 * reaches this point has a device; confirm.
	 */
	if (!netif_carrier_ok(rt->dst.dev)) {
		rtm->rtm_flags |= RTNH_F_LINKDOWN;
		if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
			rtm->rtm_flags |= RTNH_F_DEAD;
	}
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	rtm->rtm_protocol = rt->rt6i_protocol;
	if (rt->rt6i_flags & RTF_DYNAMIC)
		rtm->rtm_protocol = RTPROT_REDIRECT;
	else if (rt->rt6i_flags & RTF_ADDRCONF) {
		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
			rtm->rtm_protocol = RTPROT_RA;
		else
			rtm->rtm_protocol = RTPROT_KERNEL;
	}

	if (rt->rt6i_flags & RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	if (dst) {
		if (nla_put_in6_addr(skb, RTA_DST, dst))
			goto nla_put_failure;
		rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len)
		if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
			goto nla_put_failure;
#ifdef CONFIG_IPV6_SUBTREES
	if (src) {
		if (nla_put_in6_addr(skb, RTA_SRC, src))
			goto nla_put_failure;
		rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len &&
		   nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
		goto nla_put_failure;
#endif
	if (iif) {
#ifdef CONFIG_IPV6_MROUTE
		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
			int err = ip6mr_get_route(net, skb, rtm, nowait);
			if (err <= 0) {
				if (!nowait) {
					if (err == 0)
						return 0;
					goto nla_put_failure;
				} else {
					if (err == -EMSGSIZE)
						goto nla_put_failure;
				}
			}
		} else
#endif
			if (nla_put_u32(skb, RTA_IIF, iif))
				goto nla_put_failure;
	} else if (dst) {
		struct in6_addr saddr_buf;
		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
			goto nla_put_failure;
	}

	if (rt->rt6i_prefsrc.plen) {
		struct in6_addr saddr_buf;
		saddr_buf = rt->rt6i_prefsrc.addr;
		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
			goto nla_put_failure;
	}

	/* Report the discovered PMTU (if any) in place of the MTU metric. */
	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
	if (rt->rt6i_pmtu)
		metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
	if (rtnetlink_put_metrics(skb, metrics) < 0)
		goto nla_put_failure;

	if (rt->rt6i_flags & RTF_GATEWAY) {
		if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
			goto nla_put_failure;
	}

	if (rt->dst.dev &&
	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
		goto nla_put_failure;
	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
		goto nla_put_failure;

	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
		goto nla_put_failure;

	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
		goto nla_put_failure;

	lwtunnel_fill_encap(skb, rt->dst.lwtstate);

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
3190
3191 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
3192 {
3193         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
3194         int prefix;
3195
3196         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3197                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
3198                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
3199         } else
3200                 prefix = 0;
3201
3202         return rt6_fill_node(arg->net,
3203                      arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
3204                      NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
3205                      prefix, 0, NLM_F_MULTI);
3206 }
3207
/* rtnetlink doit handler for RTM_GETROUTE ("ip -6 route get"): build a
 * flow from the request attributes, perform an input-side lookup when
 * RTA_IIF is given (otherwise an output lookup), and unicast the result
 * back to the requester.
 *
 * NOTE(review): rt->dst.error is not checked after the lookup, so a
 * reject/unreachable result is reported as a route rather than as an
 * error - confirm this is the intended userspace contract here.
 */
static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(in_skb->sk);
	struct nlattr *tb[RTA_MAX+1];
	struct rt6_info *rt;
	struct sk_buff *skb;
	struct rtmsg *rtm;
	struct flowi6 fl6;
	int err, iif = 0, oif = 0;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	memset(&fl6, 0, sizeof(fl6));

	/* RTA_SRC/RTA_DST are not in rtm_ipv6_policy, so their lengths
	 * must be validated here before use.
	 */
	if (tb[RTA_SRC]) {
		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
			goto errout;

		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
	}

	if (tb[RTA_DST]) {
		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
			goto errout;

		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
	}

	if (tb[RTA_IIF])
		iif = nla_get_u32(tb[RTA_IIF]);

	if (tb[RTA_OIF])
		oif = nla_get_u32(tb[RTA_OIF]);

	if (tb[RTA_MARK])
		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);

	if (iif) {
		struct net_device *dev;
		int flags = 0;

		/* __dev_get_by_index() is safe here: rtnetlink doit
		 * handlers run with the RTNL held.
		 */
		dev = __dev_get_by_index(net, iif);
		if (!dev) {
			err = -ENODEV;
			goto errout;
		}

		fl6.flowi6_iif = iif;

		if (!ipv6_addr_any(&fl6.saddr))
			flags |= RT6_LOOKUP_F_HAS_SADDR;

		rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
							       flags);
	} else {
		fl6.flowi6_oif = oif;

		/* L3 master (VRF) output interface: force the lookup to
		 * ignore the nexthop oif and let the l3mdev choose.
		 */
		if (netif_index_is_l3_master(net, oif)) {
			fl6.flowi6_flags = FLOWI_FLAG_L3MDEV_SRC |
					   FLOWI_FLAG_SKIP_NH_OIF;
		}

		rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
	}

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb) {
		ip6_rt_put(rt);
		err = -ENOBUFS;
		goto errout;
	}

	/* Reserve room for dummy headers, this skb can pass
	   through good chunk of routing engine.
	 */
	skb_reset_mac_header(skb);
	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));

	/* The skb takes over the route reference. */
	skb_dst_set(skb, &rt->dst);

	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
			    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
			    nlh->nlmsg_seq, 0, 0, 0);
	if (err < 0) {
		kfree_skb(skb);
		goto errout;
	}

	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
errout:
	return err;
}
3303
3304 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
3305                      unsigned int nlm_flags)
3306 {
3307         struct sk_buff *skb;
3308         struct net *net = info->nl_net;
3309         u32 seq;
3310         int err;
3311
3312         err = -ENOBUFS;
3313         seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3314
3315         skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3316         if (!skb)
3317                 goto errout;
3318
3319         err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
3320                                 event, info->portid, seq, 0, 0, nlm_flags);
3321         if (err < 0) {
3322                 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
3323                 WARN_ON(err == -EMSGSIZE);
3324                 kfree_skb(skb);
3325                 goto errout;
3326         }
3327         rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
3328                     info->nlh, gfp_any());
3329         return;
3330 errout:
3331         if (err < 0)
3332                 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
3333 }
3334
/* netdevice notifier: when the per-netns loopback device registers,
 * attach it (and its inet6_dev) to the special null / prohibit /
 * blackhole route entries, which have no real device of their own.
 *
 * NOTE(review): the in6_dev_get() references taken here must be dropped
 * elsewhere (netns teardown) - that path is outside this file chunk.
 */
static int ip6_route_dev_notify(struct notifier_block *this,
				unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);

	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
		net->ipv6.ip6_null_entry->dst.dev = dev;
		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
#endif
	}

	return NOTIFY_OK;
}
3354
3355 /*
3356  *      /proc
3357  */
3358
3359 #ifdef CONFIG_PROC_FS
3360
/* /proc/net/ipv6_route: per-netns seq_file dump of the routing table
 * (open handler ipv6_route_open is defined earlier in this file).
 */
static const struct file_operations ipv6_route_proc_fops = {
	.owner          = THIS_MODULE,
	.open           = ipv6_route_open,
	.read           = seq_read,
	.llseek         = seq_lseek,
	.release        = seq_release_net,
};
3368
3369 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3370 {
3371         struct net *net = (struct net *)seq->private;
3372         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
3373                    net->ipv6.rt6_stats->fib_nodes,
3374                    net->ipv6.rt6_stats->fib_route_nodes,
3375                    net->ipv6.rt6_stats->fib_rt_alloc,
3376                    net->ipv6.rt6_stats->fib_rt_entries,
3377                    net->ipv6.rt6_stats->fib_rt_cache,
3378                    dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
3379                    net->ipv6.rt6_stats->fib_discarded_routes);
3380
3381         return 0;
3382 }
3383
/* open() for /proc/net/rt6_stats: single-shot, per-netns seq file. */
static int rt6_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, rt6_stats_seq_show);
}
3388
/* File operations for /proc/net/rt6_stats. */
static const struct file_operations rt6_stats_seq_fops = {
	.owner   = THIS_MODULE,
	.open    = rt6_stats_seq_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = single_release_net,
};
3396 #endif  /* CONFIG_PROC_FS */
3397
3398 #ifdef CONFIG_SYSCTL
3399
3400 static
3401 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
3402                               void __user *buffer, size_t *lenp, loff_t *ppos)
3403 {
3404         struct net *net;
3405         int delay;
3406         if (!write)
3407                 return -EINVAL;
3408
3409         net = (struct net *)ctl->extra1;
3410         delay = net->ipv6.sysctl.flush_delay;
3411         proc_dointvec(ctl, write, buffer, lenp, ppos);
3412         fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
3413         return 0;
3414 }
3415
/* Template for the per-netns net.ipv6.route.* sysctl table.  The .data
 * pointers reference init_net (or the shared ip6_dst_ops_template) and
 * are re-pointed at the cloning netns in ipv6_route_sysctl_init(); the
 * slot order here must match the table[N] indices used there. */
struct ctl_table ipv6_route_table_template[] = {
	{
		/* write-only trigger handled by ipv6_sysctl_rtcache_flush() */
		.procname	=	"flush",
		.data		=	&init_net.ipv6.sysctl.flush_delay,
		.maxlen		=	sizeof(int),
		.mode		=	0200,
		.proc_handler	=	ipv6_sysctl_rtcache_flush
	},
	{
		.procname	=	"gc_thresh",
		.data		=	&ip6_dst_ops_template.gc_thresh,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"max_size",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"gc_min_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_timeout",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_elasticity",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"mtu_expires",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"min_adv_mss",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		/* same variable as gc_min_interval, exposed in milliseconds */
		.procname	=	"gc_min_interval_ms",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_ms_jiffies,
	},
	{ }
};
3489
3490 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
3491 {
3492         struct ctl_table *table;
3493
3494         table = kmemdup(ipv6_route_table_template,
3495                         sizeof(ipv6_route_table_template),
3496                         GFP_KERNEL);
3497
3498         if (table) {
3499                 table[0].data = &net->ipv6.sysctl.flush_delay;
3500                 table[0].extra1 = net;
3501                 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
3502                 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3503                 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3504                 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3505                 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3506                 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3507                 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3508                 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
3509                 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3510
3511                 /* Don't export sysctls to unprivileged users */
3512                 if (net->user_ns != &init_user_ns)
3513                         table[0].procname = NULL;
3514         }
3515
3516         return table;
3517 }
3518 #endif
3519
/* Per-netns routing setup: copy the dst_ops template, then kmemdup the
 * special route templates (null, and with multiple tables also prohibit
 * and blackhole) so each namespace owns private copies whose dst.ops
 * point at its own ip6_dst_ops.  Finishes by seeding sysctl/GC defaults.
 * Returns 0 or -ENOMEM, unwinding already-done steps via the goto chain
 * (kept in strict reverse order of the allocations above).
 */
static int __net_init ip6_route_net_init(struct net *net)
{
	int ret = -ENOMEM;

	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
	       sizeof(net->ipv6.ip6_dst_ops));

	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
		goto out_ip6_dst_ops;

	/* Each copied entry's dst.path points back at itself. */
	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
					   sizeof(*net->ipv6.ip6_null_entry),
					   GFP_KERNEL);
	if (!net->ipv6.ip6_null_entry)
		goto out_ip6_dst_entries;
	net->ipv6.ip6_null_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_null_entry;
	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
			 ip6_template_metrics, true);

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
					       sizeof(*net->ipv6.ip6_prohibit_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_prohibit_entry)
		goto out_ip6_null_entry;
	net->ipv6.ip6_prohibit_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
			 ip6_template_metrics, true);

	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
					       sizeof(*net->ipv6.ip6_blk_hole_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_blk_hole_entry)
		goto out_ip6_prohibit_entry;
	net->ipv6.ip6_blk_hole_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
			 ip6_template_metrics, true);
#endif

	/* Default sysctl values; intervals/timeouts are in jiffies. */
	net->ipv6.sysctl.flush_delay = 0;
	net->ipv6.sysctl.ip6_rt_max_size = 4096;
	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;

	net->ipv6.ip6_rt_gc_expire = 30*HZ;

	ret = 0;
out:
	return ret;

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
out_ip6_prohibit_entry:
	kfree(net->ipv6.ip6_prohibit_entry);
out_ip6_null_entry:
	kfree(net->ipv6.ip6_null_entry);
#endif
out_ip6_dst_entries:
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
out_ip6_dst_ops:
	goto out;
}
3591
/* Per-netns teardown: free the special route copies made by
 * ip6_route_net_init() and drop the dst entry counter. */
static void __net_exit ip6_route_net_exit(struct net *net)
{
	kfree(net->ipv6.ip6_null_entry);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	kfree(net->ipv6.ip6_prohibit_entry);
	kfree(net->ipv6.ip6_blk_hole_entry);
#endif
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
}
3601
3602 static int __net_init ip6_route_net_init_late(struct net *net)
3603 {
3604 #ifdef CONFIG_PROC_FS
3605         proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3606         proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3607 #endif
3608         return 0;
3609 }
3610
/* Late per-netns teardown: remove the /proc entries created by
 * ip6_route_net_init_late(). */
static void __net_exit ip6_route_net_exit_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ipv6_route", net->proc_net);
	remove_proc_entry("rt6_stats", net->proc_net);
#endif
}
3618
/* Core per-netns routing state (dst_ops, special entries, sysctls). */
static struct pernet_operations ip6_route_net_ops = {
	.init = ip6_route_net_init,
	.exit = ip6_route_net_exit,
};
3623
3624 static int __net_init ipv6_inetpeer_init(struct net *net)
3625 {
3626         struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3627
3628         if (!bp)
3629                 return -ENOMEM;
3630         inet_peer_base_init(bp);
3631         net->ipv6.peers = bp;
3632         return 0;
3633 }
3634
3635 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3636 {
3637         struct inet_peer_base *bp = net->ipv6.peers;
3638
3639         net->ipv6.peers = NULL;
3640         inetpeer_invalidate_tree(bp);
3641         kfree(bp);
3642 }
3643
/* Per-netns inetpeer base lifecycle. */
static struct pernet_operations ipv6_inetpeer_ops = {
	.init	=	ipv6_inetpeer_init,
	.exit	=	ipv6_inetpeer_exit,
};
3648
/* Late per-netns hooks (procfs entries); registered after fib6 rules. */
static struct pernet_operations ip6_route_net_late_ops = {
	.init = ip6_route_net_init_late,
	.exit = ip6_route_net_exit_late,
};
3653
/* Netdevice event notifier for the IPv6 routing layer. */
static struct notifier_block ip6_route_dev_notifier = {
	.notifier_call = ip6_route_dev_notify,
	.priority = 0,
};
3658
3659 int __init ip6_route_init(void)
3660 {
3661         int ret;
3662         int cpu;
3663
3664         ret = -ENOMEM;
3665         ip6_dst_ops_template.kmem_cachep =
3666                 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3667                                   SLAB_HWCACHE_ALIGN, NULL);
3668         if (!ip6_dst_ops_template.kmem_cachep)
3669                 goto out;
3670
3671         ret = dst_entries_init(&ip6_dst_blackhole_ops);
3672         if (ret)
3673                 goto out_kmem_cache;
3674
3675         ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3676         if (ret)
3677                 goto out_dst_entries;
3678
3679         ret = register_pernet_subsys(&ip6_route_net_ops);
3680         if (ret)
3681                 goto out_register_inetpeer;
3682
3683         ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3684
3685         /* Registering of the loopback is done before this portion of code,
3686          * the loopback reference in rt6_info will not be taken, do it
3687          * manually for init_net */
3688         init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3689         init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3690   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3691         init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3692         init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3693         init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3694         init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3695   #endif
3696         ret = fib6_init();
3697         if (ret)
3698                 goto out_register_subsys;
3699
3700         ret = xfrm6_init();
3701         if (ret)
3702                 goto out_fib6_init;
3703
3704         ret = fib6_rules_init();
3705         if (ret)
3706                 goto xfrm6_init;
3707
3708         ret = register_pernet_subsys(&ip6_route_net_late_ops);
3709         if (ret)
3710                 goto fib6_rules_init;
3711
3712         ret = -ENOBUFS;
3713         if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3714             __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3715             __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3716                 goto out_register_late_subsys;
3717
3718         ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3719         if (ret)
3720                 goto out_register_late_subsys;
3721
3722         for_each_possible_cpu(cpu) {
3723                 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
3724
3725                 INIT_LIST_HEAD(&ul->head);
3726                 spin_lock_init(&ul->lock);
3727         }
3728
3729 out:
3730         return ret;
3731
3732 out_register_late_subsys:
3733         unregister_pernet_subsys(&ip6_route_net_late_ops);
3734 fib6_rules_init:
3735         fib6_rules_cleanup();
3736 xfrm6_init:
3737         xfrm6_fini();
3738 out_fib6_init:
3739         fib6_gc_cleanup();
3740 out_register_subsys:
3741         unregister_pernet_subsys(&ip6_route_net_ops);
3742 out_register_inetpeer:
3743         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3744 out_dst_entries:
3745         dst_entries_destroy(&ip6_dst_blackhole_ops);
3746 out_kmem_cache:
3747         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3748         goto out;
3749 }
3750
/* Module teardown: undo everything ip6_route_init() did, in strict
 * reverse order of registration. */
void ip6_route_cleanup(void)
{
	unregister_netdevice_notifier(&ip6_route_dev_notifier);
	unregister_pernet_subsys(&ip6_route_net_late_ops);
	fib6_rules_cleanup();
	xfrm6_fini();
	fib6_gc_cleanup();
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
	unregister_pernet_subsys(&ip6_route_net_ops);
	dst_entries_destroy(&ip6_dst_blackhole_ops);
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}