]> git.kernelconcepts.de Git - karo-tx-linux.git/blob - net/ipv6/route.c
Merge branches 'atags', 'cache-l2x0', 'clkdev', 'fixes', 'integrator', 'misc', 'opcod...
[karo-tx-linux.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13
14 /*      Changes:
15  *
16  *      YOSHIFUJI Hideaki @USAGI
17  *              reworked default router selection.
18  *              - respect outgoing interface
19  *              - select from (probably) reachable routers (i.e.
20  *              routers in REACHABLE, STALE, DELAY or PROBE states).
21  *              - always select the same router if it is (probably)
22  *              reachable.  otherwise, round-robin the list.
23  *      Ville Nuorvala
24  *              Fixed routing subtrees.
25  */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60
61 #include <asm/uaccess.h>
62
63 #ifdef CONFIG_SYSCTL
64 #include <linux/sysctl.h>
65 #endif
66
67 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
68                                     const struct in6_addr *dest);
69 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
70 static unsigned int      ip6_default_advmss(const struct dst_entry *dst);
71 static unsigned int      ip6_mtu(const struct dst_entry *dst);
72 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
73 static void             ip6_dst_destroy(struct dst_entry *);
74 static void             ip6_dst_ifdown(struct dst_entry *,
75                                        struct net_device *dev, int how);
76 static int               ip6_dst_gc(struct dst_ops *ops);
77
78 static int              ip6_pkt_discard(struct sk_buff *skb);
79 static int              ip6_pkt_discard_out(struct sk_buff *skb);
80 static void             ip6_link_failure(struct sk_buff *skb);
81 static void             ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
82                                            struct sk_buff *skb, u32 mtu);
83 static void             rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
84                                         struct sk_buff *skb);
85
86 #ifdef CONFIG_IPV6_ROUTE_INFO
87 static struct rt6_info *rt6_add_route_info(struct net *net,
88                                            const struct in6_addr *prefix, int prefixlen,
89                                            const struct in6_addr *gwaddr, int ifindex,
90                                            unsigned int pref);
91 static struct rt6_info *rt6_get_route_info(struct net *net,
92                                            const struct in6_addr *prefix, int prefixlen,
93                                            const struct in6_addr *gwaddr, int ifindex);
94 #endif
95
96 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
97 {
98         struct rt6_info *rt = (struct rt6_info *) dst;
99         struct inet_peer *peer;
100         u32 *p = NULL;
101
102         if (!(rt->dst.flags & DST_HOST))
103                 return NULL;
104
105         peer = rt6_get_peer_create(rt);
106         if (peer) {
107                 u32 *old_p = __DST_METRICS_PTR(old);
108                 unsigned long prev, new;
109
110                 p = peer->metrics;
111                 if (inet_metrics_new(peer))
112                         memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
113
114                 new = (unsigned long) p;
115                 prev = cmpxchg(&dst->_metrics, old, new);
116
117                 if (prev != old) {
118                         p = __DST_METRICS_PTR(prev);
119                         if (prev & DST_METRICS_READ_ONLY)
120                                 p = NULL;
121                 }
122         }
123         return p;
124 }
125
126 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
127                                              struct sk_buff *skb,
128                                              const void *daddr)
129 {
130         struct in6_addr *p = &rt->rt6i_gateway;
131
132         if (!ipv6_addr_any(p))
133                 return (const void *) p;
134         else if (skb)
135                 return &ipv6_hdr(skb)->daddr;
136         return daddr;
137 }
138
139 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
140                                           struct sk_buff *skb,
141                                           const void *daddr)
142 {
143         struct rt6_info *rt = (struct rt6_info *) dst;
144         struct neighbour *n;
145
146         daddr = choose_neigh_daddr(rt, skb, daddr);
147         n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
148         if (n)
149                 return n;
150         return neigh_create(&nd_tbl, daddr, dst->dev);
151 }
152
153 static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
154 {
155         struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
156         if (!n) {
157                 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
158                 if (IS_ERR(n))
159                         return PTR_ERR(n);
160         }
161         rt->n = n;
162
163         return 0;
164 }
165
166 static struct dst_ops ip6_dst_ops_template = {
167         .family                 =       AF_INET6,
168         .protocol               =       cpu_to_be16(ETH_P_IPV6),
169         .gc                     =       ip6_dst_gc,
170         .gc_thresh              =       1024,
171         .check                  =       ip6_dst_check,
172         .default_advmss         =       ip6_default_advmss,
173         .mtu                    =       ip6_mtu,
174         .cow_metrics            =       ipv6_cow_metrics,
175         .destroy                =       ip6_dst_destroy,
176         .ifdown                 =       ip6_dst_ifdown,
177         .negative_advice        =       ip6_negative_advice,
178         .link_failure           =       ip6_link_failure,
179         .update_pmtu            =       ip6_rt_update_pmtu,
180         .redirect               =       rt6_do_redirect,
181         .local_out              =       __ip6_local_out,
182         .neigh_lookup           =       ip6_neigh_lookup,
183 };
184
185 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
186 {
187         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
188
189         return mtu ? : dst->dev->mtu;
190 }
191
192 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
193                                          struct sk_buff *skb, u32 mtu)
194 {
195 }
196
/* dst_ops->redirect handler for blackhole routes: deliberately a no-op. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
	/* nothing to do */
}
201
202 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
203                                          unsigned long old)
204 {
205         return NULL;
206 }
207
208 static struct dst_ops ip6_dst_blackhole_ops = {
209         .family                 =       AF_INET6,
210         .protocol               =       cpu_to_be16(ETH_P_IPV6),
211         .destroy                =       ip6_dst_destroy,
212         .check                  =       ip6_dst_check,
213         .mtu                    =       ip6_blackhole_mtu,
214         .default_advmss         =       ip6_default_advmss,
215         .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
216         .redirect               =       ip6_rt_blackhole_redirect,
217         .cow_metrics            =       ip6_rt_blackhole_cow_metrics,
218         .neigh_lookup           =       ip6_neigh_lookup,
219 };
220
221 static const u32 ip6_template_metrics[RTAX_MAX] = {
222         [RTAX_HOPLIMIT - 1] = 255,
223 };
224
225 static struct rt6_info ip6_null_entry_template = {
226         .dst = {
227                 .__refcnt       = ATOMIC_INIT(1),
228                 .__use          = 1,
229                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
230                 .error          = -ENETUNREACH,
231                 .input          = ip6_pkt_discard,
232                 .output         = ip6_pkt_discard_out,
233         },
234         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
235         .rt6i_protocol  = RTPROT_KERNEL,
236         .rt6i_metric    = ~(u32) 0,
237         .rt6i_ref       = ATOMIC_INIT(1),
238 };
239
240 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
241
242 static int ip6_pkt_prohibit(struct sk_buff *skb);
243 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
244
245 static struct rt6_info ip6_prohibit_entry_template = {
246         .dst = {
247                 .__refcnt       = ATOMIC_INIT(1),
248                 .__use          = 1,
249                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
250                 .error          = -EACCES,
251                 .input          = ip6_pkt_prohibit,
252                 .output         = ip6_pkt_prohibit_out,
253         },
254         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
255         .rt6i_protocol  = RTPROT_KERNEL,
256         .rt6i_metric    = ~(u32) 0,
257         .rt6i_ref       = ATOMIC_INIT(1),
258 };
259
260 static struct rt6_info ip6_blk_hole_entry_template = {
261         .dst = {
262                 .__refcnt       = ATOMIC_INIT(1),
263                 .__use          = 1,
264                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
265                 .error          = -EINVAL,
266                 .input          = dst_discard,
267                 .output         = dst_discard,
268         },
269         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
270         .rt6i_protocol  = RTPROT_KERNEL,
271         .rt6i_metric    = ~(u32) 0,
272         .rt6i_ref       = ATOMIC_INIT(1),
273 };
274
275 #endif
276
277 /* allocate dst with ip6_dst_ops */
278 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
279                                              struct net_device *dev,
280                                              int flags,
281                                              struct fib6_table *table)
282 {
283         struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
284                                         0, DST_OBSOLETE_FORCE_CHK, flags);
285
286         if (rt) {
287                 struct dst_entry *dst = &rt->dst;
288
289                 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
290                 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
291                 rt->rt6i_genid = rt_genid(net);
292         }
293         return rt;
294 }
295
296 static void ip6_dst_destroy(struct dst_entry *dst)
297 {
298         struct rt6_info *rt = (struct rt6_info *)dst;
299         struct inet6_dev *idev = rt->rt6i_idev;
300
301         if (rt->n)
302                 neigh_release(rt->n);
303
304         if (!(rt->dst.flags & DST_HOST))
305                 dst_destroy_metrics_generic(dst);
306
307         if (idev) {
308                 rt->rt6i_idev = NULL;
309                 in6_dev_put(idev);
310         }
311
312         if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
313                 dst_release(dst->from);
314
315         if (rt6_has_peer(rt)) {
316                 struct inet_peer *peer = rt6_peer_ptr(rt);
317                 inet_putpeer(peer);
318         }
319 }
320
321 static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
322
323 static u32 rt6_peer_genid(void)
324 {
325         return atomic_read(&__rt6_peer_genid);
326 }
327
328 void rt6_bind_peer(struct rt6_info *rt, int create)
329 {
330         struct inet_peer_base *base;
331         struct inet_peer *peer;
332
333         base = inetpeer_base_ptr(rt->_rt6i_peer);
334         if (!base)
335                 return;
336
337         peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
338         if (peer) {
339                 if (!rt6_set_peer(rt, peer))
340                         inet_putpeer(peer);
341                 else
342                         rt->rt6i_peer_genid = rt6_peer_genid();
343         }
344 }
345
346 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
347                            int how)
348 {
349         struct rt6_info *rt = (struct rt6_info *)dst;
350         struct inet6_dev *idev = rt->rt6i_idev;
351         struct net_device *loopback_dev =
352                 dev_net(dev)->loopback_dev;
353
354         if (dev != loopback_dev) {
355                 if (idev && idev->dev == dev) {
356                         struct inet6_dev *loopback_idev =
357                                 in6_dev_get(loopback_dev);
358                         if (loopback_idev) {
359                                 rt->rt6i_idev = loopback_idev;
360                                 in6_dev_put(idev);
361                         }
362                 }
363                 if (rt->n && rt->n->dev == dev) {
364                         rt->n->dev = loopback_dev;
365                         dev_hold(loopback_dev);
366                         dev_put(dev);
367                 }
368         }
369 }
370
371 static bool rt6_check_expired(const struct rt6_info *rt)
372 {
373         struct rt6_info *ort = NULL;
374
375         if (rt->rt6i_flags & RTF_EXPIRES) {
376                 if (time_after(jiffies, rt->dst.expires))
377                         return true;
378         } else if (rt->dst.from) {
379                 ort = (struct rt6_info *) rt->dst.from;
380                 return (ort->rt6i_flags & RTF_EXPIRES) &&
381                         time_after(jiffies, ort->dst.expires);
382         }
383         return false;
384 }
385
386 static bool rt6_need_strict(const struct in6_addr *daddr)
387 {
388         return ipv6_addr_type(daddr) &
389                 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
390 }
391
392 /*
393  *      Route lookup. Any table->tb6_lock is implied.
394  */
395
396 static inline struct rt6_info *rt6_device_match(struct net *net,
397                                                     struct rt6_info *rt,
398                                                     const struct in6_addr *saddr,
399                                                     int oif,
400                                                     int flags)
401 {
402         struct rt6_info *local = NULL;
403         struct rt6_info *sprt;
404
405         if (!oif && ipv6_addr_any(saddr))
406                 goto out;
407
408         for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
409                 struct net_device *dev = sprt->dst.dev;
410
411                 if (oif) {
412                         if (dev->ifindex == oif)
413                                 return sprt;
414                         if (dev->flags & IFF_LOOPBACK) {
415                                 if (!sprt->rt6i_idev ||
416                                     sprt->rt6i_idev->dev->ifindex != oif) {
417                                         if (flags & RT6_LOOKUP_F_IFACE && oif)
418                                                 continue;
419                                         if (local && (!oif ||
420                                                       local->rt6i_idev->dev->ifindex == oif))
421                                                 continue;
422                                 }
423                                 local = sprt;
424                         }
425                 } else {
426                         if (ipv6_chk_addr(net, saddr, dev,
427                                           flags & RT6_LOOKUP_F_IFACE))
428                                 return sprt;
429                 }
430         }
431
432         if (oif) {
433                 if (local)
434                         return local;
435
436                 if (flags & RT6_LOOKUP_F_IFACE)
437                         return net->ipv6.ip6_null_entry;
438         }
439 out:
440         return rt;
441 }
442
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: send a neighbour solicitation towards the
 * neighbour bound to @rt when that neighbour is not in a NUD_VALID state
 * and was last updated more than rtr_probe_interval ago.
 *
 * @rt may be NULL, and may have no neighbour bound; both are no-ops.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one per
 * minute.  The rate limit is enforced by stamping neigh->updated, so the
 * check and the stamp are done together under the write side of
 * neigh->lock; under the read side two CPUs could both pass the check and
 * both send a probe.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	bool send_probe = false;

	rcu_read_lock();
	neigh = rt ? rt->n : NULL;
	if (!neigh || (neigh->nud_state & NUD_VALID))
		goto out;

	write_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		neigh->updated = jiffies;
		send_probe = true;
	}
	write_unlock_bh(&neigh->lock);

	/* The solicitation itself is sent without holding neigh->lock. */
	if (send_probe) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
	}
out:
	rcu_read_unlock();
}
#else
/* Without CONFIG_IPV6_ROUTER_PREF there is no router probing. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
482
483 /*
484  * Default Router Selection (RFC 2461 6.3.6)
485  */
486 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
487 {
488         struct net_device *dev = rt->dst.dev;
489         if (!oif || dev->ifindex == oif)
490                 return 2;
491         if ((dev->flags & IFF_LOOPBACK) &&
492             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
493                 return 1;
494         return 0;
495 }
496
497 static inline int rt6_check_neigh(struct rt6_info *rt)
498 {
499         struct neighbour *neigh;
500         int m;
501
502         rcu_read_lock();
503         neigh = rt->n;
504         if (rt->rt6i_flags & RTF_NONEXTHOP ||
505             !(rt->rt6i_flags & RTF_GATEWAY))
506                 m = 1;
507         else if (neigh) {
508                 read_lock_bh(&neigh->lock);
509                 if (neigh->nud_state & NUD_VALID)
510                         m = 2;
511 #ifdef CONFIG_IPV6_ROUTER_PREF
512                 else if (neigh->nud_state & NUD_FAILED)
513                         m = 0;
514 #endif
515                 else
516                         m = 1;
517                 read_unlock_bh(&neigh->lock);
518         } else
519                 m = 0;
520         rcu_read_unlock();
521         return m;
522 }
523
524 static int rt6_score_route(struct rt6_info *rt, int oif,
525                            int strict)
526 {
527         int m, n;
528
529         m = rt6_check_dev(rt, oif);
530         if (!m && (strict & RT6_LOOKUP_F_IFACE))
531                 return -1;
532 #ifdef CONFIG_IPV6_ROUTER_PREF
533         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
534 #endif
535         n = rt6_check_neigh(rt);
536         if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
537                 return -1;
538         return m;
539 }
540
541 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
542                                    int *mpri, struct rt6_info *match)
543 {
544         int m;
545
546         if (rt6_check_expired(rt))
547                 goto out;
548
549         m = rt6_score_route(rt, oif, strict);
550         if (m < 0)
551                 goto out;
552
553         if (m > *mpri) {
554                 if (strict & RT6_LOOKUP_F_REACHABLE)
555                         rt6_probe(match);
556                 *mpri = m;
557                 match = rt;
558         } else if (strict & RT6_LOOKUP_F_REACHABLE) {
559                 rt6_probe(rt);
560         }
561
562 out:
563         return match;
564 }
565
566 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
567                                      struct rt6_info *rr_head,
568                                      u32 metric, int oif, int strict)
569 {
570         struct rt6_info *rt, *match;
571         int mpri = -1;
572
573         match = NULL;
574         for (rt = rr_head; rt && rt->rt6i_metric == metric;
575              rt = rt->dst.rt6_next)
576                 match = find_match(rt, oif, strict, &mpri, match);
577         for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
578              rt = rt->dst.rt6_next)
579                 match = find_match(rt, oif, strict, &mpri, match);
580
581         return match;
582 }
583
584 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
585 {
586         struct rt6_info *match, *rt0;
587         struct net *net;
588
589         rt0 = fn->rr_ptr;
590         if (!rt0)
591                 fn->rr_ptr = rt0 = fn->leaf;
592
593         match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
594
595         if (!match &&
596             (strict & RT6_LOOKUP_F_REACHABLE)) {
597                 struct rt6_info *next = rt0->dst.rt6_next;
598
599                 /* no entries matched; do round-robin */
600                 if (!next || next->rt6i_metric != rt0->rt6i_metric)
601                         next = fn->leaf;
602
603                 if (next != rt0)
604                         fn->rr_ptr = next;
605         }
606
607         net = dev_net(rt0->dst.dev);
608         return match ? match : net->ipv6.ip6_null_entry;
609 }
610
611 #ifdef CONFIG_IPV6_ROUTE_INFO
612 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
613                   const struct in6_addr *gwaddr)
614 {
615         struct net *net = dev_net(dev);
616         struct route_info *rinfo = (struct route_info *) opt;
617         struct in6_addr prefix_buf, *prefix;
618         unsigned int pref;
619         unsigned long lifetime;
620         struct rt6_info *rt;
621
622         if (len < sizeof(struct route_info)) {
623                 return -EINVAL;
624         }
625
626         /* Sanity check for prefix_len and length */
627         if (rinfo->length > 3) {
628                 return -EINVAL;
629         } else if (rinfo->prefix_len > 128) {
630                 return -EINVAL;
631         } else if (rinfo->prefix_len > 64) {
632                 if (rinfo->length < 2) {
633                         return -EINVAL;
634                 }
635         } else if (rinfo->prefix_len > 0) {
636                 if (rinfo->length < 1) {
637                         return -EINVAL;
638                 }
639         }
640
641         pref = rinfo->route_pref;
642         if (pref == ICMPV6_ROUTER_PREF_INVALID)
643                 return -EINVAL;
644
645         lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
646
647         if (rinfo->length == 3)
648                 prefix = (struct in6_addr *)rinfo->prefix;
649         else {
650                 /* this function is safe */
651                 ipv6_addr_prefix(&prefix_buf,
652                                  (struct in6_addr *)rinfo->prefix,
653                                  rinfo->prefix_len);
654                 prefix = &prefix_buf;
655         }
656
657         rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
658                                 dev->ifindex);
659
660         if (rt && !lifetime) {
661                 ip6_del_rt(rt);
662                 rt = NULL;
663         }
664
665         if (!rt && lifetime)
666                 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
667                                         pref);
668         else if (rt)
669                 rt->rt6i_flags = RTF_ROUTEINFO |
670                                  (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
671
672         if (rt) {
673                 if (!addrconf_finite_timeout(lifetime))
674                         rt6_clean_expires(rt);
675                 else
676                         rt6_set_expires(rt, jiffies + HZ * lifetime);
677
678                 dst_release(&rt->dst);
679         }
680         return 0;
681 }
682 #endif
683
/*
 * BACKTRACK(net, saddr) - climb the fib6 tree after a failed selection.
 *
 * Must be expanded in a function that has locals named 'rt' and 'fn' and
 * labels 'out' and 'restart'.  When 'rt' is the namespace's null entry,
 * 'fn' is moved towards the root (descending into a parent's subtree by
 * 'saddr' when there is one that is not the current node) until either
 * the top-level root is hit (goto out) or a node carrying route info is
 * found (goto restart).  Otherwise the macro does nothing.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == (__net)->ipv6.ip6_null_entry) {			\
		struct fib6_node *up;					\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			up = fn->parent;				\
			if (FIB6_SUBTREE(up) && FIB6_SUBTREE(up) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(up),	\
						 NULL, (saddr));	\
			else						\
				fn = up;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
701
702 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
703                                              struct fib6_table *table,
704                                              struct flowi6 *fl6, int flags)
705 {
706         struct fib6_node *fn;
707         struct rt6_info *rt;
708
709         read_lock_bh(&table->tb6_lock);
710         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
711 restart:
712         rt = fn->leaf;
713         rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
714         BACKTRACK(net, &fl6->saddr);
715 out:
716         dst_use(&rt->dst, jiffies);
717         read_unlock_bh(&table->tb6_lock);
718         return rt;
719
720 }
721
722 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
723                                     int flags)
724 {
725         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
726 }
727 EXPORT_SYMBOL_GPL(ip6_route_lookup);
728
729 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
730                             const struct in6_addr *saddr, int oif, int strict)
731 {
732         struct flowi6 fl6 = {
733                 .flowi6_oif = oif,
734                 .daddr = *daddr,
735         };
736         struct dst_entry *dst;
737         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
738
739         if (saddr) {
740                 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
741                 flags |= RT6_LOOKUP_F_HAS_SADDR;
742         }
743
744         dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
745         if (dst->error == 0)
746                 return (struct rt6_info *) dst;
747
748         dst_release(dst);
749
750         return NULL;
751 }
752
753 EXPORT_SYMBOL(rt6_lookup);
754
/* ip6_ins_rt is called with table->tb6_lock NOT held; the lock is taken
   here.  It takes a new route entry; if the addition fails for any
   reason, the route is freed.  In any case, if the caller does not hold
   a reference to the route, it may be destroyed.
 */
760
761 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
762 {
763         int err;
764         struct fib6_table *table;
765
766         table = rt->rt6i_table;
767         write_lock_bh(&table->tb6_lock);
768         err = fib6_add(&table->tb6_root, rt, info);
769         write_unlock_bh(&table->tb6_lock);
770
771         return err;
772 }
773
774 int ip6_ins_rt(struct rt6_info *rt)
775 {
776         struct nl_info info = {
777                 .nl_net = dev_net(rt->dst.dev),
778         };
779         return __ip6_ins_rt(rt, &info);
780 }
781
782 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
783                                       const struct in6_addr *daddr,
784                                       const struct in6_addr *saddr)
785 {
786         struct rt6_info *rt;
787
788         /*
789          *      Clone the route.
790          */
791
792         rt = ip6_rt_copy(ort, daddr);
793
794         if (rt) {
795                 int attempts = !in_softirq();
796
797                 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
798                         if (ort->rt6i_dst.plen != 128 &&
799                             ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
800                                 rt->rt6i_flags |= RTF_ANYCAST;
801                         rt->rt6i_gateway = *daddr;
802                 }
803
804                 rt->rt6i_flags |= RTF_CACHE;
805
806 #ifdef CONFIG_IPV6_SUBTREES
807                 if (rt->rt6i_src.plen && saddr) {
808                         rt->rt6i_src.addr = *saddr;
809                         rt->rt6i_src.plen = 128;
810                 }
811 #endif
812
813         retry:
814                 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
815                         struct net *net = dev_net(rt->dst.dev);
816                         int saved_rt_min_interval =
817                                 net->ipv6.sysctl.ip6_rt_gc_min_interval;
818                         int saved_rt_elasticity =
819                                 net->ipv6.sysctl.ip6_rt_gc_elasticity;
820
821                         if (attempts-- > 0) {
822                                 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
823                                 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
824
825                                 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
826
827                                 net->ipv6.sysctl.ip6_rt_gc_elasticity =
828                                         saved_rt_elasticity;
829                                 net->ipv6.sysctl.ip6_rt_gc_min_interval =
830                                         saved_rt_min_interval;
831                                 goto retry;
832                         }
833
834                         net_warn_ratelimited("Neighbour table overflow\n");
835                         dst_free(&rt->dst);
836                         return NULL;
837                 }
838         }
839
840         return rt;
841 }
842
843 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
844                                         const struct in6_addr *daddr)
845 {
846         struct rt6_info *rt = ip6_rt_copy(ort, daddr);
847
848         if (rt) {
849                 rt->rt6i_flags |= RTF_CACHE;
850                 rt->n = neigh_clone(ort->n);
851         }
852         return rt;
853 }
854
/*
 * Resolve @fl6 against @table and return the selected route with a
 * reference held (every exit path goes through a dst_hold()).
 *
 * @oif is passed to rt6_select() together with the lookup flags; of
 * @flags only RT6_LOOKUP_F_IFACE is folded into those flags here.
 * When devconf_all->forwarding is off, the first selection pass also
 * sets RT6_LOOKUP_F_REACHABLE; if that pass ends at the "out" label the
 * lookup is repeated once with the flag cleared.
 *
 * A selected route that is neither the null entry nor RTF_CACHE is
 * copied with rt6_alloc_cow() or rt6_alloc_clone() and the copy is
 * inserted with ip6_ins_rt().  Because the table lock is dropped while
 * doing so, a failed insert restarts the whole lookup, bounded by
 * "attempts".  If no copy could be allocated, the null entry is used
 * in its place.
 *
 * NOTE(review): BACKTRACK() is a macro defined outside this excerpt; it
 * is expected to use the local labels/variables of this function.
 */
855 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
856                                       struct flowi6 *fl6, int flags)
857 {
858         struct fib6_node *fn;
859         struct rt6_info *rt, *nrt;
860         int strict = 0;
861         int attempts = 3;
862         int err;
            /* Ask for RT6_LOOKUP_F_REACHABLE only when forwarding is disabled. */
863         int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
864
865         strict |= flags & RT6_LOOKUP_F_IFACE;
866
867 relookup:
868         read_lock_bh(&table->tb6_lock);
869
870 restart_2:
871         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
872
873 restart:
874         rt = rt6_select(fn, oif, strict | reachable);
875
876         BACKTRACK(net, &fl6->saddr);
            /* Null entry and cached routes are returned as they are (via "out"). */
877         if (rt == net->ipv6.ip6_null_entry ||
878             rt->rt6i_flags & RTF_CACHE)
879                 goto out;
880
            /* Pin rt before dropping the table lock. */
881         dst_hold(&rt->dst);
882         read_unlock_bh(&table->tb6_lock);
883
884         if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP))
885                 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
886         else if (!(rt->dst.flags & DST_HOST))
887                 nrt = rt6_alloc_clone(rt, &fl6->daddr);
888         else
                    /* Host route that needs no copy: return it with the hold taken above. */
889                 goto out2;
890
            /* Swap the reference from the original route to the copy (or the null entry). */
891         dst_release(&rt->dst);
892         rt = nrt ? : net->ipv6.ip6_null_entry;
893
894         dst_hold(&rt->dst);
895         if (nrt) {
896                 err = ip6_ins_rt(nrt);
897                 if (!err)
898                         goto out2;
899         }
900
            /* Insert failed or nothing was allocated: give up once the retry budget is spent. */
901         if (--attempts <= 0)
902                 goto out2;
903
904         /*
905          * Race condition! In the gap, when table->tb6_lock was
906          * released someone could insert this route.  Relookup.
907          */
908         dst_release(&rt->dst);
909         goto relookup;
910
911 out:
            /* Still under tb6_lock here: redo the lookup once without the reachable flag. */
912         if (reachable) {
913                 reachable = 0;
914                 goto restart_2;
915         }
916         dst_hold(&rt->dst);
917         read_unlock_bh(&table->tb6_lock);
918 out2:
            /* Usage accounting on the route that is about to be returned. */
919         rt->dst.lastuse = jiffies;
920         rt->dst.__use++;
921
922         return rt;
923 }
924
925 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
926                                             struct flowi6 *fl6, int flags)
927 {
928         return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
929 }
930
931 static struct dst_entry *ip6_route_input_lookup(struct net *net,
932                                                 struct net_device *dev,
933                                                 struct flowi6 *fl6, int flags)
934 {
935         if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
936                 flags |= RT6_LOOKUP_F_IFACE;
937
938         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
939 }
940
941 void ip6_route_input(struct sk_buff *skb)
942 {
943         const struct ipv6hdr *iph = ipv6_hdr(skb);
944         struct net *net = dev_net(skb->dev);
945         int flags = RT6_LOOKUP_F_HAS_SADDR;
946         struct flowi6 fl6 = {
947                 .flowi6_iif = skb->dev->ifindex,
948                 .daddr = iph->daddr,
949                 .saddr = iph->saddr,
950                 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
951                 .flowi6_mark = skb->mark,
952                 .flowi6_proto = iph->nexthdr,
953         };
954
955         skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
956 }
957
958 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
959                                              struct flowi6 *fl6, int flags)
960 {
961         return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
962 }
963
964 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
965                                     struct flowi6 *fl6)
966 {
967         int flags = 0;
968
969         fl6->flowi6_iif = net->loopback_dev->ifindex;
970
971         if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
972                 flags |= RT6_LOOKUP_F_IFACE;
973
974         if (!ipv6_addr_any(&fl6->saddr))
975                 flags |= RT6_LOOKUP_F_HAS_SADDR;
976         else if (sk)
977                 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
978
979         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
980 }
981
982 EXPORT_SYMBOL(ip6_route_output);
983
984 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
985 {
986         struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
987         struct dst_entry *new = NULL;
988
989         rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
990         if (rt) {
991                 new = &rt->dst;
992
993                 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
994                 rt6_init_peer(rt, net->ipv6.peers);
995
996                 new->__use = 1;
997                 new->input = dst_discard;
998                 new->output = dst_discard;
999
1000                 if (dst_metrics_read_only(&ort->dst))
1001                         new->_metrics = ort->dst._metrics;
1002                 else
1003                         dst_copy_metrics(new, &ort->dst);
1004                 rt->rt6i_idev = ort->rt6i_idev;
1005                 if (rt->rt6i_idev)
1006                         in6_dev_hold(rt->rt6i_idev);
1007
1008                 rt->rt6i_gateway = ort->rt6i_gateway;
1009                 rt->rt6i_flags = ort->rt6i_flags;
1010                 rt6_clean_expires(rt);
1011                 rt->rt6i_metric = 0;
1012
1013                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1014 #ifdef CONFIG_IPV6_SUBTREES
1015                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1016 #endif
1017
1018                 dst_free(new);
1019         }
1020
1021         dst_release(dst_orig);
1022         return new ? new : ERR_PTR(-ENOMEM);
1023 }
1024
1025 /*
1026  *      Destination cache support functions
1027  */
1028
1029 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1030 {
1031         struct rt6_info *rt;
1032
1033         rt = (struct rt6_info *) dst;
1034
1035         /* All IPV6 dsts are created with ->obsolete set to the value
1036          * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1037          * into this function always.
1038          */
1039         if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
1040                 return NULL;
1041
1042         if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
1043                 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
1044                         if (!rt6_has_peer(rt))
1045                                 rt6_bind_peer(rt, 0);
1046                         rt->rt6i_peer_genid = rt6_peer_genid();
1047                 }
1048                 return dst;
1049         }
1050         return NULL;
1051 }
1052
1053 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1054 {
1055         struct rt6_info *rt = (struct rt6_info *) dst;
1056
1057         if (rt) {
1058                 if (rt->rt6i_flags & RTF_CACHE) {
1059                         if (rt6_check_expired(rt)) {
1060                                 ip6_del_rt(rt);
1061                                 dst = NULL;
1062                         }
1063                 } else {
1064                         dst_release(dst);
1065                         dst = NULL;
1066                 }
1067         }
1068         return dst;
1069 }
1070
1071 static void ip6_link_failure(struct sk_buff *skb)
1072 {
1073         struct rt6_info *rt;
1074
1075         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1076
1077         rt = (struct rt6_info *) skb_dst(skb);
1078         if (rt) {
1079                 if (rt->rt6i_flags & RTF_CACHE)
1080                         rt6_update_expires(rt, 0);
1081                 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1082                         rt->rt6i_node->fn_sernum = -1;
1083         }
1084 }
1085
1086 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1087                                struct sk_buff *skb, u32 mtu)
1088 {
1089         struct rt6_info *rt6 = (struct rt6_info*)dst;
1090
1091         dst_confirm(dst);
1092         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1093                 struct net *net = dev_net(dst->dev);
1094
1095                 rt6->rt6i_flags |= RTF_MODIFIED;
1096                 if (mtu < IPV6_MIN_MTU) {
1097                         u32 features = dst_metric(dst, RTAX_FEATURES);
1098                         mtu = IPV6_MIN_MTU;
1099                         features |= RTAX_FEATURE_ALLFRAG;
1100                         dst_metric_set(dst, RTAX_FEATURES, features);
1101                 }
1102                 dst_metric_set(dst, RTAX_MTU, mtu);
1103                 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1104         }
1105 }
1106
1107 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1108                      int oif, u32 mark)
1109 {
1110         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1111         struct dst_entry *dst;
1112         struct flowi6 fl6;
1113
1114         memset(&fl6, 0, sizeof(fl6));
1115         fl6.flowi6_oif = oif;
1116         fl6.flowi6_mark = mark;
1117         fl6.flowi6_flags = 0;
1118         fl6.daddr = iph->daddr;
1119         fl6.saddr = iph->saddr;
1120         fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1121
1122         dst = ip6_route_output(net, NULL, &fl6);
1123         if (!dst->error)
1124                 ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1125         dst_release(dst);
1126 }
1127 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1128
1129 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1130 {
1131         ip6_update_pmtu(skb, sock_net(sk), mtu,
1132                         sk->sk_bound_dev_if, sk->sk_mark);
1133 }
1134 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1135
1136 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1137 {
1138         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1139         struct dst_entry *dst;
1140         struct flowi6 fl6;
1141
1142         memset(&fl6, 0, sizeof(fl6));
1143         fl6.flowi6_oif = oif;
1144         fl6.flowi6_mark = mark;
1145         fl6.flowi6_flags = 0;
1146         fl6.daddr = iph->daddr;
1147         fl6.saddr = iph->saddr;
1148         fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1149
1150         dst = ip6_route_output(net, NULL, &fl6);
1151         if (!dst->error)
1152                 rt6_do_redirect(dst, NULL, skb);
1153         dst_release(dst);
1154 }
1155 EXPORT_SYMBOL_GPL(ip6_redirect);
1156
1157 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1158 {
1159         ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1160 }
1161 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1162
1163 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1164 {
1165         struct net_device *dev = dst->dev;
1166         unsigned int mtu = dst_mtu(dst);
1167         struct net *net = dev_net(dev);
1168
1169         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1170
1171         if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1172                 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1173
1174         /*
1175          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1176          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1177          * IPV6_MAXPLEN is also valid and means: "any MSS,
1178          * rely only on pmtu discovery"
1179          */
1180         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1181                 mtu = IPV6_MAXPLEN;
1182         return mtu;
1183 }
1184
1185 static unsigned int ip6_mtu(const struct dst_entry *dst)
1186 {
1187         struct inet6_dev *idev;
1188         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1189
1190         if (mtu)
1191                 return mtu;
1192
1193         mtu = IPV6_MIN_MTU;
1194
1195         rcu_read_lock();
1196         idev = __in6_dev_get(dst->dev);
1197         if (idev)
1198                 mtu = idev->cnf.mtu6;
1199         rcu_read_unlock();
1200
1201         return mtu;
1202 }
1203
/*
 * Head of the singly linked list (chained through dst->next) of dsts
 * created by icmp6_dst_alloc(), and the spinlock taken around every
 * access to that list in this file.
 */
1204 static struct dst_entry *icmp6_dst_gc_list;
1205 static DEFINE_SPINLOCK(icmp6_dst_lock);
1206
1207 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1208                                   struct neighbour *neigh,
1209                                   struct flowi6 *fl6)
1210 {
1211         struct dst_entry *dst;
1212         struct rt6_info *rt;
1213         struct inet6_dev *idev = in6_dev_get(dev);
1214         struct net *net = dev_net(dev);
1215
1216         if (unlikely(!idev))
1217                 return ERR_PTR(-ENODEV);
1218
1219         rt = ip6_dst_alloc(net, dev, 0, NULL);
1220         if (unlikely(!rt)) {
1221                 in6_dev_put(idev);
1222                 dst = ERR_PTR(-ENOMEM);
1223                 goto out;
1224         }
1225
1226         if (neigh)
1227                 neigh_hold(neigh);
1228         else {
1229                 neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr);
1230                 if (IS_ERR(neigh)) {
1231                         in6_dev_put(idev);
1232                         dst_free(&rt->dst);
1233                         return ERR_CAST(neigh);
1234                 }
1235         }
1236
1237         rt->dst.flags |= DST_HOST;
1238         rt->dst.output  = ip6_output;
1239         rt->n = neigh;
1240         atomic_set(&rt->dst.__refcnt, 1);
1241         rt->rt6i_dst.addr = fl6->daddr;
1242         rt->rt6i_dst.plen = 128;
1243         rt->rt6i_idev     = idev;
1244         dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1245
1246         spin_lock_bh(&icmp6_dst_lock);
1247         rt->dst.next = icmp6_dst_gc_list;
1248         icmp6_dst_gc_list = &rt->dst;
1249         spin_unlock_bh(&icmp6_dst_lock);
1250
1251         fib6_force_start_gc(net);
1252
1253         dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1254
1255 out:
1256         return dst;
1257 }
1258
1259 int icmp6_dst_gc(void)
1260 {
1261         struct dst_entry *dst, **pprev;
1262         int more = 0;
1263
1264         spin_lock_bh(&icmp6_dst_lock);
1265         pprev = &icmp6_dst_gc_list;
1266
1267         while ((dst = *pprev) != NULL) {
1268                 if (!atomic_read(&dst->__refcnt)) {
1269                         *pprev = dst->next;
1270                         dst_free(dst);
1271                 } else {
1272                         pprev = &dst->next;
1273                         ++more;
1274                 }
1275         }
1276
1277         spin_unlock_bh(&icmp6_dst_lock);
1278
1279         return more;
1280 }
1281
1282 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1283                             void *arg)
1284 {
1285         struct dst_entry *dst, **pprev;
1286
1287         spin_lock_bh(&icmp6_dst_lock);
1288         pprev = &icmp6_dst_gc_list;
1289         while ((dst = *pprev) != NULL) {
1290                 struct rt6_info *rt = (struct rt6_info *) dst;
1291                 if (func(rt, arg)) {
1292                         *pprev = dst->next;
1293                         dst_free(dst);
1294                 } else {
1295                         pprev = &dst->next;
1296                 }
1297         }
1298         spin_unlock_bh(&icmp6_dst_lock);
1299 }
1300
1301 static int ip6_dst_gc(struct dst_ops *ops)
1302 {
1303         unsigned long now = jiffies;
1304         struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1305         int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1306         int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1307         int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1308         int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1309         unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1310         int entries;
1311
1312         entries = dst_entries_get_fast(ops);
1313         if (time_after(rt_last_gc + rt_min_interval, now) &&
1314             entries <= rt_max_size)
1315                 goto out;
1316
1317         net->ipv6.ip6_rt_gc_expire++;
1318         fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1319         net->ipv6.ip6_rt_last_gc = now;
1320         entries = dst_entries_get_slow(ops);
1321         if (entries < ops->gc_thresh)
1322                 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1323 out:
1324         net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1325         return entries > rt_max_size;
1326 }
1327
1328 /* Clean host part of a prefix. Not necessary in radix tree,
1329    but results in cleaner routing tables.
1330
1331    Remove it only when all the things will work!
1332  */
1333
1334 int ip6_dst_hoplimit(struct dst_entry *dst)
1335 {
1336         int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1337         if (hoplimit == 0) {
1338                 struct net_device *dev = dst->dev;
1339                 struct inet6_dev *idev;
1340
1341                 rcu_read_lock();
1342                 idev = __in6_dev_get(dev);
1343                 if (idev)
1344                         hoplimit = idev->cnf.hop_limit;
1345                 else
1346                         hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1347                 rcu_read_unlock();
1348         }
1349         return hoplimit;
1350 }
1351 EXPORT_SYMBOL(ip6_dst_hoplimit);
1352
1353 /*
1354  *
1355  */
1356
/*
 * Create a route from @cfg and insert it into the FIB.
 *
 * Steps, in order:
 *   - reject prefix lengths above 128 (and any source prefix when
 *     CONFIG_IPV6_SUBTREES is off);
 *   - resolve cfg->fc_ifindex, taking references on the device and its
 *     inet6_dev;
 *   - pick the table (fib6_get_table() or fib6_new_table(), depending
 *     on whether a netlink header without NLM_F_CREATE is present);
 *   - allocate the route and fill in expiry, protocol, input/output
 *     handlers, destination/source keys and metric;
 *   - turn RTF_REJECT requests, and non-local routes through a loopback
 *     device to a non-loopback address, into reject routes bound to the
 *     namespace's loopback device;
 *   - validate the gateway (if RTF_GATEWAY) and the preferred source;
 *   - bind a neighbour for gateway/nonexthop routes;
 *   - copy the RTAX_* metrics from cfg->fc_mx and insert the route.
 *
 * Side effects on @cfg: fc_metric and fc_protocol are replaced by their
 * defaults when zero/unspecified, and fc_nlinfo.nl_net is overwritten
 * with the namespace of the chosen device.
 *
 * Returns the result of __ip6_ins_rt() on the insert path.  On any
 * earlier failure a negative errno is returned after dropping the
 * device/inet6_dev references and freeing the route.
 */
1357 int ip6_route_add(struct fib6_config *cfg)
1358 {
1359         int err;
1360         struct net *net = cfg->fc_nlinfo.nl_net;
1361         struct rt6_info *rt = NULL;
1362         struct net_device *dev = NULL;
1363         struct inet6_dev *idev = NULL;
1364         struct fib6_table *table;
1365         int addr_type;
1366
1367         if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1368                 return -EINVAL;
1369 #ifndef CONFIG_IPV6_SUBTREES
1370         if (cfg->fc_src_len)
1371                 return -EINVAL;
1372 #endif
             /* From here on dev/idev hold references that "out" must drop. */
1373         if (cfg->fc_ifindex) {
1374                 err = -ENODEV;
1375                 dev = dev_get_by_index(net, cfg->fc_ifindex);
1376                 if (!dev)
1377                         goto out;
1378                 idev = in6_dev_get(dev);
1379                 if (!idev)
1380                         goto out;
1381         }
1382
1383         if (cfg->fc_metric == 0)
1384                 cfg->fc_metric = IP6_RT_PRIO_USER;
1385
1386         err = -ENOBUFS;
             /*
              * Netlink request without NLM_F_CREATE: use an existing table if
              * there is one, otherwise warn and create it anyway.
              */
1387         if (cfg->fc_nlinfo.nlh &&
1388             !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1389                 table = fib6_get_table(net, cfg->fc_table);
1390                 if (!table) {
1391                         pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1392                         table = fib6_new_table(net, cfg->fc_table);
1393                 }
1394         } else {
1395                 table = fib6_new_table(net, cfg->fc_table);
1396         }
1397
1398         if (!table)
1399                 goto out;
1400
1401         rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1402
1403         if (!rt) {
1404                 err = -ENOMEM;
1405                 goto out;
1406         }
1407
1408         if (cfg->fc_flags & RTF_EXPIRES)
1409                 rt6_set_expires(rt, jiffies +
1410                                 clock_t_to_jiffies(cfg->fc_expires));
1411         else
1412                 rt6_clean_expires(rt);
1413
1414         if (cfg->fc_protocol == RTPROT_UNSPEC)
1415                 cfg->fc_protocol = RTPROT_BOOT;
1416         rt->rt6i_protocol = cfg->fc_protocol;
1417
1418         addr_type = ipv6_addr_type(&cfg->fc_dst);
1419
             /* Input handler depends on the destination type and RTF_LOCAL. */
1420         if (addr_type & IPV6_ADDR_MULTICAST)
1421                 rt->dst.input = ip6_mc_input;
1422         else if (cfg->fc_flags & RTF_LOCAL)
1423                 rt->dst.input = ip6_input;
1424         else
1425                 rt->dst.input = ip6_forward;
1426
1427         rt->dst.output = ip6_output;
1428
1429         ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1430         rt->rt6i_dst.plen = cfg->fc_dst_len;
1431         if (rt->rt6i_dst.plen == 128)
1432                rt->dst.flags |= DST_HOST;
1433
             /* Non-host routes that carry metrics get a freshly zeroed metrics array. */
1434         if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1435                 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1436                 if (!metrics) {
1437                         err = -ENOMEM;
1438                         goto out;
1439                 }
1440                 dst_init_metrics(&rt->dst, metrics, 0);
1441         }
1442 #ifdef CONFIG_IPV6_SUBTREES
1443         ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1444         rt->rt6i_src.plen = cfg->fc_src_len;
1445 #endif
1446
1447         rt->rt6i_metric = cfg->fc_metric;
1448
1449         /* We cannot add true routes via loopback here,
1450            they would result in kernel looping; promote them to reject routes
1451          */
1452         if ((cfg->fc_flags & RTF_REJECT) ||
1453             (dev && (dev->flags & IFF_LOOPBACK) &&
1454              !(addr_type & IPV6_ADDR_LOOPBACK) &&
1455              !(cfg->fc_flags & RTF_LOCAL))) {
1456                 /* hold loopback dev/idev if we haven't done so. */
1457                 if (dev != net->loopback_dev) {
1458                         if (dev) {
1459                                 dev_put(dev);
1460                                 in6_dev_put(idev);
1461                         }
1462                         dev = net->loopback_dev;
1463                         dev_hold(dev);
1464                         idev = in6_dev_get(dev);
1465                         if (!idev) {
1466                                 err = -ENODEV;
1467                                 goto out;
1468                         }
1469                 }
1470                 rt->dst.output = ip6_pkt_discard_out;
1471                 rt->dst.input = ip6_pkt_discard;
1472                 rt->dst.error = -ENETUNREACH;
1473                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1474                 goto install_route;
1475         }
1476
1477         if (cfg->fc_flags & RTF_GATEWAY) {
1478                 const struct in6_addr *gw_addr;
1479                 int gwa_type;
1480
1481                 gw_addr = &cfg->fc_gateway;
1482                 rt->rt6i_gateway = *gw_addr;
1483                 gwa_type = ipv6_addr_type(gw_addr);
1484
1485                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1486                         struct rt6_info *grt;
1487
1488                         /* IPv6 strictly inhibits using not link-local
1489                            addresses as nexthop address.
1490                            Otherwise, router will not able to send redirects.
1491                            It is very good, but in some (rare!) circumstances
1492                            (SIT, PtP, NBMA NOARP links) it is handy to allow
1493                            some exceptions. --ANK
1494                          */
1495                         err = -EINVAL;
1496                         if (!(gwa_type & IPV6_ADDR_UNICAST))
1497                                 goto out;
1498
1499                         grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1500
1501                         err = -EHOSTUNREACH;
1502                         if (!grt)
1503                                 goto out;
1504                         if (dev) {
1505                                 if (dev != grt->dst.dev) {
1506                                         dst_release(&grt->dst);
1507                                         goto out;
1508                                 }
1509                         } else {
                                     /* No device given: adopt the one of the route to the gateway. */
1510                                 dev = grt->dst.dev;
1511                                 idev = grt->rt6i_idev;
1512                                 dev_hold(dev);
1513                                 in6_dev_hold(grt->rt6i_idev);
1514                         }
                             /* Accept only if the route to the gateway is not itself a gateway route. */
1515                         if (!(grt->rt6i_flags & RTF_GATEWAY))
1516                                 err = 0;
1517                         dst_release(&grt->dst);
1518
1519                         if (err)
1520                                 goto out;
1521                 }
1522                 err = -EINVAL;
1523                 if (!dev || (dev->flags & IFF_LOOPBACK))
1524                         goto out;
1525         }
1526
1527         err = -ENODEV;
1528         if (!dev)
1529                 goto out;
1530
             /* A preferred source, when given, has to pass ipv6_chk_addr() on dev. */
1531         if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1532                 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1533                         err = -EINVAL;
1534                         goto out;
1535                 }
1536                 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1537                 rt->rt6i_prefsrc.plen = 128;
1538         } else
1539                 rt->rt6i_prefsrc.plen = 0;
1540
1541         if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1542                 err = rt6_bind_neighbour(rt, dev);
1543                 if (err)
1544                         goto out;
1545         }
1546
1547         rt->rt6i_flags = cfg->fc_flags;
1548
1549 install_route:
1550         if (cfg->fc_mx) {
1551                 struct nlattr *nla;
1552                 int remaining;
1553
                     /* Copy each non-zero RTAX_* attribute into the route's metrics. */
1554                 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1555                         int type = nla_type(nla);
1556
1557                         if (type) {
1558                                 if (type > RTAX_MAX) {
1559                                         err = -EINVAL;
1560                                         goto out;
1561                                 }
1562
1563                                 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1564                         }
1565                 }
1566         }
1567
             /* The dev/idev references are stored in the route; they are not dropped here. */
1568         rt->dst.dev = dev;
1569         rt->rt6i_idev = idev;
1570         rt->rt6i_table = table;
1571
1572         cfg->fc_nlinfo.nl_net = dev_net(dev);
1573
1574         return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1575
1576 out:
             /* Error exit: undo whatever was acquired so far. */
1577         if (dev)
1578                 dev_put(dev);
1579         if (idev)
1580                 in6_dev_put(idev);
1581         if (rt)
1582                 dst_free(&rt->dst);
1583         return err;
1584 }
1585
1586 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1587 {
1588         int err;
1589         struct fib6_table *table;
1590         struct net *net = dev_net(rt->dst.dev);
1591
1592         if (rt == net->ipv6.ip6_null_entry)
1593                 return -ENOENT;
1594
1595         table = rt->rt6i_table;
1596         write_lock_bh(&table->tb6_lock);
1597
1598         err = fib6_del(rt, info);
1599         dst_release(&rt->dst);
1600
1601         write_unlock_bh(&table->tb6_lock);
1602
1603         return err;
1604 }
1605
1606 int ip6_del_rt(struct rt6_info *rt)
1607 {
1608         struct nl_info info = {
1609                 .nl_net = dev_net(rt->dst.dev),
1610         };
1611         return __ip6_del_rt(rt, &info);
1612 }
1613
1614 static int ip6_route_del(struct fib6_config *cfg)
1615 {
1616         struct fib6_table *table;
1617         struct fib6_node *fn;
1618         struct rt6_info *rt;
1619         int err = -ESRCH;
1620
1621         table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1622         if (!table)
1623                 return err;
1624
1625         read_lock_bh(&table->tb6_lock);
1626
1627         fn = fib6_locate(&table->tb6_root,
1628                          &cfg->fc_dst, cfg->fc_dst_len,
1629                          &cfg->fc_src, cfg->fc_src_len);
1630
1631         if (fn) {
1632                 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1633                         if (cfg->fc_ifindex &&
1634                             (!rt->dst.dev ||
1635                              rt->dst.dev->ifindex != cfg->fc_ifindex))
1636                                 continue;
1637                         if (cfg->fc_flags & RTF_GATEWAY &&
1638                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1639                                 continue;
1640                         if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1641                                 continue;
1642                         dst_hold(&rt->dst);
1643                         read_unlock_bh(&table->tb6_lock);
1644
1645                         return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1646                 }
1647         }
1648         read_unlock_bh(&table->tb6_lock);
1649
1650         return err;
1651 }
1652
/*
 * Handle the ICMPv6 Redirect carried in @skb for the route @dst.
 *
 * The message is dropped (with a rate-limited debug message in most
 * cases) when it is too short, its destination is multicast, its target
 * is neither the destination itself nor a link-local unicast address,
 * the receiving interface forwards or does not accept redirects, the ND
 * options or the target link-layer address are malformed, or @dst is
 * the null entry.
 *
 * Otherwise the dst is confirmed, the neighbour entry for the target is
 * looked up and updated, a copy of the route for the redirected
 * destination is inserted with RTF_DYNAMIC|RTF_CACHE (plus RTF_GATEWAY
 * unless the target is on-link), a NETEVENT_REDIRECT notification is
 * raised, and the old route is deleted if it was itself a cache entry.
 *
 * @sk is not used in this function.
 */
1653 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1654 {
1655         struct net *net = dev_net(skb->dev);
1656         struct netevent_redirect netevent;
1657         struct rt6_info *rt, *nrt = NULL;
1658         const struct in6_addr *target;
1659         struct ndisc_options ndopts;
1660         const struct in6_addr *dest;
1661         struct neighbour *old_neigh;
1662         struct inet6_dev *in6_dev;
1663         struct neighbour *neigh;
1664         struct icmp6hdr *icmph;
1665         int optlen, on_link;
1666         u8 *lladdr;
1667
             /* Bytes left for ND options after the ICMPv6 header and the two addresses. */
1668         optlen = skb->tail - skb->transport_header;
1669         optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
1670
1671         if (optlen < 0) {
1672                 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1673                 return;
1674         }
1675
             /* Target address directly follows the ICMPv6 header, destination follows the target. */
1676         icmph = icmp6_hdr(skb);
1677         target = (const struct in6_addr *) (icmph + 1);
1678         dest = target + 1;
1679
1680         if (ipv6_addr_is_multicast(dest)) {
1681                 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1682                 return;
1683         }
1684
             /* target == dest means the destination is on-link; else target must be link-local unicast. */
1685         on_link = 0;
1686         if (ipv6_addr_equal(dest, target)) {
1687                 on_link = 1;
1688         } else if (ipv6_addr_type(target) !=
1689                    (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1690                 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1691                 return;
1692         }
1693
1694         in6_dev = __in6_dev_get(skb->dev);
1695         if (!in6_dev)
1696                 return;
1697         if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1698                 return;
1699
1700         /* RFC2461 8.1:
1701          *      The IP source address of the Redirect MUST be the same as the current
1702          *      first-hop router for the specified ICMP Destination Address.
1703          */
1704
1705         if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
1706                 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1707                 return;
1708         }
1709
1710         lladdr = NULL;
1711         if (ndopts.nd_opts_tgt_lladdr) {
1712                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1713                                              skb->dev);
1714                 if (!lladdr) {
1715                         net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1716                         return;
1717                 }
1718         }
1719
1720         rt = (struct rt6_info *) dst;
1721         if (rt == net->ipv6.ip6_null_entry) {
1722                 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1723                 return;
1724         }
1725
1726         /* Redirect received -> path was valid.
1727          * Look, redirects are sent only in response to data packets,
1728          * so that this nexthop apparently is reachable. --ANK
1729          */
1730         dst_confirm(&rt->dst);
1731
             /* NOTE(review): last argument 1 presumably asks for creation if missing - confirm. */
1732         neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
1733         if (!neigh)
1734                 return;
1735
1736         /* Duplicate redirect: silently ignore. */
1737         old_neigh = rt->n;
1738         if (neigh == old_neigh)
1739                 goto out;
1740
1741         /*
1742          *      We have finally decided to accept it.
1743          */
1744
1745         neigh_update(neigh, lladdr, NUD_STALE,
1746                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1747                      NEIGH_UPDATE_F_OVERRIDE|
1748                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1749                                      NEIGH_UPDATE_F_ISROUTER))
1750                      );
1751
1752         nrt = ip6_rt_copy(rt, dest);
1753         if (!nrt)
1754                 goto out;
1755
1756         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1757         if (on_link)
1758                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1759
1760         nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1761         nrt->n = neigh_clone(neigh);
1762
1763         if (ip6_ins_rt(nrt))
1764                 goto out;
1765
1766         netevent.old = &rt->dst;
1767         netevent.old_neigh = old_neigh;
1768         netevent.new = &nrt->dst;
1769         netevent.new_neigh = neigh;
1770         netevent.daddr = dest;
1771         call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1772
             /* Old cache entry is superseded: take a reference for ip6_del_rt() and delete it. */
1773         if (rt->rt6i_flags & RTF_CACHE) {
1774                 rt = (struct rt6_info *) dst_clone(&rt->dst);
1775                 ip6_del_rt(rt);
1776         }
1777
1778 out:
             /* Drop the reference obtained from __neigh_lookup(). */
1779         neigh_release(neigh);
1780 }
1781
1782 /*
1783  *      Misc support functions
1784  */
1785
1786 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1787                                     const struct in6_addr *dest)
1788 {
1789         struct net *net = dev_net(ort->dst.dev);
1790         struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1791                                             ort->rt6i_table);
1792
1793         if (rt) {
1794                 rt->dst.input = ort->dst.input;
1795                 rt->dst.output = ort->dst.output;
1796                 rt->dst.flags |= DST_HOST;
1797
1798                 rt->rt6i_dst.addr = *dest;
1799                 rt->rt6i_dst.plen = 128;
1800                 dst_copy_metrics(&rt->dst, &ort->dst);
1801                 rt->dst.error = ort->dst.error;
1802                 rt->rt6i_idev = ort->rt6i_idev;
1803                 if (rt->rt6i_idev)
1804                         in6_dev_hold(rt->rt6i_idev);
1805                 rt->dst.lastuse = jiffies;
1806
1807                 rt->rt6i_gateway = ort->rt6i_gateway;
1808                 rt->rt6i_flags = ort->rt6i_flags;
1809                 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1810                     (RTF_DEFAULT | RTF_ADDRCONF))
1811                         rt6_set_from(rt, ort);
1812                 else
1813                         rt6_clean_expires(rt);
1814                 rt->rt6i_metric = 0;
1815
1816 #ifdef CONFIG_IPV6_SUBTREES
1817                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1818 #endif
1819                 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1820                 rt->rt6i_table = ort->rt6i_table;
1821         }
1822         return rt;
1823 }
1824
1825 #ifdef CONFIG_IPV6_ROUTE_INFO
1826 static struct rt6_info *rt6_get_route_info(struct net *net,
1827                                            const struct in6_addr *prefix, int prefixlen,
1828                                            const struct in6_addr *gwaddr, int ifindex)
1829 {
1830         struct fib6_node *fn;
1831         struct rt6_info *rt = NULL;
1832         struct fib6_table *table;
1833
1834         table = fib6_get_table(net, RT6_TABLE_INFO);
1835         if (!table)
1836                 return NULL;
1837
1838         write_lock_bh(&table->tb6_lock);
1839         fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1840         if (!fn)
1841                 goto out;
1842
1843         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1844                 if (rt->dst.dev->ifindex != ifindex)
1845                         continue;
1846                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1847                         continue;
1848                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1849                         continue;
1850                 dst_hold(&rt->dst);
1851                 break;
1852         }
1853 out:
1854         write_unlock_bh(&table->tb6_lock);
1855         return rt;
1856 }
1857
1858 static struct rt6_info *rt6_add_route_info(struct net *net,
1859                                            const struct in6_addr *prefix, int prefixlen,
1860                                            const struct in6_addr *gwaddr, int ifindex,
1861                                            unsigned int pref)
1862 {
1863         struct fib6_config cfg = {
1864                 .fc_table       = RT6_TABLE_INFO,
1865                 .fc_metric      = IP6_RT_PRIO_USER,
1866                 .fc_ifindex     = ifindex,
1867                 .fc_dst_len     = prefixlen,
1868                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1869                                   RTF_UP | RTF_PREF(pref),
1870                 .fc_nlinfo.pid = 0,
1871                 .fc_nlinfo.nlh = NULL,
1872                 .fc_nlinfo.nl_net = net,
1873         };
1874
1875         cfg.fc_dst = *prefix;
1876         cfg.fc_gateway = *gwaddr;
1877
1878         /* We should treat it as a default route if prefix length is 0. */
1879         if (!prefixlen)
1880                 cfg.fc_flags |= RTF_DEFAULT;
1881
1882         ip6_route_add(&cfg);
1883
1884         return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1885 }
1886 #endif
1887
1888 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1889 {
1890         struct rt6_info *rt;
1891         struct fib6_table *table;
1892
1893         table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1894         if (!table)
1895                 return NULL;
1896
1897         write_lock_bh(&table->tb6_lock);
1898         for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1899                 if (dev == rt->dst.dev &&
1900                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1901                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1902                         break;
1903         }
1904         if (rt)
1905                 dst_hold(&rt->dst);
1906         write_unlock_bh(&table->tb6_lock);
1907         return rt;
1908 }
1909
1910 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1911                                      struct net_device *dev,
1912                                      unsigned int pref)
1913 {
1914         struct fib6_config cfg = {
1915                 .fc_table       = RT6_TABLE_DFLT,
1916                 .fc_metric      = IP6_RT_PRIO_USER,
1917                 .fc_ifindex     = dev->ifindex,
1918                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1919                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1920                 .fc_nlinfo.pid = 0,
1921                 .fc_nlinfo.nlh = NULL,
1922                 .fc_nlinfo.nl_net = dev_net(dev),
1923         };
1924
1925         cfg.fc_gateway = *gwaddr;
1926
1927         ip6_route_add(&cfg);
1928
1929         return rt6_get_dflt_router(gwaddr, dev);
1930 }
1931
1932 void rt6_purge_dflt_routers(struct net *net)
1933 {
1934         struct rt6_info *rt;
1935         struct fib6_table *table;
1936
1937         /* NOTE: Keep consistent with rt6_get_dflt_router */
1938         table = fib6_get_table(net, RT6_TABLE_DFLT);
1939         if (!table)
1940                 return;
1941
1942 restart:
1943         read_lock_bh(&table->tb6_lock);
1944         for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1945                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1946                         dst_hold(&rt->dst);
1947                         read_unlock_bh(&table->tb6_lock);
1948                         ip6_del_rt(rt);
1949                         goto restart;
1950                 }
1951         }
1952         read_unlock_bh(&table->tb6_lock);
1953 }
1954
1955 static void rtmsg_to_fib6_config(struct net *net,
1956                                  struct in6_rtmsg *rtmsg,
1957                                  struct fib6_config *cfg)
1958 {
1959         memset(cfg, 0, sizeof(*cfg));
1960
1961         cfg->fc_table = RT6_TABLE_MAIN;
1962         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1963         cfg->fc_metric = rtmsg->rtmsg_metric;
1964         cfg->fc_expires = rtmsg->rtmsg_info;
1965         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1966         cfg->fc_src_len = rtmsg->rtmsg_src_len;
1967         cfg->fc_flags = rtmsg->rtmsg_flags;
1968
1969         cfg->fc_nlinfo.nl_net = net;
1970
1971         cfg->fc_dst = rtmsg->rtmsg_dst;
1972         cfg->fc_src = rtmsg->rtmsg_src;
1973         cfg->fc_gateway = rtmsg->rtmsg_gateway;
1974 }
1975
1976 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1977 {
1978         struct fib6_config cfg;
1979         struct in6_rtmsg rtmsg;
1980         int err;
1981
1982         switch(cmd) {
1983         case SIOCADDRT:         /* Add a route */
1984         case SIOCDELRT:         /* Delete a route */
1985                 if (!capable(CAP_NET_ADMIN))
1986                         return -EPERM;
1987                 err = copy_from_user(&rtmsg, arg,
1988                                      sizeof(struct in6_rtmsg));
1989                 if (err)
1990                         return -EFAULT;
1991
1992                 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1993
1994                 rtnl_lock();
1995                 switch (cmd) {
1996                 case SIOCADDRT:
1997                         err = ip6_route_add(&cfg);
1998                         break;
1999                 case SIOCDELRT:
2000                         err = ip6_route_del(&cfg);
2001                         break;
2002                 default:
2003                         err = -EINVAL;
2004                 }
2005                 rtnl_unlock();
2006
2007                 return err;
2008         }
2009
2010         return -EINVAL;
2011 }
2012
2013 /*
2014  *      Drop the packet on the floor
2015  */
2016
2017 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2018 {
2019         int type;
2020         struct dst_entry *dst = skb_dst(skb);
2021         switch (ipstats_mib_noroutes) {
2022         case IPSTATS_MIB_INNOROUTES:
2023                 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2024                 if (type == IPV6_ADDR_ANY) {
2025                         IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2026                                       IPSTATS_MIB_INADDRERRORS);
2027                         break;
2028                 }
2029                 /* FALLTHROUGH */
2030         case IPSTATS_MIB_OUTNOROUTES:
2031                 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2032                               ipstats_mib_noroutes);
2033                 break;
2034         }
2035         icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2036         kfree_skb(skb);
2037         return 0;
2038 }
2039
2040 static int ip6_pkt_discard(struct sk_buff *skb)
2041 {
2042         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2043 }
2044
2045 static int ip6_pkt_discard_out(struct sk_buff *skb)
2046 {
2047         skb->dev = skb_dst(skb)->dev;
2048         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2049 }
2050
2051 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2052
2053 static int ip6_pkt_prohibit(struct sk_buff *skb)
2054 {
2055         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2056 }
2057
2058 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2059 {
2060         skb->dev = skb_dst(skb)->dev;
2061         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2062 }
2063
2064 #endif
2065
2066 /*
2067  *      Allocate a dst for local (unicast / anycast) address.
2068  */
2069
2070 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2071                                     const struct in6_addr *addr,
2072                                     bool anycast)
2073 {
2074         struct net *net = dev_net(idev->dev);
2075         struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
2076         int err;
2077
2078         if (!rt) {
2079                 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
2080                 return ERR_PTR(-ENOMEM);
2081         }
2082
2083         in6_dev_hold(idev);
2084
2085         rt->dst.flags |= DST_HOST;
2086         rt->dst.input = ip6_input;
2087         rt->dst.output = ip6_output;
2088         rt->rt6i_idev = idev;
2089
2090         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2091         if (anycast)
2092                 rt->rt6i_flags |= RTF_ANYCAST;
2093         else
2094                 rt->rt6i_flags |= RTF_LOCAL;
2095         err = rt6_bind_neighbour(rt, rt->dst.dev);
2096         if (err) {
2097                 dst_free(&rt->dst);
2098                 return ERR_PTR(err);
2099         }
2100
2101         rt->rt6i_dst.addr = *addr;
2102         rt->rt6i_dst.plen = 128;
2103         rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2104
2105         atomic_set(&rt->dst.__refcnt, 1);
2106
2107         return rt;
2108 }
2109
2110 int ip6_route_get_saddr(struct net *net,
2111                         struct rt6_info *rt,
2112                         const struct in6_addr *daddr,
2113                         unsigned int prefs,
2114                         struct in6_addr *saddr)
2115 {
2116         struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2117         int err = 0;
2118         if (rt->rt6i_prefsrc.plen)
2119                 *saddr = rt->rt6i_prefsrc.addr;
2120         else
2121                 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2122                                          daddr, prefs, saddr);
2123         return err;
2124 }
2125
/*
 * remove deleted ip from prefsrc entries
 *
 * Argument block handed to fib6_remove_prefsrc() through fib6_clean_all().
 */
struct arg_dev_net_ip {
	struct net_device *dev;	/* device to match; NULL matches every device */
	struct net *net;	/* namespace whose ip6_null_entry is left alone */
	struct in6_addr *addr;	/* address compared with each route's prefsrc */
};
2132
2133 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2134 {
2135         struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2136         struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2137         struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2138
2139         if (((void *)rt->dst.dev == dev || !dev) &&
2140             rt != net->ipv6.ip6_null_entry &&
2141             ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2142                 /* remove prefsrc entry */
2143                 rt->rt6i_prefsrc.plen = 0;
2144         }
2145         return 0;
2146 }
2147
2148 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2149 {
2150         struct net *net = dev_net(ifp->idev->dev);
2151         struct arg_dev_net_ip adni = {
2152                 .dev = ifp->idev->dev,
2153                 .net = net,
2154                 .addr = &ifp->addr,
2155         };
2156         fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2157 }
2158
/* Argument block handed to fib6_ifdown() by rt6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;	/* device to match; NULL matches every device */
	struct net *net;	/* namespace whose ip6_null_entry is left alone */
};
2163
2164 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2165 {
2166         const struct arg_dev_net *adn = arg;
2167         const struct net_device *dev = adn->dev;
2168
2169         if ((rt->dst.dev == dev || !dev) &&
2170             rt != adn->net->ipv6.ip6_null_entry)
2171                 return -1;
2172
2173         return 0;
2174 }
2175
2176 void rt6_ifdown(struct net *net, struct net_device *dev)
2177 {
2178         struct arg_dev_net adn = {
2179                 .dev = dev,
2180                 .net = net,
2181         };
2182
2183         fib6_clean_all(net, fib6_ifdown, 0, &adn);
2184         icmp6_clean_all(fib6_ifdown, &adn);
2185 }
2186
/* Argument block handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU value */
};
2191
2192 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2193 {
2194         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2195         struct inet6_dev *idev;
2196
2197         /* In IPv6 pmtu discovery is not optional,
2198            so that RTAX_MTU lock cannot disable it.
2199            We still use this lock to block changes
2200            caused by addrconf/ndisc.
2201         */
2202
2203         idev = __in6_dev_get(arg->dev);
2204         if (!idev)
2205                 return 0;
2206
2207         /* For administrative MTU increase, there is no way to discover
2208            IPv6 PMTU increase, so PMTU increase should be updated here.
2209            Since RFC 1981 doesn't include administrative MTU increase
2210            update PMTU increase is a MUST. (i.e. jumbo frame)
2211          */
2212         /*
2213            If new MTU is less than route PMTU, this new MTU will be the
2214            lowest MTU in the path, update the route PMTU to reflect PMTU
2215            decreases; if new MTU is greater than route PMTU, and the
2216            old MTU is the lowest MTU in the path, update the route PMTU
2217            to reflect the increase. In this case if the other nodes' MTU
2218            also have the lowest MTU, TOO BIG MESSAGE will be lead to
2219            PMTU discouvery.
2220          */
2221         if (rt->dst.dev == arg->dev &&
2222             !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2223             (dst_mtu(&rt->dst) >= arg->mtu ||
2224              (dst_mtu(&rt->dst) < arg->mtu &&
2225               dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2226                 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2227         }
2228         return 0;
2229 }
2230
2231 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2232 {
2233         struct rt6_mtu_change_arg arg = {
2234                 .dev = dev,
2235                 .mtu = mtu,
2236         };
2237
2238         fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2239 }
2240
2241 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2242         [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2243         [RTA_OIF]               = { .type = NLA_U32 },
2244         [RTA_IIF]               = { .type = NLA_U32 },
2245         [RTA_PRIORITY]          = { .type = NLA_U32 },
2246         [RTA_METRICS]           = { .type = NLA_NESTED },
2247 };
2248
2249 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2250                               struct fib6_config *cfg)
2251 {
2252         struct rtmsg *rtm;
2253         struct nlattr *tb[RTA_MAX+1];
2254         int err;
2255
2256         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2257         if (err < 0)
2258                 goto errout;
2259
2260         err = -EINVAL;
2261         rtm = nlmsg_data(nlh);
2262         memset(cfg, 0, sizeof(*cfg));
2263
2264         cfg->fc_table = rtm->rtm_table;
2265         cfg->fc_dst_len = rtm->rtm_dst_len;
2266         cfg->fc_src_len = rtm->rtm_src_len;
2267         cfg->fc_flags = RTF_UP;
2268         cfg->fc_protocol = rtm->rtm_protocol;
2269
2270         if (rtm->rtm_type == RTN_UNREACHABLE)
2271                 cfg->fc_flags |= RTF_REJECT;
2272
2273         if (rtm->rtm_type == RTN_LOCAL)
2274                 cfg->fc_flags |= RTF_LOCAL;
2275
2276         cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2277         cfg->fc_nlinfo.nlh = nlh;
2278         cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2279
2280         if (tb[RTA_GATEWAY]) {
2281                 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2282                 cfg->fc_flags |= RTF_GATEWAY;
2283         }
2284
2285         if (tb[RTA_DST]) {
2286                 int plen = (rtm->rtm_dst_len + 7) >> 3;
2287
2288                 if (nla_len(tb[RTA_DST]) < plen)
2289                         goto errout;
2290
2291                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2292         }
2293
2294         if (tb[RTA_SRC]) {
2295                 int plen = (rtm->rtm_src_len + 7) >> 3;
2296
2297                 if (nla_len(tb[RTA_SRC]) < plen)
2298                         goto errout;
2299
2300                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2301         }
2302
2303         if (tb[RTA_PREFSRC])
2304                 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2305
2306         if (tb[RTA_OIF])
2307                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2308
2309         if (tb[RTA_PRIORITY])
2310                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2311
2312         if (tb[RTA_METRICS]) {
2313                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2314                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2315         }
2316
2317         if (tb[RTA_TABLE])
2318                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2319
2320         err = 0;
2321 errout:
2322         return err;
2323 }
2324
2325 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2326 {
2327         struct fib6_config cfg;
2328         int err;
2329
2330         err = rtm_to_fib6_config(skb, nlh, &cfg);
2331         if (err < 0)
2332                 return err;
2333
2334         return ip6_route_del(&cfg);
2335 }
2336
2337 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2338 {
2339         struct fib6_config cfg;
2340         int err;
2341
2342         err = rtm_to_fib6_config(skb, nlh, &cfg);
2343         if (err < 0)
2344                 return err;
2345
2346         return ip6_route_add(&cfg);
2347 }
2348
2349 static inline size_t rt6_nlmsg_size(void)
2350 {
2351         return NLMSG_ALIGN(sizeof(struct rtmsg))
2352                + nla_total_size(16) /* RTA_SRC */
2353                + nla_total_size(16) /* RTA_DST */
2354                + nla_total_size(16) /* RTA_GATEWAY */
2355                + nla_total_size(16) /* RTA_PREFSRC */
2356                + nla_total_size(4) /* RTA_TABLE */
2357                + nla_total_size(4) /* RTA_IIF */
2358                + nla_total_size(4) /* RTA_OIF */
2359                + nla_total_size(4) /* RTA_PRIORITY */
2360                + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2361                + nla_total_size(sizeof(struct rta_cacheinfo));
2362 }
2363
/*
 * Append one route netlink message describing @rt to @skb.
 *
 * @net:    namespace, passed on to ip6mr_get_route() / ip6_route_get_saddr()
 * @skb:    buffer the message is appended to
 * @rt:     route to describe
 * @dst:    if non-NULL, reported as a /128 RTA_DST instead of rt's own prefix
 * @src:    if non-NULL (CONFIG_IPV6_SUBTREES only), reported as a /128 RTA_SRC
 * @iif:    input interface index; 0 means none
 * @type:   netlink message type
 * @pid:    netlink pid put in the message header
 * @seq:    netlink sequence number put in the message header
 * @prefix: non-zero restricts output to routes flagged RTF_PREFIX_RT
 * @nowait: passed to ip6mr_get_route(); also selects how its errors are
 *          handled
 * @flags:  netlink message flags
 *
 * Returns the value of nlmsg_end() on success, 1 if @prefix is set and
 * @rt is not a prefix route (nothing is written), 0 if ip6mr_get_route()
 * returned 0 with @nowait clear, and -EMSGSIZE if the header or an
 * attribute could not be added (a started message is cancelled).
 */
static int rt6_fill_node(struct net *net,
			 struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 pid, u32 seq,
			 int prefix, int nowait, unsigned int flags)
{
	struct rtmsg *rtm;
	struct nlmsghdr *nlh;
	long expires;
	u32 table;
	struct neighbour *n;

	if (prefix) {	/* user wants prefix routes only */
		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
			/* success since this is not a prefix route */
			return 1;
		}
	}

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt->rt6i_dst.plen;
	rtm->rtm_src_len = rt->rt6i_src.plen;
	rtm->rtm_tos = 0;
	if (rt->rt6i_table)
		table = rt->rt6i_table->tb6_id;
	else
		table = RT6_TABLE_UNSPEC;
	/* the table id goes both into the header and into RTA_TABLE */
	rtm->rtm_table = table;
	if (nla_put_u32(skb, RTA_TABLE, table))
		goto nla_put_failure;
	/* route type: reject, then local (by flag or loopback device), else unicast */
	if (rt->rt6i_flags & RTF_REJECT)
		rtm->rtm_type = RTN_UNREACHABLE;
	else if (rt->rt6i_flags & RTF_LOCAL)
		rtm->rtm_type = RTN_LOCAL;
	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
		rtm->rtm_type = RTN_LOCAL;
	else
		rtm->rtm_type = RTN_UNICAST;
	rtm->rtm_flags = 0;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	/* the stored protocol is overridden for dynamic and addrconf routes */
	rtm->rtm_protocol = rt->rt6i_protocol;
	if (rt->rt6i_flags & RTF_DYNAMIC)
		rtm->rtm_protocol = RTPROT_REDIRECT;
	else if (rt->rt6i_flags & RTF_ADDRCONF) {
		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
			rtm->rtm_protocol = RTPROT_RA;
		else
			rtm->rtm_protocol = RTPROT_KERNEL;
	}

	if (rt->rt6i_flags & RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	/* an explicit @dst is always reported as a host (/128) destination */
	if (dst) {
		if (nla_put(skb, RTA_DST, 16, dst))
			goto nla_put_failure;
		rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len)
		if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
			goto nla_put_failure;
#ifdef CONFIG_IPV6_SUBTREES
	if (src) {
		if (nla_put(skb, RTA_SRC, 16, src))
			goto nla_put_failure;
		rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len &&
		   nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
		goto nla_put_failure;
#endif
	if (iif) {
#ifdef CONFIG_IPV6_MROUTE
		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
			int err = ip6mr_get_route(net, skb, rtm, nowait);
			if (err <= 0) {
				if (!nowait) {
					if (err == 0)
						return 0;
					goto nla_put_failure;
				} else {
					/* with nowait set, only -EMSGSIZE aborts the message */
					if (err == -EMSGSIZE)
						goto nla_put_failure;
				}
			}
		} else
#endif
			/* with CONFIG_IPV6_MROUTE this is the body of the "else" above */
			if (nla_put_u32(skb, RTA_IIF, iif))
				goto nla_put_failure;
	} else if (dst) {
		/* no input interface: report the source address chosen for @dst */
		struct in6_addr saddr_buf;
		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
		    nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
			goto nla_put_failure;
	}

	/*
	 * NOTE(review): if the "else if (dst)" branch above ran and the route
	 * has a preferred source, RTA_PREFSRC is emitted a second time here
	 * with the same address.
	 */
	if (rt->rt6i_prefsrc.plen) {
		struct in6_addr saddr_buf;
		saddr_buf = rt->rt6i_prefsrc.addr;
		if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
			goto nla_put_failure;
	}

	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
		goto nla_put_failure;

	/* the gateway is reported from the bound neighbour entry, if any */
	rcu_read_lock();
	n = rt->n;
	if (n) {
		if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {
			rcu_read_unlock();
			goto nla_put_failure;
		}
	}
	rcu_read_unlock();

	if (rt->dst.dev &&
	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
		goto nla_put_failure;
	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
		goto nla_put_failure;

	/* remaining lifetime in jiffies; 0 when the route does not expire */
	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2500
2501 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2502 {
2503         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2504         int prefix;
2505
2506         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2507                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2508                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2509         } else
2510                 prefix = 0;
2511
2512         return rt6_fill_node(arg->net,
2513                      arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2514                      NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2515                      prefix, 0, NLM_F_MULTI);
2516 }
2517
2518 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2519 {
2520         struct net *net = sock_net(in_skb->sk);
2521         struct nlattr *tb[RTA_MAX+1];
2522         struct rt6_info *rt;
2523         struct sk_buff *skb;
2524         struct rtmsg *rtm;
2525         struct flowi6 fl6;
2526         int err, iif = 0, oif = 0;
2527
2528         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2529         if (err < 0)
2530                 goto errout;
2531
2532         err = -EINVAL;
2533         memset(&fl6, 0, sizeof(fl6));
2534
2535         if (tb[RTA_SRC]) {
2536                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2537                         goto errout;
2538
2539                 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2540         }
2541
2542         if (tb[RTA_DST]) {
2543                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2544                         goto errout;
2545
2546                 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2547         }
2548
2549         if (tb[RTA_IIF])
2550                 iif = nla_get_u32(tb[RTA_IIF]);
2551
2552         if (tb[RTA_OIF])
2553                 oif = nla_get_u32(tb[RTA_OIF]);
2554
2555         if (iif) {
2556                 struct net_device *dev;
2557                 int flags = 0;
2558
2559                 dev = __dev_get_by_index(net, iif);
2560                 if (!dev) {
2561                         err = -ENODEV;
2562                         goto errout;
2563                 }
2564
2565                 fl6.flowi6_iif = iif;
2566
2567                 if (!ipv6_addr_any(&fl6.saddr))
2568                         flags |= RT6_LOOKUP_F_HAS_SADDR;
2569
2570                 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2571                                                                flags);
2572         } else {
2573                 fl6.flowi6_oif = oif;
2574
2575                 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2576         }
2577
2578         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2579         if (!skb) {
2580                 dst_release(&rt->dst);
2581                 err = -ENOBUFS;
2582                 goto errout;
2583         }
2584
2585         /* Reserve room for dummy headers, this skb can pass
2586            through good chunk of routing engine.
2587          */
2588         skb_reset_mac_header(skb);
2589         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2590
2591         skb_dst_set(skb, &rt->dst);
2592
2593         err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2594                             RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2595                             nlh->nlmsg_seq, 0, 0, 0);
2596         if (err < 0) {
2597                 kfree_skb(skb);
2598                 goto errout;
2599         }
2600
2601         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2602 errout:
2603         return err;
2604 }
2605
2606 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2607 {
2608         struct sk_buff *skb;
2609         struct net *net = info->nl_net;
2610         u32 seq;
2611         int err;
2612
2613         err = -ENOBUFS;
2614         seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2615
2616         skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2617         if (!skb)
2618                 goto errout;
2619
2620         err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2621                                 event, info->pid, seq, 0, 0, 0);
2622         if (err < 0) {
2623                 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2624                 WARN_ON(err == -EMSGSIZE);
2625                 kfree_skb(skb);
2626                 goto errout;
2627         }
2628         rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2629                     info->nlh, gfp_any());
2630         return;
2631 errout:
2632         if (err < 0)
2633                 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2634 }
2635
2636 static int ip6_route_dev_notify(struct notifier_block *this,
2637                                 unsigned long event, void *data)
2638 {
2639         struct net_device *dev = (struct net_device *)data;
2640         struct net *net = dev_net(dev);
2641
2642         if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2643                 net->ipv6.ip6_null_entry->dst.dev = dev;
2644                 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2645 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2646                 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2647                 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2648                 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2649                 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2650 #endif
2651         }
2652
2653         return NOTIFY_OK;
2654 }
2655
2656 /*
2657  *      /proc
2658  */
2659
2660 #ifdef CONFIG_PROC_FS
2661
/*
 * Plain record holding a character buffer pointer and four integer fields.
 * NOTE(review): nothing in the /proc code that follows references this
 * type - confirm whether it is still needed.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2670
2671 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2672 {
2673         struct seq_file *m = p_arg;
2674         struct neighbour *n;
2675
2676         seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2677
2678 #ifdef CONFIG_IPV6_SUBTREES
2679         seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2680 #else
2681         seq_puts(m, "00000000000000000000000000000000 00 ");
2682 #endif
2683         rcu_read_lock();
2684         n = rt->n;
2685         if (n) {
2686                 seq_printf(m, "%pi6", n->primary_key);
2687         } else {
2688                 seq_puts(m, "00000000000000000000000000000000");
2689         }
2690         rcu_read_unlock();
2691         seq_printf(m, " %08x %08x %08x %08x %8s\n",
2692                    rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2693                    rt->dst.__use, rt->rt6i_flags,
2694                    rt->dst.dev ? rt->dst.dev->name : "");
2695         return 0;
2696 }
2697
2698 static int ipv6_route_show(struct seq_file *m, void *v)
2699 {
2700         struct net *net = (struct net *)m->private;
2701         fib6_clean_all_ro(net, rt6_info_route, 0, m);
2702         return 0;
2703 }
2704
2705 static int ipv6_route_open(struct inode *inode, struct file *file)
2706 {
2707         return single_open_net(inode, file, ipv6_route_show);
2708 }
2709
2710 static const struct file_operations ipv6_route_proc_fops = {
2711         .owner          = THIS_MODULE,
2712         .open           = ipv6_route_open,
2713         .read           = seq_read,
2714         .llseek         = seq_lseek,
2715         .release        = single_release_net,
2716 };
2717
2718 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2719 {
2720         struct net *net = (struct net *)seq->private;
2721         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2722                    net->ipv6.rt6_stats->fib_nodes,
2723                    net->ipv6.rt6_stats->fib_route_nodes,
2724                    net->ipv6.rt6_stats->fib_rt_alloc,
2725                    net->ipv6.rt6_stats->fib_rt_entries,
2726                    net->ipv6.rt6_stats->fib_rt_cache,
2727                    dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2728                    net->ipv6.rt6_stats->fib_discarded_routes);
2729
2730         return 0;
2731 }
2732
2733 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2734 {
2735         return single_open_net(inode, file, rt6_stats_seq_show);
2736 }
2737
2738 static const struct file_operations rt6_stats_seq_fops = {
2739         .owner   = THIS_MODULE,
2740         .open    = rt6_stats_seq_open,
2741         .read    = seq_read,
2742         .llseek  = seq_lseek,
2743         .release = single_release_net,
2744 };
2745 #endif  /* CONFIG_PROC_FS */
2746
2747 #ifdef CONFIG_SYSCTL
2748
2749 static
2750 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2751                               void __user *buffer, size_t *lenp, loff_t *ppos)
2752 {
2753         struct net *net;
2754         int delay;
2755         if (!write)
2756                 return -EINVAL;
2757
2758         net = (struct net *)ctl->extra1;
2759         delay = net->ipv6.sysctl.flush_delay;
2760         proc_dointvec(ctl, write, buffer, lenp, ppos);
2761         fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2762         return 0;
2763 }
2764
2765 ctl_table ipv6_route_table_template[] = {
2766         {
2767                 .procname       =       "flush",
2768                 .data           =       &init_net.ipv6.sysctl.flush_delay,
2769                 .maxlen         =       sizeof(int),
2770                 .mode           =       0200,
2771                 .proc_handler   =       ipv6_sysctl_rtcache_flush
2772         },
2773         {
2774                 .procname       =       "gc_thresh",
2775                 .data           =       &ip6_dst_ops_template.gc_thresh,
2776                 .maxlen         =       sizeof(int),
2777                 .mode           =       0644,
2778                 .proc_handler   =       proc_dointvec,
2779         },
2780         {
2781                 .procname       =       "max_size",
2782                 .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
2783                 .maxlen         =       sizeof(int),
2784                 .mode           =       0644,
2785                 .proc_handler   =       proc_dointvec,
2786         },
2787         {
2788                 .procname       =       "gc_min_interval",
2789                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2790                 .maxlen         =       sizeof(int),
2791                 .mode           =       0644,
2792                 .proc_handler   =       proc_dointvec_jiffies,
2793         },
2794         {
2795                 .procname       =       "gc_timeout",
2796                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2797                 .maxlen         =       sizeof(int),
2798                 .mode           =       0644,
2799                 .proc_handler   =       proc_dointvec_jiffies,
2800         },
2801         {
2802                 .procname       =       "gc_interval",
2803                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2804                 .maxlen         =       sizeof(int),
2805                 .mode           =       0644,
2806                 .proc_handler   =       proc_dointvec_jiffies,
2807         },
2808         {
2809                 .procname       =       "gc_elasticity",
2810                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2811                 .maxlen         =       sizeof(int),
2812                 .mode           =       0644,
2813                 .proc_handler   =       proc_dointvec,
2814         },
2815         {
2816                 .procname       =       "mtu_expires",
2817                 .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2818                 .maxlen         =       sizeof(int),
2819                 .mode           =       0644,
2820                 .proc_handler   =       proc_dointvec_jiffies,
2821         },
2822         {
2823                 .procname       =       "min_adv_mss",
2824                 .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2825                 .maxlen         =       sizeof(int),
2826                 .mode           =       0644,
2827                 .proc_handler   =       proc_dointvec,
2828         },
2829         {
2830                 .procname       =       "gc_min_interval_ms",
2831                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2832                 .maxlen         =       sizeof(int),
2833                 .mode           =       0644,
2834                 .proc_handler   =       proc_dointvec_ms_jiffies,
2835         },
2836         { }
2837 };
2838
2839 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2840 {
2841         struct ctl_table *table;
2842
2843         table = kmemdup(ipv6_route_table_template,
2844                         sizeof(ipv6_route_table_template),
2845                         GFP_KERNEL);
2846
2847         if (table) {
2848                 table[0].data = &net->ipv6.sysctl.flush_delay;
2849                 table[0].extra1 = net;
2850                 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2851                 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2852                 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2853                 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2854                 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2855                 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2856                 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2857                 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2858                 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2859         }
2860
2861         return table;
2862 }
2863 #endif
2864
2865 static int __net_init ip6_route_net_init(struct net *net)
2866 {
2867         int ret = -ENOMEM;
2868
2869         memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2870                sizeof(net->ipv6.ip6_dst_ops));
2871
2872         if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2873                 goto out_ip6_dst_ops;
2874
2875         net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2876                                            sizeof(*net->ipv6.ip6_null_entry),
2877                                            GFP_KERNEL);
2878         if (!net->ipv6.ip6_null_entry)
2879                 goto out_ip6_dst_entries;
2880         net->ipv6.ip6_null_entry->dst.path =
2881                 (struct dst_entry *)net->ipv6.ip6_null_entry;
2882         net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2883         dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2884                          ip6_template_metrics, true);
2885
2886 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2887         net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2888                                                sizeof(*net->ipv6.ip6_prohibit_entry),
2889                                                GFP_KERNEL);
2890         if (!net->ipv6.ip6_prohibit_entry)
2891                 goto out_ip6_null_entry;
2892         net->ipv6.ip6_prohibit_entry->dst.path =
2893                 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2894         net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2895         dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2896                          ip6_template_metrics, true);
2897
2898         net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2899                                                sizeof(*net->ipv6.ip6_blk_hole_entry),
2900                                                GFP_KERNEL);
2901         if (!net->ipv6.ip6_blk_hole_entry)
2902                 goto out_ip6_prohibit_entry;
2903         net->ipv6.ip6_blk_hole_entry->dst.path =
2904                 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2905         net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2906         dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2907                          ip6_template_metrics, true);
2908 #endif
2909
2910         net->ipv6.sysctl.flush_delay = 0;
2911         net->ipv6.sysctl.ip6_rt_max_size = 4096;
2912         net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2913         net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2914         net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2915         net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2916         net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2917         net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2918
2919         net->ipv6.ip6_rt_gc_expire = 30*HZ;
2920
2921         ret = 0;
2922 out:
2923         return ret;
2924
2925 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2926 out_ip6_prohibit_entry:
2927         kfree(net->ipv6.ip6_prohibit_entry);
2928 out_ip6_null_entry:
2929         kfree(net->ipv6.ip6_null_entry);
2930 #endif
2931 out_ip6_dst_entries:
2932         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2933 out_ip6_dst_ops:
2934         goto out;
2935 }
2936
2937 static void __net_exit ip6_route_net_exit(struct net *net)
2938 {
2939         kfree(net->ipv6.ip6_null_entry);
2940 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2941         kfree(net->ipv6.ip6_prohibit_entry);
2942         kfree(net->ipv6.ip6_blk_hole_entry);
2943 #endif
2944         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2945 }
2946
2947 static int __net_init ip6_route_net_init_late(struct net *net)
2948 {
2949 #ifdef CONFIG_PROC_FS
2950         proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2951         proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2952 #endif
2953         return 0;
2954 }
2955
2956 static void __net_exit ip6_route_net_exit_late(struct net *net)
2957 {
2958 #ifdef CONFIG_PROC_FS
2959         proc_net_remove(net, "ipv6_route");
2960         proc_net_remove(net, "rt6_stats");
2961 #endif
2962 }
2963
2964 static struct pernet_operations ip6_route_net_ops = {
2965         .init = ip6_route_net_init,
2966         .exit = ip6_route_net_exit,
2967 };
2968
2969 static int __net_init ipv6_inetpeer_init(struct net *net)
2970 {
2971         struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
2972
2973         if (!bp)
2974                 return -ENOMEM;
2975         inet_peer_base_init(bp);
2976         net->ipv6.peers = bp;
2977         return 0;
2978 }
2979
2980 static void __net_exit ipv6_inetpeer_exit(struct net *net)
2981 {
2982         struct inet_peer_base *bp = net->ipv6.peers;
2983
2984         net->ipv6.peers = NULL;
2985         inetpeer_invalidate_tree(bp);
2986         kfree(bp);
2987 }
2988
2989 static struct pernet_operations ipv6_inetpeer_ops = {
2990         .init   =       ipv6_inetpeer_init,
2991         .exit   =       ipv6_inetpeer_exit,
2992 };
2993
2994 static struct pernet_operations ip6_route_net_late_ops = {
2995         .init = ip6_route_net_init_late,
2996         .exit = ip6_route_net_exit_late,
2997 };
2998
2999 static struct notifier_block ip6_route_dev_notifier = {
3000         .notifier_call = ip6_route_dev_notify,
3001         .priority = 0,
3002 };
3003
3004 int __init ip6_route_init(void)
3005 {
3006         int ret;
3007
3008         ret = -ENOMEM;
3009         ip6_dst_ops_template.kmem_cachep =
3010                 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3011                                   SLAB_HWCACHE_ALIGN, NULL);
3012         if (!ip6_dst_ops_template.kmem_cachep)
3013                 goto out;
3014
3015         ret = dst_entries_init(&ip6_dst_blackhole_ops);
3016         if (ret)
3017                 goto out_kmem_cache;
3018
3019         ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3020         if (ret)
3021                 goto out_dst_entries;
3022
3023         ret = register_pernet_subsys(&ip6_route_net_ops);
3024         if (ret)
3025                 goto out_register_inetpeer;
3026
3027         ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3028
3029         /* Registering of the loopback is done before this portion of code,
3030          * the loopback reference in rt6_info will not be taken, do it
3031          * manually for init_net */
3032         init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3033         init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3034   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3035         init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3036         init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3037         init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3038         init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3039   #endif
3040         ret = fib6_init();
3041         if (ret)
3042                 goto out_register_subsys;
3043
3044         ret = xfrm6_init();
3045         if (ret)
3046                 goto out_fib6_init;
3047
3048         ret = fib6_rules_init();
3049         if (ret)
3050                 goto xfrm6_init;
3051
3052         ret = register_pernet_subsys(&ip6_route_net_late_ops);
3053         if (ret)
3054                 goto fib6_rules_init;
3055
3056         ret = -ENOBUFS;
3057         if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3058             __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3059             __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3060                 goto out_register_late_subsys;
3061
3062         ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3063         if (ret)
3064                 goto out_register_late_subsys;
3065
3066 out:
3067         return ret;
3068
3069 out_register_late_subsys:
3070         unregister_pernet_subsys(&ip6_route_net_late_ops);
3071 fib6_rules_init:
3072         fib6_rules_cleanup();
3073 xfrm6_init:
3074         xfrm6_fini();
3075 out_fib6_init:
3076         fib6_gc_cleanup();
3077 out_register_subsys:
3078         unregister_pernet_subsys(&ip6_route_net_ops);
3079 out_register_inetpeer:
3080         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3081 out_dst_entries:
3082         dst_entries_destroy(&ip6_dst_blackhole_ops);
3083 out_kmem_cache:
3084         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3085         goto out;
3086 }
3087
/*
 * Tear down what ip6_route_init() set up: unregister the netdevice
 * notifier and the per-namespace operations, run the rules, xfrm6 and fib6
 * cleanup calls, then destroy the blackhole dst entry counter and the dst
 * slab cache.
 */
void ip6_route_cleanup(void)
{
	unregister_netdevice_notifier(&ip6_route_dev_notifier);
	unregister_pernet_subsys(&ip6_route_net_late_ops);
	fib6_rules_cleanup();
	xfrm6_fini();
	fib6_gc_cleanup();
	/*
	 * NOTE(review): ip6_route_init() registers ipv6_inetpeer_ops before
	 * ip6_route_net_ops and its error path unregisters them in reverse
	 * order; here they are unregistered in registration order - confirm
	 * this is intended.
	 */
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
	unregister_pernet_subsys(&ip6_route_net_ops);
	dst_entries_destroy(&ip6_dst_blackhole_ops);
	/* The cache was created in ip6_route_init() as "ip6_dst_cache". */
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}