]> git.kernelconcepts.de Git - karo-tx-linux.git/blob - net/ipv4/devinet.c
Merge remote-tracking branch 'ipsec/master'
[karo-tx-linux.git] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58 #include <linux/netconf.h>
59
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66 #include <net/addrconf.h>
67
68 #include "fib_lookup.h"
69
/*
 * Initial values for the file-static ipv4_devconf instance.  The data[]
 * array is indexed by IPV4_DEVCONF_* minus one; entries not listed here
 * are left zero by the designated initializer.
 */
70 static struct ipv4_devconf ipv4_devconf = {
71         .data = {
72                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
77                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
78         },
79 };
80
/*
 * Initial values for the file-static ipv4_devconf_dflt instance.  Same
 * settings as ipv4_devconf above, plus ACCEPT_SOURCE_ROUTE set to 1.
 * The data[] array is indexed by IPV4_DEVCONF_* minus one.
 */
81 static struct ipv4_devconf ipv4_devconf_dflt = {
82         .data = {
83                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
84                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
85                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
86                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
87                 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
88                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
89                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
90         },
91 };
92
/*
 * Apply IPV4_DEVCONF() to the devconf_dflt structure that the namespace
 * pointer @net refers to (net->ipv4.devconf_dflt) for attribute @attr.
 */
93 #define IPV4_DEVCONF_DFLT(net, attr) \
94         IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
95
/*
 * Netlink attribute policy for IFA_* attributes; it is the policy handed
 * to nlmsg_parse() by the RTM address handlers in this file.  The label
 * is limited to IFNAMSIZ - 1 bytes and IFA_CACHEINFO is sized for a
 * struct ifa_cacheinfo.
 */
96 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
97         [IFA_LOCAL]             = { .type = NLA_U32 },
98         [IFA_ADDRESS]           = { .type = NLA_U32 },
99         [IFA_BROADCAST]         = { .type = NLA_U32 },
100         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
101         [IFA_CACHEINFO]         = { .len = sizeof(struct ifa_cacheinfo) },
102         [IFA_FLAGS]             = { .type = NLA_U32 },
103 };
104
/*
 * Hash table of in_ifaddr entries (linked through in_ifaddr.hash).
 * It has 1 << IN4_ADDR_HSIZE_SHIFT buckets and is indexed with the value
 * returned by inet_addr_hash().
 */
105 #define IN4_ADDR_HSIZE_SHIFT    8
106 #define IN4_ADDR_HSIZE          (1U << IN4_ADDR_HSIZE_SHIFT)
107
108 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
109
110 static u32 inet_addr_hash(const struct net *net, __be32 addr)
111 {
112         u32 val = (__force u32) addr ^ net_hash_mix(net);
113
114         return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
115 }
116
117 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
118 {
119         u32 hash = inet_addr_hash(net, ifa->ifa_local);
120
121         ASSERT_RTNL();
122         hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
123 }
124
125 static void inet_hash_remove(struct in_ifaddr *ifa)
126 {
127         ASSERT_RTNL();
128         hlist_del_init_rcu(&ifa->hash);
129 }
130
131 /**
132  * __ip_dev_find - find the first device with a given source address.
133  * @net: the net namespace
134  * @addr: the source address
135  * @devref: if true, take a reference on the found device
136  *
137  * If a caller uses devref=false, it should be protected by RCU, or RTNL
138  */
139 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
140 {
141         u32 hash = inet_addr_hash(net, addr);
142         struct net_device *result = NULL;
143         struct in_ifaddr *ifa;
144
145         rcu_read_lock();
146         hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
147                 if (ifa->ifa_local == addr) {
148                         struct net_device *dev = ifa->ifa_dev->dev;
149
150                         if (!net_eq(dev_net(dev), net))
151                                 continue;
152                         result = dev;
153                         break;
154                 }
155         }
156         if (!result) {
157                 struct flowi4 fl4 = { .daddr = addr };
158                 struct fib_result res = { 0 };
159                 struct fib_table *local;
160
161                 /* Fallback to FIB local table so that communication
162                  * over loopback subnets work.
163                  */
164                 local = fib_get_table(net, RT_TABLE_LOCAL);
165                 if (local &&
166                     !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
167                     res.type == RTN_LOCAL)
168                         result = FIB_RES_DEV(res);
169         }
170         if (result && devref)
171                 dev_hold(result);
172         rcu_read_unlock();
173         return result;
174 }
175 EXPORT_SYMBOL(__ip_dev_find);
176
/* Forward declarations for helpers defined later in this file. */
177 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
178
/* Notifier chain invoked with NETDEV_UP / NETDEV_DOWN and the affected ifaddr. */
179 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
180 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
181                          int destroy);
182 #ifdef CONFIG_SYSCTL
183 static int devinet_sysctl_register(struct in_device *idev);
184 static void devinet_sysctl_unregister(struct in_device *idev);
185 #else
/* Without CONFIG_SYSCTL, registration is a no-op that always succeeds. */
186 static int devinet_sysctl_register(struct in_device *idev)
187 {
188         return 0;
189 }
190 static void devinet_sysctl_unregister(struct in_device *idev)
191 {
192 }
193 #endif
194
195 /* Locks all the inet devices. */
196
197 static struct in_ifaddr *inet_alloc_ifa(void)
198 {
199         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
200 }
201
202 static void inet_rcu_free_ifa(struct rcu_head *head)
203 {
204         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
205         if (ifa->ifa_dev)
206                 in_dev_put(ifa->ifa_dev);
207         kfree(ifa);
208 }
209
210 static void inet_free_ifa(struct in_ifaddr *ifa)
211 {
212         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
213 }
214
215 void in_dev_finish_destroy(struct in_device *idev)
216 {
217         struct net_device *dev = idev->dev;
218
219         WARN_ON(idev->ifa_list);
220         WARN_ON(idev->mc_list);
221         kfree(rcu_dereference_protected(idev->mc_hash, 1));
222 #ifdef NET_REFCNT_DEBUG
223         pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
224 #endif
225         dev_put(dev);
226         if (!idev->dead)
227                 pr_err("Freeing alive in_device %p\n", idev);
228         else
229                 kfree(idev);
230 }
231 EXPORT_SYMBOL(in_dev_finish_destroy);
232
233 static struct in_device *inetdev_init(struct net_device *dev)
234 {
235         struct in_device *in_dev;
236         int err = -ENOMEM;
237
238         ASSERT_RTNL();
239
240         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
241         if (!in_dev)
242                 goto out;
243         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
244                         sizeof(in_dev->cnf));
245         in_dev->cnf.sysctl = NULL;
246         in_dev->dev = dev;
247         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
248         if (!in_dev->arp_parms)
249                 goto out_kfree;
250         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
251                 dev_disable_lro(dev);
252         /* Reference in_dev->dev */
253         dev_hold(dev);
254         /* Account for reference dev->ip_ptr (below) */
255         in_dev_hold(in_dev);
256
257         err = devinet_sysctl_register(in_dev);
258         if (err) {
259                 in_dev->dead = 1;
260                 in_dev_put(in_dev);
261                 in_dev = NULL;
262                 goto out;
263         }
264         ip_mc_init_dev(in_dev);
265         if (dev->flags & IFF_UP)
266                 ip_mc_up(in_dev);
267
268         /* we can receive as soon as ip_ptr is set -- do this last */
269         rcu_assign_pointer(dev->ip_ptr, in_dev);
270 out:
271         return in_dev ?: ERR_PTR(err);
272 out_kfree:
273         kfree(in_dev);
274         in_dev = NULL;
275         goto out;
276 }
277
278 static void in_dev_rcu_put(struct rcu_head *head)
279 {
280         struct in_device *idev = container_of(head, struct in_device, rcu_head);
281         in_dev_put(idev);
282 }
283
284 static void inetdev_destroy(struct in_device *in_dev)
285 {
286         struct in_ifaddr *ifa;
287         struct net_device *dev;
288
289         ASSERT_RTNL();
290
291         dev = in_dev->dev;
292
293         in_dev->dead = 1;
294
295         ip_mc_destroy_dev(in_dev);
296
297         while ((ifa = in_dev->ifa_list) != NULL) {
298                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
299                 inet_free_ifa(ifa);
300         }
301
302         RCU_INIT_POINTER(dev->ip_ptr, NULL);
303
304         devinet_sysctl_unregister(in_dev);
305         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
306         arp_ifdown(dev);
307
308         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
309 }
310
311 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
312 {
313         rcu_read_lock();
314         for_primary_ifa(in_dev) {
315                 if (inet_ifa_match(a, ifa)) {
316                         if (!b || inet_ifa_match(b, ifa)) {
317                                 rcu_read_unlock();
318                                 return 1;
319                         }
320                 }
321         } endfor_ifa(in_dev);
322         rcu_read_unlock();
323         return 0;
324 }
325
/*
 * __inet_del_ifa - remove the address *ifap from in_dev's address list.
 * @in_dev:  device the address belongs to
 * @ifap:    link (pointer to the ifa_next slot or list head) that currently
 *           points at the address to delete
 * @destroy: when non-zero, the deleted address is released with
 *           inet_free_ifa() at the end
 * @nlh, @portid: passed through to rtmsg_ifa() for the notifications
 *
 * If the address is primary, its secondaries (same mask, matching
 * inet_ifa_match()) are either deleted as well or, when
 * IN_DEV_PROMOTE_SECONDARIES() is set, the first one is promoted to primary
 * and the routes of the remaining ones are re-added.  RTM_DELADDR /
 * RTM_NEWADDR messages and NETDEV_DOWN / NETDEV_UP notifier calls are
 * issued for every affected address.  Caller must hold RTNL.
 */
326 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
327                          int destroy, struct nlmsghdr *nlh, u32 portid)
328 {
329         struct in_ifaddr *promote = NULL;
330         struct in_ifaddr *ifa, *ifa1 = *ifap;
331         struct in_ifaddr *last_prim = in_dev->ifa_list;
332         struct in_ifaddr *prev_prom = NULL;
333         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
334
335         ASSERT_RTNL();
336
337         /* 1. Deleting primary ifaddr forces deletion all secondaries
338          * unless alias promotion is set
339          **/
340
341         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
342                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
343
344                 while ((ifa = *ifap1) != NULL) {
                        /* Remember the last primary whose scope is not
                         * narrower than ifa1's; a promoted address is
                         * re-linked right after it.
                         */
345                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
346                             ifa1->ifa_scope <= ifa->ifa_scope)
347                                 last_prim = ifa;
348
                        /* Not a secondary of ifa1: step over it. */
349                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
350                             ifa1->ifa_mask != ifa->ifa_mask ||
351                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
352                                 ifap1 = &ifa->ifa_next;
353                                 prev_prom = ifa;
354                                 continue;
355                         }
356
357                         if (!do_promote) {
358                                 inet_hash_remove(ifa);
359                                 *ifap1 = ifa->ifa_next;
360
361                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
362                                 blocking_notifier_call_chain(&inetaddr_chain,
363                                                 NETDEV_DOWN, ifa);
364                                 inet_free_ifa(ifa);
365                         } else {
                                /* First matching secondary becomes the
                                 * promotion candidate.
                                 */
366                                 promote = ifa;
367                                 break;
368                         }
369                 }
370         }
371
372         /* On promotion all secondaries from subnet are changing
373          * the primary IP, we must remove all their routes silently
374          * and later to add them back with new prefsrc. Do this
375          * while all addresses are on the device list.
376          */
377         for (ifa = promote; ifa; ifa = ifa->ifa_next) {
378                 if (ifa1->ifa_mask == ifa->ifa_mask &&
379                     inet_ifa_match(ifa1->ifa_address, ifa))
380                         fib_del_ifaddr(ifa, ifa1);
381         }
382
383         /* 2. Unlink it */
384
385         *ifap = ifa1->ifa_next;
386         inet_hash_remove(ifa1);
387
388         /* 3. Announce address deletion */
389
390         /* Send message first, then call notifier.
391            At first sight, FIB update triggered by notifier
392            will refer to already deleted ifaddr, that could confuse
393            netlink listeners. It is not true: look, gated sees
394            that route deleted and if it still thinks that ifaddr
395            is valid, it will try to restore deleted routes... Grr.
396            So that, this order is correct.
397          */
398         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
399         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
400
401         if (promote) {
                /* Saved before the re-link below changes promote->ifa_next. */
402                 struct in_ifaddr *next_sec = promote->ifa_next;
403
                /* Move the promoted address from its current position to
                 * just after last_prim.
                 */
404                 if (prev_prom) {
405                         prev_prom->ifa_next = promote->ifa_next;
406                         promote->ifa_next = last_prim->ifa_next;
407                         last_prim->ifa_next = promote;
408                 }
409
410                 promote->ifa_flags &= ~IFA_F_SECONDARY;
411                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
412                 blocking_notifier_call_chain(&inetaddr_chain,
413                                 NETDEV_UP, promote);
                /* Re-add routes for the remaining addresses of the subnet. */
414                 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
415                         if (ifa1->ifa_mask != ifa->ifa_mask ||
416                             !inet_ifa_match(ifa1->ifa_address, ifa))
417                                         continue;
418                         fib_add_ifaddr(ifa);
419                 }
420
421         }
422         if (destroy)
423                 inet_free_ifa(ifa1);
424 }
425
426 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
427                          int destroy)
428 {
429         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
430 }
431
/* Handler is defined further down; declared here for the work item. */
432 static void check_lifetime(struct work_struct *work);
433
/* Delayed work item that runs check_lifetime(); (re)queued on
 * system_power_efficient_wq by the functions in this file.
 */
434 static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
435
436 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
437                              u32 portid)
438 {
439         struct in_device *in_dev = ifa->ifa_dev;
440         struct in_ifaddr *ifa1, **ifap, **last_primary;
441
442         ASSERT_RTNL();
443
444         if (!ifa->ifa_local) {
445                 inet_free_ifa(ifa);
446                 return 0;
447         }
448
449         ifa->ifa_flags &= ~IFA_F_SECONDARY;
450         last_primary = &in_dev->ifa_list;
451
452         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
453              ifap = &ifa1->ifa_next) {
454                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
455                     ifa->ifa_scope <= ifa1->ifa_scope)
456                         last_primary = &ifa1->ifa_next;
457                 if (ifa1->ifa_mask == ifa->ifa_mask &&
458                     inet_ifa_match(ifa1->ifa_address, ifa)) {
459                         if (ifa1->ifa_local == ifa->ifa_local) {
460                                 inet_free_ifa(ifa);
461                                 return -EEXIST;
462                         }
463                         if (ifa1->ifa_scope != ifa->ifa_scope) {
464                                 inet_free_ifa(ifa);
465                                 return -EINVAL;
466                         }
467                         ifa->ifa_flags |= IFA_F_SECONDARY;
468                 }
469         }
470
471         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
472                 prandom_seed((__force u32) ifa->ifa_local);
473                 ifap = last_primary;
474         }
475
476         ifa->ifa_next = *ifap;
477         *ifap = ifa;
478
479         inet_hash_insert(dev_net(in_dev->dev), ifa);
480
481         cancel_delayed_work(&check_lifetime_work);
482         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
483
484         /* Send message first, then call notifier.
485            Notifier will trigger FIB update, so that
486            listeners of netlink will know about new ifaddr */
487         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
488         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
489
490         return 0;
491 }
492
493 static int inet_insert_ifa(struct in_ifaddr *ifa)
494 {
495         return __inet_insert_ifa(ifa, NULL, 0);
496 }
497
498 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
499 {
500         struct in_device *in_dev = __in_dev_get_rtnl(dev);
501
502         ASSERT_RTNL();
503
504         if (!in_dev) {
505                 inet_free_ifa(ifa);
506                 return -ENOBUFS;
507         }
508         ipv4_devconf_setall(in_dev);
509         neigh_parms_data_state_setall(in_dev->arp_parms);
510         if (ifa->ifa_dev != in_dev) {
511                 WARN_ON(ifa->ifa_dev);
512                 in_dev_hold(in_dev);
513                 ifa->ifa_dev = in_dev;
514         }
515         if (ipv4_is_loopback(ifa->ifa_local))
516                 ifa->ifa_scope = RT_SCOPE_HOST;
517         return inet_insert_ifa(ifa);
518 }
519
520 /* Caller must hold RCU or RTNL :
521  * We dont take a reference on found in_device
522  */
523 struct in_device *inetdev_by_index(struct net *net, int ifindex)
524 {
525         struct net_device *dev;
526         struct in_device *in_dev = NULL;
527
528         rcu_read_lock();
529         dev = dev_get_by_index_rcu(net, ifindex);
530         if (dev)
531                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
532         rcu_read_unlock();
533         return in_dev;
534 }
535 EXPORT_SYMBOL(inetdev_by_index);
536
537 /* Called only from RTNL semaphored context. No locks. */
538
539 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
540                                     __be32 mask)
541 {
542         ASSERT_RTNL();
543
544         for_primary_ifa(in_dev) {
545                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
546                         return ifa;
547         } endfor_ifa(in_dev);
548         return NULL;
549 }
550
551 static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
552 {
553         struct ip_mreqn mreq = {
554                 .imr_multiaddr.s_addr = ifa->ifa_address,
555                 .imr_ifindex = ifa->ifa_dev->dev->ifindex,
556         };
557         int ret;
558
559         ASSERT_RTNL();
560
561         lock_sock(sk);
562         if (join)
563                 ret = ip_mc_join_group(sk, &mreq);
564         else
565                 ret = ip_mc_leave_group(sk, &mreq);
566         release_sock(sk);
567
568         return ret;
569 }
570
571 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
572 {
573         struct net *net = sock_net(skb->sk);
574         struct nlattr *tb[IFA_MAX+1];
575         struct in_device *in_dev;
576         struct ifaddrmsg *ifm;
577         struct in_ifaddr *ifa, **ifap;
578         int err = -EINVAL;
579
580         ASSERT_RTNL();
581
582         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
583         if (err < 0)
584                 goto errout;
585
586         ifm = nlmsg_data(nlh);
587         in_dev = inetdev_by_index(net, ifm->ifa_index);
588         if (!in_dev) {
589                 err = -ENODEV;
590                 goto errout;
591         }
592
593         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
594              ifap = &ifa->ifa_next) {
595                 if (tb[IFA_LOCAL] &&
596                     ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
597                         continue;
598
599                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
600                         continue;
601
602                 if (tb[IFA_ADDRESS] &&
603                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
604                     !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
605                         continue;
606
607                 if (ipv4_is_multicast(ifa->ifa_address))
608                         ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa);
609                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
610                 return 0;
611         }
612
613         err = -EADDRNOTAVAIL;
614 errout:
615         return err;
616 }
617
/* Lifetime value meaning "never expires" (all 32 bits set). */
618 #define INFINITY_LIFE_TIME      0xFFFFFFFF
619
/*
 * check_lifetime - delayed-work handler that expires and deprecates
 * addresses.
 *
 * Every bucket of inet_addr_lst is processed in two passes:
 *  1. under rcu_read_lock(), decide whether any non-permanent address in
 *     the bucket needs a change and track the earliest time ("next") at
 *     which a lifetime will run out;
 *  2. only if a change is needed, take rtnl_lock(), delete addresses whose
 *     valid lifetime has elapsed and flag addresses whose preferred
 *     lifetime has elapsed as IFA_F_DEPRECATED (announced with
 *     RTM_NEWADDR).
 * Finally the work re-queues itself for the computed time, but never
 * sooner than ADDRCONF_TIMER_FUZZ_MAX from now.
 */
620 static void check_lifetime(struct work_struct *work)
621 {
622         unsigned long now, next, next_sec, next_sched;
623         struct in_ifaddr *ifa;
624         struct hlist_node *n;
625         int i;
626
627         now = jiffies;
628         next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
629
630         for (i = 0; i < IN4_ADDR_HSIZE; i++) {
631                 bool change_needed = false;
632
                /* Pass 1: read-only scan of the bucket. */
633                 rcu_read_lock();
634                 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
635                         unsigned long age;
636
637                         if (ifa->ifa_flags & IFA_F_PERMANENT)
638                                 continue;
639
640                         /* We try to batch several events at once. */
641                         age = (now - ifa->ifa_tstamp +
642                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
643
644                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
645                             age >= ifa->ifa_valid_lft) {
646                                 change_needed = true;
647                         } else if (ifa->ifa_preferred_lft ==
648                                    INFINITY_LIFE_TIME) {
649                                 continue;
650                         } else if (age >= ifa->ifa_preferred_lft) {
                                /* Preferred lifetime is over; the next
                                 * event for this address is the end of its
                                 * valid lifetime.
                                 */
651                                 if (time_before(ifa->ifa_tstamp +
652                                                 ifa->ifa_valid_lft * HZ, next))
653                                         next = ifa->ifa_tstamp +
654                                                ifa->ifa_valid_lft * HZ;
655
656                                 if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
657                                         change_needed = true;
658                         } else if (time_before(ifa->ifa_tstamp +
659                                                ifa->ifa_preferred_lft * HZ,
660                                                next)) {
661                                 next = ifa->ifa_tstamp +
662                                        ifa->ifa_preferred_lft * HZ;
663                         }
664                 }
665                 rcu_read_unlock();
666                 if (!change_needed)
667                         continue;
                /* Pass 2: modify the bucket under RTNL. */
668                 rtnl_lock();
669                 hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
670                         unsigned long age;
671
672                         if (ifa->ifa_flags & IFA_F_PERMANENT)
673                                 continue;
674
675                         /* We try to batch several events at once. */
676                         age = (now - ifa->ifa_tstamp +
677                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
678
679                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
680                             age >= ifa->ifa_valid_lft) {
681                                 struct in_ifaddr **ifap;
682
                                /* inet_del_ifa() needs the link pointing
                                 * at ifa, so locate it in the device list.
                                 */
683                                 for (ifap = &ifa->ifa_dev->ifa_list;
684                                      *ifap != NULL; ifap = &(*ifap)->ifa_next) {
685                                         if (*ifap == ifa) {
686                                                 inet_del_ifa(ifa->ifa_dev,
687                                                              ifap, 1);
688                                                 break;
689                                         }
690                                 }
691                         } else if (ifa->ifa_preferred_lft !=
692                                    INFINITY_LIFE_TIME &&
693                                    age >= ifa->ifa_preferred_lft &&
694                                    !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
695                                 ifa->ifa_flags |= IFA_F_DEPRECATED;
696                                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
697                         }
698                 }
699                 rtnl_unlock();
700         }
701
702         next_sec = round_jiffies_up(next);
703         next_sched = next;
704
705         /* If rounded timeout is accurate enough, accept it. */
706         if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
707                 next_sched = next_sec;
708
709         now = jiffies;
710         /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
711         if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
712                 next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
713
714         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
715                         next_sched - now);
716 }
717
718 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
719                              __u32 prefered_lft)
720 {
721         unsigned long timeout;
722
723         ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
724
725         timeout = addrconf_timeout_fixup(valid_lft, HZ);
726         if (addrconf_finite_timeout(timeout))
727                 ifa->ifa_valid_lft = timeout;
728         else
729                 ifa->ifa_flags |= IFA_F_PERMANENT;
730
731         timeout = addrconf_timeout_fixup(prefered_lft, HZ);
732         if (addrconf_finite_timeout(timeout)) {
733                 if (timeout == 0)
734                         ifa->ifa_flags |= IFA_F_DEPRECATED;
735                 ifa->ifa_preferred_lft = timeout;
736         }
737         ifa->ifa_tstamp = jiffies;
738         if (!ifa->ifa_cstamp)
739                 ifa->ifa_cstamp = ifa->ifa_tstamp;
740 }
741
742 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
743                                        __u32 *pvalid_lft, __u32 *pprefered_lft)
744 {
745         struct nlattr *tb[IFA_MAX+1];
746         struct in_ifaddr *ifa;
747         struct ifaddrmsg *ifm;
748         struct net_device *dev;
749         struct in_device *in_dev;
750         int err;
751
752         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
753         if (err < 0)
754                 goto errout;
755
756         ifm = nlmsg_data(nlh);
757         err = -EINVAL;
758         if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
759                 goto errout;
760
761         dev = __dev_get_by_index(net, ifm->ifa_index);
762         err = -ENODEV;
763         if (!dev)
764                 goto errout;
765
766         in_dev = __in_dev_get_rtnl(dev);
767         err = -ENOBUFS;
768         if (!in_dev)
769                 goto errout;
770
771         ifa = inet_alloc_ifa();
772         if (!ifa)
773                 /*
774                  * A potential indev allocation can be left alive, it stays
775                  * assigned to its device and is destroy with it.
776                  */
777                 goto errout;
778
779         ipv4_devconf_setall(in_dev);
780         neigh_parms_data_state_setall(in_dev->arp_parms);
781         in_dev_hold(in_dev);
782
783         if (!tb[IFA_ADDRESS])
784                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
785
786         INIT_HLIST_NODE(&ifa->hash);
787         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
788         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
789         ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
790                                          ifm->ifa_flags;
791         ifa->ifa_scope = ifm->ifa_scope;
792         ifa->ifa_dev = in_dev;
793
794         ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
795         ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
796
797         if (tb[IFA_BROADCAST])
798                 ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
799
800         if (tb[IFA_LABEL])
801                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
802         else
803                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
804
805         if (tb[IFA_CACHEINFO]) {
806                 struct ifa_cacheinfo *ci;
807
808                 ci = nla_data(tb[IFA_CACHEINFO]);
809                 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
810                         err = -EINVAL;
811                         goto errout_free;
812                 }
813                 *pvalid_lft = ci->ifa_valid;
814                 *pprefered_lft = ci->ifa_prefered;
815         }
816
817         return ifa;
818
819 errout_free:
820         inet_free_ifa(ifa);
821 errout:
822         return ERR_PTR(err);
823 }
824
825 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
826 {
827         struct in_device *in_dev = ifa->ifa_dev;
828         struct in_ifaddr *ifa1, **ifap;
829
830         if (!ifa->ifa_local)
831                 return NULL;
832
833         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
834              ifap = &ifa1->ifa_next) {
835                 if (ifa1->ifa_mask == ifa->ifa_mask &&
836                     inet_ifa_match(ifa1->ifa_address, ifa) &&
837                     ifa1->ifa_local == ifa->ifa_local)
838                         return ifa1;
839         }
840         return NULL;
841 }
842
843 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
844 {
845         struct net *net = sock_net(skb->sk);
846         struct in_ifaddr *ifa;
847         struct in_ifaddr *ifa_existing;
848         __u32 valid_lft = INFINITY_LIFE_TIME;
849         __u32 prefered_lft = INFINITY_LIFE_TIME;
850
851         ASSERT_RTNL();
852
853         ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
854         if (IS_ERR(ifa))
855                 return PTR_ERR(ifa);
856
857         ifa_existing = find_matching_ifa(ifa);
858         if (!ifa_existing) {
859                 /* It would be best to check for !NLM_F_CREATE here but
860                  * userspace already relies on not having to provide this.
861                  */
862                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
863                 if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
864                         int ret = ip_mc_config(net->ipv4.mc_autojoin_sk,
865                                                true, ifa);
866
867                         if (ret < 0) {
868                                 inet_free_ifa(ifa);
869                                 return ret;
870                         }
871                 }
872                 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
873         } else {
874                 inet_free_ifa(ifa);
875
876                 if (nlh->nlmsg_flags & NLM_F_EXCL ||
877                     !(nlh->nlmsg_flags & NLM_F_REPLACE))
878                         return -EEXIST;
879                 ifa = ifa_existing;
880                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
881                 cancel_delayed_work(&check_lifetime_work);
882                 queue_delayed_work(system_power_efficient_wq,
883                                 &check_lifetime_work, 0);
884                 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
885         }
886         return 0;
887 }
888
889 /*
890  *      Determine a default network mask, based on the IP address.
891  */
892
893 static int inet_abc_len(__be32 addr)
894 {
895         int rc = -1;    /* Something else, probably a multicast. */
896
897         if (ipv4_is_zeronet(addr))
898                 rc = 0;
899         else {
900                 __u32 haddr = ntohl(addr);
901
902                 if (IN_CLASSA(haddr))
903                         rc = 8;
904                 else if (IN_CLASSB(haddr))
905                         rc = 16;
906                 else if (IN_CLASSC(haddr))
907                         rc = 24;
908         }
909
910         return rc;
911 }
912
913
914 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
915 {
916         struct ifreq ifr;
917         struct sockaddr_in sin_orig;
918         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
919         struct in_device *in_dev;
920         struct in_ifaddr **ifap = NULL;
921         struct in_ifaddr *ifa = NULL;
922         struct net_device *dev;
923         char *colon;
924         int ret = -EFAULT;
925         int tryaddrmatch = 0;
926
927         /*
928          *      Fetch the caller's info block into kernel space
929          */
930
931         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
932                 goto out;
933         ifr.ifr_name[IFNAMSIZ - 1] = 0;
934
935         /* save original address for comparison */
936         memcpy(&sin_orig, sin, sizeof(*sin));
937
938         colon = strchr(ifr.ifr_name, ':');
939         if (colon)
940                 *colon = 0;
941
942         dev_load(net, ifr.ifr_name);
943
944         switch (cmd) {
945         case SIOCGIFADDR:       /* Get interface address */
946         case SIOCGIFBRDADDR:    /* Get the broadcast address */
947         case SIOCGIFDSTADDR:    /* Get the destination address */
948         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
949                 /* Note that these ioctls will not sleep,
950                    so that we do not impose a lock.
951                    One day we will be forced to put shlock here (I mean SMP)
952                  */
953                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
954                 memset(sin, 0, sizeof(*sin));
955                 sin->sin_family = AF_INET;
956                 break;
957
958         case SIOCSIFFLAGS:
959                 ret = -EPERM;
960                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
961                         goto out;
962                 break;
963         case SIOCSIFADDR:       /* Set interface address (and family) */
964         case SIOCSIFBRDADDR:    /* Set the broadcast address */
965         case SIOCSIFDSTADDR:    /* Set the destination address */
966         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
967                 ret = -EPERM;
968                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
969                         goto out;
970                 ret = -EINVAL;
971                 if (sin->sin_family != AF_INET)
972                         goto out;
973                 break;
974         default:
975                 ret = -EINVAL;
976                 goto out;
977         }
978
979         rtnl_lock();
980
981         ret = -ENODEV;
982         dev = __dev_get_by_name(net, ifr.ifr_name);
983         if (!dev)
984                 goto done;
985
986         if (colon)
987                 *colon = ':';
988
989         in_dev = __in_dev_get_rtnl(dev);
990         if (in_dev) {
991                 if (tryaddrmatch) {
992                         /* Matthias Andree */
993                         /* compare label and address (4.4BSD style) */
994                         /* note: we only do this for a limited set of ioctls
995                            and only if the original address family was AF_INET.
996                            This is checked above. */
997                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
998                              ifap = &ifa->ifa_next) {
999                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
1000                                     sin_orig.sin_addr.s_addr ==
1001                                                         ifa->ifa_local) {
1002                                         break; /* found */
1003                                 }
1004                         }
1005                 }
1006                 /* we didn't get a match, maybe the application is
1007                    4.3BSD-style and passed in junk so we fall back to
1008                    comparing just the label */
1009                 if (!ifa) {
1010                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1011                              ifap = &ifa->ifa_next)
1012                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
1013                                         break;
1014                 }
1015         }
1016
1017         ret = -EADDRNOTAVAIL;
1018         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1019                 goto done;
1020
1021         switch (cmd) {
1022         case SIOCGIFADDR:       /* Get interface address */
1023                 sin->sin_addr.s_addr = ifa->ifa_local;
1024                 goto rarok;
1025
1026         case SIOCGIFBRDADDR:    /* Get the broadcast address */
1027                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
1028                 goto rarok;
1029
1030         case SIOCGIFDSTADDR:    /* Get the destination address */
1031                 sin->sin_addr.s_addr = ifa->ifa_address;
1032                 goto rarok;
1033
1034         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
1035                 sin->sin_addr.s_addr = ifa->ifa_mask;
1036                 goto rarok;
1037
1038         case SIOCSIFFLAGS:
1039                 if (colon) {
1040                         ret = -EADDRNOTAVAIL;
1041                         if (!ifa)
1042                                 break;
1043                         ret = 0;
1044                         if (!(ifr.ifr_flags & IFF_UP))
1045                                 inet_del_ifa(in_dev, ifap, 1);
1046                         break;
1047                 }
1048                 ret = dev_change_flags(dev, ifr.ifr_flags);
1049                 break;
1050
1051         case SIOCSIFADDR:       /* Set interface address (and family) */
1052                 ret = -EINVAL;
1053                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1054                         break;
1055
1056                 if (!ifa) {
1057                         ret = -ENOBUFS;
1058                         ifa = inet_alloc_ifa();
1059                         if (!ifa)
1060                                 break;
1061                         INIT_HLIST_NODE(&ifa->hash);
1062                         if (colon)
1063                                 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
1064                         else
1065                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1066                 } else {
1067                         ret = 0;
1068                         if (ifa->ifa_local == sin->sin_addr.s_addr)
1069                                 break;
1070                         inet_del_ifa(in_dev, ifap, 0);
1071                         ifa->ifa_broadcast = 0;
1072                         ifa->ifa_scope = 0;
1073                 }
1074
1075                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1076
1077                 if (!(dev->flags & IFF_POINTOPOINT)) {
1078                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1079                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1080                         if ((dev->flags & IFF_BROADCAST) &&
1081                             ifa->ifa_prefixlen < 31)
1082                                 ifa->ifa_broadcast = ifa->ifa_address |
1083                                                      ~ifa->ifa_mask;
1084                 } else {
1085                         ifa->ifa_prefixlen = 32;
1086                         ifa->ifa_mask = inet_make_mask(32);
1087                 }
1088                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1089                 ret = inet_set_ifa(dev, ifa);
1090                 break;
1091
1092         case SIOCSIFBRDADDR:    /* Set the broadcast address */
1093                 ret = 0;
1094                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1095                         inet_del_ifa(in_dev, ifap, 0);
1096                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
1097                         inet_insert_ifa(ifa);
1098                 }
1099                 break;
1100
1101         case SIOCSIFDSTADDR:    /* Set the destination address */
1102                 ret = 0;
1103                 if (ifa->ifa_address == sin->sin_addr.s_addr)
1104                         break;
1105                 ret = -EINVAL;
1106                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1107                         break;
1108                 ret = 0;
1109                 inet_del_ifa(in_dev, ifap, 0);
1110                 ifa->ifa_address = sin->sin_addr.s_addr;
1111                 inet_insert_ifa(ifa);
1112                 break;
1113
1114         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1115
1116                 /*
1117                  *      The mask we set must be legal.
1118                  */
1119                 ret = -EINVAL;
1120                 if (bad_mask(sin->sin_addr.s_addr, 0))
1121                         break;
1122                 ret = 0;
1123                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1124                         __be32 old_mask = ifa->ifa_mask;
1125                         inet_del_ifa(in_dev, ifap, 0);
1126                         ifa->ifa_mask = sin->sin_addr.s_addr;
1127                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1128
1129                         /* See if current broadcast address matches
1130                          * with current netmask, then recalculate
1131                          * the broadcast address. Otherwise it's a
1132                          * funny address, so don't touch it since
1133                          * the user seems to know what (s)he's doing...
1134                          */
1135                         if ((dev->flags & IFF_BROADCAST) &&
1136                             (ifa->ifa_prefixlen < 31) &&
1137                             (ifa->ifa_broadcast ==
1138                              (ifa->ifa_local|~old_mask))) {
1139                                 ifa->ifa_broadcast = (ifa->ifa_local |
1140                                                       ~sin->sin_addr.s_addr);
1141                         }
1142                         inet_insert_ifa(ifa);
1143                 }
1144                 break;
1145         }
1146 done:
1147         rtnl_unlock();
1148 out:
1149         return ret;
1150 rarok:
1151         rtnl_unlock();
1152         ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
1153         goto out;
1154 }
1155
1156 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1157 {
1158         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1159         struct in_ifaddr *ifa;
1160         struct ifreq ifr;
1161         int done = 0;
1162
1163         if (!in_dev)
1164                 goto out;
1165
1166         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1167                 if (!buf) {
1168                         done += sizeof(ifr);
1169                         continue;
1170                 }
1171                 if (len < (int) sizeof(ifr))
1172                         break;
1173                 memset(&ifr, 0, sizeof(struct ifreq));
1174                 strcpy(ifr.ifr_name, ifa->ifa_label);
1175
1176                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1177                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1178                                                                 ifa->ifa_local;
1179
1180                 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1181                         done = -EFAULT;
1182                         break;
1183                 }
1184                 buf  += sizeof(struct ifreq);
1185                 len  -= sizeof(struct ifreq);
1186                 done += sizeof(struct ifreq);
1187         }
1188 out:
1189         return done;
1190 }
1191
1192 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1193 {
1194         __be32 addr = 0;
1195         struct in_device *in_dev;
1196         struct net *net = dev_net(dev);
1197
1198         rcu_read_lock();
1199         in_dev = __in_dev_get_rcu(dev);
1200         if (!in_dev)
1201                 goto no_in_dev;
1202
1203         for_primary_ifa(in_dev) {
1204                 if (ifa->ifa_scope > scope)
1205                         continue;
1206                 if (!dst || inet_ifa_match(dst, ifa)) {
1207                         addr = ifa->ifa_local;
1208                         break;
1209                 }
1210                 if (!addr)
1211                         addr = ifa->ifa_local;
1212         } endfor_ifa(in_dev);
1213
1214         if (addr)
1215                 goto out_unlock;
1216 no_in_dev:
1217
1218         /* Not loopback addresses on loopback should be preferred
1219            in this case. It is important that lo is the first interface
1220            in dev_base list.
1221          */
1222         for_each_netdev_rcu(net, dev) {
1223                 in_dev = __in_dev_get_rcu(dev);
1224                 if (!in_dev)
1225                         continue;
1226
1227                 for_primary_ifa(in_dev) {
1228                         if (ifa->ifa_scope != RT_SCOPE_LINK &&
1229                             ifa->ifa_scope <= scope) {
1230                                 addr = ifa->ifa_local;
1231                                 goto out_unlock;
1232                         }
1233                 } endfor_ifa(in_dev);
1234         }
1235 out_unlock:
1236         rcu_read_unlock();
1237         return addr;
1238 }
1239 EXPORT_SYMBOL(inet_select_addr);
1240
1241 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1242                               __be32 local, int scope)
1243 {
1244         int same = 0;
1245         __be32 addr = 0;
1246
1247         for_ifa(in_dev) {
1248                 if (!addr &&
1249                     (local == ifa->ifa_local || !local) &&
1250                     ifa->ifa_scope <= scope) {
1251                         addr = ifa->ifa_local;
1252                         if (same)
1253                                 break;
1254                 }
1255                 if (!same) {
1256                         same = (!local || inet_ifa_match(local, ifa)) &&
1257                                 (!dst || inet_ifa_match(dst, ifa));
1258                         if (same && addr) {
1259                                 if (local || !dst)
1260                                         break;
1261                                 /* Is the selected addr into dst subnet? */
1262                                 if (inet_ifa_match(addr, ifa))
1263                                         break;
1264                                 /* No, then can we use new local src? */
1265                                 if (ifa->ifa_scope <= scope) {
1266                                         addr = ifa->ifa_local;
1267                                         break;
1268                                 }
1269                                 /* search for large dst subnet for addr */
1270                                 same = 0;
1271                         }
1272                 }
1273         } endfor_ifa(in_dev);
1274
1275         return same ? addr : 0;
1276 }
1277
1278 /*
1279  * Confirm that local IP address exists using wildcards:
1280  * - net: netns to check, cannot be NULL
1281  * - in_dev: only on this interface, NULL=any interface
1282  * - dst: only in the same subnet as dst, 0=any dst
1283  * - local: address, 0=autoselect the local address
1284  * - scope: maximum allowed scope value for the local address
1285  */
1286 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1287                          __be32 dst, __be32 local, int scope)
1288 {
1289         __be32 addr = 0;
1290         struct net_device *dev;
1291
1292         if (in_dev)
1293                 return confirm_addr_indev(in_dev, dst, local, scope);
1294
1295         rcu_read_lock();
1296         for_each_netdev_rcu(net, dev) {
1297                 in_dev = __in_dev_get_rcu(dev);
1298                 if (in_dev) {
1299                         addr = confirm_addr_indev(in_dev, dst, local, scope);
1300                         if (addr)
1301                                 break;
1302                 }
1303         }
1304         rcu_read_unlock();
1305
1306         return addr;
1307 }
1308 EXPORT_SYMBOL(inet_confirm_addr);
1309
1310 /*
1311  *      Device notifier
1312  */
1313
1314 int register_inetaddr_notifier(struct notifier_block *nb)
1315 {
1316         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1317 }
1318 EXPORT_SYMBOL(register_inetaddr_notifier);
1319
1320 int unregister_inetaddr_notifier(struct notifier_block *nb)
1321 {
1322         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1323 }
1324 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1325
1326 /* Rename ifa_labels for a device name change. Make some effort to preserve
1327  * existing alias numbering and to create unique labels if possible.
1328 */
1329 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1330 {
1331         struct in_ifaddr *ifa;
1332         int named = 0;
1333
1334         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1335                 char old[IFNAMSIZ], *dot;
1336
1337                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1338                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1339                 if (named++ == 0)
1340                         goto skip;
1341                 dot = strchr(old, ':');
1342                 if (!dot) {
1343                         sprintf(old, ":%d", named);
1344                         dot = old;
1345                 }
1346                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1347                         strcat(ifa->ifa_label, dot);
1348                 else
1349                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1350 skip:
1351                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1352         }
1353 }
1354
/* An MTU is usable for IPv4 on a device only if it is at least 68 bytes. */
static bool inetdev_valid_mtu(unsigned int mtu)
{
	if (mtu < 68)
		return false;

	return true;
}
1359
1360 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1361                                         struct in_device *in_dev)
1362
1363 {
1364         struct in_ifaddr *ifa;
1365
1366         for (ifa = in_dev->ifa_list; ifa;
1367              ifa = ifa->ifa_next) {
1368                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1369                          ifa->ifa_local, dev,
1370                          ifa->ifa_local, NULL,
1371                          dev->dev_addr, NULL);
1372         }
1373 }
1374
1375 /* Called only under RTNL semaphore */
1376
1377 static int inetdev_event(struct notifier_block *this, unsigned long event,
1378                          void *ptr)
1379 {
1380         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1381         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1382
1383         ASSERT_RTNL();
1384
1385         if (!in_dev) {
1386                 if (event == NETDEV_REGISTER) {
1387                         in_dev = inetdev_init(dev);
1388                         if (IS_ERR(in_dev))
1389                                 return notifier_from_errno(PTR_ERR(in_dev));
1390                         if (dev->flags & IFF_LOOPBACK) {
1391                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1392                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1393                         }
1394                 } else if (event == NETDEV_CHANGEMTU) {
1395                         /* Re-enabling IP */
1396                         if (inetdev_valid_mtu(dev->mtu))
1397                                 in_dev = inetdev_init(dev);
1398                 }
1399                 goto out;
1400         }
1401
1402         switch (event) {
1403         case NETDEV_REGISTER:
1404                 pr_debug("%s: bug\n", __func__);
1405                 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1406                 break;
1407         case NETDEV_UP:
1408                 if (!inetdev_valid_mtu(dev->mtu))
1409                         break;
1410                 if (dev->flags & IFF_LOOPBACK) {
1411                         struct in_ifaddr *ifa = inet_alloc_ifa();
1412
1413                         if (ifa) {
1414                                 INIT_HLIST_NODE(&ifa->hash);
1415                                 ifa->ifa_local =
1416                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1417                                 ifa->ifa_prefixlen = 8;
1418                                 ifa->ifa_mask = inet_make_mask(8);
1419                                 in_dev_hold(in_dev);
1420                                 ifa->ifa_dev = in_dev;
1421                                 ifa->ifa_scope = RT_SCOPE_HOST;
1422                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1423                                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1424                                                  INFINITY_LIFE_TIME);
1425                                 ipv4_devconf_setall(in_dev);
1426                                 neigh_parms_data_state_setall(in_dev->arp_parms);
1427                                 inet_insert_ifa(ifa);
1428                         }
1429                 }
1430                 ip_mc_up(in_dev);
1431                 /* fall through */
1432         case NETDEV_CHANGEADDR:
1433                 if (!IN_DEV_ARP_NOTIFY(in_dev))
1434                         break;
1435                 /* fall through */
1436         case NETDEV_NOTIFY_PEERS:
1437                 /* Send gratuitous ARP to notify of link change */
1438                 inetdev_send_gratuitous_arp(dev, in_dev);
1439                 break;
1440         case NETDEV_DOWN:
1441                 ip_mc_down(in_dev);
1442                 break;
1443         case NETDEV_PRE_TYPE_CHANGE:
1444                 ip_mc_unmap(in_dev);
1445                 break;
1446         case NETDEV_POST_TYPE_CHANGE:
1447                 ip_mc_remap(in_dev);
1448                 break;
1449         case NETDEV_CHANGEMTU:
1450                 if (inetdev_valid_mtu(dev->mtu))
1451                         break;
1452                 /* disable IP when MTU is not enough */
1453         case NETDEV_UNREGISTER:
1454                 inetdev_destroy(in_dev);
1455                 break;
1456         case NETDEV_CHANGENAME:
1457                 /* Do not notify about label change, this event is
1458                  * not interesting to applications using netlink.
1459                  */
1460                 inetdev_changename(dev, in_dev);
1461
1462                 devinet_sysctl_unregister(in_dev);
1463                 devinet_sysctl_register(in_dev);
1464                 break;
1465         }
1466 out:
1467         return NOTIFY_DONE;
1468 }
1469
/* Notifier block whose callback is inetdev_event(). */
1470 static struct notifier_block ip_netdev_notifier = {
1471         .notifier_call = inetdev_event,
1472 };
1473
1474 static size_t inet_nlmsg_size(void)
1475 {
1476         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1477                + nla_total_size(4) /* IFA_ADDRESS */
1478                + nla_total_size(4) /* IFA_LOCAL */
1479                + nla_total_size(4) /* IFA_BROADCAST */
1480                + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1481                + nla_total_size(4)  /* IFA_FLAGS */
1482                + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1483 }
1484
1485 static inline u32 cstamp_delta(unsigned long cstamp)
1486 {
1487         return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1488 }
1489
1490 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1491                          unsigned long tstamp, u32 preferred, u32 valid)
1492 {
1493         struct ifa_cacheinfo ci;
1494
1495         ci.cstamp = cstamp_delta(cstamp);
1496         ci.tstamp = cstamp_delta(tstamp);
1497         ci.ifa_prefered = preferred;
1498         ci.ifa_valid = valid;
1499
1500         return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1501 }
1502
1503 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1504                             u32 portid, u32 seq, int event, unsigned int flags)
1505 {
1506         struct ifaddrmsg *ifm;
1507         struct nlmsghdr  *nlh;
1508         u32 preferred, valid;
1509
1510         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1511         if (!nlh)
1512                 return -EMSGSIZE;
1513
1514         ifm = nlmsg_data(nlh);
1515         ifm->ifa_family = AF_INET;
1516         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1517         ifm->ifa_flags = ifa->ifa_flags;
1518         ifm->ifa_scope = ifa->ifa_scope;
1519         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1520
1521         if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1522                 preferred = ifa->ifa_preferred_lft;
1523                 valid = ifa->ifa_valid_lft;
1524                 if (preferred != INFINITY_LIFE_TIME) {
1525                         long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1526
1527                         if (preferred > tval)
1528                                 preferred -= tval;
1529                         else
1530                                 preferred = 0;
1531                         if (valid != INFINITY_LIFE_TIME) {
1532                                 if (valid > tval)
1533                                         valid -= tval;
1534                                 else
1535                                         valid = 0;
1536                         }
1537                 }
1538         } else {
1539                 preferred = INFINITY_LIFE_TIME;
1540                 valid = INFINITY_LIFE_TIME;
1541         }
1542         if ((ifa->ifa_address &&
1543              nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1544             (ifa->ifa_local &&
1545              nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1546             (ifa->ifa_broadcast &&
1547              nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1548             (ifa->ifa_label[0] &&
1549              nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1550             nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1551             put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1552                           preferred, valid))
1553                 goto nla_put_failure;
1554
1555         nlmsg_end(skb, nlh);
1556         return 0;
1557
1558 nla_put_failure:
1559         nlmsg_cancel(skb, nlh);
1560         return -EMSGSIZE;
1561 }
1562
1563 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1564 {
1565         struct net *net = sock_net(skb->sk);
1566         int h, s_h;
1567         int idx, s_idx;
1568         int ip_idx, s_ip_idx;
1569         struct net_device *dev;
1570         struct in_device *in_dev;
1571         struct in_ifaddr *ifa;
1572         struct hlist_head *head;
1573
1574         s_h = cb->args[0];
1575         s_idx = idx = cb->args[1];
1576         s_ip_idx = ip_idx = cb->args[2];
1577
1578         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1579                 idx = 0;
1580                 head = &net->dev_index_head[h];
1581                 rcu_read_lock();
1582                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1583                           net->dev_base_seq;
1584                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1585                         if (idx < s_idx)
1586                                 goto cont;
1587                         if (h > s_h || idx > s_idx)
1588                                 s_ip_idx = 0;
1589                         in_dev = __in_dev_get_rcu(dev);
1590                         if (!in_dev)
1591                                 goto cont;
1592
1593                         for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1594                              ifa = ifa->ifa_next, ip_idx++) {
1595                                 if (ip_idx < s_ip_idx)
1596                                         continue;
1597                                 if (inet_fill_ifaddr(skb, ifa,
1598                                              NETLINK_CB(cb->skb).portid,
1599                                              cb->nlh->nlmsg_seq,
1600                                              RTM_NEWADDR, NLM_F_MULTI) < 0) {
1601                                         rcu_read_unlock();
1602                                         goto done;
1603                                 }
1604                                 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1605                         }
1606 cont:
1607                         idx++;
1608                 }
1609                 rcu_read_unlock();
1610         }
1611
1612 done:
1613         cb->args[0] = h;
1614         cb->args[1] = idx;
1615         cb->args[2] = ip_idx;
1616
1617         return skb->len;
1618 }
1619
1620 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1621                       u32 portid)
1622 {
1623         struct sk_buff *skb;
1624         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1625         int err = -ENOBUFS;
1626         struct net *net;
1627
1628         net = dev_net(ifa->ifa_dev->dev);
1629         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1630         if (!skb)
1631                 goto errout;
1632
1633         err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1634         if (err < 0) {
1635                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1636                 WARN_ON(err == -EMSGSIZE);
1637                 kfree_skb(skb);
1638                 goto errout;
1639         }
1640         rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1641         return;
1642 errout:
1643         if (err < 0)
1644                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1645 }
1646
1647 static size_t inet_get_link_af_size(const struct net_device *dev,
1648                                     u32 ext_filter_mask)
1649 {
1650         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1651
1652         if (!in_dev)
1653                 return 0;
1654
1655         return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1656 }
1657
1658 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1659                              u32 ext_filter_mask)
1660 {
1661         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1662         struct nlattr *nla;
1663         int i;
1664
1665         if (!in_dev)
1666                 return -ENODATA;
1667
1668         nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1669         if (!nla)
1670                 return -EMSGSIZE;
1671
1672         for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1673                 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1674
1675         return 0;
1676 }
1677
/*
 * Attribute policy handed to nla_parse_nested() by inet_validate_link_af():
 * IFLA_INET_CONF must itself be a nested attribute.
 */
1678 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1679         [IFLA_INET_CONF]        = { .type = NLA_NESTED },
1680 };
1681
1682 static int inet_validate_link_af(const struct net_device *dev,
1683                                  const struct nlattr *nla)
1684 {
1685         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1686         int err, rem;
1687
1688         if (dev && !__in_dev_get_rtnl(dev))
1689                 return -EAFNOSUPPORT;
1690
1691         err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1692         if (err < 0)
1693                 return err;
1694
1695         if (tb[IFLA_INET_CONF]) {
1696                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1697                         int cfgid = nla_type(a);
1698
1699                         if (nla_len(a) < 4)
1700                                 return -EINVAL;
1701
1702                         if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1703                                 return -EINVAL;
1704                 }
1705         }
1706
1707         return 0;
1708 }
1709
1710 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1711 {
1712         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1713         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1714         int rem;
1715
1716         if (!in_dev)
1717                 return -EAFNOSUPPORT;
1718
1719         if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1720                 BUG();
1721
1722         if (tb[IFLA_INET_CONF]) {
1723                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1724                         ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1725         }
1726
1727         return 0;
1728 }
1729
1730 static int inet_netconf_msgsize_devconf(int type)
1731 {
1732         int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1733                    + nla_total_size(4); /* NETCONFA_IFINDEX */
1734
1735         /* type -1 is used for ALL */
1736         if (type == -1 || type == NETCONFA_FORWARDING)
1737                 size += nla_total_size(4);
1738         if (type == -1 || type == NETCONFA_RP_FILTER)
1739                 size += nla_total_size(4);
1740         if (type == -1 || type == NETCONFA_MC_FORWARDING)
1741                 size += nla_total_size(4);
1742         if (type == -1 || type == NETCONFA_PROXY_NEIGH)
1743                 size += nla_total_size(4);
1744         if (type == -1 || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
1745                 size += nla_total_size(4);
1746
1747         return size;
1748 }
1749
1750 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1751                                      struct ipv4_devconf *devconf, u32 portid,
1752                                      u32 seq, int event, unsigned int flags,
1753                                      int type)
1754 {
1755         struct nlmsghdr  *nlh;
1756         struct netconfmsg *ncm;
1757
1758         nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1759                         flags);
1760         if (!nlh)
1761                 return -EMSGSIZE;
1762
1763         ncm = nlmsg_data(nlh);
1764         ncm->ncm_family = AF_INET;
1765
1766         if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1767                 goto nla_put_failure;
1768
1769         /* type -1 is used for ALL */
1770         if ((type == -1 || type == NETCONFA_FORWARDING) &&
1771             nla_put_s32(skb, NETCONFA_FORWARDING,
1772                         IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1773                 goto nla_put_failure;
1774         if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1775             nla_put_s32(skb, NETCONFA_RP_FILTER,
1776                         IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1777                 goto nla_put_failure;
1778         if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
1779             nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1780                         IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1781                 goto nla_put_failure;
1782         if ((type == -1 || type == NETCONFA_PROXY_NEIGH) &&
1783             nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
1784                         IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
1785                 goto nla_put_failure;
1786         if ((type == -1 || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
1787             nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
1788                         IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
1789                 goto nla_put_failure;
1790
1791         nlmsg_end(skb, nlh);
1792         return 0;
1793
1794 nla_put_failure:
1795         nlmsg_cancel(skb, nlh);
1796         return -EMSGSIZE;
1797 }
1798
1799 void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1800                                  struct ipv4_devconf *devconf)
1801 {
1802         struct sk_buff *skb;
1803         int err = -ENOBUFS;
1804
1805         skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1806         if (!skb)
1807                 goto errout;
1808
1809         err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1810                                         RTM_NEWNETCONF, 0, type);
1811         if (err < 0) {
1812                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1813                 WARN_ON(err == -EMSGSIZE);
1814                 kfree_skb(skb);
1815                 goto errout;
1816         }
1817         rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
1818         return;
1819 errout:
1820         if (err < 0)
1821                 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1822 }
1823
/*
 * Attribute policy handed to nlmsg_parse() by inet_netconf_get_devconf().
 * Every listed attribute is given .len = sizeof(int).
 * NOTE(review): NETCONFA_MC_FORWARDING has no entry here although
 * inet_netconf_fill_devconf() emits it - confirm this is intentional.
 */
1824 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1825         [NETCONFA_IFINDEX]      = { .len = sizeof(int) },
1826         [NETCONFA_FORWARDING]   = { .len = sizeof(int) },
1827         [NETCONFA_RP_FILTER]    = { .len = sizeof(int) },
1828         [NETCONFA_PROXY_NEIGH]  = { .len = sizeof(int) },
1829         [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]  = { .len = sizeof(int) },
1830 };
1831
1832 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1833                                     struct nlmsghdr *nlh)
1834 {
1835         struct net *net = sock_net(in_skb->sk);
1836         struct nlattr *tb[NETCONFA_MAX+1];
1837         struct netconfmsg *ncm;
1838         struct sk_buff *skb;
1839         struct ipv4_devconf *devconf;
1840         struct in_device *in_dev;
1841         struct net_device *dev;
1842         int ifindex;
1843         int err;
1844
1845         err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1846                           devconf_ipv4_policy);
1847         if (err < 0)
1848                 goto errout;
1849
1850         err = EINVAL;
1851         if (!tb[NETCONFA_IFINDEX])
1852                 goto errout;
1853
1854         ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1855         switch (ifindex) {
1856         case NETCONFA_IFINDEX_ALL:
1857                 devconf = net->ipv4.devconf_all;
1858                 break;
1859         case NETCONFA_IFINDEX_DEFAULT:
1860                 devconf = net->ipv4.devconf_dflt;
1861                 break;
1862         default:
1863                 dev = __dev_get_by_index(net, ifindex);
1864                 if (!dev)
1865                         goto errout;
1866                 in_dev = __in_dev_get_rtnl(dev);
1867                 if (!in_dev)
1868                         goto errout;
1869                 devconf = &in_dev->cnf;
1870                 break;
1871         }
1872
1873         err = -ENOBUFS;
1874         skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1875         if (!skb)
1876                 goto errout;
1877
1878         err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1879                                         NETLINK_CB(in_skb).portid,
1880                                         nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1881                                         -1);
1882         if (err < 0) {
1883                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1884                 WARN_ON(err == -EMSGSIZE);
1885                 kfree_skb(skb);
1886                 goto errout;
1887         }
1888         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1889 errout:
1890         return err;
1891 }
1892
1893 static int inet_netconf_dump_devconf(struct sk_buff *skb,
1894                                      struct netlink_callback *cb)
1895 {
1896         struct net *net = sock_net(skb->sk);
1897         int h, s_h;
1898         int idx, s_idx;
1899         struct net_device *dev;
1900         struct in_device *in_dev;
1901         struct hlist_head *head;
1902
1903         s_h = cb->args[0];
1904         s_idx = idx = cb->args[1];
1905
1906         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1907                 idx = 0;
1908                 head = &net->dev_index_head[h];
1909                 rcu_read_lock();
1910                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1911                           net->dev_base_seq;
1912                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1913                         if (idx < s_idx)
1914                                 goto cont;
1915                         in_dev = __in_dev_get_rcu(dev);
1916                         if (!in_dev)
1917                                 goto cont;
1918
1919                         if (inet_netconf_fill_devconf(skb, dev->ifindex,
1920                                                       &in_dev->cnf,
1921                                                       NETLINK_CB(cb->skb).portid,
1922                                                       cb->nlh->nlmsg_seq,
1923                                                       RTM_NEWNETCONF,
1924                                                       NLM_F_MULTI,
1925                                                       -1) < 0) {
1926                                 rcu_read_unlock();
1927                                 goto done;
1928                         }
1929                         nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1930 cont:
1931                         idx++;
1932                 }
1933                 rcu_read_unlock();
1934         }
1935         if (h == NETDEV_HASHENTRIES) {
1936                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
1937                                               net->ipv4.devconf_all,
1938                                               NETLINK_CB(cb->skb).portid,
1939                                               cb->nlh->nlmsg_seq,
1940                                               RTM_NEWNETCONF, NLM_F_MULTI,
1941                                               -1) < 0)
1942                         goto done;
1943                 else
1944                         h++;
1945         }
1946         if (h == NETDEV_HASHENTRIES + 1) {
1947                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
1948                                               net->ipv4.devconf_dflt,
1949                                               NETLINK_CB(cb->skb).portid,
1950                                               cb->nlh->nlmsg_seq,
1951                                               RTM_NEWNETCONF, NLM_F_MULTI,
1952                                               -1) < 0)
1953                         goto done;
1954                 else
1955                         h++;
1956         }
1957 done:
1958         cb->args[0] = h;
1959         cb->args[1] = idx;
1960
1961         return skb->len;
1962 }
1963
1964 #ifdef CONFIG_SYSCTL
1965
1966 static void devinet_copy_dflt_conf(struct net *net, int i)
1967 {
1968         struct net_device *dev;
1969
1970         rcu_read_lock();
1971         for_each_netdev_rcu(net, dev) {
1972                 struct in_device *in_dev;
1973
1974                 in_dev = __in_dev_get_rcu(dev);
1975                 if (in_dev && !test_bit(i, in_dev->cnf.state))
1976                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1977         }
1978         rcu_read_unlock();
1979 }
1980
1981 /* called with RTNL locked */
1982 static void inet_forward_change(struct net *net)
1983 {
1984         struct net_device *dev;
1985         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1986
1987         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1988         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1989         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1990                                     NETCONFA_IFINDEX_ALL,
1991                                     net->ipv4.devconf_all);
1992         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1993                                     NETCONFA_IFINDEX_DEFAULT,
1994                                     net->ipv4.devconf_dflt);
1995
1996         for_each_netdev(net, dev) {
1997                 struct in_device *in_dev;
1998                 if (on)
1999                         dev_disable_lro(dev);
2000                 rcu_read_lock();
2001                 in_dev = __in_dev_get_rcu(dev);
2002                 if (in_dev) {
2003                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2004                         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2005                                                     dev->ifindex, &in_dev->cnf);
2006                 }
2007                 rcu_read_unlock();
2008         }
2009 }
2010
2011 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2012 {
2013         if (cnf == net->ipv4.devconf_dflt)
2014                 return NETCONFA_IFINDEX_DEFAULT;
2015         else if (cnf == net->ipv4.devconf_all)
2016                 return NETCONFA_IFINDEX_ALL;
2017         else {
2018                 struct in_device *idev
2019                         = container_of(cnf, struct in_device, cnf);
2020                 return idev->dev->ifindex;
2021         }
2022 }
2023
2024 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2025                              void __user *buffer,
2026                              size_t *lenp, loff_t *ppos)
2027 {
2028         int old_value = *(int *)ctl->data;
2029         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2030         int new_value = *(int *)ctl->data;
2031
2032         if (write) {
2033                 struct ipv4_devconf *cnf = ctl->extra1;
2034                 struct net *net = ctl->extra2;
2035                 int i = (int *)ctl->data - cnf->data;
2036                 int ifindex;
2037
2038                 set_bit(i, cnf->state);
2039
2040                 if (cnf == net->ipv4.devconf_dflt)
2041                         devinet_copy_dflt_conf(net, i);
2042                 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2043                     i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2044                         if ((new_value == 0) && (old_value != 0))
2045                                 rt_cache_flush(net);
2046
2047                 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2048                     new_value != old_value) {
2049                         ifindex = devinet_conf_ifindex(net, cnf);
2050                         inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
2051                                                     ifindex, cnf);
2052                 }
2053                 if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2054                     new_value != old_value) {
2055                         ifindex = devinet_conf_ifindex(net, cnf);
2056                         inet_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
2057                                                     ifindex, cnf);
2058                 }
2059                 if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2060                     new_value != old_value) {
2061                         ifindex = devinet_conf_ifindex(net, cnf);
2062                         inet_netconf_notify_devconf(net, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2063                                                     ifindex, cnf);
2064                 }
2065         }
2066
2067         return ret;
2068 }
2069
2070 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2071                                   void __user *buffer,
2072                                   size_t *lenp, loff_t *ppos)
2073 {
2074         int *valp = ctl->data;
2075         int val = *valp;
2076         loff_t pos = *ppos;
2077         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2078
2079         if (write && *valp != val) {
2080                 struct net *net = ctl->extra2;
2081
2082                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2083                         if (!rtnl_trylock()) {
2084                                 /* Restore the original values before restarting */
2085                                 *valp = val;
2086                                 *ppos = pos;
2087                                 return restart_syscall();
2088                         }
2089                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2090                                 inet_forward_change(net);
2091                         } else {
2092                                 struct ipv4_devconf *cnf = ctl->extra1;
2093                                 struct in_device *idev =
2094                                         container_of(cnf, struct in_device, cnf);
2095                                 if (*valp)
2096                                         dev_disable_lro(idev->dev);
2097                                 inet_netconf_notify_devconf(net,
2098                                                             NETCONFA_FORWARDING,
2099                                                             idev->dev->ifindex,
2100                                                             cnf);
2101                         }
2102                         rtnl_unlock();
2103                         rt_cache_flush(net);
2104                 } else
2105                         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2106                                                     NETCONFA_IFINDEX_DEFAULT,
2107                                                     net->ipv4.devconf_dflt);
2108         }
2109
2110         return ret;
2111 }
2112
2113 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2114                                 void __user *buffer,
2115                                 size_t *lenp, loff_t *ppos)
2116 {
2117         int *valp = ctl->data;
2118         int val = *valp;
2119         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2120         struct net *net = ctl->extra2;
2121
2122         if (write && *valp != val)
2123                 rt_cache_flush(net);
2124
2125         return ret;
2126 }
2127
/*
 * Initializer for one ctl_table entry of the devinet_sysctl template.
 * .data points at slot IPV4_DEVCONF_<attr> - 1 of the template
 * ipv4_devconf and .extra1 at ipv4_devconf itself;
 * __devinet_sysctl_register() rebases both onto the devconf being
 * registered and fills in .extra2.
 *   attr - IPV4_DEVCONF_ suffix, name - sysctl file name,
 *   mval - file mode, proc - proc handler.
 */
2128 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
2129         { \
2130                 .procname       = name, \
2131                 .data           = ipv4_devconf.data + \
2132                                   IPV4_DEVCONF_ ## attr - 1, \
2133                 .maxlen         = sizeof(int), \
2134                 .mode           = mval, \
2135                 .proc_handler   = proc, \
2136                 .extra1         = &ipv4_devconf, \
2137         }
2138
/* Mode 0644 entry handled by devinet_conf_proc(). */
2139 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
2140         DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
2141
/* Mode 0444 entry handled by devinet_conf_proc(). */
2142 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
2143         DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
2144
/* Mode 0644 entry with a caller-supplied proc handler. */
2145 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
2146         DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
2147
/* Mode 0644 entry handled by ipv4_doint_and_flush(). */
2148 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
2149         DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2150
/*
 * Template sysctl table for one "net/ipv4/conf/<name>" directory.
 * __devinet_sysctl_register() duplicates it with kmemdup() for every
 * devconf it registers and stores the returned header in sysctl_header.
 * devinet_vars has __IPV4_DEVCONF_MAX slots; the register loop adjusts all
 * but the last one (presumably the zeroed terminator - TODO confirm).
 */
2151 static struct devinet_sysctl_table {
2152         struct ctl_table_header *sysctl_header;
2153         struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2154 } devinet_sysctl = {
2155         .devinet_vars = {
2156                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2157                                              devinet_sysctl_forward),
2158                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2159
2160                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2161                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2162                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2163                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2164                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2165                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2166                                         "accept_source_route"),
2167                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2168                 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2169                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2170                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2171                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2172                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2173                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2174                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2175                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2176                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2177                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2178                 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2179                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2180                 DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2181                                         "force_igmp_version"),
2182                 DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2183                                         "igmpv2_unsolicited_report_interval"),
2184                 DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2185                                         "igmpv3_unsolicited_report_interval"),
2186                 DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2187                                         "ignore_routes_with_linkdown"),
2188
2189                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2190                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2191                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2192                                               "promote_secondaries"),
2193                 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2194                                               "route_localnet"),
2195         },
2196 };
2197
2198 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2199                                         struct ipv4_devconf *p)
2200 {
2201         int i;
2202         struct devinet_sysctl_table *t;
2203         char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2204
2205         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2206         if (!t)
2207                 goto out;
2208
2209         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2210                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2211                 t->devinet_vars[i].extra1 = p;
2212                 t->devinet_vars[i].extra2 = net;
2213         }
2214
2215         snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2216
2217         t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2218         if (!t->sysctl_header)
2219                 goto free;
2220
2221         p->sysctl = t;
2222         return 0;
2223
2224 free:
2225         kfree(t);
2226 out:
2227         return -ENOBUFS;
2228 }
2229
2230 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2231 {
2232         struct devinet_sysctl_table *t = cnf->sysctl;
2233
2234         if (!t)
2235                 return;
2236
2237         cnf->sysctl = NULL;
2238         unregister_net_sysctl_table(t->sysctl_header);
2239         kfree(t);
2240 }
2241
2242 static int devinet_sysctl_register(struct in_device *idev)
2243 {
2244         int err;
2245
2246         if (!sysctl_dev_name_is_allowed(idev->dev->name))
2247                 return -EINVAL;
2248
2249         err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2250         if (err)
2251                 return err;
2252         err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2253                                         &idev->cnf);
2254         if (err)
2255                 neigh_sysctl_unregister(idev->arp_parms);
2256         return err;
2257 }
2258
2259 static void devinet_sysctl_unregister(struct in_device *idev)
2260 {
2261         __devinet_sysctl_unregister(&idev->cnf);
2262         neigh_sysctl_unregister(idev->arp_parms);
2263 }
2264
/*
 * Table for the "ip_forward" entry that devinet_init_net() registers under
 * "net/ipv4".  The static copy points at ipv4_devconf and init_net; for
 * other namespaces devinet_init_net() duplicates it and repoints .data,
 * .extra1 and .extra2 at that namespace's "all" devconf and net.
 */
2265 static struct ctl_table ctl_forward_entry[] = {
2266         {
2267                 .procname       = "ip_forward",
2268                 .data           = &ipv4_devconf.data[
2269                                         IPV4_DEVCONF_FORWARDING - 1],
2270                 .maxlen         = sizeof(int),
2271                 .mode           = 0644,
2272                 .proc_handler   = devinet_sysctl_forward,
2273                 .extra1         = &ipv4_devconf,
2274                 .extra2         = &init_net,
2275         },
2276         { },
2277 };
2278 #endif
2279
2280 static __net_init int devinet_init_net(struct net *net)
2281 {
2282         int err;
2283         struct ipv4_devconf *all, *dflt;
2284 #ifdef CONFIG_SYSCTL
2285         struct ctl_table *tbl = ctl_forward_entry;
2286         struct ctl_table_header *forw_hdr;
2287 #endif
2288
2289         err = -ENOMEM;
2290         all = &ipv4_devconf;
2291         dflt = &ipv4_devconf_dflt;
2292
2293         if (!net_eq(net, &init_net)) {
2294                 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2295                 if (!all)
2296                         goto err_alloc_all;
2297
2298                 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2299                 if (!dflt)
2300                         goto err_alloc_dflt;
2301
2302 #ifdef CONFIG_SYSCTL
2303                 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2304                 if (!tbl)
2305                         goto err_alloc_ctl;
2306
2307                 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2308                 tbl[0].extra1 = all;
2309                 tbl[0].extra2 = net;
2310 #endif
2311         }
2312
2313 #ifdef CONFIG_SYSCTL
2314         err = __devinet_sysctl_register(net, "all", all);
2315         if (err < 0)
2316                 goto err_reg_all;
2317
2318         err = __devinet_sysctl_register(net, "default", dflt);
2319         if (err < 0)
2320                 goto err_reg_dflt;
2321
2322         err = -ENOMEM;
2323         forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2324         if (!forw_hdr)
2325                 goto err_reg_ctl;
2326         net->ipv4.forw_hdr = forw_hdr;
2327 #endif
2328
2329         net->ipv4.devconf_all = all;
2330         net->ipv4.devconf_dflt = dflt;
2331         return 0;
2332
2333 #ifdef CONFIG_SYSCTL
2334 err_reg_ctl:
2335         __devinet_sysctl_unregister(dflt);
2336 err_reg_dflt:
2337         __devinet_sysctl_unregister(all);
2338 err_reg_all:
2339         if (tbl != ctl_forward_entry)
2340                 kfree(tbl);
2341 err_alloc_ctl:
2342 #endif
2343         if (dflt != &ipv4_devconf_dflt)
2344                 kfree(dflt);
2345 err_alloc_dflt:
2346         if (all != &ipv4_devconf)
2347                 kfree(all);
2348 err_alloc_all:
2349         return err;
2350 }
2351
2352 static __net_exit void devinet_exit_net(struct net *net)
2353 {
2354 #ifdef CONFIG_SYSCTL
2355         struct ctl_table *tbl;
2356
2357         tbl = net->ipv4.forw_hdr->ctl_table_arg;
2358         unregister_net_sysctl_table(net->ipv4.forw_hdr);
2359         __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
2360         __devinet_sysctl_unregister(net->ipv4.devconf_all);
2361         kfree(tbl);
2362 #endif
2363         kfree(net->ipv4.devconf_dflt);
2364         kfree(net->ipv4.devconf_all);
2365 }
2366
/*
 * Per-namespace init/exit hooks; handed to register_pernet_subsys() by
 * devinet_init().
 */
2367 static __net_initdata struct pernet_operations devinet_ops = {
2368         .init = devinet_init_net,
2369         .exit = devinet_exit_net,
2370 };
2371
/*
 * AF_INET per-link attribute callbacks (size, fill, validate, set);
 * handed to rtnl_af_register() by devinet_init().
 */
2372 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2373         .family           = AF_INET,
2374         .fill_link_af     = inet_fill_link_af,
2375         .get_link_af_size = inet_get_link_af_size,
2376         .validate_link_af = inet_validate_link_af,
2377         .set_link_af      = inet_set_link_af,
2378 };
2379
2380 void __init devinet_init(void)
2381 {
2382         int i;
2383
2384         for (i = 0; i < IN4_ADDR_HSIZE; i++)
2385                 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2386
2387         register_pernet_subsys(&devinet_ops);
2388
2389         register_gifconf(PF_INET, inet_gifconf);
2390         register_netdevice_notifier(&ip_netdev_notifier);
2391
2392         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2393
2394         rtnl_af_register(&inet_af_ops);
2395
2396         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
2397         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2398         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2399         rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2400                       inet_netconf_dump_devconf, NULL);
2401 }