/* Source: net/core/neighbour.c, exported from karo-tx-linux.git
 * (gitweb at git.kernelconcepts.de).
 */
1 /*
2  *      Generic address resolution entity
3  *
4  *      Authors:
5  *      Pedro Roque             <roque@di.fc.ul.pt>
6  *      Alexey Kuznetsov        <kuznet@ms2.inr.ac.ru>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  *
13  *      Fixes:
14  *      Vitaly E. Lavrov        releasing NULL neighbor in neigh_add.
15  *      Harald Welte            Add neighbour cache statistics like rtstat
16  */
17
18 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
19
20 #include <linux/slab.h>
21 #include <linux/types.h>
22 #include <linux/kernel.h>
23 #include <linux/module.h>
24 #include <linux/socket.h>
25 #include <linux/netdevice.h>
26 #include <linux/proc_fs.h>
27 #ifdef CONFIG_SYSCTL
28 #include <linux/sysctl.h>
29 #endif
30 #include <linux/times.h>
31 #include <net/net_namespace.h>
32 #include <net/neighbour.h>
33 #include <net/dst.h>
34 #include <net/sock.h>
35 #include <net/netevent.h>
36 #include <net/netlink.h>
37 #include <linux/rtnetlink.h>
38 #include <linux/random.h>
39 #include <linux/string.h>
40 #include <linux/log2.h>
41
42 #define DEBUG
43 #define NEIGH_DEBUG 1
44 #define neigh_dbg(level, fmt, ...)              \
45 do {                                            \
46         if (level <= NEIGH_DEBUG)               \
47                 pr_debug(fmt, ##__VA_ARGS__);   \
48 } while (0)
49
50 #define PNEIGH_HASHMASK         0xF
51
52 static void neigh_timer_handler(unsigned long arg);
53 static void __neigh_notify(struct neighbour *n, int type, int flags);
54 static void neigh_update_notify(struct neighbour *neigh);
55 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
56
57 static struct neigh_table *neigh_tables;
58 #ifdef CONFIG_PROC_FS
59 static const struct file_operations neigh_stat_seq_fops;
60 #endif
61
62 /*
63    Neighbour hash table buckets are protected with rwlock tbl->lock.
64
65    - All the scans/updates to hash buckets MUST be made under this lock.
66    - NOTHING clever should be made under this lock: no callbacks
67      to protocol backends, no attempts to send something to network.
68      It will result in deadlocks, if backend/driver wants to use neighbour
69      cache.
70    - If the entry requires some non-trivial actions, increase
71      its reference count and release table lock.
72
73    Neighbour entries are protected:
74    - with reference count.
75    - with rwlock neigh->lock
76
77    Reference count prevents destruction.
78
79    neigh->lock mainly serializes ll address data and its validity state.
80    However, the same lock is used to protect another entry fields:
81     - timer
82     - resolution queue
83
84    Again, nothing clever shall be made under neigh->lock,
85    the most complicated procedure, which we allow is dev->hard_header.
86    It is supposed, that dev->hard_header is simplistic and does
87    not make callbacks to neighbour tables.
88
   The last lock is neigh_tbl_lock. It is a pure SMP lock, protecting
   the list of neighbour tables. This list is used only in process context.
 */
92
93 static DEFINE_RWLOCK(neigh_tbl_lock);
94
/* Fallback output method: drop @skb and report the network as down.
 * Installed on entries that cannot transmit -- freshly allocated ones
 * (see neigh_alloc()) and stray entries being torn down (see
 * neigh_flush_dev()).
 */
static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
{
	kfree_skb(skb);
	return -ENETDOWN;
}
100
101 static void neigh_cleanup_and_release(struct neighbour *neigh)
102 {
103         if (neigh->parms->neigh_cleanup)
104                 neigh->parms->neigh_cleanup(neigh);
105
106         __neigh_notify(neigh, RTM_DELNEIGH, 0);
107         neigh_release(neigh);
108 }
109
/*
 * Pick a randomised reachable time, uniformly distributed over the
 * interval (1/2)*base ... (3/2)*base.  This matches the default IPv6
 * behaviour and is deliberately not configurable.  Returns 0 when
 * @base is 0.
 */

unsigned long neigh_rand_reach_time(unsigned long base)
{
	unsigned long jitter;

	if (!base)
		return 0;

	jitter = net_random() % base;	/* [0, base) */
	return (base >> 1) + jitter;	/* [base/2, 3*base/2) */
}
EXPORT_SYMBOL(neigh_rand_reach_time);
121
122
/* Synchronous, forced shrink of the table, called from neigh_alloc()
 * when the entry count crosses gc_thresh2/gc_thresh3.
 *
 * Walks every bucket under the table write lock and unlinks entries
 * that are unreferenced (refcnt == 1, i.e. only the table's own
 * reference remains) and not NUD_PERMANENT.  Returns 1 if at least
 * one entry was freed, 0 otherwise.
 */
static int neigh_forced_gc(struct neigh_table *tbl)
{
	int shrunk = 0;
	int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		np = &nht->hash_buckets[i];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			/* Neighbour record may be discarded if:
			 * - nobody refers to it.
			 * - it is not permanent
			 */
			write_lock(&n->lock);
			if (atomic_read(&n->refcnt) == 1 &&
			    !(n->nud_state & NUD_PERMANENT)) {
				/* Unlink with RCU publication: readers
				 * traverse this chain locklessly.
				 */
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						  lockdep_is_held(&tbl->lock)));
				n->dead = 1;
				shrunk  = 1;
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);
			np = &n->next;	/* entry kept; advance */
		}
	}

	tbl->last_flush = jiffies;

	write_unlock_bh(&tbl->lock);

	return shrunk;
}
168
169 static void neigh_add_timer(struct neighbour *n, unsigned long when)
170 {
171         neigh_hold(n);
172         if (unlikely(mod_timer(&n->timer, when))) {
173                 printk("NEIGH: BUG, double timer add, state is %x\n",
174                        n->nud_state);
175                 dump_stack();
176         }
177 }
178
179 static int neigh_del_timer(struct neighbour *n)
180 {
181         if ((n->nud_state & NUD_IN_TIMER) &&
182             del_timer(&n->timer)) {
183                 neigh_release(n);
184                 return 1;
185         }
186         return 0;
187 }
188
189 static void pneigh_queue_purge(struct sk_buff_head *list)
190 {
191         struct sk_buff *skb;
192
193         while ((skb = skb_dequeue(list)) != NULL) {
194                 dev_put(skb->dev);
195                 kfree_skb(skb);
196         }
197 }
198
/* Unlink and release every entry belonging to @dev, or every entry in
 * the table when @dev is NULL.  Caller must hold tbl->lock for
 * writing; see neigh_changeaddr() and neigh_ifdown().
 */
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
{
	int i;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			if (dev && n->dev != dev) {
				np = &n->next;
				continue;
			}
			/* Unlink from the chain first, then neuter the
			 * entry under its own lock.
			 */
			rcu_assign_pointer(*np,
				   rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
			write_lock(&n->lock);
			neigh_del_timer(n);
			n->dead = 1;

			if (atomic_read(&n->refcnt) != 1) {
				/* The most unpleasant situation.
				   We must destroy neighbour entry,
				   but someone still uses it.

				   The destroy will be delayed until
				   the last user releases us, but
				   we must kill timers etc. and move
				   it to safe state.
				 */
				__skb_queue_purge(&n->arp_queue);
				n->arp_queue_len_bytes = 0;
				/* Any further transmits fail fast. */
				n->output = neigh_blackhole;
				if (n->nud_state & NUD_VALID)
					n->nud_state = NUD_NOARP;
				else
					n->nud_state = NUD_NONE;
				neigh_dbg(2, "neigh %p is stray\n", n);
			}
			write_unlock(&n->lock);
			neigh_cleanup_and_release(n);
		}
	}
}
248
/* The hardware address of @dev changed: drop every cached entry for
 * that device so subsequent lookups re-resolve.  Proxy entries are
 * untouched (contrast neigh_ifdown()).
 */
void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	write_unlock_bh(&tbl->lock);
}
EXPORT_SYMBOL(neigh_changeaddr);
256
/* @dev is going down: flush its cached entries, remove its proxy
 * entries, then stop the proxy timer and drop any packets still on
 * the proxy queue.  Always returns 0.
 */
int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	pneigh_ifdown(tbl, dev);
	write_unlock_bh(&tbl->lock);

	/* Outside tbl->lock: del_timer_sync() may have to wait for a
	 * running timer handler.
	 */
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	return 0;
}
EXPORT_SYMBOL(neigh_ifdown);
269
/* Allocate and minimally initialise a new neighbour entry for
 * @tbl/@dev.
 *
 * May run a synchronous forced GC first: above gc_thresh2 (if the
 * last flush was more than 5 seconds ago) or above gc_thresh3.  The
 * allocation is refused when forced GC frees nothing and the table is
 * still at gc_thresh3.  Returns NULL on failure.
 *
 * The entry comes back with refcnt 1 and ->dead = 1: it is NOT yet
 * hashed into the table; __neigh_create() clears ->dead when it
 * links the entry in.
 */
static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
{
	struct neighbour *n = NULL;
	unsigned long now = jiffies;
	int entries;

	entries = atomic_inc_return(&tbl->entries) - 1;
	if (entries >= tbl->gc_thresh3 ||
	    (entries >= tbl->gc_thresh2 &&
	     time_after(now, tbl->last_flush + 5 * HZ))) {
		if (!neigh_forced_gc(tbl) &&
		    entries >= tbl->gc_thresh3)
			goto out_entries;
	}

	/* entry_size includes the protocol-specific tail; the device
	 * may request extra private room via neigh_priv_len.
	 */
	n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
	if (!n)
		goto out_entries;

	__skb_queue_head_init(&n->arp_queue);
	rwlock_init(&n->lock);
	seqlock_init(&n->ha_lock);
	n->updated	  = n->used = now;
	n->nud_state	  = NUD_NONE;
	n->output	  = neigh_blackhole;
	seqlock_init(&n->hh.hh_lock);
	n->parms	  = neigh_parms_clone(&tbl->parms);
	setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);

	NEIGH_CACHE_STAT_INC(tbl, allocs);
	n->tbl		  = tbl;
	atomic_set(&n->refcnt, 1);
	n->dead		  = 1;
out:
	return n;

out_entries:
	/* Undo the optimistic atomic_inc_return() above. */
	atomic_dec(&tbl->entries);
	goto out;
}
310
/* Fill *x with a random hash seed; the low bit is forced on so the
 * seed is odd and therefore never zero.
 */
static void neigh_get_hash_rnd(u32 *x)
{
	get_random_bytes(x, sizeof(*x));
	*x |= 1;
}
316
/* Allocate a hash table with 2^@shift zeroed buckets and fresh hash
 * seeds.  The bucket array comes from kzalloc() when it fits in a
 * page, otherwise straight from the page allocator (mirrored by
 * neigh_hash_free_rcu()).  GFP_ATOMIC throughout: this can run under
 * the table lock.  Returns NULL on allocation failure.
 */
static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
{
	size_t size = (1 << shift) * sizeof(struct neighbour *);
	struct neigh_hash_table *ret;
	struct neighbour __rcu **buckets;
	int i;

	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
	if (!ret)
		return NULL;
	if (size <= PAGE_SIZE)
		buckets = kzalloc(size, GFP_ATOMIC);
	else
		buckets = (struct neighbour __rcu **)
			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
					   get_order(size));
	if (!buckets) {
		kfree(ret);
		return NULL;
	}
	ret->hash_buckets = buckets;
	ret->hash_shift = shift;
	for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
		neigh_get_hash_rnd(&ret->hash_rnd[i]);
	return ret;
}
343
/* RCU callback counterpart of neigh_hash_alloc(): free the bucket
 * array the same way it was allocated (kfree vs free_pages, chosen
 * by size), then the table head itself.  Runs after a grace period,
 * once no reader can still hold the old table pointer.
 */
static void neigh_hash_free_rcu(struct rcu_head *head)
{
	struct neigh_hash_table *nht = container_of(head,
						    struct neigh_hash_table,
						    rcu);
	size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
	struct neighbour __rcu **buckets = nht->hash_buckets;

	if (size <= PAGE_SIZE)
		kfree(buckets);
	else
		free_pages((unsigned long)buckets, get_order(size));
	kfree(nht);
}
358
/* Replace tbl->nht with a table of 2^@new_shift buckets, rehashing
 * every entry into it.  Caller must hold tbl->lock for writing (all
 * derefs here are rcu_dereference_protected on that lock).
 *
 * On allocation failure the old table is kept and returned.  On
 * success the new table is published with rcu_assign_pointer() and
 * the old one is freed only after a grace period, so lockless
 * readers never see a dangling pointer.
 */
static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
						unsigned long new_shift)
{
	unsigned int i, hash;
	struct neigh_hash_table *new_nht, *old_nht;

	NEIGH_CACHE_STAT_INC(tbl, hash_grows);

	old_nht = rcu_dereference_protected(tbl->nht,
					    lockdep_is_held(&tbl->lock));
	new_nht = neigh_hash_alloc(new_shift);
	if (!new_nht)
		return old_nht;

	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
		struct neighbour *n, *next;

		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
						   lockdep_is_held(&tbl->lock));
		     n != NULL;
		     n = next) {
			/* Rehash with the new table's seeds; buckets are
			 * indexed by the top hash_shift bits.
			 */
			hash = tbl->hash(n->primary_key, n->dev,
					 new_nht->hash_rnd);

			hash >>= (32 - new_nht->hash_shift);
			next = rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock));

			/* Push onto the head of the new bucket's chain. */
			rcu_assign_pointer(n->next,
					   rcu_dereference_protected(
						new_nht->hash_buckets[hash],
						lockdep_is_held(&tbl->lock)));
			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
		}
	}

	rcu_assign_pointer(tbl->nht, new_nht);
	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
	return new_nht;
}
399
/* Find the entry for (@pkey, @dev) in @tbl and take a reference on it.
 *
 * Pure RCU read side -- no table lock is taken.  Returns NULL when
 * nothing matches, or when the match is concurrently being destroyed
 * (its refcount already reached zero).
 */
struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	struct neighbour *n;
	int key_len = tbl->key_len;
	u32 hash_val;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
			/* Lost the race with destruction: report a miss. */
			if (!atomic_inc_not_zero(&n->refcnt))
				n = NULL;
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup);
429
/* Device-agnostic variant of neigh_lookup(): match @pkey across all
 * devices, constrained only to namespace @net.  Same RCU read-side
 * and refcounting rules as neigh_lookup(); returns NULL on a miss or
 * when the match is already being destroyed.
 */
struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
				     const void *pkey)
{
	struct neighbour *n;
	int key_len = tbl->key_len;
	u32 hash_val;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	/* Hash with dev == NULL, matching how such entries were hashed. */
	hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (!memcmp(n->primary_key, pkey, key_len) &&
		    net_eq(dev_net(n->dev), net)) {
			/* Lost the race with destruction: report a miss. */
			if (!atomic_inc_not_zero(&n->refcnt))
				n = NULL;
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup_nodev);
460
/* Allocate, construct and hash a new entry for (@pkey, @dev).
 *
 * The protocol constructor (tbl->constructor), the device's
 * ndo_neigh_construct hook and the per-parms neigh_setup hook all run
 * before the table lock is taken.  Under the lock: grow the hash
 * table if it is getting full, refuse insertion against a dead parms
 * block, and re-check for a concurrently inserted duplicate -- the
 * existing entry wins in that case.
 *
 * Returns the new (or pre-existing) entry, holding an extra reference
 * when @want_ref is true, or an ERR_PTR() on failure.
 */
struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
				 struct net_device *dev, bool want_ref)
{
	u32 hash_val;
	int key_len = tbl->key_len;
	int error;
	struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
	struct neigh_hash_table *nht;

	if (!n) {
		rc = ERR_PTR(-ENOBUFS);
		goto out;
	}

	memcpy(n->primary_key, pkey, key_len);
	n->dev = dev;
	dev_hold(dev);

	/* Protocol specific setup. */
	if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	if (dev->netdev_ops->ndo_neigh_construct) {
		error = dev->netdev_ops->ndo_neigh_construct(n);
		if (error < 0) {
			rc = ERR_PTR(error);
			goto out_neigh_release;
		}
	}

	/* Device specific setup. */
	if (n->parms->neigh_setup &&
	    (error = n->parms->neigh_setup(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	/* Backdate ->confirmed so the entry does not start out looking
	 * recently confirmed.
	 */
	n->confirmed = jiffies - (n->parms->base_reachable_time << 1);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);

	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	if (n->parms->dead) {
		rc = ERR_PTR(-EINVAL);
		goto out_tbl_unlock;
	}

	/* Re-check under the lock: another CPU may have created the
	 * same entry while we were constructing ours.
	 */
	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
					    lockdep_is_held(&tbl->lock));
	     n1 != NULL;
	     n1 = rcu_dereference_protected(n1->next,
			lockdep_is_held(&tbl->lock))) {
		if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
			if (want_ref)
				neigh_hold(n1);
			rc = n1;
			goto out_tbl_unlock;
		}
	}

	n->dead = 0;
	if (want_ref)
		neigh_hold(n);
	/* Publish at the head of the bucket chain. */
	rcu_assign_pointer(n->next,
			   rcu_dereference_protected(nht->hash_buckets[hash_val],
						     lockdep_is_held(&tbl->lock)));
	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
	write_unlock_bh(&tbl->lock);
	neigh_dbg(2, "neigh %p is created\n", n);
	rc = n;
out:
	return rc;
out_tbl_unlock:
	write_unlock_bh(&tbl->lock);
out_neigh_release:
	neigh_release(n);
	goto out;
}
EXPORT_SYMBOL(__neigh_create);
548
549 static u32 pneigh_hash(const void *pkey, int key_len)
550 {
551         u32 hash_val = *(u32 *)(pkey + key_len - 4);
552         hash_val ^= (hash_val >> 16);
553         hash_val ^= hash_val >> 8;
554         hash_val ^= hash_val >> 4;
555         hash_val &= PNEIGH_HASHMASK;
556         return hash_val;
557 }
558
559 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
560                                               struct net *net,
561                                               const void *pkey,
562                                               int key_len,
563                                               struct net_device *dev)
564 {
565         while (n) {
566                 if (!memcmp(n->key, pkey, key_len) &&
567                     net_eq(pneigh_net(n), net) &&
568                     (n->dev == dev || !n->dev))
569                         return n;
570                 n = n->next;
571         }
572         return NULL;
573 }
574
/* Raw proxy-entry lookup: no locking is done here, so the caller is
 * responsible for holding tbl->lock (compare pneigh_lookup(), which
 * wraps the same chain walk in read_lock_bh).  Returns the matching
 * entry or NULL.
 */
struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
		struct net *net, const void *pkey, struct net_device *dev)
{
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
				 net, pkey, key_len, dev);
}
EXPORT_SYMBOL_GPL(__pneigh_lookup);
585
/* Look up a proxy-neighbour entry for (@pkey, @dev) in @net,
 * creating one when @creat is non-zero and no entry exists.
 *
 * The lookup runs under the table read lock.  Creation requires the
 * RTNL (asserted below), which serialises concurrent creators; the
 * new entry is linked in under the table write lock.  Returns the
 * entry, or NULL (miss with !@creat, allocation failure, or
 * pconstructor failure).
 */
struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
				    struct net *net, const void *pkey,
				    struct net_device *dev, int creat)
{
	struct pneigh_entry *n;
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	read_lock_bh(&tbl->lock);
	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
			      net, pkey, key_len, dev);
	read_unlock_bh(&tbl->lock);

	if (n || !creat)
		goto out;

	ASSERT_RTNL();

	n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
	if (!n)
		goto out;

	write_pnet(&n->net, hold_net(net));
	memcpy(n->key, pkey, key_len);
	n->dev = dev;
	if (dev)
		dev_hold(dev);

	if (tbl->pconstructor && tbl->pconstructor(n)) {
		/* Constructor failed: undo the refs taken above. */
		if (dev)
			dev_put(dev);
		release_net(net);
		kfree(n);
		n = NULL;
		goto out;
	}

	write_lock_bh(&tbl->lock);
	n->next = tbl->phash_buckets[hash_val];
	tbl->phash_buckets[hash_val] = n;
	write_unlock_bh(&tbl->lock);
out:
	return n;
}
EXPORT_SYMBOL(pneigh_lookup);
631
632
/* Remove the proxy entry matching (@pkey, @dev, @net) exactly (no
 * wildcard-device matching here, unlike lookup).  Returns 0 when an
 * entry was removed, -ENOENT otherwise.  The pdestructor hook and
 * the final kfree() run after the table lock has been dropped.
 */
int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
		  struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	write_lock_bh(&tbl->lock);
	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
	     np = &n->next) {
		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
		    net_eq(pneigh_net(n), net)) {
			*np = n->next;
			write_unlock_bh(&tbl->lock);
			if (tbl->pdestructor)
				tbl->pdestructor(n);
			if (n->dev)
				dev_put(n->dev);
			release_net(pneigh_net(n));
			kfree(n);
			return 0;
		}
	}
	write_unlock_bh(&tbl->lock);
	return -ENOENT;
}
659
/* Remove every proxy entry for @dev, or all entries when @dev is
 * NULL.  No locking here: called from neigh_ifdown() with tbl->lock
 * already write-held.  Always returns -ENOENT; the caller ignores
 * the return value.
 */
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	u32 h;

	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
		np = &tbl->phash_buckets[h];
		while ((n = *np) != NULL) {
			if (!dev || n->dev == dev) {
				*np = n->next;
				if (tbl->pdestructor)
					tbl->pdestructor(n);
				if (n->dev)
					dev_put(n->dev);
				release_net(pneigh_net(n));
				kfree(n);
				continue;
			}
			np = &n->next;
		}
	}
	return -ENOENT;
}
683
684 static void neigh_parms_destroy(struct neigh_parms *parms);
685
/* Drop one reference to @parms; destroy it when this was the last. */
static inline void neigh_parms_put(struct neigh_parms *parms)
{
	if (atomic_dec_and_test(&parms->refcnt))
		neigh_parms_destroy(parms);
}
691
/*
 *	Final destruction of a neighbour entry.  The neighbour must
 *	already be out of the table (->dead set, unlinked from its
 *	chain); this runs once the last reference is dropped.
 */
void neigh_destroy(struct neighbour *neigh)
{
	struct net_device *dev = neigh->dev;

	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

	if (!neigh->dead) {
		/* Refcounting bug somewhere: complain and leak rather
		 * than free an entry that may still be reachable.
		 */
		pr_warn("Destroying alive neighbour %p\n", neigh);
		dump_stack();
		return;
	}

	/* A pending timer here means a timer reference was leaked. */
	if (neigh_del_timer(neigh))
		pr_warn("Impossible event\n");

	write_lock_bh(&neigh->lock);
	__skb_queue_purge(&neigh->arp_queue);
	write_unlock_bh(&neigh->lock);
	neigh->arp_queue_len_bytes = 0;

	if (dev->netdev_ops->ndo_neigh_destroy)
		dev->netdev_ops->ndo_neigh_destroy(neigh);

	dev_put(dev);
	neigh_parms_put(neigh->parms);

	neigh_dbg(2, "neigh %p is destroyed\n", neigh);

	atomic_dec(&neigh->tbl->entries);
	/* Free after a grace period: RCU readers may still see it. */
	kfree_rcu(neigh, rcu);
}
EXPORT_SYMBOL(neigh_destroy);
728
/* Neighbour state is suspicious;
   disable fast path by routing transmits through the generic
   ops->output handler.

   Called with write_locked neigh.
 */
static void neigh_suspect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is suspected\n", neigh);

	neigh->output = neigh->ops->output;
}
740
/* Neighbour state is OK;
   enable fast path by routing transmits through the
   ops->connected_output handler.

   Called with write_locked neigh.
 */
static void neigh_connect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is connected\n", neigh);

	neigh->output = neigh->ops->connected_output;
}
752
753 static void neigh_periodic_work(struct work_struct *work)
754 {
755         struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
756         struct neighbour *n;
757         struct neighbour __rcu **np;
758         unsigned int i;
759         struct neigh_hash_table *nht;
760
761         NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
762
763         write_lock_bh(&tbl->lock);
764         nht = rcu_dereference_protected(tbl->nht,
765                                         lockdep_is_held(&tbl->lock));
766
767         if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
768                 goto out;
769
770         /*
771          *      periodically recompute ReachableTime from random function
772          */
773
774         if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
775                 struct neigh_parms *p;
776                 tbl->last_rand = jiffies;
777                 for (p = &tbl->parms; p; p = p->next)
778                         p->reachable_time =
779                                 neigh_rand_reach_time(p->base_reachable_time);
780         }
781
782         for (i = 0 ; i < (1 << nht->hash_shift); i++) {
783                 np = &nht->hash_buckets[i];
784
785                 while ((n = rcu_dereference_protected(*np,
786                                 lockdep_is_held(&tbl->lock))) != NULL) {
787                         unsigned int state;
788
789                         write_lock(&n->lock);
790
791                         state = n->nud_state;
792                         if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
793                                 write_unlock(&n->lock);
794                                 goto next_elt;
795                         }
796
797                         if (time_before(n->used, n->confirmed))
798                                 n->used = n->confirmed;
799
800                         if (atomic_read(&n->refcnt) == 1 &&
801                             (state == NUD_FAILED ||
802                              time_after(jiffies, n->used + n->parms->gc_staletime))) {
803                                 *np = n->next;
804                                 n->dead = 1;
805                                 write_unlock(&n->lock);
806                                 neigh_cleanup_and_release(n);
807                                 continue;
808                         }
809                         write_unlock(&n->lock);
810
811 next_elt:
812                         np = &n->next;
813                 }
814                 /*
815                  * It's fine to release lock here, even if hash table
816                  * grows while we are preempted.
817                  */
818                 write_unlock_bh(&tbl->lock);
819                 cond_resched();
820                 write_lock_bh(&tbl->lock);
821                 nht = rcu_dereference_protected(tbl->nht,
822                                                 lockdep_is_held(&tbl->lock));
823         }
824 out:
825         /* Cycle through all hash buckets every base_reachable_time/2 ticks.
826          * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
827          * base_reachable_time.
828          */
829         schedule_delayed_work(&tbl->gc_work,
830                               tbl->parms.base_reachable_time >> 1);
831         write_unlock_bh(&tbl->lock);
832 }
833
834 static __inline__ int neigh_max_probes(struct neighbour *n)
835 {
836         struct neigh_parms *p = n->parms;
837         return (n->nud_state & NUD_PROBE) ?
838                 p->ucast_probes :
839                 p->ucast_probes + p->app_probes + p->mcast_probes;
840 }
841
/* Entry has failed resolution (NUD_FAILED): report every packet
 * still queued on it as undeliverable, then flush the queue.
 *
 * Called with neigh->lock write-held; the lock is dropped around
 * each error_report call, hence the __releases/__acquires sparse
 * annotations.
 */
static void neigh_invalidate(struct neighbour *neigh)
	__releases(neigh->lock)
	__acquires(neigh->lock)
{
	struct sk_buff *skb;

	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
	neigh_dbg(2, "neigh %p is failed\n", neigh);
	neigh->updated = jiffies;

	/* It is very thin place. report_unreachable is very complicated
	   routine. Particularly, it can hit the same neighbour entry!

	   So that, we try to be accurate and avoid dead loop. --ANK
	 */
	while (neigh->nud_state == NUD_FAILED &&
	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
		write_unlock(&neigh->lock);
		neigh->ops->error_report(neigh, skb);
		write_lock(&neigh->lock);
	}
	__skb_queue_purge(&neigh->arp_queue);
	neigh->arp_queue_len_bytes = 0;
}
866
/* Send one solicitation for @neigh.  Called with neigh->lock write-held;
 * the lock is released before calling into ->solicit() (which may sleep
 * on some paths) and is NOT re-acquired.
 */
static void neigh_probe(struct neighbour *neigh)
	__releases(neigh->lock)
{
	struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
	/* keep skb alive even if arp_queue overflows */
	if (skb)
		skb = skb_copy(skb, GFP_ATOMIC);
	write_unlock(&neigh->lock);
	neigh->ops->solicit(neigh, skb);
	atomic_inc(&neigh->probes);
	kfree_skb(skb);
}
879
/* Called when a timer expires for a neighbour entry.  Walks the entry
 * through the NUD state machine (REACHABLE -> DELAY -> PROBE -> FAILED),
 * re-arms the timer while the entry remains in a timed state, and sends
 * a probe or a netlink notification as required.
 */

static void neigh_timer_handler(unsigned long arg)
{
	unsigned long now, next;
	struct neighbour *neigh = (struct neighbour *)arg;
	unsigned int state;
	int notify = 0;

	write_lock(&neigh->lock);

	state = neigh->nud_state;
	now = jiffies;
	next = now + HZ;

	/* The timer can race with a state change that left the timed
	 * states; in that case there is nothing to do.
	 */
	if (!(state & NUD_IN_TIMER))
		goto out;

	if (state & NUD_REACHABLE) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->reachable_time)) {
			neigh_dbg(2, "neigh %p is still alive\n", neigh);
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else if (time_before_eq(now,
					  neigh->used + neigh->parms->delay_probe_time)) {
			/* Confirmation aged out, but the entry was used
			 * recently: re-verify it via NUD_DELAY.
			 */
			neigh_dbg(2, "neigh %p is delayed\n", neigh);
			neigh->nud_state = NUD_DELAY;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			next = now + neigh->parms->delay_probe_time;
		} else {
			neigh_dbg(2, "neigh %p is suspected\n", neigh);
			neigh->nud_state = NUD_STALE;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			notify = 1;
		}
	} else if (state & NUD_DELAY) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->delay_probe_time)) {
			/* Confirmed (e.g. by upper-layer feedback) while
			 * delaying: go straight back to REACHABLE.
			 */
			neigh_dbg(2, "neigh %p is now reachable\n", neigh);
			neigh->nud_state = NUD_REACHABLE;
			neigh->updated = jiffies;
			neigh_connect(neigh);
			notify = 1;
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else {
			neigh_dbg(2, "neigh %p is probed\n", neigh);
			neigh->nud_state = NUD_PROBE;
			neigh->updated = jiffies;
			atomic_set(&neigh->probes, 0);
			next = now + neigh->parms->retrans_time;
		}
	} else {
		/* NUD_PROBE|NUD_INCOMPLETE */
		next = now + neigh->parms->retrans_time;
	}

	/* Probe budget exhausted: declare the neighbour failed. */
	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		neigh->nud_state = NUD_FAILED;
		notify = 1;
		neigh_invalidate(neigh);
	}

	if (neigh->nud_state & NUD_IN_TIMER) {
		/* Never re-arm closer than HZ/2 ahead. */
		if (time_before(next, jiffies + HZ/2))
			next = jiffies + HZ/2;
		if (!mod_timer(&neigh->timer, next))
			neigh_hold(neigh);
	}
	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
		/* neigh_probe() drops neigh->lock for us. */
		neigh_probe(neigh);
	} else {
out:
		write_unlock(&neigh->lock);
	}

	if (notify)
		neigh_update_notify(neigh);

	neigh_release(neigh);
}
963
/* Kick neighbour resolution for @neigh on behalf of @skb.
 *
 * Returns 0 when the entry is usable and the caller may transmit
 * immediately, 1 when the skb was queued pending resolution (or freed
 * on immediate failure) and the caller must not transmit it itself.
 */
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc;
	bool immediate_probe = false;

	write_lock_bh(&neigh->lock);

	rc = 0;
	/* Already usable or already being verified: nothing to do. */
	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		goto out_unlock_bh;

	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
		if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
			unsigned long next, now = jiffies;

			/* Start resolution: pre-charge the unicast probe
			 * budget and send the first probe immediately.
			 */
			atomic_set(&neigh->probes, neigh->parms->ucast_probes);
			neigh->nud_state     = NUD_INCOMPLETE;
			neigh->updated = now;
			next = now + max(neigh->parms->retrans_time, HZ/2);
			neigh_add_timer(neigh, next);
			immediate_probe = true;
		} else {
			/* No probing mechanism configured at all: fail. */
			neigh->nud_state = NUD_FAILED;
			neigh->updated = jiffies;
			write_unlock_bh(&neigh->lock);

			kfree_skb(skb);
			return 1;
		}
	} else if (neigh->nud_state & NUD_STALE) {
		neigh_dbg(2, "neigh %p is delayed\n", neigh);
		neigh->nud_state = NUD_DELAY;
		neigh->updated = jiffies;
		neigh_add_timer(neigh,
				jiffies + neigh->parms->delay_probe_time);
	}

	if (neigh->nud_state == NUD_INCOMPLETE) {
		if (skb) {
			/* Evict oldest queued packets until this one fits
			 * within the per-neighbour byte budget.
			 */
			while (neigh->arp_queue_len_bytes + skb->truesize >
			       neigh->parms->queue_len_bytes) {
				struct sk_buff *buff;

				buff = __skb_dequeue(&neigh->arp_queue);
				if (!buff)
					break;
				neigh->arp_queue_len_bytes -= buff->truesize;
				kfree_skb(buff);
				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
			}
			skb_dst_force(skb);
			__skb_queue_tail(&neigh->arp_queue, skb);
			neigh->arp_queue_len_bytes += skb->truesize;
		}
		rc = 1;
	}
out_unlock_bh:
	/* write_lock_bh() is undone in two steps so that neigh_probe()
	 * (which drops only the lock itself) can be used on this path.
	 */
	if (immediate_probe)
		neigh_probe(neigh);
	else
		write_unlock(&neigh->lock);
	local_bh_enable();
	return rc;
}
1028 EXPORT_SYMBOL(__neigh_event_send);
1029
1030 static void neigh_update_hhs(struct neighbour *neigh)
1031 {
1032         struct hh_cache *hh;
1033         void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1034                 = NULL;
1035
1036         if (neigh->dev->header_ops)
1037                 update = neigh->dev->header_ops->cache_update;
1038
1039         if (update) {
1040                 hh = &neigh->hh;
1041                 if (hh->hh_len) {
1042                         write_seqlock_bh(&hh->hh_lock);
1043                         update(hh, neigh->dev, neigh->ha);
1044                         write_sequnlock_bh(&hh->hh_lock);
1045                 }
1046         }
1047 }
1048
1049
1050
/* Generic update routine.
   -- lladdr is the new lladdr, or NULL if it is not supplied.
   -- new    is the new state.
   -- flags
	NEIGH_UPDATE_F_OVERRIDE allows overriding an existing lladdr,
				if it is different.
	NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect an existing "connected"
				lladdr instead of overriding it
				if it is different.
				It also allows retaining the current state
				if lladdr is unchanged.
	NEIGH_UPDATE_F_ADMIN	means that the change is administrative.

	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows overriding an existing
				NTF_ROUTER flag.
	NEIGH_UPDATE_F_ISROUTER	indicates that the neighbour is known to be
				a router.

   Caller MUST hold a reference count on the entry.
 */
1071
int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
		 u32 flags)
{
	u8 old;
	int err;
	int notify = 0;
	struct net_device *dev;
	int update_isrouter = 0;

	write_lock_bh(&neigh->lock);

	dev    = neigh->dev;
	old    = neigh->nud_state;
	err    = -EPERM;

	/* Only administrative updates may touch NOARP/PERMANENT entries. */
	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
	    (old & (NUD_NOARP | NUD_PERMANENT)))
		goto out;

	if (!(new & NUD_VALID)) {
		/* Transition to an invalid state: stop the timer, and on
		 * INCOMPLETE/PROBE -> FAILED also flush the arp_queue.
		 */
		neigh_del_timer(neigh);
		if (old & NUD_CONNECTED)
			neigh_suspect(neigh);
		neigh->nud_state = new;
		err = 0;
		notify = old & NUD_VALID;
		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
		    (new & NUD_FAILED)) {
			neigh_invalidate(neigh);
			notify = 1;
		}
		goto out;
	}

	/* Compare new lladdr with cached one */
	if (!dev->addr_len) {
		/* First case: device needs no address. */
		lladdr = neigh->ha;
	} else if (lladdr) {
		/* The second case: if something is already cached
		   and a new address is proposed:
		   - compare new & old
		   - if they are different, check override flag
		 */
		if ((old & NUD_VALID) &&
		    !memcmp(lladdr, neigh->ha, dev->addr_len))
			lladdr = neigh->ha;
	} else {
		/* No address is supplied; if we know something,
		   use it, otherwise discard the request.
		 */
		err = -EINVAL;
		if (!(old & NUD_VALID))
			goto out;
		lladdr = neigh->ha;
	}

	if (new & NUD_CONNECTED)
		neigh->confirmed = jiffies;
	neigh->updated = jiffies;

	/* If entry was valid and address is not changed,
	   do not change entry state, if new one is STALE.
	 */
	err = 0;
	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	if (old & NUD_VALID) {
		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
			update_isrouter = 0;
			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
			    (old & NUD_CONNECTED)) {
				/* Keep the cached address but downgrade the
				 * entry to STALE so it gets re-verified.
				 */
				lladdr = neigh->ha;
				new = NUD_STALE;
			} else
				goto out;
		} else {
			if (lladdr == neigh->ha && new == NUD_STALE &&
			    ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
			     (old & NUD_CONNECTED))
			    )
				new = old;
		}
	}

	if (new != old) {
		neigh_del_timer(neigh);
		if (new & NUD_IN_TIMER)
			neigh_add_timer(neigh, (jiffies +
						((new & NUD_REACHABLE) ?
						 neigh->parms->reachable_time :
						 0)));
		neigh->nud_state = new;
	}

	if (lladdr != neigh->ha) {
		/* Publish the new hardware address under ha_lock and
		 * refresh any cached hardware headers built from it.
		 */
		write_seqlock(&neigh->ha_lock);
		memcpy(&neigh->ha, lladdr, dev->addr_len);
		write_sequnlock(&neigh->ha_lock);
		neigh_update_hhs(neigh);
		if (!(new & NUD_CONNECTED))
			neigh->confirmed = jiffies -
				      (neigh->parms->base_reachable_time << 1);
		notify = 1;
	}
	if (new == old)
		goto out;
	if (new & NUD_CONNECTED)
		neigh_connect(neigh);
	else
		neigh_suspect(neigh);
	if (!(old & NUD_VALID)) {
		struct sk_buff *skb;

		/* Again: avoid dead loop if something went wrong */

		while (neigh->nud_state & NUD_VALID &&
		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
			struct dst_entry *dst = skb_dst(skb);
			struct neighbour *n2, *n1 = neigh;
			write_unlock_bh(&neigh->lock);

			rcu_read_lock();

			/* Why not just use 'neigh' as-is?  The problem is that
			 * things such as shaper, eql, and sch_teql can end up
			 * using alternative, different, neigh objects to output
			 * the packet in the output path.  So what we need to do
			 * here is re-lookup the top-level neigh in the path so
			 * we can reinject the packet there.
			 */
			n2 = NULL;
			if (dst) {
				n2 = dst_neigh_lookup_skb(dst, skb);
				if (n2)
					n1 = n2;
			}
			n1->output(n1, skb);
			if (n2)
				neigh_release(n2);
			rcu_read_unlock();

			write_lock_bh(&neigh->lock);
		}
		/* Anything left could not be delivered; discard it. */
		__skb_queue_purge(&neigh->arp_queue);
		neigh->arp_queue_len_bytes = 0;
	}
out:
	if (update_isrouter) {
		neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
			(neigh->flags | NTF_ROUTER) :
			(neigh->flags & ~NTF_ROUTER);
	}
	write_unlock_bh(&neigh->lock);

	if (notify)
		neigh_update_notify(neigh);

	return err;
}
1231 EXPORT_SYMBOL(neigh_update);
1232
1233 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1234                                  u8 *lladdr, void *saddr,
1235                                  struct net_device *dev)
1236 {
1237         struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1238                                                  lladdr || !dev->addr_len);
1239         if (neigh)
1240                 neigh_update(neigh, lladdr, NUD_STALE,
1241                              NEIGH_UPDATE_F_OVERRIDE);
1242         return neigh;
1243 }
1244 EXPORT_SYMBOL(neigh_event_ns);
1245
1246 /* called with read_lock_bh(&n->lock); */
1247 static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
1248 {
1249         struct net_device *dev = dst->dev;
1250         __be16 prot = dst->ops->protocol;
1251         struct hh_cache *hh = &n->hh;
1252
1253         write_lock_bh(&n->lock);
1254
1255         /* Only one thread can come in here and initialize the
1256          * hh_cache entry.
1257          */
1258         if (!hh->hh_len)
1259                 dev->header_ops->cache(n, hh, prot);
1260
1261         write_unlock_bh(&n->lock);
1262 }
1263
1264 /* This function can be used in contexts, where only old dev_queue_xmit
1265  * worked, f.e. if you want to override normal output path (eql, shaper),
1266  * but resolution is not made yet.
1267  */
1268
1269 int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb)
1270 {
1271         struct net_device *dev = skb->dev;
1272
1273         __skb_pull(skb, skb_network_offset(skb));
1274
1275         if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
1276                             skb->len) < 0 &&
1277             dev->header_ops->rebuild(skb))
1278                 return 0;
1279
1280         return dev_queue_xmit(skb);
1281 }
1282 EXPORT_SYMBOL(neigh_compat_output);
1283
/* Slow and careful: resolve the neighbour first (queueing the skb if
 * resolution is pending), then build the link-layer header under the
 * ha seqlock and transmit.  Returns -EINVAL on error (skb freed) or
 * the dev_queue_xmit() result.
 */

int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	int rc = 0;

	if (!dst)
		goto discard;

	/* neigh_event_send() returns 0 when the entry is usable now;
	 * otherwise the skb was queued (or dropped) on our behalf.
	 */
	if (!neigh_event_send(neigh, skb)) {
		int err;
		struct net_device *dev = neigh->dev;
		unsigned int seq;

		if (dev->header_ops->cache && !neigh->hh.hh_len)
			neigh_hh_init(neigh, dst);

		/* Retry until a consistent hardware address is read. */
		do {
			__skb_pull(skb, skb_network_offset(skb));
			seq = read_seqbegin(&neigh->ha_lock);
			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
					      neigh->ha, NULL, skb->len);
		} while (read_seqretry(&neigh->ha_lock, seq));

		if (err >= 0)
			rc = dev_queue_xmit(skb);
		else
			goto out_kfree_skb;
	}
out:
	return rc;
discard:
	neigh_dbg(1, "%s: dst=%p neigh=%p\n", __func__, dst, neigh);
out_kfree_skb:
	rc = -EINVAL;
	kfree_skb(skb);
	goto out;
}
1323 EXPORT_SYMBOL(neigh_resolve_output);
1324
1325 /* As fast as possible without hh cache */
1326
1327 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1328 {
1329         struct net_device *dev = neigh->dev;
1330         unsigned int seq;
1331         int err;
1332
1333         do {
1334                 __skb_pull(skb, skb_network_offset(skb));
1335                 seq = read_seqbegin(&neigh->ha_lock);
1336                 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1337                                       neigh->ha, NULL, skb->len);
1338         } while (read_seqretry(&neigh->ha_lock, seq));
1339
1340         if (err >= 0)
1341                 err = dev_queue_xmit(skb);
1342         else {
1343                 err = -EINVAL;
1344                 kfree_skb(skb);
1345         }
1346         return err;
1347 }
1348 EXPORT_SYMBOL(neigh_connected_output);
1349
/* Trivial output method: no resolution and no link-layer header are
 * needed, so hand the skb straight to the device queue.
 */
int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	return dev_queue_xmit(skb);
}
1354 EXPORT_SYMBOL(neigh_direct_output);
1355
/* Proxy-queue timer handler: re-process (via ->proxy_redo) every queued
 * skb whose scheduled time has arrived and re-arm the timer for the
 * earliest remaining entry.
 */
static void neigh_proxy_process(unsigned long arg)
{
	struct neigh_table *tbl = (struct neigh_table *)arg;
	long sched_next = 0;
	unsigned long now = jiffies;
	struct sk_buff *skb, *n;

	spin_lock(&tbl->proxy_queue.lock);

	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
		/* tdif <= 0 means this entry's deadline has passed. */
		long tdif = NEIGH_CB(skb)->sched_next - now;

		if (tdif <= 0) {
			struct net_device *dev = skb->dev;

			__skb_unlink(skb, &tbl->proxy_queue);
			if (tbl->proxy_redo && netif_running(dev)) {
				rcu_read_lock();
				tbl->proxy_redo(skb);
				rcu_read_unlock();
			} else {
				kfree_skb(skb);
			}

			/* Drop the reference taken in pneigh_enqueue(). */
			dev_put(dev);
		} else if (!sched_next || tdif < sched_next)
			sched_next = tdif;
	}
	del_timer(&tbl->proxy_timer);
	if (sched_next)
		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
1389
/* Queue @skb for delayed proxy processing at a random point within
 * [now, now + proxy_delay) to avoid synchronized replies.  The skb is
 * dropped when the proxy queue already exceeds p->proxy_qlen.
 */
void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
		    struct sk_buff *skb)
{
	unsigned long now = jiffies;
	unsigned long sched_next = now + (net_random() % p->proxy_delay);

	if (tbl->proxy_queue.qlen > p->proxy_qlen) {
		kfree_skb(skb);
		return;
	}

	NEIGH_CB(skb)->sched_next = sched_next;
	NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;

	spin_lock(&tbl->proxy_queue.lock);
	/* If the timer was already armed, keep the earlier deadline. */
	if (del_timer(&tbl->proxy_timer)) {
		if (time_before(tbl->proxy_timer.expires, sched_next))
			sched_next = tbl->proxy_timer.expires;
	}
	skb_dst_drop(skb);
	/* Hold the device; released in neigh_proxy_process(). */
	dev_hold(skb->dev);
	__skb_queue_tail(&tbl->proxy_queue, skb);
	mod_timer(&tbl->proxy_timer, sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
1415 EXPORT_SYMBOL(pneigh_enqueue);
1416
1417 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1418                                                       struct net *net, int ifindex)
1419 {
1420         struct neigh_parms *p;
1421
1422         for (p = &tbl->parms; p; p = p->next) {
1423                 if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1424                     (!p->dev && !ifindex && net_eq(net, &init_net)))
1425                         return p;
1426         }
1427
1428         return NULL;
1429 }
1430
1431 struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1432                                       struct neigh_table *tbl)
1433 {
1434         struct neigh_parms *p;
1435         struct net *net = dev_net(dev);
1436         const struct net_device_ops *ops = dev->netdev_ops;
1437
1438         p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
1439         if (p) {
1440                 p->tbl            = tbl;
1441                 atomic_set(&p->refcnt, 1);
1442                 p->reachable_time =
1443                                 neigh_rand_reach_time(p->base_reachable_time);
1444                 dev_hold(dev);
1445                 p->dev = dev;
1446                 write_pnet(&p->net, hold_net(net));
1447                 p->sysctl_table = NULL;
1448
1449                 if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
1450                         release_net(net);
1451                         dev_put(dev);
1452                         kfree(p);
1453                         return NULL;
1454                 }
1455
1456                 write_lock_bh(&tbl->lock);
1457                 p->next         = tbl->parms.next;
1458                 tbl->parms.next = p;
1459                 write_unlock_bh(&tbl->lock);
1460         }
1461         return p;
1462 }
1463 EXPORT_SYMBOL(neigh_parms_alloc);
1464
/* RCU callback: drop the reference that kept an unlinked neigh_parms
 * alive across the grace period (see neigh_parms_release()).
 */
static void neigh_rcu_free_parms(struct rcu_head *head)
{
	struct neigh_parms *parms =
		container_of(head, struct neigh_parms, rcu_head);

	neigh_parms_put(parms);
}
1472
/* Unlink @parms from @tbl's list and schedule its release.  The final
 * put happens after an RCU grace period (neigh_rcu_free_parms) so that
 * concurrent readers holding a pointer stay safe.  The table's built-in
 * default parms (&tbl->parms) are never released this way.
 */
void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
	struct neigh_parms **p;

	if (!parms || parms == &tbl->parms)
		return;
	write_lock_bh(&tbl->lock);
	for (p = &tbl->parms.next; *p; p = &(*p)->next) {
		if (*p == parms) {
			*p = parms->next;
			parms->dead = 1;
			write_unlock_bh(&tbl->lock);
			if (parms->dev)
				dev_put(parms->dev);
			call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
			return;
		}
	}
	write_unlock_bh(&tbl->lock);
	neigh_dbg(1, "%s: not found\n", __func__);
}
1494 EXPORT_SYMBOL(neigh_parms_release);
1495
/* Final destruction of a neigh_parms once its refcount reaches zero:
 * drop the namespace reference taken in neigh_parms_alloc() and free.
 */
static void neigh_parms_destroy(struct neigh_parms *parms)
{
	release_net(neigh_parms_net(parms));
	kfree(parms);
}
1501
1502 static struct lock_class_key neigh_table_proxy_queue_class;
1503
/* One-time initialization of a neighbour table: default parms, per-cpu
 * statistics, /proc entry, hash tables, the gc work item and the proxy
 * timer.  Does not register the table on the global neigh_tables list;
 * see neigh_table_init() for that.  Panics on allocation failure, as
 * this runs during subsystem bring-up.
 */
static void neigh_table_init_no_netlink(struct neigh_table *tbl)
{
	unsigned long now = jiffies;
	unsigned long phsize;

	write_pnet(&tbl->parms.net, &init_net);
	atomic_set(&tbl->parms.refcnt, 1);
	/* Randomize so that all entries do not expire simultaneously. */
	tbl->parms.reachable_time =
			  neigh_rand_reach_time(tbl->parms.base_reachable_time);

	tbl->stats = alloc_percpu(struct neigh_statistics);
	if (!tbl->stats)
		panic("cannot create neighbour cache statistics");

#ifdef CONFIG_PROC_FS
	if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
			      &neigh_stat_seq_fops, tbl))
		panic("cannot create neighbour proc dir entry");
#endif

	/* Initial neighbour hash: 2^3 buckets, grown on demand. */
	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));

	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

	if (!tbl->nht || !tbl->phash_buckets)
		panic("cannot allocate neighbour cache hashes");

	if (!tbl->entry_size)
		tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
					tbl->key_len, NEIGH_PRIV_ALIGN);
	else
		WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);

	rwlock_init(&tbl->lock);
	INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
	schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
	setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
	skb_queue_head_init_class(&tbl->proxy_queue,
			&neigh_table_proxy_queue_class);

	tbl->last_flush = now;
	tbl->last_rand  = now + tbl->parms.reachable_time * 20;
}
1548
1549 void neigh_table_init(struct neigh_table *tbl)
1550 {
1551         struct neigh_table *tmp;
1552
1553         neigh_table_init_no_netlink(tbl);
1554         write_lock(&neigh_tbl_lock);
1555         for (tmp = neigh_tables; tmp; tmp = tmp->next) {
1556                 if (tmp->family == tbl->family)
1557                         break;
1558         }
1559         tbl->next       = neigh_tables;
1560         neigh_tables    = tbl;
1561         write_unlock(&neigh_tbl_lock);
1562
1563         if (unlikely(tmp)) {
1564                 pr_err("Registering multiple tables for family %d\n",
1565                        tbl->family);
1566                 dump_stack();
1567         }
1568 }
1569 EXPORT_SYMBOL(neigh_table_init);
1570
/* Tear down @tbl: stop gc work and timers, drop all entries, unlink
 * the table from the global list, and free its hash tables, /proc
 * entry and per-cpu statistics.  Always returns 0.
 */
int neigh_table_clear(struct neigh_table *tbl)
{
	struct neigh_table **tp;

	/* It is not clean... Fix it to unload IPv6 module safely */
	cancel_delayed_work_sync(&tbl->gc_work);
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	neigh_ifdown(tbl, NULL);
	if (atomic_read(&tbl->entries))
		pr_crit("neighbour leakage\n");
	write_lock(&neigh_tbl_lock);
	for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
		if (*tp == tbl) {
			*tp = tbl->next;
			break;
		}
	}
	write_unlock(&neigh_tbl_lock);

	/* Free the hash table only after a grace period so lockless
	 * readers still traversing it remain safe.
	 */
	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
		 neigh_hash_free_rcu);
	tbl->nht = NULL;

	kfree(tbl->phash_buckets);
	tbl->phash_buckets = NULL;

	remove_proc_entry(tbl->id, init_net.proc_net_stat);

	free_percpu(tbl->stats);
	tbl->stats = NULL;

	return 0;
}
1605 EXPORT_SYMBOL(neigh_table_clear);
1606
/* RTM_DELNEIGH handler: remove the neighbour (or proxy entry) described
 * by the netlink message.  Runs under RTNL.  Returns 0 on success or a
 * negative errno.
 */
static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *dst_attr;
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	int err = -EINVAL;

	ASSERT_RTNL();
	if (nlmsg_len(nlh) < sizeof(*ndm))
		goto out;

	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
	if (dst_attr == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}
	}

	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
		struct neighbour *neigh;

		if (tbl->family != ndm->ndm_family)
			continue;
		/* Found the matching table; the list lock is dropped here
		 * and every path below leaves via 'out'.
		 */
		read_unlock(&neigh_tbl_lock);

		if (nla_len(dst_attr) < tbl->key_len)
			goto out;

		if (ndm->ndm_flags & NTF_PROXY) {
			err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
			goto out;
		}

		if (dev == NULL)
			goto out;

		neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
		if (neigh == NULL) {
			err = -ENOENT;
			goto out;
		}

		/* "Delete" by forcing the entry into NUD_FAILED. */
		err = neigh_update(neigh, NULL, NUD_FAILED,
				   NEIGH_UPDATE_F_OVERRIDE |
				   NEIGH_UPDATE_F_ADMIN);
		neigh_release(neigh);
		goto out;
	}
	read_unlock(&neigh_tbl_lock);
	err = -EAFNOSUPPORT;

out:
	return err;
}
1670
/* RTM_NEWNEIGH handler: create or update a neighbour (or proxy) entry
 * described by a netlink request.  Runs under RTNL (see ASSERT_RTNL
 * below).  Returns 0 on success or a negative errno.
 */
static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *tb[NDA_MAX+1];
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	int err;

	ASSERT_RTNL();
	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
	if (err < 0)
		goto out;

	/* A destination address attribute is mandatory. */
	err = -EINVAL;
	if (tb[NDA_DST] == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}

		/* If a link-layer address was supplied it must be at least
		 * as long as the device's hardware address (err is still
		 * -EINVAL here).
		 */
		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
			goto out;
	}

	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
		/* Administrative updates override existing state by default;
		 * NLM_F_REPLACE handling below may clear the override flag.
		 */
		int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
		struct neighbour *neigh;
		void *dst, *lladdr;

		if (tbl->family != ndm->ndm_family)
			continue;
		/* Matching table found.  NOTE(review): tbl is still used
		 * after the list lock is dropped; presumably tables are
		 * never unregistered at runtime — confirm.
		 */
		read_unlock(&neigh_tbl_lock);

		/* Key must be at least the table's key length (err == -EINVAL). */
		if (nla_len(tb[NDA_DST]) < tbl->key_len)
			goto out;
		dst = nla_data(tb[NDA_DST]);
		lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;

		if (ndm->ndm_flags & NTF_PROXY) {
			/* Proxy entries live in the pneigh hash, not the
			 * regular neighbour cache; create/refresh one here.
			 */
			struct pneigh_entry *pn;

			err = -ENOBUFS;
			pn = pneigh_lookup(tbl, net, dst, dev, 1);
			if (pn) {
				pn->flags = ndm->ndm_flags;
				err = 0;
			}
			goto out;
		}

		/* Non-proxy entries require a device (err == -EINVAL). */
		if (dev == NULL)
			goto out;

		neigh = neigh_lookup(tbl, dst, dev);
		if (neigh == NULL) {
			if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
				err = -ENOENT;
				goto out;
			}

			neigh = __neigh_lookup_errno(tbl, dst, dev);
			if (IS_ERR(neigh)) {
				err = PTR_ERR(neigh);
				goto out;
			}
		} else {
			/* Entry exists: honour exclusive-create, and only
			 * override state when an explicit replace was asked.
			 */
			if (nlh->nlmsg_flags & NLM_F_EXCL) {
				err = -EEXIST;
				neigh_release(neigh);
				goto out;
			}

			if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
				flags &= ~NEIGH_UPDATE_F_OVERRIDE;
		}

		if (ndm->ndm_flags & NTF_USE) {
			/* NTF_USE: kick resolution instead of setting state. */
			neigh_event_send(neigh, NULL);
			err = 0;
		} else
			err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
		neigh_release(neigh);
		goto out;
	}

	read_unlock(&neigh_tbl_lock);
	err = -EAFNOSUPPORT;
out:
	return err;
}
1768
1769 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
1770 {
1771         struct nlattr *nest;
1772
1773         nest = nla_nest_start(skb, NDTA_PARMS);
1774         if (nest == NULL)
1775                 return -ENOBUFS;
1776
1777         if ((parms->dev &&
1778              nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
1779             nla_put_u32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)) ||
1780             nla_put_u32(skb, NDTPA_QUEUE_LENBYTES, parms->queue_len_bytes) ||
1781             /* approximative value for deprecated QUEUE_LEN (in packets) */
1782             nla_put_u32(skb, NDTPA_QUEUE_LEN,
1783                         parms->queue_len_bytes / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
1784             nla_put_u32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen) ||
1785             nla_put_u32(skb, NDTPA_APP_PROBES, parms->app_probes) ||
1786             nla_put_u32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes) ||
1787             nla_put_u32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes) ||
1788             nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time) ||
1789             nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
1790                           parms->base_reachable_time) ||
1791             nla_put_msecs(skb, NDTPA_GC_STALETIME, parms->gc_staletime) ||
1792             nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
1793                           parms->delay_probe_time) ||
1794             nla_put_msecs(skb, NDTPA_RETRANS_TIME, parms->retrans_time) ||
1795             nla_put_msecs(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay) ||
1796             nla_put_msecs(skb, NDTPA_PROXY_DELAY, parms->proxy_delay) ||
1797             nla_put_msecs(skb, NDTPA_LOCKTIME, parms->locktime))
1798                 goto nla_put_failure;
1799         return nla_nest_end(skb, nest);
1800
1801 nla_put_failure:
1802         nla_nest_cancel(skb, nest);
1803         return -EMSGSIZE;
1804 }
1805
/* Build one RTM_NEWNEIGHTBL message describing @tbl: the ndtmsg header,
 * GC thresholds/interval, an NDTA_CONFIG snapshot, aggregated per-CPU
 * NDTA_STATS, and the table's default parms.  Returns the nlmsg_end()
 * result or -EMSGSIZE on overflow.
 */
static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
			      u32 pid, u32 seq, int type, int flags)
{
	struct nlmsghdr *nlh;
	struct ndtmsg *ndtmsg;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	/* Hold tbl->lock so the values dumped below form a consistent set. */
	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
	    nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval) ||
	    nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
	    nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
	    nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
		goto nla_put_failure;
	{
		/* NDTA_CONFIG: point-in-time configuration snapshot. */
		unsigned long now = jiffies;
		unsigned int flush_delta = now - tbl->last_flush;
		unsigned int rand_delta = now - tbl->last_rand;
		struct neigh_hash_table *nht;
		struct ndt_config ndc = {
			.ndtc_key_len		= tbl->key_len,
			.ndtc_entry_size	= tbl->entry_size,
			.ndtc_entries		= atomic_read(&tbl->entries),
			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
		};

		/* The hash table itself is RCU-managed; take the RCU read
		 * side to sample its randomization seed and mask.
		 */
		rcu_read_lock_bh();
		nht = rcu_dereference_bh(tbl->nht);
		ndc.ndtc_hash_rnd = nht->hash_rnd[0];
		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
		rcu_read_unlock_bh();

		if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
			goto nla_put_failure;
	}

	{
		/* NDTA_STATS: sum the per-CPU statistics counters. */
		int cpu;
		struct ndt_stats ndst;

		memset(&ndst, 0, sizeof(ndst));

		for_each_possible_cpu(cpu) {
			struct neigh_statistics	*st;

			st = per_cpu_ptr(tbl->stats, cpu);
			ndst.ndts_allocs		+= st->allocs;
			ndst.ndts_destroys		+= st->destroys;
			ndst.ndts_hash_grows		+= st->hash_grows;
			ndst.ndts_res_failed		+= st->res_failed;
			ndst.ndts_lookups		+= st->lookups;
			ndst.ndts_hits			+= st->hits;
			ndst.ndts_rcv_probes_mcast	+= st->rcv_probes_mcast;
			ndst.ndts_rcv_probes_ucast	+= st->rcv_probes_ucast;
			ndst.ndts_periodic_gc_runs	+= st->periodic_gc_runs;
			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
		}

		if (nla_put(skb, NDTA_STATS, sizeof(ndst), &ndst))
			goto nla_put_failure;
	}

	/* The table's default parms must not be device-bound. */
	BUG_ON(tbl->parms.dev);
	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
		goto nla_put_failure;

	read_unlock_bh(&tbl->lock);
	return nlmsg_end(skb, nlh);

nla_put_failure:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
1891
1892 static int neightbl_fill_param_info(struct sk_buff *skb,
1893                                     struct neigh_table *tbl,
1894                                     struct neigh_parms *parms,
1895                                     u32 pid, u32 seq, int type,
1896                                     unsigned int flags)
1897 {
1898         struct ndtmsg *ndtmsg;
1899         struct nlmsghdr *nlh;
1900
1901         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1902         if (nlh == NULL)
1903                 return -EMSGSIZE;
1904
1905         ndtmsg = nlmsg_data(nlh);
1906
1907         read_lock_bh(&tbl->lock);
1908         ndtmsg->ndtm_family = tbl->family;
1909         ndtmsg->ndtm_pad1   = 0;
1910         ndtmsg->ndtm_pad2   = 0;
1911
1912         if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
1913             neightbl_fill_parms(skb, parms) < 0)
1914                 goto errout;
1915
1916         read_unlock_bh(&tbl->lock);
1917         return nlmsg_end(skb, nlh);
1918 errout:
1919         read_unlock_bh(&tbl->lock);
1920         nlmsg_cancel(skb, nlh);
1921         return -EMSGSIZE;
1922 }
1923
/* Validation policy for top-level RTM_*NEIGHTBL attributes. */
static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
	[NDTA_NAME]		= { .type = NLA_STRING },
	[NDTA_THRESH1]		= { .type = NLA_U32 },
	[NDTA_THRESH2]		= { .type = NLA_U32 },
	[NDTA_THRESH3]		= { .type = NLA_U32 },
	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
	[NDTA_PARMS]		= { .type = NLA_NESTED },
};
1932
/* Validation policy for attributes nested inside NDTA_PARMS. */
static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
};
1948
/* RTM_SETNEIGHTBL handler: update a neighbour table's tunables and/or
 * one of its parms sets from a netlink request.  The table is selected
 * by NDTA_NAME (and optionally family); parms within it by
 * NDTPA_IFINDEX.  Returns 0 on success or a negative errno.
 */
static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	struct neigh_table *tbl;
	struct ndtmsg *ndtmsg;
	struct nlattr *tb[NDTA_MAX+1];
	int err;

	err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
			  nl_neightbl_policy);
	if (err < 0)
		goto errout;

	/* A table name is mandatory for selecting the target table. */
	if (tb[NDTA_NAME] == NULL) {
		err = -EINVAL;
		goto errout;
	}

	ndtmsg = nlmsg_data(nlh);
	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
		/* Family 0 (AF_UNSPEC) matches any table. */
		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
			continue;

		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
			break;
	}

	if (tbl == NULL) {
		err = -ENOENT;
		goto errout_locked;
	}

	/*
	 * We acquire tbl->lock to be nice to the periodic timers and
	 * make sure they always see a consistent set of values.
	 */
	write_lock_bh(&tbl->lock);

	if (tb[NDTA_PARMS]) {
		struct nlattr *tbp[NDTPA_MAX+1];
		struct neigh_parms *p;
		int i, ifindex = 0;

		err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
				       nl_ntbl_parm_policy);
		if (err < 0)
			goto errout_tbl_lock;

		/* ifindex 0 selects the table's default parms. */
		if (tbp[NDTPA_IFINDEX])
			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);

		p = lookup_neigh_parms(tbl, net, ifindex);
		if (p == NULL) {
			err = -ENOENT;
			goto errout_tbl_lock;
		}

		/* Apply every supplied parameter attribute in turn. */
		for (i = 1; i <= NDTPA_MAX; i++) {
			if (tbp[i] == NULL)
				continue;

			switch (i) {
			case NDTPA_QUEUE_LEN:
				/* Deprecated packet count; converted to an
				 * approximate byte limit.
				 */
				p->queue_len_bytes = nla_get_u32(tbp[i]) *
						     SKB_TRUESIZE(ETH_FRAME_LEN);
				break;
			case NDTPA_QUEUE_LENBYTES:
				p->queue_len_bytes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_PROXY_QLEN:
				p->proxy_qlen = nla_get_u32(tbp[i]);
				break;
			case NDTPA_APP_PROBES:
				p->app_probes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_UCAST_PROBES:
				p->ucast_probes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_MCAST_PROBES:
				p->mcast_probes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_BASE_REACHABLE_TIME:
				p->base_reachable_time = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_GC_STALETIME:
				p->gc_staletime = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_DELAY_PROBE_TIME:
				p->delay_probe_time = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_RETRANS_TIME:
				p->retrans_time = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_ANYCAST_DELAY:
				p->anycast_delay = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_PROXY_DELAY:
				p->proxy_delay = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_LOCKTIME:
				p->locktime = nla_get_msecs(tbp[i]);
				break;
			}
		}
	}

	/* Global GC knobs may only be changed from the initial netns. */
	err = -ENOENT;
	if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
	     tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
	    !net_eq(net, &init_net))
		goto errout_tbl_lock;

	if (tb[NDTA_THRESH1])
		tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);

	if (tb[NDTA_THRESH2])
		tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);

	if (tb[NDTA_THRESH3])
		tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);

	if (tb[NDTA_GC_INTERVAL])
		tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);

	err = 0;

errout_tbl_lock:
	write_unlock_bh(&tbl->lock);
errout_locked:
	read_unlock(&neigh_tbl_lock);
errout:
	return err;
}
2083
/* RTM_GETNEIGHTBL dump: emit every table (optionally filtered by
 * family) followed by its per-device parms sets.  Resume state is kept
 * in cb->args[0] (table index) and cb->args[1] (parms index within the
 * table).
 */
static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int family, tidx, nidx = 0;
	int tbl_skip = cb->args[0];
	int neigh_skip = cb->args[1];
	struct neigh_table *tbl;

	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;

	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
		struct neigh_parms *p;

		/* Skip tables already dumped and families not requested. */
		if (tidx < tbl_skip || (family && tbl->family != family))
			continue;

		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
				       cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
				       NLM_F_MULTI) <= 0)
			break;

		/* tbl->parms.next starts the per-device parms list; the
		 * default parms were already dumped by neightbl_fill_info.
		 */
		for (nidx = 0, p = tbl->parms.next; p; p = p->next) {
			if (!net_eq(neigh_parms_net(p), net))
				continue;

			if (nidx < neigh_skip)
				goto next;

			if (neightbl_fill_param_info(skb, tbl, p,
						     NETLINK_CB(cb->skb).portid,
						     cb->nlh->nlmsg_seq,
						     RTM_NEWNEIGHTBL,
						     NLM_F_MULTI) <= 0)
				goto out;
		next:
			nidx++;
		}

		/* Next table starts from its first parms entry. */
		neigh_skip = 0;
	}
out:
	read_unlock(&neigh_tbl_lock);
	cb->args[0] = tidx;
	cb->args[1] = nidx;

	return skb->len;
}
2132
/* Build one RTM_NEWNEIGH message for a regular neighbour entry:
 * ndmsg header, NDA_DST key, NDA_LLADDR (when the entry is valid),
 * probe count and cache info.  Returns nlmsg_end() or -EMSGSIZE.
 */
static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
			   u32 pid, u32 seq, int type, unsigned int flags)
{
	unsigned long now = jiffies;
	struct nda_cacheinfo ci;
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family	 = neigh->ops->family;
	ndm->ndm_pad1    = 0;
	ndm->ndm_pad2    = 0;
	ndm->ndm_flags	 = neigh->flags;
	ndm->ndm_type	 = neigh->type;
	ndm->ndm_ifindex = neigh->dev->ifindex;

	if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
		goto nla_put_failure;

	/* Snapshot state, hardware address and timestamps under the
	 * entry's lock so they are mutually consistent.
	 */
	read_lock_bh(&neigh->lock);
	ndm->ndm_state	 = neigh->nud_state;
	if (neigh->nud_state & NUD_VALID) {
		char haddr[MAX_ADDR_LEN];

		neigh_ha_snapshot(haddr, neigh, neigh->dev);
		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
			read_unlock_bh(&neigh->lock);
			goto nla_put_failure;
		}
	}

	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
	ci.ndm_updated	 = jiffies_to_clock_t(now - neigh->updated);
	/* Exclude the reference the dumper itself holds. */
	ci.ndm_refcnt	 = atomic_read(&neigh->refcnt) - 1;
	read_unlock_bh(&neigh->lock);

	if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
	    nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2184
2185 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2186                             u32 pid, u32 seq, int type, unsigned int flags,
2187                             struct neigh_table *tbl)
2188 {
2189         struct nlmsghdr *nlh;
2190         struct ndmsg *ndm;
2191
2192         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2193         if (nlh == NULL)
2194                 return -EMSGSIZE;
2195
2196         ndm = nlmsg_data(nlh);
2197         ndm->ndm_family  = tbl->family;
2198         ndm->ndm_pad1    = 0;
2199         ndm->ndm_pad2    = 0;
2200         ndm->ndm_flags   = pn->flags | NTF_PROXY;
2201         ndm->ndm_type    = NDA_DST;
2202         ndm->ndm_ifindex = pn->dev->ifindex;
2203         ndm->ndm_state   = NUD_NONE;
2204
2205         if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2206                 goto nla_put_failure;
2207
2208         return nlmsg_end(skb, nlh);
2209
2210 nla_put_failure:
2211         nlmsg_cancel(skb, nlh);
2212         return -EMSGSIZE;
2213 }
2214
/* Propagate a neighbour change: first to in-kernel netevent listeners,
 * then to userspace via an RTM_NEWNEIGH notification.
 */
static void neigh_update_notify(struct neighbour *neigh)
{
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	__neigh_notify(neigh, RTM_NEWNEIGH, 0);
}
2220
/* Dump all neighbour entries of @tbl belonging to the requesting netns.
 * Walks the RCU-protected hash table; resume state lives in
 * cb->args[1] (bucket) and cb->args[2] (index within bucket).
 * Returns skb->len on completion or -1 when the skb filled up.
 */
static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			    struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct neighbour *n;
	int rc, h, s_h = cb->args[1];
	int idx, s_idx = idx = cb->args[2];
	struct neigh_hash_table *nht;

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

	for (h = s_h; h < (1 << nht->hash_shift); h++) {
		/* Only the resumed bucket keeps its skip index. */
		if (h > s_h)
			s_idx = 0;
		for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
		     n != NULL;
		     n = rcu_dereference_bh(n->next)) {
			if (!net_eq(dev_net(n->dev), net))
				continue;
			if (idx < s_idx)
				goto next;
			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
					    cb->nlh->nlmsg_seq,
					    RTM_NEWNEIGH,
					    NLM_F_MULTI) <= 0) {
				rc = -1;
				goto out;
			}
next:
			idx++;
		}
	}
	rc = skb->len;
out:
	rcu_read_unlock_bh();
	/* Record where to resume on the next dump callback. */
	cb->args[1] = h;
	cb->args[2] = idx;
	return rc;
}
2261
2262 static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2263                              struct netlink_callback *cb)
2264 {
2265         struct pneigh_entry *n;
2266         struct net *net = sock_net(skb->sk);
2267         int rc, h, s_h = cb->args[3];
2268         int idx, s_idx = idx = cb->args[4];
2269
2270         read_lock_bh(&tbl->lock);
2271
2272         for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
2273                 if (h > s_h)
2274                         s_idx = 0;
2275                 for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
2276                         if (dev_net(n->dev) != net)
2277                                 continue;
2278                         if (idx < s_idx)
2279                                 goto next;
2280                         if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2281                                             cb->nlh->nlmsg_seq,
2282                                             RTM_NEWNEIGH,
2283                                             NLM_F_MULTI, tbl) <= 0) {
2284                                 read_unlock_bh(&tbl->lock);
2285                                 rc = -1;
2286                                 goto out;
2287                         }
2288                 next:
2289                         idx++;
2290                 }
2291         }
2292
2293         read_unlock_bh(&tbl->lock);
2294         rc = skb->len;
2295 out:
2296         cb->args[3] = h;
2297         cb->args[4] = idx;
2298         return rc;
2299
2300 }
2301
/* RTM_GETNEIGH dump entry point: iterate all neighbour tables matching
 * the requested family and dump either proxy entries (when the request
 * carries NTF_PROXY) or regular entries.  cb->args[0] tracks the table
 * index; deeper args are owned by the per-table dumpers.
 */
static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct neigh_table *tbl;
	int t, family, s_t;
	int proxy = 0;
	int err;

	read_lock(&neigh_tbl_lock);
	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;

	/* check for full ndmsg structure presence, family member is
	 * the same for both structures
	 */
	if (nlmsg_len(cb->nlh) >= sizeof(struct ndmsg) &&
	    ((struct ndmsg *) nlmsg_data(cb->nlh))->ndm_flags == NTF_PROXY)
		proxy = 1;

	s_t = cb->args[0];

	for (tbl = neigh_tables, t = 0; tbl;
	     tbl = tbl->next, t++) {
		if (t < s_t || (family && tbl->family != family))
			continue;
		/* Entering a new table: clear the sub-dumpers' resume
		 * state (args[1..]) so they start from the beginning.
		 */
		if (t > s_t)
			memset(&cb->args[1], 0, sizeof(cb->args) -
						sizeof(cb->args[0]));
		if (proxy)
			err = pneigh_dump_table(tbl, skb, cb);
		else
			err = neigh_dump_table(tbl, skb, cb);
		if (err < 0)
			break;
	}
	read_unlock(&neigh_tbl_lock);

	cb->args[0] = t;
	return skb->len;
}
2340
2341 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2342 {
2343         int chain;
2344         struct neigh_hash_table *nht;
2345
2346         rcu_read_lock_bh();
2347         nht = rcu_dereference_bh(tbl->nht);
2348
2349         read_lock(&tbl->lock); /* avoid resizes */
2350         for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2351                 struct neighbour *n;
2352
2353                 for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
2354                      n != NULL;
2355                      n = rcu_dereference_bh(n->next))
2356                         cb(n, cookie);
2357         }
2358         read_unlock(&tbl->lock);
2359         rcu_read_unlock_bh();
2360 }
2361 EXPORT_SYMBOL(neigh_for_each);
2362
/* The tbl->lock must be held as a writer and BH disabled. */
/* Walk every entry of @tbl and call @cb on it under the entry's write
 * lock; when @cb returns nonzero the entry is unlinked from its hash
 * chain, marked dead, and released via neigh_cleanup_and_release().
 */
void __neigh_for_each_release(struct neigh_table *tbl,
			      int (*cb)(struct neighbour *))
{
	int chain;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		/* np always points at the link that leads to n, so an
		 * unlink is a single pointer update.
		 */
		np = &nht->hash_buckets[chain];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			int release;

			write_lock(&n->lock);
			release = cb(n);
			if (release) {
				/* Splice n out of the chain; readers under
				 * RCU see either the old or new next pointer.
				 */
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
				n->dead = 1;
			} else
				np = &n->next;
			write_unlock(&n->lock);
			/* Release only after dropping n->lock. */
			if (release)
				neigh_cleanup_and_release(n);
		}
	}
}
EXPORT_SYMBOL(__neigh_for_each_release);
2397
2398 #ifdef CONFIG_PROC_FS
2399
/* seq_file iterator: find the first regular neighbour entry visible to
 * this sequence (right netns, optional sub-iterator and NOARP filters).
 * NOTE(review): caller presumably holds the RCU-BH read side for the
 * rcu_dereference_bh() calls below — confirm at the seq_start site.
 */
static struct neighbour *neigh_get_first(struct seq_file *seq)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;
	struct neighbour *n = NULL;
	int bucket = state->bucket;

	/* Starting the regular-entry phase of the walk. */
	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
	for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
		n = rcu_dereference_bh(nht->hash_buckets[bucket]);

		while (n) {
			if (!net_eq(dev_net(n->dev), net))
				goto next;
			if (state->neigh_sub_iter) {
				loff_t fakep = 0;
				void *v;

				/* Protocol-specific filter: skip entries it
				 * rejects.
				 */
				v = state->neigh_sub_iter(state, n, &fakep);
				if (!v)
					goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;
			if (n->nud_state & ~NUD_NOARP)
				break;
next:
			n = rcu_dereference_bh(n->next);
		}

		if (n)
			break;
	}
	state->bucket = bucket;

	return n;
}
2438
/* seq_file iterator: advance from entry @n to the next visible entry,
 * moving across hash buckets as needed, applying the same filters as
 * neigh_get_first().  Decrements *pos when a next entry is found.
 */
static struct neighbour *neigh_get_next(struct seq_file *seq,
					struct neighbour *n,
					loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;

	if (state->neigh_sub_iter) {
		/* The sub-iterator may yield further positions within the
		 * same entry before we move on.
		 */
		void *v = state->neigh_sub_iter(state, n, pos);
		if (v)
			return n;
	}
	n = rcu_dereference_bh(n->next);

	while (1) {
		while (n) {
			if (!net_eq(dev_net(n->dev), net))
				goto next;
			if (state->neigh_sub_iter) {
				void *v = state->neigh_sub_iter(state, n, pos);
				if (v)
					return n;
				goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;

			if (n->nud_state & ~NUD_NOARP)
				break;
next:
			n = rcu_dereference_bh(n->next);
		}

		if (n)
			break;

		/* Bucket exhausted: move to the next one, if any. */
		if (++state->bucket >= (1 << nht->hash_shift))
			break;

		n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
	}

	if (n && pos)
		--(*pos);
	return n;
}
2486
2487 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2488 {
2489         struct neighbour *n = neigh_get_first(seq);
2490
2491         if (n) {
2492                 --(*pos);
2493                 while (*pos) {
2494                         n = neigh_get_next(seq, n, pos);
2495                         if (!n)
2496                                 break;
2497                 }
2498         }
2499         return *pos ? NULL : n;
2500 }
2501
2502 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2503 {
2504         struct neigh_seq_state *state = seq->private;
2505         struct net *net = seq_file_net(seq);
2506         struct neigh_table *tbl = state->tbl;
2507         struct pneigh_entry *pn = NULL;
2508         int bucket = state->bucket;
2509
2510         state->flags |= NEIGH_SEQ_IS_PNEIGH;
2511         for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2512                 pn = tbl->phash_buckets[bucket];
2513                 while (pn && !net_eq(pneigh_net(pn), net))
2514                         pn = pn->next;
2515                 if (pn)
2516                         break;
2517         }
2518         state->bucket = bucket;
2519
2520         return pn;
2521 }
2522
2523 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2524                                             struct pneigh_entry *pn,
2525                                             loff_t *pos)
2526 {
2527         struct neigh_seq_state *state = seq->private;
2528         struct net *net = seq_file_net(seq);
2529         struct neigh_table *tbl = state->tbl;
2530
2531         do {
2532                 pn = pn->next;
2533         } while (pn && !net_eq(pneigh_net(pn), net));
2534
2535         while (!pn) {
2536                 if (++state->bucket > PNEIGH_HASHMASK)
2537                         break;
2538                 pn = tbl->phash_buckets[state->bucket];
2539                 while (pn && !net_eq(pneigh_net(pn), net))
2540                         pn = pn->next;
2541                 if (pn)
2542                         break;
2543         }
2544
2545         if (pn && pos)
2546                 --(*pos);
2547
2548         return pn;
2549 }
2550
2551 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2552 {
2553         struct pneigh_entry *pn = pneigh_get_first(seq);
2554
2555         if (pn) {
2556                 --(*pos);
2557                 while (*pos) {
2558                         pn = pneigh_get_next(seq, pn, pos);
2559                         if (!pn)
2560                                 break;
2561                 }
2562         }
2563         return *pos ? NULL : pn;
2564 }
2565
2566 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2567 {
2568         struct neigh_seq_state *state = seq->private;
2569         void *rc;
2570         loff_t idxpos = *pos;
2571
2572         rc = neigh_get_idx(seq, &idxpos);
2573         if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2574                 rc = pneigh_get_idx(seq, &idxpos);
2575
2576         return rc;
2577 }
2578
2579 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
2580         __acquires(rcu_bh)
2581 {
2582         struct neigh_seq_state *state = seq->private;
2583
2584         state->tbl = tbl;
2585         state->bucket = 0;
2586         state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
2587
2588         rcu_read_lock_bh();
2589         state->nht = rcu_dereference_bh(tbl->nht);
2590
2591         return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
2592 }
2593 EXPORT_SYMBOL(neigh_seq_start);
2594
/* Common seq_file ->next for neighbour dumps.  After the header token,
 * continue in whichever phase the iterator is in: the main table flows
 * into the proxy table when exhausted (unless NEIGH_SEQ_NEIGH_ONLY was
 * requested at start time).
 */
void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct neigh_seq_state *state;
	void *rc;

	if (v == SEQ_START_TOKEN) {
		/* Header was just shown; hand out the first real entry. */
		rc = neigh_get_first(seq);
		goto out;
	}

	state = seq->private;
	if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
		rc = neigh_get_next(seq, v, NULL);
		if (rc)
			goto out;
		/* Main table done: fall through to the proxy table. */
		if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
			rc = pneigh_get_first(seq);
	} else {
		/* In the pneigh phase; NEIGH_ONLY dumps can never get here. */
		BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
		rc = pneigh_get_next(seq, v, NULL);
	}
out:
	++(*pos);
	return rc;
}
EXPORT_SYMBOL(neigh_seq_next);
2621
/* Common seq_file ->stop: drop the BH-disabled RCU read lock taken in
 * neigh_seq_start().
 */
void neigh_seq_stop(struct seq_file *seq, void *v)
	__releases(rcu_bh)
{
	rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_seq_stop);
2628
2629 /* statistics via seq_file */
2630
2631 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2632 {
2633         struct neigh_table *tbl = seq->private;
2634         int cpu;
2635
2636         if (*pos == 0)
2637                 return SEQ_START_TOKEN;
2638
2639         for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
2640                 if (!cpu_possible(cpu))
2641                         continue;
2642                 *pos = cpu+1;
2643                 return per_cpu_ptr(tbl->stats, cpu);
2644         }
2645         return NULL;
2646 }
2647
2648 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2649 {
2650         struct neigh_table *tbl = seq->private;
2651         int cpu;
2652
2653         for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2654                 if (!cpu_possible(cpu))
2655                         continue;
2656                 *pos = cpu+1;
2657                 return per_cpu_ptr(tbl->stats, cpu);
2658         }
2659         return NULL;
2660 }
2661
/* seq_file ->stop for the stats dump: nothing to release, the per-cpu
 * counters are read without locking.
 */
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{

}
2666
2667 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
2668 {
2669         struct neigh_table *tbl = seq->private;
2670         struct neigh_statistics *st = v;
2671
2672         if (v == SEQ_START_TOKEN) {
2673                 seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards\n");
2674                 return 0;
2675         }
2676
2677         seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
2678                         "%08lx %08lx  %08lx %08lx %08lx\n",
2679                    atomic_read(&tbl->entries),
2680
2681                    st->allocs,
2682                    st->destroys,
2683                    st->hash_grows,
2684
2685                    st->lookups,
2686                    st->hits,
2687
2688                    st->res_failed,
2689
2690                    st->rcv_probes_mcast,
2691                    st->rcv_probes_ucast,
2692
2693                    st->periodic_gc_runs,
2694                    st->forced_gc_runs,
2695                    st->unres_discards
2696                    );
2697
2698         return 0;
2699 }
2700
/* seq_file operations for the per-cpu statistics file. */
static const struct seq_operations neigh_stat_seq_ops = {
	.start	= neigh_stat_seq_start,
	.next	= neigh_stat_seq_next,
	.stop	= neigh_stat_seq_stop,
	.show	= neigh_stat_seq_show,
};
2707
2708 static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2709 {
2710         int ret = seq_open(file, &neigh_stat_seq_ops);
2711
2712         if (!ret) {
2713                 struct seq_file *sf = file->private_data;
2714                 sf->private = PDE_DATA(inode);
2715         }
2716         return ret;
2717 };
2718
/* File operations for /proc/net/stat/<table>, backed by seq_file. */
static const struct file_operations neigh_stat_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = neigh_stat_seq_open,
	.read	 = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release,
};
2726
2727 #endif /* CONFIG_PROC_FS */
2728
2729 static inline size_t neigh_nlmsg_size(void)
2730 {
2731         return NLMSG_ALIGN(sizeof(struct ndmsg))
2732                + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2733                + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2734                + nla_total_size(sizeof(struct nda_cacheinfo))
2735                + nla_total_size(4); /* NDA_PROBES */
2736 }
2737
2738 static void __neigh_notify(struct neighbour *n, int type, int flags)
2739 {
2740         struct net *net = dev_net(n->dev);
2741         struct sk_buff *skb;
2742         int err = -ENOBUFS;
2743
2744         skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
2745         if (skb == NULL)
2746                 goto errout;
2747
2748         err = neigh_fill_info(skb, n, 0, 0, type, flags);
2749         if (err < 0) {
2750                 /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
2751                 WARN_ON(err == -EMSGSIZE);
2752                 kfree_skb(skb);
2753                 goto errout;
2754         }
2755         rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
2756         return;
2757 errout:
2758         if (err < 0)
2759                 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
2760 }
2761
/* Ask userspace to resolve @n: emit an RTM_GETNEIGH request on the
 * RTNLGRP_NEIGH group (consumed by e.g. a userspace arp daemon when
 * app_probes is enabled).
 */
void neigh_app_ns(struct neighbour *n)
{
	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
}
EXPORT_SYMBOL(neigh_app_ns);
2767
2768 #ifdef CONFIG_SYSCTL
2769 static int zero;
2770 static int int_max = INT_MAX;
2771 static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
2772
/* sysctl handler for the legacy "unres_qlen" knob.  The kernel stores
 * the limit in bytes (queue_len_bytes), but this knob presents it to
 * userspace in packets, using SKB_TRUESIZE(ETH_FRAME_LEN) as the
 * per-packet size estimate, clamped to [0, unres_qlen_max].
 *
 * NOTE(review): the bytes->packets conversion rounds down, so reading
 * and writing back the same packet count can drift the stored byte
 * value downward over repeated cycles — confirm this is acceptable.
 */
static int proc_unres_qlen(struct ctl_table *ctl, int write,
			   void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int size, ret;
	struct ctl_table tmp = *ctl;

	tmp.extra1 = &zero;
	tmp.extra2 = &unres_qlen_max;
	tmp.data = &size;

	/* Expose the stored byte limit as a packet count. */
	size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);

	if (write && !ret)
		*(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
	return ret;
}
2790
/* Indices into neigh_sysctl_template.neigh_vars[].  Order matters:
 * neigh_sysctl_register() truncates per-device tables at
 * NEIGH_VAR_GC_INTERVAL, so the table-wide gc_* entries must stay last.
 */
enum {
	NEIGH_VAR_MCAST_PROBE,
	NEIGH_VAR_UCAST_PROBE,
	NEIGH_VAR_APP_PROBE,
	NEIGH_VAR_RETRANS_TIME,
	NEIGH_VAR_BASE_REACHABLE_TIME,
	NEIGH_VAR_DELAY_PROBE_TIME,
	NEIGH_VAR_GC_STALETIME,
	NEIGH_VAR_QUEUE_LEN,
	NEIGH_VAR_QUEUE_LEN_BYTES,
	NEIGH_VAR_PROXY_QLEN,
	NEIGH_VAR_ANYCAST_DELAY,
	NEIGH_VAR_PROXY_DELAY,
	NEIGH_VAR_LOCKTIME,
	NEIGH_VAR_RETRANS_TIME_MS,
	NEIGH_VAR_BASE_REACHABLE_TIME_MS,
	/* table-wide (per "default" only) entries below */
	NEIGH_VAR_GC_INTERVAL,
	NEIGH_VAR_GC_THRESH1,
	NEIGH_VAR_GC_THRESH2,
	NEIGH_VAR_GC_THRESH3,
	NEIGH_VAR_MAX
};
2813
/* Template for the per-parms sysctl table under net/<proto>/neigh/<dev>.
 * neigh_sysctl_register() kmemdup()s this and patches in the .data
 * pointers (and possibly protocol-specific handlers) for each instance.
 * The .data fields are deliberately left unset here.
 */
static struct neigh_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];	/* +1 sentinel */
} neigh_sysctl_template __read_mostly = {
	.neigh_vars = {
		[NEIGH_VAR_MCAST_PROBE] = {
			.procname	= "mcast_solicit",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1 	= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
		},
		[NEIGH_VAR_UCAST_PROBE] = {
			.procname	= "ucast_solicit",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1 	= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
		},
		[NEIGH_VAR_APP_PROBE] = {
			.procname	= "app_solicit",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1 	= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
		},
		[NEIGH_VAR_RETRANS_TIME] = {
			.procname	= "retrans_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_userhz_jiffies,
		},
		[NEIGH_VAR_BASE_REACHABLE_TIME] = {
			.procname	= "base_reachable_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_DELAY_PROBE_TIME] = {
			.procname	= "delay_first_probe_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_GC_STALETIME] = {
			.procname	= "gc_stale_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		/* legacy packet-count view of unres_qlen_bytes */
		[NEIGH_VAR_QUEUE_LEN] = {
			.procname	= "unres_qlen",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_unres_qlen,
		},
		[NEIGH_VAR_QUEUE_LEN_BYTES] = {
			.procname	= "unres_qlen_bytes",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= &zero,
			.proc_handler   = proc_dointvec_minmax,
		},
		[NEIGH_VAR_PROXY_QLEN] = {
			.procname	= "proxy_qlen",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1 	= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
		},
		[NEIGH_VAR_ANYCAST_DELAY] = {
			.procname	= "anycast_delay",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_userhz_jiffies,
		},
		[NEIGH_VAR_PROXY_DELAY] = {
			.procname	= "proxy_delay",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_userhz_jiffies,
		},
		[NEIGH_VAR_LOCKTIME] = {
			.procname	= "locktime",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_userhz_jiffies,
		},
		[NEIGH_VAR_RETRANS_TIME_MS] = {
			.procname	= "retrans_time_ms",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_ms_jiffies,
		},
		[NEIGH_VAR_BASE_REACHABLE_TIME_MS] = {
			.procname	= "base_reachable_time_ms",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_ms_jiffies,
		},
		/* table-wide entries: zeroed out for per-device tables */
		[NEIGH_VAR_GC_INTERVAL] = {
			.procname	= "gc_interval",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_GC_THRESH1] = {
			.procname	= "gc_thresh1",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1 	= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
		},
		[NEIGH_VAR_GC_THRESH2] = {
			.procname	= "gc_thresh2",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1 	= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
		},
		[NEIGH_VAR_GC_THRESH3] = {
			.procname	= "gc_thresh3",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1 	= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
		},
		{},
	},
};
2951
/* Register the sysctl tree net/<p_name>/neigh/<dev|default> for @p.
 *
 * @dev:     device the parms belong to, or NULL for a table's defaults.
 * @p_name:  protocol directory name ("ipv4", "ipv6", ...).
 * @handler: optional protocol proc handler that overrides the
 *           (base_)reachable/retrans time entries (extra1 carries @dev).
 *
 * Returns 0 on success, -ENOBUFS on allocation or registration failure.
 * The duplicated table is remembered in p->sysctl_table and torn down
 * by neigh_sysctl_unregister().
 */
int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
			  char *p_name, proc_handler *handler)
{
	struct neigh_sysctl_table *t;
	const char *dev_name_source = NULL;
	char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];

	t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
	if (!t)
		goto err;

	/* Wire each template entry to the matching field of @p. */
	t->neigh_vars[NEIGH_VAR_MCAST_PROBE].data  = &p->mcast_probes;
	t->neigh_vars[NEIGH_VAR_UCAST_PROBE].data  = &p->ucast_probes;
	t->neigh_vars[NEIGH_VAR_APP_PROBE].data  = &p->app_probes;
	t->neigh_vars[NEIGH_VAR_RETRANS_TIME].data  = &p->retrans_time;
	t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].data  = &p->base_reachable_time;
	t->neigh_vars[NEIGH_VAR_DELAY_PROBE_TIME].data  = &p->delay_probe_time;
	t->neigh_vars[NEIGH_VAR_GC_STALETIME].data  = &p->gc_staletime;
	/* Both unres_qlen knobs back onto queue_len_bytes; the legacy
	 * packet view converts via proc_unres_qlen(). */
	t->neigh_vars[NEIGH_VAR_QUEUE_LEN].data  = &p->queue_len_bytes;
	t->neigh_vars[NEIGH_VAR_QUEUE_LEN_BYTES].data  = &p->queue_len_bytes;
	t->neigh_vars[NEIGH_VAR_PROXY_QLEN].data  = &p->proxy_qlen;
	t->neigh_vars[NEIGH_VAR_ANYCAST_DELAY].data  = &p->anycast_delay;
	t->neigh_vars[NEIGH_VAR_PROXY_DELAY].data = &p->proxy_delay;
	t->neigh_vars[NEIGH_VAR_LOCKTIME].data = &p->locktime;
	t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].data  = &p->retrans_time;
	t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].data  = &p->base_reachable_time;

	if (dev) {
		dev_name_source = dev->name;
		/* Terminate the table early: the gc_* knobs are table-wide
		 * and only appear under "default". */
		memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
		       sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
	} else {
		dev_name_source = "default";
		/* The gc_* ints are laid out directly after the parms
		 * struct (see the tables' allocation) — presumably
		 * tbl->gc_interval, gc_thresh1..3; verify against the
		 * neigh_table layout before touching this. */
		t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = (int *)(p + 1);
		t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = (int *)(p + 1) + 1;
		t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = (int *)(p + 1) + 2;
		t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3;
	}


	if (handler) {
		/* RetransTime */
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].extra1 = dev;
		/* ReachableTime */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].extra1 = dev;
		/* RetransTime (in milliseconds)*/
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].extra1 = dev;
		/* ReachableTime (in milliseconds) */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].extra1 = dev;
	}

	/* Don't export sysctls to unprivileged users */
	if (neigh_parms_net(p)->user_ns != &init_user_ns)
		t->neigh_vars[0].procname = NULL;

	snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
		p_name, dev_name_source);
	t->sysctl_header =
		register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
	if (!t->sysctl_header)
		goto free;

	p->sysctl_table = t;
	return 0;

free:
	kfree(t);
err:
	return -ENOBUFS;
}
EXPORT_SYMBOL(neigh_sysctl_register);
3028
3029 void neigh_sysctl_unregister(struct neigh_parms *p)
3030 {
3031         if (p->sysctl_table) {
3032                 struct neigh_sysctl_table *t = p->sysctl_table;
3033                 p->sysctl_table = NULL;
3034                 unregister_net_sysctl_table(t->sysctl_header);
3035                 kfree(t);
3036         }
3037 }
3038 EXPORT_SYMBOL(neigh_sysctl_unregister);
3039
3040 #endif  /* CONFIG_SYSCTL */
3041
/* Register the rtnetlink message handlers shared by all neighbour
 * tables (PF_UNSPEC: the family is dispatched per message, not per
 * handler).  Runs at subsys_initcall time so protocols registering
 * their tables later find the core ready.
 */
static int __init neigh_init(void)
{
	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);

	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
		      NULL);
	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);

	return 0;
}

subsys_initcall(neigh_init);
3056