]> git.kernelconcepts.de Git - karo-tx-linux.git/blob - net/core/neighbour.c
ASoC: simple-card: Enable and disable DAI clocks as needed
[karo-tx-linux.git] / net / core / neighbour.c
1 /*
2  *      Generic address resolution entity
3  *
4  *      Authors:
5  *      Pedro Roque             <roque@di.fc.ul.pt>
6  *      Alexey Kuznetsov        <kuznet@ms2.inr.ac.ru>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  *
13  *      Fixes:
14  *      Vitaly E. Lavrov        releasing NULL neighbor in neigh_add.
15  *      Harald Welte            Add neighbour cache statistics like rtstat
16  */
17
18 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
19
20 #include <linux/slab.h>
21 #include <linux/types.h>
22 #include <linux/kernel.h>
23 #include <linux/module.h>
24 #include <linux/socket.h>
25 #include <linux/netdevice.h>
26 #include <linux/proc_fs.h>
27 #ifdef CONFIG_SYSCTL
28 #include <linux/sysctl.h>
29 #endif
30 #include <linux/times.h>
31 #include <net/net_namespace.h>
32 #include <net/neighbour.h>
33 #include <net/dst.h>
34 #include <net/sock.h>
35 #include <net/netevent.h>
36 #include <net/netlink.h>
37 #include <linux/rtnetlink.h>
38 #include <linux/random.h>
39 #include <linux/string.h>
40 #include <linux/log2.h>
41 #include <linux/inetdevice.h>
42 #include <net/addrconf.h>
43
/*
 * NOTE(review): DEBUG is defined after the #include block above; confirm
 * whether it is still expected to influence pr_debug() at this point.
 */
#define DEBUG
#define NEIGH_DEBUG 1

/*
 * neigh_dbg - emit a debug message through pr_debug() when @level does not
 * exceed the compile-time NEIGH_DEBUG threshold.
 *
 * @level is parenthesized so that an expression argument (e.g. "a & b" or
 * "x ? 1 : 2") is compared as a whole instead of being split by the
 * higher-precedence "<=" operator.
 */
#define neigh_dbg(level, fmt, ...)              \
do {                                            \
        if ((level) <= NEIGH_DEBUG)             \
                pr_debug(fmt, ##__VA_ARGS__);   \
} while (0)

/* Mask used by pneigh_hash() to reduce a hash value to a bucket index. */
#define PNEIGH_HASHMASK         0xF
53
54 static void neigh_timer_handler(unsigned long arg);
55 static void __neigh_notify(struct neighbour *n, int type, int flags);
56 static void neigh_update_notify(struct neighbour *neigh);
57 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
58
59 #ifdef CONFIG_PROC_FS
60 static const struct file_operations neigh_stat_seq_fops;
61 #endif
62
/*
   Neighbour hash table buckets are protected by the rwlock tbl->lock.

   - All scans of and updates to hash buckets MUST be made under this lock.
   - NOTHING clever should be done under this lock: no callbacks
     to protocol backends, no attempts to send anything to the network.
     Doing so will result in deadlocks if a backend/driver wants to use
     the neighbour cache.
   - If an entry requires some non-trivial action, increase
     its reference count and release the table lock.

   Neighbour entries are protected:
   - by a reference count.
   - by the rwlock neigh->lock

   The reference count prevents destruction.

   neigh->lock mainly serializes the ll address data and its validity state.
   However, the same lock is also used to protect other entry fields:
    - timer
    - resolution queue

   Again, nothing clever shall be done under neigh->lock;
   the most complicated procedure we allow is dev->hard_header.
   It is assumed that dev->hard_header is simplistic and does
   not make callbacks to neighbour tables.
 */
90
91 static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
92 {
93         kfree_skb(skb);
94         return -ENETDOWN;
95 }
96
97 static void neigh_cleanup_and_release(struct neighbour *neigh)
98 {
99         if (neigh->parms->neigh_cleanup)
100                 neigh->parms->neigh_cleanup(neigh);
101
102         __neigh_notify(neigh, RTM_DELNEIGH, 0);
103         neigh_release(neigh);
104 }
105
/*
 * Pick a random reachable time from the interval (1/2)*base...(3/2)*base.
 * This corresponds to the default IPv6 settings and is deliberately not
 * overridable, because it is a really reasonable choice.
 *
 * @base: base reachable time; a value of 0 yields 0.
 */

unsigned long neigh_rand_reach_time(unsigned long base)
{
        if (!base)
                return 0;

        return (base >> 1) + (prandom_u32() % base);
}
EXPORT_SYMBOL(neigh_rand_reach_time);
117
118
119 static int neigh_forced_gc(struct neigh_table *tbl)
120 {
121         int shrunk = 0;
122         int i;
123         struct neigh_hash_table *nht;
124
125         NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
126
127         write_lock_bh(&tbl->lock);
128         nht = rcu_dereference_protected(tbl->nht,
129                                         lockdep_is_held(&tbl->lock));
130         for (i = 0; i < (1 << nht->hash_shift); i++) {
131                 struct neighbour *n;
132                 struct neighbour __rcu **np;
133
134                 np = &nht->hash_buckets[i];
135                 while ((n = rcu_dereference_protected(*np,
136                                         lockdep_is_held(&tbl->lock))) != NULL) {
137                         /* Neighbour record may be discarded if:
138                          * - nobody refers to it.
139                          * - it is not permanent
140                          */
141                         write_lock(&n->lock);
142                         if (atomic_read(&n->refcnt) == 1 &&
143                             !(n->nud_state & NUD_PERMANENT)) {
144                                 rcu_assign_pointer(*np,
145                                         rcu_dereference_protected(n->next,
146                                                   lockdep_is_held(&tbl->lock)));
147                                 n->dead = 1;
148                                 shrunk  = 1;
149                                 write_unlock(&n->lock);
150                                 neigh_cleanup_and_release(n);
151                                 continue;
152                         }
153                         write_unlock(&n->lock);
154                         np = &n->next;
155                 }
156         }
157
158         tbl->last_flush = jiffies;
159
160         write_unlock_bh(&tbl->lock);
161
162         return shrunk;
163 }
164
165 static void neigh_add_timer(struct neighbour *n, unsigned long when)
166 {
167         neigh_hold(n);
168         if (unlikely(mod_timer(&n->timer, when))) {
169                 printk("NEIGH: BUG, double timer add, state is %x\n",
170                        n->nud_state);
171                 dump_stack();
172         }
173 }
174
175 static int neigh_del_timer(struct neighbour *n)
176 {
177         if ((n->nud_state & NUD_IN_TIMER) &&
178             del_timer(&n->timer)) {
179                 neigh_release(n);
180                 return 1;
181         }
182         return 0;
183 }
184
185 static void pneigh_queue_purge(struct sk_buff_head *list)
186 {
187         struct sk_buff *skb;
188
189         while ((skb = skb_dequeue(list)) != NULL) {
190                 dev_put(skb->dev);
191                 kfree_skb(skb);
192         }
193 }
194
195 static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
196 {
197         int i;
198         struct neigh_hash_table *nht;
199
200         nht = rcu_dereference_protected(tbl->nht,
201                                         lockdep_is_held(&tbl->lock));
202
203         for (i = 0; i < (1 << nht->hash_shift); i++) {
204                 struct neighbour *n;
205                 struct neighbour __rcu **np = &nht->hash_buckets[i];
206
207                 while ((n = rcu_dereference_protected(*np,
208                                         lockdep_is_held(&tbl->lock))) != NULL) {
209                         if (dev && n->dev != dev) {
210                                 np = &n->next;
211                                 continue;
212                         }
213                         rcu_assign_pointer(*np,
214                                    rcu_dereference_protected(n->next,
215                                                 lockdep_is_held(&tbl->lock)));
216                         write_lock(&n->lock);
217                         neigh_del_timer(n);
218                         n->dead = 1;
219
220                         if (atomic_read(&n->refcnt) != 1) {
221                                 /* The most unpleasant situation.
222                                    We must destroy neighbour entry,
223                                    but someone still uses it.
224
225                                    The destroy will be delayed until
226                                    the last user releases us, but
227                                    we must kill timers etc. and move
228                                    it to safe state.
229                                  */
230                                 __skb_queue_purge(&n->arp_queue);
231                                 n->arp_queue_len_bytes = 0;
232                                 n->output = neigh_blackhole;
233                                 if (n->nud_state & NUD_VALID)
234                                         n->nud_state = NUD_NOARP;
235                                 else
236                                         n->nud_state = NUD_NONE;
237                                 neigh_dbg(2, "neigh %p is stray\n", n);
238                         }
239                         write_unlock(&n->lock);
240                         neigh_cleanup_and_release(n);
241                 }
242         }
243 }
244
245 void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
246 {
247         write_lock_bh(&tbl->lock);
248         neigh_flush_dev(tbl, dev);
249         write_unlock_bh(&tbl->lock);
250 }
251 EXPORT_SYMBOL(neigh_changeaddr);
252
253 int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
254 {
255         write_lock_bh(&tbl->lock);
256         neigh_flush_dev(tbl, dev);
257         pneigh_ifdown(tbl, dev);
258         write_unlock_bh(&tbl->lock);
259
260         del_timer_sync(&tbl->proxy_timer);
261         pneigh_queue_purge(&tbl->proxy_queue);
262         return 0;
263 }
264 EXPORT_SYMBOL(neigh_ifdown);
265
266 static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
267 {
268         struct neighbour *n = NULL;
269         unsigned long now = jiffies;
270         int entries;
271
272         entries = atomic_inc_return(&tbl->entries) - 1;
273         if (entries >= tbl->gc_thresh3 ||
274             (entries >= tbl->gc_thresh2 &&
275              time_after(now, tbl->last_flush + 5 * HZ))) {
276                 if (!neigh_forced_gc(tbl) &&
277                     entries >= tbl->gc_thresh3)
278                         goto out_entries;
279         }
280
281         n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
282         if (!n)
283                 goto out_entries;
284
285         __skb_queue_head_init(&n->arp_queue);
286         rwlock_init(&n->lock);
287         seqlock_init(&n->ha_lock);
288         n->updated        = n->used = now;
289         n->nud_state      = NUD_NONE;
290         n->output         = neigh_blackhole;
291         seqlock_init(&n->hh.hh_lock);
292         n->parms          = neigh_parms_clone(&tbl->parms);
293         setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);
294
295         NEIGH_CACHE_STAT_INC(tbl, allocs);
296         n->tbl            = tbl;
297         atomic_set(&n->refcnt, 1);
298         n->dead           = 1;
299 out:
300         return n;
301
302 out_entries:
303         atomic_dec(&tbl->entries);
304         goto out;
305 }
306
307 static void neigh_get_hash_rnd(u32 *x)
308 {
309         get_random_bytes(x, sizeof(*x));
310         *x |= 1;
311 }
312
313 static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
314 {
315         size_t size = (1 << shift) * sizeof(struct neighbour *);
316         struct neigh_hash_table *ret;
317         struct neighbour __rcu **buckets;
318         int i;
319
320         ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
321         if (!ret)
322                 return NULL;
323         if (size <= PAGE_SIZE)
324                 buckets = kzalloc(size, GFP_ATOMIC);
325         else
326                 buckets = (struct neighbour __rcu **)
327                           __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
328                                            get_order(size));
329         if (!buckets) {
330                 kfree(ret);
331                 return NULL;
332         }
333         ret->hash_buckets = buckets;
334         ret->hash_shift = shift;
335         for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
336                 neigh_get_hash_rnd(&ret->hash_rnd[i]);
337         return ret;
338 }
339
340 static void neigh_hash_free_rcu(struct rcu_head *head)
341 {
342         struct neigh_hash_table *nht = container_of(head,
343                                                     struct neigh_hash_table,
344                                                     rcu);
345         size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
346         struct neighbour __rcu **buckets = nht->hash_buckets;
347
348         if (size <= PAGE_SIZE)
349                 kfree(buckets);
350         else
351                 free_pages((unsigned long)buckets, get_order(size));
352         kfree(nht);
353 }
354
355 static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
356                                                 unsigned long new_shift)
357 {
358         unsigned int i, hash;
359         struct neigh_hash_table *new_nht, *old_nht;
360
361         NEIGH_CACHE_STAT_INC(tbl, hash_grows);
362
363         old_nht = rcu_dereference_protected(tbl->nht,
364                                             lockdep_is_held(&tbl->lock));
365         new_nht = neigh_hash_alloc(new_shift);
366         if (!new_nht)
367                 return old_nht;
368
369         for (i = 0; i < (1 << old_nht->hash_shift); i++) {
370                 struct neighbour *n, *next;
371
372                 for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
373                                                    lockdep_is_held(&tbl->lock));
374                      n != NULL;
375                      n = next) {
376                         hash = tbl->hash(n->primary_key, n->dev,
377                                          new_nht->hash_rnd);
378
379                         hash >>= (32 - new_nht->hash_shift);
380                         next = rcu_dereference_protected(n->next,
381                                                 lockdep_is_held(&tbl->lock));
382
383                         rcu_assign_pointer(n->next,
384                                            rcu_dereference_protected(
385                                                 new_nht->hash_buckets[hash],
386                                                 lockdep_is_held(&tbl->lock)));
387                         rcu_assign_pointer(new_nht->hash_buckets[hash], n);
388                 }
389         }
390
391         rcu_assign_pointer(tbl->nht, new_nht);
392         call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
393         return new_nht;
394 }
395
396 struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
397                                struct net_device *dev)
398 {
399         struct neighbour *n;
400         int key_len = tbl->key_len;
401         u32 hash_val;
402         struct neigh_hash_table *nht;
403
404         NEIGH_CACHE_STAT_INC(tbl, lookups);
405
406         rcu_read_lock_bh();
407         nht = rcu_dereference_bh(tbl->nht);
408         hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
409
410         for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
411              n != NULL;
412              n = rcu_dereference_bh(n->next)) {
413                 if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
414                         if (!atomic_inc_not_zero(&n->refcnt))
415                                 n = NULL;
416                         NEIGH_CACHE_STAT_INC(tbl, hits);
417                         break;
418                 }
419         }
420
421         rcu_read_unlock_bh();
422         return n;
423 }
424 EXPORT_SYMBOL(neigh_lookup);
425
426 struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
427                                      const void *pkey)
428 {
429         struct neighbour *n;
430         int key_len = tbl->key_len;
431         u32 hash_val;
432         struct neigh_hash_table *nht;
433
434         NEIGH_CACHE_STAT_INC(tbl, lookups);
435
436         rcu_read_lock_bh();
437         nht = rcu_dereference_bh(tbl->nht);
438         hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);
439
440         for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
441              n != NULL;
442              n = rcu_dereference_bh(n->next)) {
443                 if (!memcmp(n->primary_key, pkey, key_len) &&
444                     net_eq(dev_net(n->dev), net)) {
445                         if (!atomic_inc_not_zero(&n->refcnt))
446                                 n = NULL;
447                         NEIGH_CACHE_STAT_INC(tbl, hits);
448                         break;
449                 }
450         }
451
452         rcu_read_unlock_bh();
453         return n;
454 }
455 EXPORT_SYMBOL(neigh_lookup_nodev);
456
457 struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
458                                  struct net_device *dev, bool want_ref)
459 {
460         u32 hash_val;
461         int key_len = tbl->key_len;
462         int error;
463         struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
464         struct neigh_hash_table *nht;
465
466         if (!n) {
467                 rc = ERR_PTR(-ENOBUFS);
468                 goto out;
469         }
470
471         memcpy(n->primary_key, pkey, key_len);
472         n->dev = dev;
473         dev_hold(dev);
474
475         /* Protocol specific setup. */
476         if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
477                 rc = ERR_PTR(error);
478                 goto out_neigh_release;
479         }
480
481         if (dev->netdev_ops->ndo_neigh_construct) {
482                 error = dev->netdev_ops->ndo_neigh_construct(n);
483                 if (error < 0) {
484                         rc = ERR_PTR(error);
485                         goto out_neigh_release;
486                 }
487         }
488
489         /* Device specific setup. */
490         if (n->parms->neigh_setup &&
491             (error = n->parms->neigh_setup(n)) < 0) {
492                 rc = ERR_PTR(error);
493                 goto out_neigh_release;
494         }
495
496         n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);
497
498         write_lock_bh(&tbl->lock);
499         nht = rcu_dereference_protected(tbl->nht,
500                                         lockdep_is_held(&tbl->lock));
501
502         if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
503                 nht = neigh_hash_grow(tbl, nht->hash_shift + 1);
504
505         hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
506
507         if (n->parms->dead) {
508                 rc = ERR_PTR(-EINVAL);
509                 goto out_tbl_unlock;
510         }
511
512         for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
513                                             lockdep_is_held(&tbl->lock));
514              n1 != NULL;
515              n1 = rcu_dereference_protected(n1->next,
516                         lockdep_is_held(&tbl->lock))) {
517                 if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
518                         if (want_ref)
519                                 neigh_hold(n1);
520                         rc = n1;
521                         goto out_tbl_unlock;
522                 }
523         }
524
525         n->dead = 0;
526         if (want_ref)
527                 neigh_hold(n);
528         rcu_assign_pointer(n->next,
529                            rcu_dereference_protected(nht->hash_buckets[hash_val],
530                                                      lockdep_is_held(&tbl->lock)));
531         rcu_assign_pointer(nht->hash_buckets[hash_val], n);
532         write_unlock_bh(&tbl->lock);
533         neigh_dbg(2, "neigh %p is created\n", n);
534         rc = n;
535 out:
536         return rc;
537 out_tbl_unlock:
538         write_unlock_bh(&tbl->lock);
539 out_neigh_release:
540         neigh_release(n);
541         goto out;
542 }
543 EXPORT_SYMBOL(__neigh_create);
544
545 static u32 pneigh_hash(const void *pkey, int key_len)
546 {
547         u32 hash_val = *(u32 *)(pkey + key_len - 4);
548         hash_val ^= (hash_val >> 16);
549         hash_val ^= hash_val >> 8;
550         hash_val ^= hash_val >> 4;
551         hash_val &= PNEIGH_HASHMASK;
552         return hash_val;
553 }
554
555 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
556                                               struct net *net,
557                                               const void *pkey,
558                                               int key_len,
559                                               struct net_device *dev)
560 {
561         while (n) {
562                 if (!memcmp(n->key, pkey, key_len) &&
563                     net_eq(pneigh_net(n), net) &&
564                     (n->dev == dev || !n->dev))
565                         return n;
566                 n = n->next;
567         }
568         return NULL;
569 }
570
571 struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
572                 struct net *net, const void *pkey, struct net_device *dev)
573 {
574         int key_len = tbl->key_len;
575         u32 hash_val = pneigh_hash(pkey, key_len);
576
577         return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
578                                  net, pkey, key_len, dev);
579 }
580 EXPORT_SYMBOL_GPL(__pneigh_lookup);
581
582 struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
583                                     struct net *net, const void *pkey,
584                                     struct net_device *dev, int creat)
585 {
586         struct pneigh_entry *n;
587         int key_len = tbl->key_len;
588         u32 hash_val = pneigh_hash(pkey, key_len);
589
590         read_lock_bh(&tbl->lock);
591         n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
592                               net, pkey, key_len, dev);
593         read_unlock_bh(&tbl->lock);
594
595         if (n || !creat)
596                 goto out;
597
598         ASSERT_RTNL();
599
600         n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
601         if (!n)
602                 goto out;
603
604         write_pnet(&n->net, hold_net(net));
605         memcpy(n->key, pkey, key_len);
606         n->dev = dev;
607         if (dev)
608                 dev_hold(dev);
609
610         if (tbl->pconstructor && tbl->pconstructor(n)) {
611                 if (dev)
612                         dev_put(dev);
613                 release_net(net);
614                 kfree(n);
615                 n = NULL;
616                 goto out;
617         }
618
619         write_lock_bh(&tbl->lock);
620         n->next = tbl->phash_buckets[hash_val];
621         tbl->phash_buckets[hash_val] = n;
622         write_unlock_bh(&tbl->lock);
623 out:
624         return n;
625 }
626 EXPORT_SYMBOL(pneigh_lookup);
627
628
629 int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
630                   struct net_device *dev)
631 {
632         struct pneigh_entry *n, **np;
633         int key_len = tbl->key_len;
634         u32 hash_val = pneigh_hash(pkey, key_len);
635
636         write_lock_bh(&tbl->lock);
637         for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
638              np = &n->next) {
639                 if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
640                     net_eq(pneigh_net(n), net)) {
641                         *np = n->next;
642                         write_unlock_bh(&tbl->lock);
643                         if (tbl->pdestructor)
644                                 tbl->pdestructor(n);
645                         if (n->dev)
646                                 dev_put(n->dev);
647                         release_net(pneigh_net(n));
648                         kfree(n);
649                         return 0;
650                 }
651         }
652         write_unlock_bh(&tbl->lock);
653         return -ENOENT;
654 }
655
656 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
657 {
658         struct pneigh_entry *n, **np;
659         u32 h;
660
661         for (h = 0; h <= PNEIGH_HASHMASK; h++) {
662                 np = &tbl->phash_buckets[h];
663                 while ((n = *np) != NULL) {
664                         if (!dev || n->dev == dev) {
665                                 *np = n->next;
666                                 if (tbl->pdestructor)
667                                         tbl->pdestructor(n);
668                                 if (n->dev)
669                                         dev_put(n->dev);
670                                 release_net(pneigh_net(n));
671                                 kfree(n);
672                                 continue;
673                         }
674                         np = &n->next;
675                 }
676         }
677         return -ENOENT;
678 }
679
680 static void neigh_parms_destroy(struct neigh_parms *parms);
681
682 static inline void neigh_parms_put(struct neigh_parms *parms)
683 {
684         if (atomic_dec_and_test(&parms->refcnt))
685                 neigh_parms_destroy(parms);
686 }
687
688 /*
689  *      neighbour must already be out of the table;
690  *
691  */
692 void neigh_destroy(struct neighbour *neigh)
693 {
694         struct net_device *dev = neigh->dev;
695
696         NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
697
698         if (!neigh->dead) {
699                 pr_warn("Destroying alive neighbour %p\n", neigh);
700                 dump_stack();
701                 return;
702         }
703
704         if (neigh_del_timer(neigh))
705                 pr_warn("Impossible event\n");
706
707         write_lock_bh(&neigh->lock);
708         __skb_queue_purge(&neigh->arp_queue);
709         write_unlock_bh(&neigh->lock);
710         neigh->arp_queue_len_bytes = 0;
711
712         if (dev->netdev_ops->ndo_neigh_destroy)
713                 dev->netdev_ops->ndo_neigh_destroy(neigh);
714
715         dev_put(dev);
716         neigh_parms_put(neigh->parms);
717
718         neigh_dbg(2, "neigh %p is destroyed\n", neigh);
719
720         atomic_dec(&neigh->tbl->entries);
721         kfree_rcu(neigh, rcu);
722 }
723 EXPORT_SYMBOL(neigh_destroy);
724
725 /* Neighbour state is suspicious;
726    disable fast path.
727
728    Called with write_locked neigh.
729  */
730 static void neigh_suspect(struct neighbour *neigh)
731 {
732         neigh_dbg(2, "neigh %p is suspected\n", neigh);
733
734         neigh->output = neigh->ops->output;
735 }
736
737 /* Neighbour state is OK;
738    enable fast path.
739
740    Called with write_locked neigh.
741  */
742 static void neigh_connect(struct neighbour *neigh)
743 {
744         neigh_dbg(2, "neigh %p is connected\n", neigh);
745
746         neigh->output = neigh->ops->connected_output;
747 }
748
749 static void neigh_periodic_work(struct work_struct *work)
750 {
751         struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
752         struct neighbour *n;
753         struct neighbour __rcu **np;
754         unsigned int i;
755         struct neigh_hash_table *nht;
756
757         NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
758
759         write_lock_bh(&tbl->lock);
760         nht = rcu_dereference_protected(tbl->nht,
761                                         lockdep_is_held(&tbl->lock));
762
763         /*
764          *      periodically recompute ReachableTime from random function
765          */
766
767         if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
768                 struct neigh_parms *p;
769                 tbl->last_rand = jiffies;
770                 list_for_each_entry(p, &tbl->parms_list, list)
771                         p->reachable_time =
772                                 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
773         }
774
775         if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
776                 goto out;
777
778         for (i = 0 ; i < (1 << nht->hash_shift); i++) {
779                 np = &nht->hash_buckets[i];
780
781                 while ((n = rcu_dereference_protected(*np,
782                                 lockdep_is_held(&tbl->lock))) != NULL) {
783                         unsigned int state;
784
785                         write_lock(&n->lock);
786
787                         state = n->nud_state;
788                         if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
789                                 write_unlock(&n->lock);
790                                 goto next_elt;
791                         }
792
793                         if (time_before(n->used, n->confirmed))
794                                 n->used = n->confirmed;
795
796                         if (atomic_read(&n->refcnt) == 1 &&
797                             (state == NUD_FAILED ||
798                              time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
799                                 *np = n->next;
800                                 n->dead = 1;
801                                 write_unlock(&n->lock);
802                                 neigh_cleanup_and_release(n);
803                                 continue;
804                         }
805                         write_unlock(&n->lock);
806
807 next_elt:
808                         np = &n->next;
809                 }
810                 /*
811                  * It's fine to release lock here, even if hash table
812                  * grows while we are preempted.
813                  */
814                 write_unlock_bh(&tbl->lock);
815                 cond_resched();
816                 write_lock_bh(&tbl->lock);
817                 nht = rcu_dereference_protected(tbl->nht,
818                                                 lockdep_is_held(&tbl->lock));
819         }
820 out:
821         /* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
822          * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
823          * BASE_REACHABLE_TIME.
824          */
825         queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
826                               NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
827         write_unlock_bh(&tbl->lock);
828 }
829
830 static __inline__ int neigh_max_probes(struct neighbour *n)
831 {
832         struct neigh_parms *p = n->parms;
833         int max_probes = NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES);
834         if (!(n->nud_state & NUD_PROBE))
835                 max_probes += NEIGH_VAR(p, MCAST_PROBES);
836         return max_probes;
837 }
838
839 static void neigh_invalidate(struct neighbour *neigh)
840         __releases(neigh->lock)
841         __acquires(neigh->lock)
842 {
843         struct sk_buff *skb;
844
845         NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
846         neigh_dbg(2, "neigh %p is failed\n", neigh);
847         neigh->updated = jiffies;
848
849         /* It is very thin place. report_unreachable is very complicated
850            routine. Particularly, it can hit the same neighbour entry!
851
852            So that, we try to be accurate and avoid dead loop. --ANK
853          */
854         while (neigh->nud_state == NUD_FAILED &&
855                (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
856                 write_unlock(&neigh->lock);
857                 neigh->ops->error_report(neigh, skb);
858                 write_lock(&neigh->lock);
859         }
860         __skb_queue_purge(&neigh->arp_queue);
861         neigh->arp_queue_len_bytes = 0;
862 }
863
864 static void neigh_probe(struct neighbour *neigh)
865         __releases(neigh->lock)
866 {
867         struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
868         /* keep skb alive even if arp_queue overflows */
869         if (skb)
870                 skb = skb_copy(skb, GFP_ATOMIC);
871         write_unlock(&neigh->lock);
872         neigh->ops->solicit(neigh, skb);
873         atomic_inc(&neigh->probes);
874         kfree_skb(skb);
875 }
876
877 /* Called when a timer expires for a neighbour entry.
 *
 * @arg carries the struct neighbour pointer, cast to unsigned long.
 *
 * Under neigh->lock the handler advances the NUD state machine:
 *   - NUD_REACHABLE stays as is while the last confirmation is younger
 *     than reachable_time; otherwise it becomes NUD_DELAY if the entry was
 *     used within DELAY_PROBE_TIME, else NUD_STALE.
 *   - NUD_DELAY goes back to NUD_REACHABLE if a confirmation arrived within
 *     DELAY_PROBE_TIME, else it enters NUD_PROBE with the probe counter
 *     reset to zero.
 *   - NUD_PROBE / NUD_INCOMPLETE become NUD_FAILED (and the entry is
 *     invalidated) once neigh->probes has reached neigh_max_probes();
 *     otherwise another solicitation is sent through neigh_probe().
 *
 * Every exit path drops one reference on the entry (neigh_release() at the
 * bottom) - presumably the reference owned by the timer that just fired.
 */
878
879 static void neigh_timer_handler(unsigned long arg)
880 {
881         unsigned long now, next;
882         struct neighbour *neigh = (struct neighbour *)arg;
883         unsigned int state;
884         int notify = 0;
885
886         write_lock(&neigh->lock);
887
888         state = neigh->nud_state;
889         now = jiffies;
890         next = now + HZ;
891
            /* The entry is no longer in a timed state: just unlock and release. */
892         if (!(state & NUD_IN_TIMER))
893                 goto out;
894
895         if (state & NUD_REACHABLE) {
896                 if (time_before_eq(now,
897                                    neigh->confirmed + neigh->parms->reachable_time)) {
898                         neigh_dbg(2, "neigh %p is still alive\n", neigh);
899                         next = neigh->confirmed + neigh->parms->reachable_time;
900                 } else if (time_before_eq(now,
901                                           neigh->used +
902                                           NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
903                         neigh_dbg(2, "neigh %p is delayed\n", neigh);
904                         neigh->nud_state = NUD_DELAY;
905                         neigh->updated = jiffies;
906                         neigh_suspect(neigh);
907                         next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
908                 } else {
909                         neigh_dbg(2, "neigh %p is suspected\n", neigh);
910                         neigh->nud_state = NUD_STALE;
911                         neigh->updated = jiffies;
912                         neigh_suspect(neigh);
913                         notify = 1;
914                 }
915         } else if (state & NUD_DELAY) {
916                 if (time_before_eq(now,
917                                    neigh->confirmed +
918                                    NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
919                         neigh_dbg(2, "neigh %p is now reachable\n", neigh);
920                         neigh->nud_state = NUD_REACHABLE;
921                         neigh->updated = jiffies;
922                         neigh_connect(neigh);
923                         notify = 1;
924                         next = neigh->confirmed + neigh->parms->reachable_time;
925                 } else {
926                         neigh_dbg(2, "neigh %p is probed\n", neigh);
927                         neigh->nud_state = NUD_PROBE;
928                         neigh->updated = jiffies;
929                         atomic_set(&neigh->probes, 0);
930                         next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
931                 }
932         } else {
933                 /* NUD_PROBE|NUD_INCOMPLETE */
934                 next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
935         }
936
            /* Probe budget exhausted: declare the entry failed and flush its queue. */
937         if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
938             atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
939                 neigh->nud_state = NUD_FAILED;
940                 notify = 1;
941                 neigh_invalidate(neigh);
942                 goto out;
943         }
944
945         if (neigh->nud_state & NUD_IN_TIMER) {
                    /* Never re-arm for sooner than half a second from now. */
946                 if (time_before(next, jiffies + HZ/2))
947                         next = jiffies + HZ/2;
                    /* mod_timer() returned 0: the timer was not pending, so the
                     * newly armed timer gets a reference of its own.
                     */
948                 if (!mod_timer(&neigh->timer, next))
949                         neigh_hold(neigh);
950         }
951         if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
                    /* neigh_probe() releases neigh->lock itself. */
952                 neigh_probe(neigh);
953         } else {
            /* Note: the 'out' label deliberately sits inside this else-arm so
             * that every non-probing path shares the single unlock below.
             */
954 out:
955                 write_unlock(&neigh->lock);
956         }
957
958         if (notify)
959                 neigh_update_notify(neigh);
960
961         neigh_release(neigh);
962 }
963
/*
 * Drive address resolution for @neigh on behalf of an outgoing packet.
 *
 * @skb may be NULL.  When the entry is (or becomes) NUD_INCOMPLETE a
 * non-NULL skb is appended to neigh->arp_queue; the oldest queued packets
 * are dropped first if QUEUE_LEN_BYTES would otherwise be exceeded.
 *
 * Returns 0 when the entry is in a CONNECTED, DELAY or PROBE state, or has
 * just been moved from STALE to DELAY.  Returns 1 when the entry is
 * unresolved: either it is NUD_INCOMPLETE (and @skb, if any, was queued),
 * or no multicast/application probes are configured, in which case the
 * entry is marked NUD_FAILED and @skb is freed here.
 *
 * Takes neigh->lock with BHs disabled for the duration of the call.
 */
964 int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
965 {
966         int rc;
967         bool immediate_probe = false;
968
969         write_lock_bh(&neigh->lock);
970
971         rc = 0;
972         if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
973                 goto out_unlock_bh;
974
975         if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
976                 if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
977                     NEIGH_VAR(neigh->parms, APP_PROBES)) {
978                         unsigned long next, now = jiffies;
979
                            /* The counter starts at UCAST_PROBES - presumably so
                             * that only the multicast/application share of
                             * neigh_max_probes() is spent while INCOMPLETE.
                             */
980                         atomic_set(&neigh->probes,
981                                    NEIGH_VAR(neigh->parms, UCAST_PROBES));
982                         neigh->nud_state     = NUD_INCOMPLETE;
983                         neigh->updated = now;
984                         next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
985                                          HZ/2);
986                         neigh_add_timer(neigh, next);
987                         immediate_probe = true;
988                 } else {
                            /* No way to probe at all: fail immediately. */
989                         neigh->nud_state = NUD_FAILED;
990                         neigh->updated = jiffies;
991                         write_unlock_bh(&neigh->lock);
992
993                         kfree_skb(skb);
994                         return 1;
995                 }
996         } else if (neigh->nud_state & NUD_STALE) {
997                 neigh_dbg(2, "neigh %p is delayed\n", neigh);
998                 neigh->nud_state = NUD_DELAY;
999                 neigh->updated = jiffies;
1000                 neigh_add_timer(neigh, jiffies +
1001                                 NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
1002         }
1003
1004         if (neigh->nud_state == NUD_INCOMPLETE) {
1005                 if (skb) {
                             /* Make room by discarding from the head of the queue. */
1006                         while (neigh->arp_queue_len_bytes + skb->truesize >
1007                                NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
1008                                 struct sk_buff *buff;
1009
1010                                 buff = __skb_dequeue(&neigh->arp_queue);
1011                                 if (!buff)
1012                                         break;
1013                                 neigh->arp_queue_len_bytes -= buff->truesize;
1014                                 kfree_skb(buff);
1015                                 NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
1016                         }
1017                         skb_dst_force(skb);
1018                         __skb_queue_tail(&neigh->arp_queue, skb);
1019                         neigh->arp_queue_len_bytes += skb->truesize;
1020                 }
1021                 rc = 1;
1022         }
1023 out_unlock_bh:
             /* neigh_probe() drops neigh->lock itself (plain write_unlock);
              * bottom halves are re-enabled separately below in both cases.
              */
1024         if (immediate_probe)
1025                 neigh_probe(neigh);
1026         else
1027                 write_unlock(&neigh->lock);
1028         local_bh_enable();
1029         return rc;
1030 }
1031 EXPORT_SYMBOL(__neigh_event_send);
1032
1033 static void neigh_update_hhs(struct neighbour *neigh)
1034 {
1035         struct hh_cache *hh;
1036         void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1037                 = NULL;
1038
1039         if (neigh->dev->header_ops)
1040                 update = neigh->dev->header_ops->cache_update;
1041
1042         if (update) {
1043                 hh = &neigh->hh;
1044                 if (hh->hh_len) {
1045                         write_seqlock_bh(&hh->hh_lock);
1046                         update(hh, neigh->dev, neigh->ha);
1047                         write_sequnlock_bh(&hh->hh_lock);
1048                 }
1049         }
1050 }
1051
1052
1053
1054 /* Generic update routine.
1055    -- lladdr is new lladdr or NULL, if it is not supplied.
1056    -- new    is new state.
1057    -- flags
1058         NEIGH_UPDATE_F_OVERRIDE allows overriding the existing lladdr,
1059                                 if it is different.
1060         NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
1061                                 lladdr instead of overriding it
1062                                 if it is different.
1063                                 It also allows retaining the current state
1064                                 if lladdr is unchanged.
1065         NEIGH_UPDATE_F_ADMIN    means that the change is administrative.
1066
1067         NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows overriding the existing
1068                                 NTF_ROUTER flag.
1069         NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as
1070                                 a router.
1071
1072    Caller MUST hold reference count on the entry.

   Returns 0 on success (including the case where a differing lladdr is
   silently ignored because no override flag applies), -EPERM when a
   non-administrative update targets a NUD_NOARP/NUD_PERMANENT entry, and
   -EINVAL when no lladdr is supplied, the device uses addresses, and
   nothing valid is cached.

   Takes neigh->lock with BHs disabled; the lock is dropped temporarily
   while queued packets are transmitted.
1073  */
1074
1075 int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
1076                  u32 flags)
1077 {
1078         u8 old;
1079         int err;
1080         int notify = 0;
1081         struct net_device *dev;
1082         int update_isrouter = 0;
1083
1084         write_lock_bh(&neigh->lock);
1085
1086         dev    = neigh->dev;
1087         old    = neigh->nud_state;
1088         err    = -EPERM;
1089
             /* Only administrative updates may touch NOARP/PERMANENT entries. */
1090         if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
1091             (old & (NUD_NOARP | NUD_PERMANENT)))
1092                 goto out;
1093
             /* Transition to a non-valid state: stop the timer and, when an
              * unresolved/probing entry is being failed, flush its queue.
              */
1094         if (!(new & NUD_VALID)) {
1095                 neigh_del_timer(neigh);
1096                 if (old & NUD_CONNECTED)
1097                         neigh_suspect(neigh);
1098                 neigh->nud_state = new;
1099                 err = 0;
1100                 notify = old & NUD_VALID;
1101                 if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
1102                     (new & NUD_FAILED)) {
1103                         neigh_invalidate(neigh);
1104                         notify = 1;
1105                 }
1106                 goto out;
1107         }
1108
             /* From here on 'lladdr == neigh->ha' (pointer equality) is used
              * as the marker for "address unchanged or not supplied".
              */
1109         /* Compare new lladdr with cached one */
1110         if (!dev->addr_len) {
1111                 /* First case: device needs no address. */
1112                 lladdr = neigh->ha;
1113         } else if (lladdr) {
1114                 /* The second case: if something is already cached
1115                    and a new address is proposed:
1116                    - compare new & old
1117                    - if they are different, check override flag
1118                  */
1119                 if ((old & NUD_VALID) &&
1120                     !memcmp(lladdr, neigh->ha, dev->addr_len))
1121                         lladdr = neigh->ha;
1122         } else {
1123                 /* No address is supplied; if we know something,
1124                    use it, otherwise discard the request.
1125                  */
1126                 err = -EINVAL;
1127                 if (!(old & NUD_VALID))
1128                         goto out;
1129                 lladdr = neigh->ha;
1130         }
1131
1132         if (new & NUD_CONNECTED)
1133                 neigh->confirmed = jiffies;
1134         neigh->updated = jiffies;
1135
1136         /* If entry was valid and address is not changed,
1137            do not change entry state, if new one is STALE.
1138          */
1139         err = 0;
1140         update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
1141         if (old & NUD_VALID) {
1142                 if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
                             /* Address differs but may not be overridden: the
                              * router flag is left alone as well.
                              */
1143                         update_isrouter = 0;
1144                         if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
1145                             (old & NUD_CONNECTED)) {
1146                                 lladdr = neigh->ha;
1147                                 new = NUD_STALE;
1148                         } else
1149                                 goto out;
1150                 } else {
1151                         if (lladdr == neigh->ha && new == NUD_STALE &&
1152                             ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
1153                              (old & NUD_CONNECTED))
1154                             )
1155                                 new = old;
1156                 }
1157         }
1158
             /* State change: re-arm the timer if the new state is a timed one. */
1159         if (new != old) {
1160                 neigh_del_timer(neigh);
1161                 if (new & NUD_IN_TIMER)
1162                         neigh_add_timer(neigh, (jiffies +
1163                                                 ((new & NUD_REACHABLE) ?
1164                                                  neigh->parms->reachable_time :
1165                                                  0)));
1166                 neigh->nud_state = new;
1167                 notify = 1;
1168         }
1169
             /* Address change: store it under ha_lock and refresh the cached
              * hardware header.  For a non-connected state 'confirmed' is
              * pushed two BASE_REACHABLE_TIME periods into the past.
              */
1170         if (lladdr != neigh->ha) {
1171                 write_seqlock(&neigh->ha_lock);
1172                 memcpy(&neigh->ha, lladdr, dev->addr_len);
1173                 write_sequnlock(&neigh->ha_lock);
1174                 neigh_update_hhs(neigh);
1175                 if (!(new & NUD_CONNECTED))
1176                         neigh->confirmed = jiffies -
1177                                       (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
1178                 notify = 1;
1179         }
1180         if (new == old)
1181                 goto out;
1182         if (new & NUD_CONNECTED)
1183                 neigh_connect(neigh);
1184         else
1185                 neigh_suspect(neigh);
             /* The entry has just become valid: send out what was queued while
              * it was unresolved.
              */
1186         if (!(old & NUD_VALID)) {
1187                 struct sk_buff *skb;
1188
1189                 /* Again: avoid dead loop if something went wrong */
1190
1191                 while (neigh->nud_state & NUD_VALID &&
1192                        (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
1193                         struct dst_entry *dst = skb_dst(skb);
1194                         struct neighbour *n2, *n1 = neigh;
                             /* The lock is dropped for the transmit and re-taken
                              * at the bottom of the loop; nud_state is
                              * re-checked on every iteration.
                              */
1195                         write_unlock_bh(&neigh->lock);
1196
1197                         rcu_read_lock();
1198
1199                         /* Why not just use 'neigh' as-is?  The problem is that
1200                          * things such as shaper, eql, and sch_teql can end up
1201                          * using alternative, different, neigh objects to output
1202                          * the packet in the output path.  So what we need to do
1203                          * here is re-lookup the top-level neigh in the path so
1204                          * we can reinject the packet there.
1205                          */
1206                         n2 = NULL;
1207                         if (dst) {
1208                                 n2 = dst_neigh_lookup_skb(dst, skb);
1209                                 if (n2)
1210                                         n1 = n2;
1211                         }
1212                         n1->output(n1, skb);
1213                         if (n2)
1214                                 neigh_release(n2);
1215                         rcu_read_unlock();
1216
1217                         write_lock_bh(&neigh->lock);
1218                 }
1219                 __skb_queue_purge(&neigh->arp_queue);
1220                 neigh->arp_queue_len_bytes = 0;
1221         }
1222 out:
             /* update_isrouter is non-zero only when the update got past the
              * lladdr checks with NEIGH_UPDATE_F_OVERRIDE_ISROUTER set and the
              * address change was not rejected.
              */
1223         if (update_isrouter) {
1224                 neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
1225                         (neigh->flags | NTF_ROUTER) :
1226                         (neigh->flags & ~NTF_ROUTER);
1227         }
1228         write_unlock_bh(&neigh->lock);
1229
1230         if (notify)
1231                 neigh_update_notify(neigh);
1232
1233         return err;
1234 }
1235 EXPORT_SYMBOL(neigh_update);
1236
1237 /* Update the neigh to listen temporarily for probe responses, even if it is
1238  * in a NUD_FAILED state. The caller has to hold neigh->lock for writing.
1239  */
1240 void __neigh_set_probe_once(struct neighbour *neigh)
1241 {
1242         neigh->updated = jiffies;
1243         if (!(neigh->nud_state & NUD_FAILED))
1244                 return;
1245         neigh->nud_state = NUD_INCOMPLETE;
1246         atomic_set(&neigh->probes, neigh_max_probes(neigh));
1247         neigh_add_timer(neigh,
1248                         jiffies + NEIGH_VAR(neigh->parms, RETRANS_TIME));
1249 }
1250 EXPORT_SYMBOL(__neigh_set_probe_once);
1251
1252 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1253                                  u8 *lladdr, void *saddr,
1254                                  struct net_device *dev)
1255 {
1256         struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1257                                                  lladdr || !dev->addr_len);
1258         if (neigh)
1259                 neigh_update(neigh, lladdr, NUD_STALE,
1260                              NEIGH_UPDATE_F_OVERRIDE);
1261         return neigh;
1262 }
1263 EXPORT_SYMBOL(neigh_event_ns);
1264
1265 /* called with read_lock_bh(&n->lock); */
1266 static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
1267 {
1268         struct net_device *dev = dst->dev;
1269         __be16 prot = dst->ops->protocol;
1270         struct hh_cache *hh = &n->hh;
1271
1272         write_lock_bh(&n->lock);
1273
1274         /* Only one thread can come in here and initialize the
1275          * hh_cache entry.
1276          */
1277         if (!hh->hh_len)
1278                 dev->header_ops->cache(n, hh, prot);
1279
1280         write_unlock_bh(&n->lock);
1281 }
1282
1283 /* This function can be used in contexts, where only old dev_queue_xmit
1284  * worked, f.e. if you want to override normal output path (eql, shaper),
1285  * but resolution is not made yet.
1286  */
1287
1288 int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb)
1289 {
1290         struct net_device *dev = skb->dev;
1291
1292         __skb_pull(skb, skb_network_offset(skb));
1293
1294         if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
1295                             skb->len) < 0 &&
1296             dev_rebuild_header(skb))
1297                 return 0;
1298
1299         return dev_queue_xmit(skb);
1300 }
1301 EXPORT_SYMBOL(neigh_compat_output);
1302
1303 /* Slow and careful. */
1304
1305 int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
1306 {
1307         struct dst_entry *dst = skb_dst(skb);
1308         int rc = 0;
1309
1310         if (!dst)
1311                 goto discard;
1312
1313         if (!neigh_event_send(neigh, skb)) {
1314                 int err;
1315                 struct net_device *dev = neigh->dev;
1316                 unsigned int seq;
1317
1318                 if (dev->header_ops->cache && !neigh->hh.hh_len)
1319                         neigh_hh_init(neigh, dst);
1320
1321                 do {
1322                         __skb_pull(skb, skb_network_offset(skb));
1323                         seq = read_seqbegin(&neigh->ha_lock);
1324                         err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1325                                               neigh->ha, NULL, skb->len);
1326                 } while (read_seqretry(&neigh->ha_lock, seq));
1327
1328                 if (err >= 0)
1329                         rc = dev_queue_xmit(skb);
1330                 else
1331                         goto out_kfree_skb;
1332         }
1333 out:
1334         return rc;
1335 discard:
1336         neigh_dbg(1, "%s: dst=%p neigh=%p\n", __func__, dst, neigh);
1337 out_kfree_skb:
1338         rc = -EINVAL;
1339         kfree_skb(skb);
1340         goto out;
1341 }
1342 EXPORT_SYMBOL(neigh_resolve_output);
1343
1344 /* As fast as possible without hh cache */
1345
1346 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1347 {
1348         struct net_device *dev = neigh->dev;
1349         unsigned int seq;
1350         int err;
1351
1352         do {
1353                 __skb_pull(skb, skb_network_offset(skb));
1354                 seq = read_seqbegin(&neigh->ha_lock);
1355                 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1356                                       neigh->ha, NULL, skb->len);
1357         } while (read_seqretry(&neigh->ha_lock, seq));
1358
1359         if (err >= 0)
1360                 err = dev_queue_xmit(skb);
1361         else {
1362                 err = -EINVAL;
1363                 kfree_skb(skb);
1364         }
1365         return err;
1366 }
1367 EXPORT_SYMBOL(neigh_connected_output);
1368
/* Hand @skb straight to the device queue; @neigh is not consulted. */
int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc = dev_queue_xmit(skb);

	return rc;
}
EXPORT_SYMBOL(neigh_direct_output);
1374
1375 static void neigh_proxy_process(unsigned long arg)
1376 {
1377         struct neigh_table *tbl = (struct neigh_table *)arg;
1378         long sched_next = 0;
1379         unsigned long now = jiffies;
1380         struct sk_buff *skb, *n;
1381
1382         spin_lock(&tbl->proxy_queue.lock);
1383
1384         skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
1385                 long tdif = NEIGH_CB(skb)->sched_next - now;
1386
1387                 if (tdif <= 0) {
1388                         struct net_device *dev = skb->dev;
1389
1390                         __skb_unlink(skb, &tbl->proxy_queue);
1391                         if (tbl->proxy_redo && netif_running(dev)) {
1392                                 rcu_read_lock();
1393                                 tbl->proxy_redo(skb);
1394                                 rcu_read_unlock();
1395                         } else {
1396                                 kfree_skb(skb);
1397                         }
1398
1399                         dev_put(dev);
1400                 } else if (!sched_next || tdif < sched_next)
1401                         sched_next = tdif;
1402         }
1403         del_timer(&tbl->proxy_timer);
1404         if (sched_next)
1405                 mod_timer(&tbl->proxy_timer, jiffies + sched_next);
1406         spin_unlock(&tbl->proxy_queue.lock);
1407 }
1408
1409 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1410                     struct sk_buff *skb)
1411 {
1412         unsigned long now = jiffies;
1413
1414         unsigned long sched_next = now + (prandom_u32() %
1415                                           NEIGH_VAR(p, PROXY_DELAY));
1416
1417         if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) {
1418                 kfree_skb(skb);
1419                 return;
1420         }
1421
1422         NEIGH_CB(skb)->sched_next = sched_next;
1423         NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1424
1425         spin_lock(&tbl->proxy_queue.lock);
1426         if (del_timer(&tbl->proxy_timer)) {
1427                 if (time_before(tbl->proxy_timer.expires, sched_next))
1428                         sched_next = tbl->proxy_timer.expires;
1429         }
1430         skb_dst_drop(skb);
1431         dev_hold(skb->dev);
1432         __skb_queue_tail(&tbl->proxy_queue, skb);
1433         mod_timer(&tbl->proxy_timer, sched_next);
1434         spin_unlock(&tbl->proxy_queue.lock);
1435 }
1436 EXPORT_SYMBOL(pneigh_enqueue);
1437
1438 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1439                                                       struct net *net, int ifindex)
1440 {
1441         struct neigh_parms *p;
1442
1443         list_for_each_entry(p, &tbl->parms_list, list) {
1444                 if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1445                     (!p->dev && !ifindex && net_eq(net, &init_net)))
1446                         return p;
1447         }
1448
1449         return NULL;
1450 }
1451
1452 struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1453                                       struct neigh_table *tbl)
1454 {
1455         struct neigh_parms *p;
1456         struct net *net = dev_net(dev);
1457         const struct net_device_ops *ops = dev->netdev_ops;
1458
1459         p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
1460         if (p) {
1461                 p->tbl            = tbl;
1462                 atomic_set(&p->refcnt, 1);
1463                 p->reachable_time =
1464                                 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
1465                 dev_hold(dev);
1466                 p->dev = dev;
1467                 write_pnet(&p->net, hold_net(net));
1468                 p->sysctl_table = NULL;
1469
1470                 if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
1471                         release_net(net);
1472                         dev_put(dev);
1473                         kfree(p);
1474                         return NULL;
1475                 }
1476
1477                 write_lock_bh(&tbl->lock);
1478                 list_add(&p->list, &tbl->parms.list);
1479                 write_unlock_bh(&tbl->lock);
1480
1481                 neigh_parms_data_state_cleanall(p);
1482         }
1483         return p;
1484 }
1485 EXPORT_SYMBOL(neigh_parms_alloc);
1486
1487 static void neigh_rcu_free_parms(struct rcu_head *head)
1488 {
1489         struct neigh_parms *parms =
1490                 container_of(head, struct neigh_parms, rcu_head);
1491
1492         neigh_parms_put(parms);
1493 }
1494
1495 void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
1496 {
1497         if (!parms || parms == &tbl->parms)
1498                 return;
1499         write_lock_bh(&tbl->lock);
1500         list_del(&parms->list);
1501         parms->dead = 1;
1502         write_unlock_bh(&tbl->lock);
1503         if (parms->dev)
1504                 dev_put(parms->dev);
1505         call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
1506 }
1507 EXPORT_SYMBOL(neigh_parms_release);
1508
/* Release the namespace reference held by @parms and free the structure. */
static void neigh_parms_destroy(struct neigh_parms *parms)
{
	struct net *net = neigh_parms_net(parms);

	release_net(net);
	kfree(parms);
}
1514
/* Lock class handed to skb_queue_head_init_class() for every table's
 * proxy_queue.
 */
1515 static struct lock_class_key neigh_table_proxy_queue_class;
1516
/* Registered tables, indexed by the NEIGH_*_TABLE slot passed to
 * neigh_table_init(); a slot is NULL while no table is registered in it.
 */
1517 static struct neigh_table *neigh_tables[NEIGH_NR_TABLES] __read_mostly;
1518
/*
 * Initialise @tbl and register it in slot @index of neigh_tables[].
 *
 * Sets up the default parameter set, per-CPU statistics, the /proc stat
 * entry (CONFIG_PROC_FS), the neighbour hash and the proxy hash buckets,
 * the periodic GC work and the proxy timer/queue.  Any allocation or proc
 * registration failure is fatal (panic()).
 *
 * NOTE(review): @index is used to index neigh_tables[] without a range
 * check - callers are presumably expected to pass a NEIGH_*_TABLE constant.
 */
1519 void neigh_table_init(int index, struct neigh_table *tbl)
1520 {
1521         unsigned long now = jiffies;
1522         unsigned long phsize;
1523
             /* The embedded default parms become the first list member. */
1524         INIT_LIST_HEAD(&tbl->parms_list);
1525         list_add(&tbl->parms.list, &tbl->parms_list);
1526         write_pnet(&tbl->parms.net, &init_net);
1527         atomic_set(&tbl->parms.refcnt, 1);
1528         tbl->parms.reachable_time =
1529                           neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));
1530
1531         tbl->stats = alloc_percpu(struct neigh_statistics);
1532         if (!tbl->stats)
1533                 panic("cannot create neighbour cache statistics");
1534
1535 #ifdef CONFIG_PROC_FS
1536         if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
1537                               &neigh_stat_seq_fops, tbl))
1538                 panic("cannot create neighbour proc dir entry");
1539 #endif
1540
1541         RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));
1542
1543         phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
1544         tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
1545
1546         if (!tbl->nht || !tbl->phash_buckets)
1547                 panic("cannot allocate neighbour cache hashes");
1548
             /* Default entry size: the struct up to the key, plus the key,
              * rounded up to NEIGH_PRIV_ALIGN.  A caller-supplied size is
              * only checked for alignment.
              */
1549         if (!tbl->entry_size)
1550                 tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
1551                                         tbl->key_len, NEIGH_PRIV_ALIGN);
1552         else
1553                 WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);
1554
             /* The lock is initialised before the GC work that takes it is queued. */
1555         rwlock_init(&tbl->lock);
1556         INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
1557         queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
1558                         tbl->parms.reachable_time);
1559         setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
1560         skb_queue_head_init_class(&tbl->proxy_queue,
1561                         &neigh_table_proxy_queue_class);
1562
1563         tbl->last_flush = now;
1564         tbl->last_rand  = now + tbl->parms.reachable_time * 20;
1565
             /* Publish the table last, once it is fully set up. */
1566         neigh_tables[index] = tbl;
1567 }
1568 EXPORT_SYMBOL(neigh_table_init);
1569
/*
 * Unregister the table in slot @index and tear down what
 * neigh_table_init() set up: GC work, proxy timer and queue, all entries,
 * both hash tables, the /proc stat entry and the per-CPU statistics.
 *
 * A critical message is logged if entries are still accounted after
 * neigh_ifdown().  Always returns 0.
 */
1570 int neigh_table_clear(int index, struct neigh_table *tbl)
1571 {
             /* Unpublish first so neigh_find_table() no longer returns it. */
1572         neigh_tables[index] = NULL;
1573         /* It is not clean... Fix it to unload IPv6 module safely */
1574         cancel_delayed_work_sync(&tbl->gc_work);
1575         del_timer_sync(&tbl->proxy_timer);
1576         pneigh_queue_purge(&tbl->proxy_queue);
1577         neigh_ifdown(tbl, NULL);
1578         if (atomic_read(&tbl->entries))
1579                 pr_crit("neighbour leakage\n");
1580
             /* The hash table is freed through an RCU callback. */
1581         call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
1582                  neigh_hash_free_rcu);
1583         tbl->nht = NULL;
1584
1585         kfree(tbl->phash_buckets);
1586         tbl->phash_buckets = NULL;
1587
1588         remove_proc_entry(tbl->id, init_net.proc_net_stat);
1589
1590         free_percpu(tbl->stats);
1591         tbl->stats = NULL;
1592
1593         return 0;
1594 }
1595 EXPORT_SYMBOL(neigh_table_clear);
1596
1597 static struct neigh_table *neigh_find_table(int family)
1598 {
1599         struct neigh_table *tbl = NULL;
1600
1601         switch (family) {
1602         case AF_INET:
1603                 tbl = neigh_tables[NEIGH_ARP_TABLE];
1604                 break;
1605         case AF_INET6:
1606                 tbl = neigh_tables[NEIGH_ND_TABLE];
1607                 break;
1608         case AF_DECnet:
1609                 tbl = neigh_tables[NEIGH_DN_TABLE];
1610                 break;
1611         }
1612
1613         return tbl;
1614 }
1615
1616 static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh)
1617 {
1618         struct net *net = sock_net(skb->sk);
1619         struct ndmsg *ndm;
1620         struct nlattr *dst_attr;
1621         struct neigh_table *tbl;
1622         struct neighbour *neigh;
1623         struct net_device *dev = NULL;
1624         int err = -EINVAL;
1625
1626         ASSERT_RTNL();
1627         if (nlmsg_len(nlh) < sizeof(*ndm))
1628                 goto out;
1629
1630         dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
1631         if (dst_attr == NULL)
1632                 goto out;
1633
1634         ndm = nlmsg_data(nlh);
1635         if (ndm->ndm_ifindex) {
1636                 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1637                 if (dev == NULL) {
1638                         err = -ENODEV;
1639                         goto out;
1640                 }
1641         }
1642
1643         tbl = neigh_find_table(ndm->ndm_family);
1644         if (tbl == NULL)
1645                 return -EAFNOSUPPORT;
1646
1647         if (nla_len(dst_attr) < tbl->key_len)
1648                 goto out;
1649
1650         if (ndm->ndm_flags & NTF_PROXY) {
1651                 err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
1652                 goto out;
1653         }
1654
1655         if (dev == NULL)
1656                 goto out;
1657
1658         neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
1659         if (neigh == NULL) {
1660                 err = -ENOENT;
1661                 goto out;
1662         }
1663
1664         err = neigh_update(neigh, NULL, NUD_FAILED,
1665                            NEIGH_UPDATE_F_OVERRIDE |
1666                            NEIGH_UPDATE_F_ADMIN);
1667         neigh_release(neigh);
1668
1669 out:
1670         return err;
1671 }
1672
1673 static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh)
1674 {
1675         int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
1676         struct net *net = sock_net(skb->sk);
1677         struct ndmsg *ndm;
1678         struct nlattr *tb[NDA_MAX+1];
1679         struct neigh_table *tbl;
1680         struct net_device *dev = NULL;
1681         struct neighbour *neigh;
1682         void *dst, *lladdr;
1683         int err;
1684
1685         ASSERT_RTNL();
1686         err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
1687         if (err < 0)
1688                 goto out;
1689
1690         err = -EINVAL;
1691         if (tb[NDA_DST] == NULL)
1692                 goto out;
1693
1694         ndm = nlmsg_data(nlh);
1695         if (ndm->ndm_ifindex) {
1696                 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1697                 if (dev == NULL) {
1698                         err = -ENODEV;
1699                         goto out;
1700                 }
1701
1702                 if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
1703                         goto out;
1704         }
1705
1706         tbl = neigh_find_table(ndm->ndm_family);
1707         if (tbl == NULL)
1708                 return -EAFNOSUPPORT;
1709
1710         if (nla_len(tb[NDA_DST]) < tbl->key_len)
1711                 goto out;
1712         dst = nla_data(tb[NDA_DST]);
1713         lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
1714
1715         if (ndm->ndm_flags & NTF_PROXY) {
1716                 struct pneigh_entry *pn;
1717
1718                 err = -ENOBUFS;
1719                 pn = pneigh_lookup(tbl, net, dst, dev, 1);
1720                 if (pn) {
1721                         pn->flags = ndm->ndm_flags;
1722                         err = 0;
1723                 }
1724                 goto out;
1725         }
1726
1727         if (dev == NULL)
1728                 goto out;
1729
1730         neigh = neigh_lookup(tbl, dst, dev);
1731         if (neigh == NULL) {
1732                 if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
1733                         err = -ENOENT;
1734                         goto out;
1735                 }
1736
1737                 neigh = __neigh_lookup_errno(tbl, dst, dev);
1738                 if (IS_ERR(neigh)) {
1739                         err = PTR_ERR(neigh);
1740                         goto out;
1741                 }
1742         } else {
1743                 if (nlh->nlmsg_flags & NLM_F_EXCL) {
1744                         err = -EEXIST;
1745                         neigh_release(neigh);
1746                         goto out;
1747                 }
1748
1749                 if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
1750                         flags &= ~NEIGH_UPDATE_F_OVERRIDE;
1751         }
1752
1753         if (ndm->ndm_flags & NTF_USE) {
1754                 neigh_event_send(neigh, NULL);
1755                 err = 0;
1756         } else
1757                 err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
1758         neigh_release(neigh);
1759
1760 out:
1761         return err;
1762 }
1763
1764 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
1765 {
1766         struct nlattr *nest;
1767
1768         nest = nla_nest_start(skb, NDTA_PARMS);
1769         if (nest == NULL)
1770                 return -ENOBUFS;
1771
1772         if ((parms->dev &&
1773              nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
1774             nla_put_u32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)) ||
1775             nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
1776                         NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
1777             /* approximative value for deprecated QUEUE_LEN (in packets) */
1778             nla_put_u32(skb, NDTPA_QUEUE_LEN,
1779                         NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
1780             nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
1781             nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
1782             nla_put_u32(skb, NDTPA_UCAST_PROBES,
1783                         NEIGH_VAR(parms, UCAST_PROBES)) ||
1784             nla_put_u32(skb, NDTPA_MCAST_PROBES,
1785                         NEIGH_VAR(parms, MCAST_PROBES)) ||
1786             nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time) ||
1787             nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
1788                           NEIGH_VAR(parms, BASE_REACHABLE_TIME)) ||
1789             nla_put_msecs(skb, NDTPA_GC_STALETIME,
1790                           NEIGH_VAR(parms, GC_STALETIME)) ||
1791             nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
1792                           NEIGH_VAR(parms, DELAY_PROBE_TIME)) ||
1793             nla_put_msecs(skb, NDTPA_RETRANS_TIME,
1794                           NEIGH_VAR(parms, RETRANS_TIME)) ||
1795             nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
1796                           NEIGH_VAR(parms, ANYCAST_DELAY)) ||
1797             nla_put_msecs(skb, NDTPA_PROXY_DELAY,
1798                           NEIGH_VAR(parms, PROXY_DELAY)) ||
1799             nla_put_msecs(skb, NDTPA_LOCKTIME,
1800                           NEIGH_VAR(parms, LOCKTIME)))
1801                 goto nla_put_failure;
1802         return nla_nest_end(skb, nest);
1803
1804 nla_put_failure:
1805         nla_nest_cancel(skb, nest);
1806         return -EMSGSIZE;
1807 }
1808
1809 static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
1810                               u32 pid, u32 seq, int type, int flags)
1811 {
1812         struct nlmsghdr *nlh;
1813         struct ndtmsg *ndtmsg;
1814
1815         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1816         if (nlh == NULL)
1817                 return -EMSGSIZE;
1818
1819         ndtmsg = nlmsg_data(nlh);
1820
1821         read_lock_bh(&tbl->lock);
1822         ndtmsg->ndtm_family = tbl->family;
1823         ndtmsg->ndtm_pad1   = 0;
1824         ndtmsg->ndtm_pad2   = 0;
1825
1826         if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
1827             nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval) ||
1828             nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
1829             nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
1830             nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
1831                 goto nla_put_failure;
1832         {
1833                 unsigned long now = jiffies;
1834                 unsigned int flush_delta = now - tbl->last_flush;
1835                 unsigned int rand_delta = now - tbl->last_rand;
1836                 struct neigh_hash_table *nht;
1837                 struct ndt_config ndc = {
1838                         .ndtc_key_len           = tbl->key_len,
1839                         .ndtc_entry_size        = tbl->entry_size,
1840                         .ndtc_entries           = atomic_read(&tbl->entries),
1841                         .ndtc_last_flush        = jiffies_to_msecs(flush_delta),
1842                         .ndtc_last_rand         = jiffies_to_msecs(rand_delta),
1843                         .ndtc_proxy_qlen        = tbl->proxy_queue.qlen,
1844                 };
1845
1846                 rcu_read_lock_bh();
1847                 nht = rcu_dereference_bh(tbl->nht);
1848                 ndc.ndtc_hash_rnd = nht->hash_rnd[0];
1849                 ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
1850                 rcu_read_unlock_bh();
1851
1852                 if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
1853                         goto nla_put_failure;
1854         }
1855
1856         {
1857                 int cpu;
1858                 struct ndt_stats ndst;
1859
1860                 memset(&ndst, 0, sizeof(ndst));
1861
1862                 for_each_possible_cpu(cpu) {
1863                         struct neigh_statistics *st;
1864
1865                         st = per_cpu_ptr(tbl->stats, cpu);
1866                         ndst.ndts_allocs                += st->allocs;
1867                         ndst.ndts_destroys              += st->destroys;
1868                         ndst.ndts_hash_grows            += st->hash_grows;
1869                         ndst.ndts_res_failed            += st->res_failed;
1870                         ndst.ndts_lookups               += st->lookups;
1871                         ndst.ndts_hits                  += st->hits;
1872                         ndst.ndts_rcv_probes_mcast      += st->rcv_probes_mcast;
1873                         ndst.ndts_rcv_probes_ucast      += st->rcv_probes_ucast;
1874                         ndst.ndts_periodic_gc_runs      += st->periodic_gc_runs;
1875                         ndst.ndts_forced_gc_runs        += st->forced_gc_runs;
1876                 }
1877
1878                 if (nla_put(skb, NDTA_STATS, sizeof(ndst), &ndst))
1879                         goto nla_put_failure;
1880         }
1881
1882         BUG_ON(tbl->parms.dev);
1883         if (neightbl_fill_parms(skb, &tbl->parms) < 0)
1884                 goto nla_put_failure;
1885
1886         read_unlock_bh(&tbl->lock);
1887         return nlmsg_end(skb, nlh);
1888
1889 nla_put_failure:
1890         read_unlock_bh(&tbl->lock);
1891         nlmsg_cancel(skb, nlh);
1892         return -EMSGSIZE;
1893 }
1894
1895 static int neightbl_fill_param_info(struct sk_buff *skb,
1896                                     struct neigh_table *tbl,
1897                                     struct neigh_parms *parms,
1898                                     u32 pid, u32 seq, int type,
1899                                     unsigned int flags)
1900 {
1901         struct ndtmsg *ndtmsg;
1902         struct nlmsghdr *nlh;
1903
1904         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1905         if (nlh == NULL)
1906                 return -EMSGSIZE;
1907
1908         ndtmsg = nlmsg_data(nlh);
1909
1910         read_lock_bh(&tbl->lock);
1911         ndtmsg->ndtm_family = tbl->family;
1912         ndtmsg->ndtm_pad1   = 0;
1913         ndtmsg->ndtm_pad2   = 0;
1914
1915         if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
1916             neightbl_fill_parms(skb, parms) < 0)
1917                 goto errout;
1918
1919         read_unlock_bh(&tbl->lock);
1920         return nlmsg_end(skb, nlh);
1921 errout:
1922         read_unlock_bh(&tbl->lock);
1923         nlmsg_cancel(skb, nlh);
1924         return -EMSGSIZE;
1925 }
1926
1927 static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
1928         [NDTA_NAME]             = { .type = NLA_STRING },
1929         [NDTA_THRESH1]          = { .type = NLA_U32 },
1930         [NDTA_THRESH2]          = { .type = NLA_U32 },
1931         [NDTA_THRESH3]          = { .type = NLA_U32 },
1932         [NDTA_GC_INTERVAL]      = { .type = NLA_U64 },
1933         [NDTA_PARMS]            = { .type = NLA_NESTED },
1934 };
1935
1936 static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
1937         [NDTPA_IFINDEX]                 = { .type = NLA_U32 },
1938         [NDTPA_QUEUE_LEN]               = { .type = NLA_U32 },
1939         [NDTPA_PROXY_QLEN]              = { .type = NLA_U32 },
1940         [NDTPA_APP_PROBES]              = { .type = NLA_U32 },
1941         [NDTPA_UCAST_PROBES]            = { .type = NLA_U32 },
1942         [NDTPA_MCAST_PROBES]            = { .type = NLA_U32 },
1943         [NDTPA_BASE_REACHABLE_TIME]     = { .type = NLA_U64 },
1944         [NDTPA_GC_STALETIME]            = { .type = NLA_U64 },
1945         [NDTPA_DELAY_PROBE_TIME]        = { .type = NLA_U64 },
1946         [NDTPA_RETRANS_TIME]            = { .type = NLA_U64 },
1947         [NDTPA_ANYCAST_DELAY]           = { .type = NLA_U64 },
1948         [NDTPA_PROXY_DELAY]             = { .type = NLA_U64 },
1949         [NDTPA_LOCKTIME]                = { .type = NLA_U64 },
1950 };
1951
1952 static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh)
1953 {
1954         struct net *net = sock_net(skb->sk);
1955         struct neigh_table *tbl;
1956         struct ndtmsg *ndtmsg;
1957         struct nlattr *tb[NDTA_MAX+1];
1958         bool found = false;
1959         int err, tidx;
1960
1961         err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
1962                           nl_neightbl_policy);
1963         if (err < 0)
1964                 goto errout;
1965
1966         if (tb[NDTA_NAME] == NULL) {
1967                 err = -EINVAL;
1968                 goto errout;
1969         }
1970
1971         ndtmsg = nlmsg_data(nlh);
1972
1973         for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
1974                 tbl = neigh_tables[tidx];
1975                 if (!tbl)
1976                         continue;
1977                 if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
1978                         continue;
1979                 if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) {
1980                         found = true;
1981                         break;
1982                 }
1983         }
1984
1985         if (!found)
1986                 return -ENOENT;
1987
1988         /*
1989          * We acquire tbl->lock to be nice to the periodic timers and
1990          * make sure they always see a consistent set of values.
1991          */
1992         write_lock_bh(&tbl->lock);
1993
1994         if (tb[NDTA_PARMS]) {
1995                 struct nlattr *tbp[NDTPA_MAX+1];
1996                 struct neigh_parms *p;
1997                 int i, ifindex = 0;
1998
1999                 err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
2000                                        nl_ntbl_parm_policy);
2001                 if (err < 0)
2002                         goto errout_tbl_lock;
2003
2004                 if (tbp[NDTPA_IFINDEX])
2005                         ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
2006
2007                 p = lookup_neigh_parms(tbl, net, ifindex);
2008                 if (p == NULL) {
2009                         err = -ENOENT;
2010                         goto errout_tbl_lock;
2011                 }
2012
2013                 for (i = 1; i <= NDTPA_MAX; i++) {
2014                         if (tbp[i] == NULL)
2015                                 continue;
2016
2017                         switch (i) {
2018                         case NDTPA_QUEUE_LEN:
2019                                 NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
2020                                               nla_get_u32(tbp[i]) *
2021                                               SKB_TRUESIZE(ETH_FRAME_LEN));
2022                                 break;
2023                         case NDTPA_QUEUE_LENBYTES:
2024                                 NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
2025                                               nla_get_u32(tbp[i]));
2026                                 break;
2027                         case NDTPA_PROXY_QLEN:
2028                                 NEIGH_VAR_SET(p, PROXY_QLEN,
2029                                               nla_get_u32(tbp[i]));
2030                                 break;
2031                         case NDTPA_APP_PROBES:
2032                                 NEIGH_VAR_SET(p, APP_PROBES,
2033                                               nla_get_u32(tbp[i]));
2034                                 break;
2035                         case NDTPA_UCAST_PROBES:
2036                                 NEIGH_VAR_SET(p, UCAST_PROBES,
2037                                               nla_get_u32(tbp[i]));
2038                                 break;
2039                         case NDTPA_MCAST_PROBES:
2040                                 NEIGH_VAR_SET(p, MCAST_PROBES,
2041                                               nla_get_u32(tbp[i]));
2042                                 break;
2043                         case NDTPA_BASE_REACHABLE_TIME:
2044                                 NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
2045                                               nla_get_msecs(tbp[i]));
2046                                 break;
2047                         case NDTPA_GC_STALETIME:
2048                                 NEIGH_VAR_SET(p, GC_STALETIME,
2049                                               nla_get_msecs(tbp[i]));
2050                                 break;
2051                         case NDTPA_DELAY_PROBE_TIME:
2052                                 NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
2053                                               nla_get_msecs(tbp[i]));
2054                                 break;
2055                         case NDTPA_RETRANS_TIME:
2056                                 NEIGH_VAR_SET(p, RETRANS_TIME,
2057                                               nla_get_msecs(tbp[i]));
2058                                 break;
2059                         case NDTPA_ANYCAST_DELAY:
2060                                 NEIGH_VAR_SET(p, ANYCAST_DELAY,
2061                                               nla_get_msecs(tbp[i]));
2062                                 break;
2063                         case NDTPA_PROXY_DELAY:
2064                                 NEIGH_VAR_SET(p, PROXY_DELAY,
2065                                               nla_get_msecs(tbp[i]));
2066                                 break;
2067                         case NDTPA_LOCKTIME:
2068                                 NEIGH_VAR_SET(p, LOCKTIME,
2069                                               nla_get_msecs(tbp[i]));
2070                                 break;
2071                         }
2072                 }
2073         }
2074
2075         err = -ENOENT;
2076         if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
2077              tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
2078             !net_eq(net, &init_net))
2079                 goto errout_tbl_lock;
2080
2081         if (tb[NDTA_THRESH1])
2082                 tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
2083
2084         if (tb[NDTA_THRESH2])
2085                 tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
2086
2087         if (tb[NDTA_THRESH3])
2088                 tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
2089
2090         if (tb[NDTA_GC_INTERVAL])
2091                 tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
2092
2093         err = 0;
2094
2095 errout_tbl_lock:
2096         write_unlock_bh(&tbl->lock);
2097 errout:
2098         return err;
2099 }
2100
2101 static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2102 {
2103         struct net *net = sock_net(skb->sk);
2104         int family, tidx, nidx = 0;
2105         int tbl_skip = cb->args[0];
2106         int neigh_skip = cb->args[1];
2107         struct neigh_table *tbl;
2108
2109         family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2110
2111         for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
2112                 struct neigh_parms *p;
2113
2114                 tbl = neigh_tables[tidx];
2115                 if (!tbl)
2116                         continue;
2117
2118                 if (tidx < tbl_skip || (family && tbl->family != family))
2119                         continue;
2120
2121                 if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
2122                                        cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
2123                                        NLM_F_MULTI) <= 0)
2124                         break;
2125
2126                 nidx = 0;
2127                 p = list_next_entry(&tbl->parms, list);
2128                 list_for_each_entry_from(p, &tbl->parms_list, list) {
2129                         if (!net_eq(neigh_parms_net(p), net))
2130                                 continue;
2131
2132                         if (nidx < neigh_skip)
2133                                 goto next;
2134
2135                         if (neightbl_fill_param_info(skb, tbl, p,
2136                                                      NETLINK_CB(cb->skb).portid,
2137                                                      cb->nlh->nlmsg_seq,
2138                                                      RTM_NEWNEIGHTBL,
2139                                                      NLM_F_MULTI) <= 0)
2140                                 goto out;
2141                 next:
2142                         nidx++;
2143                 }
2144
2145                 neigh_skip = 0;
2146         }
2147 out:
2148         cb->args[0] = tidx;
2149         cb->args[1] = nidx;
2150
2151         return skb->len;
2152 }
2153
2154 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
2155                            u32 pid, u32 seq, int type, unsigned int flags)
2156 {
2157         unsigned long now = jiffies;
2158         struct nda_cacheinfo ci;
2159         struct nlmsghdr *nlh;
2160         struct ndmsg *ndm;
2161
2162         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2163         if (nlh == NULL)
2164                 return -EMSGSIZE;
2165
2166         ndm = nlmsg_data(nlh);
2167         ndm->ndm_family  = neigh->ops->family;
2168         ndm->ndm_pad1    = 0;
2169         ndm->ndm_pad2    = 0;
2170         ndm->ndm_flags   = neigh->flags;
2171         ndm->ndm_type    = neigh->type;
2172         ndm->ndm_ifindex = neigh->dev->ifindex;
2173
2174         if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
2175                 goto nla_put_failure;
2176
2177         read_lock_bh(&neigh->lock);
2178         ndm->ndm_state   = neigh->nud_state;
2179         if (neigh->nud_state & NUD_VALID) {
2180                 char haddr[MAX_ADDR_LEN];
2181
2182                 neigh_ha_snapshot(haddr, neigh, neigh->dev);
2183                 if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
2184                         read_unlock_bh(&neigh->lock);
2185                         goto nla_put_failure;
2186                 }
2187         }
2188
2189         ci.ndm_used      = jiffies_to_clock_t(now - neigh->used);
2190         ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
2191         ci.ndm_updated   = jiffies_to_clock_t(now - neigh->updated);
2192         ci.ndm_refcnt    = atomic_read(&neigh->refcnt) - 1;
2193         read_unlock_bh(&neigh->lock);
2194
2195         if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
2196             nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
2197                 goto nla_put_failure;
2198
2199         return nlmsg_end(skb, nlh);
2200
2201 nla_put_failure:
2202         nlmsg_cancel(skb, nlh);
2203         return -EMSGSIZE;
2204 }
2205
2206 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2207                             u32 pid, u32 seq, int type, unsigned int flags,
2208                             struct neigh_table *tbl)
2209 {
2210         struct nlmsghdr *nlh;
2211         struct ndmsg *ndm;
2212
2213         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2214         if (nlh == NULL)
2215                 return -EMSGSIZE;
2216
2217         ndm = nlmsg_data(nlh);
2218         ndm->ndm_family  = tbl->family;
2219         ndm->ndm_pad1    = 0;
2220         ndm->ndm_pad2    = 0;
2221         ndm->ndm_flags   = pn->flags | NTF_PROXY;
2222         ndm->ndm_type    = RTN_UNICAST;
2223         ndm->ndm_ifindex = pn->dev->ifindex;
2224         ndm->ndm_state   = NUD_NONE;
2225
2226         if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2227                 goto nla_put_failure;
2228
2229         return nlmsg_end(skb, nlh);
2230
2231 nla_put_failure:
2232         nlmsg_cancel(skb, nlh);
2233         return -EMSGSIZE;
2234 }
2235
2236 static void neigh_update_notify(struct neighbour *neigh)
2237 {
2238         call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
2239         __neigh_notify(neigh, RTM_NEWNEIGH, 0);
2240 }
2241
2242 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2243                             struct netlink_callback *cb)
2244 {
2245         struct net *net = sock_net(skb->sk);
2246         struct neighbour *n;
2247         int rc, h, s_h = cb->args[1];
2248         int idx, s_idx = idx = cb->args[2];
2249         struct neigh_hash_table *nht;
2250
2251         rcu_read_lock_bh();
2252         nht = rcu_dereference_bh(tbl->nht);
2253
2254         for (h = s_h; h < (1 << nht->hash_shift); h++) {
2255                 if (h > s_h)
2256                         s_idx = 0;
2257                 for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
2258                      n != NULL;
2259                      n = rcu_dereference_bh(n->next)) {
2260                         if (!net_eq(dev_net(n->dev), net))
2261                                 continue;
2262                         if (idx < s_idx)
2263                                 goto next;
2264                         if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2265                                             cb->nlh->nlmsg_seq,
2266                                             RTM_NEWNEIGH,
2267                                             NLM_F_MULTI) <= 0) {
2268                                 rc = -1;
2269                                 goto out;
2270                         }
2271 next:
2272                         idx++;
2273                 }
2274         }
2275         rc = skb->len;
2276 out:
2277         rcu_read_unlock_bh();
2278         cb->args[1] = h;
2279         cb->args[2] = idx;
2280         return rc;
2281 }
2282
2283 static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2284                              struct netlink_callback *cb)
2285 {
2286         struct pneigh_entry *n;
2287         struct net *net = sock_net(skb->sk);
2288         int rc, h, s_h = cb->args[3];
2289         int idx, s_idx = idx = cb->args[4];
2290
2291         read_lock_bh(&tbl->lock);
2292
2293         for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
2294                 if (h > s_h)
2295                         s_idx = 0;
2296                 for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
2297                         if (dev_net(n->dev) != net)
2298                                 continue;
2299                         if (idx < s_idx)
2300                                 goto next;
2301                         if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2302                                             cb->nlh->nlmsg_seq,
2303                                             RTM_NEWNEIGH,
2304                                             NLM_F_MULTI, tbl) <= 0) {
2305                                 read_unlock_bh(&tbl->lock);
2306                                 rc = -1;
2307                                 goto out;
2308                         }
2309                 next:
2310                         idx++;
2311                 }
2312         }
2313
2314         read_unlock_bh(&tbl->lock);
2315         rc = skb->len;
2316 out:
2317         cb->args[3] = h;
2318         cb->args[4] = idx;
2319         return rc;
2320
2321 }
2322
2323 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2324 {
2325         struct neigh_table *tbl;
2326         int t, family, s_t;
2327         int proxy = 0;
2328         int err;
2329
2330         family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2331
2332         /* check for full ndmsg structure presence, family member is
2333          * the same for both structures
2334          */
2335         if (nlmsg_len(cb->nlh) >= sizeof(struct ndmsg) &&
2336             ((struct ndmsg *) nlmsg_data(cb->nlh))->ndm_flags == NTF_PROXY)
2337                 proxy = 1;
2338
2339         s_t = cb->args[0];
2340
2341         for (t = 0; t < NEIGH_NR_TABLES; t++) {
2342                 tbl = neigh_tables[t];
2343
2344                 if (!tbl)
2345                         continue;
2346                 if (t < s_t || (family && tbl->family != family))
2347                         continue;
2348                 if (t > s_t)
2349                         memset(&cb->args[1], 0, sizeof(cb->args) -
2350                                                 sizeof(cb->args[0]));
2351                 if (proxy)
2352                         err = pneigh_dump_table(tbl, skb, cb);
2353                 else
2354                         err = neigh_dump_table(tbl, skb, cb);
2355                 if (err < 0)
2356                         break;
2357         }
2358
2359         cb->args[0] = t;
2360         return skb->len;
2361 }
2362
2363 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2364 {
2365         int chain;
2366         struct neigh_hash_table *nht;
2367
2368         rcu_read_lock_bh();
2369         nht = rcu_dereference_bh(tbl->nht);
2370
2371         read_lock(&tbl->lock); /* avoid resizes */
2372         for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2373                 struct neighbour *n;
2374
2375                 for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
2376                      n != NULL;
2377                      n = rcu_dereference_bh(n->next))
2378                         cb(n, cookie);
2379         }
2380         read_unlock(&tbl->lock);
2381         rcu_read_unlock_bh();
2382 }
2383 EXPORT_SYMBOL(neigh_for_each);
2384
2385 /* The tbl->lock must be held as a writer and BH disabled. */
2386 void __neigh_for_each_release(struct neigh_table *tbl,
2387                               int (*cb)(struct neighbour *))
2388 {
2389         int chain;
2390         struct neigh_hash_table *nht;
2391
2392         nht = rcu_dereference_protected(tbl->nht,
2393                                         lockdep_is_held(&tbl->lock));
2394         for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2395                 struct neighbour *n;
2396                 struct neighbour __rcu **np;
2397
2398                 np = &nht->hash_buckets[chain];
2399                 while ((n = rcu_dereference_protected(*np,
2400                                         lockdep_is_held(&tbl->lock))) != NULL) {
2401                         int release;
2402
2403                         write_lock(&n->lock);
2404                         release = cb(n);
2405                         if (release) {
2406                                 rcu_assign_pointer(*np,
2407                                         rcu_dereference_protected(n->next,
2408                                                 lockdep_is_held(&tbl->lock)));
2409                                 n->dead = 1;
2410                         } else
2411                                 np = &n->next;
2412                         write_unlock(&n->lock);
2413                         if (release)
2414                                 neigh_cleanup_and_release(n);
2415                 }
2416         }
2417 }
2418 EXPORT_SYMBOL(__neigh_for_each_release);
2419
2420 #ifdef CONFIG_PROC_FS
2421
2422 static struct neighbour *neigh_get_first(struct seq_file *seq)
2423 {
2424         struct neigh_seq_state *state = seq->private;
2425         struct net *net = seq_file_net(seq);
2426         struct neigh_hash_table *nht = state->nht;
2427         struct neighbour *n = NULL;
2428         int bucket = state->bucket;
2429
2430         state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
2431         for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
2432                 n = rcu_dereference_bh(nht->hash_buckets[bucket]);
2433
2434                 while (n) {
2435                         if (!net_eq(dev_net(n->dev), net))
2436                                 goto next;
2437                         if (state->neigh_sub_iter) {
2438                                 loff_t fakep = 0;
2439                                 void *v;
2440
2441                                 v = state->neigh_sub_iter(state, n, &fakep);
2442                                 if (!v)
2443                                         goto next;
2444                         }
2445                         if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2446                                 break;
2447                         if (n->nud_state & ~NUD_NOARP)
2448                                 break;
2449 next:
2450                         n = rcu_dereference_bh(n->next);
2451                 }
2452
2453                 if (n)
2454                         break;
2455         }
2456         state->bucket = bucket;
2457
2458         return n;
2459 }
2460
2461 static struct neighbour *neigh_get_next(struct seq_file *seq,
2462                                         struct neighbour *n,
2463                                         loff_t *pos)
2464 {
2465         struct neigh_seq_state *state = seq->private;
2466         struct net *net = seq_file_net(seq);
2467         struct neigh_hash_table *nht = state->nht;
2468
2469         if (state->neigh_sub_iter) {
2470                 void *v = state->neigh_sub_iter(state, n, pos);
2471                 if (v)
2472                         return n;
2473         }
2474         n = rcu_dereference_bh(n->next);
2475
2476         while (1) {
2477                 while (n) {
2478                         if (!net_eq(dev_net(n->dev), net))
2479                                 goto next;
2480                         if (state->neigh_sub_iter) {
2481                                 void *v = state->neigh_sub_iter(state, n, pos);
2482                                 if (v)
2483                                         return n;
2484                                 goto next;
2485                         }
2486                         if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2487                                 break;
2488
2489                         if (n->nud_state & ~NUD_NOARP)
2490                                 break;
2491 next:
2492                         n = rcu_dereference_bh(n->next);
2493                 }
2494
2495                 if (n)
2496                         break;
2497
2498                 if (++state->bucket >= (1 << nht->hash_shift))
2499                         break;
2500
2501                 n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
2502         }
2503
2504         if (n && pos)
2505                 --(*pos);
2506         return n;
2507 }
2508
2509 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2510 {
2511         struct neighbour *n = neigh_get_first(seq);
2512
2513         if (n) {
2514                 --(*pos);
2515                 while (*pos) {
2516                         n = neigh_get_next(seq, n, pos);
2517                         if (!n)
2518                                 break;
2519                 }
2520         }
2521         return *pos ? NULL : n;
2522 }
2523
2524 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2525 {
2526         struct neigh_seq_state *state = seq->private;
2527         struct net *net = seq_file_net(seq);
2528         struct neigh_table *tbl = state->tbl;
2529         struct pneigh_entry *pn = NULL;
2530         int bucket = state->bucket;
2531
2532         state->flags |= NEIGH_SEQ_IS_PNEIGH;
2533         for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2534                 pn = tbl->phash_buckets[bucket];
2535                 while (pn && !net_eq(pneigh_net(pn), net))
2536                         pn = pn->next;
2537                 if (pn)
2538                         break;
2539         }
2540         state->bucket = bucket;
2541
2542         return pn;
2543 }
2544
2545 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2546                                             struct pneigh_entry *pn,
2547                                             loff_t *pos)
2548 {
2549         struct neigh_seq_state *state = seq->private;
2550         struct net *net = seq_file_net(seq);
2551         struct neigh_table *tbl = state->tbl;
2552
2553         do {
2554                 pn = pn->next;
2555         } while (pn && !net_eq(pneigh_net(pn), net));
2556
2557         while (!pn) {
2558                 if (++state->bucket > PNEIGH_HASHMASK)
2559                         break;
2560                 pn = tbl->phash_buckets[state->bucket];
2561                 while (pn && !net_eq(pneigh_net(pn), net))
2562                         pn = pn->next;
2563                 if (pn)
2564                         break;
2565         }
2566
2567         if (pn && pos)
2568                 --(*pos);
2569
2570         return pn;
2571 }
2572
2573 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2574 {
2575         struct pneigh_entry *pn = pneigh_get_first(seq);
2576
2577         if (pn) {
2578                 --(*pos);
2579                 while (*pos) {
2580                         pn = pneigh_get_next(seq, pn, pos);
2581                         if (!pn)
2582                                 break;
2583                 }
2584         }
2585         return *pos ? NULL : pn;
2586 }
2587
2588 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2589 {
2590         struct neigh_seq_state *state = seq->private;
2591         void *rc;
2592         loff_t idxpos = *pos;
2593
2594         rc = neigh_get_idx(seq, &idxpos);
2595         if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2596                 rc = pneigh_get_idx(seq, &idxpos);
2597
2598         return rc;
2599 }
2600
2601 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
2602         __acquires(rcu_bh)
2603 {
2604         struct neigh_seq_state *state = seq->private;
2605
2606         state->tbl = tbl;
2607         state->bucket = 0;
2608         state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
2609
2610         rcu_read_lock_bh();
2611         state->nht = rcu_dereference_bh(tbl->nht);
2612
2613         return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
2614 }
2615 EXPORT_SYMBOL(neigh_seq_start);
2616
2617 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2618 {
2619         struct neigh_seq_state *state;
2620         void *rc;
2621
2622         if (v == SEQ_START_TOKEN) {
2623                 rc = neigh_get_first(seq);
2624                 goto out;
2625         }
2626
2627         state = seq->private;
2628         if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
2629                 rc = neigh_get_next(seq, v, NULL);
2630                 if (rc)
2631                         goto out;
2632                 if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2633                         rc = pneigh_get_first(seq);
2634         } else {
2635                 BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
2636                 rc = pneigh_get_next(seq, v, NULL);
2637         }
2638 out:
2639         ++(*pos);
2640         return rc;
2641 }
2642 EXPORT_SYMBOL(neigh_seq_next);
2643
2644 void neigh_seq_stop(struct seq_file *seq, void *v)
2645         __releases(rcu_bh)
2646 {
2647         rcu_read_unlock_bh();
2648 }
2649 EXPORT_SYMBOL(neigh_seq_stop);
2650
2651 /* statistics via seq_file */
2652
2653 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2654 {
2655         struct neigh_table *tbl = seq->private;
2656         int cpu;
2657
2658         if (*pos == 0)
2659                 return SEQ_START_TOKEN;
2660
2661         for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
2662                 if (!cpu_possible(cpu))
2663                         continue;
2664                 *pos = cpu+1;
2665                 return per_cpu_ptr(tbl->stats, cpu);
2666         }
2667         return NULL;
2668 }
2669
2670 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2671 {
2672         struct neigh_table *tbl = seq->private;
2673         int cpu;
2674
2675         for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2676                 if (!cpu_possible(cpu))
2677                         continue;
2678                 *pos = cpu+1;
2679                 return per_cpu_ptr(tbl->stats, cpu);
2680         }
2681         return NULL;
2682 }
2683
/*
 * ->stop for the per-CPU statistics file.  The start/next callbacks of
 * this iterator take no lock and allocate nothing, so there is nothing to
 * undo here.
 */
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{
}
2688
2689 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
2690 {
2691         struct neigh_table *tbl = seq->private;
2692         struct neigh_statistics *st = v;
2693
2694         if (v == SEQ_START_TOKEN) {
2695                 seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards\n");
2696                 return 0;
2697         }
2698
2699         seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
2700                         "%08lx %08lx  %08lx %08lx %08lx\n",
2701                    atomic_read(&tbl->entries),
2702
2703                    st->allocs,
2704                    st->destroys,
2705                    st->hash_grows,
2706
2707                    st->lookups,
2708                    st->hits,
2709
2710                    st->res_failed,
2711
2712                    st->rcv_probes_mcast,
2713                    st->rcv_probes_ucast,
2714
2715                    st->periodic_gc_runs,
2716                    st->forced_gc_runs,
2717                    st->unres_discards
2718                    );
2719
2720         return 0;
2721 }
2722
2723 static const struct seq_operations neigh_stat_seq_ops = {
2724         .start  = neigh_stat_seq_start,
2725         .next   = neigh_stat_seq_next,
2726         .stop   = neigh_stat_seq_stop,
2727         .show   = neigh_stat_seq_show,
2728 };
2729
2730 static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2731 {
2732         int ret = seq_open(file, &neigh_stat_seq_ops);
2733
2734         if (!ret) {
2735                 struct seq_file *sf = file->private_data;
2736                 sf->private = PDE_DATA(inode);
2737         }
2738         return ret;
2739 };
2740
2741 static const struct file_operations neigh_stat_seq_fops = {
2742         .owner   = THIS_MODULE,
2743         .open    = neigh_stat_seq_open,
2744         .read    = seq_read,
2745         .llseek  = seq_lseek,
2746         .release = seq_release,
2747 };
2748
2749 #endif /* CONFIG_PROC_FS */
2750
2751 static inline size_t neigh_nlmsg_size(void)
2752 {
2753         return NLMSG_ALIGN(sizeof(struct ndmsg))
2754                + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2755                + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2756                + nla_total_size(sizeof(struct nda_cacheinfo))
2757                + nla_total_size(4); /* NDA_PROBES */
2758 }
2759
2760 static void __neigh_notify(struct neighbour *n, int type, int flags)
2761 {
2762         struct net *net = dev_net(n->dev);
2763         struct sk_buff *skb;
2764         int err = -ENOBUFS;
2765
2766         skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
2767         if (skb == NULL)
2768                 goto errout;
2769
2770         err = neigh_fill_info(skb, n, 0, 0, type, flags);
2771         if (err < 0) {
2772                 /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
2773                 WARN_ON(err == -EMSGSIZE);
2774                 kfree_skb(skb);
2775                 goto errout;
2776         }
2777         rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
2778         return;
2779 errout:
2780         if (err < 0)
2781                 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
2782 }
2783
2784 void neigh_app_ns(struct neighbour *n)
2785 {
2786         __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
2787 }
2788 EXPORT_SYMBOL(neigh_app_ns);
2789
2790 #ifdef CONFIG_SYSCTL
2791 static int zero;
2792 static int int_max = INT_MAX;
2793 static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
2794
2795 static int proc_unres_qlen(struct ctl_table *ctl, int write,
2796                            void __user *buffer, size_t *lenp, loff_t *ppos)
2797 {
2798         int size, ret;
2799         struct ctl_table tmp = *ctl;
2800
2801         tmp.extra1 = &zero;
2802         tmp.extra2 = &unres_qlen_max;
2803         tmp.data = &size;
2804
2805         size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
2806         ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
2807
2808         if (write && !ret)
2809                 *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
2810         return ret;
2811 }
2812
2813 static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
2814                                                    int family)
2815 {
2816         switch (family) {
2817         case AF_INET:
2818                 return __in_dev_arp_parms_get_rcu(dev);
2819         case AF_INET6:
2820                 return __in6_dev_nd_parms_get_rcu(dev);
2821         }
2822         return NULL;
2823 }
2824
2825 static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
2826                                   int index)
2827 {
2828         struct net_device *dev;
2829         int family = neigh_parms_family(p);
2830
2831         rcu_read_lock();
2832         for_each_netdev_rcu(net, dev) {
2833                 struct neigh_parms *dst_p =
2834                                 neigh_get_dev_parms_rcu(dev, family);
2835
2836                 if (dst_p && !test_bit(index, dst_p->data_state))
2837                         dst_p->data[index] = p->data[index];
2838         }
2839         rcu_read_unlock();
2840 }
2841
2842 static void neigh_proc_update(struct ctl_table *ctl, int write)
2843 {
2844         struct net_device *dev = ctl->extra1;
2845         struct neigh_parms *p = ctl->extra2;
2846         struct net *net = neigh_parms_net(p);
2847         int index = (int *) ctl->data - p->data;
2848
2849         if (!write)
2850                 return;
2851
2852         set_bit(index, p->data_state);
2853         if (!dev) /* NULL dev means this is default value */
2854                 neigh_copy_dflt_parms(net, p, index);
2855 }
2856
2857 static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write,
2858                                            void __user *buffer,
2859                                            size_t *lenp, loff_t *ppos)
2860 {
2861         struct ctl_table tmp = *ctl;
2862         int ret;
2863
2864         tmp.extra1 = &zero;
2865         tmp.extra2 = &int_max;
2866
2867         ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
2868         neigh_proc_update(ctl, write);
2869         return ret;
2870 }
2871
2872 int neigh_proc_dointvec(struct ctl_table *ctl, int write,
2873                         void __user *buffer, size_t *lenp, loff_t *ppos)
2874 {
2875         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2876
2877         neigh_proc_update(ctl, write);
2878         return ret;
2879 }
2880 EXPORT_SYMBOL(neigh_proc_dointvec);
2881
2882 int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write,
2883                                 void __user *buffer,
2884                                 size_t *lenp, loff_t *ppos)
2885 {
2886         int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
2887
2888         neigh_proc_update(ctl, write);
2889         return ret;
2890 }
2891 EXPORT_SYMBOL(neigh_proc_dointvec_jiffies);
2892
2893 static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write,
2894                                               void __user *buffer,
2895                                               size_t *lenp, loff_t *ppos)
2896 {
2897         int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);
2898
2899         neigh_proc_update(ctl, write);
2900         return ret;
2901 }
2902
2903 int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write,
2904                                    void __user *buffer,
2905                                    size_t *lenp, loff_t *ppos)
2906 {
2907         int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
2908
2909         neigh_proc_update(ctl, write);
2910         return ret;
2911 }
2912 EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies);
2913
2914 static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write,
2915                                           void __user *buffer,
2916                                           size_t *lenp, loff_t *ppos)
2917 {
2918         int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos);
2919
2920         neigh_proc_update(ctl, write);
2921         return ret;
2922 }
2923
/*
 * Address of data[idx] within a struct neigh_parms located at address 0,
 * i.e. the member's offset expressed as a pointer.  neigh_sysctl_register()
 * adds the real neigh_parms address to it.
 */
#define NEIGH_PARMS_DATA_OFFSET(idx)	\
	(&((struct neigh_parms *) 0)->data[idx])

/*
 * Initialiser for slot NEIGH_VAR_<var> of a ctl_table array.  The entry is
 * an int-sized value backed by data[NEIGH_VAR_<data_var>], published as
 * "pname" with permissions "perm" and served by "handler".
 */
#define NEIGH_SYSCTL_ENTRY(var, data_var, pname, perm, handler) \
	[NEIGH_VAR_ ## var] = { \
		.procname	= pname, \
		.data		= NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_var), \
		.maxlen		= sizeof(int), \
		.mode		= perm, \
		.proc_handler	= handler, \
	}

/* Mode 0644 entries whose slot and backing data share one index */
#define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(var, pname) \
	NEIGH_SYSCTL_ENTRY(var, var, pname, 0644, neigh_proc_dointvec_zero_intmax)

#define NEIGH_SYSCTL_JIFFIES_ENTRY(var, pname) \
	NEIGH_SYSCTL_ENTRY(var, var, pname, 0644, neigh_proc_dointvec_jiffies)

#define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(var, pname) \
	NEIGH_SYSCTL_ENTRY(var, var, pname, 0644, neigh_proc_dointvec_userhz_jiffies)

#define NEIGH_SYSCTL_MS_JIFFIES_ENTRY(var, pname) \
	NEIGH_SYSCTL_ENTRY(var, var, pname, 0644, neigh_proc_dointvec_ms_jiffies)

/* Mode 0644 entries that present another slot's data in a different unit */
#define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(var, data_var, pname) \
	NEIGH_SYSCTL_ENTRY(var, data_var, pname, 0644, neigh_proc_dointvec_ms_jiffies)

#define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(var, data_var, pname) \
	NEIGH_SYSCTL_ENTRY(var, data_var, pname, 0644, neigh_proc_dointvec_unres_qlen)
2953
2954 static struct neigh_sysctl_table {
2955         struct ctl_table_header *sysctl_header;
2956         struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
2957 } neigh_sysctl_template __read_mostly = {
2958         .neigh_vars = {
2959                 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
2960                 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
2961                 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
2962                 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
2963                 NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
2964                 NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
2965                 NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
2966                 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
2967                 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
2968                 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"),
2969                 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"),
2970                 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"),
2971                 NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"),
2972                 NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"),
2973                 NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"),
2974                 [NEIGH_VAR_GC_INTERVAL] = {
2975                         .procname       = "gc_interval",
2976                         .maxlen         = sizeof(int),
2977                         .mode           = 0644,
2978                         .proc_handler   = proc_dointvec_jiffies,
2979                 },
2980                 [NEIGH_VAR_GC_THRESH1] = {
2981                         .procname       = "gc_thresh1",
2982                         .maxlen         = sizeof(int),
2983                         .mode           = 0644,
2984                         .extra1         = &zero,
2985                         .extra2         = &int_max,
2986                         .proc_handler   = proc_dointvec_minmax,
2987                 },
2988                 [NEIGH_VAR_GC_THRESH2] = {
2989                         .procname       = "gc_thresh2",
2990                         .maxlen         = sizeof(int),
2991                         .mode           = 0644,
2992                         .extra1         = &zero,
2993                         .extra2         = &int_max,
2994                         .proc_handler   = proc_dointvec_minmax,
2995                 },
2996                 [NEIGH_VAR_GC_THRESH3] = {
2997                         .procname       = "gc_thresh3",
2998                         .maxlen         = sizeof(int),
2999                         .mode           = 0644,
3000                         .extra1         = &zero,
3001                         .extra2         = &int_max,
3002                         .proc_handler   = proc_dointvec_minmax,
3003                 },
3004                 {},
3005         },
3006 };
3007
3008 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
3009                           proc_handler *handler)
3010 {
3011         int i;
3012         struct neigh_sysctl_table *t;
3013         const char *dev_name_source;
3014         char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
3015         char *p_name;
3016
3017         t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
3018         if (!t)
3019                 goto err;
3020
3021         for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
3022                 t->neigh_vars[i].data += (long) p;
3023                 t->neigh_vars[i].extra1 = dev;
3024                 t->neigh_vars[i].extra2 = p;
3025         }
3026
3027         if (dev) {
3028                 dev_name_source = dev->name;
3029                 /* Terminate the table early */
3030                 memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
3031                        sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
3032         } else {
3033                 struct neigh_table *tbl = p->tbl;
3034                 dev_name_source = "default";
3035                 t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval;
3036                 t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1;
3037                 t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2;
3038                 t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3;
3039         }
3040
3041         if (handler) {
3042                 /* RetransTime */
3043                 t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
3044                 /* ReachableTime */
3045                 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
3046                 /* RetransTime (in milliseconds)*/
3047                 t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
3048                 /* ReachableTime (in milliseconds) */
3049                 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
3050         }
3051
3052         /* Don't export sysctls to unprivileged users */
3053         if (neigh_parms_net(p)->user_ns != &init_user_ns)
3054                 t->neigh_vars[0].procname = NULL;
3055
3056         switch (neigh_parms_family(p)) {
3057         case AF_INET:
3058               p_name = "ipv4";
3059               break;
3060         case AF_INET6:
3061               p_name = "ipv6";
3062               break;
3063         default:
3064               BUG();
3065         }
3066
3067         snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
3068                 p_name, dev_name_source);
3069         t->sysctl_header =
3070                 register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
3071         if (!t->sysctl_header)
3072                 goto free;
3073
3074         p->sysctl_table = t;
3075         return 0;
3076
3077 free:
3078         kfree(t);
3079 err:
3080         return -ENOBUFS;
3081 }
3082 EXPORT_SYMBOL(neigh_sysctl_register);
3083
3084 void neigh_sysctl_unregister(struct neigh_parms *p)
3085 {
3086         if (p->sysctl_table) {
3087                 struct neigh_sysctl_table *t = p->sysctl_table;
3088                 p->sysctl_table = NULL;
3089                 unregister_net_sysctl_table(t->sysctl_header);
3090                 kfree(t);
3091         }
3092 }
3093 EXPORT_SYMBOL(neigh_sysctl_unregister);
3094
3095 #endif  /* CONFIG_SYSCTL */
3096
3097 static int __init neigh_init(void)
3098 {
3099         rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
3100         rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
3101         rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);
3102
3103         rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
3104                       NULL);
3105         rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);
3106
3107         return 0;
3108 }
3109
3110 subsys_initcall(neigh_init);
3111