]> git.kernelconcepts.de Git - karo-tx-linux.git/blob - net/core/neighbour.c
Merge branch 'fixes-for-3.6' of git://git.kernel.org/pub/scm/linux/kernel/git/coolone...
[karo-tx-linux.git] / net / core / neighbour.c
1 /*
2  *      Generic address resolution entity
3  *
4  *      Authors:
5  *      Pedro Roque             <roque@di.fc.ul.pt>
6  *      Alexey Kuznetsov        <kuznet@ms2.inr.ac.ru>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  *
13  *      Fixes:
14  *      Vitaly E. Lavrov        releasing NULL neighbor in neigh_add.
15  *      Harald Welte            Add neighbour cache statistics like rtstat
16  */
17
18 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
19
20 #include <linux/slab.h>
21 #include <linux/types.h>
22 #include <linux/kernel.h>
23 #include <linux/module.h>
24 #include <linux/socket.h>
25 #include <linux/netdevice.h>
26 #include <linux/proc_fs.h>
27 #ifdef CONFIG_SYSCTL
28 #include <linux/sysctl.h>
29 #endif
30 #include <linux/times.h>
31 #include <net/net_namespace.h>
32 #include <net/neighbour.h>
33 #include <net/dst.h>
34 #include <net/sock.h>
35 #include <net/netevent.h>
36 #include <net/netlink.h>
37 #include <linux/rtnetlink.h>
38 #include <linux/random.h>
39 #include <linux/string.h>
40 #include <linux/log2.h>
41
/*
 * Debug verbosity for this file.  Level 1 routes NEIGH_PRINTK1 to printk,
 * level 2 additionally routes NEIGH_PRINTK2 to printk.  A disabled level
 * expands to an empty statement.
 */
#define NEIGH_DEBUG 1

#define NEIGH_PRINTK(x...) printk(x)
#define NEIGH_NOPRINTK(x...) do { ; } while(0)

#if NEIGH_DEBUG >= 1
#define NEIGH_PRINTK1 NEIGH_PRINTK
#else
#define NEIGH_PRINTK1 NEIGH_NOPRINTK
#endif

#if NEIGH_DEBUG >= 2
#define NEIGH_PRINTK2 NEIGH_PRINTK
#else
#define NEIGH_PRINTK2 NEIGH_NOPRINTK
#endif
57
58 #define PNEIGH_HASHMASK         0xF
59
60 static void neigh_timer_handler(unsigned long arg);
61 static void __neigh_notify(struct neighbour *n, int type, int flags);
62 static void neigh_update_notify(struct neighbour *neigh);
63 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
64
65 static struct neigh_table *neigh_tables;
66 #ifdef CONFIG_PROC_FS
67 static const struct file_operations neigh_stat_seq_fops;
68 #endif
69
70 /*
71    Neighbour hash table buckets are protected with rwlock tbl->lock.
72
73    - All the scans/updates to hash buckets MUST be made under this lock.
74    - NOTHING clever should be made under this lock: no callbacks
75      to protocol backends, no attempts to send something to network.
76      It will result in deadlocks, if backend/driver wants to use neighbour
77      cache.
78    - If the entry requires some non-trivial actions, increase
79      its reference count and release table lock.
80
81    Neighbour entries are protected:
82    - with reference count.
83    - with rwlock neigh->lock
84
85    Reference count prevents destruction.
86
87    neigh->lock mainly serializes ll address data and its validity state.
88    However, the same lock is also used to protect other entry fields:
89     - timer
90     - resolution queue
91
92    Again, nothing clever shall be made under neigh->lock,
93    the most complicated procedure, which we allow is dev->hard_header.
94    It is supposed, that dev->hard_header is simplistic and does
95    not make callbacks to neighbour tables.
96
97    The last lock is neigh_tbl_lock. It is a pure SMP lock protecting the
98    list of neighbour tables. This list is used only in process context.
99  */
100
101 static DEFINE_RWLOCK(neigh_tbl_lock);
102
/*
 * Output handler that never transmits: the packet is freed and
 * -ENETDOWN is reported to the caller.  @neigh is not used.
 */
static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
{
	const int err = -ENETDOWN;

	kfree_skb(skb);
	return err;
}
108
109 static void neigh_cleanup_and_release(struct neighbour *neigh)
110 {
111         if (neigh->parms->neigh_cleanup)
112                 neigh->parms->neigh_cleanup(neigh);
113
114         __neigh_notify(neigh, RTM_DELNEIGH, 0);
115         neigh_release(neigh);
116 }
117
/*
 * Return a random value in the interval (1/2)*base...(3/2)*base.
 * This corresponds to the default IPv6 settings and is deliberately
 * not overridable.  A @base of zero yields zero.
 */

unsigned long neigh_rand_reach_time(unsigned long base)
{
	if (!base)
		return 0;

	return (net_random() % base) + (base >> 1);
}
128 EXPORT_SYMBOL(neigh_rand_reach_time);
129
130
131 static int neigh_forced_gc(struct neigh_table *tbl)
132 {
133         int shrunk = 0;
134         int i;
135         struct neigh_hash_table *nht;
136
137         NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
138
139         write_lock_bh(&tbl->lock);
140         nht = rcu_dereference_protected(tbl->nht,
141                                         lockdep_is_held(&tbl->lock));
142         for (i = 0; i < (1 << nht->hash_shift); i++) {
143                 struct neighbour *n;
144                 struct neighbour __rcu **np;
145
146                 np = &nht->hash_buckets[i];
147                 while ((n = rcu_dereference_protected(*np,
148                                         lockdep_is_held(&tbl->lock))) != NULL) {
149                         /* Neighbour record may be discarded if:
150                          * - nobody refers to it.
151                          * - it is not permanent
152                          */
153                         write_lock(&n->lock);
154                         if (atomic_read(&n->refcnt) == 1 &&
155                             !(n->nud_state & NUD_PERMANENT)) {
156                                 rcu_assign_pointer(*np,
157                                         rcu_dereference_protected(n->next,
158                                                   lockdep_is_held(&tbl->lock)));
159                                 n->dead = 1;
160                                 shrunk  = 1;
161                                 write_unlock(&n->lock);
162                                 neigh_cleanup_and_release(n);
163                                 continue;
164                         }
165                         write_unlock(&n->lock);
166                         np = &n->next;
167                 }
168         }
169
170         tbl->last_flush = jiffies;
171
172         write_unlock_bh(&tbl->lock);
173
174         return shrunk;
175 }
176
177 static void neigh_add_timer(struct neighbour *n, unsigned long when)
178 {
179         neigh_hold(n);
180         if (unlikely(mod_timer(&n->timer, when))) {
181                 printk("NEIGH: BUG, double timer add, state is %x\n",
182                        n->nud_state);
183                 dump_stack();
184         }
185 }
186
187 static int neigh_del_timer(struct neighbour *n)
188 {
189         if ((n->nud_state & NUD_IN_TIMER) &&
190             del_timer(&n->timer)) {
191                 neigh_release(n);
192                 return 1;
193         }
194         return 0;
195 }
196
197 static void pneigh_queue_purge(struct sk_buff_head *list)
198 {
199         struct sk_buff *skb;
200
201         while ((skb = skb_dequeue(list)) != NULL) {
202                 dev_put(skb->dev);
203                 kfree_skb(skb);
204         }
205 }
206
207 static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
208 {
209         int i;
210         struct neigh_hash_table *nht;
211
212         nht = rcu_dereference_protected(tbl->nht,
213                                         lockdep_is_held(&tbl->lock));
214
215         for (i = 0; i < (1 << nht->hash_shift); i++) {
216                 struct neighbour *n;
217                 struct neighbour __rcu **np = &nht->hash_buckets[i];
218
219                 while ((n = rcu_dereference_protected(*np,
220                                         lockdep_is_held(&tbl->lock))) != NULL) {
221                         if (dev && n->dev != dev) {
222                                 np = &n->next;
223                                 continue;
224                         }
225                         rcu_assign_pointer(*np,
226                                    rcu_dereference_protected(n->next,
227                                                 lockdep_is_held(&tbl->lock)));
228                         write_lock(&n->lock);
229                         neigh_del_timer(n);
230                         n->dead = 1;
231
232                         if (atomic_read(&n->refcnt) != 1) {
233                                 /* The most unpleasant situation.
234                                    We must destroy neighbour entry,
235                                    but someone still uses it.
236
237                                    The destroy will be delayed until
238                                    the last user releases us, but
239                                    we must kill timers etc. and move
240                                    it to safe state.
241                                  */
242                                 skb_queue_purge(&n->arp_queue);
243                                 n->arp_queue_len_bytes = 0;
244                                 n->output = neigh_blackhole;
245                                 if (n->nud_state & NUD_VALID)
246                                         n->nud_state = NUD_NOARP;
247                                 else
248                                         n->nud_state = NUD_NONE;
249                                 NEIGH_PRINTK2("neigh %p is stray.\n", n);
250                         }
251                         write_unlock(&n->lock);
252                         neigh_cleanup_and_release(n);
253                 }
254         }
255 }
256
/*
 * Flush the entries of @tbl that belong to @dev (all entries when @dev
 * is NULL), holding tbl->lock for writing with BH disabled.
 */
void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	write_unlock_bh(&tbl->lock);
}
263 EXPORT_SYMBOL(neigh_changeaddr);
264
/*
 * Drop everything @tbl holds for @dev: the neighbour entries and the
 * proxy entries are removed under tbl->lock, then the proxy timer is
 * stopped and the proxy queue is emptied.
 *
 * Always returns 0.
 */
int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	/* The return value of pneigh_ifdown() is intentionally ignored. */
	pneigh_ifdown(tbl, dev);
	write_unlock_bh(&tbl->lock);

	/* Done outside tbl->lock: wait for the proxy timer, then purge. */
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	return 0;
}
276 EXPORT_SYMBOL(neigh_ifdown);
277
278 static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
279 {
280         struct neighbour *n = NULL;
281         unsigned long now = jiffies;
282         int entries;
283
284         entries = atomic_inc_return(&tbl->entries) - 1;
285         if (entries >= tbl->gc_thresh3 ||
286             (entries >= tbl->gc_thresh2 &&
287              time_after(now, tbl->last_flush + 5 * HZ))) {
288                 if (!neigh_forced_gc(tbl) &&
289                     entries >= tbl->gc_thresh3)
290                         goto out_entries;
291         }
292
293         if (tbl->entry_size)
294                 n = kzalloc(tbl->entry_size, GFP_ATOMIC);
295         else {
296                 int sz = sizeof(*n) + tbl->key_len;
297
298                 sz = ALIGN(sz, NEIGH_PRIV_ALIGN);
299                 sz += dev->neigh_priv_len;
300                 n = kzalloc(sz, GFP_ATOMIC);
301         }
302         if (!n)
303                 goto out_entries;
304
305         skb_queue_head_init(&n->arp_queue);
306         rwlock_init(&n->lock);
307         seqlock_init(&n->ha_lock);
308         n->updated        = n->used = now;
309         n->nud_state      = NUD_NONE;
310         n->output         = neigh_blackhole;
311         seqlock_init(&n->hh.hh_lock);
312         n->parms          = neigh_parms_clone(&tbl->parms);
313         setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);
314
315         NEIGH_CACHE_STAT_INC(tbl, allocs);
316         n->tbl            = tbl;
317         atomic_set(&n->refcnt, 1);
318         n->dead           = 1;
319 out:
320         return n;
321
322 out_entries:
323         atomic_dec(&tbl->entries);
324         goto out;
325 }
326
327 static void neigh_get_hash_rnd(u32 *x)
328 {
329         get_random_bytes(x, sizeof(*x));
330         *x |= 1;
331 }
332
333 static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
334 {
335         size_t size = (1 << shift) * sizeof(struct neighbour *);
336         struct neigh_hash_table *ret;
337         struct neighbour __rcu **buckets;
338         int i;
339
340         ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
341         if (!ret)
342                 return NULL;
343         if (size <= PAGE_SIZE)
344                 buckets = kzalloc(size, GFP_ATOMIC);
345         else
346                 buckets = (struct neighbour __rcu **)
347                           __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
348                                            get_order(size));
349         if (!buckets) {
350                 kfree(ret);
351                 return NULL;
352         }
353         ret->hash_buckets = buckets;
354         ret->hash_shift = shift;
355         for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
356                 neigh_get_hash_rnd(&ret->hash_rnd[i]);
357         return ret;
358 }
359
360 static void neigh_hash_free_rcu(struct rcu_head *head)
361 {
362         struct neigh_hash_table *nht = container_of(head,
363                                                     struct neigh_hash_table,
364                                                     rcu);
365         size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
366         struct neighbour __rcu **buckets = nht->hash_buckets;
367
368         if (size <= PAGE_SIZE)
369                 kfree(buckets);
370         else
371                 free_pages((unsigned long)buckets, get_order(size));
372         kfree(nht);
373 }
374
375 static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
376                                                 unsigned long new_shift)
377 {
378         unsigned int i, hash;
379         struct neigh_hash_table *new_nht, *old_nht;
380
381         NEIGH_CACHE_STAT_INC(tbl, hash_grows);
382
383         old_nht = rcu_dereference_protected(tbl->nht,
384                                             lockdep_is_held(&tbl->lock));
385         new_nht = neigh_hash_alloc(new_shift);
386         if (!new_nht)
387                 return old_nht;
388
389         for (i = 0; i < (1 << old_nht->hash_shift); i++) {
390                 struct neighbour *n, *next;
391
392                 for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
393                                                    lockdep_is_held(&tbl->lock));
394                      n != NULL;
395                      n = next) {
396                         hash = tbl->hash(n->primary_key, n->dev,
397                                          new_nht->hash_rnd);
398
399                         hash >>= (32 - new_nht->hash_shift);
400                         next = rcu_dereference_protected(n->next,
401                                                 lockdep_is_held(&tbl->lock));
402
403                         rcu_assign_pointer(n->next,
404                                            rcu_dereference_protected(
405                                                 new_nht->hash_buckets[hash],
406                                                 lockdep_is_held(&tbl->lock)));
407                         rcu_assign_pointer(new_nht->hash_buckets[hash], n);
408                 }
409         }
410
411         rcu_assign_pointer(tbl->nht, new_nht);
412         call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
413         return new_nht;
414 }
415
416 struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
417                                struct net_device *dev)
418 {
419         struct neighbour *n;
420         int key_len = tbl->key_len;
421         u32 hash_val;
422         struct neigh_hash_table *nht;
423
424         NEIGH_CACHE_STAT_INC(tbl, lookups);
425
426         rcu_read_lock_bh();
427         nht = rcu_dereference_bh(tbl->nht);
428         hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
429
430         for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
431              n != NULL;
432              n = rcu_dereference_bh(n->next)) {
433                 if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
434                         if (!atomic_inc_not_zero(&n->refcnt))
435                                 n = NULL;
436                         NEIGH_CACHE_STAT_INC(tbl, hits);
437                         break;
438                 }
439         }
440
441         rcu_read_unlock_bh();
442         return n;
443 }
444 EXPORT_SYMBOL(neigh_lookup);
445
446 struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
447                                      const void *pkey)
448 {
449         struct neighbour *n;
450         int key_len = tbl->key_len;
451         u32 hash_val;
452         struct neigh_hash_table *nht;
453
454         NEIGH_CACHE_STAT_INC(tbl, lookups);
455
456         rcu_read_lock_bh();
457         nht = rcu_dereference_bh(tbl->nht);
458         hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);
459
460         for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
461              n != NULL;
462              n = rcu_dereference_bh(n->next)) {
463                 if (!memcmp(n->primary_key, pkey, key_len) &&
464                     net_eq(dev_net(n->dev), net)) {
465                         if (!atomic_inc_not_zero(&n->refcnt))
466                                 n = NULL;
467                         NEIGH_CACHE_STAT_INC(tbl, hits);
468                         break;
469                 }
470         }
471
472         rcu_read_unlock_bh();
473         return n;
474 }
475 EXPORT_SYMBOL(neigh_lookup_nodev);
476
477 struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
478                                  struct net_device *dev, bool want_ref)
479 {
480         u32 hash_val;
481         int key_len = tbl->key_len;
482         int error;
483         struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
484         struct neigh_hash_table *nht;
485
486         if (!n) {
487                 rc = ERR_PTR(-ENOBUFS);
488                 goto out;
489         }
490
491         memcpy(n->primary_key, pkey, key_len);
492         n->dev = dev;
493         dev_hold(dev);
494
495         /* Protocol specific setup. */
496         if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
497                 rc = ERR_PTR(error);
498                 goto out_neigh_release;
499         }
500
501         if (dev->netdev_ops->ndo_neigh_construct) {
502                 error = dev->netdev_ops->ndo_neigh_construct(n);
503                 if (error < 0) {
504                         rc = ERR_PTR(error);
505                         goto out_neigh_release;
506                 }
507         }
508
509         /* Device specific setup. */
510         if (n->parms->neigh_setup &&
511             (error = n->parms->neigh_setup(n)) < 0) {
512                 rc = ERR_PTR(error);
513                 goto out_neigh_release;
514         }
515
516         n->confirmed = jiffies - (n->parms->base_reachable_time << 1);
517
518         write_lock_bh(&tbl->lock);
519         nht = rcu_dereference_protected(tbl->nht,
520                                         lockdep_is_held(&tbl->lock));
521
522         if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
523                 nht = neigh_hash_grow(tbl, nht->hash_shift + 1);
524
525         hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
526
527         if (n->parms->dead) {
528                 rc = ERR_PTR(-EINVAL);
529                 goto out_tbl_unlock;
530         }
531
532         for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
533                                             lockdep_is_held(&tbl->lock));
534              n1 != NULL;
535              n1 = rcu_dereference_protected(n1->next,
536                         lockdep_is_held(&tbl->lock))) {
537                 if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
538                         if (want_ref)
539                                 neigh_hold(n1);
540                         rc = n1;
541                         goto out_tbl_unlock;
542                 }
543         }
544
545         n->dead = 0;
546         if (want_ref)
547                 neigh_hold(n);
548         rcu_assign_pointer(n->next,
549                            rcu_dereference_protected(nht->hash_buckets[hash_val],
550                                                      lockdep_is_held(&tbl->lock)));
551         rcu_assign_pointer(nht->hash_buckets[hash_val], n);
552         write_unlock_bh(&tbl->lock);
553         NEIGH_PRINTK2("neigh %p is created.\n", n);
554         rc = n;
555 out:
556         return rc;
557 out_tbl_unlock:
558         write_unlock_bh(&tbl->lock);
559 out_neigh_release:
560         neigh_release(n);
561         goto out;
562 }
563 EXPORT_SYMBOL(__neigh_create);
564
565 static u32 pneigh_hash(const void *pkey, int key_len)
566 {
567         u32 hash_val = *(u32 *)(pkey + key_len - 4);
568         hash_val ^= (hash_val >> 16);
569         hash_val ^= hash_val >> 8;
570         hash_val ^= hash_val >> 4;
571         hash_val &= PNEIGH_HASHMASK;
572         return hash_val;
573 }
574
575 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
576                                               struct net *net,
577                                               const void *pkey,
578                                               int key_len,
579                                               struct net_device *dev)
580 {
581         while (n) {
582                 if (!memcmp(n->key, pkey, key_len) &&
583                     net_eq(pneigh_net(n), net) &&
584                     (n->dev == dev || !n->dev))
585                         return n;
586                 n = n->next;
587         }
588         return NULL;
589 }
590
591 struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
592                 struct net *net, const void *pkey, struct net_device *dev)
593 {
594         int key_len = tbl->key_len;
595         u32 hash_val = pneigh_hash(pkey, key_len);
596
597         return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
598                                  net, pkey, key_len, dev);
599 }
600 EXPORT_SYMBOL_GPL(__pneigh_lookup);
601
602 struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
603                                     struct net *net, const void *pkey,
604                                     struct net_device *dev, int creat)
605 {
606         struct pneigh_entry *n;
607         int key_len = tbl->key_len;
608         u32 hash_val = pneigh_hash(pkey, key_len);
609
610         read_lock_bh(&tbl->lock);
611         n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
612                               net, pkey, key_len, dev);
613         read_unlock_bh(&tbl->lock);
614
615         if (n || !creat)
616                 goto out;
617
618         ASSERT_RTNL();
619
620         n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
621         if (!n)
622                 goto out;
623
624         write_pnet(&n->net, hold_net(net));
625         memcpy(n->key, pkey, key_len);
626         n->dev = dev;
627         if (dev)
628                 dev_hold(dev);
629
630         if (tbl->pconstructor && tbl->pconstructor(n)) {
631                 if (dev)
632                         dev_put(dev);
633                 release_net(net);
634                 kfree(n);
635                 n = NULL;
636                 goto out;
637         }
638
639         write_lock_bh(&tbl->lock);
640         n->next = tbl->phash_buckets[hash_val];
641         tbl->phash_buckets[hash_val] = n;
642         write_unlock_bh(&tbl->lock);
643 out:
644         return n;
645 }
646 EXPORT_SYMBOL(pneigh_lookup);
647
648
649 int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
650                   struct net_device *dev)
651 {
652         struct pneigh_entry *n, **np;
653         int key_len = tbl->key_len;
654         u32 hash_val = pneigh_hash(pkey, key_len);
655
656         write_lock_bh(&tbl->lock);
657         for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
658              np = &n->next) {
659                 if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
660                     net_eq(pneigh_net(n), net)) {
661                         *np = n->next;
662                         write_unlock_bh(&tbl->lock);
663                         if (tbl->pdestructor)
664                                 tbl->pdestructor(n);
665                         if (n->dev)
666                                 dev_put(n->dev);
667                         release_net(pneigh_net(n));
668                         kfree(n);
669                         return 0;
670                 }
671         }
672         write_unlock_bh(&tbl->lock);
673         return -ENOENT;
674 }
675
676 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
677 {
678         struct pneigh_entry *n, **np;
679         u32 h;
680
681         for (h = 0; h <= PNEIGH_HASHMASK; h++) {
682                 np = &tbl->phash_buckets[h];
683                 while ((n = *np) != NULL) {
684                         if (!dev || n->dev == dev) {
685                                 *np = n->next;
686                                 if (tbl->pdestructor)
687                                         tbl->pdestructor(n);
688                                 if (n->dev)
689                                         dev_put(n->dev);
690                                 release_net(pneigh_net(n));
691                                 kfree(n);
692                                 continue;
693                         }
694                         np = &n->next;
695                 }
696         }
697         return -ENOENT;
698 }
699
700 static void neigh_parms_destroy(struct neigh_parms *parms);
701
702 static inline void neigh_parms_put(struct neigh_parms *parms)
703 {
704         if (atomic_dec_and_test(&parms->refcnt))
705                 neigh_parms_destroy(parms);
706 }
707
708 /*
709  *      neighbour must already be out of the table;
710  *
711  */
712 void neigh_destroy(struct neighbour *neigh)
713 {
714         struct net_device *dev = neigh->dev;
715
716         NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
717
718         if (!neigh->dead) {
719                 pr_warn("Destroying alive neighbour %p\n", neigh);
720                 dump_stack();
721                 return;
722         }
723
724         if (neigh_del_timer(neigh))
725                 pr_warn("Impossible event\n");
726
727         skb_queue_purge(&neigh->arp_queue);
728         neigh->arp_queue_len_bytes = 0;
729
730         if (dev->netdev_ops->ndo_neigh_destroy)
731                 dev->netdev_ops->ndo_neigh_destroy(neigh);
732
733         dev_put(dev);
734         neigh_parms_put(neigh->parms);
735
736         NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);
737
738         atomic_dec(&neigh->tbl->entries);
739         kfree_rcu(neigh, rcu);
740 }
741 EXPORT_SYMBOL(neigh_destroy);
742
/* Neighbour state is suspicious; disable the fast path by switching the
   entry's output handler to ops->output.

   Called with neigh->lock held for writing.
 */
static void neigh_suspect(struct neighbour *neigh)
{
	NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);

	neigh->output = neigh->ops->output;
}
754
/* Neighbour state is OK; enable the fast path by switching the entry's
   output handler to ops->connected_output.

   Called with neigh->lock held for writing.
 */
static void neigh_connect(struct neighbour *neigh)
{
	NEIGH_PRINTK2("neigh %p is connected.\n", neigh);

	neigh->output = neigh->ops->connected_output;
}
766
767 static void neigh_periodic_work(struct work_struct *work)
768 {
769         struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
770         struct neighbour *n;
771         struct neighbour __rcu **np;
772         unsigned int i;
773         struct neigh_hash_table *nht;
774
775         NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
776
777         write_lock_bh(&tbl->lock);
778         nht = rcu_dereference_protected(tbl->nht,
779                                         lockdep_is_held(&tbl->lock));
780
781         /*
782          *      periodically recompute ReachableTime from random function
783          */
784
785         if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
786                 struct neigh_parms *p;
787                 tbl->last_rand = jiffies;
788                 for (p = &tbl->parms; p; p = p->next)
789                         p->reachable_time =
790                                 neigh_rand_reach_time(p->base_reachable_time);
791         }
792
793         for (i = 0 ; i < (1 << nht->hash_shift); i++) {
794                 np = &nht->hash_buckets[i];
795
796                 while ((n = rcu_dereference_protected(*np,
797                                 lockdep_is_held(&tbl->lock))) != NULL) {
798                         unsigned int state;
799
800                         write_lock(&n->lock);
801
802                         state = n->nud_state;
803                         if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
804                                 write_unlock(&n->lock);
805                                 goto next_elt;
806                         }
807
808                         if (time_before(n->used, n->confirmed))
809                                 n->used = n->confirmed;
810
811                         if (atomic_read(&n->refcnt) == 1 &&
812                             (state == NUD_FAILED ||
813                              time_after(jiffies, n->used + n->parms->gc_staletime))) {
814                                 *np = n->next;
815                                 n->dead = 1;
816                                 write_unlock(&n->lock);
817                                 neigh_cleanup_and_release(n);
818                                 continue;
819                         }
820                         write_unlock(&n->lock);
821
822 next_elt:
823                         np = &n->next;
824                 }
825                 /*
826                  * It's fine to release lock here, even if hash table
827                  * grows while we are preempted.
828                  */
829                 write_unlock_bh(&tbl->lock);
830                 cond_resched();
831                 write_lock_bh(&tbl->lock);
832                 nht = rcu_dereference_protected(tbl->nht,
833                                                 lockdep_is_held(&tbl->lock));
834         }
835         /* Cycle through all hash buckets every base_reachable_time/2 ticks.
836          * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
837          * base_reachable_time.
838          */
839         schedule_delayed_work(&tbl->gc_work,
840                               tbl->parms.base_reachable_time >> 1);
841         write_unlock_bh(&tbl->lock);
842 }
843
844 static __inline__ int neigh_max_probes(struct neighbour *n)
845 {
846         struct neigh_parms *p = n->parms;
847         return (n->nud_state & NUD_PROBE) ?
848                 p->ucast_probes :
849                 p->ucast_probes + p->app_probes + p->mcast_probes;
850 }
851
852 static void neigh_invalidate(struct neighbour *neigh)
853         __releases(neigh->lock)
854         __acquires(neigh->lock)
855 {
856         struct sk_buff *skb;
857
858         NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
859         NEIGH_PRINTK2("neigh %p is failed.\n", neigh);
860         neigh->updated = jiffies;
861
862         /* It is very thin place. report_unreachable is very complicated
863            routine. Particularly, it can hit the same neighbour entry!
864
865            So that, we try to be accurate and avoid dead loop. --ANK
866          */
867         while (neigh->nud_state == NUD_FAILED &&
868                (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
869                 write_unlock(&neigh->lock);
870                 neigh->ops->error_report(neigh, skb);
871                 write_lock(&neigh->lock);
872         }
873         skb_queue_purge(&neigh->arp_queue);
874         neigh->arp_queue_len_bytes = 0;
875 }
876
877 static void neigh_probe(struct neighbour *neigh)
878         __releases(neigh->lock)
879 {
880         struct sk_buff *skb = skb_peek(&neigh->arp_queue);
881         /* keep skb alive even if arp_queue overflows */
882         if (skb)
883                 skb = skb_copy(skb, GFP_ATOMIC);
884         write_unlock(&neigh->lock);
885         neigh->ops->solicit(neigh, skb);
886         atomic_inc(&neigh->probes);
887         kfree_skb(skb);
888 }
889
890 /* Called when a timer expires for a neighbour entry.
 *
 * @arg is the struct neighbour pointer cast to unsigned long.
 *
 * Under neigh->lock (write) the handler advances the entry's NUD state:
 *   REACHABLE -> stays REACHABLE, or becomes DELAY or STALE
 *   DELAY     -> REACHABLE or PROBE
 *   PROBE / INCOMPLETE -> FAILED once the probe budget is used up
 * It then re-arms the timer if the new state is still timer driven, sends
 * a probe for INCOMPLETE/PROBE entries, emits a notification when the
 * state change asked for one, and finally drops one reference on the
 * entry (presumably the one owned by the timer that just fired -- the
 * matching hold is not visible in this function).
 */
891
892 static void neigh_timer_handler(unsigned long arg)
893 {
894         unsigned long now, next;
895         struct neighbour *neigh = (struct neighbour *)arg;
896         unsigned int state;
897         int notify = 0;
898
899         write_lock(&neigh->lock);
900
901         state = neigh->nud_state;
902         now = jiffies;
903         next = now + HZ;
904
            /* The entry is no longer in a timer-driven state: just unlock
             * and release.
             */
905         if (!(state & NUD_IN_TIMER))
906                 goto out;
907
908         if (state & NUD_REACHABLE) {
909                 if (time_before_eq(now,
910                                    neigh->confirmed + neigh->parms->reachable_time)) {
                            /* Confirmed recently enough: check again when
                             * the confirmation runs out.
                             */
911                         NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
912                         next = neigh->confirmed + neigh->parms->reachable_time;
913                 } else if (time_before_eq(now,
914                                           neigh->used + neigh->parms->delay_probe_time)) {
                            /* Confirmation expired but the entry was used
                             * within delay_probe_time: move to DELAY.
                             */
915                         NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
916                         neigh->nud_state = NUD_DELAY;
917                         neigh->updated = jiffies;
918                         neigh_suspect(neigh);
919                         next = now + neigh->parms->delay_probe_time;
920                 } else {
                            /* Neither confirmed nor recently used: STALE is
                             * not a timer state, so no re-arm happens below.
                             */
921                         NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
922                         neigh->nud_state = NUD_STALE;
923                         neigh->updated = jiffies;
924                         neigh_suspect(neigh);
925                         notify = 1;
926                 }
927         } else if (state & NUD_DELAY) {
928                 if (time_before_eq(now,
929                                    neigh->confirmed + neigh->parms->delay_probe_time)) {
                            /* A confirmation arrived during the delay. */
930                         NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh);
931                         neigh->nud_state = NUD_REACHABLE;
932                         neigh->updated = jiffies;
933                         neigh_connect(neigh);
934                         notify = 1;
935                         next = neigh->confirmed + neigh->parms->reachable_time;
936                 } else {
                            /* No confirmation: start probing with a fresh
                             * probe counter.
                             */
937                         NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
938                         neigh->nud_state = NUD_PROBE;
939                         neigh->updated = jiffies;
940                         atomic_set(&neigh->probes, 0);
941                         next = now + neigh->parms->retrans_time;
942                 }
943         } else {
944                 /* NUD_PROBE|NUD_INCOMPLETE */
945                 next = now + neigh->parms->retrans_time;
946         }
947
            /* Probe budget exhausted: fail the entry and flush its queue.
             * neigh_invalidate() drops and retakes neigh->lock internally.
             */
948         if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
949             atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
950                 neigh->nud_state = NUD_FAILED;
951                 notify = 1;
952                 neigh_invalidate(neigh);
953         }
954
955         if (neigh->nud_state & NUD_IN_TIMER) {
                    /* Never re-arm sooner than HZ/2 from now. */
956                 if (time_before(next, jiffies + HZ/2))
957                         next = jiffies + HZ/2;
                    /* NOTE(review): a zero return from mod_timer() presumably
                     * means the timer was not pending, so the newly armed
                     * timer gets its own reference -- confirm against the
                     * timer API.
                     */
958                 if (!mod_timer(&neigh->timer, next))
959                         neigh_hold(neigh);
960         }
            /* neigh_probe() releases neigh->lock itself; every other path
             * (including the early "goto out") unlocks here.
             */
961         if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
962                 neigh_probe(neigh);
963         } else {
964 out:
965                 write_unlock(&neigh->lock);
966         }
967
            /* Notification is sent after the lock has been dropped. */
968         if (notify)
969                 neigh_update_notify(neigh);
970
971         neigh_release(neigh);
972 }
973
/*
 * Kick address resolution for @neigh on behalf of an outgoing packet.
 *
 * @skb may be NULL; when it is not and the entry is (or becomes)
 * NUD_INCOMPLETE, the packet is queued on neigh->arp_queue.
 *
 * Returns 0 when the caller may go on and transmit (entry is connected,
 * in DELAY/PROBE, or was just moved from STALE to DELAY).
 * Returns 1 when the packet has been consumed: either queued until
 * resolution finishes, or freed because the entry went straight to
 * NUD_FAILED.
 */
974 int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
975 {
976         int rc;
977         bool immediate_probe = false;
978
979         write_lock_bh(&neigh->lock);
980
981         rc = 0;
982         if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
983                 goto out_unlock_bh;
984
985         if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
986                 if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
987                         unsigned long next, now = jiffies;
988
                            /* Start resolving.  The counter starts at
                             * ucast_probes, so relative to neigh_max_probes()
                             * only the multicast/application budget is left.
                             */
989                         atomic_set(&neigh->probes, neigh->parms->ucast_probes);
990                         neigh->nud_state     = NUD_INCOMPLETE;
991                         neigh->updated = now;
992                         next = now + max(neigh->parms->retrans_time, HZ/2);
993                         neigh_add_timer(neigh, next);
994                         immediate_probe = true;
995                 } else {
                            /* No multicast or application probes configured:
                             * fail at once and drop the packet.
                             */
996                         neigh->nud_state = NUD_FAILED;
997                         neigh->updated = jiffies;
998                         write_unlock_bh(&neigh->lock);
999
1000                         kfree_skb(skb);
1001                         return 1;
1002                 }
1003         } else if (neigh->nud_state & NUD_STALE) {
1004                 NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
1005                 neigh->nud_state = NUD_DELAY;
1006                 neigh->updated = jiffies;
1007                 neigh_add_timer(neigh,
1008                                 jiffies + neigh->parms->delay_probe_time);
1009         }
1010
1011         if (neigh->nud_state == NUD_INCOMPLETE) {
1012                 if (skb) {
                             /* Make room: drop packets from the head of the
                              * queue until the new one fits the byte limit
                              * or the queue is empty.
                              */
1013                         while (neigh->arp_queue_len_bytes + skb->truesize >
1014                                neigh->parms->queue_len_bytes) {
1015                                 struct sk_buff *buff;
1016
1017                                 buff = __skb_dequeue(&neigh->arp_queue);
1018                                 if (!buff)
1019                                         break;
1020                                 neigh->arp_queue_len_bytes -= buff->truesize;
1021                                 kfree_skb(buff);
1022                                 NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
1023                         }
1024                         skb_dst_force(skb);
1025                         __skb_queue_tail(&neigh->arp_queue, skb);
1026                         neigh->arp_queue_len_bytes += skb->truesize;
1027                 }
1028                 rc = 1;
1029         }
1030 out_unlock_bh:
             /* neigh_probe() releases neigh->lock itself.  Either way the
              * bottom-half disable taken by write_lock_bh() above is undone
              * separately by local_bh_enable().
              */
1031         if (immediate_probe)
1032                 neigh_probe(neigh);
1033         else
1034                 write_unlock(&neigh->lock);
1035         local_bh_enable();
1036         return rc;
1037 }
1038 EXPORT_SYMBOL(__neigh_event_send);
1039
1040 static void neigh_update_hhs(struct neighbour *neigh)
1041 {
1042         struct hh_cache *hh;
1043         void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1044                 = NULL;
1045
1046         if (neigh->dev->header_ops)
1047                 update = neigh->dev->header_ops->cache_update;
1048
1049         if (update) {
1050                 hh = &neigh->hh;
1051                 if (hh->hh_len) {
1052                         write_seqlock_bh(&hh->hh_lock);
1053                         update(hh, neigh->dev, neigh->ha);
1054                         write_sequnlock_bh(&hh->hh_lock);
1055                 }
1056         }
1057 }
1058
1059
1060
1061 /* Generic update routine.
1062    -- lladdr is new lladdr or NULL, if it is not supplied.
1063    -- new    is new state.
1064    -- flags
1065         NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr,
1066                                 if it is different.
1067         NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
1068                                 lladdr instead of overriding it
1069                                 if it is different.
1070                                 It also allows to retain current state
1071                                 if lladdr is unchanged.
1072         NEIGH_UPDATE_F_ADMIN    means that the change is administrative.
1073
1074         NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
1075                                 NTF_ROUTER flag.
1076         NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as
1077                                 a router.
1078
1079    Caller MUST hold reference count on the entry.
1080  */
1081
/*
 * Return value, as established by the code below:
 *   -EPERM   a non-administrative update hit a NOARP/PERMANENT entry;
 *   -EINVAL  a valid new state was requested without an address while
 *            the entry had no valid address either (and the device
 *            uses link-layer addresses);
 *   0        in every other case, including requests that were ignored
 *            because the address differs and no override flag was given.
 */
1082 int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
1083                  u32 flags)
1084 {
1085         u8 old;
1086         int err;
1087         int notify = 0;
1088         struct net_device *dev;
1089         int update_isrouter = 0;
1090
1091         write_lock_bh(&neigh->lock);
1092
1093         dev    = neigh->dev;
1094         old    = neigh->nud_state;
1095         err    = -EPERM;
1096
1097         if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
1098             (old & (NUD_NOARP | NUD_PERMANENT)))
1099                 goto out;
1100
             /* New state carries no valid address: stop the timer, switch
              * state, and flush the queue if a resolution in progress is
              * being failed.
              */
1101         if (!(new & NUD_VALID)) {
1102                 neigh_del_timer(neigh);
1103                 if (old & NUD_CONNECTED)
1104                         neigh_suspect(neigh);
1105                 neigh->nud_state = new;
1106                 err = 0;
1107                 notify = old & NUD_VALID;
1108                 if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
1109                     (new & NUD_FAILED)) {
1110                         neigh_invalidate(neigh);
1111                         notify = 1;
1112                 }
1113                 goto out;
1114         }
1115
             /* From here on "lladdr == neigh->ha" (pointer identity) is the
              * marker for "address unchanged"; lladdr is redirected to the
              * cached buffer whenever no real change is requested.
              */
1116         /* Compare new lladdr with cached one */
1117         if (!dev->addr_len) {
1118                 /* First case: device needs no address. */
1119                 lladdr = neigh->ha;
1120         } else if (lladdr) {
1121                 /* The second case: if something is already cached
1122                    and a new address is proposed:
1123                    - compare new & old
1124                    - if they are different, check override flag
1125                  */
1126                 if ((old & NUD_VALID) &&
1127                     !memcmp(lladdr, neigh->ha, dev->addr_len))
1128                         lladdr = neigh->ha;
1129         } else {
1130                 /* No address is supplied; if we know something,
1131                    use it, otherwise discard the request.
1132                  */
1133                 err = -EINVAL;
1134                 if (!(old & NUD_VALID))
1135                         goto out;
1136                 lladdr = neigh->ha;
1137         }
1138
1139         if (new & NUD_CONNECTED)
1140                 neigh->confirmed = jiffies;
1141         neigh->updated = jiffies;
1142
1143         /* If entry was valid and address is not changed,
1144            do not change entry state, if new one is STALE.
1145          */
1146         err = 0;
1147         update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
1148         if (old & NUD_VALID) {
1149                 if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
                             /* Different address but no permission to
                              * override: with WEAK_OVERRIDE on a connected
                              * entry keep the old address and demote the
                              * entry to STALE, otherwise ignore the request.
                              */
1150                         update_isrouter = 0;
1151                         if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
1152                             (old & NUD_CONNECTED)) {
1153                                 lladdr = neigh->ha;
1154                                 new = NUD_STALE;
1155                         } else
1156                                 goto out;
1157                 } else {
1158                         if (lladdr == neigh->ha && new == NUD_STALE &&
1159                             ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
1160                              (old & NUD_CONNECTED))
1161                             )
1162                                 new = old;
1163                 }
1164         }
1165
             /* State change: restart the timer for timer-driven states.
              * REACHABLE expires after reachable_time; any other timer state
              * is given an expiry of "now".
              */
1166         if (new != old) {
1167                 neigh_del_timer(neigh);
1168                 if (new & NUD_IN_TIMER)
1169                         neigh_add_timer(neigh, (jiffies +
1170                                                 ((new & NUD_REACHABLE) ?
1171                                                  neigh->parms->reachable_time :
1172                                                  0)));
1173                 neigh->nud_state = new;
1174         }
1175
             /* Address really changed: store it under ha_lock, refresh the
              * cached hardware header and, unless the new state is a
              * connected one, backdate the confirmation stamp by twice
              * base_reachable_time.
              */
1176         if (lladdr != neigh->ha) {
1177                 write_seqlock(&neigh->ha_lock);
1178                 memcpy(&neigh->ha, lladdr, dev->addr_len);
1179                 write_sequnlock(&neigh->ha_lock);
1180                 neigh_update_hhs(neigh);
1181                 if (!(new & NUD_CONNECTED))
1182                         neigh->confirmed = jiffies -
1183                                       (neigh->parms->base_reachable_time << 1);
1184                 notify = 1;
1185         }
1186         if (new == old)
1187                 goto out;
1188         if (new & NUD_CONNECTED)
1189                 neigh_connect(neigh);
1190         else
1191                 neigh_suspect(neigh);
             /* The entry just became valid: push out the packets that were
              * queued while it was being resolved.
              */
1192         if (!(old & NUD_VALID)) {
1193                 struct sk_buff *skb;
1194
1195                 /* Again: avoid dead loop if something went wrong */
1196
1197                 while (neigh->nud_state & NUD_VALID &&
1198                        (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
1199                         struct dst_entry *dst = skb_dst(skb);
1200                         struct neighbour *n2, *n1 = neigh;
                             /* The lock is dropped for the duration of the
                              * output call and retaken before the loop
                              * condition is evaluated again.
                              */
1201                         write_unlock_bh(&neigh->lock);
1202
1203                         rcu_read_lock();
1204
1205                         /* Why not just use 'neigh' as-is?  The problem is that
1206                          * things such as shaper, eql, and sch_teql can end up
1207                          * using alternative, different, neigh objects to output
1208                          * the packet in the output path.  So what we need to do
1209                          * here is re-lookup the top-level neigh in the path so
1210                          * we can reinject the packet there.
1211                          */
1212                         n2 = NULL;
1213                         if (dst) {
1214                                 n2 = dst_neigh_lookup_skb(dst, skb);
1215                                 if (n2)
1216                                         n1 = n2;
1217                         }
1218                         n1->output(n1, skb);
1219                         if (n2)
1220                                 neigh_release(n2);
1221                         rcu_read_unlock();
1222
1223                         write_lock_bh(&neigh->lock);
1224                 }
1225                 skb_queue_purge(&neigh->arp_queue);
1226                 neigh->arp_queue_len_bytes = 0;
1227         }
1228 out:
             /* update_isrouter is only non-zero on paths that got past the
              * address checks; set or clear NTF_ROUTER as requested.
              */
1229         if (update_isrouter) {
1230                 neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
1231                         (neigh->flags | NTF_ROUTER) :
1232                         (neigh->flags & ~NTF_ROUTER);
1233         }
1234         write_unlock_bh(&neigh->lock);
1235
1236         if (notify)
1237                 neigh_update_notify(neigh);
1238
1239         return err;
1240 }
1241 EXPORT_SYMBOL(neigh_update);
1242
1243 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1244                                  u8 *lladdr, void *saddr,
1245                                  struct net_device *dev)
1246 {
1247         struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1248                                                  lladdr || !dev->addr_len);
1249         if (neigh)
1250                 neigh_update(neigh, lladdr, NUD_STALE,
1251                              NEIGH_UPDATE_F_OVERRIDE);
1252         return neigh;
1253 }
1254 EXPORT_SYMBOL(neigh_event_ns);
1255
1256 /* called with read_lock_bh(&n->lock); */
1257 static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
1258 {
1259         struct net_device *dev = dst->dev;
1260         __be16 prot = dst->ops->protocol;
1261         struct hh_cache *hh = &n->hh;
1262
1263         write_lock_bh(&n->lock);
1264
1265         /* Only one thread can come in here and initialize the
1266          * hh_cache entry.
1267          */
1268         if (!hh->hh_len)
1269                 dev->header_ops->cache(n, hh, prot);
1270
1271         write_unlock_bh(&n->lock);
1272 }
1273
1274 /* This function can be used in contexts, where only old dev_queue_xmit
1275  * worked, f.e. if you want to override normal output path (eql, shaper),
1276  * but resolution is not made yet.
1277  */
1278
1279 int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb)
1280 {
1281         struct net_device *dev = skb->dev;
1282
1283         __skb_pull(skb, skb_network_offset(skb));
1284
1285         if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
1286                             skb->len) < 0 &&
1287             dev->header_ops->rebuild(skb))
1288                 return 0;
1289
1290         return dev_queue_xmit(skb);
1291 }
1292 EXPORT_SYMBOL(neigh_compat_output);
1293
1294 /* Slow and careful. */
1295
1296 int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
1297 {
1298         struct dst_entry *dst = skb_dst(skb);
1299         int rc = 0;
1300
1301         if (!dst)
1302                 goto discard;
1303
1304         __skb_pull(skb, skb_network_offset(skb));
1305
1306         if (!neigh_event_send(neigh, skb)) {
1307                 int err;
1308                 struct net_device *dev = neigh->dev;
1309                 unsigned int seq;
1310
1311                 if (dev->header_ops->cache && !neigh->hh.hh_len)
1312                         neigh_hh_init(neigh, dst);
1313
1314                 do {
1315                         seq = read_seqbegin(&neigh->ha_lock);
1316                         err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1317                                               neigh->ha, NULL, skb->len);
1318                 } while (read_seqretry(&neigh->ha_lock, seq));
1319
1320                 if (err >= 0)
1321                         rc = dev_queue_xmit(skb);
1322                 else
1323                         goto out_kfree_skb;
1324         }
1325 out:
1326         return rc;
1327 discard:
1328         NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n",
1329                       dst, neigh);
1330 out_kfree_skb:
1331         rc = -EINVAL;
1332         kfree_skb(skb);
1333         goto out;
1334 }
1335 EXPORT_SYMBOL(neigh_resolve_output);
1336
1337 /* As fast as possible without hh cache */
1338
1339 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1340 {
1341         struct net_device *dev = neigh->dev;
1342         unsigned int seq;
1343         int err;
1344
1345         __skb_pull(skb, skb_network_offset(skb));
1346
1347         do {
1348                 seq = read_seqbegin(&neigh->ha_lock);
1349                 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1350                                       neigh->ha, NULL, skb->len);
1351         } while (read_seqretry(&neigh->ha_lock, seq));
1352
1353         if (err >= 0)
1354                 err = dev_queue_xmit(skb);
1355         else {
1356                 err = -EINVAL;
1357                 kfree_skb(skb);
1358         }
1359         return err;
1360 }
1361 EXPORT_SYMBOL(neigh_connected_output);
1362
/*
 * Output routine that needs no link-layer header work: the packet is
 * passed to dev_queue_xmit() unchanged and its result is returned.
 * @neigh is not used.
 */
int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
        int rc = dev_queue_xmit(skb);

        return rc;
}
EXPORT_SYMBOL(neigh_direct_output);
1368
1369 static void neigh_proxy_process(unsigned long arg)
1370 {
1371         struct neigh_table *tbl = (struct neigh_table *)arg;
1372         long sched_next = 0;
1373         unsigned long now = jiffies;
1374         struct sk_buff *skb, *n;
1375
1376         spin_lock(&tbl->proxy_queue.lock);
1377
1378         skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
1379                 long tdif = NEIGH_CB(skb)->sched_next - now;
1380
1381                 if (tdif <= 0) {
1382                         struct net_device *dev = skb->dev;
1383
1384                         __skb_unlink(skb, &tbl->proxy_queue);
1385                         if (tbl->proxy_redo && netif_running(dev)) {
1386                                 rcu_read_lock();
1387                                 tbl->proxy_redo(skb);
1388                                 rcu_read_unlock();
1389                         } else {
1390                                 kfree_skb(skb);
1391                         }
1392
1393                         dev_put(dev);
1394                 } else if (!sched_next || tdif < sched_next)
1395                         sched_next = tdif;
1396         }
1397         del_timer(&tbl->proxy_timer);
1398         if (sched_next)
1399                 mod_timer(&tbl->proxy_timer, jiffies + sched_next);
1400         spin_unlock(&tbl->proxy_queue.lock);
1401 }
1402
1403 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1404                     struct sk_buff *skb)
1405 {
1406         unsigned long now = jiffies;
1407         unsigned long sched_next = now + (net_random() % p->proxy_delay);
1408
1409         if (tbl->proxy_queue.qlen > p->proxy_qlen) {
1410                 kfree_skb(skb);
1411                 return;
1412         }
1413
1414         NEIGH_CB(skb)->sched_next = sched_next;
1415         NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1416
1417         spin_lock(&tbl->proxy_queue.lock);
1418         if (del_timer(&tbl->proxy_timer)) {
1419                 if (time_before(tbl->proxy_timer.expires, sched_next))
1420                         sched_next = tbl->proxy_timer.expires;
1421         }
1422         skb_dst_drop(skb);
1423         dev_hold(skb->dev);
1424         __skb_queue_tail(&tbl->proxy_queue, skb);
1425         mod_timer(&tbl->proxy_timer, sched_next);
1426         spin_unlock(&tbl->proxy_queue.lock);
1427 }
1428 EXPORT_SYMBOL(pneigh_enqueue);
1429
1430 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1431                                                       struct net *net, int ifindex)
1432 {
1433         struct neigh_parms *p;
1434
1435         for (p = &tbl->parms; p; p = p->next) {
1436                 if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1437                     (!p->dev && !ifindex))
1438                         return p;
1439         }
1440
1441         return NULL;
1442 }
1443
1444 struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1445                                       struct neigh_table *tbl)
1446 {
1447         struct neigh_parms *p, *ref;
1448         struct net *net = dev_net(dev);
1449         const struct net_device_ops *ops = dev->netdev_ops;
1450
1451         ref = lookup_neigh_parms(tbl, net, 0);
1452         if (!ref)
1453                 return NULL;
1454
1455         p = kmemdup(ref, sizeof(*p), GFP_KERNEL);
1456         if (p) {
1457                 p->tbl            = tbl;
1458                 atomic_set(&p->refcnt, 1);
1459                 p->reachable_time =
1460                                 neigh_rand_reach_time(p->base_reachable_time);
1461
1462                 if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
1463                         kfree(p);
1464                         return NULL;
1465                 }
1466
1467                 dev_hold(dev);
1468                 p->dev = dev;
1469                 write_pnet(&p->net, hold_net(net));
1470                 p->sysctl_table = NULL;
1471                 write_lock_bh(&tbl->lock);
1472                 p->next         = tbl->parms.next;
1473                 tbl->parms.next = p;
1474                 write_unlock_bh(&tbl->lock);
1475         }
1476         return p;
1477 }
1478 EXPORT_SYMBOL(neigh_parms_alloc);
1479
1480 static void neigh_rcu_free_parms(struct rcu_head *head)
1481 {
1482         struct neigh_parms *parms =
1483                 container_of(head, struct neigh_parms, rcu_head);
1484
1485         neigh_parms_put(parms);
1486 }
1487
1488 void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
1489 {
1490         struct neigh_parms **p;
1491
1492         if (!parms || parms == &tbl->parms)
1493                 return;
1494         write_lock_bh(&tbl->lock);
1495         for (p = &tbl->parms.next; *p; p = &(*p)->next) {
1496                 if (*p == parms) {
1497                         *p = parms->next;
1498                         parms->dead = 1;
1499                         write_unlock_bh(&tbl->lock);
1500                         if (parms->dev)
1501                                 dev_put(parms->dev);
1502                         call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
1503                         return;
1504                 }
1505         }
1506         write_unlock_bh(&tbl->lock);
1507         NEIGH_PRINTK1("neigh_parms_release: not found\n");
1508 }
1509 EXPORT_SYMBOL(neigh_parms_release);
1510
/*
 * Final teardown of a parameter block: give back the namespace reference
 * associated with @parms and free its memory.
 */
static void neigh_parms_destroy(struct neigh_parms *parms)
{
        struct net *net = neigh_parms_net(parms);

        release_net(net);
        kfree(parms);
}
1516
1517 static struct lock_class_key neigh_table_proxy_queue_class;
1518
1519 static void neigh_table_init_no_netlink(struct neigh_table *tbl)
1520 {
1521         unsigned long now = jiffies;
1522         unsigned long phsize;
1523
1524         write_pnet(&tbl->parms.net, &init_net);
1525         atomic_set(&tbl->parms.refcnt, 1);
1526         tbl->parms.reachable_time =
1527                           neigh_rand_reach_time(tbl->parms.base_reachable_time);
1528
1529         tbl->stats = alloc_percpu(struct neigh_statistics);
1530         if (!tbl->stats)
1531                 panic("cannot create neighbour cache statistics");
1532
1533 #ifdef CONFIG_PROC_FS
1534         if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
1535                               &neigh_stat_seq_fops, tbl))
1536                 panic("cannot create neighbour proc dir entry");
1537 #endif
1538
1539         RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));
1540
1541         phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
1542         tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
1543
1544         if (!tbl->nht || !tbl->phash_buckets)
1545                 panic("cannot allocate neighbour cache hashes");
1546
1547         rwlock_init(&tbl->lock);
1548         INIT_DELAYED_WORK_DEFERRABLE(&tbl->gc_work, neigh_periodic_work);
1549         schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
1550         setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
1551         skb_queue_head_init_class(&tbl->proxy_queue,
1552                         &neigh_table_proxy_queue_class);
1553
1554         tbl->last_flush = now;
1555         tbl->last_rand  = now + tbl->parms.reachable_time * 20;
1556 }
1557
1558 void neigh_table_init(struct neigh_table *tbl)
1559 {
1560         struct neigh_table *tmp;
1561
1562         neigh_table_init_no_netlink(tbl);
1563         write_lock(&neigh_tbl_lock);
1564         for (tmp = neigh_tables; tmp; tmp = tmp->next) {
1565                 if (tmp->family == tbl->family)
1566                         break;
1567         }
1568         tbl->next       = neigh_tables;
1569         neigh_tables    = tbl;
1570         write_unlock(&neigh_tbl_lock);
1571
1572         if (unlikely(tmp)) {
1573                 pr_err("Registering multiple tables for family %d\n",
1574                        tbl->family);
1575                 dump_stack();
1576         }
1577 }
1578 EXPORT_SYMBOL(neigh_table_init);
1579
1580 int neigh_table_clear(struct neigh_table *tbl)
1581 {
1582         struct neigh_table **tp;
1583
1584         /* It is not clean... Fix it to unload IPv6 module safely */
1585         cancel_delayed_work_sync(&tbl->gc_work);
1586         del_timer_sync(&tbl->proxy_timer);
1587         pneigh_queue_purge(&tbl->proxy_queue);
1588         neigh_ifdown(tbl, NULL);
1589         if (atomic_read(&tbl->entries))
1590                 pr_crit("neighbour leakage\n");
1591         write_lock(&neigh_tbl_lock);
1592         for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
1593                 if (*tp == tbl) {
1594                         *tp = tbl->next;
1595                         break;
1596                 }
1597         }
1598         write_unlock(&neigh_tbl_lock);
1599
1600         call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
1601                  neigh_hash_free_rcu);
1602         tbl->nht = NULL;
1603
1604         kfree(tbl->phash_buckets);
1605         tbl->phash_buckets = NULL;
1606
1607         remove_proc_entry(tbl->id, init_net.proc_net_stat);
1608
1609         free_percpu(tbl->stats);
1610         tbl->stats = NULL;
1611
1612         return 0;
1613 }
1614 EXPORT_SYMBOL(neigh_table_clear);
1615
1616 static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1617 {
1618         struct net *net = sock_net(skb->sk);
1619         struct ndmsg *ndm;
1620         struct nlattr *dst_attr;
1621         struct neigh_table *tbl;
1622         struct net_device *dev = NULL;
1623         int err = -EINVAL;
1624
1625         ASSERT_RTNL();
1626         if (nlmsg_len(nlh) < sizeof(*ndm))
1627                 goto out;
1628
1629         dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
1630         if (dst_attr == NULL)
1631                 goto out;
1632
1633         ndm = nlmsg_data(nlh);
1634         if (ndm->ndm_ifindex) {
1635                 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1636                 if (dev == NULL) {
1637                         err = -ENODEV;
1638                         goto out;
1639                 }
1640         }
1641
1642         read_lock(&neigh_tbl_lock);
1643         for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1644                 struct neighbour *neigh;
1645
1646                 if (tbl->family != ndm->ndm_family)
1647                         continue;
1648                 read_unlock(&neigh_tbl_lock);
1649
1650                 if (nla_len(dst_attr) < tbl->key_len)
1651                         goto out;
1652
1653                 if (ndm->ndm_flags & NTF_PROXY) {
1654                         err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
1655                         goto out;
1656                 }
1657
1658                 if (dev == NULL)
1659                         goto out;
1660
1661                 neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
1662                 if (neigh == NULL) {
1663                         err = -ENOENT;
1664                         goto out;
1665                 }
1666
1667                 err = neigh_update(neigh, NULL, NUD_FAILED,
1668                                    NEIGH_UPDATE_F_OVERRIDE |
1669                                    NEIGH_UPDATE_F_ADMIN);
1670                 neigh_release(neigh);
1671                 goto out;
1672         }
1673         read_unlock(&neigh_tbl_lock);
1674         err = -EAFNOSUPPORT;
1675
1676 out:
1677         return err;
1678 }
1679
1680 static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1681 {
1682         struct net *net = sock_net(skb->sk);
1683         struct ndmsg *ndm;
1684         struct nlattr *tb[NDA_MAX+1];
1685         struct neigh_table *tbl;
1686         struct net_device *dev = NULL;
1687         int err;
1688
1689         ASSERT_RTNL();
1690         err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
1691         if (err < 0)
1692                 goto out;
1693
1694         err = -EINVAL;
1695         if (tb[NDA_DST] == NULL)
1696                 goto out;
1697
1698         ndm = nlmsg_data(nlh);
1699         if (ndm->ndm_ifindex) {
1700                 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1701                 if (dev == NULL) {
1702                         err = -ENODEV;
1703                         goto out;
1704                 }
1705
1706                 if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
1707                         goto out;
1708         }
1709
1710         read_lock(&neigh_tbl_lock);
1711         for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1712                 int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
1713                 struct neighbour *neigh;
1714                 void *dst, *lladdr;
1715
1716                 if (tbl->family != ndm->ndm_family)
1717                         continue;
1718                 read_unlock(&neigh_tbl_lock);
1719
1720                 if (nla_len(tb[NDA_DST]) < tbl->key_len)
1721                         goto out;
1722                 dst = nla_data(tb[NDA_DST]);
1723                 lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
1724
1725                 if (ndm->ndm_flags & NTF_PROXY) {
1726                         struct pneigh_entry *pn;
1727
1728                         err = -ENOBUFS;
1729                         pn = pneigh_lookup(tbl, net, dst, dev, 1);
1730                         if (pn) {
1731                                 pn->flags = ndm->ndm_flags;
1732                                 err = 0;
1733                         }
1734                         goto out;
1735                 }
1736
1737                 if (dev == NULL)
1738                         goto out;
1739
1740                 neigh = neigh_lookup(tbl, dst, dev);
1741                 if (neigh == NULL) {
1742                         if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
1743                                 err = -ENOENT;
1744                                 goto out;
1745                         }
1746
1747                         neigh = __neigh_lookup_errno(tbl, dst, dev);
1748                         if (IS_ERR(neigh)) {
1749                                 err = PTR_ERR(neigh);
1750                                 goto out;
1751                         }
1752                 } else {
1753                         if (nlh->nlmsg_flags & NLM_F_EXCL) {
1754                                 err = -EEXIST;
1755                                 neigh_release(neigh);
1756                                 goto out;
1757                         }
1758
1759                         if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
1760                                 flags &= ~NEIGH_UPDATE_F_OVERRIDE;
1761                 }
1762
1763                 if (ndm->ndm_flags & NTF_USE) {
1764                         neigh_event_send(neigh, NULL);
1765                         err = 0;
1766                 } else
1767                         err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
1768                 neigh_release(neigh);
1769                 goto out;
1770         }
1771
1772         read_unlock(&neigh_tbl_lock);
1773         err = -EAFNOSUPPORT;
1774 out:
1775         return err;
1776 }
1777
1778 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
1779 {
1780         struct nlattr *nest;
1781
1782         nest = nla_nest_start(skb, NDTA_PARMS);
1783         if (nest == NULL)
1784                 return -ENOBUFS;
1785
1786         if ((parms->dev &&
1787              nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
1788             nla_put_u32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)) ||
1789             nla_put_u32(skb, NDTPA_QUEUE_LENBYTES, parms->queue_len_bytes) ||
1790             /* approximative value for deprecated QUEUE_LEN (in packets) */
1791             nla_put_u32(skb, NDTPA_QUEUE_LEN,
1792                         DIV_ROUND_UP(parms->queue_len_bytes,
1793                                      SKB_TRUESIZE(ETH_FRAME_LEN))) ||
1794             nla_put_u32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen) ||
1795             nla_put_u32(skb, NDTPA_APP_PROBES, parms->app_probes) ||
1796             nla_put_u32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes) ||
1797             nla_put_u32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes) ||
1798             nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time) ||
1799             nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
1800                           parms->base_reachable_time) ||
1801             nla_put_msecs(skb, NDTPA_GC_STALETIME, parms->gc_staletime) ||
1802             nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
1803                           parms->delay_probe_time) ||
1804             nla_put_msecs(skb, NDTPA_RETRANS_TIME, parms->retrans_time) ||
1805             nla_put_msecs(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay) ||
1806             nla_put_msecs(skb, NDTPA_PROXY_DELAY, parms->proxy_delay) ||
1807             nla_put_msecs(skb, NDTPA_LOCKTIME, parms->locktime))
1808                 goto nla_put_failure;
1809         return nla_nest_end(skb, nest);
1810
1811 nla_put_failure:
1812         nla_nest_cancel(skb, nest);
1813         return -EMSGSIZE;
1814 }
1815
1816 static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
1817                               u32 pid, u32 seq, int type, int flags)
1818 {
1819         struct nlmsghdr *nlh;
1820         struct ndtmsg *ndtmsg;
1821
1822         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1823         if (nlh == NULL)
1824                 return -EMSGSIZE;
1825
1826         ndtmsg = nlmsg_data(nlh);
1827
1828         read_lock_bh(&tbl->lock);
1829         ndtmsg->ndtm_family = tbl->family;
1830         ndtmsg->ndtm_pad1   = 0;
1831         ndtmsg->ndtm_pad2   = 0;
1832
1833         if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
1834             nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval) ||
1835             nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
1836             nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
1837             nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
1838                 goto nla_put_failure;
1839         {
1840                 unsigned long now = jiffies;
1841                 unsigned int flush_delta = now - tbl->last_flush;
1842                 unsigned int rand_delta = now - tbl->last_rand;
1843                 struct neigh_hash_table *nht;
1844                 struct ndt_config ndc = {
1845                         .ndtc_key_len           = tbl->key_len,
1846                         .ndtc_entry_size        = tbl->entry_size,
1847                         .ndtc_entries           = atomic_read(&tbl->entries),
1848                         .ndtc_last_flush        = jiffies_to_msecs(flush_delta),
1849                         .ndtc_last_rand         = jiffies_to_msecs(rand_delta),
1850                         .ndtc_proxy_qlen        = tbl->proxy_queue.qlen,
1851                 };
1852
1853                 rcu_read_lock_bh();
1854                 nht = rcu_dereference_bh(tbl->nht);
1855                 ndc.ndtc_hash_rnd = nht->hash_rnd[0];
1856                 ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
1857                 rcu_read_unlock_bh();
1858
1859                 if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
1860                         goto nla_put_failure;
1861         }
1862
1863         {
1864                 int cpu;
1865                 struct ndt_stats ndst;
1866
1867                 memset(&ndst, 0, sizeof(ndst));
1868
1869                 for_each_possible_cpu(cpu) {
1870                         struct neigh_statistics *st;
1871
1872                         st = per_cpu_ptr(tbl->stats, cpu);
1873                         ndst.ndts_allocs                += st->allocs;
1874                         ndst.ndts_destroys              += st->destroys;
1875                         ndst.ndts_hash_grows            += st->hash_grows;
1876                         ndst.ndts_res_failed            += st->res_failed;
1877                         ndst.ndts_lookups               += st->lookups;
1878                         ndst.ndts_hits                  += st->hits;
1879                         ndst.ndts_rcv_probes_mcast      += st->rcv_probes_mcast;
1880                         ndst.ndts_rcv_probes_ucast      += st->rcv_probes_ucast;
1881                         ndst.ndts_periodic_gc_runs      += st->periodic_gc_runs;
1882                         ndst.ndts_forced_gc_runs        += st->forced_gc_runs;
1883                 }
1884
1885                 if (nla_put(skb, NDTA_STATS, sizeof(ndst), &ndst))
1886                         goto nla_put_failure;
1887         }
1888
1889         BUG_ON(tbl->parms.dev);
1890         if (neightbl_fill_parms(skb, &tbl->parms) < 0)
1891                 goto nla_put_failure;
1892
1893         read_unlock_bh(&tbl->lock);
1894         return nlmsg_end(skb, nlh);
1895
1896 nla_put_failure:
1897         read_unlock_bh(&tbl->lock);
1898         nlmsg_cancel(skb, nlh);
1899         return -EMSGSIZE;
1900 }
1901
1902 static int neightbl_fill_param_info(struct sk_buff *skb,
1903                                     struct neigh_table *tbl,
1904                                     struct neigh_parms *parms,
1905                                     u32 pid, u32 seq, int type,
1906                                     unsigned int flags)
1907 {
1908         struct ndtmsg *ndtmsg;
1909         struct nlmsghdr *nlh;
1910
1911         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1912         if (nlh == NULL)
1913                 return -EMSGSIZE;
1914
1915         ndtmsg = nlmsg_data(nlh);
1916
1917         read_lock_bh(&tbl->lock);
1918         ndtmsg->ndtm_family = tbl->family;
1919         ndtmsg->ndtm_pad1   = 0;
1920         ndtmsg->ndtm_pad2   = 0;
1921
1922         if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
1923             neightbl_fill_parms(skb, parms) < 0)
1924                 goto errout;
1925
1926         read_unlock_bh(&tbl->lock);
1927         return nlmsg_end(skb, nlh);
1928 errout:
1929         read_unlock_bh(&tbl->lock);
1930         nlmsg_cancel(skb, nlh);
1931         return -EMSGSIZE;
1932 }
1933
1934 static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
1935         [NDTA_NAME]             = { .type = NLA_STRING },
1936         [NDTA_THRESH1]          = { .type = NLA_U32 },
1937         [NDTA_THRESH2]          = { .type = NLA_U32 },
1938         [NDTA_THRESH3]          = { .type = NLA_U32 },
1939         [NDTA_GC_INTERVAL]      = { .type = NLA_U64 },
1940         [NDTA_PARMS]            = { .type = NLA_NESTED },
1941 };
1942
1943 static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
1944         [NDTPA_IFINDEX]                 = { .type = NLA_U32 },
1945         [NDTPA_QUEUE_LEN]               = { .type = NLA_U32 },
1946         [NDTPA_PROXY_QLEN]              = { .type = NLA_U32 },
1947         [NDTPA_APP_PROBES]              = { .type = NLA_U32 },
1948         [NDTPA_UCAST_PROBES]            = { .type = NLA_U32 },
1949         [NDTPA_MCAST_PROBES]            = { .type = NLA_U32 },
1950         [NDTPA_BASE_REACHABLE_TIME]     = { .type = NLA_U64 },
1951         [NDTPA_GC_STALETIME]            = { .type = NLA_U64 },
1952         [NDTPA_DELAY_PROBE_TIME]        = { .type = NLA_U64 },
1953         [NDTPA_RETRANS_TIME]            = { .type = NLA_U64 },
1954         [NDTPA_ANYCAST_DELAY]           = { .type = NLA_U64 },
1955         [NDTPA_PROXY_DELAY]             = { .type = NLA_U64 },
1956         [NDTPA_LOCKTIME]                = { .type = NLA_U64 },
1957 };
1958
1959 static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1960 {
1961         struct net *net = sock_net(skb->sk);
1962         struct neigh_table *tbl;
1963         struct ndtmsg *ndtmsg;
1964         struct nlattr *tb[NDTA_MAX+1];
1965         int err;
1966
1967         err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
1968                           nl_neightbl_policy);
1969         if (err < 0)
1970                 goto errout;
1971
1972         if (tb[NDTA_NAME] == NULL) {
1973                 err = -EINVAL;
1974                 goto errout;
1975         }
1976
1977         ndtmsg = nlmsg_data(nlh);
1978         read_lock(&neigh_tbl_lock);
1979         for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1980                 if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
1981                         continue;
1982
1983                 if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
1984                         break;
1985         }
1986
1987         if (tbl == NULL) {
1988                 err = -ENOENT;
1989                 goto errout_locked;
1990         }
1991
1992         /*
1993          * We acquire tbl->lock to be nice to the periodic timers and
1994          * make sure they always see a consistent set of values.
1995          */
1996         write_lock_bh(&tbl->lock);
1997
1998         if (tb[NDTA_PARMS]) {
1999                 struct nlattr *tbp[NDTPA_MAX+1];
2000                 struct neigh_parms *p;
2001                 int i, ifindex = 0;
2002
2003                 err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
2004                                        nl_ntbl_parm_policy);
2005                 if (err < 0)
2006                         goto errout_tbl_lock;
2007
2008                 if (tbp[NDTPA_IFINDEX])
2009                         ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
2010
2011                 p = lookup_neigh_parms(tbl, net, ifindex);
2012                 if (p == NULL) {
2013                         err = -ENOENT;
2014                         goto errout_tbl_lock;
2015                 }
2016
2017                 for (i = 1; i <= NDTPA_MAX; i++) {
2018                         if (tbp[i] == NULL)
2019                                 continue;
2020
2021                         switch (i) {
2022                         case NDTPA_QUEUE_LEN:
2023                                 p->queue_len_bytes = nla_get_u32(tbp[i]) *
2024                                                      SKB_TRUESIZE(ETH_FRAME_LEN);
2025                                 break;
2026                         case NDTPA_QUEUE_LENBYTES:
2027                                 p->queue_len_bytes = nla_get_u32(tbp[i]);
2028                                 break;
2029                         case NDTPA_PROXY_QLEN:
2030                                 p->proxy_qlen = nla_get_u32(tbp[i]);
2031                                 break;
2032                         case NDTPA_APP_PROBES:
2033                                 p->app_probes = nla_get_u32(tbp[i]);
2034                                 break;
2035                         case NDTPA_UCAST_PROBES:
2036                                 p->ucast_probes = nla_get_u32(tbp[i]);
2037                                 break;
2038                         case NDTPA_MCAST_PROBES:
2039                                 p->mcast_probes = nla_get_u32(tbp[i]);
2040                                 break;
2041                         case NDTPA_BASE_REACHABLE_TIME:
2042                                 p->base_reachable_time = nla_get_msecs(tbp[i]);
2043                                 break;
2044                         case NDTPA_GC_STALETIME:
2045                                 p->gc_staletime = nla_get_msecs(tbp[i]);
2046                                 break;
2047                         case NDTPA_DELAY_PROBE_TIME:
2048                                 p->delay_probe_time = nla_get_msecs(tbp[i]);
2049                                 break;
2050                         case NDTPA_RETRANS_TIME:
2051                                 p->retrans_time = nla_get_msecs(tbp[i]);
2052                                 break;
2053                         case NDTPA_ANYCAST_DELAY:
2054                                 p->anycast_delay = nla_get_msecs(tbp[i]);
2055                                 break;
2056                         case NDTPA_PROXY_DELAY:
2057                                 p->proxy_delay = nla_get_msecs(tbp[i]);
2058                                 break;
2059                         case NDTPA_LOCKTIME:
2060                                 p->locktime = nla_get_msecs(tbp[i]);
2061                                 break;
2062                         }
2063                 }
2064         }
2065
2066         if (tb[NDTA_THRESH1])
2067                 tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
2068
2069         if (tb[NDTA_THRESH2])
2070                 tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
2071
2072         if (tb[NDTA_THRESH3])
2073                 tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
2074
2075         if (tb[NDTA_GC_INTERVAL])
2076                 tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
2077
2078         err = 0;
2079
2080 errout_tbl_lock:
2081         write_unlock_bh(&tbl->lock);
2082 errout_locked:
2083         read_unlock(&neigh_tbl_lock);
2084 errout:
2085         return err;
2086 }
2087
2088 static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2089 {
2090         struct net *net = sock_net(skb->sk);
2091         int family, tidx, nidx = 0;
2092         int tbl_skip = cb->args[0];
2093         int neigh_skip = cb->args[1];
2094         struct neigh_table *tbl;
2095
2096         family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2097
2098         read_lock(&neigh_tbl_lock);
2099         for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
2100                 struct neigh_parms *p;
2101
2102                 if (tidx < tbl_skip || (family && tbl->family != family))
2103                         continue;
2104
2105                 if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).pid,
2106                                        cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
2107                                        NLM_F_MULTI) <= 0)
2108                         break;
2109
2110                 for (nidx = 0, p = tbl->parms.next; p; p = p->next) {
2111                         if (!net_eq(neigh_parms_net(p), net))
2112                                 continue;
2113
2114                         if (nidx < neigh_skip)
2115                                 goto next;
2116
2117                         if (neightbl_fill_param_info(skb, tbl, p,
2118                                                      NETLINK_CB(cb->skb).pid,
2119                                                      cb->nlh->nlmsg_seq,
2120                                                      RTM_NEWNEIGHTBL,
2121                                                      NLM_F_MULTI) <= 0)
2122                                 goto out;
2123                 next:
2124                         nidx++;
2125                 }
2126
2127                 neigh_skip = 0;
2128         }
2129 out:
2130         read_unlock(&neigh_tbl_lock);
2131         cb->args[0] = tidx;
2132         cb->args[1] = nidx;
2133
2134         return skb->len;
2135 }
2136
2137 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
2138                            u32 pid, u32 seq, int type, unsigned int flags)
2139 {
2140         unsigned long now = jiffies;
2141         struct nda_cacheinfo ci;
2142         struct nlmsghdr *nlh;
2143         struct ndmsg *ndm;
2144
2145         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2146         if (nlh == NULL)
2147                 return -EMSGSIZE;
2148
2149         ndm = nlmsg_data(nlh);
2150         ndm->ndm_family  = neigh->ops->family;
2151         ndm->ndm_pad1    = 0;
2152         ndm->ndm_pad2    = 0;
2153         ndm->ndm_flags   = neigh->flags;
2154         ndm->ndm_type    = neigh->type;
2155         ndm->ndm_ifindex = neigh->dev->ifindex;
2156
2157         if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
2158                 goto nla_put_failure;
2159
2160         read_lock_bh(&neigh->lock);
2161         ndm->ndm_state   = neigh->nud_state;
2162         if (neigh->nud_state & NUD_VALID) {
2163                 char haddr[MAX_ADDR_LEN];
2164
2165                 neigh_ha_snapshot(haddr, neigh, neigh->dev);
2166                 if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
2167                         read_unlock_bh(&neigh->lock);
2168                         goto nla_put_failure;
2169                 }
2170         }
2171
2172         ci.ndm_used      = jiffies_to_clock_t(now - neigh->used);
2173         ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
2174         ci.ndm_updated   = jiffies_to_clock_t(now - neigh->updated);
2175         ci.ndm_refcnt    = atomic_read(&neigh->refcnt) - 1;
2176         read_unlock_bh(&neigh->lock);
2177
2178         if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
2179             nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
2180                 goto nla_put_failure;
2181
2182         return nlmsg_end(skb, nlh);
2183
2184 nla_put_failure:
2185         nlmsg_cancel(skb, nlh);
2186         return -EMSGSIZE;
2187 }
2188
2189 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2190                             u32 pid, u32 seq, int type, unsigned int flags,
2191                             struct neigh_table *tbl)
2192 {
2193         struct nlmsghdr *nlh;
2194         struct ndmsg *ndm;
2195
2196         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2197         if (nlh == NULL)
2198                 return -EMSGSIZE;
2199
2200         ndm = nlmsg_data(nlh);
2201         ndm->ndm_family  = tbl->family;
2202         ndm->ndm_pad1    = 0;
2203         ndm->ndm_pad2    = 0;
2204         ndm->ndm_flags   = pn->flags | NTF_PROXY;
2205         ndm->ndm_type    = NDA_DST;
2206         ndm->ndm_ifindex = pn->dev->ifindex;
2207         ndm->ndm_state   = NUD_NONE;
2208
2209         if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2210                 goto nla_put_failure;
2211
2212         return nlmsg_end(skb, nlh);
2213
2214 nla_put_failure:
2215         nlmsg_cancel(skb, nlh);
2216         return -EMSGSIZE;
2217 }
2218
/* Publish a change to @neigh: first on the netevent notifier chain as
 * NETEVENT_NEIGH_UPDATE, then through __neigh_notify() as RTM_NEWNEIGH
 * with no extra flags.
 */
2219 static void neigh_update_notify(struct neighbour *neigh)
2220 {
2221         call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
2222         __neigh_notify(neigh, RTM_NEWNEIGH, 0);
2223 }
2224
2225 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2226                             struct netlink_callback *cb)
2227 {
2228         struct net *net = sock_net(skb->sk);
2229         struct neighbour *n;
2230         int rc, h, s_h = cb->args[1];
2231         int idx, s_idx = idx = cb->args[2];
2232         struct neigh_hash_table *nht;
2233
2234         rcu_read_lock_bh();
2235         nht = rcu_dereference_bh(tbl->nht);
2236
2237         for (h = s_h; h < (1 << nht->hash_shift); h++) {
2238                 if (h > s_h)
2239                         s_idx = 0;
2240                 for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
2241                      n != NULL;
2242                      n = rcu_dereference_bh(n->next)) {
2243                         if (!net_eq(dev_net(n->dev), net))
2244                                 continue;
2245                         if (idx < s_idx)
2246                                 goto next;
2247                         if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
2248                                             cb->nlh->nlmsg_seq,
2249                                             RTM_NEWNEIGH,
2250                                             NLM_F_MULTI) <= 0) {
2251                                 rc = -1;
2252                                 goto out;
2253                         }
2254 next:
2255                         idx++;
2256                 }
2257         }
2258         rc = skb->len;
2259 out:
2260         rcu_read_unlock_bh();
2261         cb->args[1] = h;
2262         cb->args[2] = idx;
2263         return rc;
2264 }
2265
2266 static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2267                              struct netlink_callback *cb)
2268 {
2269         struct pneigh_entry *n;
2270         struct net *net = sock_net(skb->sk);
2271         int rc, h, s_h = cb->args[3];
2272         int idx, s_idx = idx = cb->args[4];
2273
2274         read_lock_bh(&tbl->lock);
2275
2276         for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
2277                 if (h > s_h)
2278                         s_idx = 0;
2279                 for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
2280                         if (dev_net(n->dev) != net)
2281                                 continue;
2282                         if (idx < s_idx)
2283                                 goto next;
2284                         if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
2285                                             cb->nlh->nlmsg_seq,
2286                                             RTM_NEWNEIGH,
2287                                             NLM_F_MULTI, tbl) <= 0) {
2288                                 read_unlock_bh(&tbl->lock);
2289                                 rc = -1;
2290                                 goto out;
2291                         }
2292                 next:
2293                         idx++;
2294                 }
2295         }
2296
2297         read_unlock_bh(&tbl->lock);
2298         rc = skb->len;
2299 out:
2300         cb->args[3] = h;
2301         cb->args[4] = idx;
2302         return rc;
2303
2304 }
2305
2306 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2307 {
2308         struct neigh_table *tbl;
2309         int t, family, s_t;
2310         int proxy = 0;
2311         int err;
2312
2313         read_lock(&neigh_tbl_lock);
2314         family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2315
2316         /* check for full ndmsg structure presence, family member is
2317          * the same for both structures
2318          */
2319         if (nlmsg_len(cb->nlh) >= sizeof(struct ndmsg) &&
2320             ((struct ndmsg *) nlmsg_data(cb->nlh))->ndm_flags == NTF_PROXY)
2321                 proxy = 1;
2322
2323         s_t = cb->args[0];
2324
2325         for (tbl = neigh_tables, t = 0; tbl;
2326              tbl = tbl->next, t++) {
2327                 if (t < s_t || (family && tbl->family != family))
2328                         continue;
2329                 if (t > s_t)
2330                         memset(&cb->args[1], 0, sizeof(cb->args) -
2331                                                 sizeof(cb->args[0]));
2332                 if (proxy)
2333                         err = pneigh_dump_table(tbl, skb, cb);
2334                 else
2335                         err = neigh_dump_table(tbl, skb, cb);
2336                 if (err < 0)
2337                         break;
2338         }
2339         read_unlock(&neigh_tbl_lock);
2340
2341         cb->args[0] = t;
2342         return skb->len;
2343 }
2344
2345 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2346 {
2347         int chain;
2348         struct neigh_hash_table *nht;
2349
2350         rcu_read_lock_bh();
2351         nht = rcu_dereference_bh(tbl->nht);
2352
2353         read_lock(&tbl->lock); /* avoid resizes */
2354         for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2355                 struct neighbour *n;
2356
2357                 for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
2358                      n != NULL;
2359                      n = rcu_dereference_bh(n->next))
2360                         cb(n, cookie);
2361         }
2362         read_unlock(&tbl->lock);
2363         rcu_read_unlock_bh();
2364 }
2365 EXPORT_SYMBOL(neigh_for_each);
2366
/*
 * Walk every hash chain of @tbl and call @cb on each entry with that
 * entry's own lock held for writing.  When @cb returns non-zero the entry
 * is unlinked from its chain, marked dead and handed to
 * neigh_cleanup_and_release(); otherwise it is left in place.
 */
2367 /* The tbl->lock must be held as a writer and BH disabled. */
2368 void __neigh_for_each_release(struct neigh_table *tbl,
2369                               int (*cb)(struct neighbour *))
2370 {
2371         int chain;
2372         struct neigh_hash_table *nht;
2373
2374         nht = rcu_dereference_protected(tbl->nht,
2375                                         lockdep_is_held(&tbl->lock));
2376         for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2377                 struct neighbour *n;
2378                 struct neighbour __rcu **np;
2379
             /* np always points at the link that leads to the entry under
              * inspection, so an entry can be unlinked by rewriting *np.
              */
2380                 np = &nht->hash_buckets[chain];
2381                 while ((n = rcu_dereference_protected(*np,
2382                                         lockdep_is_held(&tbl->lock))) != NULL) {
2383                         int release;
2384
2385                         write_lock(&n->lock);
2386                         release = cb(n);
2387                         if (release) {
                             /* unlink: np stays put and now leads to the
                              * successor of n
                              */
2388                                 rcu_assign_pointer(*np,
2389                                         rcu_dereference_protected(n->next,
2390                                                 lockdep_is_held(&tbl->lock)));
2391                                 n->dead = 1;
2392                         } else
2393                                 np = &n->next;
2394                         write_unlock(&n->lock);
                     /* the release step runs only after n->lock is dropped */
2395                         if (release)
2396                                 neigh_cleanup_and_release(n);
2397                 }
2398         }
2399 }
2400 EXPORT_SYMBOL(__neigh_for_each_release);
2401
2402 #ifdef CONFIG_PROC_FS
2403
2404 static struct neighbour *neigh_get_first(struct seq_file *seq)
2405 {
2406         struct neigh_seq_state *state = seq->private;
2407         struct net *net = seq_file_net(seq);
2408         struct neigh_hash_table *nht = state->nht;
2409         struct neighbour *n = NULL;
2410         int bucket = state->bucket;
2411
2412         state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
2413         for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
2414                 n = rcu_dereference_bh(nht->hash_buckets[bucket]);
2415
2416                 while (n) {
2417                         if (!net_eq(dev_net(n->dev), net))
2418                                 goto next;
2419                         if (state->neigh_sub_iter) {
2420                                 loff_t fakep = 0;
2421                                 void *v;
2422
2423                                 v = state->neigh_sub_iter(state, n, &fakep);
2424                                 if (!v)
2425                                         goto next;
2426                         }
2427                         if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2428                                 break;
2429                         if (n->nud_state & ~NUD_NOARP)
2430                                 break;
2431 next:
2432                         n = rcu_dereference_bh(n->next);
2433                 }
2434
2435                 if (n)
2436                         break;
2437         }
2438         state->bucket = bucket;
2439
2440         return n;
2441 }
2442
/*
 * seq_file helper: advance from entry @n to the next entry that should
 * be shown, continuing into later hash buckets (tracked in state->bucket)
 * when the current chain is exhausted.  Returns NULL when every bucket
 * has been scanned.
 *
 * When state->neigh_sub_iter is set it is consulted first for @n itself
 * and then for each candidate; a non-NULL result makes this function
 * return that entry immediately, without touching *pos here (the callback
 * receives @pos).  Otherwise, when an entry is found and @pos is
 * non-NULL, *pos is decremented.
 */
2443 static struct neighbour *neigh_get_next(struct seq_file *seq,
2444                                         struct neighbour *n,
2445                                         loff_t *pos)
2446 {
2447         struct neigh_seq_state *state = seq->private;
2448         struct net *net = seq_file_net(seq);
2449         struct neigh_hash_table *nht = state->nht;
2450
     /* let the sub-iterator stay on the current entry if it has more */
2451         if (state->neigh_sub_iter) {
2452                 void *v = state->neigh_sub_iter(state, n, pos);
2453                 if (v)
2454                         return n;
2455         }
2456         n = rcu_dereference_bh(n->next);
2457
2458         while (1) {
             /* scan the rest of the current chain for a usable entry */
2459                 while (n) {
2460                         if (!net_eq(dev_net(n->dev), net))
2461                                 goto next;
2462                         if (state->neigh_sub_iter) {
2463                                 void *v = state->neigh_sub_iter(state, n, pos);
2464                                 if (v)
2465                                         return n;
2466                                 goto next;
2467                         }
2468                         if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2469                                 break;
2470
                     /* with SKIP_NOARP set, accept only entries that have
                      * some state bit other than NUD_NOARP
                      */
2471                         if (n->nud_state & ~NUD_NOARP)
2472                                 break;
2473 next:
2474                         n = rcu_dereference_bh(n->next);
2475                 }
2476
2477                 if (n)
2478                         break;
2479
             /* chain exhausted: move on to the next bucket, if any */
2480                 if (++state->bucket >= (1 << nht->hash_shift))
2481                         break;
2482
2483                 n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
2484         }
2485
2486         if (n && pos)
2487                 --(*pos);
2488         return n;
2489 }
2490
2491 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2492 {
2493         struct neighbour *n = neigh_get_first(seq);
2494
2495         if (n) {
2496                 --(*pos);
2497                 while (*pos) {
2498                         n = neigh_get_next(seq, n, pos);
2499                         if (!n)
2500                                 break;
2501                 }
2502         }
2503         return *pos ? NULL : n;
2504 }
2505
2506 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2507 {
2508         struct neigh_seq_state *state = seq->private;
2509         struct net *net = seq_file_net(seq);
2510         struct neigh_table *tbl = state->tbl;
2511         struct pneigh_entry *pn = NULL;
2512         int bucket = state->bucket;
2513
2514         state->flags |= NEIGH_SEQ_IS_PNEIGH;
2515         for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2516                 pn = tbl->phash_buckets[bucket];
2517                 while (pn && !net_eq(pneigh_net(pn), net))
2518                         pn = pn->next;
2519                 if (pn)
2520                         break;
2521         }
2522         state->bucket = bucket;
2523
2524         return pn;
2525 }
2526
2527 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2528                                             struct pneigh_entry *pn,
2529                                             loff_t *pos)
2530 {
2531         struct neigh_seq_state *state = seq->private;
2532         struct net *net = seq_file_net(seq);
2533         struct neigh_table *tbl = state->tbl;
2534
2535         do {
2536                 pn = pn->next;
2537         } while (pn && !net_eq(pneigh_net(pn), net));
2538
2539         while (!pn) {
2540                 if (++state->bucket > PNEIGH_HASHMASK)
2541                         break;
2542                 pn = tbl->phash_buckets[state->bucket];
2543                 while (pn && !net_eq(pneigh_net(pn), net))
2544                         pn = pn->next;
2545                 if (pn)
2546                         break;
2547         }
2548
2549         if (pn && pos)
2550                 --(*pos);
2551
2552         return pn;
2553 }
2554
2555 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2556 {
2557         struct pneigh_entry *pn = pneigh_get_first(seq);
2558
2559         if (pn) {
2560                 --(*pos);
2561                 while (*pos) {
2562                         pn = pneigh_get_next(seq, pn, pos);
2563                         if (!pn)
2564                                 break;
2565                 }
2566         }
2567         return *pos ? NULL : pn;
2568 }
2569
2570 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2571 {
2572         struct neigh_seq_state *state = seq->private;
2573         void *rc;
2574         loff_t idxpos = *pos;
2575
2576         rc = neigh_get_idx(seq, &idxpos);
2577         if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2578                 rc = pneigh_get_idx(seq, &idxpos);
2579
2580         return rc;
2581 }
2582
2583 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
2584         __acquires(rcu_bh)
2585 {
2586         struct neigh_seq_state *state = seq->private;
2587
2588         state->tbl = tbl;
2589         state->bucket = 0;
2590         state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
2591
2592         rcu_read_lock_bh();
2593         state->nht = rcu_dereference_bh(tbl->nht);
2594
2595         return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
2596 }
2597 EXPORT_SYMBOL(neigh_seq_start);
2598
2599 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2600 {
2601         struct neigh_seq_state *state;
2602         void *rc;
2603
2604         if (v == SEQ_START_TOKEN) {
2605                 rc = neigh_get_first(seq);
2606                 goto out;
2607         }
2608
2609         state = seq->private;
2610         if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
2611                 rc = neigh_get_next(seq, v, NULL);
2612                 if (rc)
2613                         goto out;
2614                 if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2615                         rc = pneigh_get_first(seq);
2616         } else {
2617                 BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
2618                 rc = pneigh_get_next(seq, v, NULL);
2619         }
2620 out:
2621         ++(*pos);
2622         return rc;
2623 }
2624 EXPORT_SYMBOL(neigh_seq_next);
2625
2626 void neigh_seq_stop(struct seq_file *seq, void *v)
2627         __releases(rcu_bh)
2628 {
2629         rcu_read_unlock_bh();
2630 }
2631 EXPORT_SYMBOL(neigh_seq_stop);
2632
2633 /* statistics via seq_file */
2634
2635 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2636 {
2637         struct neigh_table *tbl = seq->private;
2638         int cpu;
2639
2640         if (*pos == 0)
2641                 return SEQ_START_TOKEN;
2642
2643         for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
2644                 if (!cpu_possible(cpu))
2645                         continue;
2646                 *pos = cpu+1;
2647                 return per_cpu_ptr(tbl->stats, cpu);
2648         }
2649         return NULL;
2650 }
2651
2652 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2653 {
2654         struct neigh_table *tbl = seq->private;
2655         int cpu;
2656
2657         for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2658                 if (!cpu_possible(cpu))
2659                         continue;
2660                 *pos = cpu+1;
2661                 return per_cpu_ptr(tbl->stats, cpu);
2662         }
2663         return NULL;
2664 }
2665
/*
 * ->stop for the per-cpu statistics file.  The start/next callbacks above
 * take no lock and allocate nothing, so there is nothing to undo here.
 */
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{
}
2670
2671 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
2672 {
2673         struct neigh_table *tbl = seq->private;
2674         struct neigh_statistics *st = v;
2675
2676         if (v == SEQ_START_TOKEN) {
2677                 seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards\n");
2678                 return 0;
2679         }
2680
2681         seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
2682                         "%08lx %08lx  %08lx %08lx %08lx\n",
2683                    atomic_read(&tbl->entries),
2684
2685                    st->allocs,
2686                    st->destroys,
2687                    st->hash_grows,
2688
2689                    st->lookups,
2690                    st->hits,
2691
2692                    st->res_failed,
2693
2694                    st->rcv_probes_mcast,
2695                    st->rcv_probes_ucast,
2696
2697                    st->periodic_gc_runs,
2698                    st->forced_gc_runs,
2699                    st->unres_discards
2700                    );
2701
2702         return 0;
2703 }
2704
2705 static const struct seq_operations neigh_stat_seq_ops = {
2706         .start  = neigh_stat_seq_start,
2707         .next   = neigh_stat_seq_next,
2708         .stop   = neigh_stat_seq_stop,
2709         .show   = neigh_stat_seq_show,
2710 };
2711
2712 static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2713 {
2714         int ret = seq_open(file, &neigh_stat_seq_ops);
2715
2716         if (!ret) {
2717                 struct seq_file *sf = file->private_data;
2718                 sf->private = PDE(inode)->data;
2719         }
2720         return ret;
2721 };
2722
2723 static const struct file_operations neigh_stat_seq_fops = {
2724         .owner   = THIS_MODULE,
2725         .open    = neigh_stat_seq_open,
2726         .read    = seq_read,
2727         .llseek  = seq_lseek,
2728         .release = seq_release,
2729 };
2730
2731 #endif /* CONFIG_PROC_FS */
2732
2733 static inline size_t neigh_nlmsg_size(void)
2734 {
2735         return NLMSG_ALIGN(sizeof(struct ndmsg))
2736                + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2737                + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2738                + nla_total_size(sizeof(struct nda_cacheinfo))
2739                + nla_total_size(4); /* NDA_PROBES */
2740 }
2741
2742 static void __neigh_notify(struct neighbour *n, int type, int flags)
2743 {
2744         struct net *net = dev_net(n->dev);
2745         struct sk_buff *skb;
2746         int err = -ENOBUFS;
2747
2748         skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
2749         if (skb == NULL)
2750                 goto errout;
2751
2752         err = neigh_fill_info(skb, n, 0, 0, type, flags);
2753         if (err < 0) {
2754                 /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
2755                 WARN_ON(err == -EMSGSIZE);
2756                 kfree_skb(skb);
2757                 goto errout;
2758         }
2759         rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
2760         return;
2761 errout:
2762         if (err < 0)
2763                 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
2764 }
2765
2766 #ifdef CONFIG_ARPD
2767 void neigh_app_ns(struct neighbour *n)
2768 {
2769         __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
2770 }
2771 EXPORT_SYMBOL(neigh_app_ns);
2772 #endif /* CONFIG_ARPD */
2773
2774 #ifdef CONFIG_SYSCTL
2775
2776 static int proc_unres_qlen(ctl_table *ctl, int write, void __user *buffer,
2777                            size_t *lenp, loff_t *ppos)
2778 {
2779         int size, ret;
2780         ctl_table tmp = *ctl;
2781
2782         tmp.data = &size;
2783         size = DIV_ROUND_UP(*(int *)ctl->data, SKB_TRUESIZE(ETH_FRAME_LEN));
2784         ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);
2785         if (write && !ret)
2786                 *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
2787         return ret;
2788 }
2789
/*
 * Slot numbers for neigh_sysctl_table.neigh_vars[].
 *
 * The order is significant: neigh_sysctl_register() cuts the per-device
 * table short by zeroing the NEIGH_VAR_GC_INTERVAL slot, so everything from
 * there on exists only in the "default" directory.
 */
enum {
	/* backed by fields of struct neigh_parms */
	NEIGH_VAR_MCAST_PROBE = 0,
	NEIGH_VAR_UCAST_PROBE,
	NEIGH_VAR_APP_PROBE,
	NEIGH_VAR_RETRANS_TIME,
	NEIGH_VAR_BASE_REACHABLE_TIME,
	NEIGH_VAR_DELAY_PROBE_TIME,
	NEIGH_VAR_GC_STALETIME,
	NEIGH_VAR_QUEUE_LEN,
	NEIGH_VAR_QUEUE_LEN_BYTES,
	NEIGH_VAR_PROXY_QLEN,
	NEIGH_VAR_ANYCAST_DELAY,
	NEIGH_VAR_PROXY_DELAY,
	NEIGH_VAR_LOCKTIME,
	NEIGH_VAR_RETRANS_TIME_MS,
	NEIGH_VAR_BASE_REACHABLE_TIME_MS,

	/* "default" directory only */
	NEIGH_VAR_GC_INTERVAL,
	NEIGH_VAR_GC_THRESH1,
	NEIGH_VAR_GC_THRESH2,
	NEIGH_VAR_GC_THRESH3,

	/* entry count; neigh_vars[NEIGH_VAR_MAX] is the empty terminator */
	NEIGH_VAR_MAX
};
2812
2813 static struct neigh_sysctl_table {
2814         struct ctl_table_header *sysctl_header;
2815         struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
2816 } neigh_sysctl_template __read_mostly = {
2817         .neigh_vars = {
2818                 [NEIGH_VAR_MCAST_PROBE] = {
2819                         .procname       = "mcast_solicit",
2820                         .maxlen         = sizeof(int),
2821                         .mode           = 0644,
2822                         .proc_handler   = proc_dointvec,
2823                 },
2824                 [NEIGH_VAR_UCAST_PROBE] = {
2825                         .procname       = "ucast_solicit",
2826                         .maxlen         = sizeof(int),
2827                         .mode           = 0644,
2828                         .proc_handler   = proc_dointvec,
2829                 },
2830                 [NEIGH_VAR_APP_PROBE] = {
2831                         .procname       = "app_solicit",
2832                         .maxlen         = sizeof(int),
2833                         .mode           = 0644,
2834                         .proc_handler   = proc_dointvec,
2835                 },
2836                 [NEIGH_VAR_RETRANS_TIME] = {
2837                         .procname       = "retrans_time",
2838                         .maxlen         = sizeof(int),
2839                         .mode           = 0644,
2840                         .proc_handler   = proc_dointvec_userhz_jiffies,
2841                 },
2842                 [NEIGH_VAR_BASE_REACHABLE_TIME] = {
2843                         .procname       = "base_reachable_time",
2844                         .maxlen         = sizeof(int),
2845                         .mode           = 0644,
2846                         .proc_handler   = proc_dointvec_jiffies,
2847                 },
2848                 [NEIGH_VAR_DELAY_PROBE_TIME] = {
2849                         .procname       = "delay_first_probe_time",
2850                         .maxlen         = sizeof(int),
2851                         .mode           = 0644,
2852                         .proc_handler   = proc_dointvec_jiffies,
2853                 },
2854                 [NEIGH_VAR_GC_STALETIME] = {
2855                         .procname       = "gc_stale_time",
2856                         .maxlen         = sizeof(int),
2857                         .mode           = 0644,
2858                         .proc_handler   = proc_dointvec_jiffies,
2859                 },
2860                 [NEIGH_VAR_QUEUE_LEN] = {
2861                         .procname       = "unres_qlen",
2862                         .maxlen         = sizeof(int),
2863                         .mode           = 0644,
2864                         .proc_handler   = proc_unres_qlen,
2865                 },
2866                 [NEIGH_VAR_QUEUE_LEN_BYTES] = {
2867                         .procname       = "unres_qlen_bytes",
2868                         .maxlen         = sizeof(int),
2869                         .mode           = 0644,
2870                         .proc_handler   = proc_dointvec,
2871                 },
2872                 [NEIGH_VAR_PROXY_QLEN] = {
2873                         .procname       = "proxy_qlen",
2874                         .maxlen         = sizeof(int),
2875                         .mode           = 0644,
2876                         .proc_handler   = proc_dointvec,
2877                 },
2878                 [NEIGH_VAR_ANYCAST_DELAY] = {
2879                         .procname       = "anycast_delay",
2880                         .maxlen         = sizeof(int),
2881                         .mode           = 0644,
2882                         .proc_handler   = proc_dointvec_userhz_jiffies,
2883                 },
2884                 [NEIGH_VAR_PROXY_DELAY] = {
2885                         .procname       = "proxy_delay",
2886                         .maxlen         = sizeof(int),
2887                         .mode           = 0644,
2888                         .proc_handler   = proc_dointvec_userhz_jiffies,
2889                 },
2890                 [NEIGH_VAR_LOCKTIME] = {
2891                         .procname       = "locktime",
2892                         .maxlen         = sizeof(int),
2893                         .mode           = 0644,
2894                         .proc_handler   = proc_dointvec_userhz_jiffies,
2895                 },
2896                 [NEIGH_VAR_RETRANS_TIME_MS] = {
2897                         .procname       = "retrans_time_ms",
2898                         .maxlen         = sizeof(int),
2899                         .mode           = 0644,
2900                         .proc_handler   = proc_dointvec_ms_jiffies,
2901                 },
2902                 [NEIGH_VAR_BASE_REACHABLE_TIME_MS] = {
2903                         .procname       = "base_reachable_time_ms",
2904                         .maxlen         = sizeof(int),
2905                         .mode           = 0644,
2906                         .proc_handler   = proc_dointvec_ms_jiffies,
2907                 },
2908                 [NEIGH_VAR_GC_INTERVAL] = {
2909                         .procname       = "gc_interval",
2910                         .maxlen         = sizeof(int),
2911                         .mode           = 0644,
2912                         .proc_handler   = proc_dointvec_jiffies,
2913                 },
2914                 [NEIGH_VAR_GC_THRESH1] = {
2915                         .procname       = "gc_thresh1",
2916                         .maxlen         = sizeof(int),
2917                         .mode           = 0644,
2918                         .proc_handler   = proc_dointvec,
2919                 },
2920                 [NEIGH_VAR_GC_THRESH2] = {
2921                         .procname       = "gc_thresh2",
2922                         .maxlen         = sizeof(int),
2923                         .mode           = 0644,
2924                         .proc_handler   = proc_dointvec,
2925                 },
2926                 [NEIGH_VAR_GC_THRESH3] = {
2927                         .procname       = "gc_thresh3",
2928                         .maxlen         = sizeof(int),
2929                         .mode           = 0644,
2930                         .proc_handler   = proc_dointvec,
2931                 },
2932                 {},
2933         },
2934 };
2935
2936 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
2937                           char *p_name, proc_handler *handler)
2938 {
2939         struct neigh_sysctl_table *t;
2940         const char *dev_name_source = NULL;
2941         char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
2942
2943         t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
2944         if (!t)
2945                 goto err;
2946
2947         t->neigh_vars[NEIGH_VAR_MCAST_PROBE].data  = &p->mcast_probes;
2948         t->neigh_vars[NEIGH_VAR_UCAST_PROBE].data  = &p->ucast_probes;
2949         t->neigh_vars[NEIGH_VAR_APP_PROBE].data  = &p->app_probes;
2950         t->neigh_vars[NEIGH_VAR_RETRANS_TIME].data  = &p->retrans_time;
2951         t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].data  = &p->base_reachable_time;
2952         t->neigh_vars[NEIGH_VAR_DELAY_PROBE_TIME].data  = &p->delay_probe_time;
2953         t->neigh_vars[NEIGH_VAR_GC_STALETIME].data  = &p->gc_staletime;
2954         t->neigh_vars[NEIGH_VAR_QUEUE_LEN].data  = &p->queue_len_bytes;
2955         t->neigh_vars[NEIGH_VAR_QUEUE_LEN_BYTES].data  = &p->queue_len_bytes;
2956         t->neigh_vars[NEIGH_VAR_PROXY_QLEN].data  = &p->proxy_qlen;
2957         t->neigh_vars[NEIGH_VAR_ANYCAST_DELAY].data  = &p->anycast_delay;
2958         t->neigh_vars[NEIGH_VAR_PROXY_DELAY].data = &p->proxy_delay;
2959         t->neigh_vars[NEIGH_VAR_LOCKTIME].data = &p->locktime;
2960         t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].data  = &p->retrans_time;
2961         t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].data  = &p->base_reachable_time;
2962
2963         if (dev) {
2964                 dev_name_source = dev->name;
2965                 /* Terminate the table early */
2966                 memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
2967                        sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
2968         } else {
2969                 dev_name_source = "default";
2970                 t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = (int *)(p + 1);
2971                 t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = (int *)(p + 1) + 1;
2972                 t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = (int *)(p + 1) + 2;
2973                 t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3;
2974         }
2975
2976
2977         if (handler) {
2978                 /* RetransTime */
2979                 t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
2980                 t->neigh_vars[NEIGH_VAR_RETRANS_TIME].extra1 = dev;
2981                 /* ReachableTime */
2982                 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
2983                 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].extra1 = dev;
2984                 /* RetransTime (in milliseconds)*/
2985                 t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
2986                 t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].extra1 = dev;
2987                 /* ReachableTime (in milliseconds) */
2988                 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
2989                 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].extra1 = dev;
2990         }
2991
2992         snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
2993                 p_name, dev_name_source);
2994         t->sysctl_header =
2995                 register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
2996         if (!t->sysctl_header)
2997                 goto free;
2998
2999         p->sysctl_table = t;
3000         return 0;
3001
3002 free:
3003         kfree(t);
3004 err:
3005         return -ENOBUFS;
3006 }
3007 EXPORT_SYMBOL(neigh_sysctl_register);
3008
3009 void neigh_sysctl_unregister(struct neigh_parms *p)
3010 {
3011         if (p->sysctl_table) {
3012                 struct neigh_sysctl_table *t = p->sysctl_table;
3013                 p->sysctl_table = NULL;
3014                 unregister_net_sysctl_table(t->sysctl_header);
3015                 kfree(t);
3016         }
3017 }
3018 EXPORT_SYMBOL(neigh_sysctl_unregister);
3019
3020 #endif  /* CONFIG_SYSCTL */
3021
3022 static int __init neigh_init(void)
3023 {
3024         rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
3025         rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
3026         rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);
3027
3028         rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
3029                       NULL);
3030         rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);
3031
3032         return 0;
3033 }
3034
3035 subsys_initcall(neigh_init);
3036