2 * Generic address resolution entity
5 * Pedro Roque <roque@di.fc.ul.pt>
6 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
14 * Vitaly E. Lavrov releasing NULL neighbor in neigh_add.
15 * Harald Welte Add neighbour cache statistics like rtstat
18 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20 #include <linux/slab.h>
21 #include <linux/types.h>
22 #include <linux/kernel.h>
23 #include <linux/module.h>
24 #include <linux/socket.h>
25 #include <linux/netdevice.h>
26 #include <linux/proc_fs.h>
28 #include <linux/sysctl.h>
30 #include <linux/times.h>
31 #include <net/net_namespace.h>
32 #include <net/neighbour.h>
35 #include <net/netevent.h>
36 #include <net/netlink.h>
37 #include <linux/rtnetlink.h>
38 #include <linux/random.h>
39 #include <linux/string.h>
40 #include <linux/log2.h>
41 #include <linux/inetdevice.h>
42 #include <net/addrconf.h>
/* Level-gated debug print; compiles away when level exceeds NEIGH_DEBUG.
 * Wrapped in do { } while (0) so it behaves as a single statement.
 */
#define neigh_dbg(level, fmt, ...)		\
do {						\
	if (level <= NEIGH_DEBUG)		\
		pr_debug(fmt, ##__VA_ARGS__);	\
} while (0)
/* The pneigh (proxy neighbour) hash table has PNEIGH_HASHMASK + 1 = 16
 * buckets; pneigh_hash() masks with this value.
 */
#define PNEIGH_HASHMASK		0xF

/* Forward declarations for routines defined later in this file. */
static void neigh_timer_handler(unsigned long arg);
static void __neigh_notify(struct neighbour *n, int type, int flags);
static void neigh_update_notify(struct neighbour *neigh);
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);

/* /proc/net/stat seq_file ops, defined in the CONFIG_PROC_FS section. */
static const struct file_operations neigh_stat_seq_fops;
/*
   Neighbour hash table buckets are protected with rwlock tbl->lock.

   - All the scans/updates to hash buckets MUST be made under this lock.
   - NOTHING clever should be made under this lock: no callbacks
     to protocol backends, no attempts to send something to network.
     It will result in deadlocks, if backend/driver wants to use the
     neighbour cache.
   - If the entry requires some non-trivial actions, increase
     its reference count and release table lock.

   Neighbour entries are protected:
   - with reference count.
   - with rwlock neigh->lock

   Reference count prevents destruction.

   neigh->lock mainly serializes ll address data and its validity state.
   However, the same lock is used to protect other entry fields too.

   Again, nothing clever shall be made under neigh->lock,
   the most complicated procedure, which we allow is dev->hard_header.
   It is supposed, that dev->hard_header is simplistic and does
   not make callbacks to neighbour tables.
 */
91 static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
97 static void neigh_cleanup_and_release(struct neighbour *neigh)
99 if (neigh->parms->neigh_cleanup)
100 neigh->parms->neigh_cleanup(neigh);
102 __neigh_notify(neigh, RTM_DELNEIGH, 0);
103 neigh_release(neigh);
/*
 * Pick a randomized reachability interval, uniformly distributed in
 * (1/2)*base .. (3/2)*base.  This matches the default IPv6 behaviour and
 * is deliberately not tunable, because it is a really reasonable choice.
 */
unsigned long neigh_rand_reach_time(unsigned long base)
{
	if (!base)
		return 0;

	return (base >> 1) + (prandom_u32() % base);
}
EXPORT_SYMBOL(neigh_rand_reach_time);
/* Synchronous "forced" garbage collection, run from the allocation path
 * when the table is over its gc thresholds.  Walks every bucket under
 * write_lock_bh(&tbl->lock) and unlinks entries that are unreferenced
 * (refcnt == 1) and not NUD_PERMANENT.
 *
 * NOTE(review): this extract omits some original lines (locals such as
 * the loop index / return flag, and parts of the loop tail); only minimal
 * bracing has been reconstructed — verify against the full source.
 */
static int neigh_forced_gc(struct neigh_table *tbl)
{
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour __rcu **np;

		np = &nht->hash_buckets[i];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			/* Neighbour record may be discarded if:
			 * - nobody refers to it.
			 * - it is not permanent
			 */
			write_lock(&n->lock);
			if (atomic_read(&n->refcnt) == 1 &&
			    !(n->nud_state & NUD_PERMANENT)) {
				/* Unlink under both tbl->lock and n->lock;
				 * the entry is released outside n->lock.
				 */
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
			}
			write_unlock(&n->lock);
		}
	}

	/* Remember when we last flushed, for the gc_thresh2 rate limit. */
	tbl->last_flush = jiffies;

	write_unlock_bh(&tbl->lock);
}
/* Arm the entry's state-machine timer to fire at @when (jiffies).  A timer
 * that was already pending is a bug in the NUD state machine, hence the
 * loud printk.
 *
 * NOTE(review): this extract omits the tail of the function (the printk
 * arguments and closing braces) — verify against the full source.
 */
static void neigh_add_timer(struct neighbour *n, unsigned long when)
{
	if (unlikely(mod_timer(&n->timer, when))) {
		printk("NEIGH: BUG, double timer add, state is %x\n",
/* Cancel the entry's pending timer, if any.  Only states in NUD_IN_TIMER
 * can have a timer armed.
 *
 * NOTE(review): the body tail (reference drop / return values) is elided
 * in this extract — verify against the full source.
 */
static int neigh_del_timer(struct neighbour *n)
{
	if ((n->nud_state & NUD_IN_TIMER) &&
	    del_timer(&n->timer)) {
/* Drain and free every skb queued on @list (the table's proxy queue).
 *
 * NOTE(review): the loop body (per-skb release) is elided in this
 * extract — verify against the full source.
 */
static void pneigh_queue_purge(struct sk_buff_head *list)
{
	while ((skb = skb_dequeue(list)) != NULL) {
/* Unlink every entry belonging to @dev (or every entry when @dev is NULL)
 * from @tbl.  Caller holds write_lock_bh(&tbl->lock) — see
 * neigh_changeaddr() and neigh_ifdown().
 *
 * NOTE(review): this extract omits several original lines (the skip-branch
 * body, dead-marking, timer teardown); minimal structure (braces, the
 * else) was reconstructed — verify against the full source.
 */
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
{
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour __rcu **np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			if (dev && n->dev != dev) {
				/* not our device: skip (advance handled in
				 * elided lines) */
			}
			/* Unlink from the bucket, then neutralise the entry
			 * under its own lock. */
			rcu_assign_pointer(*np,
				rcu_dereference_protected(n->next,
					lockdep_is_held(&tbl->lock)));
			write_lock(&n->lock);
			if (atomic_read(&n->refcnt) != 1) {
				/* The most unpleasant situation.
				   We must destroy neighbour entry,
				   but someone still uses it.

				   The destroy will be delayed until
				   the last user releases us, but
				   we must kill timers etc. and move
				   it to a safe state.
				 */
				__skb_queue_purge(&n->arp_queue);
				n->arp_queue_len_bytes = 0;
				n->output = neigh_blackhole;
				if (n->nud_state & NUD_VALID)
					n->nud_state = NUD_NOARP;
				else
					n->nud_state = NUD_NONE;
				neigh_dbg(2, "neigh %p is stray\n", n);
			}
			write_unlock(&n->lock);
			neigh_cleanup_and_release(n);
		}
	}
}
245 void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
247 write_lock_bh(&tbl->lock);
248 neigh_flush_dev(tbl, dev);
249 write_unlock_bh(&tbl->lock);
251 EXPORT_SYMBOL(neigh_changeaddr);
253 int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
255 write_lock_bh(&tbl->lock);
256 neigh_flush_dev(tbl, dev);
257 pneigh_ifdown(tbl, dev);
258 write_unlock_bh(&tbl->lock);
260 del_timer_sync(&tbl->proxy_timer);
261 pneigh_queue_purge(&tbl->proxy_queue);
264 EXPORT_SYMBOL(neigh_ifdown);
/* Allocate and minimally initialise a neighbour entry for @tbl/@dev.
 * May run synchronous GC (neigh_forced_gc()) when the table exceeds
 * gc_thresh2 (rate limited to once per 5s) or gc_thresh3.
 *
 * NOTE(review): this extract omits some original lines ('entries' local
 * declaration, NULL check after kzalloc, error-path labels and returns);
 * gaps are marked with comments — verify against the full source.
 */
static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
{
	struct neighbour *n = NULL;
	unsigned long now = jiffies;

	entries = atomic_inc_return(&tbl->entries) - 1;
	if (entries >= tbl->gc_thresh3 ||
	    (entries >= tbl->gc_thresh2 &&
	     time_after(now, tbl->last_flush + 5 * HZ))) {
		if (!neigh_forced_gc(tbl) &&
		    entries >= tbl->gc_thresh3)
			/* bail out to the entries-decrement path (elided) */;
	}

	/* entry_size covers primary_key; neigh_priv_len is driver extra. */
	n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
	/* NULL check elided in extract */

	__skb_queue_head_init(&n->arp_queue);
	rwlock_init(&n->lock);
	seqlock_init(&n->ha_lock);
	n->updated = n->used = now;
	n->nud_state = NUD_NONE;
	n->output = neigh_blackhole;	/* no output until resolution starts */
	seqlock_init(&n->hh.hh_lock);
	n->parms = neigh_parms_clone(&tbl->parms);
	setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);

	NEIGH_CACHE_STAT_INC(tbl, allocs);
	atomic_set(&n->refcnt, 1);
	/* success return elided in extract */

	/* error path: undo the optimistic entries increment */
	atomic_dec(&tbl->entries);
}
307 static void neigh_get_hash_rnd(u32 *x)
309 get_random_bytes(x, sizeof(*x));
/* Allocate a neigh_hash_table with 2^shift buckets: kzalloc for arrays
 * that fit in a page, whole zeroed pages otherwise.  Seeds hash_rnd[].
 *
 * NOTE(review): this extract omits the NULL checks / failure paths and
 * the final return; minimal structure (else, call argument) was
 * reconstructed — verify against the full source.
 */
static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
{
	size_t size = (1 << shift) * sizeof(struct neighbour *);
	struct neigh_hash_table *ret;
	struct neighbour __rcu **buckets;

	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
	/* NULL check elided in extract */
	if (size <= PAGE_SIZE)
		buckets = kzalloc(size, GFP_ATOMIC);
	else
		buckets = (struct neighbour __rcu **)
			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
					   get_order(size));
	/* bucket-allocation failure path elided in extract */
	ret->hash_buckets = buckets;
	ret->hash_shift = shift;
	for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
		neigh_get_hash_rnd(&ret->hash_rnd[i]);
	/* return elided in extract */
}
/* RCU callback freeing a retired hash table: buckets were either
 * kzalloc'd (<= PAGE_SIZE) or page-allocated, mirroring
 * neigh_hash_alloc().
 *
 * NOTE(review): this extract omits the small-size free branch and the
 * final kfree of the struct itself — verify against the full source.
 */
static void neigh_hash_free_rcu(struct rcu_head *head)
{
	struct neigh_hash_table *nht = container_of(head,
						    struct neigh_hash_table,
						    rcu);
	size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
	struct neighbour __rcu **buckets = nht->hash_buckets;

	if (size <= PAGE_SIZE)
		/* kfree branch elided in extract */;
	else
		free_pages((unsigned long)buckets, get_order(size));
}
/* Replace tbl->nht with a table of 2^new_shift buckets, rehashing every
 * entry.  Runs under write_lock of tbl->lock; the old table is freed via
 * RCU so lockless readers traversing it stay safe.
 *
 * NOTE(review): this extract omits the allocation-failure early return,
 * parts of the inner for-header, the third tbl->hash() argument and the
 * final return; they were minimally reconstructed — verify against the
 * full source.
 */
static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
						unsigned long new_shift)
{
	unsigned int i, hash;
	struct neigh_hash_table *new_nht, *old_nht;

	NEIGH_CACHE_STAT_INC(tbl, hash_grows);

	old_nht = rcu_dereference_protected(tbl->nht,
					    lockdep_is_held(&tbl->lock));
	new_nht = neigh_hash_alloc(new_shift);
	/* allocation-failure return elided in extract */

	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
		struct neighbour *n, *next;

		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
						   lockdep_is_held(&tbl->lock));
		     n != NULL;
		     n = next) {
			/* Recompute the bucket with the new table's seeds. */
			hash = tbl->hash(n->primary_key, n->dev,
					 new_nht->hash_rnd);

			hash >>= (32 - new_nht->hash_shift);
			next = rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock));

			/* Push onto the head of the new bucket. */
			rcu_assign_pointer(n->next,
					   rcu_dereference_protected(
						new_nht->hash_buckets[hash],
						lockdep_is_held(&tbl->lock)));
			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
		}
	}

	rcu_assign_pointer(tbl->nht, new_nht);
	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
	/* return of new_nht elided in extract */
}
/* Look up the entry for (@pkey, @dev) in @tbl, taking a reference on it.
 * Returns the referenced entry or NULL.  Uses the lockless noref lookup
 * under rcu_read_lock_bh(); atomic_inc_not_zero guards against racing
 * with the final release.
 *
 * NOTE(review): this extract omits the local declaration, the
 * rcu_read_lock_bh() call, the if (n) wrapper and the return; minimal
 * structure was reconstructed — verify against the full source.
 */
struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	NEIGH_CACHE_STAT_INC(tbl, lookups);

	/* rcu_read_lock_bh() elided in extract */
	n = __neigh_lookup_noref(tbl, pkey, dev);
	if (n) {
		if (!atomic_inc_not_zero(&n->refcnt))
			n = NULL;	/* lost the race with release */
		NEIGH_CACHE_STAT_INC(tbl, hits);
	}

	rcu_read_unlock_bh();
	/* return elided in extract */
}
EXPORT_SYMBOL(neigh_lookup);
/* Like neigh_lookup() but matches on (net, key) only, ignoring the
 * device: walks the bucket chain comparing primary_key and netns.
 *
 * NOTE(review): this extract omits part of the parameter list, the
 * rcu_read_lock_bh() call, the loop-header middle clause, the break and
 * the return; minimal structure was reconstructed — verify against the
 * full source.
 */
struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
				     const void *pkey)
{
	int key_len = tbl->key_len;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	/* rcu_read_lock_bh() elided in extract */
	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (!memcmp(n->primary_key, pkey, key_len) &&
		    net_eq(dev_net(n->dev), net)) {
			if (!atomic_inc_not_zero(&n->refcnt))
				n = NULL;	/* racing with release */
			NEIGH_CACHE_STAT_INC(tbl, hits);
		}
	}

	rcu_read_unlock_bh();
	/* return elided in extract */
}
EXPORT_SYMBOL(neigh_lookup_nodev);
447 struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
448 struct net_device *dev, bool want_ref)
451 int key_len = tbl->key_len;
453 struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
454 struct neigh_hash_table *nht;
457 rc = ERR_PTR(-ENOBUFS);
461 memcpy(n->primary_key, pkey, key_len);
465 /* Protocol specific setup. */
466 if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
468 goto out_neigh_release;
471 if (dev->netdev_ops->ndo_neigh_construct) {
472 error = dev->netdev_ops->ndo_neigh_construct(n);
475 goto out_neigh_release;
479 /* Device specific setup. */
480 if (n->parms->neigh_setup &&
481 (error = n->parms->neigh_setup(n)) < 0) {
483 goto out_neigh_release;
486 n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);
488 write_lock_bh(&tbl->lock);
489 nht = rcu_dereference_protected(tbl->nht,
490 lockdep_is_held(&tbl->lock));
492 if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
493 nht = neigh_hash_grow(tbl, nht->hash_shift + 1);
495 hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
497 if (n->parms->dead) {
498 rc = ERR_PTR(-EINVAL);
502 for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
503 lockdep_is_held(&tbl->lock));
505 n1 = rcu_dereference_protected(n1->next,
506 lockdep_is_held(&tbl->lock))) {
507 if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
518 rcu_assign_pointer(n->next,
519 rcu_dereference_protected(nht->hash_buckets[hash_val],
520 lockdep_is_held(&tbl->lock)));
521 rcu_assign_pointer(nht->hash_buckets[hash_val], n);
522 write_unlock_bh(&tbl->lock);
523 neigh_dbg(2, "neigh %p is created\n", n);
528 write_unlock_bh(&tbl->lock);
533 EXPORT_SYMBOL(__neigh_create);
535 static u32 pneigh_hash(const void *pkey, int key_len)
537 u32 hash_val = *(u32 *)(pkey + key_len - 4);
538 hash_val ^= (hash_val >> 16);
539 hash_val ^= hash_val >> 8;
540 hash_val ^= hash_val >> 4;
541 hash_val &= PNEIGH_HASHMASK;
/* Scan one pneigh chain for an entry matching (net, key); an entry with
 * n->dev == NULL matches any device.
 *
 * NOTE(review): this extract omits part of the parameter list and the
 * loop/return scaffolding of __pneigh_lookup_1; minimal structure was
 * reconstructed — verify against the full source.
 */
static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
					      struct net *net,
					      const void *pkey, int key_len,
					      struct net_device *dev)
{
	while (n) {
		if (!memcmp(n->key, pkey, key_len) &&
		    net_eq(pneigh_net(n), net) &&
		    (n->dev == dev || !n->dev))
			return n;
		n = n->next;
	}
	return NULL;
}

/* Chain lookup without locking of its own; caller must hold tbl->lock. */
struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
				     struct net *net, const void *pkey, struct net_device *dev)
{
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
				 net, pkey, key_len, dev);
}
EXPORT_SYMBOL_GPL(__pneigh_lookup);
572 struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
573 struct net *net, const void *pkey,
574 struct net_device *dev, int creat)
576 struct pneigh_entry *n;
577 int key_len = tbl->key_len;
578 u32 hash_val = pneigh_hash(pkey, key_len);
580 read_lock_bh(&tbl->lock);
581 n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
582 net, pkey, key_len, dev);
583 read_unlock_bh(&tbl->lock);
590 n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
594 write_pnet(&n->net, hold_net(net));
595 memcpy(n->key, pkey, key_len);
600 if (tbl->pconstructor && tbl->pconstructor(n)) {
609 write_lock_bh(&tbl->lock);
610 n->next = tbl->phash_buckets[hash_val];
611 tbl->phash_buckets[hash_val] = n;
612 write_unlock_bh(&tbl->lock);
616 EXPORT_SYMBOL(pneigh_lookup);
/* Remove the proxy entry matching (net, pkey, dev) from @tbl, run its
 * destructor outside the table lock, and free it.
 *
 * NOTE(review): this extract omits the unlink assignment, the device put,
 * kfree and the return values; only minimal structure was reconstructed —
 * verify against the full source.
 */
int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
		  struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	write_lock_bh(&tbl->lock);
	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
	     np = &n->next) {
		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
		    net_eq(pneigh_net(n), net)) {
			/* unlink (elided), then release outside the lock */
			write_unlock_bh(&tbl->lock);
			if (tbl->pdestructor)
				tbl->pdestructor(n);
			/* dev_put / kfree elided in extract */
			release_net(pneigh_net(n));
			return 0;
		}
	}
	write_unlock_bh(&tbl->lock);
	/* not-found return elided in extract */
}
646 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
648 struct pneigh_entry *n, **np;
651 for (h = 0; h <= PNEIGH_HASHMASK; h++) {
652 np = &tbl->phash_buckets[h];
653 while ((n = *np) != NULL) {
654 if (!dev || n->dev == dev) {
656 if (tbl->pdestructor)
660 release_net(pneigh_net(n));
670 static void neigh_parms_destroy(struct neigh_parms *parms);
672 static inline void neigh_parms_put(struct neigh_parms *parms)
674 if (atomic_dec_and_test(&parms->refcnt))
675 neigh_parms_destroy(parms);
/*
 * Final destructor, reached when the last reference is dropped;
 * the neighbour must already be out of the table.
 *
 * NOTE(review): this extract omits the "alive entry" guard around the
 * first pr_warn (dead-flag check / dump_stack / early return) and a
 * dev_put; gaps are marked — verify against the full source.
 */
void neigh_destroy(struct neighbour *neigh)
{
	struct net_device *dev = neigh->dev;

	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

	/* guard condition elided in extract */
	pr_warn("Destroying alive neighbour %p\n", neigh);

	if (neigh_del_timer(neigh))
		pr_warn("Impossible event\n");	/* timer on a dead entry */

	write_lock_bh(&neigh->lock);
	__skb_queue_purge(&neigh->arp_queue);
	write_unlock_bh(&neigh->lock);
	neigh->arp_queue_len_bytes = 0;

	if (dev->netdev_ops->ndo_neigh_destroy)
		dev->netdev_ops->ndo_neigh_destroy(neigh);

	/* dev_put elided in extract */
	neigh_parms_put(neigh->parms);

	neigh_dbg(2, "neigh %p is destroyed\n", neigh);

	atomic_dec(&neigh->tbl->entries);
	kfree_rcu(neigh, rcu);
}
EXPORT_SYMBOL(neigh_destroy);
715 /* Neighbour state is suspicious;
718 Called with write_locked neigh.
720 static void neigh_suspect(struct neighbour *neigh)
722 neigh_dbg(2, "neigh %p is suspected\n", neigh);
724 neigh->output = neigh->ops->output;
727 /* Neighbour state is OK;
730 Called with write_locked neigh.
732 static void neigh_connect(struct neighbour *neigh)
734 neigh_dbg(2, "neigh %p is connected\n", neigh);
736 neigh->output = neigh->ops->connected_output;
739 static void neigh_periodic_work(struct work_struct *work)
741 struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
743 struct neighbour __rcu **np;
745 struct neigh_hash_table *nht;
747 NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
749 write_lock_bh(&tbl->lock);
750 nht = rcu_dereference_protected(tbl->nht,
751 lockdep_is_held(&tbl->lock));
754 * periodically recompute ReachableTime from random function
757 if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
758 struct neigh_parms *p;
759 tbl->last_rand = jiffies;
760 list_for_each_entry(p, &tbl->parms_list, list)
762 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
765 if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
768 for (i = 0 ; i < (1 << nht->hash_shift); i++) {
769 np = &nht->hash_buckets[i];
771 while ((n = rcu_dereference_protected(*np,
772 lockdep_is_held(&tbl->lock))) != NULL) {
775 write_lock(&n->lock);
777 state = n->nud_state;
778 if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
779 write_unlock(&n->lock);
783 if (time_before(n->used, n->confirmed))
784 n->used = n->confirmed;
786 if (atomic_read(&n->refcnt) == 1 &&
787 (state == NUD_FAILED ||
788 time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
791 write_unlock(&n->lock);
792 neigh_cleanup_and_release(n);
795 write_unlock(&n->lock);
801 * It's fine to release lock here, even if hash table
802 * grows while we are preempted.
804 write_unlock_bh(&tbl->lock);
806 write_lock_bh(&tbl->lock);
807 nht = rcu_dereference_protected(tbl->nht,
808 lockdep_is_held(&tbl->lock));
811 /* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
812 * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
813 * BASE_REACHABLE_TIME.
815 queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
816 NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
817 write_unlock_bh(&tbl->lock);
820 static __inline__ int neigh_max_probes(struct neighbour *n)
822 struct neigh_parms *p = n->parms;
823 int max_probes = NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES);
824 if (!(n->nud_state & NUD_PROBE))
825 max_probes += NEIGH_VAR(p, MCAST_PROBES);
/* Mark resolution as failed: bump the stat, report every queued skb as
 * unreachable (dropping and re-taking neigh->lock around the callback,
 * as the sparse annotations declare), then purge whatever remains.
 *
 * NOTE(review): this extract omits the skb local declaration and closing
 * scaffolding — verify against the full source.
 */
static void neigh_invalidate(struct neighbour *neigh)
	__releases(neigh->lock)
	__acquires(neigh->lock)
{
	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
	neigh_dbg(2, "neigh %p is failed\n", neigh);
	neigh->updated = jiffies;

	/* It is very thin place. report_unreachable is very complicated
	   routine. Particularly, it can hit the same neighbour entry!

	   So that, we try to be accurate and avoid dead loop. --ANK
	 */
	while (neigh->nud_state == NUD_FAILED &&
	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
		write_unlock(&neigh->lock);
		neigh->ops->error_report(neigh, skb);
		write_lock(&neigh->lock);
	}
	__skb_queue_purge(&neigh->arp_queue);
	neigh->arp_queue_len_bytes = 0;
}
/* Send one solicitation for @neigh, using a copy of the newest queued skb
 * as context.  Drops neigh->lock before calling into ops->solicit (per
 * the __releases annotation).
 *
 * NOTE(review): this extract omits the NULL-check around skb_copy and the
 * trailing kfree of the copy — verify against the full source.
 */
static void neigh_probe(struct neighbour *neigh)
	__releases(neigh->lock)
{
	struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
	/* keep skb alive even if arp_queue overflows */
	skb = skb_copy(skb, GFP_ATOMIC);
	write_unlock(&neigh->lock);
	neigh->ops->solicit(neigh, skb);
	atomic_inc(&neigh->probes);
}
867 /* Called when a timer expires for a neighbour entry. */
869 static void neigh_timer_handler(unsigned long arg)
871 unsigned long now, next;
872 struct neighbour *neigh = (struct neighbour *)arg;
876 write_lock(&neigh->lock);
878 state = neigh->nud_state;
882 if (!(state & NUD_IN_TIMER))
885 if (state & NUD_REACHABLE) {
886 if (time_before_eq(now,
887 neigh->confirmed + neigh->parms->reachable_time)) {
888 neigh_dbg(2, "neigh %p is still alive\n", neigh);
889 next = neigh->confirmed + neigh->parms->reachable_time;
890 } else if (time_before_eq(now,
892 NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
893 neigh_dbg(2, "neigh %p is delayed\n", neigh);
894 neigh->nud_state = NUD_DELAY;
895 neigh->updated = jiffies;
896 neigh_suspect(neigh);
897 next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
899 neigh_dbg(2, "neigh %p is suspected\n", neigh);
900 neigh->nud_state = NUD_STALE;
901 neigh->updated = jiffies;
902 neigh_suspect(neigh);
905 } else if (state & NUD_DELAY) {
906 if (time_before_eq(now,
908 NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
909 neigh_dbg(2, "neigh %p is now reachable\n", neigh);
910 neigh->nud_state = NUD_REACHABLE;
911 neigh->updated = jiffies;
912 neigh_connect(neigh);
914 next = neigh->confirmed + neigh->parms->reachable_time;
916 neigh_dbg(2, "neigh %p is probed\n", neigh);
917 neigh->nud_state = NUD_PROBE;
918 neigh->updated = jiffies;
919 atomic_set(&neigh->probes, 0);
920 next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
923 /* NUD_PROBE|NUD_INCOMPLETE */
924 next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
927 if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
928 atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
929 neigh->nud_state = NUD_FAILED;
931 neigh_invalidate(neigh);
935 if (neigh->nud_state & NUD_IN_TIMER) {
936 if (time_before(next, jiffies + HZ/2))
937 next = jiffies + HZ/2;
938 if (!mod_timer(&neigh->timer, next))
941 if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
945 write_unlock(&neigh->lock);
949 neigh_update_notify(neigh);
951 neigh_release(neigh);
954 int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
957 bool immediate_probe = false;
959 write_lock_bh(&neigh->lock);
962 if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
965 if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
966 if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
967 NEIGH_VAR(neigh->parms, APP_PROBES)) {
968 unsigned long next, now = jiffies;
970 atomic_set(&neigh->probes,
971 NEIGH_VAR(neigh->parms, UCAST_PROBES));
972 neigh->nud_state = NUD_INCOMPLETE;
973 neigh->updated = now;
974 next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
976 neigh_add_timer(neigh, next);
977 immediate_probe = true;
979 neigh->nud_state = NUD_FAILED;
980 neigh->updated = jiffies;
981 write_unlock_bh(&neigh->lock);
986 } else if (neigh->nud_state & NUD_STALE) {
987 neigh_dbg(2, "neigh %p is delayed\n", neigh);
988 neigh->nud_state = NUD_DELAY;
989 neigh->updated = jiffies;
990 neigh_add_timer(neigh, jiffies +
991 NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
994 if (neigh->nud_state == NUD_INCOMPLETE) {
996 while (neigh->arp_queue_len_bytes + skb->truesize >
997 NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
998 struct sk_buff *buff;
1000 buff = __skb_dequeue(&neigh->arp_queue);
1003 neigh->arp_queue_len_bytes -= buff->truesize;
1005 NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
1008 __skb_queue_tail(&neigh->arp_queue, skb);
1009 neigh->arp_queue_len_bytes += skb->truesize;
1014 if (immediate_probe)
1017 write_unlock(&neigh->lock);
1021 EXPORT_SYMBOL(__neigh_event_send);
/* Propagate a changed link-layer address into the cached hardware header
 * via the device's header_ops->cache_update hook, under the hh seqlock so
 * lockless readers retry.
 *
 * NOTE(review): this extract omits the initializer of 'update', the
 * update/hh_len guard conditions and the hh assignment; only the core
 * statements survive — verify against the full source.
 */
static void neigh_update_hhs(struct neighbour *neigh)
{
	struct hh_cache *hh;
	void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
		/* initializer elided in extract */;

	if (neigh->dev->header_ops)
		update = neigh->dev->header_ops->cache_update;

	/* guards and hh assignment elided in extract */
	write_seqlock_bh(&hh->hh_lock);
	update(hh, neigh->dev, neigh->ha);
	write_sequnlock_bh(&hh->hh_lock);
}
1044 /* Generic update routine.
1045 -- lladdr is new lladdr or NULL, if it is not supplied.
1046 -- new is new state.
1048 NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr,
1050 NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
1051 lladdr instead of overriding it
1053 It also allows to retain current state
1054 if lladdr is unchanged.
1055 NEIGH_UPDATE_F_ADMIN means that the change is administrative.
1057 NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
1059 NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as
1062 Caller MUST hold reference count on the entry.
1065 int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
1071 struct net_device *dev;
1072 int update_isrouter = 0;
1074 write_lock_bh(&neigh->lock);
1077 old = neigh->nud_state;
1080 if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
1081 (old & (NUD_NOARP | NUD_PERMANENT)))
1084 if (!(new & NUD_VALID)) {
1085 neigh_del_timer(neigh);
1086 if (old & NUD_CONNECTED)
1087 neigh_suspect(neigh);
1088 neigh->nud_state = new;
1090 notify = old & NUD_VALID;
1091 if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
1092 (new & NUD_FAILED)) {
1093 neigh_invalidate(neigh);
1099 /* Compare new lladdr with cached one */
1100 if (!dev->addr_len) {
1101 /* First case: device needs no address. */
1103 } else if (lladdr) {
1104 /* The second case: if something is already cached
1105 and a new address is proposed:
1107 - if they are different, check override flag
1109 if ((old & NUD_VALID) &&
1110 !memcmp(lladdr, neigh->ha, dev->addr_len))
1113 /* No address is supplied; if we know something,
1114 use it, otherwise discard the request.
1117 if (!(old & NUD_VALID))
1122 if (new & NUD_CONNECTED)
1123 neigh->confirmed = jiffies;
1124 neigh->updated = jiffies;
1126 /* If entry was valid and address is not changed,
1127 do not change entry state, if new one is STALE.
1130 update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
1131 if (old & NUD_VALID) {
1132 if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
1133 update_isrouter = 0;
1134 if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
1135 (old & NUD_CONNECTED)) {
1141 if (lladdr == neigh->ha && new == NUD_STALE &&
1142 ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
1143 (old & NUD_CONNECTED))
1150 neigh_del_timer(neigh);
1151 if (new & NUD_IN_TIMER)
1152 neigh_add_timer(neigh, (jiffies +
1153 ((new & NUD_REACHABLE) ?
1154 neigh->parms->reachable_time :
1156 neigh->nud_state = new;
1160 if (lladdr != neigh->ha) {
1161 write_seqlock(&neigh->ha_lock);
1162 memcpy(&neigh->ha, lladdr, dev->addr_len);
1163 write_sequnlock(&neigh->ha_lock);
1164 neigh_update_hhs(neigh);
1165 if (!(new & NUD_CONNECTED))
1166 neigh->confirmed = jiffies -
1167 (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
1172 if (new & NUD_CONNECTED)
1173 neigh_connect(neigh);
1175 neigh_suspect(neigh);
1176 if (!(old & NUD_VALID)) {
1177 struct sk_buff *skb;
1179 /* Again: avoid dead loop if something went wrong */
1181 while (neigh->nud_state & NUD_VALID &&
1182 (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
1183 struct dst_entry *dst = skb_dst(skb);
1184 struct neighbour *n2, *n1 = neigh;
1185 write_unlock_bh(&neigh->lock);
1189 /* Why not just use 'neigh' as-is? The problem is that
1190 * things such as shaper, eql, and sch_teql can end up
1191 * using alternative, different, neigh objects to output
1192 * the packet in the output path. So what we need to do
1193 * here is re-lookup the top-level neigh in the path so
1194 * we can reinject the packet there.
1198 n2 = dst_neigh_lookup_skb(dst, skb);
1202 n1->output(n1, skb);
1207 write_lock_bh(&neigh->lock);
1209 __skb_queue_purge(&neigh->arp_queue);
1210 neigh->arp_queue_len_bytes = 0;
1213 if (update_isrouter) {
1214 neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
1215 (neigh->flags | NTF_ROUTER) :
1216 (neigh->flags & ~NTF_ROUTER);
1218 write_unlock_bh(&neigh->lock);
1221 neigh_update_notify(neigh);
1225 EXPORT_SYMBOL(neigh_update);
/* Update the neigh to listen temporarily for probe responses, even if it is
 * in a NUD_FAILED state. The caller has to hold neigh->lock for writing.
 *
 * NOTE(review): this extract omits the early return after the NUD_FAILED
 * check — verify against the full source.
 */
void __neigh_set_probe_once(struct neighbour *neigh)
{
	neigh->updated = jiffies;
	if (!(neigh->nud_state & NUD_FAILED))
		/* early return elided in extract */;
	neigh->nud_state = NUD_INCOMPLETE;
	/* start with the probe budget exhausted: one timer pass only */
	atomic_set(&neigh->probes, neigh_max_probes(neigh));
	neigh_add_timer(neigh,
			jiffies + NEIGH_VAR(neigh->parms, RETRANS_TIME));
}
EXPORT_SYMBOL(__neigh_set_probe_once);
1242 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1243 u8 *lladdr, void *saddr,
1244 struct net_device *dev)
1246 struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1247 lladdr || !dev->addr_len);
1249 neigh_update(neigh, lladdr, NUD_STALE,
1250 NEIGH_UPDATE_F_OVERRIDE);
1253 EXPORT_SYMBOL(neigh_event_ns);
/* called with read_lock_bh(&n->lock); */
/* Populate the entry's cached hardware header via header_ops->cache,
 * upgrading to the write lock so only one thread initialises it.
 *
 * NOTE(review): this extract omits the hh_len guard before the cache
 * call — verify against the full source.
 */
static void neigh_hh_init(struct neighbour *n)
{
	struct net_device *dev = n->dev;
	__be16 prot = n->tbl->protocol;
	struct hh_cache *hh = &n->hh;

	write_lock_bh(&n->lock);

	/* Only one thread can come in here and initialize the
	 * hh_cache entry.
	 */
	dev->header_ops->cache(n, hh, prot);

	write_unlock_bh(&n->lock);
}
/* Slow and careful. */
/* Resolving output path: kick the NUD state machine (which may queue the
 * skb), and only when the entry is usable build the hardware header —
 * retrying under the ha seqlock if the address changes mid-copy — and
 * transmit.
 *
 * NOTE(review): this extract omits the rc/err/seq locals, the do { }
 * opener, the err check before transmit and the error path; minimal
 * structure was reconstructed — verify against the full source.
 */
int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
	if (!neigh_event_send(neigh, skb)) {
		struct net_device *dev = neigh->dev;

		if (dev->header_ops->cache && !neigh->hh.hh_len)
			neigh_hh_init(neigh);

		do {
			__skb_pull(skb, skb_network_offset(skb));
			seq = read_seqbegin(&neigh->ha_lock);
			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
					      neigh->ha, NULL, skb->len);
		} while (read_seqretry(&neigh->ha_lock, seq));

		/* err check elided in extract */
		rc = dev_queue_xmit(skb);
	}
	/* error path and return elided in extract */
}
EXPORT_SYMBOL(neigh_resolve_output);
/* As fast as possible without hh cache */
/* Connected fast path: build the hardware header directly from neigh->ha
 * (retrying under the ha seqlock) and transmit — no state-machine kick.
 *
 * NOTE(review): this extract omits locals, the do { } opener, the err
 * check before transmit and the return; minimal structure was
 * reconstructed — verify against the full source.
 */
int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
{
	struct net_device *dev = neigh->dev;

	do {
		__skb_pull(skb, skb_network_offset(skb));
		seq = read_seqbegin(&neigh->ha_lock);
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      neigh->ha, NULL, skb->len);
	} while (read_seqretry(&neigh->ha_lock, seq));

	/* err check elided in extract */
	err = dev_queue_xmit(skb);
	/* error path and return elided in extract */
}
EXPORT_SYMBOL(neigh_connected_output);
/* No-resolution transmit: hand the skb straight to the device queue; the
 * caller is responsible for any link-layer framing.
 */
int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_direct_output);
1339 static void neigh_proxy_process(unsigned long arg)
1341 struct neigh_table *tbl = (struct neigh_table *)arg;
1342 long sched_next = 0;
1343 unsigned long now = jiffies;
1344 struct sk_buff *skb, *n;
1346 spin_lock(&tbl->proxy_queue.lock);
1348 skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
1349 long tdif = NEIGH_CB(skb)->sched_next - now;
1352 struct net_device *dev = skb->dev;
1354 __skb_unlink(skb, &tbl->proxy_queue);
1355 if (tbl->proxy_redo && netif_running(dev)) {
1357 tbl->proxy_redo(skb);
1364 } else if (!sched_next || tdif < sched_next)
1367 del_timer(&tbl->proxy_timer);
1369 mod_timer(&tbl->proxy_timer, jiffies + sched_next);
1370 spin_unlock(&tbl->proxy_queue.lock);
1373 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1374 struct sk_buff *skb)
1376 unsigned long now = jiffies;
1378 unsigned long sched_next = now + (prandom_u32() %
1379 NEIGH_VAR(p, PROXY_DELAY));
1381 if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) {
1386 NEIGH_CB(skb)->sched_next = sched_next;
1387 NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1389 spin_lock(&tbl->proxy_queue.lock);
1390 if (del_timer(&tbl->proxy_timer)) {
1391 if (time_before(tbl->proxy_timer.expires, sched_next))
1392 sched_next = tbl->proxy_timer.expires;
1396 __skb_queue_tail(&tbl->proxy_queue, skb);
1397 mod_timer(&tbl->proxy_timer, sched_next);
1398 spin_unlock(&tbl->proxy_queue.lock);
1400 EXPORT_SYMBOL(pneigh_enqueue);
/* Find the neigh_parms on @tbl for (@net, @ifindex); ifindex == 0 in the
 * initial netns selects the table's default parms (the dev-less entry).
 *
 * NOTE(review): this extract omits the in-loop return and the trailing
 * NULL return; reconstructed minimally — verify against the full source.
 */
static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
						     struct net *net, int ifindex)
{
	struct neigh_parms *p;

	list_for_each_entry(p, &tbl->parms_list, list) {
		if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
		    (!p->dev && !ifindex && net_eq(net, &init_net)))
			return p;
	}

	return NULL;
}
1416 struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1417 struct neigh_table *tbl)
1419 struct neigh_parms *p;
1420 struct net *net = dev_net(dev);
1421 const struct net_device_ops *ops = dev->netdev_ops;
1423 p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
1426 atomic_set(&p->refcnt, 1);
1428 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
1431 write_pnet(&p->net, hold_net(net));
1432 p->sysctl_table = NULL;
1434 if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
1441 write_lock_bh(&tbl->lock);
1442 list_add(&p->list, &tbl->parms.list);
1443 write_unlock_bh(&tbl->lock);
1445 neigh_parms_data_state_cleanall(p);
1449 EXPORT_SYMBOL(neigh_parms_alloc);
1451 static void neigh_rcu_free_parms(struct rcu_head *head)
1453 struct neigh_parms *parms =
1454 container_of(head, struct neigh_parms, rcu_head);
1456 neigh_parms_put(parms);
/* Detach @parms from @tbl's list (the table's own default parms are never
 * released this way) and schedule the final put after an RCU grace
 * period.
 *
 * NOTE(review): this extract omits the early return after the guard and
 * the dead-marking between list_del and unlock — verify against the full
 * source.
 */
void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
	if (!parms || parms == &tbl->parms)
		/* early return elided in extract */;
	write_lock_bh(&tbl->lock);
	list_del(&parms->list);
	write_unlock_bh(&tbl->lock);

	dev_put(parms->dev);
	call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
}
EXPORT_SYMBOL(neigh_parms_release);
/* Final destructor for a neigh_parms: drop its netns reference. */
1473 static void neigh_parms_destroy(struct neigh_parms *parms)
1475 release_net(neigh_parms_net(parms));
/* Lockdep class for the per-table proxy queue spinlock. */
1479 static struct lock_class_key neigh_table_proxy_queue_class;
/* Registered neighbour tables, indexed by NEIGH_*_TABLE constants. */
1481 static struct neigh_table *neigh_tables[NEIGH_NR_TABLES] __read_mostly;
/* Initialize and register neighbour table @tbl at slot @index: set up the
 * default parms, per-cpu stats, /proc stats file, hash tables (main + proxy),
 * table lock, periodic GC work and the proxy timer.  Allocation failures
 * here are fatal (panic) — this runs at boot/module init.
 */
1483 void neigh_table_init(int index, struct neigh_table *tbl)
1485 unsigned long now = jiffies;
1486 unsigned long phsize;
1488 INIT_LIST_HEAD(&tbl->parms_list);
1489 list_add(&tbl->parms.list, &tbl->parms_list);
1490 write_pnet(&tbl->parms.net, &init_net);
1491 atomic_set(&tbl->parms.refcnt, 1);
/* Randomize reachable_time around BASE_REACHABLE_TIME per ND algorithm. */
1492 tbl->parms.reachable_time =
1493 neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));
1495 tbl->stats = alloc_percpu(struct neigh_statistics);
1497 panic("cannot create neighbour cache statistics");
1499 #ifdef CONFIG_PROC_FS
1500 if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
1501 &neigh_stat_seq_fops, tbl))
1502 panic("cannot create neighbour proc dir entry")
/* Start with a small hash (2^3 buckets); it grows on demand. */
1505 RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));
1507 phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
1508 tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
1510 if (!tbl->nht || !tbl->phash_buckets)
1511 panic("cannot allocate neighbour cache hashes");
/* Derive entry_size from key_len unless the protocol pre-set it. */
1513 if (!tbl->entry_size)
1514 tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
1515 tbl->key_len, NEIGH_PRIV_ALIGN);
1517 WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);
1519 rwlock_init(&tbl->lock);
1520 INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
1521 queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
1522 tbl->parms.reachable_time);
1523 setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
1524 skb_queue_head_init_class(&tbl->proxy_queue,
1525 &neigh_table_proxy_queue_class);
1527 tbl->last_flush = now;
1528 tbl->last_rand = now + tbl->parms.reachable_time * 20;
1530 neigh_tables[index] = tbl;
1532 EXPORT_SYMBOL(neigh_table_init);
/* Tear down table @tbl: deregister it, stop GC work and the proxy timer,
 * flush queued proxy skbs and all entries, then free the hashes, proc
 * entry and per-cpu stats.  Warns if entries leaked.
 */
1534 int neigh_table_clear(int index, struct neigh_table *tbl)
1536 neigh_tables[index] = NULL;
1537 /* It is not clean... Fix it to unload IPv6 module safely */
1538 cancel_delayed_work_sync(&tbl->gc_work);
1539 del_timer_sync(&tbl->proxy_timer);
1540 pneigh_queue_purge(&tbl->proxy_queue);
1541 neigh_ifdown(tbl, NULL);
1542 if (atomic_read(&tbl->entries))
1543 pr_crit("neighbour leakage\n");
/* Free the hash after a grace period; lockless readers may still hold it. */
1545 call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
1546 neigh_hash_free_rcu);
1549 kfree(tbl->phash_buckets);
1550 tbl->phash_buckets = NULL;
1552 remove_proc_entry(tbl->id, init_net.proc_net_stat);
1554 free_percpu(tbl->stats);
1559 EXPORT_SYMBOL(neigh_table_clear);
/* Map an address family to its registered neighbour table
 * (ARP / ND / DECnet), or NULL if the family is unsupported. */
1561 static struct neigh_table *neigh_find_table(int family)
1563 struct neigh_table *tbl = NULL;
1567 tbl = neigh_tables[NEIGH_ARP_TABLE];
1570 tbl = neigh_tables[NEIGH_ND_TABLE];
1573 tbl = neigh_tables[NEIGH_DN_TABLE];
/* RTM_DELNEIGH handler: delete a (proxy) neighbour entry described by the
 * netlink message.  A normal entry is not freed directly; it is forced to
 * NUD_FAILED via neigh_update() with ADMIN|OVERRIDE so the usual state
 * machine reclaims it.
 */
1580 static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh)
1582 struct net *net = sock_net(skb->sk);
1584 struct nlattr *dst_attr;
1585 struct neigh_table *tbl;
1586 struct neighbour *neigh;
1587 struct net_device *dev = NULL;
1591 if (nlmsg_len(nlh) < sizeof(*ndm))
1594 dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
1595 if (dst_attr == NULL)
1598 ndm = nlmsg_data(nlh);
1599 if (ndm->ndm_ifindex) {
1600 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1607 tbl = neigh_find_table(ndm->ndm_family);
1609 return -EAFNOSUPPORT;
/* Reject destination addresses shorter than the table's key. */
1611 if (nla_len(dst_attr) < tbl->key_len)
1614 if (ndm->ndm_flags & NTF_PROXY) {
1615 err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
1622 neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
1623 if (neigh == NULL) {
1628 err = neigh_update(neigh, NULL, NUD_FAILED,
1629 NEIGH_UPDATE_F_OVERRIDE |
1630 NEIGH_UPDATE_F_ADMIN);
1631 neigh_release(neigh);
/* RTM_NEWNEIGH handler: create or update a neighbour (or proxy) entry from
 * the netlink request, honouring NLM_F_CREATE / NLM_F_EXCL / NLM_F_REPLACE
 * semantics.  NTF_USE only pokes the entry (neigh_event_send) instead of
 * administratively setting its state.
 */
1637 static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh)
1639 int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
1640 struct net *net = sock_net(skb->sk);
1642 struct nlattr *tb[NDA_MAX+1];
1643 struct neigh_table *tbl;
1644 struct net_device *dev = NULL;
1645 struct neighbour *neigh;
1650 err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
1655 if (tb[NDA_DST] == NULL)
1658 ndm = nlmsg_data(nlh);
1659 if (ndm->ndm_ifindex) {
1660 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
/* A link-layer address shorter than the device's addr_len is invalid. */
1666 if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
1670 tbl = neigh_find_table(ndm->ndm_family);
1672 return -EAFNOSUPPORT;
1674 if (nla_len(tb[NDA_DST]) < tbl->key_len)
1676 dst = nla_data(tb[NDA_DST]);
1677 lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
/* Proxy entries live in the separate pneigh hash; create-or-get here. */
1679 if (ndm->ndm_flags & NTF_PROXY) {
1680 struct pneigh_entry *pn;
1683 pn = pneigh_lookup(tbl, net, dst, dev, 1);
1685 pn->flags = ndm->ndm_flags;
1694 neigh = neigh_lookup(tbl, dst, dev);
1695 if (neigh == NULL) {
1696 if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
1701 neigh = __neigh_lookup_errno(tbl, dst, dev);
1702 if (IS_ERR(neigh)) {
1703 err = PTR_ERR(neigh);
/* Entry already exists: EXCL means fail, !REPLACE means don't override. */
1707 if (nlh->nlmsg_flags & NLM_F_EXCL) {
1709 neigh_release(neigh);
1713 if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
1714 flags &= ~NEIGH_UPDATE_F_OVERRIDE;
1717 if (ndm->ndm_flags & NTF_USE) {
1718 neigh_event_send(neigh, NULL);
1721 err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
1722 neigh_release(neigh);
/* Dump one neigh_parms as a nested NDTA_PARMS attribute set on @skb.
 * Returns the nest end offset on success, cancels the nest on overflow.
 */
1728 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
1730 struct nlattr *nest;
1732 nest = nla_nest_start(skb, NDTA_PARMS);
1737 nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
1738 nla_put_u32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)) ||
1739 nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
1740 NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
1741 /* approximative value for deprecated QUEUE_LEN (in packets) */
1742 nla_put_u32(skb, NDTPA_QUEUE_LEN,
1743 NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
1744 nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
1745 nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
1746 nla_put_u32(skb, NDTPA_UCAST_PROBES,
1747 NEIGH_VAR(parms, UCAST_PROBES)) ||
1748 nla_put_u32(skb, NDTPA_MCAST_PROBES,
1749 NEIGH_VAR(parms, MCAST_PROBES)) ||
1750 nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time) ||
1751 nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
1752 NEIGH_VAR(parms, BASE_REACHABLE_TIME)) ||
1753 nla_put_msecs(skb, NDTPA_GC_STALETIME,
1754 NEIGH_VAR(parms, GC_STALETIME)) ||
1755 nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
1756 NEIGH_VAR(parms, DELAY_PROBE_TIME)) ||
1757 nla_put_msecs(skb, NDTPA_RETRANS_TIME,
1758 NEIGH_VAR(parms, RETRANS_TIME)) ||
1759 nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
1760 NEIGH_VAR(parms, ANYCAST_DELAY)) ||
1761 nla_put_msecs(skb, NDTPA_PROXY_DELAY,
1762 NEIGH_VAR(parms, PROXY_DELAY)) ||
1763 nla_put_msecs(skb, NDTPA_LOCKTIME,
1764 NEIGH_VAR(parms, LOCKTIME)))
1765 goto nla_put_failure;
1766 return nla_nest_end(skb, nest);
1769 nla_nest_cancel(skb, nest);
/* Build a full RTM_NEWNEIGHTBL message for @tbl: ndtmsg header, table
 * tunables, NDTA_CONFIG snapshot, aggregated per-cpu NDTA_STATS and the
 * default parms.  tbl->lock is held (read) for a consistent snapshot.
 */
1773 static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
1774 u32 pid, u32 seq, int type, int flags)
1776 struct nlmsghdr *nlh;
1777 struct ndtmsg *ndtmsg;
1779 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1783 ndtmsg = nlmsg_data(nlh);
1785 read_lock_bh(&tbl->lock);
1786 ndtmsg->ndtm_family = tbl->family;
1787 ndtmsg->ndtm_pad1 = 0;
1788 ndtmsg->ndtm_pad2 = 0;
1790 if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
1791 nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval) ||
1792 nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
1793 nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
1794 nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
1795 goto nla_put_failure;
/* Snapshot table configuration; deltas are relative to "now". */
1797 unsigned long now = jiffies;
1798 unsigned int flush_delta = now - tbl->last_flush;
1799 unsigned int rand_delta = now - tbl->last_rand;
1800 struct neigh_hash_table *nht;
1801 struct ndt_config ndc = {
1802 .ndtc_key_len = tbl->key_len,
1803 .ndtc_entry_size = tbl->entry_size,
1804 .ndtc_entries = atomic_read(&tbl->entries),
1805 .ndtc_last_flush = jiffies_to_msecs(flush_delta),
1806 .ndtc_last_rand = jiffies_to_msecs(rand_delta),
1807 .ndtc_proxy_qlen = tbl->proxy_queue.qlen,
/* Hash parameters are read under RCU since the hash can be resized. */
1811 nht = rcu_dereference_bh(tbl->nht);
1812 ndc.ndtc_hash_rnd = nht->hash_rnd[0];
1813 ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
1814 rcu_read_unlock_bh();
1816 if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
1817 goto nla_put_failure;
/* Sum statistics across all possible CPUs into one ndt_stats blob. */
1822 struct ndt_stats ndst;
1824 memset(&ndst, 0, sizeof(ndst));
1826 for_each_possible_cpu(cpu) {
1827 struct neigh_statistics *st;
1829 st = per_cpu_ptr(tbl->stats, cpu);
1830 ndst.ndts_allocs += st->allocs;
1831 ndst.ndts_destroys += st->destroys;
1832 ndst.ndts_hash_grows += st->hash_grows;
1833 ndst.ndts_res_failed += st->res_failed;
1834 ndst.ndts_lookups += st->lookups;
1835 ndst.ndts_hits += st->hits;
1836 ndst.ndts_rcv_probes_mcast += st->rcv_probes_mcast;
1837 ndst.ndts_rcv_probes_ucast += st->rcv_probes_ucast;
1838 ndst.ndts_periodic_gc_runs += st->periodic_gc_runs;
1839 ndst.ndts_forced_gc_runs += st->forced_gc_runs;
1842 if (nla_put(skb, NDTA_STATS, sizeof(ndst), &ndst))
1843 goto nla_put_failure;
/* The table default parms must never be bound to a device. */
1846 BUG_ON(tbl->parms.dev);
1847 if (neightbl_fill_parms(skb, &tbl->parms) < 0)
1848 goto nla_put_failure;
1850 read_unlock_bh(&tbl->lock);
1851 nlmsg_end(skb, nlh);
1855 read_unlock_bh(&tbl->lock);
1856 nlmsg_cancel(skb, nlh);
/* Like neightbl_fill_info() but emits only the table name plus one
 * specific (per-device) parms block — used when dumping non-default
 * parms entries.
 */
1860 static int neightbl_fill_param_info(struct sk_buff *skb,
1861 struct neigh_table *tbl,
1862 struct neigh_parms *parms,
1863 u32 pid, u32 seq, int type,
1866 struct ndtmsg *ndtmsg;
1867 struct nlmsghdr *nlh;
1869 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1873 ndtmsg = nlmsg_data(nlh);
1875 read_lock_bh(&tbl->lock);
1876 ndtmsg->ndtm_family = tbl->family;
1877 ndtmsg->ndtm_pad1 = 0;
1878 ndtmsg->ndtm_pad2 = 0;
1880 if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
1881 neightbl_fill_parms(skb, parms) < 0)
1884 read_unlock_bh(&tbl->lock);
1885 nlmsg_end(skb, nlh);
1888 read_unlock_bh(&tbl->lock);
1889 nlmsg_cancel(skb, nlh);
/* Netlink attribute validation policy for RTM_*NEIGHTBL messages. */
1893 static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
1894 [NDTA_NAME] = { .type = NLA_STRING },
1895 [NDTA_THRESH1] = { .type = NLA_U32 },
1896 [NDTA_THRESH2] = { .type = NLA_U32 },
1897 [NDTA_THRESH3] = { .type = NLA_U32 },
1898 [NDTA_GC_INTERVAL] = { .type = NLA_U64 },
1899 [NDTA_PARMS] = { .type = NLA_NESTED },
/* Validation policy for the nested NDTA_PARMS attributes. */
1902 static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
1903 [NDTPA_IFINDEX] = { .type = NLA_U32 },
1904 [NDTPA_QUEUE_LEN] = { .type = NLA_U32 },
1905 [NDTPA_PROXY_QLEN] = { .type = NLA_U32 },
1906 [NDTPA_APP_PROBES] = { .type = NLA_U32 },
1907 [NDTPA_UCAST_PROBES] = { .type = NLA_U32 },
1908 [NDTPA_MCAST_PROBES] = { .type = NLA_U32 },
1909 [NDTPA_BASE_REACHABLE_TIME] = { .type = NLA_U64 },
1910 [NDTPA_GC_STALETIME] = { .type = NLA_U64 },
1911 [NDTPA_DELAY_PROBE_TIME] = { .type = NLA_U64 },
1912 [NDTPA_RETRANS_TIME] = { .type = NLA_U64 },
1913 [NDTPA_ANYCAST_DELAY] = { .type = NLA_U64 },
1914 [NDTPA_PROXY_DELAY] = { .type = NLA_U64 },
1915 [NDTPA_LOCKTIME] = { .type = NLA_U64 },
/* RTM_SETNEIGHTBL handler: locate the table by NDTA_NAME (and family),
 * then apply nested NDTPA_* parms updates and, in the initial namespace
 * only, the global gc thresholds/interval — all under tbl->lock.
 */
1918 static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh)
1920 struct net *net = sock_net(skb->sk);
1921 struct neigh_table *tbl;
1922 struct ndtmsg *ndtmsg;
1923 struct nlattr *tb[NDTA_MAX+1];
1927 err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
1928 nl_neightbl_policy);
1932 if (tb[NDTA_NAME] == NULL) {
1937 ndtmsg = nlmsg_data(nlh);
/* Find the target table by name, optionally filtered by family. */
1939 for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
1940 tbl = neigh_tables[tidx];
1943 if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
1945 if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) {
1955 * We acquire tbl->lock to be nice to the periodic timers and
1956 * make sure they always see a consistent set of values.
1958 write_lock_bh(&tbl->lock);
1960 if (tb[NDTA_PARMS]) {
1961 struct nlattr *tbp[NDTPA_MAX+1];
1962 struct neigh_parms *p;
1965 err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
1966 nl_ntbl_parm_policy);
1968 goto errout_tbl_lock;
1970 if (tbp[NDTPA_IFINDEX])
1971 ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
1973 p = lookup_neigh_parms(tbl, net, ifindex);
1976 goto errout_tbl_lock;
/* Apply each present NDTPA_* attribute to the matching tunable. */
1979 for (i = 1; i <= NDTPA_MAX; i++) {
1984 case NDTPA_QUEUE_LEN:
1985 NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
1986 nla_get_u32(tbp[i]) *
1987 SKB_TRUESIZE(ETH_FRAME_LEN));
1989 case NDTPA_QUEUE_LENBYTES:
1990 NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
1991 nla_get_u32(tbp[i]));
1993 case NDTPA_PROXY_QLEN:
1994 NEIGH_VAR_SET(p, PROXY_QLEN,
1995 nla_get_u32(tbp[i]));
1997 case NDTPA_APP_PROBES:
1998 NEIGH_VAR_SET(p, APP_PROBES,
1999 nla_get_u32(tbp[i]));
2001 case NDTPA_UCAST_PROBES:
2002 NEIGH_VAR_SET(p, UCAST_PROBES,
2003 nla_get_u32(tbp[i]));
2005 case NDTPA_MCAST_PROBES:
2006 NEIGH_VAR_SET(p, MCAST_PROBES,
2007 nla_get_u32(tbp[i]));
2009 case NDTPA_BASE_REACHABLE_TIME:
2010 NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
2011 nla_get_msecs(tbp[i]));
2012 /* update reachable_time as well, otherwise, the change will
2013 * only be effective after the next time neigh_periodic_work
2014 * decides to recompute it (can be multiple minutes)
2017 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
2019 case NDTPA_GC_STALETIME:
2020 NEIGH_VAR_SET(p, GC_STALETIME,
2021 nla_get_msecs(tbp[i]));
2023 case NDTPA_DELAY_PROBE_TIME:
2024 NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
2025 nla_get_msecs(tbp[i]));
2027 case NDTPA_RETRANS_TIME:
2028 NEIGH_VAR_SET(p, RETRANS_TIME,
2029 nla_get_msecs(tbp[i]));
2031 case NDTPA_ANYCAST_DELAY:
2032 NEIGH_VAR_SET(p, ANYCAST_DELAY,
2033 nla_get_msecs(tbp[i]));
2035 case NDTPA_PROXY_DELAY:
2036 NEIGH_VAR_SET(p, PROXY_DELAY,
2037 nla_get_msecs(tbp[i]));
2039 case NDTPA_LOCKTIME:
2040 NEIGH_VAR_SET(p, LOCKTIME,
2041 nla_get_msecs(tbp[i]));
/* Table-wide GC knobs are only settable from the initial namespace. */
2048 if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
2049 tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
2050 !net_eq(net, &init_net))
2051 goto errout_tbl_lock;
2053 if (tb[NDTA_THRESH1])
2054 tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
2056 if (tb[NDTA_THRESH2])
2057 tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
2059 if (tb[NDTA_THRESH3])
2060 tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
2062 if (tb[NDTA_GC_INTERVAL])
2063 tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
2068 write_unlock_bh(&tbl->lock);
/* Netlink dump handler for neighbour tables: for each table (optionally
 * family-filtered) emit the table info and then every non-default parms
 * entry visible in the requesting netns.  cb->args[0]/[1] carry the
 * table/parms resume positions across dump continuations.
 */
2073 static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2075 struct net *net = sock_net(skb->sk);
2076 int family, tidx, nidx = 0;
2077 int tbl_skip = cb->args[0];
2078 int neigh_skip = cb->args[1];
2079 struct neigh_table *tbl;
2081 family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2083 for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
2084 struct neigh_parms *p;
2086 tbl = neigh_tables[tidx];
2090 if (tidx < tbl_skip || (family && tbl->family != family))
2093 if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
2094 cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
/* Skip the default parms (first list entry); it was dumped above. */
2099 p = list_next_entry(&tbl->parms, list);
2100 list_for_each_entry_from(p, &tbl->parms_list, list) {
2101 if (!net_eq(neigh_parms_net(p), net))
2104 if (nidx < neigh_skip)
2107 if (neightbl_fill_param_info(skb, tbl, p,
2108 NETLINK_CB(cb->skb).portid,
/* Serialize one neighbour entry into an ndmsg netlink message: key
 * (NDA_DST), link-layer address if the entry is NUD_VALID, cache timing
 * info (NDA_CACHEINFO) and probe count.  neigh->lock guards the snapshot
 * of state/lladdr/timestamps.
 */
2126 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
2127 u32 pid, u32 seq, int type, unsigned int flags)
2129 unsigned long now = jiffies;
2130 struct nda_cacheinfo ci;
2131 struct nlmsghdr *nlh;
2134 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2138 ndm = nlmsg_data(nlh);
2139 ndm->ndm_family = neigh->ops->family;
2142 ndm->ndm_flags = neigh->flags;
2143 ndm->ndm_type = neigh->type;
2144 ndm->ndm_ifindex = neigh->dev->ifindex;
2146 if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
2147 goto nla_put_failure;
2149 read_lock_bh(&neigh->lock);
2150 ndm->ndm_state = neigh->nud_state;
2151 if (neigh->nud_state & NUD_VALID) {
2152 char haddr[MAX_ADDR_LEN];
/* Copy the hw address under the lock so it can't change mid-dump. */
2154 neigh_ha_snapshot(haddr, neigh, neigh->dev);
2155 if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
2156 read_unlock_bh(&neigh->lock);
2157 goto nla_put_failure;
/* Ages are reported relative to now; refcnt excludes our own hold. */
2161 ci.ndm_used = jiffies_to_clock_t(now - neigh->used);
2162 ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
2163 ci.ndm_updated = jiffies_to_clock_t(now - neigh->updated);
2164 ci.ndm_refcnt = atomic_read(&neigh->refcnt) - 1;
2165 read_unlock_bh(&neigh->lock);
2167 if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
2168 nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
2169 goto nla_put_failure;
2171 nlmsg_end(skb, nlh);
2175 nlmsg_cancel(skb, nlh);
/* Serialize one proxy-neighbour entry: proxy entries have no NUD state
 * machine, so state is reported as NUD_NONE with NTF_PROXY set. */
2179 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2180 u32 pid, u32 seq, int type, unsigned int flags,
2181 struct neigh_table *tbl)
2183 struct nlmsghdr *nlh;
2186 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2190 ndm = nlmsg_data(nlh);
2191 ndm->ndm_family = tbl->family;
2194 ndm->ndm_flags = pn->flags | NTF_PROXY;
2195 ndm->ndm_type = RTN_UNICAST;
2196 ndm->ndm_ifindex = pn->dev->ifindex;
2197 ndm->ndm_state = NUD_NONE;
2199 if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2200 goto nla_put_failure;
2202 nlmsg_end(skb, nlh);
2206 nlmsg_cancel(skb, nlh);
/* Broadcast a neighbour change: netevent chain for in-kernel listeners,
 * then an RTM_NEWNEIGH netlink notification for userspace. */
2210 static void neigh_update_notify(struct neighbour *neigh)
2212 call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
2213 __neigh_notify(neigh, RTM_NEWNEIGH, 0);
/* Dump all neighbours of @tbl visible in the requesting netns, walking the
 * RCU-protected hash.  cb->args[1]/[2] store the bucket/index resume point.
 */
2216 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2217 struct netlink_callback *cb)
2219 struct net *net = sock_net(skb->sk);
2220 struct neighbour *n;
2221 int rc, h, s_h = cb->args[1];
2222 int idx, s_idx = idx = cb->args[2];
2223 struct neigh_hash_table *nht;
2226 nht = rcu_dereference_bh(tbl->nht);
2228 for (h = s_h; h < (1 << nht->hash_shift); h++) {
2231 for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
2233 n = rcu_dereference_bh(n->next)) {
2234 if (!net_eq(dev_net(n->dev), net))
2238 if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2251 rcu_read_unlock_bh();
/* Dump all proxy-neighbour entries of @tbl for the requesting netns.
 * The pneigh hash is not RCU-protected, so tbl->lock is held (read)
 * throughout.  cb->args[3]/[4] store the resume point.
 */
2257 static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2258 struct netlink_callback *cb)
2260 struct pneigh_entry *n;
2261 struct net *net = sock_net(skb->sk);
2262 int rc, h, s_h = cb->args[3];
2263 int idx, s_idx = idx = cb->args[4];
2265 read_lock_bh(&tbl->lock);
2267 for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
2270 for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
2271 if (dev_net(n->dev) != net)
2275 if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2278 NLM_F_MULTI, tbl) < 0) {
2279 read_unlock_bh(&tbl->lock);
2288 read_unlock_bh(&tbl->lock);
/* RTM_GETNEIGH dump entry point: dispatch to either the proxy table dump
 * (when the request carries a full ndmsg with NTF_PROXY) or the regular
 * neighbour dump, for each table matching the requested family.
 */
2297 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2299 struct neigh_table *tbl;
2304 family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2306 /* check for full ndmsg structure presence, family member is
2307 * the same for both structures
2309 if (nlmsg_len(cb->nlh) >= sizeof(struct ndmsg) &&
2310 ((struct ndmsg *) nlmsg_data(cb->nlh))->ndm_flags == NTF_PROXY)
2315 for (t = 0; t < NEIGH_NR_TABLES; t++) {
2316 tbl = neigh_tables[t];
2320 if (t < s_t || (family && tbl->family != family))
/* Reset the per-table resume state when moving to a new table. */
2323 memset(&cb->args[1], 0, sizeof(cb->args) -
2324 sizeof(cb->args[0]));
2326 err = pneigh_dump_table(tbl, skb, cb);
2328 err = neigh_dump_table(tbl, skb, cb);
/* Invoke @cb(neigh, cookie) for every entry in @tbl.  The read lock on
 * tbl->lock prevents hash resizes while iterating; buckets themselves are
 * traversed via RCU.
 */
2337 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2340 struct neigh_hash_table *nht;
2343 nht = rcu_dereference_bh(tbl->nht);
2345 read_lock(&tbl->lock); /* avoid resizes */
2346 for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2347 struct neighbour *n;
2349 for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
2351 n = rcu_dereference_bh(n->next))
2354 read_unlock(&tbl->lock);
2355 rcu_read_unlock_bh();
2359 /* The tbl->lock must be held as a writer and BH disabled. */
/* Walk every bucket; @cb decides per entry whether it is unlinked from the
 * hash and released (cleanup + notification + put). */
2360 void __neigh_for_each_release(struct neigh_table *tbl,
2361 int (*cb)(struct neighbour *))
2364 struct neigh_hash_table *nht;
2366 nht = rcu_dereference_protected(tbl->nht,
2367 lockdep_is_held(&tbl->lock));
2368 for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2369 struct neighbour *n;
2370 struct neighbour __rcu **np;
2372 np = &nht->hash_buckets[chain];
2373 while ((n = rcu_dereference_protected(*np,
2374 lockdep_is_held(&tbl->lock))) != NULL) {
2377 write_lock(&n->lock);
/* Unlink by pointing the previous link at n's successor. */
2380 rcu_assign_pointer(*np,
2381 rcu_dereference_protected(n->next,
2382 lockdep_is_held(&tbl->lock)));
2386 write_unlock(&n->lock);
2388 neigh_cleanup_and_release(n);
2392 EXPORT_SYMBOL(__neigh_for_each_release);
/* Transmit @skb toward @addr on @dev.  AF_PACKET bypasses neighbour
 * resolution (raw header build + direct xmit); other families resolve or
 * create a neighbour entry and hand the skb to its output method.
 */
2394 int neigh_xmit(int family, struct net_device *dev,
2395 const void *addr, struct sk_buff *skb)
2398 if (family == AF_PACKET) {
2399 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
2400 addr, NULL, skb->len);
2403 err = dev_queue_xmit(skb);
2405 struct neigh_table *tbl;
2406 struct neighbour *neigh;
2409 tbl = neigh_find_table(family);
2412 neigh = __neigh_lookup_noref(tbl, addr, dev);
/* Not cached yet: create the entry (may return ERR_PTR on failure). */
2414 neigh = __neigh_create(tbl, addr, dev, false);
2415 err = PTR_ERR(neigh);
2418 err = neigh->output(neigh, skb);
2426 EXPORT_SYMBOL(neigh_xmit);
2428 #ifdef CONFIG_PROC_FS
/* seq_file helper: find the first neighbour to show, starting from the
 * first hash bucket, skipping entries from other namespaces and —
 * unless the caller asked otherwise — NUD_NOARP entries.
 */
2430 static struct neighbour *neigh_get_first(struct seq_file *seq)
2432 struct neigh_seq_state *state = seq->private;
2433 struct net *net = seq_file_net(seq);
2434 struct neigh_hash_table *nht = state->nht;
2435 struct neighbour *n = NULL;
2436 int bucket = state->bucket;
2438 state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
2439 for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
2440 n = rcu_dereference_bh(nht->hash_buckets[bucket]);
2443 if (!net_eq(dev_net(n->dev), net))
/* Protocol-specific sub-iterator (e.g. per-entry filtering) hook. */
2445 if (state->neigh_sub_iter) {
2449 v = state->neigh_sub_iter(state, n, &fakep);
2453 if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2455 if (n->nud_state & ~NUD_NOARP)
2458 n = rcu_dereference_bh(n->next);
2464 state->bucket = bucket;
/* seq_file helper: advance from @n to the next displayable neighbour,
 * moving to subsequent hash buckets as the current one is exhausted.
 */
2469 static struct neighbour *neigh_get_next(struct seq_file *seq,
2470 struct neighbour *n,
2473 struct neigh_seq_state *state = seq->private;
2474 struct net *net = seq_file_net(seq);
2475 struct neigh_hash_table *nht = state->nht;
2477 if (state->neigh_sub_iter) {
2478 void *v = state->neigh_sub_iter(state, n, pos);
2482 n = rcu_dereference_bh(n->next);
/* Same visibility filters as neigh_get_first(). */
2486 if (!net_eq(dev_net(n->dev), net))
2488 if (state->neigh_sub_iter) {
2489 void *v = state->neigh_sub_iter(state, n, pos);
2494 if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2497 if (n->nud_state & ~NUD_NOARP)
2500 n = rcu_dereference_bh(n->next);
2506 if (++state->bucket >= (1 << nht->hash_shift))
2509 n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
/* seq_file helper: return the neighbour at position *pos, decrementing
 * *pos as entries are consumed; NULL if the position is past the end. */
2517 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2519 struct neighbour *n = neigh_get_first(seq);
2524 n = neigh_get_next(seq, n, pos);
2529 return *pos ? NULL : n;
/* seq_file helper: first proxy entry visible in this netns; also flags the
 * iteration state as being in the pneigh phase. */
2532 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2534 struct neigh_seq_state *state = seq->private;
2535 struct net *net = seq_file_net(seq);
2536 struct neigh_table *tbl = state->tbl;
2537 struct pneigh_entry *pn = NULL;
2538 int bucket = state->bucket;
2540 state->flags |= NEIGH_SEQ_IS_PNEIGH;
2541 for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2542 pn = tbl->phash_buckets[bucket];
2543 while (pn && !net_eq(pneigh_net(pn), net))
2548 state->bucket = bucket;
/* seq_file helper: next proxy entry after @pn in this netns, advancing
 * through the pneigh hash buckets. */
2553 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2554 struct pneigh_entry *pn,
2557 struct neigh_seq_state *state = seq->private;
2558 struct net *net = seq_file_net(seq);
2559 struct neigh_table *tbl = state->tbl;
2563 } while (pn && !net_eq(pneigh_net(pn), net));
2566 if (++state->bucket > PNEIGH_HASHMASK)
2568 pn = tbl->phash_buckets[state->bucket];
2569 while (pn && !net_eq(pneigh_net(pn), net))
/* seq_file helper: proxy entry at position *pos (counts down *pos),
 * NULL when exhausted. */
2581 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2583 struct pneigh_entry *pn = pneigh_get_first(seq);
2588 pn = pneigh_get_next(seq, pn, pos);
2593 return *pos ? NULL : pn;
/* seq_file helper: resolve position *pos across both phases — regular
 * neighbours first, then (unless NEIGH_ONLY) proxy entries. */
2596 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2598 struct neigh_seq_state *state = seq->private;
2600 loff_t idxpos = *pos;
2602 rc = neigh_get_idx(seq, &idxpos);
2603 if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2604 rc = pneigh_get_idx(seq, &idxpos);
/* seq_file ->start: record iteration flags, pin the hash under RCU-BH
 * (released in neigh_seq_stop) and position at *pos, or return the
 * header token at position 0.
 */
2609 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
2612 struct neigh_seq_state *state = seq->private;
2616 state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
2619 state->nht = rcu_dereference_bh(tbl->nht);
2621 return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
2623 EXPORT_SYMBOL(neigh_seq_start);
/* seq_file ->next: step to the following record, switching from the
 * neighbour phase to the proxy phase when the former is exhausted. */
2625 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2627 struct neigh_seq_state *state;
2630 if (v == SEQ_START_TOKEN) {
2631 rc = neigh_get_first(seq);
2635 state = seq->private;
2636 if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
2637 rc = neigh_get_next(seq, v, NULL);
2640 if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2641 rc = pneigh_get_first(seq);
/* In the pneigh phase, NEIGH_ONLY iterations must never get here. */
2643 BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
2644 rc = pneigh_get_next(seq, v, NULL);
2650 EXPORT_SYMBOL(neigh_seq_next);
/* seq_file ->stop: drop the RCU-BH read side taken in neigh_seq_start(). */
2652 void neigh_seq_stop(struct seq_file *seq, void *v)
2655 rcu_read_unlock_bh();
2657 EXPORT_SYMBOL(neigh_seq_stop);
2659 /* statistics via seq_file */
/* stats seq_file ->start: position 0 is the header token; positions >= 1
 * map to the (pos-1)'th possible CPU's per-cpu statistics. */
2661 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2663 struct neigh_table *tbl = seq->private;
2667 return SEQ_START_TOKEN;
2669 for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
2670 if (!cpu_possible(cpu))
2673 return per_cpu_ptr(tbl->stats, cpu);
/* stats seq_file ->next: advance to the next possible CPU's stats. */
2678 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2680 struct neigh_table *tbl = seq->private;
2683 for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2684 if (!cpu_possible(cpu))
2687 return per_cpu_ptr(tbl->stats, cpu);
2692 static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
/* stats seq_file ->show: print the column header for the start token, or
 * one hex-formatted line of counters for a single CPU's statistics. */
2697 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
2699 struct neigh_table *tbl = seq->private;
2700 struct neigh_statistics *st = v;
2702 if (v == SEQ_START_TOKEN) {
2703 seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards\n");
2707 seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx "
2708 "%08lx %08lx %08lx %08lx %08lx\n",
2709 atomic_read(&tbl->entries),
2720 st->rcv_probes_mcast,
2721 st->rcv_probes_ucast,
2723 st->periodic_gc_runs,
/* seq_file operations for /proc/net/stat/<tbl->id>. */
2731 static const struct seq_operations neigh_stat_seq_ops = {
2732 .start = neigh_stat_seq_start,
2733 .next = neigh_stat_seq_next,
2734 .stop = neigh_stat_seq_stop,
2735 .show = neigh_stat_seq_show,
/* open() for the stats proc file: attach the table (stored as proc
 * entry data) as the seq_file's private pointer. */
2738 static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2740 int ret = seq_open(file, &neigh_stat_seq_ops);
2743 struct seq_file *sf = file->private_data;
2744 sf->private = PDE_DATA(inode);
/* file_operations backing the per-table statistics proc entry. */
2749 static const struct file_operations neigh_stat_seq_fops = {
2750 .owner = THIS_MODULE,
2751 .open = neigh_stat_seq_open,
2753 .llseek = seq_lseek,
2754 .release = seq_release,
2757 #endif /* CONFIG_PROC_FS */
/* Worst-case payload size of a neighbour netlink notification, used to
 * size the skb in __neigh_notify(). */
2759 static inline size_t neigh_nlmsg_size(void)
2761 return NLMSG_ALIGN(sizeof(struct ndmsg))
2762 + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2763 + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2764 + nla_total_size(sizeof(struct nda_cacheinfo))
2765 + nla_total_size(4); /* NDA_PROBES */
/* Send an rtnetlink notification of @type for neighbour @n to the
 * RTNLGRP_NEIGH group; on failure the group error state is set so
 * listeners learn they missed an event.  GFP_ATOMIC: may run in
 * softirq context.
 */
2768 static void __neigh_notify(struct neighbour *n, int type, int flags)
2770 struct net *net = dev_net(n->dev);
2771 struct sk_buff *skb;
2774 skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
2778 err = neigh_fill_info(skb, n, 0, 0, type, flags);
2780 /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
2781 WARN_ON(err == -EMSGSIZE);
2785 rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
2789 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
/* Ask userspace (app-level resolvers) to resolve @n by emitting an
 * RTM_GETNEIGH request notification. */
2792 void neigh_app_ns(struct neighbour *n)
2794 __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
2796 EXPORT_SYMBOL(neigh_app_ns);
2798 #ifdef CONFIG_SYSCTL
/* Upper bounds for sysctl writes: plain INT_MAX, and the packet-count
 * form of unres_qlen capped so the byte conversion cannot overflow. */
2800 static int int_max = INT_MAX;
2801 static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
/* sysctl handler for the deprecated packet-count "unres_qlen": converts
 * between the stored byte value and the user-visible packet count using
 * SKB_TRUESIZE(ETH_FRAME_LEN) as the per-packet estimate.
 */
2803 static int proc_unres_qlen(struct ctl_table *ctl, int write,
2804 void __user *buffer, size_t *lenp, loff_t *ppos)
2807 struct ctl_table tmp = *ctl;
2810 tmp.extra2 = &unres_qlen_max;
2813 size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
2814 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
2817 *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
/* Return @dev's per-device neigh_parms for the given family (ARP via
 * in_device, ND via inet6_dev); caller is in an RCU read section. */
2821 static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
2826 return __in_dev_arp_parms_get_rcu(dev);
2828 return __in6_dev_nd_parms_get_rcu(dev);
/* Propagate a changed default tunable @index from the default parms @p to
 * every device in @net that has not explicitly overridden it (tracked via
 * the data_state bitmap).
 */
2833 static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
2836 struct net_device *dev;
2837 int family = neigh_parms_family(p);
2840 for_each_netdev_rcu(net, dev) {
2841 struct neigh_parms *dst_p =
2842 neigh_get_dev_parms_rcu(dev, family);
/* Only copy into parms whose value is still "inherited". */
2844 if (dst_p && !test_bit(index, dst_p->data_state))
2845 dst_p->data[index] = p->data[index];
/* Post-write bookkeeping common to all neigh sysctl handlers: mark the
 * tunable as explicitly set and, when the written parms is a table
 * default (no device), propagate the value to non-overridden devices.
 */
2850 static void neigh_proc_update(struct ctl_table *ctl, int write)
2852 struct net_device *dev = ctl->extra1;
2853 struct neigh_parms *p = ctl->extra2;
2854 struct net *net = neigh_parms_net(p);
/* Recover the tunable index from the ctl_table's data pointer offset. */
2855 int index = (int *) ctl->data - p->data;
2860 set_bit(index, p->data_state);
2861 if (!dev) /* NULL dev means this is default value */
2862 neigh_copy_dflt_parms(net, p, index);
/* sysctl handler: integer clamped to [0, INT_MAX], then the common
 * neigh_proc_update() bookkeeping. */
2865 static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write,
2866 void __user *buffer,
2867 size_t *lenp, loff_t *ppos)
2869 struct ctl_table tmp = *ctl;
2873 tmp.extra2 = &int_max;
2875 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
2876 neigh_proc_update(ctl, write);
/* sysctl handler: plain integer, plus neigh_proc_update() bookkeeping. */
2880 int neigh_proc_dointvec(struct ctl_table *ctl, int write,
2881 void __user *buffer, size_t *lenp, loff_t *ppos)
2883 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2885 neigh_proc_update(ctl, write);
2888 EXPORT_SYMBOL(neigh_proc_dointvec);
/* sysctl handler: seconds<->jiffies conversion, plus bookkeeping. */
2890 int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write,
2891 void __user *buffer,
2892 size_t *lenp, loff_t *ppos)
2894 int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
2896 neigh_proc_update(ctl, write);
2899 EXPORT_SYMBOL(neigh_proc_dointvec_jiffies);
/* sysctl handler: USER_HZ ticks<->jiffies conversion, plus bookkeeping. */
2901 static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write,
2902 void __user *buffer,
2903 size_t *lenp, loff_t *ppos)
2905 int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);
2907 neigh_proc_update(ctl, write);
/* sysctl handler: milliseconds<->jiffies conversion, plus bookkeeping. */
2911 int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write,
2912 void __user *buffer,
2913 size_t *lenp, loff_t *ppos)
2915 int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
2917 neigh_proc_update(ctl, write);
2920 EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies);
/* sysctl handler: deprecated packet-count unres_qlen (byte conversion in
 * proc_unres_qlen), plus bookkeeping. */
2922 static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write,
2923 void __user *buffer,
2924 size_t *lenp, loff_t *ppos)
2926 int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos);
2928 neigh_proc_update(ctl, write);
/* sysctl handler shared by base_reachable_time and base_reachable_time_ms:
 * dispatch on the procname to the right jiffies conversion, then refresh
 * the randomized reachable_time immediately on successful writes.
 */
2932 static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write,
2933 void __user *buffer,
2934 size_t *lenp, loff_t *ppos)
2936 struct neigh_parms *p = ctl->extra2;
2939 if (strcmp(ctl->procname, "base_reachable_time") == 0)
2940 ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
2941 else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0)
2942 ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
2946 if (write && ret == 0) {
2947 /* update reachable_time as well, otherwise, the change will
2948 * only be effective after the next time neigh_periodic_work
2949 * decides to recompute it
2952 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
/* Offset-of trick: address of data[index] within a NULL neigh_parms, used
 * as a relocatable data pointer in the sysctl template below. */
2957 #define NEIGH_PARMS_DATA_OFFSET(index) \
2958 (&((struct neigh_parms *) 0)->data[index])
/* Template for one ctl_table entry bound to a NEIGH_VAR_* tunable. */
2960 #define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \
2961 [NEIGH_VAR_ ## attr] = { \
2963 .data = NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \
2964 .maxlen = sizeof(int), \
2966 .proc_handler = proc, \
/* Convenience wrappers selecting the matching proc handler; the *_REUSED
 * variants expose a second name backed by the same underlying tunable. */
2969 #define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \
2970 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax)
2972 #define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \
2973 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies)
2975 #define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \
2976 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies)
2978 #define NEIGH_SYSCTL_MS_JIFFIES_ENTRY(attr, name) \
2979 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies)
2981 #define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \
2982 NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies)
2984 #define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \
2985 NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen)
/* Master template for the per-device /proc/sys/net/<proto>/neigh/<dev>/
 * tree.  neigh_sysctl_register() kmemdup()s this whole structure and then
 * patches the .data/.extra pointers for the specific parms instance, so the
 * template itself is never registered directly (hence __read_mostly, not
 * const — NOTE(review): the copies are what get mutated).
 * Slots up to NEIGH_VAR_GC_INTERVAL are per-parms; the GC_* slots at the
 * end are per-table and are either zero-terminated away (real devices) or
 * pointed at tbl->gc_* (the "default" entry) by neigh_sysctl_register(). */
2987 static struct neigh_sysctl_table {
2988 struct ctl_table_header *sysctl_header;
2989 struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
2990 } neigh_sysctl_template __read_mostly = {
2992 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
2993 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
2994 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
2995 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
2996 NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
2997 NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
2998 NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
2999 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
3000 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
3001 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"),
3002 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"),
3003 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"),
/* The three REUSED entries below are alternate procnames over storage
 * already declared above (unres_qlen_bytes, retrans_time,
 * base_reachable_time). */
3004 NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"),
3005 NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"),
3006 NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"),
/* Per-table GC knobs (no .data here; filled in for the "default" entry by
 * neigh_sysctl_register).  NOTE(review): .mode lines and the min/max
 * .extra1/.extra2 lines for the minmax handlers are not visible in this
 * extraction — original numbering skips e.g. 3015→3019. */
3007 [NEIGH_VAR_GC_INTERVAL] = {
3008 .procname = "gc_interval",
3009 .maxlen = sizeof(int),
3011 .proc_handler = proc_dointvec_jiffies,
3013 [NEIGH_VAR_GC_THRESH1] = {
3014 .procname = "gc_thresh1",
3015 .maxlen = sizeof(int),
3019 .proc_handler = proc_dointvec_minmax,
3021 [NEIGH_VAR_GC_THRESH2] = {
3022 .procname = "gc_thresh2",
3023 .maxlen = sizeof(int),
3027 .proc_handler = proc_dointvec_minmax,
3029 [NEIGH_VAR_GC_THRESH3] = {
3030 .procname = "gc_thresh3",
3031 .maxlen = sizeof(int),
3035 .proc_handler = proc_dointvec_minmax,
/* Register the neigh sysctl tree for one parms instance.
 * @dev:     the device, or NULL for the protocol's "default" entry
 * @p:       the neigh_parms whose data[] backs the per-parms knobs
 * @handler: optional protocol-supplied proc_handler override for the
 *           retrans/reachable time knobs (NULL keeps the generic ones)
 * Returns 0 on success (error paths are outside this extraction's view).
 * Clones neigh_sysctl_template, rebases its placeholder .data offsets onto
 * the real parms, and registers the result under
 * net/<family>/neigh/<dev|default>. */
3041 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
3042 proc_handler *handler)
3045 struct neigh_sysctl_table *t;
3046 const char *dev_name_source;
/* Worst case: "net/" + family name (bounded by IFNAMSIZ here) +
 * "/neigh/" + device name. */
3047 char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
3050 t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
/* Rebase every per-parms slot: template .data holds an offset into
 * struct neigh_parms (see NEIGH_PARMS_DATA_OFFSET); adding 'p' turns it
 * into a real pointer.  extra1/extra2 let the handlers find the device
 * and parms again. */
3054 for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
3055 t->neigh_vars[i].data += (long) p;
3056 t->neigh_vars[i].extra1 = dev;
3057 t->neigh_vars[i].extra2 = p;
/* Real device: no per-table GC knobs — zeroing the GC_INTERVAL slot
 * terminates the ctl_table early.  (The enclosing if/else lines are not
 * visible in this extraction.) */
3061 dev_name_source = dev->name;
3062 /* Terminate the table early */
3063 memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
3064 sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
/* "default" entry: expose the table-wide GC knobs, pointing them at the
 * owning neigh_table's fields. */
3066 struct neigh_table *tbl = p->tbl;
3067 dev_name_source = "default";
3068 t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval;
3069 t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1;
3070 t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2;
3071 t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3;
/* Protocol override for the time knobs (guard 'if (handler)' not visible
 * here — presumably these assignments are conditional; confirm upstream). */
3076 t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
3078 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
3079 /* RetransTime (in milliseconds)*/
3080 t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
3081 /* ReachableTime (in milliseconds) */
3082 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
3084 /* Those handlers will update p->reachable_time after
3085 * base_reachable_time(_ms) is set to ensure the new timer starts being
3086 * applied after the next neighbour update instead of waiting for
3087 * neigh_periodic_work to update its value (can be multiple minutes)
3088 * So any handler that replaces them should do this as well
3091 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler =
3092 neigh_proc_base_reachable_time;
3093 /* ReachableTime (in milliseconds) */
3094 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler =
3095 neigh_proc_base_reachable_time;
3098 /* Don't export sysctls to unprivileged users */
/* Blanking the first procname terminates the table immediately, hiding
 * the whole directory from non-init user namespaces. */
3099 if (neigh_parms_net(p)->user_ns != &init_user_ns)
3100 t->neigh_vars[0].procname = NULL;
/* Family switch picks p_name ("ipv4"/"ipv6"); its cases are outside this
 * extraction's view. */
3102 switch (neigh_parms_family(p)) {
3113 snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
3114 p_name, dev_name_source);
3116 register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
/* NOTE(review): the assignment "t->sysctl_header =" belongs on the
 * register_net_sysctl line above — not visible in this extraction. */
3117 if (!t->sysctl_header)
/* Stash the table on the parms so neigh_sysctl_unregister can find it. */
3120 p->sysctl_table = t;
3128 EXPORT_SYMBOL(neigh_sysctl_register);
/* Tear down what neigh_sysctl_register() set up.  Clearing
 * p->sysctl_table before unregistering prevents anyone finding the table
 * through the parms while it is being destroyed; the kfree(t) that must
 * follow the unregister is on a line not visible in this extraction. */
3130 void neigh_sysctl_unregister(struct neigh_parms *p)
3132 if (p->sysctl_table) {
3133 struct neigh_sysctl_table *t = p->sysctl_table;
3134 p->sysctl_table = NULL;
3135 unregister_net_sysctl_table(t->sysctl_header);
3139 EXPORT_SYMBOL(neigh_sysctl_unregister);
3141 #endif /* CONFIG_SYSCTL */
/* Module init: hook the neighbour netlink commands into rtnetlink.
 * NEWNEIGH/DELNEIGH get doit handlers; GETNEIGH and GETNEIGHTBL get
 * dumpit handlers; SETNEIGHTBL gets a doit handler.  Registered for
 * PF_UNSPEC so the handlers serve every address family. */
3143 static int __init neigh_init(void)
3145 rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
3146 rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
3147 rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);
3149 rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
3151 rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);
/* Run at subsys initcall time — before device/protocol initcalls that
 * depend on the neighbour core being ready. */
3156 subsys_initcall(neigh_init);