]> git.kernelconcepts.de Git - karo-tx-linux.git/blob - drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
7c55df919a07241d5525f01c915b6140280b803d
[karo-tx-linux.git] / drivers / net / ethernet / mellanox / mlxsw / spectrum_router.c
1 /*
2  * drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
3  * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
4  * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
5  * Copyright (c) 2016 Ido Schimmel <idosch@mellanox.com>
6  * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the names of the copyright holders nor the names of its
17  *    contributors may be used to endorse or promote products derived from
18  *    this software without specific prior written permission.
19  *
20  * Alternatively, this software may be distributed under the terms of the
21  * GNU General Public License ("GPL") version 2 as published by the Free
22  * Software Foundation.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36
37 #include <linux/kernel.h>
38 #include <linux/types.h>
39 #include <linux/rhashtable.h>
40 #include <linux/bitops.h>
41 #include <linux/in6.h>
42 #include <linux/notifier.h>
43 #include <linux/inetdevice.h>
44 #include <net/netevent.h>
45 #include <net/neighbour.h>
46 #include <net/arp.h>
47 #include <net/ip_fib.h>
48
49 #include "spectrum.h"
50 #include "core.h"
51 #include "reg.h"
52
/* Iterate over every prefix length whose bit is set in @prefix_usage. */
#define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
	for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)
55
56 static bool
57 mlxsw_sp_prefix_usage_subset(struct mlxsw_sp_prefix_usage *prefix_usage1,
58                              struct mlxsw_sp_prefix_usage *prefix_usage2)
59 {
60         unsigned char prefix;
61
62         mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage1) {
63                 if (!test_bit(prefix, prefix_usage2->b))
64                         return false;
65         }
66         return true;
67 }
68
/* Return true when the two prefix-usage bitmaps are identical. */
static bool
mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
			 struct mlxsw_sp_prefix_usage *prefix_usage2)
{
	return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
}
75
76 static bool
77 mlxsw_sp_prefix_usage_none(struct mlxsw_sp_prefix_usage *prefix_usage)
78 {
79         struct mlxsw_sp_prefix_usage prefix_usage_none = {{ 0 } };
80
81         return mlxsw_sp_prefix_usage_eq(prefix_usage, &prefix_usage_none);
82 }
83
/* Copy prefix usage from @prefix_usage2 into @prefix_usage1. */
static void
mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
			  struct mlxsw_sp_prefix_usage *prefix_usage2)
{
	memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
}
90
/* Clear all prefix lengths in @prefix_usage. */
static void
mlxsw_sp_prefix_usage_zero(struct mlxsw_sp_prefix_usage *prefix_usage)
{
	memset(prefix_usage, 0, sizeof(*prefix_usage));
}
96
/* Mark @prefix_len as used in @prefix_usage. */
static void
mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
			  unsigned char prefix_len)
{
	set_bit(prefix_len, prefix_usage->b);
}
103
/* Mark @prefix_len as unused in @prefix_usage. */
static void
mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
			    unsigned char prefix_len)
{
	clear_bit(prefix_len, prefix_usage->b);
}
110
/* Lookup key for a FIB node: the address (sized for the larger IPv6
 * case) and its prefix length.
 */
struct mlxsw_sp_fib_key {
	unsigned char addr[sizeof(struct in6_addr)];
	unsigned char prefix_len;
};

/* Disposition of a FIB entry as programmed to the device. */
enum mlxsw_sp_fib_entry_type {
	MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
	MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
	MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
};

struct mlxsw_sp_nexthop_group;

/* One prefix in a virtual router's FIB. */
struct mlxsw_sp_fib_node {
	struct list_head entry_list;	/* mlxsw_sp_fib_entry::list */
	struct list_head list;		/* member of mlxsw_sp_fib::node_list */
	struct rhash_head ht_node;	/* member of mlxsw_sp_fib::ht */
	struct mlxsw_sp_vr *vr;
	struct mlxsw_sp_fib_key key;
};

/* Route attributes distinguishing multiple entries under one node. */
struct mlxsw_sp_fib_entry_params {
	u32 tb_id;	/* kernel routing table ID */
	u32 prio;
	u8 tos;
	u8 type;
};

struct mlxsw_sp_fib_entry {
	struct list_head list;			/* member of node's entry_list */
	struct mlxsw_sp_fib_node *fib_node;	/* owning node */
	enum mlxsw_sp_fib_entry_type type;
	struct list_head nexthop_group_node;
	struct mlxsw_sp_nexthop_group *nh_group;
	struct mlxsw_sp_fib_entry_params params;
	bool offloaded;
};

/* Per-virtual-router FIB: hash table of nodes plus per-prefix-length
 * reference counts backing the aggregate prefix_usage bitmap.
 */
struct mlxsw_sp_fib {
	struct rhashtable ht;
	struct list_head node_list;
	unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
	struct mlxsw_sp_prefix_usage prefix_usage;
};
155
156 static const struct rhashtable_params mlxsw_sp_fib_ht_params;
157
158 static struct mlxsw_sp_fib *mlxsw_sp_fib_create(void)
159 {
160         struct mlxsw_sp_fib *fib;
161         int err;
162
163         fib = kzalloc(sizeof(*fib), GFP_KERNEL);
164         if (!fib)
165                 return ERR_PTR(-ENOMEM);
166         err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
167         if (err)
168                 goto err_rhashtable_init;
169         INIT_LIST_HEAD(&fib->node_list);
170         return fib;
171
172 err_rhashtable_init:
173         kfree(fib);
174         return ERR_PTR(err);
175 }
176
/* Free a FIB instance; all nodes must already have been removed. */
static void mlxsw_sp_fib_destroy(struct mlxsw_sp_fib *fib)
{
	WARN_ON(!list_empty(&fib->node_list));
	rhashtable_destroy(&fib->ht);
	kfree(fib);
}
183
184 static struct mlxsw_sp_lpm_tree *
185 mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp, bool one_reserved)
186 {
187         static struct mlxsw_sp_lpm_tree *lpm_tree;
188         int i;
189
190         for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) {
191                 lpm_tree = &mlxsw_sp->router.lpm_trees[i];
192                 if (lpm_tree->ref_count == 0) {
193                         if (one_reserved)
194                                 one_reserved = false;
195                         else
196                                 return lpm_tree;
197                 }
198         }
199         return NULL;
200 }
201
/* Allocate the tree in hardware via the RALTA register. */
static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_lpm_tree *lpm_tree)
{
	char ralta_pl[MLXSW_REG_RALTA_LEN];

	mlxsw_reg_ralta_pack(ralta_pl, true,
			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
			     lpm_tree->id);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
}
212
/* Free the tree in hardware via the RALTA register (alloc=false). */
static int mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_lpm_tree *lpm_tree)
{
	char ralta_pl[MLXSW_REG_RALTA_LEN];

	mlxsw_reg_ralta_pack(ralta_pl, false,
			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
			     lpm_tree->id);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
}
223
/* Program the tree's bin structure via the RALST register according to
 * the requested prefix usage.
 */
static int
mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_prefix_usage *prefix_usage,
				  struct mlxsw_sp_lpm_tree *lpm_tree)
{
	char ralst_pl[MLXSW_REG_RALST_LEN];
	u8 root_bin = 0;
	u8 prefix;
	u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;

	/* for_each_set_bit() walks ascending, so root_bin ends up as
	 * the longest prefix length in use.
	 */
	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
		root_bin = prefix;

	mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
		if (prefix == 0)
			continue;
		/* Each used bin is packed with the previously packed
		 * (shorter) prefix; exact argument semantics per reg.h.
		 */
		mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
					 MLXSW_REG_RALST_BIN_NO_CHILD);
		last_prefix = prefix;
	}
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
}
247
248 static struct mlxsw_sp_lpm_tree *
249 mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
250                          struct mlxsw_sp_prefix_usage *prefix_usage,
251                          enum mlxsw_sp_l3proto proto, bool one_reserved)
252 {
253         struct mlxsw_sp_lpm_tree *lpm_tree;
254         int err;
255
256         lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp, one_reserved);
257         if (!lpm_tree)
258                 return ERR_PTR(-EBUSY);
259         lpm_tree->proto = proto;
260         err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
261         if (err)
262                 return ERR_PTR(err);
263
264         err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
265                                                 lpm_tree);
266         if (err)
267                 goto err_left_struct_set;
268         memcpy(&lpm_tree->prefix_usage, prefix_usage,
269                sizeof(lpm_tree->prefix_usage));
270         return lpm_tree;
271
272 err_left_struct_set:
273         mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
274         return ERR_PTR(err);
275 }
276
/* Destroying a tree currently amounts to freeing it in hardware. */
static int mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_lpm_tree *lpm_tree)
{
	return mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
}
282
283 static struct mlxsw_sp_lpm_tree *
284 mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
285                       struct mlxsw_sp_prefix_usage *prefix_usage,
286                       enum mlxsw_sp_l3proto proto, bool one_reserved)
287 {
288         struct mlxsw_sp_lpm_tree *lpm_tree;
289         int i;
290
291         for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) {
292                 lpm_tree = &mlxsw_sp->router.lpm_trees[i];
293                 if (lpm_tree->ref_count != 0 &&
294                     lpm_tree->proto == proto &&
295                     mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
296                                              prefix_usage))
297                         goto inc_ref_count;
298         }
299         lpm_tree = mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage,
300                                             proto, one_reserved);
301         if (IS_ERR(lpm_tree))
302                 return lpm_tree;
303
304 inc_ref_count:
305         lpm_tree->ref_count++;
306         return lpm_tree;
307 }
308
/* Drop a reference; the tree is destroyed when the count hits zero. */
static int mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_lpm_tree *lpm_tree)
{
	if (--lpm_tree->ref_count == 0)
		return mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
	return 0;
}
316
317 static void mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
318 {
319         struct mlxsw_sp_lpm_tree *lpm_tree;
320         int i;
321
322         for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) {
323                 lpm_tree = &mlxsw_sp->router.lpm_trees[i];
324                 lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
325         }
326 }
327
328 static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
329 {
330         struct mlxsw_sp_vr *vr;
331         int i;
332
333         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
334                 vr = &mlxsw_sp->router.vrs[i];
335                 if (!vr->used)
336                         return vr;
337         }
338         return NULL;
339 }
340
/* Bind the VR to its LPM tree via the RALTB register. */
static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_vr *vr)
{
	char raltb_pl[MLXSW_REG_RALTB_LEN];

	mlxsw_reg_raltb_pack(raltb_pl, vr->id,
			     (enum mlxsw_reg_ralxx_protocol) vr->proto,
			     vr->lpm_tree->id);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
}
351
/* Unbind the VR from its LPM tree. */
static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_vr *vr)
{
	/* Bind to tree 0 which is default */
	mlxsw_reg_raltb_pack(raltb_pl, vr->id,
			     (enum mlxsw_reg_ralxx_protocol) vr->proto, 0);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
}
362
363 static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
364 {
365         /* For our purpose, squash main and local table into one */
366         if (tb_id == RT_TABLE_LOCAL)
367                 tb_id = RT_TABLE_MAIN;
368         return tb_id;
369 }
370
371 static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
372                                             u32 tb_id,
373                                             enum mlxsw_sp_l3proto proto)
374 {
375         struct mlxsw_sp_vr *vr;
376         int i;
377
378         tb_id = mlxsw_sp_fix_tb_id(tb_id);
379
380         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
381                 vr = &mlxsw_sp->router.vrs[i];
382                 if (vr->used && vr->proto == proto && vr->tb_id == tb_id)
383                         return vr;
384         }
385         return NULL;
386 }
387
/* Set up an unused virtual router for @tb_id: create its FIB, get an
 * LPM tree covering the initial @prefix_len and bind the VR to it.
 * The VR is marked used only after everything succeeded. Returns
 * ERR_PTR() on failure.
 */
static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
					      unsigned char prefix_len,
					      u32 tb_id,
					      enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_prefix_usage req_prefix_usage;
	struct mlxsw_sp_lpm_tree *lpm_tree;
	struct mlxsw_sp_vr *vr;
	int err;

	vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
	if (!vr)
		return ERR_PTR(-EBUSY);
	vr->fib = mlxsw_sp_fib_create();
	if (IS_ERR(vr->fib))
		return ERR_CAST(vr->fib);

	vr->proto = proto;
	vr->tb_id = tb_id;
	/* Request a tree covering only the initial prefix length. */
	mlxsw_sp_prefix_usage_zero(&req_prefix_usage);
	mlxsw_sp_prefix_usage_set(&req_prefix_usage, prefix_len);
	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
					 proto, true);
	if (IS_ERR(lpm_tree)) {
		err = PTR_ERR(lpm_tree);
		goto err_tree_get;
	}
	vr->lpm_tree = lpm_tree;
	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, vr);
	if (err)
		goto err_tree_bind;

	vr->used = true;
	return vr;

err_tree_bind:
	mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree);
err_tree_get:
	mlxsw_sp_fib_destroy(vr->fib);

	return ERR_PTR(err);
}
430
/* Tear down a virtual router: unbind and release its LPM tree, free
 * its FIB and mark the VR slot available again. Inverse of
 * mlxsw_sp_vr_create().
 */
static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_vr *vr)
{
	mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, vr);
	mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree);
	mlxsw_sp_fib_destroy(vr->fib);
	vr->used = false;
}
439
/* Make sure the VR is bound to an LPM tree covering
 * @req_prefix_usage, replacing the bound tree when needed. Returns 0
 * when the current tree is already sufficient.
 */
static int
mlxsw_sp_vr_lpm_tree_check(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr,
			   struct mlxsw_sp_prefix_usage *req_prefix_usage)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;

	if (mlxsw_sp_prefix_usage_eq(req_prefix_usage,
				     &vr->lpm_tree->prefix_usage))
		return 0;

	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, req_prefix_usage,
					 vr->proto, false);
	if (IS_ERR(lpm_tree)) {
		/* We failed to get a tree according to the required
		 * prefix usage. However, the current tree might be still good
		 * for us if our requirement is subset of the prefixes used
		 * in the tree.
		 */
		if (mlxsw_sp_prefix_usage_subset(req_prefix_usage,
						 &vr->lpm_tree->prefix_usage))
			return 0;
		return PTR_ERR(lpm_tree);
	}

	/* Switch the VR over to the new tree and drop the old one. */
	mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, vr);
	mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree);
	vr->lpm_tree = lpm_tree;
	return mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, vr);
}
469
470 static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp,
471                                            unsigned char prefix_len,
472                                            u32 tb_id,
473                                            enum mlxsw_sp_l3proto proto)
474 {
475         struct mlxsw_sp_vr *vr;
476         int err;
477
478         tb_id = mlxsw_sp_fix_tb_id(tb_id);
479         vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id, proto);
480         if (!vr) {
481                 vr = mlxsw_sp_vr_create(mlxsw_sp, prefix_len, tb_id, proto);
482                 if (IS_ERR(vr))
483                         return vr;
484         } else {
485                 struct mlxsw_sp_prefix_usage req_prefix_usage;
486
487                 mlxsw_sp_prefix_usage_cpy(&req_prefix_usage,
488                                           &vr->fib->prefix_usage);
489                 mlxsw_sp_prefix_usage_set(&req_prefix_usage, prefix_len);
490                 /* Need to replace LPM tree in case new prefix is required. */
491                 err = mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, vr,
492                                                  &req_prefix_usage);
493                 if (err)
494                         return ERR_PTR(err);
495         }
496         return vr;
497 }
498
static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr)
{
	/* Destroy virtual router entity in case the associated FIB is empty
	 * and allow it to be used for other tables in future. Otherwise,
	 * check if some prefix usage did not disappear and change tree if
	 * that is the case. Note that in case new, smaller tree cannot be
	 * allocated, the original one will be kept being used.
	 */
	if (mlxsw_sp_prefix_usage_none(&vr->fib->prefix_usage))
		mlxsw_sp_vr_destroy(mlxsw_sp, vr);
	else
		/* Return value deliberately ignored; see note above. */
		mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, vr,
					   &vr->fib->prefix_usage);
}
513
514 static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
515 {
516         struct mlxsw_sp_vr *vr;
517         u64 max_vrs;
518         int i;
519
520         if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
521                 return -EIO;
522
523         max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
524         mlxsw_sp->router.vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
525                                        GFP_KERNEL);
526         if (!mlxsw_sp->router.vrs)
527                 return -ENOMEM;
528
529         for (i = 0; i < max_vrs; i++) {
530                 vr = &mlxsw_sp->router.vrs[i];
531                 vr->id = i;
532         }
533
534         return 0;
535 }
536
537 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);
538
/* Flush pending FIB work and device tables, then free the VR array. */
static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
{
	/* At this stage we're guaranteed not to have new incoming
	 * FIB notifications and the work queue is free from FIBs
	 * sitting on top of mlxsw netdevs. However, we can still
	 * have other FIBs queued. Flush the queue before flushing
	 * the device's tables. No need for locks, as we're the only
	 * writer.
	 */
	mlxsw_core_flush_owq();
	mlxsw_sp_router_fib_flush(mlxsw_sp);
	kfree(mlxsw_sp->router.vrs);
}
552
/* Hash-table key for neighbour entries: the kernel neighbour itself. */
struct mlxsw_sp_neigh_key {
	struct neighbour *n;
};

/* Driver-side state for one kernel neighbour. */
struct mlxsw_sp_neigh_entry {
	struct list_head rif_list_node;	/* member of the RIF's neigh_list */
	struct rhash_head ht_node;	/* member of router.neigh_ht */
	struct mlxsw_sp_neigh_key key;
	u16 rif;			/* RIF index of the neighbour's dev */
	bool connected;			/* false while unresolved; see the
					 * nexthop probe worker
					 */
	unsigned char ha[ETH_ALEN];
	struct list_head nexthop_list; /* list of nexthops using
					* this neigh entry
					*/
	struct list_head nexthop_neighs_list_node;
};

static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
	.key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
	.head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
	.key_len = sizeof(struct mlxsw_sp_neigh_key),
};
575
/* Allocate and initialize a neigh entry for @n on RIF @rif. Returns
 * NULL on allocation failure; the entry is not yet inserted anywhere.
 */
static struct mlxsw_sp_neigh_entry *
mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
			   u16 rif)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;

	neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL);
	if (!neigh_entry)
		return NULL;

	neigh_entry->key.n = n;
	neigh_entry->rif = rif;
	INIT_LIST_HEAD(&neigh_entry->nexthop_list);

	return neigh_entry;
}
592
/* Free a neigh entry; caller must have unlinked it first. */
static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	kfree(neigh_entry);
}
597
/* Insert the entry into the router's neighbour hash table. */
static int
mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_neigh_entry *neigh_entry)
{
	return rhashtable_insert_fast(&mlxsw_sp->router.neigh_ht,
				      &neigh_entry->ht_node,
				      mlxsw_sp_neigh_ht_params);
}
606
/* Remove the entry from the router's neighbour hash table. */
static void
mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_neigh_entry *neigh_entry)
{
	rhashtable_remove_fast(&mlxsw_sp->router.neigh_ht,
			       &neigh_entry->ht_node,
			       mlxsw_sp_neigh_ht_params);
}
615
616 static struct mlxsw_sp_neigh_entry *
617 mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
618 {
619         struct mlxsw_sp_neigh_entry *neigh_entry;
620         struct mlxsw_sp_rif *r;
621         int err;
622
623         r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
624         if (!r)
625                 return ERR_PTR(-EINVAL);
626
627         neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, r->rif);
628         if (!neigh_entry)
629                 return ERR_PTR(-ENOMEM);
630
631         err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
632         if (err)
633                 goto err_neigh_entry_insert;
634
635         list_add(&neigh_entry->rif_list_node, &r->neigh_list);
636
637         return neigh_entry;
638
639 err_neigh_entry_insert:
640         mlxsw_sp_neigh_entry_free(neigh_entry);
641         return ERR_PTR(err);
642 }
643
/* Tear down a neigh entry: unlink from the RIF list, remove from the
 * hash table and free it. Inverse of mlxsw_sp_neigh_entry_create().
 */
static void
mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp,
			     struct mlxsw_sp_neigh_entry *neigh_entry)
{
	list_del(&neigh_entry->rif_list_node);
	mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
	mlxsw_sp_neigh_entry_free(neigh_entry);
}
652
653 static struct mlxsw_sp_neigh_entry *
654 mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
655 {
656         struct mlxsw_sp_neigh_key key;
657
658         key.n = n;
659         return rhashtable_lookup_fast(&mlxsw_sp->router.neigh_ht,
660                                       &key, mlxsw_sp_neigh_ht_params);
661 }
662
/* Track the kernel ARP DELAY_PROBE_TIME as our update interval (ms). */
static void
mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
{
	unsigned long interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);

	mlxsw_sp->router.neighs_update.interval = jiffies_to_msecs(interval);
}
670
/* Handle one IPv4 activity entry from a RAUHTD dump: find the matching
 * kernel neighbour on the RIF's netdev and mark it active via
 * neigh_event_send().
 */
static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
						   char *rauhtd_pl,
						   int ent_index)
{
	struct net_device *dev;
	struct neighbour *n;
	__be32 dipn;
	u32 dip;
	u16 rif;

	mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);

	if (!mlxsw_sp->rifs[rif]) {
		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
		return;
	}

	dipn = htonl(dip);	/* neigh table keys are network order */
	dev = mlxsw_sp->rifs[rif]->dev;
	n = neigh_lookup(&arp_tbl, &dipn, dev);
	if (!n) {
		netdev_err(dev, "Failed to find matching neighbour for IP=%pI4h\n",
			   &dip);
		return;
	}

	netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
	neigh_event_send(n, NULL);
	neigh_release(n);	/* drop the ref taken by neigh_lookup() */
}
701
702 static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
703                                                    char *rauhtd_pl,
704                                                    int rec_index)
705 {
706         u8 num_entries;
707         int i;
708
709         num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
710                                                                 rec_index);
711         /* Hardware starts counting at 0, so add 1. */
712         num_entries++;
713
714         /* Each record consists of several neighbour entries. */
715         for (i = 0; i < num_entries; i++) {
716                 int ent_index;
717
718                 ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
719                 mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
720                                                        ent_index);
721         }
722
723 }
724
/* Dispatch one RAUHTD record by type; only IPv4 is handled here. */
static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
					      char *rauhtd_pl, int rec_index)
{
	switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
	case MLXSW_REG_RAUHTD_TYPE_IPV4:
		mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
						       rec_index);
		break;
	case MLXSW_REG_RAUHTD_TYPE_IPV6:
		/* Only IPv4 dumps are requested; an IPv6 record is a bug. */
		WARN_ON_ONCE(1);
		break;
	}
}
738
/* Return true when another RAUHTD dump iteration may be needed: the
 * dump used all records and the last record looks completely full.
 */
static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
{
	u8 num_rec, last_rec_index, num_entries;

	num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
	last_rec_index = num_rec - 1;

	/* A partially filled dump means nothing else is pending. */
	if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
		return false;
	/* IPv6 records are not parsed; conservatively dump again. */
	if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
	    MLXSW_REG_RAUHTD_TYPE_IPV6)
		return true;

	/* Full dump with a fully populated last IPv4 record (the count
	 * is zero-based, hence the increment).
	 */
	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
								last_rec_index);
	if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
		return true;
	return false;
}
758
759 static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
760 {
761         char *rauhtd_pl;
762         u8 num_rec;
763         int i, err;
764
765         rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
766         if (!rauhtd_pl)
767                 return -ENOMEM;
768
769         /* Make sure the neighbour's netdev isn't removed in the
770          * process.
771          */
772         rtnl_lock();
773         do {
774                 mlxsw_reg_rauhtd_pack(rauhtd_pl, MLXSW_REG_RAUHTD_TYPE_IPV4);
775                 err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
776                                       rauhtd_pl);
777                 if (err) {
778                         dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour talbe\n");
779                         break;
780                 }
781                 num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
782                 for (i = 0; i < num_rec; i++)
783                         mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
784                                                           i);
785         } while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
786         rtnl_unlock();
787
788         kfree(rauhtd_pl);
789         return err;
790 }
791
792 static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
793 {
794         struct mlxsw_sp_neigh_entry *neigh_entry;
795
796         /* Take RTNL mutex here to prevent lists from changes */
797         rtnl_lock();
798         list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list,
799                             nexthop_neighs_list_node)
800                 /* If this neigh have nexthops, make the kernel think this neigh
801                  * is active regardless of the traffic.
802                  */
803                 neigh_event_send(neigh_entry->key.n, NULL);
804         rtnl_unlock();
805 }
806
/* Arm the delayed work using the interval tracked from the kernel. */
static void
mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
{
	unsigned long interval = mlxsw_sp->router.neighs_update.interval;

	mlxsw_core_schedule_dw(&mlxsw_sp->router.neighs_update.dw,
			       msecs_to_jiffies(interval));
}
815
816 static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
817 {
818         struct mlxsw_sp *mlxsw_sp = container_of(work, struct mlxsw_sp,
819                                                  router.neighs_update.dw.work);
820         int err;
821
822         err = mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp);
823         if (err)
824                 dev_err(mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity");
825
826         mlxsw_sp_router_neighs_update_nh(mlxsw_sp);
827
828         mlxsw_sp_router_neighs_update_work_schedule(mlxsw_sp);
829 }
830
/* Periodic worker probing unresolved nexthop neighbours. */
static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;
	struct mlxsw_sp *mlxsw_sp = container_of(work, struct mlxsw_sp,
						 router.nexthop_probe_dw.work);

	/* Iterate over nexthop neighbours, find those who are unresolved and
	 * send arp on them. This solves the chicken-egg problem when
	 * the nexthop wouldn't get offloaded until the neighbor is resolved
	 * but it wouldn't get resolved ever in case traffic is flowing in HW
	 * using different nexthop.
	 *
	 * Take RTNL mutex here to prevent lists from changes.
	 */
	rtnl_lock();
	list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list,
			    nexthop_neighs_list_node)
		if (!neigh_entry->connected)
			neigh_event_send(neigh_entry->key.n, NULL);
	rtnl_unlock();

	/* Re-arm the periodic probe. */
	mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw,
			       MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
}
855
856 static void
857 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
858                               struct mlxsw_sp_neigh_entry *neigh_entry,
859                               bool removing);
860
861 static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding)
862 {
863         return adding ? MLXSW_REG_RAUHT_OP_WRITE_ADD :
864                         MLXSW_REG_RAUHT_OP_WRITE_DELETE;
865 }
866
867 static void
868 mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
869                                 struct mlxsw_sp_neigh_entry *neigh_entry,
870                                 enum mlxsw_reg_rauht_op op)
871 {
872         struct neighbour *n = neigh_entry->key.n;
873         u32 dip = ntohl(*((__be32 *) n->primary_key));
874         char rauht_pl[MLXSW_REG_RAUHT_LEN];
875
876         mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
877                               dip);
878         mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
879 }
880
881 static void
882 mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
883                             struct mlxsw_sp_neigh_entry *neigh_entry,
884                             bool adding)
885 {
886         if (!adding && !neigh_entry->connected)
887                 return;
888         neigh_entry->connected = adding;
889         if (neigh_entry->key.n->tbl == &arp_tbl)
890                 mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
891                                                 mlxsw_sp_rauht_op(adding));
892         else
893                 WARN_ON_ONCE(1);
894 }
895
896 struct mlxsw_sp_neigh_event_work {
897         struct work_struct work;
898         struct mlxsw_sp *mlxsw_sp;
899         struct neighbour *n;
900 };
901
902 static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
903 {
904         struct mlxsw_sp_neigh_event_work *neigh_work =
905                 container_of(work, struct mlxsw_sp_neigh_event_work, work);
906         struct mlxsw_sp *mlxsw_sp = neigh_work->mlxsw_sp;
907         struct mlxsw_sp_neigh_entry *neigh_entry;
908         struct neighbour *n = neigh_work->n;
909         unsigned char ha[ETH_ALEN];
910         bool entry_connected;
911         u8 nud_state, dead;
912
913         /* If these parameters are changed after we release the lock,
914          * then we are guaranteed to receive another event letting us
915          * know about it.
916          */
917         read_lock_bh(&n->lock);
918         memcpy(ha, n->ha, ETH_ALEN);
919         nud_state = n->nud_state;
920         dead = n->dead;
921         read_unlock_bh(&n->lock);
922
923         rtnl_lock();
924         entry_connected = nud_state & NUD_VALID && !dead;
925         neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
926         if (!entry_connected && !neigh_entry)
927                 goto out;
928         if (!neigh_entry) {
929                 neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
930                 if (IS_ERR(neigh_entry))
931                         goto out;
932         }
933
934         memcpy(neigh_entry->ha, ha, ETH_ALEN);
935         mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
936         mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected);
937
938         if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
939                 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
940
941 out:
942         rtnl_unlock();
943         neigh_release(n);
944         kfree(neigh_work);
945 }
946
947 int mlxsw_sp_router_netevent_event(struct notifier_block *unused,
948                                    unsigned long event, void *ptr)
949 {
950         struct mlxsw_sp_neigh_event_work *neigh_work;
951         struct mlxsw_sp_port *mlxsw_sp_port;
952         struct mlxsw_sp *mlxsw_sp;
953         unsigned long interval;
954         struct neigh_parms *p;
955         struct neighbour *n;
956
957         switch (event) {
958         case NETEVENT_DELAY_PROBE_TIME_UPDATE:
959                 p = ptr;
960
961                 /* We don't care about changes in the default table. */
962                 if (!p->dev || p->tbl != &arp_tbl)
963                         return NOTIFY_DONE;
964
965                 /* We are in atomic context and can't take RTNL mutex,
966                  * so use RCU variant to walk the device chain.
967                  */
968                 mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev);
969                 if (!mlxsw_sp_port)
970                         return NOTIFY_DONE;
971
972                 mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
973                 interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
974                 mlxsw_sp->router.neighs_update.interval = interval;
975
976                 mlxsw_sp_port_dev_put(mlxsw_sp_port);
977                 break;
978         case NETEVENT_NEIGH_UPDATE:
979                 n = ptr;
980
981                 if (n->tbl != &arp_tbl)
982                         return NOTIFY_DONE;
983
984                 mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev);
985                 if (!mlxsw_sp_port)
986                         return NOTIFY_DONE;
987
988                 neigh_work = kzalloc(sizeof(*neigh_work), GFP_ATOMIC);
989                 if (!neigh_work) {
990                         mlxsw_sp_port_dev_put(mlxsw_sp_port);
991                         return NOTIFY_BAD;
992                 }
993
994                 INIT_WORK(&neigh_work->work, mlxsw_sp_router_neigh_event_work);
995                 neigh_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
996                 neigh_work->n = n;
997
998                 /* Take a reference to ensure the neighbour won't be
999                  * destructed until we drop the reference in delayed
1000                  * work.
1001                  */
1002                 neigh_clone(n);
1003                 mlxsw_core_schedule_work(&neigh_work->work);
1004                 mlxsw_sp_port_dev_put(mlxsw_sp_port);
1005                 break;
1006         }
1007
1008         return NOTIFY_DONE;
1009 }
1010
1011 static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
1012 {
1013         int err;
1014
1015         err = rhashtable_init(&mlxsw_sp->router.neigh_ht,
1016                               &mlxsw_sp_neigh_ht_params);
1017         if (err)
1018                 return err;
1019
1020         /* Initialize the polling interval according to the default
1021          * table.
1022          */
1023         mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);
1024
1025         /* Create the delayed works for the activity_update */
1026         INIT_DELAYED_WORK(&mlxsw_sp->router.neighs_update.dw,
1027                           mlxsw_sp_router_neighs_update_work);
1028         INIT_DELAYED_WORK(&mlxsw_sp->router.nexthop_probe_dw,
1029                           mlxsw_sp_router_probe_unresolved_nexthops);
1030         mlxsw_core_schedule_dw(&mlxsw_sp->router.neighs_update.dw, 0);
1031         mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw, 0);
1032         return 0;
1033 }
1034
1035 static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
1036 {
1037         cancel_delayed_work_sync(&mlxsw_sp->router.neighs_update.dw);
1038         cancel_delayed_work_sync(&mlxsw_sp->router.nexthop_probe_dw);
1039         rhashtable_destroy(&mlxsw_sp->router.neigh_ht);
1040 }
1041
1042 static int mlxsw_sp_neigh_rif_flush(struct mlxsw_sp *mlxsw_sp,
1043                                     const struct mlxsw_sp_rif *r)
1044 {
1045         char rauht_pl[MLXSW_REG_RAUHT_LEN];
1046
1047         mlxsw_reg_rauht_pack(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_DELETE_ALL,
1048                              r->rif, r->addr);
1049         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
1050 }
1051
1052 static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
1053                                          struct mlxsw_sp_rif *r)
1054 {
1055         struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;
1056
1057         mlxsw_sp_neigh_rif_flush(mlxsw_sp, r);
1058         list_for_each_entry_safe(neigh_entry, tmp, &r->neigh_list,
1059                                  rif_list_node)
1060                 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
1061 }
1062
1063 struct mlxsw_sp_nexthop_key {
1064         struct fib_nh *fib_nh;
1065 };
1066
1067 struct mlxsw_sp_nexthop {
1068         struct list_head neigh_list_node; /* member of neigh entry list */
1069         struct list_head rif_list_node;
1070         struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group
1071                                                 * this belongs to
1072                                                 */
1073         struct rhash_head ht_node;
1074         struct mlxsw_sp_nexthop_key key;
1075         struct mlxsw_sp_rif *r;
1076         u8 should_offload:1, /* set indicates this neigh is connected and
1077                               * should be put to KVD linear area of this group.
1078                               */
1079            offloaded:1, /* set in case the neigh is actually put into
1080                          * KVD linear area of this group.
1081                          */
1082            update:1; /* set indicates that MAC of this neigh should be
1083                       * updated in HW
1084                       */
1085         struct mlxsw_sp_neigh_entry *neigh_entry;
1086 };
1087
1088 struct mlxsw_sp_nexthop_group_key {
1089         struct fib_info *fi;
1090 };
1091
1092 struct mlxsw_sp_nexthop_group {
1093         struct rhash_head ht_node;
1094         struct list_head fib_list; /* list of fib entries that use this group */
1095         struct mlxsw_sp_nexthop_group_key key;
1096         u8 adj_index_valid:1,
1097            gateway:1; /* routes using the group use a gateway */
1098         u32 adj_index;
1099         u16 ecmp_size;
1100         u16 count;
1101         struct mlxsw_sp_nexthop nexthops[0];
1102 #define nh_rif  nexthops[0].r
1103 };
1104
1105 static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
1106         .key_offset = offsetof(struct mlxsw_sp_nexthop_group, key),
1107         .head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node),
1108         .key_len = sizeof(struct mlxsw_sp_nexthop_group_key),
1109 };
1110
1111 static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
1112                                          struct mlxsw_sp_nexthop_group *nh_grp)
1113 {
1114         return rhashtable_insert_fast(&mlxsw_sp->router.nexthop_group_ht,
1115                                       &nh_grp->ht_node,
1116                                       mlxsw_sp_nexthop_group_ht_params);
1117 }
1118
1119 static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp,
1120                                           struct mlxsw_sp_nexthop_group *nh_grp)
1121 {
1122         rhashtable_remove_fast(&mlxsw_sp->router.nexthop_group_ht,
1123                                &nh_grp->ht_node,
1124                                mlxsw_sp_nexthop_group_ht_params);
1125 }
1126
1127 static struct mlxsw_sp_nexthop_group *
1128 mlxsw_sp_nexthop_group_lookup(struct mlxsw_sp *mlxsw_sp,
1129                               struct mlxsw_sp_nexthop_group_key key)
1130 {
1131         return rhashtable_lookup_fast(&mlxsw_sp->router.nexthop_group_ht, &key,
1132                                       mlxsw_sp_nexthop_group_ht_params);
1133 }
1134
1135 static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = {
1136         .key_offset = offsetof(struct mlxsw_sp_nexthop, key),
1137         .head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node),
1138         .key_len = sizeof(struct mlxsw_sp_nexthop_key),
1139 };
1140
1141 static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp,
1142                                    struct mlxsw_sp_nexthop *nh)
1143 {
1144         return rhashtable_insert_fast(&mlxsw_sp->router.nexthop_ht,
1145                                       &nh->ht_node, mlxsw_sp_nexthop_ht_params);
1146 }
1147
1148 static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp,
1149                                     struct mlxsw_sp_nexthop *nh)
1150 {
1151         rhashtable_remove_fast(&mlxsw_sp->router.nexthop_ht, &nh->ht_node,
1152                                mlxsw_sp_nexthop_ht_params);
1153 }
1154
1155 static struct mlxsw_sp_nexthop *
1156 mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp,
1157                         struct mlxsw_sp_nexthop_key key)
1158 {
1159         return rhashtable_lookup_fast(&mlxsw_sp->router.nexthop_ht, &key,
1160                                       mlxsw_sp_nexthop_ht_params);
1161 }
1162
1163 static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
1164                                              struct mlxsw_sp_vr *vr,
1165                                              u32 adj_index, u16 ecmp_size,
1166                                              u32 new_adj_index,
1167                                              u16 new_ecmp_size)
1168 {
1169         char raleu_pl[MLXSW_REG_RALEU_LEN];
1170
1171         mlxsw_reg_raleu_pack(raleu_pl,
1172                              (enum mlxsw_reg_ralxx_protocol) vr->proto, vr->id,
1173                              adj_index, ecmp_size, new_adj_index,
1174                              new_ecmp_size);
1175         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
1176 }
1177
1178 static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
1179                                           struct mlxsw_sp_nexthop_group *nh_grp,
1180                                           u32 old_adj_index, u16 old_ecmp_size)
1181 {
1182         struct mlxsw_sp_fib_entry *fib_entry;
1183         struct mlxsw_sp_vr *vr = NULL;
1184         int err;
1185
1186         list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
1187                 if (vr == fib_entry->fib_node->vr)
1188                         continue;
1189                 vr = fib_entry->fib_node->vr;
1190                 err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, vr,
1191                                                         old_adj_index,
1192                                                         old_ecmp_size,
1193                                                         nh_grp->adj_index,
1194                                                         nh_grp->ecmp_size);
1195                 if (err)
1196                         return err;
1197         }
1198         return 0;
1199 }
1200
1201 static int mlxsw_sp_nexthop_mac_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
1202                                        struct mlxsw_sp_nexthop *nh)
1203 {
1204         struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
1205         char ratr_pl[MLXSW_REG_RATR_LEN];
1206
1207         mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
1208                             true, adj_index, neigh_entry->rif);
1209         mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
1210         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
1211 }
1212
1213 static int
1214 mlxsw_sp_nexthop_group_mac_update(struct mlxsw_sp *mlxsw_sp,
1215                                   struct mlxsw_sp_nexthop_group *nh_grp,
1216                                   bool reallocate)
1217 {
1218         u32 adj_index = nh_grp->adj_index; /* base */
1219         struct mlxsw_sp_nexthop *nh;
1220         int i;
1221         int err;
1222
1223         for (i = 0; i < nh_grp->count; i++) {
1224                 nh = &nh_grp->nexthops[i];
1225
1226                 if (!nh->should_offload) {
1227                         nh->offloaded = 0;
1228                         continue;
1229                 }
1230
1231                 if (nh->update || reallocate) {
1232                         err = mlxsw_sp_nexthop_mac_update(mlxsw_sp,
1233                                                           adj_index, nh);
1234                         if (err)
1235                                 return err;
1236                         nh->update = 0;
1237                         nh->offloaded = 1;
1238                 }
1239                 adj_index++;
1240         }
1241         return 0;
1242 }
1243
1244 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
1245                                      struct mlxsw_sp_fib_entry *fib_entry);
1246
1247 static int
1248 mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
1249                                     struct mlxsw_sp_nexthop_group *nh_grp)
1250 {
1251         struct mlxsw_sp_fib_entry *fib_entry;
1252         int err;
1253
1254         list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
1255                 err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1256                 if (err)
1257                         return err;
1258         }
1259         return 0;
1260 }
1261
1262 static void
1263 mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
1264                                struct mlxsw_sp_nexthop_group *nh_grp)
1265 {
1266         struct mlxsw_sp_nexthop *nh;
1267         bool offload_change = false;
1268         u32 adj_index;
1269         u16 ecmp_size = 0;
1270         bool old_adj_index_valid;
1271         u32 old_adj_index;
1272         u16 old_ecmp_size;
1273         int ret;
1274         int i;
1275         int err;
1276
1277         if (!nh_grp->gateway) {
1278                 mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
1279                 return;
1280         }
1281
1282         for (i = 0; i < nh_grp->count; i++) {
1283                 nh = &nh_grp->nexthops[i];
1284
1285                 if (nh->should_offload ^ nh->offloaded) {
1286                         offload_change = true;
1287                         if (nh->should_offload)
1288                                 nh->update = 1;
1289                 }
1290                 if (nh->should_offload)
1291                         ecmp_size++;
1292         }
1293         if (!offload_change) {
1294                 /* Nothing was added or removed, so no need to reallocate. Just
1295                  * update MAC on existing adjacency indexes.
1296                  */
1297                 err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp,
1298                                                         false);
1299                 if (err) {
1300                         dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
1301                         goto set_trap;
1302                 }
1303                 return;
1304         }
1305         if (!ecmp_size)
1306                 /* No neigh of this group is connected so we just set
1307                  * the trap and let everthing flow through kernel.
1308                  */
1309                 goto set_trap;
1310
1311         ret = mlxsw_sp_kvdl_alloc(mlxsw_sp, ecmp_size);
1312         if (ret < 0) {
1313                 /* We ran out of KVD linear space, just set the
1314                  * trap and let everything flow through kernel.
1315                  */
1316                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
1317                 goto set_trap;
1318         }
1319         adj_index = ret;
1320         old_adj_index_valid = nh_grp->adj_index_valid;
1321         old_adj_index = nh_grp->adj_index;
1322         old_ecmp_size = nh_grp->ecmp_size;
1323         nh_grp->adj_index_valid = 1;
1324         nh_grp->adj_index = adj_index;
1325         nh_grp->ecmp_size = ecmp_size;
1326         err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp, true);
1327         if (err) {
1328                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
1329                 goto set_trap;
1330         }
1331
1332         if (!old_adj_index_valid) {
1333                 /* The trap was set for fib entries, so we have to call
1334                  * fib entry update to unset it and use adjacency index.
1335                  */
1336                 err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
1337                 if (err) {
1338                         dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
1339                         goto set_trap;
1340                 }
1341                 return;
1342         }
1343
1344         err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
1345                                              old_adj_index, old_ecmp_size);
1346         mlxsw_sp_kvdl_free(mlxsw_sp, old_adj_index);
1347         if (err) {
1348                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
1349                 goto set_trap;
1350         }
1351         return;
1352
1353 set_trap:
1354         old_adj_index_valid = nh_grp->adj_index_valid;
1355         nh_grp->adj_index_valid = 0;
1356         for (i = 0; i < nh_grp->count; i++) {
1357                 nh = &nh_grp->nexthops[i];
1358                 nh->offloaded = 0;
1359         }
1360         err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
1361         if (err)
1362                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
1363         if (old_adj_index_valid)
1364                 mlxsw_sp_kvdl_free(mlxsw_sp, nh_grp->adj_index);
1365 }
1366
1367 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
1368                                             bool removing)
1369 {
1370         if (!removing && !nh->should_offload)
1371                 nh->should_offload = 1;
1372         else if (removing && nh->offloaded)
1373                 nh->should_offload = 0;
1374         nh->update = 1;
1375 }
1376
1377 static void
1378 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
1379                               struct mlxsw_sp_neigh_entry *neigh_entry,
1380                               bool removing)
1381 {
1382         struct mlxsw_sp_nexthop *nh;
1383
1384         list_for_each_entry(nh, &neigh_entry->nexthop_list,
1385                             neigh_list_node) {
1386                 __mlxsw_sp_nexthop_neigh_update(nh, removing);
1387                 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
1388         }
1389 }
1390
1391 static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh,
1392                                       struct mlxsw_sp_rif *r)
1393 {
1394         if (nh->r)
1395                 return;
1396
1397         nh->r = r;
1398         list_add(&nh->rif_list_node, &r->nexthop_list);
1399 }
1400
1401 static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh)
1402 {
1403         if (!nh->r)
1404                 return;
1405
1406         list_del(&nh->rif_list_node);
1407         nh->r = NULL;
1408 }
1409
1410 static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
1411                                        struct mlxsw_sp_nexthop *nh)
1412 {
1413         struct mlxsw_sp_neigh_entry *neigh_entry;
1414         struct fib_nh *fib_nh = nh->key.fib_nh;
1415         struct neighbour *n;
1416         u8 nud_state, dead;
1417         int err;
1418
1419         if (!nh->nh_grp->gateway || nh->neigh_entry)
1420                 return 0;
1421
1422         /* Take a reference of neigh here ensuring that neigh would
1423          * not be detructed before the nexthop entry is finished.
1424          * The reference is taken either in neigh_lookup() or
1425          * in neigh_create() in case n is not found.
1426          */
1427         n = neigh_lookup(&arp_tbl, &fib_nh->nh_gw, fib_nh->nh_dev);
1428         if (!n) {
1429                 n = neigh_create(&arp_tbl, &fib_nh->nh_gw, fib_nh->nh_dev);
1430                 if (IS_ERR(n))
1431                         return PTR_ERR(n);
1432                 neigh_event_send(n, NULL);
1433         }
1434         neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
1435         if (!neigh_entry) {
1436                 neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
1437                 if (IS_ERR(neigh_entry)) {
1438                         err = -EINVAL;
1439                         goto err_neigh_entry_create;
1440                 }
1441         }
1442
1443         /* If that is the first nexthop connected to that neigh, add to
1444          * nexthop_neighs_list
1445          */
1446         if (list_empty(&neigh_entry->nexthop_list))
1447                 list_add_tail(&neigh_entry->nexthop_neighs_list_node,
1448                               &mlxsw_sp->router.nexthop_neighs_list);
1449
1450         nh->neigh_entry = neigh_entry;
1451         list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
1452         read_lock_bh(&n->lock);
1453         nud_state = n->nud_state;
1454         dead = n->dead;
1455         read_unlock_bh(&n->lock);
1456         __mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));
1457
1458         return 0;
1459
1460 err_neigh_entry_create:
1461         neigh_release(n);
1462         return err;
1463 }
1464
1465 static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
1466                                         struct mlxsw_sp_nexthop *nh)
1467 {
1468         struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
1469         struct neighbour *n;
1470
1471         if (!neigh_entry)
1472                 return;
1473         n = neigh_entry->key.n;
1474
1475         __mlxsw_sp_nexthop_neigh_update(nh, true);
1476         list_del(&nh->neigh_list_node);
1477         nh->neigh_entry = NULL;
1478
1479         /* If that is the last nexthop connected to that neigh, remove from
1480          * nexthop_neighs_list
1481          */
1482         if (list_empty(&neigh_entry->nexthop_list))
1483                 list_del(&neigh_entry->nexthop_neighs_list_node);
1484
1485         if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
1486                 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
1487
1488         neigh_release(n);
1489 }
1490
1491 static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp,
1492                                  struct mlxsw_sp_nexthop_group *nh_grp,
1493                                  struct mlxsw_sp_nexthop *nh,
1494                                  struct fib_nh *fib_nh)
1495 {
1496         struct net_device *dev = fib_nh->nh_dev;
1497         struct in_device *in_dev;
1498         struct mlxsw_sp_rif *r;
1499         int err;
1500
1501         nh->nh_grp = nh_grp;
1502         nh->key.fib_nh = fib_nh;
1503         err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
1504         if (err)
1505                 return err;
1506
1507         in_dev = __in_dev_get_rtnl(dev);
1508         if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
1509             fib_nh->nh_flags & RTNH_F_LINKDOWN)
1510                 return 0;
1511
1512         r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
1513         if (!r)
1514                 return 0;
1515         mlxsw_sp_nexthop_rif_init(nh, r);
1516
1517         err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
1518         if (err)
1519                 goto err_nexthop_neigh_init;
1520
1521         return 0;
1522
1523 err_nexthop_neigh_init:
1524         mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
1525         return err;
1526 }
1527
1528 static void mlxsw_sp_nexthop_fini(struct mlxsw_sp *mlxsw_sp,
1529                                   struct mlxsw_sp_nexthop *nh)
1530 {
1531         mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
1532         mlxsw_sp_nexthop_rif_fini(nh);
1533         mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
1534 }
1535
1536 static void mlxsw_sp_nexthop_event(struct mlxsw_sp *mlxsw_sp,
1537                                    unsigned long event, struct fib_nh *fib_nh)
1538 {
1539         struct mlxsw_sp_nexthop_key key;
1540         struct mlxsw_sp_nexthop *nh;
1541         struct mlxsw_sp_rif *r;
1542
1543         if (mlxsw_sp->router.aborted)
1544                 return;
1545
1546         key.fib_nh = fib_nh;
1547         nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key);
1548         if (WARN_ON_ONCE(!nh))
1549                 return;
1550
1551         r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, fib_nh->nh_dev);
1552         if (!r)
1553                 return;
1554
1555         switch (event) {
1556         case FIB_EVENT_NH_ADD:
1557                 mlxsw_sp_nexthop_rif_init(nh, r);
1558                 mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
1559                 break;
1560         case FIB_EVENT_NH_DEL:
1561                 mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
1562                 mlxsw_sp_nexthop_rif_fini(nh);
1563                 break;
1564         }
1565
1566         mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
1567 }
1568
1569 static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
1570                                            struct mlxsw_sp_rif *r)
1571 {
1572         struct mlxsw_sp_nexthop *nh, *tmp;
1573
1574         list_for_each_entry_safe(nh, tmp, &r->nexthop_list, rif_list_node) {
1575                 mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
1576                 mlxsw_sp_nexthop_rif_fini(nh);
1577                 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
1578         }
1579 }
1580
1581 static struct mlxsw_sp_nexthop_group *
1582 mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
1583 {
1584         struct mlxsw_sp_nexthop_group *nh_grp;
1585         struct mlxsw_sp_nexthop *nh;
1586         struct fib_nh *fib_nh;
1587         size_t alloc_size;
1588         int i;
1589         int err;
1590
1591         alloc_size = sizeof(*nh_grp) +
1592                      fi->fib_nhs * sizeof(struct mlxsw_sp_nexthop);
1593         nh_grp = kzalloc(alloc_size, GFP_KERNEL);
1594         if (!nh_grp)
1595                 return ERR_PTR(-ENOMEM);
1596         INIT_LIST_HEAD(&nh_grp->fib_list);
1597         nh_grp->gateway = fi->fib_nh->nh_scope == RT_SCOPE_LINK;
1598         nh_grp->count = fi->fib_nhs;
1599         nh_grp->key.fi = fi;
1600         for (i = 0; i < nh_grp->count; i++) {
1601                 nh = &nh_grp->nexthops[i];
1602                 fib_nh = &fi->fib_nh[i];
1603                 err = mlxsw_sp_nexthop_init(mlxsw_sp, nh_grp, nh, fib_nh);
1604                 if (err)
1605                         goto err_nexthop_init;
1606         }
1607         err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
1608         if (err)
1609                 goto err_nexthop_group_insert;
1610         mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
1611         return nh_grp;
1612
1613 err_nexthop_group_insert:
1614 err_nexthop_init:
1615         for (i--; i >= 0; i--) {
1616                 nh = &nh_grp->nexthops[i];
1617                 mlxsw_sp_nexthop_fini(mlxsw_sp, nh);
1618         }
1619         kfree(nh_grp);
1620         return ERR_PTR(err);
1621 }
1622
/* Unlink the nexthop group from its hash table, tear down its nexthops
 * and free it.
 */
static void
mlxsw_sp_nexthop_group_destroy(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_nexthop_group *nh_grp)
{
	struct mlxsw_sp_nexthop *nh;
	int i;

	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];
		mlxsw_sp_nexthop_fini(mlxsw_sp, nh);
	}
	/* With no valid nexthops left, the refresh is expected to release
	 * the group's adjacency index.
	 */
	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
	WARN_ON_ONCE(nh_grp->adj_index_valid);
	kfree(nh_grp);
}
1639
1640 static int mlxsw_sp_nexthop_group_get(struct mlxsw_sp *mlxsw_sp,
1641                                       struct mlxsw_sp_fib_entry *fib_entry,
1642                                       struct fib_info *fi)
1643 {
1644         struct mlxsw_sp_nexthop_group_key key;
1645         struct mlxsw_sp_nexthop_group *nh_grp;
1646
1647         key.fi = fi;
1648         nh_grp = mlxsw_sp_nexthop_group_lookup(mlxsw_sp, key);
1649         if (!nh_grp) {
1650                 nh_grp = mlxsw_sp_nexthop_group_create(mlxsw_sp, fi);
1651                 if (IS_ERR(nh_grp))
1652                         return PTR_ERR(nh_grp);
1653         }
1654         list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
1655         fib_entry->nh_group = nh_grp;
1656         return 0;
1657 }
1658
1659 static void mlxsw_sp_nexthop_group_put(struct mlxsw_sp *mlxsw_sp,
1660                                        struct mlxsw_sp_fib_entry *fib_entry)
1661 {
1662         struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
1663
1664         list_del(&fib_entry->nexthop_group_node);
1665         if (!list_empty(&nh_grp->fib_list))
1666                 return;
1667         mlxsw_sp_nexthop_group_destroy(mlxsw_sp, nh_grp);
1668 }
1669
1670 static bool
1671 mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
1672 {
1673         struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
1674
1675         if (fib_entry->params.tos)
1676                 return false;
1677
1678         switch (fib_entry->type) {
1679         case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
1680                 return !!nh_group->adj_index_valid;
1681         case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
1682                 return !!nh_group->nh_rif;
1683         default:
1684                 return false;
1685         }
1686 }
1687
/* Mark the entry as offloaded and bump the fib_info's offload counter so
 * user space sees RTNH_F_OFFLOAD on the route.
 */
static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
{
	fib_entry->offloaded = true;

	switch (fib_entry->fib_node->vr->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		fib_info_offload_inc(fib_entry->nh_group->key.fi);
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		/* IPv6 offload is not implemented yet. */
		WARN_ON_ONCE(1);
	}
}
1700
/* Reverse of mlxsw_sp_fib_entry_offload_set(): drop the fib_info offload
 * counter and clear the entry's offloaded flag.
 */
static void
mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
{
	switch (fib_entry->fib_node->vr->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		fib_info_offload_dec(fib_entry->nh_group->key.fi);
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		/* IPv6 offload is not implemented yet. */
		WARN_ON_ONCE(1);
	}

	fib_entry->offloaded = false;
}
1714
/* Synchronize the entry's offload indication with the outcome of a RALUE
 * operation: a delete always clears the indication, a successful write
 * toggles it according to whether the entry is currently offloadable.
 */
static void
mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
				   enum mlxsw_reg_ralue_op op, int err)
{
	switch (op) {
	case MLXSW_REG_RALUE_OP_WRITE_DELETE:
		if (!fib_entry->offloaded)
			return;
		return mlxsw_sp_fib_entry_offload_unset(fib_entry);
	case MLXSW_REG_RALUE_OP_WRITE_WRITE:
		/* A failed write leaves the previous indication in place. */
		if (err)
			return;
		if (mlxsw_sp_fib_entry_should_offload(fib_entry) &&
		    !fib_entry->offloaded)
			mlxsw_sp_fib_entry_offload_set(fib_entry);
		else if (!mlxsw_sp_fib_entry_should_offload(fib_entry) &&
			 fib_entry->offloaded)
			mlxsw_sp_fib_entry_offload_unset(fib_entry);
		return;
	default:
		return;
	}
}
1738
/* Program an IPv4 remote (gateway) route: point the LPM entry at the
 * nexthop group's adjacency range, or trap to the CPU when the group is
 * not offloadable.
 */
static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_fib_entry *fib_entry,
					 enum mlxsw_reg_ralue_op op)
{
	char ralue_pl[MLXSW_REG_RALUE_LEN];
	u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr;
	struct mlxsw_sp_vr *vr = fib_entry->fib_node->vr;
	enum mlxsw_reg_ralue_trap_action trap_action;
	u16 trap_id = 0;
	u32 adjacency_index = 0;
	u16 ecmp_size = 0;

	/* In case the nexthop group adjacency index is valid, use it
	 * with provided ECMP size. Otherwise, setup trap and pass
	 * traffic to kernel.
	 */
	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
		adjacency_index = fib_entry->nh_group->adj_index;
		ecmp_size = fib_entry->nh_group->ecmp_size;
	} else {
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
	}

	mlxsw_reg_ralue_pack4(ralue_pl,
			      (enum mlxsw_reg_ralxx_protocol) vr->proto, op,
			      vr->id, fib_entry->fib_node->key.prefix_len,
			      *p_dip);
	mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
					adjacency_index, ecmp_size);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}
1772
/* Program an IPv4 directly-connected route: forward through the nexthop
 * group's router interface, or trap to the CPU when no RIF is bound.
 */
static int mlxsw_sp_fib_entry_op4_local(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib_entry *fib_entry,
					enum mlxsw_reg_ralue_op op)
{
	struct mlxsw_sp_rif *r = fib_entry->nh_group->nh_rif;
	enum mlxsw_reg_ralue_trap_action trap_action;
	char ralue_pl[MLXSW_REG_RALUE_LEN];
	u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr;
	struct mlxsw_sp_vr *vr = fib_entry->fib_node->vr;
	u16 trap_id = 0;
	u16 rif = 0;

	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
		rif = r->rif;
	} else {
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
	}

	mlxsw_reg_ralue_pack4(ralue_pl,
			      (enum mlxsw_reg_ralxx_protocol) vr->proto, op,
			      vr->id, fib_entry->fib_node->key.prefix_len,
			      *p_dip);
	mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id, rif);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}
1800
/* Program an IPv4 trap route (local/broadcast): matching packets are
 * always sent to the CPU via the ip2me action.
 */
static int mlxsw_sp_fib_entry_op4_trap(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_fib_entry *fib_entry,
				       enum mlxsw_reg_ralue_op op)
{
	char ralue_pl[MLXSW_REG_RALUE_LEN];
	u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr;
	struct mlxsw_sp_vr *vr = fib_entry->fib_node->vr;

	mlxsw_reg_ralue_pack4(ralue_pl,
			      (enum mlxsw_reg_ralxx_protocol) vr->proto, op,
			      vr->id, fib_entry->fib_node->key.prefix_len,
			      *p_dip);
	mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}
1816
/* Dispatch an IPv4 RALUE operation according to the entry type.
 * Falls through to -EINVAL for unknown types.
 */
static int mlxsw_sp_fib_entry_op4(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_fib_entry *fib_entry,
				  enum mlxsw_reg_ralue_op op)
{
	switch (fib_entry->type) {
	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
		return mlxsw_sp_fib_entry_op4_remote(mlxsw_sp, fib_entry, op);
	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
		return mlxsw_sp_fib_entry_op4_local(mlxsw_sp, fib_entry, op);
	case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
		return mlxsw_sp_fib_entry_op4_trap(mlxsw_sp, fib_entry, op);
	}
	return -EINVAL;
}
1831
/* Perform a RALUE operation on the entry and refresh its offload
 * indication with the result. IPv6 is unsupported and returns -EINVAL
 * without touching the indication.
 */
static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_fib_entry *fib_entry,
				 enum mlxsw_reg_ralue_op op)
{
	int err = -EINVAL;

	switch (fib_entry->fib_node->vr->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		err = mlxsw_sp_fib_entry_op4(mlxsw_sp, fib_entry, op);
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		return err;
	}
	mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, err);
	return err;
}
1848
1849 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
1850                                      struct mlxsw_sp_fib_entry *fib_entry)
1851 {
1852         return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
1853                                      MLXSW_REG_RALUE_OP_WRITE_WRITE);
1854 }
1855
1856 static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
1857                                   struct mlxsw_sp_fib_entry *fib_entry)
1858 {
1859         return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
1860                                      MLXSW_REG_RALUE_OP_WRITE_DELETE);
1861 }
1862
/* Classify a kernel FIB4 route into the driver's entry types: local and
 * broadcast routes trap to the CPU; unicast routes are remote (via
 * gateway) or local (directly connected) based on nexthop scope. Any
 * other route type is rejected with -EINVAL.
 */
static int
mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
			     const struct fib_entry_notifier_info *fen_info,
			     struct mlxsw_sp_fib_entry *fib_entry)
{
	struct fib_info *fi = fen_info->fi;

	if (fen_info->type == RTN_LOCAL || fen_info->type == RTN_BROADCAST) {
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
		return 0;
	}
	if (fen_info->type != RTN_UNICAST)
		return -EINVAL;
	if (fi->fib_nh->nh_scope != RT_SCOPE_LINK)
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
	else
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
	return 0;
}
1882
/* Allocate a FIB entry for the notified IPv4 route, classify it, bind it
 * to a nexthop group and record the route parameters used for ordering
 * and lookup. Returns the entry or ERR_PTR() on failure.
 */
static struct mlxsw_sp_fib_entry *
mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
			   struct mlxsw_sp_fib_node *fib_node,
			   const struct fib_entry_notifier_info *fen_info)
{
	struct mlxsw_sp_fib_entry *fib_entry;
	int err;

	fib_entry = kzalloc(sizeof(*fib_entry), GFP_KERNEL);
	if (!fib_entry) {
		err = -ENOMEM;
		goto err_fib_entry_alloc;
	}

	err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry);
	if (err)
		goto err_fib4_entry_type_set;

	err = mlxsw_sp_nexthop_group_get(mlxsw_sp, fib_entry, fen_info->fi);
	if (err)
		goto err_nexthop_group_get;

	fib_entry->params.prio = fen_info->fi->fib_priority;
	fib_entry->params.tb_id = fen_info->tb_id;
	fib_entry->params.type = fen_info->type;
	fib_entry->params.tos = fen_info->tos;

	fib_entry->fib_node = fib_node;

	return fib_entry;

err_nexthop_group_get:
err_fib4_entry_type_set:
	kfree(fib_entry);
err_fib_entry_alloc:
	return ERR_PTR(err);
}
1920
/* Release the entry's nexthop group reference and free it. */
static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib_entry *fib_entry)
{
	mlxsw_sp_nexthop_group_put(mlxsw_sp, fib_entry);
	kfree(fib_entry);
}
1927
1928 static struct mlxsw_sp_fib_node *
1929 mlxsw_sp_fib4_node_get(struct mlxsw_sp *mlxsw_sp,
1930                        const struct fib_entry_notifier_info *fen_info);
1931
/* Find the driver's FIB entry matching a kernel route notification by
 * {table, tos, type, fib_info} within the route's prefix node.
 * Returns NULL when the node or entry does not exist.
 */
static struct mlxsw_sp_fib_entry *
mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
			   const struct fib_entry_notifier_info *fen_info)
{
	struct mlxsw_sp_fib_entry *fib_entry;
	struct mlxsw_sp_fib_node *fib_node;

	fib_node = mlxsw_sp_fib4_node_get(mlxsw_sp, fen_info);
	if (IS_ERR(fib_node))
		return NULL;

	list_for_each_entry(fib_entry, &fib_node->entry_list, list) {
		if (fib_entry->params.tb_id == fen_info->tb_id &&
		    fib_entry->params.tos == fen_info->tos &&
		    fib_entry->params.type == fen_info->type &&
		    fib_entry->nh_group->key.fi == fen_info->fi) {
			return fib_entry;
		}
	}

	return NULL;
}
1954
/* FIB nodes are hashed by their full {addr, prefix_len} key. */
static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
	.key_offset = offsetof(struct mlxsw_sp_fib_node, key),
	.head_offset = offsetof(struct mlxsw_sp_fib_node, ht_node),
	.key_len = sizeof(struct mlxsw_sp_fib_key),
	.automatic_shrinking = true,
};
1961
1962 static int mlxsw_sp_fib_node_insert(struct mlxsw_sp_fib *fib,
1963                                     struct mlxsw_sp_fib_node *fib_node)
1964 {
1965         return rhashtable_insert_fast(&fib->ht, &fib_node->ht_node,
1966                                       mlxsw_sp_fib_ht_params);
1967 }
1968
1969 static void mlxsw_sp_fib_node_remove(struct mlxsw_sp_fib *fib,
1970                                      struct mlxsw_sp_fib_node *fib_node)
1971 {
1972         rhashtable_remove_fast(&fib->ht, &fib_node->ht_node,
1973                                mlxsw_sp_fib_ht_params);
1974 }
1975
/* Look up a FIB node by prefix. The key is zeroed first because the
 * hash covers the entire key structure, including any bytes past a
 * short address.
 */
static struct mlxsw_sp_fib_node *
mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
			 size_t addr_len, unsigned char prefix_len)
{
	struct mlxsw_sp_fib_key key;

	memset(&key, 0, sizeof(key));
	memcpy(key.addr, addr, addr_len);
	key.prefix_len = prefix_len;
	return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
}
1987
1988 static struct mlxsw_sp_fib_node *
1989 mlxsw_sp_fib_node_create(struct mlxsw_sp_vr *vr, const void *addr,
1990                          size_t addr_len, unsigned char prefix_len)
1991 {
1992         struct mlxsw_sp_fib_node *fib_node;
1993
1994         fib_node = kzalloc(sizeof(*fib_node), GFP_KERNEL);
1995         if (!fib_node)
1996                 return NULL;
1997
1998         INIT_LIST_HEAD(&fib_node->entry_list);
1999         list_add(&fib_node->list, &vr->fib->node_list);
2000         memcpy(fib_node->key.addr, addr, addr_len);
2001         fib_node->key.prefix_len = prefix_len;
2002         mlxsw_sp_fib_node_insert(vr->fib, fib_node);
2003         fib_node->vr = vr;
2004
2005         return fib_node;
2006 }
2007
/* Unindex and free a FIB node. The node must have no entries left;
 * the WARN_ON catches callers that violate this.
 */
static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node)
{
	mlxsw_sp_fib_node_remove(fib_node->vr->fib, fib_node);
	list_del(&fib_node->list);
	WARN_ON(!list_empty(&fib_node->entry_list));
	kfree(fib_node);
}
2015
2016 static bool
2017 mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
2018                                  const struct mlxsw_sp_fib_entry *fib_entry)
2019 {
2020         return list_first_entry(&fib_node->entry_list,
2021                                 struct mlxsw_sp_fib_entry, list) == fib_entry;
2022 }
2023
/* Take a reference on the node's prefix length; the first reference
 * marks the length as used, which drives LPM tree structuring.
 */
static void mlxsw_sp_fib_node_prefix_inc(struct mlxsw_sp_fib_node *fib_node)
{
	unsigned char prefix_len = fib_node->key.prefix_len;
	struct mlxsw_sp_fib *fib = fib_node->vr->fib;

	if (fib->prefix_ref_count[prefix_len]++ == 0)
		mlxsw_sp_prefix_usage_set(&fib->prefix_usage, prefix_len);
}
2032
/* Drop a reference on the node's prefix length; the last reference
 * clears the length from the FIB's prefix usage map.
 */
static void mlxsw_sp_fib_node_prefix_dec(struct mlxsw_sp_fib_node *fib_node)
{
	unsigned char prefix_len = fib_node->key.prefix_len;
	struct mlxsw_sp_fib *fib = fib_node->vr->fib;

	if (--fib->prefix_ref_count[prefix_len] == 0)
		mlxsw_sp_prefix_usage_clear(&fib->prefix_usage, prefix_len);
}
2041
/* Get (or create) the FIB node for a notified IPv4 prefix within its
 * virtual router. On creation failure the virtual router reference
 * obtained here is released; when an existing node is found, it already
 * accounts for the virtual router's use.
 */
static struct mlxsw_sp_fib_node *
mlxsw_sp_fib4_node_get(struct mlxsw_sp *mlxsw_sp,
		       const struct fib_entry_notifier_info *fen_info)
{
	struct mlxsw_sp_fib_node *fib_node;
	struct mlxsw_sp_vr *vr;
	int err;

	vr = mlxsw_sp_vr_get(mlxsw_sp, fen_info->dst_len, fen_info->tb_id,
			     MLXSW_SP_L3_PROTO_IPV4);
	if (IS_ERR(vr))
		return ERR_CAST(vr);

	fib_node = mlxsw_sp_fib_node_lookup(vr->fib, &fen_info->dst,
					    sizeof(fen_info->dst),
					    fen_info->dst_len);
	if (fib_node)
		return fib_node;

	fib_node = mlxsw_sp_fib_node_create(vr, &fen_info->dst,
					    sizeof(fen_info->dst),
					    fen_info->dst_len);
	if (!fib_node) {
		err = -ENOMEM;
		goto err_fib_node_create;
	}

	return fib_node;

err_fib_node_create:
	mlxsw_sp_vr_put(mlxsw_sp, vr);
	return ERR_PTR(err);
}
2075
2076 static void mlxsw_sp_fib4_node_put(struct mlxsw_sp *mlxsw_sp,
2077                                    struct mlxsw_sp_fib_node *fib_node)
2078 {
2079         struct mlxsw_sp_vr *vr = fib_node->vr;
2080
2081         if (!list_empty(&fib_node->entry_list))
2082                 return;
2083         mlxsw_sp_fib_node_destroy(fib_node);
2084         mlxsw_sp_vr_put(mlxsw_sp, vr);
2085 }
2086
/* Find the insertion point for a new entry: the node's list is kept
 * sorted by descending table ID, then descending TOS, then ascending
 * priority. Returns the entry the new one should be placed before, or
 * NULL to append within (or after) its table's span.
 */
static struct mlxsw_sp_fib_entry *
mlxsw_sp_fib4_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
			      const struct mlxsw_sp_fib_entry_params *params)
{
	struct mlxsw_sp_fib_entry *fib_entry;

	list_for_each_entry(fib_entry, &fib_node->entry_list, list) {
		if (fib_entry->params.tb_id > params->tb_id)
			continue;
		/* Past our table's span; no insertion point found. */
		if (fib_entry->params.tb_id != params->tb_id)
			break;
		if (fib_entry->params.tos > params->tos)
			continue;
		if (fib_entry->params.prio >= params->prio ||
		    fib_entry->params.tos < params->tos)
			return fib_entry;
	}

	return NULL;
}
2107
/* Insert the new entry into the node's sorted list. If no in-table
 * insertion point exists, walk the list to place the entry right after
 * its table's span (or at the head when its table ID is the largest).
 */
static int
mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib_node *fib_node,
			       struct mlxsw_sp_fib_entry *new_entry)
{
	struct mlxsw_sp_fib_entry *fib_entry;

	fib_entry = mlxsw_sp_fib4_node_entry_find(fib_node, &new_entry->params);

	if (fib_entry) {
		list_add_tail(&new_entry->list, &fib_entry->list);
	} else {
		struct mlxsw_sp_fib_entry *last;

		/* Remember the last entry whose table ID is >= ours. */
		list_for_each_entry(last, &fib_node->entry_list, list) {
			if (new_entry->params.tb_id > last->params.tb_id)
				break;
			fib_entry = last;
		}

		if (fib_entry)
			list_add(&new_entry->list, &fib_entry->list);
		else
			list_add(&new_entry->list, &fib_node->entry_list);
	}

	return 0;
}
2135
/* Unlink the entry from its node's sorted list. */
static void
mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib_entry *fib_entry)
{
	list_del(&fib_entry->list);
}
2141
/* Program the entry into the device if it became the first (and thus
 * the active) entry on its node; otherwise nothing needs programming.
 */
static int
mlxsw_sp_fib4_node_entry_add(struct mlxsw_sp *mlxsw_sp,
			     const struct mlxsw_sp_fib_node *fib_node,
			     struct mlxsw_sp_fib_entry *fib_entry)
{
	if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
		return 0;

	/* To prevent packet loss, overwrite the previously offloaded
	 * entry.
	 */
	if (!list_is_singular(&fib_node->entry_list)) {
		enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
		struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);

		/* Only the offload indication is refreshed; the hardware
		 * entry itself is replaced by the update below.
		 */
		mlxsw_sp_fib_entry_offload_refresh(n, op, 0);
	}

	return mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
}
2162
/* Remove the entry from the device if it was the active one, promoting
 * the next entry on the node (if any) in its place.
 */
static void
mlxsw_sp_fib4_node_entry_del(struct mlxsw_sp *mlxsw_sp,
			     const struct mlxsw_sp_fib_node *fib_node,
			     struct mlxsw_sp_fib_entry *fib_entry)
{
	if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
		return;

	/* Promote the next entry by overwriting the deleted entry */
	if (!list_is_singular(&fib_node->entry_list)) {
		struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
		enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;

		mlxsw_sp_fib_entry_update(mlxsw_sp, n);
		mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
		return;
	}

	mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
}
2183
/* Attach the entry to its node: insert into the sorted list, program
 * the device if needed, and account for the prefix length.
 */
static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
	int err;

	err = mlxsw_sp_fib4_node_list_insert(fib_node, fib_entry);
	if (err)
		return err;

	err = mlxsw_sp_fib4_node_entry_add(mlxsw_sp, fib_node, fib_entry);
	if (err)
		goto err_fib4_node_entry_add;

	mlxsw_sp_fib_node_prefix_inc(fib_node);

	return 0;

err_fib4_node_entry_add:
	mlxsw_sp_fib4_node_list_remove(fib_entry);
	return err;
}
2206
/* Reverse of mlxsw_sp_fib4_node_entry_link(): drop the prefix length
 * reference, unprogram the device entry and remove it from the list.
 */
static void
mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;

	mlxsw_sp_fib_node_prefix_dec(fib_node);
	mlxsw_sp_fib4_node_entry_del(mlxsw_sp, fib_node, fib_entry);
	mlxsw_sp_fib4_node_list_remove(fib_entry);
}
2217
/* Handle a FIB_EVENT_ENTRY_ADD notification: resolve the prefix node,
 * create the entry and link it. A no-op while in aborted mode, where
 * all routing is trapped to the CPU.
 */
static int
mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp,
			 const struct fib_entry_notifier_info *fen_info)
{
	struct mlxsw_sp_fib_entry *fib_entry;
	struct mlxsw_sp_fib_node *fib_node;
	int err;

	if (mlxsw_sp->router.aborted)
		return 0;

	fib_node = mlxsw_sp_fib4_node_get(mlxsw_sp, fen_info);
	if (IS_ERR(fib_node)) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n");
		return PTR_ERR(fib_node);
	}

	fib_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info);
	if (IS_ERR(fib_entry)) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n");
		err = PTR_ERR(fib_entry);
		goto err_fib4_entry_create;
	}

	err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib_entry);
	if (err) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n");
		goto err_fib4_node_entry_link;
	}

	return 0;

err_fib4_node_entry_link:
	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry);
err_fib4_entry_create:
	/* Releases the node (and VR) if nothing else uses them. */
	mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node);
	return err;
}
2256
/* Handle a FIB_EVENT_ENTRY_DEL notification: unlink and destroy the
 * matching entry, then release its node. A no-op in aborted mode.
 */
static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
				     struct fib_entry_notifier_info *fen_info)
{
	struct mlxsw_sp_fib_entry *fib_entry;
	struct mlxsw_sp_fib_node *fib_node;

	if (mlxsw_sp->router.aborted)
		return;

	fib_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
	if (WARN_ON(!fib_entry))
		return;
	fib_node = fib_entry->fib_node;

	mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib_entry);
	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry);
	mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node);
}
2275
2276 static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
2277 {
2278         char ralta_pl[MLXSW_REG_RALTA_LEN];
2279         char ralst_pl[MLXSW_REG_RALST_LEN];
2280         char raltb_pl[MLXSW_REG_RALTB_LEN];
2281         char ralue_pl[MLXSW_REG_RALUE_LEN];
2282         int err;
2283
2284         mlxsw_reg_ralta_pack(ralta_pl, true, MLXSW_REG_RALXX_PROTOCOL_IPV4,
2285                              MLXSW_SP_LPM_TREE_MIN);
2286         err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
2287         if (err)
2288                 return err;
2289
2290         mlxsw_reg_ralst_pack(ralst_pl, 0xff, MLXSW_SP_LPM_TREE_MIN);
2291         err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
2292         if (err)
2293                 return err;
2294
2295         mlxsw_reg_raltb_pack(raltb_pl, 0, MLXSW_REG_RALXX_PROTOCOL_IPV4,
2296                              MLXSW_SP_LPM_TREE_MIN);
2297         err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
2298         if (err)
2299                 return err;
2300
2301         mlxsw_reg_ralue_pack4(ralue_pl, MLXSW_SP_L3_PROTO_IPV4,
2302                               MLXSW_REG_RALUE_OP_WRITE_WRITE, 0, 0, 0);
2303         mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
2304         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
2305 }
2306
/* Destroy all entries on a node, releasing the node itself when the
 * last entry goes away.
 */
static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_fib_entry *fib_entry, *tmp;

	list_for_each_entry_safe(fib_entry, tmp, &fib_node->entry_list, list) {
		/* 'tmp' pointing back at the head means this is the last
		 * entry, so the node_put below may free the node.
		 */
		bool do_break = &tmp->list == &fib_node->entry_list;

		mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib_entry);
		mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry);
		mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node);
		/* Break when entry list is empty and node was freed.
		 * Otherwise, we'll access freed memory in the next
		 * iteration.
		 */
		if (do_break)
			break;
	}
}
2326
/* Per-protocol dispatch for flushing a FIB node; only IPv4 is
 * implemented.
 */
static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_fib_node *fib_node)
{
	switch (fib_node->vr->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node);
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		WARN_ON_ONCE(1);
		break;
	}
}
2339
/* Flush every FIB node in every active virtual router; used when
 * entering aborted mode.
 */
static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_fib_node *fib_node, *tmp;
	struct mlxsw_sp_vr *vr;
	int i;

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
		vr = &mlxsw_sp->router.vrs[i];

		if (!vr->used)
			continue;

		list_for_each_entry_safe(fib_node, tmp, &vr->fib->node_list,
					 list) {
			/* Last node on the list: flushing may release the
			 * VR, so stop iterating afterwards.
			 */
			bool do_break = &tmp->list == &vr->fib->node_list;

			mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node);
			if (do_break)
				break;
		}
	}
}
2362
/* Give up on FIB offload: flush all offloaded routes, mark the router
 * aborted (subsequent add/del notifications become no-ops) and install
 * a catch-all trap so traffic is routed by the kernel instead.
 */
static void mlxsw_sp_router_fib4_abort(struct mlxsw_sp *mlxsw_sp)
{
	int err;

	if (mlxsw_sp->router.aborted)
		return;
	dev_warn(mlxsw_sp->bus_info->dev, "FIB abort triggered. Note that FIB entries are no longer being offloaded to this device.\n");
	mlxsw_sp_router_fib_flush(mlxsw_sp);
	mlxsw_sp->router.aborted = true;
	err = mlxsw_sp_router_set_abort_trap(mlxsw_sp);
	if (err)
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
}
2376
/* Disable a router interface in the device via a read-modify-write of
 * its RITR register.
 */
static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
{
	char ritr_pl[MLXSW_REG_RITR_LEN];
	int err;

	mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
	if (WARN_ON_ONCE(err))
		return err;

	mlxsw_reg_ritr_enable_set(ritr_pl, false);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}
2390
/* Synchronize state after a router interface disappears: disable it in
 * the device and detach the nexthops and neighbours that used it.
 */
void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_rif *r)
{
	mlxsw_sp_router_rif_disable(mlxsw_sp, r->rif);
	mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, r);
	mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, r);
}
2398
/* Low-level router init: allocate the RIF table sized by the device's
 * MAX_RIFS resource and enable routing via the RGCR register.
 */
static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
{
	char rgcr_pl[MLXSW_REG_RGCR_LEN];
	u64 max_rifs;
	int err;

	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
		return -EIO;

	max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
	mlxsw_sp->rifs = kcalloc(max_rifs, sizeof(struct mlxsw_sp_rif *),
				 GFP_KERNEL);
	if (!mlxsw_sp->rifs)
		return -ENOMEM;

	mlxsw_reg_rgcr_pack(rgcr_pl, true);
	mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
	if (err)
		goto err_rgcr_fail;

	return 0;

err_rgcr_fail:
	kfree(mlxsw_sp->rifs);
	return err;
}
2426
2427 static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
2428 {
2429         char rgcr_pl[MLXSW_REG_RGCR_LEN];
2430         int i;
2431
2432         mlxsw_reg_rgcr_pack(rgcr_pl, false);
2433         mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
2434
2435         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
2436                 WARN_ON_ONCE(mlxsw_sp->rifs[i]);
2437
2438         kfree(mlxsw_sp->rifs);
2439 }
2440
/* A deferred FIB event. Allocated in atomic notifier context by
 * mlxsw_sp_router_fib_event() and processed (then freed) in process
 * context by mlxsw_sp_router_fib_event_work().
 */
struct mlxsw_sp_fib_event_work {
	struct work_struct work;
	union {
		/* Which member is valid depends on 'event': entry
		 * add/del events carry fen_info, nexthop events fnh_info.
		 */
		struct fib_entry_notifier_info fen_info;
		struct fib_nh_notifier_info fnh_info;
	};
	struct mlxsw_sp *mlxsw_sp;
	unsigned long event;
};
2450
/* Process one deferred FIB event in process context. Runs under RTNL
 * so that the driver's routing structures cannot change concurrently.
 * Drops the fib_info reference taken when the work was queued, and
 * frees the work item itself.
 */
static void mlxsw_sp_router_fib_event_work(struct work_struct *work)
{
	struct mlxsw_sp_fib_event_work *fib_work =
		container_of(work, struct mlxsw_sp_fib_event_work, work);
	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
	int err;

	/* Protect internal structures from changes */
	rtnl_lock();
	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_ADD:
		err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info);
		if (err)
			/* Could not offload the route; give up and trap
			 * all traffic to the CPU instead.
			 */
			mlxsw_sp_router_fib4_abort(mlxsw_sp);
		fib_info_put(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_ENTRY_DEL:
		mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
		fib_info_put(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_RULE_ADD: /* fall through */
	case FIB_EVENT_RULE_DEL:
		/* FIB rules are not offloaded; abort so that traffic is
		 * routed by the kernel instead of the device.
		 */
		mlxsw_sp_router_fib4_abort(mlxsw_sp);
		break;
	case FIB_EVENT_NH_ADD: /* fall through */
	case FIB_EVENT_NH_DEL:
		mlxsw_sp_nexthop_event(mlxsw_sp, fib_work->event,
				       fib_work->fnh_info.fib_nh);
		fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
		break;
	}
	rtnl_unlock();
	kfree(fib_work);
}
2485
/* FIB notifier callback. Called with rcu_read_lock(), i.e. in atomic
 * context, so the event is copied into a work item and the actual
 * processing is deferred to mlxsw_sp_router_fib_event_work().
 */
static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
				     unsigned long event, void *ptr)
{
	struct mlxsw_sp *mlxsw_sp = container_of(nb, struct mlxsw_sp, fib_nb);
	struct mlxsw_sp_fib_event_work *fib_work;
	struct fib_notifier_info *info = ptr;

	/* Only events from the init network namespace are offloaded. */
	if (!net_eq(info->net, &init_net))
		return NOTIFY_DONE;

	/* Atomic context — GFP_ATOMIC allocation. */
	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
	if (WARN_ON(!fib_work))
		return NOTIFY_BAD;

	INIT_WORK(&fib_work->work, mlxsw_sp_router_fib_event_work);
	fib_work->mlxsw_sp = mlxsw_sp;
	fib_work->event = event;

	switch (event) {
	case FIB_EVENT_ENTRY_ADD: /* fall through */
	case FIB_EVENT_ENTRY_DEL:
		memcpy(&fib_work->fen_info, ptr, sizeof(fib_work->fen_info));
		/* Take reference on fib_info to prevent it from being
		 * freed while work is queued. Release it afterwards.
		 */
		fib_info_hold(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_NH_ADD: /* fall through */
	case FIB_EVENT_NH_DEL:
		memcpy(&fib_work->fnh_info, ptr, sizeof(fib_work->fnh_info));
		/* Same reference rule as above; released by the worker. */
		fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
		break;
	}

	mlxsw_core_schedule_work(&fib_work->work);

	return NOTIFY_DONE;
}
2525
/* Callback invoked by the FIB notifier core when a fresh dump of the
 * FIB is about to be replayed to this driver.
 */
static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
{
	struct mlxsw_sp *mlxsw_sp = container_of(nb, struct mlxsw_sp, fib_nb);

	/* Flush pending FIB notifications and then flush the device's
	 * table before requesting another dump. The FIB notification
	 * block is unregistered, so no need to take RTNL.
	 */
	mlxsw_core_flush_owq();
	mlxsw_sp_router_fib_flush(mlxsw_sp);
}
2537
/* Initialize the router subsystem: hardware router setup, nexthop hash
 * tables, LPM trees, virtual routers, neighbour handling, and finally
 * the FIB notifier registration (which triggers the initial FIB dump).
 *
 * Returns 0 on success or a negative errno; on failure everything that
 * was set up is torn down in reverse order via the goto ladder.
 */
int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
{
	int err;

	INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_neighs_list);
	err = __mlxsw_sp_router_init(mlxsw_sp);
	if (err)
		return err;

	err = rhashtable_init(&mlxsw_sp->router.nexthop_ht,
			      &mlxsw_sp_nexthop_ht_params);
	if (err)
		goto err_nexthop_ht_init;

	err = rhashtable_init(&mlxsw_sp->router.nexthop_group_ht,
			      &mlxsw_sp_nexthop_group_ht_params);
	if (err)
		goto err_nexthop_group_ht_init;

	mlxsw_sp_lpm_init(mlxsw_sp);
	err = mlxsw_sp_vrs_init(mlxsw_sp);
	if (err)
		goto err_vrs_init;

	err = mlxsw_sp_neigh_init(mlxsw_sp);
	if (err)
		goto err_neigh_init;

	/* Register last: once registered, FIB events start flowing in. */
	mlxsw_sp->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
	err = register_fib_notifier(&mlxsw_sp->fib_nb,
				    mlxsw_sp_router_fib_dump_flush);
	if (err)
		goto err_register_fib_notifier;

	return 0;

err_register_fib_notifier:
	mlxsw_sp_neigh_fini(mlxsw_sp);
err_neigh_init:
	mlxsw_sp_vrs_fini(mlxsw_sp);
err_vrs_init:
	rhashtable_destroy(&mlxsw_sp->router.nexthop_group_ht);
err_nexthop_group_ht_init:
	rhashtable_destroy(&mlxsw_sp->router.nexthop_ht);
err_nexthop_ht_init:
	__mlxsw_sp_router_fini(mlxsw_sp);
	return err;
}
2586
/* Tear down the router subsystem, in the exact reverse order of
 * mlxsw_sp_router_init(). The notifier is unregistered first so that
 * no new FIB events arrive while the rest is being destroyed.
 */
void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
{
	unregister_fib_notifier(&mlxsw_sp->fib_nb);
	mlxsw_sp_neigh_fini(mlxsw_sp);
	mlxsw_sp_vrs_fini(mlxsw_sp);
	rhashtable_destroy(&mlxsw_sp->router.nexthop_group_ht);
	rhashtable_destroy(&mlxsw_sp->router.nexthop_ht);
	__mlxsw_sp_router_fini(mlxsw_sp);
}