]> git.kernelconcepts.de Git - karo-tx-linux.git/blob - drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
74839f7f368af755f38b8fd5932af16fc2b5c459
[karo-tx-linux.git] / drivers / net / ethernet / mellanox / mlxsw / spectrum_router.c
1 /*
2  * drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
3  * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
4  * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
5  * Copyright (c) 2016 Ido Schimmel <idosch@mellanox.com>
6  * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the names of the copyright holders nor the names of its
17  *    contributors may be used to endorse or promote products derived from
18  *    this software without specific prior written permission.
19  *
20  * Alternatively, this software may be distributed under the terms of the
21  * GNU General Public License ("GPL") version 2 as published by the Free
22  * Software Foundation.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36
37 #include <linux/kernel.h>
38 #include <linux/types.h>
39 #include <linux/rhashtable.h>
40 #include <linux/bitops.h>
41 #include <linux/in6.h>
42 #include <linux/notifier.h>
43 #include <net/netevent.h>
44 #include <net/neighbour.h>
45 #include <net/arp.h>
46 #include <net/ip_fib.h>
47
48 #include "spectrum.h"
49 #include "core.h"
50 #include "reg.h"
51
/* Iterate over every bit that is set in (prefix_usage)->b, storing the bit
 * index in prefix on each pass. MLXSW_SP_PREFIX_COUNT bits are scanned.
 */
52 #define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
53         for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)
54
55 static bool
56 mlxsw_sp_prefix_usage_subset(struct mlxsw_sp_prefix_usage *prefix_usage1,
57                              struct mlxsw_sp_prefix_usage *prefix_usage2)
58 {
59         unsigned char prefix;
60
61         mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage1) {
62                 if (!test_bit(prefix, prefix_usage2->b))
63                         return false;
64         }
65         return true;
66 }
67
68 static bool
69 mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
70                          struct mlxsw_sp_prefix_usage *prefix_usage2)
71 {
72         return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
73 }
74
75 static bool
76 mlxsw_sp_prefix_usage_none(struct mlxsw_sp_prefix_usage *prefix_usage)
77 {
78         struct mlxsw_sp_prefix_usage prefix_usage_none = {{ 0 } };
79
80         return mlxsw_sp_prefix_usage_eq(prefix_usage, &prefix_usage_none);
81 }
82
83 static void
84 mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
85                           struct mlxsw_sp_prefix_usage *prefix_usage2)
86 {
87         memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
88 }
89
90 static void
91 mlxsw_sp_prefix_usage_zero(struct mlxsw_sp_prefix_usage *prefix_usage)
92 {
93         memset(prefix_usage, 0, sizeof(*prefix_usage));
94 }
95
96 static void
97 mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
98                           unsigned char prefix_len)
99 {
100         set_bit(prefix_len, prefix_usage->b);
101 }
102
103 static void
104 mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
105                             unsigned char prefix_len)
106 {
107         clear_bit(prefix_len, prefix_usage->b);
108 }
109
/* Hash key of a FIB entry (see mlxsw_sp_fib_ht_params). The whole struct,
 * padding included, takes part in the key: both the lookup path (memset)
 * and the create path (kzalloc) zero it before filling the fields in.
 */
110 struct mlxsw_sp_fib_key {
111         struct net_device *dev;
        /* Sized for an IPv6 address; callers copy addr_len bytes in. */
112         unsigned char addr[sizeof(struct in6_addr)];
113         unsigned char prefix_len;
114 };
115
/* Kind of a FIB entry; stored in struct mlxsw_sp_fib_entry::type.
 * NOTE(review): the exact meaning of each value is not established by the
 * code visible here - confirm against the users of ->type.
 */
116 enum mlxsw_sp_fib_entry_type {
117         MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
118         MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
119         MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
120 };
121
/* Forward declaration; only a pointer to the group is stored below. */
122 struct mlxsw_sp_nexthop_group;
123
/* One route tracked by the driver. Entries live in a struct mlxsw_sp_fib:
 * hashed by ->key and additionally linked on the FIB's entry_list.
 */
124 struct mlxsw_sp_fib_entry {
        /* Linkage into mlxsw_sp_fib::ht. */
125         struct rhash_head ht_node;
        /* Linkage into mlxsw_sp_fib::entry_list. */
126         struct list_head list;
127         struct mlxsw_sp_fib_key key;
128         enum mlxsw_sp_fib_entry_type type;
129         unsigned int ref_count;
130         struct mlxsw_sp_vr *vr;
        /* NOTE(review): presumably linkage into the nexthop group's list of
         * FIB entries - confirm against the nexthop group code.
         */
131         struct list_head nexthop_group_node;
132         struct mlxsw_sp_nexthop_group *nh_group;
133 };
134
/* Container of FIB entries plus bookkeeping of which prefix lengths are
 * currently in use by them.
 */
135 struct mlxsw_sp_fib {
        /* Entries hashed by struct mlxsw_sp_fib_key. */
136         struct rhashtable ht;
        /* All entries, in insertion order (list_add_tail). */
137         struct list_head entry_list;
        /* Number of inserted entries per prefix length. */
138         unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
        /* Bit N is set while prefix_ref_count[N] is non-zero. */
139         struct mlxsw_sp_prefix_usage prefix_usage;
140 };
141
/* rhashtable layout for struct mlxsw_sp_fib_entry: the fixed-length key is
 * the embedded struct mlxsw_sp_fib_key and the hash linkage is ht_node.
 */
142 static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
143         .key_offset = offsetof(struct mlxsw_sp_fib_entry, key),
144         .head_offset = offsetof(struct mlxsw_sp_fib_entry, ht_node),
145         .key_len = sizeof(struct mlxsw_sp_fib_key),
146         .automatic_shrinking = true,
147 };
148
149 static int mlxsw_sp_fib_entry_insert(struct mlxsw_sp_fib *fib,
150                                      struct mlxsw_sp_fib_entry *fib_entry)
151 {
152         unsigned char prefix_len = fib_entry->key.prefix_len;
153         int err;
154
155         err = rhashtable_insert_fast(&fib->ht, &fib_entry->ht_node,
156                                      mlxsw_sp_fib_ht_params);
157         if (err)
158                 return err;
159         list_add_tail(&fib_entry->list, &fib->entry_list);
160         if (fib->prefix_ref_count[prefix_len]++ == 0)
161                 mlxsw_sp_prefix_usage_set(&fib->prefix_usage, prefix_len);
162         return 0;
163 }
164
165 static void mlxsw_sp_fib_entry_remove(struct mlxsw_sp_fib *fib,
166                                       struct mlxsw_sp_fib_entry *fib_entry)
167 {
168         unsigned char prefix_len = fib_entry->key.prefix_len;
169
170         if (--fib->prefix_ref_count[prefix_len] == 0)
171                 mlxsw_sp_prefix_usage_clear(&fib->prefix_usage, prefix_len);
172         list_del(&fib_entry->list);
173         rhashtable_remove_fast(&fib->ht, &fib_entry->ht_node,
174                                mlxsw_sp_fib_ht_params);
175 }
176
177 static struct mlxsw_sp_fib_entry *
178 mlxsw_sp_fib_entry_create(struct mlxsw_sp_fib *fib, const void *addr,
179                           size_t addr_len, unsigned char prefix_len,
180                           struct net_device *dev)
181 {
182         struct mlxsw_sp_fib_entry *fib_entry;
183
184         fib_entry = kzalloc(sizeof(*fib_entry), GFP_KERNEL);
185         if (!fib_entry)
186                 return NULL;
187         fib_entry->key.dev = dev;
188         memcpy(fib_entry->key.addr, addr, addr_len);
189         fib_entry->key.prefix_len = prefix_len;
190         return fib_entry;
191 }
192
/* Release the memory of a FIB entry. The entry is only freed here; it is
 * not unlinked from any FIB.
 */
static void mlxsw_sp_fib_entry_destroy(struct mlxsw_sp_fib_entry *fib_entry)
{
	kfree(fib_entry);
}
197
198 static struct mlxsw_sp_fib_entry *
199 mlxsw_sp_fib_entry_lookup(struct mlxsw_sp_fib *fib, const void *addr,
200                           size_t addr_len, unsigned char prefix_len,
201                           struct net_device *dev)
202 {
203         struct mlxsw_sp_fib_key key;
204
205         memset(&key, 0, sizeof(key));
206         key.dev = dev;
207         memcpy(key.addr, addr, addr_len);
208         key.prefix_len = prefix_len;
209         return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
210 }
211
212 static struct mlxsw_sp_fib *mlxsw_sp_fib_create(void)
213 {
214         struct mlxsw_sp_fib *fib;
215         int err;
216
217         fib = kzalloc(sizeof(*fib), GFP_KERNEL);
218         if (!fib)
219                 return ERR_PTR(-ENOMEM);
220         err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
221         if (err)
222                 goto err_rhashtable_init;
223         INIT_LIST_HEAD(&fib->entry_list);
224         return fib;
225
226 err_rhashtable_init:
227         kfree(fib);
228         return ERR_PTR(err);
229 }
230
231 static void mlxsw_sp_fib_destroy(struct mlxsw_sp_fib *fib)
232 {
233         rhashtable_destroy(&fib->ht);
234         kfree(fib);
235 }
236
237 static struct mlxsw_sp_lpm_tree *
238 mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp, bool one_reserved)
239 {
240         static struct mlxsw_sp_lpm_tree *lpm_tree;
241         int i;
242
243         for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) {
244                 lpm_tree = &mlxsw_sp->router.lpm_trees[i];
245                 if (lpm_tree->ref_count == 0) {
246                         if (one_reserved)
247                                 one_reserved = false;
248                         else
249                                 return lpm_tree;
250                 }
251         }
252         return NULL;
253 }
254
255 static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
256                                    struct mlxsw_sp_lpm_tree *lpm_tree)
257 {
258         char ralta_pl[MLXSW_REG_RALTA_LEN];
259
260         mlxsw_reg_ralta_pack(ralta_pl, true,
261                              (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
262                              lpm_tree->id);
263         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
264 }
265
266 static int mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
267                                   struct mlxsw_sp_lpm_tree *lpm_tree)
268 {
269         char ralta_pl[MLXSW_REG_RALTA_LEN];
270
271         mlxsw_reg_ralta_pack(ralta_pl, false,
272                              (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
273                              lpm_tree->id);
274         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
275 }
276
277 static int
278 mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
279                                   struct mlxsw_sp_prefix_usage *prefix_usage,
280                                   struct mlxsw_sp_lpm_tree *lpm_tree)
281 {
282         char ralst_pl[MLXSW_REG_RALST_LEN];
283         u8 root_bin = 0;
284         u8 prefix;
285         u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;
286
287         mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
288                 root_bin = prefix;
289
290         mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
291         mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
292                 if (prefix == 0)
293                         continue;
294                 mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
295                                          MLXSW_REG_RALST_BIN_NO_CHILD);
296                 last_prefix = prefix;
297         }
298         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
299 }
300
301 static struct mlxsw_sp_lpm_tree *
302 mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
303                          struct mlxsw_sp_prefix_usage *prefix_usage,
304                          enum mlxsw_sp_l3proto proto, bool one_reserved)
305 {
306         struct mlxsw_sp_lpm_tree *lpm_tree;
307         int err;
308
309         lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp, one_reserved);
310         if (!lpm_tree)
311                 return ERR_PTR(-EBUSY);
312         lpm_tree->proto = proto;
313         err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
314         if (err)
315                 return ERR_PTR(err);
316
317         err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
318                                                 lpm_tree);
319         if (err)
320                 goto err_left_struct_set;
321         memcpy(&lpm_tree->prefix_usage, prefix_usage,
322                sizeof(lpm_tree->prefix_usage));
323         return lpm_tree;
324
325 err_left_struct_set:
326         mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
327         return ERR_PTR(err);
328 }
329
/* Counterpart of mlxsw_sp_lpm_tree_create(): free the tree in the device.
 *
 * Returns the result of mlxsw_sp_lpm_tree_free().
 */
static int mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_lpm_tree *lpm_tree)
{
	int err = mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);

	return err;
}
335
336 static struct mlxsw_sp_lpm_tree *
337 mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
338                       struct mlxsw_sp_prefix_usage *prefix_usage,
339                       enum mlxsw_sp_l3proto proto, bool one_reserved)
340 {
341         struct mlxsw_sp_lpm_tree *lpm_tree;
342         int i;
343
344         for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) {
345                 lpm_tree = &mlxsw_sp->router.lpm_trees[i];
346                 if (lpm_tree->ref_count != 0 &&
347                     lpm_tree->proto == proto &&
348                     mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
349                                              prefix_usage))
350                         goto inc_ref_count;
351         }
352         lpm_tree = mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage,
353                                             proto, one_reserved);
354         if (IS_ERR(lpm_tree))
355                 return lpm_tree;
356
357 inc_ref_count:
358         lpm_tree->ref_count++;
359         return lpm_tree;
360 }
361
362 static int mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
363                                  struct mlxsw_sp_lpm_tree *lpm_tree)
364 {
365         if (--lpm_tree->ref_count == 0)
366                 return mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
367         return 0;
368 }
369
370 static void mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
371 {
372         struct mlxsw_sp_lpm_tree *lpm_tree;
373         int i;
374
375         for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) {
376                 lpm_tree = &mlxsw_sp->router.lpm_trees[i];
377                 lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
378         }
379 }
380
381 static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
382 {
383         struct mlxsw_sp_vr *vr;
384         int i;
385
386         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
387                 vr = &mlxsw_sp->router.vrs[i];
388                 if (!vr->used)
389                         return vr;
390         }
391         return NULL;
392 }
393
394 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
395                                      struct mlxsw_sp_vr *vr)
396 {
397         char raltb_pl[MLXSW_REG_RALTB_LEN];
398
399         mlxsw_reg_raltb_pack(raltb_pl, vr->id,
400                              (enum mlxsw_reg_ralxx_protocol) vr->proto,
401                              vr->lpm_tree->id);
402         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
403 }
404
405 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
406                                        struct mlxsw_sp_vr *vr)
407 {
408         char raltb_pl[MLXSW_REG_RALTB_LEN];
409
410         /* Bind to tree 0 which is default */
411         mlxsw_reg_raltb_pack(raltb_pl, vr->id,
412                              (enum mlxsw_reg_ralxx_protocol) vr->proto, 0);
413         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
414 }
415
416 static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
417 {
418         /* For our purpose, squash main and local table into one */
419         if (tb_id == RT_TABLE_LOCAL)
420                 tb_id = RT_TABLE_MAIN;
421         return tb_id;
422 }
423
424 static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
425                                             u32 tb_id,
426                                             enum mlxsw_sp_l3proto proto)
427 {
428         struct mlxsw_sp_vr *vr;
429         int i;
430
431         tb_id = mlxsw_sp_fix_tb_id(tb_id);
432
433         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
434                 vr = &mlxsw_sp->router.vrs[i];
435                 if (vr->used && vr->proto == proto && vr->tb_id == tb_id)
436                         return vr;
437         }
438         return NULL;
439 }
440
441 static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
442                                               unsigned char prefix_len,
443                                               u32 tb_id,
444                                               enum mlxsw_sp_l3proto proto)
445 {
446         struct mlxsw_sp_prefix_usage req_prefix_usage;
447         struct mlxsw_sp_lpm_tree *lpm_tree;
448         struct mlxsw_sp_vr *vr;
449         int err;
450
451         vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
452         if (!vr)
453                 return ERR_PTR(-EBUSY);
454         vr->fib = mlxsw_sp_fib_create();
455         if (IS_ERR(vr->fib))
456                 return ERR_CAST(vr->fib);
457
458         vr->proto = proto;
459         vr->tb_id = tb_id;
460         mlxsw_sp_prefix_usage_zero(&req_prefix_usage);
461         mlxsw_sp_prefix_usage_set(&req_prefix_usage, prefix_len);
462         lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
463                                          proto, true);
464         if (IS_ERR(lpm_tree)) {
465                 err = PTR_ERR(lpm_tree);
466                 goto err_tree_get;
467         }
468         vr->lpm_tree = lpm_tree;
469         err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, vr);
470         if (err)
471                 goto err_tree_bind;
472
473         vr->used = true;
474         return vr;
475
476 err_tree_bind:
477         mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree);
478 err_tree_get:
479         mlxsw_sp_fib_destroy(vr->fib);
480
481         return ERR_PTR(err);
482 }
483
484 static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp,
485                                 struct mlxsw_sp_vr *vr)
486 {
487         mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, vr);
488         mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree);
489         mlxsw_sp_fib_destroy(vr->fib);
490         vr->used = false;
491 }
492
493 static int
494 mlxsw_sp_vr_lpm_tree_check(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr,
495                            struct mlxsw_sp_prefix_usage *req_prefix_usage)
496 {
497         struct mlxsw_sp_lpm_tree *lpm_tree;
498
499         if (mlxsw_sp_prefix_usage_eq(req_prefix_usage,
500                                      &vr->lpm_tree->prefix_usage))
501                 return 0;
502
503         lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, req_prefix_usage,
504                                          vr->proto, false);
505         if (IS_ERR(lpm_tree)) {
506                 /* We failed to get a tree according to the required
507                  * prefix usage. However, the current tree might be still good
508                  * for us if our requirement is subset of the prefixes used
509                  * in the tree.
510                  */
511                 if (mlxsw_sp_prefix_usage_subset(req_prefix_usage,
512                                                  &vr->lpm_tree->prefix_usage))
513                         return 0;
514                 return PTR_ERR(lpm_tree);
515         }
516
517         mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, vr);
518         mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree);
519         vr->lpm_tree = lpm_tree;
520         return mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, vr);
521 }
522
523 static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp,
524                                            unsigned char prefix_len,
525                                            u32 tb_id,
526                                            enum mlxsw_sp_l3proto proto)
527 {
528         struct mlxsw_sp_vr *vr;
529         int err;
530
531         tb_id = mlxsw_sp_fix_tb_id(tb_id);
532         vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id, proto);
533         if (!vr) {
534                 vr = mlxsw_sp_vr_create(mlxsw_sp, prefix_len, tb_id, proto);
535                 if (IS_ERR(vr))
536                         return vr;
537         } else {
538                 struct mlxsw_sp_prefix_usage req_prefix_usage;
539
540                 mlxsw_sp_prefix_usage_cpy(&req_prefix_usage,
541                                           &vr->fib->prefix_usage);
542                 mlxsw_sp_prefix_usage_set(&req_prefix_usage, prefix_len);
543                 /* Need to replace LPM tree in case new prefix is required. */
544                 err = mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, vr,
545                                                  &req_prefix_usage);
546                 if (err)
547                         return ERR_PTR(err);
548         }
549         return vr;
550 }
551
552 static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr)
553 {
554         /* Destroy virtual router entity in case the associated FIB is empty
555          * and allow it to be used for other tables in future. Otherwise,
556          * check if some prefix usage did not disappear and change tree if
557          * that is the case. Note that in case new, smaller tree cannot be
558          * allocated, the original one will be kept being used.
559          */
560         if (mlxsw_sp_prefix_usage_none(&vr->fib->prefix_usage))
561                 mlxsw_sp_vr_destroy(mlxsw_sp, vr);
562         else
563                 mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, vr,
564                                            &vr->fib->prefix_usage);
565 }
566
567 static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
568 {
569         struct mlxsw_sp_vr *vr;
570         u64 max_vrs;
571         int i;
572
573         if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
574                 return -EIO;
575
576         max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
577         mlxsw_sp->router.vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
578                                        GFP_KERNEL);
579         if (!mlxsw_sp->router.vrs)
580                 return -ENOMEM;
581
582         for (i = 0; i < max_vrs; i++) {
583                 vr = &mlxsw_sp->router.vrs[i];
584                 vr->id = i;
585         }
586
587         return 0;
588 }
589
/* Forward declaration; called by mlxsw_sp_vrs_fini() below. */
590 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);
591
592 static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
593 {
594         /* At this stage we're guaranteed not to have new incoming
595          * FIB notifications and the work queue is free from FIBs
596          * sitting on top of mlxsw netdevs. However, we can still
597          * have other FIBs queued. Flush the queue before flushing
598          * the device's tables. No need for locks, as we're the only
599          * writer.
600          */
601         mlxsw_core_flush_owq();
602         mlxsw_sp_router_fib_flush(mlxsw_sp);
603         kfree(mlxsw_sp->router.vrs);
604 }
605
/* Hash key of a neighbour entry: the kernel neighbour pointer itself. */
606 struct mlxsw_sp_neigh_key {
607         struct neighbour *n;
608 };
609
/* Driver-side state for one kernel neighbour, hashed by the neighbour
 * pointer in mlxsw_sp->router.neigh_ht.
 */
610 struct mlxsw_sp_neigh_entry {
        /* Linkage into the neighbour hash table. */
611         struct rhash_head ht_node;
612         struct mlxsw_sp_neigh_key key;
        /* Taken from the RIF found for the neighbour's netdev at creation. */
613         u16 rif;
614         bool connected;
615         unsigned char ha[ETH_ALEN];
616         struct list_head nexthop_list; /* list of nexthops using
617                                         * this neigh entry
618                                         */
        /* Linkage into mlxsw_sp->router.nexthop_neighs_list. */
619         struct list_head nexthop_neighs_list_node;
620 };
621
/* rhashtable layout for struct mlxsw_sp_neigh_entry: the fixed-length key
 * is the embedded struct mlxsw_sp_neigh_key and the linkage is ht_node.
 */
622 static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
623         .key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
624         .head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
625         .key_len = sizeof(struct mlxsw_sp_neigh_key),
626 };
627
628 static struct mlxsw_sp_neigh_entry *
629 mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
630                            u16 rif)
631 {
632         struct mlxsw_sp_neigh_entry *neigh_entry;
633
634         neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL);
635         if (!neigh_entry)
636                 return NULL;
637
638         neigh_entry->key.n = n;
639         neigh_entry->rif = rif;
640         INIT_LIST_HEAD(&neigh_entry->nexthop_list);
641
642         return neigh_entry;
643 }
644
/* Release the memory of a neighbour entry; the entry is not removed from
 * the hash table here.
 */
static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	kfree(neigh_entry);
}
649
650 static int
651 mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
652                             struct mlxsw_sp_neigh_entry *neigh_entry)
653 {
654         return rhashtable_insert_fast(&mlxsw_sp->router.neigh_ht,
655                                       &neigh_entry->ht_node,
656                                       mlxsw_sp_neigh_ht_params);
657 }
658
659 static void
660 mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
661                             struct mlxsw_sp_neigh_entry *neigh_entry)
662 {
663         rhashtable_remove_fast(&mlxsw_sp->router.neigh_ht,
664                                &neigh_entry->ht_node,
665                                mlxsw_sp_neigh_ht_params);
666 }
667
668 static struct mlxsw_sp_neigh_entry *
669 mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
670 {
671         struct mlxsw_sp_neigh_entry *neigh_entry;
672         struct mlxsw_sp_rif *r;
673         int err;
674
675         r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
676         if (!r)
677                 return ERR_PTR(-EINVAL);
678
679         neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, r->rif);
680         if (!neigh_entry)
681                 return ERR_PTR(-ENOMEM);
682
683         err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
684         if (err)
685                 goto err_neigh_entry_insert;
686
687         return neigh_entry;
688
689 err_neigh_entry_insert:
690         mlxsw_sp_neigh_entry_free(neigh_entry);
691         return ERR_PTR(err);
692 }
693
/* Counterpart of mlxsw_sp_neigh_entry_create(): remove the entry from the
 * neighbour hash table, then free it.
 */
static void
mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp,
			     struct mlxsw_sp_neigh_entry *neigh_entry)
{
	/* Unhash before freeing so the table never refers to freed memory. */
	mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
	mlxsw_sp_neigh_entry_free(neigh_entry);
}
701
702 static struct mlxsw_sp_neigh_entry *
703 mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
704 {
705         struct mlxsw_sp_neigh_key key;
706
707         key.n = n;
708         return rhashtable_lookup_fast(&mlxsw_sp->router.neigh_ht,
709                                       &key, mlxsw_sp_neigh_ht_params);
710 }
711
712 static void
713 mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
714 {
715         unsigned long interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
716
717         mlxsw_sp->router.neighs_update.interval = jiffies_to_msecs(interval);
718 }
719
720 static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
721                                                    char *rauhtd_pl,
722                                                    int ent_index)
723 {
724         struct net_device *dev;
725         struct neighbour *n;
726         __be32 dipn;
727         u32 dip;
728         u16 rif;
729
730         mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);
731
732         if (!mlxsw_sp->rifs[rif]) {
733                 dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
734                 return;
735         }
736
737         dipn = htonl(dip);
738         dev = mlxsw_sp->rifs[rif]->dev;
739         n = neigh_lookup(&arp_tbl, &dipn, dev);
740         if (!n) {
741                 netdev_err(dev, "Failed to find matching neighbour for IP=%pI4h\n",
742                            &dip);
743                 return;
744         }
745
746         netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
747         neigh_event_send(n, NULL);
748         neigh_release(n);
749 }
750
751 static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
752                                                    char *rauhtd_pl,
753                                                    int rec_index)
754 {
755         u8 num_entries;
756         int i;
757
758         num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
759                                                                 rec_index);
760         /* Hardware starts counting at 0, so add 1. */
761         num_entries++;
762
763         /* Each record consists of several neighbour entries. */
764         for (i = 0; i < num_entries; i++) {
765                 int ent_index;
766
767                 ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
768                 mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
769                                                        ent_index);
770         }
771
772 }
773
774 static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
775                                               char *rauhtd_pl, int rec_index)
776 {
777         switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
778         case MLXSW_REG_RAUHTD_TYPE_IPV4:
779                 mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
780                                                        rec_index);
781                 break;
782         case MLXSW_REG_RAUHTD_TYPE_IPV6:
783                 WARN_ON_ONCE(1);
784                 break;
785         }
786 }
787
788 static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
789 {
790         u8 num_rec, last_rec_index, num_entries;
791
792         num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
793         last_rec_index = num_rec - 1;
794
795         if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
796                 return false;
797         if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
798             MLXSW_REG_RAUHTD_TYPE_IPV6)
799                 return true;
800
801         num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
802                                                                 last_rec_index);
803         if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
804                 return true;
805         return false;
806 }
807
808 static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
809 {
810         char *rauhtd_pl;
811         u8 num_rec;
812         int i, err;
813
814         rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
815         if (!rauhtd_pl)
816                 return -ENOMEM;
817
818         /* Make sure the neighbour's netdev isn't removed in the
819          * process.
820          */
821         rtnl_lock();
822         do {
823                 mlxsw_reg_rauhtd_pack(rauhtd_pl, MLXSW_REG_RAUHTD_TYPE_IPV4);
824                 err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
825                                       rauhtd_pl);
826                 if (err) {
827                         dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour talbe\n");
828                         break;
829                 }
830                 num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
831                 for (i = 0; i < num_rec; i++)
832                         mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
833                                                           i);
834         } while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
835         rtnl_unlock();
836
837         kfree(rauhtd_pl);
838         return err;
839 }
840
841 static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
842 {
843         struct mlxsw_sp_neigh_entry *neigh_entry;
844
845         /* Take RTNL mutex here to prevent lists from changes */
846         rtnl_lock();
847         list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list,
848                             nexthop_neighs_list_node)
849                 /* If this neigh have nexthops, make the kernel think this neigh
850                  * is active regardless of the traffic.
851                  */
852                 neigh_event_send(neigh_entry->key.n, NULL);
853         rtnl_unlock();
854 }
855
856 static void
857 mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
858 {
859         unsigned long interval = mlxsw_sp->router.neighs_update.interval;
860
861         mlxsw_core_schedule_dw(&mlxsw_sp->router.neighs_update.dw,
862                                msecs_to_jiffies(interval));
863 }
864
865 static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
866 {
867         struct mlxsw_sp *mlxsw_sp = container_of(work, struct mlxsw_sp,
868                                                  router.neighs_update.dw.work);
869         int err;
870
871         err = mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp);
872         if (err)
873                 dev_err(mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity");
874
875         mlxsw_sp_router_neighs_update_nh(mlxsw_sp);
876
877         mlxsw_sp_router_neighs_update_work_schedule(mlxsw_sp);
878 }
879
880 static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
881 {
882         struct mlxsw_sp_neigh_entry *neigh_entry;
883         struct mlxsw_sp *mlxsw_sp = container_of(work, struct mlxsw_sp,
884                                                  router.nexthop_probe_dw.work);
885
886         /* Iterate over nexthop neighbours, find those who are unresolved and
887          * send arp on them. This solves the chicken-egg problem when
888          * the nexthop wouldn't get offloaded until the neighbor is resolved
889          * but it wouldn't get resolved ever in case traffic is flowing in HW
890          * using different nexthop.
891          *
892          * Take RTNL mutex here to prevent lists from changes.
893          */
894         rtnl_lock();
895         list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list,
896                             nexthop_neighs_list_node)
897                 if (!neigh_entry->connected)
898                         neigh_event_send(neigh_entry->key.n, NULL);
899         rtnl_unlock();
900
901         mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw,
902                                MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
903 }
904
/* Forward declaration: the definition appears later in this file, but
 * the neighbour event work handler needs to call it.
 */
static void mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
					  struct mlxsw_sp_neigh_entry *neigh_entry,
					  bool removing);
909
910 static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding)
911 {
912         return adding ? MLXSW_REG_RAUHT_OP_WRITE_ADD :
913                         MLXSW_REG_RAUHT_OP_WRITE_DELETE;
914 }
915
916 static void
917 mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
918                                 struct mlxsw_sp_neigh_entry *neigh_entry,
919                                 enum mlxsw_reg_rauht_op op)
920 {
921         struct neighbour *n = neigh_entry->key.n;
922         u32 dip = ntohl(*((__be32 *) n->primary_key));
923         char rauht_pl[MLXSW_REG_RAUHT_LEN];
924
925         mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
926                               dip);
927         mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
928 }
929
930 static void
931 mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
932                             struct mlxsw_sp_neigh_entry *neigh_entry,
933                             bool adding)
934 {
935         if (!adding && !neigh_entry->connected)
936                 return;
937         neigh_entry->connected = adding;
938         if (neigh_entry->key.n->tbl == &arp_tbl)
939                 mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
940                                                 mlxsw_sp_rauht_op(adding));
941         else
942                 WARN_ON_ONCE(1);
943 }
944
945 struct mlxsw_sp_neigh_event_work {
946         struct work_struct work;
947         struct mlxsw_sp *mlxsw_sp;
948         struct neighbour *n;
949 };
950
951 static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
952 {
953         struct mlxsw_sp_neigh_event_work *neigh_work =
954                 container_of(work, struct mlxsw_sp_neigh_event_work, work);
955         struct mlxsw_sp *mlxsw_sp = neigh_work->mlxsw_sp;
956         struct mlxsw_sp_neigh_entry *neigh_entry;
957         struct neighbour *n = neigh_work->n;
958         unsigned char ha[ETH_ALEN];
959         bool entry_connected;
960         u8 nud_state, dead;
961
962         /* If these parameters are changed after we release the lock,
963          * then we are guaranteed to receive another event letting us
964          * know about it.
965          */
966         read_lock_bh(&n->lock);
967         memcpy(ha, n->ha, ETH_ALEN);
968         nud_state = n->nud_state;
969         dead = n->dead;
970         read_unlock_bh(&n->lock);
971
972         rtnl_lock();
973         entry_connected = nud_state & NUD_VALID && !dead;
974         neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
975         if (!entry_connected && !neigh_entry)
976                 goto out;
977         if (!neigh_entry) {
978                 neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
979                 if (IS_ERR(neigh_entry))
980                         goto out;
981         }
982
983         memcpy(neigh_entry->ha, ha, ETH_ALEN);
984         mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
985         mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected);
986
987         if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
988                 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
989
990 out:
991         rtnl_unlock();
992         neigh_release(n);
993         kfree(neigh_work);
994 }
995
996 int mlxsw_sp_router_netevent_event(struct notifier_block *unused,
997                                    unsigned long event, void *ptr)
998 {
999         struct mlxsw_sp_neigh_event_work *neigh_work;
1000         struct mlxsw_sp_port *mlxsw_sp_port;
1001         struct mlxsw_sp *mlxsw_sp;
1002         unsigned long interval;
1003         struct neigh_parms *p;
1004         struct neighbour *n;
1005
1006         switch (event) {
1007         case NETEVENT_DELAY_PROBE_TIME_UPDATE:
1008                 p = ptr;
1009
1010                 /* We don't care about changes in the default table. */
1011                 if (!p->dev || p->tbl != &arp_tbl)
1012                         return NOTIFY_DONE;
1013
1014                 /* We are in atomic context and can't take RTNL mutex,
1015                  * so use RCU variant to walk the device chain.
1016                  */
1017                 mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev);
1018                 if (!mlxsw_sp_port)
1019                         return NOTIFY_DONE;
1020
1021                 mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
1022                 interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
1023                 mlxsw_sp->router.neighs_update.interval = interval;
1024
1025                 mlxsw_sp_port_dev_put(mlxsw_sp_port);
1026                 break;
1027         case NETEVENT_NEIGH_UPDATE:
1028                 n = ptr;
1029
1030                 if (n->tbl != &arp_tbl)
1031                         return NOTIFY_DONE;
1032
1033                 mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev);
1034                 if (!mlxsw_sp_port)
1035                         return NOTIFY_DONE;
1036
1037                 neigh_work = kzalloc(sizeof(*neigh_work), GFP_ATOMIC);
1038                 if (!neigh_work) {
1039                         mlxsw_sp_port_dev_put(mlxsw_sp_port);
1040                         return NOTIFY_BAD;
1041                 }
1042
1043                 INIT_WORK(&neigh_work->work, mlxsw_sp_router_neigh_event_work);
1044                 neigh_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
1045                 neigh_work->n = n;
1046
1047                 /* Take a reference to ensure the neighbour won't be
1048                  * destructed until we drop the reference in delayed
1049                  * work.
1050                  */
1051                 neigh_clone(n);
1052                 mlxsw_core_schedule_work(&neigh_work->work);
1053                 mlxsw_sp_port_dev_put(mlxsw_sp_port);
1054                 break;
1055         }
1056
1057         return NOTIFY_DONE;
1058 }
1059
1060 static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
1061 {
1062         int err;
1063
1064         err = rhashtable_init(&mlxsw_sp->router.neigh_ht,
1065                               &mlxsw_sp_neigh_ht_params);
1066         if (err)
1067                 return err;
1068
1069         /* Initialize the polling interval according to the default
1070          * table.
1071          */
1072         mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);
1073
1074         /* Create the delayed works for the activity_update */
1075         INIT_DELAYED_WORK(&mlxsw_sp->router.neighs_update.dw,
1076                           mlxsw_sp_router_neighs_update_work);
1077         INIT_DELAYED_WORK(&mlxsw_sp->router.nexthop_probe_dw,
1078                           mlxsw_sp_router_probe_unresolved_nexthops);
1079         mlxsw_core_schedule_dw(&mlxsw_sp->router.neighs_update.dw, 0);
1080         mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw, 0);
1081         return 0;
1082 }
1083
1084 static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
1085 {
1086         cancel_delayed_work_sync(&mlxsw_sp->router.neighs_update.dw);
1087         cancel_delayed_work_sync(&mlxsw_sp->router.nexthop_probe_dw);
1088         rhashtable_destroy(&mlxsw_sp->router.neigh_ht);
1089 }
1090
/* Hashtable key of a nexthop: the kernel fib_nh it was created for. */
struct mlxsw_sp_nexthop_key {
	struct fib_nh *fib_nh;
};
1094
1095 struct mlxsw_sp_nexthop {
1096         struct list_head neigh_list_node; /* member of neigh entry list */
1097         struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group
1098                                                 * this belongs to
1099                                                 */
1100         struct rhash_head ht_node;
1101         struct mlxsw_sp_nexthop_key key;
1102         struct mlxsw_sp_rif *r;
1103         u8 should_offload:1, /* set indicates this neigh is connected and
1104                               * should be put to KVD linear area of this group.
1105                               */
1106            offloaded:1, /* set in case the neigh is actually put into
1107                          * KVD linear area of this group.
1108                          */
1109            update:1; /* set indicates that MAC of this neigh should be
1110                       * updated in HW
1111                       */
1112         struct mlxsw_sp_neigh_entry *neigh_entry;
1113 };
1114
/* Hashtable key of a nexthop group: the kernel fib_info it mirrors. */
struct mlxsw_sp_nexthop_group_key {
	struct fib_info *fi;
};
1118
1119 struct mlxsw_sp_nexthop_group {
1120         struct rhash_head ht_node;
1121         struct list_head fib_list; /* list of fib entries that use this group */
1122         struct mlxsw_sp_nexthop_group_key key;
1123         u8 adj_index_valid:1,
1124            gateway:1; /* routes using the group use a gateway */
1125         u32 adj_index;
1126         u16 ecmp_size;
1127         u16 count;
1128         struct mlxsw_sp_nexthop nexthops[0];
1129 #define nh_rif  nexthops[0].r
1130 };
1131
1132 static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
1133         .key_offset = offsetof(struct mlxsw_sp_nexthop_group, key),
1134         .head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node),
1135         .key_len = sizeof(struct mlxsw_sp_nexthop_group_key),
1136 };
1137
1138 static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
1139                                          struct mlxsw_sp_nexthop_group *nh_grp)
1140 {
1141         return rhashtable_insert_fast(&mlxsw_sp->router.nexthop_group_ht,
1142                                       &nh_grp->ht_node,
1143                                       mlxsw_sp_nexthop_group_ht_params);
1144 }
1145
1146 static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp,
1147                                           struct mlxsw_sp_nexthop_group *nh_grp)
1148 {
1149         rhashtable_remove_fast(&mlxsw_sp->router.nexthop_group_ht,
1150                                &nh_grp->ht_node,
1151                                mlxsw_sp_nexthop_group_ht_params);
1152 }
1153
1154 static struct mlxsw_sp_nexthop_group *
1155 mlxsw_sp_nexthop_group_lookup(struct mlxsw_sp *mlxsw_sp,
1156                               struct mlxsw_sp_nexthop_group_key key)
1157 {
1158         return rhashtable_lookup_fast(&mlxsw_sp->router.nexthop_group_ht, &key,
1159                                       mlxsw_sp_nexthop_group_ht_params);
1160 }
1161
1162 static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = {
1163         .key_offset = offsetof(struct mlxsw_sp_nexthop, key),
1164         .head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node),
1165         .key_len = sizeof(struct mlxsw_sp_nexthop_key),
1166 };
1167
1168 static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp,
1169                                    struct mlxsw_sp_nexthop *nh)
1170 {
1171         return rhashtable_insert_fast(&mlxsw_sp->router.nexthop_ht,
1172                                       &nh->ht_node, mlxsw_sp_nexthop_ht_params);
1173 }
1174
1175 static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp,
1176                                     struct mlxsw_sp_nexthop *nh)
1177 {
1178         rhashtable_remove_fast(&mlxsw_sp->router.nexthop_ht, &nh->ht_node,
1179                                mlxsw_sp_nexthop_ht_params);
1180 }
1181
1182 static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
1183                                              struct mlxsw_sp_vr *vr,
1184                                              u32 adj_index, u16 ecmp_size,
1185                                              u32 new_adj_index,
1186                                              u16 new_ecmp_size)
1187 {
1188         char raleu_pl[MLXSW_REG_RALEU_LEN];
1189
1190         mlxsw_reg_raleu_pack(raleu_pl,
1191                              (enum mlxsw_reg_ralxx_protocol) vr->proto, vr->id,
1192                              adj_index, ecmp_size, new_adj_index,
1193                              new_ecmp_size);
1194         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
1195 }
1196
1197 static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
1198                                           struct mlxsw_sp_nexthop_group *nh_grp,
1199                                           u32 old_adj_index, u16 old_ecmp_size)
1200 {
1201         struct mlxsw_sp_fib_entry *fib_entry;
1202         struct mlxsw_sp_vr *vr = NULL;
1203         int err;
1204
1205         list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
1206                 if (vr == fib_entry->vr)
1207                         continue;
1208                 vr = fib_entry->vr;
1209                 err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, vr,
1210                                                         old_adj_index,
1211                                                         old_ecmp_size,
1212                                                         nh_grp->adj_index,
1213                                                         nh_grp->ecmp_size);
1214                 if (err)
1215                         return err;
1216         }
1217         return 0;
1218 }
1219
1220 static int mlxsw_sp_nexthop_mac_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
1221                                        struct mlxsw_sp_nexthop *nh)
1222 {
1223         struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
1224         char ratr_pl[MLXSW_REG_RATR_LEN];
1225
1226         mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
1227                             true, adj_index, neigh_entry->rif);
1228         mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
1229         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
1230 }
1231
1232 static int
1233 mlxsw_sp_nexthop_group_mac_update(struct mlxsw_sp *mlxsw_sp,
1234                                   struct mlxsw_sp_nexthop_group *nh_grp,
1235                                   bool reallocate)
1236 {
1237         u32 adj_index = nh_grp->adj_index; /* base */
1238         struct mlxsw_sp_nexthop *nh;
1239         int i;
1240         int err;
1241
1242         for (i = 0; i < nh_grp->count; i++) {
1243                 nh = &nh_grp->nexthops[i];
1244
1245                 if (!nh->should_offload) {
1246                         nh->offloaded = 0;
1247                         continue;
1248                 }
1249
1250                 if (nh->update || reallocate) {
1251                         err = mlxsw_sp_nexthop_mac_update(mlxsw_sp,
1252                                                           adj_index, nh);
1253                         if (err)
1254                                 return err;
1255                         nh->update = 0;
1256                         nh->offloaded = 1;
1257                 }
1258                 adj_index++;
1259         }
1260         return 0;
1261 }
1262
/* Forward declaration: called by the nexthop group code below before
 * the definition appears.
 */
static int
mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
			  struct mlxsw_sp_fib_entry *fib_entry);
1265
1266 static int
1267 mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
1268                                     struct mlxsw_sp_nexthop_group *nh_grp)
1269 {
1270         struct mlxsw_sp_fib_entry *fib_entry;
1271         int err;
1272
1273         list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
1274                 err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1275                 if (err)
1276                         return err;
1277         }
1278         return 0;
1279 }
1280
1281 static void
1282 mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
1283                                struct mlxsw_sp_nexthop_group *nh_grp)
1284 {
1285         struct mlxsw_sp_nexthop *nh;
1286         bool offload_change = false;
1287         u32 adj_index;
1288         u16 ecmp_size = 0;
1289         bool old_adj_index_valid;
1290         u32 old_adj_index;
1291         u16 old_ecmp_size;
1292         int ret;
1293         int i;
1294         int err;
1295
1296         if (!nh_grp->gateway) {
1297                 mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
1298                 return;
1299         }
1300
1301         for (i = 0; i < nh_grp->count; i++) {
1302                 nh = &nh_grp->nexthops[i];
1303
1304                 if (nh->should_offload ^ nh->offloaded) {
1305                         offload_change = true;
1306                         if (nh->should_offload)
1307                                 nh->update = 1;
1308                 }
1309                 if (nh->should_offload)
1310                         ecmp_size++;
1311         }
1312         if (!offload_change) {
1313                 /* Nothing was added or removed, so no need to reallocate. Just
1314                  * update MAC on existing adjacency indexes.
1315                  */
1316                 err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp,
1317                                                         false);
1318                 if (err) {
1319                         dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
1320                         goto set_trap;
1321                 }
1322                 return;
1323         }
1324         if (!ecmp_size)
1325                 /* No neigh of this group is connected so we just set
1326                  * the trap and let everthing flow through kernel.
1327                  */
1328                 goto set_trap;
1329
1330         ret = mlxsw_sp_kvdl_alloc(mlxsw_sp, ecmp_size);
1331         if (ret < 0) {
1332                 /* We ran out of KVD linear space, just set the
1333                  * trap and let everything flow through kernel.
1334                  */
1335                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
1336                 goto set_trap;
1337         }
1338         adj_index = ret;
1339         old_adj_index_valid = nh_grp->adj_index_valid;
1340         old_adj_index = nh_grp->adj_index;
1341         old_ecmp_size = nh_grp->ecmp_size;
1342         nh_grp->adj_index_valid = 1;
1343         nh_grp->adj_index = adj_index;
1344         nh_grp->ecmp_size = ecmp_size;
1345         err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp, true);
1346         if (err) {
1347                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
1348                 goto set_trap;
1349         }
1350
1351         if (!old_adj_index_valid) {
1352                 /* The trap was set for fib entries, so we have to call
1353                  * fib entry update to unset it and use adjacency index.
1354                  */
1355                 err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
1356                 if (err) {
1357                         dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
1358                         goto set_trap;
1359                 }
1360                 return;
1361         }
1362
1363         err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
1364                                              old_adj_index, old_ecmp_size);
1365         mlxsw_sp_kvdl_free(mlxsw_sp, old_adj_index);
1366         if (err) {
1367                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
1368                 goto set_trap;
1369         }
1370         return;
1371
1372 set_trap:
1373         old_adj_index_valid = nh_grp->adj_index_valid;
1374         nh_grp->adj_index_valid = 0;
1375         for (i = 0; i < nh_grp->count; i++) {
1376                 nh = &nh_grp->nexthops[i];
1377                 nh->offloaded = 0;
1378         }
1379         err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
1380         if (err)
1381                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
1382         if (old_adj_index_valid)
1383                 mlxsw_sp_kvdl_free(mlxsw_sp, nh_grp->adj_index);
1384 }
1385
1386 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
1387                                             bool removing)
1388 {
1389         if (!removing && !nh->should_offload)
1390                 nh->should_offload = 1;
1391         else if (removing && nh->offloaded)
1392                 nh->should_offload = 0;
1393         nh->update = 1;
1394 }
1395
1396 static void
1397 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
1398                               struct mlxsw_sp_neigh_entry *neigh_entry,
1399                               bool removing)
1400 {
1401         struct mlxsw_sp_nexthop *nh;
1402
1403         list_for_each_entry(nh, &neigh_entry->nexthop_list,
1404                             neigh_list_node) {
1405                 __mlxsw_sp_nexthop_neigh_update(nh, removing);
1406                 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
1407         }
1408 }
1409
1410 static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp,
1411                                  struct mlxsw_sp_nexthop_group *nh_grp,
1412                                  struct mlxsw_sp_nexthop *nh,
1413                                  struct fib_nh *fib_nh)
1414 {
1415         struct mlxsw_sp_neigh_entry *neigh_entry;
1416         struct net_device *dev = fib_nh->nh_dev;
1417         struct mlxsw_sp_rif *r;
1418         struct neighbour *n;
1419         u8 nud_state, dead;
1420         int err;
1421
1422         nh->nh_grp = nh_grp;
1423         nh->key.fib_nh = fib_nh;
1424         err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
1425         if (err)
1426                 return err;
1427
1428         r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
1429         if (!r)
1430                 return 0;
1431         nh->r = r;
1432
1433         if (!nh_grp->gateway)
1434                 return 0;
1435
1436         /* Take a reference of neigh here ensuring that neigh would
1437          * not be detructed before the nexthop entry is finished.
1438          * The reference is taken either in neigh_lookup() or
1439          * in neigh_create() in case n is not found.
1440          */
1441         n = neigh_lookup(&arp_tbl, &fib_nh->nh_gw, dev);
1442         if (!n) {
1443                 n = neigh_create(&arp_tbl, &fib_nh->nh_gw, dev);
1444                 if (IS_ERR(n)) {
1445                         err = PTR_ERR(n);
1446                         goto err_neigh_create;
1447                 }
1448                 neigh_event_send(n, NULL);
1449         }
1450         neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
1451         if (!neigh_entry) {
1452                 neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
1453                 if (IS_ERR(neigh_entry)) {
1454                         err = -EINVAL;
1455                         goto err_neigh_entry_create;
1456                 }
1457         }
1458
1459         /* If that is the first nexthop connected to that neigh, add to
1460          * nexthop_neighs_list
1461          */
1462         if (list_empty(&neigh_entry->nexthop_list))
1463                 list_add_tail(&neigh_entry->nexthop_neighs_list_node,
1464                               &mlxsw_sp->router.nexthop_neighs_list);
1465
1466         nh->neigh_entry = neigh_entry;
1467         list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
1468         read_lock_bh(&n->lock);
1469         nud_state = n->nud_state;
1470         dead = n->dead;
1471         read_unlock_bh(&n->lock);
1472         __mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));
1473
1474         return 0;
1475
1476 err_neigh_entry_create:
1477         neigh_release(n);
1478 err_neigh_create:
1479         mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
1480         return err;
1481 }
1482
1483 static void mlxsw_sp_nexthop_fini(struct mlxsw_sp *mlxsw_sp,
1484                                   struct mlxsw_sp_nexthop *nh)
1485 {
1486         struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
1487         struct neighbour *n = neigh_entry->key.n;
1488
1489         if (!neigh_entry)
1490                 goto out;
1491
1492         __mlxsw_sp_nexthop_neigh_update(nh, true);
1493         list_del(&nh->neigh_list_node);
1494         nh->neigh_entry = NULL;
1495
1496         /* If that is the last nexthop connected to that neigh, remove from
1497          * nexthop_neighs_list
1498          */
1499         if (list_empty(&neigh_entry->nexthop_list))
1500                 list_del(&neigh_entry->nexthop_neighs_list_node);
1501
1502         if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
1503                 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
1504
1505         neigh_release(n);
1506
1507 out:
1508         mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
1509 }
1510
1511 static struct mlxsw_sp_nexthop_group *
1512 mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
1513 {
1514         struct mlxsw_sp_nexthop_group *nh_grp;
1515         struct mlxsw_sp_nexthop *nh;
1516         struct fib_nh *fib_nh;
1517         size_t alloc_size;
1518         int i;
1519         int err;
1520
1521         alloc_size = sizeof(*nh_grp) +
1522                      fi->fib_nhs * sizeof(struct mlxsw_sp_nexthop);
1523         nh_grp = kzalloc(alloc_size, GFP_KERNEL);
1524         if (!nh_grp)
1525                 return ERR_PTR(-ENOMEM);
1526         INIT_LIST_HEAD(&nh_grp->fib_list);
1527         nh_grp->gateway = fi->fib_nh->nh_scope == RT_SCOPE_LINK;
1528         nh_grp->count = fi->fib_nhs;
1529         nh_grp->key.fi = fi;
1530         for (i = 0; i < nh_grp->count; i++) {
1531                 nh = &nh_grp->nexthops[i];
1532                 fib_nh = &fi->fib_nh[i];
1533                 err = mlxsw_sp_nexthop_init(mlxsw_sp, nh_grp, nh, fib_nh);
1534                 if (err)
1535                         goto err_nexthop_init;
1536         }
1537         err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
1538         if (err)
1539                 goto err_nexthop_group_insert;
1540         mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
1541         return nh_grp;
1542
1543 err_nexthop_group_insert:
1544 err_nexthop_init:
1545         for (i--; i >= 0; i--)
1546                 mlxsw_sp_nexthop_fini(mlxsw_sp, nh);
1547         kfree(nh_grp);
1548         return ERR_PTR(err);
1549 }
1550
1551 static void
1552 mlxsw_sp_nexthop_group_destroy(struct mlxsw_sp *mlxsw_sp,
1553                                struct mlxsw_sp_nexthop_group *nh_grp)
1554 {
1555         struct mlxsw_sp_nexthop *nh;
1556         int i;
1557
1558         mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
1559         for (i = 0; i < nh_grp->count; i++) {
1560                 nh = &nh_grp->nexthops[i];
1561                 mlxsw_sp_nexthop_fini(mlxsw_sp, nh);
1562         }
1563         mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
1564         WARN_ON_ONCE(nh_grp->adj_index_valid);
1565         kfree(nh_grp);
1566 }
1567
1568 static int mlxsw_sp_nexthop_group_get(struct mlxsw_sp *mlxsw_sp,
1569                                       struct mlxsw_sp_fib_entry *fib_entry,
1570                                       struct fib_info *fi)
1571 {
1572         struct mlxsw_sp_nexthop_group_key key;
1573         struct mlxsw_sp_nexthop_group *nh_grp;
1574
1575         key.fi = fi;
1576         nh_grp = mlxsw_sp_nexthop_group_lookup(mlxsw_sp, key);
1577         if (!nh_grp) {
1578                 nh_grp = mlxsw_sp_nexthop_group_create(mlxsw_sp, fi);
1579                 if (IS_ERR(nh_grp))
1580                         return PTR_ERR(nh_grp);
1581         }
1582         list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
1583         fib_entry->nh_group = nh_grp;
1584         return 0;
1585 }
1586
1587 static void mlxsw_sp_nexthop_group_put(struct mlxsw_sp *mlxsw_sp,
1588                                        struct mlxsw_sp_fib_entry *fib_entry)
1589 {
1590         struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
1591
1592         list_del(&fib_entry->nexthop_group_node);
1593         if (!list_empty(&nh_grp->fib_list))
1594                 return;
1595         mlxsw_sp_nexthop_group_destroy(mlxsw_sp, nh_grp);
1596 }
1597
1598 static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp,
1599                                          struct mlxsw_sp_fib_entry *fib_entry,
1600                                          enum mlxsw_reg_ralue_op op)
1601 {
1602         char ralue_pl[MLXSW_REG_RALUE_LEN];
1603         u32 *p_dip = (u32 *) fib_entry->key.addr;
1604         struct mlxsw_sp_vr *vr = fib_entry->vr;
1605         enum mlxsw_reg_ralue_trap_action trap_action;
1606         u16 trap_id = 0;
1607         u32 adjacency_index = 0;
1608         u16 ecmp_size = 0;
1609
1610         /* In case the nexthop group adjacency index is valid, use it
1611          * with provided ECMP size. Otherwise, setup trap and pass
1612          * traffic to kernel.
1613          */
1614         if (fib_entry->nh_group->adj_index_valid) {
1615                 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
1616                 adjacency_index = fib_entry->nh_group->adj_index;
1617                 ecmp_size = fib_entry->nh_group->ecmp_size;
1618         } else {
1619                 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
1620                 trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
1621         }
1622
1623         mlxsw_reg_ralue_pack4(ralue_pl,
1624                               (enum mlxsw_reg_ralxx_protocol) vr->proto, op,
1625                               vr->id, fib_entry->key.prefix_len, *p_dip);
1626         mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
1627                                         adjacency_index, ecmp_size);
1628         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
1629 }
1630
1631 static int mlxsw_sp_fib_entry_op4_local(struct mlxsw_sp *mlxsw_sp,
1632                                         struct mlxsw_sp_fib_entry *fib_entry,
1633                                         enum mlxsw_reg_ralue_op op)
1634 {
1635         struct mlxsw_sp_rif *r = fib_entry->nh_group->nh_rif;
1636         char ralue_pl[MLXSW_REG_RALUE_LEN];
1637         u32 *p_dip = (u32 *) fib_entry->key.addr;
1638         struct mlxsw_sp_vr *vr = fib_entry->vr;
1639
1640         mlxsw_reg_ralue_pack4(ralue_pl,
1641                               (enum mlxsw_reg_ralxx_protocol) vr->proto, op,
1642                               vr->id, fib_entry->key.prefix_len, *p_dip);
1643         mlxsw_reg_ralue_act_local_pack(ralue_pl,
1644                                        MLXSW_REG_RALUE_TRAP_ACTION_NOP, 0,
1645                                        r->rif);
1646         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
1647 }
1648
1649 static int mlxsw_sp_fib_entry_op4_trap(struct mlxsw_sp *mlxsw_sp,
1650                                        struct mlxsw_sp_fib_entry *fib_entry,
1651                                        enum mlxsw_reg_ralue_op op)
1652 {
1653         char ralue_pl[MLXSW_REG_RALUE_LEN];
1654         u32 *p_dip = (u32 *) fib_entry->key.addr;
1655         struct mlxsw_sp_vr *vr = fib_entry->vr;
1656
1657         mlxsw_reg_ralue_pack4(ralue_pl,
1658                               (enum mlxsw_reg_ralxx_protocol) vr->proto, op,
1659                               vr->id, fib_entry->key.prefix_len, *p_dip);
1660         mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
1661         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
1662 }
1663
1664 static int mlxsw_sp_fib_entry_op4(struct mlxsw_sp *mlxsw_sp,
1665                                   struct mlxsw_sp_fib_entry *fib_entry,
1666                                   enum mlxsw_reg_ralue_op op)
1667 {
1668         switch (fib_entry->type) {
1669         case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
1670                 return mlxsw_sp_fib_entry_op4_remote(mlxsw_sp, fib_entry, op);
1671         case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
1672                 return mlxsw_sp_fib_entry_op4_local(mlxsw_sp, fib_entry, op);
1673         case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
1674                 return mlxsw_sp_fib_entry_op4_trap(mlxsw_sp, fib_entry, op);
1675         }
1676         return -EINVAL;
1677 }
1678
1679 static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
1680                                  struct mlxsw_sp_fib_entry *fib_entry,
1681                                  enum mlxsw_reg_ralue_op op)
1682 {
1683         switch (fib_entry->vr->proto) {
1684         case MLXSW_SP_L3_PROTO_IPV4:
1685                 return mlxsw_sp_fib_entry_op4(mlxsw_sp, fib_entry, op);
1686         case MLXSW_SP_L3_PROTO_IPV6:
1687                 return -EINVAL;
1688         }
1689         return -EINVAL;
1690 }
1691
1692 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
1693                                      struct mlxsw_sp_fib_entry *fib_entry)
1694 {
1695         return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
1696                                      MLXSW_REG_RALUE_OP_WRITE_WRITE);
1697 }
1698
1699 static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
1700                                   struct mlxsw_sp_fib_entry *fib_entry)
1701 {
1702         return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
1703                                      MLXSW_REG_RALUE_OP_WRITE_DELETE);
1704 }
1705
1706 static int
1707 mlxsw_sp_router_fib4_entry_init(struct mlxsw_sp *mlxsw_sp,
1708                                 const struct fib_entry_notifier_info *fen_info,
1709                                 struct mlxsw_sp_fib_entry *fib_entry)
1710 {
1711         struct fib_info *fi = fen_info->fi;
1712         struct mlxsw_sp_rif *r = NULL;
1713         int nhsel;
1714
1715         if (fen_info->type == RTN_LOCAL || fen_info->type == RTN_BROADCAST) {
1716                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1717                 return 0;
1718         }
1719         if (fen_info->type != RTN_UNICAST)
1720                 return -EINVAL;
1721
1722         for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
1723                 const struct fib_nh *nh = &fi->fib_nh[nhsel];
1724
1725                 if (!nh->nh_dev)
1726                         continue;
1727                 r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, nh->nh_dev);
1728                 if (!r) {
1729                         /* In case router interface is not found for
1730                          * at least one of the nexthops, that means
1731                          * the nexthop points to some device unrelated
1732                          * to us. Set trap and pass the packets for
1733                          * this prefix to kernel.
1734                          */
1735                         break;
1736                 }
1737         }
1738
1739         if (!r) {
1740                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1741                 return 0;
1742         }
1743
1744         if (fi->fib_nh->nh_scope != RT_SCOPE_LINK)
1745                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
1746         else
1747                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
1748         fib_info_offload_inc(fen_info->fi);
1749         return 0;
1750 }
1751
1752 static void
1753 mlxsw_sp_router_fib4_entry_fini(struct mlxsw_sp *mlxsw_sp,
1754                                 struct mlxsw_sp_fib_entry *fib_entry)
1755 {
1756         if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP)
1757                 fib_info_offload_dec(fib_entry->nh_group->key.fi);
1758 }
1759
1760 static struct mlxsw_sp_fib_entry *
1761 mlxsw_sp_fib_entry_get(struct mlxsw_sp *mlxsw_sp,
1762                        const struct fib_entry_notifier_info *fen_info)
1763 {
1764         struct mlxsw_sp_fib_entry *fib_entry;
1765         struct fib_info *fi = fen_info->fi;
1766         struct mlxsw_sp_vr *vr;
1767         int err;
1768
1769         vr = mlxsw_sp_vr_get(mlxsw_sp, fen_info->dst_len, fen_info->tb_id,
1770                              MLXSW_SP_L3_PROTO_IPV4);
1771         if (IS_ERR(vr))
1772                 return ERR_CAST(vr);
1773
1774         fib_entry = mlxsw_sp_fib_entry_lookup(vr->fib, &fen_info->dst,
1775                                               sizeof(fen_info->dst),
1776                                               fen_info->dst_len, fi->fib_dev);
1777         if (fib_entry) {
1778                 /* Already exists, just take a reference */
1779                 fib_entry->ref_count++;
1780                 return fib_entry;
1781         }
1782         fib_entry = mlxsw_sp_fib_entry_create(vr->fib, &fen_info->dst,
1783                                               sizeof(fen_info->dst),
1784                                               fen_info->dst_len, fi->fib_dev);
1785         if (!fib_entry) {
1786                 err = -ENOMEM;
1787                 goto err_fib_entry_create;
1788         }
1789         fib_entry->vr = vr;
1790         fib_entry->ref_count = 1;
1791
1792         err = mlxsw_sp_router_fib4_entry_init(mlxsw_sp, fen_info, fib_entry);
1793         if (err)
1794                 goto err_fib4_entry_init;
1795
1796         err = mlxsw_sp_nexthop_group_get(mlxsw_sp, fib_entry, fi);
1797         if (err)
1798                 goto err_nexthop_group_get;
1799
1800         return fib_entry;
1801
1802 err_nexthop_group_get:
1803         mlxsw_sp_router_fib4_entry_fini(mlxsw_sp, fib_entry);
1804 err_fib4_entry_init:
1805         mlxsw_sp_fib_entry_destroy(fib_entry);
1806 err_fib_entry_create:
1807         mlxsw_sp_vr_put(mlxsw_sp, vr);
1808
1809         return ERR_PTR(err);
1810 }
1811
1812 static struct mlxsw_sp_fib_entry *
1813 mlxsw_sp_fib_entry_find(struct mlxsw_sp *mlxsw_sp,
1814                         const struct fib_entry_notifier_info *fen_info)
1815 {
1816         struct mlxsw_sp_vr *vr;
1817
1818         vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id,
1819                               MLXSW_SP_L3_PROTO_IPV4);
1820         if (!vr)
1821                 return NULL;
1822
1823         return mlxsw_sp_fib_entry_lookup(vr->fib, &fen_info->dst,
1824                                          sizeof(fen_info->dst),
1825                                          fen_info->dst_len,
1826                                          fen_info->fi->fib_dev);
1827 }
1828
1829 static void mlxsw_sp_fib_entry_put(struct mlxsw_sp *mlxsw_sp,
1830                                    struct mlxsw_sp_fib_entry *fib_entry)
1831 {
1832         struct mlxsw_sp_vr *vr = fib_entry->vr;
1833
1834         if (--fib_entry->ref_count == 0) {
1835                 mlxsw_sp_nexthop_group_put(mlxsw_sp, fib_entry);
1836                 mlxsw_sp_router_fib4_entry_fini(mlxsw_sp, fib_entry);
1837                 mlxsw_sp_fib_entry_destroy(fib_entry);
1838         }
1839         mlxsw_sp_vr_put(mlxsw_sp, vr);
1840 }
1841
1842 static void mlxsw_sp_fib_entry_put_all(struct mlxsw_sp *mlxsw_sp,
1843                                        struct mlxsw_sp_fib_entry *fib_entry)
1844 {
1845         unsigned int last_ref_count;
1846
1847         do {
1848                 last_ref_count = fib_entry->ref_count;
1849                 mlxsw_sp_fib_entry_put(mlxsw_sp, fib_entry);
1850         } while (last_ref_count != 1);
1851 }
1852
1853 static int mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp,
1854                                     struct fib_entry_notifier_info *fen_info)
1855 {
1856         struct mlxsw_sp_fib_entry *fib_entry;
1857         struct mlxsw_sp_vr *vr;
1858         int err;
1859
1860         if (mlxsw_sp->router.aborted)
1861                 return 0;
1862
1863         fib_entry = mlxsw_sp_fib_entry_get(mlxsw_sp, fen_info);
1864         if (IS_ERR(fib_entry)) {
1865                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB4 entry being added.\n");
1866                 return PTR_ERR(fib_entry);
1867         }
1868
1869         if (fib_entry->ref_count != 1)
1870                 return 0;
1871
1872         vr = fib_entry->vr;
1873         err = mlxsw_sp_fib_entry_insert(vr->fib, fib_entry);
1874         if (err) {
1875                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to insert FIB4 entry being added.\n");
1876                 goto err_fib_entry_insert;
1877         }
1878         err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1879         if (err)
1880                 goto err_fib_entry_add;
1881         return 0;
1882
1883 err_fib_entry_add:
1884         mlxsw_sp_fib_entry_remove(vr->fib, fib_entry);
1885 err_fib_entry_insert:
1886         mlxsw_sp_fib_entry_put(mlxsw_sp, fib_entry);
1887         return err;
1888 }
1889
1890 static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
1891                                      struct fib_entry_notifier_info *fen_info)
1892 {
1893         struct mlxsw_sp_fib_entry *fib_entry;
1894
1895         if (mlxsw_sp->router.aborted)
1896                 return;
1897
1898         fib_entry = mlxsw_sp_fib_entry_find(mlxsw_sp, fen_info);
1899         if (!fib_entry)
1900                 return;
1901
1902         if (fib_entry->ref_count == 1) {
1903                 mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
1904                 mlxsw_sp_fib_entry_remove(fib_entry->vr->fib, fib_entry);
1905         }
1906
1907         mlxsw_sp_fib_entry_put(mlxsw_sp, fib_entry);
1908 }
1909
1910 static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
1911 {
1912         char ralta_pl[MLXSW_REG_RALTA_LEN];
1913         char ralst_pl[MLXSW_REG_RALST_LEN];
1914         char raltb_pl[MLXSW_REG_RALTB_LEN];
1915         char ralue_pl[MLXSW_REG_RALUE_LEN];
1916         int err;
1917
1918         mlxsw_reg_ralta_pack(ralta_pl, true, MLXSW_REG_RALXX_PROTOCOL_IPV4,
1919                              MLXSW_SP_LPM_TREE_MIN);
1920         err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
1921         if (err)
1922                 return err;
1923
1924         mlxsw_reg_ralst_pack(ralst_pl, 0xff, MLXSW_SP_LPM_TREE_MIN);
1925         err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
1926         if (err)
1927                 return err;
1928
1929         mlxsw_reg_raltb_pack(raltb_pl, 0, MLXSW_REG_RALXX_PROTOCOL_IPV4,
1930                              MLXSW_SP_LPM_TREE_MIN);
1931         err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
1932         if (err)
1933                 return err;
1934
1935         mlxsw_reg_ralue_pack4(ralue_pl, MLXSW_SP_L3_PROTO_IPV4,
1936                               MLXSW_REG_RALUE_OP_WRITE_WRITE, 0, 0, 0);
1937         mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
1938         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
1939 }
1940
1941 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
1942 {
1943         struct mlxsw_sp_fib_entry *fib_entry;
1944         struct mlxsw_sp_fib_entry *tmp;
1945         struct mlxsw_sp_vr *vr;
1946         int i;
1947
1948         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
1949                 vr = &mlxsw_sp->router.vrs[i];
1950
1951                 if (!vr->used)
1952                         continue;
1953
1954                 list_for_each_entry_safe(fib_entry, tmp,
1955                                          &vr->fib->entry_list, list) {
1956                         bool do_break = &tmp->list == &vr->fib->entry_list;
1957
1958                         mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
1959                         mlxsw_sp_fib_entry_remove(fib_entry->vr->fib,
1960                                                   fib_entry);
1961                         mlxsw_sp_fib_entry_put_all(mlxsw_sp, fib_entry);
1962                         if (do_break)
1963                                 break;
1964                 }
1965         }
1966 }
1967
1968 static void mlxsw_sp_router_fib4_abort(struct mlxsw_sp *mlxsw_sp)
1969 {
1970         int err;
1971
1972         if (mlxsw_sp->router.aborted)
1973                 return;
1974         dev_warn(mlxsw_sp->bus_info->dev, "FIB abort triggered. Note that FIB entries are no longer being offloaded to this device.\n");
1975         mlxsw_sp_router_fib_flush(mlxsw_sp);
1976         mlxsw_sp->router.aborted = true;
1977         err = mlxsw_sp_router_set_abort_trap(mlxsw_sp);
1978         if (err)
1979                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
1980 }
1981
1982 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
1983 {
1984         char rgcr_pl[MLXSW_REG_RGCR_LEN];
1985         u64 max_rifs;
1986         int err;
1987
1988         if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
1989                 return -EIO;
1990
1991         max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
1992         mlxsw_sp->rifs = kcalloc(max_rifs, sizeof(struct mlxsw_sp_rif *),
1993                                  GFP_KERNEL);
1994         if (!mlxsw_sp->rifs)
1995                 return -ENOMEM;
1996
1997         mlxsw_reg_rgcr_pack(rgcr_pl, true);
1998         mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
1999         err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
2000         if (err)
2001                 goto err_rgcr_fail;
2002
2003         return 0;
2004
2005 err_rgcr_fail:
2006         kfree(mlxsw_sp->rifs);
2007         return err;
2008 }
2009
2010 static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
2011 {
2012         char rgcr_pl[MLXSW_REG_RGCR_LEN];
2013         int i;
2014
2015         mlxsw_reg_rgcr_pack(rgcr_pl, false);
2016         mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
2017
2018         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
2019                 WARN_ON_ONCE(mlxsw_sp->rifs[i]);
2020
2021         kfree(mlxsw_sp->rifs);
2022 }
2023
2024 struct mlxsw_sp_fib_event_work {
2025         struct work_struct work;
2026         struct fib_entry_notifier_info fen_info;
2027         struct mlxsw_sp *mlxsw_sp;
2028         unsigned long event;
2029 };
2030
2031 static void mlxsw_sp_router_fib_event_work(struct work_struct *work)
2032 {
2033         struct mlxsw_sp_fib_event_work *fib_work =
2034                 container_of(work, struct mlxsw_sp_fib_event_work, work);
2035         struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
2036         int err;
2037
2038         /* Protect internal structures from changes */
2039         rtnl_lock();
2040         switch (fib_work->event) {
2041         case FIB_EVENT_ENTRY_ADD:
2042                 err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info);
2043                 if (err)
2044                         mlxsw_sp_router_fib4_abort(mlxsw_sp);
2045                 fib_info_put(fib_work->fen_info.fi);
2046                 break;
2047         case FIB_EVENT_ENTRY_DEL:
2048                 mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
2049                 fib_info_put(fib_work->fen_info.fi);
2050                 break;
2051         case FIB_EVENT_RULE_ADD: /* fall through */
2052         case FIB_EVENT_RULE_DEL:
2053                 mlxsw_sp_router_fib4_abort(mlxsw_sp);
2054                 break;
2055         }
2056         rtnl_unlock();
2057         kfree(fib_work);
2058 }
2059
2060 /* Called with rcu_read_lock() */
2061 static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
2062                                      unsigned long event, void *ptr)
2063 {
2064         struct mlxsw_sp *mlxsw_sp = container_of(nb, struct mlxsw_sp, fib_nb);
2065         struct mlxsw_sp_fib_event_work *fib_work;
2066         struct fib_notifier_info *info = ptr;
2067
2068         if (!net_eq(info->net, &init_net))
2069                 return NOTIFY_DONE;
2070
2071         fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
2072         if (WARN_ON(!fib_work))
2073                 return NOTIFY_BAD;
2074
2075         INIT_WORK(&fib_work->work, mlxsw_sp_router_fib_event_work);
2076         fib_work->mlxsw_sp = mlxsw_sp;
2077         fib_work->event = event;
2078
2079         switch (event) {
2080         case FIB_EVENT_ENTRY_ADD: /* fall through */
2081         case FIB_EVENT_ENTRY_DEL:
2082                 memcpy(&fib_work->fen_info, ptr, sizeof(fib_work->fen_info));
2083                 /* Take referece on fib_info to prevent it from being
2084                  * freed while work is queued. Release it afterwards.
2085                  */
2086                 fib_info_hold(fib_work->fen_info.fi);
2087                 break;
2088         }
2089
2090         mlxsw_core_schedule_work(&fib_work->work);
2091
2092         return NOTIFY_DONE;
2093 }
2094
2095 static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
2096 {
2097         struct mlxsw_sp *mlxsw_sp = container_of(nb, struct mlxsw_sp, fib_nb);
2098
2099         /* Flush pending FIB notifications and then flush the device's
2100          * table before requesting another dump. The FIB notification
2101          * block is unregistered, so no need to take RTNL.
2102          */
2103         mlxsw_core_flush_owq();
2104         mlxsw_sp_router_fib_flush(mlxsw_sp);
2105 }
2106
2107 int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
2108 {
2109         int err;
2110
2111         INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_neighs_list);
2112         err = __mlxsw_sp_router_init(mlxsw_sp);
2113         if (err)
2114                 return err;
2115
2116         err = rhashtable_init(&mlxsw_sp->router.nexthop_ht,
2117                               &mlxsw_sp_nexthop_ht_params);
2118         if (err)
2119                 goto err_nexthop_ht_init;
2120
2121         err = rhashtable_init(&mlxsw_sp->router.nexthop_group_ht,
2122                               &mlxsw_sp_nexthop_group_ht_params);
2123         if (err)
2124                 goto err_nexthop_group_ht_init;
2125
2126         mlxsw_sp_lpm_init(mlxsw_sp);
2127         err = mlxsw_sp_vrs_init(mlxsw_sp);
2128         if (err)
2129                 goto err_vrs_init;
2130
2131         err = mlxsw_sp_neigh_init(mlxsw_sp);
2132         if (err)
2133                 goto err_neigh_init;
2134
2135         mlxsw_sp->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
2136         err = register_fib_notifier(&mlxsw_sp->fib_nb,
2137                                     mlxsw_sp_router_fib_dump_flush);
2138         if (err)
2139                 goto err_register_fib_notifier;
2140
2141         return 0;
2142
2143 err_register_fib_notifier:
2144         mlxsw_sp_neigh_fini(mlxsw_sp);
2145 err_neigh_init:
2146         mlxsw_sp_vrs_fini(mlxsw_sp);
2147 err_vrs_init:
2148         rhashtable_destroy(&mlxsw_sp->router.nexthop_group_ht);
2149 err_nexthop_group_ht_init:
2150         rhashtable_destroy(&mlxsw_sp->router.nexthop_ht);
2151 err_nexthop_ht_init:
2152         __mlxsw_sp_router_fini(mlxsw_sp);
2153         return err;
2154 }
2155
2156 void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
2157 {
2158         unregister_fib_notifier(&mlxsw_sp->fib_nb);
2159         mlxsw_sp_neigh_fini(mlxsw_sp);
2160         mlxsw_sp_vrs_fini(mlxsw_sp);
2161         rhashtable_destroy(&mlxsw_sp->router.nexthop_group_ht);
2162         rhashtable_destroy(&mlxsw_sp->router.nexthop_ht);
2163         __mlxsw_sp_router_fini(mlxsw_sp);
2164 }