]> git.kernelconcepts.de Git - karo-tx-linux.git/blob - drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
mlxsw: spectrum_router: Use nexthop's scope to set action type
[karo-tx-linux.git] / drivers / net / ethernet / mellanox / mlxsw / spectrum_router.c
1 /*
2  * drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
3  * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
4  * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
5  * Copyright (c) 2016 Ido Schimmel <idosch@mellanox.com>
6  * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the names of the copyright holders nor the names of its
17  *    contributors may be used to endorse or promote products derived from
18  *    this software without specific prior written permission.
19  *
20  * Alternatively, this software may be distributed under the terms of the
21  * GNU General Public License ("GPL") version 2 as published by the Free
22  * Software Foundation.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36
37 #include <linux/kernel.h>
38 #include <linux/types.h>
39 #include <linux/rhashtable.h>
40 #include <linux/bitops.h>
41 #include <linux/in6.h>
42 #include <linux/notifier.h>
43 #include <net/netevent.h>
44 #include <net/neighbour.h>
45 #include <net/arp.h>
46 #include <net/ip_fib.h>
47
48 #include "spectrum.h"
49 #include "core.h"
50 #include "reg.h"
51
/* Walk every prefix length whose bit is set in @prefix_usage, storing the
 * current one in @prefix.
 */
#define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)		\
	for_each_set_bit(prefix, (prefix_usage)->b,			\
			 MLXSW_SP_PREFIX_COUNT)
54
55 static bool
56 mlxsw_sp_prefix_usage_subset(struct mlxsw_sp_prefix_usage *prefix_usage1,
57                              struct mlxsw_sp_prefix_usage *prefix_usage2)
58 {
59         unsigned char prefix;
60
61         mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage1) {
62                 if (!test_bit(prefix, prefix_usage2->b))
63                         return false;
64         }
65         return true;
66 }
67
68 static bool
69 mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
70                          struct mlxsw_sp_prefix_usage *prefix_usage2)
71 {
72         return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
73 }
74
75 static bool
76 mlxsw_sp_prefix_usage_none(struct mlxsw_sp_prefix_usage *prefix_usage)
77 {
78         struct mlxsw_sp_prefix_usage prefix_usage_none = {{ 0 } };
79
80         return mlxsw_sp_prefix_usage_eq(prefix_usage, &prefix_usage_none);
81 }
82
83 static void
84 mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
85                           struct mlxsw_sp_prefix_usage *prefix_usage2)
86 {
87         memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
88 }
89
90 static void
91 mlxsw_sp_prefix_usage_zero(struct mlxsw_sp_prefix_usage *prefix_usage)
92 {
93         memset(prefix_usage, 0, sizeof(*prefix_usage));
94 }
95
96 static void
97 mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
98                           unsigned char prefix_len)
99 {
100         set_bit(prefix_len, prefix_usage->b);
101 }
102
103 static void
104 mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
105                             unsigned char prefix_len)
106 {
107         clear_bit(prefix_len, prefix_usage->b);
108 }
109
110 struct mlxsw_sp_fib_key {
111         struct net_device *dev;
112         unsigned char addr[sizeof(struct in6_addr)];
113         unsigned char prefix_len;
114 };
115
/* Kind of a FIB entry, stored in struct mlxsw_sp_fib_entry::type.
 * NOTE(review): the exact handling of each kind lives in code outside this
 * excerpt - confirm there before relying on the names alone.
 */
enum mlxsw_sp_fib_entry_type {
	MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
	MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
	MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
};
121
122 struct mlxsw_sp_nexthop_group;
123
124 struct mlxsw_sp_fib_entry {
125         struct rhash_head ht_node;
126         struct list_head list;
127         struct mlxsw_sp_fib_key key;
128         enum mlxsw_sp_fib_entry_type type;
129         unsigned int ref_count;
130         u16 rif; /* used for action local */
131         struct mlxsw_sp_vr *vr;
132         struct fib_info *fi;
133         struct list_head nexthop_group_node;
134         struct mlxsw_sp_nexthop_group *nh_group;
135 };
136
137 struct mlxsw_sp_fib {
138         struct rhashtable ht;
139         struct list_head entry_list;
140         unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
141         struct mlxsw_sp_prefix_usage prefix_usage;
142 };
143
144 static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
145         .key_offset = offsetof(struct mlxsw_sp_fib_entry, key),
146         .head_offset = offsetof(struct mlxsw_sp_fib_entry, ht_node),
147         .key_len = sizeof(struct mlxsw_sp_fib_key),
148         .automatic_shrinking = true,
149 };
150
151 static int mlxsw_sp_fib_entry_insert(struct mlxsw_sp_fib *fib,
152                                      struct mlxsw_sp_fib_entry *fib_entry)
153 {
154         unsigned char prefix_len = fib_entry->key.prefix_len;
155         int err;
156
157         err = rhashtable_insert_fast(&fib->ht, &fib_entry->ht_node,
158                                      mlxsw_sp_fib_ht_params);
159         if (err)
160                 return err;
161         list_add_tail(&fib_entry->list, &fib->entry_list);
162         if (fib->prefix_ref_count[prefix_len]++ == 0)
163                 mlxsw_sp_prefix_usage_set(&fib->prefix_usage, prefix_len);
164         return 0;
165 }
166
167 static void mlxsw_sp_fib_entry_remove(struct mlxsw_sp_fib *fib,
168                                       struct mlxsw_sp_fib_entry *fib_entry)
169 {
170         unsigned char prefix_len = fib_entry->key.prefix_len;
171
172         if (--fib->prefix_ref_count[prefix_len] == 0)
173                 mlxsw_sp_prefix_usage_clear(&fib->prefix_usage, prefix_len);
174         list_del(&fib_entry->list);
175         rhashtable_remove_fast(&fib->ht, &fib_entry->ht_node,
176                                mlxsw_sp_fib_ht_params);
177 }
178
179 static struct mlxsw_sp_fib_entry *
180 mlxsw_sp_fib_entry_create(struct mlxsw_sp_fib *fib, const void *addr,
181                           size_t addr_len, unsigned char prefix_len,
182                           struct net_device *dev)
183 {
184         struct mlxsw_sp_fib_entry *fib_entry;
185
186         fib_entry = kzalloc(sizeof(*fib_entry), GFP_KERNEL);
187         if (!fib_entry)
188                 return NULL;
189         fib_entry->key.dev = dev;
190         memcpy(fib_entry->key.addr, addr, addr_len);
191         fib_entry->key.prefix_len = prefix_len;
192         return fib_entry;
193 }
194
/* Release the memory of an entry obtained from mlxsw_sp_fib_entry_create(). */
static void mlxsw_sp_fib_entry_destroy(struct mlxsw_sp_fib_entry *fib_entry)
{
	kfree(fib_entry);
}
199
200 static struct mlxsw_sp_fib_entry *
201 mlxsw_sp_fib_entry_lookup(struct mlxsw_sp_fib *fib, const void *addr,
202                           size_t addr_len, unsigned char prefix_len,
203                           struct net_device *dev)
204 {
205         struct mlxsw_sp_fib_key key;
206
207         memset(&key, 0, sizeof(key));
208         key.dev = dev;
209         memcpy(key.addr, addr, addr_len);
210         key.prefix_len = prefix_len;
211         return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
212 }
213
214 static struct mlxsw_sp_fib *mlxsw_sp_fib_create(void)
215 {
216         struct mlxsw_sp_fib *fib;
217         int err;
218
219         fib = kzalloc(sizeof(*fib), GFP_KERNEL);
220         if (!fib)
221                 return ERR_PTR(-ENOMEM);
222         err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
223         if (err)
224                 goto err_rhashtable_init;
225         INIT_LIST_HEAD(&fib->entry_list);
226         return fib;
227
228 err_rhashtable_init:
229         kfree(fib);
230         return ERR_PTR(err);
231 }
232
233 static void mlxsw_sp_fib_destroy(struct mlxsw_sp_fib *fib)
234 {
235         rhashtable_destroy(&fib->ht);
236         kfree(fib);
237 }
238
239 static struct mlxsw_sp_lpm_tree *
240 mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp, bool one_reserved)
241 {
242         static struct mlxsw_sp_lpm_tree *lpm_tree;
243         int i;
244
245         for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) {
246                 lpm_tree = &mlxsw_sp->router.lpm_trees[i];
247                 if (lpm_tree->ref_count == 0) {
248                         if (one_reserved)
249                                 one_reserved = false;
250                         else
251                                 return lpm_tree;
252                 }
253         }
254         return NULL;
255 }
256
257 static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
258                                    struct mlxsw_sp_lpm_tree *lpm_tree)
259 {
260         char ralta_pl[MLXSW_REG_RALTA_LEN];
261
262         mlxsw_reg_ralta_pack(ralta_pl, true,
263                              (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
264                              lpm_tree->id);
265         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
266 }
267
268 static int mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
269                                   struct mlxsw_sp_lpm_tree *lpm_tree)
270 {
271         char ralta_pl[MLXSW_REG_RALTA_LEN];
272
273         mlxsw_reg_ralta_pack(ralta_pl, false,
274                              (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
275                              lpm_tree->id);
276         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
277 }
278
279 static int
280 mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
281                                   struct mlxsw_sp_prefix_usage *prefix_usage,
282                                   struct mlxsw_sp_lpm_tree *lpm_tree)
283 {
284         char ralst_pl[MLXSW_REG_RALST_LEN];
285         u8 root_bin = 0;
286         u8 prefix;
287         u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;
288
289         mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
290                 root_bin = prefix;
291
292         mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
293         mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
294                 if (prefix == 0)
295                         continue;
296                 mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
297                                          MLXSW_REG_RALST_BIN_NO_CHILD);
298                 last_prefix = prefix;
299         }
300         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
301 }
302
303 static struct mlxsw_sp_lpm_tree *
304 mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
305                          struct mlxsw_sp_prefix_usage *prefix_usage,
306                          enum mlxsw_sp_l3proto proto, bool one_reserved)
307 {
308         struct mlxsw_sp_lpm_tree *lpm_tree;
309         int err;
310
311         lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp, one_reserved);
312         if (!lpm_tree)
313                 return ERR_PTR(-EBUSY);
314         lpm_tree->proto = proto;
315         err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
316         if (err)
317                 return ERR_PTR(err);
318
319         err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
320                                                 lpm_tree);
321         if (err)
322                 goto err_left_struct_set;
323         memcpy(&lpm_tree->prefix_usage, prefix_usage,
324                sizeof(lpm_tree->prefix_usage));
325         return lpm_tree;
326
327 err_left_struct_set:
328         mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
329         return ERR_PTR(err);
330 }
331
/* Counterpart of mlxsw_sp_lpm_tree_create(): release @lpm_tree in the
 * device. Returns the result of mlxsw_sp_lpm_tree_free().
 */
static int mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_lpm_tree *lpm_tree)
{
	int err = mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);

	return err;
}
337
338 static struct mlxsw_sp_lpm_tree *
339 mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
340                       struct mlxsw_sp_prefix_usage *prefix_usage,
341                       enum mlxsw_sp_l3proto proto, bool one_reserved)
342 {
343         struct mlxsw_sp_lpm_tree *lpm_tree;
344         int i;
345
346         for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) {
347                 lpm_tree = &mlxsw_sp->router.lpm_trees[i];
348                 if (lpm_tree->ref_count != 0 &&
349                     lpm_tree->proto == proto &&
350                     mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
351                                              prefix_usage))
352                         goto inc_ref_count;
353         }
354         lpm_tree = mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage,
355                                             proto, one_reserved);
356         if (IS_ERR(lpm_tree))
357                 return lpm_tree;
358
359 inc_ref_count:
360         lpm_tree->ref_count++;
361         return lpm_tree;
362 }
363
364 static int mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
365                                  struct mlxsw_sp_lpm_tree *lpm_tree)
366 {
367         if (--lpm_tree->ref_count == 0)
368                 return mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
369         return 0;
370 }
371
372 static void mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
373 {
374         struct mlxsw_sp_lpm_tree *lpm_tree;
375         int i;
376
377         for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) {
378                 lpm_tree = &mlxsw_sp->router.lpm_trees[i];
379                 lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
380         }
381 }
382
383 static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
384 {
385         struct mlxsw_sp_vr *vr;
386         int i;
387
388         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
389                 vr = &mlxsw_sp->router.vrs[i];
390                 if (!vr->used)
391                         return vr;
392         }
393         return NULL;
394 }
395
396 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
397                                      struct mlxsw_sp_vr *vr)
398 {
399         char raltb_pl[MLXSW_REG_RALTB_LEN];
400
401         mlxsw_reg_raltb_pack(raltb_pl, vr->id,
402                              (enum mlxsw_reg_ralxx_protocol) vr->proto,
403                              vr->lpm_tree->id);
404         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
405 }
406
407 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
408                                        struct mlxsw_sp_vr *vr)
409 {
410         char raltb_pl[MLXSW_REG_RALTB_LEN];
411
412         /* Bind to tree 0 which is default */
413         mlxsw_reg_raltb_pack(raltb_pl, vr->id,
414                              (enum mlxsw_reg_ralxx_protocol) vr->proto, 0);
415         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
416 }
417
418 static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
419 {
420         /* For our purpose, squash main and local table into one */
421         if (tb_id == RT_TABLE_LOCAL)
422                 tb_id = RT_TABLE_MAIN;
423         return tb_id;
424 }
425
426 static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
427                                             u32 tb_id,
428                                             enum mlxsw_sp_l3proto proto)
429 {
430         struct mlxsw_sp_vr *vr;
431         int i;
432
433         tb_id = mlxsw_sp_fix_tb_id(tb_id);
434
435         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
436                 vr = &mlxsw_sp->router.vrs[i];
437                 if (vr->used && vr->proto == proto && vr->tb_id == tb_id)
438                         return vr;
439         }
440         return NULL;
441 }
442
443 static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
444                                               unsigned char prefix_len,
445                                               u32 tb_id,
446                                               enum mlxsw_sp_l3proto proto)
447 {
448         struct mlxsw_sp_prefix_usage req_prefix_usage;
449         struct mlxsw_sp_lpm_tree *lpm_tree;
450         struct mlxsw_sp_vr *vr;
451         int err;
452
453         vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
454         if (!vr)
455                 return ERR_PTR(-EBUSY);
456         vr->fib = mlxsw_sp_fib_create();
457         if (IS_ERR(vr->fib))
458                 return ERR_CAST(vr->fib);
459
460         vr->proto = proto;
461         vr->tb_id = tb_id;
462         mlxsw_sp_prefix_usage_zero(&req_prefix_usage);
463         mlxsw_sp_prefix_usage_set(&req_prefix_usage, prefix_len);
464         lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
465                                          proto, true);
466         if (IS_ERR(lpm_tree)) {
467                 err = PTR_ERR(lpm_tree);
468                 goto err_tree_get;
469         }
470         vr->lpm_tree = lpm_tree;
471         err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, vr);
472         if (err)
473                 goto err_tree_bind;
474
475         vr->used = true;
476         return vr;
477
478 err_tree_bind:
479         mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree);
480 err_tree_get:
481         mlxsw_sp_fib_destroy(vr->fib);
482
483         return ERR_PTR(err);
484 }
485
486 static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp,
487                                 struct mlxsw_sp_vr *vr)
488 {
489         mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, vr);
490         mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree);
491         mlxsw_sp_fib_destroy(vr->fib);
492         vr->used = false;
493 }
494
495 static int
496 mlxsw_sp_vr_lpm_tree_check(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr,
497                            struct mlxsw_sp_prefix_usage *req_prefix_usage)
498 {
499         struct mlxsw_sp_lpm_tree *lpm_tree;
500
501         if (mlxsw_sp_prefix_usage_eq(req_prefix_usage,
502                                      &vr->lpm_tree->prefix_usage))
503                 return 0;
504
505         lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, req_prefix_usage,
506                                          vr->proto, false);
507         if (IS_ERR(lpm_tree)) {
508                 /* We failed to get a tree according to the required
509                  * prefix usage. However, the current tree might be still good
510                  * for us if our requirement is subset of the prefixes used
511                  * in the tree.
512                  */
513                 if (mlxsw_sp_prefix_usage_subset(req_prefix_usage,
514                                                  &vr->lpm_tree->prefix_usage))
515                         return 0;
516                 return PTR_ERR(lpm_tree);
517         }
518
519         mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, vr);
520         mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree);
521         vr->lpm_tree = lpm_tree;
522         return mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, vr);
523 }
524
525 static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp,
526                                            unsigned char prefix_len,
527                                            u32 tb_id,
528                                            enum mlxsw_sp_l3proto proto)
529 {
530         struct mlxsw_sp_vr *vr;
531         int err;
532
533         tb_id = mlxsw_sp_fix_tb_id(tb_id);
534         vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id, proto);
535         if (!vr) {
536                 vr = mlxsw_sp_vr_create(mlxsw_sp, prefix_len, tb_id, proto);
537                 if (IS_ERR(vr))
538                         return vr;
539         } else {
540                 struct mlxsw_sp_prefix_usage req_prefix_usage;
541
542                 mlxsw_sp_prefix_usage_cpy(&req_prefix_usage,
543                                           &vr->fib->prefix_usage);
544                 mlxsw_sp_prefix_usage_set(&req_prefix_usage, prefix_len);
545                 /* Need to replace LPM tree in case new prefix is required. */
546                 err = mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, vr,
547                                                  &req_prefix_usage);
548                 if (err)
549                         return ERR_PTR(err);
550         }
551         return vr;
552 }
553
554 static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr)
555 {
556         /* Destroy virtual router entity in case the associated FIB is empty
557          * and allow it to be used for other tables in future. Otherwise,
558          * check if some prefix usage did not disappear and change tree if
559          * that is the case. Note that in case new, smaller tree cannot be
560          * allocated, the original one will be kept being used.
561          */
562         if (mlxsw_sp_prefix_usage_none(&vr->fib->prefix_usage))
563                 mlxsw_sp_vr_destroy(mlxsw_sp, vr);
564         else
565                 mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, vr,
566                                            &vr->fib->prefix_usage);
567 }
568
569 static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
570 {
571         struct mlxsw_sp_vr *vr;
572         u64 max_vrs;
573         int i;
574
575         if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
576                 return -EIO;
577
578         max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
579         mlxsw_sp->router.vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
580                                        GFP_KERNEL);
581         if (!mlxsw_sp->router.vrs)
582                 return -ENOMEM;
583
584         for (i = 0; i < max_vrs; i++) {
585                 vr = &mlxsw_sp->router.vrs[i];
586                 vr->id = i;
587         }
588
589         return 0;
590 }
591
592 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);
593
594 static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
595 {
596         /* At this stage we're guaranteed not to have new incoming
597          * FIB notifications and the work queue is free from FIBs
598          * sitting on top of mlxsw netdevs. However, we can still
599          * have other FIBs queued. Flush the queue before flushing
600          * the device's tables. No need for locks, as we're the only
601          * writer.
602          */
603         mlxsw_core_flush_owq();
604         mlxsw_sp_router_fib_flush(mlxsw_sp);
605         kfree(mlxsw_sp->router.vrs);
606 }
607
/* Hash key of a neighbour entry: the kernel neighbour it mirrors. */
struct mlxsw_sp_neigh_key {
	struct neighbour *n;
};
611
612 struct mlxsw_sp_neigh_entry {
613         struct rhash_head ht_node;
614         struct mlxsw_sp_neigh_key key;
615         u16 rif;
616         bool connected;
617         unsigned char ha[ETH_ALEN];
618         struct list_head nexthop_list; /* list of nexthops using
619                                         * this neigh entry
620                                         */
621         struct list_head nexthop_neighs_list_node;
622 };
623
624 static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
625         .key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
626         .head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
627         .key_len = sizeof(struct mlxsw_sp_neigh_key),
628 };
629
630 static struct mlxsw_sp_neigh_entry *
631 mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
632                            u16 rif)
633 {
634         struct mlxsw_sp_neigh_entry *neigh_entry;
635
636         neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL);
637         if (!neigh_entry)
638                 return NULL;
639
640         neigh_entry->key.n = n;
641         neigh_entry->rif = rif;
642         INIT_LIST_HEAD(&neigh_entry->nexthop_list);
643
644         return neigh_entry;
645 }
646
/* Release the memory of an entry from mlxsw_sp_neigh_entry_alloc(). */
static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	kfree(neigh_entry);
}
651
652 static int
653 mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
654                             struct mlxsw_sp_neigh_entry *neigh_entry)
655 {
656         return rhashtable_insert_fast(&mlxsw_sp->router.neigh_ht,
657                                       &neigh_entry->ht_node,
658                                       mlxsw_sp_neigh_ht_params);
659 }
660
661 static void
662 mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
663                             struct mlxsw_sp_neigh_entry *neigh_entry)
664 {
665         rhashtable_remove_fast(&mlxsw_sp->router.neigh_ht,
666                                &neigh_entry->ht_node,
667                                mlxsw_sp_neigh_ht_params);
668 }
669
670 static struct mlxsw_sp_neigh_entry *
671 mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
672 {
673         struct mlxsw_sp_neigh_entry *neigh_entry;
674         struct mlxsw_sp_rif *r;
675         int err;
676
677         r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
678         if (!r)
679                 return ERR_PTR(-EINVAL);
680
681         neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, r->rif);
682         if (!neigh_entry)
683                 return ERR_PTR(-ENOMEM);
684
685         err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
686         if (err)
687                 goto err_neigh_entry_insert;
688
689         return neigh_entry;
690
691 err_neigh_entry_insert:
692         mlxsw_sp_neigh_entry_free(neigh_entry);
693         return ERR_PTR(err);
694 }
695
/* Counterpart of mlxsw_sp_neigh_entry_create(): take @neigh_entry out of
 * the hash table and free it.
 */
static void
mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp,
			     struct mlxsw_sp_neigh_entry *neigh_entry)
{
	mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);

	mlxsw_sp_neigh_entry_free(neigh_entry);
}
703
704 static struct mlxsw_sp_neigh_entry *
705 mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
706 {
707         struct mlxsw_sp_neigh_key key;
708
709         key.n = n;
710         return rhashtable_lookup_fast(&mlxsw_sp->router.neigh_ht,
711                                       &key, mlxsw_sp_neigh_ht_params);
712 }
713
714 static void
715 mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
716 {
717         unsigned long interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
718
719         mlxsw_sp->router.neighs_update.interval = jiffies_to_msecs(interval);
720 }
721
722 static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
723                                                    char *rauhtd_pl,
724                                                    int ent_index)
725 {
726         struct net_device *dev;
727         struct neighbour *n;
728         __be32 dipn;
729         u32 dip;
730         u16 rif;
731
732         mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);
733
734         if (!mlxsw_sp->rifs[rif]) {
735                 dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
736                 return;
737         }
738
739         dipn = htonl(dip);
740         dev = mlxsw_sp->rifs[rif]->dev;
741         n = neigh_lookup(&arp_tbl, &dipn, dev);
742         if (!n) {
743                 netdev_err(dev, "Failed to find matching neighbour for IP=%pI4h\n",
744                            &dip);
745                 return;
746         }
747
748         netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
749         neigh_event_send(n, NULL);
750         neigh_release(n);
751 }
752
753 static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
754                                                    char *rauhtd_pl,
755                                                    int rec_index)
756 {
757         u8 num_entries;
758         int i;
759
760         num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
761                                                                 rec_index);
762         /* Hardware starts counting at 0, so add 1. */
763         num_entries++;
764
765         /* Each record consists of several neighbour entries. */
766         for (i = 0; i < num_entries; i++) {
767                 int ent_index;
768
769                 ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
770                 mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
771                                                        ent_index);
772         }
773
774 }
775
776 static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
777                                               char *rauhtd_pl, int rec_index)
778 {
779         switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
780         case MLXSW_REG_RAUHTD_TYPE_IPV4:
781                 mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
782                                                        rec_index);
783                 break;
784         case MLXSW_REG_RAUHTD_TYPE_IPV6:
785                 WARN_ON_ONCE(1);
786                 break;
787         }
788 }
789
790 static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
791 {
792         u8 num_rec, last_rec_index, num_entries;
793
794         num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
795         last_rec_index = num_rec - 1;
796
797         if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
798                 return false;
799         if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
800             MLXSW_REG_RAUHTD_TYPE_IPV6)
801                 return true;
802
803         num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
804                                                                 last_rec_index);
805         if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
806                 return true;
807         return false;
808 }
809
810 static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
811 {
812         char *rauhtd_pl;
813         u8 num_rec;
814         int i, err;
815
816         rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
817         if (!rauhtd_pl)
818                 return -ENOMEM;
819
820         /* Make sure the neighbour's netdev isn't removed in the
821          * process.
822          */
823         rtnl_lock();
824         do {
825                 mlxsw_reg_rauhtd_pack(rauhtd_pl, MLXSW_REG_RAUHTD_TYPE_IPV4);
826                 err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
827                                       rauhtd_pl);
828                 if (err) {
829                         dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour talbe\n");
830                         break;
831                 }
832                 num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
833                 for (i = 0; i < num_rec; i++)
834                         mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
835                                                           i);
836         } while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
837         rtnl_unlock();
838
839         kfree(rauhtd_pl);
840         return err;
841 }
842
843 static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
844 {
845         struct mlxsw_sp_neigh_entry *neigh_entry;
846
847         /* Take RTNL mutex here to prevent lists from changes */
848         rtnl_lock();
849         list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list,
850                             nexthop_neighs_list_node)
851                 /* If this neigh have nexthops, make the kernel think this neigh
852                  * is active regardless of the traffic.
853                  */
854                 neigh_event_send(neigh_entry->key.n, NULL);
855         rtnl_unlock();
856 }
857
858 static void
859 mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
860 {
861         unsigned long interval = mlxsw_sp->router.neighs_update.interval;
862
863         mlxsw_core_schedule_dw(&mlxsw_sp->router.neighs_update.dw,
864                                msecs_to_jiffies(interval));
865 }
866
867 static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
868 {
869         struct mlxsw_sp *mlxsw_sp = container_of(work, struct mlxsw_sp,
870                                                  router.neighs_update.dw.work);
871         int err;
872
873         err = mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp);
874         if (err)
875                 dev_err(mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity");
876
877         mlxsw_sp_router_neighs_update_nh(mlxsw_sp);
878
879         mlxsw_sp_router_neighs_update_work_schedule(mlxsw_sp);
880 }
881
882 static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
883 {
884         struct mlxsw_sp_neigh_entry *neigh_entry;
885         struct mlxsw_sp *mlxsw_sp = container_of(work, struct mlxsw_sp,
886                                                  router.nexthop_probe_dw.work);
887
888         /* Iterate over nexthop neighbours, find those who are unresolved and
889          * send arp on them. This solves the chicken-egg problem when
890          * the nexthop wouldn't get offloaded until the neighbor is resolved
891          * but it wouldn't get resolved ever in case traffic is flowing in HW
892          * using different nexthop.
893          *
894          * Take RTNL mutex here to prevent lists from changes.
895          */
896         rtnl_lock();
897         list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list,
898                             nexthop_neighs_list_node)
899                 if (!neigh_entry->connected)
900                         neigh_event_send(neigh_entry->key.n, NULL);
901         rtnl_unlock();
902
903         mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw,
904                                MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
905 }
906
907 static void
908 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
909                               struct mlxsw_sp_neigh_entry *neigh_entry,
910                               bool removing);
911
912 static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding)
913 {
914         return adding ? MLXSW_REG_RAUHT_OP_WRITE_ADD :
915                         MLXSW_REG_RAUHT_OP_WRITE_DELETE;
916 }
917
918 static void
919 mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
920                                 struct mlxsw_sp_neigh_entry *neigh_entry,
921                                 enum mlxsw_reg_rauht_op op)
922 {
923         struct neighbour *n = neigh_entry->key.n;
924         u32 dip = ntohl(*((__be32 *) n->primary_key));
925         char rauht_pl[MLXSW_REG_RAUHT_LEN];
926
927         mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
928                               dip);
929         mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
930 }
931
932 static void
933 mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
934                             struct mlxsw_sp_neigh_entry *neigh_entry,
935                             bool adding)
936 {
937         if (!adding && !neigh_entry->connected)
938                 return;
939         neigh_entry->connected = adding;
940         if (neigh_entry->key.n->tbl == &arp_tbl)
941                 mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
942                                                 mlxsw_sp_rauht_op(adding));
943         else
944                 WARN_ON_ONCE(1);
945 }
946
947 struct mlxsw_sp_neigh_event_work {
948         struct work_struct work;
949         struct mlxsw_sp *mlxsw_sp;
950         struct neighbour *n;
951 };
952
953 static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
954 {
955         struct mlxsw_sp_neigh_event_work *neigh_work =
956                 container_of(work, struct mlxsw_sp_neigh_event_work, work);
957         struct mlxsw_sp *mlxsw_sp = neigh_work->mlxsw_sp;
958         struct mlxsw_sp_neigh_entry *neigh_entry;
959         struct neighbour *n = neigh_work->n;
960         unsigned char ha[ETH_ALEN];
961         bool entry_connected;
962         u8 nud_state, dead;
963
964         /* If these parameters are changed after we release the lock,
965          * then we are guaranteed to receive another event letting us
966          * know about it.
967          */
968         read_lock_bh(&n->lock);
969         memcpy(ha, n->ha, ETH_ALEN);
970         nud_state = n->nud_state;
971         dead = n->dead;
972         read_unlock_bh(&n->lock);
973
974         rtnl_lock();
975         entry_connected = nud_state & NUD_VALID && !dead;
976         neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
977         if (!entry_connected && !neigh_entry)
978                 goto out;
979         if (!neigh_entry) {
980                 neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
981                 if (IS_ERR(neigh_entry))
982                         goto out;
983         }
984
985         memcpy(neigh_entry->ha, ha, ETH_ALEN);
986         mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
987         mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected);
988
989         if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
990                 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
991
992 out:
993         rtnl_unlock();
994         neigh_release(n);
995         kfree(neigh_work);
996 }
997
998 int mlxsw_sp_router_netevent_event(struct notifier_block *unused,
999                                    unsigned long event, void *ptr)
1000 {
1001         struct mlxsw_sp_neigh_event_work *neigh_work;
1002         struct mlxsw_sp_port *mlxsw_sp_port;
1003         struct mlxsw_sp *mlxsw_sp;
1004         unsigned long interval;
1005         struct neigh_parms *p;
1006         struct neighbour *n;
1007
1008         switch (event) {
1009         case NETEVENT_DELAY_PROBE_TIME_UPDATE:
1010                 p = ptr;
1011
1012                 /* We don't care about changes in the default table. */
1013                 if (!p->dev || p->tbl != &arp_tbl)
1014                         return NOTIFY_DONE;
1015
1016                 /* We are in atomic context and can't take RTNL mutex,
1017                  * so use RCU variant to walk the device chain.
1018                  */
1019                 mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev);
1020                 if (!mlxsw_sp_port)
1021                         return NOTIFY_DONE;
1022
1023                 mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
1024                 interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
1025                 mlxsw_sp->router.neighs_update.interval = interval;
1026
1027                 mlxsw_sp_port_dev_put(mlxsw_sp_port);
1028                 break;
1029         case NETEVENT_NEIGH_UPDATE:
1030                 n = ptr;
1031
1032                 if (n->tbl != &arp_tbl)
1033                         return NOTIFY_DONE;
1034
1035                 mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev);
1036                 if (!mlxsw_sp_port)
1037                         return NOTIFY_DONE;
1038
1039                 neigh_work = kzalloc(sizeof(*neigh_work), GFP_ATOMIC);
1040                 if (!neigh_work) {
1041                         mlxsw_sp_port_dev_put(mlxsw_sp_port);
1042                         return NOTIFY_BAD;
1043                 }
1044
1045                 INIT_WORK(&neigh_work->work, mlxsw_sp_router_neigh_event_work);
1046                 neigh_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
1047                 neigh_work->n = n;
1048
1049                 /* Take a reference to ensure the neighbour won't be
1050                  * destructed until we drop the reference in delayed
1051                  * work.
1052                  */
1053                 neigh_clone(n);
1054                 mlxsw_core_schedule_work(&neigh_work->work);
1055                 mlxsw_sp_port_dev_put(mlxsw_sp_port);
1056                 break;
1057         }
1058
1059         return NOTIFY_DONE;
1060 }
1061
1062 static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
1063 {
1064         int err;
1065
1066         err = rhashtable_init(&mlxsw_sp->router.neigh_ht,
1067                               &mlxsw_sp_neigh_ht_params);
1068         if (err)
1069                 return err;
1070
1071         /* Initialize the polling interval according to the default
1072          * table.
1073          */
1074         mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);
1075
1076         /* Create the delayed works for the activity_update */
1077         INIT_DELAYED_WORK(&mlxsw_sp->router.neighs_update.dw,
1078                           mlxsw_sp_router_neighs_update_work);
1079         INIT_DELAYED_WORK(&mlxsw_sp->router.nexthop_probe_dw,
1080                           mlxsw_sp_router_probe_unresolved_nexthops);
1081         mlxsw_core_schedule_dw(&mlxsw_sp->router.neighs_update.dw, 0);
1082         mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw, 0);
1083         return 0;
1084 }
1085
1086 static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
1087 {
1088         cancel_delayed_work_sync(&mlxsw_sp->router.neighs_update.dw);
1089         cancel_delayed_work_sync(&mlxsw_sp->router.nexthop_probe_dw);
1090         rhashtable_destroy(&mlxsw_sp->router.neigh_ht);
1091 }
1092
/* Hash table key of a nexthop: the kernel nexthop it mirrors. */
struct mlxsw_sp_nexthop_key {
	struct fib_nh *fib_nh;
};
1096
1097 struct mlxsw_sp_nexthop {
1098         struct list_head neigh_list_node; /* member of neigh entry list */
1099         struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group
1100                                                 * this belongs to
1101                                                 */
1102         struct rhash_head ht_node;
1103         struct mlxsw_sp_nexthop_key key;
1104         u8 should_offload:1, /* set indicates this neigh is connected and
1105                               * should be put to KVD linear area of this group.
1106                               */
1107            offloaded:1, /* set in case the neigh is actually put into
1108                          * KVD linear area of this group.
1109                          */
1110            update:1; /* set indicates that MAC of this neigh should be
1111                       * updated in HW
1112                       */
1113         struct mlxsw_sp_neigh_entry *neigh_entry;
1114 };
1115
/* Hash table key of a nexthop group: the kernel fib_info it was built
 * from.
 */
struct mlxsw_sp_nexthop_group_key {
	struct fib_info *fi;
};
1119
1120 struct mlxsw_sp_nexthop_group {
1121         struct rhash_head ht_node;
1122         struct list_head fib_list; /* list of fib entries that use this group */
1123         struct mlxsw_sp_nexthop_group_key key;
1124         u8 adj_index_valid:1;
1125         u32 adj_index;
1126         u16 ecmp_size;
1127         u16 count;
1128         struct mlxsw_sp_nexthop nexthops[0];
1129 };
1130
1131 static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
1132         .key_offset = offsetof(struct mlxsw_sp_nexthop_group, key),
1133         .head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node),
1134         .key_len = sizeof(struct mlxsw_sp_nexthop_group_key),
1135 };
1136
1137 static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
1138                                          struct mlxsw_sp_nexthop_group *nh_grp)
1139 {
1140         return rhashtable_insert_fast(&mlxsw_sp->router.nexthop_group_ht,
1141                                       &nh_grp->ht_node,
1142                                       mlxsw_sp_nexthop_group_ht_params);
1143 }
1144
1145 static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp,
1146                                           struct mlxsw_sp_nexthop_group *nh_grp)
1147 {
1148         rhashtable_remove_fast(&mlxsw_sp->router.nexthop_group_ht,
1149                                &nh_grp->ht_node,
1150                                mlxsw_sp_nexthop_group_ht_params);
1151 }
1152
1153 static struct mlxsw_sp_nexthop_group *
1154 mlxsw_sp_nexthop_group_lookup(struct mlxsw_sp *mlxsw_sp,
1155                               struct mlxsw_sp_nexthop_group_key key)
1156 {
1157         return rhashtable_lookup_fast(&mlxsw_sp->router.nexthop_group_ht, &key,
1158                                       mlxsw_sp_nexthop_group_ht_params);
1159 }
1160
1161 static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = {
1162         .key_offset = offsetof(struct mlxsw_sp_nexthop, key),
1163         .head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node),
1164         .key_len = sizeof(struct mlxsw_sp_nexthop_key),
1165 };
1166
1167 static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp,
1168                                    struct mlxsw_sp_nexthop *nh)
1169 {
1170         return rhashtable_insert_fast(&mlxsw_sp->router.nexthop_ht,
1171                                       &nh->ht_node, mlxsw_sp_nexthop_ht_params);
1172 }
1173
1174 static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp,
1175                                     struct mlxsw_sp_nexthop *nh)
1176 {
1177         rhashtable_remove_fast(&mlxsw_sp->router.nexthop_ht, &nh->ht_node,
1178                                mlxsw_sp_nexthop_ht_params);
1179 }
1180
1181 static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
1182                                              struct mlxsw_sp_vr *vr,
1183                                              u32 adj_index, u16 ecmp_size,
1184                                              u32 new_adj_index,
1185                                              u16 new_ecmp_size)
1186 {
1187         char raleu_pl[MLXSW_REG_RALEU_LEN];
1188
1189         mlxsw_reg_raleu_pack(raleu_pl,
1190                              (enum mlxsw_reg_ralxx_protocol) vr->proto, vr->id,
1191                              adj_index, ecmp_size, new_adj_index,
1192                              new_ecmp_size);
1193         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
1194 }
1195
1196 static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
1197                                           struct mlxsw_sp_nexthop_group *nh_grp,
1198                                           u32 old_adj_index, u16 old_ecmp_size)
1199 {
1200         struct mlxsw_sp_fib_entry *fib_entry;
1201         struct mlxsw_sp_vr *vr = NULL;
1202         int err;
1203
1204         list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
1205                 if (vr == fib_entry->vr)
1206                         continue;
1207                 vr = fib_entry->vr;
1208                 err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, vr,
1209                                                         old_adj_index,
1210                                                         old_ecmp_size,
1211                                                         nh_grp->adj_index,
1212                                                         nh_grp->ecmp_size);
1213                 if (err)
1214                         return err;
1215         }
1216         return 0;
1217 }
1218
1219 static int mlxsw_sp_nexthop_mac_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
1220                                        struct mlxsw_sp_nexthop *nh)
1221 {
1222         struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
1223         char ratr_pl[MLXSW_REG_RATR_LEN];
1224
1225         mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
1226                             true, adj_index, neigh_entry->rif);
1227         mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
1228         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
1229 }
1230
1231 static int
1232 mlxsw_sp_nexthop_group_mac_update(struct mlxsw_sp *mlxsw_sp,
1233                                   struct mlxsw_sp_nexthop_group *nh_grp,
1234                                   bool reallocate)
1235 {
1236         u32 adj_index = nh_grp->adj_index; /* base */
1237         struct mlxsw_sp_nexthop *nh;
1238         int i;
1239         int err;
1240
1241         for (i = 0; i < nh_grp->count; i++) {
1242                 nh = &nh_grp->nexthops[i];
1243
1244                 if (!nh->should_offload) {
1245                         nh->offloaded = 0;
1246                         continue;
1247                 }
1248
1249                 if (nh->update || reallocate) {
1250                         err = mlxsw_sp_nexthop_mac_update(mlxsw_sp,
1251                                                           adj_index, nh);
1252                         if (err)
1253                                 return err;
1254                         nh->update = 0;
1255                         nh->offloaded = 1;
1256                 }
1257                 adj_index++;
1258         }
1259         return 0;
1260 }
1261
1262 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
1263                                      struct mlxsw_sp_fib_entry *fib_entry);
1264
1265 static int
1266 mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
1267                                     struct mlxsw_sp_nexthop_group *nh_grp)
1268 {
1269         struct mlxsw_sp_fib_entry *fib_entry;
1270         int err;
1271
1272         list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
1273                 err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1274                 if (err)
1275                         return err;
1276         }
1277         return 0;
1278 }
1279
1280 static void
1281 mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
1282                                struct mlxsw_sp_nexthop_group *nh_grp)
1283 {
1284         struct mlxsw_sp_nexthop *nh;
1285         bool offload_change = false;
1286         u32 adj_index;
1287         u16 ecmp_size = 0;
1288         bool old_adj_index_valid;
1289         u32 old_adj_index;
1290         u16 old_ecmp_size;
1291         int ret;
1292         int i;
1293         int err;
1294
1295         for (i = 0; i < nh_grp->count; i++) {
1296                 nh = &nh_grp->nexthops[i];
1297
1298                 if (nh->should_offload ^ nh->offloaded) {
1299                         offload_change = true;
1300                         if (nh->should_offload)
1301                                 nh->update = 1;
1302                 }
1303                 if (nh->should_offload)
1304                         ecmp_size++;
1305         }
1306         if (!offload_change) {
1307                 /* Nothing was added or removed, so no need to reallocate. Just
1308                  * update MAC on existing adjacency indexes.
1309                  */
1310                 err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp,
1311                                                         false);
1312                 if (err) {
1313                         dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
1314                         goto set_trap;
1315                 }
1316                 return;
1317         }
1318         if (!ecmp_size)
1319                 /* No neigh of this group is connected so we just set
1320                  * the trap and let everthing flow through kernel.
1321                  */
1322                 goto set_trap;
1323
1324         ret = mlxsw_sp_kvdl_alloc(mlxsw_sp, ecmp_size);
1325         if (ret < 0) {
1326                 /* We ran out of KVD linear space, just set the
1327                  * trap and let everything flow through kernel.
1328                  */
1329                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
1330                 goto set_trap;
1331         }
1332         adj_index = ret;
1333         old_adj_index_valid = nh_grp->adj_index_valid;
1334         old_adj_index = nh_grp->adj_index;
1335         old_ecmp_size = nh_grp->ecmp_size;
1336         nh_grp->adj_index_valid = 1;
1337         nh_grp->adj_index = adj_index;
1338         nh_grp->ecmp_size = ecmp_size;
1339         err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp, true);
1340         if (err) {
1341                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
1342                 goto set_trap;
1343         }
1344
1345         if (!old_adj_index_valid) {
1346                 /* The trap was set for fib entries, so we have to call
1347                  * fib entry update to unset it and use adjacency index.
1348                  */
1349                 err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
1350                 if (err) {
1351                         dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
1352                         goto set_trap;
1353                 }
1354                 return;
1355         }
1356
1357         err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
1358                                              old_adj_index, old_ecmp_size);
1359         mlxsw_sp_kvdl_free(mlxsw_sp, old_adj_index);
1360         if (err) {
1361                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
1362                 goto set_trap;
1363         }
1364         return;
1365
1366 set_trap:
1367         old_adj_index_valid = nh_grp->adj_index_valid;
1368         nh_grp->adj_index_valid = 0;
1369         for (i = 0; i < nh_grp->count; i++) {
1370                 nh = &nh_grp->nexthops[i];
1371                 nh->offloaded = 0;
1372         }
1373         err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
1374         if (err)
1375                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
1376         if (old_adj_index_valid)
1377                 mlxsw_sp_kvdl_free(mlxsw_sp, nh_grp->adj_index);
1378 }
1379
1380 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
1381                                             bool removing)
1382 {
1383         if (!removing && !nh->should_offload)
1384                 nh->should_offload = 1;
1385         else if (removing && nh->offloaded)
1386                 nh->should_offload = 0;
1387         nh->update = 1;
1388 }
1389
1390 static void
1391 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
1392                               struct mlxsw_sp_neigh_entry *neigh_entry,
1393                               bool removing)
1394 {
1395         struct mlxsw_sp_nexthop *nh;
1396
1397         list_for_each_entry(nh, &neigh_entry->nexthop_list,
1398                             neigh_list_node) {
1399                 __mlxsw_sp_nexthop_neigh_update(nh, removing);
1400                 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
1401         }
1402 }
1403
1404 static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp,
1405                                  struct mlxsw_sp_nexthop_group *nh_grp,
1406                                  struct mlxsw_sp_nexthop *nh,
1407                                  struct fib_nh *fib_nh)
1408 {
1409         struct mlxsw_sp_neigh_entry *neigh_entry;
1410         struct net_device *dev = fib_nh->nh_dev;
1411         struct neighbour *n;
1412         u8 nud_state, dead;
1413         int err;
1414
1415         nh->key.fib_nh = fib_nh;
1416         err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
1417         if (err)
1418                 return err;
1419
1420         /* Take a reference of neigh here ensuring that neigh would
1421          * not be detructed before the nexthop entry is finished.
1422          * The reference is taken either in neigh_lookup() or
1423          * in neigh_create() in case n is not found.
1424          */
1425         n = neigh_lookup(&arp_tbl, &fib_nh->nh_gw, dev);
1426         if (!n) {
1427                 n = neigh_create(&arp_tbl, &fib_nh->nh_gw, dev);
1428                 if (IS_ERR(n)) {
1429                         err = PTR_ERR(n);
1430                         goto err_neigh_create;
1431                 }
1432                 neigh_event_send(n, NULL);
1433         }
1434         neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
1435         if (!neigh_entry) {
1436                 neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
1437                 if (IS_ERR(neigh_entry)) {
1438                         err = -EINVAL;
1439                         goto err_neigh_entry_create;
1440                 }
1441         }
1442
1443         /* If that is the first nexthop connected to that neigh, add to
1444          * nexthop_neighs_list
1445          */
1446         if (list_empty(&neigh_entry->nexthop_list))
1447                 list_add_tail(&neigh_entry->nexthop_neighs_list_node,
1448                               &mlxsw_sp->router.nexthop_neighs_list);
1449
1450         nh->nh_grp = nh_grp;
1451         nh->neigh_entry = neigh_entry;
1452         list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
1453         read_lock_bh(&n->lock);
1454         nud_state = n->nud_state;
1455         dead = n->dead;
1456         read_unlock_bh(&n->lock);
1457         __mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));
1458
1459         return 0;
1460
1461 err_neigh_entry_create:
1462         neigh_release(n);
1463 err_neigh_create:
1464         mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
1465         return err;
1466 }
1467
1468 static void mlxsw_sp_nexthop_fini(struct mlxsw_sp *mlxsw_sp,
1469                                   struct mlxsw_sp_nexthop *nh)
1470 {
1471         struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
1472         struct neighbour *n = neigh_entry->key.n;
1473
1474         __mlxsw_sp_nexthop_neigh_update(nh, true);
1475         list_del(&nh->neigh_list_node);
1476         nh->neigh_entry = NULL;
1477
1478         /* If that is the last nexthop connected to that neigh, remove from
1479          * nexthop_neighs_list
1480          */
1481         if (list_empty(&neigh_entry->nexthop_list))
1482                 list_del(&neigh_entry->nexthop_neighs_list_node);
1483
1484         if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
1485                 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
1486
1487         neigh_release(n);
1488
1489         mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
1490 }
1491
1492 static struct mlxsw_sp_nexthop_group *
1493 mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
1494 {
1495         struct mlxsw_sp_nexthop_group *nh_grp;
1496         struct mlxsw_sp_nexthop *nh;
1497         struct fib_nh *fib_nh;
1498         size_t alloc_size;
1499         int i;
1500         int err;
1501
1502         alloc_size = sizeof(*nh_grp) +
1503                      fi->fib_nhs * sizeof(struct mlxsw_sp_nexthop);
1504         nh_grp = kzalloc(alloc_size, GFP_KERNEL);
1505         if (!nh_grp)
1506                 return ERR_PTR(-ENOMEM);
1507         INIT_LIST_HEAD(&nh_grp->fib_list);
1508         nh_grp->count = fi->fib_nhs;
1509         nh_grp->key.fi = fi;
1510         for (i = 0; i < nh_grp->count; i++) {
1511                 nh = &nh_grp->nexthops[i];
1512                 fib_nh = &fi->fib_nh[i];
1513                 err = mlxsw_sp_nexthop_init(mlxsw_sp, nh_grp, nh, fib_nh);
1514                 if (err)
1515                         goto err_nexthop_init;
1516         }
1517         err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
1518         if (err)
1519                 goto err_nexthop_group_insert;
1520         mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
1521         return nh_grp;
1522
1523 err_nexthop_group_insert:
1524 err_nexthop_init:
1525         for (i--; i >= 0; i--)
1526                 mlxsw_sp_nexthop_fini(mlxsw_sp, nh);
1527         kfree(nh_grp);
1528         return ERR_PTR(err);
1529 }
1530
1531 static void
1532 mlxsw_sp_nexthop_group_destroy(struct mlxsw_sp *mlxsw_sp,
1533                                struct mlxsw_sp_nexthop_group *nh_grp)
1534 {
1535         struct mlxsw_sp_nexthop *nh;
1536         int i;
1537
1538         mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
1539         for (i = 0; i < nh_grp->count; i++) {
1540                 nh = &nh_grp->nexthops[i];
1541                 mlxsw_sp_nexthop_fini(mlxsw_sp, nh);
1542         }
1543         mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
1544         WARN_ON_ONCE(nh_grp->adj_index_valid);
1545         kfree(nh_grp);
1546 }
1547
1548 static int mlxsw_sp_nexthop_group_get(struct mlxsw_sp *mlxsw_sp,
1549                                       struct mlxsw_sp_fib_entry *fib_entry,
1550                                       struct fib_info *fi)
1551 {
1552         struct mlxsw_sp_nexthop_group_key key;
1553         struct mlxsw_sp_nexthop_group *nh_grp;
1554
1555         key.fi = fi;
1556         nh_grp = mlxsw_sp_nexthop_group_lookup(mlxsw_sp, key);
1557         if (!nh_grp) {
1558                 nh_grp = mlxsw_sp_nexthop_group_create(mlxsw_sp, fi);
1559                 if (IS_ERR(nh_grp))
1560                         return PTR_ERR(nh_grp);
1561         }
1562         list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
1563         fib_entry->nh_group = nh_grp;
1564         return 0;
1565 }
1566
1567 static void mlxsw_sp_nexthop_group_put(struct mlxsw_sp *mlxsw_sp,
1568                                        struct mlxsw_sp_fib_entry *fib_entry)
1569 {
1570         struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
1571
1572         list_del(&fib_entry->nexthop_group_node);
1573         if (!list_empty(&nh_grp->fib_list))
1574                 return;
1575         mlxsw_sp_nexthop_group_destroy(mlxsw_sp, nh_grp);
1576 }
1577
1578 static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp,
1579                                          struct mlxsw_sp_fib_entry *fib_entry,
1580                                          enum mlxsw_reg_ralue_op op)
1581 {
1582         char ralue_pl[MLXSW_REG_RALUE_LEN];
1583         u32 *p_dip = (u32 *) fib_entry->key.addr;
1584         struct mlxsw_sp_vr *vr = fib_entry->vr;
1585         enum mlxsw_reg_ralue_trap_action trap_action;
1586         u16 trap_id = 0;
1587         u32 adjacency_index = 0;
1588         u16 ecmp_size = 0;
1589
1590         /* In case the nexthop group adjacency index is valid, use it
1591          * with provided ECMP size. Otherwise, setup trap and pass
1592          * traffic to kernel.
1593          */
1594         if (fib_entry->nh_group->adj_index_valid) {
1595                 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
1596                 adjacency_index = fib_entry->nh_group->adj_index;
1597                 ecmp_size = fib_entry->nh_group->ecmp_size;
1598         } else {
1599                 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
1600                 trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
1601         }
1602
1603         mlxsw_reg_ralue_pack4(ralue_pl,
1604                               (enum mlxsw_reg_ralxx_protocol) vr->proto, op,
1605                               vr->id, fib_entry->key.prefix_len, *p_dip);
1606         mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
1607                                         adjacency_index, ecmp_size);
1608         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
1609 }
1610
1611 static int mlxsw_sp_fib_entry_op4_local(struct mlxsw_sp *mlxsw_sp,
1612                                         struct mlxsw_sp_fib_entry *fib_entry,
1613                                         enum mlxsw_reg_ralue_op op)
1614 {
1615         char ralue_pl[MLXSW_REG_RALUE_LEN];
1616         u32 *p_dip = (u32 *) fib_entry->key.addr;
1617         struct mlxsw_sp_vr *vr = fib_entry->vr;
1618
1619         mlxsw_reg_ralue_pack4(ralue_pl,
1620                               (enum mlxsw_reg_ralxx_protocol) vr->proto, op,
1621                               vr->id, fib_entry->key.prefix_len, *p_dip);
1622         mlxsw_reg_ralue_act_local_pack(ralue_pl,
1623                                        MLXSW_REG_RALUE_TRAP_ACTION_NOP, 0,
1624                                        fib_entry->rif);
1625         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
1626 }
1627
1628 static int mlxsw_sp_fib_entry_op4_trap(struct mlxsw_sp *mlxsw_sp,
1629                                        struct mlxsw_sp_fib_entry *fib_entry,
1630                                        enum mlxsw_reg_ralue_op op)
1631 {
1632         char ralue_pl[MLXSW_REG_RALUE_LEN];
1633         u32 *p_dip = (u32 *) fib_entry->key.addr;
1634         struct mlxsw_sp_vr *vr = fib_entry->vr;
1635
1636         mlxsw_reg_ralue_pack4(ralue_pl,
1637                               (enum mlxsw_reg_ralxx_protocol) vr->proto, op,
1638                               vr->id, fib_entry->key.prefix_len, *p_dip);
1639         mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
1640         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
1641 }
1642
1643 static int mlxsw_sp_fib_entry_op4(struct mlxsw_sp *mlxsw_sp,
1644                                   struct mlxsw_sp_fib_entry *fib_entry,
1645                                   enum mlxsw_reg_ralue_op op)
1646 {
1647         switch (fib_entry->type) {
1648         case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
1649                 return mlxsw_sp_fib_entry_op4_remote(mlxsw_sp, fib_entry, op);
1650         case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
1651                 return mlxsw_sp_fib_entry_op4_local(mlxsw_sp, fib_entry, op);
1652         case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
1653                 return mlxsw_sp_fib_entry_op4_trap(mlxsw_sp, fib_entry, op);
1654         }
1655         return -EINVAL;
1656 }
1657
1658 static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
1659                                  struct mlxsw_sp_fib_entry *fib_entry,
1660                                  enum mlxsw_reg_ralue_op op)
1661 {
1662         switch (fib_entry->vr->proto) {
1663         case MLXSW_SP_L3_PROTO_IPV4:
1664                 return mlxsw_sp_fib_entry_op4(mlxsw_sp, fib_entry, op);
1665         case MLXSW_SP_L3_PROTO_IPV6:
1666                 return -EINVAL;
1667         }
1668         return -EINVAL;
1669 }
1670
1671 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
1672                                      struct mlxsw_sp_fib_entry *fib_entry)
1673 {
1674         return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
1675                                      MLXSW_REG_RALUE_OP_WRITE_WRITE);
1676 }
1677
1678 static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
1679                                   struct mlxsw_sp_fib_entry *fib_entry)
1680 {
1681         return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
1682                                      MLXSW_REG_RALUE_OP_WRITE_DELETE);
1683 }
1684
1685 static int
1686 mlxsw_sp_router_fib4_entry_init(struct mlxsw_sp *mlxsw_sp,
1687                                 const struct fib_entry_notifier_info *fen_info,
1688                                 struct mlxsw_sp_fib_entry *fib_entry)
1689 {
1690         struct fib_info *fi = fen_info->fi;
1691         struct mlxsw_sp_rif *r = NULL;
1692         int nhsel;
1693         int err;
1694
1695         if (fen_info->type == RTN_LOCAL || fen_info->type == RTN_BROADCAST) {
1696                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1697                 return 0;
1698         }
1699         if (fen_info->type != RTN_UNICAST)
1700                 return -EINVAL;
1701
1702         for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
1703                 const struct fib_nh *nh = &fi->fib_nh[nhsel];
1704
1705                 if (!nh->nh_dev)
1706                         continue;
1707                 r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, nh->nh_dev);
1708                 if (!r) {
1709                         /* In case router interface is not found for
1710                          * at least one of the nexthops, that means
1711                          * the nexthop points to some device unrelated
1712                          * to us. Set trap and pass the packets for
1713                          * this prefix to kernel.
1714                          */
1715                         break;
1716                 }
1717         }
1718
1719         if (!r) {
1720                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1721                 return 0;
1722         }
1723
1724         if (fi->fib_nh->nh_scope != RT_SCOPE_LINK) {
1725                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
1726                 fib_entry->rif = r->rif;
1727         } else {
1728                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
1729                 err = mlxsw_sp_nexthop_group_get(mlxsw_sp, fib_entry, fi);
1730                 if (err)
1731                         return err;
1732         }
1733         fib_info_offload_inc(fen_info->fi);
1734         return 0;
1735 }
1736
1737 static void
1738 mlxsw_sp_router_fib4_entry_fini(struct mlxsw_sp *mlxsw_sp,
1739                                 struct mlxsw_sp_fib_entry *fib_entry)
1740 {
1741         if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP)
1742                 fib_info_offload_dec(fib_entry->fi);
1743         if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_REMOTE)
1744                 mlxsw_sp_nexthop_group_put(mlxsw_sp, fib_entry);
1745 }
1746
1747 static struct mlxsw_sp_fib_entry *
1748 mlxsw_sp_fib_entry_get(struct mlxsw_sp *mlxsw_sp,
1749                        const struct fib_entry_notifier_info *fen_info)
1750 {
1751         struct mlxsw_sp_fib_entry *fib_entry;
1752         struct fib_info *fi = fen_info->fi;
1753         struct mlxsw_sp_vr *vr;
1754         int err;
1755
1756         vr = mlxsw_sp_vr_get(mlxsw_sp, fen_info->dst_len, fen_info->tb_id,
1757                              MLXSW_SP_L3_PROTO_IPV4);
1758         if (IS_ERR(vr))
1759                 return ERR_CAST(vr);
1760
1761         fib_entry = mlxsw_sp_fib_entry_lookup(vr->fib, &fen_info->dst,
1762                                               sizeof(fen_info->dst),
1763                                               fen_info->dst_len, fi->fib_dev);
1764         if (fib_entry) {
1765                 /* Already exists, just take a reference */
1766                 fib_entry->ref_count++;
1767                 return fib_entry;
1768         }
1769         fib_entry = mlxsw_sp_fib_entry_create(vr->fib, &fen_info->dst,
1770                                               sizeof(fen_info->dst),
1771                                               fen_info->dst_len, fi->fib_dev);
1772         if (!fib_entry) {
1773                 err = -ENOMEM;
1774                 goto err_fib_entry_create;
1775         }
1776         fib_entry->vr = vr;
1777         fib_entry->fi = fi;
1778         fib_entry->ref_count = 1;
1779
1780         err = mlxsw_sp_router_fib4_entry_init(mlxsw_sp, fen_info, fib_entry);
1781         if (err)
1782                 goto err_fib4_entry_init;
1783
1784         return fib_entry;
1785
1786 err_fib4_entry_init:
1787         mlxsw_sp_fib_entry_destroy(fib_entry);
1788 err_fib_entry_create:
1789         mlxsw_sp_vr_put(mlxsw_sp, vr);
1790
1791         return ERR_PTR(err);
1792 }
1793
1794 static struct mlxsw_sp_fib_entry *
1795 mlxsw_sp_fib_entry_find(struct mlxsw_sp *mlxsw_sp,
1796                         const struct fib_entry_notifier_info *fen_info)
1797 {
1798         struct mlxsw_sp_vr *vr;
1799
1800         vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id,
1801                               MLXSW_SP_L3_PROTO_IPV4);
1802         if (!vr)
1803                 return NULL;
1804
1805         return mlxsw_sp_fib_entry_lookup(vr->fib, &fen_info->dst,
1806                                          sizeof(fen_info->dst),
1807                                          fen_info->dst_len,
1808                                          fen_info->fi->fib_dev);
1809 }
1810
1811 static void mlxsw_sp_fib_entry_put(struct mlxsw_sp *mlxsw_sp,
1812                                    struct mlxsw_sp_fib_entry *fib_entry)
1813 {
1814         struct mlxsw_sp_vr *vr = fib_entry->vr;
1815
1816         if (--fib_entry->ref_count == 0) {
1817                 mlxsw_sp_router_fib4_entry_fini(mlxsw_sp, fib_entry);
1818                 mlxsw_sp_fib_entry_destroy(fib_entry);
1819         }
1820         mlxsw_sp_vr_put(mlxsw_sp, vr);
1821 }
1822
1823 static void mlxsw_sp_fib_entry_put_all(struct mlxsw_sp *mlxsw_sp,
1824                                        struct mlxsw_sp_fib_entry *fib_entry)
1825 {
1826         unsigned int last_ref_count;
1827
1828         do {
1829                 last_ref_count = fib_entry->ref_count;
1830                 mlxsw_sp_fib_entry_put(mlxsw_sp, fib_entry);
1831         } while (last_ref_count != 1);
1832 }
1833
1834 static int mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp,
1835                                     struct fib_entry_notifier_info *fen_info)
1836 {
1837         struct mlxsw_sp_fib_entry *fib_entry;
1838         struct mlxsw_sp_vr *vr;
1839         int err;
1840
1841         if (mlxsw_sp->router.aborted)
1842                 return 0;
1843
1844         fib_entry = mlxsw_sp_fib_entry_get(mlxsw_sp, fen_info);
1845         if (IS_ERR(fib_entry)) {
1846                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB4 entry being added.\n");
1847                 return PTR_ERR(fib_entry);
1848         }
1849
1850         if (fib_entry->ref_count != 1)
1851                 return 0;
1852
1853         vr = fib_entry->vr;
1854         err = mlxsw_sp_fib_entry_insert(vr->fib, fib_entry);
1855         if (err) {
1856                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to insert FIB4 entry being added.\n");
1857                 goto err_fib_entry_insert;
1858         }
1859         err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1860         if (err)
1861                 goto err_fib_entry_add;
1862         return 0;
1863
1864 err_fib_entry_add:
1865         mlxsw_sp_fib_entry_remove(vr->fib, fib_entry);
1866 err_fib_entry_insert:
1867         mlxsw_sp_fib_entry_put(mlxsw_sp, fib_entry);
1868         return err;
1869 }
1870
1871 static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
1872                                      struct fib_entry_notifier_info *fen_info)
1873 {
1874         struct mlxsw_sp_fib_entry *fib_entry;
1875
1876         if (mlxsw_sp->router.aborted)
1877                 return;
1878
1879         fib_entry = mlxsw_sp_fib_entry_find(mlxsw_sp, fen_info);
1880         if (!fib_entry)
1881                 return;
1882
1883         if (fib_entry->ref_count == 1) {
1884                 mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
1885                 mlxsw_sp_fib_entry_remove(fib_entry->vr->fib, fib_entry);
1886         }
1887
1888         mlxsw_sp_fib_entry_put(mlxsw_sp, fib_entry);
1889 }
1890
1891 static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
1892 {
1893         char ralta_pl[MLXSW_REG_RALTA_LEN];
1894         char ralst_pl[MLXSW_REG_RALST_LEN];
1895         char raltb_pl[MLXSW_REG_RALTB_LEN];
1896         char ralue_pl[MLXSW_REG_RALUE_LEN];
1897         int err;
1898
1899         mlxsw_reg_ralta_pack(ralta_pl, true, MLXSW_REG_RALXX_PROTOCOL_IPV4,
1900                              MLXSW_SP_LPM_TREE_MIN);
1901         err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
1902         if (err)
1903                 return err;
1904
1905         mlxsw_reg_ralst_pack(ralst_pl, 0xff, MLXSW_SP_LPM_TREE_MIN);
1906         err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
1907         if (err)
1908                 return err;
1909
1910         mlxsw_reg_raltb_pack(raltb_pl, 0, MLXSW_REG_RALXX_PROTOCOL_IPV4,
1911                              MLXSW_SP_LPM_TREE_MIN);
1912         err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
1913         if (err)
1914                 return err;
1915
1916         mlxsw_reg_ralue_pack4(ralue_pl, MLXSW_SP_L3_PROTO_IPV4,
1917                               MLXSW_REG_RALUE_OP_WRITE_WRITE, 0, 0, 0);
1918         mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
1919         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
1920 }
1921
1922 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
1923 {
1924         struct mlxsw_sp_fib_entry *fib_entry;
1925         struct mlxsw_sp_fib_entry *tmp;
1926         struct mlxsw_sp_vr *vr;
1927         int i;
1928
1929         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
1930                 vr = &mlxsw_sp->router.vrs[i];
1931
1932                 if (!vr->used)
1933                         continue;
1934
1935                 list_for_each_entry_safe(fib_entry, tmp,
1936                                          &vr->fib->entry_list, list) {
1937                         bool do_break = &tmp->list == &vr->fib->entry_list;
1938
1939                         mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
1940                         mlxsw_sp_fib_entry_remove(fib_entry->vr->fib,
1941                                                   fib_entry);
1942                         mlxsw_sp_fib_entry_put_all(mlxsw_sp, fib_entry);
1943                         if (do_break)
1944                                 break;
1945                 }
1946         }
1947 }
1948
1949 static void mlxsw_sp_router_fib4_abort(struct mlxsw_sp *mlxsw_sp)
1950 {
1951         int err;
1952
1953         if (mlxsw_sp->router.aborted)
1954                 return;
1955         dev_warn(mlxsw_sp->bus_info->dev, "FIB abort triggered. Note that FIB entries are no longer being offloaded to this device.\n");
1956         mlxsw_sp_router_fib_flush(mlxsw_sp);
1957         mlxsw_sp->router.aborted = true;
1958         err = mlxsw_sp_router_set_abort_trap(mlxsw_sp);
1959         if (err)
1960                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
1961 }
1962
1963 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
1964 {
1965         char rgcr_pl[MLXSW_REG_RGCR_LEN];
1966         u64 max_rifs;
1967         int err;
1968
1969         if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
1970                 return -EIO;
1971
1972         max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
1973         mlxsw_sp->rifs = kcalloc(max_rifs, sizeof(struct mlxsw_sp_rif *),
1974                                  GFP_KERNEL);
1975         if (!mlxsw_sp->rifs)
1976                 return -ENOMEM;
1977
1978         mlxsw_reg_rgcr_pack(rgcr_pl, true);
1979         mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
1980         err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
1981         if (err)
1982                 goto err_rgcr_fail;
1983
1984         return 0;
1985
1986 err_rgcr_fail:
1987         kfree(mlxsw_sp->rifs);
1988         return err;
1989 }
1990
1991 static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
1992 {
1993         char rgcr_pl[MLXSW_REG_RGCR_LEN];
1994         int i;
1995
1996         mlxsw_reg_rgcr_pack(rgcr_pl, false);
1997         mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
1998
1999         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
2000                 WARN_ON_ONCE(mlxsw_sp->rifs[i]);
2001
2002         kfree(mlxsw_sp->rifs);
2003 }
2004
2005 struct mlxsw_sp_fib_event_work {
2006         struct work_struct work;
2007         struct fib_entry_notifier_info fen_info;
2008         struct mlxsw_sp *mlxsw_sp;
2009         unsigned long event;
2010 };
2011
2012 static void mlxsw_sp_router_fib_event_work(struct work_struct *work)
2013 {
2014         struct mlxsw_sp_fib_event_work *fib_work =
2015                 container_of(work, struct mlxsw_sp_fib_event_work, work);
2016         struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
2017         int err;
2018
2019         /* Protect internal structures from changes */
2020         rtnl_lock();
2021         switch (fib_work->event) {
2022         case FIB_EVENT_ENTRY_ADD:
2023                 err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info);
2024                 if (err)
2025                         mlxsw_sp_router_fib4_abort(mlxsw_sp);
2026                 fib_info_put(fib_work->fen_info.fi);
2027                 break;
2028         case FIB_EVENT_ENTRY_DEL:
2029                 mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
2030                 fib_info_put(fib_work->fen_info.fi);
2031                 break;
2032         case FIB_EVENT_RULE_ADD: /* fall through */
2033         case FIB_EVENT_RULE_DEL:
2034                 mlxsw_sp_router_fib4_abort(mlxsw_sp);
2035                 break;
2036         }
2037         rtnl_unlock();
2038         kfree(fib_work);
2039 }
2040
2041 /* Called with rcu_read_lock() */
2042 static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
2043                                      unsigned long event, void *ptr)
2044 {
2045         struct mlxsw_sp *mlxsw_sp = container_of(nb, struct mlxsw_sp, fib_nb);
2046         struct mlxsw_sp_fib_event_work *fib_work;
2047         struct fib_notifier_info *info = ptr;
2048
2049         if (!net_eq(info->net, &init_net))
2050                 return NOTIFY_DONE;
2051
2052         fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
2053         if (WARN_ON(!fib_work))
2054                 return NOTIFY_BAD;
2055
2056         INIT_WORK(&fib_work->work, mlxsw_sp_router_fib_event_work);
2057         fib_work->mlxsw_sp = mlxsw_sp;
2058         fib_work->event = event;
2059
2060         switch (event) {
2061         case FIB_EVENT_ENTRY_ADD: /* fall through */
2062         case FIB_EVENT_ENTRY_DEL:
2063                 memcpy(&fib_work->fen_info, ptr, sizeof(fib_work->fen_info));
2064                 /* Take referece on fib_info to prevent it from being
2065                  * freed while work is queued. Release it afterwards.
2066                  */
2067                 fib_info_hold(fib_work->fen_info.fi);
2068                 break;
2069         }
2070
2071         mlxsw_core_schedule_work(&fib_work->work);
2072
2073         return NOTIFY_DONE;
2074 }
2075
2076 static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
2077 {
2078         struct mlxsw_sp *mlxsw_sp = container_of(nb, struct mlxsw_sp, fib_nb);
2079
2080         /* Flush pending FIB notifications and then flush the device's
2081          * table before requesting another dump. The FIB notification
2082          * block is unregistered, so no need to take RTNL.
2083          */
2084         mlxsw_core_flush_owq();
2085         mlxsw_sp_router_fib_flush(mlxsw_sp);
2086 }
2087
2088 int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
2089 {
2090         int err;
2091
2092         INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_neighs_list);
2093         err = __mlxsw_sp_router_init(mlxsw_sp);
2094         if (err)
2095                 return err;
2096
2097         err = rhashtable_init(&mlxsw_sp->router.nexthop_ht,
2098                               &mlxsw_sp_nexthop_ht_params);
2099         if (err)
2100                 goto err_nexthop_ht_init;
2101
2102         err = rhashtable_init(&mlxsw_sp->router.nexthop_group_ht,
2103                               &mlxsw_sp_nexthop_group_ht_params);
2104         if (err)
2105                 goto err_nexthop_group_ht_init;
2106
2107         mlxsw_sp_lpm_init(mlxsw_sp);
2108         err = mlxsw_sp_vrs_init(mlxsw_sp);
2109         if (err)
2110                 goto err_vrs_init;
2111
2112         err = mlxsw_sp_neigh_init(mlxsw_sp);
2113         if (err)
2114                 goto err_neigh_init;
2115
2116         mlxsw_sp->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
2117         err = register_fib_notifier(&mlxsw_sp->fib_nb,
2118                                     mlxsw_sp_router_fib_dump_flush);
2119         if (err)
2120                 goto err_register_fib_notifier;
2121
2122         return 0;
2123
2124 err_register_fib_notifier:
2125         mlxsw_sp_neigh_fini(mlxsw_sp);
2126 err_neigh_init:
2127         mlxsw_sp_vrs_fini(mlxsw_sp);
2128 err_vrs_init:
2129         rhashtable_destroy(&mlxsw_sp->router.nexthop_group_ht);
2130 err_nexthop_group_ht_init:
2131         rhashtable_destroy(&mlxsw_sp->router.nexthop_ht);
2132 err_nexthop_ht_init:
2133         __mlxsw_sp_router_fini(mlxsw_sp);
2134         return err;
2135 }
2136
2137 void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
2138 {
2139         unregister_fib_notifier(&mlxsw_sp->fib_nb);
2140         mlxsw_sp_neigh_fini(mlxsw_sp);
2141         mlxsw_sp_vrs_fini(mlxsw_sp);
2142         rhashtable_destroy(&mlxsw_sp->router.nexthop_group_ht);
2143         rhashtable_destroy(&mlxsw_sp->router.nexthop_ht);
2144         __mlxsw_sp_router_fini(mlxsw_sp);
2145 }