]> git.kernelconcepts.de Git - karo-tx-linux.git/blob - net/switchdev/switchdev.c
switchdev: skip over ports returning -EOPNOTSUPP when recursing ports
[karo-tx-linux.git] / net / switchdev / switchdev.c
1 /*
2  * net/switchdev/switchdev.c - Switch device API
3  * Copyright (c) 2014-2015 Jiri Pirko <jiri@resnulli.us>
4  * Copyright (c) 2014-2015 Scott Feldman <sfeldma@gmail.com>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  */
11
12 #include <linux/kernel.h>
13 #include <linux/types.h>
14 #include <linux/init.h>
15 #include <linux/mutex.h>
16 #include <linux/notifier.h>
17 #include <linux/netdevice.h>
18 #include <linux/if_bridge.h>
19 #include <linux/list.h>
20 #include <net/ip_fib.h>
21 #include <net/switchdev.h>
22
23 /**
24  *      switchdev_trans_item_enqueue - Enqueue data item to transaction queue
25  *
26  *      @trans: transaction
27  *      @data: pointer to data being queued
28  *      @destructor: data destructor
29  *      @tritem: transaction item being queued
30  *
31  *      Enqeueue data item to transaction queue. tritem is typically placed in
32  *      cointainter pointed at by data pointer. Destructor is called on
33  *      transaction abort and after successful commit phase in case
34  *      the caller did not dequeue the item before.
35  */
36 void switchdev_trans_item_enqueue(struct switchdev_trans *trans,
37                                   void *data, void (*destructor)(void const *),
38                                   struct switchdev_trans_item *tritem)
39 {
40         tritem->data = data;
41         tritem->destructor = destructor;
42         list_add_tail(&tritem->list, &trans->item_list);
43 }
44 EXPORT_SYMBOL_GPL(switchdev_trans_item_enqueue);
45
46 static struct switchdev_trans_item *
47 __switchdev_trans_item_dequeue(struct switchdev_trans *trans)
48 {
49         struct switchdev_trans_item *tritem;
50
51         if (list_empty(&trans->item_list))
52                 return NULL;
53         tritem = list_first_entry(&trans->item_list,
54                                   struct switchdev_trans_item, list);
55         list_del(&tritem->list);
56         return tritem;
57 }
58
59 /**
60  *      switchdev_trans_item_dequeue - Dequeue data item from transaction queue
61  *
62  *      @trans: transaction
63  */
64 void *switchdev_trans_item_dequeue(struct switchdev_trans *trans)
65 {
66         struct switchdev_trans_item *tritem;
67
68         tritem = __switchdev_trans_item_dequeue(trans);
69         BUG_ON(!tritem);
70         return tritem->data;
71 }
72 EXPORT_SYMBOL_GPL(switchdev_trans_item_dequeue);
73
74 static void switchdev_trans_init(struct switchdev_trans *trans)
75 {
76         INIT_LIST_HEAD(&trans->item_list);
77 }
78
79 static void switchdev_trans_items_destroy(struct switchdev_trans *trans)
80 {
81         struct switchdev_trans_item *tritem;
82
83         while ((tritem = __switchdev_trans_item_dequeue(trans)))
84                 tritem->destructor(tritem->data);
85 }
86
87 static void switchdev_trans_items_warn_destroy(struct net_device *dev,
88                                                struct switchdev_trans *trans)
89 {
90         WARN(!list_empty(&trans->item_list), "%s: transaction item queue is not empty.\n",
91              dev->name);
92         switchdev_trans_items_destroy(trans);
93 }
94
95 /**
96  *      switchdev_port_attr_get - Get port attribute
97  *
98  *      @dev: port device
99  *      @attr: attribute to get
100  */
101 int switchdev_port_attr_get(struct net_device *dev, struct switchdev_attr *attr)
102 {
103         const struct switchdev_ops *ops = dev->switchdev_ops;
104         struct net_device *lower_dev;
105         struct list_head *iter;
106         struct switchdev_attr first = {
107                 .id = SWITCHDEV_ATTR_ID_UNDEFINED
108         };
109         int err = -EOPNOTSUPP;
110
111         if (ops && ops->switchdev_port_attr_get)
112                 return ops->switchdev_port_attr_get(dev, attr);
113
114         if (attr->flags & SWITCHDEV_F_NO_RECURSE)
115                 return err;
116
117         /* Switch device port(s) may be stacked under
118          * bond/team/vlan dev, so recurse down to get attr on
119          * each port.  Return -ENODATA if attr values don't
120          * compare across ports.
121          */
122
123         netdev_for_each_lower_dev(dev, lower_dev, iter) {
124                 err = switchdev_port_attr_get(lower_dev, attr);
125                 if (err)
126                         break;
127                 if (first.id == SWITCHDEV_ATTR_ID_UNDEFINED)
128                         first = *attr;
129                 else if (memcmp(&first, attr, sizeof(*attr)))
130                         return -ENODATA;
131         }
132
133         return err;
134 }
135 EXPORT_SYMBOL_GPL(switchdev_port_attr_get);
136
137 static int __switchdev_port_attr_set(struct net_device *dev,
138                                      struct switchdev_attr *attr,
139                                      struct switchdev_trans *trans)
140 {
141         const struct switchdev_ops *ops = dev->switchdev_ops;
142         struct net_device *lower_dev;
143         struct list_head *iter;
144         int err = -EOPNOTSUPP;
145
146         if (ops && ops->switchdev_port_attr_set)
147                 return ops->switchdev_port_attr_set(dev, attr, trans);
148
149         if (attr->flags & SWITCHDEV_F_NO_RECURSE)
150                 goto done;
151
152         /* Switch device port(s) may be stacked under
153          * bond/team/vlan dev, so recurse down to set attr on
154          * each port.
155          */
156
157         netdev_for_each_lower_dev(dev, lower_dev, iter) {
158                 err = __switchdev_port_attr_set(lower_dev, attr, trans);
159                 if (err == -EOPNOTSUPP &&
160                     attr->flags & SWITCHDEV_F_SKIP_EOPNOTSUPP)
161                         continue;
162                 if (err)
163                         break;
164         }
165
166 done:
167         if (err == -EOPNOTSUPP && attr->flags & SWITCHDEV_F_SKIP_EOPNOTSUPP)
168                 err = 0;
169
170         return err;
171 }
172
/* Context for an attribute set that is deferred to a work item. */
struct switchdev_attr_set_work {
        /* Work item handed to schedule_work(); the handler recovers this
         * structure from it with container_of().
         */
        struct work_struct work;
        /* Target device; held with dev_hold() when the work is queued and
         * released with dev_put() by the handler.
         */
        struct net_device *dev;
        /* Private copy of the caller's attribute. */
        struct switchdev_attr attr;
};
178
179 static void switchdev_port_attr_set_work(struct work_struct *work)
180 {
181         struct switchdev_attr_set_work *asw =
182                 container_of(work, struct switchdev_attr_set_work, work);
183         int err;
184
185         rtnl_lock();
186         err = switchdev_port_attr_set(asw->dev, &asw->attr);
187         if (err && err != -EOPNOTSUPP)
188                 netdev_err(asw->dev, "failed (err=%d) to set attribute (id=%d)\n",
189                            err, asw->attr.id);
190         rtnl_unlock();
191
192         dev_put(asw->dev);
193         kfree(work);
194 }
195
196 static int switchdev_port_attr_set_defer(struct net_device *dev,
197                                          struct switchdev_attr *attr)
198 {
199         struct switchdev_attr_set_work *asw;
200
201         asw = kmalloc(sizeof(*asw), GFP_ATOMIC);
202         if (!asw)
203                 return -ENOMEM;
204
205         INIT_WORK(&asw->work, switchdev_port_attr_set_work);
206
207         dev_hold(dev);
208         asw->dev = dev;
209         memcpy(&asw->attr, attr, sizeof(asw->attr));
210
211         schedule_work(&asw->work);
212
213         return 0;
214 }
215
216 /**
217  *      switchdev_port_attr_set - Set port attribute
218  *
219  *      @dev: port device
220  *      @attr: attribute to set
221  *
222  *      Use a 2-phase prepare-commit transaction model to ensure
223  *      system is not left in a partially updated state due to
224  *      failure from driver/device.
225  */
226 int switchdev_port_attr_set(struct net_device *dev, struct switchdev_attr *attr)
227 {
228         struct switchdev_trans trans;
229         int err;
230
231         if (!rtnl_is_locked()) {
232                 /* Running prepare-commit transaction across stacked
233                  * devices requires nothing moves, so if rtnl_lock is
234                  * not held, schedule a worker thread to hold rtnl_lock
235                  * while setting attr.
236                  */
237
238                 return switchdev_port_attr_set_defer(dev, attr);
239         }
240
241         switchdev_trans_init(&trans);
242
243         /* Phase I: prepare for attr set. Driver/device should fail
244          * here if there are going to be issues in the commit phase,
245          * such as lack of resources or support.  The driver/device
246          * should reserve resources needed for the commit phase here,
247          * but should not commit the attr.
248          */
249
250         trans.ph_prepare = true;
251         err = __switchdev_port_attr_set(dev, attr, &trans);
252         if (err) {
253                 /* Prepare phase failed: abort the transaction.  Any
254                  * resources reserved in the prepare phase are
255                  * released.
256                  */
257
258                 if (err != -EOPNOTSUPP)
259                         switchdev_trans_items_destroy(&trans);
260
261                 return err;
262         }
263
264         /* Phase II: commit attr set.  This cannot fail as a fault
265          * of driver/device.  If it does, it's a bug in the driver/device
266          * because the driver said everythings was OK in phase I.
267          */
268
269         trans.ph_prepare = false;
270         err = __switchdev_port_attr_set(dev, attr, &trans);
271         WARN(err, "%s: Commit of attribute (id=%d) failed.\n",
272              dev->name, attr->id);
273         switchdev_trans_items_warn_destroy(dev, &trans);
274
275         return err;
276 }
277 EXPORT_SYMBOL_GPL(switchdev_port_attr_set);
278
279 static int __switchdev_port_obj_add(struct net_device *dev,
280                                     const struct switchdev_obj *obj,
281                                     struct switchdev_trans *trans)
282 {
283         const struct switchdev_ops *ops = dev->switchdev_ops;
284         struct net_device *lower_dev;
285         struct list_head *iter;
286         int err = -EOPNOTSUPP;
287
288         if (ops && ops->switchdev_port_obj_add)
289                 return ops->switchdev_port_obj_add(dev, obj, trans);
290
291         /* Switch device port(s) may be stacked under
292          * bond/team/vlan dev, so recurse down to add object on
293          * each port.
294          */
295
296         netdev_for_each_lower_dev(dev, lower_dev, iter) {
297                 err = __switchdev_port_obj_add(lower_dev, obj, trans);
298                 if (err)
299                         break;
300         }
301
302         return err;
303 }
304
305 /**
306  *      switchdev_port_obj_add - Add port object
307  *
308  *      @dev: port device
309  *      @id: object ID
310  *      @obj: object to add
311  *
312  *      Use a 2-phase prepare-commit transaction model to ensure
313  *      system is not left in a partially updated state due to
314  *      failure from driver/device.
315  *
316  *      rtnl_lock must be held.
317  */
318 int switchdev_port_obj_add(struct net_device *dev,
319                            const struct switchdev_obj *obj)
320 {
321         struct switchdev_trans trans;
322         int err;
323
324         ASSERT_RTNL();
325
326         switchdev_trans_init(&trans);
327
328         /* Phase I: prepare for obj add. Driver/device should fail
329          * here if there are going to be issues in the commit phase,
330          * such as lack of resources or support.  The driver/device
331          * should reserve resources needed for the commit phase here,
332          * but should not commit the obj.
333          */
334
335         trans.ph_prepare = true;
336         err = __switchdev_port_obj_add(dev, obj, &trans);
337         if (err) {
338                 /* Prepare phase failed: abort the transaction.  Any
339                  * resources reserved in the prepare phase are
340                  * released.
341                  */
342
343                 if (err != -EOPNOTSUPP)
344                         switchdev_trans_items_destroy(&trans);
345
346                 return err;
347         }
348
349         /* Phase II: commit obj add.  This cannot fail as a fault
350          * of driver/device.  If it does, it's a bug in the driver/device
351          * because the driver said everythings was OK in phase I.
352          */
353
354         trans.ph_prepare = false;
355         err = __switchdev_port_obj_add(dev, obj, &trans);
356         WARN(err, "%s: Commit of object (id=%d) failed.\n", dev->name, obj->id);
357         switchdev_trans_items_warn_destroy(dev, &trans);
358
359         return err;
360 }
361 EXPORT_SYMBOL_GPL(switchdev_port_obj_add);
362
363 /**
364  *      switchdev_port_obj_del - Delete port object
365  *
366  *      @dev: port device
367  *      @id: object ID
368  *      @obj: object to delete
369  */
370 int switchdev_port_obj_del(struct net_device *dev,
371                            const struct switchdev_obj *obj)
372 {
373         const struct switchdev_ops *ops = dev->switchdev_ops;
374         struct net_device *lower_dev;
375         struct list_head *iter;
376         int err = -EOPNOTSUPP;
377
378         if (ops && ops->switchdev_port_obj_del)
379                 return ops->switchdev_port_obj_del(dev, obj);
380
381         /* Switch device port(s) may be stacked under
382          * bond/team/vlan dev, so recurse down to delete object on
383          * each port.
384          */
385
386         netdev_for_each_lower_dev(dev, lower_dev, iter) {
387                 err = switchdev_port_obj_del(lower_dev, obj);
388                 if (err)
389                         break;
390         }
391
392         return err;
393 }
394 EXPORT_SYMBOL_GPL(switchdev_port_obj_del);
395
396 /**
397  *      switchdev_port_obj_dump - Dump port objects
398  *
399  *      @dev: port device
400  *      @id: object ID
401  *      @obj: object to dump
402  *      @cb: function to call with a filled object
403  */
404 int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj,
405                             switchdev_obj_dump_cb_t *cb)
406 {
407         const struct switchdev_ops *ops = dev->switchdev_ops;
408         struct net_device *lower_dev;
409         struct list_head *iter;
410         int err = -EOPNOTSUPP;
411
412         if (ops && ops->switchdev_port_obj_dump)
413                 return ops->switchdev_port_obj_dump(dev, obj, cb);
414
415         /* Switch device port(s) may be stacked under
416          * bond/team/vlan dev, so recurse down to dump objects on
417          * first port at bottom of stack.
418          */
419
420         netdev_for_each_lower_dev(dev, lower_dev, iter) {
421                 err = switchdev_port_obj_dump(lower_dev, obj, cb);
422                 break;
423         }
424
425         return err;
426 }
427 EXPORT_SYMBOL_GPL(switchdev_port_obj_dump);
428
/* Taken around every register, unregister and call on the notifier
 * chain below.
 */
static DEFINE_MUTEX(switchdev_mutex);
/* Chain of switchdev notifier blocks. */
static RAW_NOTIFIER_HEAD(switchdev_notif_chain);
431
432 /**
433  *      register_switchdev_notifier - Register notifier
434  *      @nb: notifier_block
435  *
436  *      Register switch device notifier. This should be used by code
437  *      which needs to monitor events happening in particular device.
438  *      Return values are same as for atomic_notifier_chain_register().
439  */
440 int register_switchdev_notifier(struct notifier_block *nb)
441 {
442         int err;
443
444         mutex_lock(&switchdev_mutex);
445         err = raw_notifier_chain_register(&switchdev_notif_chain, nb);
446         mutex_unlock(&switchdev_mutex);
447         return err;
448 }
449 EXPORT_SYMBOL_GPL(register_switchdev_notifier);
450
451 /**
452  *      unregister_switchdev_notifier - Unregister notifier
453  *      @nb: notifier_block
454  *
455  *      Unregister switch device notifier.
456  *      Return values are same as for atomic_notifier_chain_unregister().
457  */
458 int unregister_switchdev_notifier(struct notifier_block *nb)
459 {
460         int err;
461
462         mutex_lock(&switchdev_mutex);
463         err = raw_notifier_chain_unregister(&switchdev_notif_chain, nb);
464         mutex_unlock(&switchdev_mutex);
465         return err;
466 }
467 EXPORT_SYMBOL_GPL(unregister_switchdev_notifier);
468
469 /**
470  *      call_switchdev_notifiers - Call notifiers
471  *      @val: value passed unmodified to notifier function
472  *      @dev: port device
473  *      @info: notifier information data
474  *
475  *      Call all network notifier blocks. This should be called by driver
476  *      when it needs to propagate hardware event.
477  *      Return values are same as for atomic_notifier_call_chain().
478  */
479 int call_switchdev_notifiers(unsigned long val, struct net_device *dev,
480                              struct switchdev_notifier_info *info)
481 {
482         int err;
483
484         info->dev = dev;
485         mutex_lock(&switchdev_mutex);
486         err = raw_notifier_call_chain(&switchdev_notif_chain, val, info);
487         mutex_unlock(&switchdev_mutex);
488         return err;
489 }
490 EXPORT_SYMBOL_GPL(call_switchdev_notifiers);
491
/* State carried across callbacks while dumping a port's VLANs into a
 * netlink message.
 */
struct switchdev_vlan_dump {
        /* Object passed to switchdev_port_obj_dump(); the dump callback
         * recovers this structure from it with container_of().
         */
        struct switchdev_obj_port_vlan vlan;
        /* Message that IFLA_BRIDGE_VLAN_INFO attributes are written to. */
        struct sk_buff *skb;
        /* RTEXT_FILTER_BRVLAN / RTEXT_FILTER_BRVLAN_COMPRESSED selector. */
        u32 filter_mask;
        /* Flags and VID range [begin, end] pending emission;
         * begin == end == 0 means nothing is pending.
         */
        u16 flags;
        u16 begin;
        u16 end;
};
500
501 static int switchdev_port_vlan_dump_put(struct switchdev_vlan_dump *dump)
502 {
503         struct bridge_vlan_info vinfo;
504
505         vinfo.flags = dump->flags;
506
507         if (dump->begin == 0 && dump->end == 0) {
508                 return 0;
509         } else if (dump->begin == dump->end) {
510                 vinfo.vid = dump->begin;
511                 if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO,
512                             sizeof(vinfo), &vinfo))
513                         return -EMSGSIZE;
514         } else {
515                 vinfo.vid = dump->begin;
516                 vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_BEGIN;
517                 if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO,
518                             sizeof(vinfo), &vinfo))
519                         return -EMSGSIZE;
520                 vinfo.vid = dump->end;
521                 vinfo.flags &= ~BRIDGE_VLAN_INFO_RANGE_BEGIN;
522                 vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_END;
523                 if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO,
524                             sizeof(vinfo), &vinfo))
525                         return -EMSGSIZE;
526         }
527
528         return 0;
529 }
530
/* Dump callback invoked once per VLAN object. @obj is the vlan member of
 * a struct switchdev_vlan_dump, which is recovered with container_of().
 *
 * With RTEXT_FILTER_BRVLAN every VID in [vid_begin, vid_end] is emitted
 * individually. Otherwise, with RTEXT_FILTER_BRVLAN_COMPRESSED, the
 * incoming range is merged into the pending range when it is adjacent
 * and has the same flags; if not, the pending range is emitted and
 * replaced by the incoming one. With neither flag nothing is done.
 *
 * Returns -EINVAL for an inverted range or, in compressed mode, for a
 * range that is neither entirely below nor entirely above the pending
 * one; otherwise the result of switchdev_port_vlan_dump_put().
 */
static int switchdev_port_vlan_dump_cb(struct switchdev_obj *obj)
{
        struct switchdev_obj_port_vlan *vlan = SWITCHDEV_OBJ_PORT_VLAN(obj);
        struct switchdev_vlan_dump *dump =
                container_of(vlan, struct switchdev_vlan_dump, vlan);
        int err = 0;

        if (vlan->vid_begin > vlan->vid_end)
                return -EINVAL;

        if (dump->filter_mask & RTEXT_FILTER_BRVLAN) {
                dump->flags = vlan->flags;
                /* begin and end advance together, so each put emits
                 * exactly one VID.
                 */
                for (dump->begin = dump->end = vlan->vid_begin;
                     dump->begin <= vlan->vid_end;
                     dump->begin++, dump->end++) {
                        err = switchdev_port_vlan_dump_put(dump);
                        if (err)
                                return err;
                }
        } else if (dump->filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED) {
                /* Incoming range lies below the pending one. */
                if (dump->begin > vlan->vid_begin &&
                    dump->begin >= vlan->vid_end) {
                        if ((dump->begin - 1) == vlan->vid_end &&
                            dump->flags == vlan->flags) {
                                /* prepend */
                                dump->begin = vlan->vid_begin;
                        } else {
                                /* Not mergeable: emit the pending range
                                 * and start over with the incoming one.
                                 */
                                err = switchdev_port_vlan_dump_put(dump);
                                dump->flags = vlan->flags;
                                dump->begin = vlan->vid_begin;
                                dump->end = vlan->vid_end;
                        }
                /* Incoming range lies above the pending one.
                 * NOTE(review): with nothing pending (begin == end == 0,
                 * flags == 0), an incoming range starting at VID 1 with
                 * flags == 0 takes the append path and leaves begin at 0
                 * -- confirm whether that input can occur.
                 */
                } else if (dump->end <= vlan->vid_begin &&
                           dump->end < vlan->vid_end) {
                        if ((dump->end  + 1) == vlan->vid_begin &&
                            dump->flags == vlan->flags) {
                                /* append */
                                dump->end = vlan->vid_end;
                        } else {
                                /* Not mergeable: emit the pending range
                                 * and start over with the incoming one.
                                 */
                                err = switchdev_port_vlan_dump_put(dump);
                                dump->flags = vlan->flags;
                                dump->begin = vlan->vid_begin;
                                dump->end = vlan->vid_end;
                        }
                } else {
                        err = -EINVAL;
                }
        }

        return err;
}
582
583 static int switchdev_port_vlan_fill(struct sk_buff *skb, struct net_device *dev,
584                                     u32 filter_mask)
585 {
586         struct switchdev_vlan_dump dump = {
587                 .vlan.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
588                 .skb = skb,
589                 .filter_mask = filter_mask,
590         };
591         int err = 0;
592
593         if ((filter_mask & RTEXT_FILTER_BRVLAN) ||
594             (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) {
595                 err = switchdev_port_obj_dump(dev, &dump.vlan.obj,
596                                               switchdev_port_vlan_dump_cb);
597                 if (err)
598                         goto err_out;
599                 if (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)
600                         /* last one */
601                         err = switchdev_port_vlan_dump_put(&dump);
602         }
603
604 err_out:
605         return err == -EOPNOTSUPP ? 0 : err;
606 }
607
608 /**
609  *      switchdev_port_bridge_getlink - Get bridge port attributes
610  *
611  *      @dev: port device
612  *
613  *      Called for SELF on rtnl_bridge_getlink to get bridge port
614  *      attributes.
615  */
616 int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
617                                   struct net_device *dev, u32 filter_mask,
618                                   int nlflags)
619 {
620         struct switchdev_attr attr = {
621                 .id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS,
622         };
623         u16 mode = BRIDGE_MODE_UNDEF;
624         u32 mask = BR_LEARNING | BR_LEARNING_SYNC;
625         int err;
626
627         err = switchdev_port_attr_get(dev, &attr);
628         if (err && err != -EOPNOTSUPP)
629                 return err;
630
631         return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode,
632                                        attr.u.brport_flags, mask, nlflags,
633                                        filter_mask, switchdev_port_vlan_fill);
634 }
635 EXPORT_SYMBOL_GPL(switchdev_port_bridge_getlink);
636
637 static int switchdev_port_br_setflag(struct net_device *dev,
638                                      struct nlattr *nlattr,
639                                      unsigned long brport_flag)
640 {
641         struct switchdev_attr attr = {
642                 .id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS,
643         };
644         u8 flag = nla_get_u8(nlattr);
645         int err;
646
647         err = switchdev_port_attr_get(dev, &attr);
648         if (err)
649                 return err;
650
651         if (flag)
652                 attr.u.brport_flags |= brport_flag;
653         else
654                 attr.u.brport_flags &= ~brport_flag;
655
656         return switchdev_port_attr_set(dev, &attr);
657 }
658
/* Expected netlink types of the IFLA_BRPORT_* attributes; used to
 * validate the nested IFLA_PROTINFO attribute before it is parsed.
 */
static const struct nla_policy
switchdev_port_bridge_policy[IFLA_BRPORT_MAX + 1] = {
        [IFLA_BRPORT_STATE]             = { .type = NLA_U8 },
        [IFLA_BRPORT_COST]              = { .type = NLA_U32 },
        [IFLA_BRPORT_PRIORITY]          = { .type = NLA_U16 },
        [IFLA_BRPORT_MODE]              = { .type = NLA_U8 },
        [IFLA_BRPORT_GUARD]             = { .type = NLA_U8 },
        [IFLA_BRPORT_PROTECT]           = { .type = NLA_U8 },
        [IFLA_BRPORT_FAST_LEAVE]        = { .type = NLA_U8 },
        [IFLA_BRPORT_LEARNING]          = { .type = NLA_U8 },
        [IFLA_BRPORT_LEARNING_SYNC]     = { .type = NLA_U8 },
        [IFLA_BRPORT_UNICAST_FLOOD]     = { .type = NLA_U8 },
};
672
673 static int switchdev_port_br_setlink_protinfo(struct net_device *dev,
674                                               struct nlattr *protinfo)
675 {
676         struct nlattr *attr;
677         int rem;
678         int err;
679
680         err = nla_validate_nested(protinfo, IFLA_BRPORT_MAX,
681                                   switchdev_port_bridge_policy);
682         if (err)
683                 return err;
684
685         nla_for_each_nested(attr, protinfo, rem) {
686                 switch (nla_type(attr)) {
687                 case IFLA_BRPORT_LEARNING:
688                         err = switchdev_port_br_setflag(dev, attr,
689                                                         BR_LEARNING);
690                         break;
691                 case IFLA_BRPORT_LEARNING_SYNC:
692                         err = switchdev_port_br_setflag(dev, attr,
693                                                         BR_LEARNING_SYNC);
694                         break;
695                 default:
696                         err = -EOPNOTSUPP;
697                         break;
698                 }
699                 if (err)
700                         return err;
701         }
702
703         return 0;
704 }
705
706 static int switchdev_port_br_afspec(struct net_device *dev,
707                                     struct nlattr *afspec,
708                                     int (*f)(struct net_device *dev,
709                                              const struct switchdev_obj *obj))
710 {
711         struct nlattr *attr;
712         struct bridge_vlan_info *vinfo;
713         struct switchdev_obj_port_vlan vlan = {
714                 .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
715         };
716         int rem;
717         int err;
718
719         nla_for_each_nested(attr, afspec, rem) {
720                 if (nla_type(attr) != IFLA_BRIDGE_VLAN_INFO)
721                         continue;
722                 if (nla_len(attr) != sizeof(struct bridge_vlan_info))
723                         return -EINVAL;
724                 vinfo = nla_data(attr);
725                 vlan.flags = vinfo->flags;
726                 if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) {
727                         if (vlan.vid_begin)
728                                 return -EINVAL;
729                         vlan.vid_begin = vinfo->vid;
730                 } else if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_END) {
731                         if (!vlan.vid_begin)
732                                 return -EINVAL;
733                         vlan.vid_end = vinfo->vid;
734                         if (vlan.vid_end <= vlan.vid_begin)
735                                 return -EINVAL;
736                         err = f(dev, &vlan.obj);
737                         if (err)
738                                 return err;
739                         memset(&vlan, 0, sizeof(vlan));
740                 } else {
741                         if (vlan.vid_begin)
742                                 return -EINVAL;
743                         vlan.vid_begin = vinfo->vid;
744                         vlan.vid_end = vinfo->vid;
745                         err = f(dev, &vlan.obj);
746                         if (err)
747                                 return err;
748                         memset(&vlan, 0, sizeof(vlan));
749                 }
750         }
751
752         return 0;
753 }
754
755 /**
756  *      switchdev_port_bridge_setlink - Set bridge port attributes
757  *
758  *      @dev: port device
759  *      @nlh: netlink header
760  *      @flags: netlink flags
761  *
762  *      Called for SELF on rtnl_bridge_setlink to set bridge port
763  *      attributes.
764  */
765 int switchdev_port_bridge_setlink(struct net_device *dev,
766                                   struct nlmsghdr *nlh, u16 flags)
767 {
768         struct nlattr *protinfo;
769         struct nlattr *afspec;
770         int err = 0;
771
772         protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
773                                    IFLA_PROTINFO);
774         if (protinfo) {
775                 err = switchdev_port_br_setlink_protinfo(dev, protinfo);
776                 if (err)
777                         return err;
778         }
779
780         afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
781                                  IFLA_AF_SPEC);
782         if (afspec)
783                 err = switchdev_port_br_afspec(dev, afspec,
784                                                switchdev_port_obj_add);
785
786         return err;
787 }
788 EXPORT_SYMBOL_GPL(switchdev_port_bridge_setlink);
789
790 /**
791  *      switchdev_port_bridge_dellink - Set bridge port attributes
792  *
793  *      @dev: port device
794  *      @nlh: netlink header
795  *      @flags: netlink flags
796  *
797  *      Called for SELF on rtnl_bridge_dellink to set bridge port
798  *      attributes.
799  */
800 int switchdev_port_bridge_dellink(struct net_device *dev,
801                                   struct nlmsghdr *nlh, u16 flags)
802 {
803         struct nlattr *afspec;
804
805         afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
806                                  IFLA_AF_SPEC);
807         if (afspec)
808                 return switchdev_port_br_afspec(dev, afspec,
809                                                 switchdev_port_obj_del);
810
811         return 0;
812 }
813 EXPORT_SYMBOL_GPL(switchdev_port_bridge_dellink);
814
815 /**
816  *      switchdev_port_fdb_add - Add FDB (MAC/VLAN) entry to port
817  *
818  *      @ndmsg: netlink hdr
819  *      @nlattr: netlink attributes
820  *      @dev: port device
821  *      @addr: MAC address to add
822  *      @vid: VLAN to add
823  *
824  *      Add FDB entry to switch device.
825  */
826 int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
827                            struct net_device *dev, const unsigned char *addr,
828                            u16 vid, u16 nlm_flags)
829 {
830         struct switchdev_obj_port_fdb fdb = {
831                 .obj.id = SWITCHDEV_OBJ_ID_PORT_FDB,
832                 .addr = addr,
833                 .vid = vid,
834         };
835
836         return switchdev_port_obj_add(dev, &fdb.obj);
837 }
838 EXPORT_SYMBOL_GPL(switchdev_port_fdb_add);
839
840 /**
841  *      switchdev_port_fdb_del - Delete FDB (MAC/VLAN) entry from port
842  *
843  *      @ndmsg: netlink hdr
844  *      @nlattr: netlink attributes
845  *      @dev: port device
846  *      @addr: MAC address to delete
847  *      @vid: VLAN to delete
848  *
849  *      Delete FDB entry from switch device.
850  */
851 int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
852                            struct net_device *dev, const unsigned char *addr,
853                            u16 vid)
854 {
855         struct switchdev_obj_port_fdb fdb = {
856                 .obj.id = SWITCHDEV_OBJ_ID_PORT_FDB,
857                 .addr = addr,
858                 .vid = vid,
859         };
860
861         return switchdev_port_obj_del(dev, &fdb.obj);
862 }
863 EXPORT_SYMBOL_GPL(switchdev_port_fdb_del);
864
865 struct switchdev_fdb_dump {
866         struct switchdev_obj_port_fdb fdb;
867         struct net_device *dev;
868         struct sk_buff *skb;
869         struct netlink_callback *cb;
870         int idx;
871 };
872
873 static int switchdev_port_fdb_dump_cb(struct switchdev_obj *obj)
874 {
875         struct switchdev_obj_port_fdb *fdb = SWITCHDEV_OBJ_PORT_FDB(obj);
876         struct switchdev_fdb_dump *dump =
877                 container_of(fdb, struct switchdev_fdb_dump, fdb);
878         u32 portid = NETLINK_CB(dump->cb->skb).portid;
879         u32 seq = dump->cb->nlh->nlmsg_seq;
880         struct nlmsghdr *nlh;
881         struct ndmsg *ndm;
882
883         if (dump->idx < dump->cb->args[0])
884                 goto skip;
885
886         nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH,
887                         sizeof(*ndm), NLM_F_MULTI);
888         if (!nlh)
889                 return -EMSGSIZE;
890
891         ndm = nlmsg_data(nlh);
892         ndm->ndm_family  = AF_BRIDGE;
893         ndm->ndm_pad1    = 0;
894         ndm->ndm_pad2    = 0;
895         ndm->ndm_flags   = NTF_SELF;
896         ndm->ndm_type    = 0;
897         ndm->ndm_ifindex = dump->dev->ifindex;
898         ndm->ndm_state   = fdb->ndm_state;
899
900         if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, fdb->addr))
901                 goto nla_put_failure;
902
903         if (fdb->vid && nla_put_u16(dump->skb, NDA_VLAN, fdb->vid))
904                 goto nla_put_failure;
905
906         nlmsg_end(dump->skb, nlh);
907
908 skip:
909         dump->idx++;
910         return 0;
911
912 nla_put_failure:
913         nlmsg_cancel(dump->skb, nlh);
914         return -EMSGSIZE;
915 }
916
917 /**
918  *      switchdev_port_fdb_dump - Dump port FDB (MAC/VLAN) entries
919  *
920  *      @skb: netlink skb
921  *      @cb: netlink callback
922  *      @dev: port device
923  *      @filter_dev: filter device
924  *      @idx:
925  *
926  *      Delete FDB entry from switch device.
927  */
928 int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
929                             struct net_device *dev,
930                             struct net_device *filter_dev, int idx)
931 {
932         struct switchdev_fdb_dump dump = {
933                 .fdb.obj.id = SWITCHDEV_OBJ_ID_PORT_FDB,
934                 .dev = dev,
935                 .skb = skb,
936                 .cb = cb,
937                 .idx = idx,
938         };
939
940         switchdev_port_obj_dump(dev, &dump.fdb.obj, switchdev_port_fdb_dump_cb);
941         return dump.idx;
942 }
943 EXPORT_SYMBOL_GPL(switchdev_port_fdb_dump);
944
945 static struct net_device *switchdev_get_lowest_dev(struct net_device *dev)
946 {
947         const struct switchdev_ops *ops = dev->switchdev_ops;
948         struct net_device *lower_dev;
949         struct net_device *port_dev;
950         struct list_head *iter;
951
952         /* Recusively search down until we find a sw port dev.
953          * (A sw port dev supports switchdev_port_attr_get).
954          */
955
956         if (ops && ops->switchdev_port_attr_get)
957                 return dev;
958
959         netdev_for_each_lower_dev(dev, lower_dev, iter) {
960                 port_dev = switchdev_get_lowest_dev(lower_dev);
961                 if (port_dev)
962                         return port_dev;
963         }
964
965         return NULL;
966 }
967
968 static struct net_device *switchdev_get_dev_by_nhs(struct fib_info *fi)
969 {
970         struct switchdev_attr attr = {
971                 .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
972         };
973         struct switchdev_attr prev_attr;
974         struct net_device *dev = NULL;
975         int nhsel;
976
977         /* For this route, all nexthop devs must be on the same switch. */
978
979         for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
980                 const struct fib_nh *nh = &fi->fib_nh[nhsel];
981
982                 if (!nh->nh_dev)
983                         return NULL;
984
985                 dev = switchdev_get_lowest_dev(nh->nh_dev);
986                 if (!dev)
987                         return NULL;
988
989                 if (switchdev_port_attr_get(dev, &attr))
990                         return NULL;
991
992                 if (nhsel > 0 &&
993                     !netdev_phys_item_id_same(&prev_attr.u.ppid, &attr.u.ppid))
994                                 return NULL;
995
996                 prev_attr = attr;
997         }
998
999         return dev;
1000 }
1001
1002 /**
1003  *      switchdev_fib_ipv4_add - Add/modify switch IPv4 route entry
1004  *
1005  *      @dst: route's IPv4 destination address
1006  *      @dst_len: destination address length (prefix length)
1007  *      @fi: route FIB info structure
1008  *      @tos: route TOS
1009  *      @type: route type
1010  *      @nlflags: netlink flags passed in (NLM_F_*)
1011  *      @tb_id: route table ID
1012  *
1013  *      Add/modify switch IPv4 route entry.
1014  */
1015 int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi,
1016                            u8 tos, u8 type, u32 nlflags, u32 tb_id)
1017 {
1018         struct switchdev_obj_ipv4_fib ipv4_fib = {
1019                 .obj.id = SWITCHDEV_OBJ_ID_IPV4_FIB,
1020                 .dst = dst,
1021                 .dst_len = dst_len,
1022                 .fi = fi,
1023                 .tos = tos,
1024                 .type = type,
1025                 .nlflags = nlflags,
1026                 .tb_id = tb_id,
1027         };
1028         struct net_device *dev;
1029         int err = 0;
1030
1031         /* Don't offload route if using custom ip rules or if
1032          * IPv4 FIB offloading has been disabled completely.
1033          */
1034
1035 #ifdef CONFIG_IP_MULTIPLE_TABLES
1036         if (fi->fib_net->ipv4.fib_has_custom_rules)
1037                 return 0;
1038 #endif
1039
1040         if (fi->fib_net->ipv4.fib_offload_disabled)
1041                 return 0;
1042
1043         dev = switchdev_get_dev_by_nhs(fi);
1044         if (!dev)
1045                 return 0;
1046
1047         err = switchdev_port_obj_add(dev, &ipv4_fib.obj);
1048         if (!err)
1049                 fi->fib_flags |= RTNH_F_OFFLOAD;
1050
1051         return err == -EOPNOTSUPP ? 0 : err;
1052 }
1053 EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_add);
1054
1055 /**
1056  *      switchdev_fib_ipv4_del - Delete IPv4 route entry from switch
1057  *
1058  *      @dst: route's IPv4 destination address
1059  *      @dst_len: destination address length (prefix length)
1060  *      @fi: route FIB info structure
1061  *      @tos: route TOS
1062  *      @type: route type
1063  *      @tb_id: route table ID
1064  *
1065  *      Delete IPv4 route entry from switch device.
1066  */
1067 int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi,
1068                            u8 tos, u8 type, u32 tb_id)
1069 {
1070         struct switchdev_obj_ipv4_fib ipv4_fib = {
1071                 .obj.id = SWITCHDEV_OBJ_ID_IPV4_FIB,
1072                 .dst = dst,
1073                 .dst_len = dst_len,
1074                 .fi = fi,
1075                 .tos = tos,
1076                 .type = type,
1077                 .nlflags = 0,
1078                 .tb_id = tb_id,
1079         };
1080         struct net_device *dev;
1081         int err = 0;
1082
1083         if (!(fi->fib_flags & RTNH_F_OFFLOAD))
1084                 return 0;
1085
1086         dev = switchdev_get_dev_by_nhs(fi);
1087         if (!dev)
1088                 return 0;
1089
1090         err = switchdev_port_obj_del(dev, &ipv4_fib.obj);
1091         if (!err)
1092                 fi->fib_flags &= ~RTNH_F_OFFLOAD;
1093
1094         return err == -EOPNOTSUPP ? 0 : err;
1095 }
1096 EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_del);
1097
1098 /**
1099  *      switchdev_fib_ipv4_abort - Abort an IPv4 FIB operation
1100  *
1101  *      @fi: route FIB info structure
1102  */
1103 void switchdev_fib_ipv4_abort(struct fib_info *fi)
1104 {
1105         /* There was a problem installing this route to the offload
1106          * device.  For now, until we come up with more refined
1107          * policy handling, abruptly end IPv4 fib offloading for
1108          * for entire net by flushing offload device(s) of all
1109          * IPv4 routes, and mark IPv4 fib offloading broken from
1110          * this point forward.
1111          */
1112
1113         fib_flush_external(fi->fib_net);
1114         fi->fib_net->ipv4.fib_offload_disabled = true;
1115 }
1116 EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_abort);
1117
1118 static bool switchdev_port_same_parent_id(struct net_device *a,
1119                                           struct net_device *b)
1120 {
1121         struct switchdev_attr a_attr = {
1122                 .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
1123                 .flags = SWITCHDEV_F_NO_RECURSE,
1124         };
1125         struct switchdev_attr b_attr = {
1126                 .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
1127                 .flags = SWITCHDEV_F_NO_RECURSE,
1128         };
1129
1130         if (switchdev_port_attr_get(a, &a_attr) ||
1131             switchdev_port_attr_get(b, &b_attr))
1132                 return false;
1133
1134         return netdev_phys_item_id_same(&a_attr.u.ppid, &b_attr.u.ppid);
1135 }
1136
1137 static u32 switchdev_port_fwd_mark_get(struct net_device *dev,
1138                                        struct net_device *group_dev)
1139 {
1140         struct net_device *lower_dev;
1141         struct list_head *iter;
1142
1143         netdev_for_each_lower_dev(group_dev, lower_dev, iter) {
1144                 if (lower_dev == dev)
1145                         continue;
1146                 if (switchdev_port_same_parent_id(dev, lower_dev))
1147                         return lower_dev->offload_fwd_mark;
1148                 return switchdev_port_fwd_mark_get(dev, lower_dev);
1149         }
1150
1151         return dev->ifindex;
1152 }
1153
1154 static void switchdev_port_fwd_mark_reset(struct net_device *group_dev,
1155                                           u32 old_mark, u32 *reset_mark)
1156 {
1157         struct net_device *lower_dev;
1158         struct list_head *iter;
1159
1160         netdev_for_each_lower_dev(group_dev, lower_dev, iter) {
1161                 if (lower_dev->offload_fwd_mark == old_mark) {
1162                         if (!*reset_mark)
1163                                 *reset_mark = lower_dev->ifindex;
1164                         lower_dev->offload_fwd_mark = *reset_mark;
1165                 }
1166                 switchdev_port_fwd_mark_reset(lower_dev, old_mark, reset_mark);
1167         }
1168 }
1169
1170 /**
1171  *      switchdev_port_fwd_mark_set - Set port offload forwarding mark
1172  *
1173  *      @dev: port device
1174  *      @group_dev: containing device
1175  *      @joining: true if dev is joining group; false if leaving group
1176  *
1177  *      An ungrouped port's offload mark is just its ifindex.  A grouped
1178  *      port's (member of a bridge, for example) offload mark is the ifindex
1179  *      of one of the ports in the group with the same parent (switch) ID.
1180  *      Ports on the same device in the same group will have the same mark.
1181  *
1182  *      Example:
1183  *
1184  *              br0             ifindex=9
1185  *                sw1p1         ifindex=2       mark=2
1186  *                sw1p2         ifindex=3       mark=2
1187  *                sw2p1         ifindex=4       mark=5
1188  *                sw2p2         ifindex=5       mark=5
1189  *
1190  *      If sw2p2 leaves the bridge, we'll have:
1191  *
1192  *              br0             ifindex=9
1193  *                sw1p1         ifindex=2       mark=2
1194  *                sw1p2         ifindex=3       mark=2
1195  *                sw2p1         ifindex=4       mark=4
1196  *              sw2p2           ifindex=5       mark=5
1197  */
1198 void switchdev_port_fwd_mark_set(struct net_device *dev,
1199                                  struct net_device *group_dev,
1200                                  bool joining)
1201 {
1202         u32 mark = dev->ifindex;
1203         u32 reset_mark = 0;
1204
1205         if (group_dev && joining) {
1206                 mark = switchdev_port_fwd_mark_get(dev, group_dev);
1207         } else if (group_dev && !joining) {
1208                 if (dev->offload_fwd_mark == mark)
1209                         /* Ohoh, this port was the mark reference port,
1210                          * but it's leaving the group, so reset the
1211                          * mark for the remaining ports in the group.
1212                          */
1213                         switchdev_port_fwd_mark_reset(group_dev, mark,
1214                                                       &reset_mark);
1215         }
1216
1217         dev->offload_fwd_mark = mark;
1218 }
1219 EXPORT_SYMBOL_GPL(switchdev_port_fwd_mark_set);