]> git.kernelconcepts.de Git - karo-tx-linux.git/blob - net/switchdev/switchdev.c
switchdev: rename SWITCHDEV_ATTR_* enum values to SWITCHDEV_ATTR_ID_*
[karo-tx-linux.git] / net / switchdev / switchdev.c
1 /*
2  * net/switchdev/switchdev.c - Switch device API
3  * Copyright (c) 2014-2015 Jiri Pirko <jiri@resnulli.us>
4  * Copyright (c) 2014-2015 Scott Feldman <sfeldma@gmail.com>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  */
11
12 #include <linux/kernel.h>
13 #include <linux/types.h>
14 #include <linux/init.h>
15 #include <linux/mutex.h>
16 #include <linux/notifier.h>
17 #include <linux/netdevice.h>
18 #include <linux/if_bridge.h>
19 #include <linux/list.h>
20 #include <net/ip_fib.h>
21 #include <net/switchdev.h>
22
23 /**
24  *      switchdev_trans_item_enqueue - Enqueue data item to transaction queue
25  *
26  *      @trans: transaction
27  *      @data: pointer to data being queued
28  *      @destructor: data destructor
29  *      @tritem: transaction item being queued
30  *
31  *      Enqeueue data item to transaction queue. tritem is typically placed in
32  *      cointainter pointed at by data pointer. Destructor is called on
33  *      transaction abort and after successful commit phase in case
34  *      the caller did not dequeue the item before.
35  */
36 void switchdev_trans_item_enqueue(struct switchdev_trans *trans,
37                                   void *data, void (*destructor)(void const *),
38                                   struct switchdev_trans_item *tritem)
39 {
40         tritem->data = data;
41         tritem->destructor = destructor;
42         list_add_tail(&tritem->list, &trans->item_list);
43 }
44 EXPORT_SYMBOL_GPL(switchdev_trans_item_enqueue);
45
46 static struct switchdev_trans_item *
47 __switchdev_trans_item_dequeue(struct switchdev_trans *trans)
48 {
49         struct switchdev_trans_item *tritem;
50
51         if (list_empty(&trans->item_list))
52                 return NULL;
53         tritem = list_first_entry(&trans->item_list,
54                                   struct switchdev_trans_item, list);
55         list_del(&tritem->list);
56         return tritem;
57 }
58
59 /**
60  *      switchdev_trans_item_dequeue - Dequeue data item from transaction queue
61  *
62  *      @trans: transaction
63  */
64 void *switchdev_trans_item_dequeue(struct switchdev_trans *trans)
65 {
66         struct switchdev_trans_item *tritem;
67
68         tritem = __switchdev_trans_item_dequeue(trans);
69         BUG_ON(!tritem);
70         return tritem->data;
71 }
72 EXPORT_SYMBOL_GPL(switchdev_trans_item_dequeue);
73
74 static void switchdev_trans_init(struct switchdev_trans *trans)
75 {
76         INIT_LIST_HEAD(&trans->item_list);
77 }
78
79 static void switchdev_trans_items_destroy(struct switchdev_trans *trans)
80 {
81         struct switchdev_trans_item *tritem;
82
83         while ((tritem = __switchdev_trans_item_dequeue(trans)))
84                 tritem->destructor(tritem->data);
85 }
86
87 static void switchdev_trans_items_warn_destroy(struct net_device *dev,
88                                                struct switchdev_trans *trans)
89 {
90         WARN(!list_empty(&trans->item_list), "%s: transaction item queue is not empty.\n",
91              dev->name);
92         switchdev_trans_items_destroy(trans);
93 }
94
95 /**
96  *      switchdev_port_attr_get - Get port attribute
97  *
98  *      @dev: port device
99  *      @attr: attribute to get
100  */
101 int switchdev_port_attr_get(struct net_device *dev, struct switchdev_attr *attr)
102 {
103         const struct switchdev_ops *ops = dev->switchdev_ops;
104         struct net_device *lower_dev;
105         struct list_head *iter;
106         struct switchdev_attr first = {
107                 .id = SWITCHDEV_ATTR_ID_UNDEFINED
108         };
109         int err = -EOPNOTSUPP;
110
111         if (ops && ops->switchdev_port_attr_get)
112                 return ops->switchdev_port_attr_get(dev, attr);
113
114         if (attr->flags & SWITCHDEV_F_NO_RECURSE)
115                 return err;
116
117         /* Switch device port(s) may be stacked under
118          * bond/team/vlan dev, so recurse down to get attr on
119          * each port.  Return -ENODATA if attr values don't
120          * compare across ports.
121          */
122
123         netdev_for_each_lower_dev(dev, lower_dev, iter) {
124                 err = switchdev_port_attr_get(lower_dev, attr);
125                 if (err)
126                         break;
127                 if (first.id == SWITCHDEV_ATTR_ID_UNDEFINED)
128                         first = *attr;
129                 else if (memcmp(&first, attr, sizeof(*attr)))
130                         return -ENODATA;
131         }
132
133         return err;
134 }
135 EXPORT_SYMBOL_GPL(switchdev_port_attr_get);
136
137 static int __switchdev_port_attr_set(struct net_device *dev,
138                                      struct switchdev_attr *attr,
139                                      struct switchdev_trans *trans)
140 {
141         const struct switchdev_ops *ops = dev->switchdev_ops;
142         struct net_device *lower_dev;
143         struct list_head *iter;
144         int err = -EOPNOTSUPP;
145
146         if (ops && ops->switchdev_port_attr_set)
147                 return ops->switchdev_port_attr_set(dev, attr, trans);
148
149         if (attr->flags & SWITCHDEV_F_NO_RECURSE)
150                 return err;
151
152         /* Switch device port(s) may be stacked under
153          * bond/team/vlan dev, so recurse down to set attr on
154          * each port.
155          */
156
157         netdev_for_each_lower_dev(dev, lower_dev, iter) {
158                 err = __switchdev_port_attr_set(lower_dev, attr, trans);
159                 if (err)
160                         break;
161         }
162
163         return err;
164 }
165
/* Context for an attribute set deferred to a work item. Allocated by
 * switchdev_port_attr_set_defer() and freed by the work handler.
 */
struct switchdev_attr_set_work {
	struct work_struct work;	/* work item run by switchdev_port_attr_set_work() */
	struct net_device *dev;		/* target device; reference held until the work has run */
	struct switchdev_attr attr;	/* private copy of the attribute to set */
};
171
172 static void switchdev_port_attr_set_work(struct work_struct *work)
173 {
174         struct switchdev_attr_set_work *asw =
175                 container_of(work, struct switchdev_attr_set_work, work);
176         int err;
177
178         rtnl_lock();
179         err = switchdev_port_attr_set(asw->dev, &asw->attr);
180         if (err && err != -EOPNOTSUPP)
181                 netdev_err(asw->dev, "failed (err=%d) to set attribute (id=%d)\n",
182                            err, asw->attr.id);
183         rtnl_unlock();
184
185         dev_put(asw->dev);
186         kfree(work);
187 }
188
189 static int switchdev_port_attr_set_defer(struct net_device *dev,
190                                          struct switchdev_attr *attr)
191 {
192         struct switchdev_attr_set_work *asw;
193
194         asw = kmalloc(sizeof(*asw), GFP_ATOMIC);
195         if (!asw)
196                 return -ENOMEM;
197
198         INIT_WORK(&asw->work, switchdev_port_attr_set_work);
199
200         dev_hold(dev);
201         asw->dev = dev;
202         memcpy(&asw->attr, attr, sizeof(asw->attr));
203
204         schedule_work(&asw->work);
205
206         return 0;
207 }
208
209 /**
210  *      switchdev_port_attr_set - Set port attribute
211  *
212  *      @dev: port device
213  *      @attr: attribute to set
214  *
215  *      Use a 2-phase prepare-commit transaction model to ensure
216  *      system is not left in a partially updated state due to
217  *      failure from driver/device.
218  */
219 int switchdev_port_attr_set(struct net_device *dev, struct switchdev_attr *attr)
220 {
221         struct switchdev_trans trans;
222         int err;
223
224         if (!rtnl_is_locked()) {
225                 /* Running prepare-commit transaction across stacked
226                  * devices requires nothing moves, so if rtnl_lock is
227                  * not held, schedule a worker thread to hold rtnl_lock
228                  * while setting attr.
229                  */
230
231                 return switchdev_port_attr_set_defer(dev, attr);
232         }
233
234         switchdev_trans_init(&trans);
235
236         /* Phase I: prepare for attr set. Driver/device should fail
237          * here if there are going to be issues in the commit phase,
238          * such as lack of resources or support.  The driver/device
239          * should reserve resources needed for the commit phase here,
240          * but should not commit the attr.
241          */
242
243         trans.ph_prepare = true;
244         err = __switchdev_port_attr_set(dev, attr, &trans);
245         if (err) {
246                 /* Prepare phase failed: abort the transaction.  Any
247                  * resources reserved in the prepare phase are
248                  * released.
249                  */
250
251                 if (err != -EOPNOTSUPP)
252                         switchdev_trans_items_destroy(&trans);
253
254                 return err;
255         }
256
257         /* Phase II: commit attr set.  This cannot fail as a fault
258          * of driver/device.  If it does, it's a bug in the driver/device
259          * because the driver said everythings was OK in phase I.
260          */
261
262         trans.ph_prepare = false;
263         err = __switchdev_port_attr_set(dev, attr, &trans);
264         WARN(err, "%s: Commit of attribute (id=%d) failed.\n",
265              dev->name, attr->id);
266         switchdev_trans_items_warn_destroy(dev, &trans);
267
268         return err;
269 }
270 EXPORT_SYMBOL_GPL(switchdev_port_attr_set);
271
272 static int __switchdev_port_obj_add(struct net_device *dev,
273                                     enum switchdev_obj_id id, const void *obj,
274                                     struct switchdev_trans *trans)
275 {
276         const struct switchdev_ops *ops = dev->switchdev_ops;
277         struct net_device *lower_dev;
278         struct list_head *iter;
279         int err = -EOPNOTSUPP;
280
281         if (ops && ops->switchdev_port_obj_add)
282                 return ops->switchdev_port_obj_add(dev, id, obj, trans);
283
284         /* Switch device port(s) may be stacked under
285          * bond/team/vlan dev, so recurse down to add object on
286          * each port.
287          */
288
289         netdev_for_each_lower_dev(dev, lower_dev, iter) {
290                 err = __switchdev_port_obj_add(lower_dev, id, obj, trans);
291                 if (err)
292                         break;
293         }
294
295         return err;
296 }
297
298 /**
299  *      switchdev_port_obj_add - Add port object
300  *
301  *      @dev: port device
302  *      @id: object ID
303  *      @obj: object to add
304  *
305  *      Use a 2-phase prepare-commit transaction model to ensure
306  *      system is not left in a partially updated state due to
307  *      failure from driver/device.
308  *
309  *      rtnl_lock must be held.
310  */
311 int switchdev_port_obj_add(struct net_device *dev, enum switchdev_obj_id id,
312                            const void *obj)
313 {
314         struct switchdev_trans trans;
315         int err;
316
317         ASSERT_RTNL();
318
319         switchdev_trans_init(&trans);
320
321         /* Phase I: prepare for obj add. Driver/device should fail
322          * here if there are going to be issues in the commit phase,
323          * such as lack of resources or support.  The driver/device
324          * should reserve resources needed for the commit phase here,
325          * but should not commit the obj.
326          */
327
328         trans.ph_prepare = true;
329         err = __switchdev_port_obj_add(dev, id, obj, &trans);
330         if (err) {
331                 /* Prepare phase failed: abort the transaction.  Any
332                  * resources reserved in the prepare phase are
333                  * released.
334                  */
335
336                 if (err != -EOPNOTSUPP)
337                         switchdev_trans_items_destroy(&trans);
338
339                 return err;
340         }
341
342         /* Phase II: commit obj add.  This cannot fail as a fault
343          * of driver/device.  If it does, it's a bug in the driver/device
344          * because the driver said everythings was OK in phase I.
345          */
346
347         trans.ph_prepare = false;
348         err = __switchdev_port_obj_add(dev, id, obj, &trans);
349         WARN(err, "%s: Commit of object (id=%d) failed.\n", dev->name, id);
350         switchdev_trans_items_warn_destroy(dev, &trans);
351
352         return err;
353 }
354 EXPORT_SYMBOL_GPL(switchdev_port_obj_add);
355
356 /**
357  *      switchdev_port_obj_del - Delete port object
358  *
359  *      @dev: port device
360  *      @id: object ID
361  *      @obj: object to delete
362  */
363 int switchdev_port_obj_del(struct net_device *dev, enum switchdev_obj_id id,
364                            const void *obj)
365 {
366         const struct switchdev_ops *ops = dev->switchdev_ops;
367         struct net_device *lower_dev;
368         struct list_head *iter;
369         int err = -EOPNOTSUPP;
370
371         if (ops && ops->switchdev_port_obj_del)
372                 return ops->switchdev_port_obj_del(dev, id, obj);
373
374         /* Switch device port(s) may be stacked under
375          * bond/team/vlan dev, so recurse down to delete object on
376          * each port.
377          */
378
379         netdev_for_each_lower_dev(dev, lower_dev, iter) {
380                 err = switchdev_port_obj_del(lower_dev, id, obj);
381                 if (err)
382                         break;
383         }
384
385         return err;
386 }
387 EXPORT_SYMBOL_GPL(switchdev_port_obj_del);
388
389 /**
390  *      switchdev_port_obj_dump - Dump port objects
391  *
392  *      @dev: port device
393  *      @id: object ID
394  *      @obj: object to dump
395  *      @cb: function to call with a filled object
396  */
397 int switchdev_port_obj_dump(struct net_device *dev, enum switchdev_obj_id id,
398                             void *obj, int (*cb)(void *obj))
399 {
400         const struct switchdev_ops *ops = dev->switchdev_ops;
401         struct net_device *lower_dev;
402         struct list_head *iter;
403         int err = -EOPNOTSUPP;
404
405         if (ops && ops->switchdev_port_obj_dump)
406                 return ops->switchdev_port_obj_dump(dev, id, obj, cb);
407
408         /* Switch device port(s) may be stacked under
409          * bond/team/vlan dev, so recurse down to dump objects on
410          * first port at bottom of stack.
411          */
412
413         netdev_for_each_lower_dev(dev, lower_dev, iter) {
414                 err = switchdev_port_obj_dump(lower_dev, id, obj, cb);
415                 break;
416         }
417
418         return err;
419 }
420 EXPORT_SYMBOL_GPL(switchdev_port_obj_dump);
421
/* switchdev_mutex is held around every register, unregister and call
 * operation on switchdev_notif_chain in this file.
 */
static DEFINE_MUTEX(switchdev_mutex);
static RAW_NOTIFIER_HEAD(switchdev_notif_chain);
424
425 /**
426  *      register_switchdev_notifier - Register notifier
427  *      @nb: notifier_block
428  *
429  *      Register switch device notifier. This should be used by code
430  *      which needs to monitor events happening in particular device.
431  *      Return values are same as for atomic_notifier_chain_register().
432  */
433 int register_switchdev_notifier(struct notifier_block *nb)
434 {
435         int err;
436
437         mutex_lock(&switchdev_mutex);
438         err = raw_notifier_chain_register(&switchdev_notif_chain, nb);
439         mutex_unlock(&switchdev_mutex);
440         return err;
441 }
442 EXPORT_SYMBOL_GPL(register_switchdev_notifier);
443
444 /**
445  *      unregister_switchdev_notifier - Unregister notifier
446  *      @nb: notifier_block
447  *
448  *      Unregister switch device notifier.
449  *      Return values are same as for atomic_notifier_chain_unregister().
450  */
451 int unregister_switchdev_notifier(struct notifier_block *nb)
452 {
453         int err;
454
455         mutex_lock(&switchdev_mutex);
456         err = raw_notifier_chain_unregister(&switchdev_notif_chain, nb);
457         mutex_unlock(&switchdev_mutex);
458         return err;
459 }
460 EXPORT_SYMBOL_GPL(unregister_switchdev_notifier);
461
462 /**
463  *      call_switchdev_notifiers - Call notifiers
464  *      @val: value passed unmodified to notifier function
465  *      @dev: port device
466  *      @info: notifier information data
467  *
468  *      Call all network notifier blocks. This should be called by driver
469  *      when it needs to propagate hardware event.
470  *      Return values are same as for atomic_notifier_call_chain().
471  */
472 int call_switchdev_notifiers(unsigned long val, struct net_device *dev,
473                              struct switchdev_notifier_info *info)
474 {
475         int err;
476
477         info->dev = dev;
478         mutex_lock(&switchdev_mutex);
479         err = raw_notifier_call_chain(&switchdev_notif_chain, val, info);
480         mutex_unlock(&switchdev_mutex);
481         return err;
482 }
483 EXPORT_SYMBOL_GPL(call_switchdev_notifiers);
484
/* State carried across switchdev_port_vlan_dump_cb() invocations while
 * VLAN info is written into a netlink message.
 */
struct switchdev_vlan_dump {
	struct switchdev_obj_vlan vlan;	/* object handed to the dump op; the callback recovers this struct from it */
	struct sk_buff *skb;		/* message the IFLA_BRIDGE_VLAN_INFO attributes are put into */
	u32 filter_mask;		/* RTEXT_FILTER_* bits selecting per-VID or compressed output */
	u16 flags;			/* flags of the pending entry/range */
	u16 begin;			/* first VID of the pending entry/range */
	u16 end;			/* last VID of the pending entry/range */
};
493
494 static int switchdev_port_vlan_dump_put(struct switchdev_vlan_dump *dump)
495 {
496         struct bridge_vlan_info vinfo;
497
498         vinfo.flags = dump->flags;
499
500         if (dump->begin == 0 && dump->end == 0) {
501                 return 0;
502         } else if (dump->begin == dump->end) {
503                 vinfo.vid = dump->begin;
504                 if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO,
505                             sizeof(vinfo), &vinfo))
506                         return -EMSGSIZE;
507         } else {
508                 vinfo.vid = dump->begin;
509                 vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_BEGIN;
510                 if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO,
511                             sizeof(vinfo), &vinfo))
512                         return -EMSGSIZE;
513                 vinfo.vid = dump->end;
514                 vinfo.flags &= ~BRIDGE_VLAN_INFO_RANGE_BEGIN;
515                 vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_END;
516                 if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO,
517                             sizeof(vinfo), &vinfo))
518                         return -EMSGSIZE;
519         }
520
521         return 0;
522 }
523
524 static int switchdev_port_vlan_dump_cb(void *obj)
525 {
526         struct switchdev_obj_vlan *vlan = obj;
527         struct switchdev_vlan_dump *dump =
528                 container_of(vlan, struct switchdev_vlan_dump, vlan);
529         int err = 0;
530
531         if (vlan->vid_begin > vlan->vid_end)
532                 return -EINVAL;
533
534         if (dump->filter_mask & RTEXT_FILTER_BRVLAN) {
535                 dump->flags = vlan->flags;
536                 for (dump->begin = dump->end = vlan->vid_begin;
537                      dump->begin <= vlan->vid_end;
538                      dump->begin++, dump->end++) {
539                         err = switchdev_port_vlan_dump_put(dump);
540                         if (err)
541                                 return err;
542                 }
543         } else if (dump->filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED) {
544                 if (dump->begin > vlan->vid_begin &&
545                     dump->begin >= vlan->vid_end) {
546                         if ((dump->begin - 1) == vlan->vid_end &&
547                             dump->flags == vlan->flags) {
548                                 /* prepend */
549                                 dump->begin = vlan->vid_begin;
550                         } else {
551                                 err = switchdev_port_vlan_dump_put(dump);
552                                 dump->flags = vlan->flags;
553                                 dump->begin = vlan->vid_begin;
554                                 dump->end = vlan->vid_end;
555                         }
556                 } else if (dump->end <= vlan->vid_begin &&
557                            dump->end < vlan->vid_end) {
558                         if ((dump->end  + 1) == vlan->vid_begin &&
559                             dump->flags == vlan->flags) {
560                                 /* append */
561                                 dump->end = vlan->vid_end;
562                         } else {
563                                 err = switchdev_port_vlan_dump_put(dump);
564                                 dump->flags = vlan->flags;
565                                 dump->begin = vlan->vid_begin;
566                                 dump->end = vlan->vid_end;
567                         }
568                 } else {
569                         err = -EINVAL;
570                 }
571         }
572
573         return err;
574 }
575
576 static int switchdev_port_vlan_fill(struct sk_buff *skb, struct net_device *dev,
577                                     u32 filter_mask)
578 {
579         struct switchdev_vlan_dump dump = {
580                 .skb = skb,
581                 .filter_mask = filter_mask,
582         };
583         int err = 0;
584
585         if ((filter_mask & RTEXT_FILTER_BRVLAN) ||
586             (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) {
587                 err = switchdev_port_obj_dump(dev, SWITCHDEV_OBJ_ID_PORT_VLAN,
588                                               &dump.vlan,
589                                               switchdev_port_vlan_dump_cb);
590                 if (err)
591                         goto err_out;
592                 if (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)
593                         /* last one */
594                         err = switchdev_port_vlan_dump_put(&dump);
595         }
596
597 err_out:
598         return err == -EOPNOTSUPP ? 0 : err;
599 }
600
601 /**
602  *      switchdev_port_bridge_getlink - Get bridge port attributes
603  *
604  *      @dev: port device
605  *
606  *      Called for SELF on rtnl_bridge_getlink to get bridge port
607  *      attributes.
608  */
609 int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
610                                   struct net_device *dev, u32 filter_mask,
611                                   int nlflags)
612 {
613         struct switchdev_attr attr = {
614                 .id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS,
615         };
616         u16 mode = BRIDGE_MODE_UNDEF;
617         u32 mask = BR_LEARNING | BR_LEARNING_SYNC;
618         int err;
619
620         err = switchdev_port_attr_get(dev, &attr);
621         if (err && err != -EOPNOTSUPP)
622                 return err;
623
624         return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode,
625                                        attr.u.brport_flags, mask, nlflags,
626                                        filter_mask, switchdev_port_vlan_fill);
627 }
628 EXPORT_SYMBOL_GPL(switchdev_port_bridge_getlink);
629
630 static int switchdev_port_br_setflag(struct net_device *dev,
631                                      struct nlattr *nlattr,
632                                      unsigned long brport_flag)
633 {
634         struct switchdev_attr attr = {
635                 .id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS,
636         };
637         u8 flag = nla_get_u8(nlattr);
638         int err;
639
640         err = switchdev_port_attr_get(dev, &attr);
641         if (err)
642                 return err;
643
644         if (flag)
645                 attr.u.brport_flags |= brport_flag;
646         else
647                 attr.u.brport_flags &= ~brport_flag;
648
649         return switchdev_port_attr_set(dev, &attr);
650 }
651
/* Expected payload types of the IFLA_BRPORT_* attributes; used to
 * validate a nested IFLA_PROTINFO before it is acted upon.
 */
static const struct nla_policy
switchdev_port_bridge_policy[IFLA_BRPORT_MAX + 1] = {
	[IFLA_BRPORT_STATE]		= { .type = NLA_U8 },
	[IFLA_BRPORT_COST]		= { .type = NLA_U32 },
	[IFLA_BRPORT_PRIORITY]		= { .type = NLA_U16 },
	[IFLA_BRPORT_MODE]		= { .type = NLA_U8 },
	[IFLA_BRPORT_GUARD]		= { .type = NLA_U8 },
	[IFLA_BRPORT_PROTECT]		= { .type = NLA_U8 },
	[IFLA_BRPORT_FAST_LEAVE]	= { .type = NLA_U8 },
	[IFLA_BRPORT_LEARNING]		= { .type = NLA_U8 },
	[IFLA_BRPORT_LEARNING_SYNC]	= { .type = NLA_U8 },
	[IFLA_BRPORT_UNICAST_FLOOD]	= { .type = NLA_U8 },
};
665
666 static int switchdev_port_br_setlink_protinfo(struct net_device *dev,
667                                               struct nlattr *protinfo)
668 {
669         struct nlattr *attr;
670         int rem;
671         int err;
672
673         err = nla_validate_nested(protinfo, IFLA_BRPORT_MAX,
674                                   switchdev_port_bridge_policy);
675         if (err)
676                 return err;
677
678         nla_for_each_nested(attr, protinfo, rem) {
679                 switch (nla_type(attr)) {
680                 case IFLA_BRPORT_LEARNING:
681                         err = switchdev_port_br_setflag(dev, attr,
682                                                         BR_LEARNING);
683                         break;
684                 case IFLA_BRPORT_LEARNING_SYNC:
685                         err = switchdev_port_br_setflag(dev, attr,
686                                                         BR_LEARNING_SYNC);
687                         break;
688                 default:
689                         err = -EOPNOTSUPP;
690                         break;
691                 }
692                 if (err)
693                         return err;
694         }
695
696         return 0;
697 }
698
699 static int switchdev_port_br_afspec(struct net_device *dev,
700                                     struct nlattr *afspec,
701                                     int (*f)(struct net_device *dev,
702                                              enum switchdev_obj_id id,
703                                              const void *obj))
704 {
705         struct nlattr *attr;
706         struct bridge_vlan_info *vinfo;
707         struct switchdev_obj_vlan vlan = { 0 };
708         int rem;
709         int err;
710
711         nla_for_each_nested(attr, afspec, rem) {
712                 if (nla_type(attr) != IFLA_BRIDGE_VLAN_INFO)
713                         continue;
714                 if (nla_len(attr) != sizeof(struct bridge_vlan_info))
715                         return -EINVAL;
716                 vinfo = nla_data(attr);
717                 vlan.flags = vinfo->flags;
718                 if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) {
719                         if (vlan.vid_begin)
720                                 return -EINVAL;
721                         vlan.vid_begin = vinfo->vid;
722                 } else if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_END) {
723                         if (!vlan.vid_begin)
724                                 return -EINVAL;
725                         vlan.vid_end = vinfo->vid;
726                         if (vlan.vid_end <= vlan.vid_begin)
727                                 return -EINVAL;
728                         err = f(dev, SWITCHDEV_OBJ_ID_PORT_VLAN, &vlan);
729                         if (err)
730                                 return err;
731                         memset(&vlan, 0, sizeof(vlan));
732                 } else {
733                         if (vlan.vid_begin)
734                                 return -EINVAL;
735                         vlan.vid_begin = vinfo->vid;
736                         vlan.vid_end = vinfo->vid;
737                         err = f(dev, SWITCHDEV_OBJ_ID_PORT_VLAN, &vlan);
738                         if (err)
739                                 return err;
740                         memset(&vlan, 0, sizeof(vlan));
741                 }
742         }
743
744         return 0;
745 }
746
747 /**
748  *      switchdev_port_bridge_setlink - Set bridge port attributes
749  *
750  *      @dev: port device
751  *      @nlh: netlink header
752  *      @flags: netlink flags
753  *
754  *      Called for SELF on rtnl_bridge_setlink to set bridge port
755  *      attributes.
756  */
757 int switchdev_port_bridge_setlink(struct net_device *dev,
758                                   struct nlmsghdr *nlh, u16 flags)
759 {
760         struct nlattr *protinfo;
761         struct nlattr *afspec;
762         int err = 0;
763
764         protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
765                                    IFLA_PROTINFO);
766         if (protinfo) {
767                 err = switchdev_port_br_setlink_protinfo(dev, protinfo);
768                 if (err)
769                         return err;
770         }
771
772         afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
773                                  IFLA_AF_SPEC);
774         if (afspec)
775                 err = switchdev_port_br_afspec(dev, afspec,
776                                                switchdev_port_obj_add);
777
778         return err;
779 }
780 EXPORT_SYMBOL_GPL(switchdev_port_bridge_setlink);
781
782 /**
783  *      switchdev_port_bridge_dellink - Set bridge port attributes
784  *
785  *      @dev: port device
786  *      @nlh: netlink header
787  *      @flags: netlink flags
788  *
789  *      Called for SELF on rtnl_bridge_dellink to set bridge port
790  *      attributes.
791  */
792 int switchdev_port_bridge_dellink(struct net_device *dev,
793                                   struct nlmsghdr *nlh, u16 flags)
794 {
795         struct nlattr *afspec;
796
797         afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
798                                  IFLA_AF_SPEC);
799         if (afspec)
800                 return switchdev_port_br_afspec(dev, afspec,
801                                                 switchdev_port_obj_del);
802
803         return 0;
804 }
805 EXPORT_SYMBOL_GPL(switchdev_port_bridge_dellink);
806
807 /**
808  *      switchdev_port_fdb_add - Add FDB (MAC/VLAN) entry to port
809  *
810  *      @ndmsg: netlink hdr
811  *      @nlattr: netlink attributes
812  *      @dev: port device
813  *      @addr: MAC address to add
814  *      @vid: VLAN to add
815  *
816  *      Add FDB entry to switch device.
817  */
818 int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
819                            struct net_device *dev, const unsigned char *addr,
820                            u16 vid, u16 nlm_flags)
821 {
822         struct switchdev_obj_fdb fdb = {
823                 .addr = addr,
824                 .vid = vid,
825         };
826
827         return switchdev_port_obj_add(dev, SWITCHDEV_OBJ_ID_PORT_FDB, &fdb);
828 }
829 EXPORT_SYMBOL_GPL(switchdev_port_fdb_add);
830
831 /**
832  *      switchdev_port_fdb_del - Delete FDB (MAC/VLAN) entry from port
833  *
834  *      @ndmsg: netlink hdr
835  *      @nlattr: netlink attributes
836  *      @dev: port device
837  *      @addr: MAC address to delete
838  *      @vid: VLAN to delete
839  *
840  *      Delete FDB entry from switch device.
841  */
842 int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
843                            struct net_device *dev, const unsigned char *addr,
844                            u16 vid)
845 {
846         struct switchdev_obj_fdb fdb = {
847                 .addr = addr,
848                 .vid = vid,
849         };
850
851         return switchdev_port_obj_del(dev, SWITCHDEV_OBJ_ID_PORT_FDB, &fdb);
852 }
853 EXPORT_SYMBOL_GPL(switchdev_port_fdb_del);
854
/* State used by switchdev_port_fdb_dump_cb() while FDB entries are
 * written into a netlink dump message.
 */
struct switchdev_fdb_dump {
	struct switchdev_obj_fdb fdb;	/* filled entry; the callback recovers this struct from it */
	struct net_device *dev;		/* device whose ifindex is reported for each entry */
	struct sk_buff *skb;		/* message the entries are put into */
	struct netlink_callback *cb;	/* supplies portid, sequence number and args[0] */
	int idx;			/* count of entries seen; those below cb->args[0] are skipped */
};
862
863 static int switchdev_port_fdb_dump_cb(void *obj)
864 {
865         struct switchdev_obj_fdb *fdb = obj;
866         struct switchdev_fdb_dump *dump =
867                 container_of(fdb, struct switchdev_fdb_dump, fdb);
868         u32 portid = NETLINK_CB(dump->cb->skb).portid;
869         u32 seq = dump->cb->nlh->nlmsg_seq;
870         struct nlmsghdr *nlh;
871         struct ndmsg *ndm;
872
873         if (dump->idx < dump->cb->args[0])
874                 goto skip;
875
876         nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH,
877                         sizeof(*ndm), NLM_F_MULTI);
878         if (!nlh)
879                 return -EMSGSIZE;
880
881         ndm = nlmsg_data(nlh);
882         ndm->ndm_family  = AF_BRIDGE;
883         ndm->ndm_pad1    = 0;
884         ndm->ndm_pad2    = 0;
885         ndm->ndm_flags   = NTF_SELF;
886         ndm->ndm_type    = 0;
887         ndm->ndm_ifindex = dump->dev->ifindex;
888         ndm->ndm_state   = fdb->ndm_state;
889
890         if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, fdb->addr))
891                 goto nla_put_failure;
892
893         if (fdb->vid && nla_put_u16(dump->skb, NDA_VLAN, fdb->vid))
894                 goto nla_put_failure;
895
896         nlmsg_end(dump->skb, nlh);
897
898 skip:
899         dump->idx++;
900         return 0;
901
902 nla_put_failure:
903         nlmsg_cancel(dump->skb, nlh);
904         return -EMSGSIZE;
905 }
906
907 /**
908  *      switchdev_port_fdb_dump - Dump port FDB (MAC/VLAN) entries
909  *
910  *      @skb: netlink skb
911  *      @cb: netlink callback
912  *      @dev: port device
913  *      @filter_dev: filter device
914  *      @idx:
915  *
916  *      Delete FDB entry from switch device.
917  */
918 int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
919                             struct net_device *dev,
920                             struct net_device *filter_dev, int idx)
921 {
922         struct switchdev_fdb_dump dump = {
923                 .dev = dev,
924                 .skb = skb,
925                 .cb = cb,
926                 .idx = idx,
927         };
928
929         switchdev_port_obj_dump(dev, SWITCHDEV_OBJ_ID_PORT_FDB, &dump.fdb,
930                                 switchdev_port_fdb_dump_cb);
931         return dump.idx;
932 }
933 EXPORT_SYMBOL_GPL(switchdev_port_fdb_dump);
934
935 static struct net_device *switchdev_get_lowest_dev(struct net_device *dev)
936 {
937         const struct switchdev_ops *ops = dev->switchdev_ops;
938         struct net_device *lower_dev;
939         struct net_device *port_dev;
940         struct list_head *iter;
941
942         /* Recusively search down until we find a sw port dev.
943          * (A sw port dev supports switchdev_port_attr_get).
944          */
945
946         if (ops && ops->switchdev_port_attr_get)
947                 return dev;
948
949         netdev_for_each_lower_dev(dev, lower_dev, iter) {
950                 port_dev = switchdev_get_lowest_dev(lower_dev);
951                 if (port_dev)
952                         return port_dev;
953         }
954
955         return NULL;
956 }
957
958 static struct net_device *switchdev_get_dev_by_nhs(struct fib_info *fi)
959 {
960         struct switchdev_attr attr = {
961                 .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
962         };
963         struct switchdev_attr prev_attr;
964         struct net_device *dev = NULL;
965         int nhsel;
966
967         /* For this route, all nexthop devs must be on the same switch. */
968
969         for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
970                 const struct fib_nh *nh = &fi->fib_nh[nhsel];
971
972                 if (!nh->nh_dev)
973                         return NULL;
974
975                 dev = switchdev_get_lowest_dev(nh->nh_dev);
976                 if (!dev)
977                         return NULL;
978
979                 if (switchdev_port_attr_get(dev, &attr))
980                         return NULL;
981
982                 if (nhsel > 0 &&
983                     !netdev_phys_item_id_same(&prev_attr.u.ppid, &attr.u.ppid))
984                                 return NULL;
985
986                 prev_attr = attr;
987         }
988
989         return dev;
990 }
991
992 /**
993  *      switchdev_fib_ipv4_add - Add/modify switch IPv4 route entry
994  *
995  *      @dst: route's IPv4 destination address
996  *      @dst_len: destination address length (prefix length)
997  *      @fi: route FIB info structure
998  *      @tos: route TOS
999  *      @type: route type
1000  *      @nlflags: netlink flags passed in (NLM_F_*)
1001  *      @tb_id: route table ID
1002  *
1003  *      Add/modify switch IPv4 route entry.
1004  */
1005 int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi,
1006                            u8 tos, u8 type, u32 nlflags, u32 tb_id)
1007 {
1008         struct switchdev_obj_ipv4_fib ipv4_fib = {
1009                 .dst = dst,
1010                 .dst_len = dst_len,
1011                 .fi = fi,
1012                 .tos = tos,
1013                 .type = type,
1014                 .nlflags = nlflags,
1015                 .tb_id = tb_id,
1016         };
1017         struct net_device *dev;
1018         int err = 0;
1019
1020         /* Don't offload route if using custom ip rules or if
1021          * IPv4 FIB offloading has been disabled completely.
1022          */
1023
1024 #ifdef CONFIG_IP_MULTIPLE_TABLES
1025         if (fi->fib_net->ipv4.fib_has_custom_rules)
1026                 return 0;
1027 #endif
1028
1029         if (fi->fib_net->ipv4.fib_offload_disabled)
1030                 return 0;
1031
1032         dev = switchdev_get_dev_by_nhs(fi);
1033         if (!dev)
1034                 return 0;
1035
1036         err = switchdev_port_obj_add(dev, SWITCHDEV_OBJ_ID_IPV4_FIB, &ipv4_fib);
1037         if (!err)
1038                 fi->fib_flags |= RTNH_F_OFFLOAD;
1039
1040         return err == -EOPNOTSUPP ? 0 : err;
1041 }
1042 EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_add);
1043
1044 /**
1045  *      switchdev_fib_ipv4_del - Delete IPv4 route entry from switch
1046  *
1047  *      @dst: route's IPv4 destination address
1048  *      @dst_len: destination address length (prefix length)
1049  *      @fi: route FIB info structure
1050  *      @tos: route TOS
1051  *      @type: route type
1052  *      @tb_id: route table ID
1053  *
1054  *      Delete IPv4 route entry from switch device.
1055  */
1056 int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi,
1057                            u8 tos, u8 type, u32 tb_id)
1058 {
1059         struct switchdev_obj_ipv4_fib ipv4_fib = {
1060                 .dst = dst,
1061                 .dst_len = dst_len,
1062                 .fi = fi,
1063                 .tos = tos,
1064                 .type = type,
1065                 .nlflags = 0,
1066                 .tb_id = tb_id,
1067         };
1068         struct net_device *dev;
1069         int err = 0;
1070
1071         if (!(fi->fib_flags & RTNH_F_OFFLOAD))
1072                 return 0;
1073
1074         dev = switchdev_get_dev_by_nhs(fi);
1075         if (!dev)
1076                 return 0;
1077
1078         err = switchdev_port_obj_del(dev, SWITCHDEV_OBJ_ID_IPV4_FIB, &ipv4_fib);
1079         if (!err)
1080                 fi->fib_flags &= ~RTNH_F_OFFLOAD;
1081
1082         return err == -EOPNOTSUPP ? 0 : err;
1083 }
1084 EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_del);
1085
1086 /**
1087  *      switchdev_fib_ipv4_abort - Abort an IPv4 FIB operation
1088  *
1089  *      @fi: route FIB info structure
1090  */
1091 void switchdev_fib_ipv4_abort(struct fib_info *fi)
1092 {
1093         /* There was a problem installing this route to the offload
1094          * device.  For now, until we come up with more refined
1095          * policy handling, abruptly end IPv4 fib offloading for
1096          * for entire net by flushing offload device(s) of all
1097          * IPv4 routes, and mark IPv4 fib offloading broken from
1098          * this point forward.
1099          */
1100
1101         fib_flush_external(fi->fib_net);
1102         fi->fib_net->ipv4.fib_offload_disabled = true;
1103 }
1104 EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_abort);
1105
1106 static bool switchdev_port_same_parent_id(struct net_device *a,
1107                                           struct net_device *b)
1108 {
1109         struct switchdev_attr a_attr = {
1110                 .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
1111                 .flags = SWITCHDEV_F_NO_RECURSE,
1112         };
1113         struct switchdev_attr b_attr = {
1114                 .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
1115                 .flags = SWITCHDEV_F_NO_RECURSE,
1116         };
1117
1118         if (switchdev_port_attr_get(a, &a_attr) ||
1119             switchdev_port_attr_get(b, &b_attr))
1120                 return false;
1121
1122         return netdev_phys_item_id_same(&a_attr.u.ppid, &b_attr.u.ppid);
1123 }
1124
1125 static u32 switchdev_port_fwd_mark_get(struct net_device *dev,
1126                                        struct net_device *group_dev)
1127 {
1128         struct net_device *lower_dev;
1129         struct list_head *iter;
1130
1131         netdev_for_each_lower_dev(group_dev, lower_dev, iter) {
1132                 if (lower_dev == dev)
1133                         continue;
1134                 if (switchdev_port_same_parent_id(dev, lower_dev))
1135                         return lower_dev->offload_fwd_mark;
1136                 return switchdev_port_fwd_mark_get(dev, lower_dev);
1137         }
1138
1139         return dev->ifindex;
1140 }
1141
1142 static void switchdev_port_fwd_mark_reset(struct net_device *group_dev,
1143                                           u32 old_mark, u32 *reset_mark)
1144 {
1145         struct net_device *lower_dev;
1146         struct list_head *iter;
1147
1148         netdev_for_each_lower_dev(group_dev, lower_dev, iter) {
1149                 if (lower_dev->offload_fwd_mark == old_mark) {
1150                         if (!*reset_mark)
1151                                 *reset_mark = lower_dev->ifindex;
1152                         lower_dev->offload_fwd_mark = *reset_mark;
1153                 }
1154                 switchdev_port_fwd_mark_reset(lower_dev, old_mark, reset_mark);
1155         }
1156 }
1157
1158 /**
1159  *      switchdev_port_fwd_mark_set - Set port offload forwarding mark
1160  *
1161  *      @dev: port device
1162  *      @group_dev: containing device
1163  *      @joining: true if dev is joining group; false if leaving group
1164  *
1165  *      An ungrouped port's offload mark is just its ifindex.  A grouped
1166  *      port's (member of a bridge, for example) offload mark is the ifindex
1167  *      of one of the ports in the group with the same parent (switch) ID.
1168  *      Ports on the same device in the same group will have the same mark.
1169  *
1170  *      Example:
1171  *
1172  *              br0             ifindex=9
1173  *                sw1p1         ifindex=2       mark=2
1174  *                sw1p2         ifindex=3       mark=2
1175  *                sw2p1         ifindex=4       mark=5
1176  *                sw2p2         ifindex=5       mark=5
1177  *
1178  *      If sw2p2 leaves the bridge, we'll have:
1179  *
1180  *              br0             ifindex=9
1181  *                sw1p1         ifindex=2       mark=2
1182  *                sw1p2         ifindex=3       mark=2
1183  *                sw2p1         ifindex=4       mark=4
1184  *              sw2p2           ifindex=5       mark=5
1185  */
1186 void switchdev_port_fwd_mark_set(struct net_device *dev,
1187                                  struct net_device *group_dev,
1188                                  bool joining)
1189 {
1190         u32 mark = dev->ifindex;
1191         u32 reset_mark = 0;
1192
1193         if (group_dev && joining) {
1194                 mark = switchdev_port_fwd_mark_get(dev, group_dev);
1195         } else if (group_dev && !joining) {
1196                 if (dev->offload_fwd_mark == mark)
1197                         /* Ohoh, this port was the mark reference port,
1198                          * but it's leaving the group, so reset the
1199                          * mark for the remaining ports in the group.
1200                          */
1201                         switchdev_port_fwd_mark_reset(group_dev, mark,
1202                                                       &reset_mark);
1203         }
1204
1205         dev->offload_fwd_mark = mark;
1206 }
1207 EXPORT_SYMBOL_GPL(switchdev_port_fwd_mark_set);