/* Extracted from gitweb (git.kernelconcepts.de, karo-tx-linux.git):
 * net/switchdev/switchdev.c
 */
1 /*
2  * net/switchdev/switchdev.c - Switch device API
3  * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us>
4  * Copyright (c) 2014-2015 Scott Feldman <sfeldma@gmail.com>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  */
11
12 #include <linux/kernel.h>
13 #include <linux/types.h>
14 #include <linux/init.h>
15 #include <linux/mutex.h>
16 #include <linux/notifier.h>
17 #include <linux/netdevice.h>
18 #include <linux/if_bridge.h>
19 #include <linux/if_vlan.h>
20 #include <net/ip_fib.h>
21 #include <net/switchdev.h>
22
23 /**
24  *      switchdev_port_attr_get - Get port attribute
25  *
26  *      @dev: port device
27  *      @attr: attribute to get
28  */
29 int switchdev_port_attr_get(struct net_device *dev, struct switchdev_attr *attr)
30 {
31         const struct switchdev_ops *ops = dev->switchdev_ops;
32         struct net_device *lower_dev;
33         struct list_head *iter;
34         struct switchdev_attr first = {
35                 .id = SWITCHDEV_ATTR_UNDEFINED
36         };
37         int err = -EOPNOTSUPP;
38
39         if (ops && ops->switchdev_port_attr_get)
40                 return ops->switchdev_port_attr_get(dev, attr);
41
42         if (attr->flags & SWITCHDEV_F_NO_RECURSE)
43                 return err;
44
45         /* Switch device port(s) may be stacked under
46          * bond/team/vlan dev, so recurse down to get attr on
47          * each port.  Return -ENODATA if attr values don't
48          * compare across ports.
49          */
50
51         netdev_for_each_lower_dev(dev, lower_dev, iter) {
52                 err = switchdev_port_attr_get(lower_dev, attr);
53                 if (err)
54                         break;
55                 if (first.id == SWITCHDEV_ATTR_UNDEFINED)
56                         first = *attr;
57                 else if (memcmp(&first, attr, sizeof(*attr)))
58                         return -ENODATA;
59         }
60
61         return err;
62 }
63 EXPORT_SYMBOL_GPL(switchdev_port_attr_get);
64
65 static int __switchdev_port_attr_set(struct net_device *dev,
66                                      struct switchdev_attr *attr)
67 {
68         const struct switchdev_ops *ops = dev->switchdev_ops;
69         struct net_device *lower_dev;
70         struct list_head *iter;
71         int err = -EOPNOTSUPP;
72
73         if (ops && ops->switchdev_port_attr_set)
74                 return ops->switchdev_port_attr_set(dev, attr);
75
76         if (attr->flags & SWITCHDEV_F_NO_RECURSE)
77                 return err;
78
79         /* Switch device port(s) may be stacked under
80          * bond/team/vlan dev, so recurse down to set attr on
81          * each port.
82          */
83
84         netdev_for_each_lower_dev(dev, lower_dev, iter) {
85                 err = __switchdev_port_attr_set(lower_dev, attr);
86                 if (err)
87                         break;
88         }
89
90         return err;
91 }
92
/* Deferred attr set: carries the target dev (reference held via
 * dev_hold) and a copy of the attr from the caller's context to a
 * worker that can take rtnl_lock.
 */
struct switchdev_attr_set_work {
	struct work_struct work;	/* must allow container_of from work */
	struct net_device *dev;
	struct switchdev_attr attr;	/* copy; caller's attr may be stack */
};
98
99 static void switchdev_port_attr_set_work(struct work_struct *work)
100 {
101         struct switchdev_attr_set_work *asw =
102                 container_of(work, struct switchdev_attr_set_work, work);
103         int err;
104
105         rtnl_lock();
106         err = switchdev_port_attr_set(asw->dev, &asw->attr);
107         if (err && err != -EOPNOTSUPP)
108                 netdev_err(asw->dev, "failed (err=%d) to set attribute (id=%d)\n",
109                            err, asw->attr.id);
110         rtnl_unlock();
111
112         dev_put(asw->dev);
113         kfree(work);
114 }
115
116 static int switchdev_port_attr_set_defer(struct net_device *dev,
117                                          struct switchdev_attr *attr)
118 {
119         struct switchdev_attr_set_work *asw;
120
121         asw = kmalloc(sizeof(*asw), GFP_ATOMIC);
122         if (!asw)
123                 return -ENOMEM;
124
125         INIT_WORK(&asw->work, switchdev_port_attr_set_work);
126
127         dev_hold(dev);
128         asw->dev = dev;
129         memcpy(&asw->attr, attr, sizeof(asw->attr));
130
131         schedule_work(&asw->work);
132
133         return 0;
134 }
135
/**
 *	switchdev_port_attr_set - Set port attribute
 *
 *	@dev: port device
 *	@attr: attribute to set
 *
 *	Use a 2-phase prepare-commit transaction model to ensure
 *	system is not left in a partially updated state due to
 *	failure from driver/device.
 *
 *	Returns 0 on success, -EOPNOTSUPP if no device in the stack
 *	supports the attr, or a negative errno from the driver.
 */
int switchdev_port_attr_set(struct net_device *dev, struct switchdev_attr *attr)
{
	int err;

	if (!rtnl_is_locked()) {
		/* Running prepare-commit transaction across stacked
		 * devices requires nothing moves, so if rtnl_lock is
		 * not held, schedule a worker thread to hold rtnl_lock
		 * while setting attr.
		 *
		 * NOTE(review): rtnl_is_locked() reports that *some*
		 * task holds rtnl, not necessarily this one; a
		 * concurrent holder could make this path skip the
		 * deferral — confirm all callers either hold rtnl
		 * themselves or tolerate the deferred (async) set.
		 */

		return switchdev_port_attr_set_defer(dev, attr);
	}

	/* Phase I: prepare for attr set. Driver/device should fail
	 * here if there are going to be issues in the commit phase,
	 * such as lack of resources or support.  The driver/device
	 * should reserve resources needed for the commit phase here,
	 * but should not commit the attr.
	 */

	attr->trans = SWITCHDEV_TRANS_PREPARE;
	err = __switchdev_port_attr_set(dev, attr);
	if (err) {
		/* Prepare phase failed: abort the transaction.  Any
		 * resources reserved in the prepare phase are
		 * released.  (-EOPNOTSUPP means nothing was prepared,
		 * so there is nothing to abort.)
		 */

		if (err != -EOPNOTSUPP) {
			attr->trans = SWITCHDEV_TRANS_ABORT;
			__switchdev_port_attr_set(dev, attr);
		}

		return err;
	}

	/* Phase II: commit attr set.  This cannot fail as a fault
	 * of driver/device.  If it does, it's a bug in the driver/device
	 * because the driver said everythings was OK in phase I.
	 */

	attr->trans = SWITCHDEV_TRANS_COMMIT;
	err = __switchdev_port_attr_set(dev, attr);
	WARN(err, "%s: Commit of attribute (id=%d) failed.\n",
	     dev->name, attr->id);

	return err;
}
EXPORT_SYMBOL_GPL(switchdev_port_attr_set);
196
197 static int __switchdev_port_obj_add(struct net_device *dev,
198                                     struct switchdev_obj *obj)
199 {
200         const struct switchdev_ops *ops = dev->switchdev_ops;
201         struct net_device *lower_dev;
202         struct list_head *iter;
203         int err = -EOPNOTSUPP;
204
205         if (ops && ops->switchdev_port_obj_add)
206                 return ops->switchdev_port_obj_add(dev, obj);
207
208         /* Switch device port(s) may be stacked under
209          * bond/team/vlan dev, so recurse down to add object on
210          * each port.
211          */
212
213         netdev_for_each_lower_dev(dev, lower_dev, iter) {
214                 err = __switchdev_port_obj_add(lower_dev, obj);
215                 if (err)
216                         break;
217         }
218
219         return err;
220 }
221
/**
 *	switchdev_port_obj_add - Add port object
 *
 *	@dev: port device
 *	@obj: object to add
 *
 *	Use a 2-phase prepare-commit transaction model to ensure
 *	system is not left in a partially updated state due to
 *	failure from driver/device.
 *
 *	rtnl_lock must be held.
 *
 *	Returns 0 on success, -EOPNOTSUPP if no device in the stack
 *	supports the obj, or a negative errno from the driver.
 */
int switchdev_port_obj_add(struct net_device *dev, struct switchdev_obj *obj)
{
	int err;

	ASSERT_RTNL();

	/* Phase I: prepare for obj add. Driver/device should fail
	 * here if there are going to be issues in the commit phase,
	 * such as lack of resources or support.  The driver/device
	 * should reserve resources needed for the commit phase here,
	 * but should not commit the obj.
	 */

	obj->trans = SWITCHDEV_TRANS_PREPARE;
	err = __switchdev_port_obj_add(dev, obj);
	if (err) {
		/* Prepare phase failed: abort the transaction.  Any
		 * resources reserved in the prepare phase are
		 * released.  (-EOPNOTSUPP means nothing was prepared,
		 * so there is nothing to abort.)
		 */

		if (err != -EOPNOTSUPP) {
			obj->trans = SWITCHDEV_TRANS_ABORT;
			__switchdev_port_obj_add(dev, obj);
		}

		return err;
	}

	/* Phase II: commit obj add.  This cannot fail as a fault
	 * of driver/device.  If it does, it's a bug in the driver/device
	 * because the driver said everythings was OK in phase I.
	 */

	obj->trans = SWITCHDEV_TRANS_COMMIT;
	err = __switchdev_port_obj_add(dev, obj);
	WARN(err, "%s: Commit of object (id=%d) failed.\n", dev->name, obj->id);

	return err;
}
EXPORT_SYMBOL_GPL(switchdev_port_obj_add);
275
276 /**
277  *      switchdev_port_obj_del - Delete port object
278  *
279  *      @dev: port device
280  *      @obj: object to delete
281  */
282 int switchdev_port_obj_del(struct net_device *dev, struct switchdev_obj *obj)
283 {
284         const struct switchdev_ops *ops = dev->switchdev_ops;
285         struct net_device *lower_dev;
286         struct list_head *iter;
287         int err = -EOPNOTSUPP;
288
289         if (ops && ops->switchdev_port_obj_del)
290                 return ops->switchdev_port_obj_del(dev, obj);
291
292         /* Switch device port(s) may be stacked under
293          * bond/team/vlan dev, so recurse down to delete object on
294          * each port.
295          */
296
297         netdev_for_each_lower_dev(dev, lower_dev, iter) {
298                 err = switchdev_port_obj_del(lower_dev, obj);
299                 if (err)
300                         break;
301         }
302
303         return err;
304 }
305 EXPORT_SYMBOL_GPL(switchdev_port_obj_del);
306
307 /**
308  *      switchdev_port_obj_dump - Dump port objects
309  *
310  *      @dev: port device
311  *      @obj: object to dump
312  */
313 int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj)
314 {
315         const struct switchdev_ops *ops = dev->switchdev_ops;
316         struct net_device *lower_dev;
317         struct list_head *iter;
318         int err = -EOPNOTSUPP;
319
320         if (ops && ops->switchdev_port_obj_dump)
321                 return ops->switchdev_port_obj_dump(dev, obj);
322
323         /* Switch device port(s) may be stacked under
324          * bond/team/vlan dev, so recurse down to dump objects on
325          * first port at bottom of stack.
326          */
327
328         netdev_for_each_lower_dev(dev, lower_dev, iter) {
329                 err = switchdev_port_obj_dump(lower_dev, obj);
330                 break;
331         }
332
333         return err;
334 }
335 EXPORT_SYMBOL_GPL(switchdev_port_obj_dump);
336
/* switchdev_mutex serializes all add/del/call operations on the raw
 * notifier chain below (raw chains provide no locking of their own).
 */
static DEFINE_MUTEX(switchdev_mutex);
static RAW_NOTIFIER_HEAD(switchdev_notif_chain);
339
340 /**
341  *      register_switchdev_notifier - Register notifier
342  *      @nb: notifier_block
343  *
344  *      Register switch device notifier. This should be used by code
345  *      which needs to monitor events happening in particular device.
346  *      Return values are same as for atomic_notifier_chain_register().
347  */
348 int register_switchdev_notifier(struct notifier_block *nb)
349 {
350         int err;
351
352         mutex_lock(&switchdev_mutex);
353         err = raw_notifier_chain_register(&switchdev_notif_chain, nb);
354         mutex_unlock(&switchdev_mutex);
355         return err;
356 }
357 EXPORT_SYMBOL_GPL(register_switchdev_notifier);
358
359 /**
360  *      unregister_switchdev_notifier - Unregister notifier
361  *      @nb: notifier_block
362  *
363  *      Unregister switch device notifier.
364  *      Return values are same as for atomic_notifier_chain_unregister().
365  */
366 int unregister_switchdev_notifier(struct notifier_block *nb)
367 {
368         int err;
369
370         mutex_lock(&switchdev_mutex);
371         err = raw_notifier_chain_unregister(&switchdev_notif_chain, nb);
372         mutex_unlock(&switchdev_mutex);
373         return err;
374 }
375 EXPORT_SYMBOL_GPL(unregister_switchdev_notifier);
376
377 /**
378  *      call_switchdev_notifiers - Call notifiers
379  *      @val: value passed unmodified to notifier function
380  *      @dev: port device
381  *      @info: notifier information data
382  *
383  *      Call all network notifier blocks. This should be called by driver
384  *      when it needs to propagate hardware event.
385  *      Return values are same as for atomic_notifier_call_chain().
386  */
387 int call_switchdev_notifiers(unsigned long val, struct net_device *dev,
388                              struct switchdev_notifier_info *info)
389 {
390         int err;
391
392         info->dev = dev;
393         mutex_lock(&switchdev_mutex);
394         err = raw_notifier_call_chain(&switchdev_notif_chain, val, info);
395         mutex_unlock(&switchdev_mutex);
396         return err;
397 }
398 EXPORT_SYMBOL_GPL(call_switchdev_notifiers);
399
/* State for dumping port VLANs into a netlink skb.  begin/end hold the
 * current (possibly coalesced) vid range that has not been emitted yet;
 * begin == end == 0 means no pending range.
 */
struct switchdev_vlan_dump {
	struct switchdev_obj obj;	/* must allow container_of */
	struct sk_buff *skb;
	u32 filter_mask;		/* RTEXT_FILTER_BRVLAN[_COMPRESSED] */
	u16 flags;			/* bridge_vlan_info flags of pending range */
	u16 begin;			/* first vid of pending range */
	u16 end;			/* last vid of pending range */
};
408
409 static int switchdev_port_vlan_dump_put(struct net_device *dev,
410                                         struct switchdev_vlan_dump *dump)
411 {
412         struct bridge_vlan_info vinfo;
413
414         vinfo.flags = dump->flags;
415
416         if (dump->begin == 0 && dump->end == 0) {
417                 return 0;
418         } else if (dump->begin == dump->end) {
419                 vinfo.vid = dump->begin;
420                 if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO,
421                             sizeof(vinfo), &vinfo))
422                         return -EMSGSIZE;
423         } else {
424                 vinfo.vid = dump->begin;
425                 vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_BEGIN;
426                 if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO,
427                             sizeof(vinfo), &vinfo))
428                         return -EMSGSIZE;
429                 vinfo.vid = dump->end;
430                 vinfo.flags &= ~BRIDGE_VLAN_INFO_RANGE_BEGIN;
431                 vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_END;
432                 if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO,
433                             sizeof(vinfo), &vinfo))
434                         return -EMSGSIZE;
435         }
436
437         return 0;
438 }
439
/* Object-dump callback invoked once per vlan range reported by the
 * driver; fills the netlink skb according to dump->filter_mask.
 */
static int switchdev_port_vlan_dump_cb(struct net_device *dev,
				       struct switchdev_obj *obj)
{
	struct switchdev_vlan_dump *dump =
		container_of(obj, struct switchdev_vlan_dump, obj);
	struct switchdev_obj_vlan *vlan = &dump->obj.u.vlan;
	int err = 0;

	if (vlan->vid_begin > vlan->vid_end)
		return -EINVAL;

	if (dump->filter_mask & RTEXT_FILTER_BRVLAN) {
		/* Uncompressed dump: emit every vid in the range as
		 * its own single-vid entry.
		 */
		dump->flags = vlan->flags;
		for (dump->begin = dump->end = vlan->vid_begin;
		     dump->begin <= vlan->vid_end;
		     dump->begin++, dump->end++) {
			err = switchdev_port_vlan_dump_put(dev, dump);
			if (err)
				return err;
		}
	} else if (dump->filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED) {
		/* Compressed dump: coalesce the incoming range with
		 * the pending [begin, end] range when adjacent and
		 * equal-flagged; otherwise flush the pending range
		 * first and start a new one.
		 *
		 * NOTE(review): on the very first callback begin/end
		 * are still 0, so an incoming range starting at vid 1
		 * with flags 0 takes the "append" path and leaves
		 * begin == 0 — confirm drivers never report such a
		 * range, or that vid 0 output is harmless here.
		 */
		if (dump->begin > vlan->vid_begin &&
		    dump->begin >= vlan->vid_end) {
			if ((dump->begin - 1) == vlan->vid_end &&
			    dump->flags == vlan->flags) {
				/* prepend */
				dump->begin = vlan->vid_begin;
			} else {
				err = switchdev_port_vlan_dump_put(dev, dump);
				dump->flags = vlan->flags;
				dump->begin = vlan->vid_begin;
				dump->end = vlan->vid_end;
			}
		} else if (dump->end <= vlan->vid_begin &&
			   dump->end < vlan->vid_end) {
			if ((dump->end  + 1) == vlan->vid_begin &&
			    dump->flags == vlan->flags) {
				/* append */
				dump->end = vlan->vid_end;
			} else {
				err = switchdev_port_vlan_dump_put(dev, dump);
				dump->flags = vlan->flags;
				dump->begin = vlan->vid_begin;
				dump->end = vlan->vid_end;
			}
		} else {
			/* Overlapping or out-of-order with the pending
			 * range; ranges are expected sorted and
			 * non-overlapping.
			 */
			err = -EINVAL;
		}
	}

	return err;
}
492
493 static int switchdev_port_vlan_fill(struct sk_buff *skb, struct net_device *dev,
494                                     u32 filter_mask)
495 {
496         struct switchdev_vlan_dump dump = {
497                 .obj = {
498                         .id = SWITCHDEV_OBJ_PORT_VLAN,
499                         .cb = switchdev_port_vlan_dump_cb,
500                 },
501                 .skb = skb,
502                 .filter_mask = filter_mask,
503         };
504         int err = 0;
505
506         if ((filter_mask & RTEXT_FILTER_BRVLAN) ||
507             (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) {
508                 err = switchdev_port_obj_dump(dev, &dump.obj);
509                 if (err)
510                         goto err_out;
511                 if (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)
512                         /* last one */
513                         err = switchdev_port_vlan_dump_put(dev, &dump);
514         }
515
516 err_out:
517         return err == -EOPNOTSUPP ? 0 : err;
518 }
519
/**
 *	switchdev_port_bridge_getlink - Get bridge port attributes
 *
 *	@dev: port device
 *
 *	Called for SELF on rtnl_bridge_getlink to get bridge port
 *	attributes.
 */
int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
				  struct net_device *dev, u32 filter_mask,
				  int nlflags)
{
	struct switchdev_attr attr = {
		.id = SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS,
	};
	u16 mode = BRIDGE_MODE_UNDEF;
	u32 mask = BR_LEARNING | BR_LEARNING_SYNC;
	int err;

	/* -EOPNOTSUPP is tolerated: attr's designated initializer
	 * zeroed the rest of the struct, so brport_flags reads as 0
	 * for a non-switchdev port.
	 */
	err = switchdev_port_attr_get(dev, &attr);
	if (err && err != -EOPNOTSUPP)
		return err;

	return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode,
				       attr.u.brport_flags, mask, nlflags,
				       filter_mask, switchdev_port_vlan_fill);
}
EXPORT_SYMBOL_GPL(switchdev_port_bridge_getlink);
548
549 static int switchdev_port_br_setflag(struct net_device *dev,
550                                      struct nlattr *nlattr,
551                                      unsigned long brport_flag)
552 {
553         struct switchdev_attr attr = {
554                 .id = SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS,
555         };
556         u8 flag = nla_get_u8(nlattr);
557         int err;
558
559         err = switchdev_port_attr_get(dev, &attr);
560         if (err)
561                 return err;
562
563         if (flag)
564                 attr.u.brport_flags |= brport_flag;
565         else
566                 attr.u.brport_flags &= ~brport_flag;
567
568         return switchdev_port_attr_set(dev, &attr);
569 }
570
/* Netlink policy used to validate IFLA_PROTINFO nested attributes on
 * bridge setlink; only LEARNING/LEARNING_SYNC are acted on below, the
 * rest are validated but rejected with -EOPNOTSUPP.
 */
static const struct nla_policy
switchdev_port_bridge_policy[IFLA_BRPORT_MAX + 1] = {
	[IFLA_BRPORT_STATE]		= { .type = NLA_U8 },
	[IFLA_BRPORT_COST]		= { .type = NLA_U32 },
	[IFLA_BRPORT_PRIORITY]		= { .type = NLA_U16 },
	[IFLA_BRPORT_MODE]		= { .type = NLA_U8 },
	[IFLA_BRPORT_GUARD]		= { .type = NLA_U8 },
	[IFLA_BRPORT_PROTECT]		= { .type = NLA_U8 },
	[IFLA_BRPORT_FAST_LEAVE]	= { .type = NLA_U8 },
	[IFLA_BRPORT_LEARNING]		= { .type = NLA_U8 },
	[IFLA_BRPORT_LEARNING_SYNC]	= { .type = NLA_U8 },
	[IFLA_BRPORT_UNICAST_FLOOD]	= { .type = NLA_U8 },
};
584
585 static int switchdev_port_br_setlink_protinfo(struct net_device *dev,
586                                               struct nlattr *protinfo)
587 {
588         struct nlattr *attr;
589         int rem;
590         int err;
591
592         err = nla_validate_nested(protinfo, IFLA_BRPORT_MAX,
593                                   switchdev_port_bridge_policy);
594         if (err)
595                 return err;
596
597         nla_for_each_nested(attr, protinfo, rem) {
598                 switch (nla_type(attr)) {
599                 case IFLA_BRPORT_LEARNING:
600                         err = switchdev_port_br_setflag(dev, attr,
601                                                         BR_LEARNING);
602                         break;
603                 case IFLA_BRPORT_LEARNING_SYNC:
604                         err = switchdev_port_br_setflag(dev, attr,
605                                                         BR_LEARNING_SYNC);
606                         break;
607                 default:
608                         err = -EOPNOTSUPP;
609                         break;
610                 }
611                 if (err)
612                         return err;
613         }
614
615         return 0;
616 }
617
/* Walk IFLA_AF_SPEC's IFLA_BRIDGE_VLAN_INFO entries and apply @f
 * (obj add or del) to each resulting vlan range.  A RANGE_BEGIN entry
 * only records vid_begin; the matching RANGE_END entry completes the
 * range and triggers the call.  A flag-less entry is a single vid.
 */
static int switchdev_port_br_afspec(struct net_device *dev,
				    struct nlattr *afspec,
				    int (*f)(struct net_device *dev,
					     struct switchdev_obj *obj))
{
	struct nlattr *attr;
	struct bridge_vlan_info *vinfo;
	struct switchdev_obj obj = {
		.id = SWITCHDEV_OBJ_PORT_VLAN,
	};
	struct switchdev_obj_vlan *vlan = &obj.u.vlan;
	int rem;
	int err;

	nla_for_each_nested(attr, afspec, rem) {
		if (nla_type(attr) != IFLA_BRIDGE_VLAN_INFO)
			continue;
		if (nla_len(attr) != sizeof(struct bridge_vlan_info))
			return -EINVAL;
		vinfo = nla_data(attr);
		/* vid 0 and >= 4096 are invalid vlan ids */
		if (!vinfo->vid || vinfo->vid >= VLAN_VID_MASK)
			return -EINVAL;
		vlan->flags = vinfo->flags;
		if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) {
			/* Reject a second BEGIN before the END arrived. */
			if (vlan->vid_begin)
				return -EINVAL;
			vlan->vid_begin = vinfo->vid;
		} else if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_END) {
			if (!vlan->vid_begin)
				return -EINVAL;
			vlan->vid_end = vinfo->vid;
			if (vlan->vid_end <= vlan->vid_begin)
				return -EINVAL;
			err = f(dev, &obj);
			if (err)
				return err;
			memset(vlan, 0, sizeof(*vlan));
		} else {
			if (vlan->vid_begin)
				return -EINVAL;
			vlan->vid_begin = vinfo->vid;
			vlan->vid_end = vinfo->vid;
			err = f(dev, &obj);
			if (err)
				return err;
			memset(vlan, 0, sizeof(*vlan));
		}
	}
	/* NOTE(review): a trailing RANGE_BEGIN with no RANGE_END is
	 * silently accepted (returns 0 without calling f) — confirm
	 * that matches the bridge's own afspec handling.
	 */

	return 0;
}
669
670 /**
671  *      switchdev_port_bridge_setlink - Set bridge port attributes
672  *
673  *      @dev: port device
674  *      @nlh: netlink header
675  *      @flags: netlink flags
676  *
677  *      Called for SELF on rtnl_bridge_setlink to set bridge port
678  *      attributes.
679  */
680 int switchdev_port_bridge_setlink(struct net_device *dev,
681                                   struct nlmsghdr *nlh, u16 flags)
682 {
683         struct nlattr *protinfo;
684         struct nlattr *afspec;
685         int err = 0;
686
687         protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
688                                    IFLA_PROTINFO);
689         if (protinfo) {
690                 err = switchdev_port_br_setlink_protinfo(dev, protinfo);
691                 if (err)
692                         return err;
693         }
694
695         afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
696                                  IFLA_AF_SPEC);
697         if (afspec)
698                 err = switchdev_port_br_afspec(dev, afspec,
699                                                switchdev_port_obj_add);
700
701         return err;
702 }
703 EXPORT_SYMBOL_GPL(switchdev_port_bridge_setlink);
704
705 /**
706  *      switchdev_port_bridge_dellink - Set bridge port attributes
707  *
708  *      @dev: port device
709  *      @nlh: netlink header
710  *      @flags: netlink flags
711  *
712  *      Called for SELF on rtnl_bridge_dellink to set bridge port
713  *      attributes.
714  */
715 int switchdev_port_bridge_dellink(struct net_device *dev,
716                                   struct nlmsghdr *nlh, u16 flags)
717 {
718         struct nlattr *afspec;
719
720         afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
721                                  IFLA_AF_SPEC);
722         if (afspec)
723                 return switchdev_port_br_afspec(dev, afspec,
724                                                 switchdev_port_obj_del);
725
726         return 0;
727 }
728 EXPORT_SYMBOL_GPL(switchdev_port_bridge_dellink);
729
730 /**
731  *      switchdev_port_fdb_add - Add FDB (MAC/VLAN) entry to port
732  *
733  *      @ndmsg: netlink hdr
734  *      @nlattr: netlink attributes
735  *      @dev: port device
736  *      @addr: MAC address to add
737  *      @vid: VLAN to add
738  *
739  *      Add FDB entry to switch device.
740  */
741 int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
742                            struct net_device *dev, const unsigned char *addr,
743                            u16 vid, u16 nlm_flags)
744 {
745         struct switchdev_obj obj = {
746                 .id = SWITCHDEV_OBJ_PORT_FDB,
747                 .u.fdb = {
748                         .addr = addr,
749                         .vid = vid,
750                 },
751         };
752
753         return switchdev_port_obj_add(dev, &obj);
754 }
755 EXPORT_SYMBOL_GPL(switchdev_port_fdb_add);
756
757 /**
758  *      switchdev_port_fdb_del - Delete FDB (MAC/VLAN) entry from port
759  *
760  *      @ndmsg: netlink hdr
761  *      @nlattr: netlink attributes
762  *      @dev: port device
763  *      @addr: MAC address to delete
764  *      @vid: VLAN to delete
765  *
766  *      Delete FDB entry from switch device.
767  */
768 int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
769                            struct net_device *dev, const unsigned char *addr,
770                            u16 vid)
771 {
772         struct switchdev_obj obj = {
773                 .id = SWITCHDEV_OBJ_PORT_FDB,
774                 .u.fdb = {
775                         .addr = addr,
776                         .vid = vid,
777                 },
778         };
779
780         return switchdev_port_obj_del(dev, &obj);
781 }
782 EXPORT_SYMBOL_GPL(switchdev_port_fdb_del);
783
/* State for dumping a port's FDB entries into a netlink skb. */
struct switchdev_fdb_dump {
	struct switchdev_obj obj;	/* must allow container_of */
	struct sk_buff *skb;
	struct netlink_callback *cb;	/* carries resume point in args[0] */
	int idx;			/* running entry index */
};
790
/* Object-dump callback: emit one RTM_NEWNEIGH message per FDB entry. */
static int switchdev_port_fdb_dump_cb(struct net_device *dev,
				      struct switchdev_obj *obj)
{
	struct switchdev_fdb_dump *dump =
		container_of(obj, struct switchdev_fdb_dump, obj);
	u32 portid = NETLINK_CB(dump->cb->skb).portid;
	u32 seq = dump->cb->nlh->nlmsg_seq;
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	/* Skip entries already delivered by an earlier dump pass;
	 * cb->args[0] is where the previous pass stopped.
	 */
	if (dump->idx < dump->cb->args[0])
		goto skip;

	nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH,
			sizeof(*ndm), NLM_F_MULTI);
	if (!nlh)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family  = AF_BRIDGE;
	ndm->ndm_pad1    = 0;
	ndm->ndm_pad2    = 0;
	ndm->ndm_flags   = NTF_SELF;
	ndm->ndm_type    = 0;
	ndm->ndm_ifindex = dev->ifindex;
	ndm->ndm_state   = obj->u.fdb.ndm_state;

	if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, obj->u.fdb.addr))
		goto nla_put_failure;

	/* vid 0 means no vlan; the attribute is omitted then. */
	if (obj->u.fdb.vid && nla_put_u16(dump->skb, NDA_VLAN, obj->u.fdb.vid))
		goto nla_put_failure;

	nlmsg_end(dump->skb, nlh);

skip:
	dump->idx++;
	return 0;

nla_put_failure:
	nlmsg_cancel(dump->skb, nlh);
	return -EMSGSIZE;
}
834
/**
 *	switchdev_port_fdb_dump - Dump port FDB (MAC/VLAN) entries
 *
 *	@skb: netlink skb
 *	@cb: netlink callback
 *	@dev: port device
 *	@filter_dev: filter device
 *	@idx: index to start dumping at (resume point)
 *
 *	Dump FDB entries from switch device.  Returns the index after
 *	the last entry processed; per netlink dump convention errors
 *	from the underlying dump are not propagated.
 */
int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
			    struct net_device *dev,
			    struct net_device *filter_dev, int idx)
{
	struct switchdev_fdb_dump dump = {
		.obj = {
			.id = SWITCHDEV_OBJ_PORT_FDB,
			.cb = switchdev_port_fdb_dump_cb,
		},
		.skb = skb,
		.cb = cb,
		.idx = idx,
	};

	/* Return value intentionally ignored: a partial dump still
	 * reports how far it got via dump.idx.
	 */
	switchdev_port_obj_dump(dev, &dump.obj);
	return dump.idx;
}
EXPORT_SYMBOL_GPL(switchdev_port_fdb_dump);
864
/* Recursively search down the lower-device tree until we find a switch
 * port dev.  (A switch port dev is one that supports
 * switchdev_port_attr_get.)  Returns the first match, or NULL.
 */
static struct net_device *switchdev_get_lowest_dev(struct net_device *dev)
{
	const struct switchdev_ops *ops = dev->switchdev_ops;
	struct net_device *lower_dev;
	struct net_device *port_dev;
	struct list_head *iter;

	if (ops && ops->switchdev_port_attr_get)
		return dev;

	netdev_for_each_lower_dev(dev, lower_dev, iter) {
		port_dev = switchdev_get_lowest_dev(lower_dev);
		if (port_dev)
			return port_dev;
	}

	return NULL;
}
887
888 static struct net_device *switchdev_get_dev_by_nhs(struct fib_info *fi)
889 {
890         struct switchdev_attr attr = {
891                 .id = SWITCHDEV_ATTR_PORT_PARENT_ID,
892         };
893         struct switchdev_attr prev_attr;
894         struct net_device *dev = NULL;
895         int nhsel;
896
897         /* For this route, all nexthop devs must be on the same switch. */
898
899         for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
900                 const struct fib_nh *nh = &fi->fib_nh[nhsel];
901
902                 if (!nh->nh_dev)
903                         return NULL;
904
905                 dev = switchdev_get_lowest_dev(nh->nh_dev);
906                 if (!dev)
907                         return NULL;
908
909                 if (switchdev_port_attr_get(dev, &attr))
910                         return NULL;
911
912                 if (nhsel > 0 &&
913                     !netdev_phys_item_id_same(&prev_attr.u.ppid, &attr.u.ppid))
914                                 return NULL;
915
916                 prev_attr = attr;
917         }
918
919         return dev;
920 }
921
/**
 *	switchdev_fib_ipv4_add - Add/modify switch IPv4 route entry
 *
 *	@dst: route's IPv4 destination address
 *	@dst_len: destination address length (prefix length)
 *	@fi: route FIB info structure
 *	@tos: route TOS
 *	@type: route type
 *	@nlflags: netlink flags passed in (NLM_F_*)
 *	@tb_id: route table ID
 *
 *	Add/modify switch IPv4 route entry.
 */
int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi,
			   u8 tos, u8 type, u32 nlflags, u32 tb_id)
{
	struct switchdev_obj fib_obj = {
		.id = SWITCHDEV_OBJ_IPV4_FIB,
		.u.ipv4_fib = {
			.dst = dst,
			.dst_len = dst_len,
			.fi = fi,
			.tos = tos,
			.type = type,
			.nlflags = nlflags,
			.tb_id = tb_id,
		},
	};
	struct net_device *dev;
	int err = 0;

	/* Don't offload route if using custom ip rules or if
	 * IPv4 FIB offloading has been disabled completely.
	 */

#ifdef CONFIG_IP_MULTIPLE_TABLES
	if (fi->fib_net->ipv4.fib_has_custom_rules)
		return 0;
#endif

	if (fi->fib_net->ipv4.fib_offload_disabled)
		return 0;

	/* All nexthops must sit on the same switch for the route to
	 * be offloadable; otherwise leave it to the kernel FIB.
	 */
	dev = switchdev_get_dev_by_nhs(fi);
	if (!dev)
		return 0;

	err = switchdev_port_obj_add(dev, &fib_obj);
	if (!err)
		fi->fib_flags |= RTNH_F_OFFLOAD;

	/* -EOPNOTSUPP just means the device can't offload this route;
	 * that is not an error from the caller's point of view.
	 */
	return err == -EOPNOTSUPP ? 0 : err;
}
EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_add);
976
977 /**
978  *      switchdev_fib_ipv4_del - Delete IPv4 route entry from switch
979  *
980  *      @dst: route's IPv4 destination address
981  *      @dst_len: destination address length (prefix length)
982  *      @fi: route FIB info structure
983  *      @tos: route TOS
984  *      @type: route type
985  *      @tb_id: route table ID
986  *
987  *      Delete IPv4 route entry from switch device.
988  */
989 int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi,
990                            u8 tos, u8 type, u32 tb_id)
991 {
992         struct switchdev_obj fib_obj = {
993                 .id = SWITCHDEV_OBJ_IPV4_FIB,
994                 .u.ipv4_fib = {
995                         .dst = dst,
996                         .dst_len = dst_len,
997                         .fi = fi,
998                         .tos = tos,
999                         .type = type,
1000                         .nlflags = 0,
1001                         .tb_id = tb_id,
1002                 },
1003         };
1004         struct net_device *dev;
1005         int err = 0;
1006
1007         if (!(fi->fib_flags & RTNH_F_OFFLOAD))
1008                 return 0;
1009
1010         dev = switchdev_get_dev_by_nhs(fi);
1011         if (!dev)
1012                 return 0;
1013
1014         err = switchdev_port_obj_del(dev, &fib_obj);
1015         if (!err)
1016                 fi->fib_flags &= ~RTNH_F_OFFLOAD;
1017
1018         return err == -EOPNOTSUPP ? 0 : err;
1019 }
1020 EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_del);
1021
/**
 *	switchdev_fib_ipv4_abort - Abort an IPv4 FIB operation
 *
 *	@fi: route FIB info structure
 */
void switchdev_fib_ipv4_abort(struct fib_info *fi)
{
	/* There was a problem installing this route to the offload
	 * device.  For now, until we come up with more refined
	 * policy handling, abruptly end IPv4 fib offloading for
	 * the entire net by flushing offload device(s) of all
	 * IPv4 routes, and mark IPv4 fib offloading broken from
	 * this point forward.
	 */

	fib_flush_external(fi->fib_net);
	fi->fib_net->ipv4.fib_offload_disabled = true;
}
EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_abort);
1041
1042 static bool switchdev_port_same_parent_id(struct net_device *a,
1043                                           struct net_device *b)
1044 {
1045         struct switchdev_attr a_attr = {
1046                 .id = SWITCHDEV_ATTR_PORT_PARENT_ID,
1047                 .flags = SWITCHDEV_F_NO_RECURSE,
1048         };
1049         struct switchdev_attr b_attr = {
1050                 .id = SWITCHDEV_ATTR_PORT_PARENT_ID,
1051                 .flags = SWITCHDEV_F_NO_RECURSE,
1052         };
1053
1054         if (switchdev_port_attr_get(a, &a_attr) ||
1055             switchdev_port_attr_get(b, &b_attr))
1056                 return false;
1057
1058         return netdev_phys_item_id_same(&a_attr.u.ppid, &b_attr.u.ppid);
1059 }
1060
static u32 switchdev_port_fwd_mark_get(struct net_device *dev,
				       struct net_device *group_dev)
{
	struct net_device *lower_dev;
	struct list_head *iter;

	/* Find a mark for dev by looking at the other ports in dev's
	 * group (e.g. bridge): the first lower dev (other than dev
	 * itself) on the same parent switch supplies its mark;
	 * otherwise recurse into that lower dev's own group.  Note
	 * the loop body always returns on the first lower dev that
	 * isn't dev itself.
	 */
	netdev_for_each_lower_dev(group_dev, lower_dev, iter) {
		if (lower_dev == dev)
			continue;
		if (switchdev_port_same_parent_id(dev, lower_dev))
			return lower_dev->offload_fwd_mark;
		return switchdev_port_fwd_mark_get(dev, lower_dev);
	}

	/* No other port to share a mark with: dev's own ifindex
	 * serves as its private mark.
	 */
	return dev->ifindex;
}
1077
static void switchdev_port_fwd_mark_reset(struct net_device *group_dev,
					  u32 old_mark, u32 *reset_mark)
{
	struct net_device *lower_dev;
	struct list_head *iter;

	/* Re-mark every port under group_dev that carried old_mark.
	 * The first such port's ifindex becomes the new shared mark
	 * (latched into *reset_mark), which all later matches reuse.
	 */
	netdev_for_each_lower_dev(group_dev, lower_dev, iter) {
		if (lower_dev->offload_fwd_mark == old_mark) {
			/* First match establishes the replacement mark. */
			if (!*reset_mark)
				*reset_mark = lower_dev->ifindex;
			lower_dev->offload_fwd_mark = *reset_mark;
		}
		/* Recurse in case ports are stacked under lower_dev. */
		switchdev_port_fwd_mark_reset(lower_dev, old_mark, reset_mark);
	}
}
1093
/**
 *	switchdev_port_fwd_mark_set - Set port offload forwarding mark
 *
 *	@dev: port device
 *	@group_dev: containing device
 *	@joining: true if dev is joining group; false if leaving group
 *
 *	An ungrouped port's offload mark is just its ifindex.  A grouped
 *	port's (member of a bridge, for example) offload mark is the ifindex
 *	of one of the ports in the group with the same parent (switch) ID.
 *	Ports on the same device in the same group will have the same mark.
 *
 *	Example:
 *
 *		br0		ifindex=9
 *		  sw1p1		ifindex=2	mark=2
 *		  sw1p2		ifindex=3	mark=2
 *		  sw2p1		ifindex=4	mark=5
 *		  sw2p2		ifindex=5	mark=5
 *
 *	If sw2p2 leaves the bridge, we'll have:
 *
 *		br0		ifindex=9
 *		  sw1p1		ifindex=2	mark=2
 *		  sw1p2		ifindex=3	mark=2
 *		  sw2p1		ifindex=4	mark=4
 *		sw2p2		ifindex=5	mark=5
 */
void switchdev_port_fwd_mark_set(struct net_device *dev,
				 struct net_device *group_dev,
				 bool joining)
{
	u32 mark = dev->ifindex;
	u32 reset_mark = 0;

	if (group_dev && joining) {
		/* Adopt the mark of a same-switch port already in the
		 * group (falls back to dev's ifindex if none matches).
		 */
		mark = switchdev_port_fwd_mark_get(dev, group_dev);
	} else if (group_dev && !joining) {
		if (dev->offload_fwd_mark == mark)
			/* Ohoh, this port was the mark reference port,
			 * but it's leaving the group, so reset the
			 * mark for the remaining ports in the group.
			 */
			switchdev_port_fwd_mark_reset(group_dev, mark,
						      &reset_mark);
	}

	dev->offload_fwd_mark = mark;
}
EXPORT_SYMBOL_GPL(switchdev_port_fwd_mark_set);