]> git.kernelconcepts.de Git - karo-tx-linux.git/blob - net/switchdev/switchdev.c
scsi_dh: don't try to load a device handler during async probing
[karo-tx-linux.git] / net / switchdev / switchdev.c
1 /*
2  * net/switchdev/switchdev.c - Switch device API
3  * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us>
4  * Copyright (c) 2014-2015 Scott Feldman <sfeldma@gmail.com>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  */
11
12 #include <linux/kernel.h>
13 #include <linux/types.h>
14 #include <linux/init.h>
15 #include <linux/mutex.h>
16 #include <linux/notifier.h>
17 #include <linux/netdevice.h>
18 #include <linux/if_bridge.h>
19 #include <net/ip_fib.h>
20 #include <net/switchdev.h>
21
22 /**
23  *      switchdev_port_attr_get - Get port attribute
24  *
25  *      @dev: port device
26  *      @attr: attribute to get
27  */
28 int switchdev_port_attr_get(struct net_device *dev, struct switchdev_attr *attr)
29 {
30         const struct switchdev_ops *ops = dev->switchdev_ops;
31         struct net_device *lower_dev;
32         struct list_head *iter;
33         struct switchdev_attr first = {
34                 .id = SWITCHDEV_ATTR_UNDEFINED
35         };
36         int err = -EOPNOTSUPP;
37
38         if (ops && ops->switchdev_port_attr_get)
39                 return ops->switchdev_port_attr_get(dev, attr);
40
41         if (attr->flags & SWITCHDEV_F_NO_RECURSE)
42                 return err;
43
44         /* Switch device port(s) may be stacked under
45          * bond/team/vlan dev, so recurse down to get attr on
46          * each port.  Return -ENODATA if attr values don't
47          * compare across ports.
48          */
49
50         netdev_for_each_lower_dev(dev, lower_dev, iter) {
51                 err = switchdev_port_attr_get(lower_dev, attr);
52                 if (err)
53                         break;
54                 if (first.id == SWITCHDEV_ATTR_UNDEFINED)
55                         first = *attr;
56                 else if (memcmp(&first, attr, sizeof(*attr)))
57                         return -ENODATA;
58         }
59
60         return err;
61 }
62 EXPORT_SYMBOL_GPL(switchdev_port_attr_get);
63
64 static int __switchdev_port_attr_set(struct net_device *dev,
65                                      struct switchdev_attr *attr)
66 {
67         const struct switchdev_ops *ops = dev->switchdev_ops;
68         struct net_device *lower_dev;
69         struct list_head *iter;
70         int err = -EOPNOTSUPP;
71
72         if (ops && ops->switchdev_port_attr_set)
73                 return ops->switchdev_port_attr_set(dev, attr);
74
75         if (attr->flags & SWITCHDEV_F_NO_RECURSE)
76                 return err;
77
78         /* Switch device port(s) may be stacked under
79          * bond/team/vlan dev, so recurse down to set attr on
80          * each port.
81          */
82
83         netdev_for_each_lower_dev(dev, lower_dev, iter) {
84                 err = __switchdev_port_attr_set(lower_dev, attr);
85                 if (err)
86                         break;
87         }
88
89         return err;
90 }
91
92 struct switchdev_attr_set_work {
93         struct work_struct work;
94         struct net_device *dev;
95         struct switchdev_attr attr;
96 };
97
98 static void switchdev_port_attr_set_work(struct work_struct *work)
99 {
100         struct switchdev_attr_set_work *asw =
101                 container_of(work, struct switchdev_attr_set_work, work);
102         int err;
103
104         rtnl_lock();
105         err = switchdev_port_attr_set(asw->dev, &asw->attr);
106         if (err && err != -EOPNOTSUPP)
107                 netdev_err(asw->dev, "failed (err=%d) to set attribute (id=%d)\n",
108                            err, asw->attr.id);
109         rtnl_unlock();
110
111         dev_put(asw->dev);
112         kfree(work);
113 }
114
115 static int switchdev_port_attr_set_defer(struct net_device *dev,
116                                          struct switchdev_attr *attr)
117 {
118         struct switchdev_attr_set_work *asw;
119
120         asw = kmalloc(sizeof(*asw), GFP_ATOMIC);
121         if (!asw)
122                 return -ENOMEM;
123
124         INIT_WORK(&asw->work, switchdev_port_attr_set_work);
125
126         dev_hold(dev);
127         asw->dev = dev;
128         memcpy(&asw->attr, attr, sizeof(asw->attr));
129
130         schedule_work(&asw->work);
131
132         return 0;
133 }
134
135 /**
136  *      switchdev_port_attr_set - Set port attribute
137  *
138  *      @dev: port device
139  *      @attr: attribute to set
140  *
141  *      Use a 2-phase prepare-commit transaction model to ensure
142  *      system is not left in a partially updated state due to
143  *      failure from driver/device.
144  */
145 int switchdev_port_attr_set(struct net_device *dev, struct switchdev_attr *attr)
146 {
147         int err;
148
149         if (!rtnl_is_locked()) {
150                 /* Running prepare-commit transaction across stacked
151                  * devices requires nothing moves, so if rtnl_lock is
152                  * not held, schedule a worker thread to hold rtnl_lock
153                  * while setting attr.
154                  */
155
156                 return switchdev_port_attr_set_defer(dev, attr);
157         }
158
159         /* Phase I: prepare for attr set. Driver/device should fail
160          * here if there are going to be issues in the commit phase,
161          * such as lack of resources or support.  The driver/device
162          * should reserve resources needed for the commit phase here,
163          * but should not commit the attr.
164          */
165
166         attr->trans = SWITCHDEV_TRANS_PREPARE;
167         err = __switchdev_port_attr_set(dev, attr);
168         if (err) {
169                 /* Prepare phase failed: abort the transaction.  Any
170                  * resources reserved in the prepare phase are
171                  * released.
172                  */
173
174                 if (err != -EOPNOTSUPP) {
175                         attr->trans = SWITCHDEV_TRANS_ABORT;
176                         __switchdev_port_attr_set(dev, attr);
177                 }
178
179                 return err;
180         }
181
182         /* Phase II: commit attr set.  This cannot fail as a fault
183          * of driver/device.  If it does, it's a bug in the driver/device
184          * because the driver said everythings was OK in phase I.
185          */
186
187         attr->trans = SWITCHDEV_TRANS_COMMIT;
188         err = __switchdev_port_attr_set(dev, attr);
189         WARN(err, "%s: Commit of attribute (id=%d) failed.\n",
190              dev->name, attr->id);
191
192         return err;
193 }
194 EXPORT_SYMBOL_GPL(switchdev_port_attr_set);
195
196 static int __switchdev_port_obj_add(struct net_device *dev,
197                                     struct switchdev_obj *obj)
198 {
199         const struct switchdev_ops *ops = dev->switchdev_ops;
200         struct net_device *lower_dev;
201         struct list_head *iter;
202         int err = -EOPNOTSUPP;
203
204         if (ops && ops->switchdev_port_obj_add)
205                 return ops->switchdev_port_obj_add(dev, obj);
206
207         /* Switch device port(s) may be stacked under
208          * bond/team/vlan dev, so recurse down to add object on
209          * each port.
210          */
211
212         netdev_for_each_lower_dev(dev, lower_dev, iter) {
213                 err = __switchdev_port_obj_add(lower_dev, obj);
214                 if (err)
215                         break;
216         }
217
218         return err;
219 }
220
221 /**
222  *      switchdev_port_obj_add - Add port object
223  *
224  *      @dev: port device
225  *      @obj: object to add
226  *
227  *      Use a 2-phase prepare-commit transaction model to ensure
228  *      system is not left in a partially updated state due to
229  *      failure from driver/device.
230  *
231  *      rtnl_lock must be held.
232  */
233 int switchdev_port_obj_add(struct net_device *dev, struct switchdev_obj *obj)
234 {
235         int err;
236
237         ASSERT_RTNL();
238
239         /* Phase I: prepare for obj add. Driver/device should fail
240          * here if there are going to be issues in the commit phase,
241          * such as lack of resources or support.  The driver/device
242          * should reserve resources needed for the commit phase here,
243          * but should not commit the obj.
244          */
245
246         obj->trans = SWITCHDEV_TRANS_PREPARE;
247         err = __switchdev_port_obj_add(dev, obj);
248         if (err) {
249                 /* Prepare phase failed: abort the transaction.  Any
250                  * resources reserved in the prepare phase are
251                  * released.
252                  */
253
254                 if (err != -EOPNOTSUPP) {
255                         obj->trans = SWITCHDEV_TRANS_ABORT;
256                         __switchdev_port_obj_add(dev, obj);
257                 }
258
259                 return err;
260         }
261
262         /* Phase II: commit obj add.  This cannot fail as a fault
263          * of driver/device.  If it does, it's a bug in the driver/device
264          * because the driver said everythings was OK in phase I.
265          */
266
267         obj->trans = SWITCHDEV_TRANS_COMMIT;
268         err = __switchdev_port_obj_add(dev, obj);
269         WARN(err, "%s: Commit of object (id=%d) failed.\n", dev->name, obj->id);
270
271         return err;
272 }
273 EXPORT_SYMBOL_GPL(switchdev_port_obj_add);
274
275 /**
276  *      switchdev_port_obj_del - Delete port object
277  *
278  *      @dev: port device
279  *      @obj: object to delete
280  */
281 int switchdev_port_obj_del(struct net_device *dev, struct switchdev_obj *obj)
282 {
283         const struct switchdev_ops *ops = dev->switchdev_ops;
284         struct net_device *lower_dev;
285         struct list_head *iter;
286         int err = -EOPNOTSUPP;
287
288         if (ops && ops->switchdev_port_obj_del)
289                 return ops->switchdev_port_obj_del(dev, obj);
290
291         /* Switch device port(s) may be stacked under
292          * bond/team/vlan dev, so recurse down to delete object on
293          * each port.
294          */
295
296         netdev_for_each_lower_dev(dev, lower_dev, iter) {
297                 err = switchdev_port_obj_del(lower_dev, obj);
298                 if (err)
299                         break;
300         }
301
302         return err;
303 }
304 EXPORT_SYMBOL_GPL(switchdev_port_obj_del);
305
306 /**
307  *      switchdev_port_obj_dump - Dump port objects
308  *
309  *      @dev: port device
310  *      @obj: object to dump
311  */
312 int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj)
313 {
314         const struct switchdev_ops *ops = dev->switchdev_ops;
315         struct net_device *lower_dev;
316         struct list_head *iter;
317         int err = -EOPNOTSUPP;
318
319         if (ops && ops->switchdev_port_obj_dump)
320                 return ops->switchdev_port_obj_dump(dev, obj);
321
322         /* Switch device port(s) may be stacked under
323          * bond/team/vlan dev, so recurse down to dump objects on
324          * first port at bottom of stack.
325          */
326
327         netdev_for_each_lower_dev(dev, lower_dev, iter) {
328                 err = switchdev_port_obj_dump(lower_dev, obj);
329                 break;
330         }
331
332         return err;
333 }
334 EXPORT_SYMBOL_GPL(switchdev_port_obj_dump);
335
336 static DEFINE_MUTEX(switchdev_mutex);
337 static RAW_NOTIFIER_HEAD(switchdev_notif_chain);
338
339 /**
340  *      register_switchdev_notifier - Register notifier
341  *      @nb: notifier_block
342  *
343  *      Register switch device notifier. This should be used by code
344  *      which needs to monitor events happening in particular device.
345  *      Return values are same as for atomic_notifier_chain_register().
346  */
347 int register_switchdev_notifier(struct notifier_block *nb)
348 {
349         int err;
350
351         mutex_lock(&switchdev_mutex);
352         err = raw_notifier_chain_register(&switchdev_notif_chain, nb);
353         mutex_unlock(&switchdev_mutex);
354         return err;
355 }
356 EXPORT_SYMBOL_GPL(register_switchdev_notifier);
357
358 /**
359  *      unregister_switchdev_notifier - Unregister notifier
360  *      @nb: notifier_block
361  *
362  *      Unregister switch device notifier.
363  *      Return values are same as for atomic_notifier_chain_unregister().
364  */
365 int unregister_switchdev_notifier(struct notifier_block *nb)
366 {
367         int err;
368
369         mutex_lock(&switchdev_mutex);
370         err = raw_notifier_chain_unregister(&switchdev_notif_chain, nb);
371         mutex_unlock(&switchdev_mutex);
372         return err;
373 }
374 EXPORT_SYMBOL_GPL(unregister_switchdev_notifier);
375
376 /**
377  *      call_switchdev_notifiers - Call notifiers
378  *      @val: value passed unmodified to notifier function
379  *      @dev: port device
380  *      @info: notifier information data
381  *
382  *      Call all network notifier blocks. This should be called by driver
383  *      when it needs to propagate hardware event.
384  *      Return values are same as for atomic_notifier_call_chain().
385  */
386 int call_switchdev_notifiers(unsigned long val, struct net_device *dev,
387                              struct switchdev_notifier_info *info)
388 {
389         int err;
390
391         info->dev = dev;
392         mutex_lock(&switchdev_mutex);
393         err = raw_notifier_call_chain(&switchdev_notif_chain, val, info);
394         mutex_unlock(&switchdev_mutex);
395         return err;
396 }
397 EXPORT_SYMBOL_GPL(call_switchdev_notifiers);
398
399 struct switchdev_vlan_dump {
400         struct switchdev_obj obj;
401         struct sk_buff *skb;
402         u32 filter_mask;
403         u16 flags;
404         u16 begin;
405         u16 end;
406 };
407
408 static int switchdev_port_vlan_dump_put(struct net_device *dev,
409                                         struct switchdev_vlan_dump *dump)
410 {
411         struct bridge_vlan_info vinfo;
412
413         vinfo.flags = dump->flags;
414
415         if (dump->begin == 0 && dump->end == 0) {
416                 return 0;
417         } else if (dump->begin == dump->end) {
418                 vinfo.vid = dump->begin;
419                 if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO,
420                             sizeof(vinfo), &vinfo))
421                         return -EMSGSIZE;
422         } else {
423                 vinfo.vid = dump->begin;
424                 vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_BEGIN;
425                 if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO,
426                             sizeof(vinfo), &vinfo))
427                         return -EMSGSIZE;
428                 vinfo.vid = dump->end;
429                 vinfo.flags &= ~BRIDGE_VLAN_INFO_RANGE_BEGIN;
430                 vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_END;
431                 if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO,
432                             sizeof(vinfo), &vinfo))
433                         return -EMSGSIZE;
434         }
435
436         return 0;
437 }
438
439 static int switchdev_port_vlan_dump_cb(struct net_device *dev,
440                                        struct switchdev_obj *obj)
441 {
442         struct switchdev_vlan_dump *dump =
443                 container_of(obj, struct switchdev_vlan_dump, obj);
444         struct switchdev_obj_vlan *vlan = &dump->obj.u.vlan;
445         int err = 0;
446
447         if (vlan->vid_begin > vlan->vid_end)
448                 return -EINVAL;
449
450         if (dump->filter_mask & RTEXT_FILTER_BRVLAN) {
451                 dump->flags = vlan->flags;
452                 for (dump->begin = dump->end = vlan->vid_begin;
453                      dump->begin <= vlan->vid_end;
454                      dump->begin++, dump->end++) {
455                         err = switchdev_port_vlan_dump_put(dev, dump);
456                         if (err)
457                                 return err;
458                 }
459         } else if (dump->filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED) {
460                 if (dump->begin > vlan->vid_begin &&
461                     dump->begin >= vlan->vid_end) {
462                         if ((dump->begin - 1) == vlan->vid_end &&
463                             dump->flags == vlan->flags) {
464                                 /* prepend */
465                                 dump->begin = vlan->vid_begin;
466                         } else {
467                                 err = switchdev_port_vlan_dump_put(dev, dump);
468                                 dump->flags = vlan->flags;
469                                 dump->begin = vlan->vid_begin;
470                                 dump->end = vlan->vid_end;
471                         }
472                 } else if (dump->end <= vlan->vid_begin &&
473                            dump->end < vlan->vid_end) {
474                         if ((dump->end  + 1) == vlan->vid_begin &&
475                             dump->flags == vlan->flags) {
476                                 /* append */
477                                 dump->end = vlan->vid_end;
478                         } else {
479                                 err = switchdev_port_vlan_dump_put(dev, dump);
480                                 dump->flags = vlan->flags;
481                                 dump->begin = vlan->vid_begin;
482                                 dump->end = vlan->vid_end;
483                         }
484                 } else {
485                         err = -EINVAL;
486                 }
487         }
488
489         return err;
490 }
491
492 static int switchdev_port_vlan_fill(struct sk_buff *skb, struct net_device *dev,
493                                     u32 filter_mask)
494 {
495         struct switchdev_vlan_dump dump = {
496                 .obj = {
497                         .id = SWITCHDEV_OBJ_PORT_VLAN,
498                         .cb = switchdev_port_vlan_dump_cb,
499                 },
500                 .skb = skb,
501                 .filter_mask = filter_mask,
502         };
503         int err = 0;
504
505         if ((filter_mask & RTEXT_FILTER_BRVLAN) ||
506             (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) {
507                 err = switchdev_port_obj_dump(dev, &dump.obj);
508                 if (err)
509                         goto err_out;
510                 if (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)
511                         /* last one */
512                         err = switchdev_port_vlan_dump_put(dev, &dump);
513         }
514
515 err_out:
516         return err == -EOPNOTSUPP ? 0 : err;
517 }
518
519 /**
520  *      switchdev_port_bridge_getlink - Get bridge port attributes
521  *
522  *      @dev: port device
523  *
524  *      Called for SELF on rtnl_bridge_getlink to get bridge port
525  *      attributes.
526  */
527 int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
528                                   struct net_device *dev, u32 filter_mask,
529                                   int nlflags)
530 {
531         struct switchdev_attr attr = {
532                 .id = SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS,
533         };
534         u16 mode = BRIDGE_MODE_UNDEF;
535         u32 mask = BR_LEARNING | BR_LEARNING_SYNC;
536         int err;
537
538         err = switchdev_port_attr_get(dev, &attr);
539         if (err && err != -EOPNOTSUPP)
540                 return err;
541
542         return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode,
543                                        attr.u.brport_flags, mask, nlflags,
544                                        filter_mask, switchdev_port_vlan_fill);
545 }
546 EXPORT_SYMBOL_GPL(switchdev_port_bridge_getlink);
547
548 static int switchdev_port_br_setflag(struct net_device *dev,
549                                      struct nlattr *nlattr,
550                                      unsigned long brport_flag)
551 {
552         struct switchdev_attr attr = {
553                 .id = SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS,
554         };
555         u8 flag = nla_get_u8(nlattr);
556         int err;
557
558         err = switchdev_port_attr_get(dev, &attr);
559         if (err)
560                 return err;
561
562         if (flag)
563                 attr.u.brport_flags |= brport_flag;
564         else
565                 attr.u.brport_flags &= ~brport_flag;
566
567         return switchdev_port_attr_set(dev, &attr);
568 }
569
570 static const struct nla_policy
571 switchdev_port_bridge_policy[IFLA_BRPORT_MAX + 1] = {
572         [IFLA_BRPORT_STATE]             = { .type = NLA_U8 },
573         [IFLA_BRPORT_COST]              = { .type = NLA_U32 },
574         [IFLA_BRPORT_PRIORITY]          = { .type = NLA_U16 },
575         [IFLA_BRPORT_MODE]              = { .type = NLA_U8 },
576         [IFLA_BRPORT_GUARD]             = { .type = NLA_U8 },
577         [IFLA_BRPORT_PROTECT]           = { .type = NLA_U8 },
578         [IFLA_BRPORT_FAST_LEAVE]        = { .type = NLA_U8 },
579         [IFLA_BRPORT_LEARNING]          = { .type = NLA_U8 },
580         [IFLA_BRPORT_LEARNING_SYNC]     = { .type = NLA_U8 },
581         [IFLA_BRPORT_UNICAST_FLOOD]     = { .type = NLA_U8 },
582 };
583
584 static int switchdev_port_br_setlink_protinfo(struct net_device *dev,
585                                               struct nlattr *protinfo)
586 {
587         struct nlattr *attr;
588         int rem;
589         int err;
590
591         err = nla_validate_nested(protinfo, IFLA_BRPORT_MAX,
592                                   switchdev_port_bridge_policy);
593         if (err)
594                 return err;
595
596         nla_for_each_nested(attr, protinfo, rem) {
597                 switch (nla_type(attr)) {
598                 case IFLA_BRPORT_LEARNING:
599                         err = switchdev_port_br_setflag(dev, attr,
600                                                         BR_LEARNING);
601                         break;
602                 case IFLA_BRPORT_LEARNING_SYNC:
603                         err = switchdev_port_br_setflag(dev, attr,
604                                                         BR_LEARNING_SYNC);
605                         break;
606                 default:
607                         err = -EOPNOTSUPP;
608                         break;
609                 }
610                 if (err)
611                         return err;
612         }
613
614         return 0;
615 }
616
617 static int switchdev_port_br_afspec(struct net_device *dev,
618                                     struct nlattr *afspec,
619                                     int (*f)(struct net_device *dev,
620                                              struct switchdev_obj *obj))
621 {
622         struct nlattr *attr;
623         struct bridge_vlan_info *vinfo;
624         struct switchdev_obj obj = {
625                 .id = SWITCHDEV_OBJ_PORT_VLAN,
626         };
627         struct switchdev_obj_vlan *vlan = &obj.u.vlan;
628         int rem;
629         int err;
630
631         nla_for_each_nested(attr, afspec, rem) {
632                 if (nla_type(attr) != IFLA_BRIDGE_VLAN_INFO)
633                         continue;
634                 if (nla_len(attr) != sizeof(struct bridge_vlan_info))
635                         return -EINVAL;
636                 vinfo = nla_data(attr);
637                 vlan->flags = vinfo->flags;
638                 if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) {
639                         if (vlan->vid_begin)
640                                 return -EINVAL;
641                         vlan->vid_begin = vinfo->vid;
642                 } else if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_END) {
643                         if (!vlan->vid_begin)
644                                 return -EINVAL;
645                         vlan->vid_end = vinfo->vid;
646                         if (vlan->vid_end <= vlan->vid_begin)
647                                 return -EINVAL;
648                         err = f(dev, &obj);
649                         if (err)
650                                 return err;
651                         memset(vlan, 0, sizeof(*vlan));
652                 } else {
653                         if (vlan->vid_begin)
654                                 return -EINVAL;
655                         vlan->vid_begin = vinfo->vid;
656                         vlan->vid_end = vinfo->vid;
657                         err = f(dev, &obj);
658                         if (err)
659                                 return err;
660                         memset(vlan, 0, sizeof(*vlan));
661                 }
662         }
663
664         return 0;
665 }
666
667 /**
668  *      switchdev_port_bridge_setlink - Set bridge port attributes
669  *
670  *      @dev: port device
671  *      @nlh: netlink header
672  *      @flags: netlink flags
673  *
674  *      Called for SELF on rtnl_bridge_setlink to set bridge port
675  *      attributes.
676  */
677 int switchdev_port_bridge_setlink(struct net_device *dev,
678                                   struct nlmsghdr *nlh, u16 flags)
679 {
680         struct nlattr *protinfo;
681         struct nlattr *afspec;
682         int err = 0;
683
684         protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
685                                    IFLA_PROTINFO);
686         if (protinfo) {
687                 err = switchdev_port_br_setlink_protinfo(dev, protinfo);
688                 if (err)
689                         return err;
690         }
691
692         afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
693                                  IFLA_AF_SPEC);
694         if (afspec)
695                 err = switchdev_port_br_afspec(dev, afspec,
696                                                switchdev_port_obj_add);
697
698         return err;
699 }
700 EXPORT_SYMBOL_GPL(switchdev_port_bridge_setlink);
701
702 /**
703  *      switchdev_port_bridge_dellink - Set bridge port attributes
704  *
705  *      @dev: port device
706  *      @nlh: netlink header
707  *      @flags: netlink flags
708  *
709  *      Called for SELF on rtnl_bridge_dellink to set bridge port
710  *      attributes.
711  */
712 int switchdev_port_bridge_dellink(struct net_device *dev,
713                                   struct nlmsghdr *nlh, u16 flags)
714 {
715         struct nlattr *afspec;
716
717         afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
718                                  IFLA_AF_SPEC);
719         if (afspec)
720                 return switchdev_port_br_afspec(dev, afspec,
721                                                 switchdev_port_obj_del);
722
723         return 0;
724 }
725 EXPORT_SYMBOL_GPL(switchdev_port_bridge_dellink);
726
727 /**
728  *      switchdev_port_fdb_add - Add FDB (MAC/VLAN) entry to port
729  *
730  *      @ndmsg: netlink hdr
731  *      @nlattr: netlink attributes
732  *      @dev: port device
733  *      @addr: MAC address to add
734  *      @vid: VLAN to add
735  *
736  *      Add FDB entry to switch device.
737  */
738 int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
739                            struct net_device *dev, const unsigned char *addr,
740                            u16 vid, u16 nlm_flags)
741 {
742         struct switchdev_obj obj = {
743                 .id = SWITCHDEV_OBJ_PORT_FDB,
744                 .u.fdb = {
745                         .addr = addr,
746                         .vid = vid,
747                 },
748         };
749
750         return switchdev_port_obj_add(dev, &obj);
751 }
752 EXPORT_SYMBOL_GPL(switchdev_port_fdb_add);
753
754 /**
755  *      switchdev_port_fdb_del - Delete FDB (MAC/VLAN) entry from port
756  *
757  *      @ndmsg: netlink hdr
758  *      @nlattr: netlink attributes
759  *      @dev: port device
760  *      @addr: MAC address to delete
761  *      @vid: VLAN to delete
762  *
763  *      Delete FDB entry from switch device.
764  */
765 int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
766                            struct net_device *dev, const unsigned char *addr,
767                            u16 vid)
768 {
769         struct switchdev_obj obj = {
770                 .id = SWITCHDEV_OBJ_PORT_FDB,
771                 .u.fdb = {
772                         .addr = addr,
773                         .vid = vid,
774                 },
775         };
776
777         return switchdev_port_obj_del(dev, &obj);
778 }
779 EXPORT_SYMBOL_GPL(switchdev_port_fdb_del);
780
781 struct switchdev_fdb_dump {
782         struct switchdev_obj obj;
783         struct sk_buff *skb;
784         struct netlink_callback *cb;
785         int idx;
786 };
787
788 static int switchdev_port_fdb_dump_cb(struct net_device *dev,
789                                       struct switchdev_obj *obj)
790 {
791         struct switchdev_fdb_dump *dump =
792                 container_of(obj, struct switchdev_fdb_dump, obj);
793         u32 portid = NETLINK_CB(dump->cb->skb).portid;
794         u32 seq = dump->cb->nlh->nlmsg_seq;
795         struct nlmsghdr *nlh;
796         struct ndmsg *ndm;
797
798         if (dump->idx < dump->cb->args[0])
799                 goto skip;
800
801         nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH,
802                         sizeof(*ndm), NLM_F_MULTI);
803         if (!nlh)
804                 return -EMSGSIZE;
805
806         ndm = nlmsg_data(nlh);
807         ndm->ndm_family  = AF_BRIDGE;
808         ndm->ndm_pad1    = 0;
809         ndm->ndm_pad2    = 0;
810         ndm->ndm_flags   = NTF_SELF;
811         ndm->ndm_type    = 0;
812         ndm->ndm_ifindex = dev->ifindex;
813         ndm->ndm_state   = obj->u.fdb.ndm_state;
814
815         if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, obj->u.fdb.addr))
816                 goto nla_put_failure;
817
818         if (obj->u.fdb.vid && nla_put_u16(dump->skb, NDA_VLAN, obj->u.fdb.vid))
819                 goto nla_put_failure;
820
821         nlmsg_end(dump->skb, nlh);
822
823 skip:
824         dump->idx++;
825         return 0;
826
827 nla_put_failure:
828         nlmsg_cancel(dump->skb, nlh);
829         return -EMSGSIZE;
830 }
831
832 /**
833  *      switchdev_port_fdb_dump - Dump port FDB (MAC/VLAN) entries
834  *
835  *      @skb: netlink skb
836  *      @cb: netlink callback
837  *      @dev: port device
838  *      @filter_dev: filter device
839  *      @idx:
840  *
841  *      Delete FDB entry from switch device.
842  */
843 int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
844                             struct net_device *dev,
845                             struct net_device *filter_dev, int idx)
846 {
847         struct switchdev_fdb_dump dump = {
848                 .obj = {
849                         .id = SWITCHDEV_OBJ_PORT_FDB,
850                         .cb = switchdev_port_fdb_dump_cb,
851                 },
852                 .skb = skb,
853                 .cb = cb,
854                 .idx = idx,
855         };
856
857         switchdev_port_obj_dump(dev, &dump.obj);
858         return dump.idx;
859 }
860 EXPORT_SYMBOL_GPL(switchdev_port_fdb_dump);
861
862 static struct net_device *switchdev_get_lowest_dev(struct net_device *dev)
863 {
864         const struct switchdev_ops *ops = dev->switchdev_ops;
865         struct net_device *lower_dev;
866         struct net_device *port_dev;
867         struct list_head *iter;
868
869         /* Recusively search down until we find a sw port dev.
870          * (A sw port dev supports switchdev_port_attr_get).
871          */
872
873         if (ops && ops->switchdev_port_attr_get)
874                 return dev;
875
876         netdev_for_each_lower_dev(dev, lower_dev, iter) {
877                 port_dev = switchdev_get_lowest_dev(lower_dev);
878                 if (port_dev)
879                         return port_dev;
880         }
881
882         return NULL;
883 }
884
885 static struct net_device *switchdev_get_dev_by_nhs(struct fib_info *fi)
886 {
887         struct switchdev_attr attr = {
888                 .id = SWITCHDEV_ATTR_PORT_PARENT_ID,
889         };
890         struct switchdev_attr prev_attr;
891         struct net_device *dev = NULL;
892         int nhsel;
893
894         /* For this route, all nexthop devs must be on the same switch. */
895
896         for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
897                 const struct fib_nh *nh = &fi->fib_nh[nhsel];
898
899                 if (!nh->nh_dev)
900                         return NULL;
901
902                 dev = switchdev_get_lowest_dev(nh->nh_dev);
903                 if (!dev)
904                         return NULL;
905
906                 if (switchdev_port_attr_get(dev, &attr))
907                         return NULL;
908
909                 if (nhsel > 0 &&
910                     !netdev_phys_item_id_same(&prev_attr.u.ppid, &attr.u.ppid))
911                                 return NULL;
912
913                 prev_attr = attr;
914         }
915
916         return dev;
917 }
918
919 /**
920  *      switchdev_fib_ipv4_add - Add/modify switch IPv4 route entry
921  *
922  *      @dst: route's IPv4 destination address
923  *      @dst_len: destination address length (prefix length)
924  *      @fi: route FIB info structure
925  *      @tos: route TOS
926  *      @type: route type
927  *      @nlflags: netlink flags passed in (NLM_F_*)
928  *      @tb_id: route table ID
929  *
930  *      Add/modify switch IPv4 route entry.
931  */
932 int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi,
933                            u8 tos, u8 type, u32 nlflags, u32 tb_id)
934 {
935         struct switchdev_obj fib_obj = {
936                 .id = SWITCHDEV_OBJ_IPV4_FIB,
937                 .u.ipv4_fib = {
938                         .dst = dst,
939                         .dst_len = dst_len,
940                         .fi = fi,
941                         .tos = tos,
942                         .type = type,
943                         .nlflags = nlflags,
944                         .tb_id = tb_id,
945                 },
946         };
947         struct net_device *dev;
948         int err = 0;
949
950         /* Don't offload route if using custom ip rules or if
951          * IPv4 FIB offloading has been disabled completely.
952          */
953
954 #ifdef CONFIG_IP_MULTIPLE_TABLES
955         if (fi->fib_net->ipv4.fib_has_custom_rules)
956                 return 0;
957 #endif
958
959         if (fi->fib_net->ipv4.fib_offload_disabled)
960                 return 0;
961
962         dev = switchdev_get_dev_by_nhs(fi);
963         if (!dev)
964                 return 0;
965
966         err = switchdev_port_obj_add(dev, &fib_obj);
967         if (!err)
968                 fi->fib_flags |= RTNH_F_OFFLOAD;
969
970         return err == -EOPNOTSUPP ? 0 : err;
971 }
972 EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_add);
973
974 /**
975  *      switchdev_fib_ipv4_del - Delete IPv4 route entry from switch
976  *
977  *      @dst: route's IPv4 destination address
978  *      @dst_len: destination address length (prefix length)
979  *      @fi: route FIB info structure
980  *      @tos: route TOS
981  *      @type: route type
982  *      @tb_id: route table ID
983  *
984  *      Delete IPv4 route entry from switch device.
985  */
986 int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi,
987                            u8 tos, u8 type, u32 tb_id)
988 {
989         struct switchdev_obj fib_obj = {
990                 .id = SWITCHDEV_OBJ_IPV4_FIB,
991                 .u.ipv4_fib = {
992                         .dst = dst,
993                         .dst_len = dst_len,
994                         .fi = fi,
995                         .tos = tos,
996                         .type = type,
997                         .nlflags = 0,
998                         .tb_id = tb_id,
999                 },
1000         };
1001         struct net_device *dev;
1002         int err = 0;
1003
1004         if (!(fi->fib_flags & RTNH_F_OFFLOAD))
1005                 return 0;
1006
1007         dev = switchdev_get_dev_by_nhs(fi);
1008         if (!dev)
1009                 return 0;
1010
1011         err = switchdev_port_obj_del(dev, &fib_obj);
1012         if (!err)
1013                 fi->fib_flags &= ~RTNH_F_OFFLOAD;
1014
1015         return err == -EOPNOTSUPP ? 0 : err;
1016 }
1017 EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_del);
1018
1019 /**
1020  *      switchdev_fib_ipv4_abort - Abort an IPv4 FIB operation
1021  *
1022  *      @fi: route FIB info structure
1023  */
1024 void switchdev_fib_ipv4_abort(struct fib_info *fi)
1025 {
1026         /* There was a problem installing this route to the offload
1027          * device.  For now, until we come up with more refined
1028          * policy handling, abruptly end IPv4 fib offloading for
1029          * for entire net by flushing offload device(s) of all
1030          * IPv4 routes, and mark IPv4 fib offloading broken from
1031          * this point forward.
1032          */
1033
1034         fib_flush_external(fi->fib_net);
1035         fi->fib_net->ipv4.fib_offload_disabled = true;
1036 }
1037 EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_abort);
1038
1039 static bool switchdev_port_same_parent_id(struct net_device *a,
1040                                           struct net_device *b)
1041 {
1042         struct switchdev_attr a_attr = {
1043                 .id = SWITCHDEV_ATTR_PORT_PARENT_ID,
1044                 .flags = SWITCHDEV_F_NO_RECURSE,
1045         };
1046         struct switchdev_attr b_attr = {
1047                 .id = SWITCHDEV_ATTR_PORT_PARENT_ID,
1048                 .flags = SWITCHDEV_F_NO_RECURSE,
1049         };
1050
1051         if (switchdev_port_attr_get(a, &a_attr) ||
1052             switchdev_port_attr_get(b, &b_attr))
1053                 return false;
1054
1055         return netdev_phys_item_id_same(&a_attr.u.ppid, &b_attr.u.ppid);
1056 }
1057
1058 static u32 switchdev_port_fwd_mark_get(struct net_device *dev,
1059                                        struct net_device *group_dev)
1060 {
1061         struct net_device *lower_dev;
1062         struct list_head *iter;
1063
1064         netdev_for_each_lower_dev(group_dev, lower_dev, iter) {
1065                 if (lower_dev == dev)
1066                         continue;
1067                 if (switchdev_port_same_parent_id(dev, lower_dev))
1068                         return lower_dev->offload_fwd_mark;
1069                 return switchdev_port_fwd_mark_get(dev, lower_dev);
1070         }
1071
1072         return dev->ifindex;
1073 }
1074
1075 static void switchdev_port_fwd_mark_reset(struct net_device *group_dev,
1076                                           u32 old_mark, u32 *reset_mark)
1077 {
1078         struct net_device *lower_dev;
1079         struct list_head *iter;
1080
1081         netdev_for_each_lower_dev(group_dev, lower_dev, iter) {
1082                 if (lower_dev->offload_fwd_mark == old_mark) {
1083                         if (!*reset_mark)
1084                                 *reset_mark = lower_dev->ifindex;
1085                         lower_dev->offload_fwd_mark = *reset_mark;
1086                 }
1087                 switchdev_port_fwd_mark_reset(lower_dev, old_mark, reset_mark);
1088         }
1089 }
1090
1091 /**
1092  *      switchdev_port_fwd_mark_set - Set port offload forwarding mark
1093  *
1094  *      @dev: port device
1095  *      @group_dev: containing device
1096  *      @joining: true if dev is joining group; false if leaving group
1097  *
1098  *      An ungrouped port's offload mark is just its ifindex.  A grouped
1099  *      port's (member of a bridge, for example) offload mark is the ifindex
1100  *      of one of the ports in the group with the same parent (switch) ID.
1101  *      Ports on the same device in the same group will have the same mark.
1102  *
1103  *      Example:
1104  *
1105  *              br0             ifindex=9
1106  *                sw1p1         ifindex=2       mark=2
1107  *                sw1p2         ifindex=3       mark=2
1108  *                sw2p1         ifindex=4       mark=5
1109  *                sw2p2         ifindex=5       mark=5
1110  *
1111  *      If sw2p2 leaves the bridge, we'll have:
1112  *
1113  *              br0             ifindex=9
1114  *                sw1p1         ifindex=2       mark=2
1115  *                sw1p2         ifindex=3       mark=2
1116  *                sw2p1         ifindex=4       mark=4
1117  *              sw2p2           ifindex=5       mark=5
1118  */
1119 void switchdev_port_fwd_mark_set(struct net_device *dev,
1120                                  struct net_device *group_dev,
1121                                  bool joining)
1122 {
1123         u32 mark = dev->ifindex;
1124         u32 reset_mark = 0;
1125
1126         if (group_dev && joining) {
1127                 mark = switchdev_port_fwd_mark_get(dev, group_dev);
1128         } else if (group_dev && !joining) {
1129                 if (dev->offload_fwd_mark == mark)
1130                         /* Ohoh, this port was the mark reference port,
1131                          * but it's leaving the group, so reset the
1132                          * mark for the remaining ports in the group.
1133                          */
1134                         switchdev_port_fwd_mark_reset(group_dev, mark,
1135                                                       &reset_mark);
1136         }
1137
1138         dev->offload_fwd_mark = mark;
1139 }
1140 EXPORT_SYMBOL_GPL(switchdev_port_fwd_mark_set);