/*
 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/hash.h>
#include <linux/mlx5/fs.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include "en.h"

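/* Accelerated Receive Flow Steering (aRFS) support: steer flows to the RX
 * queue of the CPU that consumes them, by installing per-flow hardware rules
 * in dedicated flow tables (one table per IPv4/IPv6 x TCP/UDP type).
 */
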
struct arfs_tuple {
	__be16 etype;
	u8     ip_proto;
	union {
		__be32 src_ipv4;
		struct in6_addr src_ipv6;
	};
	union {
		__be32 dst_ipv4;
		struct in6_addr dst_ipv6;
	};
	__be16 src_port;
	__be16 dst_port;
};

struct arfs_rule {
	struct mlx5e_priv	*priv;
	struct work_struct	arfs_work;
	struct mlx5_flow_rule	*rule;
	struct hlist_node	hlist;
	int			rxq;
	/* Flow ID passed to ndo_rx_flow_steer */
	int			flow_id;
	/* Filter ID returned by ndo_rx_flow_steer */
	int			filter_id;
	struct arfs_tuple	tuple;
};

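/* Iterate (safely against removal) over every aRFS rule, across all four
 * table types and all hash buckets of each table.
 */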
#define mlx5e_for_each_arfs_rule(hn, tmp, arfs_tables, i, j) \
	for (i = 0; i < ARFS_NUM_TYPES; i++) \
		mlx5e_for_each_hash_arfs_rule(hn, tmp, arfs_tables[i].rules_hash, j)

#define mlx5e_for_each_hash_arfs_rule(hn, tmp, hash, j) \
	for (j = 0; j < ARFS_HASH_SIZE; j++) \
		hlist_for_each_entry_safe(hn, tmp, &hash[j], hlist)

static void arfs_destroy_table(struct arfs_table *arfs_t)
{
	mlx5_del_flow_rule(arfs_t->default_rule);
	mlx5e_destroy_flow_table(&arfs_t->ft);
}

static void arfs_del_rules(struct mlx5e_priv *priv);

void mlx5e_arfs_destroy_tables(struct mlx5e_priv *priv)
{
	int i;

	if (!(priv->netdev->hw_features & NETIF_F_NTUPLE))
		return;

	arfs_del_rules(priv);
	destroy_workqueue(priv->fs.arfs.wq);
	for (i = 0; i < ARFS_NUM_TYPES; i++) {
		if (!IS_ERR_OR_NULL(priv->fs.arfs.arfs_tables[i].ft.t))
			arfs_destroy_table(&priv->fs.arfs.arfs_tables[i]);
	}
}

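/* Install the lowest-priority catch-all rule of an aRFS table: any packet
 * not matched by a per-flow rule falls back to the regular indirection
 * (RSS) TIR for its traffic type.
 */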
static int arfs_add_default_rule(struct mlx5e_priv *priv,
				 enum arfs_type type)
{
	struct arfs_table *arfs_t = &priv->fs.arfs.arfs_tables[type];
	struct mlx5_flow_destination dest;
	u8 match_criteria_enable = 0;
	u32 *tirn = priv->indir_tirn;
	u32 *match_criteria;
	u32 *match_value;
	int err = 0;

	match_value = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param));
	match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param));
	if (!match_value || !match_criteria) {
		netdev_err(priv->netdev, "%s: alloc failed\n", __func__);
		err = -ENOMEM;
		goto out;
	}

	dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
	switch (type) {
	case ARFS_IPV4_TCP:
		dest.tir_num = tirn[MLX5E_TT_IPV4_TCP];
		break;
	case ARFS_IPV4_UDP:
		dest.tir_num = tirn[MLX5E_TT_IPV4_UDP];
		break;
	case ARFS_IPV6_TCP:
		dest.tir_num = tirn[MLX5E_TT_IPV6_TCP];
		break;
	case ARFS_IPV6_UDP:
		dest.tir_num = tirn[MLX5E_TT_IPV6_UDP];
		break;
	default:
		err = -EINVAL;
		goto out;
	}

	arfs_t->default_rule = mlx5_add_flow_rule(arfs_t->ft.t, match_criteria_enable,
						  match_criteria, match_value,
						  MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
						  MLX5_FS_DEFAULT_FLOW_TAG,
						  &dest);
	if (IS_ERR(arfs_t->default_rule)) {
		err = PTR_ERR(arfs_t->default_rule);
		arfs_t->default_rule = NULL;
		netdev_err(priv->netdev, "%s: add rule failed, arfs type=%d\n",
			   __func__, type);
	}
out:
	kvfree(match_criteria);
	kvfree(match_value);
	return err;
}

#define MLX5E_ARFS_NUM_GROUPS	2
#define MLX5E_ARFS_GROUP1_SIZE	BIT(12)
#define MLX5E_ARFS_GROUP2_SIZE	BIT(0)
#define MLX5E_ARFS_TABLE_SIZE	(MLX5E_ARFS_GROUP1_SIZE +\
				 MLX5E_ARFS_GROUP2_SIZE)
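/* Each aRFS table has two flow groups: group 1 (4K entries) matches the
 * full 5-tuple for per-flow rules; group 2 (one entry, no match criteria)
 * holds the catch-all default rule added above.
 */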
static int arfs_create_groups(struct mlx5e_flow_table *ft,
			      enum arfs_type type)
{
	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
	void *outer_headers_c;
	int ix = 0;
	u32 *in;
	int err;
	u8 *mc;

	ft->g = kcalloc(MLX5E_ARFS_NUM_GROUPS,
			sizeof(*ft->g), GFP_KERNEL);
	in = mlx5_vzalloc(inlen);
	if (!in || !ft->g) {
		kvfree(ft->g);
		kvfree(in);
		return -ENOMEM;
	}

	mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
	outer_headers_c = MLX5_ADDR_OF(fte_match_param, mc,
				       outer_headers);
	MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ethertype);
	switch (type) {
	case ARFS_IPV4_TCP:
	case ARFS_IPV6_TCP:
		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, tcp_dport);
		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, tcp_sport);
		break;
	case ARFS_IPV4_UDP:
	case ARFS_IPV6_UDP:
		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, udp_dport);
		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, udp_sport);
		break;
	default:
		err = -EINVAL;
		goto out;
	}

	switch (type) {
	case ARFS_IPV4_TCP:
	case ARFS_IPV4_UDP:
		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c,
				 src_ipv4_src_ipv6.ipv4_layout.ipv4);
		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c,
				 dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
		break;
	case ARFS_IPV6_TCP:
	case ARFS_IPV6_UDP:
		memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
				    src_ipv4_src_ipv6.ipv6_layout.ipv6), 0xff, 16);
		memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6), 0xff, 16);
		break;
	default:
		err = -EINVAL;
		goto out;
	}

	MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
	MLX5_SET_CFG(in, start_flow_index, ix);
	ix += MLX5E_ARFS_GROUP1_SIZE;
	MLX5_SET_CFG(in, end_flow_index, ix - 1);
	ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
	if (IS_ERR(ft->g[ft->num_groups]))
		goto err;
	ft->num_groups++;

	memset(in, 0, inlen);
	MLX5_SET_CFG(in, start_flow_index, ix);
	ix += MLX5E_ARFS_GROUP2_SIZE;
	MLX5_SET_CFG(in, end_flow_index, ix - 1);
	ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
	if (IS_ERR(ft->g[ft->num_groups]))
		goto err;
	ft->num_groups++;

	kvfree(in);
	return 0;

err:
	err = PTR_ERR(ft->g[ft->num_groups]);
	ft->g[ft->num_groups] = NULL;
out:
	kvfree(in);
	return err;
}

static int arfs_create_table(struct mlx5e_priv *priv,
			     enum arfs_type type)
{
	struct mlx5e_arfs_tables *arfs = &priv->fs.arfs;
	struct mlx5e_flow_table *ft = &arfs->arfs_tables[type].ft;
	int err;

	ft->t = mlx5_create_flow_table(priv->fs.ns, MLX5E_NIC_PRIO,
				       MLX5E_ARFS_TABLE_SIZE, MLX5E_ARFS_FT_LEVEL);
	if (IS_ERR(ft->t)) {
		err = PTR_ERR(ft->t);
		ft->t = NULL;
		return err;
	}

	err = arfs_create_groups(ft, type);
	if (err)
		goto err;

	err = arfs_add_default_rule(priv, type);
	if (err)
		goto err;

	return 0;
err:
	mlx5e_destroy_flow_table(ft);
	return err;
}

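/* Called on netdev init: aRFS state (lock, rule list, and a dedicated
 * single-threaded workqueue that serializes rule add/modify work) is set up
 * only when the device advertises NETIF_F_NTUPLE.
 */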
int mlx5e_arfs_create_tables(struct mlx5e_priv *priv)
{
	int err = 0;
	int i;

	if (!(priv->netdev->hw_features & NETIF_F_NTUPLE))
		return 0;

	spin_lock_init(&priv->fs.arfs.arfs_lock);
	INIT_LIST_HEAD(&priv->fs.arfs.rules);
	priv->fs.arfs.wq = create_singlethread_workqueue("mlx5e_arfs");
	if (!priv->fs.arfs.wq)
		return -ENOMEM;

	for (i = 0; i < ARFS_NUM_TYPES; i++) {
		err = arfs_create_table(priv, i);
		if (err)
			goto err;
	}
	return 0;
err:
	mlx5e_arfs_destroy_tables(priv);
	return err;
}

#define MLX5E_ARFS_EXPIRY_QUOTA 60

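/* Scan up to MLX5E_ARFS_EXPIRY_QUOTA rules per invocation and let the RPS
 * core decide (via rps_may_expire_flow()) which flows are stale; stale rules
 * are unhooked under the lock and freed outside it.
 */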
static void arfs_may_expire_flow(struct mlx5e_priv *priv)
{
	struct arfs_rule *arfs_rule;
	struct hlist_node *htmp;
	int quota = 0;
	int i;
	int j;

	HLIST_HEAD(del_list);
	spin_lock_bh(&priv->fs.arfs.arfs_lock);
	mlx5e_for_each_arfs_rule(arfs_rule, htmp, priv->fs.arfs.arfs_tables, i, j) {
		if (quota++ > MLX5E_ARFS_EXPIRY_QUOTA)
			break;
		if (!work_pending(&arfs_rule->arfs_work) &&
		    rps_may_expire_flow(priv->netdev,
					arfs_rule->rxq, arfs_rule->flow_id,
					arfs_rule->filter_id)) {
			hlist_del_init(&arfs_rule->hlist);
			hlist_add_head(&arfs_rule->hlist, &del_list);
		}
	}
	spin_unlock_bh(&priv->fs.arfs.arfs_lock);
	hlist_for_each_entry_safe(arfs_rule, htmp, &del_list, hlist) {
		if (arfs_rule->rule)
			mlx5_del_flow_rule(arfs_rule->rule);
		hlist_del(&arfs_rule->hlist);
		kfree(arfs_rule);
	}
}

static void arfs_del_rules(struct mlx5e_priv *priv)
{
	struct hlist_node *htmp;
	struct arfs_rule *rule;
	int i;
	int j;

	HLIST_HEAD(del_list);
	spin_lock_bh(&priv->fs.arfs.arfs_lock);
	mlx5e_for_each_arfs_rule(rule, htmp, priv->fs.arfs.arfs_tables, i, j) {
		hlist_del_init(&rule->hlist);
		hlist_add_head(&rule->hlist, &del_list);
	}
	spin_unlock_bh(&priv->fs.arfs.arfs_lock);

	hlist_for_each_entry_safe(rule, htmp, &del_list, hlist) {
		cancel_work_sync(&rule->arfs_work);
		if (rule->rule)
			mlx5_del_flow_rule(rule->rule);
		hlist_del(&rule->hlist);
		kfree(rule);
	}
}

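/* Rules are hashed on the L4 source/destination ports only; the full tuple
 * (including addresses) is compared on lookup.
 */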
static struct hlist_head *
arfs_hash_bucket(struct arfs_table *arfs_t, __be16 src_port,
		 __be16 dst_port)
{
	unsigned long l;
	int bucket_idx;

	l = (__force unsigned long)src_port |
	    ((__force unsigned long)dst_port << 2);

	bucket_idx = hash_long(l, ARFS_HASH_SHIFT);

	return &arfs_t->rules_hash[bucket_idx];
}

static u8 arfs_get_ip_proto(const struct sk_buff *skb)
{
	return (skb->protocol == htons(ETH_P_IP)) ?
		ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
}

static struct arfs_table *arfs_get_table(struct mlx5e_arfs_tables *arfs,
					 u8 ip_proto, __be16 etype)
{
	if (etype == htons(ETH_P_IP) && ip_proto == IPPROTO_TCP)
		return &arfs->arfs_tables[ARFS_IPV4_TCP];
	if (etype == htons(ETH_P_IP) && ip_proto == IPPROTO_UDP)
		return &arfs->arfs_tables[ARFS_IPV4_UDP];
	if (etype == htons(ETH_P_IPV6) && ip_proto == IPPROTO_TCP)
		return &arfs->arfs_tables[ARFS_IPV6_TCP];
	if (etype == htons(ETH_P_IPV6) && ip_proto == IPPROTO_UDP)
		return &arfs->arfs_tables[ARFS_IPV6_UDP];

	return NULL;
}

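/* Build the match value/criteria pair for a flow's 5-tuple and install a
 * rule that forwards it to the direct TIR of the desired RX queue.
 */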
static struct mlx5_flow_rule *arfs_add_rule(struct mlx5e_priv *priv,
					    struct arfs_rule *arfs_rule)
{
	struct mlx5e_arfs_tables *arfs = &priv->fs.arfs;
	struct arfs_tuple *tuple = &arfs_rule->tuple;
	struct mlx5_flow_rule *rule = NULL;
	struct mlx5_flow_destination dest;
	struct arfs_table *arfs_table;
	u8 match_criteria_enable = 0;
	struct mlx5_flow_table *ft;
	u32 *match_criteria;
	u32 *match_value;
	int err = 0;

	match_value = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param));
	match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param));
	if (!match_value || !match_criteria) {
		netdev_err(priv->netdev, "%s: alloc failed\n", __func__);
		err = -ENOMEM;
		goto out;
	}
	match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
	MLX5_SET_TO_ONES(fte_match_param, match_criteria,
			 outer_headers.ethertype);
	MLX5_SET(fte_match_param, match_value, outer_headers.ethertype,
		 ntohs(tuple->etype));
	arfs_table = arfs_get_table(arfs, tuple->ip_proto, tuple->etype);
	if (!arfs_table) {
		err = -EINVAL;
		goto out;
	}

	ft = arfs_table->ft.t;
	if (tuple->ip_proto == IPPROTO_TCP) {
		MLX5_SET_TO_ONES(fte_match_param, match_criteria,
				 outer_headers.tcp_dport);
		MLX5_SET_TO_ONES(fte_match_param, match_criteria,
				 outer_headers.tcp_sport);
		MLX5_SET(fte_match_param, match_value, outer_headers.tcp_dport,
			 ntohs(tuple->dst_port));
		MLX5_SET(fte_match_param, match_value, outer_headers.tcp_sport,
			 ntohs(tuple->src_port));
	} else {
		MLX5_SET_TO_ONES(fte_match_param, match_criteria,
				 outer_headers.udp_dport);
		MLX5_SET_TO_ONES(fte_match_param, match_criteria,
				 outer_headers.udp_sport);
		MLX5_SET(fte_match_param, match_value, outer_headers.udp_dport,
			 ntohs(tuple->dst_port));
		MLX5_SET(fte_match_param, match_value, outer_headers.udp_sport,
			 ntohs(tuple->src_port));
	}
	if (tuple->etype == htons(ETH_P_IP)) {
		memcpy(MLX5_ADDR_OF(fte_match_param, match_value,
				    outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4),
		       &tuple->src_ipv4,
		       4);
		memcpy(MLX5_ADDR_OF(fte_match_param, match_value,
				    outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
		       &tuple->dst_ipv4,
		       4);
		MLX5_SET_TO_ONES(fte_match_param, match_criteria,
				 outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
		MLX5_SET_TO_ONES(fte_match_param, match_criteria,
				 outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
	} else {
		memcpy(MLX5_ADDR_OF(fte_match_param, match_value,
				    outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6),
		       &tuple->src_ipv6,
		       sizeof(struct in6_addr));
		memcpy(MLX5_ADDR_OF(fte_match_param, match_value,
				    outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       &tuple->dst_ipv6,
		       sizeof(struct in6_addr));
		memset(MLX5_ADDR_OF(fte_match_param, match_criteria,
				    outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6),
		       0xff,
		       sizeof(struct in6_addr));
		memset(MLX5_ADDR_OF(fte_match_param, match_criteria,
				    outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       0xff,
		       sizeof(struct in6_addr));
	}
	dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
	dest.tir_num = priv->direct_tir[arfs_rule->rxq].tirn;
	rule = mlx5_add_flow_rule(ft, match_criteria_enable, match_criteria,
				  match_value, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
				  MLX5_FS_DEFAULT_FLOW_TAG,
				  &dest);
	if (IS_ERR(rule)) {
		err = PTR_ERR(rule);
		netdev_err(priv->netdev, "%s: add rule(filter id=%d, rq idx=%d) failed, err=%d\n",
			   __func__, arfs_rule->filter_id, arfs_rule->rxq, err);
	}

out:
	kvfree(match_criteria);
	kvfree(match_value);
	return err ? ERR_PTR(err) : rule;
}

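/* A flow that migrates to another CPU keeps its hardware rule; only the
 * rule's destination TIR is rewritten to point at the new RX queue.
 */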
static void arfs_modify_rule_rq(struct mlx5e_priv *priv,
				struct mlx5_flow_rule *rule, u16 rxq)
{
	struct mlx5_flow_destination dst;
	int err = 0;

	dst.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
	dst.tir_num = priv->direct_tir[rxq].tirn;
	err = mlx5_modify_rule_destination(rule, &dst);
	if (err)
		netdev_warn(priv->netdev,
			    "Failed to modify aRFS rule destination to rq=%d\n", rxq);
}

static void arfs_handle_work(struct work_struct *work)
{
	struct arfs_rule *arfs_rule = container_of(work,
						   struct arfs_rule,
						   arfs_work);
	struct mlx5e_priv *priv = arfs_rule->priv;
	struct mlx5_flow_rule *rule;

	mutex_lock(&priv->state_lock);
	if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
		spin_lock_bh(&priv->fs.arfs.arfs_lock);
		hlist_del(&arfs_rule->hlist);
		spin_unlock_bh(&priv->fs.arfs.arfs_lock);

		mutex_unlock(&priv->state_lock);
		kfree(arfs_rule);
		goto out;
	}
	mutex_unlock(&priv->state_lock);

	if (!arfs_rule->rule) {
		rule = arfs_add_rule(priv, arfs_rule);
		if (IS_ERR(rule))
			goto out;
		arfs_rule->rule = rule;
	} else {
		arfs_modify_rule_rq(priv, arfs_rule->rule,
				    arfs_rule->rxq);
	}
out:
	arfs_may_expire_flow(priv);
}

/* return L4 destination port from IPv4/IPv6 packets */
static __be16 arfs_get_dst_port(const struct sk_buff *skb)
{
	char *transport_header;

	transport_header = skb_transport_header(skb);
	if (arfs_get_ip_proto(skb) == IPPROTO_TCP)
		return ((struct tcphdr *)transport_header)->dest;
	return ((struct udphdr *)transport_header)->dest;
}

/* return L4 source port from IPv4/IPv6 packets */
static __be16 arfs_get_src_port(const struct sk_buff *skb)
{
	char *transport_header;

	transport_header = skb_transport_header(skb);
	if (arfs_get_ip_proto(skb) == IPPROTO_TCP)
		return ((struct tcphdr *)transport_header)->source;
	return ((struct udphdr *)transport_header)->source;
}

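/* Allocated with GFP_ATOMIC: called from ndo_rx_flow_steer() under the
 * arfs_lock spinlock. The returned rule is already linked into its hash
 * bucket; the hardware rule is installed later by the work handler.
 */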
static struct arfs_rule *arfs_alloc_rule(struct mlx5e_priv *priv,
					 struct arfs_table *arfs_t,
					 const struct sk_buff *skb,
					 u16 rxq, u32 flow_id)
{
	struct arfs_rule *rule;
	struct arfs_tuple *tuple;

	rule = kzalloc(sizeof(*rule), GFP_ATOMIC);
	if (!rule)
		return NULL;

	rule->priv = priv;
	rule->rxq = rxq;
	INIT_WORK(&rule->arfs_work, arfs_handle_work);

	tuple = &rule->tuple;
	tuple->etype = skb->protocol;
	if (tuple->etype == htons(ETH_P_IP)) {
		tuple->src_ipv4 = ip_hdr(skb)->saddr;
		tuple->dst_ipv4 = ip_hdr(skb)->daddr;
	} else {
		memcpy(&tuple->src_ipv6, &ipv6_hdr(skb)->saddr,
		       sizeof(struct in6_addr));
		memcpy(&tuple->dst_ipv6, &ipv6_hdr(skb)->daddr,
		       sizeof(struct in6_addr));
	}
	tuple->ip_proto = arfs_get_ip_proto(skb);
	tuple->src_port = arfs_get_src_port(skb);
	tuple->dst_port = arfs_get_dst_port(skb);

	rule->flow_id = flow_id;
	rule->filter_id = priv->fs.arfs.last_filter_id++ % RPS_NO_FILTER;

	hlist_add_head(&rule->hlist,
		       arfs_hash_bucket(arfs_t, tuple->src_port,
					tuple->dst_port));
	return rule;
}

static bool arfs_cmp_ips(struct arfs_tuple *tuple,
			 const struct sk_buff *skb)
{
	if (tuple->etype == htons(ETH_P_IP) &&
	    tuple->src_ipv4 == ip_hdr(skb)->saddr &&
	    tuple->dst_ipv4 == ip_hdr(skb)->daddr)
		return true;
	if (tuple->etype == htons(ETH_P_IPV6) &&
	    (!memcmp(&tuple->src_ipv6, &ipv6_hdr(skb)->saddr,
		     sizeof(struct in6_addr))) &&
	    (!memcmp(&tuple->dst_ipv6, &ipv6_hdr(skb)->daddr,
		     sizeof(struct in6_addr))))
		return true;

	return false;
}

static struct arfs_rule *arfs_find_rule(struct arfs_table *arfs_t,
					const struct sk_buff *skb)
{
	struct arfs_rule *arfs_rule;
	struct hlist_head *head;
	__be16 src_port = arfs_get_src_port(skb);
	__be16 dst_port = arfs_get_dst_port(skb);

	head = arfs_hash_bucket(arfs_t, src_port, dst_port);
	hlist_for_each_entry(arfs_rule, head, hlist) {
		if (arfs_rule->tuple.src_port == src_port &&
		    arfs_rule->tuple.dst_port == dst_port &&
		    arfs_cmp_ips(&arfs_rule->tuple, skb)) {
			return arfs_rule;
		}
	}

	return NULL;
}

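/* .ndo_rx_flow_steer callback: map the flow to one of the four aRFS tables,
 * reuse or allocate a rule for it, and return the filter ID that the RPS
 * core will later pass back to rps_may_expire_flow().
 */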
int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
			u16 rxq_index, u32 flow_id)
{
	struct mlx5e_priv *priv = netdev_priv(dev);
	struct mlx5e_arfs_tables *arfs = &priv->fs.arfs;
	struct arfs_table *arfs_t;
	struct arfs_rule *arfs_rule;

	if (skb->protocol != htons(ETH_P_IP) &&
	    skb->protocol != htons(ETH_P_IPV6))
		return -EPROTONOSUPPORT;

	arfs_t = arfs_get_table(arfs, arfs_get_ip_proto(skb), skb->protocol);
	if (!arfs_t)
		return -EPROTONOSUPPORT;

	spin_lock_bh(&arfs->arfs_lock);
	arfs_rule = arfs_find_rule(arfs_t, skb);
	if (arfs_rule) {
		if (arfs_rule->rxq == rxq_index) {
			spin_unlock_bh(&arfs->arfs_lock);
			return arfs_rule->filter_id;
		}
		arfs_rule->rxq = rxq_index;
	} else {
		arfs_rule = arfs_alloc_rule(priv, arfs_t, skb,
					    rxq_index, flow_id);
		if (!arfs_rule) {
			spin_unlock_bh(&arfs->arfs_lock);
			return -ENOMEM;
		}
	}
	queue_work(priv->fs.arfs.wq, &arfs_rule->arfs_work);
	spin_unlock_bh(&arfs->arfs_lock);
	return arfs_rule->filter_id;
}