2 * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
37 #include <linux/bitops.h>
38 #include <linux/compiler.h>
39 #include <linux/list.h>
40 #include <linux/mutex.h>
41 #include <linux/netdevice.h>
42 #include <linux/if_vlan.h>
43 #include <linux/net_tstamp.h>
44 #ifdef CONFIG_MLX4_EN_DCB
45 #include <linux/dcbnl.h>
47 #include <linux/cpu_rmap.h>
49 #include <linux/mlx4/device.h>
50 #include <linux/mlx4/qp.h>
51 #include <linux/mlx4/cq.h>
52 #include <linux/mlx4/srq.h>
53 #include <linux/mlx4/doorbell.h>
54 #include <linux/mlx4/cmd.h>
/* Driver identification strings. DRV_NAME is also the prefix that the
 * mlx4_err/mlx4_info/mlx4_warn macros put in front of every message. */
58 #define DRV_NAME "mlx4_en"
59 #define DRV_VERSION "2.0"
60 #define DRV_RELDATE "Dec 2011"
/* Message-level mask made of the link and interface-down bits.
 * NOTE(review): presumably the initial value of priv->msg_enable; confirm at
 * the place where it is assigned. */
62 #define MLX4_EN_MSG_LEVEL (NETIF_MSG_LINK | NETIF_MSG_IFDOWN)
/* Driver-internal page geometry: 1 << 12 = 4096 bytes. */
69 #define MLX4_EN_PAGE_SHIFT 12
70 #define MLX4_EN_PAGE_SIZE (1 << MLX4_EN_PAGE_SHIFT)
/* RX ring counts. MAX_RX_RINGS also dimensions the per-ring arrays declared
 * in this header (rx_ring[], rx_cq[], qps[], last_moder_*[]).
 * NOTE(review): DEF_/MIN_ are presumably the default and lower bound. */
71 #define DEF_RX_RINGS 16
72 #define MAX_RX_RINGS 128
73 #define MIN_RX_RINGS 4
/* 2048 divided by TXBB_SIZE, plus one. TXBB_SIZE is defined outside this
 * excerpt. */
75 #define HEADROOM (2048 / TXBB_SIZE + 1)
/* Stamping parameters: a 64-byte stride is 16 dwords.
 * NOTE(review): how STAMP_SHIFT and STAMP_VAL are applied is not visible
 * here. */
76 #define STAMP_STRIDE 64
77 #define STAMP_DWORDS (STAMP_STRIDE / 4)
78 #define STAMP_SHIFT 31
79 #define STAMP_VAL 0x7fffffff
/* A quarter of a second, expressed in jiffies. */
80 #define STATS_DELAY (HZ / 4)
/* Capacity of the ethtool_rules[] array in struct mlx4_en_priv. */
81 #define MAX_NUM_OF_FS_RULES 256
/* filter_hash[] in struct mlx4_en_priv has 1 << 4 = 16 buckets. */
83 #define MLX4_EN_FILTER_HASH_SHIFT 4
84 #define MLX4_EN_FILTER_EXPIRY_QUOTA 60
86 /* Typical TSO descriptor with 16 gather entries is 352 bytes... */
87 #define MAX_DESC_SIZE 512
/* MAX_DESC_SIZE divided by TXBB_SIZE. */
88 #define MAX_DESC_TXBBS (MAX_DESC_SIZE / TXBB_SIZE)
91 * OS related constants and tunables
/* 15 seconds, expressed in jiffies.
 * NOTE(review): presumably the netdev TX watchdog timeout; confirm where it
 * is assigned. */
94 #define MLX4_EN_WATCHDOG_TIMEOUT (15 * HZ)
96 /* Use the maximum between 16384 and a single page */
97 #define MLX4_EN_ALLOC_SIZE PAGE_ALIGN(16384)
/* Page-allocation order computed by get_order() for MLX4_EN_ALLOC_SIZE. */
98 #define MLX4_EN_ALLOC_ORDER get_order(MLX4_EN_ALLOC_SIZE)
100 /* Receive fragment sizes; we use at most 4 fragments (for 9600 byte MTU
101 * and 4K allocations) */
103 FRAG_SZ0 = 512 - NET_IP_ALIGN,
106 FRAG_SZ3 = MLX4_EN_ALLOC_SIZE
/* Dimension of the per-fragment arrays: page_alloc[] in each RX ring and
 * frag_info[] in struct mlx4_en_priv. */
108 #define MLX4_EN_MAX_RX_FRAGS 4
110 /* Maximum ring sizes */
111 #define MLX4_EN_MAX_TX_SIZE 8192
112 #define MLX4_EN_MAX_RX_SIZE 8192
114 /* Minimum ring size for our page-allocation scheme to work */
115 #define MLX4_EN_MIN_RX_SIZE (MLX4_EN_ALLOC_SIZE / SMP_CACHE_BYTES)
/* 4096 divided by TXBB_SIZE (defined outside this excerpt). */
116 #define MLX4_EN_MIN_TX_SIZE (4096 / TXBB_SIZE)
118 #define MLX4_EN_SMALL_PKT_SIZE 64
/* NOTE(review): "UP" presumably stands for user priority (compare the
 * user_prio parameters in the prototypes of this header) -- confirm. */
119 #define MLX4_EN_MAX_TX_RING_P_UP 32
120 #define MLX4_EN_NUM_UP 8
/* NOTE(review): presumably the ring sizes used when none is configured. */
121 #define MLX4_EN_DEF_TX_RING_SIZE 512
122 #define MLX4_EN_DEF_RX_RING_SIZE 1024
123 #define MAX_TX_RINGS (MLX4_EN_MAX_TX_RING_P_UP * \
126 /* Target number of packets to coalesce with interrupt moderation */
127 #define MLX4_EN_RX_COAL_TARGET 44
/* NOTE(review): the unit of the *_COAL_TIME values is not visible in this
 * excerpt (presumably microseconds) -- confirm against the CQ moderation
 * code. */
128 #define MLX4_EN_RX_COAL_TIME 0x10
130 #define MLX4_EN_TX_COAL_PKTS 16
131 #define MLX4_EN_TX_COAL_TIME 0x10
/* Low/high packet-rate marks paired with a low/high RX coalescing time.
 * NOTE(review): presumably inputs to the adaptive RX moderation logic (see
 * adaptive_rx_coal in struct mlx4_en_priv) -- confirm. */
133 #define MLX4_EN_RX_RATE_LOW 400000
134 #define MLX4_EN_RX_COAL_TIME_LOW 0
135 #define MLX4_EN_RX_RATE_HIGH 450000
136 #define MLX4_EN_RX_COAL_TIME_HIGH 128
137 #define MLX4_EN_RX_SIZE_THRESH 1024
/* Integer division: 1000000 / 128 = 7812. */
138 #define MLX4_EN_RX_RATE_THRESH (1000000 / MLX4_EN_RX_COAL_TIME_HIGH)
139 #define MLX4_EN_SAMPLE_INTERVAL 0
140 #define MLX4_EN_AVG_PKT_SMALL 256
142 #define MLX4_EN_AUTO_CONF 0xffff
/* NOTE(review): presumably the default RX/TX pause settings (1 = on). */
144 #define MLX4_EN_DEF_RX_PAUSE 1
145 #define MLX4_EN_DEF_TX_PAUSE 1
147 /* Interval between successive polls in the Tx routine when polling is used
148 instead of interrupts (in per-core Tx rings) - should be power of 2 */
149 #define MLX4_EN_TX_POLL_MODER 16
/* A quarter of a second, expressed in jiffies. */
150 #define MLX4_EN_TX_POLL_TIMEOUT (HZ / 4)
152 #define ETH_LLC_SNAP_SIZE 8
/* Byte thresholds reduced by NET_IP_ALIGN. */
154 #define SMALL_PACKET_SIZE (256 - NET_IP_ALIGN)
155 #define HEADER_COPY_SIZE (128 - NET_IP_ALIGN)
/* HEADER_COPY_SIZE minus the Ethernet header length. */
156 #define MLX4_LOOPBACK_TEST_PAYLOAD (HEADER_COPY_SIZE - ETH_HLEN)
158 #define MLX4_EN_MIN_MTU 46
/* All 48 low bits set. */
159 #define ETH_BCAST 0xffffffffffffULL
/* NOTE(review): the unit of the loopback timeout is not visible here. */
161 #define MLX4_EN_LOOPBACK_RETRIES 5
162 #define MLX4_EN_LOOPBACK_TIMEOUT 100
164 #ifdef MLX4_EN_PERF_STAT
165 /* Number of samples to 'average' */
167 #define AVG_FACTOR 1024
168 #define NUM_PERF_STATS NUM_PERF_COUNTERS
170 #define INC_PERF_COUNTER(cnt) (++(cnt))
171 #define ADD_PERF_COUNTER(cnt, add) ((cnt) += (add))
172 #define AVG_PERF_COUNTER(cnt, sample) \
173 ((cnt) = ((cnt) * (AVG_SIZE - 1) + (sample) * AVG_FACTOR) / AVG_SIZE)
174 #define GET_PERF_COUNTER(cnt) (cnt)
175 #define GET_AVG_PERF_COUNTER(cnt) ((cnt) / AVG_FACTOR)
179 #define NUM_PERF_STATS 0
180 #define INC_PERF_COUNTER(cnt) do {} while (0)
181 #define ADD_PERF_COUNTER(cnt, add) do {} while (0)
182 #define AVG_PERF_COUNTER(cnt, sample) do {} while (0)
183 #define GET_PERF_COUNTER(cnt) (0)
184 #define GET_AVG_PERF_COUNTER(cnt) (0)
185 #endif /* MLX4_EN_PERF_STAT */
/* ilog2() of x after rounding x up with roundup_pow_of_two(); both helpers
 * come from the kernel headers, not from this file. */
200 #define ROUNDUP_LOG2(x) ilog2(roundup_pow_of_two(x))
/* Logical XNOR: 1 when x and y are both zero or both non-zero, 0 otherwise.
 * Each argument is evaluated exactly once. */
201 #define XNOR(x, y) (!(x) == !(y))
204 struct mlx4_en_tx_info {
215 #define MLX4_EN_BIT_DESC_OWN 0x80000000
216 #define CTRL_SIZE sizeof(struct mlx4_wqe_ctrl_seg)
217 #define MLX4_EN_MEMTYPE_PAD 0x100
218 #define DS_SIZE sizeof(struct mlx4_wqe_data_seg)
221 struct mlx4_en_tx_desc {
222 struct mlx4_wqe_ctrl_seg ctrl;
224 struct mlx4_wqe_data_seg data; /* at least one data segment */
225 struct mlx4_wqe_lso_seg lso;
226 struct mlx4_wqe_inline_seg inl;
230 #define MLX4_EN_USE_SRQ 0x01000000
232 #define MLX4_EN_CX3_LOW_ID 0x1000
233 #define MLX4_EN_CX3_HIGH_ID 0x1005
235 struct mlx4_en_rx_alloc {
241 struct mlx4_en_tx_ring {
242 struct mlx4_hwq_resources wqres;
243 u32 size ; /* number of TXBBs */
246 u16 cqn; /* index of port CQ associated with this ring */
253 struct mlx4_en_tx_info *tx_info;
257 struct mlx4_qp_context context;
259 enum mlx4_qp_state qp_state;
260 struct mlx4_srq dummy;
262 unsigned long packets;
263 unsigned long tx_csum;
266 struct netdev_queue *tx_queue;
267 int hwtstamp_tx_type;
270 struct mlx4_en_rx_desc {
271 /* actual number of entries depends on rx ring stride */
272 struct mlx4_wqe_data_seg data[0];
275 struct mlx4_en_rx_ring {
276 struct mlx4_hwq_resources wqres;
277 struct mlx4_en_rx_alloc page_alloc[MLX4_EN_MAX_RX_FRAGS];
278 u32 size ; /* number of Rx descs*/
283 u16 cqn; /* index of port CQ associated with this ring */
291 unsigned long packets;
292 unsigned long csum_ok;
293 unsigned long csum_none;
294 int hwtstamp_rx_filter;
299 struct mlx4_hwq_resources wqres;
302 struct net_device *dev;
303 struct napi_struct napi;
310 struct mlx4_cqe *buf;
311 #define MLX4_EN_OPCODE_ERROR 0x1e
314 struct mlx4_en_port_profile {
327 struct mlx4_en_profile {
334 u8 num_tx_rings_p_up;
335 struct mlx4_en_port_profile prof[MLX4_MAX_PORTS + 1];
339 struct mlx4_dev *dev;
340 struct pci_dev *pdev;
341 struct mutex state_lock;
342 struct net_device *pndev[MLX4_MAX_PORTS + 1];
345 struct mlx4_en_profile profile;
347 struct workqueue_struct *workqueue;
348 struct device *dma_device;
349 void __iomem *uar_map;
350 struct mlx4_uar priv_uar;
354 u8 mac_removed[MLX4_MAX_PORTS + 1];
355 struct cyclecounter cycles;
356 struct timecounter clock;
357 unsigned long last_overflow_check;
361 struct mlx4_en_rss_map {
363 struct mlx4_qp qps[MAX_RX_RINGS];
364 enum mlx4_qp_state state[MAX_RX_RINGS];
365 struct mlx4_qp indir_qp;
366 enum mlx4_qp_state indir_state;
369 struct mlx4_en_port_state {
375 struct mlx4_en_pkt_stats {
376 unsigned long broadcast;
377 unsigned long rx_prio[8];
378 unsigned long tx_prio[8];
379 #define NUM_PKT_STATS 17
382 struct mlx4_en_port_stats {
383 unsigned long tso_packets;
384 unsigned long queue_stopped;
385 unsigned long wake_queue;
386 unsigned long tx_timeout;
387 unsigned long rx_alloc_failed;
388 unsigned long rx_chksum_good;
389 unsigned long rx_chksum_none;
390 unsigned long tx_chksum_offload;
391 #define NUM_PORT_STATS 8
394 struct mlx4_en_perf_stats {
401 #define NUM_PERF_COUNTERS 6
404 enum mlx4_en_mclist_act {
410 struct mlx4_en_mc_list {
411 struct list_head list;
412 enum mlx4_en_mclist_act action;
417 struct mlx4_en_frag_info {
419 u16 frag_prefix_size;
426 #ifdef CONFIG_MLX4_EN_DCB
427 /* Minimal TC BW - setting to 0 will block traffic */
428 #define MLX4_EN_BW_MIN 1
429 #define MLX4_EN_BW_MAX 100 /* Utilize 100% of the line */
431 #define MLX4_EN_TC_ETS 7
435 struct ethtool_flow_id {
436 struct list_head list;
437 struct ethtool_rx_flow_spec flow_spec;
442 MLX4_EN_FLAG_PROMISC = (1 << 0),
443 MLX4_EN_FLAG_MC_PROMISC = (1 << 1),
444 /* whether we need to enable hardware loopback by putting dmac
447 MLX4_EN_FLAG_ENABLE_HW_LOOPBACK = (1 << 2),
448 /* whether we need to drop packets that hardware loopback-ed */
449 MLX4_EN_FLAG_RX_FILTER_NEEDED = (1 << 3),
450 MLX4_EN_FLAG_FORCE_PROMISC = (1 << 4)
453 #define MLX4_EN_MAC_HASH_SIZE (1 << BITS_PER_BYTE)
454 #define MLX4_EN_MAC_HASH_IDX 5
456 struct mlx4_en_priv {
457 struct mlx4_en_dev *mdev;
458 struct mlx4_en_port_profile *prof;
459 struct net_device *dev;
460 unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
461 struct net_device_stats stats;
462 struct net_device_stats ret_stats;
463 struct mlx4_en_port_state port_state;
464 spinlock_t stats_lock;
465 struct ethtool_flow_id ethtool_rules[MAX_NUM_OF_FS_RULES];
466 /* To allow rules removal while port is going down */
467 struct list_head ethtool_list;
469 unsigned long last_moder_packets[MAX_RX_RINGS];
470 unsigned long last_moder_tx_packets;
471 unsigned long last_moder_bytes[MAX_RX_RINGS];
472 unsigned long last_moder_jiffies;
473 int last_moder_time[MAX_RX_RINGS];
483 u16 adaptive_rx_coal;
486 u32 validate_loopback;
488 struct mlx4_hwq_resources res;
496 unsigned char prev_mac[ETH_ALEN + 2];
502 struct mlx4_en_rss_map rss_map;
505 u8 num_tx_rings_p_up;
509 struct mlx4_en_frag_info frag_info[MLX4_EN_MAX_RX_FRAGS];
513 struct mlx4_en_tx_ring *tx_ring;
514 struct mlx4_en_rx_ring rx_ring[MAX_RX_RINGS];
515 struct mlx4_en_cq *tx_cq;
516 struct mlx4_en_cq rx_cq[MAX_RX_RINGS];
517 struct mlx4_qp drop_qp;
518 struct work_struct rx_mode_task;
519 struct work_struct watchdog_task;
520 struct work_struct linkstate_task;
521 struct delayed_work stats_task;
522 struct mlx4_en_perf_stats pstats;
523 struct mlx4_en_pkt_stats pkstats;
524 struct mlx4_en_port_stats port_stats;
526 struct list_head mc_list;
527 struct list_head curr_list;
529 struct mlx4_en_stat_out_mbox hw_stats;
534 struct hlist_head mac_hash[MLX4_EN_MAC_HASH_SIZE];
535 struct hwtstamp_config hwtstamp_config;
537 #ifdef CONFIG_MLX4_EN_DCB
539 u16 maxrate[IEEE_8021QAZ_MAX_TCS];
541 #ifdef CONFIG_RFS_ACCEL
542 spinlock_t filters_lock;
544 struct list_head filters;
545 struct hlist_head filter_hash[1 << MLX4_EN_FILTER_HASH_SHIFT];
551 MLX4_EN_WOL_MAGIC = (1ULL << 61),
552 MLX4_EN_WOL_ENABLED = (1ULL << 62),
555 struct mlx4_mac_entry {
556 struct hlist_node hlist;
557 unsigned char mac[ETH_ALEN + 2];
562 #define MLX4_EN_WOL_DO_MODIFY (1ULL << 63)
/* Netdev-level entry points. Semantics below are taken from the names and
 * signatures only; the implementations are not part of this header. */
564 void mlx4_en_update_loopback_state(struct net_device *dev,
565 netdev_features_t features);
/* Create / destroy the net_device for one port of an mlx4_en device. */
567 void mlx4_en_destroy_netdev(struct net_device *dev);
568 int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
569 struct mlx4_en_port_profile *prof);
/* Port start / stop. NOTE(review): meaning of 'detach' not visible here. */
571 int mlx4_en_start_port(struct net_device *dev);
572 void mlx4_en_stop_port(struct net_device *dev, int detach);
/* Allocation and release of the per-port resources held in priv. */
574 void mlx4_en_free_resources(struct mlx4_en_priv *priv);
575 int mlx4_en_alloc_resources(struct mlx4_en_priv *priv);
/* Completion-queue creation and destruction. */
577 int mlx4_en_create_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq,
578 int entries, int ring, enum cq_type mode);
579 void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq);
580 int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq,
/* Completion-queue control. Semantics are inferred from names/signatures. */
582 void mlx4_en_deactivate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq);
583 int mlx4_en_set_cq_moder(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq);
584 int mlx4_en_arm_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq);
/* Transmit path: CQ interrupt callback, queue selection and xmit handler. */
586 void mlx4_en_tx_irq(struct mlx4_cq *mcq);
587 u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb);
588 netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev);
/* TX ring lifecycle: create/destroy and activate/deactivate pairs. */
590 int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring,
591 int qpn, u32 size, u16 stride);
592 void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring);
593 int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv,
594 struct mlx4_en_tx_ring *ring,
595 int cq, int user_prio);
596 void mlx4_en_deactivate_tx_ring(struct mlx4_en_priv *priv,
597 struct mlx4_en_tx_ring *ring);
/* RX ring lifecycle. Activation takes only priv (all rings at once, going by
 * the plural name); deactivation takes a single ring. */
599 int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
600 struct mlx4_en_rx_ring *ring,
601 u32 size, u16 stride);
602 void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv,
603 struct mlx4_en_rx_ring *ring,
604 u32 size, u16 stride);
605 int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv);
606 void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv,
607 struct mlx4_en_rx_ring *ring);
608 int mlx4_en_process_rx_cq(struct net_device *dev,
609 struct mlx4_en_cq *cq,
/* NAPI poll callback signature (napi, budget). */
611 int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget);
/* Fills *context from the given ring/QP parameters; is_tx and rss are flags. */
612 void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride,
613 int is_tx, int rss, int qpn, int cqn, int user_prio,
614 struct mlx4_qp_context *context);
615 void mlx4_en_sqp_event(struct mlx4_qp *qp, enum mlx4_event event);
616 int mlx4_en_map_buffer(struct mlx4_buf *buf);
617 void mlx4_en_unmap_buffer(struct mlx4_buf *buf);
/* RX buffer sizing, RSS steering and drop-QP setup/teardown.
 * Semantics are inferred from names/signatures. */
619 void mlx4_en_calc_rx_buf(struct net_device *dev);
620 int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv);
621 void mlx4_en_release_rss_steer(struct mlx4_en_priv *priv);
622 int mlx4_en_create_drop_qp(struct mlx4_en_priv *priv);
623 void mlx4_en_destroy_drop_qp(struct mlx4_en_priv *priv);
624 int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring);
625 void mlx4_en_rx_irq(struct mlx4_cq *mcq);
/* NOTE(review): the upper-case names presumably mirror firmware command
 * names -- confirm against the command implementations. */
627 int mlx4_SET_MCAST_FLTR(struct mlx4_dev *dev, u8 port, u64 mac, u64 clear, u8 mode);
628 int mlx4_SET_VLAN_FLTR(struct mlx4_dev *dev, struct mlx4_en_priv *priv);
630 int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset);
631 int mlx4_en_QUERY_PORT(struct mlx4_en_dev *mdev, u8 port);
633 #ifdef CONFIG_MLX4_EN_DCB
634 extern const struct dcbnl_rtnl_ops mlx4_en_dcbnl_ops;
635 extern const struct dcbnl_rtnl_ops mlx4_en_dcbnl_pfc_ops;
638 int mlx4_en_setup_tc(struct net_device *dev, u8 up);
640 #ifdef CONFIG_RFS_ACCEL
641 void mlx4_en_cleanup_filters(struct mlx4_en_priv *priv,
642 struct mlx4_en_rx_ring *rx_ring);
/* NOTE(review): presumably the number of entries mlx4_en_ex_selftest()
 * reports through 'buf' -- confirm in the self-test implementation. */
645 #define MLX4_EN_NUM_SELF_TEST 5
646 void mlx4_en_ex_selftest(struct net_device *dev, u32 *flags, u64 *buf);
/* Converts a MAC address given as bytes into a u64; byte order is not
 * visible from this header. */
647 u64 mlx4_en_mac_to_u64(u8 *addr);
650 * Functions for time stamping
652 u64 mlx4_en_get_cqe_ts(struct mlx4_cqe *cqe);
653 void mlx4_en_fill_hwtstamps(struct mlx4_en_dev *mdev,
654 struct skb_shared_hwtstamps *hwts,
656 void mlx4_en_init_timestamp(struct mlx4_en_dev *mdev);
657 int mlx4_en_timestamp_config(struct net_device *dev,
663 extern const struct ethtool_ops mlx4_en_ethtool_ops;
668 * printk / logging functions
/* printf-style logging backend used by en_dbg/en_warn/en_err/en_info, which
 * pass a KERN_* level string, the port private data and the format. */
672 int en_print(const char *level, const struct mlx4_en_priv *priv,
673 const char *format, ...);
/*
 * Per-port logging helpers. Each one forwards to en_print() with a KERN_*
 * level, the port private structure and a printf-style format.
 *
 * en_dbg - log at KERN_DEBUG, but only when the NETIF_MSG_<mlevel> bit is set
 *          in priv->msg_enable.
 * @mlevel: suffix of a NETIF_MSG_* constant (e.g. LINK, not NETIF_MSG_LINK).
 * @priv:   pointer expression to the struct mlx4_en_priv; it is evaluated
 *          twice when the message is emitted, so it must be free of side
 *          effects.
 *
 * The body is wrapped in do { } while (0) so the macro behaves as a single
 * statement (safe in an unbraced if/else), and priv is parenthesized before
 * "->" so that arguments such as "privs + i" bind correctly.
 */
#define en_dbg(mlevel, priv, format, arg...)			\
do {								\
	if (NETIF_MSG_##mlevel & (priv)->msg_enable)		\
		en_print(KERN_DEBUG, priv, format, ##arg);	\
} while (0)
/* Unconditional variants at warning, error and info level. */
#define en_warn(priv, format, arg...)			\
	en_print(KERN_WARNING, priv, format, ##arg)
#define en_err(priv, format, arg...)			\
	en_print(KERN_ERR, priv, format, ##arg)
#define en_info(priv, format, arg...)			\
	en_print(KERN_INFO, priv, format, ##arg)
/*
 * Device-level logging helpers for code that holds a struct mlx4_en_dev
 * rather than a port. Every message is prefixed with DRV_NAME and the name of
 * the device reached through mdev->pdev->dev.
 *
 * @mdev:   pointer expression to the mlx4_en device. It is parenthesized in
 *          the expansion so that arguments such as "devs + i" bind to "->"
 *          as intended.
 * @format: printf-style format; it must be a string literal because it is
 *          concatenated with the prefix format string.
 */
#define mlx4_err(mdev, format, arg...)			\
	pr_err("%s %s: " format, DRV_NAME,		\
	       dev_name(&(mdev)->pdev->dev), ##arg)
#define mlx4_info(mdev, format, arg...)			\
	pr_info("%s %s: " format, DRV_NAME,		\
		dev_name(&(mdev)->pdev->dev), ##arg)
#define mlx4_warn(mdev, format, arg...)			\
	pr_warning("%s %s: " format, DRV_NAME,		\
		   dev_name(&(mdev)->pdev->dev), ##arg)