/*
 * Copyright(c) 2017 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

/*
 * This file contains HFI1 support for VNIC functionality
 */

#include <linux/io.h>
#include <linux/if_vlan.h>

#include "vnic.h"

#define HFI_TX_TIMEOUT_MS 1000

#define HFI1_VNIC_RCV_Q_SIZE   1024

#define HFI1_VNIC_UP 0

static DEFINE_SPINLOCK(vport_cntr_lock);

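/*
 * setup_vnic_ctxt - program an allocated receive context for VNIC use:
 * create the RcvHdr queue and eager buffers, then enable receives with
 * the capability flags chosen at allocation time.
 */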
static int setup_vnic_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt)
{
	unsigned int rcvctrl_ops = 0;
	int ret;

	hfi1_init_ctxt(uctxt->sc);

	uctxt->do_interrupt = &handle_receive_interrupt;

	/* Now allocate the RcvHdr queue and eager buffers. */
	ret = hfi1_create_rcvhdrq(dd, uctxt);
	if (ret)
		goto done;

	ret = hfi1_setup_eagerbufs(uctxt);
	if (ret)
		goto done;

	if (uctxt->rcvhdrtail_kvaddr)
		clear_rcvhdrtail(uctxt);

	rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB;
	rcvctrl_ops |= HFI1_RCVCTRL_INTRAVAIL_ENB;

	if (!HFI1_CAP_KGET_MASK(uctxt->flags, MULTI_PKT_EGR))
		rcvctrl_ops |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB;
	if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_EGR_FULL))
		rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
	if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_RHQ_FULL))
		rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
	if (HFI1_CAP_KGET_MASK(uctxt->flags, DMA_RTAIL))
		rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB;

	hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt->ctxt);

	uctxt->is_vnic = true;
done:
	return ret;
}

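/*
 * allocate_vnic_ctxt - reserve a free dynamic receive context and pair it
 * with a PIO send context; on failure the partially built context is torn
 * down before returning.
 */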
static int allocate_vnic_ctxt(struct hfi1_devdata *dd,
			      struct hfi1_ctxtdata **vnic_ctxt)
{
	struct hfi1_ctxtdata *uctxt;
	unsigned int ctxt;
	int ret;

	if (dd->flags & HFI1_FROZEN)
		return -EIO;

	for (ctxt = dd->first_dyn_alloc_ctxt;
	     ctxt < dd->num_rcv_contexts; ctxt++)
		if (!dd->rcd[ctxt])
			break;

	if (ctxt == dd->num_rcv_contexts)
		return -EBUSY;

	uctxt = hfi1_create_ctxtdata(dd->pport, ctxt, dd->node);
	if (!uctxt) {
		dd_dev_err(dd, "Unable to create ctxtdata, failing open\n");
		return -ENOMEM;
	}

	uctxt->flags = HFI1_CAP_KGET(MULTI_PKT_EGR) |
			HFI1_CAP_KGET(NODROP_RHQ_FULL) |
			HFI1_CAP_KGET(NODROP_EGR_FULL) |
			HFI1_CAP_KGET(DMA_RTAIL);
	uctxt->seq_cnt = 1;

	/* Allocate and enable a PIO send context */
	uctxt->sc = sc_alloc(dd, SC_VNIC, uctxt->rcvhdrqentsize,
			     dd->node);
	ret = uctxt->sc ? 0 : -ENOMEM;
	if (ret)
		goto bail;

	dd_dev_dbg(dd, "allocated vnic send context %u(%u)\n",
		   uctxt->sc->sw_index, uctxt->sc->hw_context);
	ret = sc_enable(uctxt->sc);
	if (ret)
		goto bail;

	if (dd->num_msix_entries)
		hfi1_set_vnic_msix_info(uctxt);

	hfi1_stats.sps_ctxts++;
	dd_dev_dbg(dd, "created vnic context %d\n", uctxt->ctxt);
	*vnic_ctxt = uctxt;

	return ret;
bail:
	/*
	 * hfi1_free_ctxtdata() also releases send_context
	 * structure if uctxt->sc is not null
	 */
	dd->rcd[uctxt->ctxt] = NULL;
	hfi1_free_ctxtdata(dd, uctxt);
	dd_dev_dbg(dd, "vnic allocation failed. rc %d\n", ret);
	return ret;
}

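/*
 * deallocate_vnic_ctxt - undo allocate/setup_vnic_ctxt: disable the receive
 * context, restore send context integrity checks and type to SC_USER, and
 * release the context back to the dynamic context pool.
 */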
static void deallocate_vnic_ctxt(struct hfi1_devdata *dd,
				 struct hfi1_ctxtdata *uctxt)
{
	unsigned long flags;

	dd_dev_dbg(dd, "closing vnic context %d\n", uctxt->ctxt);

	if (dd->num_msix_entries)
		hfi1_reset_vnic_msix_info(uctxt);

	spin_lock_irqsave(&dd->uctxt_lock, flags);
	/*
	 * Disable receive context and interrupt available, reset all
	 * RcvCtxtCtrl bits to default values.
	 */
	hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS |
		     HFI1_RCVCTRL_TIDFLOW_DIS |
		     HFI1_RCVCTRL_INTRAVAIL_DIS |
		     HFI1_RCVCTRL_ONE_PKT_EGR_DIS |
		     HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
		     HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt->ctxt);
	/*
	 * VNIC contexts are allocated from user context pool.
	 * Release them back to user context pool.
	 *
	 * Reset context integrity checks to default.
	 * (writes to CSRs probably belong in chip.c)
	 */
	write_kctxt_csr(dd, uctxt->sc->hw_context, SEND_CTXT_CHECK_ENABLE,
			hfi1_pkt_default_send_ctxt_mask(dd, SC_USER));
	sc_disable(uctxt->sc);

	dd->send_contexts[uctxt->sc->sw_index].type = SC_USER;
	spin_unlock_irqrestore(&dd->uctxt_lock, flags);

	dd->rcd[uctxt->ctxt] = NULL;
	uctxt->event_flags = 0;

	hfi1_clear_tids(uctxt);
	hfi1_clear_ctxt_pkey(dd, uctxt);

	hfi1_stats.sps_ctxts--;
	hfi1_free_ctxtdata(dd, uctxt);
}

void hfi1_vnic_setup(struct hfi1_devdata *dd)
{
	idr_init(&dd->vnic.vesw_idr);
}

void hfi1_vnic_cleanup(struct hfi1_devdata *dd)
{
	idr_destroy(&dd->vnic.vesw_idr);
}

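/*
 * SUM_GRP_COUNTERS walks one counter group (tx_grp or rx_grp) as a flat
 * array of u64s, from the first member (unicast) through the last
 * (s_1519_max), accumulating per-queue values into the aggregate stats.
 */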
#define SUM_GRP_COUNTERS(stats, qstats, x_grp) do {            \
		u64 *src64, *dst64;                            \
		for (src64 = &qstats->x_grp.unicast,           \
			dst64 = &stats->x_grp.unicast;         \
			dst64 <= &stats->x_grp.s_1519_max;) {  \
			*dst64++ += *src64++;                  \
		}                                              \
	} while (0)

/* hfi1_vnic_update_stats - update statistics */
static void hfi1_vnic_update_stats(struct hfi1_vnic_vport_info *vinfo,
				   struct opa_vnic_stats *stats)
{
	struct net_device *netdev = vinfo->netdev;
	u8 i;

	/* add tx counters on different queues */
	for (i = 0; i < vinfo->num_tx_q; i++) {
		struct opa_vnic_stats *qstats = &vinfo->stats[i];
		struct rtnl_link_stats64 *qnstats = &vinfo->stats[i].netstats;

		stats->netstats.tx_fifo_errors += qnstats->tx_fifo_errors;
		stats->netstats.tx_carrier_errors += qnstats->tx_carrier_errors;
		stats->tx_drop_state += qstats->tx_drop_state;
		stats->tx_dlid_zero += qstats->tx_dlid_zero;

		SUM_GRP_COUNTERS(stats, qstats, tx_grp);
		stats->netstats.tx_packets += qnstats->tx_packets;
		stats->netstats.tx_bytes += qnstats->tx_bytes;
	}

	/* add rx counters on different queues */
	for (i = 0; i < vinfo->num_rx_q; i++) {
		struct opa_vnic_stats *qstats = &vinfo->stats[i];
		struct rtnl_link_stats64 *qnstats = &vinfo->stats[i].netstats;

		stats->netstats.rx_fifo_errors += qnstats->rx_fifo_errors;
		stats->netstats.rx_nohandler += qnstats->rx_nohandler;
		stats->rx_drop_state += qstats->rx_drop_state;
		stats->rx_oversize += qstats->rx_oversize;
		stats->rx_runt += qstats->rx_runt;

		SUM_GRP_COUNTERS(stats, qstats, rx_grp);
		stats->netstats.rx_packets += qnstats->rx_packets;
		stats->netstats.rx_bytes += qnstats->rx_bytes;
	}

	stats->netstats.tx_errors = stats->netstats.tx_fifo_errors +
				    stats->netstats.tx_carrier_errors +
				    stats->tx_drop_state + stats->tx_dlid_zero;
	stats->netstats.tx_dropped = stats->netstats.tx_errors;

	stats->netstats.rx_errors = stats->netstats.rx_fifo_errors +
				    stats->netstats.rx_nohandler +
				    stats->rx_drop_state + stats->rx_oversize +
				    stats->rx_runt;
	stats->netstats.rx_dropped = stats->netstats.rx_errors;

	netdev->stats.tx_packets = stats->netstats.tx_packets;
	netdev->stats.tx_bytes = stats->netstats.tx_bytes;
	netdev->stats.tx_fifo_errors = stats->netstats.tx_fifo_errors;
	netdev->stats.tx_carrier_errors = stats->netstats.tx_carrier_errors;
	netdev->stats.tx_errors = stats->netstats.tx_errors;
	netdev->stats.tx_dropped = stats->netstats.tx_dropped;

	netdev->stats.rx_packets = stats->netstats.rx_packets;
	netdev->stats.rx_bytes = stats->netstats.rx_bytes;
	netdev->stats.rx_fifo_errors = stats->netstats.rx_fifo_errors;
	netdev->stats.multicast = stats->rx_grp.mcastbcast;
	netdev->stats.rx_length_errors = stats->rx_oversize + stats->rx_runt;
	netdev->stats.rx_errors = stats->netstats.rx_errors;
	netdev->stats.rx_dropped = stats->netstats.rx_dropped;
}

/* update_len_counters - update pkt's len histogram counters */
static inline void update_len_counters(struct opa_vnic_grp_stats *grp,
				       int len)
{
	/* account for 4 byte FCS */
	if (len >= 1518)
		grp->s_1519_max++;
	else if (len >= 1020)
		grp->s_1024_1518++;
	else if (len >= 508)
		grp->s_512_1023++;
	else if (len >= 252)
		grp->s_256_511++;
	else if (len >= 124)
		grp->s_128_255++;
	else if (len >= 61)
		grp->s_65_127++;
	else
		grp->s_64++;
}

/* hfi1_vnic_update_tx_counters - update transmit counters */
static void hfi1_vnic_update_tx_counters(struct hfi1_vnic_vport_info *vinfo,
					 u8 q_idx, struct sk_buff *skb, int err)
{
	struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
	struct opa_vnic_stats *stats = &vinfo->stats[q_idx];
	struct opa_vnic_grp_stats *tx_grp = &stats->tx_grp;
	u16 vlan_tci;

	stats->netstats.tx_packets++;
	stats->netstats.tx_bytes += skb->len + ETH_FCS_LEN;

	update_len_counters(tx_grp, skb->len);

	/* rest of the counts are for good packets only */
	if (unlikely(err))
		return;

	if (is_multicast_ether_addr(mac_hdr->h_dest))
		tx_grp->mcastbcast++;
	else
		tx_grp->unicast++;

	if (!__vlan_get_tag(skb, &vlan_tci))
		tx_grp->vlan++;
	else
		tx_grp->untagged++;
}

/* hfi1_vnic_update_rx_counters - update receive counters */
static void hfi1_vnic_update_rx_counters(struct hfi1_vnic_vport_info *vinfo,
					 u8 q_idx, struct sk_buff *skb, int err)
{
	struct ethhdr *mac_hdr = (struct ethhdr *)skb->data;
	struct opa_vnic_stats *stats = &vinfo->stats[q_idx];
	struct opa_vnic_grp_stats *rx_grp = &stats->rx_grp;
	u16 vlan_tci;

	stats->netstats.rx_packets++;
	stats->netstats.rx_bytes += skb->len + ETH_FCS_LEN;

	update_len_counters(rx_grp, skb->len);

	/* rest of the counts are for good packets only */
	if (unlikely(err))
		return;

	if (is_multicast_ether_addr(mac_hdr->h_dest))
		rx_grp->mcastbcast++;
	else
		rx_grp->unicast++;

	if (!__vlan_get_tag(skb, &vlan_tci))
		rx_grp->vlan++;
	else
		rx_grp->untagged++;
}

/* This function is overloaded for opa_vnic specific implementation */
static void hfi1_vnic_get_stats64(struct net_device *netdev,
				  struct rtnl_link_stats64 *stats)
{
	struct opa_vnic_stats *vstats = (struct opa_vnic_stats *)stats;
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);

	hfi1_vnic_update_stats(vinfo, vstats);
}

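/*
 * create_bypass_pbc - build the Per Buffer Control word for a bypass
 * packet: no HCRC insertion, bypass ICRC, request a credit return, and
 * encode the VL and packet length in dwords.
 */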
static u64 create_bypass_pbc(u32 vl, u32 dw_len)
{
	u64 pbc;

	pbc = ((u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT)
		| PBC_INSERT_BYPASS_ICRC | PBC_CREDIT_RETURN
		| PBC_PACKET_BYPASS
		| ((vl & PBC_VL_MASK) << PBC_VL_SHIFT)
		| (dw_len & PBC_LENGTH_DWS_MASK) << PBC_LENGTH_DWS_SHIFT;

	return pbc;
}

/* hfi1_vnic_maybe_stop_tx - stop tx queue if required */
static void hfi1_vnic_maybe_stop_tx(struct hfi1_vnic_vport_info *vinfo,
				    u8 q_idx)
{
	netif_stop_subqueue(vinfo->netdev, q_idx);
	if (!hfi1_vnic_sdma_write_avail(vinfo, q_idx))
		return;

	netif_start_subqueue(vinfo->netdev, q_idx);
}

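/*
 * hfi1_netdev_start_xmit - strip the opa_vnic metadata from the skb head,
 * pad the packet to an 8-byte boundary (including the ICRC tail), build
 * the PBC and hand the packet to the per-queue SDMA engine.
 */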
static netdev_tx_t hfi1_netdev_start_xmit(struct sk_buff *skb,
					  struct net_device *netdev)
{
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
	u8 pad_len, q_idx = skb->queue_mapping;
	struct hfi1_devdata *dd = vinfo->dd;
	struct opa_vnic_skb_mdata *mdata;
	u32 pkt_len, total_len;
	int err = -EINVAL;
	u64 pbc;

	v_dbg("xmit: queue %d skb len %d\n", q_idx, skb->len);
	if (unlikely(!netif_oper_up(netdev))) {
		vinfo->stats[q_idx].tx_drop_state++;
		goto tx_finish;
	}

	/* take out meta data */
	mdata = (struct opa_vnic_skb_mdata *)skb->data;
	skb_pull(skb, sizeof(*mdata));
	if (unlikely(mdata->flags & OPA_VNIC_SKB_MDATA_ENCAP_ERR)) {
		vinfo->stats[q_idx].tx_dlid_zero++;
		goto tx_finish;
	}

	/* add tail padding (for 8 bytes size alignment) and icrc */
	pad_len = -(skb->len + OPA_VNIC_ICRC_TAIL_LEN) & 0x7;
	pad_len += OPA_VNIC_ICRC_TAIL_LEN;

	/*
	 * pkt_len is how much data we have to write, including header and data.
	 * total_len is the packet length in dwords plus the PBC, and should
	 * not include the CRC.
	 */
	pkt_len = (skb->len + pad_len) >> 2;
	total_len = pkt_len + 2; /* PBC + packet */

	pbc = create_bypass_pbc(mdata->vl, total_len);

	skb_get(skb);
	v_dbg("pbc 0x%016llX len %d pad_len %d\n", pbc, skb->len, pad_len);
	err = dd->process_vnic_dma_send(dd, q_idx, vinfo, skb, pbc, pad_len);
	if (unlikely(err)) {
		if (err == -ENOMEM)
			vinfo->stats[q_idx].netstats.tx_fifo_errors++;
		else if (err != -EBUSY)
			vinfo->stats[q_idx].netstats.tx_carrier_errors++;
	}
	/* remove the header before updating tx counters */
	skb_pull(skb, OPA_VNIC_HDR_LEN);

	if (unlikely(err == -EBUSY)) {
		hfi1_vnic_maybe_stop_tx(vinfo, q_idx);
		dev_kfree_skb_any(skb);
		return NETDEV_TX_BUSY;
	}

tx_finish:
	/* update tx counters */
	hfi1_vnic_update_tx_counters(vinfo, q_idx, skb, err);
	dev_kfree_skb_any(skb);
	return NETDEV_TX_OK;
}

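/*
 * hfi1_vnic_select_queue - map the packet to the tx queue of the SDMA
 * engine chosen from the metadata entropy and VL, so queue selection and
 * engine selection stay consistent.
 */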
static u16 hfi1_vnic_select_queue(struct net_device *netdev,
				  struct sk_buff *skb,
				  void *accel_priv,
				  select_queue_fallback_t fallback)
{
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
	struct opa_vnic_skb_mdata *mdata;
	struct sdma_engine *sde;

	mdata = (struct opa_vnic_skb_mdata *)skb->data;
	sde = sdma_select_engine_vl(vinfo->dd, mdata->entropy, mdata->vl);
	return sde->this_idx;
}

/* hfi1_vnic_decap_skb - strip OPA header from the skb (ethernet) packet */
static inline int hfi1_vnic_decap_skb(struct hfi1_vnic_rx_queue *rxq,
				      struct sk_buff *skb)
{
	struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
	int max_len = vinfo->netdev->mtu + VLAN_ETH_HLEN;
	int rc = -EFAULT;

	skb_pull(skb, OPA_VNIC_HDR_LEN);

	/* Validate Packet length */
	if (unlikely(skb->len > max_len))
		vinfo->stats[rxq->idx].rx_oversize++;
	else if (unlikely(skb->len < ETH_ZLEN))
		vinfo->stats[rxq->idx].rx_runt++;
	else
		rc = 0;
	return rc;
}

static inline struct sk_buff *hfi1_vnic_get_skb(struct hfi1_vnic_rx_queue *rxq)
{
	unsigned char *pad_info;
	struct sk_buff *skb;

	skb = skb_dequeue(&rxq->skbq);
	if (unlikely(!skb))
		return NULL;

	/* remove tail padding and icrc */
	pad_info = skb->data + skb->len - 1;
	skb_trim(skb, (skb->len - OPA_VNIC_ICRC_TAIL_LEN -
		       ((*pad_info) & 0x7)));

	return skb;
}

/* hfi1_vnic_handle_rx - handle skb receive */
static void hfi1_vnic_handle_rx(struct hfi1_vnic_rx_queue *rxq,
				int *work_done, int work_to_do)
{
	struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
	struct sk_buff *skb;
	int rc;

	while (1) {
		if (*work_done >= work_to_do)
			break;

		skb = hfi1_vnic_get_skb(rxq);
		if (unlikely(!skb))
			break;

		rc = hfi1_vnic_decap_skb(rxq, skb);
		/* update rx counters */
		hfi1_vnic_update_rx_counters(vinfo, rxq->idx, skb, rc);
		if (unlikely(rc)) {
			dev_kfree_skb_any(skb);
			continue;
		}

		skb_checksum_none_assert(skb);
		skb->protocol = eth_type_trans(skb, rxq->netdev);

		napi_gro_receive(&rxq->napi, skb);
		(*work_done)++;
	}
}

/* hfi1_vnic_napi - napi receive polling callback function */
static int hfi1_vnic_napi(struct napi_struct *napi, int budget)
{
	struct hfi1_vnic_rx_queue *rxq = container_of(napi,
					      struct hfi1_vnic_rx_queue, napi);
	struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
	int work_done = 0;

	v_dbg("napi %d budget %d\n", rxq->idx, budget);
	hfi1_vnic_handle_rx(rxq, &work_done, budget);

	v_dbg("napi %d work_done %d\n", rxq->idx, work_done);
	if (work_done < budget)
		napi_complete(napi);

	return work_done;
}

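/*
 * hfi1_vnic_bypass_rcv - receive handler for bypass packets: look up the
 * vport by virtual ethernet switch id, copy the packet into an skb on the
 * per-context rx queue and kick NAPI to process it.
 */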
void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet)
{
	struct hfi1_devdata *dd = packet->rcd->dd;
	struct hfi1_vnic_vport_info *vinfo = NULL;
	struct hfi1_vnic_rx_queue *rxq;
	struct sk_buff *skb;
	int l4_type, vesw_id = -1;
	u8 q_idx;

	l4_type = HFI1_GET_L4_TYPE(packet->ebuf);
	if (likely(l4_type == OPA_VNIC_L4_ETHR)) {
		vesw_id = HFI1_VNIC_GET_VESWID(packet->ebuf);
		vinfo = idr_find(&dd->vnic.vesw_idr, vesw_id);

		/*
		 * In case of invalid vesw id, count the error on
		 * the first available vport.
		 */
		if (unlikely(!vinfo)) {
			struct hfi1_vnic_vport_info *vinfo_tmp;
			int id_tmp = 0;

			vinfo_tmp = idr_get_next(&dd->vnic.vesw_idr, &id_tmp);
			if (vinfo_tmp) {
				spin_lock(&vport_cntr_lock);
				vinfo_tmp->stats[0].netstats.rx_nohandler++;
				spin_unlock(&vport_cntr_lock);
			}
		}
	}

	if (unlikely(!vinfo)) {
		dd_dev_warn(dd, "vnic rcv err: l4 %d vesw id %d ctx %d\n",
			    l4_type, vesw_id, packet->rcd->ctxt);
		return;
	}

	q_idx = packet->rcd->vnic_q_idx;
	rxq = &vinfo->rxq[q_idx];
	if (unlikely(!netif_oper_up(vinfo->netdev))) {
		vinfo->stats[q_idx].rx_drop_state++;
		skb_queue_purge(&rxq->skbq);
		return;
	}

	if (unlikely(skb_queue_len(&rxq->skbq) > HFI1_VNIC_RCV_Q_SIZE)) {
		vinfo->stats[q_idx].netstats.rx_fifo_errors++;
		return;
	}

	skb = netdev_alloc_skb(vinfo->netdev, packet->tlen);
	if (unlikely(!skb)) {
		vinfo->stats[q_idx].netstats.rx_fifo_errors++;
		return;
	}

	memcpy(skb->data, packet->ebuf, packet->tlen);
	skb_put(skb, packet->tlen);
	skb_queue_tail(&rxq->skbq, skb);

	if (napi_schedule_prep(&rxq->napi)) {
		v_dbg("napi %d scheduling\n", q_idx);
		__napi_schedule(&rxq->napi);
	}
}

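/*
 * hfi1_vnic_up - publish the vport in the vesw IDR and start the rx/tx
 * queues; hfi1_vnic_down reverses these steps.
 */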
static int hfi1_vnic_up(struct hfi1_vnic_vport_info *vinfo)
{
	struct hfi1_devdata *dd = vinfo->dd;
	struct net_device *netdev = vinfo->netdev;
	int i, rc;

	/* ensure virtual eth switch id is valid */
	if (!vinfo->vesw_id)
		return -EINVAL;

	rc = idr_alloc(&dd->vnic.vesw_idr, vinfo, vinfo->vesw_id,
		       vinfo->vesw_id + 1, GFP_NOWAIT);
	if (rc < 0)
		return rc;

	for (i = 0; i < vinfo->num_rx_q; i++) {
		struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];

		skb_queue_head_init(&rxq->skbq);
		napi_enable(&rxq->napi);
	}

	netif_carrier_on(netdev);
	netif_tx_start_all_queues(netdev);
	set_bit(HFI1_VNIC_UP, &vinfo->flags);

	return 0;
}

static void hfi1_vnic_down(struct hfi1_vnic_vport_info *vinfo)
{
	struct hfi1_devdata *dd = vinfo->dd;
	u8 i;

	clear_bit(HFI1_VNIC_UP, &vinfo->flags);
	netif_carrier_off(vinfo->netdev);
	netif_tx_disable(vinfo->netdev);
	idr_remove(&dd->vnic.vesw_idr, vinfo->vesw_id);

	/* ensure irqs see the change */
	hfi1_vnic_synchronize_irq(dd);

	/* remove unread skbs */
	for (i = 0; i < vinfo->num_rx_q; i++) {
		struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];

		napi_disable(&rxq->napi);
		skb_queue_purge(&rxq->skbq);
	}
}

static int hfi1_netdev_open(struct net_device *netdev)
{
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
	int rc;

	mutex_lock(&vinfo->lock);
	rc = hfi1_vnic_up(vinfo);
	mutex_unlock(&vinfo->lock);
	return rc;
}

static int hfi1_netdev_close(struct net_device *netdev)
{
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);

	mutex_lock(&vinfo->lock);
	if (test_bit(HFI1_VNIC_UP, &vinfo->flags))
		hfi1_vnic_down(vinfo);
	mutex_unlock(&vinfo->lock);
	return 0;
}

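/*
 * hfi1_vnic_allot_ctxt - allocate and program one VNIC receive context;
 * the context is released again if programming fails.
 */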
static int hfi1_vnic_allot_ctxt(struct hfi1_devdata *dd,
				struct hfi1_ctxtdata **vnic_ctxt)
{
	int rc;

	rc = allocate_vnic_ctxt(dd, vnic_ctxt);
	if (rc) {
		dd_dev_err(dd, "vnic ctxt alloc failed %d\n", rc);
		return rc;
	}

	rc = setup_vnic_ctxt(dd, *vnic_ctxt);
	if (rc) {
		dd_dev_err(dd, "vnic ctxt setup failed %d\n", rc);
		deallocate_vnic_ctxt(dd, *vnic_ctxt);
		*vnic_ctxt = NULL;
	}

	return rc;
}

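/*
 * hfi1_vnic_init - per-vport init under hfi1_mutex: the first vport sets
 * up the tx request cache, receive contexts are allotted up to num_rx_q,
 * and the VNIC RSM rule is programmed once contexts exist.
 */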
static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo)
{
	struct hfi1_devdata *dd = vinfo->dd;
	int i, rc = 0;

	mutex_lock(&hfi1_mutex);
	if (!dd->vnic.num_vports) {
		rc = hfi1_vnic_txreq_init(dd);
		if (rc)
			goto txreq_fail;

		dd->vnic.msix_idx = dd->first_dyn_msix_idx;
	}

	for (i = dd->vnic.num_ctxt; i < vinfo->num_rx_q; i++) {
		rc = hfi1_vnic_allot_ctxt(dd, &dd->vnic.ctxt[i]);
		if (rc)
			break;
		dd->vnic.ctxt[i]->vnic_q_idx = i;
	}

	if (i < vinfo->num_rx_q) {
		/*
		 * If the required number of contexts is not allocated
		 * successfully, then release the contexts allocated so far.
		 */
		while (i-- > dd->vnic.num_ctxt) {
			deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]);
			dd->vnic.ctxt[i] = NULL;
		}
		goto alloc_fail;
	}

	if (dd->vnic.num_ctxt != i) {
		dd->vnic.num_ctxt = i;
		hfi1_init_vnic_rsm(dd);
	}

	dd->vnic.num_vports++;
	hfi1_vnic_sdma_init(vinfo);

alloc_fail:
	if (!dd->vnic.num_vports)
		hfi1_vnic_txreq_deinit(dd);
txreq_fail:
	mutex_unlock(&hfi1_mutex);
	return rc;
}

static void hfi1_vnic_deinit(struct hfi1_vnic_vport_info *vinfo)
{
	struct hfi1_devdata *dd = vinfo->dd;
	int i;

	mutex_lock(&hfi1_mutex);
	if (--dd->vnic.num_vports == 0) {
		for (i = 0; i < dd->vnic.num_ctxt; i++) {
			deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]);
			dd->vnic.ctxt[i] = NULL;
		}
		hfi1_deinit_vnic_rsm(dd);
		dd->vnic.num_ctxt = 0;
		hfi1_vnic_txreq_deinit(dd);
	}
	mutex_unlock(&hfi1_mutex);
}

static void hfi1_vnic_set_vesw_id(struct net_device *netdev, int id)
{
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
	bool reopen = false;

	/*
	 * If vesw_id is being changed, and if the vnic port is up,
	 * reset the vnic port to ensure the new vesw_id gets picked up
	 */
	if (id != vinfo->vesw_id) {
		mutex_lock(&vinfo->lock);
		if (test_bit(HFI1_VNIC_UP, &vinfo->flags)) {
			hfi1_vnic_down(vinfo);
			reopen = true;
		}

		vinfo->vesw_id = id;
		if (reopen)
			hfi1_vnic_up(vinfo);

		mutex_unlock(&vinfo->lock);
	}
}

static const struct net_device_ops hfi1_netdev_ops = {
	.ndo_open = hfi1_netdev_open,
	.ndo_stop = hfi1_netdev_close,
	.ndo_start_xmit = hfi1_netdev_start_xmit,
	.ndo_select_queue = hfi1_vnic_select_queue,
	.ndo_get_stats64 = hfi1_vnic_get_stats64,
};

static void hfi1_vnic_free_rn(struct net_device *netdev)
{
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);

	hfi1_vnic_deinit(vinfo);
	mutex_destroy(&vinfo->lock);
	free_netdev(netdev);
}

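/*
 * hfi1_vnic_alloc_rn - allocate the rdma netdev for an OPA VNIC port with
 * one tx queue per SDMA engine and one rx queue per VNIC receive context.
 */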
struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device,
				      u8 port_num,
				      enum rdma_netdev_t type,
				      const char *name,
				      unsigned char name_assign_type,
				      void (*setup)(struct net_device *))
{
	struct hfi1_devdata *dd = dd_from_ibdev(device);
	struct hfi1_vnic_vport_info *vinfo;
	struct net_device *netdev;
	struct rdma_netdev *rn;
	int i, size, rc;

	if (!port_num || (port_num > dd->num_pports))
		return ERR_PTR(-EINVAL);

	if (type != RDMA_NETDEV_OPA_VNIC)
		return ERR_PTR(-EOPNOTSUPP);

	size = sizeof(struct opa_vnic_rdma_netdev) + sizeof(*vinfo);
	netdev = alloc_netdev_mqs(size, name, name_assign_type, setup,
				  dd->chip_sdma_engines, HFI1_NUM_VNIC_CTXT);
	if (!netdev)
		return ERR_PTR(-ENOMEM);

	rn = netdev_priv(netdev);
	vinfo = opa_vnic_dev_priv(netdev);
	vinfo->dd = dd;
	vinfo->num_tx_q = dd->chip_sdma_engines;
	vinfo->num_rx_q = HFI1_NUM_VNIC_CTXT;
	vinfo->netdev = netdev;
	rn->free_rdma_netdev = hfi1_vnic_free_rn;
	rn->set_id = hfi1_vnic_set_vesw_id;

	netdev->features = NETIF_F_HIGHDMA | NETIF_F_SG;
	netdev->hw_features = netdev->features;
	netdev->vlan_features = netdev->features;
	netdev->watchdog_timeo = msecs_to_jiffies(HFI_TX_TIMEOUT_MS);
	netdev->netdev_ops = &hfi1_netdev_ops;
	mutex_init(&vinfo->lock);

	for (i = 0; i < vinfo->num_rx_q; i++) {
		struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];

		rxq->idx = i;
		rxq->vinfo = vinfo;
		rxq->netdev = netdev;
		netif_napi_add(netdev, &rxq->napi, hfi1_vnic_napi, 64);
	}

	rc = hfi1_vnic_init(vinfo);
	if (rc)
		goto init_fail;

	return netdev;
init_fail:
	mutex_destroy(&vinfo->lock);
	free_netdev(netdev);
	return ERR_PTR(rc);
}