/*
 * Linux driver for VMware's vmxnet3 ethernet NIC.
 *
 * Copyright (C) 2008-2009, VMware, Inc. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; version 2 of the License and no later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT. See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * The full GNU General Public License is included in this distribution in
 * the file called "COPYING".
 *
 * Maintained by: Shreyas Bhatewara <pv-drivers@vmware.com>
 *
 */

#include <linux/module.h>
#include <net/ip6_checksum.h>

#include "vmxnet3_int.h"

char vmxnet3_driver_name[] = "vmxnet3";
#define VMXNET3_DRIVER_DESC "VMware vmxnet3 virtual NIC driver"

/*
 * PCI Device ID Table
 * Last entry must be all 0s
 */
static DEFINE_PCI_DEVICE_TABLE(vmxnet3_pciid_table) = {
        {PCI_VDEVICE(VMWARE, PCI_DEVICE_ID_VMWARE_VMXNET3)},
        {0}
};

MODULE_DEVICE_TABLE(pci, vmxnet3_pciid_table);

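/* Non-zero allows the driver to use multiple rx/tx queues when available. */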
static int enable_mq = 1;

static void
vmxnet3_write_mac_addr(struct vmxnet3_adapter *adapter, u8 *mac);

/*
 *    Enable/Disable the given intr
 */
static void
vmxnet3_enable_intr(struct vmxnet3_adapter *adapter, unsigned intr_idx)
{
        VMXNET3_WRITE_BAR0_REG(adapter, VMXNET3_REG_IMR + intr_idx * 8, 0);
}


static void
vmxnet3_disable_intr(struct vmxnet3_adapter *adapter, unsigned intr_idx)
{
        VMXNET3_WRITE_BAR0_REG(adapter, VMXNET3_REG_IMR + intr_idx * 8, 1);
}


/*
 *    Enable/Disable all intrs used by the device
 */
static void
vmxnet3_enable_all_intrs(struct vmxnet3_adapter *adapter)
{
        int i;

        for (i = 0; i < adapter->intr.num_intrs; i++)
                vmxnet3_enable_intr(adapter, i);
        adapter->shared->devRead.intrConf.intrCtrl &=
                                        cpu_to_le32(~VMXNET3_IC_DISABLE_ALL);
}


static void
vmxnet3_disable_all_intrs(struct vmxnet3_adapter *adapter)
{
        int i;

        adapter->shared->devRead.intrConf.intrCtrl |=
                                        cpu_to_le32(VMXNET3_IC_DISABLE_ALL);
        for (i = 0; i < adapter->intr.num_intrs; i++)
                vmxnet3_disable_intr(adapter, i);
}

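/* Acknowledge the given events by writing them back to the ECR register. */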
static void
vmxnet3_ack_events(struct vmxnet3_adapter *adapter, u32 events)
{
        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_ECR, events);
}


static bool
vmxnet3_tq_stopped(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
{
        return tq->stopped;
}


static void
vmxnet3_tq_start(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
{
        tq->stopped = false;
        netif_start_subqueue(adapter->netdev, tq - adapter->tx_queue);
}


static void
vmxnet3_tq_wake(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
{
        tq->stopped = false;
        netif_wake_subqueue(adapter->netdev, (tq - adapter->tx_queue));
}


static void
vmxnet3_tq_stop(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
{
        tq->stopped = true;
        tq->num_stop++;
        netif_stop_subqueue(adapter->netdev, (tq - adapter->tx_queue));
}


/*
 * Check the link state. This may start or stop the tx queue.
 */
static void
vmxnet3_check_link(struct vmxnet3_adapter *adapter, bool affectTxQueue)
{
        u32 ret;
        int i;
        unsigned long flags;

        spin_lock_irqsave(&adapter->cmd_lock, flags);
        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_GET_LINK);
        ret = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
        spin_unlock_irqrestore(&adapter->cmd_lock, flags);

        adapter->link_speed = ret >> 16;
        if (ret & 1) { /* Link is up. */
                netdev_info(adapter->netdev, "NIC Link is Up %d Mbps\n",
                            adapter->link_speed);
                netif_carrier_on(adapter->netdev);

                if (affectTxQueue) {
                        for (i = 0; i < adapter->num_tx_queues; i++)
                                vmxnet3_tq_start(&adapter->tx_queue[i],
                                                 adapter);
                }
        } else {
                netdev_info(adapter->netdev, "NIC Link is Down\n");
                netif_carrier_off(adapter->netdev);

                if (affectTxQueue) {
                        for (i = 0; i < adapter->num_tx_queues; i++)
                                vmxnet3_tq_stop(&adapter->tx_queue[i], adapter);
                }
        }
}

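/*
 * Process pending device events: acknowledge them, refresh the link state
 * on a link event, and on a tx/rx queue error log the failing queue and
 * schedule the reset work.
 */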
static void
vmxnet3_process_events(struct vmxnet3_adapter *adapter)
{
        int i;
        unsigned long flags;
        u32 events = le32_to_cpu(adapter->shared->ecr);
        if (!events)
                return;

        vmxnet3_ack_events(adapter, events);

        /* Check if link state has changed */
        if (events & VMXNET3_ECR_LINK)
                vmxnet3_check_link(adapter, true);

        /* Check if there is an error on xmit/recv queues */
        if (events & (VMXNET3_ECR_TQERR | VMXNET3_ECR_RQERR)) {
                spin_lock_irqsave(&adapter->cmd_lock, flags);
                VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
                                       VMXNET3_CMD_GET_QUEUE_STATUS);
                spin_unlock_irqrestore(&adapter->cmd_lock, flags);

                for (i = 0; i < adapter->num_tx_queues; i++)
                        if (adapter->tqd_start[i].status.stopped)
                                dev_err(&adapter->netdev->dev,
                                        "%s: tq[%d] error 0x%x\n",
                                        adapter->netdev->name, i, le32_to_cpu(
                                        adapter->tqd_start[i].status.error));
                for (i = 0; i < adapter->num_rx_queues; i++)
                        if (adapter->rqd_start[i].status.stopped)
                                dev_err(&adapter->netdev->dev,
                                        "%s: rq[%d] error 0x%x\n",
                                        adapter->netdev->name, i,
                                        adapter->rqd_start[i].status.error);

                schedule_work(&adapter->work);
        }
}

#ifdef __BIG_ENDIAN_BITFIELD
/*
 * The device expects the bitfields in shared structures to be written in
 * little endian. When the CPU is big endian, the following routines are used
 * to correctly read from and write to the device ABI.
 * The general technique used here is: double-word bitfields are defined in
 * the opposite order for big endian architectures. Before the driver reads
 * them, the complete double word is translated using le32_to_cpu. Similarly,
 * after the driver writes into the bitfields, cpu_to_le32 is used to
 * translate the double words into the required format.
 * To avoid touching bits in the shared structure more than once, temporary
 * descriptors are used. These are passed as srcDesc to the following
 * functions.
 */
static void vmxnet3_RxDescToCPU(const struct Vmxnet3_RxDesc *srcDesc,
                                struct Vmxnet3_RxDesc *dstDesc)
{
        u32 *src = (u32 *)srcDesc + 2;
        u32 *dst = (u32 *)dstDesc + 2;
        dstDesc->addr = le64_to_cpu(srcDesc->addr);
        *dst = le32_to_cpu(*src);
        dstDesc->ext1 = le32_to_cpu(srcDesc->ext1);
}

static void vmxnet3_TxDescToLe(const struct Vmxnet3_TxDesc *srcDesc,
                               struct Vmxnet3_TxDesc *dstDesc)
{
        int i;
        u32 *src = (u32 *)(srcDesc + 1);
        u32 *dst = (u32 *)(dstDesc + 1);

        /* Working backwards so that the gen bit is set at the end. */
        for (i = 2; i > 0; i--) {
                src--;
                dst--;
                *dst = cpu_to_le32(*src);
        }
}


static void vmxnet3_RxCompToCPU(const struct Vmxnet3_RxCompDesc *srcDesc,
                                struct Vmxnet3_RxCompDesc *dstDesc)
{
        int i = 0;
        u32 *src = (u32 *)srcDesc;
        u32 *dst = (u32 *)dstDesc;
        for (i = 0; i < sizeof(struct Vmxnet3_RxCompDesc) / sizeof(u32); i++) {
                *dst = le32_to_cpu(*src);
                src++;
                dst++;
        }
}


/* Used to read bitfield values from double words. */
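/*
 * For example, VMXNET3_TXDESC_GET_GEN() below expands to
 * get_bitfield32(dword, VMXNET3_TXD_GEN_SHIFT, VMXNET3_TXD_GEN_SIZE) to
 * extract the generation bit from a descriptor's little-endian dword.
 */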
static u32 get_bitfield32(const __le32 *bitfield, u32 pos, u32 size)
{
        u32 temp = le32_to_cpu(*bitfield);
        u32 mask = ((1 << size) - 1) << pos;
        temp &= mask;
        temp >>= pos;
        return temp;
}


#endif  /* __BIG_ENDIAN_BITFIELD */

#ifdef __BIG_ENDIAN_BITFIELD

#   define VMXNET3_TXDESC_GET_GEN(txdesc) get_bitfield32(((const __le32 *) \
                        txdesc) + VMXNET3_TXD_GEN_DWORD_SHIFT, \
                        VMXNET3_TXD_GEN_SHIFT, VMXNET3_TXD_GEN_SIZE)
#   define VMXNET3_TXDESC_GET_EOP(txdesc) get_bitfield32(((const __le32 *) \
                        txdesc) + VMXNET3_TXD_EOP_DWORD_SHIFT, \
                        VMXNET3_TXD_EOP_SHIFT, VMXNET3_TXD_EOP_SIZE)
#   define VMXNET3_TCD_GET_GEN(tcd) get_bitfield32(((const __le32 *)tcd) + \
                        VMXNET3_TCD_GEN_DWORD_SHIFT, VMXNET3_TCD_GEN_SHIFT, \
                        VMXNET3_TCD_GEN_SIZE)
#   define VMXNET3_TCD_GET_TXIDX(tcd) get_bitfield32((const __le32 *)tcd, \
                        VMXNET3_TCD_TXIDX_SHIFT, VMXNET3_TCD_TXIDX_SIZE)
#   define vmxnet3_getRxComp(dstrcd, rcd, tmp) do { \
                        (dstrcd) = (tmp); \
                        vmxnet3_RxCompToCPU((rcd), (tmp)); \
                } while (0)
#   define vmxnet3_getRxDesc(dstrxd, rxd, tmp) do { \
                        (dstrxd) = (tmp); \
                        vmxnet3_RxDescToCPU((rxd), (tmp)); \
                } while (0)

#else

#   define VMXNET3_TXDESC_GET_GEN(txdesc) ((txdesc)->gen)
#   define VMXNET3_TXDESC_GET_EOP(txdesc) ((txdesc)->eop)
#   define VMXNET3_TCD_GET_GEN(tcd) ((tcd)->gen)
#   define VMXNET3_TCD_GET_TXIDX(tcd) ((tcd)->txdIdx)
#   define vmxnet3_getRxComp(dstrcd, rcd, tmp) (dstrcd) = (rcd)
#   define vmxnet3_getRxDesc(dstrxd, rxd, tmp) (dstrxd) = (rxd)

#endif /* __BIG_ENDIAN_BITFIELD  */

static void
vmxnet3_unmap_tx_buf(struct vmxnet3_tx_buf_info *tbi,
                     struct pci_dev *pdev)
{
        if (tbi->map_type == VMXNET3_MAP_SINGLE)
                pci_unmap_single(pdev, tbi->dma_addr, tbi->len,
                                 PCI_DMA_TODEVICE);
        else if (tbi->map_type == VMXNET3_MAP_PAGE)
                pci_unmap_page(pdev, tbi->dma_addr, tbi->len,
                               PCI_DMA_TODEVICE);
        else
                BUG_ON(tbi->map_type != VMXNET3_MAP_NONE);

        tbi->map_type = VMXNET3_MAP_NONE; /* to help debugging */
}

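/*
 * Reclaim the packet whose EOP descriptor is at @eop_idx: unmap every
 * buffer from tx_ring.next2comp through eop_idx, free the skb, and return
 * the number of ring entries reclaimed.
 */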
static int
vmxnet3_unmap_pkt(u32 eop_idx, struct vmxnet3_tx_queue *tq,
                  struct pci_dev *pdev, struct vmxnet3_adapter *adapter)
{
        struct sk_buff *skb;
        int entries = 0;

        /* no out of order completion */
        BUG_ON(tq->buf_info[eop_idx].sop_idx != tq->tx_ring.next2comp);
        BUG_ON(VMXNET3_TXDESC_GET_EOP(&(tq->tx_ring.base[eop_idx].txd)) != 1);

        skb = tq->buf_info[eop_idx].skb;
        BUG_ON(skb == NULL);
        tq->buf_info[eop_idx].skb = NULL;

        VMXNET3_INC_RING_IDX_ONLY(eop_idx, tq->tx_ring.size);

        while (tq->tx_ring.next2comp != eop_idx) {
                vmxnet3_unmap_tx_buf(tq->buf_info + tq->tx_ring.next2comp,
                                     pdev);

                /* update next2comp w/o tx_lock. Since we are marking more,
                 * not fewer, tx ring entries available, the worst case is
                 * that the tx routine incorrectly re-queues a pkt due to
                 * insufficient tx ring entries.
                 */
                vmxnet3_cmd_ring_adv_next2comp(&tq->tx_ring);
                entries++;
        }

        dev_kfree_skb_any(skb);
        return entries;
}

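/*
 * Reap tx completion descriptors whose gen bit matches the current
 * completion ring generation, then wake the queue if it was stopped and
 * enough ring entries have been freed.
 */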
static int
vmxnet3_tq_tx_complete(struct vmxnet3_tx_queue *tq,
                        struct vmxnet3_adapter *adapter)
{
        int completed = 0;
        union Vmxnet3_GenericDesc *gdesc;

        gdesc = tq->comp_ring.base + tq->comp_ring.next2proc;
        while (VMXNET3_TCD_GET_GEN(&gdesc->tcd) == tq->comp_ring.gen) {
                completed += vmxnet3_unmap_pkt(VMXNET3_TCD_GET_TXIDX(
                                               &gdesc->tcd), tq, adapter->pdev,
                                               adapter);

                vmxnet3_comp_ring_adv_next2proc(&tq->comp_ring);
                gdesc = tq->comp_ring.base + tq->comp_ring.next2proc;
        }

        if (completed) {
                spin_lock(&tq->tx_lock);
                if (unlikely(vmxnet3_tq_stopped(tq, adapter) &&
                             vmxnet3_cmd_ring_desc_avail(&tq->tx_ring) >
                             VMXNET3_WAKE_QUEUE_THRESHOLD(tq) &&
                             netif_carrier_ok(adapter->netdev))) {
                        vmxnet3_tq_wake(tq, adapter);
                }
                spin_unlock(&tq->tx_lock);
        }
        return completed;
}

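/* Unmap and free all pending tx buffers and reset the ring state. */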
static void
vmxnet3_tq_cleanup(struct vmxnet3_tx_queue *tq,
                   struct vmxnet3_adapter *adapter)
{
        int i;

        while (tq->tx_ring.next2comp != tq->tx_ring.next2fill) {
                struct vmxnet3_tx_buf_info *tbi;

                tbi = tq->buf_info + tq->tx_ring.next2comp;

                vmxnet3_unmap_tx_buf(tbi, adapter->pdev);
                if (tbi->skb) {
                        dev_kfree_skb_any(tbi->skb);
                        tbi->skb = NULL;
                }
                vmxnet3_cmd_ring_adv_next2comp(&tq->tx_ring);
        }

        /* sanity check, verify all buffers are indeed unmapped and freed */
        for (i = 0; i < tq->tx_ring.size; i++) {
                BUG_ON(tq->buf_info[i].skb != NULL ||
                       tq->buf_info[i].map_type != VMXNET3_MAP_NONE);
        }

        tq->tx_ring.gen = VMXNET3_INIT_GEN;
        tq->tx_ring.next2fill = tq->tx_ring.next2comp = 0;

        tq->comp_ring.gen = VMXNET3_INIT_GEN;
        tq->comp_ring.next2proc = 0;
}

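/* Free the DMA rings and the buf_info array set up by vmxnet3_tq_create(). */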
static void
vmxnet3_tq_destroy(struct vmxnet3_tx_queue *tq,
                   struct vmxnet3_adapter *adapter)
{
        if (tq->tx_ring.base) {
                pci_free_consistent(adapter->pdev, tq->tx_ring.size *
                                    sizeof(struct Vmxnet3_TxDesc),
                                    tq->tx_ring.base, tq->tx_ring.basePA);
                tq->tx_ring.base = NULL;
        }
        if (tq->data_ring.base) {
                pci_free_consistent(adapter->pdev, tq->data_ring.size *
                                    sizeof(struct Vmxnet3_TxDataDesc),
                                    tq->data_ring.base, tq->data_ring.basePA);
                tq->data_ring.base = NULL;
        }
        if (tq->comp_ring.base) {
                pci_free_consistent(adapter->pdev, tq->comp_ring.size *
                                    sizeof(struct Vmxnet3_TxCompDesc),
                                    tq->comp_ring.base, tq->comp_ring.basePA);
                tq->comp_ring.base = NULL;
        }
        kfree(tq->buf_info);
        tq->buf_info = NULL;
}


/* Destroy all tx queues */
void
vmxnet3_tq_destroy_all(struct vmxnet3_adapter *adapter)
{
        int i;

        for (i = 0; i < adapter->num_tx_queues; i++)
                vmxnet3_tq_destroy(&adapter->tx_queue[i], adapter);
}


static void
vmxnet3_tq_init(struct vmxnet3_tx_queue *tq,
                struct vmxnet3_adapter *adapter)
{
        int i;

        /* reset the tx ring contents to 0 and reset the tx ring states */
        memset(tq->tx_ring.base, 0, tq->tx_ring.size *
               sizeof(struct Vmxnet3_TxDesc));
        tq->tx_ring.next2fill = tq->tx_ring.next2comp = 0;
        tq->tx_ring.gen = VMXNET3_INIT_GEN;

        memset(tq->data_ring.base, 0, tq->data_ring.size *
               sizeof(struct Vmxnet3_TxDataDesc));

        /* reset the tx comp ring contents to 0 and reset comp ring states */
        memset(tq->comp_ring.base, 0, tq->comp_ring.size *
               sizeof(struct Vmxnet3_TxCompDesc));
        tq->comp_ring.next2proc = 0;
        tq->comp_ring.gen = VMXNET3_INIT_GEN;

        /* reset the bookkeeping data */
        memset(tq->buf_info, 0, sizeof(tq->buf_info[0]) * tq->tx_ring.size);
        for (i = 0; i < tq->tx_ring.size; i++)
                tq->buf_info[i].map_type = VMXNET3_MAP_NONE;

        /* stats are not reset */
}

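/*
 * Allocate the DMA-coherent tx, data, and completion rings plus the
 * buf_info bookkeeping array; any partial allocation is torn down again
 * on failure.
 */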
static int
vmxnet3_tq_create(struct vmxnet3_tx_queue *tq,
                  struct vmxnet3_adapter *adapter)
{
        BUG_ON(tq->tx_ring.base || tq->data_ring.base ||
               tq->comp_ring.base || tq->buf_info);

        tq->tx_ring.base = pci_alloc_consistent(adapter->pdev, tq->tx_ring.size
                           * sizeof(struct Vmxnet3_TxDesc),
                           &tq->tx_ring.basePA);
        if (!tq->tx_ring.base) {
                netdev_err(adapter->netdev, "failed to allocate tx ring\n");
                goto err;
        }

        tq->data_ring.base = pci_alloc_consistent(adapter->pdev,
                             tq->data_ring.size *
                             sizeof(struct Vmxnet3_TxDataDesc),
                             &tq->data_ring.basePA);
        if (!tq->data_ring.base) {
                netdev_err(adapter->netdev, "failed to allocate data ring\n");
                goto err;
        }

        tq->comp_ring.base = pci_alloc_consistent(adapter->pdev,
                             tq->comp_ring.size *
                             sizeof(struct Vmxnet3_TxCompDesc),
                             &tq->comp_ring.basePA);
        if (!tq->comp_ring.base) {
                netdev_err(adapter->netdev, "failed to allocate tx comp ring\n");
                goto err;
        }

        tq->buf_info = kcalloc(tq->tx_ring.size, sizeof(tq->buf_info[0]),
                               GFP_KERNEL);
        if (!tq->buf_info)
                goto err;

        return 0;

err:
        vmxnet3_tq_destroy(tq, adapter);
        return -ENOMEM;
}

static void
vmxnet3_tq_cleanup_all(struct vmxnet3_adapter *adapter)
{
        int i;

        for (i = 0; i < adapter->num_tx_queues; i++)
                vmxnet3_tq_cleanup(&adapter->tx_queue[i], adapter);
}

/*
 * Starting from ring->next2fill, allocate rx buffers for the given ring
 * of the rx queue and update the rx desc. Stop after @num_to_alloc buffers
 * are allocated or allocation fails.
 */

static int
vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx,
                        int num_to_alloc, struct vmxnet3_adapter *adapter)
{
        int num_allocated = 0;
        struct vmxnet3_rx_buf_info *rbi_base = rq->buf_info[ring_idx];
        struct vmxnet3_cmd_ring *ring = &rq->rx_ring[ring_idx];
        u32 val;

        while (num_allocated <= num_to_alloc) {
                struct vmxnet3_rx_buf_info *rbi;
                union Vmxnet3_GenericDesc *gd;

                rbi = rbi_base + ring->next2fill;
                gd = ring->base + ring->next2fill;

                if (rbi->buf_type == VMXNET3_RX_BUF_SKB) {
                        if (rbi->skb == NULL) {
                                rbi->skb = __netdev_alloc_skb_ip_align(adapter->netdev,
                                                                       rbi->len,
                                                                       GFP_KERNEL);
                                if (unlikely(rbi->skb == NULL)) {
                                        rq->stats.rx_buf_alloc_failure++;
                                        break;
                                }

                                rbi->dma_addr = pci_map_single(adapter->pdev,
                                                rbi->skb->data, rbi->len,
                                                PCI_DMA_FROMDEVICE);
                        } else {
                                /* rx buffer skipped by the device */
                        }
                        val = VMXNET3_RXD_BTYPE_HEAD << VMXNET3_RXD_BTYPE_SHIFT;
                } else {
                        BUG_ON(rbi->buf_type != VMXNET3_RX_BUF_PAGE ||
                               rbi->len  != PAGE_SIZE);

                        if (rbi->page == NULL) {
                                rbi->page = alloc_page(GFP_ATOMIC);
                                if (unlikely(rbi->page == NULL)) {
                                        rq->stats.rx_buf_alloc_failure++;
                                        break;
                                }
                                rbi->dma_addr = pci_map_page(adapter->pdev,
                                                rbi->page, 0, PAGE_SIZE,
                                                PCI_DMA_FROMDEVICE);
                        } else {
                                /* rx buffers skipped by the device */
                        }
                        val = VMXNET3_RXD_BTYPE_BODY << VMXNET3_RXD_BTYPE_SHIFT;
                }

                BUG_ON(rbi->dma_addr == 0);
                gd->rxd.addr = cpu_to_le64(rbi->dma_addr);
                gd->dword[2] = cpu_to_le32((!ring->gen << VMXNET3_RXD_GEN_SHIFT)
                                           | val | rbi->len);

                /* Fill the last buffer but don't mark it ready, or else the
                 * device will think that the queue is full */
                if (num_allocated == num_to_alloc)
                        break;

                gd->dword[2] |= cpu_to_le32(ring->gen << VMXNET3_RXD_GEN_SHIFT);
                num_allocated++;
                vmxnet3_cmd_ring_adv_next2fill(ring);
        }

        netdev_dbg(adapter->netdev,
                "alloc_rx_buf: %d allocated, next2fill %u, next2comp %u\n",
                num_allocated, ring->next2fill, ring->next2comp);

        /* so that the device can distinguish a full ring and an empty ring */
        BUG_ON(num_allocated != 0 && ring->next2fill == ring->next2comp);

        return num_allocated;
}

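/* Chain the page-type rx buffer in @rbi onto @skb as its next fragment. */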
static void
vmxnet3_append_frag(struct sk_buff *skb, struct Vmxnet3_RxCompDesc *rcd,
                    struct vmxnet3_rx_buf_info *rbi)
{
        struct skb_frag_struct *frag = skb_shinfo(skb)->frags +
                skb_shinfo(skb)->nr_frags;

        BUG_ON(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS);

        __skb_frag_set_page(frag, rbi->page);
        frag->page_offset = 0;
        skb_frag_size_set(frag, rcd->len);
        skb->data_len += rcd->len;
        skb->truesize += PAGE_SIZE;
        skb_shinfo(skb)->nr_frags++;
}

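/*
 * Build the tx descriptor chain for @skb: an optional SOP descriptor
 * pointing at the copied headers in the data ring, descriptors for the
 * remaining linear part, and one or more per page fragment. The SOP
 * descriptor keeps the previous generation bit so the device ignores the
 * packet until the caller flips it.
 */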
static void
vmxnet3_map_pkt(struct sk_buff *skb, struct vmxnet3_tx_ctx *ctx,
                struct vmxnet3_tx_queue *tq, struct pci_dev *pdev,
                struct vmxnet3_adapter *adapter)
{
        u32 dw2, len;
        unsigned long buf_offset;
        int i;
        union Vmxnet3_GenericDesc *gdesc;
        struct vmxnet3_tx_buf_info *tbi = NULL;

        BUG_ON(ctx->copy_size > skb_headlen(skb));

        /* use the previous gen bit for the SOP desc */
        dw2 = (tq->tx_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;

        ctx->sop_txd = tq->tx_ring.base + tq->tx_ring.next2fill;
        gdesc = ctx->sop_txd; /* both loops below can be skipped */

        /* no need to map the buffer if headers are copied */
        if (ctx->copy_size) {
                ctx->sop_txd->txd.addr = cpu_to_le64(tq->data_ring.basePA +
                                        tq->tx_ring.next2fill *
                                        sizeof(struct Vmxnet3_TxDataDesc));
                ctx->sop_txd->dword[2] = cpu_to_le32(dw2 | ctx->copy_size);
                ctx->sop_txd->dword[3] = 0;

                tbi = tq->buf_info + tq->tx_ring.next2fill;
                tbi->map_type = VMXNET3_MAP_NONE;

                netdev_dbg(adapter->netdev,
                        "txd[%u]: 0x%Lx 0x%x 0x%x\n",
                        tq->tx_ring.next2fill,
                        le64_to_cpu(ctx->sop_txd->txd.addr),
                        ctx->sop_txd->dword[2], ctx->sop_txd->dword[3]);
                vmxnet3_cmd_ring_adv_next2fill(&tq->tx_ring);

                /* use the right gen for non-SOP desc */
                dw2 = tq->tx_ring.gen << VMXNET3_TXD_GEN_SHIFT;
        }

        /* linear part can use multiple tx desc if it's big */
        len = skb_headlen(skb) - ctx->copy_size;
        buf_offset = ctx->copy_size;
        while (len) {
                u32 buf_size;

                if (len < VMXNET3_MAX_TX_BUF_SIZE) {
                        buf_size = len;
                        dw2 |= len;
                } else {
                        buf_size = VMXNET3_MAX_TX_BUF_SIZE;
                        /* spec says that for TxDesc.len, 0 == 2^14 */
                }

                tbi = tq->buf_info + tq->tx_ring.next2fill;
                tbi->map_type = VMXNET3_MAP_SINGLE;
                tbi->dma_addr = pci_map_single(adapter->pdev,
                                skb->data + buf_offset, buf_size,
                                PCI_DMA_TODEVICE);

                tbi->len = buf_size;

                gdesc = tq->tx_ring.base + tq->tx_ring.next2fill;
                BUG_ON(gdesc->txd.gen == tq->tx_ring.gen);

                gdesc->txd.addr = cpu_to_le64(tbi->dma_addr);
                gdesc->dword[2] = cpu_to_le32(dw2);
                gdesc->dword[3] = 0;

                netdev_dbg(adapter->netdev,
                        "txd[%u]: 0x%Lx 0x%x 0x%x\n",
                        tq->tx_ring.next2fill, le64_to_cpu(gdesc->txd.addr),
                        le32_to_cpu(gdesc->dword[2]), gdesc->dword[3]);
                vmxnet3_cmd_ring_adv_next2fill(&tq->tx_ring);
                dw2 = tq->tx_ring.gen << VMXNET3_TXD_GEN_SHIFT;

                len -= buf_size;
                buf_offset += buf_size;
        }

        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
                const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
                u32 buf_size;

                buf_offset = 0;
                len = skb_frag_size(frag);
                while (len) {
                        tbi = tq->buf_info + tq->tx_ring.next2fill;
                        if (len < VMXNET3_MAX_TX_BUF_SIZE) {
                                buf_size = len;
                                dw2 |= len;
                        } else {
                                buf_size = VMXNET3_MAX_TX_BUF_SIZE;
                                /* spec says that for TxDesc.len, 0 == 2^14 */
                        }
                        tbi->map_type = VMXNET3_MAP_PAGE;
                        tbi->dma_addr = skb_frag_dma_map(&adapter->pdev->dev, frag,
                                                         buf_offset, buf_size,
                                                         DMA_TO_DEVICE);

                        tbi->len = buf_size;

                        gdesc = tq->tx_ring.base + tq->tx_ring.next2fill;
                        BUG_ON(gdesc->txd.gen == tq->tx_ring.gen);

                        gdesc->txd.addr = cpu_to_le64(tbi->dma_addr);
                        gdesc->dword[2] = cpu_to_le32(dw2);
                        gdesc->dword[3] = 0;

                        netdev_dbg(adapter->netdev,
                                "txd[%u]: 0x%Lx 0x%x 0x%x\n",
                                tq->tx_ring.next2fill, le64_to_cpu(gdesc->txd.addr),
                                le32_to_cpu(gdesc->dword[2]), gdesc->dword[3]);
                        vmxnet3_cmd_ring_adv_next2fill(&tq->tx_ring);
                        dw2 = tq->tx_ring.gen << VMXNET3_TXD_GEN_SHIFT;

                        len -= buf_size;
                        buf_offset += buf_size;
                }
        }

        ctx->eop_txd = gdesc;

        /* set the last buf_info for the pkt */
        tbi->skb = skb;
        tbi->sop_idx = ctx->sop_txd - tq->tx_ring.base;
}

/* Init all tx queues */
static void
vmxnet3_tq_init_all(struct vmxnet3_adapter *adapter)
{
        int i;

        for (i = 0; i < adapter->num_tx_queues; i++)
                vmxnet3_tq_init(&adapter->tx_queue[i], adapter);
}


/*
 * Parse and copy the relevant protocol headers:
 *   For a TSO pkt, the relevant headers are L2/3/4 including options.
 *   For a pkt requesting csum offloading, they are L2/3 and may include L4
 *   if it's a TCP/UDP pkt.
 *
 * Returns:
 *    -1:  error happened during parsing
 *     0:  protocol headers parsed, but too big to be copied
 *     1:  protocol headers parsed and copied
 *
 * Other effects:
 *    1. related *ctx fields are updated.
 *    2. ctx->copy_size is # of bytes copied
 *    3. the portion copied is guaranteed to be in the linear part
 *
 */
static int
vmxnet3_parse_and_copy_hdr(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
                           struct vmxnet3_tx_ctx *ctx,
                           struct vmxnet3_adapter *adapter)
{
        struct Vmxnet3_TxDataDesc *tdd;

        if (ctx->mss) { /* TSO */
                ctx->eth_ip_hdr_size = skb_transport_offset(skb);
                ctx->l4_hdr_size = tcp_hdrlen(skb);
                ctx->copy_size = ctx->eth_ip_hdr_size + ctx->l4_hdr_size;
        } else {
                if (skb->ip_summed == CHECKSUM_PARTIAL) {
                        ctx->eth_ip_hdr_size = skb_checksum_start_offset(skb);

                        if (ctx->ipv4) {
                                const struct iphdr *iph = ip_hdr(skb);

                                if (iph->protocol == IPPROTO_TCP)
                                        ctx->l4_hdr_size = tcp_hdrlen(skb);
                                else if (iph->protocol == IPPROTO_UDP)
                                        ctx->l4_hdr_size = sizeof(struct udphdr);
                                else
                                        ctx->l4_hdr_size = 0;
                        } else {
                                /* for simplicity, don't copy L4 headers */
                                ctx->l4_hdr_size = 0;
                        }
                        ctx->copy_size = min(ctx->eth_ip_hdr_size +
                                         ctx->l4_hdr_size, skb->len);
                } else {
                        ctx->eth_ip_hdr_size = 0;
                        ctx->l4_hdr_size = 0;
                        /* copy as much as allowed */
                        ctx->copy_size = min((unsigned int)VMXNET3_HDR_COPY_SIZE,
                                             skb_headlen(skb));
                }

                /* make sure headers are accessible directly */
                if (unlikely(!pskb_may_pull(skb, ctx->copy_size)))
                        goto err;
        }

        if (unlikely(ctx->copy_size > VMXNET3_HDR_COPY_SIZE)) {
                tq->stats.oversized_hdr++;
                ctx->copy_size = 0;
                return 0;
        }

        tdd = tq->data_ring.base + tq->tx_ring.next2fill;

        memcpy(tdd->data, skb->data, ctx->copy_size);
        netdev_dbg(adapter->netdev,
                "copy %u bytes to dataRing[%u]\n",
                ctx->copy_size, tq->tx_ring.next2fill);
        return 1;

err:
        return -1;
}

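/*
 * TSO prep: zero the IP checksum and seed tcph->check with the
 * pseudo-header checksum, as the device expects before it segments the
 * packet.
 */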
static void
vmxnet3_prepare_tso(struct sk_buff *skb,
                    struct vmxnet3_tx_ctx *ctx)
{
        struct tcphdr *tcph = tcp_hdr(skb);

        if (ctx->ipv4) {
                struct iphdr *iph = ip_hdr(skb);

                iph->check = 0;
                tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, 0,
                                                 IPPROTO_TCP, 0);
        } else {
                struct ipv6hdr *iph = ipv6_hdr(skb);

                tcph->check = ~csum_ipv6_magic(&iph->saddr, &iph->daddr, 0,
                                               IPPROTO_TCP, 0);
        }
}

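/*
 * Worst-case number of tx descriptors needed for @skb: a buffer larger
 * than VMXNET3_MAX_TX_BUF_SIZE spans several descriptors, and one extra
 * is reserved for the possible header descriptor.
 */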
static int txd_estimate(const struct sk_buff *skb)
{
        int count = VMXNET3_TXD_NEEDED(skb_headlen(skb)) + 1;
        int i;

        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
                const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];

                count += VMXNET3_TXD_NEEDED(skb_frag_size(frag));
        }
        return count;
}

/*
 * Transmit a pkt through the given tq
 * Returns:
 *    NETDEV_TX_OK:      descriptors are set up successfully
 *    NETDEV_TX_OK:      error occurred, the pkt is dropped
 *    NETDEV_TX_BUSY:    tx ring is full, queue is stopped
 *
 * Side-effects:
 *    1. tx ring may be changed
 *    2. tq stats may be updated accordingly
 *    3. shared->txNumDeferred may be updated
 */

static int
vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
                struct vmxnet3_adapter *adapter, struct net_device *netdev)
{
        int ret;
        u32 count;
        unsigned long flags;
        struct vmxnet3_tx_ctx ctx;
        union Vmxnet3_GenericDesc *gdesc;
#ifdef __BIG_ENDIAN_BITFIELD
        /* Use temporary descriptor to avoid touching bits multiple times */
        union Vmxnet3_GenericDesc tempTxDesc;
#endif

        count = txd_estimate(skb);

        ctx.ipv4 = (vlan_get_protocol(skb) == cpu_to_be16(ETH_P_IP));

        ctx.mss = skb_shinfo(skb)->gso_size;
        if (ctx.mss) {
                if (skb_header_cloned(skb)) {
                        if (unlikely(pskb_expand_head(skb, 0, 0,
                                                      GFP_ATOMIC) != 0)) {
                                tq->stats.drop_tso++;
                                goto drop_pkt;
                        }
                        tq->stats.copy_skb_header++;
                }
                vmxnet3_prepare_tso(skb, &ctx);
        } else {
                if (unlikely(count > VMXNET3_MAX_TXD_PER_PKT)) {
                        /* non-tso pkts must not use more than
                         * VMXNET3_MAX_TXD_PER_PKT entries
                         */
                        if (skb_linearize(skb) != 0) {
                                tq->stats.drop_too_many_frags++;
                                goto drop_pkt;
                        }
                        tq->stats.linearized++;

                        /* recalculate the # of descriptors to use */
                        count = VMXNET3_TXD_NEEDED(skb_headlen(skb)) + 1;
                }
        }

        spin_lock_irqsave(&tq->tx_lock, flags);

        if (count > vmxnet3_cmd_ring_desc_avail(&tq->tx_ring)) {
                tq->stats.tx_ring_full++;
                netdev_dbg(adapter->netdev,
                        "tx queue stopped on %s, next2comp %u"
                        " next2fill %u\n", adapter->netdev->name,
                        tq->tx_ring.next2comp, tq->tx_ring.next2fill);

                vmxnet3_tq_stop(tq, adapter);
                spin_unlock_irqrestore(&tq->tx_lock, flags);
                return NETDEV_TX_BUSY;
        }


        ret = vmxnet3_parse_and_copy_hdr(skb, tq, &ctx, adapter);
        if (ret >= 0) {
                BUG_ON(ret <= 0 && ctx.copy_size != 0);
                /* hdrs parsed, check against other limits */
                if (ctx.mss) {
                        if (unlikely(ctx.eth_ip_hdr_size + ctx.l4_hdr_size >
                                     VMXNET3_MAX_TX_BUF_SIZE)) {
                                goto hdr_too_big;
                        }
                } else {
                        if (skb->ip_summed == CHECKSUM_PARTIAL) {
                                if (unlikely(ctx.eth_ip_hdr_size +
                                             skb->csum_offset >
                                             VMXNET3_MAX_CSUM_OFFSET)) {
                                        goto hdr_too_big;
                                }
                        }
                }
        } else {
                tq->stats.drop_hdr_inspect_err++;
                goto unlock_drop_pkt;
        }

        /* fill tx descs related to addr & len */
        vmxnet3_map_pkt(skb, &ctx, tq, adapter->pdev, adapter);

        /* setup the EOP desc */
        ctx.eop_txd->dword[3] = cpu_to_le32(VMXNET3_TXD_CQ | VMXNET3_TXD_EOP);

        /* setup the SOP desc */
#ifdef __BIG_ENDIAN_BITFIELD
        gdesc = &tempTxDesc;
        gdesc->dword[2] = ctx.sop_txd->dword[2];
        gdesc->dword[3] = ctx.sop_txd->dword[3];
#else
        gdesc = ctx.sop_txd;
#endif
        if (ctx.mss) {
                gdesc->txd.hlen = ctx.eth_ip_hdr_size + ctx.l4_hdr_size;
                gdesc->txd.om = VMXNET3_OM_TSO;
                gdesc->txd.msscof = ctx.mss;
                le32_add_cpu(&tq->shared->txNumDeferred, (skb->len -
                             gdesc->txd.hlen + ctx.mss - 1) / ctx.mss);
        } else {
                if (skb->ip_summed == CHECKSUM_PARTIAL) {
                        gdesc->txd.hlen = ctx.eth_ip_hdr_size;
                        gdesc->txd.om = VMXNET3_OM_CSUM;
                        gdesc->txd.msscof = ctx.eth_ip_hdr_size +
                                            skb->csum_offset;
                } else {
                        gdesc->txd.om = 0;
                        gdesc->txd.msscof = 0;
                }
                le32_add_cpu(&tq->shared->txNumDeferred, 1);
        }

        if (vlan_tx_tag_present(skb)) {
                gdesc->txd.ti = 1;
                gdesc->txd.tci = vlan_tx_tag_get(skb);
        }

        /* finally, flip the GEN bit of the SOP desc. */
        gdesc->dword[2] = cpu_to_le32(le32_to_cpu(gdesc->dword[2]) ^
                                                  VMXNET3_TXD_GEN);
#ifdef __BIG_ENDIAN_BITFIELD
        /* Finished updating in bitfields of Tx Desc, so write them in original
         * place.
         */
        vmxnet3_TxDescToLe((struct Vmxnet3_TxDesc *)gdesc,
                           (struct Vmxnet3_TxDesc *)ctx.sop_txd);
        gdesc = ctx.sop_txd;
#endif
        netdev_dbg(adapter->netdev,
                "txd[%u]: SOP 0x%Lx 0x%x 0x%x\n",
                (u32)(ctx.sop_txd -
                tq->tx_ring.base), le64_to_cpu(gdesc->txd.addr),
                le32_to_cpu(gdesc->dword[2]), le32_to_cpu(gdesc->dword[3]));

        spin_unlock_irqrestore(&tq->tx_lock, flags);

        if (le32_to_cpu(tq->shared->txNumDeferred) >=
                                        le32_to_cpu(tq->shared->txThreshold)) {
                tq->shared->txNumDeferred = 0;
                VMXNET3_WRITE_BAR0_REG(adapter,
                                       VMXNET3_REG_TXPROD + tq->qid * 8,
                                       tq->tx_ring.next2fill);
        }

        return NETDEV_TX_OK;

hdr_too_big:
        tq->stats.drop_oversized_hdr++;
unlock_drop_pkt:
        spin_unlock_irqrestore(&tq->tx_lock, flags);
drop_pkt:
        tq->stats.drop_total++;
        dev_kfree_skb(skb);
        return NETDEV_TX_OK;
}

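/* ndo_start_xmit handler: dispatch the skb to its mapped tx queue. */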
static netdev_tx_t
vmxnet3_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
{
        struct vmxnet3_adapter *adapter = netdev_priv(netdev);

        BUG_ON(skb->queue_mapping > adapter->num_tx_queues);
        return vmxnet3_tq_xmit(skb,
                               &adapter->tx_queue[skb->queue_mapping],
                               adapter, netdev);
}

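/*
 * Set skb->ip_summed from the checksum information the device reported in
 * the rx completion descriptor.
 */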
static void
vmxnet3_rx_csum(struct vmxnet3_adapter *adapter,
                struct sk_buff *skb,
                union Vmxnet3_GenericDesc *gdesc)
{
        if (!gdesc->rcd.cnc && adapter->netdev->features & NETIF_F_RXCSUM) {
                /* typical case: TCP/UDP over IP and both csums are correct */
                if ((le32_to_cpu(gdesc->dword[3]) & VMXNET3_RCD_CSUM_OK) ==
                                                        VMXNET3_RCD_CSUM_OK) {
                        skb->ip_summed = CHECKSUM_UNNECESSARY;
                        BUG_ON(!(gdesc->rcd.tcp || gdesc->rcd.udp));
                        BUG_ON(!(gdesc->rcd.v4  || gdesc->rcd.v6));
                        BUG_ON(gdesc->rcd.frg);
                } else {
                        if (gdesc->rcd.csum) {
                                skb->csum = htons(gdesc->rcd.csum);
                                skb->ip_summed = CHECKSUM_PARTIAL;
                        } else {
                                skb_checksum_none_assert(skb);
                        }
                }
        } else {
                skb_checksum_none_assert(skb);
        }
}

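/* Account an errored rx completion and drop any partially assembled skb. */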
static void
vmxnet3_rx_error(struct vmxnet3_rx_queue *rq, struct Vmxnet3_RxCompDesc *rcd,
                 struct vmxnet3_rx_ctx *ctx,  struct vmxnet3_adapter *adapter)
{
        rq->stats.drop_err++;
        if (!rcd->fcs)
                rq->stats.drop_fcs++;

        rq->stats.drop_total++;

        /*
         * We do not unmap and chain the rx buffer to the skb.
         * We basically pretend this buffer is not used and will be recycled
         * by vmxnet3_rq_alloc_rx_buf()
         */

        /*
         * ctx->skb may be NULL if this is the first and the only one
         * desc for the pkt
         */
        if (ctx->skb)
                dev_kfree_skb_irq(ctx->skb);

        ctx->skb = NULL;
}

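/*
 * Rx processing under NAPI: walk the completion ring for up to @quota
 * descriptors, assemble multi-buffer packets, refill rx buffers in place,
 * and hand completed skbs to the stack. Returns the number of completion
 * descriptors processed.
 */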
static int
vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
                       struct vmxnet3_adapter *adapter, int quota)
{
        static const u32 rxprod_reg[2] = {
                VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2
        };
        u32 num_rxd = 0;
        bool skip_page_frags = false;
        struct Vmxnet3_RxCompDesc *rcd;
        struct vmxnet3_rx_ctx *ctx = &rq->rx_ctx;
#ifdef __BIG_ENDIAN_BITFIELD
        struct Vmxnet3_RxDesc rxCmdDesc;
        struct Vmxnet3_RxCompDesc rxComp;
#endif
        vmxnet3_getRxComp(rcd, &rq->comp_ring.base[rq->comp_ring.next2proc].rcd,
                          &rxComp);
        while (rcd->gen == rq->comp_ring.gen) {
                struct vmxnet3_rx_buf_info *rbi;
                struct sk_buff *skb, *new_skb = NULL;
                struct page *new_page = NULL;
                int num_to_alloc;
                struct Vmxnet3_RxDesc *rxd;
                u32 idx, ring_idx;
                struct vmxnet3_cmd_ring *ring = NULL;
                if (num_rxd >= quota) {
                        /* we may stop even before we see the EOP desc of
                         * the current pkt
                         */
                        break;
                }
                num_rxd++;
                BUG_ON(rcd->rqID != rq->qid && rcd->rqID != rq->qid2);
                idx = rcd->rxdIdx;
                ring_idx = rcd->rqID < adapter->num_rx_queues ? 0 : 1;
                ring = rq->rx_ring + ring_idx;
                vmxnet3_getRxDesc(rxd, &rq->rx_ring[ring_idx].base[idx].rxd,
                                  &rxCmdDesc);
                rbi = rq->buf_info[ring_idx] + idx;

                BUG_ON(rxd->addr != rbi->dma_addr ||
                       rxd->len != rbi->len);

                if (unlikely(rcd->eop && rcd->err)) {
                        vmxnet3_rx_error(rq, rcd, ctx, adapter);
                        goto rcd_done;
                }

                if (rcd->sop) { /* first buf of the pkt */
                        BUG_ON(rxd->btype != VMXNET3_RXD_BTYPE_HEAD ||
                               rcd->rqID != rq->qid);

                        BUG_ON(rbi->buf_type != VMXNET3_RX_BUF_SKB);
                        BUG_ON(ctx->skb != NULL || rbi->skb == NULL);

                        if (unlikely(rcd->len == 0)) {
                                /* Pretend the rx buffer is skipped. */
                                BUG_ON(!(rcd->sop && rcd->eop));
                                netdev_dbg(adapter->netdev,
                                        "rxRing[%u][%u] 0 length\n",
                                        ring_idx, idx);
                                goto rcd_done;
                        }

                        skip_page_frags = false;
                        ctx->skb = rbi->skb;
                        new_skb = netdev_alloc_skb_ip_align(adapter->netdev,
                                                            rbi->len);
                        if (new_skb == NULL) {
                                /* Skb allocation failed, do not hand over this
                                 * skb to the stack. Reuse it. Drop the existing
                                 * pkt.
                                 */
                                rq->stats.rx_buf_alloc_failure++;
                                ctx->skb = NULL;
                                rq->stats.drop_total++;
                                skip_page_frags = true;
                                goto rcd_done;
                        }

                        pci_unmap_single(adapter->pdev, rbi->dma_addr, rbi->len,
                                         PCI_DMA_FROMDEVICE);

#ifdef VMXNET3_RSS
                        if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE &&
                            (adapter->netdev->features & NETIF_F_RXHASH))
                                ctx->skb->rxhash = le32_to_cpu(rcd->rssHash);
#endif
                        skb_put(ctx->skb, rcd->len);

                        /* Immediate refill */
                        rbi->skb = new_skb;
                        rbi->dma_addr = pci_map_single(adapter->pdev,
                                                       rbi->skb->data, rbi->len,
                                                       PCI_DMA_FROMDEVICE);
                        rxd->addr = cpu_to_le64(rbi->dma_addr);
                        rxd->len = rbi->len;

                } else {
                        BUG_ON(ctx->skb == NULL && !skip_page_frags);

                        /* non SOP buffer must be type 1 in most cases */
                        BUG_ON(rbi->buf_type != VMXNET3_RX_BUF_PAGE);
                        BUG_ON(rxd->btype != VMXNET3_RXD_BTYPE_BODY);

                        /* If an sop buffer was dropped, skip all
                         * following non-sop fragments. They will be reused.
                         */
                        if (skip_page_frags)
                                goto rcd_done;

                        new_page = alloc_page(GFP_ATOMIC);
                        if (unlikely(new_page == NULL)) {
                                /* Replacement page frag could not be allocated.
                                 * Reuse this page. Drop the pkt and free the
                                 * skb which contained this page as a frag. Skip
                                 * processing all the following non-sop frags.
                                 */
                                rq->stats.rx_buf_alloc_failure++;
                                dev_kfree_skb(ctx->skb);
                                ctx->skb = NULL;
                                skip_page_frags = true;
                                goto rcd_done;
                        }

                        if (rcd->len) {
                                pci_unmap_page(adapter->pdev,
                                               rbi->dma_addr, rbi->len,
                                               PCI_DMA_FROMDEVICE);

                                vmxnet3_append_frag(ctx->skb, rcd, rbi);
                        }

                        /* Immediate refill */
                        rbi->page = new_page;
                        rbi->dma_addr = pci_map_page(adapter->pdev, rbi->page,
                                                     0, PAGE_SIZE,
                                                     PCI_DMA_FROMDEVICE);
                        rxd->addr = cpu_to_le64(rbi->dma_addr);
                        rxd->len = rbi->len;
                }


                skb = ctx->skb;
                if (rcd->eop) {
                        skb->len += skb->data_len;

                        vmxnet3_rx_csum(adapter, skb,
                                        (union Vmxnet3_GenericDesc *)rcd);
                        skb->protocol = eth_type_trans(skb, adapter->netdev);

                        if (unlikely(rcd->ts))
                                __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rcd->tci);

                        if (adapter->netdev->features & NETIF_F_LRO)
                                netif_receive_skb(skb);
                        else
                                napi_gro_receive(&rq->napi, skb);

                        ctx->skb = NULL;
                }

rcd_done:
                /* device may have skipped some rx descs */
                ring->next2comp = idx;
                num_to_alloc = vmxnet3_cmd_ring_desc_avail(ring);
                ring = rq->rx_ring + ring_idx;
                while (num_to_alloc) {
                        vmxnet3_getRxDesc(rxd, &ring->base[ring->next2fill].rxd,
                                          &rxCmdDesc);
                        BUG_ON(!rxd->addr);

                        /* Recv desc is ready to be used by the device */
                        rxd->gen = ring->gen;
                        vmxnet3_cmd_ring_adv_next2fill(ring);
                        num_to_alloc--;
                }

                /* if needed, update the register */
                if (unlikely(rq->shared->updateRxProd)) {
                        VMXNET3_WRITE_BAR0_REG(adapter,
                                               rxprod_reg[ring_idx] + rq->qid * 8,
                                               ring->next2fill);
                }

                vmxnet3_comp_ring_adv_next2proc(&rq->comp_ring);
                vmxnet3_getRxComp(rcd,
                                  &rq->comp_ring.base[rq->comp_ring.next2proc].rcd, &rxComp);
        }

        return num_rxd;
}

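/* Unmap and free every outstanding rx buffer and reset both rings' state. */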
static void
vmxnet3_rq_cleanup(struct vmxnet3_rx_queue *rq,
                   struct vmxnet3_adapter *adapter)
{
        u32 i, ring_idx;
        struct Vmxnet3_RxDesc *rxd;

        for (ring_idx = 0; ring_idx < 2; ring_idx++) {
                for (i = 0; i < rq->rx_ring[ring_idx].size; i++) {
#ifdef __BIG_ENDIAN_BITFIELD
                        struct Vmxnet3_RxDesc rxDesc;
#endif
                        vmxnet3_getRxDesc(rxd,
                                &rq->rx_ring[ring_idx].base[i].rxd, &rxDesc);

                        if (rxd->btype == VMXNET3_RXD_BTYPE_HEAD &&
                                        rq->buf_info[ring_idx][i].skb) {
                                pci_unmap_single(adapter->pdev, rxd->addr,
                                                 rxd->len, PCI_DMA_FROMDEVICE);
                                dev_kfree_skb(rq->buf_info[ring_idx][i].skb);
                                rq->buf_info[ring_idx][i].skb = NULL;
                        } else if (rxd->btype == VMXNET3_RXD_BTYPE_BODY &&
                                        rq->buf_info[ring_idx][i].page) {
                                pci_unmap_page(adapter->pdev, rxd->addr,
                                               rxd->len, PCI_DMA_FROMDEVICE);
                                put_page(rq->buf_info[ring_idx][i].page);
                                rq->buf_info[ring_idx][i].page = NULL;
                        }
                }

                rq->rx_ring[ring_idx].gen = VMXNET3_INIT_GEN;
                rq->rx_ring[ring_idx].next2fill =
                                        rq->rx_ring[ring_idx].next2comp = 0;
        }

        rq->comp_ring.gen = VMXNET3_INIT_GEN;
        rq->comp_ring.next2proc = 0;
}


static void
vmxnet3_rq_cleanup_all(struct vmxnet3_adapter *adapter)
{
        int i;

        for (i = 0; i < adapter->num_rx_queues; i++)
                vmxnet3_rq_cleanup(&adapter->rx_queue[i], adapter);
}

1388 static void vmxnet3_rq_destroy(struct vmxnet3_rx_queue *rq,
1389                                struct vmxnet3_adapter *adapter)
1390 {
1391         int i;
1392         int j;
1393
1394         /* all rx buffers must have already been freed */
1395         for (i = 0; i < 2; i++) {
1396                 if (rq->buf_info[i]) {
1397                         for (j = 0; j < rq->rx_ring[i].size; j++)
1398                                 BUG_ON(rq->buf_info[i][j].page != NULL);
1399                 }
1400         }
1401
1402
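        /*
         * buf_info[0] and buf_info[1] are carved out of one allocation in
         * vmxnet3_rq_create(), so freeing the first pointer releases the
         * buf_info arrays of both rings.
         */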
1403         kfree(rq->buf_info[0]);
1404
1405         for (i = 0; i < 2; i++) {
1406                 if (rq->rx_ring[i].base) {
1407                         pci_free_consistent(adapter->pdev, rq->rx_ring[i].size
1408                                             * sizeof(struct Vmxnet3_RxDesc),
1409                                             rq->rx_ring[i].base,
1410                                             rq->rx_ring[i].basePA);
1411                         rq->rx_ring[i].base = NULL;
1412                 }
1413                 rq->buf_info[i] = NULL;
1414         }
1415
1416         if (rq->comp_ring.base) {
1417                 pci_free_consistent(adapter->pdev, rq->comp_ring.size *
1418                                     sizeof(struct Vmxnet3_RxCompDesc),
1419                                     rq->comp_ring.base, rq->comp_ring.basePA);
1420                 rq->comp_ring.base = NULL;
1421         }
1422 }
1423
1424
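/*
 * Reset a rx queue and pre-fill both of its rings. Ring 0 interleaves
 * buffer types per packet: with rx_buf_per_pkt == 3, for example, the
 * pattern is SKB, PAGE, PAGE, SKB, PAGE, PAGE, ... (one skb buffer for
 * the packet head, page buffers for the rest). Ring 1 holds page
 * buffers only.
 */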
1425 static int
1426 vmxnet3_rq_init(struct vmxnet3_rx_queue *rq,
1427                 struct vmxnet3_adapter  *adapter)
1428 {
1429         int i;
1430
1431         /* initialize buf_info */
1432         for (i = 0; i < rq->rx_ring[0].size; i++) {
1433
1434                 /* 1st buf for a pkt is skbuff */
1435                 if (i % adapter->rx_buf_per_pkt == 0) {
1436                         rq->buf_info[0][i].buf_type = VMXNET3_RX_BUF_SKB;
1437                         rq->buf_info[0][i].len = adapter->skb_buf_size;
1438                 } else { /* subsequent bufs for a pkt are frags */
1439                         rq->buf_info[0][i].buf_type = VMXNET3_RX_BUF_PAGE;
1440                         rq->buf_info[0][i].len = PAGE_SIZE;
1441                 }
1442         }
1443         for (i = 0; i < rq->rx_ring[1].size; i++) {
1444                 rq->buf_info[1][i].buf_type = VMXNET3_RX_BUF_PAGE;
1445                 rq->buf_info[1][i].len = PAGE_SIZE;
1446         }
1447
1448         /* reset internal state and allocate buffers for both rings */
1449         for (i = 0; i < 2; i++) {
1450                 rq->rx_ring[i].next2fill = rq->rx_ring[i].next2comp = 0;
1451
1452                 memset(rq->rx_ring[i].base, 0, rq->rx_ring[i].size *
1453                        sizeof(struct Vmxnet3_RxDesc));
1454                 rq->rx_ring[i].gen = VMXNET3_INIT_GEN;
1455         }
1456         if (vmxnet3_rq_alloc_rx_buf(rq, 0, rq->rx_ring[0].size - 1,
1457                                     adapter) == 0) {
1458                 /* the 1st ring needs at least 1 rx buffer */
1459                 return -ENOMEM;
1460         }
1461         vmxnet3_rq_alloc_rx_buf(rq, 1, rq->rx_ring[1].size - 1, adapter);
1462
1463         /* reset the comp ring */
1464         rq->comp_ring.next2proc = 0;
1465         memset(rq->comp_ring.base, 0, rq->comp_ring.size *
1466                sizeof(struct Vmxnet3_RxCompDesc));
1467         rq->comp_ring.gen = VMXNET3_INIT_GEN;
1468
1469         /* reset rxctx */
1470         rq->rx_ctx.skb = NULL;
1471
1472         /* stats are not reset */
1473         return 0;
1474 }
1475
1476
1477 static int
1478 vmxnet3_rq_init_all(struct vmxnet3_adapter *adapter)
1479 {
1480         int i, err = 0;
1481
1482         for (i = 0; i < adapter->num_rx_queues; i++) {
1483                 err = vmxnet3_rq_init(&adapter->rx_queue[i], adapter);
1484                 if (unlikely(err)) {
1485                         dev_err(&adapter->netdev->dev, "%s: failed to "
1486                                 "initialize rx queue %i\n",
1487                                 adapter->netdev->name, i);
1488                         break;
1489                 }
1490         }
1491         return err;
1492
1493 }
1494
1495
1496 static int
1497 vmxnet3_rq_create(struct vmxnet3_rx_queue *rq, struct vmxnet3_adapter *adapter)
1498 {
1499         int i;
1500         size_t sz;
1501         struct vmxnet3_rx_buf_info *bi;
1502
1503         for (i = 0; i < 2; i++) {
1504
1505                 sz = rq->rx_ring[i].size * sizeof(struct Vmxnet3_RxDesc);
1506                 rq->rx_ring[i].base = pci_alloc_consistent(adapter->pdev, sz,
1507                                                         &rq->rx_ring[i].basePA);
1508                 if (!rq->rx_ring[i].base) {
1509                         netdev_err(adapter->netdev,
1510                                    "failed to allocate rx ring %d\n", i);
1511                         goto err;
1512                 }
1513         }
1514
1515         sz = rq->comp_ring.size * sizeof(struct Vmxnet3_RxCompDesc);
1516         rq->comp_ring.base = pci_alloc_consistent(adapter->pdev, sz,
1517                                                   &rq->comp_ring.basePA);
1518         if (!rq->comp_ring.base) {
1519                 netdev_err(adapter->netdev, "failed to allocate rx comp ring\n");
1520                 goto err;
1521         }
1522
1523         sz = sizeof(struct vmxnet3_rx_buf_info) * (rq->rx_ring[0].size +
1524                                                    rq->rx_ring[1].size);
1525         bi = kzalloc(sz, GFP_KERNEL);
1526         if (!bi)
1527                 goto err;
1528
1529         rq->buf_info[0] = bi;
1530         rq->buf_info[1] = bi + rq->rx_ring[0].size;
1531
1532         return 0;
1533
1534 err:
1535         vmxnet3_rq_destroy(rq, adapter);
1536         return -ENOMEM;
1537 }
1538
1539
1540 static int
1541 vmxnet3_rq_create_all(struct vmxnet3_adapter *adapter)
1542 {
1543         int i, err = 0;
1544
1545         for (i = 0; i < adapter->num_rx_queues; i++) {
1546                 err = vmxnet3_rq_create(&adapter->rx_queue[i], adapter);
1547                 if (unlikely(err)) {
1548                         dev_err(&adapter->netdev->dev,
1549                                 "%s: failed to create rx queue %i\n",
1550                                 adapter->netdev->name, i);
1551                         goto err_out;
1552                 }
1553         }
1554         return err;
1555 err_out:
1556         vmxnet3_rq_destroy_all(adapter);
1557         return err;
1558
1559 }
1560
1561 /* Multiple queue aware polling function for tx and rx */
1562
1563 static int
1564 vmxnet3_do_poll(struct vmxnet3_adapter *adapter, int budget)
1565 {
1566         int rcd_done = 0, i;
1567         if (unlikely(adapter->shared->ecr))
1568                 vmxnet3_process_events(adapter);
1569         for (i = 0; i < adapter->num_tx_queues; i++)
1570                 vmxnet3_tq_tx_complete(&adapter->tx_queue[i], adapter);
1571
1572         for (i = 0; i < adapter->num_rx_queues; i++)
1573                 rcd_done += vmxnet3_rq_rx_complete(&adapter->rx_queue[i],
1574                                                    adapter, budget);
1575         return rcd_done;
1576 }
1577
1578
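/*
 * Standard NAPI contract: consuming less than @budget means the queues
 * are drained, so polling is completed and interrupts are re-enabled;
 * returning the full budget keeps the NAPI instance scheduled.
 */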
1579 static int
1580 vmxnet3_poll(struct napi_struct *napi, int budget)
1581 {
1582         struct vmxnet3_rx_queue *rx_queue = container_of(napi,
1583                                           struct vmxnet3_rx_queue, napi);
1584         int rxd_done;
1585
1586         rxd_done = vmxnet3_do_poll(rx_queue->adapter, budget);
1587
1588         if (rxd_done < budget) {
1589                 napi_complete(napi);
1590                 vmxnet3_enable_all_intrs(rx_queue->adapter);
1591         }
1592         return rxd_done;
1593 }
1594
1595 /*
1596  * NAPI polling function for MSI-X mode with multiple Rx queues
1597  * Returns the # of NAPI credits consumed (# of rx descriptors processed)
1598  */
1599
1600 static int
1601 vmxnet3_poll_rx_only(struct napi_struct *napi, int budget)
1602 {
1603         struct vmxnet3_rx_queue *rq = container_of(napi,
1604                                                 struct vmxnet3_rx_queue, napi);
1605         struct vmxnet3_adapter *adapter = rq->adapter;
1606         int rxd_done;
1607
1608         /* When sharing interrupt with corresponding tx queue, process
1609          * tx completions in that queue as well
1610          */
1611         if (adapter->share_intr == VMXNET3_INTR_BUDDYSHARE) {
1612                 struct vmxnet3_tx_queue *tq =
1613                                 &adapter->tx_queue[rq - adapter->rx_queue];
1614                 vmxnet3_tq_tx_complete(tq, adapter);
1615         }
1616
1617         rxd_done = vmxnet3_rq_rx_complete(rq, adapter, budget);
1618
1619         if (rxd_done < budget) {
1620                 napi_complete(napi);
1621                 vmxnet3_enable_intr(adapter, rq->comp_ring.intr_idx);
1622         }
1623         return rxd_done;
1624 }
1625
1626
1627 #ifdef CONFIG_PCI_MSI
1628
1629 /*
1630  * Handle completion interrupts on tx queues
1631  * Returns whether or not the intr is handled
1632  */
1633
1634 static irqreturn_t
1635 vmxnet3_msix_tx(int irq, void *data)
1636 {
1637         struct vmxnet3_tx_queue *tq = data;
1638         struct vmxnet3_adapter *adapter = tq->adapter;
1639
1640         if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1641                 vmxnet3_disable_intr(adapter, tq->comp_ring.intr_idx);
1642
1643         /* Handle the case where only one irq is allocated for all tx queues */
1644         if (adapter->share_intr == VMXNET3_INTR_TXSHARE) {
1645                 int i;
1646                 for (i = 0; i < adapter->num_tx_queues; i++) {
1647                         struct vmxnet3_tx_queue *txq = &adapter->tx_queue[i];
1648                         vmxnet3_tq_tx_complete(txq, adapter);
1649                 }
1650         } else {
1651                 vmxnet3_tq_tx_complete(tq, adapter);
1652         }
1653         vmxnet3_enable_intr(adapter, tq->comp_ring.intr_idx);
1654
1655         return IRQ_HANDLED;
1656 }
1657
1658
1659 /*
1660  * Handle completion interrupts on rx queues. Returns whether or not the
1661  * intr is handled
1662  */
1663
1664 static irqreturn_t
1665 vmxnet3_msix_rx(int irq, void *data)
1666 {
1667         struct vmxnet3_rx_queue *rq = data;
1668         struct vmxnet3_adapter *adapter = rq->adapter;
1669
1670         /* disable intr if needed */
1671         if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1672                 vmxnet3_disable_intr(adapter, rq->comp_ring.intr_idx);
1673         napi_schedule(&rq->napi);
1674
1675         return IRQ_HANDLED;
1676 }
1677
1678 /*
1679  *----------------------------------------------------------------------------
1680  *
1681  * vmxnet3_msix_event --
1682  *
1683  *    vmxnet3 msix event intr handler
1684  *
1685  * Result:
1686  *    whether or not the intr is handled
1687  *
1688  *----------------------------------------------------------------------------
1689  */
1690
1691 static irqreturn_t
1692 vmxnet3_msix_event(int irq, void *data)
1693 {
1694         struct net_device *dev = data;
1695         struct vmxnet3_adapter *adapter = netdev_priv(dev);
1696
1697         /* disable intr if needed */
1698         if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1699                 vmxnet3_disable_intr(adapter, adapter->intr.event_intr_idx);
1700
1701         if (adapter->shared->ecr)
1702                 vmxnet3_process_events(adapter);
1703
1704         vmxnet3_enable_intr(adapter, adapter->intr.event_intr_idx);
1705
1706         return IRQ_HANDLED;
1707 }
1708
1709 #endif /* CONFIG_PCI_MSI  */
1710
1711
1712 /* Interrupt handler for vmxnet3  */
1713 static irqreturn_t
1714 vmxnet3_intr(int irq, void *dev_id)
1715 {
1716         struct net_device *dev = dev_id;
1717         struct vmxnet3_adapter *adapter = netdev_priv(dev);
1718
1719         if (adapter->intr.type == VMXNET3_IT_INTX) {
1720                 u32 icr = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_ICR);
1721                 if (unlikely(icr == 0))
1722                         /* not ours */
1723                         return IRQ_NONE;
1724         }
1725
1726
1727         /* disable intr if needed */
1728         if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1729                 vmxnet3_disable_all_intrs(adapter);
1730
1731         napi_schedule(&adapter->rx_queue[0].napi);
1732
1733         return IRQ_HANDLED;
1734 }
1735
1736 #ifdef CONFIG_NET_POLL_CONTROLLER
1737
1738 /* netpoll callback. */
1739 static void
1740 vmxnet3_netpoll(struct net_device *netdev)
1741 {
1742         struct vmxnet3_adapter *adapter = netdev_priv(netdev);
1743
1744         if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1745                 vmxnet3_disable_all_intrs(adapter);
1746
1747         vmxnet3_do_poll(adapter, adapter->rx_queue[0].rx_ring[0].size);
1748         vmxnet3_enable_all_intrs(adapter);
1749
1750 }
1751 #endif  /* CONFIG_NET_POLL_CONTROLLER */
1752
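/*
 * Request one irq per allocated intr vector and record each queue's
 * vector in its completion ring. As an illustration, with 4 tx queues,
 * 4 rx queues and no vector sharing, MSI-X vectors are handed out as
 * 0-3 for tx, 4-7 for rx and 8 for events, giving num_intrs == 9.
 */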
1753 static int
1754 vmxnet3_request_irqs(struct vmxnet3_adapter *adapter)
1755 {
1756         struct vmxnet3_intr *intr = &adapter->intr;
1757         int err = 0, i;
1758         int vector = 0;
1759
1760 #ifdef CONFIG_PCI_MSI
1761         if (adapter->intr.type == VMXNET3_IT_MSIX) {
1762                 for (i = 0; i < adapter->num_tx_queues; i++) {
1763                         if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE) {
1764                                 sprintf(adapter->tx_queue[i].name, "%s-tx-%d",
1765                                         adapter->netdev->name, vector);
1766                                 err = request_irq(
1767                                               intr->msix_entries[vector].vector,
1768                                               vmxnet3_msix_tx, 0,
1769                                               adapter->tx_queue[i].name,
1770                                               &adapter->tx_queue[i]);
1771                         } else {
1772                                 sprintf(adapter->tx_queue[i].name, "%s-rxtx-%d",
1773                                         adapter->netdev->name, vector);
1774                         }
1775                         if (err) {
1776                                 dev_err(&adapter->netdev->dev,
1777                                         "Failed to request irq for MSIX, %s, "
1778                                         "error %d\n",
1779                                         adapter->tx_queue[i].name, err);
1780                                 return err;
1781                         }
1782
1783                         /* Handle the case where only 1 MSIx was allocated for
1784                          * all tx queues */
1785                         if (adapter->share_intr == VMXNET3_INTR_TXSHARE) {
1786                                 for (; i < adapter->num_tx_queues; i++)
1787                                         adapter->tx_queue[i].comp_ring.intr_idx
1788                                                                 = vector;
1789                                 vector++;
1790                                 break;
1791                         } else {
1792                                 adapter->tx_queue[i].comp_ring.intr_idx
1793                                                                 = vector++;
1794                         }
1795                 }
1796                 if (adapter->share_intr == VMXNET3_INTR_BUDDYSHARE)
1797                         vector = 0;
1798
1799                 for (i = 0; i < adapter->num_rx_queues; i++) {
1800                         if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE)
1801                                 sprintf(adapter->rx_queue[i].name, "%s-rx-%d",
1802                                         adapter->netdev->name, vector);
1803                         else
1804                                 sprintf(adapter->rx_queue[i].name, "%s-rxtx-%d",
1805                                         adapter->netdev->name, vector);
1806                         err = request_irq(intr->msix_entries[vector].vector,
1807                                           vmxnet3_msix_rx, 0,
1808                                           adapter->rx_queue[i].name,
1809                                           &(adapter->rx_queue[i]));
1810                         if (err) {
1811                                 netdev_err(adapter->netdev,
1812                                            "Failed to request irq for MSIX, "
1813                                            "%s, error %d\n",
1814                                            adapter->rx_queue[i].name, err);
1815                                 return err;
1816                         }
1817
1818                         adapter->rx_queue[i].comp_ring.intr_idx = vector++;
1819                 }
1820
1821                 sprintf(intr->event_msi_vector_name, "%s-event-%d",
1822                         adapter->netdev->name, vector);
1823                 err = request_irq(intr->msix_entries[vector].vector,
1824                                   vmxnet3_msix_event, 0,
1825                                   intr->event_msi_vector_name, adapter->netdev);
1826                 intr->event_intr_idx = vector;
1827
1828         } else if (intr->type == VMXNET3_IT_MSI) {
1829                 adapter->num_rx_queues = 1;
1830                 err = request_irq(adapter->pdev->irq, vmxnet3_intr, 0,
1831                                   adapter->netdev->name, adapter->netdev);
1832         } else {
1833 #endif
1834                 adapter->num_rx_queues = 1;
1835                 err = request_irq(adapter->pdev->irq, vmxnet3_intr,
1836                                   IRQF_SHARED, adapter->netdev->name,
1837                                   adapter->netdev);
1838 #ifdef CONFIG_PCI_MSI
1839         }
1840 #endif
1841         intr->num_intrs = vector + 1;
1842         if (err) {
1843                 netdev_err(adapter->netdev,
1844                            "Failed to request irq (intr type:%d), error %d\n",
1845                            intr->type, err);
1846         } else {
1847                 /* Number of rx queues will not change after this */
1848                 for (i = 0; i < adapter->num_rx_queues; i++) {
1849                         struct vmxnet3_rx_queue *rq = &adapter->rx_queue[i];
1850                         rq->qid = i;
1851                         rq->qid2 = i + adapter->num_rx_queues;
1852                 }
1853
1854
1855
1856                 /* init our intr settings */
1857                 for (i = 0; i < intr->num_intrs; i++)
1858                         intr->mod_levels[i] = UPT1_IML_ADAPTIVE;
1859                 if (adapter->intr.type != VMXNET3_IT_MSIX) {
1860                         adapter->intr.event_intr_idx = 0;
1861                         for (i = 0; i < adapter->num_tx_queues; i++)
1862                                 adapter->tx_queue[i].comp_ring.intr_idx = 0;
1863                         adapter->rx_queue[0].comp_ring.intr_idx = 0;
1864                 }
1865
1866                 netdev_info(adapter->netdev,
1867                             "intr type %u, mode %u, %u vectors allocated\n",
1868                             intr->type, intr->mask_mode, intr->num_intrs);
1869         }
1870
1871         return err;
1872 }
1873
1874
1875 static void
1876 vmxnet3_free_irqs(struct vmxnet3_adapter *adapter)
1877 {
1878         struct vmxnet3_intr *intr = &adapter->intr;
1879         BUG_ON(intr->type == VMXNET3_IT_AUTO || intr->num_intrs <= 0);
1880
1881         switch (intr->type) {
1882 #ifdef CONFIG_PCI_MSI
1883         case VMXNET3_IT_MSIX:
1884         {
1885                 int i, vector = 0;
1886
1887                 if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE) {
1888                         for (i = 0; i < adapter->num_tx_queues; i++) {
1889                                 free_irq(intr->msix_entries[vector++].vector,
1890                                          &(adapter->tx_queue[i]));
1891                                 if (adapter->share_intr == VMXNET3_INTR_TXSHARE)
1892                                         break;
1893                         }
1894                 }
1895
1896                 for (i = 0; i < adapter->num_rx_queues; i++) {
1897                         free_irq(intr->msix_entries[vector++].vector,
1898                                  &(adapter->rx_queue[i]));
1899                 }
1900
1901                 BUG_ON(vector >= intr->num_intrs);
1902                 free_irq(intr->msix_entries[vector].vector,
1903                          adapter->netdev);
1904                 break;
1905         }
1906 #endif
1907         case VMXNET3_IT_MSI:
1908                 free_irq(adapter->pdev->irq, adapter->netdev);
1909                 break;
1910         case VMXNET3_IT_INTX:
1911                 free_irq(adapter->pdev->irq, adapter->netdev);
1912                 break;
1913         default:
1914                 BUG();
1915         }
1916 }
1917
1918
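/*
 * vfTable is a bitmap with one bit per VLAN id (assuming the usual
 * VMXNET3_SET_VFTABLE_ENTRY() semantics of vfTable[vid >> 5] |=
 * 1 << (vid & 31), vid 100 lands in bit 4 of vfTable[3]). Entry 0 is
 * set so that untagged packets always pass the filter.
 */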
1919 static void
1920 vmxnet3_restore_vlan(struct vmxnet3_adapter *adapter)
1921 {
1922         u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
1923         u16 vid;
1924
1925         /* allow untagged pkts */
1926         VMXNET3_SET_VFTABLE_ENTRY(vfTable, 0);
1927
1928         for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
1929                 VMXNET3_SET_VFTABLE_ENTRY(vfTable, vid);
1930 }
1931
1932
1933 static int
1934 vmxnet3_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1935 {
1936         struct vmxnet3_adapter *adapter = netdev_priv(netdev);
1937
1938         if (!(netdev->flags & IFF_PROMISC)) {
1939                 u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
1940                 unsigned long flags;
1941
1942                 VMXNET3_SET_VFTABLE_ENTRY(vfTable, vid);
1943                 spin_lock_irqsave(&adapter->cmd_lock, flags);
1944                 VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
1945                                        VMXNET3_CMD_UPDATE_VLAN_FILTERS);
1946                 spin_unlock_irqrestore(&adapter->cmd_lock, flags);
1947         }
1948
1949         set_bit(vid, adapter->active_vlans);
1950
1951         return 0;
1952 }
1953
1954
1955 static int
1956 vmxnet3_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid)
1957 {
1958         struct vmxnet3_adapter *adapter = netdev_priv(netdev);
1959
1960         if (!(netdev->flags & IFF_PROMISC)) {
1961                 u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
1962                 unsigned long flags;
1963
1964                 VMXNET3_CLEAR_VFTABLE_ENTRY(vfTable, vid);
1965                 spin_lock_irqsave(&adapter->cmd_lock, flags);
1966                 VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
1967                                        VMXNET3_CMD_UPDATE_VLAN_FILTERS);
1968                 spin_unlock_irqrestore(&adapter->cmd_lock, flags);
1969         }
1970
1971         clear_bit(vid, adapter->active_vlans);
1972
1973         return 0;
1974 }
1975
1976
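/*
 * Flatten the netdev mcast list into one contiguous array of 6-byte MAC
 * addresses for the device, e.g. 3 addresses yield an 18-byte table.
 * Since mfTableLen is a u16, lists larger than 0xffff bytes are not
 * copied and the caller falls back to ALL_MULTI.
 */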
1977 static u8 *
1978 vmxnet3_copy_mc(struct net_device *netdev)
1979 {
1980         u8 *buf = NULL;
1981         u32 sz = netdev_mc_count(netdev) * ETH_ALEN;
1982
1983         /* struct Vmxnet3_RxFilterConf.mfTableLen is u16. */
1984         if (sz <= 0xffff) {
1985                 /* We may be called with BH disabled */
1986                 buf = kmalloc(sz, GFP_ATOMIC);
1987                 if (buf) {
1988                         struct netdev_hw_addr *ha;
1989                         int i = 0;
1990
1991                         netdev_for_each_mc_addr(ha, netdev)
1992                                 memcpy(buf + i++ * ETH_ALEN, ha->addr,
1993                                        ETH_ALEN);
1994                 }
1995         }
1996         return buf;
1997 }
1998
1999
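/*
 * Rebuild the rx mode from the netdev flags: VMXNET3_RXM_UCAST is always
 * set; IFF_PROMISC adds VMXNET3_RXM_PROMISC and clears the VLAN filter
 * table, IFF_BROADCAST adds VMXNET3_RXM_BCAST, and a non-empty mcast
 * list either programs a flattened address table or falls back to
 * VMXNET3_RXM_ALL_MULTI.
 */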
2000 static void
2001 vmxnet3_set_mc(struct net_device *netdev)
2002 {
2003         struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2004         unsigned long flags;
2005         struct Vmxnet3_RxFilterConf *rxConf =
2006                                         &adapter->shared->devRead.rxFilterConf;
2007         u8 *new_table = NULL;
2008         u32 new_mode = VMXNET3_RXM_UCAST;
2009
2010         if (netdev->flags & IFF_PROMISC) {
2011                 u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
2012                 memset(vfTable, 0, VMXNET3_VFT_SIZE * sizeof(*vfTable));
2013
2014                 new_mode |= VMXNET3_RXM_PROMISC;
2015         } else {
2016                 vmxnet3_restore_vlan(adapter);
2017         }
2018
2019         if (netdev->flags & IFF_BROADCAST)
2020                 new_mode |= VMXNET3_RXM_BCAST;
2021
2022         if (netdev->flags & IFF_ALLMULTI)
2023                 new_mode |= VMXNET3_RXM_ALL_MULTI;
2024         else
2025                 if (!netdev_mc_empty(netdev)) {
2026                         new_table = vmxnet3_copy_mc(netdev);
2027                         if (new_table) {
2028                                 new_mode |= VMXNET3_RXM_MCAST;
2029                                 rxConf->mfTableLen = cpu_to_le16(
2030                                         netdev_mc_count(netdev) * ETH_ALEN);
2031                                 rxConf->mfTablePA = cpu_to_le64(virt_to_phys(
2032                                                     new_table));
2033                         } else {
2034                                 netdev_info(netdev, "failed to copy mcast list"
2035                                             ", setting ALL_MULTI\n");
2036                                 new_mode |= VMXNET3_RXM_ALL_MULTI;
2037                         }
2038                 }
2039
2040
2041         if (!(new_mode & VMXNET3_RXM_MCAST)) {
2042                 rxConf->mfTableLen = 0;
2043                 rxConf->mfTablePA = 0;
2044         }
2045
2046         spin_lock_irqsave(&adapter->cmd_lock, flags);
2047         if (cpu_to_le32(new_mode) != rxConf->rxMode) {
2048                 rxConf->rxMode = cpu_to_le32(new_mode);
2049                 VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2050                                        VMXNET3_CMD_UPDATE_RX_MODE);
2051                 VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2052                                        VMXNET3_CMD_UPDATE_VLAN_FILTERS);
2053         }
2054
2055         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2056                                VMXNET3_CMD_UPDATE_MAC_FILTERS);
2057         spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2058
2059         kfree(new_table);
2060 }
2061
2062 void
2063 vmxnet3_rq_destroy_all(struct vmxnet3_adapter *adapter)
2064 {
2065         int i;
2066
2067         for (i = 0; i < adapter->num_rx_queues; i++)
2068                 vmxnet3_rq_destroy(&adapter->rx_queue[i], adapter);
2069 }
2070
2071
2072 /*
2073  *   Set up driver_shared based on settings in adapter.
2074  */
2075
2076 static void
2077 vmxnet3_setup_driver_shared(struct vmxnet3_adapter *adapter)
2078 {
2079         struct Vmxnet3_DriverShared *shared = adapter->shared;
2080         struct Vmxnet3_DSDevRead *devRead = &shared->devRead;
2081         struct Vmxnet3_TxQueueConf *tqc;
2082         struct Vmxnet3_RxQueueConf *rqc;
2083         int i;
2084
2085         memset(shared, 0, sizeof(*shared));
2086
2087         /* driver settings */
2088         shared->magic = cpu_to_le32(VMXNET3_REV1_MAGIC);
2089         devRead->misc.driverInfo.version = cpu_to_le32(
2090                                                 VMXNET3_DRIVER_VERSION_NUM);
2091         devRead->misc.driverInfo.gos.gosBits = (sizeof(void *) == 4 ?
2092                                 VMXNET3_GOS_BITS_32 : VMXNET3_GOS_BITS_64);
2093         devRead->misc.driverInfo.gos.gosType = VMXNET3_GOS_TYPE_LINUX;
2094         *((u32 *)&devRead->misc.driverInfo.gos) = cpu_to_le32(
2095                                 *((u32 *)&devRead->misc.driverInfo.gos));
2096         devRead->misc.driverInfo.vmxnet3RevSpt = cpu_to_le32(1);
2097         devRead->misc.driverInfo.uptVerSpt = cpu_to_le32(1);
2098
2099         devRead->misc.ddPA = cpu_to_le64(virt_to_phys(adapter));
2100         devRead->misc.ddLen = cpu_to_le32(sizeof(struct vmxnet3_adapter));
2101
2102         /* set up feature flags */
2103         if (adapter->netdev->features & NETIF_F_RXCSUM)
2104                 devRead->misc.uptFeatures |= UPT1_F_RXCSUM;
2105
2106         if (adapter->netdev->features & NETIF_F_LRO) {
2107                 devRead->misc.uptFeatures |= UPT1_F_LRO;
2108                 devRead->misc.maxNumRxSG = cpu_to_le16(1 + MAX_SKB_FRAGS);
2109         }
2110         if (adapter->netdev->features & NETIF_F_HW_VLAN_CTAG_RX)
2111                 devRead->misc.uptFeatures |= UPT1_F_RXVLAN;
2112
2113         devRead->misc.mtu = cpu_to_le32(adapter->netdev->mtu);
2114         devRead->misc.queueDescPA = cpu_to_le64(adapter->queue_desc_pa);
2115         devRead->misc.queueDescLen = cpu_to_le32(
2116                 adapter->num_tx_queues * sizeof(struct Vmxnet3_TxQueueDesc) +
2117                 adapter->num_rx_queues * sizeof(struct Vmxnet3_RxQueueDesc));
2118
2119         /* tx queue settings */
2120         devRead->misc.numTxQueues =  adapter->num_tx_queues;
2121         for (i = 0; i < adapter->num_tx_queues; i++) {
2122                 struct vmxnet3_tx_queue *tq = &adapter->tx_queue[i];
2123                 BUG_ON(adapter->tx_queue[i].tx_ring.base == NULL);
2124                 tqc = &adapter->tqd_start[i].conf;
2125                 tqc->txRingBasePA   = cpu_to_le64(tq->tx_ring.basePA);
2126                 tqc->dataRingBasePA = cpu_to_le64(tq->data_ring.basePA);
2127                 tqc->compRingBasePA = cpu_to_le64(tq->comp_ring.basePA);
2128                 tqc->ddPA           = cpu_to_le64(virt_to_phys(tq->buf_info));
2129                 tqc->txRingSize     = cpu_to_le32(tq->tx_ring.size);
2130                 tqc->dataRingSize   = cpu_to_le32(tq->data_ring.size);
2131                 tqc->compRingSize   = cpu_to_le32(tq->comp_ring.size);
2132                 tqc->ddLen          = cpu_to_le32(
2133                                         sizeof(struct vmxnet3_tx_buf_info) *
2134                                         tqc->txRingSize);
2135                 tqc->intrIdx        = tq->comp_ring.intr_idx;
2136         }
2137
2138         /* rx queue settings */
2139         devRead->misc.numRxQueues = adapter->num_rx_queues;
2140         for (i = 0; i < adapter->num_rx_queues; i++) {
2141                 struct vmxnet3_rx_queue *rq = &adapter->rx_queue[i];
2142                 rqc = &adapter->rqd_start[i].conf;
2143                 rqc->rxRingBasePA[0] = cpu_to_le64(rq->rx_ring[0].basePA);
2144                 rqc->rxRingBasePA[1] = cpu_to_le64(rq->rx_ring[1].basePA);
2145                 rqc->compRingBasePA  = cpu_to_le64(rq->comp_ring.basePA);
2146                 rqc->ddPA            = cpu_to_le64(virt_to_phys(
2147                                                         rq->buf_info));
2148                 rqc->rxRingSize[0]   = cpu_to_le32(rq->rx_ring[0].size);
2149                 rqc->rxRingSize[1]   = cpu_to_le32(rq->rx_ring[1].size);
2150                 rqc->compRingSize    = cpu_to_le32(rq->comp_ring.size);
2151                 rqc->ddLen           = cpu_to_le32(
2152                                         sizeof(struct vmxnet3_rx_buf_info) *
2153                                         (rqc->rxRingSize[0] +
2154                                          rqc->rxRingSize[1]));
2155                 rqc->intrIdx         = rq->comp_ring.intr_idx;
2156         }
2157
2158 #ifdef VMXNET3_RSS
2159         memset(adapter->rss_conf, 0, sizeof(*adapter->rss_conf));
2160
2161         if (adapter->rss) {
2162                 struct UPT1_RSSConf *rssConf = adapter->rss_conf;
2163                 static const uint8_t rss_key[UPT1_RSS_MAX_KEY_SIZE] = {
2164                         0x3b, 0x56, 0xd1, 0x56, 0x13, 0x4a, 0xe7, 0xac,
2165                         0xe8, 0x79, 0x09, 0x75, 0xe8, 0x65, 0x79, 0x28,
2166                         0x35, 0x12, 0xb9, 0x56, 0x7c, 0x76, 0x4b, 0x70,
2167                         0xd8, 0x56, 0xa3, 0x18, 0x9b, 0x0a, 0xee, 0xf3,
2168                         0x96, 0xa6, 0x9f, 0x8f, 0x9e, 0x8c, 0x90, 0xc9,
2169                 };
2170
2171                 devRead->misc.uptFeatures |= UPT1_F_RSS;
2172                 devRead->misc.numRxQueues = adapter->num_rx_queues;
2173                 rssConf->hashType = UPT1_RSS_HASH_TYPE_TCP_IPV4 |
2174                                     UPT1_RSS_HASH_TYPE_IPV4 |
2175                                     UPT1_RSS_HASH_TYPE_TCP_IPV6 |
2176                                     UPT1_RSS_HASH_TYPE_IPV6;
2177                 rssConf->hashFunc = UPT1_RSS_HASH_FUNC_TOEPLITZ;
2178                 rssConf->hashKeySize = UPT1_RSS_MAX_KEY_SIZE;
2179                 rssConf->indTableSize = VMXNET3_RSS_IND_TABLE_SIZE;
2180                 memcpy(rssConf->hashKey, rss_key, sizeof(rss_key));
2181
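                /*
                 * Spread rx queues evenly over the indirection table:
                 * ethtool_rxfh_indir_default(i, n) is simply i % n, so
                 * with 4 rx queues the table reads 0, 1, 2, 3, 0, 1, ...
                 */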
2182                 for (i = 0; i < rssConf->indTableSize; i++)
2183                         rssConf->indTable[i] = ethtool_rxfh_indir_default(
2184                                 i, adapter->num_rx_queues);
2185
2186                 devRead->rssConfDesc.confVer = cpu_to_le32(1);
2187                 devRead->rssConfDesc.confLen = cpu_to_le32(sizeof(*rssConf));
2188                 devRead->rssConfDesc.confPA  = cpu_to_le64(virt_to_phys(rssConf));
2189         }
2190
2191 #endif /* VMXNET3_RSS */
2192
2193         /* intr settings */
2194         devRead->intrConf.autoMask = adapter->intr.mask_mode ==
2195                                      VMXNET3_IMM_AUTO;
2196         devRead->intrConf.numIntrs = adapter->intr.num_intrs;
2197         for (i = 0; i < adapter->intr.num_intrs; i++)
2198                 devRead->intrConf.modLevels[i] = adapter->intr.mod_levels[i];
2199
2200         devRead->intrConf.eventIntrIdx = adapter->intr.event_intr_idx;
2201         devRead->intrConf.intrCtrl |= cpu_to_le32(VMXNET3_IC_DISABLE_ALL);
2202
2203         /* rx filter settings */
2204         devRead->rxFilterConf.rxMode = 0;
2205         vmxnet3_restore_vlan(adapter);
2206         vmxnet3_write_mac_addr(adapter, adapter->netdev->dev_addr);
2207
2208         /* the rest are already zeroed */
2209 }
2210
2211
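/*
 * Activation handshake: point the device at the shared memory area via
 * DSAL/DSAH, issue VMXNET3_CMD_ACTIVATE_DEV and read the command
 * register back; a non-zero result means the device rejected the
 * configuration.
 */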
2212 int
2213 vmxnet3_activate_dev(struct vmxnet3_adapter *adapter)
2214 {
2215         int err, i;
2216         u32 ret;
2217         unsigned long flags;
2218
2219         netdev_dbg(adapter->netdev, "%s: skb_buf_size %d, rx_buf_per_pkt %d,"
2220                 " ring sizes %u %u %u\n", adapter->netdev->name,
2221                 adapter->skb_buf_size, adapter->rx_buf_per_pkt,
2222                 adapter->tx_queue[0].tx_ring.size,
2223                 adapter->rx_queue[0].rx_ring[0].size,
2224                 adapter->rx_queue[0].rx_ring[1].size);
2225
2226         vmxnet3_tq_init_all(adapter);
2227         err = vmxnet3_rq_init_all(adapter);
2228         if (err) {
2229                 netdev_err(adapter->netdev,
2230                            "Failed to init rx queue, error %d\n", err);
2231                 goto rq_err;
2232         }
2233
2234         err = vmxnet3_request_irqs(adapter);
2235         if (err) {
2236                 netdev_err(adapter->netdev,
2237                            "Failed to setup irq, error %d\n", err);
2238                 goto irq_err;
2239         }
2240
2241         vmxnet3_setup_driver_shared(adapter);
2242
2243         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAL, VMXNET3_GET_ADDR_LO(
2244                                adapter->shared_pa));
2245         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAH, VMXNET3_GET_ADDR_HI(
2246                                adapter->shared_pa));
2247         spin_lock_irqsave(&adapter->cmd_lock, flags);
2248         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2249                                VMXNET3_CMD_ACTIVATE_DEV);
2250         ret = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
2251         spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2252
2253         if (ret != 0) {
2254                 netdev_err(adapter->netdev,
2255                            "Failed to activate dev: error %u\n", ret);
2256                 err = -EINVAL;
2257                 goto activate_err;
2258         }
2259
2260         for (i = 0; i < adapter->num_rx_queues; i++) {
2261                 VMXNET3_WRITE_BAR0_REG(adapter,
2262                                 VMXNET3_REG_RXPROD + i * VMXNET3_REG_ALIGN,
2263                                 adapter->rx_queue[i].rx_ring[0].next2fill);
2264                 VMXNET3_WRITE_BAR0_REG(adapter, (VMXNET3_REG_RXPROD2 +
2265                                 (i * VMXNET3_REG_ALIGN)),
2266                                 adapter->rx_queue[i].rx_ring[1].next2fill);
2267         }
2268
2269         /* Apply the rx filter settings last. */
2270         vmxnet3_set_mc(adapter->netdev);
2271
2272         /*
2273          * Check link state when first activating device. It will start the
2274          * tx queue if the link is up.
2275          */
2276         vmxnet3_check_link(adapter, true);
2277         for (i = 0; i < adapter->num_rx_queues; i++)
2278                 napi_enable(&adapter->rx_queue[i].napi);
2279         vmxnet3_enable_all_intrs(adapter);
2280         clear_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state);
2281         return 0;
2282
2283 activate_err:
2284         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAL, 0);
2285         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAH, 0);
2286         vmxnet3_free_irqs(adapter);
2287 irq_err:
2288 rq_err:
2289         /* free up buffers we allocated */
2290         vmxnet3_rq_cleanup_all(adapter);
2291         return err;
2292 }
2293
2294
2295 void
2296 vmxnet3_reset_dev(struct vmxnet3_adapter *adapter)
2297 {
2298         unsigned long flags;
2299         spin_lock_irqsave(&adapter->cmd_lock, flags);
2300         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_RESET_DEV);
2301         spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2302 }
2303
2304
2305 int
2306 vmxnet3_quiesce_dev(struct vmxnet3_adapter *adapter)
2307 {
2308         int i;
2309         unsigned long flags;
2310         if (test_and_set_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state))
2311                 return 0;
2312
2313
2314         spin_lock_irqsave(&adapter->cmd_lock, flags);
2315         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2316                                VMXNET3_CMD_QUIESCE_DEV);
2317         spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2318         vmxnet3_disable_all_intrs(adapter);
2319
2320         for (i = 0; i < adapter->num_rx_queues; i++)
2321                 napi_disable(&adapter->rx_queue[i].napi);
2322         netif_tx_disable(adapter->netdev);
2323         adapter->link_speed = 0;
2324         netif_carrier_off(adapter->netdev);
2325
2326         vmxnet3_tq_cleanup_all(adapter);
2327         vmxnet3_rq_cleanup_all(adapter);
2328         vmxnet3_free_irqs(adapter);
2329         return 0;
2330 }
2331
2332
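/*
 * MACL holds MAC bytes 0-3 and MACH holds bytes 4-5. For example, on a
 * little-endian guest (the cast below assumes one), 00:0c:29:aa:bb:cc is
 * written as MACL = 0xaa290c00 and MACH = 0x0000ccbb.
 */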
2333 static void
2334 vmxnet3_write_mac_addr(struct vmxnet3_adapter *adapter, u8 *mac)
2335 {
2336         u32 tmp;
2337
2338         tmp = *(u32 *)mac;
2339         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_MACL, tmp);
2340
2341         tmp = (mac[5] << 8) | mac[4];
2342         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_MACH, tmp);
2343 }
2344
2345
2346 static int
2347 vmxnet3_set_mac_addr(struct net_device *netdev, void *p)
2348 {
2349         struct sockaddr *addr = p;
2350         struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2351
2352         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
2353         vmxnet3_write_mac_addr(adapter, addr->sa_data);
2354
2355         return 0;
2356 }
2357
2358
2359 /* ==================== initialization and cleanup routines ============ */
2360
2361 static int
2362 vmxnet3_alloc_pci_resources(struct vmxnet3_adapter *adapter, bool *dma64)
2363 {
2364         int err;
2365         unsigned long mmio_start, mmio_len;
2366         struct pci_dev *pdev = adapter->pdev;
2367
2368         err = pci_enable_device(pdev);
2369         if (err) {
2370                 dev_err(&pdev->dev, "Failed to enable adapter: error %d\n", err);
2371                 return err;
2372         }
2373
2374         if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) {
2375                 if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)) != 0) {
2376                         dev_err(&pdev->dev,
2377                                 "pci_set_consistent_dma_mask failed\n");
2378                         err = -EIO;
2379                         goto err_set_mask;
2380                 }
2381                 *dma64 = true;
2382         } else {
2383                 if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0) {
2384                         dev_err(&pdev->dev,
2385                                 "pci_set_dma_mask failed\n");
2386                         err = -EIO;
2387                         goto err_set_mask;
2388                 }
2389                 *dma64 = false;
2390         }
2391
2392         err = pci_request_selected_regions(pdev, (1 << 2) - 1,
2393                                            vmxnet3_driver_name);
2394         if (err) {
2395                 dev_err(&pdev->dev,
2396                         "Failed to request region for adapter: error %d\n", err);
2397                 goto err_set_mask;
2398         }
2399
2400         pci_set_master(pdev);
2401
2402         mmio_start = pci_resource_start(pdev, 0);
2403         mmio_len = pci_resource_len(pdev, 0);
2404         adapter->hw_addr0 = ioremap(mmio_start, mmio_len);
2405         if (!adapter->hw_addr0) {
2406                 dev_err(&pdev->dev, "Failed to map bar0\n");
2407                 err = -EIO;
2408                 goto err_ioremap;
2409         }
2410
2411         mmio_start = pci_resource_start(pdev, 1);
2412         mmio_len = pci_resource_len(pdev, 1);
2413         adapter->hw_addr1 = ioremap(mmio_start, mmio_len);
2414         if (!adapter->hw_addr1) {
2415                 dev_err(&pdev->dev, "Failed to map bar1\n");
2416                 err = -EIO;
2417                 goto err_bar1;
2418         }
2419         return 0;
2420
2421 err_bar1:
2422         iounmap(adapter->hw_addr0);
2423 err_ioremap:
2424         pci_release_selected_regions(pdev, (1 << 2) - 1);
2425 err_set_mask:
2426         pci_disable_device(pdev);
2427         return err;
2428 }
2429
2430
2431 static void
2432 vmxnet3_free_pci_resources(struct vmxnet3_adapter *adapter)
2433 {
2434         BUG_ON(!adapter->pdev);
2435
2436         iounmap(adapter->hw_addr0);
2437         iounmap(adapter->hw_addr1);
2438         pci_release_selected_regions(adapter->pdev, (1 << 2) - 1);
2439         pci_disable_device(adapter->pdev);
2440 }
2441
2442
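/*
 * Size rx buffers and rings for the current MTU. Ring 0 is rounded to a
 * multiple of rx_buf_per_pkt * VMXNET3_RING_SIZE_ALIGN so that it always
 * holds whole packets: assuming an alignment of 32, a requested ring 0
 * size of 256 with rx_buf_per_pkt == 3 is rounded up to 288.
 */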
2443 static void
2444 vmxnet3_adjust_rx_ring_size(struct vmxnet3_adapter *adapter)
2445 {
2446         size_t sz, i, ring0_size, ring1_size, comp_size;
2447         struct vmxnet3_rx_queue *rq = &adapter->rx_queue[0];
2448
2449
2450         if (adapter->netdev->mtu <= VMXNET3_MAX_SKB_BUF_SIZE -
2451                                     VMXNET3_MAX_ETH_HDR_SIZE) {
2452                 adapter->skb_buf_size = adapter->netdev->mtu +
2453                                         VMXNET3_MAX_ETH_HDR_SIZE;
2454                 if (adapter->skb_buf_size < VMXNET3_MIN_T0_BUF_SIZE)
2455                         adapter->skb_buf_size = VMXNET3_MIN_T0_BUF_SIZE;
2456
2457                 adapter->rx_buf_per_pkt = 1;
2458         } else {
2459                 adapter->skb_buf_size = VMXNET3_MAX_SKB_BUF_SIZE;
2460                 sz = adapter->netdev->mtu - VMXNET3_MAX_SKB_BUF_SIZE +
2461                                             VMXNET3_MAX_ETH_HDR_SIZE;
2462                 adapter->rx_buf_per_pkt = 1 + (sz + PAGE_SIZE - 1) / PAGE_SIZE;
2463         }
2464
2465         /*
2466          * for simplicity, force the ring0 size to be a multiple of
2467          * rx_buf_per_pkt * VMXNET3_RING_SIZE_ALIGN
2468          */
2469         sz = adapter->rx_buf_per_pkt * VMXNET3_RING_SIZE_ALIGN;
2470         ring0_size = adapter->rx_queue[0].rx_ring[0].size;
2471         ring0_size = (ring0_size + sz - 1) / sz * sz;
2472         ring0_size = min_t(u32, ring0_size, VMXNET3_RX_RING_MAX_SIZE /
2473                            sz * sz);
2474         ring1_size = adapter->rx_queue[0].rx_ring[1].size;
2475         comp_size = ring0_size + ring1_size;
2476
2477         for (i = 0; i < adapter->num_rx_queues; i++) {
2478                 rq = &adapter->rx_queue[i];
2479                 rq->rx_ring[0].size = ring0_size;
2480                 rq->rx_ring[1].size = ring1_size;
2481                 rq->comp_ring.size = comp_size;
2482         }
2483 }
2484
2485
2486 int
2487 vmxnet3_create_queues(struct vmxnet3_adapter *adapter, u32 tx_ring_size,
2488                       u32 rx_ring_size, u32 rx_ring2_size)
2489 {
2490         int err = 0, i;
2491
2492         for (i = 0; i < adapter->num_tx_queues; i++) {
2493                 struct vmxnet3_tx_queue *tq = &adapter->tx_queue[i];
2494                 tq->tx_ring.size   = tx_ring_size;
2495                 tq->data_ring.size = tx_ring_size;
2496                 tq->comp_ring.size = tx_ring_size;
2497                 tq->shared = &adapter->tqd_start[i].ctrl;
2498                 tq->stopped = true;
2499                 tq->adapter = adapter;
2500                 tq->qid = i;
2501                 err = vmxnet3_tq_create(tq, adapter);
2502                 /*
2503                  * Too late to change num_tx_queues. We cannot make do with
2504                  * fewer queues than what we asked for.
2505                  */
2506                 if (err)
2507                         goto queue_err;
2508         }
2509
2510         adapter->rx_queue[0].rx_ring[0].size = rx_ring_size;
2511         adapter->rx_queue[0].rx_ring[1].size = rx_ring2_size;
2512         vmxnet3_adjust_rx_ring_size(adapter);
2513         for (i = 0; i < adapter->num_rx_queues; i++) {
2514                 struct vmxnet3_rx_queue *rq = &adapter->rx_queue[i];
2515                 /* qid and qid2 for rx queues will be assigned later, once the
2516                  * number of rx queues is finalized after allocating intrs */
2517                 rq->shared = &adapter->rqd_start[i].ctrl;
2518                 rq->adapter = adapter;
2519                 err = vmxnet3_rq_create(rq, adapter);
2520                 if (err) {
2521                         if (i == 0) {
2522                                 netdev_err(adapter->netdev,
2523                                            "Could not allocate any rx queues. "
2524                                            "Aborting.\n");
2525                                 goto queue_err;
2526                         } else {
2527                                 netdev_info(adapter->netdev,
2528                                             "Number of rx queues changed "
2529                                             "to: %d.\n", i);
2530                                 adapter->num_rx_queues = i;
2531                                 err = 0;
2532                                 break;
2533                         }
2534                 }
2535         }
2536         return err;
2537 queue_err:
2538         vmxnet3_tq_destroy_all(adapter);
2539         return err;
2540 }
2541
2542 static int
2543 vmxnet3_open(struct net_device *netdev)
2544 {
2545         struct vmxnet3_adapter *adapter;
2546         int err, i;
2547
2548         adapter = netdev_priv(netdev);
2549
2550         for (i = 0; i < adapter->num_tx_queues; i++)
2551                 spin_lock_init(&adapter->tx_queue[i].tx_lock);
2552
2553         err = vmxnet3_create_queues(adapter, VMXNET3_DEF_TX_RING_SIZE,
2554                                     VMXNET3_DEF_RX_RING_SIZE,
2555                                     VMXNET3_DEF_RX_RING_SIZE);
2556         if (err)
2557                 goto queue_err;
2558
2559         err = vmxnet3_activate_dev(adapter);
2560         if (err)
2561                 goto activate_err;
2562
2563         return 0;
2564
2565 activate_err:
2566         vmxnet3_rq_destroy_all(adapter);
2567         vmxnet3_tq_destroy_all(adapter);
2568 queue_err:
2569         return err;
2570 }
2571
2572
2573 static int
2574 vmxnet3_close(struct net_device *netdev)
2575 {
2576         struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2577
2578         /*
2579          * Reset_work may be in the middle of resetting the device, wait for its
2580          * completion.
2581          */
2582         while (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state))
2583                 msleep(1);
2584
2585         vmxnet3_quiesce_dev(adapter);
2586
2587         vmxnet3_rq_destroy_all(adapter);
2588         vmxnet3_tq_destroy_all(adapter);
2589
2590         clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state);
2591
2592
2593         return 0;
2594 }
2595
2596
2597 void
2598 vmxnet3_force_close(struct vmxnet3_adapter *adapter)
2599 {
2600         int i;
2601
2602         /*
2603          * we must clear VMXNET3_STATE_BIT_RESETTING, otherwise
2604          * vmxnet3_close() will deadlock.
2605          */
2606         BUG_ON(test_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state));
2607
2608         /* we need to enable NAPI, otherwise dev_close will deadlock */
2609         for (i = 0; i < adapter->num_rx_queues; i++)
2610                 napi_enable(&adapter->rx_queue[i].napi);
2611         dev_close(adapter->netdev);
2612 }
2613
2614
2615 static int
2616 vmxnet3_change_mtu(struct net_device *netdev, int new_mtu)
2617 {
2618         struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2619         int err = 0;
2620
2621         if (new_mtu < VMXNET3_MIN_MTU || new_mtu > VMXNET3_MAX_MTU)
2622                 return -EINVAL;
2623
2624         netdev->mtu = new_mtu;
2625
2626         /*
2627          * Reset_work may be in the middle of resetting the device, wait for its
2628          * completion.
2629          */
2630         while (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state))
2631                 msleep(1);
2632
2633         if (netif_running(netdev)) {
2634                 vmxnet3_quiesce_dev(adapter);
2635                 vmxnet3_reset_dev(adapter);
2636
2637                 /* we need to re-create the rx queue based on the new mtu */
2638                 vmxnet3_rq_destroy_all(adapter);
2639                 vmxnet3_adjust_rx_ring_size(adapter);
2640                 err = vmxnet3_rq_create_all(adapter);
2641                 if (err) {
2642                         netdev_err(netdev,
2643                                    "failed to re-create rx queues, "
2644                                    "error %d. Closing the device.\n", err);
2645                         goto out;
2646                 }
2647
2648                 err = vmxnet3_activate_dev(adapter);
2649                 if (err) {
2650                         netdev_err(netdev,
2651                                    "failed to re-activate, error %d. "
2652                                    "Closing the device.\n", err);
2653                         goto out;
2654                 }
2655         }
2656
2657 out:
2658         clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state);
2659         if (err)
2660                 vmxnet3_force_close(adapter);
2661
2662         return err;
2663 }
2664
2665
2666 static void
2667 vmxnet3_declare_features(struct vmxnet3_adapter *adapter, bool dma64)
2668 {
2669         struct net_device *netdev = adapter->netdev;
2670
2671         netdev->hw_features = NETIF_F_SG | NETIF_F_RXCSUM |
2672                 NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_CTAG_TX |
2673                 NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_TSO | NETIF_F_TSO6 |
2674                 NETIF_F_LRO;
2675         if (dma64)
2676                 netdev->hw_features |= NETIF_F_HIGHDMA;
2677         netdev->vlan_features = netdev->hw_features &
2678                                 ~(NETIF_F_HW_VLAN_CTAG_TX |
2679                                   NETIF_F_HW_VLAN_CTAG_RX);
2680         netdev->features = netdev->hw_features | NETIF_F_HW_VLAN_CTAG_FILTER;
2681 }
2682
2683
2684 static void
2685 vmxnet3_read_mac_addr(struct vmxnet3_adapter *adapter, u8 *mac)
2686 {
2687         u32 tmp;
2688
2689         tmp = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_MACL);
2690         *(u32 *)mac = tmp;
2691
2692         tmp = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_MACH);
2693         mac[4] = tmp & 0xff;
2694         mac[5] = (tmp >> 8) & 0xff;
2695 }
2696
2697 #ifdef CONFIG_PCI_MSI
2698
2699 /*
2700  * Enable MSI-X vectors.
2701  * Returns:
2702  *      0 on successful enabling of the required vectors,
2703  *      VMXNET3_LINUX_MIN_MSIX_VECT when only the minimum number of required
2704  *       vectors could be enabled,
2705  *      otherwise the number of vectors which can be enabled (this number is
2706  *       smaller than VMXNET3_LINUX_MIN_MSIX_VECT)
2707  */
2708
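/*
 * Note: the legacy pci_enable_msix() is tri-state; it returns 0 on
 * success, a negative errno on failure, or a positive count of the
 * vectors actually available, which is what drives the retry with
 * fewer vectors below.
 */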
2709 static int
2710 vmxnet3_acquire_msix_vectors(struct vmxnet3_adapter *adapter,
2711                              int vectors)
2712 {
2713         int err = 0, vector_threshold;
2714         vector_threshold = VMXNET3_LINUX_MIN_MSIX_VECT;
2715
2716         while (vectors >= vector_threshold) {
2717                 err = pci_enable_msix(adapter->pdev, adapter->intr.msix_entries,
2718                                       vectors);
2719                 if (!err) {
2720                         adapter->intr.num_intrs = vectors;
2721                         return 0;
2722                 } else if (err < 0) {
2723                         dev_err(&adapter->netdev->dev,
2724                                    "Failed to enable MSI-X, error: %d\n", err);
2725                         vectors = 0;
2726                 } else if (err < vector_threshold) {
2727                         break;
2728                 } else {
2729                         /* If fails to enable required number of MSI-x vectors
2730                          * try enabling minimum number of vectors required.
2731                          */
2732                         dev_err(&adapter->netdev->dev,
2733                                 "Failed to enable %d MSI-X, trying %d instead\n",
2734                                     vectors, vector_threshold);
2735                         vectors = vector_threshold;
2736                 }
2737         }
2738
2739         dev_info(&adapter->pdev->dev,
2740                  "Number of MSI-X interrupts which can be allocated "
2741                  "is lower than the minimum threshold required.\n");
2742         return err;
2743 }
2744
2745
2746 #endif /* CONFIG_PCI_MSI */
2747
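/*
 * Query the device's preferred intr type and mask mode, then budget the
 * MSI-X request: one vector per tx queue (or a single shared one), one
 * per rx queue (or none when buddy-sharing with tx), plus one for
 * events; e.g. 4 tx + 4 rx queues with no sharing requests 9 vectors.
 */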
2748 static void
2749 vmxnet3_alloc_intr_resources(struct vmxnet3_adapter *adapter)
2750 {
2751         u32 cfg;
2752         unsigned long flags;
2753
2754         /* intr settings */
2755         spin_lock_irqsave(&adapter->cmd_lock, flags);
2756         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2757                                VMXNET3_CMD_GET_CONF_INTR);
2758         cfg = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
2759         spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2760         adapter->intr.type = cfg & 0x3;
2761         adapter->intr.mask_mode = (cfg >> 2) & 0x3;
2762
2763         if (adapter->intr.type == VMXNET3_IT_AUTO) {
2764                 adapter->intr.type = VMXNET3_IT_MSIX;
2765         }
2766
2767 #ifdef CONFIG_PCI_MSI
2768         if (adapter->intr.type == VMXNET3_IT_MSIX) {
2769                 int vector, err = 0;
2770
2771                 adapter->intr.num_intrs = (adapter->share_intr ==
2772                                            VMXNET3_INTR_TXSHARE) ? 1 :
2773                                            adapter->num_tx_queues;
2774                 adapter->intr.num_intrs += (adapter->share_intr ==
2775                                            VMXNET3_INTR_BUDDYSHARE) ? 0 :
2776                                            adapter->num_rx_queues;
2777                 adapter->intr.num_intrs += 1;           /* for link event */
2778
2779                 adapter->intr.num_intrs = (adapter->intr.num_intrs >
2780                                            VMXNET3_LINUX_MIN_MSIX_VECT
2781                                            ? adapter->intr.num_intrs :
2782                                            VMXNET3_LINUX_MIN_MSIX_VECT);
2783
2784                 for (vector = 0; vector < adapter->intr.num_intrs; vector++)
2785                         adapter->intr.msix_entries[vector].entry = vector;
2786
2787                 err = vmxnet3_acquire_msix_vectors(adapter,
2788                                                    adapter->intr.num_intrs);
2789                 /* If we cannot allocate one MSI-X vector per queue,
2790                  * limit the number of rx queues to 1.
2791                  */
2792                 if (err == VMXNET3_LINUX_MIN_MSIX_VECT) {
2793                         if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE
2794                             || adapter->num_rx_queues != 1) {
2795                                 adapter->share_intr = VMXNET3_INTR_TXSHARE;
2796                                 netdev_err(adapter->netdev,
2797                                            "Limiting rx queues to 1\n");
2798                                 adapter->num_rx_queues = 1;
2799                                 adapter->intr.num_intrs =
2800                                                 VMXNET3_LINUX_MIN_MSIX_VECT;
2801                         }
2802                         return;
2803                 }
2804                 if (!err)
2805                         return;
2806
2807                 /* If we cannot allocate MSI-X vectors, use only one rx queue */
2808                 dev_info(&adapter->pdev->dev,
2809                          "Failed to enable MSI-X, error %d. "
2810                          "Limiting #rx queues to 1, try MSI.\n", err);
2811
2812                 adapter->intr.type = VMXNET3_IT_MSI;
2813         }
2814
2815         if (adapter->intr.type == VMXNET3_IT_MSI) {
2816                 int err;
2817                 err = pci_enable_msi(adapter->pdev);
2818                 if (!err) {
2819                         adapter->num_rx_queues = 1;
2820                         adapter->intr.num_intrs = 1;
2821                         return;
2822                 }
2823         }
2824 #endif /* CONFIG_PCI_MSI */
2825
2826         adapter->num_rx_queues = 1;
2827         dev_info(&adapter->netdev->dev,
2828                  "Using INTx interrupt, #Rx queues: 1.\n");
2829         adapter->intr.type = VMXNET3_IT_INTX;
2830
2831         /* INTx related setting */
2832         adapter->intr.num_intrs = 1;
2833 }
2834
2835
2836 static void
2837 vmxnet3_free_intr_resources(struct vmxnet3_adapter *adapter)
2838 {
2839         if (adapter->intr.type == VMXNET3_IT_MSIX)
2840                 pci_disable_msix(adapter->pdev);
2841         else if (adapter->intr.type == VMXNET3_IT_MSI)
2842                 pci_disable_msi(adapter->pdev);
2843         else
2844                 BUG_ON(adapter->intr.type != VMXNET3_IT_INTX);
2845 }
2846
2847
2848 static void
2849 vmxnet3_tx_timeout(struct net_device *netdev)
2850 {
2851         struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2852         adapter->tx_timeout_count++;
2853
2854         netdev_err(adapter->netdev, "tx hang\n");
2855         schedule_work(&adapter->work);
2856         netif_wake_queue(adapter->netdev);
2857 }
2858
2859
2860 static void
2861 vmxnet3_reset_work(struct work_struct *data)
2862 {
2863         struct vmxnet3_adapter *adapter;
2864
2865         adapter = container_of(data, struct vmxnet3_adapter, work);
2866
2867         /* if another thread is resetting the device, no need to proceed */
2868         if (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state))
2869                 return;
2870
2871         /* if the device is closed, we must leave it alone */
2872         rtnl_lock();
2873         if (netif_running(adapter->netdev)) {
2874                 netdev_notice(adapter->netdev, "resetting\n");
2875                 vmxnet3_quiesce_dev(adapter);
2876                 vmxnet3_reset_dev(adapter);
2877                 vmxnet3_activate_dev(adapter);
2878         } else {
2879                 netdev_info(adapter->netdev, "already closed\n");
2880         }
2881         rtnl_unlock();
2882
2883         clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state);
2884 }
2885
2886
2887 static int
2888 vmxnet3_probe_device(struct pci_dev *pdev,
2889                      const struct pci_device_id *id)
2890 {
2891         static const struct net_device_ops vmxnet3_netdev_ops = {
2892                 .ndo_open = vmxnet3_open,
2893                 .ndo_stop = vmxnet3_close,
2894                 .ndo_start_xmit = vmxnet3_xmit_frame,
2895                 .ndo_set_mac_address = vmxnet3_set_mac_addr,
2896                 .ndo_change_mtu = vmxnet3_change_mtu,
2897                 .ndo_set_features = vmxnet3_set_features,
2898                 .ndo_get_stats64 = vmxnet3_get_stats64,
2899                 .ndo_tx_timeout = vmxnet3_tx_timeout,
2900                 .ndo_set_rx_mode = vmxnet3_set_mc,
2901                 .ndo_vlan_rx_add_vid = vmxnet3_vlan_rx_add_vid,
2902                 .ndo_vlan_rx_kill_vid = vmxnet3_vlan_rx_kill_vid,
2903 #ifdef CONFIG_NET_POLL_CONTROLLER
2904                 .ndo_poll_controller = vmxnet3_netpoll,
2905 #endif
2906         };
2907         int err;
2908         bool dma64 = false; /* quiet a spurious "may be used uninitialized" warning */
2909         u32 ver;
2910         struct net_device *netdev;
2911         struct vmxnet3_adapter *adapter;
2912         u8 mac[ETH_ALEN];
2913         int size;
2914         int num_tx_queues;
2915         int num_rx_queues;
2916
2917         if (!pci_msi_enabled())
2918                 enable_mq = 0;
2919
2920 #ifdef VMXNET3_RSS
2921         if (enable_mq)
2922                 num_rx_queues = min(VMXNET3_DEVICE_MAX_RX_QUEUES,
2923                                     (int)num_online_cpus());
2924         else
2925 #endif
2926                 num_rx_queues = 1;
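             /* Keep the queue count a power of two; presumably this lets the
              * RSS indirection table spread flows evenly across the queues.
              */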
2927         num_rx_queues = rounddown_pow_of_two(num_rx_queues);
2928
2929         if (enable_mq)
2930                 num_tx_queues = min(VMXNET3_DEVICE_MAX_TX_QUEUES,
2931                                     (int)num_online_cpus());
2932         else
2933                 num_tx_queues = 1;
2934
2935         num_tx_queues = rounddown_pow_of_two(num_tx_queues);
2936         netdev = alloc_etherdev_mq(sizeof(struct vmxnet3_adapter),
2937                                    max(num_tx_queues, num_rx_queues));
2938         dev_info(&pdev->dev,
2939                  "# of Tx queues : %d, # of Rx queues : %d\n",
2940                  num_tx_queues, num_rx_queues);
2941
2942         if (!netdev)
2943                 return -ENOMEM;
2944
2945         pci_set_drvdata(pdev, netdev);
2946         adapter = netdev_priv(netdev);
2947         adapter->netdev = netdev;
2948         adapter->pdev = pdev;
2949
2950         spin_lock_init(&adapter->cmd_lock);
2951         adapter->shared = pci_alloc_consistent(adapter->pdev,
2952                                                sizeof(struct Vmxnet3_DriverShared),
2953                                                &adapter->shared_pa);
2954         if (!adapter->shared) {
2955                 dev_err(&pdev->dev, "Failed to allocate memory\n");
2956                 err = -ENOMEM;
2957                 goto err_alloc_shared;
2958         }
2959
2960         adapter->num_rx_queues = num_rx_queues;
2961         adapter->num_tx_queues = num_tx_queues;
2962         adapter->rx_buf_per_pkt = 1;
2963
2964         size = sizeof(struct Vmxnet3_TxQueueDesc) * adapter->num_tx_queues;
2965         size += sizeof(struct Vmxnet3_RxQueueDesc) * adapter->num_rx_queues;
2966         adapter->tqd_start = pci_alloc_consistent(adapter->pdev, size,
2967                                                   &adapter->queue_desc_pa);
2968
2969         if (!adapter->tqd_start) {
2970                 dev_err(&pdev->dev, "Failed to allocate memory\n");
2971                 err = -ENOMEM;
2972                 goto err_alloc_queue_desc;
2973         }
2974         adapter->rqd_start = (struct Vmxnet3_RxQueueDesc *)(adapter->tqd_start +
2975                                                             adapter->num_tx_queues);
2976
2977         adapter->pm_conf = kmalloc(sizeof(struct Vmxnet3_PMConf), GFP_KERNEL);
2978         if (adapter->pm_conf == NULL) {
2979                 err = -ENOMEM;
2980                 goto err_alloc_pm;
2981         }
2982
2983 #ifdef VMXNET3_RSS
2984
2985         adapter->rss_conf = kmalloc(sizeof(struct UPT1_RSSConf), GFP_KERNEL);
2986         if (adapter->rss_conf == NULL) {
2987                 err = -ENOMEM;
2988                 goto err_alloc_rss;
2989         }
2990 #endif /* VMXNET3_RSS */
2991
2992         err = vmxnet3_alloc_pci_resources(adapter, &dma64);
2993         if (err < 0)
2994                 goto err_alloc_pci;
2995
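             /* The version registers report the revisions the device supports
              * as a bit mask; writing a bit back selects that revision. Bit 0
              * is revision 1, the only one this driver implements.
              */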
2996         ver = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_VRRS);
2997         if (ver & 1) {
2998                 VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_VRRS, 1);
2999         } else {
3000                 dev_err(&pdev->dev,
3001                         "Incompatible h/w version (0x%x) for adapter\n", ver);
3002                 err = -EBUSY;
3003                 goto err_ver;
3004         }
3005
3006         ver = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_UVRS);
3007         if (ver & 1) {
3008                 VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_UVRS, 1);
3009         } else {
3010                 dev_err(&pdev->dev,
3011                         "Incompatible upt version (0x%x) for adapter\n", ver);
3012                 err = -EBUSY;
3013                 goto err_ver;
3014         }
3015
3016         SET_NETDEV_DEV(netdev, &pdev->dev);
3017         vmxnet3_declare_features(adapter, dma64);
3018
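             /* Tx/rx interrupt buddying (one vector driving the tx/rx queue
              * pair of the same index) is only possible when the queue counts
              * match.
              */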
3019         if (adapter->num_tx_queues == adapter->num_rx_queues)
3020                 adapter->share_intr = VMXNET3_INTR_BUDDYSHARE;
3021         else
3022                 adapter->share_intr = VMXNET3_INTR_DONTSHARE;
3023
3024         vmxnet3_alloc_intr_resources(adapter);
3025
3026 #ifdef VMXNET3_RSS
3027         if (adapter->num_rx_queues > 1 &&
3028             adapter->intr.type == VMXNET3_IT_MSIX) {
3029                 adapter->rss = true;
3030                 netdev->hw_features |= NETIF_F_RXHASH;
3031                 netdev->features |= NETIF_F_RXHASH;
3032                 dev_dbg(&pdev->dev, "RSS is enabled.\n");
3033         } else {
3034                 adapter->rss = false;
3035         }
3036 #endif
3037
3038         vmxnet3_read_mac_addr(adapter, mac);
3040         memcpy(netdev->dev_addr, mac, netdev->addr_len);
3040
3041         netdev->netdev_ops = &vmxnet3_netdev_ops;
3042         vmxnet3_set_ethtool_ops(netdev);
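             /* The networking core invokes ndo_tx_timeout if a tx queue stays
              * stalled for longer than this watchdog interval (five seconds).
              */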
3043         netdev->watchdog_timeo = 5 * HZ;
3044
3045         INIT_WORK(&adapter->work, vmxnet3_reset_work);
3046         set_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state);
3047
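             /* With MSI-X every rx queue polls on its own NAPI context;
              * otherwise a single context services the whole device.
              */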
3048         if (adapter->intr.type == VMXNET3_IT_MSIX) {
3049                 int i;
3050                 for (i = 0; i < adapter->num_rx_queues; i++) {
3051                         netif_napi_add(adapter->netdev,
3052                                        &adapter->rx_queue[i].napi,
3053                                        vmxnet3_poll_rx_only, 64);
3054                 }
3055         } else {
3056                 netif_napi_add(adapter->netdev, &adapter->rx_queue[0].napi,
3057                                vmxnet3_poll, 64);
3058         }
3059
3060         netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
3061         netif_set_real_num_rx_queues(adapter->netdev, adapter->num_rx_queues);
3062
3063         netif_carrier_off(netdev);
3064         err = register_netdev(netdev);
3065
3066         if (err) {
3067                 dev_err(&pdev->dev, "Failed to register adapter\n");
3068                 goto err_register;
3069         }
3070
3071         vmxnet3_check_link(adapter, false);
3072         return 0;
3073
3074 err_register:
3075         vmxnet3_free_intr_resources(adapter);
3076 err_ver:
3077         vmxnet3_free_pci_resources(adapter);
3078 err_alloc_pci:
3079 #ifdef VMXNET3_RSS
3080         kfree(adapter->rss_conf);
3081 err_alloc_rss:
3082 #endif
3083         kfree(adapter->pm_conf);
3084 err_alloc_pm:
3085         pci_free_consistent(adapter->pdev, size, adapter->tqd_start,
3086                             adapter->queue_desc_pa);
3087 err_alloc_queue_desc:
3088         pci_free_consistent(adapter->pdev, sizeof(struct Vmxnet3_DriverShared),
3089                             adapter->shared, adapter->shared_pa);
3090 err_alloc_shared:
3091         pci_set_drvdata(pdev, NULL);
3092         free_netdev(netdev);
3093         return err;
3094 }
3095
3096
3097 static void
3098 vmxnet3_remove_device(struct pci_dev *pdev)
3099 {
3100         struct net_device *netdev = pci_get_drvdata(pdev);
3101         struct vmxnet3_adapter *adapter = netdev_priv(netdev);
3102         int size = 0;
3103         int num_rx_queues;
3104
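             /* adapter->num_rx_queues may have been reduced after probe (e.g.
              * by the MSI-X fallback), but the queue descriptor area was sized
              * with the count computed at probe time, so recompute that
              * original count to free the right amount.
              */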
3105 #ifdef VMXNET3_RSS
3106         if (enable_mq)
3107                 num_rx_queues = min(VMXNET3_DEVICE_MAX_RX_QUEUES,
3108                                     (int)num_online_cpus());
3109         else
3110 #endif
3111                 num_rx_queues = 1;
3112         num_rx_queues = rounddown_pow_of_two(num_rx_queues);
3113
3114         cancel_work_sync(&adapter->work);
3115
3116         unregister_netdev(netdev);
3117
3118         vmxnet3_free_intr_resources(adapter);
3119         vmxnet3_free_pci_resources(adapter);
3120 #ifdef VMXNET3_RSS
3121         kfree(adapter->rss_conf);
3122 #endif
3123         kfree(adapter->pm_conf);
3124
3125         size = sizeof(struct Vmxnet3_TxQueueDesc) * adapter->num_tx_queues;
3126         size += sizeof(struct Vmxnet3_RxQueueDesc) * num_rx_queues;
3127         pci_free_consistent(adapter->pdev, size, adapter->tqd_start,
3128                             adapter->queue_desc_pa);
3129         pci_free_consistent(adapter->pdev, sizeof(struct Vmxnet3_DriverShared),
3130                             adapter->shared, adapter->shared_pa);
3131         free_netdev(netdev);
3132 }
3133
3134
3135 #ifdef CONFIG_PM
3136
3137 static int
3138 vmxnet3_suspend(struct device *device)
3139 {
3140         struct pci_dev *pdev = to_pci_dev(device);
3141         struct net_device *netdev = pci_get_drvdata(pdev);
3142         struct vmxnet3_adapter *adapter = netdev_priv(netdev);
3143         struct Vmxnet3_PMConf *pmConf;
3144         struct ethhdr *ehdr;
3145         struct arphdr *ahdr;
3146         u8 *arpreq;
3147         struct in_device *in_dev;
3148         struct in_ifaddr *ifa;
3149         unsigned long flags;
3150         int i = 0;
3151
3152         if (!netif_running(netdev))
3153                 return 0;
3154
3155         for (i = 0; i < adapter->num_rx_queues; i++)
3156                 napi_disable(&adapter->rx_queue[i].napi);
3157
3158         vmxnet3_disable_all_intrs(adapter);
3159         vmxnet3_free_irqs(adapter);
3160         vmxnet3_free_intr_resources(adapter);
3161
3162         netif_device_detach(netdev);
3163         netif_tx_stop_all_queues(netdev);
3164
3165         /* Create wake-up filters. */
3166         pmConf = adapter->pm_conf;
3167         memset(pmConf, 0, sizeof(*pmConf));
3168
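             /* Each bit of a filter's mask[] selects one byte of pattern[]
              * that must match the incoming frame: mask byte n covers pattern
              * bytes 8*n through 8*n+7.
              */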
3169         if (adapter->wol & WAKE_UCAST) {
3170                 pmConf->filters[i].patternSize = ETH_ALEN;
3171                 pmConf->filters[i].maskSize = 1;
3172                 memcpy(pmConf->filters[i].pattern, netdev->dev_addr, ETH_ALEN);
3173                 pmConf->filters[i].mask[0] = 0x3F; /* LSB ETH_ALEN bits */
3174
3175                 pmConf->wakeUpEvents |= VMXNET3_PM_WAKEUP_FILTER;
3176                 i++;
3177         }
3178
3179         if (adapter->wol & WAKE_ARP) {
3180                 in_dev = in_dev_get(netdev);
3181                 if (!in_dev)
3182                         goto skip_arp;
3183
3184                 ifa = (struct in_ifaddr *)in_dev->ifa_list;
3185                 if (!ifa)
3186                         goto skip_arp;
3187
3188                 pmConf->filters[i].patternSize = ETH_HLEN + /* Ethernet header */
3189                         sizeof(struct arphdr) +         /* ARP header */
3190                         2 * ETH_ALEN +          /* 2 Ethernet addresses */
3191                         2 * sizeof(u32);        /* 2 IPv4 addresses */
3192                 pmConf->filters[i].maskSize =
3193                         (pmConf->filters[i].patternSize - 1) / 8 + 1;
3194
3195                 /* ETH_P_ARP in Ethernet header. */
3196                 ehdr = (struct ethhdr *)pmConf->filters[i].pattern;
3197                 ehdr->h_proto = htons(ETH_P_ARP);
3198
3199                 /* ARPOP_REQUEST in ARP header. */
3200                 ahdr = (struct arphdr *)&pmConf->filters[i].pattern[ETH_HLEN];
3201                 ahdr->ar_op = htons(ARPOP_REQUEST);
3202                 arpreq = (u8 *)(ahdr + 1);
3203
3204                 /* The Unicast IPv4 address in 'tip' field. */
3205                 arpreq += 2 * ETH_ALEN + sizeof(u32);
3206                 *(u32 *)arpreq = ifa->ifa_address;
3207
3208                 /* The mask for the relevant bits. */
3209                 pmConf->filters[i].mask[0] = 0x00;
3210                 pmConf->filters[i].mask[1] = 0x30; /* ETH_P_ARP */
3211                 pmConf->filters[i].mask[2] = 0x30; /* ARPOP_REQUEST */
3212                 pmConf->filters[i].mask[3] = 0x00;
3213                 pmConf->filters[i].mask[4] = 0xC0; /* IPv4 TIP */
3214                 pmConf->filters[i].mask[5] = 0x03; /* IPv4 TIP */
3215                 in_dev_put(in_dev);
3216
3217                 pmConf->wakeUpEvents |= VMXNET3_PM_WAKEUP_FILTER;
3218                 i++;
3219         }
3220
3221 skip_arp:
3222         if (adapter->wol & WAKE_MAGIC)
3223                 pmConf->wakeUpEvents |= VMXNET3_PM_WAKEUP_MAGIC;
3224
3225         pmConf->numFilters = i;
3226
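             /* Publish the filter list through the shared memory area; the
              * UPDATE_PMCFG command below tells the device to re-read it.
              */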
3227         adapter->shared->devRead.pmConfDesc.confVer = cpu_to_le32(1);
3228         adapter->shared->devRead.pmConfDesc.confLen = cpu_to_le32(sizeof(
3229                                                                   *pmConf));
3230         adapter->shared->devRead.pmConfDesc.confPA = cpu_to_le64(virt_to_phys(
3231                                                                  pmConf));
3232
3233         spin_lock_irqsave(&adapter->cmd_lock, flags);
3234         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
3235                                VMXNET3_CMD_UPDATE_PMCFG);
3236         spin_unlock_irqrestore(&adapter->cmd_lock, flags);
3237
3238         pci_save_state(pdev);
3239         pci_enable_wake(pdev, pci_choose_state(pdev, PMSG_SUSPEND),
3240                         adapter->wol);
3241         pci_disable_device(pdev);
3242         pci_set_power_state(pdev, pci_choose_state(pdev, PMSG_SUSPEND));
3243
3244         return 0;
3245 }
3246
3247
3248 static int
3249 vmxnet3_resume(struct device *device)
3250 {
3251         int err, i = 0;
3252         unsigned long flags;
3253         struct pci_dev *pdev = to_pci_dev(device);
3254         struct net_device *netdev = pci_get_drvdata(pdev);
3255         struct vmxnet3_adapter *adapter = netdev_priv(netdev);
3256         struct Vmxnet3_PMConf *pmConf;
3257
3258         if (!netif_running(netdev))
3259                 return 0;
3260
3261         /* Destroy wake-up filters. */
3262         pmConf = adapter->pm_conf;
3263         memset(pmConf, 0, sizeof(*pmConf));
3264
3265         adapter->shared->devRead.pmConfDesc.confVer = cpu_to_le32(1);
3266         adapter->shared->devRead.pmConfDesc.confLen = cpu_to_le32(sizeof(
3267                                                                   *pmConf));
3268         adapter->shared->devRead.pmConfDesc.confPA = cpu_to_le64(virt_to_phys(
3269                                                                  pmConf));
3270
3271         netif_device_attach(netdev);
3272         pci_set_power_state(pdev, PCI_D0);
3273         pci_restore_state(pdev);
3274         err = pci_enable_device_mem(pdev);
3275         if (err != 0)
3276                 return err;
3277
3278         pci_enable_wake(pdev, PCI_D0, 0);
3279
3280         spin_lock_irqsave(&adapter->cmd_lock, flags);
3281         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
3282                                VMXNET3_CMD_UPDATE_PMCFG);
3283         spin_unlock_irqrestore(&adapter->cmd_lock, flags);
3284         vmxnet3_alloc_intr_resources(adapter);
3285         vmxnet3_request_irqs(adapter);
3286         for (i = 0; i < adapter->num_rx_queues; i++)
3287                 napi_enable(&adapter->rx_queue[i].napi);
3288         vmxnet3_enable_all_intrs(adapter);
3289
3290         return 0;
3291 }
3292
3293 static const struct dev_pm_ops vmxnet3_pm_ops = {
3294         .suspend = vmxnet3_suspend,
3295         .resume = vmxnet3_resume,
3296 };
3297 #endif
3298
3299 static struct pci_driver vmxnet3_driver = {
3300         .name           = vmxnet3_driver_name,
3301         .id_table       = vmxnet3_pciid_table,
3302         .probe          = vmxnet3_probe_device,
3303         .remove         = vmxnet3_remove_device,
3304 #ifdef CONFIG_PM
3305         .driver.pm      = &vmxnet3_pm_ops,
3306 #endif
3307 };
3308
3309
3310 static int __init
3311 vmxnet3_init_module(void)
3312 {
3313         pr_info("%s - version %s\n", VMXNET3_DRIVER_DESC,
3314                 VMXNET3_DRIVER_VERSION_REPORT);
3315         return pci_register_driver(&vmxnet3_driver);
3316 }
3317
3318 module_init(vmxnet3_init_module);
3319
3320
3321 static void
3322 vmxnet3_exit_module(void)
3323 {
3324         pci_unregister_driver(&vmxnet3_driver);
3325 }
3326
3327 module_exit(vmxnet3_exit_module);
3328
3329 MODULE_AUTHOR("VMware, Inc.");
3330 MODULE_DESCRIPTION(VMXNET3_DRIVER_DESC);
3331 MODULE_LICENSE("GPL v2");
3332 MODULE_VERSION(VMXNET3_DRIVER_VERSION_STRING);