1 /* Copyright (c) 2012 - 2015 UNISYS CORPORATION
2  * All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
11  * NON INFRINGEMENT.  See the GNU General Public License for more
12  * details.
13  */
14
15 /* This driver lives in a spar partition, and registers to ethernet IO
16  * channels from the visorbus driver. It creates netdev devices and
17  * forwards transmits to the IO channel and accepts receives from the IO
18  * Partition via the IO channel.
19  */
20
21 #include <linux/debugfs.h>
22 #include <linux/etherdevice.h>
23 #include <linux/netdevice.h>
24 #include <linux/kthread.h>
25 #include <linux/skbuff.h>
26 #include <linux/rtnetlink.h>
27
28 #include "visorbus.h"
29 #include "iochannel.h"
30
31 #define VISORNIC_INFINITE_RSP_WAIT 0
32 #define VISORNICSOPENMAX 32
33 #define MAXDEVICES     16384
34
35 /* MAX_BUF = 64 lines x 32 MAXVNIC x 80 characters
36  *         = 163840 bytes
37  */
38 #define MAX_BUF 163840
39
40 static spinlock_t dev_num_pool_lock;
41 static void *dev_num_pool;      /**< pool to grab device numbers from */
42
43 static int visornic_probe(struct visor_device *dev);
44 static void visornic_remove(struct visor_device *dev);
45 static int visornic_pause(struct visor_device *dev,
46                           visorbus_state_complete_func complete_func);
47 static int visornic_resume(struct visor_device *dev,
48                            visorbus_state_complete_func complete_func);
49
50 /* DEBUGFS declarations */
51 static ssize_t info_debugfs_read(struct file *file, char __user *buf,
52                                  size_t len, loff_t *offset);
53 static ssize_t enable_ints_write(struct file *file, const char __user *buf,
54                                  size_t len, loff_t *ppos);
55 static struct dentry *visornic_debugfs_dir;
56 static const struct file_operations debugfs_info_fops = {
57         .read = info_debugfs_read,
58 };
59
60 static const struct file_operations debugfs_enable_ints_fops = {
61         .write = enable_ints_write,
62 };
63
64 static struct workqueue_struct *visornic_timeout_reset_workqueue;
65
66 /* GUIDs for the channel type supported by this driver.  */
67 static struct visor_channeltype_descriptor visornic_channel_types[] = {
68         /* Note that the only channel type we expect to be reported by the
69          * bus driver is the SPAR_VNIC channel.
70          */
71         { SPAR_VNIC_CHANNEL_PROTOCOL_UUID, "ultravnic" },
72         { NULL_UUID_LE, NULL }
73 };
74 MODULE_DEVICE_TABLE(visorbus, visornic_channel_types);
75 /*
76  * FIXME XXX: This next line of code must be fixed and removed before
77  * acceptance into the 'normal' part of the kernel.  It is only here as a place
78  * holder to get module autoloading functionality working for visorbus.  Code
79  * must be added to scripts/mod/file2alias.c, etc., to get this working
80  * properly.
81  */
82 MODULE_ALIAS("visorbus:" SPAR_VNIC_CHANNEL_PROTOCOL_UUID_STR);
83
84 /* This is used to tell the visor bus driver which types of visor devices
85  * we support, and what functions to call when a visor device that we support
86  * is attached or removed.
87  */
88 static struct visor_driver visornic_driver = {
89         .name = "visornic",
90         .version = "1.0.0.0",
91         .vertag = NULL,
92         .owner = THIS_MODULE,
93         .channel_types = visornic_channel_types,
94         .probe = visornic_probe,
95         .remove = visornic_remove,
96         .pause = visornic_pause,
97         .resume = visornic_resume,
98         .channel_interrupt = NULL,
99 };
100
101 struct chanstat {
102         unsigned long got_rcv;
103         unsigned long got_enbdisack;
104         unsigned long got_xmit_done;
105         unsigned long xmit_fail;
106         unsigned long sent_enbdis;
107         unsigned long sent_promisc;
108         unsigned long sent_post;
109         unsigned long sent_post_failed;
110         unsigned long sent_xmit;
111         unsigned long reject_count;
112         unsigned long extra_rcvbufs_sent;
113 };
114
115 struct visornic_devdata {
116         int devnum;
117         unsigned short enabled;         /* 0 disabled 1 enabled to receive */
118         unsigned short enab_dis_acked;  /* NET_RCV_ENABLE/DISABLE acked by
119                                          * IOPART
120                                          */
121         struct visor_device *dev;
122         char name[99];
123         struct list_head list_all;   /* < link within list_all_devices list */
124         struct net_device *netdev;
125         struct net_device_stats net_stats;
126         atomic_t interrupt_rcvd;
127         wait_queue_head_t rsp_queue;
128         struct sk_buff **rcvbuf;
129         u64 uniquenum; /* TODO figure out why not used */
130         unsigned short old_flags;       /* flags as they were prior to
131                                          * set_multicast_list
132                                          */
133         atomic_t usage;                 /* count of users */
134         int num_rcv_bufs;               /* indicates how many rcv buffers
135                                          * the vnic will post
136                                          */
137         int num_rcv_bufs_could_not_alloc;
138         atomic_t num_rcvbuf_in_iovm;
139         unsigned long alloc_failed_in_if_needed_cnt;
140         unsigned long alloc_failed_in_repost_rtn_cnt;
141         unsigned long max_outstanding_net_xmits; /* absolute max number of
142                                                   * outstanding xmits - should
143                                                   * never hit this
144                                                   */
145         unsigned long upper_threshold_net_xmits;  /* high water mark for
146                                                    * calling netif_stop_queue()
147                                                    */
148         unsigned long lower_threshold_net_xmits; /* low water mark for calling
149                                                   * netif_wake_queue()
150                                                   */
151         struct sk_buff_head xmitbufhead; /* xmitbufhead is the head of the
152                                           * list of xmit buffers that have been
153                                           * sent to the IOPART end
154                                           */
155         visorbus_state_complete_func server_down_complete_func;
156         struct work_struct timeout_reset;
157         struct uiscmdrsp *cmdrsp_rcv;    /* cmdrsp_rcv is used for
158                                           * posting/unposting rcv buffers
159                                           */
160         struct uiscmdrsp *xmit_cmdrsp;   /* used to issue NET_XMIT - there is
161                                           * never more than one xmit in
162                                           * progress at a time
163                                           */
164         bool server_down;                /* IOPART is down */
165         bool server_change_state;        /* Processing SERVER_CHANGESTATE msg */
166         bool going_away;                 /* device is being torn down */
167         struct dentry *eth_debugfs_dir;
168         u64 interrupts_rcvd;
169         u64 interrupts_notme;
170         u64 interrupts_disabled;
171         u64 busy_cnt;
172         spinlock_t priv_lock;  /* spinlock to access devdata structures */
173
174         /* flow control counter */
175         u64 flow_control_upper_hits;
176         u64 flow_control_lower_hits;
177
178         /* debug counters */
179         unsigned long n_rcv0;                   /* # rcvs of 0 buffers */
180         unsigned long n_rcv1;                   /* # rcvs of 1 buffers */
181         unsigned long n_rcv2;                   /* # rcvs of 2 buffers */
182         unsigned long n_rcvx;                   /* # rcvs of >2 buffers */
183         unsigned long found_repost_rcvbuf_cnt;  /* # times we called
184                                                  *   repost_rcvbuf_cnt
185                                                  */
186         unsigned long repost_found_skb_cnt;     /* # times found the skb */
187         unsigned long n_repost_deficit;         /* # times we couldn't find
188                                                  *   all of the rcv buffers
189                                                  */
190         unsigned long bad_rcv_buf;              /* # times we neglected to
191                                                  * free the rcv skb because
192                                                  * we didn't know where it
193                                                  * came from
194                                                  */
195         unsigned long n_rcv_packets_not_accepted;/* # bogus rcv packets */
196
197         int queuefullmsg_logged;
198         struct chanstat chstat;
199         struct timer_list irq_poll_timer;
200         struct napi_struct napi;
201         struct uiscmdrsp cmdrsp[SIZEOF_CMDRSP];
202 };
203
204
205 /* List of all visornic_devdata structs,
206  * linked via the list_all member
207  */
208 static LIST_HEAD(list_all_devices);
209 static DEFINE_SPINLOCK(lock_all_devices);
210 static int visornic_poll(struct napi_struct *napi, int budget);
211 static void poll_for_irq(unsigned long v);
212
213 /**
214  *      visor_copy_fragsinfo_from_skb - copy skb fragment info for the IOPART
215  *      @skb: skbuff that we are pulling the frags from
216  *      @firstfraglen: length of first fragment in skb
217  *      @frags_max: max len of frags array
218  *      @frags: frags array filled in on output
219  *
220  *      Copy the fragment list in the SKB to a phys_info
221  *      array that the IOPART understands.
222  *      Return value indicates number of entries filled in frags
223  *      Negative values indicate an error.
224  */
225 static int
226 visor_copy_fragsinfo_from_skb(struct sk_buff *skb, unsigned int firstfraglen,
227                               unsigned int frags_max,
228                               struct phys_info frags[])
229 {
230         unsigned int count = 0, ii, size, offset = 0, numfrags;
231         unsigned int total_count;
232
233         numfrags = skb_shinfo(skb)->nr_frags;
234
235         /*
236          * Compute the number of fragments this skb has, and if it's more than
237          * the frag array can hold, linearize the skb
238          */
239         total_count = numfrags + (firstfraglen / PI_PAGE_SIZE);
240         if (firstfraglen % PI_PAGE_SIZE)
241                 total_count++;
242
243         if (total_count > frags_max) {
244                 if (skb_linearize(skb))
245                         return -EINVAL;
246                 numfrags = skb_shinfo(skb)->nr_frags;
247                 firstfraglen = 0;
248         }
249
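        /* Walk the linear (non-paged) part of the skb one page at a time,
         * filling in one phys_info entry per page that the data spans.
         */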
250         while (firstfraglen) {
251                 if (count == frags_max)
252                         return -EINVAL;
253
254                 frags[count].pi_pfn =
255                         page_to_pfn(virt_to_page(skb->data + offset));
256                 frags[count].pi_off =
257                         (unsigned long)(skb->data + offset) & PI_PAGE_MASK;
258                 size = min_t(unsigned int, firstfraglen,
259                              PI_PAGE_SIZE - frags[count].pi_off);
260
261                 /* can take smallest of firstfraglen (what's left) OR
262                  * bytes left in the page
263                  */
264                 frags[count].pi_len = size;
265                 firstfraglen -= size;
266                 offset += size;
267                 count++;
268         }
269         if (numfrags) {
270                 if ((count + numfrags) > frags_max)
271                         return -EINVAL;
272
273                 for (ii = 0; ii < numfrags; ii++) {
274                         count = add_physinfo_entries(page_to_pfn(
275                                 skb_frag_page(&skb_shinfo(skb)->frags[ii])),
276                                               skb_shinfo(skb)->frags[ii].
277                                               page_offset,
278                                               skb_shinfo(skb)->frags[ii].
279                                               size, count, frags_max, frags);
280                         /*
281                          * add_physinfo_entries only returns
282                          * zero if the frags array is out of room.
283                          * That should never happen because we
284                          * fail above, if count + numfrags > frags_max.
285                          * Given that there's no recovery mechanism from putting
286                          * half a packet in the I/O channel, panic here as this
287                          * should never happen
288                          */
289                         BUG_ON(!count);
290                 }
291         }
292         if (skb_shinfo(skb)->frag_list) {
293                 struct sk_buff *skbinlist;
294                 int c;
295
296                 for (skbinlist = skb_shinfo(skb)->frag_list; skbinlist;
297                      skbinlist = skbinlist->next) {
298                         c = visor_copy_fragsinfo_from_skb(skbinlist,
299                                                           skbinlist->len -
300                                                           skbinlist->data_len,
301                                                           frags_max - count,
302                                                           &frags[count]);
303                         if (c < 0)
304                                 return c;
305                         count += c;
306                 }
307         }
308         return count;
309 }
310
311 static ssize_t enable_ints_write(struct file *file,
312                                  const char __user *buffer,
313                                  size_t count, loff_t *ppos)
314 {
315         /*
316          * Don't want to break ABI here by having a debugfs
317          * file that no longer exists or is no longer writable, so
318          * let's just make this a vestigial function
319          */
320         return count;
321 }
322
323 /**
324  *      visornic_serverdown_complete - IOPART went down, need to pause
325  *                                     device
326  *      @devdata: visornic device that needs to be paused
327  *
328  *      The IO partition has gone down and we need to do some cleanup
329  *      for when it comes back. Treat the IO partition as the link
330  *      being down.
331  *      Returns void.
332  */
333 static void
334 visornic_serverdown_complete(struct visornic_devdata *devdata)
335 {
336         struct net_device *netdev;
337
338         netdev = devdata->netdev;
339
340         /* Stop polling for interrupts */
341         del_timer_sync(&devdata->irq_poll_timer);
342
343         rtnl_lock();
344         dev_close(netdev);
345         rtnl_unlock();
346
347         atomic_set(&devdata->num_rcvbuf_in_iovm, 0);
348         devdata->chstat.sent_xmit = 0;
349         devdata->chstat.got_xmit_done = 0;
350
351         if (devdata->server_down_complete_func)
352                 (*devdata->server_down_complete_func)(devdata->dev, 0);
353
354         devdata->server_down = true;
355         devdata->server_change_state = false;
356         devdata->server_down_complete_func = NULL;
357 }
358
359 /**
360  *      visornic_serverdown - Command has notified us that IOPART is down
361  *      @devdata: device that is being managed by IOPART
362  *
363  *      Schedule the work needed to handle the server down request. Make
364  *      sure we haven't already handled the server change state event.
365  *      Returns 0 if we scheduled the work, negative errno on error.
366  */
367 static int
368 visornic_serverdown(struct visornic_devdata *devdata,
369                     visorbus_state_complete_func complete_func)
370 {
371         unsigned long flags;
372
373         spin_lock_irqsave(&devdata->priv_lock, flags);
374         if (!devdata->server_down && !devdata->server_change_state) {
375                 if (devdata->going_away) {
376                         spin_unlock_irqrestore(&devdata->priv_lock, flags);
377                         dev_dbg(&devdata->dev->device,
378                                 "%s aborting because device removal pending\n",
379                                 __func__);
380                         return -ENODEV;
381                 }
382                 devdata->server_change_state = true;
383                 devdata->server_down_complete_func = complete_func;
384                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
385                 visornic_serverdown_complete(devdata);
386         } else if (devdata->server_change_state) {
387                 dev_dbg(&devdata->dev->device, "%s changing state\n",
388                         __func__);
389                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
390                 return -EINVAL;
391         } else
392                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
393         return 0;
394 }
395
396 /**
397  *      alloc_rcv_buf   - alloc rcv buffer to be given to the IO Partition.
398  *      @netdev: network adapter the rcv bufs are attached to.
399  *
400  *      Create an sk_buff (rcv_buf) that will be passed to the IO Partition
401  *      so that it can write rcv data into our memory space.
402  *      Return pointer to sk_buff
403  */
404 static struct sk_buff *
405 alloc_rcv_buf(struct net_device *netdev)
406 {
407         struct sk_buff *skb;
408
409         /* NOTE: the first fragment in each rcv buffer is pointed to by
410          * rcvskb->data. For now all rcv buffers will be RCVPOST_BUF_SIZE
411          * in length, so the firstfrag is large enough to hold 1514.
412          */
413         skb = alloc_skb(RCVPOST_BUF_SIZE, GFP_ATOMIC);
414         if (!skb)
415                 return NULL;
416         skb->dev = netdev;
417         skb->len = RCVPOST_BUF_SIZE;
418         /* current value of mtu doesn't come into play here; large
419          * packets will just end up using multiple rcv buffers all of
420          * the same size
421          */
422         skb->data_len = 0;      /* alloc_skb already zeroes it out;
423                                  * set it here only for clarity.
424                                  */
425         return skb;
426 }
427
428 /**
429  *      post_skb        - post a skb to the IO Partition.
430  *      @cmdrsp: cmdrsp packet to be sent to the IO Partition
431  *      @devdata: visornic_devdata to post the skb to
432  *      @skb: skb to give to the IO partition
433  *
434  *      Send the skb to the IO Partition.
435  *      Returns void
436  */
437 static inline void
438 post_skb(struct uiscmdrsp *cmdrsp,
439          struct visornic_devdata *devdata, struct sk_buff *skb)
440 {
441         cmdrsp->net.buf = skb;
442         cmdrsp->net.rcvpost.frag.pi_pfn = page_to_pfn(virt_to_page(skb->data));
443         cmdrsp->net.rcvpost.frag.pi_off =
444                 (unsigned long)skb->data & PI_PAGE_MASK;
445         cmdrsp->net.rcvpost.frag.pi_len = skb->len;
446         cmdrsp->net.rcvpost.unique_num = devdata->uniquenum;
447
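        /* Only post the buffer if the entire receive area fits within a
         * single page; otherwise the request is silently dropped.
         */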
448         if ((cmdrsp->net.rcvpost.frag.pi_off + skb->len) <= PI_PAGE_SIZE) {
449                 cmdrsp->net.type = NET_RCV_POST;
450                 cmdrsp->cmdtype = CMD_NET_TYPE;
451                 if (visorchannel_signalinsert(devdata->dev->visorchannel,
452                                           IOCHAN_TO_IOPART,
453                                           cmdrsp)) {
454                         atomic_inc(&devdata->num_rcvbuf_in_iovm);
455                         devdata->chstat.sent_post++;
456                 } else {
457                         devdata->chstat.sent_post_failed++;
458                 }
459         }
460 }
461
462 /**
463  *      send_enbdis     - send NET_RCV_ENBDIS to IO Partition
464  *      @netdev: netdevice we are enabling/disabling, used as context
465  *               in the response
466  *      @state: enable = 1/disable = 0
467  *      @devdata: visornic device we are enabling/disabling
468  *
469  *      Send the enable/disable message to the IO Partition.
470  *      Returns void
471  */
472 static void
473 send_enbdis(struct net_device *netdev, int state,
474             struct visornic_devdata *devdata)
475 {
476         devdata->cmdrsp_rcv->net.enbdis.enable = state;
477         devdata->cmdrsp_rcv->net.enbdis.context = netdev;
478         devdata->cmdrsp_rcv->net.type = NET_RCV_ENBDIS;
479         devdata->cmdrsp_rcv->cmdtype = CMD_NET_TYPE;
480         if (visorchannel_signalinsert(devdata->dev->visorchannel,
481                                   IOCHAN_TO_IOPART,
482                                   devdata->cmdrsp_rcv))
483                 devdata->chstat.sent_enbdis++;
484 }
485
486 /**
487  *      visornic_disable_with_timeout - Disable network adapter
488  *      @netdev: netdevice to disable
489  *      @timeout: timeout to wait for disable
490  *
491  *      Disable the network adapter and inform the IO Partition that we
492  *      are disabled, reclaim memory from rcv bufs.
493  *      Returns 0 on success, negative for failure of IO Partition
494  *      responding.
495  *
496  */
497 static int
498 visornic_disable_with_timeout(struct net_device *netdev, const int timeout)
499 {
500         struct visornic_devdata *devdata = netdev_priv(netdev);
501         int i;
502         unsigned long flags;
503         int wait = 0;
504
505         /* send a msg telling the other end we are stopping incoming pkts */
506         spin_lock_irqsave(&devdata->priv_lock, flags);
507         devdata->enabled = 0;
508         devdata->enab_dis_acked = 0; /* must wait for ack */
509         spin_unlock_irqrestore(&devdata->priv_lock, flags);
510
511         /* send disable and wait for ack -- don't hold lock when sending
512          * disable because if the queue is full, insert might sleep.
513          */
514         send_enbdis(netdev, 0, devdata);
515
516         /* wait for ack to arrive before we try to free rcv buffers
517          * NOTE: the other end automatically unposts the rcv buffers
518          * when it gets a disable.
519          */
520         spin_lock_irqsave(&devdata->priv_lock, flags);
521         while ((timeout == VISORNIC_INFINITE_RSP_WAIT) ||
522                (wait < timeout)) {
523                 if (devdata->enab_dis_acked)
524                         break;
525                 if (devdata->server_down || devdata->server_change_state) {
526                         spin_unlock_irqrestore(&devdata->priv_lock, flags);
527                         dev_dbg(&netdev->dev, "%s server went away\n",
528                                 __func__);
529                         return -EIO;
530                 }
531                 set_current_state(TASK_INTERRUPTIBLE);
532                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
533                 wait += schedule_timeout(msecs_to_jiffies(10));
534                 spin_lock_irqsave(&devdata->priv_lock, flags);
535         }
536
537         /* Wait for usage to go to 1 (no other users) before freeing
538          * rcv buffers
539          */
540         if (atomic_read(&devdata->usage) > 1) {
541                 while (1) {
542                         set_current_state(TASK_INTERRUPTIBLE);
543                         spin_unlock_irqrestore(&devdata->priv_lock, flags);
544                         schedule_timeout(msecs_to_jiffies(10));
545                         spin_lock_irqsave(&devdata->priv_lock, flags);
546                         if (atomic_read(&devdata->usage) == 1)
547                                 break;
548                 }
549         }
550         /* we've set enabled to 0, so we can give up the lock. */
551         spin_unlock_irqrestore(&devdata->priv_lock, flags);
552
553         /* stop the transmit queue so nothing more can be transmitted */
554         netif_stop_queue(netdev);
555
556         napi_disable(&devdata->napi);
557
558         skb_queue_purge(&devdata->xmitbufhead);
559
560         /* Free rcv buffers - other end has automatically unposted them on
561          * disable
562          */
563         for (i = 0; i < devdata->num_rcv_bufs; i++) {
564                 if (devdata->rcvbuf[i]) {
565                         kfree_skb(devdata->rcvbuf[i]);
566                         devdata->rcvbuf[i] = NULL;
567                 }
568         }
569
570         return 0;
571 }
572
573 /**
574  *      init_rcv_bufs  -- initialize receive bufs and send them to the IO Part
575  *      @netdev: struct netdevice
576  *      @devdata: visornic_devdata
577  *
578  *      Allocate rcv buffers and post them to the IO Partition.
579  *      Return 0 for success, and negative for failure.
580  */
581 static int
582 init_rcv_bufs(struct net_device *netdev, struct visornic_devdata *devdata)
583 {
584         int i, count;
585
586         /* allocate a fixed number of receive buffers to post to uisnic;
587          * post the receive buffers after we've allocated the required amount
588          */
589         for (i = 0; i < devdata->num_rcv_bufs; i++) {
590                 devdata->rcvbuf[i] = alloc_rcv_buf(netdev);
591                 if (!devdata->rcvbuf[i])
592                         break; /* if we failed to allocate one let us stop */
593         }
594         if (i == 0) /* couldn't even allocate one -- bail out */
595                 return -ENOMEM;
596         count = i;
597
598         /* Ensure we can alloc 2/3rds of the requested number of buffers.
599          * 2/3 is an arbitrary choice; used also in ndis init.c
600          */
601         if (count < ((2 * devdata->num_rcv_bufs) / 3)) {
602                 /* free receive buffers we did alloc and then bail out */
603                 for (i = 0; i < count; i++) {
604                         kfree_skb(devdata->rcvbuf[i]);
605                         devdata->rcvbuf[i] = NULL;
606                 }
607                 return -ENOMEM;
608         }
609
610         /* post receive buffers to receive incoming input - without holding
611          * lock - we've not enabled nor started the queue so there shouldn't
612          * be any rcv or xmit activity
613          */
614         for (i = 0; i < count; i++)
615                 post_skb(devdata->cmdrsp_rcv, devdata, devdata->rcvbuf[i]);
616
617         return 0;
618 }
619
620 /**
621  *      visornic_enable_with_timeout    - send enable to IO Part
622  *      @netdev: struct net_device
623  *      @timeout: Time to wait for the ACK from the enable
624  *
625  *      Sends enable to the IOVM, and inits and posts receive buffers to it.
626  *      timeout is defined in msecs (timeout of 0 specifies infinite wait)
627  *      Return 0 for success, negative for failure.
628  */
629 static int
630 visornic_enable_with_timeout(struct net_device *netdev, const int timeout)
631 {
632         int i;
633         struct visornic_devdata *devdata = netdev_priv(netdev);
634         unsigned long flags;
635         int wait = 0;
636
637         /* NOTE: the other end automatically unposts the rcv buffers when it
638          * gets a disable.
639          */
640         i = init_rcv_bufs(netdev, devdata);
641         if (i < 0) {
642                 dev_err(&netdev->dev,
643                         "%s failed to init rcv bufs (%d)\n", __func__, i);
644                 return i;
645         }
646
647         spin_lock_irqsave(&devdata->priv_lock, flags);
648         devdata->enabled = 1;
649         devdata->enab_dis_acked = 0;
650
651         /* now we're ready, let's send an ENB to uisnic but until we get
652          * an ACK back from uisnic, we'll drop the packets
653          */
654         devdata->n_rcv_packets_not_accepted = 0;
655         spin_unlock_irqrestore(&devdata->priv_lock, flags);
656
657         /* send enable and wait for ack -- don't hold lock when sending enable
658          * because if the queue is full, insert might sleep.
659          */
660         napi_enable(&devdata->napi);
661         send_enbdis(netdev, 1, devdata);
662
663         spin_lock_irqsave(&devdata->priv_lock, flags);
664         while ((timeout == VISORNIC_INFINITE_RSP_WAIT) ||
665                (wait < timeout)) {
666                 if (devdata->enab_dis_acked)
667                         break;
668                 if (devdata->server_down || devdata->server_change_state) {
669                         spin_unlock_irqrestore(&devdata->priv_lock, flags);
670                         dev_dbg(&netdev->dev, "%s server went away\n",
671                                 __func__);
672                         return -EIO;
673                 }
674                 set_current_state(TASK_INTERRUPTIBLE);
675                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
676                 wait += schedule_timeout(msecs_to_jiffies(10));
677                 spin_lock_irqsave(&devdata->priv_lock, flags);
678         }
679
680         spin_unlock_irqrestore(&devdata->priv_lock, flags);
681
682         if (!devdata->enab_dis_acked) {
683                 dev_err(&netdev->dev, "%s missing ACK\n", __func__);
684                 return -EIO;
685         }
686
687         netif_start_queue(netdev);
688
689         return 0;
690 }
691
692 /**
693  *      visornic_timeout_reset  - handle xmit timeout resets
694  *      @work: work item that scheduled the work
695  *
696  *      Transmit timeouts are typically handled by resetting the
697  *      device; for our virtual NIC we will send a disable and enable
698  *      to the IOVM. If it doesn't respond we will trigger a serverdown.
699  */
700 static void
701 visornic_timeout_reset(struct work_struct *work)
702 {
703         struct visornic_devdata *devdata;
704         struct net_device *netdev;
705         int response = 0;
706
707         devdata = container_of(work, struct visornic_devdata, timeout_reset);
708         netdev = devdata->netdev;
709
710         rtnl_lock();
711         if (!netif_running(netdev)) {
712                 rtnl_unlock();
713                 return;
714         }
715
716         response = visornic_disable_with_timeout(netdev,
717                                                  VISORNIC_INFINITE_RSP_WAIT);
718         if (response)
719                 goto call_serverdown;
720
721         response = visornic_enable_with_timeout(netdev,
722                                                 VISORNIC_INFINITE_RSP_WAIT);
723         if (response)
724                 goto call_serverdown;
725
726         rtnl_unlock();
727
728         return;
729
730 call_serverdown:
731         visornic_serverdown(devdata, NULL);
732         rtnl_unlock();
733 }
734
735 /**
736  *      visornic_open - Enable the visornic device and mark the queue started
737  *      @netdev: netdevice to start
738  *
739  *      Enable the device and start the transmit queue.
740  *      Return 0 for success
741  */
742 static int
743 visornic_open(struct net_device *netdev)
744 {
745         visornic_enable_with_timeout(netdev, VISORNIC_INFINITE_RSP_WAIT);
746
747         return 0;
748 }
749
750 /**
751  *      visornic_close - Disables the visornic device and stops the queues
752  *      @netdev: netdevice to stop
753  *
754  *      Disable the device and stop the transmit queue.
755  *      Return 0 for success
756  */
757 static int
758 visornic_close(struct net_device *netdev)
759 {
760         visornic_disable_with_timeout(netdev, VISORNIC_INFINITE_RSP_WAIT);
761
762         return 0;
763 }
764
765 /**
766  *      devdata_xmits_outstanding - compute outstanding xmits
767  *      @devdata: visornic_devdata for device
768  *
769  *      Return value is the number of outstanding xmits.
770  */
771 static unsigned long devdata_xmits_outstanding(struct visornic_devdata *devdata)
772 {
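        /* sent_xmit and got_xmit_done are free-running counters; if sent_xmit
         * has wrapped past got_xmit_done, account for the wraparound.
         */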
773         if (devdata->chstat.sent_xmit >= devdata->chstat.got_xmit_done)
774                 return devdata->chstat.sent_xmit -
775                         devdata->chstat.got_xmit_done;
776         else
777                 return (ULONG_MAX - devdata->chstat.got_xmit_done
778                         + devdata->chstat.sent_xmit + 1);
779 }
780
781 /**
782  *      vnic_hit_high_watermark
783  *      @devdata: indicates visornic device we are checking
784  *      @high_watermark: max num of unacked xmits we will tolerate,
785  *                       before we will start throttling
786  *
787  *      Returns true iff the number of unacked xmits sent to
788  *      the IO partition is >= high_watermark.
789  */
790 static inline bool vnic_hit_high_watermark(struct visornic_devdata *devdata,
791                                            ulong high_watermark)
792 {
793         return (devdata_xmits_outstanding(devdata) >= high_watermark);
794 }
795
796 /**
797  *      vnic_hit_low_watermark
798  *      @devdata: indicates visornic device we are checking
799  *      @low_watermark: we will wait until the num of unacked xmits
800  *                      drops to this value or lower before we start
801  *                      transmitting again
802  *
803  *      Returns true iff the number of unacked xmits sent to
804  *      the IO partition is <= low_watermark.
805  */
806 static inline bool vnic_hit_low_watermark(struct visornic_devdata *devdata,
807                                           ulong low_watermark)
808 {
809         return (devdata_xmits_outstanding(devdata) <= low_watermark);
810 }
811
812 /**
813  *      visornic_xmit - send a packet to the IO Partition
814  *      @skb: Packet to be sent
815  *      @netdev: net device the packet is being sent from
816  *
817  *      Convert the skb to a cmdrsp so the IO Partition can understand it.
818  *      Send the XMIT command to the IO Partition for processing. This
819  *      function is protected from concurrent calls by a spinlock xmit_lock
820  *      in the net_device struct, but as soon as the function returns it
821  *      can be called again.
822  *      Returns NETDEV_TX_OK.
823  */
824 static int
825 visornic_xmit(struct sk_buff *skb, struct net_device *netdev)
826 {
827         struct visornic_devdata *devdata;
828         int len, firstfraglen, padlen;
829         struct uiscmdrsp *cmdrsp = NULL;
830         unsigned long flags;
831
832         devdata = netdev_priv(netdev);
833         spin_lock_irqsave(&devdata->priv_lock, flags);
834
835         if (netif_queue_stopped(netdev) || devdata->server_down ||
836             devdata->server_change_state) {
837                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
838                 devdata->busy_cnt++;
839                 dev_dbg(&netdev->dev,
840                         "%s busy - queue stopped\n", __func__);
841                 kfree_skb(skb);
842                 return NETDEV_TX_OK;
843         }
844
845         /* sk_buff struct is used to host network data throughout all the
846          * linux network subsystems
847          */
848         len = skb->len;
849
850         /* skb->len is the FULL length of data (including fragmentary portion)
851          * skb->data_len is the length of the fragment portion in frags
852          * skb->len - skb->data_len is size of the 1st fragment in skb->data
853          * calculate the length of the first fragment that skb->data is
854          * pointing to
855          */
856         firstfraglen = skb->len - skb->data_len;
857         if (firstfraglen < ETH_HEADER_SIZE) {
858                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
859                 devdata->busy_cnt++;
860                 dev_err(&netdev->dev,
861                         "%s busy - first frag too small (%d)\n",
862                         __func__, firstfraglen);
863                 kfree_skb(skb);
864                 return NETDEV_TX_OK;
865         }
866
867         if ((len < ETH_MIN_PACKET_SIZE) &&
868             ((skb_end_pointer(skb) - skb->data) >= ETH_MIN_PACKET_SIZE)) {
869                 /* pad the packet out to minimum size */
870                 padlen = ETH_MIN_PACKET_SIZE - len;
871                 memset(&skb->data[len], 0, padlen);
872                 skb->tail += padlen;
873                 skb->len += padlen;
874                 len += padlen;
875                 firstfraglen += padlen;
876         }
877
878         cmdrsp = devdata->xmit_cmdrsp;
879         /* clear cmdrsp */
880         memset(cmdrsp, 0, SIZEOF_CMDRSP);
881         cmdrsp->net.type = NET_XMIT;
882         cmdrsp->cmdtype = CMD_NET_TYPE;
883
884         /* save the pointer to skb -- we'll need it for completion */
885         cmdrsp->net.buf = skb;
886
887         if (vnic_hit_high_watermark(devdata,
888                                     devdata->max_outstanding_net_xmits)) {
889                 /* too many NET_XMITs queued over to IOVM - need to wait
890                  */
891                 devdata->chstat.reject_count++;
892                 if (!devdata->queuefullmsg_logged &&
893                     ((devdata->chstat.reject_count & 0x3ff) == 1))
894                         devdata->queuefullmsg_logged = 1;
895                 netif_stop_queue(netdev);
896                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
897                 devdata->busy_cnt++;
898                 dev_dbg(&netdev->dev,
899                         "%s busy - waiting for iovm to catch up\n",
900                         __func__);
901                 kfree_skb(skb);
902                 return NETDEV_TX_OK;
903         }
904         if (devdata->queuefullmsg_logged)
905                 devdata->queuefullmsg_logged = 0;
906
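        /* Pass checksum-related info (protocol, raw header offsets, csum)
         * through the cmdrsp so the IO partition has what it needs for
         * checksum handling.
         */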
907         if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
908                 cmdrsp->net.xmt.lincsum.valid = 1;
909                 cmdrsp->net.xmt.lincsum.protocol = skb->protocol;
910                 if (skb_transport_header(skb) > skb->data) {
911                         cmdrsp->net.xmt.lincsum.hrawoff =
912                                 skb_transport_header(skb) - skb->data;
913                         cmdrsp->net.xmt.lincsum.hrawoffv = 1;
914                 }
915                 if (skb_network_header(skb) > skb->data) {
916                         cmdrsp->net.xmt.lincsum.nhrawoff =
917                                 skb_network_header(skb) - skb->data;
918                         cmdrsp->net.xmt.lincsum.nhrawoffv = 1;
919                 }
920                 cmdrsp->net.xmt.lincsum.csum = skb->csum;
921         } else {
922                 cmdrsp->net.xmt.lincsum.valid = 0;
923         }
924
925         /* save off the length of the entire data packet */
926         cmdrsp->net.xmt.len = len;
927
928         /* copy ethernet header from first frag into cmdrsp
929          * - everything else will be passed in frags & DMA'ed
930          */
931         memcpy(cmdrsp->net.xmt.ethhdr, skb->data, ETH_HEADER_SIZE);
932         /* copy frags info - from skb->data we need to only provide access
933          * beyond eth header
934          */
935         cmdrsp->net.xmt.num_frags =
936                 visor_copy_fragsinfo_from_skb(skb, firstfraglen,
937                                               MAX_PHYS_INFO,
938                                               cmdrsp->net.xmt.frags);
939         if (cmdrsp->net.xmt.num_frags < 0) {
940                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
941                 devdata->busy_cnt++;
942                 dev_err(&netdev->dev,
943                         "%s busy - copy frags failed\n", __func__);
944                 kfree_skb(skb);
945                 return NETDEV_TX_OK;
946         }
947
948         if (!visorchannel_signalinsert(devdata->dev->visorchannel,
949                                        IOCHAN_TO_IOPART, cmdrsp)) {
950                 netif_stop_queue(netdev);
951                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
952                 devdata->busy_cnt++;
953                 dev_dbg(&netdev->dev,
954                         "%s busy - signalinsert failed\n", __func__);
955                 kfree_skb(skb);
956                 return NETDEV_TX_OK;
957         }
958
959         /* Track the skbs that have been sent to the IOVM for XMIT */
960         skb_queue_head(&devdata->xmitbufhead, skb);
961
962         /* update xmt stats */
963         devdata->net_stats.tx_packets++;
964         devdata->net_stats.tx_bytes += skb->len;
965         devdata->chstat.sent_xmit++;
966
967         /* check to see if we have hit the high watermark for
968          * netif_stop_queue()
969          */
970         if (vnic_hit_high_watermark(devdata,
971                                     devdata->upper_threshold_net_xmits)) {
972                 /* too many NET_XMITs queued over to IOVM - need to wait */
973                 netif_stop_queue(netdev); /* calling stop queue - call
974                                            * netif_wake_queue() after lower
975                                            * threshold
976                                            */
977                 dev_dbg(&netdev->dev,
978                         "%s busy - invoking iovm flow control\n",
979                         __func__);
980                 devdata->flow_control_upper_hits++;
981         }
982         spin_unlock_irqrestore(&devdata->priv_lock, flags);
983
984         /* skb will be freed when we get back NET_XMIT_DONE */
985         return NETDEV_TX_OK;
986 }
987
988 /**
989  *      visornic_get_stats - returns net_stats of the visornic device
990  *      @netdev: netdevice
991  *
992  *      Returns the net_device_stats for the device
993  */
994 static struct net_device_stats *
995 visornic_get_stats(struct net_device *netdev)
996 {
997         struct visornic_devdata *devdata = netdev_priv(netdev);
998
999         return &devdata->net_stats;
1000 }
1001
1002 /**
1003  *      visornic_change_mtu - changes mtu of device.
1004  *      @netdev: netdevice
1005  *      @new_mtu: value of new mtu
1006  *
1007  *      MTU cannot be changed by system, must be changed via
1008  *      CONTROLVM message. All vnics and pnics in a switch have
1009  *      to have the same MTU for everything to work.
1010  *      Currently not supported.
1011  *      Returns -EINVAL
1012  */
1013 static int
1014 visornic_change_mtu(struct net_device *netdev, int new_mtu)
1015 {
1016         return -EINVAL;
1017 }
1018
1019 /**
1020  *      visornic_set_multi - apply rx-mode flag changes to the device.
1021  *      @netdev: netdevice
1022  *
1023  *      Only flag we support currently is IFF_PROMISC
1024  *      Returns void
1025  */
1026 static void
1027 visornic_set_multi(struct net_device *netdev)
1028 {
1029         struct uiscmdrsp *cmdrsp;
1030         struct visornic_devdata *devdata = netdev_priv(netdev);
1031
1032         /* any filtering changes */
1033         if (devdata->old_flags != netdev->flags) {
1034                 if ((netdev->flags & IFF_PROMISC) !=
1035                     (devdata->old_flags & IFF_PROMISC)) {
1036                         cmdrsp = kmalloc(SIZEOF_CMDRSP, GFP_ATOMIC);
1037                         if (!cmdrsp)
1038                                 return;
1039                         cmdrsp->cmdtype = CMD_NET_TYPE;
1040                         cmdrsp->net.type = NET_RCV_PROMISC;
1041                         cmdrsp->net.enbdis.context = netdev;
1042                         cmdrsp->net.enbdis.enable =
1043                                 (netdev->flags & IFF_PROMISC);
1044                         visorchannel_signalinsert(devdata->dev->visorchannel,
1045                                                   IOCHAN_TO_IOPART,
1046                                                   cmdrsp);
1047                         kfree(cmdrsp);
1048                 }
1049                 devdata->old_flags = netdev->flags;
1050         }
1051 }
1052
1053 /**
1054  *      visornic_xmit_timeout - request to timeout the xmit
1055  *      @netdev: netdevice whose transmit has timed out
1056  *
1057  *      Queue the work and return. Make sure we have not already
1058  *      been informed that the IO Partition is gone; if it is gone
1059  *      we will have already timed out the xmits.
1060  */
1061 static void
1062 visornic_xmit_timeout(struct net_device *netdev)
1063 {
1064         struct visornic_devdata *devdata = netdev_priv(netdev);
1065         unsigned long flags;
1066
1067         spin_lock_irqsave(&devdata->priv_lock, flags);
1068         if (devdata->going_away) {
1069                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1070                 dev_dbg(&devdata->dev->device,
1071                         "%s aborting because device removal pending\n",
1072                         __func__);
1073                 return;
1074         }
1075
1076         /* Ensure that a ServerDown message hasn't been received */
1077         if (!devdata->enabled ||
1078             (devdata->server_down && !devdata->server_change_state)) {
1079                 dev_dbg(&netdev->dev, "%s no processing\n",
1080                         __func__);
1081                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1082                 return;
1083         }
1084         queue_work(visornic_timeout_reset_workqueue, &devdata->timeout_reset);
1085         spin_unlock_irqrestore(&devdata->priv_lock, flags);
1086 }
1087
1088 /**
1089  *      repost_return   - repost rcv bufs that have come back
1090  *      @cmdrsp: io channel command struct to post
1091  *      @devdata: visornic devdata for the device
1092  *      @skb: skb
1093  *      @netdev: netdevice
1094  *
1095  *      Repost rcv buffers that have been returned to us when
1096  *      we are finished with them.
1097  *      Returns 0 for success, negative errno for error.
1098  */
1099 static inline int
1100 repost_return(struct uiscmdrsp *cmdrsp, struct visornic_devdata *devdata,
1101               struct sk_buff *skb, struct net_device *netdev)
1102 {
1103         struct net_pkt_rcv copy;
1104         int i = 0, cc, numreposted;
1105         int found_skb = 0;
1106         int status = 0;
1107
1108         copy = cmdrsp->net.rcv;
1109         switch (copy.numrcvbufs) {
1110         case 0:
1111                 devdata->n_rcv0++;
1112                 break;
1113         case 1:
1114                 devdata->n_rcv1++;
1115                 break;
1116         case 2:
1117                 devdata->n_rcv2++;
1118                 break;
1119         default:
1120                 devdata->n_rcvx++;
1121                 break;
1122         }
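        /* For each rcv buffer the IO partition handed back, find its slot in
         * our rcvbuf[] table, replace it with a freshly allocated buffer, and
         * post the new buffer back to the IO partition.
         */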
1123         for (cc = 0, numreposted = 0; cc < copy.numrcvbufs; cc++) {
1124                 for (i = 0; i < devdata->num_rcv_bufs; i++) {
1125                         if (devdata->rcvbuf[i] != copy.rcvbuf[cc])
1126                                 continue;
1127
1128                         if ((skb) && devdata->rcvbuf[i] == skb) {
1129                                 devdata->found_repost_rcvbuf_cnt++;
1130                                 found_skb = 1;
1131                                 devdata->repost_found_skb_cnt++;
1132                         }
1133                         devdata->rcvbuf[i] = alloc_rcv_buf(netdev);
1134                         if (!devdata->rcvbuf[i]) {
1135                                 devdata->num_rcv_bufs_could_not_alloc++;
1136                                 devdata->alloc_failed_in_repost_rtn_cnt++;
1137                                 status = -ENOMEM;
1138                                 break;
1139                         }
1140                         post_skb(cmdrsp, devdata, devdata->rcvbuf[i]);
1141                         numreposted++;
1142                         break;
1143                 }
1144         }
1145         if (numreposted != copy.numrcvbufs) {
1146                 devdata->n_repost_deficit++;
1147                 status = -EINVAL;
1148         }
1149         if (skb) {
1150                 if (found_skb) {
1151                         kfree_skb(skb);
1152                 } else {
1153                         status = -EINVAL;
1154                         devdata->bad_rcv_buf++;
1155                 }
1156         }
1157         return status;
1158 }
1159
1160 /**
1161  *      visornic_rx - Handle receive packets coming back from IO Part
1162  *      @cmdrsp: Receive packet returned from IO Part
1163  *
1164  *      Got a receive packet back from the IO Part, handle it and send
1165  *      it up the stack.
1166  *      Returns 1 if the packet was delivered up the stack, otherwise 0.
1167  */
1168 static int
1169 visornic_rx(struct uiscmdrsp *cmdrsp)
1170 {
1171         struct visornic_devdata *devdata;
1172         struct sk_buff *skb, *prev, *curr;
1173         struct net_device *netdev;
1174         int cc, currsize, off;
1175         struct ethhdr *eth;
1176         unsigned long flags;
1177         int rx_count = 0;
1178
1179         /* post a new rcv buf to the other end using the cmdrsp we have at
1180          * hand; post it without holding the lock - the signal lock is used to
1181          * synchronize the queue insert. The cmdrsp that contains the net.rcv
1182          * is the one we are using to repost, so copy the info we need from it.
1183          */
1184         skb = cmdrsp->net.buf;
1185         netdev = skb->dev;
1186
1187         devdata = netdev_priv(netdev);
1188
1189         spin_lock_irqsave(&devdata->priv_lock, flags);
1190         atomic_dec(&devdata->num_rcvbuf_in_iovm);
1191
1192         /* set length to how much was ACTUALLY received -
1193          * NOTE: rcv_done_len includes actual length of data rcvd
1194          * including ethhdr
1195          */
1196         skb->len = cmdrsp->net.rcv.rcv_done_len;
1197
1198         /* update rcv stats - call it with priv_lock held */
1199         devdata->net_stats.rx_packets++;
1200         devdata->net_stats.rx_bytes += skb->len;
1201
1202         /* test enabled while holding lock */
1203         if (!(devdata->enabled && devdata->enab_dis_acked)) {
1204                 /* don't process it unless we're in enable mode and until
1205                  * we've gotten an ACK saying the other end got our RCV enable
1206                  */
1207                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1208                 repost_return(cmdrsp, devdata, skb, netdev);
1209                 return rx_count;
1210         }
1211
1212         spin_unlock_irqrestore(&devdata->priv_lock, flags);
1213
1214         /* when skb was allocated, skb->dev, skb->data, skb->len and
1215          * skb->data_len were setup. AND, data has already put into the
1216          * skb (both first frag and in frags pages)
1217          * NOTE: firstfragslen is the amount of data in skb->data and that
1218          * which is not in nr_frags or frag_list. This is now simply
1219          * RCVPOST_BUF_SIZE. bump tail to show how much data is in
1220          * firstfrag & set data_len to show rest see if we have to chain
1221          * frag_list.
1222          */
1223         if (skb->len > RCVPOST_BUF_SIZE) {      /* do PRECAUTIONARY check */
1224                 if (cmdrsp->net.rcv.numrcvbufs < 2) {
1225                         if (repost_return(cmdrsp, devdata, skb, netdev) < 0)
1226                                 dev_err(&devdata->netdev->dev,
1227                                         "repost_return failed");
1228                         return rx_count;
1229                 }
1230                 /* length rcvd is greater than firstfrag in this skb rcv buf  */
1231                 skb->tail += RCVPOST_BUF_SIZE;  /* amount in skb->data */
1232                 skb->data_len = skb->len - RCVPOST_BUF_SIZE;    /* amount that
1233                                                                    will be in
1234                                                                    frag_list */
1235         } else {
1236                 /* data fits in this skb - no chaining - do
1237                  * PRECAUTIONARY check
1238                  */
1239                 if (cmdrsp->net.rcv.numrcvbufs != 1) {  /* should be 1 */
1240                         if (repost_return(cmdrsp, devdata, skb, netdev) < 0)
1241                                 dev_err(&devdata->netdev->dev,
1242                                         "repost_return failed");
1243                         return rx_count;
1244                 }
1245                 skb->tail += skb->len;
1246                 skb->data_len = 0;      /* nothing rcvd in frag_list */
1247         }
1248         off = skb_tail_pointer(skb) - skb->data;
1249
1250         /* off is the amount we bumped tail by in the head skb;
1251          * it is used to calculate the size of each chained skb below
1252          * and it is also used to index into the buffer to continue the copy
1253          * (for chansocktwopc).
1254          * If necessary, chain the rcv skbs together.
1255          * NOTE: index 0 (cmdrsp->net.rcv.rcvbuf[0]) is this same skb; we need to
1256          * chain the rest to that one.
1257          * - do PRECAUTIONARY check
1258          */
1259         if (cmdrsp->net.rcv.rcvbuf[0] != skb) {
1260                 if (repost_return(cmdrsp, devdata, skb, netdev) < 0)
1261                         dev_err(&devdata->netdev->dev, "repost_return failed");
1262                 return rx_count;
1263         }
1264
1265         if (cmdrsp->net.rcv.numrcvbufs > 1) {
1266                 /* chain the various rcv buffers into the skb's frag_list. */
1267                 /* Note: off was initialized above  */
1268                 for (cc = 1, prev = NULL;
1269                      cc < cmdrsp->net.rcv.numrcvbufs; cc++) {
1270                         curr = (struct sk_buff *)cmdrsp->net.rcv.rcvbuf[cc];
1271                         curr->next = NULL;
1272                         if (!prev)      /* start of list- set head */
1273                                 skb_shinfo(skb)->frag_list = curr;
1274                         else
1275                                 prev->next = curr;
1276                         prev = curr;
1277
1278                         /* should we set skb->len and skb->data_len for each
1279                          * buffer being chained??? can't hurt!
1280                          */
1281                         currsize = min(skb->len - off,
1282                                        (unsigned int)RCVPOST_BUF_SIZE);
1283                         curr->len = currsize;
1284                         curr->tail += currsize;
1285                         curr->data_len = 0;
1286                         off += currsize;
1287                 }
1288                 /* assert skb->len == off */
1289                 if (skb->len != off) {
1290                         netdev_err(devdata->netdev,
1291                                    "something wrong; skb->len:%d != off:%d\n",
1292                                    skb->len, off);
1293                 }
1294         }
1295
1296         /* set up packet's protocol type using ethernet header - this
1297          * sets up skb->pkt_type & it also PULLS out the eth header
1298          */
1299         skb->protocol = eth_type_trans(skb, netdev);
1300
1301         eth = eth_hdr(skb);
1302
1303         skb->csum = 0;
1304         skb->ip_summed = CHECKSUM_NONE;
1305
1306         do {
1307                 if (netdev->flags & IFF_PROMISC)
1308                         break;  /* accept all packets */
1309                 if (skb->pkt_type == PACKET_BROADCAST) {
1310                         if (netdev->flags & IFF_BROADCAST)
1311                                 break;  /* accept all broadcast packets */
1312                 } else if (skb->pkt_type == PACKET_MULTICAST) {
1313                         if ((netdev->flags & IFF_MULTICAST) &&
1314                             (netdev_mc_count(netdev))) {
1315                                 struct netdev_hw_addr *ha;
1316                                 int found_mc = 0;
1317
1318                                 /* only accept multicast packets that we can
1319                                  * find in our multicast address list
1320                                  */
1321                                 netdev_for_each_mc_addr(ha, netdev) {
1322                                         if (ether_addr_equal(eth->h_dest,
1323                                                              ha->addr)) {
1324                                                 found_mc = 1;
1325                                                 break;
1326                                         }
1327                                 }
1328                                 if (found_mc)
1329                                         break;  /* accept packet, dest
1330                                                    matches a multicast
1331                                                    address */
1332                         }
1333                 } else if (skb->pkt_type == PACKET_HOST) {
1334                         break;  /* accept packet, h_dest must match vnic
1335                                    mac address */
1336                 } else if (skb->pkt_type == PACKET_OTHERHOST) {
1337                         /* something is not right */
1338                         dev_err(&devdata->netdev->dev,
1339                                 "**** FAILED to deliver rcv packet to OS; name:%s Dest:%pM VNIC:%pM\n",
1340                                 netdev->name, eth->h_dest, netdev->dev_addr);
1341                 }
1342                 /* drop packet - don't forward it up to OS */
1343                 devdata->n_rcv_packets_not_accepted++;
1344                 repost_return(cmdrsp, devdata, skb, netdev);
1345                 return rx_count;
1346         } while (0);
1347
1348         rx_count++;
1349         netif_receive_skb(skb);
1350         /* netif_receive_skb returns a status, but in practice most
1351          * drivers ignore the return value
1352          */
1353
1354         skb = NULL;
1355         /*
1356          * whether the packet got dropped or handled, the skb is freed by
1357          * kernel code, so we shouldn't free it. but we should repost a
1358          * new rcv buffer.
1359          */
1360         repost_return(cmdrsp, devdata, skb, netdev);
1361         return rx_count;
1362 }
1363
1364 /**
1365  *      devdata_initialize      - Initialize devdata structure
1366  *      @devdata: visornic_devdata structure to initialize
1367  *      @dev: visor_device it belongs to
1368  *
1369  *      Setup initial values for the visornic based on channel and default
1370  *      values.
1371  *      Returns a pointer to the devdata if successful, else NULL
1372  */
1373 static struct visornic_devdata *
1374 devdata_initialize(struct visornic_devdata *devdata, struct visor_device *dev)
1375 {
1376         int devnum = -1;
1377
1378         if (!devdata)
1379                 return NULL;
1380         memset(devdata, '\0', sizeof(struct visornic_devdata));
1381         spin_lock(&dev_num_pool_lock);
1382         devnum = find_first_zero_bit(dev_num_pool, MAXDEVICES);
1383         if (devnum < MAXDEVICES)
1384                 set_bit(devnum, dev_num_pool);
1385         spin_unlock(&dev_num_pool_lock);
1386         /* no free device numbers left */
1387         if (devnum >= MAXDEVICES)
1388                 return NULL;
1389         devdata->devnum = devnum;
1390         devdata->dev = dev;
1391         strncpy(devdata->name, dev_name(&dev->device), sizeof(devdata->name));
1392         spin_lock(&lock_all_devices);
1393         list_add_tail(&devdata->list_all, &list_all_devices);
1394         spin_unlock(&lock_all_devices);
1395         return devdata;
1396 }
1397
1398 /**
1399  *      devdata_release - Frees up references in devdata
1400  *      @devdata: struct to clean up
1401  *
1402  *      Frees up references in devdata.
1403  *      Returns void
1404  */
1405 static void devdata_release(struct visornic_devdata *devdata)
1406 {
1407         spin_lock(&dev_num_pool_lock);
1408         clear_bit(devdata->devnum, dev_num_pool);
1409         spin_unlock(&dev_num_pool_lock);
1410         spin_lock(&lock_all_devices);
1411         list_del(&devdata->list_all);
1412         spin_unlock(&lock_all_devices);
1413         kfree(devdata->rcvbuf);
1414         kfree(devdata->cmdrsp_rcv);
1415         kfree(devdata->xmit_cmdrsp);
1416 }
1417
1418 static const struct net_device_ops visornic_dev_ops = {
1419         .ndo_open = visornic_open,
1420         .ndo_stop = visornic_close,
1421         .ndo_start_xmit = visornic_xmit,
1422         .ndo_get_stats = visornic_get_stats,
1423         .ndo_change_mtu = visornic_change_mtu,
1424         .ndo_tx_timeout = visornic_xmit_timeout,
1425         .ndo_set_rx_mode = visornic_set_multi,
1426 };
1427
1428 /* DebugFS code */
1429 static ssize_t info_debugfs_read(struct file *file, char __user *buf,
1430                                  size_t len, loff_t *offset)
1431 {
1432         ssize_t bytes_read = 0;
1433         int str_pos = 0;
1434         struct visornic_devdata *devdata;
1435         struct net_device *dev;
1436         char *vbuf;
1437
1438         if (len > MAX_BUF)
1439                 len = MAX_BUF;
1440         vbuf = kzalloc(len, GFP_KERNEL);
1441         if (!vbuf)
1442                 return -ENOMEM;
1443
1444         /* for each vnic channel
1445          * dump out channel specific data
1446          */
1447         rcu_read_lock();
1448         for_each_netdev_rcu(current->nsproxy->net_ns, dev) {
1449                 /* Only consider netdevs that are visornic; skip any whose
1450                  * transmit queue is not currently stopped
1451                  */
1452                 if ((dev->netdev_ops != &visornic_dev_ops) ||
1453                     (!netif_queue_stopped(dev)))
1454                         continue;
1455
1456                 devdata = netdev_priv(dev);
1457                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1458                                      "netdev = %s (0x%p), MAC Addr %pM\n",
1459                                      dev->name,
1460                                      dev,
1461                                      dev->dev_addr);
1462                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1463                                      "VisorNic Dev Info = 0x%p\n", devdata);
1464                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1465                                      " num_rcv_bufs = %d\n",
1466                                      devdata->num_rcv_bufs);
1467                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1468                                      " max_outstanding_net_xmits = %lu\n",
1469                                      devdata->max_outstanding_net_xmits);
1470                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1471                                      " upper_threshold_net_xmits = %lu\n",
1472                                      devdata->upper_threshold_net_xmits);
1473                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1474                                      " lower_threshold_net_xmits = %lu\n",
1475                                      devdata->lower_threshold_net_xmits);
1476                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1477                                      " queuefullmsg_logged = %d\n",
1478                                      devdata->queuefullmsg_logged);
1479                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1480                                      " chstat.got_rcv = %lu\n",
1481                                      devdata->chstat.got_rcv);
1482                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1483                                      " chstat.got_enbdisack = %lu\n",
1484                                      devdata->chstat.got_enbdisack);
1485                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1486                                      " chstat.got_xmit_done = %lu\n",
1487                                      devdata->chstat.got_xmit_done);
1488                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1489                                      " chstat.xmit_fail = %lu\n",
1490                                      devdata->chstat.xmit_fail);
1491                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1492                                      " chstat.sent_enbdis = %lu\n",
1493                                      devdata->chstat.sent_enbdis);
1494                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1495                                      " chstat.sent_promisc = %lu\n",
1496                                      devdata->chstat.sent_promisc);
1497                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1498                                      " chstat.sent_post = %lu\n",
1499                                      devdata->chstat.sent_post);
1500                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1501                                      " chstat.sent_post_failed = %lu\n",
1502                                      devdata->chstat.sent_post_failed);
1503                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1504                                      " chstat.sent_xmit = %lu\n",
1505                                      devdata->chstat.sent_xmit);
1506                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1507                                      " chstat.reject_count = %lu\n",
1508                                      devdata->chstat.reject_count);
1509                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1510                                      " chstat.extra_rcvbufs_sent = %lu\n",
1511                                      devdata->chstat.extra_rcvbufs_sent);
1512                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1513                                      " n_rcv0 = %lu\n", devdata->n_rcv0);
1514                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1515                                      " n_rcv1 = %lu\n", devdata->n_rcv1);
1516                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1517                                      " n_rcv2 = %lu\n", devdata->n_rcv2);
1518                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1519                                      " n_rcvx = %lu\n", devdata->n_rcvx);
1520                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1521                                      " num_rcvbuf_in_iovm = %d\n",
1522                                      atomic_read(&devdata->num_rcvbuf_in_iovm));
1523                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1524                                      " alloc_failed_in_if_needed_cnt = %lu\n",
1525                                      devdata->alloc_failed_in_if_needed_cnt);
1526                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1527                                      " alloc_failed_in_repost_rtn_cnt = %lu\n",
1528                                      devdata->alloc_failed_in_repost_rtn_cnt);
1529                 /* str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1530                  *                   " inner_loop_limit_reached_cnt = %lu\n",
1531                  *                   devdata->inner_loop_limit_reached_cnt);
1532                  */
1533                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1534                                      " found_repost_rcvbuf_cnt = %lu\n",
1535                                      devdata->found_repost_rcvbuf_cnt);
1536                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1537                                      " repost_found_skb_cnt = %lu\n",
1538                                      devdata->repost_found_skb_cnt);
1539                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1540                                      " n_repost_deficit = %lu\n",
1541                                      devdata->n_repost_deficit);
1542                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1543                                      " bad_rcv_buf = %lu\n",
1544                                      devdata->bad_rcv_buf);
1545                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1546                                      " n_rcv_packets_not_accepted = %lu\n",
1547                                      devdata->n_rcv_packets_not_accepted);
1548                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1549                                      " interrupts_rcvd = %llu\n",
1550                                      devdata->interrupts_rcvd);
1551                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1552                                      " interrupts_notme = %llu\n",
1553                                      devdata->interrupts_notme);
1554                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1555                                      " interrupts_disabled = %llu\n",
1556                                      devdata->interrupts_disabled);
1557                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1558                                      " busy_cnt = %llu\n",
1559                                      devdata->busy_cnt);
1560                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1561                                      " flow_control_upper_hits = %llu\n",
1562                                      devdata->flow_control_upper_hits);
1563                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1564                                      " flow_control_lower_hits = %llu\n",
1565                                      devdata->flow_control_lower_hits);
1566                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1567                                      " netif_queue = %s\n",
1568                                      netif_queue_stopped(devdata->netdev) ?
1569                                      "stopped" : "running");
1570                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1571                                      " xmits_outstanding = %lu\n",
1572                                      devdata_xmits_outstanding(devdata));
1573         }
1574         rcu_read_unlock();
1575         bytes_read = simple_read_from_buffer(buf, len, offset, vbuf, str_pos);
1576         kfree(vbuf);
1577         return bytes_read;
1578 }
1579
1580 /**
1581  *      send_rcv_posts_if_needed
1582  *      @devdata: visornic device
1583  *
1584  *      Send receive buffers to the IO Partition.
1585  *      Returns void
1586  */
1587 static void
1588 send_rcv_posts_if_needed(struct visornic_devdata *devdata)
1589 {
1590         int i;
1591         struct net_device *netdev;
1592         struct uiscmdrsp *cmdrsp = devdata->cmdrsp_rcv;
1593         int cur_num_rcv_bufs_to_alloc, rcv_bufs_allocated;
1594
1595         /* don't do this until vnic is marked ready */
1596         if (!(devdata->enabled && devdata->enab_dis_acked))
1597                 return;
1598
1599         netdev = devdata->netdev;
1600         rcv_bufs_allocated = 0;
1601         /* this code is trying to prevent getting stuck here forever,
1602          * but still retries if it can't allocate all the buffers this time
1603          */
1604         cur_num_rcv_bufs_to_alloc = devdata->num_rcv_bufs_could_not_alloc;
1605         while (cur_num_rcv_bufs_to_alloc > 0) {
1606                 cur_num_rcv_bufs_to_alloc--;
1607                 for (i = 0; i < devdata->num_rcv_bufs; i++) {
1608                         if (devdata->rcvbuf[i])
1609                                 continue;
1610                         devdata->rcvbuf[i] = alloc_rcv_buf(netdev);
1611                         if (!devdata->rcvbuf[i]) {
1612                                 devdata->alloc_failed_in_if_needed_cnt++;
1613                                 break;
1614                         }
1615                         rcv_bufs_allocated++;
1616                         post_skb(cmdrsp, devdata, devdata->rcvbuf[i]);
1617                         devdata->chstat.extra_rcvbufs_sent++;
1618                 }
1619         }
1620         devdata->num_rcv_bufs_could_not_alloc -= rcv_bufs_allocated;
1621 }
1622
1623 /**
1624  *      service_resp_queue      - drains the response queue
1625  *      @cmdrsp: io channel command response message
1626  *      @devdata: visornic device to drain
1627  *      @rx_work_done: incremented by the number of packets received
1628  *
1629  *      Drain the response queue of any responses from the IO Partition,
1630  *      processing each response as it arrives.  Returns when the queue is empty.
1631  */
1632 static void
1633 service_resp_queue(struct uiscmdrsp *cmdrsp, struct visornic_devdata *devdata,
1634                    int *rx_work_done)
1635 {
1636         unsigned long flags;
1637         struct net_device *netdev;
1638
1639         /* TODO: CLIENT ACQUIRE -- don't really need this at the moment */
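             /* pull responses off the IOCHAN_FROM_IOPART queue until
              * visorchannel_signalremove() reports it empty
              */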
1641         for (;;) {
1642                 if (!visorchannel_signalremove(devdata->dev->visorchannel,
1643                                                IOCHAN_FROM_IOPART,
1644                                                cmdrsp))
1645                         break; /* queue empty */
1646
1647                 switch (cmdrsp->net.type) {
1648                 case NET_RCV:
1649                         devdata->chstat.got_rcv++;
1650                         /* process incoming packet */
1651                         *rx_work_done += visornic_rx(cmdrsp);
1652                         break;
1653                 case NET_XMIT_DONE:
1654                         spin_lock_irqsave(&devdata->priv_lock, flags);
1655                         devdata->chstat.got_xmit_done++;
1656                         if (cmdrsp->net.xmtdone.xmt_done_result)
1657                                 devdata->chstat.xmit_fail++;
1658                         /* only call queue wake if we stopped it */
1659                         netdev = ((struct sk_buff *)cmdrsp->net.buf)->dev;
1660                         /* ASSERT netdev == vnicinfo->netdev; */
1661                         if ((netdev == devdata->netdev) &&
1662                             netif_queue_stopped(netdev)) {
1663                                 /* check to see if we have crossed
1664                                  * the lower watermark for
1665                                  * netif_wake_queue()
1666                                  */
1667                                 if (vnic_hit_low_watermark(devdata,
1668                                         devdata->lower_threshold_net_xmits)) {
1669                                         /* enough NET_XMITs completed
1670                                          * so can restart netif queue
1671                                          */
1672                                         netif_wake_queue(netdev);
1673                                         devdata->flow_control_lower_hits++;
1674                                 }
1675                         }
1676                         skb_unlink(cmdrsp->net.buf, &devdata->xmitbufhead);
1677                         spin_unlock_irqrestore(&devdata->priv_lock, flags);
1678                         kfree_skb(cmdrsp->net.buf);
1679                         break;
1680                 case NET_RCV_ENBDIS_ACK:
1681                         devdata->chstat.got_enbdisack++;
1682                         netdev = (struct net_device *)
1683                                  cmdrsp->net.enbdis.context;
1684                         spin_lock_irqsave(&devdata->priv_lock, flags);
1685                         devdata->enab_dis_acked = 1;
1686                         spin_unlock_irqrestore(&devdata->priv_lock, flags);
1687
1688                         if (devdata->server_down &&
1689                             devdata->server_change_state) {
1690                                 /* Inform Linux that the link is up */
1691                                 devdata->server_down = false;
1692                                 devdata->server_change_state = false;
1693                                 netif_wake_queue(netdev);
1694                                 netif_carrier_on(netdev);
1695                         }
1696                         break;
1697                 case NET_CONNECT_STATUS:
1698                         netdev = devdata->netdev;
1699                         if (cmdrsp->net.enbdis.enable == 1) {
1700                                 spin_lock_irqsave(&devdata->priv_lock, flags);
1701                                 devdata->enabled = cmdrsp->net.enbdis.enable;
1702                                 spin_unlock_irqrestore(&devdata->priv_lock,
1703                                                        flags);
1704                                 netif_wake_queue(netdev);
1705                                 netif_carrier_on(netdev);
1706                         } else {
1707                                 netif_stop_queue(netdev);
1708                                 netif_carrier_off(netdev);
1709                                 spin_lock_irqsave(&devdata->priv_lock, flags);
1710                                 devdata->enabled = cmdrsp->net.enbdis.enable;
1711                                 spin_unlock_irqrestore(&devdata->priv_lock,
1712                                                        flags);
1713                         }
1714                         break;
1715                 default:
1716                         break;
1717                 }
1718                 /* cmdrsp is now available for reuse  */
1719         }
1720 }
1721
1722 static int visornic_poll(struct napi_struct *napi, int budget)
1723 {
1724         struct visornic_devdata *devdata = container_of(napi,
1725                                                         struct visornic_devdata,
1726                                                         napi);
1727         int rx_count = 0;
1728
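             /* replenish receive buffers in the IO Partition, then drain any
              * pending responses; received packets are counted into rx_count
              */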
1729         send_rcv_posts_if_needed(devdata);
1730         service_resp_queue(devdata->cmdrsp, devdata, &rx_count);
1731
1732         /*
1733          * If there aren't any more packets to receive,
1734          * complete the NAPI poll
1735          */
1736         if (rx_count < budget)
1737                 napi_complete(napi);
1738
1739         return rx_count;
1740 }
1741
1742 /**
1743  *      poll_for_irq    - Checks the status of the response queue.
1744  *      @v: void pointer to the visornic devdata
1745  *
1746  *      Timer function that periodically checks the response queue and
1747  *      schedules the NAPI poll to drain it when work is pending.
1748  *      Returns void.
1749  */
1750 static void
1751 poll_for_irq(unsigned long v)
1752 {
1753         struct visornic_devdata *devdata = (struct visornic_devdata *)v;
1754
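             /* the channel is run in polled mode (ULTRA_IO_CHANNEL_IS_POLLING),
              * so this timer stands in for a receive interrupt: schedule the
              * NAPI poll whenever the IO Partition has queued responses
              */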
1755         if (!visorchannel_signalempty(
1756                                    devdata->dev->visorchannel,
1757                                    IOCHAN_FROM_IOPART))
1758                 napi_schedule(&devdata->napi);
1759
1760         atomic_set(&devdata->interrupt_rcvd, 0);
1761
1762         mod_timer(&devdata->irq_poll_timer, jiffies + msecs_to_jiffies(2));
1763 }
1765
1766 /**
1767  *      visornic_probe  - probe function for visornic devices
1768  *      @dev: The visor device discovered
1769  *
1770  *      Called when visorbus discovers a visornic device on its
1771  *      bus. It creates a new visornic ethernet adapter.
1772  *      Returns 0 or negative for error.
1773  */
1774 static int visornic_probe(struct visor_device *dev)
1775 {
1776         struct visornic_devdata *devdata = NULL;
1777         struct net_device *netdev = NULL;
1778         int err;
1779         int channel_offset = 0;
1780         u64 features;
1781
1782         netdev = alloc_etherdev(sizeof(struct visornic_devdata));
1783         if (!netdev) {
1784                 dev_err(&dev->device,
1785                         "%s alloc_etherdev failed\n", __func__);
1786                 return -ENOMEM;
1787         }
1788
1789         netdev->netdev_ops = &visornic_dev_ops;
1790         netdev->watchdog_timeo = (5 * HZ);
1791         SET_NETDEV_DEV(netdev, &dev->device);
1792
1793         /* Get MAC address from channel and read it into the device. */
1794         netdev->addr_len = ETH_ALEN;
1795         channel_offset = offsetof(struct spar_io_channel_protocol,
1796                                   vnic.macaddr);
1797         err = visorbus_read_channel(dev, channel_offset, netdev->dev_addr,
1798                                     ETH_ALEN);
1799         if (err < 0) {
1800                 dev_err(&dev->device,
1801                         "%s failed to get mac addr from chan (%d)\n",
1802                         __func__, err);
1803                 goto cleanup_netdev;
1804         }
1805
1806         devdata = devdata_initialize(netdev_priv(netdev), dev);
1807         if (!devdata) {
1808                 dev_err(&dev->device,
1809                         "%s devdata_initialize failed\n", __func__);
1810                 err = -ENOMEM;
1811                 goto cleanup_netdev;
1812         }
1813
1814         devdata->netdev = netdev;
1815         dev_set_drvdata(&dev->device, devdata);
1816         init_waitqueue_head(&devdata->rsp_queue);
1817         spin_lock_init(&devdata->priv_lock);
1818         devdata->enabled = 0; /* not yet */
1819         atomic_set(&devdata->usage, 1);
1820
1821         /* Setup rcv bufs */
1822         channel_offset = offsetof(struct spar_io_channel_protocol,
1823                                   vnic.num_rcv_bufs);
1824         err = visorbus_read_channel(dev, channel_offset,
1825                                     &devdata->num_rcv_bufs, 4);
1826         if (err) {
1827                 dev_err(&dev->device,
1828                         "%s failed to get #rcv bufs from chan (%d)\n",
1829                         __func__, err);
1830                 goto cleanup_netdev;
1831         }
1832
1833         devdata->rcvbuf = kzalloc(sizeof(struct sk_buff *) *
1834                                   devdata->num_rcv_bufs, GFP_KERNEL);
1835         if (!devdata->rcvbuf) {
1836                 err = -ENOMEM;
1837                 goto cleanup_rcvbuf;
1838         }
1839
1840         /* set the net_xmit outstanding threshold */
1841         /* always leave two slots open but you should have 3 at a minimum */
1842         /* note that max_outstanding_net_xmits must be > 0 */
1843         devdata->max_outstanding_net_xmits =
1844                 max_t(unsigned long, 3, ((devdata->num_rcv_bufs / 3) - 2));
1845         devdata->upper_threshold_net_xmits =
1846                 max_t(unsigned long,
1847                       2, (devdata->max_outstanding_net_xmits - 1));
1848         devdata->lower_threshold_net_xmits =
1849                 max_t(unsigned long,
1850                       1, (devdata->max_outstanding_net_xmits / 2));
1851
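             /* xmitbufhead tracks skbs posted to the IO Partition until the
              * corresponding NET_XMIT_DONE response unlinks and frees them
              */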
1852         skb_queue_head_init(&devdata->xmitbufhead);
1853
1854         /* create a cmdrsp we can use to post and unpost rcv buffers */
1855         devdata->cmdrsp_rcv = kmalloc(SIZEOF_CMDRSP, GFP_ATOMIC);
1856         if (!devdata->cmdrsp_rcv) {
1857                 err = -ENOMEM;
1858                 goto cleanup_cmdrsp_rcv;
1859         }
1860         devdata->xmit_cmdrsp = kmalloc(SIZEOF_CMDRSP, GFP_ATOMIC);
1861         if (!devdata->xmit_cmdrsp) {
1862                 err = -ENOMEM;
1863                 goto cleanup_xmit_cmdrsp;
1864         }
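             /* work item used by the tx-timeout path (visornic_xmit_timeout)
              * to reset the device from the timeout-reset workqueue
              */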
1865         INIT_WORK(&devdata->timeout_reset, visornic_timeout_reset);
1866         devdata->server_down = false;
1867         devdata->server_change_state = false;
1868
1869         /* set the default mtu */
1870         channel_offset = offsetof(struct spar_io_channel_protocol,
1871                                   vnic.mtu);
1872         err = visorbus_read_channel(dev, channel_offset, &netdev->mtu, 4);
1873         if (err) {
1874                 dev_err(&dev->device,
1875                         "%s failed to get mtu from chan (%d)\n",
1876                         __func__, err);
1877                 goto cleanup_xmit_cmdrsp;
1878         }
1879
1880         /* TODO: Setup Interrupt information */
1881         /* Set up NAPI and the polling timer used to get responses */
1882         netif_napi_add(netdev, &devdata->napi, visornic_poll, 64);
1883
1884         setup_timer(&devdata->irq_poll_timer, poll_for_irq,
1885                     (unsigned long)devdata);
1886         /*
1887          * Note: This timer has to start running before the interface is
1888          * enabled because the napi routine is responsible for
1889          * setting enab_dis_acked
1890          */
1891         mod_timer(&devdata->irq_poll_timer, jiffies + msecs_to_jiffies(2));
1892
1893         channel_offset = offsetof(struct spar_io_channel_protocol,
1894                                   channel_header.features);
1895         err = visorbus_read_channel(dev, channel_offset, &features, 8);
1896         if (err) {
1897                 dev_err(&dev->device,
1898                         "%s failed to get features from chan (%d)\n",
1899                         __func__, err);
1900                 goto cleanup_napi_add;
1901         }
1902
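             /* advertise to the IO Partition that this end polls the channel
              * rather than taking interrupts (see poll_for_irq above)
              */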
1903         features |= ULTRA_IO_CHANNEL_IS_POLLING;
1904         err = visorbus_write_channel(dev, channel_offset, &features, 8);
1905         if (err) {
1906                 dev_err(&dev->device,
1907                         "%s failed to set features in chan (%d)\n",
1908                         __func__, err);
1909                 goto cleanup_napi_add;
1910         }
1911
1912         err = register_netdev(netdev);
1913         if (err) {
1914                 dev_err(&dev->device,
1915                         "%s register_netdev failed (%d)\n", __func__, err);
1916                 goto cleanup_napi_add;
1917         }
1918
1919         /* create a debugfs directory for this netdev */
1920         devdata->eth_debugfs_dir = debugfs_create_dir(netdev->name,
1921                                                       visornic_debugfs_dir);
1922         if (!devdata->eth_debugfs_dir) {
1923                 dev_err(&dev->device,
1924                         "%s debugfs_create_dir %s failed\n",
1925                         __func__, netdev->name);
1926                 err = -ENOMEM;
1927                 goto cleanup_register_netdev;
1928         }
1929
1930         dev_info(&dev->device, "%s success netdev=%s\n",
1931                  __func__, netdev->name);
1932         return 0;
1933
1934 cleanup_register_netdev:
1935         unregister_netdev(netdev);
1936
1937 cleanup_napi_add:
1938         del_timer_sync(&devdata->irq_poll_timer);
1939         netif_napi_del(&devdata->napi);
1940
1941 cleanup_xmit_cmdrsp:
1942         kfree(devdata->xmit_cmdrsp);
1943
1944 cleanup_cmdrsp_rcv:
1945         kfree(devdata->cmdrsp_rcv);
1946
1947 cleanup_rcvbuf:
1948         kfree(devdata->rcvbuf);
1949
1950 cleanup_netdev:
1951         free_netdev(netdev);
1952         return err;
1953 }
1954
1955 /**
1956  *      host_side_disappeared   - IO part is gone.
1957  *      @devdata: device object
1958  *
1959  *      IO partition servicing this device is gone, do cleanup
1960  *      Returns void.
1961  */
1962 static void host_side_disappeared(struct visornic_devdata *devdata)
1963 {
1964         unsigned long flags;
1965
1966         spin_lock_irqsave(&devdata->priv_lock, flags);
1967         sprintf(devdata->name, "<dev#%d-history>", devdata->devnum);
1968         devdata->dev = NULL;   /* indicate device destroyed */
1969         spin_unlock_irqrestore(&devdata->priv_lock, flags);
1970 }
1971
1972 /**
1973  *      visornic_remove         - Called when visornic dev goes away
1974  *      @dev: visornic device that is being removed
1975  *
1976  *      Called when DEVICE_DESTROY gets called to remove device.
1977  *      Returns void
1978  */
1979 static void visornic_remove(struct visor_device *dev)
1980 {
1981         struct visornic_devdata *devdata = dev_get_drvdata(&dev->device);
1982         struct net_device *netdev;
1983         unsigned long flags;
1984
1985         if (!devdata) {
1986                 dev_err(&dev->device, "%s no devdata\n", __func__);
1987                 return;
1988         }
1989         spin_lock_irqsave(&devdata->priv_lock, flags);
1990         if (devdata->going_away) {
1991                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1992                 dev_err(&dev->device, "%s already being removed\n", __func__);
1993                 return;
1994         }
1995         devdata->going_away = true;
1996         spin_unlock_irqrestore(&devdata->priv_lock, flags);
1997         netdev = devdata->netdev;
1998         if (!netdev) {
1999                 dev_err(&dev->device, "%s not net device\n", __func__);
2000                 return;
2001         }
2002
2003         /* going_away prevents new items being added to the workqueues */
2004         flush_workqueue(visornic_timeout_reset_workqueue);
2005
2006         debugfs_remove_recursive(devdata->eth_debugfs_dir);
2007
2008         unregister_netdev(netdev);  /* this will call visornic_close() */
2009
2010         del_timer_sync(&devdata->irq_poll_timer);
2011         netif_napi_del(&devdata->napi);
2012
2013         dev_set_drvdata(&dev->device, NULL);
2014         host_side_disappeared(devdata);
2015         devdata_release(devdata);
2016         free_netdev(netdev);
2017 }
2018
2019 /**
2020  *      visornic_pause          - Called when IO Part disappears
2021  *      @dev: visornic device that is being serviced
2022  *      @complete_func: call when finished.
2023  *
2024  *      Called when the IO Partition has gone down. Need to free
2025  *      up resources and wait for IO partition to come back. Mark
2026  *      link as down and don't attempt any DMA. When we have freed
2027  *      memory call the complete_func so that Command knows we are
2028  *      done. If we don't call complete_func, IO part will never
2029  *      come back.
2030  *      Returns 0 for success.
2031  */
2032 static int visornic_pause(struct visor_device *dev,
2033                           visorbus_state_complete_func complete_func)
2034 {
2035         struct visornic_devdata *devdata = dev_get_drvdata(&dev->device);
2036
2037         visornic_serverdown(devdata, complete_func);
2038         return 0;
2039 }
2040
2041 /**
2042  *      visornic_resume         - Called when IO part has recovered
2043  *      @dev: visornic device that is being serviced
2044  *      @complete_func: call when finished
2045  *
2046  *      Called when the IO partition has recovered. Reestablish
2047  *      connection to the IO part and set the link up. Okay to do
2048  *      DMA again.
2049  *      Returns 0 for success.
2050  */
2051 static int visornic_resume(struct visor_device *dev,
2052                            visorbus_state_complete_func complete_func)
2053 {
2054         struct visornic_devdata *devdata;
2055         struct net_device *netdev;
2056         unsigned long flags;
2057
2058         devdata = dev_get_drvdata(&dev->device);
2059         if (!devdata) {
2060                 dev_err(&dev->device, "%s no devdata\n", __func__);
2061                 return -EINVAL;
2062         }
2063
2064         netdev = devdata->netdev;
2065
2066         spin_lock_irqsave(&devdata->priv_lock, flags);
2067         if (devdata->server_change_state) {
2068                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
2069                 dev_err(&dev->device, "%s server already changing state\n",
2070                         __func__);
2071                 return -EINVAL;
2072         }
2073         if (!devdata->server_down) {
2074                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
2075                 dev_err(&dev->device, "%s server not down\n", __func__);
2076                 complete_func(dev, 0);
2077                 return 0;
2078         }
2079         devdata->server_change_state = true;
2080         spin_unlock_irqrestore(&devdata->priv_lock, flags);
2081
2082         /* Must transition channel to ATTACHED state BEFORE
2083          * we can start using the device again.
2084          * TODO: State transitions
2085          */
2086         mod_timer(&devdata->irq_poll_timer, jiffies + msecs_to_jiffies(2));
2087
2088         init_rcv_bufs(netdev, devdata);
2089
2090         rtnl_lock();
2091         dev_open(netdev);
2092         rtnl_unlock();
2093
2094         complete_func(dev, 0);
2095         return 0;
2096 }
2097
2098 /**
2099  *      visornic_init   - Init function
2100  *
2101  *      Init function for the visornic driver. Do initial driver setup
2102  *      and wait for devices.
2103  *      Returns 0 for success, negative for error.
2104  */
2105 static int visornic_init(void)
2106 {
2107         struct dentry *ret;
2108         int err = -ENOMEM;
2109
2110         visornic_debugfs_dir = debugfs_create_dir("visornic", NULL);
2111         if (!visornic_debugfs_dir)
2112                 return err;
2113
2114         ret = debugfs_create_file("info", S_IRUSR, visornic_debugfs_dir, NULL,
2115                                   &debugfs_info_fops);
2116         if (!ret)
2117                 goto cleanup_debugfs;
2118         ret = debugfs_create_file("enable_ints", S_IWUSR, visornic_debugfs_dir,
2119                                   NULL, &debugfs_enable_ints_fops);
2120         if (!ret)
2121                 goto cleanup_debugfs;
2122
2123         /* create workqueue for tx timeout reset */
2124         visornic_timeout_reset_workqueue =
2125                 create_singlethread_workqueue("visornic_timeout_reset");
2126         if (!visornic_timeout_reset_workqueue)
2127                 goto cleanup_workqueue;
2128
2129         spin_lock_init(&dev_num_pool_lock);
2130         dev_num_pool = kzalloc(BITS_TO_LONGS(MAXDEVICES) * sizeof(long), GFP_KERNEL);
2131         if (!dev_num_pool)
2132                 goto cleanup_workqueue;
2133
2134         err = visorbus_register_visor_driver(&visornic_driver);
2135         if (!err)
2136                 return 0;
2137
2138 cleanup_workqueue:
2139         if (visornic_timeout_reset_workqueue) {
2140                 flush_workqueue(visornic_timeout_reset_workqueue);
2141                 destroy_workqueue(visornic_timeout_reset_workqueue);
2142         }
2143 cleanup_debugfs:
2144         debugfs_remove_recursive(visornic_debugfs_dir);
2145
2146         return err;
2147 }
2148
2149 /**
2150  *      visornic_cleanup        - driver exit routine
2151  *
2152  *      Unregister driver from the bus and free up memory.
2153  */
2154 static void visornic_cleanup(void)
2155 {
2156         visorbus_unregister_visor_driver(&visornic_driver);
2157
2158         if (visornic_timeout_reset_workqueue) {
2159                 flush_workqueue(visornic_timeout_reset_workqueue);
2160                 destroy_workqueue(visornic_timeout_reset_workqueue);
2161         }
2162         debugfs_remove_recursive(visornic_debugfs_dir);
2163
2164         kfree(dev_num_pool);
2165         dev_num_pool = NULL;
2166 }
2167
2168 module_init(visornic_init);
2169 module_exit(visornic_cleanup);
2170
2171 MODULE_AUTHOR("Unisys");
2172 MODULE_LICENSE("GPL");
2173 MODULE_DESCRIPTION("sPAR nic driver for sparlinux: ver 1.0.0.0");
2174 MODULE_VERSION("1.0.0.0");