/* Copyright (c) 2012 - 2015 UNISYS CORPORATION
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT.  See the GNU General Public License for more
 * details.
 */
/* This driver lives in a s-Par guest partition and registers for ethernet
 * IO channels from the visorbus driver. It creates netdev devices, forwards
 * transmits to the IO channel, and accepts receives from the IO
 * Partition via the IO channel.
 */
#include <linux/debugfs.h>
#include <linux/etherdevice.h>
#include <linux/netdevice.h>
#include <linux/kthread.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>

#include "iochannel.h"
#define VISORNIC_INFINITE_RSP_WAIT 0
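/* a timeout argument of 0 to the enable/disable paths below means
 * "wait forever" for the IO partition's response
 */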
/* MAX_BUF = 64 lines x 32 MAXVNIC x 80 characters
 *         = 163840 bytes
 */
#define MAX_BUF 163840
#define NAPI_WEIGHT 64
/* GUIDs for the channel type supported by this driver. */
static struct visor_channeltype_descriptor visornic_channel_types[] = {
	/* Note that the only channel type we expect to be reported by the
	 * bus driver is the SPAR_VNIC channel.
	 */
	{ SPAR_VNIC_CHANNEL_PROTOCOL_UUID, "ultravnic" },
	{ NULL_UUID_LE, NULL }
};
MODULE_DEVICE_TABLE(visorbus, visornic_channel_types);
/* FIXME XXX: This next line of code must be fixed and removed before
 * acceptance into the 'normal' part of the kernel. It is only here as a place
 * holder to get module autoloading functionality working for visorbus. Code
 * must be added to scripts/mod/file2alias.c, etc., to get this working
 * properly.
 */
MODULE_ALIAS("visorbus:" SPAR_VNIC_CHANNEL_PROTOCOL_UUID_STR);
struct chanstat {
	unsigned long got_rcv;
	unsigned long got_enbdisack;
	unsigned long got_xmit_done;
	unsigned long xmit_fail;
	unsigned long sent_enbdis;
	unsigned long sent_promisc;
	unsigned long sent_post;
	unsigned long sent_post_failed;
	unsigned long sent_xmit;
	unsigned long reject_count;
	unsigned long extra_rcvbufs_sent;
};
struct visornic_devdata {
	/* 0 = disabled, 1 = enabled to receive */
	unsigned short enabled;
	/* NET_RCV_ENABLE/DISABLE acked by IOPART */
	unsigned short enab_dis_acked;

	struct visor_device *dev;
	struct net_device *netdev;
	struct net_device_stats net_stats;
	atomic_t interrupt_rcvd;
	wait_queue_head_t rsp_queue;
	struct sk_buff **rcvbuf;
	/* incarnation_id lets IOPART know about re-birth */
	u64 incarnation_id;
	/* flags as they were prior to set_multicast_list */
	unsigned short old_flags;
	atomic_t usage;	/* count of users */

	/* number of rcv buffers the vnic will post */
	int num_rcv_bufs;
	int num_rcv_bufs_could_not_alloc;
	atomic_t num_rcvbuf_in_iovm;
	unsigned long alloc_failed_in_if_needed_cnt;
	unsigned long alloc_failed_in_repost_rtn_cnt;

	/* absolute max number of outstanding xmits - should never hit this */
	unsigned long max_outstanding_net_xmits;
	/* high water mark for calling netif_stop_queue() */
	unsigned long upper_threshold_net_xmits;
	/* low water mark for calling netif_wake_queue() */
	unsigned long lower_threshold_net_xmits;
	/* xmitbufhead - head of the xmit buffer list sent to the IOPART end */
	struct sk_buff_head xmitbufhead;

	visorbus_state_complete_func server_down_complete_func;
	struct work_struct timeout_reset;
	/* cmdrsp_rcv is used for posting/unposting rcv buffers */
	struct uiscmdrsp *cmdrsp_rcv;
	/* xmit_cmdrsp - issues NET_XMIT - only one active xmit at a time */
	struct uiscmdrsp *xmit_cmdrsp;

	bool server_down;		/* IOPART is down */
	bool server_change_state;	/* Processing SERVER_CHANGESTATE msg */
	bool going_away;		/* device is being torn down */
	struct dentry *eth_debugfs_dir;
	u64 interrupts_rcvd;
	u64 interrupts_notme;
	u64 interrupts_disabled;
	u64 busy_cnt;
	spinlock_t priv_lock;	/* spinlock to access devdata structures */

	/* flow control counters */
	u64 flow_control_upper_hits;
	u64 flow_control_lower_hits;

	unsigned long n_rcv0;	/* # rcvs of 0 buffers */
	unsigned long n_rcv1;	/* # rcvs of 1 buffers */
	unsigned long n_rcv2;	/* # rcvs of 2 buffers */
	unsigned long n_rcvx;	/* # rcvs of >2 buffers */
	unsigned long found_repost_rcvbuf_cnt;	/* # repost_rcvbuf_cnt */
	unsigned long repost_found_skb_cnt;	/* # of found the skb */
	unsigned long n_repost_deficit;		/* # of lost rcv buffers */
	unsigned long bad_rcv_buf;	/* # of unknown rcv skb not freed */
	unsigned long n_rcv_packets_not_accepted; /* # bogus rcv packets */

	int queuefullmsg_logged;
	struct chanstat chstat;
	struct timer_list irq_poll_timer;
	struct napi_struct napi;
	struct uiscmdrsp cmdrsp[SIZEOF_CMDRSP];
};
/**
 * visor_copy_fragsinfo_from_skb(
 * @skb: skbuff that we are pulling the frags from
 * @firstfraglen: length of first fragment in skb
 * @frags_max: max len of frags array
 * @frags: frags array filled in on output
 *
 * Copy the fragment list in the SKB to a phys_info
 * array that the IOPART understands.
 * Return value indicates number of entries filled in frags
 * Negative values indicate an error.
 */
static int
visor_copy_fragsinfo_from_skb(struct sk_buff *skb, unsigned int firstfraglen,
			      unsigned int frags_max,
			      struct phys_info frags[])
{
	unsigned int count = 0, frag, size, offset = 0, numfrags;
	unsigned int total_count;

	numfrags = skb_shinfo(skb)->nr_frags;

	/* Compute the number of fragments this skb has, and if it's more
	 * than the frags array can hold, linearize the skb.
	 */
	total_count = numfrags + (firstfraglen / PI_PAGE_SIZE);
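	/* a partial trailing page still needs its own phys_info entry,
	 * so round the per-page count up
	 */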
	if (firstfraglen % PI_PAGE_SIZE)
		total_count++;

	if (total_count > frags_max) {
		if (skb_linearize(skb))
			return -EINVAL;
		numfrags = skb_shinfo(skb)->nr_frags;
	}
	while (firstfraglen) {
		if (count == frags_max)
			return -EINVAL;

		frags[count].pi_pfn =
			page_to_pfn(virt_to_page(skb->data + offset));
		frags[count].pi_off =
			(unsigned long)(skb->data + offset) & PI_PAGE_MASK;
		size = min_t(unsigned int, firstfraglen,
			     PI_PAGE_SIZE - frags[count].pi_off);

		/* can take smallest of firstfraglen (what's left) OR
		 * bytes left in the page
		 */
		frags[count].pi_len = size;
		firstfraglen -= size;
		offset += size;
		count++;
	}
	if ((count + numfrags) > frags_max)
		return -EINVAL;

	for (frag = 0; frag < numfrags; frag++) {
		count = add_physinfo_entries(page_to_pfn(
				skb_frag_page(&skb_shinfo(skb)->frags[frag])),
				skb_shinfo(skb)->frags[frag].page_offset,
				skb_shinfo(skb)->frags[frag].size,
				count, frags_max, frags);
		/* add_physinfo_entries only returns
		 * zero if the frags array is out of room.
		 * That should never happen because we
		 * fail above, if count + numfrags > frags_max.
		 */
		if (!count)
			return -EINVAL;
	}
	if (skb_shinfo(skb)->frag_list) {
		struct sk_buff *skbinlist;
		int c;

		for (skbinlist = skb_shinfo(skb)->frag_list; skbinlist;
		     skbinlist = skbinlist->next) {
			c = visor_copy_fragsinfo_from_skb(skbinlist,
							  skbinlist->len -
							  skbinlist->data_len,
							  frags_max - count,
							  &frags[count]);
			if (c < 0)
				return c;
			count += c;
		}
	}
	return count;
}
static ssize_t enable_ints_write(struct file *file,
				 const char __user *buffer,
				 size_t count, loff_t *ppos)
{
	/* Don't want to break ABI here by having a debugfs
	 * file that no longer exists or is writable, so
	 * let's just make this a vestigial function
	 */
	return count;
}

static const struct file_operations debugfs_enable_ints_fops = {
	.write = enable_ints_write,
};
/**
 * visornic_serverdown_complete - IOPART went down, pause device
 * @devdata: device managed by the IO partition
 *
 * The IO partition has gone down, and we need to do some cleanup
 * for when it comes back. Treat the IO partition as the link
 * being down.
 */
static void
visornic_serverdown_complete(struct visornic_devdata *devdata)
{
	struct net_device *netdev;

	netdev = devdata->netdev;

	/* Stop polling for interrupts */
	del_timer_sync(&devdata->irq_poll_timer);

	rtnl_lock();
	dev_close(netdev);
	rtnl_unlock();

	atomic_set(&devdata->num_rcvbuf_in_iovm, 0);
	devdata->chstat.sent_xmit = 0;
	devdata->chstat.got_xmit_done = 0;

	if (devdata->server_down_complete_func)
		(*devdata->server_down_complete_func)(devdata->dev, 0);

	devdata->server_down = true;
	devdata->server_change_state = false;
	devdata->server_down_complete_func = NULL;
}
/**
 * visornic_serverdown - Command has notified us that IOPART is down
 * @devdata: device that is being managed by IOPART
 *
 * Schedule the work needed to handle the server down request. Make
 * sure we haven't already handled the server change state event.
 * Returns 0 if we scheduled the work, -EINVAL on error.
 */
static int
visornic_serverdown(struct visornic_devdata *devdata,
		    visorbus_state_complete_func complete_func)
{
	unsigned long flags;
	int err;

	spin_lock_irqsave(&devdata->priv_lock, flags);
	if (devdata->server_change_state) {
		dev_dbg(&devdata->dev->device, "%s changing state\n",
			__func__);
		err = -EINVAL;
		goto err_unlock;
	}
	if (devdata->server_down) {
		dev_dbg(&devdata->dev->device, "%s already down\n",
			__func__);
		err = -EINVAL;
		goto err_unlock;
	}
	if (devdata->going_away) {
		dev_dbg(&devdata->dev->device,
			"%s aborting because device removal pending\n",
			__func__);
		err = -EINVAL;
		goto err_unlock;
	}
	devdata->server_change_state = true;
	devdata->server_down_complete_func = complete_func;
	spin_unlock_irqrestore(&devdata->priv_lock, flags);

	visornic_serverdown_complete(devdata);
	return 0;

err_unlock:
	spin_unlock_irqrestore(&devdata->priv_lock, flags);
	return err;
}
/**
 * alloc_rcv_buf - alloc rcv buffer to be given to the IO Partition.
 * @netdev: network adapter the rcv bufs are attached to.
 *
 * Create an sk_buff (rcv_buf) that will be passed to the IO Partition
 * so that it can write rcv data into our memory space.
 * Return pointer to sk_buff.
 */
static struct sk_buff *
alloc_rcv_buf(struct net_device *netdev)
{
	struct sk_buff *skb;

	/* NOTE: the first fragment in each rcv buffer is pointed to by
	 * rcvskb->data. For now all rcv buffers will be RCVPOST_BUF_SIZE
	 * in length, so the first frag is large enough to hold 1514.
	 */
	skb = alloc_skb(RCVPOST_BUF_SIZE, GFP_ATOMIC);
	if (!skb)
		return NULL;
	skb->dev = netdev;
	/* current value of mtu doesn't come into play here; large
	 * packets will just end up using multiple rcv buffers all of
	 * same size.
	 */
	skb->len = RCVPOST_BUF_SIZE;
	/* alloc_skb already zeroed it; set it again for clarity. */
	skb->data_len = 0;
	return skb;
}
/**
 * post_skb - post a skb to the IO Partition.
 * @cmdrsp: cmdrsp packet to be sent to the IO Partition
 * @devdata: visornic_devdata to post the skb to
 * @skb: skb to give to the IO partition
 *
 * Send the skb to the IO Partition.
 * Returns void
 */
static void
post_skb(struct uiscmdrsp *cmdrsp,
	 struct visornic_devdata *devdata, struct sk_buff *skb)
{
	cmdrsp->net.buf = skb;
	cmdrsp->net.rcvpost.frag.pi_pfn = page_to_pfn(virt_to_page(skb->data));
	cmdrsp->net.rcvpost.frag.pi_off =
		(unsigned long)skb->data & PI_PAGE_MASK;
	cmdrsp->net.rcvpost.frag.pi_len = skb->len;
	cmdrsp->net.rcvpost.unique_num = devdata->incarnation_id;
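	/* the rcvpost descriptor carries a single phys_info fragment, so a
	 * rcv buffer can only be posted when it fits within one page
	 */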
	if ((cmdrsp->net.rcvpost.frag.pi_off + skb->len) <= PI_PAGE_SIZE) {
		cmdrsp->net.type = NET_RCV_POST;
		cmdrsp->cmdtype = CMD_NET_TYPE;
		if (!visorchannel_signalinsert(devdata->dev->visorchannel,
					       IOCHAN_TO_IOPART,
					       cmdrsp)) {
			atomic_inc(&devdata->num_rcvbuf_in_iovm);
			devdata->chstat.sent_post++;
		} else {
			devdata->chstat.sent_post_failed++;
		}
	}
}
/**
 * send_enbdis - send NET_RCV_ENBDIS to IO Partition
 * @netdev: netdevice we are enabling/disabling, used as context
 * @state: enable = 1/disable = 0
 * @devdata: visornic device we are enabling/disabling
 *
 * Send the enable/disable message to the IO Partition.
 * Returns void
 */
static void
send_enbdis(struct net_device *netdev, int state,
	    struct visornic_devdata *devdata)
{
	devdata->cmdrsp_rcv->net.enbdis.enable = state;
	devdata->cmdrsp_rcv->net.enbdis.context = netdev;
	devdata->cmdrsp_rcv->net.type = NET_RCV_ENBDIS;
	devdata->cmdrsp_rcv->cmdtype = CMD_NET_TYPE;
	if (!visorchannel_signalinsert(devdata->dev->visorchannel,
				       IOCHAN_TO_IOPART,
				       devdata->cmdrsp_rcv))
		devdata->chstat.sent_enbdis++;
}
/**
 * visornic_disable_with_timeout - Disable network adapter
 * @netdev: netdevice to disable
 * @timeout: timeout to wait for disable
 *
 * Disable the network adapter and inform the IO Partition that we
 * are disabled; reclaim memory from rcv bufs.
 * Returns 0 on success, negative if the IO Partition did not respond.
 */
static int
visornic_disable_with_timeout(struct net_device *netdev, const int timeout)
{
	struct visornic_devdata *devdata = netdev_priv(netdev);
	int i;
	unsigned long flags;
	int wait = 0;

	/* send a msg telling the other end we are stopping incoming pkts */
	spin_lock_irqsave(&devdata->priv_lock, flags);
	devdata->enabled = 0;
	devdata->enab_dis_acked = 0; /* must wait for ack */
	spin_unlock_irqrestore(&devdata->priv_lock, flags);

	/* send disable and wait for ack -- don't hold lock when sending
	 * disable because if the queue is full, insert might sleep.
	 */
	send_enbdis(netdev, 0, devdata);

	/* wait for ack to arrive before we try to free rcv buffers
	 * NOTE: the other end automatically unposts the rcv buffers
	 * when it gets a disable.
	 */
	spin_lock_irqsave(&devdata->priv_lock, flags);
	while ((timeout == VISORNIC_INFINITE_RSP_WAIT) ||
	       (wait < timeout)) {
		if (devdata->enab_dis_acked)
			break;
		if (devdata->server_down || devdata->server_change_state) {
			dev_dbg(&netdev->dev, "%s server went away\n",
				__func__);
			break;
		}
		set_current_state(TASK_INTERRUPTIBLE);
		spin_unlock_irqrestore(&devdata->priv_lock, flags);
		wait += schedule_timeout(msecs_to_jiffies(10));
		spin_lock_irqsave(&devdata->priv_lock, flags);
	}

	/* Wait for usage to go to 1 (no other users) before freeing
	 * rcv buffers
	 */
	if (atomic_read(&devdata->usage) > 1) {
		while (1) {
			set_current_state(TASK_INTERRUPTIBLE);
			spin_unlock_irqrestore(&devdata->priv_lock, flags);
			schedule_timeout(msecs_to_jiffies(10));
			spin_lock_irqsave(&devdata->priv_lock, flags);
			if (atomic_read(&devdata->usage))
				break;
		}
	}
	/* we've set enabled to 0, so we can give up the lock. */
	spin_unlock_irqrestore(&devdata->priv_lock, flags);

	/* stop the transmit queue so nothing more can be transmitted */
	netif_stop_queue(netdev);

	napi_disable(&devdata->napi);
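	/* no NET_XMIT_DONE responses will be processed for anything still
	 * queued, so drop the skbs we were tracking for completion
	 */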
	skb_queue_purge(&devdata->xmitbufhead);

	/* Free rcv buffers - other end has automatically unposted them on
	 * disable
	 */
	for (i = 0; i < devdata->num_rcv_bufs; i++) {
		if (devdata->rcvbuf[i]) {
			kfree_skb(devdata->rcvbuf[i]);
			devdata->rcvbuf[i] = NULL;
		}
	}

	return 0;
}
/**
 * init_rcv_bufs -- initialize receive bufs and send them to the IO Part
 * @netdev: struct netdevice
 * @devdata: visornic_devdata
 *
 * Allocate rcv buffers and post them to the IO Partition.
 * Return 0 for success, and negative for failure.
 */
static int
init_rcv_bufs(struct net_device *netdev, struct visornic_devdata *devdata)
{
	int i, count;

	/* allocate a fixed number of receive buffers to post to uisnic;
	 * post the receive buffers after we've allocated the required amount
	 */
	for (i = 0; i < devdata->num_rcv_bufs; i++) {
		devdata->rcvbuf[i] = alloc_rcv_buf(netdev);
		if (!devdata->rcvbuf[i])
			break; /* if we failed to allocate one let us stop */
	}
	if (i == 0) /* couldn't even allocate one -- bail out */
		return -ENOMEM;
	count = i;

	/* Ensure we can alloc 2/3rd of the requested number of buffers.
	 * 2/3 is an arbitrary choice; used also in ndis init.c
	 */
	if (count < ((2 * devdata->num_rcv_bufs) / 3)) {
		/* free receive buffers we did alloc and then bail out */
		for (i = 0; i < count; i++) {
			kfree_skb(devdata->rcvbuf[i]);
			devdata->rcvbuf[i] = NULL;
		}
		return -ENOMEM;
	}

	/* post receive buffers to receive incoming input - without holding
	 * lock - we've not enabled nor started the queue so there shouldn't
	 * be any rcv or xmit activity
	 */
	for (i = 0; i < count; i++)
		post_skb(devdata->cmdrsp_rcv, devdata, devdata->rcvbuf[i]);

	return 0;
}
/**
 * visornic_enable_with_timeout - send enable to IO Part
 * @netdev: struct net_device
 * @timeout: Time to wait for the ACK from the enable
 *
 * Sends enable to IOVM; inits and posts receive buffers to IOVM.
 * Timeout is defined in msecs (timeout of 0 specifies infinite wait).
 * Return 0 for success, negative for failure.
 */
static int
visornic_enable_with_timeout(struct net_device *netdev, const int timeout)
{
	int i;
	struct visornic_devdata *devdata = netdev_priv(netdev);
	unsigned long flags;
	int wait = 0;

	napi_enable(&devdata->napi);

	/* NOTE: the other end automatically unposts the rcv buffers when it
	 * gets a disable.
	 */
	i = init_rcv_bufs(netdev, devdata);
	if (i < 0) {
		dev_err(&netdev->dev,
			"%s failed to init rcv bufs (%d)\n", __func__, i);
		return i;
	}

	spin_lock_irqsave(&devdata->priv_lock, flags);
	devdata->enabled = 1;
	devdata->enab_dis_acked = 0;

	/* now we're ready, let's send an ENB to uisnic but until we get
	 * an ACK back from uisnic, we'll drop the packets
	 */
	devdata->n_rcv_packets_not_accepted = 0;
	spin_unlock_irqrestore(&devdata->priv_lock, flags);

	/* send enable and wait for ack -- don't hold lock when sending enable
	 * because if the queue is full, insert might sleep.
	 */
	send_enbdis(netdev, 1, devdata);

	spin_lock_irqsave(&devdata->priv_lock, flags);
	while ((timeout == VISORNIC_INFINITE_RSP_WAIT) ||
	       (wait < timeout)) {
		if (devdata->enab_dis_acked)
			break;
		if (devdata->server_down || devdata->server_change_state) {
			dev_dbg(&netdev->dev, "%s server went away\n",
				__func__);
			break;
		}
		set_current_state(TASK_INTERRUPTIBLE);
		spin_unlock_irqrestore(&devdata->priv_lock, flags);
		wait += schedule_timeout(msecs_to_jiffies(10));
		spin_lock_irqsave(&devdata->priv_lock, flags);
	}

	spin_unlock_irqrestore(&devdata->priv_lock, flags);

	if (!devdata->enab_dis_acked) {
		dev_err(&netdev->dev, "%s missing ACK\n", __func__);
		return -EIO;
	}

	netif_start_queue(netdev);

	return 0;
}
/**
 * visornic_timeout_reset - handle xmit timeout resets
 * @work: work item that scheduled this work
 *
 * Transmit timeouts are typically handled by resetting the
 * device. For our virtual NIC, we send a disable and an enable
 * to the IOVM. If it doesn't respond, we trigger a serverdown.
 */
static void
visornic_timeout_reset(struct work_struct *work)
{
	struct visornic_devdata *devdata;
	struct net_device *netdev;
	int response = 0;

	devdata = container_of(work, struct visornic_devdata, timeout_reset);
	netdev = devdata->netdev;

	rtnl_lock();
	if (!netif_running(netdev)) {
		rtnl_unlock();
		return;
	}

	response = visornic_disable_with_timeout(netdev,
						 VISORNIC_INFINITE_RSP_WAIT);
	if (response)
		goto call_serverdown;

	response = visornic_enable_with_timeout(netdev,
						VISORNIC_INFINITE_RSP_WAIT);
	if (response)
		goto call_serverdown;

	rtnl_unlock();
	return;

call_serverdown:
	visornic_serverdown(devdata, NULL);
	rtnl_unlock();
}
/**
 * visornic_open - Enable the visornic device and mark the queue started
 * @netdev: netdevice to start
 *
 * Enable the device and start the transmit queue.
 * Return 0 for success.
 */
static int
visornic_open(struct net_device *netdev)
{
	visornic_enable_with_timeout(netdev, VISORNIC_INFINITE_RSP_WAIT);

	return 0;
}

/**
 * visornic_close - Disables the visornic device and stops the queues
 * @netdev: netdevice to stop
 *
 * Disable the device and stop the transmit queue.
 * Return 0 for success.
 */
static int
visornic_close(struct net_device *netdev)
{
	visornic_disable_with_timeout(netdev, VISORNIC_INFINITE_RSP_WAIT);

	return 0;
}
/**
 * devdata_xmits_outstanding - compute outstanding xmits
 * @devdata: visornic_devdata for device
 *
 * Return value is the number of outstanding xmits.
 */
static unsigned long devdata_xmits_outstanding(struct visornic_devdata *devdata)
{
	if (devdata->chstat.sent_xmit >= devdata->chstat.got_xmit_done)
		return devdata->chstat.sent_xmit -
			devdata->chstat.got_xmit_done;
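	/* sent_xmit has wrapped past ULONG_MAX while got_xmit_done has not
	 * caught up yet, so compute the difference modulo ULONG_MAX + 1
	 */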
	return (ULONG_MAX - devdata->chstat.got_xmit_done
		+ devdata->chstat.sent_xmit + 1);
}
/**
 * vnic_hit_high_watermark
 * @devdata: indicates visornic device we are checking
 * @high_watermark: max num of unacked xmits we will tolerate
 *		    before we will start throttling
 *
 * Returns true iff the number of unacked xmits sent to
 * the IO partition is >= high_watermark.
 */
static inline bool vnic_hit_high_watermark(struct visornic_devdata *devdata,
					   ulong high_watermark)
{
	return (devdata_xmits_outstanding(devdata) >= high_watermark);
}

/**
 * vnic_hit_low_watermark
 * @devdata: indicates visornic device we are checking
 * @low_watermark: we will wait until the num of unacked xmits
 *		   drops to this value or lower before we start
 *		   transmitting again
 *
 * Returns true iff the number of unacked xmits sent to
 * the IO partition is <= low_watermark.
 */
static inline bool vnic_hit_low_watermark(struct visornic_devdata *devdata,
					  ulong low_watermark)
{
	return (devdata_xmits_outstanding(devdata) <= low_watermark);
}
/**
 * visornic_xmit - send a packet to the IO Partition
 * @skb: Packet to be sent
 * @netdev: net device the packet is being sent from
 *
 * Convert the skb to a cmdrsp so the IO Partition can understand it.
 * Send the XMIT command to the IO Partition for processing. This
 * function is protected from concurrent calls by a spinlock xmit_lock
 * in the net_device struct, but as soon as the function returns it
 * can be called again.
 * Returns NETDEV_TX_OK.
 */
static int
visornic_xmit(struct sk_buff *skb, struct net_device *netdev)
{
	struct visornic_devdata *devdata;
	int len, firstfraglen, padlen;
	struct uiscmdrsp *cmdrsp = NULL;
	unsigned long flags;

	devdata = netdev_priv(netdev);
	spin_lock_irqsave(&devdata->priv_lock, flags);

	if (netif_queue_stopped(netdev) || devdata->server_down ||
	    devdata->server_change_state) {
		spin_unlock_irqrestore(&devdata->priv_lock, flags);
		devdata->busy_cnt++;
		dev_dbg(&netdev->dev,
			"%s busy - queue stopped\n", __func__);
		kfree_skb(skb);
		return NETDEV_TX_OK;
	}

	/* sk_buff struct is used to host network data throughout all the
	 * linux network subsystems
	 */
	len = skb->len;

	/* skb->len is the FULL length of data (including fragmentary portion)
	 * skb->data_len is the length of the fragment portion in frags
	 * skb->len - skb->data_len is size of the 1st fragment in skb->data
	 * calculate the length of the first fragment that skb->data is
	 * pointing to
	 */
	firstfraglen = skb->len - skb->data_len;
	if (firstfraglen < ETH_HLEN) {
		spin_unlock_irqrestore(&devdata->priv_lock, flags);
		devdata->busy_cnt++;
		dev_err(&netdev->dev,
			"%s busy - first frag too small (%d)\n",
			__func__, firstfraglen);
		kfree_skb(skb);
		return NETDEV_TX_OK;
	}

	if ((len < ETH_MIN_PACKET_SIZE) &&
	    ((skb_end_pointer(skb) - skb->data) >= ETH_MIN_PACKET_SIZE)) {
		/* pad the packet out to minimum size */
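		/* (Ethernet's minimum frame is 60 bytes before the FCS;
		 * ETH_MIN_PACKET_SIZE presumably reflects that)
		 */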
		padlen = ETH_MIN_PACKET_SIZE - len;
		memset(&skb->data[len], 0, padlen);
		skb->tail += padlen;
		skb->len += padlen;
		len += padlen;
		firstfraglen += padlen;
	}

	cmdrsp = devdata->xmit_cmdrsp;
	/* clear cmdrsp */
	memset(cmdrsp, 0, SIZEOF_CMDRSP);
	cmdrsp->net.type = NET_XMIT;
	cmdrsp->cmdtype = CMD_NET_TYPE;

	/* save the pointer to skb -- we'll need it for completion */
	cmdrsp->net.buf = skb;

	if (vnic_hit_high_watermark(devdata,
				    devdata->max_outstanding_net_xmits)) {
		/* extra NET_XMITs queued over to IOVM - need to wait */
		devdata->chstat.reject_count++;
		if (!devdata->queuefullmsg_logged &&
		    ((devdata->chstat.reject_count & 0x3ff) == 1))
			devdata->queuefullmsg_logged = 1;
		netif_stop_queue(netdev);
		spin_unlock_irqrestore(&devdata->priv_lock, flags);
		devdata->busy_cnt++;
		dev_dbg(&netdev->dev,
			"%s busy - waiting for iovm to catch up\n",
			__func__);
		kfree_skb(skb);
		return NETDEV_TX_OK;
	}
	if (devdata->queuefullmsg_logged)
		devdata->queuefullmsg_logged = 0;

	if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
		cmdrsp->net.xmt.lincsum.valid = 1;
		cmdrsp->net.xmt.lincsum.protocol = skb->protocol;
		if (skb_transport_header(skb) > skb->data) {
			cmdrsp->net.xmt.lincsum.hrawoff =
				skb_transport_header(skb) - skb->data;
			cmdrsp->net.xmt.lincsum.hrawoffv = 1;
		}
		if (skb_network_header(skb) > skb->data) {
			cmdrsp->net.xmt.lincsum.nhrawoff =
				skb_network_header(skb) - skb->data;
			cmdrsp->net.xmt.lincsum.nhrawoffv = 1;
		}
		cmdrsp->net.xmt.lincsum.csum = skb->csum;
	} else {
		cmdrsp->net.xmt.lincsum.valid = 0;
	}

	/* save off the length of the entire data packet */
	cmdrsp->net.xmt.len = len;

	/* copy ethernet header from first frag into cmdrsp
	 * - everything else will be passed in frags & DMA'ed
	 */
	memcpy(cmdrsp->net.xmt.ethhdr, skb->data, ETH_HLEN);

	/* copy frags info - from skb->data we need to only provide access
	 * beyond the eth header
	 */
	cmdrsp->net.xmt.num_frags =
		visor_copy_fragsinfo_from_skb(skb, firstfraglen,
					      MAX_PHYS_INFO,
					      cmdrsp->net.xmt.frags);
	if (cmdrsp->net.xmt.num_frags < 0) {
		spin_unlock_irqrestore(&devdata->priv_lock, flags);
		devdata->busy_cnt++;
		dev_err(&netdev->dev,
			"%s busy - copy frags failed\n", __func__);
		kfree_skb(skb);
		return NETDEV_TX_OK;
	}

	if (visorchannel_signalinsert(devdata->dev->visorchannel,
				      IOCHAN_TO_IOPART, cmdrsp)) {
		netif_stop_queue(netdev);
		spin_unlock_irqrestore(&devdata->priv_lock, flags);
		devdata->busy_cnt++;
		dev_dbg(&netdev->dev,
			"%s busy - signalinsert failed\n", __func__);
		kfree_skb(skb);
		return NETDEV_TX_OK;
	}

	/* Track the skbs that have been sent to the IOVM for XMIT */
	skb_queue_head(&devdata->xmitbufhead, skb);

	/* update xmt stats */
	devdata->net_stats.tx_packets++;
	devdata->net_stats.tx_bytes += skb->len;
	devdata->chstat.sent_xmit++;

	/* check if we have hit the high watermark for netif_stop_queue() */
	if (vnic_hit_high_watermark(devdata,
				    devdata->upper_threshold_net_xmits)) {
		/* extra NET_XMITs queued over to IOVM - need to wait;
		 * stop queue - call netif_wake_queue() after lower threshold
		 */
		netif_stop_queue(netdev);
		dev_dbg(&netdev->dev,
			"%s busy - invoking iovm flow control\n",
			__func__);
		devdata->flow_control_upper_hits++;
	}
	spin_unlock_irqrestore(&devdata->priv_lock, flags);

	/* skb will be freed when we get back NET_XMIT_DONE */
	return NETDEV_TX_OK;
}
/**
 * visornic_get_stats - returns net_stats of the visornic device
 * @netdev: netdevice
 *
 * Returns the net_device_stats for the device
 */
static struct net_device_stats *
visornic_get_stats(struct net_device *netdev)
{
	struct visornic_devdata *devdata = netdev_priv(netdev);

	return &devdata->net_stats;
}
/**
 * visornic_change_mtu - changes mtu of device.
 * @netdev: netdevice
 * @new_mtu: value of new mtu
 *
 * The MTU cannot be changed by the system; it must be changed via a
 * CONTROLVM message. All vnics and pnics in a switch have
 * to have the same MTU for everything to work.
 * Currently not supported.
 * Returns -EINVAL
 */
static int
visornic_change_mtu(struct net_device *netdev, int new_mtu)
{
	return -EINVAL;
}
/**
 * visornic_set_multi - set the multicast/promiscuous flags of the device
 * @netdev: netdevice
 *
 * The only flag we currently support is IFF_PROMISC.
 * Returns void
 */
static void
visornic_set_multi(struct net_device *netdev)
{
	struct uiscmdrsp *cmdrsp;
	struct visornic_devdata *devdata = netdev_priv(netdev);

	if (devdata->old_flags == netdev->flags)
		return;

	if ((netdev->flags & IFF_PROMISC) ==
	    (devdata->old_flags & IFF_PROMISC))
		goto out_save_flags;

	cmdrsp = kmalloc(SIZEOF_CMDRSP, GFP_ATOMIC);
	if (!cmdrsp)
		return;
	cmdrsp->cmdtype = CMD_NET_TYPE;
	cmdrsp->net.type = NET_RCV_PROMISC;
	cmdrsp->net.enbdis.context = netdev;
	cmdrsp->net.enbdis.enable =
		netdev->flags & IFF_PROMISC;
	visorchannel_signalinsert(devdata->dev->visorchannel,
				  IOCHAN_TO_IOPART,
				  cmdrsp);
	kfree(cmdrsp);

out_save_flags:
	devdata->old_flags = netdev->flags;
}
/**
 * visornic_xmit_timeout - request to timeout the xmit
 * @netdev: netdevice
 *
 * Queue the work and return. Make sure we have not already
 * been informed that the IO Partition is gone; if it is gone,
 * we will already be timing out the xmits.
 */
static void
visornic_xmit_timeout(struct net_device *netdev)
{
	struct visornic_devdata *devdata = netdev_priv(netdev);
	unsigned long flags;

	spin_lock_irqsave(&devdata->priv_lock, flags);
	if (devdata->going_away) {
		spin_unlock_irqrestore(&devdata->priv_lock, flags);
		dev_dbg(&devdata->dev->device,
			"%s aborting because device removal pending\n",
			__func__);
		return;
	}

	/* Ensure that a ServerDown message hasn't been received */
	if (!devdata->enabled ||
	    (devdata->server_down && !devdata->server_change_state)) {
		dev_dbg(&netdev->dev, "%s no processing\n",
			__func__);
		spin_unlock_irqrestore(&devdata->priv_lock, flags);
		return;
	}
	schedule_work(&devdata->timeout_reset);
	spin_unlock_irqrestore(&devdata->priv_lock, flags);
}
/**
 * repost_return - repost rcv bufs that have come back
 * @cmdrsp: io channel command struct to post
 * @devdata: visornic devdata for the device
 * @skb: skb that was returned from the IO Part
 * @netdev: netdevice
 *
 * Repost rcv buffers that have been returned to us when
 * we are finished with them.
 * Returns 0 for success, negative errno for error.
 */
static int
repost_return(struct uiscmdrsp *cmdrsp, struct visornic_devdata *devdata,
	      struct sk_buff *skb, struct net_device *netdev)
{
	struct net_pkt_rcv copy;
	int i = 0, cc, numreposted;
	int found_skb = 0;
	int status = 0;

	copy = cmdrsp->net.rcv;
	switch (copy.numrcvbufs) {
	case 0:
		devdata->n_rcv0++;
		break;
	case 1:
		devdata->n_rcv1++;
		break;
	case 2:
		devdata->n_rcv2++;
		break;
	default:
		devdata->n_rcvx++;
		break;
	}
	for (cc = 0, numreposted = 0; cc < copy.numrcvbufs; cc++) {
		for (i = 0; i < devdata->num_rcv_bufs; i++) {
			if (devdata->rcvbuf[i] != copy.rcvbuf[cc])
				continue;

			if ((skb) && devdata->rcvbuf[i] == skb) {
				devdata->found_repost_rcvbuf_cnt++;
				found_skb = 1;
				devdata->repost_found_skb_cnt++;
			}
			devdata->rcvbuf[i] = alloc_rcv_buf(netdev);
			if (!devdata->rcvbuf[i]) {
				devdata->num_rcv_bufs_could_not_alloc++;
				devdata->alloc_failed_in_repost_rtn_cnt++;
				status = -ENOMEM;
				break;
			}
			post_skb(cmdrsp, devdata, devdata->rcvbuf[i]);
			numreposted++;
			break;
		}
	}
	if (numreposted != copy.numrcvbufs) {
		devdata->n_repost_deficit++;
		status = -EINVAL;
	}
	if (skb) {
		if (found_skb) {
			kfree_skb(skb);
		} else {
			status = -EINVAL;
			devdata->bad_rcv_buf++;
		}
	}
	return status;
}
/**
 * visornic_rx - Handle receive packets coming back from IO Part
 * @cmdrsp: Receive packet returned from IO Part
 *
 * Got a receive packet back from the IO Part; handle it and send
 * it up the stack.
 * Returns 1 iff an skb was received, otherwise 0
 */
static int
visornic_rx(struct uiscmdrsp *cmdrsp)
{
	struct visornic_devdata *devdata;
	struct sk_buff *skb, *prev, *curr;
	struct net_device *netdev;
	int cc, currsize, off;
	struct ethhdr *eth;
	unsigned long flags;

	/* post a new rcv buf to the other end using the cmdrsp we have at
	 * hand; post it without holding the lock - but we'll use the signal
	 * lock to synchronize the queue insert. The cmdrsp that contains the
	 * net.rcv is the one we are using to repost, so copy the info we
	 * need from it.
	 */
	skb = cmdrsp->net.buf;
	netdev = skb->dev;

	devdata = netdev_priv(netdev);

	spin_lock_irqsave(&devdata->priv_lock, flags);
	atomic_dec(&devdata->num_rcvbuf_in_iovm);

	/* set length to how much was ACTUALLY received -
	 * NOTE: rcv_done_len includes actual length of data rcvd
	 * including ethhdr
	 */
	skb->len = cmdrsp->net.rcv.rcv_done_len;

	/* update rcv stats - call it with priv_lock held */
	devdata->net_stats.rx_packets++;
	devdata->net_stats.rx_bytes += skb->len;

	/* test enabled while holding lock */
	if (!(devdata->enabled && devdata->enab_dis_acked)) {
		/* don't process it unless we're in enable mode and until
		 * we've gotten an ACK saying the other end got our RCV enable
		 */
		spin_unlock_irqrestore(&devdata->priv_lock, flags);
		repost_return(cmdrsp, devdata, skb, netdev);
		return 0;
	}

	spin_unlock_irqrestore(&devdata->priv_lock, flags);

	/* when the skb was allocated, skb->dev, skb->data, skb->len and
	 * skb->data_len were set up, AND data has already been put into
	 * the skb (both first frag and in frags pages).
	 * NOTE: firstfraglen is the amount of data in skb->data and that
	 * which is not in nr_frags or frag_list. This is now simply
	 * RCVPOST_BUF_SIZE. Bump tail to show how much data is in
	 * firstfrag & set data_len to show the rest; see if we have to
	 * chain frag_list.
	 */
	if (skb->len > RCVPOST_BUF_SIZE) {	/* do PRECAUTIONARY check */
		if (cmdrsp->net.rcv.numrcvbufs < 2) {
			if (repost_return(cmdrsp, devdata, skb, netdev) < 0)
				dev_err(&devdata->netdev->dev,
					"repost_return failed");
			return 0;
		}
		/* length rcvd is greater than firstfrag in this skb rcv buf */
		skb->tail += RCVPOST_BUF_SIZE;	/* amount in skb->data */
		skb->data_len = skb->len - RCVPOST_BUF_SIZE;	/* amount that
								 * will be in
								 * frag_list
								 */
	} else {
		/* data fits in this skb - no chaining - do
		 * PRECAUTIONARY check
		 */
		if (cmdrsp->net.rcv.numrcvbufs != 1) {	/* should be 1 */
			if (repost_return(cmdrsp, devdata, skb, netdev) < 0)
				dev_err(&devdata->netdev->dev,
					"repost_return failed");
			return 0;
		}
		skb->tail += skb->len;
		skb->data_len = 0;	/* nothing rcvd in frag_list */
	}
	off = skb_tail_pointer(skb) - skb->data;

	/* amount we bumped tail by in the head skb:
	 * it is used to calculate the size of each chained skb below,
	 * and to index into bufline to continue the copy
	 * (for chansocktwopc).
	 * If necessary, chain the rcv skbs together.
	 * NOTE: index 0 is the same as cmdrsp->net.rcv.skb; we need to
	 * chain the rest to that one.
	 * - do PRECAUTIONARY check
	 */
	if (cmdrsp->net.rcv.rcvbuf[0] != skb) {
		if (repost_return(cmdrsp, devdata, skb, netdev) < 0)
			dev_err(&devdata->netdev->dev, "repost_return failed");
		return 0;
	}

	if (cmdrsp->net.rcv.numrcvbufs > 1) {
		/* chain the various rcv buffers into the skb's frag_list. */
		/* Note: off was initialized above */
		for (cc = 1, prev = NULL;
		     cc < cmdrsp->net.rcv.numrcvbufs; cc++) {
			curr = (struct sk_buff *)cmdrsp->net.rcv.rcvbuf[cc];
			curr->next = NULL;
			if (!prev)	/* start of list - set head */
				skb_shinfo(skb)->frag_list = curr;
			else
				prev->next = curr;
			prev = curr;

			/* should we set skb->len and skb->data_len for each
			 * buffer being chained??? can't hurt!
			 */
			currsize = min(skb->len - off,
				       (unsigned int)RCVPOST_BUF_SIZE);
			curr->len = currsize;
			curr->tail += currsize;
			curr->data_len = 0;
			off += currsize;
		}
		/* assert skb->len == off */
		if (skb->len != off) {
			netdev_err(devdata->netdev,
				   "something wrong; skb->len:%d != off:%d\n",
				   skb->len, off);
		}
	}

	/* set up packet's protocol type using ethernet header - this
	 * sets up skb->pkt_type & it also PULLS out the eth header
	 */
	skb->protocol = eth_type_trans(skb, netdev);

	eth = eth_hdr(skb);

	skb->csum = 0;
	skb->ip_summed = CHECKSUM_NONE;
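	/* Filter the packet: a break out of this do/while accepts it and
	 * falls through to netif_receive_skb() below; running off the end
	 * drops it. The do/while (0) exists only as a break target.
	 */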
	do {
		if (netdev->flags & IFF_PROMISC)
			break;	/* accept all packets */
		if (skb->pkt_type == PACKET_BROADCAST) {
			if (netdev->flags & IFF_BROADCAST)
				break;	/* accept all broadcast packets */
		} else if (skb->pkt_type == PACKET_MULTICAST) {
			if ((netdev->flags & IFF_MULTICAST) &&
			    (netdev_mc_count(netdev))) {
				struct netdev_hw_addr *ha;
				int found_mc = 0;

				/* only accept multicast packets that we can
				 * find in our multicast address list
				 */
				netdev_for_each_mc_addr(ha, netdev) {
					if (ether_addr_equal(eth->h_dest,
							     ha->addr)) {
						found_mc = 1;
						break;
					}
				}
				/* accept pkt, dest matches a multicast addr */
				if (found_mc)
					break;
			}
		/* accept packet, h_dest must match vnic mac address */
		} else if (skb->pkt_type == PACKET_HOST) {
			break;
		} else if (skb->pkt_type == PACKET_OTHERHOST) {
			/* something is not right */
			dev_err(&devdata->netdev->dev,
				"**** FAILED to deliver rcv packet to OS; name:%s Dest:%pM VNIC:%pM\n",
				netdev->name, eth->h_dest, netdev->dev_addr);
		}
		/* drop packet - don't forward it up to OS */
		devdata->n_rcv_packets_not_accepted++;
		repost_return(cmdrsp, devdata, skb, netdev);
		return 0;
	} while (0);
	netif_receive_skb(skb);
	/* netif_receive_skb returns various values, but in practice most
	 * drivers ignore the return value
	 */

	skb = NULL;
	/* whether the packet got dropped or handled, the skb is freed by
	 * kernel code, so we shouldn't free it. But we should repost a
	 * new rcv buffer.
	 */
	repost_return(cmdrsp, devdata, skb, netdev);
	return 1;
}
/**
 * devdata_initialize - Initialize devdata structure
 * @devdata: visornic_devdata structure to initialize
 * @dev: visorbus device it belongs to
 *
 * Setup initial values for the visornic, based on channel and default
 * values.
 * Returns a pointer to the devdata structure
 */
static struct visornic_devdata *
devdata_initialize(struct visornic_devdata *devdata, struct visor_device *dev)
{
	devdata->dev = dev;
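	/* the jiffies-based incarnation_id is stamped into every posted
	 * rcv buffer (see post_skb()), letting the IO partition tell
	 * buffers posted by this incarnation of the device from stale
	 * ones posted before a restart
	 */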
	devdata->incarnation_id = get_jiffies_64();
	return devdata;
}

/**
 * devdata_release - Frees up references in devdata
 * @devdata: struct to clean up
 *
 * Frees up references in devdata.
 * Returns void
 */
static void devdata_release(struct visornic_devdata *devdata)
{
	kfree(devdata->rcvbuf);
	kfree(devdata->cmdrsp_rcv);
	kfree(devdata->xmit_cmdrsp);
}
static const struct net_device_ops visornic_dev_ops = {
	.ndo_open = visornic_open,
	.ndo_stop = visornic_close,
	.ndo_start_xmit = visornic_xmit,
	.ndo_get_stats = visornic_get_stats,
	.ndo_change_mtu = visornic_change_mtu,
	.ndo_tx_timeout = visornic_xmit_timeout,
	.ndo_set_rx_mode = visornic_set_multi,
};
static ssize_t info_debugfs_read(struct file *file, char __user *buf,
				 size_t len, loff_t *offset)
{
	ssize_t bytes_read = 0;
	int str_pos = 0;
	struct visornic_devdata *devdata;
	struct net_device *dev;
	char *vbuf;

	if (len > MAX_BUF)
		len = MAX_BUF;
	vbuf = kzalloc(len, GFP_KERNEL);
	if (!vbuf)
		return -ENOMEM;

	/* for each vnic channel dump out channel specific data */
	rcu_read_lock();
	for_each_netdev_rcu(current->nsproxy->net_ns, dev) {
		/* Only consider netdevs that are visornic, and are open */
		if ((dev->netdev_ops != &visornic_dev_ops) ||
		    (!netif_queue_stopped(dev)))
			continue;

		devdata = netdev_priv(dev);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     "netdev = %s (0x%p), MAC Addr %pM\n",
				     dev->name,
				     dev,
				     dev->dev_addr);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     "VisorNic Dev Info = 0x%p\n", devdata);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " num_rcv_bufs = %d\n",
				     devdata->num_rcv_bufs);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " max_outstanding_net_xmits = %lu\n",
				     devdata->max_outstanding_net_xmits);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " upper_threshold_net_xmits = %lu\n",
				     devdata->upper_threshold_net_xmits);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " lower_threshold_net_xmits = %lu\n",
				     devdata->lower_threshold_net_xmits);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " queuefullmsg_logged = %d\n",
				     devdata->queuefullmsg_logged);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " chstat.got_rcv = %lu\n",
				     devdata->chstat.got_rcv);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " chstat.got_enbdisack = %lu\n",
				     devdata->chstat.got_enbdisack);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " chstat.got_xmit_done = %lu\n",
				     devdata->chstat.got_xmit_done);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " chstat.xmit_fail = %lu\n",
				     devdata->chstat.xmit_fail);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " chstat.sent_enbdis = %lu\n",
				     devdata->chstat.sent_enbdis);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " chstat.sent_promisc = %lu\n",
				     devdata->chstat.sent_promisc);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " chstat.sent_post = %lu\n",
				     devdata->chstat.sent_post);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " chstat.sent_post_failed = %lu\n",
				     devdata->chstat.sent_post_failed);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " chstat.sent_xmit = %lu\n",
				     devdata->chstat.sent_xmit);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " chstat.reject_count = %lu\n",
				     devdata->chstat.reject_count);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " chstat.extra_rcvbufs_sent = %lu\n",
				     devdata->chstat.extra_rcvbufs_sent);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " n_rcv0 = %lu\n", devdata->n_rcv0);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " n_rcv1 = %lu\n", devdata->n_rcv1);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " n_rcv2 = %lu\n", devdata->n_rcv2);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " n_rcvx = %lu\n", devdata->n_rcvx);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " num_rcvbuf_in_iovm = %d\n",
				     atomic_read(&devdata->num_rcvbuf_in_iovm));
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " alloc_failed_in_if_needed_cnt = %lu\n",
				     devdata->alloc_failed_in_if_needed_cnt);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " alloc_failed_in_repost_rtn_cnt = %lu\n",
				     devdata->alloc_failed_in_repost_rtn_cnt);
		/* str_pos += scnprintf(vbuf + str_pos, len - str_pos,
		 *		       " inner_loop_limit_reached_cnt = %lu\n",
		 *		       devdata->inner_loop_limit_reached_cnt);
		 */
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " found_repost_rcvbuf_cnt = %lu\n",
				     devdata->found_repost_rcvbuf_cnt);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " repost_found_skb_cnt = %lu\n",
				     devdata->repost_found_skb_cnt);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " n_repost_deficit = %lu\n",
				     devdata->n_repost_deficit);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " bad_rcv_buf = %lu\n",
				     devdata->bad_rcv_buf);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " n_rcv_packets_not_accepted = %lu\n",
				     devdata->n_rcv_packets_not_accepted);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " interrupts_rcvd = %llu\n",
				     devdata->interrupts_rcvd);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " interrupts_notme = %llu\n",
				     devdata->interrupts_notme);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " interrupts_disabled = %llu\n",
				     devdata->interrupts_disabled);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " busy_cnt = %llu\n",
				     devdata->busy_cnt);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " flow_control_upper_hits = %llu\n",
				     devdata->flow_control_upper_hits);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " flow_control_lower_hits = %llu\n",
				     devdata->flow_control_lower_hits);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " netif_queue = %s\n",
				     netif_queue_stopped(devdata->netdev) ?
				     "stopped" : "running");
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " xmits_outstanding = %lu\n",
				     devdata_xmits_outstanding(devdata));
	}
	rcu_read_unlock();
	bytes_read = simple_read_from_buffer(buf, len, offset, vbuf, str_pos);
	kfree(vbuf);
	return bytes_read;
}
static struct dentry *visornic_debugfs_dir;
static const struct file_operations debugfs_info_fops = {
	.read = info_debugfs_read,
};
/**
 * send_rcv_posts_if_needed
 * @devdata: visornic device
 *
 * Send receive buffers to the IO Partition.
 * Returns void
 */
static void
send_rcv_posts_if_needed(struct visornic_devdata *devdata)
{
	int i;
	struct net_device *netdev;
	struct uiscmdrsp *cmdrsp = devdata->cmdrsp_rcv;
	int cur_num_rcv_bufs_to_alloc, rcv_bufs_allocated;

	/* don't do this until vnic is marked ready */
	if (!(devdata->enabled && devdata->enab_dis_acked))
		return;

	netdev = devdata->netdev;
	rcv_bufs_allocated = 0;
	/* this code is trying to prevent getting stuck here forever,
	 * but still retry it if you can't allocate them all this time.
	 */
	cur_num_rcv_bufs_to_alloc = devdata->num_rcv_bufs_could_not_alloc;
	while (cur_num_rcv_bufs_to_alloc > 0) {
		cur_num_rcv_bufs_to_alloc--;
		for (i = 0; i < devdata->num_rcv_bufs; i++) {
			if (devdata->rcvbuf[i])
				continue;
			devdata->rcvbuf[i] = alloc_rcv_buf(netdev);
			if (!devdata->rcvbuf[i]) {
				devdata->alloc_failed_in_if_needed_cnt++;
				break;
			}
			rcv_bufs_allocated++;
			post_skb(cmdrsp, devdata, devdata->rcvbuf[i]);
			devdata->chstat.extra_rcvbufs_sent++;
		}
	}
	devdata->num_rcv_bufs_could_not_alloc -= rcv_bufs_allocated;
}
/**
 * drain_resp_queue - drains and ignores all messages from the resp queue
 * @cmdrsp: io channel command response message
 * @devdata: visornic device to drain
 */
static void
drain_resp_queue(struct uiscmdrsp *cmdrsp, struct visornic_devdata *devdata)
{
	while (!visorchannel_signalremove(devdata->dev->visorchannel,
					  IOCHAN_FROM_IOPART,
					  cmdrsp))
		;
}
/**
 * service_resp_queue - drains the response queue
 * @cmdrsp: io channel command response message
 * @devdata: visornic device to drain
 *
 * Drain the response queue of any responses from the IO partition.
 * Process the responses as we get them.
 * Returns when response queue is empty or when the thread stops.
 */
static void
service_resp_queue(struct uiscmdrsp *cmdrsp, struct visornic_devdata *devdata,
		   int *rx_work_done, int budget)
{
	unsigned long flags;
	struct net_device *netdev;

	while (*rx_work_done < budget) {
		/* TODO: CLIENT ACQUIRE -- Don't really need this at the
		 * moment
		 */
		if (visorchannel_signalremove(devdata->dev->visorchannel,
					      IOCHAN_FROM_IOPART,
					      cmdrsp))
			break; /* queue empty */

		switch (cmdrsp->net.type) {
		case NET_RCV:
			devdata->chstat.got_rcv++;
			/* process incoming packet */
			*rx_work_done += visornic_rx(cmdrsp);
			break;
		case NET_XMIT_DONE:
			spin_lock_irqsave(&devdata->priv_lock, flags);
			devdata->chstat.got_xmit_done++;
			if (cmdrsp->net.xmtdone.xmt_done_result)
				devdata->chstat.xmit_fail++;
			/* only call queue wake if we stopped it */
			netdev = ((struct sk_buff *)cmdrsp->net.buf)->dev;
			/* ASSERT netdev == vnicinfo->netdev; */
			if ((netdev == devdata->netdev) &&
			    netif_queue_stopped(netdev)) {
				/* check if we have crossed the lower watermark
				 * for netif_wake_queue()
				 */
				if (vnic_hit_low_watermark
				    (devdata,
				     devdata->lower_threshold_net_xmits)) {
					/* enough NET_XMITs completed
					 * so can restart netif queue
					 */
					netif_wake_queue(netdev);
					devdata->flow_control_lower_hits++;
				}
			}
			skb_unlink(cmdrsp->net.buf, &devdata->xmitbufhead);
			spin_unlock_irqrestore(&devdata->priv_lock, flags);
			kfree_skb(cmdrsp->net.buf);
			break;
		case NET_RCV_ENBDIS_ACK:
			devdata->chstat.got_enbdisack++;
			netdev = (struct net_device *)
				cmdrsp->net.enbdis.context;
			spin_lock_irqsave(&devdata->priv_lock, flags);
			devdata->enab_dis_acked = 1;
			spin_unlock_irqrestore(&devdata->priv_lock, flags);

			if (devdata->server_down &&
			    devdata->server_change_state) {
				/* Inform Linux that the link is up */
				devdata->server_down = false;
				devdata->server_change_state = false;
				netif_wake_queue(netdev);
				netif_carrier_on(netdev);
			}
			break;
		case NET_CONNECT_STATUS:
			netdev = devdata->netdev;
			if (cmdrsp->net.enbdis.enable == 1) {
				spin_lock_irqsave(&devdata->priv_lock, flags);
				devdata->enabled = cmdrsp->net.enbdis.enable;
				spin_unlock_irqrestore(&devdata->priv_lock,
						       flags);
				netif_wake_queue(netdev);
				netif_carrier_on(netdev);
			} else {
				netif_stop_queue(netdev);
				netif_carrier_off(netdev);
				spin_lock_irqsave(&devdata->priv_lock, flags);
				devdata->enabled = cmdrsp->net.enbdis.enable;
				spin_unlock_irqrestore(&devdata->priv_lock,
						       flags);
			}
			break;
		default:
			break;
		}
		/* cmdrsp is now available for reuse */
	}
}
static int visornic_poll(struct napi_struct *napi, int budget)
{
	struct visornic_devdata *devdata = container_of(napi,
							struct visornic_devdata,
							napi);
	int rx_count = 0;

	send_rcv_posts_if_needed(devdata);
	service_resp_queue(devdata->cmdrsp, devdata, &rx_count, budget);

	/* If there aren't any more packets to receive stop the poll */
	if (rx_count < budget)
		napi_complete_done(napi, rx_count);
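	/* the 2 ms irq_poll_timer (see poll_for_irq() below) will schedule
	 * NAPI again once more responses are waiting in the channel
	 */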
	return rx_count;
}

/**
 * poll_for_irq - Checks the status of the response queue.
 * @v: void pointer to the visornic devdata
 *
 * Timer function: periodically check the response queue and
 * schedule NAPI to drain it if needed.
 */
static void
poll_for_irq(unsigned long v)
{
	struct visornic_devdata *devdata = (struct visornic_devdata *)v;

	if (!visorchannel_signalempty(
				   devdata->dev->visorchannel,
				   IOCHAN_FROM_IOPART))
		napi_schedule(&devdata->napi);

	atomic_set(&devdata->interrupt_rcvd, 0);

	/* mod_timer() takes an absolute expiry, hence jiffies + ... */
	mod_timer(&devdata->irq_poll_timer, jiffies + msecs_to_jiffies(2));
}
/**
 * visornic_probe - probe function for visornic devices
 * @dev: The visor device discovered
 *
 * Called when visorbus discovers a visornic device on its
 * bus. It creates a new visornic ethernet adapter.
 * Returns 0 or negative for error.
 */
static int visornic_probe(struct visor_device *dev)
{
	struct visornic_devdata *devdata = NULL;
	struct net_device *netdev = NULL;
	int err;
	int channel_offset = 0;
	u64 features;

	netdev = alloc_etherdev(sizeof(struct visornic_devdata));
	if (!netdev) {
		dev_err(&dev->device,
			"%s alloc_etherdev failed\n", __func__);
		return -ENOMEM;
	}

	netdev->netdev_ops = &visornic_dev_ops;
	netdev->watchdog_timeo = 5 * HZ;
	SET_NETDEV_DEV(netdev, &dev->device);

	/* Get MAC address from channel and read it into the device. */
	netdev->addr_len = ETH_ALEN;
	channel_offset = offsetof(struct spar_io_channel_protocol,
				  vnic.macaddr);
	err = visorbus_read_channel(dev, channel_offset, netdev->dev_addr,
				    ETH_ALEN);
	if (err < 0) {
		dev_err(&dev->device,
			"%s failed to get mac addr from chan (%d)\n",
			__func__, err);
		goto cleanup_netdev;
	}

	devdata = devdata_initialize(netdev_priv(netdev), dev);
	if (!devdata) {
		dev_err(&dev->device,
			"%s devdata_initialize failed\n", __func__);
		err = -ENOMEM;
		goto cleanup_netdev;
	}
	/* don't trust messages laying around in the channel */
	drain_resp_queue(devdata->cmdrsp, devdata);

	devdata->netdev = netdev;
	dev_set_drvdata(&dev->device, devdata);
	init_waitqueue_head(&devdata->rsp_queue);
	spin_lock_init(&devdata->priv_lock);
	devdata->enabled = 0; /* not yet */
	atomic_set(&devdata->usage, 1);

	/* Setup rcv bufs */
	channel_offset = offsetof(struct spar_io_channel_protocol,
				  vnic.num_rcv_bufs);
	err = visorbus_read_channel(dev, channel_offset,
				    &devdata->num_rcv_bufs, 4);
	if (err) {
		dev_err(&dev->device,
			"%s failed to get #rcv bufs from chan (%d)\n",
			__func__, err);
		goto cleanup_netdev;
	}

	devdata->rcvbuf = kcalloc(devdata->num_rcv_bufs,
				  sizeof(struct sk_buff *), GFP_KERNEL);
	if (!devdata->rcvbuf) {
		err = -ENOMEM;
		goto cleanup_netdev;
	}

	/* set the net_xmit outstanding threshold;
	 * always leave two slots open but you should have 3 at a minimum;
	 * note that max_outstanding_net_xmits must be > 0
	 */
	devdata->max_outstanding_net_xmits =
		max_t(unsigned long, 3, ((devdata->num_rcv_bufs / 3) - 2));
	devdata->upper_threshold_net_xmits =
		max_t(unsigned long,
		      2, (devdata->max_outstanding_net_xmits - 1));
	devdata->lower_threshold_net_xmits =
		max_t(unsigned long,
		      1, (devdata->max_outstanding_net_xmits / 2));
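	/* e.g. (hypothetical numbers, for illustration only) with 256 rcv
	 * bufs: max_outstanding = max(3, 256 / 3 - 2) = 83, so the queue is
	 * stopped once 82 xmits are outstanding and woken again when that
	 * count drains to 41
	 */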
	skb_queue_head_init(&devdata->xmitbufhead);

	/* create a cmdrsp we can use to post and unpost rcv buffers */
	devdata->cmdrsp_rcv = kmalloc(SIZEOF_CMDRSP, GFP_ATOMIC);
	if (!devdata->cmdrsp_rcv) {
		err = -ENOMEM;
		goto cleanup_rcvbuf;
	}
	devdata->xmit_cmdrsp = kmalloc(SIZEOF_CMDRSP, GFP_ATOMIC);
	if (!devdata->xmit_cmdrsp) {
		err = -ENOMEM;
		goto cleanup_cmdrsp_rcv;
	}
	INIT_WORK(&devdata->timeout_reset, visornic_timeout_reset);
	devdata->server_down = false;
	devdata->server_change_state = false;

	/* set the default mtu */
	channel_offset = offsetof(struct spar_io_channel_protocol,
				  vnic.mtu);
	err = visorbus_read_channel(dev, channel_offset, &netdev->mtu, 4);
	if (err) {
		dev_err(&dev->device,
			"%s failed to get mtu from chan (%d)\n",
			__func__, err);
		goto cleanup_xmit_cmdrsp;
	}

	/* TODO: Setup Interrupt information */
	/* Let's start our threads to get responses */
	netif_napi_add(netdev, &devdata->napi, visornic_poll, NAPI_WEIGHT);

	setup_timer(&devdata->irq_poll_timer, poll_for_irq,
		    (unsigned long)devdata);
	/* Note: This timer has to start running before the device can be
	 * enabled, because the napi routine is responsible for
	 * setting enab_dis_acked
	 */
	mod_timer(&devdata->irq_poll_timer, jiffies + msecs_to_jiffies(2));

	channel_offset = offsetof(struct spar_io_channel_protocol,
				  channel_header.features);
	err = visorbus_read_channel(dev, channel_offset, &features, 8);
	if (err) {
		dev_err(&dev->device,
			"%s failed to get features from chan (%d)\n",
			__func__, err);
		goto cleanup_napi_add;
	}

	features |= ULTRA_IO_CHANNEL_IS_POLLING;
	features |= ULTRA_IO_DRIVER_SUPPORTS_ENHANCED_RCVBUF_CHECKING;
	err = visorbus_write_channel(dev, channel_offset, &features, 8);
	if (err) {
		dev_err(&dev->device,
			"%s failed to set features in chan (%d)\n",
			__func__, err);
		goto cleanup_napi_add;
	}

	/* Note: Interrupts have to be enabled before the device can be
	 * enabled, because the napi routine is responsible for
	 * setting enab_dis_acked
	 */
	visorbus_enable_channel_interrupts(dev);

	err = register_netdev(netdev);
	if (err) {
		dev_err(&dev->device,
			"%s register_netdev failed (%d)\n", __func__, err);
		goto cleanup_napi_add;
	}

	/* create debug/sysfs directories */
	devdata->eth_debugfs_dir = debugfs_create_dir(netdev->name,
						      visornic_debugfs_dir);
	if (!devdata->eth_debugfs_dir) {
		dev_err(&dev->device,
			"%s debugfs_create_dir %s failed\n",
			__func__, netdev->name);
		err = -ENOMEM;
		goto cleanup_register_netdev;
	}

	dev_info(&dev->device, "%s success netdev=%s\n",
		 __func__, netdev->name);
	return 0;

cleanup_register_netdev:
	unregister_netdev(netdev);

cleanup_napi_add:
	del_timer_sync(&devdata->irq_poll_timer);
	netif_napi_del(&devdata->napi);

cleanup_xmit_cmdrsp:
	kfree(devdata->xmit_cmdrsp);

cleanup_cmdrsp_rcv:
	kfree(devdata->cmdrsp_rcv);

cleanup_rcvbuf:
	kfree(devdata->rcvbuf);

cleanup_netdev:
	free_netdev(netdev);
	return err;
}
/**
 * host_side_disappeared - IO part is gone.
 * @devdata: device object
 *
 * IO partition servicing this device is gone; do cleanup.
 * Returns void.
 */
static void host_side_disappeared(struct visornic_devdata *devdata)
{
	unsigned long flags;

	spin_lock_irqsave(&devdata->priv_lock, flags);
	devdata->dev = NULL;	/* indicate device destroyed */
	spin_unlock_irqrestore(&devdata->priv_lock, flags);
}
/**
 * visornic_remove - Called when visornic dev goes away
 * @dev: visornic device that is being removed
 *
 * Called when DEVICE_DESTROY gets called to remove device.
 * Returns void
 */
static void visornic_remove(struct visor_device *dev)
{
	struct visornic_devdata *devdata = dev_get_drvdata(&dev->device);
	struct net_device *netdev;
	unsigned long flags;

	if (!devdata) {
		dev_err(&dev->device, "%s no devdata\n", __func__);
		return;
	}
	spin_lock_irqsave(&devdata->priv_lock, flags);
	if (devdata->going_away) {
		spin_unlock_irqrestore(&devdata->priv_lock, flags);
		dev_err(&dev->device, "%s already being removed\n", __func__);
		return;
	}
	devdata->going_away = true;
	spin_unlock_irqrestore(&devdata->priv_lock, flags);
	netdev = devdata->netdev;
	if (!netdev) {
		dev_err(&dev->device, "%s no net device\n", __func__);
		return;
	}

	/* going_away prevents new items being added to the workqueues */
	cancel_work_sync(&devdata->timeout_reset);

	debugfs_remove_recursive(devdata->eth_debugfs_dir);

	unregister_netdev(netdev); /* this will call visornic_close() */

	del_timer_sync(&devdata->irq_poll_timer);
	netif_napi_del(&devdata->napi);

	dev_set_drvdata(&dev->device, NULL);
	host_side_disappeared(devdata);
	devdata_release(devdata);
	free_netdev(netdev);
}
/**
 * visornic_pause - Called when IO Part disappears
 * @dev: visornic device that is being serviced
 * @complete_func: call when finished.
 *
 * Called when the IO Partition has gone down. Need to free
 * up resources and wait for the IO partition to come back. Mark
 * the link as down and don't attempt any DMA. When we have freed
 * memory, call the complete_func so that Command knows we are
 * done; if we don't call complete_func, the IO part is never
 * notified.
 * Returns 0 for success.
 */
static int visornic_pause(struct visor_device *dev,
			  visorbus_state_complete_func complete_func)
{
	struct visornic_devdata *devdata = dev_get_drvdata(&dev->device);

	visornic_serverdown(devdata, complete_func);
	return 0;
}
/**
 * visornic_resume - Called when IO part has recovered
 * @dev: visornic device that is being serviced
 * @complete_func: call when finished
 *
 * Called when the IO partition has recovered. Reestablish the
 * connection to the IO part and set the link up. Okay to do
 * normal sends and receives again.
 * Returns 0 for success.
 */
static int visornic_resume(struct visor_device *dev,
			   visorbus_state_complete_func complete_func)
{
	struct visornic_devdata *devdata;
	struct net_device *netdev;
	unsigned long flags;

	devdata = dev_get_drvdata(&dev->device);
	if (!devdata) {
		dev_err(&dev->device, "%s no devdata\n", __func__);
		return -EINVAL;
	}

	netdev = devdata->netdev;

	spin_lock_irqsave(&devdata->priv_lock, flags);
	if (devdata->server_change_state) {
		spin_unlock_irqrestore(&devdata->priv_lock, flags);
		dev_err(&dev->device, "%s server already changing state\n",
			__func__);
		return -EINVAL;
	}
	if (!devdata->server_down) {
		spin_unlock_irqrestore(&devdata->priv_lock, flags);
		dev_err(&dev->device, "%s server not down\n", __func__);
		complete_func(dev, 0);
		return 0;
	}
	devdata->server_change_state = true;
	spin_unlock_irqrestore(&devdata->priv_lock, flags);

	/* Must transition channel to ATTACHED state BEFORE
	 * we can start using the device again.
	 * TODO: State transitions
	 */
	mod_timer(&devdata->irq_poll_timer, jiffies + msecs_to_jiffies(2));
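	/* restarting the 2 ms poll ensures responses from the recovered
	 * IO partition are serviced again
	 */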
	rtnl_lock();
	dev_open(netdev);
	rtnl_unlock();

	complete_func(dev, 0);
	return 0;
}
/* This is used to tell the visor bus driver which types of visor devices
 * we support, and what functions to call when a visor device that we support
 * is attached or removed.
 */
static struct visor_driver visornic_driver = {
	.name = "visornic",
	.owner = THIS_MODULE,
	.channel_types = visornic_channel_types,
	.probe = visornic_probe,
	.remove = visornic_remove,
	.pause = visornic_pause,
	.resume = visornic_resume,
	.channel_interrupt = NULL,
};
/**
 * visornic_init - Init function
 *
 * Init function for the visornic driver. Do initial driver setup
 * and wait for devices.
 * Returns 0 for success, negative for error.
 */
static int visornic_init(void)
{
	struct dentry *ret;
	int err = -ENOMEM;

	visornic_debugfs_dir = debugfs_create_dir("visornic", NULL);
	if (!visornic_debugfs_dir)
		return err;

	ret = debugfs_create_file("info", S_IRUSR, visornic_debugfs_dir, NULL,
				  &debugfs_info_fops);
	if (!ret)
		goto cleanup_debugfs;
	ret = debugfs_create_file("enable_ints", S_IWUSR, visornic_debugfs_dir,
				  NULL, &debugfs_enable_ints_fops);
	if (!ret)
		goto cleanup_debugfs;

	err = visorbus_register_visor_driver(&visornic_driver);
	if (err)
		goto cleanup_debugfs;

	return 0;

cleanup_debugfs:
	debugfs_remove_recursive(visornic_debugfs_dir);
	return err;
}
/**
 * visornic_cleanup - driver exit routine
 *
 * Unregister driver from the bus and free up memory.
 */
static void visornic_cleanup(void)
{
	visorbus_unregister_visor_driver(&visornic_driver);
	debugfs_remove_recursive(visornic_debugfs_dir);
}

module_init(visornic_init);
module_exit(visornic_cleanup);

MODULE_AUTHOR("Unisys");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("s-Par NIC driver for virtual network devices");