/* Copyright (c) 2012 - 2015 UNISYS CORPORATION
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT.  See the GNU General Public License for more
 * details.
 */
/* This driver lives in a s-Par guest partition and registers for ethernet
 * IO channels from the visorbus driver. It creates netdev devices, forwards
 * transmits to the IO channel, and accepts receives from the IO
 * Partition via the IO channel.
 */
#include <linux/debugfs.h>
#include <linux/etherdevice.h>
#include <linux/netdevice.h>
#include <linux/kthread.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>

#include "iochannel.h"
#define VISORNIC_INFINITE_RSP_WAIT 0
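/* a timeout argument of 0 to the enable/disable paths below means
 * "wait forever" for the IO partition's response
 */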
/* MAX_BUF = 64 lines x 32 MAXVNIC x 80 characters
 *         = 163840 bytes
 */
#define MAX_BUF 163840
#define NAPI_WEIGHT 64
/* GUIDs for the channel type supported by this driver. */
static struct visor_channeltype_descriptor visornic_channel_types[] = {
	/* Note that the only channel type we expect to be reported by the
	 * bus driver is the SPAR_VNIC channel.
	 */
	{ SPAR_VNIC_CHANNEL_PROTOCOL_UUID, "ultravnic" },
	{ NULL_UUID_LE, NULL }
};
MODULE_DEVICE_TABLE(visorbus, visornic_channel_types);
/* FIXME XXX: This next line of code must be fixed and removed before
 * acceptance into the 'normal' part of the kernel. It is only here as a place
 * holder to get module autoloading functionality working for visorbus. Code
 * must be added to scripts/mod/file2alias.c, etc., to get this working
 * properly.
 */
MODULE_ALIAS("visorbus:" SPAR_VNIC_CHANNEL_PROTOCOL_UUID_STR);
struct chanstat {
	unsigned long got_rcv;
	unsigned long got_enbdisack;
	unsigned long got_xmit_done;
	unsigned long xmit_fail;
	unsigned long sent_enbdis;
	unsigned long sent_promisc;
	unsigned long sent_post;
	unsigned long sent_post_failed;
	unsigned long sent_xmit;
	unsigned long reject_count;
	unsigned long extra_rcvbufs_sent;
};
struct visornic_devdata {
	/* 0 = disabled, 1 = enabled to receive */
	unsigned short enabled;
	/* NET_RCV_ENABLE/DISABLE acked by IOPART */
	unsigned short enab_dis_acked;

	struct visor_device *dev;
	struct net_device *netdev;
	struct net_device_stats net_stats;
	atomic_t interrupt_rcvd;
	wait_queue_head_t rsp_queue;
	struct sk_buff **rcvbuf;
	/* incarnation_id lets IOPART know about re-birth */
	u64 incarnation_id;
	/* flags as they were prior to set_multicast_list */
	unsigned short old_flags;
	atomic_t usage;	/* count of users */

	/* number of rcv buffers the vnic will post */
	int num_rcv_bufs;
	int num_rcv_bufs_could_not_alloc;
	atomic_t num_rcvbuf_in_iovm;
	unsigned long alloc_failed_in_if_needed_cnt;
	unsigned long alloc_failed_in_repost_rtn_cnt;

	/* absolute max number of outstanding xmits - should never hit this */
	unsigned long max_outstanding_net_xmits;
	/* high water mark for calling netif_stop_queue() */
	unsigned long upper_threshold_net_xmits;
	/* low water mark for calling netif_wake_queue() */
	unsigned long lower_threshold_net_xmits;
	/* xmitbufhead - head of the xmit buffer list sent to the IOPART end */
	struct sk_buff_head xmitbufhead;

	visorbus_state_complete_func server_down_complete_func;
	struct work_struct timeout_reset;
	/* cmdrsp_rcv is used for posting/unposting rcv buffers */
	struct uiscmdrsp *cmdrsp_rcv;
	/* xmit_cmdrsp - issues NET_XMIT - only one active xmit at a time */
	struct uiscmdrsp *xmit_cmdrsp;

	bool server_down;		/* IOPART is down */
	bool server_change_state;	/* Processing SERVER_CHANGESTATE msg */
	bool going_away;		/* device is being torn down */
	struct dentry *eth_debugfs_dir;
	u64 interrupts_rcvd;
	u64 interrupts_notme;
	u64 interrupts_disabled;
	u64 busy_cnt;
	spinlock_t priv_lock;	/* spinlock to access devdata structures */

	/* flow control counters */
	u64 flow_control_upper_hits;
	u64 flow_control_lower_hits;

	unsigned long n_rcv0;	/* # rcvs of 0 buffers */
	unsigned long n_rcv1;	/* # rcvs of 1 buffers */
	unsigned long n_rcv2;	/* # rcvs of 2 buffers */
	unsigned long n_rcvx;	/* # rcvs of >2 buffers */
	unsigned long found_repost_rcvbuf_cnt;	/* # repost_rcvbuf_cnt */
	unsigned long repost_found_skb_cnt;	/* # of found the skb */
	unsigned long n_repost_deficit;		/* # of lost rcv buffers */
	unsigned long bad_rcv_buf;	/* # of unknown rcv skb not freed */
	unsigned long n_rcv_packets_not_accepted; /* # bogus rcv packets */

	int queuefullmsg_logged;
	struct chanstat chstat;
	struct timer_list irq_poll_timer;
	struct napi_struct napi;
	struct uiscmdrsp cmdrsp[SIZEOF_CMDRSP];
};
/**
 * visor_copy_fragsinfo_from_skb(
 * @skb: skbuff that we are pulling the frags from
 * @firstfraglen: length of first fragment in skb
 * @frags_max: max len of frags array
 * @frags: frags array filled in on output
 *
 * Copy the fragment list in the SKB to a phys_info
 * array that the IOPART understands.
 * Return value indicates number of entries filled in frags
 * Negative values indicate an error.
 */
static int
visor_copy_fragsinfo_from_skb(struct sk_buff *skb, unsigned int firstfraglen,
			      unsigned int frags_max,
			      struct phys_info frags[])
{
	unsigned int count = 0, frag, size, offset = 0, numfrags;
	unsigned int total_count;

	numfrags = skb_shinfo(skb)->nr_frags;

	/* Compute the number of fragments this skb has, and if it's more
	 * than the frags array can hold, linearize the skb.
	 */
	total_count = numfrags + (firstfraglen / PI_PAGE_SIZE);
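	/* a partial trailing page still needs its own phys_info entry,
	 * so round the per-page count up
	 */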
	if (firstfraglen % PI_PAGE_SIZE)
		total_count++;

	if (total_count > frags_max) {
		if (skb_linearize(skb))
			return -EINVAL;
		numfrags = skb_shinfo(skb)->nr_frags;
	}
	while (firstfraglen) {
		if (count == frags_max)
			return -EINVAL;

		frags[count].pi_pfn =
			page_to_pfn(virt_to_page(skb->data + offset));
		frags[count].pi_off =
			(unsigned long)(skb->data + offset) & PI_PAGE_MASK;
		size = min_t(unsigned int, firstfraglen,
			     PI_PAGE_SIZE - frags[count].pi_off);

		/* can take smallest of firstfraglen (what's left) OR
		 * bytes left in the page
		 */
		frags[count].pi_len = size;
		firstfraglen -= size;
		offset += size;
		count++;
	}
	if ((count + numfrags) > frags_max)
		return -EINVAL;

	for (frag = 0; frag < numfrags; frag++) {
		count = add_physinfo_entries(page_to_pfn(
				skb_frag_page(&skb_shinfo(skb)->frags[frag])),
				skb_shinfo(skb)->frags[frag].page_offset,
				skb_shinfo(skb)->frags[frag].size,
				count, frags_max, frags);
		/* add_physinfo_entries only returns
		 * zero if the frags array is out of room.
		 * That should never happen because we
		 * fail above, if count + numfrags > frags_max.
		 */
		if (!count)
			return -EINVAL;
	}
	if (skb_shinfo(skb)->frag_list) {
		struct sk_buff *skbinlist;
		int c;

		for (skbinlist = skb_shinfo(skb)->frag_list; skbinlist;
		     skbinlist = skbinlist->next) {
			c = visor_copy_fragsinfo_from_skb(skbinlist,
							  skbinlist->len -
							  skbinlist->data_len,
							  frags_max - count,
							  &frags[count]);
			if (c < 0)
				return c;
			count += c;
		}
	}
	return count;
}
static ssize_t enable_ints_write(struct file *file,
				 const char __user *buffer,
				 size_t count, loff_t *ppos)
{
	/* Don't want to break ABI here by having a debugfs
	 * file that no longer exists or is writable, so
	 * let's just make this a vestigial function
	 */
	return count;
}

static const struct file_operations debugfs_enable_ints_fops = {
	.write = enable_ints_write,
};
/**
 * visornic_serverdown_complete - IOPART went down, pause device
 * @devdata: device managed by the IO partition
 *
 * The IO partition has gone down, and we need to do some cleanup
 * for when it comes back. Treat the IO partition as the link
 * being down.
 */
static void
visornic_serverdown_complete(struct visornic_devdata *devdata)
{
	struct net_device *netdev;

	netdev = devdata->netdev;

	/* Stop polling for interrupts */
	del_timer_sync(&devdata->irq_poll_timer);

	rtnl_lock();
	dev_close(netdev);
	rtnl_unlock();

	atomic_set(&devdata->num_rcvbuf_in_iovm, 0);
	devdata->chstat.sent_xmit = 0;
	devdata->chstat.got_xmit_done = 0;

	if (devdata->server_down_complete_func)
		(*devdata->server_down_complete_func)(devdata->dev, 0);

	devdata->server_down = true;
	devdata->server_change_state = false;
	devdata->server_down_complete_func = NULL;
}
/**
 * visornic_serverdown - Command has notified us that IOPART is down
 * @devdata: device that is being managed by IOPART
 *
 * Schedule the work needed to handle the server down request. Make
 * sure we haven't already handled the server change state event.
 * Returns 0 if we scheduled the work, -EINVAL on error.
 */
static int
visornic_serverdown(struct visornic_devdata *devdata,
		    visorbus_state_complete_func complete_func)
{
	unsigned long flags;
	int err;

	spin_lock_irqsave(&devdata->priv_lock, flags);
	if (devdata->server_change_state) {
		dev_dbg(&devdata->dev->device, "%s changing state\n",
			__func__);
		err = -EINVAL;
		goto err_unlock;
	}
	if (devdata->server_down) {
		dev_dbg(&devdata->dev->device, "%s already down\n",
			__func__);
		err = -EINVAL;
		goto err_unlock;
	}
	if (devdata->going_away) {
		dev_dbg(&devdata->dev->device,
			"%s aborting because device removal pending\n",
			__func__);
		err = -EINVAL;
		goto err_unlock;
	}
	devdata->server_change_state = true;
	devdata->server_down_complete_func = complete_func;
	spin_unlock_irqrestore(&devdata->priv_lock, flags);

	visornic_serverdown_complete(devdata);
	return 0;

err_unlock:
	spin_unlock_irqrestore(&devdata->priv_lock, flags);
	return err;
}
/**
 * alloc_rcv_buf - alloc rcv buffer to be given to the IO Partition.
 * @netdev: network adapter the rcv bufs are attached to.
 *
 * Create an sk_buff (rcv_buf) that will be passed to the IO Partition
 * so that it can write rcv data into our memory space.
 * Return pointer to sk_buff.
 */
static struct sk_buff *
alloc_rcv_buf(struct net_device *netdev)
{
	struct sk_buff *skb;

	/* NOTE: the first fragment in each rcv buffer is pointed to by
	 * rcvskb->data. For now all rcv buffers will be RCVPOST_BUF_SIZE
	 * in length, so the first frag is large enough to hold 1514.
	 */
	skb = alloc_skb(RCVPOST_BUF_SIZE, GFP_ATOMIC);
	if (!skb)
		return NULL;
	skb->dev = netdev;
	/* current value of mtu doesn't come into play here; large
	 * packets will just end up using multiple rcv buffers all of
	 * same size.
	 */
	skb->len = RCVPOST_BUF_SIZE;
	/* alloc_skb already zeroed it; set it again for clarity. */
	skb->data_len = 0;
	return skb;
}
/**
 * post_skb - post a skb to the IO Partition.
 * @cmdrsp: cmdrsp packet to be sent to the IO Partition
 * @devdata: visornic_devdata to post the skb to
 * @skb: skb to give to the IO partition
 *
 * Send the skb to the IO Partition.
 * Returns void
 */
static void
post_skb(struct uiscmdrsp *cmdrsp,
	 struct visornic_devdata *devdata, struct sk_buff *skb)
{
	cmdrsp->net.buf = skb;
	cmdrsp->net.rcvpost.frag.pi_pfn = page_to_pfn(virt_to_page(skb->data));
	cmdrsp->net.rcvpost.frag.pi_off =
		(unsigned long)skb->data & PI_PAGE_MASK;
	cmdrsp->net.rcvpost.frag.pi_len = skb->len;
	cmdrsp->net.rcvpost.unique_num = devdata->incarnation_id;
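	/* the rcvpost descriptor carries a single phys_info fragment, so a
	 * rcv buffer can only be posted when it fits within one page
	 */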
	if ((cmdrsp->net.rcvpost.frag.pi_off + skb->len) <= PI_PAGE_SIZE) {
		cmdrsp->net.type = NET_RCV_POST;
		cmdrsp->cmdtype = CMD_NET_TYPE;
		if (!visorchannel_signalinsert(devdata->dev->visorchannel,
					       IOCHAN_TO_IOPART,
					       cmdrsp)) {
			atomic_inc(&devdata->num_rcvbuf_in_iovm);
			devdata->chstat.sent_post++;
		} else {
			devdata->chstat.sent_post_failed++;
		}
	}
}
/**
 * send_enbdis - send NET_RCV_ENBDIS to IO Partition
 * @netdev: netdevice we are enabling/disabling, used as context
 * @state: enable = 1/disable = 0
 * @devdata: visornic device we are enabling/disabling
 *
 * Send the enable/disable message to the IO Partition.
 * Returns void
 */
static void
send_enbdis(struct net_device *netdev, int state,
	    struct visornic_devdata *devdata)
{
	devdata->cmdrsp_rcv->net.enbdis.enable = state;
	devdata->cmdrsp_rcv->net.enbdis.context = netdev;
	devdata->cmdrsp_rcv->net.type = NET_RCV_ENBDIS;
	devdata->cmdrsp_rcv->cmdtype = CMD_NET_TYPE;
	if (!visorchannel_signalinsert(devdata->dev->visorchannel,
				       IOCHAN_TO_IOPART,
				       devdata->cmdrsp_rcv))
		devdata->chstat.sent_enbdis++;
}
/**
 * visornic_disable_with_timeout - Disable network adapter
 * @netdev: netdevice to disable
 * @timeout: timeout to wait for disable
 *
 * Disable the network adapter and inform the IO Partition that we
 * are disabled; reclaim memory from rcv bufs.
 * Returns 0 on success, negative if the IO Partition did not respond.
 */
static int
visornic_disable_with_timeout(struct net_device *netdev, const int timeout)
{
	struct visornic_devdata *devdata = netdev_priv(netdev);
	int i;
	unsigned long flags;
	int wait = 0;

	/* send a msg telling the other end we are stopping incoming pkts */
	spin_lock_irqsave(&devdata->priv_lock, flags);
	devdata->enabled = 0;
	devdata->enab_dis_acked = 0; /* must wait for ack */
	spin_unlock_irqrestore(&devdata->priv_lock, flags);

	/* send disable and wait for ack -- don't hold lock when sending
	 * disable because if the queue is full, insert might sleep.
	 */
	send_enbdis(netdev, 0, devdata);

	/* wait for ack to arrive before we try to free rcv buffers
	 * NOTE: the other end automatically unposts the rcv buffers
	 * when it gets a disable.
	 */
	spin_lock_irqsave(&devdata->priv_lock, flags);
	while ((timeout == VISORNIC_INFINITE_RSP_WAIT) ||
	       (wait < timeout)) {
		if (devdata->enab_dis_acked)
			break;
		if (devdata->server_down || devdata->server_change_state) {
			dev_dbg(&netdev->dev, "%s server went away\n",
				__func__);
			break;
		}
		set_current_state(TASK_INTERRUPTIBLE);
		spin_unlock_irqrestore(&devdata->priv_lock, flags);
		wait += schedule_timeout(msecs_to_jiffies(10));
		spin_lock_irqsave(&devdata->priv_lock, flags);
	}

	/* Wait for usage to go to 1 (no other users) before freeing
	 * rcv buffers
	 */
	if (atomic_read(&devdata->usage) > 1) {
		while (1) {
			set_current_state(TASK_INTERRUPTIBLE);
			spin_unlock_irqrestore(&devdata->priv_lock, flags);
			schedule_timeout(msecs_to_jiffies(10));
			spin_lock_irqsave(&devdata->priv_lock, flags);
			if (atomic_read(&devdata->usage))
				break;
		}
	}
	/* we've set enabled to 0, so we can give up the lock. */
	spin_unlock_irqrestore(&devdata->priv_lock, flags);

	/* stop the transmit queue so nothing more can be transmitted */
	netif_stop_queue(netdev);

	napi_disable(&devdata->napi);
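	/* no NET_XMIT_DONE responses will be processed for anything still
	 * queued, so drop the skbs we were tracking for completion
	 */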
	skb_queue_purge(&devdata->xmitbufhead);

	/* Free rcv buffers - other end has automatically unposted them on
	 * disable
	 */
	for (i = 0; i < devdata->num_rcv_bufs; i++) {
		if (devdata->rcvbuf[i]) {
			kfree_skb(devdata->rcvbuf[i]);
			devdata->rcvbuf[i] = NULL;
		}
	}

	return 0;
}
/**
 * init_rcv_bufs -- initialize receive bufs and send them to the IO Part
 * @netdev: struct netdevice
 * @devdata: visornic_devdata
 *
 * Allocate rcv buffers and post them to the IO Partition.
 * Return 0 for success, and negative for failure.
 */
static int
init_rcv_bufs(struct net_device *netdev, struct visornic_devdata *devdata)
{
	int i, count;

	/* allocate a fixed number of receive buffers to post to uisnic;
	 * post the receive buffers after we've allocated the required amount
	 */
	for (i = 0; i < devdata->num_rcv_bufs; i++) {
		devdata->rcvbuf[i] = alloc_rcv_buf(netdev);
		if (!devdata->rcvbuf[i])
			break; /* if we failed to allocate one let us stop */
	}
	if (i == 0) /* couldn't even allocate one -- bail out */
		return -ENOMEM;
	count = i;

	/* Ensure we can alloc 2/3rd of the requested number of buffers.
	 * 2/3 is an arbitrary choice; used also in ndis init.c
	 */
	if (count < ((2 * devdata->num_rcv_bufs) / 3)) {
		/* free receive buffers we did alloc and then bail out */
		for (i = 0; i < count; i++) {
			kfree_skb(devdata->rcvbuf[i]);
			devdata->rcvbuf[i] = NULL;
		}
		return -ENOMEM;
	}

	/* post receive buffers to receive incoming input - without holding
	 * lock - we've not enabled nor started the queue so there shouldn't
	 * be any rcv or xmit activity
	 */
	for (i = 0; i < count; i++)
		post_skb(devdata->cmdrsp_rcv, devdata, devdata->rcvbuf[i]);

	return 0;
}
/**
 * visornic_enable_with_timeout - send enable to IO Part
 * @netdev: struct net_device
 * @timeout: Time to wait for the ACK from the enable
 *
 * Sends enable to IOVM; inits and posts receive buffers to IOVM.
 * Timeout is defined in msecs (timeout of 0 specifies infinite wait).
 * Return 0 for success, negative for failure.
 */
static int
visornic_enable_with_timeout(struct net_device *netdev, const int timeout)
{
	int i;
	struct visornic_devdata *devdata = netdev_priv(netdev);
	unsigned long flags;
	int wait = 0;

	napi_enable(&devdata->napi);

	/* NOTE: the other end automatically unposts the rcv buffers when it
	 * gets a disable.
	 */
	i = init_rcv_bufs(netdev, devdata);
	if (i < 0) {
		dev_err(&netdev->dev,
			"%s failed to init rcv bufs (%d)\n", __func__, i);
		return i;
	}

	spin_lock_irqsave(&devdata->priv_lock, flags);
	devdata->enabled = 1;
	devdata->enab_dis_acked = 0;

	/* now we're ready, let's send an ENB to uisnic but until we get
	 * an ACK back from uisnic, we'll drop the packets
	 */
	devdata->n_rcv_packets_not_accepted = 0;
	spin_unlock_irqrestore(&devdata->priv_lock, flags);

	/* send enable and wait for ack -- don't hold lock when sending enable
	 * because if the queue is full, insert might sleep.
	 */
	send_enbdis(netdev, 1, devdata);

	spin_lock_irqsave(&devdata->priv_lock, flags);
	while ((timeout == VISORNIC_INFINITE_RSP_WAIT) ||
	       (wait < timeout)) {
		if (devdata->enab_dis_acked)
			break;
		if (devdata->server_down || devdata->server_change_state) {
			dev_dbg(&netdev->dev, "%s server went away\n",
				__func__);
			break;
		}
		set_current_state(TASK_INTERRUPTIBLE);
		spin_unlock_irqrestore(&devdata->priv_lock, flags);
		wait += schedule_timeout(msecs_to_jiffies(10));
		spin_lock_irqsave(&devdata->priv_lock, flags);
	}

	spin_unlock_irqrestore(&devdata->priv_lock, flags);

	if (!devdata->enab_dis_acked) {
		dev_err(&netdev->dev, "%s missing ACK\n", __func__);
		return -EIO;
	}

	netif_start_queue(netdev);

	return 0;
}
/**
 * visornic_timeout_reset - handle xmit timeout resets
 * @work: work item that scheduled this work
 *
 * Transmit timeouts are typically handled by resetting the
 * device. For our virtual NIC, we send a disable and an enable
 * to the IOVM. If it doesn't respond, we trigger a serverdown.
 */
static void
visornic_timeout_reset(struct work_struct *work)
{
	struct visornic_devdata *devdata;
	struct net_device *netdev;
	int response = 0;

	devdata = container_of(work, struct visornic_devdata, timeout_reset);
	netdev = devdata->netdev;

	rtnl_lock();
	if (!netif_running(netdev)) {
		rtnl_unlock();
		return;
	}

	response = visornic_disable_with_timeout(netdev,
						 VISORNIC_INFINITE_RSP_WAIT);
	if (response)
		goto call_serverdown;

	response = visornic_enable_with_timeout(netdev,
						VISORNIC_INFINITE_RSP_WAIT);
	if (response)
		goto call_serverdown;

	rtnl_unlock();
	return;

call_serverdown:
	visornic_serverdown(devdata, NULL);
	rtnl_unlock();
}
/**
 * visornic_open - Enable the visornic device and mark the queue started
 * @netdev: netdevice to start
 *
 * Enable the device and start the transmit queue.
 * Return 0 for success.
 */
static int
visornic_open(struct net_device *netdev)
{
	visornic_enable_with_timeout(netdev, VISORNIC_INFINITE_RSP_WAIT);

	return 0;
}

/**
 * visornic_close - Disables the visornic device and stops the queues
 * @netdev: netdevice to stop
 *
 * Disable the device and stop the transmit queue.
 * Return 0 for success.
 */
static int
visornic_close(struct net_device *netdev)
{
	visornic_disable_with_timeout(netdev, VISORNIC_INFINITE_RSP_WAIT);

	return 0;
}
/**
 * devdata_xmits_outstanding - compute outstanding xmits
 * @devdata: visornic_devdata for device
 *
 * Return value is the number of outstanding xmits.
 */
static unsigned long devdata_xmits_outstanding(struct visornic_devdata *devdata)
{
	if (devdata->chstat.sent_xmit >= devdata->chstat.got_xmit_done)
		return devdata->chstat.sent_xmit -
			devdata->chstat.got_xmit_done;
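	/* sent_xmit has wrapped past ULONG_MAX while got_xmit_done has not
	 * caught up yet, so compute the difference modulo ULONG_MAX + 1
	 */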
	return (ULONG_MAX - devdata->chstat.got_xmit_done
		+ devdata->chstat.sent_xmit + 1);
}
/**
 * vnic_hit_high_watermark
 * @devdata: indicates visornic device we are checking
 * @high_watermark: max num of unacked xmits we will tolerate
 *		    before we will start throttling
 *
 * Returns true iff the number of unacked xmits sent to
 * the IO partition is >= high_watermark.
 */
static inline bool vnic_hit_high_watermark(struct visornic_devdata *devdata,
					   ulong high_watermark)
{
	return (devdata_xmits_outstanding(devdata) >= high_watermark);
}

/**
 * vnic_hit_low_watermark
 * @devdata: indicates visornic device we are checking
 * @low_watermark: we will wait until the num of unacked xmits
 *		   drops to this value or lower before we start
 *		   transmitting again
 *
 * Returns true iff the number of unacked xmits sent to
 * the IO partition is <= low_watermark.
 */
static inline bool vnic_hit_low_watermark(struct visornic_devdata *devdata,
					  ulong low_watermark)
{
	return (devdata_xmits_outstanding(devdata) <= low_watermark);
}
/**
 * visornic_xmit - send a packet to the IO Partition
 * @skb: Packet to be sent
 * @netdev: net device the packet is being sent from
 *
 * Convert the skb to a cmdrsp so the IO Partition can understand it.
 * Send the XMIT command to the IO Partition for processing. This
 * function is protected from concurrent calls by a spinlock xmit_lock
 * in the net_device struct, but as soon as the function returns it
 * can be called again.
 * Returns NETDEV_TX_OK.
 */
static int
visornic_xmit(struct sk_buff *skb, struct net_device *netdev)
{
	struct visornic_devdata *devdata;
	int len, firstfraglen, padlen;
	struct uiscmdrsp *cmdrsp = NULL;
	unsigned long flags;

	devdata = netdev_priv(netdev);
	spin_lock_irqsave(&devdata->priv_lock, flags);

	if (netif_queue_stopped(netdev) || devdata->server_down ||
	    devdata->server_change_state) {
		spin_unlock_irqrestore(&devdata->priv_lock, flags);
		devdata->busy_cnt++;
		dev_dbg(&netdev->dev,
			"%s busy - queue stopped\n", __func__);
		kfree_skb(skb);
		return NETDEV_TX_OK;
	}

	/* sk_buff struct is used to host network data throughout all the
	 * linux network subsystems
	 */
	len = skb->len;

	/* skb->len is the FULL length of data (including fragmentary portion)
	 * skb->data_len is the length of the fragment portion in frags
	 * skb->len - skb->data_len is size of the 1st fragment in skb->data
	 * calculate the length of the first fragment that skb->data is
	 * pointing to
	 */
	firstfraglen = skb->len - skb->data_len;
	if (firstfraglen < ETH_HLEN) {
		spin_unlock_irqrestore(&devdata->priv_lock, flags);
		devdata->busy_cnt++;
		dev_err(&netdev->dev,
			"%s busy - first frag too small (%d)\n",
			__func__, firstfraglen);
		kfree_skb(skb);
		return NETDEV_TX_OK;
	}

	if ((len < ETH_MIN_PACKET_SIZE) &&
	    ((skb_end_pointer(skb) - skb->data) >= ETH_MIN_PACKET_SIZE)) {
		/* pad the packet out to minimum size */
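		/* (Ethernet's minimum frame is 60 bytes before the FCS;
		 * ETH_MIN_PACKET_SIZE presumably reflects that)
		 */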
		padlen = ETH_MIN_PACKET_SIZE - len;
		memset(&skb->data[len], 0, padlen);
		skb->tail += padlen;
		skb->len += padlen;
		len += padlen;
		firstfraglen += padlen;
	}

	cmdrsp = devdata->xmit_cmdrsp;
	/* clear cmdrsp */
	memset(cmdrsp, 0, SIZEOF_CMDRSP);
	cmdrsp->net.type = NET_XMIT;
	cmdrsp->cmdtype = CMD_NET_TYPE;

	/* save the pointer to skb -- we'll need it for completion */
	cmdrsp->net.buf = skb;

	if (vnic_hit_high_watermark(devdata,
				    devdata->max_outstanding_net_xmits)) {
		/* extra NET_XMITs queued over to IOVM - need to wait */
		devdata->chstat.reject_count++;
		if (!devdata->queuefullmsg_logged &&
		    ((devdata->chstat.reject_count & 0x3ff) == 1))
			devdata->queuefullmsg_logged = 1;
		netif_stop_queue(netdev);
		spin_unlock_irqrestore(&devdata->priv_lock, flags);
		devdata->busy_cnt++;
		dev_dbg(&netdev->dev,
			"%s busy - waiting for iovm to catch up\n",
			__func__);
		kfree_skb(skb);
		return NETDEV_TX_OK;
	}
	if (devdata->queuefullmsg_logged)
		devdata->queuefullmsg_logged = 0;

	if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
		cmdrsp->net.xmt.lincsum.valid = 1;
		cmdrsp->net.xmt.lincsum.protocol = skb->protocol;
		if (skb_transport_header(skb) > skb->data) {
			cmdrsp->net.xmt.lincsum.hrawoff =
				skb_transport_header(skb) - skb->data;
			cmdrsp->net.xmt.lincsum.hrawoffv = 1;
		}
		if (skb_network_header(skb) > skb->data) {
			cmdrsp->net.xmt.lincsum.nhrawoff =
				skb_network_header(skb) - skb->data;
			cmdrsp->net.xmt.lincsum.nhrawoffv = 1;
		}
		cmdrsp->net.xmt.lincsum.csum = skb->csum;
	} else {
		cmdrsp->net.xmt.lincsum.valid = 0;
	}

	/* save off the length of the entire data packet */
	cmdrsp->net.xmt.len = len;

	/* copy ethernet header from first frag into cmdrsp
	 * - everything else will be passed in frags & DMA'ed
	 */
	memcpy(cmdrsp->net.xmt.ethhdr, skb->data, ETH_HLEN);

	/* copy frags info - from skb->data we need to only provide access
	 * beyond the eth header
	 */
	cmdrsp->net.xmt.num_frags =
		visor_copy_fragsinfo_from_skb(skb, firstfraglen,
					      MAX_PHYS_INFO,
					      cmdrsp->net.xmt.frags);
	if (cmdrsp->net.xmt.num_frags < 0) {
		spin_unlock_irqrestore(&devdata->priv_lock, flags);
		devdata->busy_cnt++;
		dev_err(&netdev->dev,
			"%s busy - copy frags failed\n", __func__);
		kfree_skb(skb);
		return NETDEV_TX_OK;
	}

	if (visorchannel_signalinsert(devdata->dev->visorchannel,
				      IOCHAN_TO_IOPART, cmdrsp)) {
		netif_stop_queue(netdev);
		spin_unlock_irqrestore(&devdata->priv_lock, flags);
		devdata->busy_cnt++;
		dev_dbg(&netdev->dev,
			"%s busy - signalinsert failed\n", __func__);
		kfree_skb(skb);
		return NETDEV_TX_OK;
	}

	/* Track the skbs that have been sent to the IOVM for XMIT */
	skb_queue_head(&devdata->xmitbufhead, skb);

	/* update xmt stats */
	devdata->net_stats.tx_packets++;
	devdata->net_stats.tx_bytes += skb->len;
	devdata->chstat.sent_xmit++;

	/* check if we have hit the high watermark for netif_stop_queue() */
	if (vnic_hit_high_watermark(devdata,
				    devdata->upper_threshold_net_xmits)) {
		/* extra NET_XMITs queued over to IOVM - need to wait;
		 * stop queue - call netif_wake_queue() after lower threshold
		 */
		netif_stop_queue(netdev);
		dev_dbg(&netdev->dev,
			"%s busy - invoking iovm flow control\n",
			__func__);
		devdata->flow_control_upper_hits++;
	}
	spin_unlock_irqrestore(&devdata->priv_lock, flags);

	/* skb will be freed when we get back NET_XMIT_DONE */
	return NETDEV_TX_OK;
}
/**
 * visornic_get_stats - returns net_stats of the visornic device
 * @netdev: netdevice
 *
 * Returns the net_device_stats for the device
 */
static struct net_device_stats *
visornic_get_stats(struct net_device *netdev)
{
	struct visornic_devdata *devdata = netdev_priv(netdev);

	return &devdata->net_stats;
}
/**
 * visornic_change_mtu - changes mtu of device.
 * @netdev: netdevice
 * @new_mtu: value of new mtu
 *
 * The MTU cannot be changed by the system; it must be changed via a
 * CONTROLVM message. All vnics and pnics in a switch have
 * to have the same MTU for everything to work.
 * Currently not supported.
 * Returns -EINVAL
 */
static int
visornic_change_mtu(struct net_device *netdev, int new_mtu)
{
	return -EINVAL;
}
/**
 * visornic_set_multi - set the multicast/promiscuous flags of the device
 * @netdev: netdevice
 *
 * The only flag we currently support is IFF_PROMISC.
 * Returns void
 */
static void
visornic_set_multi(struct net_device *netdev)
{
	struct uiscmdrsp *cmdrsp;
	struct visornic_devdata *devdata = netdev_priv(netdev);

	if (devdata->old_flags == netdev->flags)
		return;

	if ((netdev->flags & IFF_PROMISC) ==
	    (devdata->old_flags & IFF_PROMISC))
		goto out_save_flags;

	cmdrsp = kmalloc(SIZEOF_CMDRSP, GFP_ATOMIC);
	if (!cmdrsp)
		return;
	cmdrsp->cmdtype = CMD_NET_TYPE;
	cmdrsp->net.type = NET_RCV_PROMISC;
	cmdrsp->net.enbdis.context = netdev;
	cmdrsp->net.enbdis.enable =
		netdev->flags & IFF_PROMISC;
	visorchannel_signalinsert(devdata->dev->visorchannel,
				  IOCHAN_TO_IOPART,
				  cmdrsp);
	kfree(cmdrsp);

out_save_flags:
	devdata->old_flags = netdev->flags;
}
/**
 * visornic_xmit_timeout - request to timeout the xmit
 * @netdev: netdevice
 *
 * Queue the work and return. Make sure we have not already
 * been informed that the IO Partition is gone; if it is gone,
 * we will already be timing out the xmits.
 */
static void
visornic_xmit_timeout(struct net_device *netdev)
{
	struct visornic_devdata *devdata = netdev_priv(netdev);
	unsigned long flags;

	spin_lock_irqsave(&devdata->priv_lock, flags);
	if (devdata->going_away) {
		spin_unlock_irqrestore(&devdata->priv_lock, flags);
		dev_dbg(&devdata->dev->device,
			"%s aborting because device removal pending\n",
			__func__);
		return;
	}

	/* Ensure that a ServerDown message hasn't been received */
	if (!devdata->enabled ||
	    (devdata->server_down && !devdata->server_change_state)) {
		dev_dbg(&netdev->dev, "%s no processing\n",
			__func__);
		spin_unlock_irqrestore(&devdata->priv_lock, flags);
		return;
	}
	schedule_work(&devdata->timeout_reset);
	spin_unlock_irqrestore(&devdata->priv_lock, flags);
}
/**
 * repost_return - repost rcv bufs that have come back
 * @cmdrsp: io channel command struct to post
 * @devdata: visornic devdata for the device
 * @skb: skb that was returned from the IO Part
 * @netdev: netdevice
 *
 * Repost rcv buffers that have been returned to us when
 * we are finished with them.
 * Returns 0 for success, negative errno for error.
 */
static int
repost_return(struct uiscmdrsp *cmdrsp, struct visornic_devdata *devdata,
	      struct sk_buff *skb, struct net_device *netdev)
{
	struct net_pkt_rcv copy;
	int i = 0, cc, numreposted;
	int found_skb = 0;
	int status = 0;

	copy = cmdrsp->net.rcv;
	switch (copy.numrcvbufs) {
	case 0:
		devdata->n_rcv0++;
		break;
	case 1:
		devdata->n_rcv1++;
		break;
	case 2:
		devdata->n_rcv2++;
		break;
	default:
		devdata->n_rcvx++;
		break;
	}
	for (cc = 0, numreposted = 0; cc < copy.numrcvbufs; cc++) {
		for (i = 0; i < devdata->num_rcv_bufs; i++) {
			if (devdata->rcvbuf[i] != copy.rcvbuf[cc])
				continue;

			if ((skb) && devdata->rcvbuf[i] == skb) {
				devdata->found_repost_rcvbuf_cnt++;
				found_skb = 1;
				devdata->repost_found_skb_cnt++;
			}
			devdata->rcvbuf[i] = alloc_rcv_buf(netdev);
			if (!devdata->rcvbuf[i]) {
				devdata->num_rcv_bufs_could_not_alloc++;
				devdata->alloc_failed_in_repost_rtn_cnt++;
				status = -ENOMEM;
				break;
			}
			post_skb(cmdrsp, devdata, devdata->rcvbuf[i]);
			numreposted++;
			break;
		}
	}
	if (numreposted != copy.numrcvbufs) {
		devdata->n_repost_deficit++;
		status = -EINVAL;
	}
	if (skb) {
		if (found_skb) {
			kfree_skb(skb);
		} else {
			status = -EINVAL;
			devdata->bad_rcv_buf++;
		}
	}
	return status;
}
/**
 * visornic_rx - Handle receive packets coming back from IO Part
 * @cmdrsp: Receive packet returned from IO Part
 *
 * Got a receive packet back from the IO Part; handle it and send
 * it up the stack.
 * Returns 1 iff an skb was received, otherwise 0
 */
static int
visornic_rx(struct uiscmdrsp *cmdrsp)
{
	struct visornic_devdata *devdata;
	struct sk_buff *skb, *prev, *curr;
	struct net_device *netdev;
	int cc, currsize, off;
	struct ethhdr *eth;
	unsigned long flags;

	/* post a new rcv buf to the other end using the cmdrsp we have at
	 * hand; post it without holding the lock - but we'll use the signal
	 * lock to synchronize the queue insert. The cmdrsp that contains the
	 * net.rcv is the one we are using to repost, so copy the info we
	 * need from it.
	 */
	skb = cmdrsp->net.buf;
	netdev = skb->dev;

	devdata = netdev_priv(netdev);

	spin_lock_irqsave(&devdata->priv_lock, flags);
	atomic_dec(&devdata->num_rcvbuf_in_iovm);

	/* set length to how much was ACTUALLY received -
	 * NOTE: rcv_done_len includes actual length of data rcvd
	 * including ethhdr
	 */
	skb->len = cmdrsp->net.rcv.rcv_done_len;

	/* update rcv stats - call it with priv_lock held */
	devdata->net_stats.rx_packets++;
	devdata->net_stats.rx_bytes += skb->len;

	/* test enabled while holding lock */
	if (!(devdata->enabled && devdata->enab_dis_acked)) {
		/* don't process it unless we're in enable mode and until
		 * we've gotten an ACK saying the other end got our RCV enable
		 */
		spin_unlock_irqrestore(&devdata->priv_lock, flags);
		repost_return(cmdrsp, devdata, skb, netdev);
		return 0;
	}

	spin_unlock_irqrestore(&devdata->priv_lock, flags);

	/* when the skb was allocated, skb->dev, skb->data, skb->len and
	 * skb->data_len were set up, AND data has already been put into
	 * the skb (both first frag and in frags pages).
	 * NOTE: firstfraglen is the amount of data in skb->data and that
	 * which is not in nr_frags or frag_list. This is now simply
	 * RCVPOST_BUF_SIZE. Bump tail to show how much data is in
	 * firstfrag & set data_len to show the rest; see if we have to
	 * chain frag_list.
	 */
	if (skb->len > RCVPOST_BUF_SIZE) {	/* do PRECAUTIONARY check */
		if (cmdrsp->net.rcv.numrcvbufs < 2) {
			if (repost_return(cmdrsp, devdata, skb, netdev) < 0)
				dev_err(&devdata->netdev->dev,
					"repost_return failed");
			return 0;
		}
		/* length rcvd is greater than firstfrag in this skb rcv buf */
		skb->tail += RCVPOST_BUF_SIZE;	/* amount in skb->data */
		skb->data_len = skb->len - RCVPOST_BUF_SIZE;	/* amount that
								 * will be in
								 * frag_list
								 */
	} else {
		/* data fits in this skb - no chaining - do
		 * PRECAUTIONARY check
		 */
		if (cmdrsp->net.rcv.numrcvbufs != 1) {	/* should be 1 */
			if (repost_return(cmdrsp, devdata, skb, netdev) < 0)
				dev_err(&devdata->netdev->dev,
					"repost_return failed");
			return 0;
		}
		skb->tail += skb->len;
		skb->data_len = 0;	/* nothing rcvd in frag_list */
	}
	off = skb_tail_pointer(skb) - skb->data;

	/* amount we bumped tail by in the head skb:
	 * it is used to calculate the size of each chained skb below,
	 * and to index into bufline to continue the copy
	 * (for chansocktwopc).
	 * If necessary, chain the rcv skbs together.
	 * NOTE: index 0 is the same as cmdrsp->net.rcv.skb; we need to
	 * chain the rest to that one.
	 * - do PRECAUTIONARY check
	 */
	if (cmdrsp->net.rcv.rcvbuf[0] != skb) {
		if (repost_return(cmdrsp, devdata, skb, netdev) < 0)
			dev_err(&devdata->netdev->dev, "repost_return failed");
		return 0;
	}

	if (cmdrsp->net.rcv.numrcvbufs > 1) {
		/* chain the various rcv buffers into the skb's frag_list. */
		/* Note: off was initialized above */
		for (cc = 1, prev = NULL;
		     cc < cmdrsp->net.rcv.numrcvbufs; cc++) {
			curr = (struct sk_buff *)cmdrsp->net.rcv.rcvbuf[cc];
			curr->next = NULL;
			if (!prev)	/* start of list - set head */
				skb_shinfo(skb)->frag_list = curr;
			else
				prev->next = curr;
			prev = curr;

			/* should we set skb->len and skb->data_len for each
			 * buffer being chained??? can't hurt!
			 */
			currsize = min(skb->len - off,
				       (unsigned int)RCVPOST_BUF_SIZE);
			curr->len = currsize;
			curr->tail += currsize;
			curr->data_len = 0;
			off += currsize;
		}
		/* assert skb->len == off */
		if (skb->len != off) {
			netdev_err(devdata->netdev,
				   "something wrong; skb->len:%d != off:%d\n",
				   skb->len, off);
		}
	}

	/* set up packet's protocol type using ethernet header - this
	 * sets up skb->pkt_type & it also PULLS out the eth header
	 */
	skb->protocol = eth_type_trans(skb, netdev);

	eth = eth_hdr(skb);

	skb->csum = 0;
	skb->ip_summed = CHECKSUM_NONE;
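	/* Filter the packet: a break out of this do/while accepts it and
	 * falls through to netif_receive_skb() below; running off the end
	 * drops it. The do/while (0) exists only as a break target.
	 */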
	do {
		if (netdev->flags & IFF_PROMISC)
			break;	/* accept all packets */
		if (skb->pkt_type == PACKET_BROADCAST) {
			if (netdev->flags & IFF_BROADCAST)
				break;	/* accept all broadcast packets */
		} else if (skb->pkt_type == PACKET_MULTICAST) {
			if ((netdev->flags & IFF_MULTICAST) &&
			    (netdev_mc_count(netdev))) {
				struct netdev_hw_addr *ha;
				int found_mc = 0;

				/* only accept multicast packets that we can
				 * find in our multicast address list
				 */
				netdev_for_each_mc_addr(ha, netdev) {
					if (ether_addr_equal(eth->h_dest,
							     ha->addr)) {
						found_mc = 1;
						break;
					}
				}
				/* accept pkt, dest matches a multicast addr */
				if (found_mc)
					break;
			}
		/* accept packet, h_dest must match vnic mac address */
		} else if (skb->pkt_type == PACKET_HOST) {
			break;
		} else if (skb->pkt_type == PACKET_OTHERHOST) {
			/* something is not right */
			dev_err(&devdata->netdev->dev,
				"**** FAILED to deliver rcv packet to OS; name:%s Dest:%pM VNIC:%pM\n",
				netdev->name, eth->h_dest, netdev->dev_addr);
		}
		/* drop packet - don't forward it up to OS */
		devdata->n_rcv_packets_not_accepted++;
		repost_return(cmdrsp, devdata, skb, netdev);
		return 0;
	} while (0);
	netif_receive_skb(skb);
	/* netif_receive_skb returns various values, but in practice most
	 * drivers ignore the return value
	 */

	skb = NULL;
	/* whether the packet got dropped or handled, the skb is freed by
	 * kernel code, so we shouldn't free it. But we should repost a
	 * new rcv buffer.
	 */
	repost_return(cmdrsp, devdata, skb, netdev);
	return 1;
}
/**
 * devdata_initialize - Initialize devdata structure
 * @devdata: visornic_devdata structure to initialize
 * @dev: visorbus device it belongs to
 *
 * Setup initial values for the visornic, based on channel and default
 * values.
 * Returns a pointer to the devdata structure
 */
static struct visornic_devdata *
devdata_initialize(struct visornic_devdata *devdata, struct visor_device *dev)
{
	devdata->dev = dev;
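	/* the jiffies-based incarnation_id is stamped into every posted
	 * rcv buffer (see post_skb()), letting the IO partition tell
	 * buffers posted by this incarnation of the device from stale
	 * ones posted before a restart
	 */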
	devdata->incarnation_id = get_jiffies_64();
	return devdata;
}

/**
 * devdata_release - Frees up references in devdata
 * @devdata: struct to clean up
 *
 * Frees up references in devdata.
 * Returns void
 */
static void devdata_release(struct visornic_devdata *devdata)
{
	kfree(devdata->rcvbuf);
	kfree(devdata->cmdrsp_rcv);
	kfree(devdata->xmit_cmdrsp);
}
static const struct net_device_ops visornic_dev_ops = {
	.ndo_open = visornic_open,
	.ndo_stop = visornic_close,
	.ndo_start_xmit = visornic_xmit,
	.ndo_get_stats = visornic_get_stats,
	.ndo_change_mtu = visornic_change_mtu,
	.ndo_tx_timeout = visornic_xmit_timeout,
	.ndo_set_rx_mode = visornic_set_multi,
};
static ssize_t info_debugfs_read(struct file *file, char __user *buf,
				 size_t len, loff_t *offset)
{
	ssize_t bytes_read = 0;
	int str_pos = 0;
	struct visornic_devdata *devdata;
	struct net_device *dev;
	char *vbuf;

	if (len > MAX_BUF)
		len = MAX_BUF;
	vbuf = kzalloc(len, GFP_KERNEL);
	if (!vbuf)
		return -ENOMEM;

	/* for each vnic channel dump out channel specific data */
	rcu_read_lock();
	for_each_netdev_rcu(current->nsproxy->net_ns, dev) {
		/* Only consider netdevs that are visornic, and are open */
		if ((dev->netdev_ops != &visornic_dev_ops) ||
		    (!netif_queue_stopped(dev)))
			continue;

		devdata = netdev_priv(dev);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     "netdev = %s (0x%p), MAC Addr %pM\n",
				     dev->name,
				     dev,
				     dev->dev_addr);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     "VisorNic Dev Info = 0x%p\n", devdata);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " num_rcv_bufs = %d\n",
				     devdata->num_rcv_bufs);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " max_outstanding_net_xmits = %lu\n",
				     devdata->max_outstanding_net_xmits);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " upper_threshold_net_xmits = %lu\n",
				     devdata->upper_threshold_net_xmits);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " lower_threshold_net_xmits = %lu\n",
				     devdata->lower_threshold_net_xmits);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " queuefullmsg_logged = %d\n",
				     devdata->queuefullmsg_logged);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " chstat.got_rcv = %lu\n",
				     devdata->chstat.got_rcv);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " chstat.got_enbdisack = %lu\n",
				     devdata->chstat.got_enbdisack);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " chstat.got_xmit_done = %lu\n",
				     devdata->chstat.got_xmit_done);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " chstat.xmit_fail = %lu\n",
				     devdata->chstat.xmit_fail);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " chstat.sent_enbdis = %lu\n",
				     devdata->chstat.sent_enbdis);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " chstat.sent_promisc = %lu\n",
				     devdata->chstat.sent_promisc);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " chstat.sent_post = %lu\n",
				     devdata->chstat.sent_post);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " chstat.sent_post_failed = %lu\n",
				     devdata->chstat.sent_post_failed);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " chstat.sent_xmit = %lu\n",
				     devdata->chstat.sent_xmit);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " chstat.reject_count = %lu\n",
				     devdata->chstat.reject_count);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " chstat.extra_rcvbufs_sent = %lu\n",
				     devdata->chstat.extra_rcvbufs_sent);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " n_rcv0 = %lu\n", devdata->n_rcv0);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " n_rcv1 = %lu\n", devdata->n_rcv1);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " n_rcv2 = %lu\n", devdata->n_rcv2);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " n_rcvx = %lu\n", devdata->n_rcvx);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " num_rcvbuf_in_iovm = %d\n",
				     atomic_read(&devdata->num_rcvbuf_in_iovm));
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " alloc_failed_in_if_needed_cnt = %lu\n",
				     devdata->alloc_failed_in_if_needed_cnt);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " alloc_failed_in_repost_rtn_cnt = %lu\n",
				     devdata->alloc_failed_in_repost_rtn_cnt);
		/* str_pos += scnprintf(vbuf + str_pos, len - str_pos,
		 *		       " inner_loop_limit_reached_cnt = %lu\n",
		 *		       devdata->inner_loop_limit_reached_cnt);
		 */
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " found_repost_rcvbuf_cnt = %lu\n",
				     devdata->found_repost_rcvbuf_cnt);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " repost_found_skb_cnt = %lu\n",
				     devdata->repost_found_skb_cnt);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " n_repost_deficit = %lu\n",
				     devdata->n_repost_deficit);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " bad_rcv_buf = %lu\n",
				     devdata->bad_rcv_buf);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " n_rcv_packets_not_accepted = %lu\n",
				     devdata->n_rcv_packets_not_accepted);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " interrupts_rcvd = %llu\n",
				     devdata->interrupts_rcvd);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " interrupts_notme = %llu\n",
				     devdata->interrupts_notme);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " interrupts_disabled = %llu\n",
				     devdata->interrupts_disabled);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " busy_cnt = %llu\n",
				     devdata->busy_cnt);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " flow_control_upper_hits = %llu\n",
				     devdata->flow_control_upper_hits);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " flow_control_lower_hits = %llu\n",
				     devdata->flow_control_lower_hits);
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " netif_queue = %s\n",
				     netif_queue_stopped(devdata->netdev) ?
				     "stopped" : "running");
		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
				     " xmits_outstanding = %lu\n",
				     devdata_xmits_outstanding(devdata));
	}
	rcu_read_unlock();
	bytes_read = simple_read_from_buffer(buf, len, offset, vbuf, str_pos);
	kfree(vbuf);
	return bytes_read;
}
static struct dentry *visornic_debugfs_dir;
static const struct file_operations debugfs_info_fops = {
	.read = info_debugfs_read,
};
/**
 * send_rcv_posts_if_needed
 * @devdata: visornic device
 *
 * Send receive buffers to the IO Partition.
 * Returns void
 */
static void
send_rcv_posts_if_needed(struct visornic_devdata *devdata)
{
	int i;
	struct net_device *netdev;
	struct uiscmdrsp *cmdrsp = devdata->cmdrsp_rcv;
	int cur_num_rcv_bufs_to_alloc, rcv_bufs_allocated;

	/* don't do this until vnic is marked ready */
	if (!(devdata->enabled && devdata->enab_dis_acked))
		return;

	netdev = devdata->netdev;
	rcv_bufs_allocated = 0;
	/* this code is trying to prevent getting stuck here forever,
	 * but still retry it if you can't allocate them all this time.
	 */
	cur_num_rcv_bufs_to_alloc = devdata->num_rcv_bufs_could_not_alloc;
	while (cur_num_rcv_bufs_to_alloc > 0) {
		cur_num_rcv_bufs_to_alloc--;
		for (i = 0; i < devdata->num_rcv_bufs; i++) {
			if (devdata->rcvbuf[i])
				continue;
			devdata->rcvbuf[i] = alloc_rcv_buf(netdev);
			if (!devdata->rcvbuf[i]) {
				devdata->alloc_failed_in_if_needed_cnt++;
				break;
			}
			rcv_bufs_allocated++;
			post_skb(cmdrsp, devdata, devdata->rcvbuf[i]);
			devdata->chstat.extra_rcvbufs_sent++;
		}
	}
	devdata->num_rcv_bufs_could_not_alloc -= rcv_bufs_allocated;
}
/**
 * drain_resp_queue - drains and ignores all messages from the resp queue
 * @cmdrsp: io channel command response message
 * @devdata: visornic device to drain
 */
static void
drain_resp_queue(struct uiscmdrsp *cmdrsp, struct visornic_devdata *devdata)
{
	while (!visorchannel_signalremove(devdata->dev->visorchannel,
					  IOCHAN_FROM_IOPART,
					  cmdrsp))
		;
}
/**
 * service_resp_queue - drains the response queue
 * @cmdrsp: io channel command response message
 * @devdata: visornic device to drain
 *
 * Drain the response queue of any responses from the IO partition.
 * Process the responses as we get them.
 * Returns when response queue is empty or when the thread stops.
 */
static void
service_resp_queue(struct uiscmdrsp *cmdrsp, struct visornic_devdata *devdata,
		   int *rx_work_done, int budget)
{
	unsigned long flags;
	struct net_device *netdev;

	while (*rx_work_done < budget) {
		/* TODO: CLIENT ACQUIRE -- Don't really need this at the
		 * moment
		 */
		if (visorchannel_signalremove(devdata->dev->visorchannel,
					      IOCHAN_FROM_IOPART,
					      cmdrsp))
			break; /* queue empty */

		switch (cmdrsp->net.type) {
		case NET_RCV:
			devdata->chstat.got_rcv++;
			/* process incoming packet */
			*rx_work_done += visornic_rx(cmdrsp);
			break;
		case NET_XMIT_DONE:
			spin_lock_irqsave(&devdata->priv_lock, flags);
			devdata->chstat.got_xmit_done++;
			if (cmdrsp->net.xmtdone.xmt_done_result)
				devdata->chstat.xmit_fail++;
			/* only call queue wake if we stopped it */
			netdev = ((struct sk_buff *)cmdrsp->net.buf)->dev;
			/* ASSERT netdev == vnicinfo->netdev; */
			if ((netdev == devdata->netdev) &&
			    netif_queue_stopped(netdev)) {
				/* check if we have crossed the lower watermark
				 * for netif_wake_queue()
				 */
				if (vnic_hit_low_watermark
				    (devdata,
				     devdata->lower_threshold_net_xmits)) {
					/* enough NET_XMITs completed
					 * so can restart netif queue
					 */
					netif_wake_queue(netdev);
					devdata->flow_control_lower_hits++;
				}
			}
			skb_unlink(cmdrsp->net.buf, &devdata->xmitbufhead);
			spin_unlock_irqrestore(&devdata->priv_lock, flags);
			kfree_skb(cmdrsp->net.buf);
			break;
		case NET_RCV_ENBDIS_ACK:
			devdata->chstat.got_enbdisack++;
			netdev = (struct net_device *)
				cmdrsp->net.enbdis.context;
			spin_lock_irqsave(&devdata->priv_lock, flags);
			devdata->enab_dis_acked = 1;
			spin_unlock_irqrestore(&devdata->priv_lock, flags);

			if (devdata->server_down &&
			    devdata->server_change_state) {
				/* Inform Linux that the link is up */
				devdata->server_down = false;
				devdata->server_change_state = false;
				netif_wake_queue(netdev);
				netif_carrier_on(netdev);
			}
			break;
		case NET_CONNECT_STATUS:
			netdev = devdata->netdev;
			if (cmdrsp->net.enbdis.enable == 1) {
				spin_lock_irqsave(&devdata->priv_lock, flags);
				devdata->enabled = cmdrsp->net.enbdis.enable;
				spin_unlock_irqrestore(&devdata->priv_lock,
						       flags);
				netif_wake_queue(netdev);
				netif_carrier_on(netdev);
			} else {
				netif_stop_queue(netdev);
				netif_carrier_off(netdev);
				spin_lock_irqsave(&devdata->priv_lock, flags);
				devdata->enabled = cmdrsp->net.enbdis.enable;
				spin_unlock_irqrestore(&devdata->priv_lock,
						       flags);
			}
			break;
		default:
			break;
		}
		/* cmdrsp is now available for reuse */
	}
}
static int visornic_poll(struct napi_struct *napi, int budget)
{
	struct visornic_devdata *devdata = container_of(napi,
							struct visornic_devdata,
							napi);
	int rx_count = 0;

	send_rcv_posts_if_needed(devdata);
	service_resp_queue(devdata->cmdrsp, devdata, &rx_count, budget);

	/* If there aren't any more packets to receive stop the poll */
	if (rx_count < budget)
		napi_complete_done(napi, rx_count);
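	/* the 2 ms irq_poll_timer (see poll_for_irq() below) will schedule
	 * NAPI again once more responses are waiting in the channel
	 */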
	return rx_count;
}

/**
 * poll_for_irq - Checks the status of the response queue.
 * @v: void pointer to the visornic devdata
 *
 * Timer function: periodically check the response queue and
 * schedule NAPI to drain it if needed.
 */
static void
poll_for_irq(unsigned long v)
{
	struct visornic_devdata *devdata = (struct visornic_devdata *)v;

	if (!visorchannel_signalempty(
				   devdata->dev->visorchannel,
				   IOCHAN_FROM_IOPART))
		napi_schedule(&devdata->napi);

	atomic_set(&devdata->interrupt_rcvd, 0);

	/* mod_timer() takes an absolute expiry, hence jiffies + ... */
	mod_timer(&devdata->irq_poll_timer, jiffies + msecs_to_jiffies(2));
}
/**
 * visornic_probe - probe function for visornic devices
 * @dev: The visor device discovered
 *
 * Called when visorbus discovers a visornic device on its
 * bus. It creates a new visornic ethernet adapter.
 * Returns 0 or negative for error.
 */
static int visornic_probe(struct visor_device *dev)
{
	struct visornic_devdata *devdata = NULL;
	struct net_device *netdev = NULL;
	int err;
	int channel_offset = 0;
	u64 features;

	netdev = alloc_etherdev(sizeof(struct visornic_devdata));
	if (!netdev) {
		dev_err(&dev->device,
			"%s alloc_etherdev failed\n", __func__);
		return -ENOMEM;
	}

	netdev->netdev_ops = &visornic_dev_ops;
	netdev->watchdog_timeo = 5 * HZ;
	SET_NETDEV_DEV(netdev, &dev->device);

	/* Get MAC address from channel and read it into the device. */
	netdev->addr_len = ETH_ALEN;
	channel_offset = offsetof(struct spar_io_channel_protocol,
				  vnic.macaddr);
	err = visorbus_read_channel(dev, channel_offset, netdev->dev_addr,
				    ETH_ALEN);
	if (err < 0) {
		dev_err(&dev->device,
			"%s failed to get mac addr from chan (%d)\n",
			__func__, err);
		goto cleanup_netdev;
	}

	devdata = devdata_initialize(netdev_priv(netdev), dev);
	if (!devdata) {
		dev_err(&dev->device,
			"%s devdata_initialize failed\n", __func__);
		err = -ENOMEM;
		goto cleanup_netdev;
	}
	/* don't trust messages laying around in the channel */
	drain_resp_queue(devdata->cmdrsp, devdata);

	devdata->netdev = netdev;
	dev_set_drvdata(&dev->device, devdata);
	init_waitqueue_head(&devdata->rsp_queue);
	spin_lock_init(&devdata->priv_lock);
	devdata->enabled = 0; /* not yet */
	atomic_set(&devdata->usage, 1);

	/* Setup rcv bufs */
	channel_offset = offsetof(struct spar_io_channel_protocol,
				  vnic.num_rcv_bufs);
	err = visorbus_read_channel(dev, channel_offset,
				    &devdata->num_rcv_bufs, 4);
	if (err) {
		dev_err(&dev->device,
			"%s failed to get #rcv bufs from chan (%d)\n",
			__func__, err);
		goto cleanup_netdev;
	}

	devdata->rcvbuf = kcalloc(devdata->num_rcv_bufs,
				  sizeof(struct sk_buff *), GFP_KERNEL);
	if (!devdata->rcvbuf) {
		err = -ENOMEM;
		goto cleanup_netdev;
	}

	/* set the net_xmit outstanding threshold;
	 * always leave two slots open but you should have 3 at a minimum;
	 * note that max_outstanding_net_xmits must be > 0
	 */
	devdata->max_outstanding_net_xmits =
		max_t(unsigned long, 3, ((devdata->num_rcv_bufs / 3) - 2));
	devdata->upper_threshold_net_xmits =
		max_t(unsigned long,
		      2, (devdata->max_outstanding_net_xmits - 1));
	devdata->lower_threshold_net_xmits =
		max_t(unsigned long,
		      1, (devdata->max_outstanding_net_xmits / 2));
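	/* e.g. (hypothetical numbers, for illustration only) with 256 rcv
	 * bufs: max_outstanding = max(3, 256 / 3 - 2) = 83, so the queue is
	 * stopped once 82 xmits are outstanding and woken again when that
	 * count drains to 41
	 */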
	skb_queue_head_init(&devdata->xmitbufhead);

	/* create a cmdrsp we can use to post and unpost rcv buffers */
	devdata->cmdrsp_rcv = kmalloc(SIZEOF_CMDRSP, GFP_ATOMIC);
	if (!devdata->cmdrsp_rcv) {
		err = -ENOMEM;
		goto cleanup_rcvbuf;
	}
	devdata->xmit_cmdrsp = kmalloc(SIZEOF_CMDRSP, GFP_ATOMIC);
	if (!devdata->xmit_cmdrsp) {
		err = -ENOMEM;
		goto cleanup_cmdrsp_rcv;
	}
	INIT_WORK(&devdata->timeout_reset, visornic_timeout_reset);
	devdata->server_down = false;
	devdata->server_change_state = false;

	/* set the default mtu */
	channel_offset = offsetof(struct spar_io_channel_protocol,
				  vnic.mtu);
	err = visorbus_read_channel(dev, channel_offset, &netdev->mtu, 4);
	if (err) {
		dev_err(&dev->device,
			"%s failed to get mtu from chan (%d)\n",
			__func__, err);
		goto cleanup_xmit_cmdrsp;
	}

	/* TODO: Setup Interrupt information */
	/* Let's start our threads to get responses */
	netif_napi_add(netdev, &devdata->napi, visornic_poll, NAPI_WEIGHT);

	setup_timer(&devdata->irq_poll_timer, poll_for_irq,
		    (unsigned long)devdata);
	/* Note: This timer has to start running before the device can be
	 * enabled, because the napi routine is responsible for
	 * setting enab_dis_acked
	 */
	mod_timer(&devdata->irq_poll_timer, jiffies + msecs_to_jiffies(2));

	channel_offset = offsetof(struct spar_io_channel_protocol,
				  channel_header.features);
	err = visorbus_read_channel(dev, channel_offset, &features, 8);
	if (err) {
		dev_err(&dev->device,
			"%s failed to get features from chan (%d)\n",
			__func__, err);
		goto cleanup_napi_add;
	}

	features |= ULTRA_IO_CHANNEL_IS_POLLING;
	features |= ULTRA_IO_DRIVER_SUPPORTS_ENHANCED_RCVBUF_CHECKING;
	err = visorbus_write_channel(dev, channel_offset, &features, 8);
	if (err) {
		dev_err(&dev->device,
			"%s failed to set features in chan (%d)\n",
			__func__, err);
		goto cleanup_napi_add;
	}

	/* Note: Interrupts have to be enabled before the device can be
	 * enabled, because the napi routine is responsible for
	 * setting enab_dis_acked
	 */
	visorbus_enable_channel_interrupts(dev);

	err = register_netdev(netdev);
	if (err) {
		dev_err(&dev->device,
			"%s register_netdev failed (%d)\n", __func__, err);
		goto cleanup_napi_add;
	}

	/* create debug/sysfs directories */
	devdata->eth_debugfs_dir = debugfs_create_dir(netdev->name,
						      visornic_debugfs_dir);
	if (!devdata->eth_debugfs_dir) {
		dev_err(&dev->device,
			"%s debugfs_create_dir %s failed\n",
			__func__, netdev->name);
		err = -ENOMEM;
		goto cleanup_register_netdev;
	}

	dev_info(&dev->device, "%s success netdev=%s\n",
		 __func__, netdev->name);
	return 0;

cleanup_register_netdev:
	unregister_netdev(netdev);

cleanup_napi_add:
	del_timer_sync(&devdata->irq_poll_timer);
	netif_napi_del(&devdata->napi);

cleanup_xmit_cmdrsp:
	kfree(devdata->xmit_cmdrsp);

cleanup_cmdrsp_rcv:
	kfree(devdata->cmdrsp_rcv);

cleanup_rcvbuf:
	kfree(devdata->rcvbuf);

cleanup_netdev:
	free_netdev(netdev);
	return err;
}
/**
 * host_side_disappeared - IO part is gone.
 * @devdata: device object
 *
 * IO partition servicing this device is gone; do cleanup.
 * Returns void.
 */
static void host_side_disappeared(struct visornic_devdata *devdata)
{
	unsigned long flags;

	spin_lock_irqsave(&devdata->priv_lock, flags);
	devdata->dev = NULL;	/* indicate device destroyed */
	spin_unlock_irqrestore(&devdata->priv_lock, flags);
}
/**
 * visornic_remove - Called when visornic dev goes away
 * @dev: visornic device that is being removed
 *
 * Called when DEVICE_DESTROY gets called to remove device.
 * Returns void
 */
static void visornic_remove(struct visor_device *dev)
{
	struct visornic_devdata *devdata = dev_get_drvdata(&dev->device);
	struct net_device *netdev;
	unsigned long flags;

	if (!devdata) {
		dev_err(&dev->device, "%s no devdata\n", __func__);
		return;
	}
	spin_lock_irqsave(&devdata->priv_lock, flags);
	if (devdata->going_away) {
		spin_unlock_irqrestore(&devdata->priv_lock, flags);
		dev_err(&dev->device, "%s already being removed\n", __func__);
		return;
	}
	devdata->going_away = true;
	spin_unlock_irqrestore(&devdata->priv_lock, flags);
	netdev = devdata->netdev;
	if (!netdev) {
		dev_err(&dev->device, "%s no net device\n", __func__);
		return;
	}

	/* going_away prevents new items being added to the workqueues */
	cancel_work_sync(&devdata->timeout_reset);

	debugfs_remove_recursive(devdata->eth_debugfs_dir);

	unregister_netdev(netdev); /* this will call visornic_close() */

	del_timer_sync(&devdata->irq_poll_timer);
	netif_napi_del(&devdata->napi);

	dev_set_drvdata(&dev->device, NULL);
	host_side_disappeared(devdata);
	devdata_release(devdata);
	free_netdev(netdev);
}
/**
 * visornic_pause - Called when IO Part disappears
 * @dev: visornic device that is being serviced
 * @complete_func: call when finished.
 *
 * Called when the IO Partition has gone down. Need to free
 * up resources and wait for the IO partition to come back. Mark
 * the link as down and don't attempt any DMA. When we have freed
 * memory, call the complete_func so that Command knows we are
 * done; if we don't call complete_func, the IO part is never
 * notified.
 * Returns 0 for success.
 */
static int visornic_pause(struct visor_device *dev,
			  visorbus_state_complete_func complete_func)
{
	struct visornic_devdata *devdata = dev_get_drvdata(&dev->device);

	visornic_serverdown(devdata, complete_func);
	return 0;
}
/**
 * visornic_resume - Called when IO part has recovered
 * @dev: visornic device that is being serviced
 * @complete_func: call when finished
 *
 * Called when the IO partition has recovered. Reestablish the
 * connection to the IO part and set the link up. Okay to do
 * normal sends and receives again.
 * Returns 0 for success.
 */
static int visornic_resume(struct visor_device *dev,
			   visorbus_state_complete_func complete_func)
{
	struct visornic_devdata *devdata;
	struct net_device *netdev;
	unsigned long flags;

	devdata = dev_get_drvdata(&dev->device);
	if (!devdata) {
		dev_err(&dev->device, "%s no devdata\n", __func__);
		return -EINVAL;
	}

	netdev = devdata->netdev;

	spin_lock_irqsave(&devdata->priv_lock, flags);
	if (devdata->server_change_state) {
		spin_unlock_irqrestore(&devdata->priv_lock, flags);
		dev_err(&dev->device, "%s server already changing state\n",
			__func__);
		return -EINVAL;
	}
	if (!devdata->server_down) {
		spin_unlock_irqrestore(&devdata->priv_lock, flags);
		dev_err(&dev->device, "%s server not down\n", __func__);
		complete_func(dev, 0);
		return 0;
	}
	devdata->server_change_state = true;
	spin_unlock_irqrestore(&devdata->priv_lock, flags);

	/* Must transition channel to ATTACHED state BEFORE
	 * we can start using the device again.
	 * TODO: State transitions
	 */
	mod_timer(&devdata->irq_poll_timer, jiffies + msecs_to_jiffies(2));
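	/* restarting the 2 ms poll ensures responses from the recovered
	 * IO partition are serviced again
	 */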
	rtnl_lock();
	dev_open(netdev);
	rtnl_unlock();

	complete_func(dev, 0);
	return 0;
}
/* This is used to tell the visor bus driver which types of visor devices
 * we support, and what functions to call when a visor device that we support
 * is attached or removed.
 */
static struct visor_driver visornic_driver = {
	.name = "visornic",
	.owner = THIS_MODULE,
	.channel_types = visornic_channel_types,
	.probe = visornic_probe,
	.remove = visornic_remove,
	.pause = visornic_pause,
	.resume = visornic_resume,
	.channel_interrupt = NULL,
};
/**
 * visornic_init - Init function
 *
 * Init function for the visornic driver. Do initial driver setup
 * and wait for devices.
 * Returns 0 for success, negative for error.
 */
static int visornic_init(void)
{
	struct dentry *ret;
	int err = -ENOMEM;

	visornic_debugfs_dir = debugfs_create_dir("visornic", NULL);
	if (!visornic_debugfs_dir)
		return err;

	ret = debugfs_create_file("info", S_IRUSR, visornic_debugfs_dir, NULL,
				  &debugfs_info_fops);
	if (!ret)
		goto cleanup_debugfs;
	ret = debugfs_create_file("enable_ints", S_IWUSR, visornic_debugfs_dir,
				  NULL, &debugfs_enable_ints_fops);
	if (!ret)
		goto cleanup_debugfs;

	err = visorbus_register_visor_driver(&visornic_driver);
	if (err)
		goto cleanup_debugfs;

	return 0;

cleanup_debugfs:
	debugfs_remove_recursive(visornic_debugfs_dir);
	return err;
}
/**
 * visornic_cleanup - driver exit routine
 *
 * Unregister driver from the bus and free up memory.
 */
static void visornic_cleanup(void)
{
	visorbus_unregister_visor_driver(&visornic_driver);
	debugfs_remove_recursive(visornic_debugfs_dir);
}

module_init(visornic_init);
module_exit(visornic_cleanup);

MODULE_AUTHOR("Unisys");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("s-Par NIC driver for virtual network devices");