]> git.kernelconcepts.de Git - karo-tx-linux.git/commitdiff
Merge branch 'enic-next'
authorDavid S. Miller <davem@davemloft.net>
Wed, 21 May 2014 21:04:42 +0000 (17:04 -0400)
committerDavid S. Miller <davem@davemloft.net>
Wed, 21 May 2014 21:04:42 +0000 (17:04 -0400)
Govindarajulu Varadarajan says:

====================
enic: Add adaptive coalescing interrupt support

This series add support for adaptive coalescing interrupt and updates
enic Maintainers.

v1->v2:
* Add commit log
* do vnic_intr_coalescing_timer_set only while enabling intr
* use ktime_get instead of hrtimer
* make enic_set_rx_coal_setting return type void
* change func name enic_apply_int_moderation to enic_calc_int_moderation
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
MAINTAINERS
drivers/net/ethernet/cisco/enic/enic.h
drivers/net/ethernet/cisco/enic/enic_ethtool.c
drivers/net/ethernet/cisco/enic/enic_main.c
drivers/net/ethernet/cisco/enic/vnic_cq.h

index bde15ffcccf9f2e25801d4c6516516a8ef9ae431..f5de16eb19555345423c6421753435e98deb8480 100644 (file)
@@ -2228,9 +2228,8 @@ F:        drivers/platform/chrome/
 CISCO VIC ETHERNET NIC DRIVER
 M:     Christian Benvenuti <benve@cisco.com>
 M:     Sujith Sankar <ssujith@cisco.com>
-M:     Govindarajulu Varadarajan <govindarajulu90@gmail.com>
+M:     Govindarajulu Varadarajan <_govind@gmx.com>
 M:     Neel Patel <neepatel@cisco.com>
-M:     Nishank Trivedi <nistrive@cisco.com>
 S:     Supported
 F:     drivers/net/ethernet/cisco/enic/
 
index e35c8e0202adda08dec6d53586bdd9f7da89a2e9..f23ef321606ca1326b02507139f953274964b84d 100644 (file)
@@ -43,6 +43,8 @@
 #define ENIC_CQ_MAX            (ENIC_WQ_MAX + ENIC_RQ_MAX)
 #define ENIC_INTR_MAX          (ENIC_CQ_MAX + 2)
 
+#define ENIC_AIC_LARGE_PKT_DIFF        3
+
 struct enic_msix_entry {
        int requested;
        char devname[IFNAMSIZ];
@@ -50,6 +52,33 @@ struct enic_msix_entry {
        void *devid;
 };
 
+/* Store only the lower range.  Higher range is given by fw. */
+struct enic_intr_mod_range {
+       u32 small_pkt_range_start;
+       u32 large_pkt_range_start;
+};
+
+struct enic_intr_mod_table {
+       u32 rx_rate;
+       u32 range_percent;
+};
+
+#define ENIC_MAX_LINK_SPEEDS           3
+#define ENIC_LINK_SPEED_10G            10000
+#define ENIC_LINK_SPEED_4G             4000
+#define ENIC_LINK_40G_INDEX            2
+#define ENIC_LINK_10G_INDEX            1
+#define ENIC_LINK_4G_INDEX             0
+#define ENIC_RX_COALESCE_RANGE_END     125
+#define ENIC_AIC_TS_BREAK              100
+
+struct enic_rx_coal {
+       u32 small_pkt_range_start;
+       u32 large_pkt_range_start;
+       u32 range_end;
+       u32 use_adaptive_rx_coalesce;
+};
+
 /* priv_flags */
 #define ENIC_SRIOV_ENABLED             (1 << 0)
 
@@ -92,6 +121,7 @@ struct enic {
        unsigned int mc_count;
        unsigned int uc_count;
        u32 port_mtu;
+       struct enic_rx_coal rx_coalesce_setting;
        u32 rx_coalesce_usecs;
        u32 tx_coalesce_usecs;
 #ifdef CONFIG_PCI_IOV
index 58a8c67638e349673c53844725197fcd1b1e24da..1882db230e139e506bfb1efc5248ac211ec76c2b 100644 (file)
@@ -79,6 +79,17 @@ static const struct enic_stat enic_rx_stats[] = {
 static const unsigned int enic_n_tx_stats = ARRAY_SIZE(enic_tx_stats);
 static const unsigned int enic_n_rx_stats = ARRAY_SIZE(enic_rx_stats);
 
+void enic_intr_coal_set_rx(struct enic *enic, u32 timer)
+{
+       int i;
+       int intr;
+
+       for (i = 0; i < enic->rq_count; i++) {
+               intr = enic_msix_rq_intr(enic, i);
+               vnic_intr_coalescing_timer_set(&enic->intr[intr], timer);
+       }
+}
+
 static int enic_get_settings(struct net_device *netdev,
        struct ethtool_cmd *ecmd)
 {
@@ -178,9 +189,14 @@ static int enic_get_coalesce(struct net_device *netdev,
        struct ethtool_coalesce *ecmd)
 {
        struct enic *enic = netdev_priv(netdev);
+       struct enic_rx_coal *rxcoal = &enic->rx_coalesce_setting;
 
        ecmd->tx_coalesce_usecs = enic->tx_coalesce_usecs;
        ecmd->rx_coalesce_usecs = enic->rx_coalesce_usecs;
+       if (rxcoal->use_adaptive_rx_coalesce)
+               ecmd->use_adaptive_rx_coalesce = 1;
+       ecmd->rx_coalesce_usecs_low = rxcoal->small_pkt_range_start;
+       ecmd->rx_coalesce_usecs_high = rxcoal->range_end;
 
        return 0;
 }
@@ -191,17 +207,31 @@ static int enic_set_coalesce(struct net_device *netdev,
        struct enic *enic = netdev_priv(netdev);
        u32 tx_coalesce_usecs;
        u32 rx_coalesce_usecs;
+       u32 rx_coalesce_usecs_low;
+       u32 rx_coalesce_usecs_high;
+       u32 coalesce_usecs_max;
        unsigned int i, intr;
+       struct enic_rx_coal *rxcoal = &enic->rx_coalesce_setting;
 
+       coalesce_usecs_max = vnic_dev_get_intr_coal_timer_max(enic->vdev);
        tx_coalesce_usecs = min_t(u32, ecmd->tx_coalesce_usecs,
-               vnic_dev_get_intr_coal_timer_max(enic->vdev));
+                                 coalesce_usecs_max);
        rx_coalesce_usecs = min_t(u32, ecmd->rx_coalesce_usecs,
-               vnic_dev_get_intr_coal_timer_max(enic->vdev));
+                                 coalesce_usecs_max);
+
+       rx_coalesce_usecs_low = min_t(u32, ecmd->rx_coalesce_usecs_low,
+                                     coalesce_usecs_max);
+       rx_coalesce_usecs_high = min_t(u32, ecmd->rx_coalesce_usecs_high,
+                                      coalesce_usecs_max);
 
        switch (vnic_dev_get_intr_mode(enic->vdev)) {
        case VNIC_DEV_INTR_MODE_INTX:
                if (tx_coalesce_usecs != rx_coalesce_usecs)
                        return -EINVAL;
+               if (ecmd->use_adaptive_rx_coalesce      ||
+                   ecmd->rx_coalesce_usecs_low         ||
+                   ecmd->rx_coalesce_usecs_high)
+                       return -EOPNOTSUPP;
 
                intr = enic_legacy_io_intr();
                vnic_intr_coalescing_timer_set(&enic->intr[intr],
@@ -210,6 +240,10 @@ static int enic_set_coalesce(struct net_device *netdev,
        case VNIC_DEV_INTR_MODE_MSI:
                if (tx_coalesce_usecs != rx_coalesce_usecs)
                        return -EINVAL;
+               if (ecmd->use_adaptive_rx_coalesce      ||
+                   ecmd->rx_coalesce_usecs_low         ||
+                   ecmd->rx_coalesce_usecs_high)
+                       return -EOPNOTSUPP;
 
                vnic_intr_coalescing_timer_set(&enic->intr[0],
                        tx_coalesce_usecs);
@@ -221,12 +255,27 @@ static int enic_set_coalesce(struct net_device *netdev,
                                tx_coalesce_usecs);
                }
 
-               for (i = 0; i < enic->rq_count; i++) {
-                       intr = enic_msix_rq_intr(enic, i);
-                       vnic_intr_coalescing_timer_set(&enic->intr[intr],
-                               rx_coalesce_usecs);
+               if (rxcoal->use_adaptive_rx_coalesce) {
+                       if (!ecmd->use_adaptive_rx_coalesce) {
+                               rxcoal->use_adaptive_rx_coalesce = 0;
+                               enic_intr_coal_set_rx(enic, rx_coalesce_usecs);
+                       }
+               } else {
+                       if (ecmd->use_adaptive_rx_coalesce)
+                               rxcoal->use_adaptive_rx_coalesce = 1;
+                       else
+                               enic_intr_coal_set_rx(enic, rx_coalesce_usecs);
                }
 
+               if (ecmd->rx_coalesce_usecs_high) {
+                       if (rx_coalesce_usecs_high <
+                           (rx_coalesce_usecs_low + ENIC_AIC_LARGE_PKT_DIFF))
+                               return -EINVAL;
+                       rxcoal->range_end = rx_coalesce_usecs_high;
+                       rxcoal->small_pkt_range_start = rx_coalesce_usecs_low;
+                       rxcoal->large_pkt_range_start = rx_coalesce_usecs_low +
+                                                       ENIC_AIC_LARGE_PKT_DIFF;
+               }
                break;
        default:
                break;
index 2945718ce8068e4355628852ed200d539c9c2273..0d8995cc92ed65902e245eb40d665091c968e8cc 100644 (file)
@@ -38,6 +38,7 @@
 #include <linux/rtnetlink.h>
 #include <linux/prefetch.h>
 #include <net/ip6_checksum.h>
+#include <linux/ktime.h>
 
 #include "cq_enet_desc.h"
 #include "vnic_dev.h"
@@ -72,6 +73,35 @@ MODULE_LICENSE("GPL");
 MODULE_VERSION(DRV_VERSION);
 MODULE_DEVICE_TABLE(pci, enic_id_table);
 
+#define ENIC_LARGE_PKT_THRESHOLD               1000
+#define ENIC_MAX_COALESCE_TIMERS               10
+/*  Interrupt moderation table, which will be used to decide the
+ *  coalescing timer values
+ *  {rx_rate in Mbps, mapping percentage of the range}
+ */
+struct enic_intr_mod_table mod_table[ENIC_MAX_COALESCE_TIMERS + 1] = {
+       {4000,  0},
+       {4400, 10},
+       {5060, 20},
+       {5230, 30},
+       {5540, 40},
+       {5820, 50},
+       {6120, 60},
+       {6435, 70},
+       {6745, 80},
+       {7000, 90},
+       {0xFFFFFFFF, 100}
+};
+
+/* This table helps the driver to pick different ranges for rx coalescing
+ * timer depending on the link speed.
+ */
+struct enic_intr_mod_range mod_range[ENIC_MAX_LINK_SPEEDS] = {
+       {0,  0}, /* 0  - 4  Gbps */
+       {0,  3}, /* 4  - 10 Gbps */
+       {3,  6}, /* 10 - 40 Gbps */
+};
+
 int enic_is_dynamic(struct enic *enic)
 {
        return enic->pdev->device == PCI_DEVICE_ID_CISCO_VIC_ENET_DYN;
@@ -979,6 +1009,15 @@ static int enic_rq_alloc_buf(struct vnic_rq *rq)
        return 0;
 }
 
+static void enic_intr_update_pkt_size(struct vnic_rx_bytes_counter *pkt_size,
+                                     u32 pkt_len)
+{
+       if (ENIC_LARGE_PKT_THRESHOLD <= pkt_len)
+               pkt_size->large_pkt_bytes_cnt += pkt_len;
+       else
+               pkt_size->small_pkt_bytes_cnt += pkt_len;
+}
+
 static void enic_rq_indicate_buf(struct vnic_rq *rq,
        struct cq_desc *cq_desc, struct vnic_rq_buf *buf,
        int skipped, void *opaque)
@@ -986,6 +1025,7 @@ static void enic_rq_indicate_buf(struct vnic_rq *rq,
        struct enic *enic = vnic_dev_priv(rq->vdev);
        struct net_device *netdev = enic->netdev;
        struct sk_buff *skb;
+       struct vnic_cq *cq = &enic->cq[enic_cq_rq(enic, rq->index)];
 
        u8 type, color, eop, sop, ingress_port, vlan_stripped;
        u8 fcoe, fcoe_sof, fcoe_fc_crc_ok, fcoe_enc_error, fcoe_eof;
@@ -1056,6 +1096,9 @@ static void enic_rq_indicate_buf(struct vnic_rq *rq,
                        napi_gro_receive(&enic->napi[q_number], skb);
                else
                        netif_receive_skb(skb);
+               if (enic->rx_coalesce_setting.use_adaptive_rx_coalesce)
+                       enic_intr_update_pkt_size(&cq->pkt_size_counter,
+                                                 bytes_written);
        } else {
 
                /* Buffer overflow
@@ -1134,6 +1177,64 @@ static int enic_poll(struct napi_struct *napi, int budget)
        return rq_work_done;
 }
 
+static void enic_set_int_moderation(struct enic *enic, struct vnic_rq *rq)
+{
+       unsigned int intr = enic_msix_rq_intr(enic, rq->index);
+       struct vnic_cq *cq = &enic->cq[enic_cq_rq(enic, rq->index)];
+       u32 timer = cq->tobe_rx_coal_timeval;
+
+       if (cq->tobe_rx_coal_timeval != cq->cur_rx_coal_timeval) {
+               vnic_intr_coalescing_timer_set(&enic->intr[intr], timer);
+               cq->cur_rx_coal_timeval = cq->tobe_rx_coal_timeval;
+       }
+}
+
+static void enic_calc_int_moderation(struct enic *enic, struct vnic_rq *rq)
+{
+       struct enic_rx_coal *rx_coal = &enic->rx_coalesce_setting;
+       struct vnic_cq *cq = &enic->cq[enic_cq_rq(enic, rq->index)];
+       struct vnic_rx_bytes_counter *pkt_size_counter = &cq->pkt_size_counter;
+       int index;
+       u32 timer;
+       u32 range_start;
+       u32 traffic;
+       u64 delta;
+       ktime_t now = ktime_get();
+
+       delta = ktime_us_delta(now, cq->prev_ts);
+       if (delta < ENIC_AIC_TS_BREAK)
+               return;
+       cq->prev_ts = now;
+
+       traffic = pkt_size_counter->large_pkt_bytes_cnt +
+                 pkt_size_counter->small_pkt_bytes_cnt;
+       /* The table takes Mbps
+        * traffic *= 8    => bits
+        * traffic *= (10^6 / delta)    => bps
+        * traffic /= 10^6     => Mbps
+        *
+        * Combining, traffic *= (8 / delta)
+        */
+
+       traffic <<= 3;
+       traffic /= delta;
+
+       for (index = 0; index < ENIC_MAX_COALESCE_TIMERS; index++)
+               if (traffic < mod_table[index].rx_rate)
+                       break;
+       range_start = (pkt_size_counter->small_pkt_bytes_cnt >
+                      pkt_size_counter->large_pkt_bytes_cnt << 1) ?
+                     rx_coal->small_pkt_range_start :
+                     rx_coal->large_pkt_range_start;
+       timer = range_start + ((rx_coal->range_end - range_start) *
+                              mod_table[index].range_percent / 100);
+       /* Damping */
+       cq->tobe_rx_coal_timeval = (timer + cq->tobe_rx_coal_timeval) >> 1;
+
+       pkt_size_counter->large_pkt_bytes_cnt = 0;
+       pkt_size_counter->small_pkt_bytes_cnt = 0;
+}
+
 static int enic_poll_msix(struct napi_struct *napi, int budget)
 {
        struct net_device *netdev = napi->dev;
@@ -1171,6 +1272,13 @@ static int enic_poll_msix(struct napi_struct *napi, int budget)
 
        if (err)
                work_done = work_to_do;
+       if (enic->rx_coalesce_setting.use_adaptive_rx_coalesce)
+               /* Call the function which refreshes
+                * the intr coalescing timer value based on
+                * the traffic.  This is supported only in
+                * the case of MSI-x mode
+                */
+               enic_calc_int_moderation(enic, &enic->rq[rq]);
 
        if (work_done < work_to_do) {
 
@@ -1179,6 +1287,8 @@ static int enic_poll_msix(struct napi_struct *napi, int budget)
                 */
 
                napi_complete(napi);
+               if (enic->rx_coalesce_setting.use_adaptive_rx_coalesce)
+                       enic_set_int_moderation(enic, &enic->rq[rq]);
                vnic_intr_unmask(&enic->intr[intr]);
        }
 
@@ -1314,6 +1424,42 @@ static void enic_synchronize_irqs(struct enic *enic)
        }
 }
 
+static void enic_set_rx_coal_setting(struct enic *enic)
+{
+       unsigned int speed;
+       int index = -1;
+       struct enic_rx_coal *rx_coal = &enic->rx_coalesce_setting;
+
+       /* If intr mode is not MSIX, do not do adaptive coalescing */
+       if (VNIC_DEV_INTR_MODE_MSIX != vnic_dev_get_intr_mode(enic->vdev)) {
+               netdev_info(enic->netdev, "INTR mode is not MSIX, Not initializing adaptive coalescing");
+               return;
+       }
+
+       /* 1. Read the link speed from fw
+        * 2. Pick the default range for the speed
+        * 3. Update it in enic->rx_coalesce_setting
+        */
+       speed = vnic_dev_port_speed(enic->vdev);
+       if (ENIC_LINK_SPEED_10G < speed)
+               index = ENIC_LINK_40G_INDEX;
+       else if (ENIC_LINK_SPEED_4G < speed)
+               index = ENIC_LINK_10G_INDEX;
+       else
+               index = ENIC_LINK_4G_INDEX;
+
+       rx_coal->small_pkt_range_start = mod_range[index].small_pkt_range_start;
+       rx_coal->large_pkt_range_start = mod_range[index].large_pkt_range_start;
+       rx_coal->range_end = ENIC_RX_COALESCE_RANGE_END;
+
+       /* Start with the value provided by UCSM */
+       for (index = 0; index < enic->rq_count; index++)
+               enic->cq[index].cur_rx_coal_timeval =
+                               enic->config.intr_timer_usec;
+
+       rx_coal->use_adaptive_rx_coalesce = 1;
+}
+
 static int enic_dev_notify_set(struct enic *enic)
 {
        int err;
@@ -2231,6 +2377,7 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        enic->notify_timer.function = enic_notify_timer;
        enic->notify_timer.data = (unsigned long)enic;
 
+       enic_set_rx_coal_setting(enic);
        INIT_WORK(&enic->reset, enic_reset);
        INIT_WORK(&enic->change_mtu_work, enic_change_mtu_work);
 
@@ -2250,6 +2397,9 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        }
 
        enic->tx_coalesce_usecs = enic->config.intr_timer_usec;
+       /* rx coalesce time already got initialized. This gets used
+        * if adaptive coal is turned off
+        */
        enic->rx_coalesce_usecs = enic->tx_coalesce_usecs;
 
        if (enic_is_dynamic(enic) || enic_is_sriov_vf(enic))
index 579315cbe803c91130c978b5e21538240aff3018..4e6aa65857f70a0b7236decf382c3018c137517c 100644 (file)
@@ -50,6 +50,11 @@ struct vnic_cq_ctrl {
        u32 pad10;
 };
 
+struct vnic_rx_bytes_counter {
+       unsigned int small_pkt_bytes_cnt;
+       unsigned int large_pkt_bytes_cnt;
+};
+
 struct vnic_cq {
        unsigned int index;
        struct vnic_dev *vdev;
@@ -58,6 +63,10 @@ struct vnic_cq {
        unsigned int to_clean;
        unsigned int last_color;
        unsigned int interrupt_offset;
+       struct vnic_rx_bytes_counter pkt_size_counter;
+       unsigned int cur_rx_coal_timeval;
+       unsigned int tobe_rx_coal_timeval;
+       ktime_t prev_ts;
 };
 
 static inline unsigned int vnic_cq_service(struct vnic_cq *cq,