Merge branch 'ioat' into dmaengine
diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c
index d1adbf35268cc720a4b47372a2089833c4f52078..1cdd22e1051befb3036bba6a63a6d8368db70f11 100644
--- a/drivers/dma/ioat/dma_v3.c
+++ b/drivers/dma/ioat/dma_v3.c
@@ -57,6 +57,7 @@
  */
 
 #include <linux/pci.h>
+#include <linux/gfp.h>
 #include <linux/dmaengine.h>
 #include <linux/dma-mapping.h>
 #include "registers.h"
@@ -259,8 +260,8 @@ static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete)
        struct ioat_chan_common *chan = &ioat->base;
        struct ioat_ring_ent *desc;
        bool seen_current = false;
+       int idx = ioat->tail, i;
        u16 active;
-       int i;
 
        dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n",
                __func__, ioat->head, ioat->tail, ioat->issued);
@@ -269,13 +270,14 @@ static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete)
        for (i = 0; i < active && !seen_current; i++) {
                struct dma_async_tx_descriptor *tx;
 
-               prefetch(ioat2_get_ring_ent(ioat, ioat->tail + i + 1));
-               desc = ioat2_get_ring_ent(ioat, ioat->tail + i);
+               smp_read_barrier_depends();
+               prefetch(ioat2_get_ring_ent(ioat, idx + i + 1));
+               desc = ioat2_get_ring_ent(ioat, idx + i);
                dump_desc_dbg(ioat, desc);
                tx = &desc->txd;
                if (tx->cookie) {
                        chan->completed_cookie = tx->cookie;
-                       ioat3_dma_unmap(ioat, desc, ioat->tail + i);
+                       ioat3_dma_unmap(ioat, desc, idx + i);
                        tx->cookie = 0;
                        if (tx->callback) {
                                tx->callback(tx->callback_param);
@@ -292,69 +294,30 @@ static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete)
                        i++;
                }
        }
-       ioat->tail += i;
+       smp_mb(); /* finish all descriptor reads before incrementing tail */
+       ioat->tail = idx + i;
        BUG_ON(active && !seen_current); /* no active descs have written a completion? */
        chan->last_completion = phys_complete;
 
-       active = ioat2_ring_active(ioat);
-       if (active == 0) {
+       if (active - i == 0) {
                dev_dbg(to_dev(chan), "%s: cancel completion timeout\n",
                        __func__);
                clear_bit(IOAT_COMPLETION_PENDING, &chan->state);
                mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
        }
        /* 5 microsecond delay per pending descriptor */
-       writew(min((5 * active), IOAT_INTRDELAY_MASK),
+       writew(min((5 * (active - i)), IOAT_INTRDELAY_MASK),
               chan->device->reg_base + IOAT_INTRDELAY_OFFSET);
 }
 
-/* try to cleanup, but yield (via spin_trylock) to incoming submissions
- * with the expectation that we will immediately poll again shortly
- */
-static void ioat3_cleanup_poll(struct ioat2_dma_chan *ioat)
+static void ioat3_cleanup(struct ioat2_dma_chan *ioat)
 {
        struct ioat_chan_common *chan = &ioat->base;
        unsigned long phys_complete;
 
-       prefetch(chan->completion);
-
-       if (!spin_trylock_bh(&chan->cleanup_lock))
-               return;
-
-       if (!ioat_cleanup_preamble(chan, &phys_complete)) {
-               spin_unlock_bh(&chan->cleanup_lock);
-               return;
-       }
-
-       if (!spin_trylock_bh(&ioat->ring_lock)) {
-               spin_unlock_bh(&chan->cleanup_lock);
-               return;
-       }
-
-       __cleanup(ioat, phys_complete);
-
-       spin_unlock_bh(&ioat->ring_lock);
-       spin_unlock_bh(&chan->cleanup_lock);
-}
-
-/* run cleanup now because we already delayed the interrupt via INTRDELAY */
-static void ioat3_cleanup_sync(struct ioat2_dma_chan *ioat)
-{
-       struct ioat_chan_common *chan = &ioat->base;
-       unsigned long phys_complete;
-
-       prefetch(chan->completion);
-
        spin_lock_bh(&chan->cleanup_lock);
-       if (!ioat_cleanup_preamble(chan, &phys_complete)) {
-               spin_unlock_bh(&chan->cleanup_lock);
-               return;
-       }
-       spin_lock_bh(&ioat->ring_lock);
-
-       __cleanup(ioat, phys_complete);
-
-       spin_unlock_bh(&ioat->ring_lock);
+       if (ioat_cleanup_preamble(chan, &phys_complete))
+               __cleanup(ioat, phys_complete);
        spin_unlock_bh(&chan->cleanup_lock);
 }
 
@@ -362,7 +325,7 @@ static void ioat3_cleanup_event(unsigned long data)
 {
        struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data);
 
-       ioat3_cleanup_sync(ioat);
+       ioat3_cleanup(ioat);
        writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET);
 }
 
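The __cleanup() hunks above replace the per-iteration use of ioat->tail with a snapshot (idx), add smp_read_barrier_depends() before each descriptor read, and only publish the new tail after an smp_mb(). As a rough, self-contained model of that snapshot-then-publish pattern (plain C11 atomics in userspace, with invented names; this is not the driver's code, which relies on the kernel's own barriers):

/* Userspace model of the snapshot-and-publish tail update in __cleanup().
 * Names and the C11 atomics are illustrative assumptions only. */
#include <stdatomic.h>
#include <stdio.h>

#define RING_SIZE 16                    /* power of two, like the ioat ring */

struct ring {
        _Atomic unsigned int head;      /* next slot the producer will fill */
        _Atomic unsigned int tail;      /* oldest slot not yet cleaned up */
        int desc[RING_SIZE];            /* stand-in for struct ioat_ring_ent */
};

/* Producer: fill a slot, then publish the new head with release semantics. */
static void submit(struct ring *r, int payload)
{
        unsigned int h = atomic_load_explicit(&r->head, memory_order_relaxed);

        r->desc[h & (RING_SIZE - 1)] = payload;
        atomic_store_explicit(&r->head, h + 1, memory_order_release);
}

/* Consumer: walk completed entries from a snapshot of tail, and publish
 * the new tail once, only after every descriptor read has finished
 * (the driver uses smp_mb() before "ioat->tail = idx + i"). */
static void cleanup(struct ring *r)
{
        unsigned int idx = atomic_load_explicit(&r->tail, memory_order_relaxed);
        /* acquire pairs with submit()'s release store of head, so the
         * descriptor contents are visible before we read them */
        unsigned int active = atomic_load_explicit(&r->head, memory_order_acquire) - idx;
        unsigned int i;

        for (i = 0; i < active; i++)
                printf("reaped slot %u -> %d\n",
                       (idx + i) & (RING_SIZE - 1),
                       r->desc[(idx + i) & (RING_SIZE - 1)]);

        atomic_store_explicit(&r->tail, idx + i, memory_order_release);
}

int main(void)
{
        struct ring r = { 0 };

        submit(&r, 10);
        submit(&r, 20);
        cleanup(&r);
        return 0;
}

The release/acquire pairing here stands in for the heavier barrier pairing the driver uses; the point is only that the tail must not move until every descriptor read is complete.
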
@@ -383,12 +346,10 @@ static void ioat3_timer_event(unsigned long data)
        struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data);
        struct ioat_chan_common *chan = &ioat->base;
 
-       spin_lock_bh(&chan->cleanup_lock);
        if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) {
                unsigned long phys_complete;
                u64 status;
 
-               spin_lock_bh(&ioat->ring_lock);
                status = ioat_chansts(chan);
 
                /* when halted due to errors check for channel
@@ -407,26 +368,31 @@ static void ioat3_timer_event(unsigned long data)
                 * acknowledged a pending completion once, then be more
                 * forceful with a restart
                 */
+               spin_lock_bh(&chan->cleanup_lock);
                if (ioat_cleanup_preamble(chan, &phys_complete))
                        __cleanup(ioat, phys_complete);
-               else if (test_bit(IOAT_COMPLETION_ACK, &chan->state))
+               else if (test_bit(IOAT_COMPLETION_ACK, &chan->state)) {
+                       spin_lock_bh(&ioat->prep_lock);
                        ioat3_restart_channel(ioat);
-               else {
+                       spin_unlock_bh(&ioat->prep_lock);
+               } else {
                        set_bit(IOAT_COMPLETION_ACK, &chan->state);
                        mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
                }
-               spin_unlock_bh(&ioat->ring_lock);
+               spin_unlock_bh(&chan->cleanup_lock);
        } else {
                u16 active;
 
                /* if the ring is idle, empty, and oversized try to step
                 * down the size
                 */
-               spin_lock_bh(&ioat->ring_lock);
+               spin_lock_bh(&chan->cleanup_lock);
+               spin_lock_bh(&ioat->prep_lock);
                active = ioat2_ring_active(ioat);
                if (active == 0 && ioat->alloc_order > ioat_get_alloc_order())
                        reshape_ring(ioat, ioat->alloc_order-1);
-               spin_unlock_bh(&ioat->ring_lock);
+               spin_unlock_bh(&ioat->prep_lock);
+               spin_unlock_bh(&chan->cleanup_lock);
 
                /* keep shrinking until we get back to our minimum
                 * default size
@@ -434,7 +400,6 @@ static void ioat3_timer_event(unsigned long data)
                if (ioat->alloc_order > ioat_get_alloc_order())
                        mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
        }
-       spin_unlock_bh(&chan->cleanup_lock);
 }
 
 static enum dma_status
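With ring_lock gone, ioat3_timer_event() above takes cleanup_lock first and nests prep_lock inside it only where submissions must also be excluded (channel restart and ring reshape). A minimal userspace sketch of that nesting order, using pthread mutexes and invented structure names rather than the driver's types:

/* Illustrative model of the lock nesting in ioat3_timer_event():
 * cleanup_lock is always the outer lock, prep_lock the inner one. */
#include <pthread.h>
#include <stdio.h>

struct chan_model {
        pthread_mutex_t cleanup_lock;   /* serializes completion reaping */
        pthread_mutex_t prep_lock;      /* serializes descriptor submission */
        int needs_restart;
};

static void restart_channel(struct chan_model *c)
{
        printf("restarting channel %p with both locks held\n", (void *)c);
}

static void timer_event(struct chan_model *c)
{
        pthread_mutex_lock(&c->cleanup_lock);           /* outer lock */
        if (c->needs_restart) {
                pthread_mutex_lock(&c->prep_lock);      /* inner lock */
                restart_channel(c);
                pthread_mutex_unlock(&c->prep_lock);
        }
        pthread_mutex_unlock(&c->cleanup_lock);
}

int main(void)
{
        struct chan_model c = {
                .cleanup_lock = PTHREAD_MUTEX_INITIALIZER,
                .prep_lock = PTHREAD_MUTEX_INITIALIZER,
                .needs_restart = 1,
        };

        timer_event(&c);
        return 0;
}

Keeping the order fixed (cleanup_lock outer, prep_lock inner) is what allows cleanup and submission to proceed independently in the common paths, with both locks taken together only in this slow path.
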
@@ -446,7 +411,7 @@ ioat3_tx_status(struct dma_chan *c, dma_cookie_t cookie,
        if (ioat_tx_status(c, cookie, txstate) == DMA_SUCCESS)
                return DMA_SUCCESS;
 
-       ioat3_cleanup_poll(ioat);
+       ioat3_cleanup(ioat);
 
        return ioat_tx_status(c, cookie, txstate);
 }
@@ -459,15 +424,12 @@ ioat3_prep_memset_lock(struct dma_chan *c, dma_addr_t dest, int value,
        struct ioat_ring_ent *desc;
        size_t total_len = len;
        struct ioat_fill_descriptor *fill;
-       int num_descs;
        u64 src_data = (0x0101010101010101ULL) * (value & 0xff);
-       u16 idx;
-       int i;
+       int num_descs, idx, i;
 
        num_descs = ioat2_xferlen_to_descs(ioat, len);
-       if (likely(num_descs) &&
-           ioat2_alloc_and_lock(&idx, ioat, num_descs) == 0)
-               /* pass */;
+       if (likely(num_descs) && ioat2_check_space_lock(ioat, num_descs) == 0)
+               idx = ioat->head;
        else
                return NULL;
        i = 0;
@@ -512,11 +474,8 @@ __ioat3_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result,
        struct ioat_xor_descriptor *xor;
        struct ioat_xor_ext_descriptor *xor_ex = NULL;
        struct ioat_dma_descriptor *hw;
+       int num_descs, with_ext, idx, i;
        u32 offset = 0;
-       int num_descs;
-       int with_ext;
-       int i;
-       u16 idx;
        u8 op = result ? IOAT_OP_XOR_VAL : IOAT_OP_XOR;
 
        BUG_ON(src_cnt < 2);
@@ -536,9 +495,8 @@ __ioat3_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result,
         * (legacy) descriptor to ensure all completion writes arrive in
         * order.
         */
-       if (likely(num_descs) &&
-           ioat2_alloc_and_lock(&idx, ioat, num_descs+1) == 0)
-               /* pass */;
+       if (likely(num_descs) && ioat2_check_space_lock(ioat, num_descs+1) == 0)
+               idx = ioat->head;
        else
                return NULL;
        i = 0;
@@ -656,11 +614,8 @@ __ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result,
        struct ioat_pq_ext_descriptor *pq_ex = NULL;
        struct ioat_dma_descriptor *hw;
        u32 offset = 0;
-       int num_descs;
-       int with_ext;
-       int i, s;
-       u16 idx;
        u8 op = result ? IOAT_OP_PQ_VAL : IOAT_OP_PQ;
+       int i, s, idx, with_ext, num_descs;
 
        dev_dbg(to_dev(chan), "%s\n", __func__);
        /* the engine requires at least two sources (we provide
@@ -686,8 +641,8 @@ __ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result,
         * order.
         */
        if (likely(num_descs) &&
-           ioat2_alloc_and_lock(&idx, ioat, num_descs+1) == 0)
-               /* pass */;
+           ioat2_check_space_lock(ioat, num_descs+1) == 0)
+               idx = ioat->head;
        else
                return NULL;
        i = 0;
@@ -850,10 +805,9 @@ ioat3_prep_interrupt_lock(struct dma_chan *c, unsigned long flags)
        struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
        struct ioat_ring_ent *desc;
        struct ioat_dma_descriptor *hw;
-       u16 idx;
 
-       if (ioat2_alloc_and_lock(&idx, ioat, 1) == 0)
-               desc = ioat2_get_ring_ent(ioat, idx);
+       if (ioat2_check_space_lock(ioat, 1) == 0)
+               desc = ioat2_get_ring_ent(ioat, ioat->head);
        else
                return NULL;
 
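Every prep routine above changes from ioat2_alloc_and_lock(&idx, ioat, n) to ioat2_check_space_lock(ioat, n) followed by idx = ioat->head. The sketch below is a rough userspace model of that calling convention; all names are illustrative stand-ins, and the head advance is folded into the same function only to keep the example short.

/* Model of the reserve-then-use-head submission pattern: check space under
 * the producer lock, then start filling at the current head index. */
#include <pthread.h>
#include <stdio.h>

#define RING_SIZE 16

struct ring_model {
        pthread_mutex_t prep_lock;
        unsigned int head;              /* next free slot */
        unsigned int tail;              /* oldest busy slot */
        int desc[RING_SIZE];
};

/* 0 on success with prep_lock held, -1 if the ring lacks room */
static int check_space_lock(struct ring_model *r, unsigned int num)
{
        pthread_mutex_lock(&r->prep_lock);
        if (RING_SIZE - (r->head - r->tail) >= num)
                return 0;
        pthread_mutex_unlock(&r->prep_lock);
        return -1;
}

static int prep_operation(struct ring_model *r, unsigned int num_descs)
{
        unsigned int idx, i;

        if (num_descs && check_space_lock(r, num_descs) == 0)
                idx = r->head;          /* reserved slots start at head */
        else
                return -1;

        for (i = 0; i < num_descs; i++)
                r->desc[(idx + i) & (RING_SIZE - 1)] = 1;   /* "fill" a slot */

        r->head = idx + i;              /* still under prep_lock */
        pthread_mutex_unlock(&r->prep_lock);
        printf("queued %u descriptors starting at slot %u\n", num_descs, idx);
        return 0;
}

int main(void)
{
        struct ring_model r = { .prep_lock = PTHREAD_MUTEX_INITIALIZER };

        return prep_operation(&r, 3);
}
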
@@ -1221,7 +1175,7 @@ int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca)
        if (cap & IOAT_CAP_XOR) {
                is_raid_device = true;
                dma->max_xor = 8;
-               dma->xor_align = 2;
+               dma->xor_align = 6;
 
                dma_cap_set(DMA_XOR, dma->cap_mask);
                dma->device_prep_dma_xor = ioat3_prep_xor;
@@ -1232,7 +1186,7 @@ int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca)
        if (cap & IOAT_CAP_PQ) {
                is_raid_device = true;
                dma_set_maxpq(dma, 8, 0);
-               dma->pq_align = 2;
+               dma->pq_align = 6;
 
                dma_cap_set(DMA_PQ, dma->cap_mask);
                dma->device_prep_dma_pq = ioat3_prep_pq;
@@ -1242,7 +1196,7 @@ int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca)
 
                if (!(cap & IOAT_CAP_XOR)) {
                        dma->max_xor = 8;
-                       dma->xor_align = 2;
+                       dma->xor_align = 6;
 
                        dma_cap_set(DMA_XOR, dma->cap_mask);
                        dma->device_prep_dma_xor = ioat3_prep_pqxor;
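The three hunks above raise xor_align and pq_align from 2 to 6. These dma_device fields are power-of-two shifts, so the requirement tightens from 4-byte to 64-byte (cache-line) alignment. The standalone check below mirrors the kind of mask test dmaengine's alignment helpers perform; check_align() is an illustrative stand-in, not the kernel helper.

/* What the 2 -> 6 change means for clients: offsets and lengths must now
 * be 64-byte aligned instead of 4-byte aligned. */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

static bool check_align(unsigned char align_shift, size_t off1, size_t off2, size_t len)
{
        size_t mask = ((size_t)1 << align_shift) - 1;

        return ((off1 | off2 | len) & mask) == 0;
}

int main(void)
{
        /* 4-byte aligned offsets pass with align=2 but fail with align=6 */
        printf("align=2: %d\n", check_align(2, 0x1004, 0x2008, 4096));
        printf("align=6: %d\n", check_align(6, 0x1004, 0x2008, 4096));
        /* cache-line aligned offsets satisfy the new requirement */
        printf("align=6: %d\n", check_align(6, 0x1040, 0x2080, 4096));
        return 0;
}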