bgmac: implement scatter/gather support
[cascardo/linux.git] / drivers / net / ethernet / broadcom / bgmac.c
index 676ffe0..fa8f9e1 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/etherdevice.h>
 #include <linux/mii.h>
 #include <linux/phy.h>
+#include <linux/phy_fixed.h>
 #include <linux/interrupt.h>
 #include <linux/dma-mapping.h>
 #include <bcm47xx_nvram.h>
@@ -114,53 +115,91 @@ static void bgmac_dma_tx_enable(struct bgmac *bgmac,
        bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_CTL, ctl);
 }
 
+static void
+bgmac_dma_tx_add_buf(struct bgmac *bgmac, struct bgmac_dma_ring *ring,
+                    int i, int len, u32 ctl0)
+{      /* Fill TX descriptor i from ring->slots[i].dma_addr, len and ctl0 */
+       struct bgmac_slot_info *slot;
+       struct bgmac_dma_desc *dma_desc;
+       u32 ctl1;
+
+       if (i == ring->num_slots - 1)   /* last descriptor of the ring */
+               ctl0 |= BGMAC_DESC_CTL0_EOT;
+
+       ctl1 = len & BGMAC_DESC_CTL1_LEN;       /* ctl1 holds only the length */
+
+       slot = &ring->slots[i];         /* caller already set slot->dma_addr */
+       dma_desc = &ring->cpu_base[i];
+       dma_desc->addr_low = cpu_to_le32(lower_32_bits(slot->dma_addr));
+       dma_desc->addr_high = cpu_to_le32(upper_32_bits(slot->dma_addr));
+       dma_desc->ctl0 = cpu_to_le32(ctl0);     /* fields are stored as LE32 */
+       dma_desc->ctl1 = cpu_to_le32(ctl1);
+}
+
 static netdev_tx_t bgmac_dma_tx_add(struct bgmac *bgmac,
                                    struct bgmac_dma_ring *ring,
                                    struct sk_buff *skb)
 {
        struct device *dma_dev = bgmac->core->dma_dev;
        struct net_device *net_dev = bgmac->net_dev;
-       struct bgmac_dma_desc *dma_desc;
-       struct bgmac_slot_info *slot;
-       u32 ctl0, ctl1;
+       struct bgmac_slot_info *slot = &ring->slots[ring->end];
        int free_slots;
+       int nr_frags;
+       u32 flags;
+       int index = ring->end;
+       int i;
 
        if (skb->len > BGMAC_DESC_CTL1_LEN) {
                bgmac_err(bgmac, "Too long skb (%d)\n", skb->len);
-               goto err_stop_drop;
+               goto err_drop;
        }
 
+       if (skb->ip_summed == CHECKSUM_PARTIAL)
+               skb_checksum_help(skb);
+
+       nr_frags = skb_shinfo(skb)->nr_frags;
+
        if (ring->start <= ring->end)
                free_slots = ring->start - ring->end + BGMAC_TX_RING_SLOTS;
        else
                free_slots = ring->start - ring->end;
-       if (free_slots == 1) {
+
+       if (free_slots <= nr_frags + 1) {
                bgmac_err(bgmac, "TX ring is full, queue should be stopped!\n");
                netif_stop_queue(net_dev);
                return NETDEV_TX_BUSY;
        }
 
-       slot = &ring->slots[ring->end];
-       slot->skb = skb;
-       slot->dma_addr = dma_map_single(dma_dev, skb->data, skb->len,
+       slot->dma_addr = dma_map_single(dma_dev, skb->data, skb_headlen(skb),
                                        DMA_TO_DEVICE);
-       if (dma_mapping_error(dma_dev, slot->dma_addr)) {
-               bgmac_err(bgmac, "Mapping error of skb on ring 0x%X\n",
-                         ring->mmio_base);
-               goto err_stop_drop;
-       }
+       if (unlikely(dma_mapping_error(dma_dev, slot->dma_addr)))
+               goto err_dma_head;
 
-       ctl0 = BGMAC_DESC_CTL0_IOC | BGMAC_DESC_CTL0_SOF | BGMAC_DESC_CTL0_EOF;
-       if (ring->end == ring->num_slots - 1)
-               ctl0 |= BGMAC_DESC_CTL0_EOT;
-       ctl1 = skb->len & BGMAC_DESC_CTL1_LEN;
+       flags = BGMAC_DESC_CTL0_SOF;
+       if (!nr_frags)
+               flags |= BGMAC_DESC_CTL0_EOF | BGMAC_DESC_CTL0_IOC;
 
-       dma_desc = ring->cpu_base;
-       dma_desc += ring->end;
-       dma_desc->addr_low = cpu_to_le32(lower_32_bits(slot->dma_addr));
-       dma_desc->addr_high = cpu_to_le32(upper_32_bits(slot->dma_addr));
-       dma_desc->ctl0 = cpu_to_le32(ctl0);
-       dma_desc->ctl1 = cpu_to_le32(ctl1);
+       bgmac_dma_tx_add_buf(bgmac, ring, index, skb_headlen(skb), flags);
+       flags = 0;
+
+       for (i = 0; i < nr_frags; i++) {
+               struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
+               int len = skb_frag_size(frag);
+
+               index = (index + 1) % BGMAC_TX_RING_SLOTS;
+               slot = &ring->slots[index];
+               slot->dma_addr = skb_frag_dma_map(dma_dev, frag, 0,
+                                                 len, DMA_TO_DEVICE);
+               if (unlikely(dma_mapping_error(dma_dev, slot->dma_addr)))
+                       goto err_dma;
+
+               if (i == nr_frags - 1)
+                       flags |= BGMAC_DESC_CTL0_EOF | BGMAC_DESC_CTL0_IOC;
+
+               bgmac_dma_tx_add_buf(bgmac, ring, index, len, flags);
+       }
+
+       slot->skb = skb;
 
        netdev_sent_queue(net_dev, skb->len);
 
@@ -169,20 +208,46 @@ static netdev_tx_t bgmac_dma_tx_add(struct bgmac *bgmac,
        /* Increase ring->end to point empty slot. We tell hardware the first
         * slot it should *not* read.
         */
-       if (++ring->end >= BGMAC_TX_RING_SLOTS)
-               ring->end = 0;
+       ring->end = (index + 1) % BGMAC_TX_RING_SLOTS;
        bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_INDEX,
                    ring->index_base +
                    ring->end * sizeof(struct bgmac_dma_desc));
 
-       /* Always keep one slot free to allow detecting bugged calls. */
-       if (--free_slots == 1)
+       free_slots -= nr_frags + 1;
+       if (free_slots < 8)
                netif_stop_queue(net_dev);
 
        return NETDEV_TX_OK;
 
-err_stop_drop:
-       netif_stop_queue(net_dev);
+err_dma:
+       /* "slot" is the fragment slot whose mapping just failed; the head
+        * buffer was mapped into slot ring->end.
+        */
+       slot->dma_addr = 0;
+       dma_unmap_single(dma_dev, ring->slots[ring->end].dma_addr,
+                        skb_headlen(skb), DMA_TO_DEVICE);
+
+       /* Frags 0..i-1 sit in slots ring->end + 1 .. ring->end + i */
+       for (; i > 0; i--) {
+               int idx = (ring->end + i) % BGMAC_TX_RING_SLOTS;
+               struct bgmac_slot_info *frag_slot = &ring->slots[idx];
+               u32 ctl1 = le32_to_cpu(ring->cpu_base[idx].ctl1);
+               int len = ctl1 & BGMAC_DESC_CTL1_LEN;
+
+               dma_unmap_page(dma_dev, frag_slot->dma_addr, len,
+                              DMA_TO_DEVICE);
+               frag_slot->dma_addr = 0;
+       }
+
+err_dma_head:
+       /* Leave no stale address behind: bgmac_dma_tx_ring_free() unmaps
+        * every slot whose dma_addr is non-zero.
+        */
+       ring->slots[ring->end].dma_addr = 0;
+       bgmac_err(bgmac, "Mapping error of skb on ring 0x%X\n",
+                 ring->mmio_base);
+
+err_drop:
        dev_kfree_skb(skb);
        return NETDEV_TX_OK;
 }
@@ -204,32 +258,47 @@ static void bgmac_dma_tx_free(struct bgmac *bgmac, struct bgmac_dma_ring *ring)
 
        while (ring->start != empty_slot) {
                struct bgmac_slot_info *slot = &ring->slots[ring->start];
+               u32 ctl0 = le32_to_cpu(ring->cpu_base[ring->start].ctl0);
+               u32 ctl1 = le32_to_cpu(ring->cpu_base[ring->start].ctl1);
+               int len = ctl1 & BGMAC_DESC_CTL1_LEN;
 
-               if (slot->skb) {
+               if (!slot->dma_addr) {
+                       bgmac_err(bgmac, "Hardware reported transmission for empty TX ring slot %d! End of ring: %d\n",
+                                 ring->start, ring->end);
+                       goto next;
+               }
+
+               /* SOF descriptor holds the skb head, the rest page frags */
+               if (ctl0 & BGMAC_DESC_CTL0_SOF)
                        /* Unmap no longer used buffer */
-                       dma_unmap_single(dma_dev, slot->dma_addr,
-                                        slot->skb->len, DMA_TO_DEVICE);
-                       slot->dma_addr = 0;
+                       dma_unmap_single(dma_dev, slot->dma_addr, len,
+                                        DMA_TO_DEVICE);
+               else
+                       dma_unmap_page(dma_dev, slot->dma_addr, len,
+                                      DMA_TO_DEVICE);
 
+               if (slot->skb) {        /* only the last slot of a frame has it */
                        bytes_compl += slot->skb->len;
                        pkts_compl++;
 
                        /* Free memory! :) */
                        dev_kfree_skb(slot->skb);
                        slot->skb = NULL;
-               } else {
-                       bgmac_err(bgmac, "Hardware reported transmission for empty TX ring slot %d! End of ring: %d\n",
-                                 ring->start, ring->end);
                }
 
+next:
+               slot->dma_addr = 0;
                if (++ring->start >= BGMAC_TX_RING_SLOTS)
                        ring->start = 0;
                freed = true;
        }
 
+       if (!pkts_compl)        /* no whole frame completed: nothing to report */
+               return;
+
        netdev_completed_queue(bgmac->net_dev, pkts_compl, bytes_compl);
 
-       if (freed && netif_queue_stopped(bgmac->net_dev))
+       if (netif_queue_stopped(bgmac->net_dev))
                netif_wake_queue(bgmac->net_dev);
 }
 
@@ -275,36 +342,33 @@ static int bgmac_dma_rx_skb_for_slot(struct bgmac *bgmac,
                                     struct bgmac_slot_info *slot)
 {
        struct device *dma_dev = bgmac->core->dma_dev;
-       struct sk_buff *skb;
        dma_addr_t dma_addr;
        struct bgmac_rx_header *rx;
+       void *buf;
 
        /* Alloc skb */
-       skb = netdev_alloc_skb(bgmac->net_dev, BGMAC_RX_BUF_SIZE);
-       if (!skb)
+       buf = netdev_alloc_frag(BGMAC_RX_ALLOC_SIZE);
+       if (!buf)
                return -ENOMEM;
 
        /* Poison - if everything goes fine, hardware will overwrite it */
-       rx = (struct bgmac_rx_header *)skb->data;
+       rx = buf;
        rx->len = cpu_to_le16(0xdead);
        rx->flags = cpu_to_le16(0xbeef);
 
        /* Map skb for the DMA */
-       dma_addr = dma_map_single(dma_dev, skb->data,
-                                 BGMAC_RX_BUF_SIZE, DMA_FROM_DEVICE);
+       dma_addr = dma_map_single(dma_dev, buf, BGMAC_RX_BUF_SIZE,
+                                 DMA_FROM_DEVICE);
        if (dma_mapping_error(dma_dev, dma_addr)) {
                bgmac_err(bgmac, "DMA mapping error\n");
-               dev_kfree_skb(skb);
+               put_page(virt_to_head_page(buf));
                return -ENOMEM;
        }
 
        /* Update the slot */
-       slot->skb = skb;
+       slot->buf = buf;
        slot->dma_addr = dma_addr;
 
-       if (slot->dma_addr & 0xC0000000)
-               bgmac_warn(bgmac, "DMA address using 0xC0000000 bit(s), it may need translation trick\n");
-
        return 0;
 }
 
@@ -345,8 +409,9 @@ static int bgmac_dma_rx_read(struct bgmac *bgmac, struct bgmac_dma_ring *ring,
        while (ring->start != ring->end) {
                struct device *dma_dev = bgmac->core->dma_dev;
                struct bgmac_slot_info *slot = &ring->slots[ring->start];
-               struct sk_buff *skb = slot->skb;
-               struct bgmac_rx_header *rx;
+               struct bgmac_rx_header *rx = slot->buf;
+               struct sk_buff *skb;
+               void *buf = slot->buf;
                u16 len, flags;
 
                /* Unmap buffer to make it accessible to the CPU */
@@ -354,7 +419,6 @@ static int bgmac_dma_rx_read(struct bgmac *bgmac, struct bgmac_dma_ring *ring,
                                        BGMAC_RX_BUF_SIZE, DMA_FROM_DEVICE);
 
                /* Get info from the header */
-               rx = (struct bgmac_rx_header *)skb->data;
                len = le16_to_cpu(rx->len);
                flags = le16_to_cpu(rx->flags);
 
@@ -395,12 +459,18 @@ static int bgmac_dma_rx_read(struct bgmac *bgmac, struct bgmac_dma_ring *ring,
                        dma_unmap_single(dma_dev, old_dma_addr,
                                         BGMAC_RX_BUF_SIZE, DMA_FROM_DEVICE);
 
+                       skb = build_skb(buf, BGMAC_RX_ALLOC_SIZE);
+                       if (unlikely(!skb)) {   /* skb head allocation failed */
+                               bgmac_err(bgmac, "build_skb failed\n");
+                               put_page(virt_to_head_page(buf)); /* free frag */
+                               break;
+                       }
                        skb_put(skb, BGMAC_RX_FRAME_OFFSET + len);
                        skb_pull(skb, BGMAC_RX_FRAME_OFFSET);
 
                        skb_checksum_none_assert(skb);
                        skb->protocol = eth_type_trans(skb, bgmac->net_dev);
-                       netif_receive_skb(skb);
+                       napi_gro_receive(&bgmac->napi, skb);
                        handled++;
                } while (0);
 
@@ -436,40 +501,79 @@ static bool bgmac_dma_unaligned(struct bgmac *bgmac,
        return false;
 }
 
-static void bgmac_dma_ring_free(struct bgmac *bgmac,
-                               struct bgmac_dma_ring *ring)
+static void bgmac_dma_tx_ring_free(struct bgmac *bgmac,
+                                  struct bgmac_dma_ring *ring)
 {
        struct device *dma_dev = bgmac->core->dma_dev;
+       struct bgmac_dma_desc *dma_desc = ring->cpu_base; /* LE descriptors */
        struct bgmac_slot_info *slot;
-       int size;
        int i;
 
        for (i = 0; i < ring->num_slots; i++) {
+               int len = le32_to_cpu(dma_desc[i].ctl1) & BGMAC_DESC_CTL1_LEN;
+
                slot = &ring->slots[i];
-               if (slot->skb) {
-                       if (slot->dma_addr)
-                               dma_unmap_single(dma_dev, slot->dma_addr,
-                                                slot->skb->len, DMA_TO_DEVICE);
-                       dev_kfree_skb(slot->skb);
-               }
+               dev_kfree_skb(slot->skb); /* set only on a frame's last slot */
+
+               if (!slot->dma_addr)    /* nothing mapped in this slot */
+                       continue;
+
+               if (le32_to_cpu(dma_desc[i].ctl0) & BGMAC_DESC_CTL0_SOF)
+                       dma_unmap_single(dma_dev, slot->dma_addr, /* skb head */
+                                        len, DMA_TO_DEVICE);
+               else
+                       dma_unmap_page(dma_dev, slot->dma_addr, /* page frag */
+                                      len, DMA_TO_DEVICE);
        }
+}
 
-       if (ring->cpu_base) {
-               /* Free ring of descriptors */
-               size = ring->num_slots * sizeof(struct bgmac_dma_desc);
-               dma_free_coherent(dma_dev, size, ring->cpu_base,
-                                 ring->dma_base);
+static void bgmac_dma_rx_ring_free(struct bgmac *bgmac,
+                                  struct bgmac_dma_ring *ring)
+{      /* Unmap and release every RX buffer still attached to the ring */
+       struct device *dma_dev = bgmac->core->dma_dev;
+       struct bgmac_slot_info *slot;
+       int i;
+
+       for (i = 0; i < ring->num_slots; i++) {
+               slot = &ring->slots[i];
+               if (!slot->buf)         /* no buffer attached to this slot */
+                       continue;
+
+               if (slot->dma_addr)     /* mapped with BGMAC_RX_BUF_SIZE */
+                       dma_unmap_single(dma_dev, slot->dma_addr,
+                                        BGMAC_RX_BUF_SIZE,
+                                        DMA_FROM_DEVICE);
+               put_page(virt_to_head_page(slot->buf)); /* page-frag buffer */
        }
 }
 
+static void bgmac_dma_ring_desc_free(struct bgmac *bgmac,
+                                    struct bgmac_dma_ring *ring)
+{      /* Release the coherent descriptor array of one ring, if allocated */
+       struct device *dma_dev = bgmac->core->dma_dev;
+       int size;
+
+       if (!ring->cpu_base)    /* descriptor memory was never allocated */
+               return;
+
+       /* Free ring of descriptors */
+       size = ring->num_slots * sizeof(struct bgmac_dma_desc);
+       dma_free_coherent(dma_dev, size, ring->cpu_base,
+                         ring->dma_base);
+}
+
 static void bgmac_dma_free(struct bgmac *bgmac)
 {
        int i;
 
-       for (i = 0; i < BGMAC_MAX_TX_RINGS; i++)
-               bgmac_dma_ring_free(bgmac, &bgmac->tx_ring[i]);
-       for (i = 0; i < BGMAC_MAX_RX_RINGS; i++)
-               bgmac_dma_ring_free(bgmac, &bgmac->rx_ring[i]);
+       for (i = 0; i < BGMAC_MAX_TX_RINGS; i++) {
+               bgmac_dma_tx_ring_free(bgmac, &bgmac->tx_ring[i]);
+               bgmac_dma_ring_desc_free(bgmac, &bgmac->tx_ring[i]);
+       }       /* buffers first: the TX free path reads the descriptors */
+       for (i = 0; i < BGMAC_MAX_RX_RINGS; i++) {
+               bgmac_dma_rx_ring_free(bgmac, &bgmac->rx_ring[i]);
+               bgmac_dma_ring_desc_free(bgmac, &bgmac->rx_ring[i]);
+       }       /* RX: release buffers, then the descriptor array */
 }
 
 static int bgmac_dma_alloc(struct bgmac *bgmac)
@@ -505,8 +609,6 @@ static int bgmac_dma_alloc(struct bgmac *bgmac)
                                  ring->mmio_base);
                        goto err_dma_free;
                }
-               if (ring->dma_base & 0xC0000000)
-                       bgmac_warn(bgmac, "DMA address using 0xC0000000 bit(s), it may need translation trick\n");
 
                ring->unaligned = bgmac_dma_unaligned(bgmac, ring,
                                                      BGMAC_DMA_RING_TX);
@@ -536,8 +638,6 @@ static int bgmac_dma_alloc(struct bgmac *bgmac)
                        err = -ENOMEM;
                        goto err_dma_free;
                }
-               if (ring->dma_base & 0xC0000000)
-                       bgmac_warn(bgmac, "DMA address using 0xC0000000 bit(s), it may need translation trick\n");
 
                ring->unaligned = bgmac_dma_unaligned(bgmac, ring,
                                                      BGMAC_DMA_RING_RX);
@@ -1337,13 +1437,46 @@ static void bgmac_adjust_link(struct net_device *net_dev)
        }
 }
 
+static int bgmac_fixed_phy_register(struct bgmac *bgmac)
+{      /* Register a fixed, link-up 1000/full PHY and attach it to net_dev */
+       struct fixed_phy_status fphy_status = {
+               .link = 1,
+               .speed = SPEED_1000,
+               .duplex = DUPLEX_FULL,
+       };
+       struct phy_device *phy_dev;
+       int err;
+
+       phy_dev = fixed_phy_register(PHY_POLL, &fphy_status, NULL);
+       if (IS_ERR_OR_NULL(phy_dev)) {  /* keep the real error code if any */
+               bgmac_err(bgmac, "Failed to register fixed PHY device\n");
+               return phy_dev ? PTR_ERR(phy_dev) : -ENODEV;
+       }
+
+       err = phy_connect_direct(bgmac->net_dev, phy_dev, bgmac_adjust_link,
+                                PHY_INTERFACE_MODE_MII);
+       if (err) {
+               bgmac_err(bgmac, "Connecting PHY failed\n");
+               return err;     /* NOTE(review): fixed PHY stays registered */
+       }
+
+       bgmac->phy_dev = phy_dev;
+
+       return 0;
+}
+
 static int bgmac_mii_register(struct bgmac *bgmac)
 {
+       struct bcma_chipinfo *ci = &bgmac->core->bus->chipinfo;
        struct mii_bus *mii_bus;
        struct phy_device *phy_dev;
        char bus_id[MII_BUS_ID_SIZE + 3];
        int i, err = 0;
 
+       if (ci->id == BCMA_CHIP_ID_BCM4707 ||
+           ci->id == BCMA_CHIP_ID_BCM53018)
+               return bgmac_fixed_phy_register(bgmac);
+
        mii_bus = mdiobus_alloc();
        if (!mii_bus)
                return -ENOMEM;
@@ -1524,6 +1657,10 @@ static int bgmac_probe(struct bcma_device *core)
                goto err_dma_free;
        }
 
+       net_dev->features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
+       net_dev->hw_features = net_dev->features;
+       net_dev->vlan_features = net_dev->features;
+
        err = register_netdev(bgmac->net_dev);
        if (err) {
                bgmac_err(bgmac, "Cannot register net device\n");