Merge branch 'lro'
author Jeff Garzik <jeff@garzik.org>
Thu, 2 Mar 2006 19:26:30 +0000 (14:26 -0500)
committer Jeff Garzik <jeff@garzik.org>
Thu, 2 Mar 2006 19:26:30 +0000 (14:26 -0500)
1  2 
drivers/net/s2io.c
drivers/net/s2io.h

diff --combined drivers/net/s2io.c
  #include <linux/ethtool.h>
  #include <linux/workqueue.h>
  #include <linux/if_vlan.h>
+ #include <linux/ip.h>
+ #include <linux/tcp.h>
+ #include <net/tcp.h>
  
  #include <asm/system.h>
  #include <asm/uaccess.h>
  #include <asm/io.h>
+ #include <asm/div64.h>
  
  /* local include */
  #include "s2io.h"
  #include "s2io-regs.h"
  
- #define DRV_VERSION "Version 2.0.9.4"
+ #define DRV_VERSION "2.0.11.2"
  
  /* S2io Driver name & version. */
  static char s2io_driver_name[] = "Neterion";
  static char s2io_driver_version[] = DRV_VERSION;
  
 -int rxd_size[4] = {32,48,48,64};
 -int rxd_count[4] = {127,85,85,63};
 +static int rxd_size[4] = {32,48,48,64};
 +static int rxd_count[4] = {127,85,85,63};
  
  static inline int RXD_IS_UP2DT(RxD_t *rxdp)
  {
@@@ -168,6 -172,11 +172,11 @@@ static char ethtool_stats_keys[][ETH_GS
        {"\n DRIVER STATISTICS"},
        {"single_bit_ecc_errs"},
        {"double_bit_ecc_errs"},
+       ("lro_aggregated_pkts"),
+       ("lro_flush_both_count"),
+       ("lro_out_of_sequence_pkts"),
+       ("lro_flush_due_to_max_pkts"),
+       ("lro_avg_aggr_pkts"),
  };
  
  #define S2IO_STAT_LEN sizeof(ethtool_stats_keys)/ ETH_GSTRING_LEN
@@@ -317,6 -326,12 +326,12 @@@ static unsigned int indicate_max_pkts
  static unsigned int rxsync_frequency = 3;
  /* Interrupt type. Values can be 0(INTA), 1(MSI), 2(MSI_X) */
  static unsigned int intr_type = 0;
+ /* Large receive offload feature */
+ static unsigned int lro = 0;
+ /* Max pkts to be aggregated by LRO at one time. If not specified,
+  * aggregation happens until we hit max IP pkt size(64K)
+  */
+ static unsigned int lro_max_pkts = 0xFFFF;
  
  /*
   * S2IO device table.
@@@ -1476,6 -1491,19 +1491,19 @@@ static int init_nic(struct s2io_nic *ni
        writel((u32) (val64 >> 32), (add + 4));
        val64 = readq(&bar0->mac_cfg);
  
+       /* Enable FCS stripping by adapter */
+       add = &bar0->mac_cfg;
+       val64 = readq(&bar0->mac_cfg);
+       val64 |= MAC_CFG_RMAC_STRIP_FCS;
+       if (nic->device_type == XFRAME_II_DEVICE)
+               writeq(val64, &bar0->mac_cfg);
+       else {
+               writeq(RMAC_CFG_KEY(0x4C0D), &bar0->rmac_cfg_key);
+               writel((u32) (val64), add);
+               writeq(RMAC_CFG_KEY(0x4C0D), &bar0->rmac_cfg_key);
+               writel((u32) (val64 >> 32), (add + 4));
+       }
        /*
         * Set the time value to be inserted in the pause frame
         * generated by xena.
@@@ -2127,7 -2155,7 +2155,7 @@@ static void stop_nic(struct s2io_nic *n
        }
  }
  
 -int fill_rxd_3buf(nic_t *nic, RxD_t *rxdp, struct sk_buff *skb)
 +static int fill_rxd_3buf(nic_t *nic, RxD_t *rxdp, struct sk_buff *skb)
  {
        struct net_device *dev = nic->dev;
        struct sk_buff *frag_list;
@@@ -2569,6 -2597,8 +2597,8 @@@ static void rx_intr_handler(ring_info_
  #ifndef CONFIG_S2IO_NAPI
        int pkt_cnt = 0;
  #endif
+       int i;
        spin_lock(&nic->rx_lock);
        if (atomic_read(&nic->card_state) == CARD_DOWN) {
                DBG_PRINT(INTR_DBG, "%s: %s going down for reset\n",
                        break;
  #endif
        }
+       if (nic->lro) {
+               /* Clear all LRO sessions before exiting */
+               for (i=0; i<MAX_LRO_SESSIONS; i++) {
+                       lro_t *lro = &nic->lro0_n[i];
+                       if (lro->in_use) {
+                               update_L3L4_header(nic, lro);
+                               queue_rx_frame(lro->parent);
+                               clear_lro_session(lro);
+                       }
+               }
+       }
        spin_unlock(&nic->rx_lock);
  }
  
@@@ -2852,7 -2894,7 +2894,7 @@@ static int wait_for_cmd_complete(nic_t 
   *  void.
   */
  
 -void s2io_reset(nic_t * sp)
 +static void s2io_reset(nic_t * sp)
  {
        XENA_dev_config_t __iomem *bar0 = sp->bar0;
        u64 val64;
   *  SUCCESS on success and FAILURE on failure.
   */
  
 -int s2io_set_swapper(nic_t * sp)
 +static int s2io_set_swapper(nic_t * sp)
  {
        struct net_device *dev = sp->dev;
        XENA_dev_config_t __iomem *bar0 = sp->bar0;
@@@ -3089,7 -3131,7 +3131,7 @@@ static int wait_for_msix_trans(nic_t *n
        return ret;
  }
  
 -void restore_xmsi_data(nic_t *nic)
 +static void restore_xmsi_data(nic_t *nic)
  {
        XENA_dev_config_t __iomem *bar0 = nic->bar0;
        u64 val64;
@@@ -3180,7 -3222,7 +3222,7 @@@ int s2io_enable_msi(nic_t *nic
        return 0;
  }
  
 -int s2io_enable_msi_x(nic_t *nic)
 +static int s2io_enable_msi_x(nic_t *nic)
  {
        XENA_dev_config_t __iomem *bar0 = nic->bar0;
        u64 tx_mat, rx_mat;
@@@ -3668,23 -3710,32 +3710,32 @@@ s2io_msi_handle(int irq, void *dev_id, 
         * else schedule a tasklet to reallocate the buffers.
         */
        for (i = 0; i < config->rx_ring_num; i++) {
-               int rxb_size = atomic_read(&sp->rx_bufs_left[i]);
-               int level = rx_buffer_level(sp, rxb_size, i);
-               if ((level == PANIC) && (!TASKLET_IN_USE)) {
-                       DBG_PRINT(INTR_DBG, "%s: Rx BD hit ", dev->name);
-                       DBG_PRINT(INTR_DBG, "PANIC levels\n");
-                       if ((ret = fill_rx_buffers(sp, i)) == -ENOMEM) {
-                               DBG_PRINT(ERR_DBG, "%s:Out of memory",
-                                         dev->name);
-                               DBG_PRINT(ERR_DBG, " in ISR!!\n");
+               if (!sp->lro) {
+                       int rxb_size = atomic_read(&sp->rx_bufs_left[i]);
+                       int level = rx_buffer_level(sp, rxb_size, i);
+                       if ((level == PANIC) && (!TASKLET_IN_USE)) {
+                               DBG_PRINT(INTR_DBG, "%s: Rx BD hit ", 
+                                                       dev->name);
+                               DBG_PRINT(INTR_DBG, "PANIC levels\n");
+                               if ((ret = fill_rx_buffers(sp, i)) == -ENOMEM) {
+                                       DBG_PRINT(ERR_DBG, "%s:Out of memory",
+                                                 dev->name);
+                                       DBG_PRINT(ERR_DBG, " in ISR!!\n");
+                                       clear_bit(0, (&sp->tasklet_status));
+                                       atomic_dec(&sp->isr_cnt);
+                                       return IRQ_HANDLED;
+                               }
                                clear_bit(0, (&sp->tasklet_status));
-                               atomic_dec(&sp->isr_cnt);
-                               return IRQ_HANDLED;
+                       } else if (level == LOW) {
+                               tasklet_schedule(&sp->task);
                        }
-                       clear_bit(0, (&sp->tasklet_status));
-               } else if (level == LOW) {
-                       tasklet_schedule(&sp->task);
+               }
+               else if (fill_rx_buffers(sp, i) == -ENOMEM) {
+                               DBG_PRINT(ERR_DBG, "%s:Out of memory",
+                                                       dev->name);
+                               DBG_PRINT(ERR_DBG, " in Rx Intr!!\n");
+                               break;
                }
        }
  
@@@ -3697,29 -3748,37 +3748,37 @@@ s2io_msix_ring_handle(int irq, void *de
  {
        ring_info_t *ring = (ring_info_t *)dev_id;
        nic_t *sp = ring->nic;
+       struct net_device *dev = (struct net_device *) dev_id;
        int rxb_size, level, rng_n;
  
        atomic_inc(&sp->isr_cnt);
        rx_intr_handler(ring);
  
        rng_n = ring->ring_no;
-       rxb_size = atomic_read(&sp->rx_bufs_left[rng_n]);
-       level = rx_buffer_level(sp, rxb_size, rng_n);
-       if ((level == PANIC) && (!TASKLET_IN_USE)) {
-               int ret;
-               DBG_PRINT(INTR_DBG, "%s: Rx BD hit ", __FUNCTION__);
-               DBG_PRINT(INTR_DBG, "PANIC levels\n");
-               if ((ret = fill_rx_buffers(sp, rng_n)) == -ENOMEM) {
-                       DBG_PRINT(ERR_DBG, "Out of memory in %s",
-                                 __FUNCTION__);
+       if (!sp->lro) {
+               rxb_size = atomic_read(&sp->rx_bufs_left[rng_n]);
+               level = rx_buffer_level(sp, rxb_size, rng_n);
+               if ((level == PANIC) && (!TASKLET_IN_USE)) {
+                       int ret;
+                       DBG_PRINT(INTR_DBG, "%s: Rx BD hit ", __FUNCTION__);
+                       DBG_PRINT(INTR_DBG, "PANIC levels\n");
+                       if ((ret = fill_rx_buffers(sp, rng_n)) == -ENOMEM) {
+                               DBG_PRINT(ERR_DBG, "Out of memory in %s",
+                                         __FUNCTION__);
+                               clear_bit(0, (&sp->tasklet_status));
+                               return IRQ_HANDLED;
+                       }
                        clear_bit(0, (&sp->tasklet_status));
-                       return IRQ_HANDLED;
+               } else if (level == LOW) {
+                       tasklet_schedule(&sp->task);
                }
-               clear_bit(0, (&sp->tasklet_status));
-       } else if (level == LOW) {
-               tasklet_schedule(&sp->task);
        }
+       else if (fill_rx_buffers(sp, rng_n) == -ENOMEM) {
+                       DBG_PRINT(ERR_DBG, "%s:Out of memory", dev->name);
+                       DBG_PRINT(ERR_DBG, " in Rx Intr!!\n");
+       }
        atomic_dec(&sp->isr_cnt);
  
        return IRQ_HANDLED;
@@@ -3875,24 -3934,33 +3934,33 @@@ static irqreturn_t s2io_isr(int irq, vo
         */
  #ifndef CONFIG_S2IO_NAPI
        for (i = 0; i < config->rx_ring_num; i++) {
-               int ret;
-               int rxb_size = atomic_read(&sp->rx_bufs_left[i]);
-               int level = rx_buffer_level(sp, rxb_size, i);
-               if ((level == PANIC) && (!TASKLET_IN_USE)) {
-                       DBG_PRINT(INTR_DBG, "%s: Rx BD hit ", dev->name);
-                       DBG_PRINT(INTR_DBG, "PANIC levels\n");
-                       if ((ret = fill_rx_buffers(sp, i)) == -ENOMEM) {
-                               DBG_PRINT(ERR_DBG, "%s:Out of memory",
-                                         dev->name);
-                               DBG_PRINT(ERR_DBG, " in ISR!!\n");
+               if (!sp->lro) {
+                       int ret;
+                       int rxb_size = atomic_read(&sp->rx_bufs_left[i]);
+                       int level = rx_buffer_level(sp, rxb_size, i);
+                       if ((level == PANIC) && (!TASKLET_IN_USE)) {
+                               DBG_PRINT(INTR_DBG, "%s: Rx BD hit ", 
+                                                       dev->name);
+                               DBG_PRINT(INTR_DBG, "PANIC levels\n");
+                               if ((ret = fill_rx_buffers(sp, i)) == -ENOMEM) {
+                                       DBG_PRINT(ERR_DBG, "%s:Out of memory",
+                                                 dev->name);
+                                       DBG_PRINT(ERR_DBG, " in ISR!!\n");
+                                       clear_bit(0, (&sp->tasklet_status));
+                                       atomic_dec(&sp->isr_cnt);
+                                       return IRQ_HANDLED;
+                               }
                                clear_bit(0, (&sp->tasklet_status));
-                               atomic_dec(&sp->isr_cnt);
-                               return IRQ_HANDLED;
+                       } else if (level == LOW) {
+                               tasklet_schedule(&sp->task);
                        }
-                       clear_bit(0, (&sp->tasklet_status));
-               } else if (level == LOW) {
-                       tasklet_schedule(&sp->task);
+               }
+               else if (fill_rx_buffers(sp, i) == -ENOMEM) {
+                               DBG_PRINT(ERR_DBG, "%s:Out of memory",
+                                                       dev->name);
+                               DBG_PRINT(ERR_DBG, " in Rx intr!!\n");
+                               break;
                }
        }
  #endif
@@@ -4128,7 -4196,7 +4196,7 @@@ static void s2io_set_multicast(struct n
   *  as defined in errno.h file on failure.
   */
  
 -int s2io_set_mac_addr(struct net_device *dev, u8 * addr)
 +static int s2io_set_mac_addr(struct net_device *dev, u8 * addr)
  {
        nic_t *sp = dev->priv;
        XENA_dev_config_t __iomem *bar0 = sp->bar0;
@@@ -5043,6 -5111,7 +5111,7 @@@ static void s2io_get_ethtool_stats(stru
        int i = 0;
        nic_t *sp = dev->priv;
        StatInfo_t *stat_info = sp->mac_control.stats_info;
+       u64 tmp;
  
        s2io_updt_stats(sp);
        tmp_stats[i++] =
        tmp_stats[i++] = 0;
        tmp_stats[i++] = stat_info->sw_stat.single_ecc_errs;
        tmp_stats[i++] = stat_info->sw_stat.double_ecc_errs;
+       tmp_stats[i++] = stat_info->sw_stat.clubbed_frms_cnt;
+       tmp_stats[i++] = stat_info->sw_stat.sending_both;
+       tmp_stats[i++] = stat_info->sw_stat.outof_sequence_pkts;
+       tmp_stats[i++] = stat_info->sw_stat.flush_max_pkts;
+       tmp = 0;
+       if (stat_info->sw_stat.num_aggregations) {
+               tmp = stat_info->sw_stat.sum_avg_pkts_aggregated;
+               do_div(tmp, stat_info->sw_stat.num_aggregations);
+       }
+       tmp_stats[i++] = tmp;
  }
  
  static int s2io_ethtool_get_regs_len(struct net_device *dev)
@@@ -5515,6 -5594,14 +5594,14 @@@ static int s2io_card_up(nic_t * sp
        /* Setting its receive mode */
        s2io_set_multicast(dev);
  
+       if (sp->lro) {
+               /* Initialize max aggregatable pkts based on MTU */
+               sp->lro_max_aggr_per_sess = ((1<<16) - 1) / dev->mtu;
+               /* Check if we can use(if specified) user provided value */
+               if (lro_max_pkts < sp->lro_max_aggr_per_sess)
+                       sp->lro_max_aggr_per_sess = lro_max_pkts;
+       }
        /* Enable tasklet for the device */
        tasklet_init(&sp->task, s2io_tasklet, (unsigned long) dev);
  
@@@ -5607,6 -5694,7 +5694,7 @@@ static int rx_osm_handler(ring_info_t *
                ((unsigned long) rxdp->Host_Control);
        int ring_no = ring_data->ring_no;
        u16 l3_csum, l4_csum;
+       lro_t *lro;
  
        skb->dev = dev;
        if (rxdp->Control_1 & RXD_T_CODE) {
                        skb_put(skb, buf2_len);
        }
  
-       if ((rxdp->Control_1 & TCP_OR_UDP_FRAME) &&
+       if ((rxdp->Control_1 & TCP_OR_UDP_FRAME) && ((!sp->lro) ||
+           (sp->lro && (!(rxdp->Control_1 & RXD_FRAME_IP_FRAG)))) &&
            (sp->rx_csum)) {
                l3_csum = RXD_GET_L3_CKSUM(rxdp->Control_1);
                l4_csum = RXD_GET_L4_CKSUM(rxdp->Control_1);
                         * a flag in the RxD.
                         */
                        skb->ip_summed = CHECKSUM_UNNECESSARY;
+                       if (sp->lro) {
+                               u32 tcp_len;
+                               u8 *tcp;
+                               int ret = 0;
+                               ret = s2io_club_tcp_session(skb->data, &tcp,
+                                               &tcp_len, &lro, rxdp, sp);
+                               switch (ret) {
+                                       case 3: /* Begin anew */
+                                               lro->parent = skb;
+                                               goto aggregate;
+                                       case 1: /* Aggregate */
+                                       {
+                                               lro_append_pkt(sp, lro,
+                                                       skb, tcp_len);
+                                               goto aggregate;
+                                       }
+                                       case 4: /* Flush session */
+                                       {
+                                               lro_append_pkt(sp, lro,
+                                                       skb, tcp_len);
+                                               queue_rx_frame(lro->parent);
+                                               clear_lro_session(lro);
+                                               sp->mac_control.stats_info->
+                                                   sw_stat.flush_max_pkts++;
+                                               goto aggregate;
+                                       }
+                                       case 2: /* Flush both */
+                                               lro->parent->data_len =
+                                                       lro->frags_len;
+                                               sp->mac_control.stats_info->
+                                                    sw_stat.sending_both++;
+                                               queue_rx_frame(lro->parent);
+                                               clear_lro_session(lro);
+                                               goto send_up;
+                                       case 0: /* sessions exceeded */
+                                       case 5: /*
+                                                * First pkt in session not
+                                                * L3/L4 aggregatable
+                                                */
+                                               break;
+                                       default:
+                                               DBG_PRINT(ERR_DBG,
+                                                       "%s: Samadhana!!\n",
+                                                        __FUNCTION__);
+                                               BUG();
+                               }
+                       }
                } else {
                        /*
                         * Packet with erroneous checksum, let the
                skb->ip_summed = CHECKSUM_NONE;
        }
  
-       skb->protocol = eth_type_trans(skb, dev);
+       if (!sp->lro) {
+               skb->protocol = eth_type_trans(skb, dev);
  #ifdef CONFIG_S2IO_NAPI
-       if (sp->vlgrp && RXD_GET_VLAN_TAG(rxdp->Control_2)) {
-               /* Queueing the vlan frame to the upper layer */
-               vlan_hwaccel_receive_skb(skb, sp->vlgrp,
-                       RXD_GET_VLAN_TAG(rxdp->Control_2));
-       } else {
-               netif_receive_skb(skb);
-       }
+               if (sp->vlgrp && RXD_GET_VLAN_TAG(rxdp->Control_2)) {
+                       /* Queueing the vlan frame to the upper layer */
+                       vlan_hwaccel_receive_skb(skb, sp->vlgrp,
+                               RXD_GET_VLAN_TAG(rxdp->Control_2));
+               } else {
+                       netif_receive_skb(skb);
+               }
  #else
-       if (sp->vlgrp && RXD_GET_VLAN_TAG(rxdp->Control_2)) {
-               /* Queueing the vlan frame to the upper layer */
-               vlan_hwaccel_rx(skb, sp->vlgrp,
-                       RXD_GET_VLAN_TAG(rxdp->Control_2));
-       } else {
-               netif_rx(skb);
-       }
+               if (sp->vlgrp && RXD_GET_VLAN_TAG(rxdp->Control_2)) {
+                       /* Queueing the vlan frame to the upper layer */
+                       vlan_hwaccel_rx(skb, sp->vlgrp,
+                               RXD_GET_VLAN_TAG(rxdp->Control_2));
+               } else {
+                       netif_rx(skb);
+               }
  #endif
+       } else {
+ send_up:
+               queue_rx_frame(skb);
+       }               
        dev->last_rx = jiffies;
+ aggregate:
        atomic_dec(&sp->rx_bufs_left[ring_no]);
        return SUCCESS;
  }
   *  void.
   */
  
 -void s2io_link(nic_t * sp, int link)
 +static void s2io_link(nic_t * sp, int link)
  {
        struct net_device *dev = (struct net_device *) sp->dev;
  
   *  returns the revision ID of the device.
   */
  
 -int get_xena_rev_id(struct pci_dev *pdev)
 +static int get_xena_rev_id(struct pci_dev *pdev)
  {
        u8 id = 0;
        int ret;
@@@ -5807,6 -5950,8 +5950,8 @@@ module_param(indicate_max_pkts, int, 0)
  #endif
  module_param(rxsync_frequency, int, 0);
  module_param(intr_type, int, 0);
+ module_param(lro, int, 0);
+ module_param(lro_max_pkts, int, 0);
  
  /**
   *  s2io_init_nic - Initialization of the adapter .
@@@ -5938,6 -6083,7 +6083,7 @@@ Defaulting to INTA\n")
        else
                sp->device_type = XFRAME_I_DEVICE;
  
+       sp->lro = lro;
                
        /* Initialize some PCI/PCI-X fields of the NIC. */
        s2io_init_pci(sp);
                DBG_PRINT(ERR_DBG, "%s: 3-Buffer mode support has been "
                          "enabled\n",dev->name);
  
+       if (sp->lro)
+               DBG_PRINT(ERR_DBG, "%s: Large receive offload enabled\n",
+                       dev->name);
        /* Initialize device name */
        strcpy(sp->name, dev->name);
        if (sp->device_type & XFRAME_II_DEVICE)
@@@ -6343,7 -6493,7 +6493,7 @@@ int __init s2io_starter(void
   * Description: This function is the cleanup routine for the driver. It unregisters the driver.
   */
  
 -void s2io_closer(void)
 +static void s2io_closer(void)
  {
        pci_unregister_driver(&s2io_driver);
        DBG_PRINT(INIT_DBG, "cleanup done\n");
  
  module_init(s2io_starter);
  module_exit(s2io_closer);
+ /*
+  * check_L2_lro_capable - locate the IP and TCP headers of an LRO candidate.
+  * @buffer: start of the received frame (L2 header)
+  * @ip: out - receives a pointer to the IP header inside @buffer
+  * @tcp: out - receives a pointer to the TCP header following the IP header
+  * @rxdp: Rx descriptor of the frame; Control_1 carries the protocol flag
+  *        and the 3-bit L2 encapsulation type
+  *
+  * Returns 0 with *ip and *tcp filled in. Returns -1, leaving both outputs
+  * untouched, when the descriptor is not flagged as TCP or when the L2
+  * encapsulation is anything other than DIX (with or without VLAN).
+  */
+ static int check_L2_lro_capable(u8 *buffer, struct iphdr **ip,
+               struct tcphdr **tcp, RxD_t *rxdp)
+ {
+       struct iphdr *iph;
+       u8 encap, hdr_bytes;
+       if (!(rxdp->Control_1 & RXD_FRAME_PROTO_TCP)) {
+               DBG_PRINT(INIT_DBG,"%s: Non-TCP frames not supported for LRO\n",
+                         __FUNCTION__);
+               return -1;
+       }
+       /* TODO:
+        * The card strips the VLAN field from the MAC header by default. If
+        * that is ever switched off in the rx_pa_cfg register, the IP header
+        * offset used below must grow by a further 2 bytes.
+        */
+       encap = (u8)((rxdp->Control_1 >> 37) & 0x7);
+       /* 0 is DIX, 4 is DIX with VLAN; LLC, SNAP etc. are never merged */
+       if (encap != 0 && encap != 4)
+               return -1;
+       iph = (struct iphdr *)((u8 *)buffer + HEADER_ETHERNET_II_802_3_SIZE);
+       /* ihl counts 32-bit words; convert to bytes */
+       hdr_bytes = (u8)(iph->ihl);
+       hdr_bytes <<= 2;
+       *ip = iph;
+       *tcp = (struct tcphdr *)((unsigned long)iph + hdr_bytes);
+       return 0;
+ }
+ /*
+  * check_for_socket_match - test whether a packet belongs to an LRO session.
+  * Compares source/destination IP address and source/destination TCP port
+  * of the packet headers against the headers saved in @lro.
+  * Returns 0 when all four fields are equal, -1 otherwise.
+  */
+ static int check_for_socket_match(lro_t *lro, struct iphdr *ip,
+                                 struct tcphdr *tcp)
+ {
+       struct iphdr *sess_ip = lro->iph;
+       struct tcphdr *sess_tcp = lro->tcph;
+       DBG_PRINT(INFO_DBG,"%s: Been here...\n", __FUNCTION__);
+       if (sess_ip->saddr == ip->saddr && sess_ip->daddr == ip->daddr &&
+           sess_tcp->source == tcp->source && sess_tcp->dest == tcp->dest)
+               return 0;
+       return -1;
+ }
+ /*
+  * get_l4_pyld_length - TCP payload size of a packet, in bytes.
+  * Computed as the IP total length minus the IP header and TCP header
+  * lengths (ihl and doff are both expressed in 32-bit words).
+  */
+ static inline int get_l4_pyld_length(struct iphdr *ip, struct tcphdr *tcp)
+ {
+       int ip_hdr_bytes = ip->ihl << 2;
+       int tcp_hdr_bytes = tcp->doff << 2;
+       return ntohs(ip->tot_len) - ip_hdr_bytes - tcp_hdr_bytes;
+ }
+ /*
+  * initiate_new_session - start an LRO session from its first packet.
+  * @lro: free session slot to fill in
+  * @l2h: start of the first packet's L2 header
+  * @ip: IP header of the first packet
+  * @tcp: TCP header of the first packet
+  * @tcp_pyld_len: TCP payload length of the first packet, in bytes
+  *
+  * The header pointers are kept so that update_L3L4_header() can rewrite the
+  * first packet's headers in place when the session is flushed.
+  */
+ static void initiate_new_session(lro_t *lro, u8 *l2h,
+                    struct iphdr *ip, struct tcphdr *tcp, u32 tcp_pyld_len)
+ {
+       DBG_PRINT(INFO_DBG,"%s: Been here...\n", __FUNCTION__);
+       lro->l2h = l2h;
+       lro->iph = ip;
+       lro->tcph = tcp;
+       /* Sequence number expected on the next in-order segment (host order) */
+       lro->tcp_next_seq = tcp_pyld_len + ntohl(tcp->seq);
+       /*
+        * Keep ack_seq and window exactly as they appear on the wire:
+        * aggregate_new_rx() stores them unconverted and update_L3L4_header()
+        * copies them straight back into the TCP header. Converting the ack
+        * here, or leaving the window at zero, would corrupt the header of a
+        * session that is flushed after only one packet.
+        */
+       lro->tcp_ack = tcp->ack_seq;
+       lro->window = tcp->window;
+       lro->sg_num = 1;
+       lro->total_len = ntohs(ip->tot_len);
+       lro->frags_len = 0;
+       /* 
+        * check if we saw TCP timestamp. Other consistency checks have
+        * already been done.
+        */
+       if (tcp->doff == 8) {
+               u32 *ptr;
+               /* Option words directly after the fixed TCP header */
+               ptr = (u32 *)(tcp+1);
+               lro->saw_ts = 1;
+               /* Both timestamp words are stored as read from the packet */
+               lro->cur_tsval = *(ptr+1);
+               lro->cur_tsecr = *(ptr+2);
+       }
+       lro->in_use = 1;
+ }
+ /*
+  * update_L3L4_header - rewrite the first packet's headers before a flush.
+  * @sp: adapter private data, used for the software statistics block
+  * @lro: session being flushed
+  *
+  * Sets the IP total length to the accumulated session length and
+  * recomputes the IP header checksum, then copies the session's saved ack,
+  * window and (when timestamps were seen) tsecr into the TCP header. Also
+  * bumps the counters from which the average aggregation size is derived.
+  */
+ static void update_L3L4_header(nic_t *sp, lro_t *lro)
+ {
+       struct iphdr *iph = lro->iph;
+       struct tcphdr *tcph = lro->tcph;
+       StatInfo_t *stats = sp->mac_control.stats_info;
+       DBG_PRINT(INFO_DBG,"%s: Been here...\n", __FUNCTION__);
+       /* L3: new total length, checksum recomputed over a zeroed field */
+       iph->tot_len = htons(lro->total_len);
+       iph->check = 0;
+       iph->check = ip_fast_csum((u8 *)iph, iph->ihl);
+       /* L4: values saved in the session are copied back as stored */
+       tcph->ack_seq = lro->tcp_ack;
+       tcph->window = lro->window;
+       if (lro->saw_ts) {
+               /* Third option word holds the timestamp echo reply */
+               u32 *opt_words = (u32 *)(tcph + 1);
+               opt_words[2] = lro->cur_tsecr;
+       }
+       /* Inputs for the "lro_avg_aggr_pkts" statistic */
+       stats->sw_stat.num_aggregations++;
+       stats->sw_stat.sum_avg_pkts_aggregated += lro->sg_num;
+ }
+ /*
+  * aggregate_new_rx - account for one more in-order segment in a session.
+  * @lro: session the segment belongs to
+  * @ip: IP header of the new segment (not used here)
+  * @tcp: TCP header of the new segment
+  * @l4_pyld: TCP payload length of the new segment, in bytes
+  *
+  * Only the session bookkeeping is updated; chaining the skb onto the
+  * parent is done separately by lro_append_pkt().
+  */
+ static void aggregate_new_rx(lro_t *lro, struct iphdr *ip,
+               struct tcphdr *tcp, u32 l4_pyld)
+ {
+       DBG_PRINT(INFO_DBG,"%s: Been here...\n", __FUNCTION__);
+       /* Grow the length counters and advance the expected sequence number */
+       lro->sg_num++;
+       lro->tcp_next_seq += l4_pyld;
+       lro->frags_len += l4_pyld;
+       lro->total_len += l4_pyld;
+       /* Remember the latest window and ack number seen on this flow */
+       lro->window = tcp->window;
+       lro->tcp_ack = tcp->ack_seq;
+       if (lro->saw_ts) {
+               /* Refresh tsval and tsecr from this segment's option words */
+               u32 *opt_words = (u32 *)(tcp + 1);
+               lro->cur_tsecr = opt_words[2];
+               lro->cur_tsval = opt_words[1];
+       }
+ }
+ /*
+  * verify_l3_l4_lro_capable - decide whether a TCP segment may be aggregated.
+  * @l_lro: session the segment matched, or NULL when no session exists yet
+  * @ip: IP header of the segment
+  * @tcp: TCP header of the segment
+  * @tcp_pyld_len: TCP payload length of the segment, in bytes
+  *
+  * Returns 0 when the segment is mergeable and -1 otherwise. A segment is
+  * rejected when it carries no payload, has IP options, has any TCP flag
+  * other than a plain ACK, or has TCP options other than a single timestamp
+  * option (optionally preceded by NOP padding).
+  */
+ static int verify_l3_l4_lro_capable(lro_t *l_lro, struct iphdr *ip,
+                                   struct tcphdr *tcp, u32 tcp_pyld_len)
+ {
+       u8 *ptr;
+       DBG_PRINT(INFO_DBG,"%s: Been here...\n", __FUNCTION__);
+       if (!tcp_pyld_len) {
+               /* Runt frame or a pure ack */
+               return -1;
+       }
+       if (ip->ihl != 5) /* IP has options */
+               return -1;
+       if (tcp->urg || tcp->psh || tcp->rst || tcp->syn || tcp->fin ||
+                                                               !tcp->ack) {
+               /*
+                * Currently recognize only the ack control word and
+                * any other control field being set would result in
+                * flushing the LRO session
+                */
+               return -1;
+       }
+       /* 
+        * Allow only one TCP timestamp option. Don't aggregate if
+        * any other options are detected.
+        */
+       if (tcp->doff != 5 && tcp->doff != 8)
+               return -1;
+       if (tcp->doff == 8) {
+               u8 *opt_end;
+               ptr = (u8 *)(tcp + 1);
+               /* doff counts 32-bit words; options follow the 5-word header */
+               opt_end = ptr + ((tcp->doff - 5) << 2);
+               /* Skip NOP padding, but never walk past the option area */
+               while (ptr < opt_end && *ptr == TCPOPT_NOP)
+                       ptr++;
+               /* The whole timestamp option must fit in what is left */
+               if (ptr + TCPOLEN_TIMESTAMP > opt_end)
+                       return -1;
+               if (*ptr != TCPOPT_TIMESTAMP || *(ptr+1) != TCPOLEN_TIMESTAMP)
+                       return -1;
+               /*
+                * Ensure timestamp value increases monotonically. Both the
+                * saved value and the one in the packet are in network byte
+                * order, so convert them before comparing magnitudes.
+                */
+               if (l_lro &&
+                   ntohl(l_lro->cur_tsval) > ntohl(*((u32 *)(ptr+2))))
+                       return -1;
+               /* timestamp echo reply should be non-zero */
+               if (*((u32 *)(ptr+6)) == 0) 
+                       return -1;
+       }
+       return 0;
+ }
+ /*
+  * s2io_club_tcp_session - classify a received frame for LRO.
+  * @buffer: start of the received frame (L2 header)
+  * @tcp: out - set to the frame's TCP header once the L2 check passes
+  * @tcp_len: out - set to the frame's TCP payload length in bytes
+  * @lro: out - session the frame was matched to or assigned, else NULL
+  * @rxdp: Rx descriptor of the frame
+  * @sp: adapter private data (session table, limits, statistics)
+  *
+  * Return value tells the caller what to do with the frame:
+  *   0 - every session slot is busy; send the frame up unaggregated
+  *   1 - frame was accounted to *lro; append it to the session
+  *   2 - frame cannot join its session (out of order or not mergeable);
+  *       session headers were finalised, flush the session and the frame
+  *   3 - a new session was started in *lro with this frame as its first
+  *   4 - as 1, but the session reached lro_max_aggr_per_sess and its
+  *       headers were finalised; append, then flush
+  *   5 - frame is not eligible for LRO at all; send it up unaggregated
+  */
+ static int
+ s2io_club_tcp_session(u8 *buffer, u8 **tcp, u32 *tcp_len, lro_t **lro,
+                     RxD_t *rxdp, nic_t *sp)
+ {
+       struct iphdr *ip;
+       struct tcphdr *tcph;
+       int ret = 0, i;
+       /* Never hand an indeterminate session pointer back to the caller */
+       *lro = NULL;
+       if (check_L2_lro_capable(buffer, &ip, (struct tcphdr **)tcp, rxdp)) {
+               /*
+                * Non-TCP frame or unsupported L2 encapsulation. Report it
+                * as "not aggregatable" (5), which the caller handles by
+                * sending the frame up; a negative value is not one of the
+                * codes the caller's switch knows about.
+                */
+               return 5;
+       }
+       DBG_PRINT(INFO_DBG,"IP Saddr: %x Daddr: %x\n",
+                 ip->saddr, ip->daddr);
+       tcph = (struct tcphdr *)*tcp;
+       *tcp_len = get_l4_pyld_length(ip, tcph);
+       /* Pass 1: look for an active session on the same socket pair */
+       for (i=0; i<MAX_LRO_SESSIONS; i++) {
+               lro_t *l_lro = &sp->lro0_n[i];
+               if (l_lro->in_use) {
+                       if (check_for_socket_match(l_lro, ip, tcph))
+                               continue;
+                       /* Sock pair matched */
+                       *lro = l_lro;
+                       if ((*lro)->tcp_next_seq != ntohl(tcph->seq)) {
+                               DBG_PRINT(INFO_DBG, "%s:Out of order. expected "
+                                         "0x%x, actual 0x%x\n", __FUNCTION__,
+                                         (*lro)->tcp_next_seq,
+                                         ntohl(tcph->seq));
+                               sp->mac_control.stats_info->
+                                  sw_stat.outof_sequence_pkts++;
+                               ret = 2;
+                               break;
+                       }
+                       if (!verify_l3_l4_lro_capable(l_lro, ip, tcph,*tcp_len))
+                               ret = 1; /* Aggregate */
+                       else
+                               ret = 2; /* Flush both */
+                       break;
+               }
+       }
+       if (ret == 0) {
+               /* Before searching for available LRO objects,
+                * check if the pkt is L3/L4 aggregatable. If not
+                * don't create new LRO session. Just send this
+                * packet up.
+                */
+               if (verify_l3_l4_lro_capable(NULL, ip, tcph, *tcp_len)) {
+                       return 5;
+               }
+               /* Pass 2: claim the first free session slot */
+               for (i=0; i<MAX_LRO_SESSIONS; i++) {
+                       lro_t *l_lro = &sp->lro0_n[i];
+                       if (!(l_lro->in_use)) {
+                               *lro = l_lro;
+                               ret = 3; /* Begin anew */
+                               break;
+                       }
+               }
+       }
+       if (ret == 0) { /* sessions exceeded */
+               DBG_PRINT(INFO_DBG,"%s:All LRO sessions already in use\n",
+                         __FUNCTION__);
+               *lro = NULL;
+               return ret;
+       }
+       /* Apply the decision to the session's bookkeeping */
+       switch (ret) {
+               case 3:
+                       initiate_new_session(*lro, buffer, ip, tcph, *tcp_len);
+                       break;
+               case 2:
+                       update_L3L4_header(sp, *lro);
+                       break;
+               case 1:
+                       aggregate_new_rx(*lro, ip, tcph, *tcp_len);
+                       if ((*lro)->sg_num == sp->lro_max_aggr_per_sess) {
+                               update_L3L4_header(sp, *lro);
+                               ret = 4; /* Flush the LRO */
+                       }
+                       break;
+               default:
+                       DBG_PRINT(ERR_DBG,"%s:Dont know, can't say!!\n",
+                               __FUNCTION__);
+                       break;
+       }
+       return ret;
+ }
+ /*
+  * clear_lro_session - return a session slot to the free state.
+  * Zeroes the whole object, which also clears in_use.
+  */
+ static void clear_lro_session(lro_t *lro)
+ {
+       memset(lro, 0, sizeof(lro_t));
+ }
+ /*
+  * queue_rx_frame - hand a received skb to the network stack.
+  * Resolves the protocol from the Ethernet header using the device already
+  * recorded in skb->dev, then delivers through the NAPI or non-NAPI entry
+  * point depending on the build configuration.
+  */
+ static void queue_rx_frame(struct sk_buff *skb)
+ {
+       skb->protocol = eth_type_trans(skb, skb->dev);
+ #ifndef CONFIG_S2IO_NAPI
+       netif_rx(skb);
+ #else
+       netif_receive_skb(skb);
+ #endif
+ }
+ /*
+  * lro_append_pkt - chain a segment's payload onto an LRO session's parent.
+  * @sp: adapter private data, used for the software statistics block
+  * @lro: session whose parent skb receives the segment
+  * @skb: newly received segment
+  * @tcp_len: TCP payload length of @skb, in bytes
+  *
+  * The parent's length fields are grown, the headers are pulled off @skb so
+  * that only payload remains, and @skb is linked at the tail of the parent's
+  * frag_list.
+  */
+ static void lro_append_pkt(nic_t *sp, lro_t *lro, struct sk_buff *skb,
+                          u32 tcp_len)
+ {
+       struct sk_buff *head = lro->parent;
+       struct sk_buff **link;
+       head->len += tcp_len;
+       head->data_len = lro->frags_len;
+       /* Drop everything in front of the TCP payload */
+       skb_pull(skb, (skb->len - tcp_len));
+       /* Walk to the first empty link, which is the end of the chain */
+       link = &skb_shinfo(head)->frag_list;
+       while (*link)
+               link = &(*link)->next;
+       *link = skb;
+       sp->mac_control.stats_info->sw_stat.clubbed_frms_cnt++;
+ }
diff --combined drivers/net/s2io.h
@@@ -64,7 -64,7 +64,7 @@@ typedef enum xena_max_outstanding_split
  #define       INTR_DBG        4
  
  /* Global variable that defines the present debug level of the driver. */
 -int debug_level = ERR_DBG;    /* Default level. */
 +static int debug_level = ERR_DBG;
  
  /*
   * DEBUG message print: forwards args to printk() unless debug_level is
   * lower than dbg_level.
   * NOTE(review): this expands to a bare `if` statement, so an unbraced
   * "if (x) DBG_PRINT(...); else ..." would bind the else to the macro's if.
   */
  #define DBG_PRINT(dbg_level, args...)  if(!(debug_level<dbg_level)) printk(args)
  typedef struct {
        unsigned long long single_ecc_errs;
        unsigned long long double_ecc_errs;
+       /*
+        * LRO statistics.
+        * NOTE(review): these presumably back the lro_* ethtool keys added to
+        * ethtool_stats_keys[] in s2io.c; confirm the ordering there.
+        */
+       unsigned long long clubbed_frms_cnt;    /* bumped once per frame chained by lro_append_pkt() */
+       unsigned long long sending_both;
+       unsigned long long outof_sequence_pkts;
+       unsigned long long flush_max_pkts;
+       unsigned long long sum_avg_pkts_aggregated;     /* presumably the numerator of the average; confirm */
+       unsigned long long num_aggregations;    /* presumably the denominator of the average; confirm */
  } swStat_t;
  
  /* The statistics block of Xena */
@@@ -268,7 -275,7 +275,7 @@@ typedef struct stat_block 
  #define MAX_RX_RINGS 8
  
  /* FIFO mappings for all possible number of fifos configured */
 -int fifo_map[][MAX_TX_FIFOS] = {
 +static int fifo_map[][MAX_TX_FIFOS] = {
        {0, 0, 0, 0, 0, 0, 0, 0},
        {0, 0, 0, 0, 1, 1, 1, 1},
        {0, 0, 0, 1, 1, 1, 2, 2},
@@@ -680,6 -687,24 +687,24 @@@ struct msix_info_st 
        u64 data;
  };
  
+ /*
+  * Data structure to represent an LRO session.
+  * struct s2io_nic embeds MAX_LRO_SESSIONS of these in its lro0_n[] array.
+  * NOTE(review): the header pointers and the sequence/ack, window and
+  * timestamp fields presumably cache TCP/IP state of the aggregated flow;
+  * confirm against their users in s2io.c.
+  */
+ typedef struct lro {
+       struct sk_buff  *parent;        /* skb that lro_append_pkt() chains further segments onto */
+       u8              *l2h;
+       struct iphdr    *iph;
+       struct tcphdr   *tcph;
+       u32             tcp_next_seq;
+       u32             tcp_ack;
+       int             total_len;
+       int             frags_len;      /* copied into parent->data_len by lro_append_pkt() */
+       int             sg_num;         /* compared with lro_max_aggr_per_sess to force a flush */
+       int             in_use;         /* presumably non-zero while the slot is occupied; confirm */
+       u16             window;
+       u32             cur_tsval;
+       u32             cur_tsecr;
+       u8              saw_ts;
+ }lro_t;
  /* Structure representing one instance of the NIC */
  struct s2io_nic {
        int rxd_mode;
  #define XFRAME_II_DEVICE      2
        u8 device_type;
  
+ #define MAX_LRO_SESSIONS      32
+       lro_t lro0_n[MAX_LRO_SESSIONS];
+       unsigned long   clubbed_frms_cnt;
+       unsigned long   sending_both;
+       u8              lro;
+       u16             lro_max_aggr_per_sess;
  #define INTA  0
  #define MSI   1
  #define MSI_X 2
@@@ -911,16 -943,18 +943,16 @@@ static void tx_intr_handler(fifo_info_
  static void alarm_intr_handler(struct s2io_nic *sp);
  
  static int s2io_starter(void);
 -void s2io_closer(void);
  static void s2io_tx_watchdog(struct net_device *dev);
  static void s2io_tasklet(unsigned long dev_addr);
  static void s2io_set_multicast(struct net_device *dev);
  static int rx_osm_handler(ring_info_t *ring_data, RxD_t * rxdp);
 -void s2io_link(nic_t * sp, int link);
 -void s2io_reset(nic_t * sp);
 +static void s2io_link(nic_t * sp, int link);
  #if defined(CONFIG_S2IO_NAPI)
  static int s2io_poll(struct net_device *dev, int *budget);
  #endif
  static void s2io_init_pci(nic_t * sp);
 -int s2io_set_mac_addr(struct net_device *dev, u8 * addr);
 +static int s2io_set_mac_addr(struct net_device *dev, u8 * addr);
  static void s2io_alarm_handle(unsigned long data);
  static int s2io_enable_msi(nic_t *nic);
  static irqreturn_t s2io_msi_handle(int irq, void *dev_id, struct pt_regs *regs);
@@@ -928,13 -962,20 +960,19 @@@ static irqreturn_
  s2io_msix_ring_handle(int irq, void *dev_id, struct pt_regs *regs);
  static irqreturn_t
  s2io_msix_fifo_handle(int irq, void *dev_id, struct pt_regs *regs);
 -int s2io_enable_msi_x(nic_t *nic);
  static irqreturn_t s2io_isr(int irq, void *dev_id, struct pt_regs *regs);
  static int verify_xena_quiescence(nic_t *sp, u64 val64, int flag);
  static struct ethtool_ops netdev_ethtool_ops;
  static void s2io_set_link(unsigned long data);
 -int s2io_set_swapper(nic_t * sp);
 +static int s2io_set_swapper(nic_t * sp);
  static void s2io_card_down(nic_t *nic);
  static int s2io_card_up(nic_t *nic);
 -int get_xena_rev_id(struct pci_dev *pdev);
 -void restore_xmsi_data(nic_t *nic);
 +static int get_xena_rev_id(struct pci_dev *pdev);
 +static void restore_xmsi_data(nic_t *nic);
+ static int s2io_club_tcp_session(u8 *buffer, u8 **tcp, u32 *tcp_len, lro_t **lro, RxD_t *rxdp, nic_t *sp);
+ static void clear_lro_session(lro_t *lro);
+ static void queue_rx_frame(struct sk_buff *skb);
+ static void update_L3L4_header(nic_t *sp, lro_t *lro);
+ static void lro_append_pkt(nic_t *sp, lro_t *lro, struct sk_buff *skb, u32 tcp_len);
  #endif                                /* _S2IO_H */