drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c

   1 /*
   2  * This file is part of the Chelsio T4 PCI-E SR-IOV Virtual Function Ethernet
   3  * driver for Linux.
   4  *
   5  * Copyright (c) 2009-2010 Chelsio Communications, Inc. All rights reserved.
   6  *
   7  * This software is available to you under a choice of one of two
   8  * licenses.  You may choose to be licensed under the terms of the GNU
   9  * General Public License (GPL) Version 2, available from the file
  10  * COPYING in the main directory of this source tree, or the
  11  * OpenIB.org BSD license below:
  12  *
  13  *     Redistribution and use in source and binary forms, with or
  14  *     without modification, are permitted provided that the following
  15  *     conditions are met:
  16  *
  17  *      - Redistributions of source code must retain the above
  18  *        copyright notice, this list of conditions and the following
  19  *        disclaimer.
  20  *
  21  *      - Redistributions in binary form must reproduce the above
  22  *        copyright notice, this list of conditions and the following
  23  *        disclaimer in the documentation and/or other materials
  24  *        provided with the distribution.
  25  *
  26  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  27  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  28  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  29  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  30  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  31  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  32  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  33  * SOFTWARE.
  34  */
  35
  36 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  37
  38 #include <linux/module.h>
  39 #include <linux/moduleparam.h>
  40 #include <linux/init.h>
  41 #include <linux/pci.h>
  42 #include <linux/dma-mapping.h>
  43 #include <linux/netdevice.h>
  44 #include <linux/etherdevice.h>
  45 #include <linux/debugfs.h>
  46 #include <linux/ethtool.h>
  47 #include <linux/mdio.h>
  48
  49 #include "t4vf_common.h"
  50 #include "t4vf_defs.h"
  51
  52 #include "../cxgb4/t4_regs.h"
  53 #include "../cxgb4/t4_msg.h"
  54
  55 /*
  56  * Generic information about the driver.
  57  */
  58 #define DRV_VERSION "2.0.0-ko"
  59 #define DRV_DESC "Chelsio T4/T5/T6 Virtual Function (VF) Network Driver"
  60
  61 /*
  62  * Module Parameters.
  63  * ==================
  64  */
  65
  66 /*
  67  * Default ethtool "message level" for adapters.
  68  */
  69 #define DFLT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | \
  70                          NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP |\
  71                          NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR)
  72
  73 static int dflt_msg_enable = DFLT_MSG_ENABLE;
  74
  75 module_param(dflt_msg_enable, int, 0644);
  76 MODULE_PARM_DESC(dflt_msg_enable,
  77                  "default adapter ethtool message level bitmap, "
  78                  "deprecated parameter");
  79
  80 /*
  81  * The driver uses the best interrupt scheme available on a platform in the
  82  * order MSI-X then MSI.  This parameter determines which of these schemes the
  83  * driver may consider as follows:
  84  *
  85  *     msi = 2: choose from among MSI-X and MSI
  86  *     msi = 1: only consider MSI interrupts
  87  *
  88  * Note that unlike the Physical Function driver, this Virtual Function driver
  89  * does _not_ support legacy INTx interrupts (this limitation is mandated by
  90  * the PCI-E SR-IOV standard).
  91  */
  92 #define MSI_MSIX        2
  93 #define MSI_MSI         1
  94 #define MSI_DEFAULT     MSI_MSIX
  95
  96 static int msi = MSI_DEFAULT;
  97
  98 module_param(msi, int, 0644);
  99 MODULE_PARM_DESC(msi, "whether to use MSI-X or MSI");
 100
 101 /*
 102  * Fundamental constants.
 103  * ======================
 104  */
 105
 106 enum {
 107         MAX_TXQ_ENTRIES         = 16384,
 108         MAX_RSPQ_ENTRIES        = 16384,
 109         MAX_RX_BUFFERS          = 16384,
 110
 111         MIN_TXQ_ENTRIES         = 32,
 112         MIN_RSPQ_ENTRIES        = 128,
 113         MIN_FL_ENTRIES          = 16,
 114
 115         /*
 116          * For purposes of manipulating the Free List size we need to
 117          * recognize that Free Lists are actually Egress Queues (the host
 118          * produces free buffers which the hardware consumes), Egress Queues
 119          * indices are all in units of Egress Context Units bytes, and free
 120          * list entries are 64-bit PCI DMA addresses.  And since the state of
 121          * the Producer Index == the Consumer Index implies an EMPTY list, we
 122          * always have at least one Egress Unit's worth of Free List entries
 123          * unused.  See sge.c for more details ...
 124          */
 125         EQ_UNIT = SGE_EQ_IDXSIZE,
 126         FL_PER_EQ_UNIT = EQ_UNIT / sizeof(__be64),
 127         MIN_FL_RESID = FL_PER_EQ_UNIT,
 128 };
 129
 130 /*
 131  * Global driver state.
 132  * ====================
 133  */
 134
  135 static struct dentry *cxgb4vf_debugfs_root;	/* NOTE(review): presumably the driver-wide debugfs root directory; it is neither assigned nor used in this part of the file - confirm */
 136
 137 /*
 138  * OS "Callback" functions.
 139  * ========================
 140  */
 141
 142 /*
 143  * The link status has changed on the indicated "port" (Virtual Interface).
 144  */
 145 void t4vf_os_link_changed(struct adapter *adapter, int pidx, int link_ok)
 146 {
 147         struct net_device *dev = adapter->port[pidx];
 148
 149         /*
 150          * If the port is disabled or the current recorded "link up"
 151          * status matches the new status, just return.
 152          */
 153         if (!netif_running(dev) || link_ok == netif_carrier_ok(dev))
 154                 return;
 155
 156         /*
 157          * Tell the OS that the link status has changed and print a short
 158          * informative message on the console about the event.
 159          */
 160         if (link_ok) {
 161                 const char *s;
 162                 const char *fc;
 163                 const struct port_info *pi = netdev_priv(dev);
 164
 165                 netif_carrier_on(dev);
 166
 167                 switch (pi->link_cfg.speed) {
 168                 case 40000:
 169                         s = "40Gbps";
 170                         break;
 171
 172                 case 10000:
 173                         s = "10Gbps";
 174                         break;
 175
 176                 case 1000:
 177                         s = "1000Mbps";
 178                         break;
 179
 180                 case 100:
 181                         s = "100Mbps";
 182                         break;
 183
 184                 default:
 185                         s = "unknown";
 186                         break;
 187                 }
 188
 189                 switch (pi->link_cfg.fc) {
 190                 case PAUSE_RX:
 191                         fc = "RX";
 192                         break;
 193
 194                 case PAUSE_TX:
 195                         fc = "TX";
 196                         break;
 197
 198                 case PAUSE_RX|PAUSE_TX:
 199                         fc = "RX/TX";
 200                         break;
 201
 202                 default:
 203                         fc = "no";
 204                         break;
 205                 }
 206
 207                 netdev_info(dev, "link up, %s, full-duplex, %s PAUSE\n", s, fc);
 208         } else {
 209                 netif_carrier_off(dev);
 210                 netdev_info(dev, "link down\n");
 211         }
 212 }
 213
 214 /*
 215  * THe port module type has changed on the indicated "port" (Virtual
 216  * Interface).
 217  */
 218 void t4vf_os_portmod_changed(struct adapter *adapter, int pidx)
 219 {
 220         static const char * const mod_str[] = {
 221                 NULL, "LR", "SR", "ER", "passive DA", "active DA", "LRM"
 222         };
 223         const struct net_device *dev = adapter->port[pidx];
 224         const struct port_info *pi = netdev_priv(dev);
 225
 226         if (pi->mod_type == FW_PORT_MOD_TYPE_NONE)
 227                 dev_info(adapter->pdev_dev, "%s: port module unplugged\n",
 228                          dev->name);
 229         else if (pi->mod_type < ARRAY_SIZE(mod_str))
 230                 dev_info(adapter->pdev_dev, "%s: %s port module inserted\n",
 231                          dev->name, mod_str[pi->mod_type]);
 232         else if (pi->mod_type == FW_PORT_MOD_TYPE_NOTSUPPORTED)
 233                 dev_info(adapter->pdev_dev, "%s: unsupported optical port "
 234                          "module inserted\n", dev->name);
 235         else if (pi->mod_type == FW_PORT_MOD_TYPE_UNKNOWN)
 236                 dev_info(adapter->pdev_dev, "%s: unknown port module inserted,"
 237                          "forcing TWINAX\n", dev->name);
 238         else if (pi->mod_type == FW_PORT_MOD_TYPE_ERROR)
 239                 dev_info(adapter->pdev_dev, "%s: transceiver module error\n",
 240                          dev->name);
 241         else
 242                 dev_info(adapter->pdev_dev, "%s: unknown module type %d "
 243                          "inserted\n", dev->name, pi->mod_type);
 244 }
 245
 246 /*
 247  * Net device operations.
 248  * ======================
 249  */
 250
 251
 252
 253
 254 /*
 255  * Perform the MAC and PHY actions needed to enable a "port" (Virtual
 256  * Interface).
 257  */
 258 static int link_start(struct net_device *dev)
 259 {
 260         int ret;
 261         struct port_info *pi = netdev_priv(dev);
 262
 263         /*
 264          * We do not set address filters and promiscuity here, the stack does
 265          * that step explicitly. Enable vlan accel.
 266          */
 267         ret = t4vf_set_rxmode(pi->adapter, pi->viid, dev->mtu, -1, -1, -1, 1,
 268                               true);
 269         if (ret == 0) {
 270                 ret = t4vf_change_mac(pi->adapter, pi->viid,
 271                                       pi->xact_addr_filt, dev->dev_addr, true);
 272                 if (ret >= 0) {
 273                         pi->xact_addr_filt = ret;
 274                         ret = 0;
 275                 }
 276         }
 277
 278         /*
 279          * We don't need to actually "start the link" itself since the
 280          * firmware will do that for us when the first Virtual Interface
 281          * is enabled on a port.
 282          */
 283         if (ret == 0)
 284                 ret = t4vf_enable_vi(pi->adapter, pi->viid, true, true);
 285         return ret;
 286 }
 287
 288 /*
 289  * Name the MSI-X interrupts.
 290  */
 291 static void name_msix_vecs(struct adapter *adapter)
 292 {
 293         int namelen = sizeof(adapter->msix_info[0].desc) - 1;
 294         int pidx;
 295
 296         /*
 297          * Firmware events.
 298          */
 299         snprintf(adapter->msix_info[MSIX_FW].desc, namelen,
 300                  "%s-FWeventq", adapter->name);
 301         adapter->msix_info[MSIX_FW].desc[namelen] = 0;
 302
 303         /*
 304          * Ethernet queues.
 305          */
 306         for_each_port(adapter, pidx) {
 307                 struct net_device *dev = adapter->port[pidx];
 308                 const struct port_info *pi = netdev_priv(dev);
 309                 int qs, msi;
 310
 311                 for (qs = 0, msi = MSIX_IQFLINT; qs < pi->nqsets; qs++, msi++) {
 312                         snprintf(adapter->msix_info[msi].desc, namelen,
 313                                  "%s-%d", dev->name, qs);
 314                         adapter->msix_info[msi].desc[namelen] = 0;
 315                 }
 316         }
 317 }
 318
 319 /*
 320  * Request all of our MSI-X resources.
 321  */
 322 static int request_msix_queue_irqs(struct adapter *adapter)
 323 {
 324         struct sge *s = &adapter->sge;
 325         int rxq, msi, err;
 326
 327         /*
 328          * Firmware events.
 329          */
 330         err = request_irq(adapter->msix_info[MSIX_FW].vec, t4vf_sge_intr_msix,
 331                           0, adapter->msix_info[MSIX_FW].desc, &s->fw_evtq);
 332         if (err)
 333                 return err;
 334
 335         /*
 336          * Ethernet queues.
 337          */
 338         msi = MSIX_IQFLINT;
 339         for_each_ethrxq(s, rxq) {
 340                 err = request_irq(adapter->msix_info[msi].vec,
 341                                   t4vf_sge_intr_msix, 0,
 342                                   adapter->msix_info[msi].desc,
 343                                   &s->ethrxq[rxq].rspq);
 344                 if (err)
 345                         goto err_free_irqs;
 346                 msi++;
 347         }
 348         return 0;
 349
 350 err_free_irqs:
 351         while (--rxq >= 0)
 352                 free_irq(adapter->msix_info[--msi].vec, &s->ethrxq[rxq].rspq);
 353         free_irq(adapter->msix_info[MSIX_FW].vec, &s->fw_evtq);
 354         return err;
 355 }
 356
 357 /*
 358  * Free our MSI-X resources.
 359  */
 360 static void free_msix_queue_irqs(struct adapter *adapter)
 361 {
 362         struct sge *s = &adapter->sge;
 363         int rxq, msi;
 364
 365         free_irq(adapter->msix_info[MSIX_FW].vec, &s->fw_evtq);
 366         msi = MSIX_IQFLINT;
 367         for_each_ethrxq(s, rxq)
 368                 free_irq(adapter->msix_info[msi++].vec,
 369                          &s->ethrxq[rxq].rspq);
 370 }
 371
 372 /*
 373  * Turn on NAPI and start up interrupts on a response queue.
 374  */
 375 static void qenable(struct sge_rspq *rspq)
 376 {
 377         napi_enable(&rspq->napi);
 378
 379         /*
 380          * 0-increment the Going To Sleep register to start the timer and
 381          * enable interrupts.
 382          */
 383         t4_write_reg(rspq->adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS,
 384                      CIDXINC_V(0) |
 385                      SEINTARM_V(rspq->intr_params) |
 386                      INGRESSQID_V(rspq->cntxt_id));
 387 }
 388
 389 /*
 390  * Enable NAPI scheduling and interrupt generation for all Receive Queues.
 391  */
 392 static void enable_rx(struct adapter *adapter)
 393 {
 394         int rxq;
 395         struct sge *s = &adapter->sge;
 396
 397         for_each_ethrxq(s, rxq)
 398                 qenable(&s->ethrxq[rxq].rspq);
 399         qenable(&s->fw_evtq);
 400
 401         /*
 402          * The interrupt queue doesn't use NAPI so we do the 0-increment of
 403          * its Going To Sleep register here to get it started.
 404          */
 405         if (adapter->flags & USING_MSI)
 406                 t4_write_reg(adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS,
 407                              CIDXINC_V(0) |
 408                              SEINTARM_V(s->intrq.intr_params) |
 409                              INGRESSQID_V(s->intrq.cntxt_id));
 410
 411 }
 412
 413 /*
 414  * Wait until all NAPI handlers are descheduled.
 415  */
 416 static void quiesce_rx(struct adapter *adapter)
 417 {
 418         struct sge *s = &adapter->sge;
 419         int rxq;
 420
 421         for_each_ethrxq(s, rxq)
 422                 napi_disable(&s->ethrxq[rxq].rspq.napi);
 423         napi_disable(&s->fw_evtq.napi);
 424 }
 425
 426 /*
 427  * Response queue handler for the firmware event queue.
 428  */
 429 static int fwevtq_handler(struct sge_rspq *rspq, const __be64 *rsp,
 430                           const struct pkt_gl *gl)
 431 {
 432         /*
 433          * Extract response opcode and get pointer to CPL message body.
 434          */
 435         struct adapter *adapter = rspq->adapter;
 436         u8 opcode = ((const struct rss_header *)rsp)->opcode;
 437         void *cpl = (void *)(rsp + 1);
 438
 439         switch (opcode) {
 440         case CPL_FW6_MSG: {
 441                 /*
 442                  * We've received an asynchronous message from the firmware.
 443                  */
 444                 const struct cpl_fw6_msg *fw_msg = cpl;
 445                 if (fw_msg->type == FW6_TYPE_CMD_RPL)
 446                         t4vf_handle_fw_rpl(adapter, fw_msg->data);
 447                 break;
 448         }
 449
 450         case CPL_FW4_MSG: {
 451                 /* FW can send EGR_UPDATEs encapsulated in a CPL_FW4_MSG.
 452                  */
 453                 const struct cpl_sge_egr_update *p = (void *)(rsp + 3);
 454                 opcode = CPL_OPCODE_G(ntohl(p->opcode_qid));
 455                 if (opcode != CPL_SGE_EGR_UPDATE) {
 456                         dev_err(adapter->pdev_dev, "unexpected FW4/CPL %#x on FW event queue\n"
 457                                 , opcode);
 458                         break;
 459                 }
 460                 cpl = (void *)p;
 461                 /*FALLTHROUGH*/
 462         }
 463
 464         case CPL_SGE_EGR_UPDATE: {
 465                 /*
 466                  * We've received an Egress Queue Status Update message.  We
 467                  * get these, if the SGE is configured to send these when the
 468                  * firmware passes certain points in processing our TX
 469                  * Ethernet Queue or if we make an explicit request for one.
 470                  * We use these updates to determine when we may need to
 471                  * restart a TX Ethernet Queue which was stopped for lack of
 472                  * free TX Queue Descriptors ...
 473                  */
 474                 const struct cpl_sge_egr_update *p = cpl;
 475                 unsigned int qid = EGR_QID_G(be32_to_cpu(p->opcode_qid));
 476                 struct sge *s = &adapter->sge;
 477                 struct sge_txq *tq;
 478                 struct sge_eth_txq *txq;
 479                 unsigned int eq_idx;
 480
 481                 /*
 482                  * Perform sanity checking on the Queue ID to make sure it
 483                  * really refers to one of our TX Ethernet Egress Queues which
 484                  * is active and matches the queue's ID.  None of these error
 485                  * conditions should ever happen so we may want to either make
 486                  * them fatal and/or conditionalized under DEBUG.
 487                  */
 488                 eq_idx = EQ_IDX(s, qid);
 489                 if (unlikely(eq_idx >= MAX_EGRQ)) {
 490                         dev_err(adapter->pdev_dev,
 491                                 "Egress Update QID %d out of range\n", qid);
 492                         break;
 493                 }
 494                 tq = s->egr_map[eq_idx];
 495                 if (unlikely(tq == NULL)) {
 496                         dev_err(adapter->pdev_dev,
 497                                 "Egress Update QID %d TXQ=NULL\n", qid);
 498                         break;
 499                 }
 500                 txq = container_of(tq, struct sge_eth_txq, q);
 501                 if (unlikely(tq->abs_id != qid)) {
 502                         dev_err(adapter->pdev_dev,
 503                                 "Egress Update QID %d refers to TXQ %d\n",
 504                                 qid, tq->abs_id);
 505                         break;
 506                 }
 507
 508                 /*
 509                  * Restart a stopped TX Queue which has less than half of its
 510                  * TX ring in use ...
 511                  */
 512                 txq->q.restarts++;
 513                 netif_tx_wake_queue(txq->txq);
 514                 break;
 515         }
 516
 517         default:
 518                 dev_err(adapter->pdev_dev,
 519                         "unexpected CPL %#x on FW event queue\n", opcode);
 520         }
 521
 522         return 0;
 523 }
 524
 525 /*
 526  * Allocate SGE TX/RX response queues.  Determine how many sets of SGE queues
 527  * to use and initializes them.  We support multiple "Queue Sets" per port if
 528  * we have MSI-X, otherwise just one queue set per port.
 529  */
 530 static int setup_sge_queues(struct adapter *adapter)
 531 {
 532         struct sge *s = &adapter->sge;
 533         int err, pidx, msix;
 534
 535         /*
 536          * Clear "Queue Set" Free List Starving and TX Queue Mapping Error
 537          * state.
 538          */
 539         bitmap_zero(s->starving_fl, MAX_EGRQ);
 540
 541         /*
 542          * If we're using MSI interrupt mode we need to set up a "forwarded
 543          * interrupt" queue which we'll set up with our MSI vector.  The rest
 544          * of the ingress queues will be set up to forward their interrupts to
 545          * this queue ...  This must be first since t4vf_sge_alloc_rxq() uses
 546          * the intrq's queue ID as the interrupt forwarding queue for the
 547          * subsequent calls ...
 548          */
 549         if (adapter->flags & USING_MSI) {
 550                 err = t4vf_sge_alloc_rxq(adapter, &s->intrq, false,
 551                                          adapter->port[0], 0, NULL, NULL);
 552                 if (err)
 553                         goto err_free_queues;
 554         }
 555
 556         /*
 557          * Allocate our ingress queue for asynchronous firmware messages.
 558          */
 559         err = t4vf_sge_alloc_rxq(adapter, &s->fw_evtq, true, adapter->port[0],
 560                                  MSIX_FW, NULL, fwevtq_handler);
 561         if (err)
 562                 goto err_free_queues;
 563
 564         /*
 565          * Allocate each "port"'s initial Queue Sets.  These can be changed
 566          * later on ... up to the point where any interface on the adapter is
 567          * brought up at which point lots of things get nailed down
 568          * permanently ...
 569          */
 570         msix = MSIX_IQFLINT;
 571         for_each_port(adapter, pidx) {
 572                 struct net_device *dev = adapter->port[pidx];
 573                 struct port_info *pi = netdev_priv(dev);
 574                 struct sge_eth_rxq *rxq = &s->ethrxq[pi->first_qset];
 575                 struct sge_eth_txq *txq = &s->ethtxq[pi->first_qset];
 576                 int qs;
 577
 578                 for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
 579                         err = t4vf_sge_alloc_rxq(adapter, &rxq->rspq, false,
 580                                                  dev, msix++,
 581                                                  &rxq->fl, t4vf_ethrx_handler);
 582                         if (err)
 583                                 goto err_free_queues;
 584
 585                         err = t4vf_sge_alloc_eth_txq(adapter, txq, dev,
 586                                              netdev_get_tx_queue(dev, qs),
 587                                              s->fw_evtq.cntxt_id);
 588                         if (err)
 589                                 goto err_free_queues;
 590
 591                         rxq->rspq.idx = qs;
 592                         memset(&rxq->stats, 0, sizeof(rxq->stats));
 593                 }
 594         }
 595
 596         /*
 597          * Create the reverse mappings for the queues.
 598          */
 599         s->egr_base = s->ethtxq[0].q.abs_id - s->ethtxq[0].q.cntxt_id;
 600         s->ingr_base = s->ethrxq[0].rspq.abs_id - s->ethrxq[0].rspq.cntxt_id;
 601         IQ_MAP(s, s->fw_evtq.abs_id) = &s->fw_evtq;
 602         for_each_port(adapter, pidx) {
 603                 struct net_device *dev = adapter->port[pidx];
 604                 struct port_info *pi = netdev_priv(dev);
 605                 struct sge_eth_rxq *rxq = &s->ethrxq[pi->first_qset];
 606                 struct sge_eth_txq *txq = &s->ethtxq[pi->first_qset];
 607                 int qs;
 608
 609                 for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
 610                         IQ_MAP(s, rxq->rspq.abs_id) = &rxq->rspq;
 611                         EQ_MAP(s, txq->q.abs_id) = &txq->q;
 612
 613                         /*
 614                          * The FW_IQ_CMD doesn't return the Absolute Queue IDs
 615                          * for Free Lists but since all of the Egress Queues
 616                          * (including Free Lists) have Relative Queue IDs
 617                          * which are computed as Absolute - Base Queue ID, we
 618                          * can synthesize the Absolute Queue IDs for the Free
 619                          * Lists.  This is useful for debugging purposes when
 620                          * we want to dump Queue Contexts via the PF Driver.
 621                          */
 622                         rxq->fl.abs_id = rxq->fl.cntxt_id + s->egr_base;
 623                         EQ_MAP(s, rxq->fl.abs_id) = &rxq->fl;
 624                 }
 625         }
 626         return 0;
 627
 628 err_free_queues:
 629         t4vf_free_sge_resources(adapter);
 630         return err;
 631 }
 632
 633 /*
 634  * Set up Receive Side Scaling (RSS) to distribute packets to multiple receive
 635  * queues.  We configure the RSS CPU lookup table to distribute to the number
 636  * of HW receive queues, and the response queue lookup table to narrow that
 637  * down to the response queues actually configured for each "port" (Virtual
 638  * Interface).  We always configure the RSS mapping for all ports since the
 639  * mapping table has plenty of entries.
 640  */
 641 static int setup_rss(struct adapter *adapter)
 642 {
 643         int pidx;
 644
 645         for_each_port(adapter, pidx) {
 646                 struct port_info *pi = adap2pinfo(adapter, pidx);
 647                 struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[pi->first_qset];
 648                 u16 rss[MAX_PORT_QSETS];
 649                 int qs, err;
 650
 651                 for (qs = 0; qs < pi->nqsets; qs++)
 652                         rss[qs] = rxq[qs].rspq.abs_id;
 653
 654                 err = t4vf_config_rss_range(adapter, pi->viid,
 655                                             0, pi->rss_size, rss, pi->nqsets);
 656                 if (err)
 657                         return err;
 658
 659                 /*
 660                  * Perform Global RSS Mode-specific initialization.
 661                  */
 662                 switch (adapter->params.rss.mode) {
 663                 case FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL:
 664                         /*
 665                          * If Tunnel All Lookup isn't specified in the global
 666                          * RSS Configuration, then we need to specify a
 667                          * default Ingress Queue for any ingress packets which
 668                          * aren't hashed.  We'll use our first ingress queue
 669                          * ...
 670                          */
 671                         if (!adapter->params.rss.u.basicvirtual.tnlalllookup) {
 672                                 union rss_vi_config config;
 673                                 err = t4vf_read_rss_vi_config(adapter,
 674                                                               pi->viid,
 675                                                               &config);
 676                                 if (err)
 677                                         return err;
 678                                 config.basicvirtual.defaultq =
 679                                         rxq[0].rspq.abs_id;
 680                                 err = t4vf_write_rss_vi_config(adapter,
 681                                                                pi->viid,
 682                                                                &config);
 683                                 if (err)
 684                                         return err;
 685                         }
 686                         break;
 687                 }
 688         }
 689
 690         return 0;
 691 }
 692
 693 /*
 694  * Bring the adapter up.  Called whenever we go from no "ports" open to having
 695  * one open.  This function performs the actions necessary to make an adapter
 696  * operational, such as completing the initialization of HW modules, and
 697  * enabling interrupts.  Must be called with the rtnl lock held.  (Note that
 698  * this is called "cxgb_up" in the PF Driver.)
 699  */
 700 static int adapter_up(struct adapter *adapter)
 701 {
 702         int err;
 703
 704         /*
 705          * If this is the first time we've been called, perform basic
 706          * adapter setup.  Once we've done this, many of our adapter
 707          * parameters can no longer be changed ...
 708          */
 709         if ((adapter->flags & FULL_INIT_DONE) == 0) {
 710                 err = setup_sge_queues(adapter);
 711                 if (err)
 712                         return err;
 713                 err = setup_rss(adapter);
 714                 if (err) {
 715                         t4vf_free_sge_resources(adapter);
 716                         return err;
 717                 }
 718
 719                 if (adapter->flags & USING_MSIX)
 720                         name_msix_vecs(adapter);
 721                 adapter->flags |= FULL_INIT_DONE;
 722         }
 723
 724         /*
 725          * Acquire our interrupt resources.  We only support MSI-X and MSI.
 726          */
 727         BUG_ON((adapter->flags & (USING_MSIX|USING_MSI)) == 0);
 728         if (adapter->flags & USING_MSIX)
 729                 err = request_msix_queue_irqs(adapter);
 730         else
 731                 err = request_irq(adapter->pdev->irq,
 732                                   t4vf_intr_handler(adapter), 0,
 733                                   adapter->name, adapter);
 734         if (err) {
 735                 dev_err(adapter->pdev_dev, "request_irq failed, err %d\n",
 736                         err);
 737                 return err;
 738         }
 739
 740         /*
 741          * Enable NAPI ingress processing and return success.
 742          */
 743         enable_rx(adapter);
 744         t4vf_sge_start(adapter);
 745
 746         /* Initialize hash mac addr list*/
 747         INIT_LIST_HEAD(&adapter->mac_hlist);
 748         return 0;
 749 }
 750
 751 /*
 752  * Bring the adapter down.  Called whenever the last "port" (Virtual
 753  * Interface) closed.  (Note that this routine is called "cxgb_down" in the PF
 754  * Driver.)
 755  */
 756 static void adapter_down(struct adapter *adapter)
 757 {
 758         /*
 759          * Free interrupt resources.
 760          */
 761         if (adapter->flags & USING_MSIX)
 762                 free_msix_queue_irqs(adapter);
 763         else
 764                 free_irq(adapter->pdev->irq, adapter);
 765
 766         /*
 767          * Wait for NAPI handlers to finish.
 768          */
 769         quiesce_rx(adapter);
 770 }
 771
 772 /*
 773  * Start up a net device.
 774  */
 775 static int cxgb4vf_open(struct net_device *dev)
 776 {
 777         int err;
 778         struct port_info *pi = netdev_priv(dev);
 779         struct adapter *adapter = pi->adapter;
 780
 781         /*
 782          * If this is the first interface that we're opening on the "adapter",
 783          * bring the "adapter" up now.
 784          */
 785         if (adapter->open_device_map == 0) {
 786                 err = adapter_up(adapter);
 787                 if (err)
 788                         return err;
 789         }
 790
 791         /*
 792          * Note that this interface is up and start everything up ...
 793          */
 794         err = link_start(dev);
 795         if (err)
 796                 goto err_unwind;
 797
 798         netif_tx_start_all_queues(dev);
 799         set_bit(pi->port_id, &adapter->open_device_map);
 800         return 0;
 801
 802 err_unwind:
 803         if (adapter->open_device_map == 0)
 804                 adapter_down(adapter);
 805         return err;
 806 }
 807
 808 /*
 809  * Shut down a net device.  This routine is called "cxgb_close" in the PF
 810  * Driver ...
 811  */
 812 static int cxgb4vf_stop(struct net_device *dev)
 813 {
 814         struct port_info *pi = netdev_priv(dev);
 815         struct adapter *adapter = pi->adapter;
 816
 817         netif_tx_stop_all_queues(dev);
 818         netif_carrier_off(dev);
 819         t4vf_enable_vi(adapter, pi->viid, false, false);
 820         pi->link_cfg.link_ok = 0;
 821
 822         clear_bit(pi->port_id, &adapter->open_device_map);
 823         if (adapter->open_device_map == 0)
 824                 adapter_down(adapter);
 825         return 0;
 826 }
 827
 828 /*
 829  * Translate our basic statistics into the standard "ifconfig" statistics.
 830  */
 831 static struct net_device_stats *cxgb4vf_get_stats(struct net_device *dev)
 832 {
 833         struct t4vf_port_stats stats;
 834         struct port_info *pi = netdev2pinfo(dev);
 835         struct adapter *adapter = pi->adapter;
 836         struct net_device_stats *ns = &dev->stats;
 837         int err;
 838
 839         spin_lock(&adapter->stats_lock);
 840         err = t4vf_get_port_stats(adapter, pi->pidx, &stats);
 841         spin_unlock(&adapter->stats_lock);
 842
 843         memset(ns, 0, sizeof(*ns));
 844         if (err)
 845                 return ns;
 846
 847         ns->tx_bytes = (stats.tx_bcast_bytes + stats.tx_mcast_bytes +
 848                         stats.tx_ucast_bytes + stats.tx_offload_bytes);
 849         ns->tx_packets = (stats.tx_bcast_frames + stats.tx_mcast_frames +
 850                           stats.tx_ucast_frames + stats.tx_offload_frames);
 851         ns->rx_bytes = (stats.rx_bcast_bytes + stats.rx_mcast_bytes +
 852                         stats.rx_ucast_bytes);
 853         ns->rx_packets = (stats.rx_bcast_frames + stats.rx_mcast_frames +
 854                           stats.rx_ucast_frames);
 855         ns->multicast = stats.rx_mcast_frames;
 856         ns->tx_errors = stats.tx_drop_frames;
 857         ns->rx_errors = stats.rx_err_frames;
 858
 859         return ns;
 860 }
 861
 862 static inline int cxgb4vf_set_addr_hash(struct port_info *pi)
 863 {
 864         struct adapter *adapter = pi->adapter;
 865         u64 vec = 0;
 866         bool ucast = false;
 867         struct hash_mac_addr *entry;
 868
 869         /* Calculate the hash vector for the updated list and program it */
 870         list_for_each_entry(entry, &adapter->mac_hlist, list) {
 871                 ucast |= is_unicast_ether_addr(entry->addr);
 872                 vec |= (1ULL << hash_mac_addr(entry->addr));
 873         }
 874         return t4vf_set_addr_hash(adapter, pi->viid, ucast, vec, false);
 875 }
 876
 877 static int cxgb4vf_mac_sync(struct net_device *netdev, const u8 *mac_addr)
 878 {
 879         struct port_info *pi = netdev_priv(netdev);
 880         struct adapter *adapter = pi->adapter;
 881         int ret;
 882         u64 mhash = 0;
 883         u64 uhash = 0;
 884         bool free = false;
 885         bool ucast = is_unicast_ether_addr(mac_addr);
 886         const u8 *maclist[1] = {mac_addr};
 887         struct hash_mac_addr *new_entry;
 888
 889         ret = t4vf_alloc_mac_filt(adapter, pi->viid, free, 1, maclist,
 890                                   NULL, ucast ? &uhash : &mhash, false);
 891         if (ret < 0)
 892                 goto out;
 893         /* if hash != 0, then add the addr to hash addr list
 894          * so on the end we will calculate the hash for the
 895          * list and program it
 896          */
 897         if (uhash || mhash) {
 898                 new_entry = kzalloc(sizeof(*new_entry), GFP_ATOMIC);
 899                 if (!new_entry)
 900                         return -ENOMEM;
 901                 ether_addr_copy(new_entry->addr, mac_addr);
 902                 list_add_tail(&new_entry->list, &adapter->mac_hlist);
 903                 ret = cxgb4vf_set_addr_hash(pi);
 904         }
 905 out:
 906         return ret < 0 ? ret : 0;
 907 }
 908
 909 static int cxgb4vf_mac_unsync(struct net_device *netdev, const u8 *mac_addr)
 910 {
 911         struct port_info *pi = netdev_priv(netdev);
 912         struct adapter *adapter = pi->adapter;
 913         int ret;
 914         const u8 *maclist[1] = {mac_addr};
 915         struct hash_mac_addr *entry, *tmp;
 916
 917         /* If the MAC address to be removed is in the hash addr
 918          * list, delete it from the list and update hash vector
 919          */
 920         list_for_each_entry_safe(entry, tmp, &adapter->mac_hlist, list) {
 921                 if (ether_addr_equal(entry->addr, mac_addr)) {
 922                         list_del(&entry->list);
 923                         kfree(entry);
 924                         return cxgb4vf_set_addr_hash(pi);
 925                 }
 926         }
 927
 928         ret = t4vf_free_mac_filt(adapter, pi->viid, 1, maclist, false);
 929         return ret < 0 ? -EINVAL : 0;
 930 }
 931
 932 /*
 933  * Set RX properties of a port, such as promiscruity, address filters, and MTU.
 934  * If @mtu is -1 it is left unchanged.
 935  */
 936 static int set_rxmode(struct net_device *dev, int mtu, bool sleep_ok)
 937 {
 938         struct port_info *pi = netdev_priv(dev);
 939
 940         __dev_uc_sync(dev, cxgb4vf_mac_sync, cxgb4vf_mac_unsync);
 941         __dev_mc_sync(dev, cxgb4vf_mac_sync, cxgb4vf_mac_unsync);
 942         return t4vf_set_rxmode(pi->adapter, pi->viid, -1,
 943                                (dev->flags & IFF_PROMISC) != 0,
 944                                (dev->flags & IFF_ALLMULTI) != 0,
 945                                1, -1, sleep_ok);
 946 }
 947
 948 /*
 949  * Set the current receive modes on the device.
 950  */
 951 static void cxgb4vf_set_rxmode(struct net_device *dev)
 952 {
 953         /* unfortunately we can't return errors to the stack */
 954         set_rxmode(dev, -1, false);
 955 }
 956
 957 /*
 958  * Find the entry in the interrupt holdoff timer value array which comes
 959  * closest to the specified interrupt holdoff value.
 960  */
 961 static int closest_timer(const struct sge *s, int us)
 962 {
 963         int i, timer_idx = 0, min_delta = INT_MAX;
 964
 965         for (i = 0; i < ARRAY_SIZE(s->timer_val); i++) {
 966                 int delta = us - s->timer_val[i];
 967                 if (delta < 0)
 968                         delta = -delta;
 969                 if (delta < min_delta) {
 970                         min_delta = delta;
 971                         timer_idx = i;
 972                 }
 973         }
 974         return timer_idx;
 975 }
 976
 977 static int closest_thres(const struct sge *s, int thres)
 978 {
 979         int i, delta, pktcnt_idx = 0, min_delta = INT_MAX;
 980
 981         for (i = 0; i < ARRAY_SIZE(s->counter_val); i++) {
 982                 delta = thres - s->counter_val[i];
 983                 if (delta < 0)
 984                         delta = -delta;
 985                 if (delta < min_delta) {
 986                         min_delta = delta;
 987                         pktcnt_idx = i;
 988                 }
 989         }
 990         return pktcnt_idx;
 991 }
 992
 993 /*
 994  * Return a queue's interrupt hold-off time in us.  0 means no timer.
 995  */
 996 static unsigned int qtimer_val(const struct adapter *adapter,
 997                                const struct sge_rspq *rspq)
 998 {
 999         unsigned int timer_idx = QINTR_TIMER_IDX_G(rspq->intr_params);
1000
1001         return timer_idx < SGE_NTIMERS
1002                 ? adapter->sge.timer_val[timer_idx]
1003                 : 0;
1004 }
1005
1006 /**
1007  *      set_rxq_intr_params - set a queue's interrupt holdoff parameters
1008  *      @adapter: the adapter
1009  *      @rspq: the RX response queue
1010  *      @us: the hold-off time in us, or 0 to disable timer
1011  *      @cnt: the hold-off packet count, or 0 to disable counter
1012  *
1013  *      Sets an RX response queue's interrupt hold-off time and packet count.
1014  *      At least one of the two needs to be enabled for the queue to generate
1015  *      interrupts.
1016  */
1017 static int set_rxq_intr_params(struct adapter *adapter, struct sge_rspq *rspq,
1018                                unsigned int us, unsigned int cnt)
1019 {
1020         unsigned int timer_idx;
1021
1022         /*
1023          * If both the interrupt holdoff timer and count are specified as
1024          * zero, default to a holdoff count of 1 ...
1025          */
1026         if ((us | cnt) == 0)
1027                 cnt = 1;
1028
1029         /*
1030          * If an interrupt holdoff count has been specified, then find the
1031          * closest configured holdoff count and use that.  If the response
1032          * queue has already been created, then update its queue context
1033          * parameters ...
1034          */
1035         if (cnt) {
1036                 int err;
1037                 u32 v, pktcnt_idx;
1038
1039                 pktcnt_idx = closest_thres(&adapter->sge, cnt);
1040                 if (rspq->desc && rspq->pktcnt_idx != pktcnt_idx) {
1041                         v = FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) |
1042                             FW_PARAMS_PARAM_X_V(
1043                                         FW_PARAMS_PARAM_DMAQ_IQ_INTCNTTHRESH) |
1044                             FW_PARAMS_PARAM_YZ_V(rspq->cntxt_id);
1045                         err = t4vf_set_params(adapter, 1, &v, &pktcnt_idx);
1046                         if (err)
1047                                 return err;
1048                 }
1049                 rspq->pktcnt_idx = pktcnt_idx;
1050         }
1051
1052         /*
1053          * Compute the closest holdoff timer index from the supplied holdoff
1054          * timer value.
1055          */
1056         timer_idx = (us == 0
1057                      ? SGE_TIMER_RSTRT_CNTR
1058                      : closest_timer(&adapter->sge, us));
1059
1060         /*
1061          * Update the response queue's interrupt coalescing parameters and
1062          * return success.
1063          */
1064         rspq->intr_params = (QINTR_TIMER_IDX_V(timer_idx) |
1065                              QINTR_CNT_EN_V(cnt > 0));
1066         return 0;
1067 }
1068
1069 /*
1070  * Return a version number to identify the type of adapter.  The scheme is:
1071  * - bits 0..9: chip version
1072  * - bits 10..15: chip revision
1073  */
1074 static inline unsigned int mk_adap_vers(const struct adapter *adapter)
1075 {
1076         /*
1077          * Chip version 4, revision 0x3f (cxgb4vf).
1078          */
1079         return CHELSIO_CHIP_VERSION(adapter->params.chip) | (0x3f << 10);
1080 }
1081
/*
 * Execute the specified ioctl command.
 *
 * No command is handled: the VF Driver has no access to the other common
 * Ethernet device ioctl()'s (reading/writing PHY registers, etc.), so
 * every request is answered with -EOPNOTSUPP.
 */
static int cxgb4vf_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
        return -EOPNOTSUPP;
}
1102
1103 /*
1104  * Change the device's MTU.
1105  */
1106 static int cxgb4vf_change_mtu(struct net_device *dev, int new_mtu)
1107 {
1108         int ret;
1109         struct port_info *pi = netdev_priv(dev);
1110
1111         /* accommodate SACK */
1112         if (new_mtu < 81)
1113                 return -EINVAL;
1114
1115         ret = t4vf_set_rxmode(pi->adapter, pi->viid, new_mtu,
1116                               -1, -1, -1, -1, true);
1117         if (!ret)
1118                 dev->mtu = new_mtu;
1119         return ret;
1120 }
1121
1122 static netdev_features_t cxgb4vf_fix_features(struct net_device *dev,
1123         netdev_features_t features)
1124 {
1125         /*
1126          * Since there is no support for separate rx/tx vlan accel
1127          * enable/disable make sure tx flag is always in same state as rx.
1128          */
1129         if (features & NETIF_F_HW_VLAN_CTAG_RX)
1130                 features |= NETIF_F_HW_VLAN_CTAG_TX;
1131         else
1132                 features &= ~NETIF_F_HW_VLAN_CTAG_TX;
1133
1134         return features;
1135 }
1136
1137 static int cxgb4vf_set_features(struct net_device *dev,
1138         netdev_features_t features)
1139 {
1140         struct port_info *pi = netdev_priv(dev);
1141         netdev_features_t changed = dev->features ^ features;
1142
1143         if (changed & NETIF_F_HW_VLAN_CTAG_RX)
1144                 t4vf_set_rxmode(pi->adapter, pi->viid, -1, -1, -1, -1,
1145                                 features & NETIF_F_HW_VLAN_CTAG_TX, 0);
1146
1147         return 0;
1148 }
1149
1150 /*
1151  * Change the devices MAC address.
1152  */
1153 static int cxgb4vf_set_mac_addr(struct net_device *dev, void *_addr)
1154 {
1155         int ret;
1156         struct sockaddr *addr = _addr;
1157         struct port_info *pi = netdev_priv(dev);
1158
1159         if (!is_valid_ether_addr(addr->sa_data))
1160                 return -EADDRNOTAVAIL;
1161
1162         ret = t4vf_change_mac(pi->adapter, pi->viid, pi->xact_addr_filt,
1163                               addr->sa_data, true);
1164         if (ret < 0)
1165                 return ret;
1166
1167         memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
1168         pi->xact_addr_filt = ret;
1169         return 0;
1170 }
1171
#ifdef CONFIG_NET_POLL_CONTROLLER
/*
 * Poll every receive queue of the port by invoking the interrupt handlers
 * directly.  This runs outside of normal interrupt context.
 */
static void cxgb4vf_poll_controller(struct net_device *dev)
{
        struct port_info *pi = netdev_priv(dev);
        struct adapter *adapter = pi->adapter;
        struct sge_eth_rxq *rxq;
        int remaining;

        /* Without MSI-X there is a single handler for the whole adapter. */
        if (!(adapter->flags & USING_MSIX)) {
                t4vf_intr_handler(adapter)(0, adapter);
                return;
        }

        /* With MSI-X, run the handler of each Queue Set of this port. */
        rxq = &adapter->sge.ethrxq[pi->first_qset];
        remaining = pi->nqsets;
        while (remaining) {
                t4vf_sge_intr_msix(0, &rxq->rspq);
                rxq++;
                remaining--;
        }
}
#endif
1195
1196 /*
1197  * Ethtool operations.
1198  * ===================
1199  *
1200  * Note that we don't support any ethtool operations which change the physical
1201  * state of the port to which we're linked.
1202  */
1203
1204 /**
1205  *      from_fw_port_mod_type - translate Firmware Port/Module type to Ethtool
1206  *      @port_type: Firmware Port Type
1207  *      @mod_type: Firmware Module Type
1208  *
1209  *      Translate Firmware Port/Module type to Ethtool Port Type.
1210  */
1211 static int from_fw_port_mod_type(enum fw_port_type port_type,
1212                                  enum fw_port_module_type mod_type)
1213 {
1214         if (port_type == FW_PORT_TYPE_BT_SGMII ||
1215             port_type == FW_PORT_TYPE_BT_XFI ||
1216             port_type == FW_PORT_TYPE_BT_XAUI) {
1217                 return PORT_TP;
1218         } else if (port_type == FW_PORT_TYPE_FIBER_XFI ||
1219                    port_type == FW_PORT_TYPE_FIBER_XAUI) {
1220                 return PORT_FIBRE;
1221         } else if (port_type == FW_PORT_TYPE_SFP ||
1222                    port_type == FW_PORT_TYPE_QSFP_10G ||
1223                    port_type == FW_PORT_TYPE_QSA ||
1224                    port_type == FW_PORT_TYPE_QSFP) {
1225                 if (mod_type == FW_PORT_MOD_TYPE_LR ||
1226                     mod_type == FW_PORT_MOD_TYPE_SR ||
1227                     mod_type == FW_PORT_MOD_TYPE_ER ||
1228                     mod_type == FW_PORT_MOD_TYPE_LRM)
1229                         return PORT_FIBRE;
1230                 else if (mod_type == FW_PORT_MOD_TYPE_TWINAX_PASSIVE ||
1231                          mod_type == FW_PORT_MOD_TYPE_TWINAX_ACTIVE)
1232                         return PORT_DA;
1233                 else
1234                         return PORT_OTHER;
1235         }
1236
1237         return PORT_OTHER;
1238 }
1239
1240 /**
1241  *      fw_caps_to_lmm - translate Firmware to ethtool Link Mode Mask
1242  *      @port_type: Firmware Port Type
1243  *      @fw_caps: Firmware Port Capabilities
1244  *      @link_mode_mask: ethtool Link Mode Mask
1245  *
1246  *      Translate a Firmware Port Capabilities specification to an ethtool
1247  *      Link Mode Mask.
1248  */
1249 static void fw_caps_to_lmm(enum fw_port_type port_type,
1250                            unsigned int fw_caps,
1251                            unsigned long *link_mode_mask)
1252 {
1253         #define SET_LMM(__lmm_name) __set_bit(ETHTOOL_LINK_MODE_ ## __lmm_name\
1254                          ## _BIT, link_mode_mask)
1255
1256         #define FW_CAPS_TO_LMM(__fw_name, __lmm_name) \
1257                 do { \
1258                         if (fw_caps & FW_PORT_CAP_ ## __fw_name) \
1259                                 SET_LMM(__lmm_name); \
1260                 } while (0)
1261
1262         switch (port_type) {
1263         case FW_PORT_TYPE_BT_SGMII:
1264         case FW_PORT_TYPE_BT_XFI:
1265         case FW_PORT_TYPE_BT_XAUI:
1266                 SET_LMM(TP);
1267                 FW_CAPS_TO_LMM(SPEED_100M, 100baseT_Full);
1268                 FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
1269                 FW_CAPS_TO_LMM(SPEED_10G, 10000baseT_Full);
1270                 break;
1271
1272         case FW_PORT_TYPE_KX4:
1273         case FW_PORT_TYPE_KX:
1274                 SET_LMM(Backplane);
1275                 FW_CAPS_TO_LMM(SPEED_1G, 1000baseKX_Full);
1276                 FW_CAPS_TO_LMM(SPEED_10G, 10000baseKX4_Full);
1277                 break;
1278
1279         case FW_PORT_TYPE_KR:
1280                 SET_LMM(Backplane);
1281                 SET_LMM(10000baseKR_Full);
1282                 break;
1283
1284         case FW_PORT_TYPE_BP_AP:
1285                 SET_LMM(Backplane);
1286                 SET_LMM(10000baseR_FEC);
1287                 SET_LMM(10000baseKR_Full);
1288                 SET_LMM(1000baseKX_Full);
1289                 break;
1290
1291         case FW_PORT_TYPE_BP4_AP:
1292                 SET_LMM(Backplane);
1293                 SET_LMM(10000baseR_FEC);
1294                 SET_LMM(10000baseKR_Full);
1295                 SET_LMM(1000baseKX_Full);
1296                 SET_LMM(10000baseKX4_Full);
1297                 break;
1298
1299         case FW_PORT_TYPE_FIBER_XFI:
1300         case FW_PORT_TYPE_FIBER_XAUI:
1301         case FW_PORT_TYPE_SFP:
1302         case FW_PORT_TYPE_QSFP_10G:
1303         case FW_PORT_TYPE_QSA:
1304                 SET_LMM(FIBRE);
1305                 FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
1306                 FW_CAPS_TO_LMM(SPEED_10G, 10000baseT_Full);
1307                 break;
1308
1309         case FW_PORT_TYPE_BP40_BA:
1310         case FW_PORT_TYPE_QSFP:
1311                 SET_LMM(FIBRE);
1312                 SET_LMM(40000baseSR4_Full);
1313                 break;
1314
1315         case FW_PORT_TYPE_CR_QSFP:
1316         case FW_PORT_TYPE_SFP28:
1317                 SET_LMM(FIBRE);
1318                 SET_LMM(25000baseCR_Full);
1319                 break;
1320
1321         case FW_PORT_TYPE_KR4_100G:
1322         case FW_PORT_TYPE_CR4_QSFP:
1323                 SET_LMM(FIBRE);
1324                 SET_LMM(100000baseCR4_Full);
1325                 break;
1326
1327         default:
1328                 break;
1329         }
1330
1331         FW_CAPS_TO_LMM(ANEG, Autoneg);
1332         FW_CAPS_TO_LMM(802_3_PAUSE, Pause);
1333         FW_CAPS_TO_LMM(802_3_ASM_DIR, Asym_Pause);
1334
1335         #undef FW_CAPS_TO_LMM
1336         #undef SET_LMM
1337 }
1338
1339 static int cxgb4vf_get_link_ksettings(struct net_device *dev,
1340                                       struct ethtool_link_ksettings
1341                                                         *link_ksettings)
1342 {
1343         const struct port_info *pi = netdev_priv(dev);
1344         struct ethtool_link_settings *base = &link_ksettings->base;
1345
1346         ethtool_link_ksettings_zero_link_mode(link_ksettings, supported);
1347         ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising);
1348         ethtool_link_ksettings_zero_link_mode(link_ksettings, lp_advertising);
1349
1350         base->port = from_fw_port_mod_type(pi->port_type, pi->mod_type);
1351
1352         if (pi->mdio_addr >= 0) {
1353                 base->phy_address = pi->mdio_addr;
1354                 base->mdio_support = (pi->port_type == FW_PORT_TYPE_BT_SGMII
1355                                       ? ETH_MDIO_SUPPORTS_C22
1356                                       : ETH_MDIO_SUPPORTS_C45);
1357         } else {
1358                 base->phy_address = 255;
1359                 base->mdio_support = 0;
1360         }
1361
1362         fw_caps_to_lmm(pi->port_type, pi->link_cfg.supported,
1363                        link_ksettings->link_modes.supported);
1364         fw_caps_to_lmm(pi->port_type, pi->link_cfg.advertising,
1365                        link_ksettings->link_modes.advertising);
1366         fw_caps_to_lmm(pi->port_type, pi->link_cfg.lp_advertising,
1367                        link_ksettings->link_modes.lp_advertising);
1368
1369         if (netif_carrier_ok(dev)) {
1370                 base->speed = pi->link_cfg.speed;
1371                 base->duplex = DUPLEX_FULL;
1372         } else {
1373                 base->speed = SPEED_UNKNOWN;
1374                 base->duplex = DUPLEX_UNKNOWN;
1375         }
1376
1377         base->autoneg = pi->link_cfg.autoneg;
1378         if (pi->link_cfg.supported & FW_PORT_CAP_ANEG)
1379                 ethtool_link_ksettings_add_link_mode(link_ksettings,
1380                                                      supported, Autoneg);
1381         if (pi->link_cfg.autoneg)
1382                 ethtool_link_ksettings_add_link_mode(link_ksettings,
1383                                                      advertising, Autoneg);
1384
1385         return 0;
1386 }
1387
1388 /*
1389  * Return our driver information.
1390  */
1391 static void cxgb4vf_get_drvinfo(struct net_device *dev,
1392                                 struct ethtool_drvinfo *drvinfo)
1393 {
1394         struct adapter *adapter = netdev2adap(dev);
1395
1396         strlcpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver));
1397         strlcpy(drvinfo->version, DRV_VERSION, sizeof(drvinfo->version));
1398         strlcpy(drvinfo->bus_info, pci_name(to_pci_dev(dev->dev.parent)),
1399                 sizeof(drvinfo->bus_info));
1400         snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
1401                  "%u.%u.%u.%u, TP %u.%u.%u.%u",
1402                  FW_HDR_FW_VER_MAJOR_G(adapter->params.dev.fwrev),
1403                  FW_HDR_FW_VER_MINOR_G(adapter->params.dev.fwrev),
1404                  FW_HDR_FW_VER_MICRO_G(adapter->params.dev.fwrev),
1405                  FW_HDR_FW_VER_BUILD_G(adapter->params.dev.fwrev),
1406                  FW_HDR_FW_VER_MAJOR_G(adapter->params.dev.tprev),
1407                  FW_HDR_FW_VER_MINOR_G(adapter->params.dev.tprev),
1408                  FW_HDR_FW_VER_MICRO_G(adapter->params.dev.tprev),
1409                  FW_HDR_FW_VER_BUILD_G(adapter->params.dev.tprev));
1410 }
1411
1412 /*
1413  * Return current adapter message level.
1414  */
1415 static u32 cxgb4vf_get_msglevel(struct net_device *dev)
1416 {
1417         return netdev2adap(dev)->msg_enable;
1418 }
1419
1420 /*
1421  * Set current adapter message level.
1422  */
1423 static void cxgb4vf_set_msglevel(struct net_device *dev, u32 msglevel)
1424 {
1425         netdev2adap(dev)->msg_enable = msglevel;
1426 }
1427
1428 /*
1429  * Return the device's current Queue Set ring size parameters along with the
1430  * allowed maximum values.  Since ethtool doesn't understand the concept of
1431  * multi-queue devices, we just return the current values associated with the
1432  * first Queue Set.
1433  */
1434 static void cxgb4vf_get_ringparam(struct net_device *dev,
1435                                   struct ethtool_ringparam *rp)
1436 {
1437         const struct port_info *pi = netdev_priv(dev);
1438         const struct sge *s = &pi->adapter->sge;
1439
1440         rp->rx_max_pending = MAX_RX_BUFFERS;
1441         rp->rx_mini_max_pending = MAX_RSPQ_ENTRIES;
1442         rp->rx_jumbo_max_pending = 0;
1443         rp->tx_max_pending = MAX_TXQ_ENTRIES;
1444
1445         rp->rx_pending = s->ethrxq[pi->first_qset].fl.size - MIN_FL_RESID;
1446         rp->rx_mini_pending = s->ethrxq[pi->first_qset].rspq.size;
1447         rp->rx_jumbo_pending = 0;
1448         rp->tx_pending = s->ethtxq[pi->first_qset].q.size;
1449 }
1450
1451 /*
1452  * Set the Queue Set ring size parameters for the device.  Again, since
1453  * ethtool doesn't allow for the concept of multiple queues per device, we'll
1454  * apply these new values across all of the Queue Sets associated with the
1455  * device -- after vetting them of course!
1456  */
1457 static int cxgb4vf_set_ringparam(struct net_device *dev,
1458                                  struct ethtool_ringparam *rp)
1459 {
1460         const struct port_info *pi = netdev_priv(dev);
1461         struct adapter *adapter = pi->adapter;
1462         struct sge *s = &adapter->sge;
1463         int qs;
1464
1465         if (rp->rx_pending > MAX_RX_BUFFERS ||
1466             rp->rx_jumbo_pending ||
1467             rp->tx_pending > MAX_TXQ_ENTRIES ||
1468             rp->rx_mini_pending > MAX_RSPQ_ENTRIES ||
1469             rp->rx_mini_pending < MIN_RSPQ_ENTRIES ||
1470             rp->rx_pending < MIN_FL_ENTRIES ||
1471             rp->tx_pending < MIN_TXQ_ENTRIES)
1472                 return -EINVAL;
1473
1474         if (adapter->flags & FULL_INIT_DONE)
1475                 return -EBUSY;
1476
1477         for (qs = pi->first_qset; qs < pi->first_qset + pi->nqsets; qs++) {
1478                 s->ethrxq[qs].fl.size = rp->rx_pending + MIN_FL_RESID;
1479                 s->ethrxq[qs].rspq.size = rp->rx_mini_pending;
1480                 s->ethtxq[qs].q.size = rp->tx_pending;
1481         }
1482         return 0;
1483 }
1484
1485 /*
1486  * Return the interrupt holdoff timer and count for the first Queue Set on the
1487  * device.  Our extension ioctl() (the cxgbtool interface) allows the
1488  * interrupt holdoff timer to be read on all of the device's Queue Sets.
1489  */
1490 static int cxgb4vf_get_coalesce(struct net_device *dev,
1491                                 struct ethtool_coalesce *coalesce)
1492 {
1493         const struct port_info *pi = netdev_priv(dev);
1494         const struct adapter *adapter = pi->adapter;
1495         const struct sge_rspq *rspq = &adapter->sge.ethrxq[pi->first_qset].rspq;
1496
1497         coalesce->rx_coalesce_usecs = qtimer_val(adapter, rspq);
1498         coalesce->rx_max_coalesced_frames =
1499                 ((rspq->intr_params & QINTR_CNT_EN_F)
1500                  ? adapter->sge.counter_val[rspq->pktcnt_idx]
1501                  : 0);
1502         return 0;
1503 }
1504
1505 /*
1506  * Set the RX interrupt holdoff timer and count for the first Queue Set on the
1507  * interface.  Our extension ioctl() (the cxgbtool interface) allows us to set
1508  * the interrupt holdoff timer on any of the device's Queue Sets.
1509  */
1510 static int cxgb4vf_set_coalesce(struct net_device *dev,
1511                                 struct ethtool_coalesce *coalesce)
1512 {
1513         const struct port_info *pi = netdev_priv(dev);
1514         struct adapter *adapter = pi->adapter;
1515
1516         return set_rxq_intr_params(adapter,
1517                                    &adapter->sge.ethrxq[pi->first_qset].rspq,
1518                                    coalesce->rx_coalesce_usecs,
1519                                    coalesce->rx_max_coalesced_frames);
1520 }
1521
1522 /*
1523  * Report current port link pause parameter settings.
1524  */
1525 static void cxgb4vf_get_pauseparam(struct net_device *dev,
1526                                    struct ethtool_pauseparam *pauseparam)
1527 {
1528         struct port_info *pi = netdev_priv(dev);
1529
1530         pauseparam->autoneg = (pi->link_cfg.requested_fc & PAUSE_AUTONEG) != 0;
1531         pauseparam->rx_pause = (pi->link_cfg.fc & PAUSE_RX) != 0;
1532         pauseparam->tx_pause = (pi->link_cfg.fc & PAUSE_TX) != 0;
1533 }
1534
1535 /*
1536  * Identify the port by blinking the port's LED.
1537  */
1538 static int cxgb4vf_phys_id(struct net_device *dev,
1539                            enum ethtool_phys_id_state state)
1540 {
1541         unsigned int val;
1542         struct port_info *pi = netdev_priv(dev);
1543
1544         if (state == ETHTOOL_ID_ACTIVE)
1545                 val = 0xffff;
1546         else if (state == ETHTOOL_ID_INACTIVE)
1547                 val = 0;
1548         else
1549                 return -EINVAL;
1550
1551         return t4vf_identify_port(pi->adapter, pi->viid, val);
1552 }
1553
1554 /*
1555  * Port stats maintained per queue of the port.
1556  */
1557 struct queue_port_stats {
1558         u64 tso;
1559         u64 tx_csum;
1560         u64 rx_csum;
1561         u64 vlan_ex;
1562         u64 vlan_ins;
1563         u64 lro_pkts;
1564         u64 lro_merged;
1565 };
1566
1567 /*
1568  * Strings for the ETH_SS_STATS statistics set ("ethtool -S").  Note that
1569  * these need to match the order of statistics returned by
1570  * t4vf_get_port_stats().
1571  */
1572 static const char stats_strings[][ETH_GSTRING_LEN] = {
1573         /*
1574          * These must match the layout of the t4vf_port_stats structure.
1575          */
1576         "TxBroadcastBytes  ",
1577         "TxBroadcastFrames ",
1578         "TxMulticastBytes  ",
1579         "TxMulticastFrames ",
1580         "TxUnicastBytes    ",
1581         "TxUnicastFrames   ",
1582         "TxDroppedFrames   ",
1583         "TxOffloadBytes    ",
1584         "TxOffloadFrames   ",
1585         "RxBroadcastBytes  ",
1586         "RxBroadcastFrames ",
1587         "RxMulticastBytes  ",
1588         "RxMulticastFrames ",
1589         "RxUnicastBytes    ",
1590         "RxUnicastFrames   ",
1591         "RxErrorFrames     ",
1592
1593         /*
1594          * These are accumulated per-queue statistics and must match the
1595          * order of the fields in the queue_port_stats structure.
1596          */
1597         "TSO               ",
1598         "TxCsumOffload     ",
1599         "RxCsumGood        ",
1600         "VLANextractions   ",
1601         "VLANinsertions    ",
1602         "GROPackets        ",
1603         "GROMerged         ",
1604 };
1605
1606 /*
1607  * Return the number of statistics in the specified statistics set.
1608  */
1609 static int cxgb4vf_get_sset_count(struct net_device *dev, int sset)
1610 {
1611         switch (sset) {
1612         case ETH_SS_STATS:
1613                 return ARRAY_SIZE(stats_strings);
1614         default:
1615                 return -EOPNOTSUPP;
1616         }
1617         /*NOTREACHED*/
1618 }
1619
1620 /*
1621  * Return the strings for the specified statistics set.
1622  */
1623 static void cxgb4vf_get_strings(struct net_device *dev,
1624                                 u32 sset,
1625                                 u8 *data)
1626 {
1627         switch (sset) {
1628         case ETH_SS_STATS:
1629                 memcpy(data, stats_strings, sizeof(stats_strings));
1630                 break;
1631         }
1632 }
1633
1634 /*
1635  * Small utility routine to accumulate queue statistics across the queues of
1636  * a "port".
1637  */
1638 static void collect_sge_port_stats(const struct adapter *adapter,
1639                                    const struct port_info *pi,
1640                                    struct queue_port_stats *stats)
1641 {
1642         const struct sge_eth_txq *txq = &adapter->sge.ethtxq[pi->first_qset];
1643         const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[pi->first_qset];
1644         int qs;
1645
1646         memset(stats, 0, sizeof(*stats));
1647         for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
1648                 stats->tso += txq->tso;
1649                 stats->tx_csum += txq->tx_cso;
1650                 stats->rx_csum += rxq->stats.rx_cso;
1651                 stats->vlan_ex += rxq->stats.vlan_ex;
1652                 stats->vlan_ins += txq->vlan_ins;
1653                 stats->lro_pkts += rxq->stats.lro_pkts;
1654                 stats->lro_merged += rxq->stats.lro_merged;
1655         }
1656 }
1657
1658 /*
1659  * Return the ETH_SS_STATS statistics set.
1660  */
1661 static void cxgb4vf_get_ethtool_stats(struct net_device *dev,
1662                                       struct ethtool_stats *stats,
1663                                       u64 *data)
1664 {
1665         struct port_info *pi = netdev2pinfo(dev);
1666         struct adapter *adapter = pi->adapter;
1667         int err = t4vf_get_port_stats(adapter, pi->pidx,
1668                                       (struct t4vf_port_stats *)data);
1669         if (err)
1670                 memset(data, 0, sizeof(struct t4vf_port_stats));
1671
1672         data += sizeof(struct t4vf_port_stats) / sizeof(u64);
1673         collect_sge_port_stats(adapter, pi, (struct queue_port_stats *)data);
1674 }
1675
/*
 * ethtool .get_regs_len handler: return the size of our register map, i.e.
 * the number of bytes which cxgb4vf_get_regs() fills in (T4VF_REGMAP_SIZE).
 */
static int cxgb4vf_get_regs_len(struct net_device *dev)
{
        return T4VF_REGMAP_SIZE;
}
1683
1684 /*
1685  * Dump a block of registers, start to end inclusive, into a buffer.
1686  */
1687 static void reg_block_dump(struct adapter *adapter, void *regbuf,
1688                            unsigned int start, unsigned int end)
1689 {
1690         u32 *bp = regbuf + start - T4VF_REGMAP_START;
1691
1692         for ( ; start <= end; start += sizeof(u32)) {
1693                 /*
1694                  * Avoid reading the Mailbox Control register since that
1695                  * can trigger a Mailbox Ownership Arbitration cycle and
1696                  * interfere with communication with the firmware.
1697                  */
1698                 if (start == T4VF_CIM_BASE_ADDR + CIM_VF_EXT_MAILBOX_CTRL)
1699                         *bp++ = 0xffff;
1700                 else
1701                         *bp++ = t4_read_reg(adapter, start);
1702         }
1703 }
1704
1705 /*
1706  * Copy our entire register map into the provided buffer.
1707  */
1708 static void cxgb4vf_get_regs(struct net_device *dev,
1709                              struct ethtool_regs *regs,
1710                              void *regbuf)
1711 {
1712         struct adapter *adapter = netdev2adap(dev);
1713
1714         regs->version = mk_adap_vers(adapter);
1715
1716         /*
1717          * Fill in register buffer with our register map.
1718          */
1719         memset(regbuf, 0, T4VF_REGMAP_SIZE);
1720
1721         reg_block_dump(adapter, regbuf,
1722                        T4VF_SGE_BASE_ADDR + T4VF_MOD_MAP_SGE_FIRST,
1723                        T4VF_SGE_BASE_ADDR + T4VF_MOD_MAP_SGE_LAST);
1724         reg_block_dump(adapter, regbuf,
1725                        T4VF_MPS_BASE_ADDR + T4VF_MOD_MAP_MPS_FIRST,
1726                        T4VF_MPS_BASE_ADDR + T4VF_MOD_MAP_MPS_LAST);
1727
1728         /* T5 adds new registers in the PL Register map.
1729          */
1730         reg_block_dump(adapter, regbuf,
1731                        T4VF_PL_BASE_ADDR + T4VF_MOD_MAP_PL_FIRST,
1732                        T4VF_PL_BASE_ADDR + (is_t4(adapter->params.chip)
1733                        ? PL_VF_WHOAMI_A : PL_VF_REVISION_A));
1734         reg_block_dump(adapter, regbuf,
1735                        T4VF_CIM_BASE_ADDR + T4VF_MOD_MAP_CIM_FIRST,
1736                        T4VF_CIM_BASE_ADDR + T4VF_MOD_MAP_CIM_LAST);
1737
1738         reg_block_dump(adapter, regbuf,
1739                        T4VF_MBDATA_BASE_ADDR + T4VF_MBDATA_FIRST,
1740                        T4VF_MBDATA_BASE_ADDR + T4VF_MBDATA_LAST);
1741 }
1742
1743 /*
1744  * Report current Wake On LAN settings.
1745  */
1746 static void cxgb4vf_get_wol(struct net_device *dev,
1747                             struct ethtool_wolinfo *wol)
1748 {
1749         wol->supported = 0;
1750         wol->wolopts = 0;
1751         memset(&wol->sopass, 0, sizeof(wol->sopass));
1752 }
1753
/*
 * TCP Segmentation Offload flags which we support: the union of the
 * NETIF_F_TSO, NETIF_F_TSO6 and NETIF_F_TSO_ECN netdev feature bits.
 */
#define TSO_FLAGS (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN)
1758
/*
 * ethtool operations implemented by this driver.  Link state is reported via
 * the generic ethtool_op_get_link() helper; every other entry points at a
 * cxgb4vf_*() handler in this file.  Operations not listed here are left
 * unset.
 */
static const struct ethtool_ops cxgb4vf_ethtool_ops = {
        .get_link_ksettings     = cxgb4vf_get_link_ksettings,
        .get_drvinfo            = cxgb4vf_get_drvinfo,
        .get_msglevel           = cxgb4vf_get_msglevel,
        .set_msglevel           = cxgb4vf_set_msglevel,
        .get_ringparam          = cxgb4vf_get_ringparam,
        .set_ringparam          = cxgb4vf_set_ringparam,
        .get_coalesce           = cxgb4vf_get_coalesce,
        .set_coalesce           = cxgb4vf_set_coalesce,
        .get_pauseparam         = cxgb4vf_get_pauseparam,
        .get_link               = ethtool_op_get_link,
        .get_strings            = cxgb4vf_get_strings,
        .set_phys_id            = cxgb4vf_phys_id,
        .get_sset_count         = cxgb4vf_get_sset_count,
        .get_ethtool_stats      = cxgb4vf_get_ethtool_stats,
        .get_regs_len           = cxgb4vf_get_regs_len,
        .get_regs               = cxgb4vf_get_regs,
        .get_wol                = cxgb4vf_get_wol,
};
1778
1779 /*
1780  * /sys/kernel/debug/cxgb4vf support code and data.
1781  * ================================================
1782  */
1783
1784 /*
1785  * Show Firmware Mailbox Command/Reply Log
1786  *
1787  * Note that we don't do any locking when dumping the Firmware Mailbox Log so
1788  * it's possible that we can catch things during a log update and therefore
1789  * see partially corrupted log entries.  But i9t's probably Good Enough(tm).
1790  * If we ever decide that we want to make sure that we're dumping a coherent
1791  * log, we'd need to perform locking in the mailbox logging and in
1792  * mboxlog_open() where we'd need to grab the entire mailbox log in one go
1793  * like we do for the Firmware Device Log.  But as stated above, meh ...
1794  */
1795 static int mboxlog_show(struct seq_file *seq, void *v)
1796 {
1797         struct adapter *adapter = seq->private;
1798         struct mbox_cmd_log *log = adapter->mbox_log;
1799         struct mbox_cmd *entry;
1800         int entry_idx, i;
1801
1802         if (v == SEQ_START_TOKEN) {
1803                 seq_printf(seq,
1804                            "%10s  %15s  %5s  %5s  %s\n",
1805                            "Seq#", "Tstamp", "Atime", "Etime",
1806                            "Command/Reply");
1807                 return 0;
1808         }
1809
1810         entry_idx = log->cursor + ((uintptr_t)v - 2);
1811         if (entry_idx >= log->size)
1812                 entry_idx -= log->size;
1813         entry = mbox_cmd_log_entry(log, entry_idx);
1814
1815         /* skip over unused entries */
1816         if (entry->timestamp == 0)
1817                 return 0;
1818
1819         seq_printf(seq, "%10u  %15llu  %5d  %5d",
1820                    entry->seqno, entry->timestamp,
1821                    entry->access, entry->execute);
1822         for (i = 0; i < MBOX_LEN / 8; i++) {
1823                 u64 flit = entry->cmd[i];
1824                 u32 hi = (u32)(flit >> 32);
1825                 u32 lo = (u32)flit;
1826
1827                 seq_printf(seq, "  %08x %08x", hi, lo);
1828         }
1829         seq_puts(seq, "\n");
1830         return 0;
1831 }
1832
1833 static inline void *mboxlog_get_idx(struct seq_file *seq, loff_t pos)
1834 {
1835         struct adapter *adapter = seq->private;
1836         struct mbox_cmd_log *log = adapter->mbox_log;
1837
1838         return ((pos <= log->size) ? (void *)(uintptr_t)(pos + 1) : NULL);
1839 }
1840
1841 static void *mboxlog_start(struct seq_file *seq, loff_t *pos)
1842 {
1843         return *pos ? mboxlog_get_idx(seq, *pos) : SEQ_START_TOKEN;
1844 }
1845
1846 static void *mboxlog_next(struct seq_file *seq, void *v, loff_t *pos)
1847 {
1848         ++*pos;
1849         return mboxlog_get_idx(seq, *pos);
1850 }
1851
/*
 * seq_file .stop: nothing to do since mboxlog_start()/mboxlog_next() don't
 * acquire any locks or allocate anything.
 */
static void mboxlog_stop(struct seq_file *seq, void *v)
{
}
1855
/* seq_file iterator for the "mboxlog" debugfs node. */
static const struct seq_operations mboxlog_seq_ops = {
        .start = mboxlog_start,
        .next  = mboxlog_next,
        .stop  = mboxlog_stop,
        .show  = mboxlog_show
};
1862
1863 static int mboxlog_open(struct inode *inode, struct file *file)
1864 {
1865         int res = seq_open(file, &mboxlog_seq_ops);
1866
1867         if (!res) {
1868                 struct seq_file *seq = file->private_data;
1869
1870                 seq->private = inode->i_private;
1871         }
1872         return res;
1873 }
1874
/* File operations for the "mboxlog" debugfs node (seq_file based). */
static const struct file_operations mboxlog_fops = {
        .owner   = THIS_MODULE,
        .open    = mboxlog_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = seq_release,
};
1882
/*
 * Show SGE Queue Set information.  We display QPL Queues Sets per line.
 *
 * Each call emits one "entry" selected by the iterator value v (entry index
 * plus one): first the groups of up to QPL Ethernet Queue Sets, then the
 * Firmware Event Queue and finally the Forwarded Interrupt Queue.  Entries
 * after the first are separated by a blank line.  Always returns 0.
 */
#define QPL     4

static int sge_qinfo_show(struct seq_file *seq, void *v)
{
        struct adapter *adapter = seq->private;
        int eth_entries = DIV_ROUND_UP(adapter->sge.ethqsets, QPL);
        int qs, r = (uintptr_t)v - 1;

        if (r)
                seq_putc(seq, '\n');

        /*
         * Row helpers.  S3() prints a row label followed by one column per
         * Queue Set; it relies on the locals "qs" (column index) and "n"
         * (number of columns) and evaluates its value expression once per
         * column.  S() prints string columns while T() and R() print an
         * unsigned field of txq[qs] and rxq[qs] respectively.
         */
        #define S3(fmt_spec, s, v) \
                do {\
                        seq_printf(seq, "%-12s", s); \
                        for (qs = 0; qs < n; ++qs) \
                                seq_printf(seq, " %16" fmt_spec, v); \
                        seq_putc(seq, '\n'); \
                } while (0)
        #define S(s, v)         S3("s", s, v)
        #define T(s, v)         S3("u", s, txq[qs].v)
        #define R(s, v)         S3("u", s, rxq[qs].v)

        if (r < eth_entries) {
                /* Base of this group of Queue Sets; n is the group's size. */
                const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[r * QPL];
                const struct sge_eth_txq *txq = &adapter->sge.ethtxq[r * QPL];
                int n = min(QPL, adapter->sge.ethqsets - QPL * r);

                S("QType:", "Ethernet");
                /* Queue Sets without an attached net device show "N/A". */
                S("Interface:",
                  (rxq[qs].rspq.netdev
                   ? rxq[qs].rspq.netdev->name
                   : "N/A"));
                /* ... and a Port ID of -1. */
                S3("d", "Port:",
                   (rxq[qs].rspq.netdev
                    ? ((struct port_info *)
                       netdev_priv(rxq[qs].rspq.netdev))->port_id
                    : -1));
                T("TxQ ID:", q.abs_id);
                T("TxQ size:", q.size);
                T("TxQ inuse:", q.in_use);
                T("TxQ PIdx:", q.pidx);
                T("TxQ CIdx:", q.cidx);
                R("RspQ ID:", rspq.abs_id);
                R("RspQ size:", rspq.size);
                R("RspQE size:", rspq.iqe_len);
                S3("u", "Intr delay:", qtimer_val(adapter, &rxq[qs].rspq));
                S3("u", "Intr pktcnt:",
                   adapter->sge.counter_val[rxq[qs].rspq.pktcnt_idx]);
                R("RspQ CIdx:", rspq.cidx);
                R("RspQ Gen:", rspq.gen);
                R("FL ID:", fl.abs_id);
                R("FL size:", fl.size - MIN_FL_RESID);
                R("FL avail:", fl.avail);
                R("FL PIdx:", fl.pidx);
                R("FL CIdx:", fl.cidx);
                return 0;
        }

        /* Past the Ethernet groups: 0 is the FW event queue, 1 the intrq. */
        r -= eth_entries;
        if (r == 0) {
                const struct sge_rspq *evtq = &adapter->sge.fw_evtq;

                seq_printf(seq, "%-12s %16s\n", "QType:", "FW event queue");
                seq_printf(seq, "%-12s %16u\n", "RspQ ID:", evtq->abs_id);
                seq_printf(seq, "%-12s %16u\n", "Intr delay:",
                           qtimer_val(adapter, evtq));
                seq_printf(seq, "%-12s %16u\n", "Intr pktcnt:",
                           adapter->sge.counter_val[evtq->pktcnt_idx]);
                seq_printf(seq, "%-12s %16u\n", "RspQ Cidx:", evtq->cidx);
                seq_printf(seq, "%-12s %16u\n", "RspQ Gen:", evtq->gen);
        } else if (r == 1) {
                const struct sge_rspq *intrq = &adapter->sge.intrq;

                seq_printf(seq, "%-12s %16s\n", "QType:", "Interrupt Queue");
                seq_printf(seq, "%-12s %16u\n", "RspQ ID:", intrq->abs_id);
                seq_printf(seq, "%-12s %16u\n", "Intr delay:",
                           qtimer_val(adapter, intrq));
                seq_printf(seq, "%-12s %16u\n", "Intr pktcnt:",
                           adapter->sge.counter_val[intrq->pktcnt_idx]);
                seq_printf(seq, "%-12s %16u\n", "RspQ Cidx:", intrq->cidx);
                seq_printf(seq, "%-12s %16u\n", "RspQ Gen:", intrq->gen);
        }

        #undef R
        #undef T
        #undef S
        #undef S3

        return 0;
}
1976
1977 /*
1978  * Return the number of "entries" in our "file".  We group the multi-Queue
1979  * sections with QPL Queue Sets per "entry".  The sections of the output are:
1980  *
1981  *     Ethernet RX/TX Queue Sets
1982  *     Firmware Event Queue
1983  *     Forwarded Interrupt Queue (if in MSI mode)
1984  */
1985 static int sge_queue_entries(const struct adapter *adapter)
1986 {
1987         return DIV_ROUND_UP(adapter->sge.ethqsets, QPL) + 1 +
1988                 ((adapter->flags & USING_MSI) != 0);
1989 }
1990
1991 static void *sge_queue_start(struct seq_file *seq, loff_t *pos)
1992 {
1993         int entries = sge_queue_entries(seq->private);
1994
1995         return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
1996 }
1997
/*
 * seq_file .stop: nothing to do since sge_queue_start()/sge_queue_next()
 * don't acquire any locks or allocate anything.
 */
static void sge_queue_stop(struct seq_file *seq, void *v)
{
}
2001
2002 static void *sge_queue_next(struct seq_file *seq, void *v, loff_t *pos)
2003 {
2004         int entries = sge_queue_entries(seq->private);
2005
2006         ++*pos;
2007         return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
2008 }
2009
/* seq_file iterator for the "sge_qinfo" debugfs node. */
static const struct seq_operations sge_qinfo_seq_ops = {
        .start = sge_queue_start,
        .next  = sge_queue_next,
        .stop  = sge_queue_stop,
        .show  = sge_qinfo_show
};
2016
2017 static int sge_qinfo_open(struct inode *inode, struct file *file)
2018 {
2019         int res = seq_open(file, &sge_qinfo_seq_ops);
2020
2021         if (!res) {
2022                 struct seq_file *seq = file->private_data;
2023                 seq->private = inode->i_private;
2024         }
2025         return res;
2026 }
2027
/* File operations for the "sge_qinfo" debugfs node (seq_file based). */
static const struct file_operations sge_qinfo_debugfs_fops = {
        .owner   = THIS_MODULE,
        .open    = sge_qinfo_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = seq_release,
};
2035
/*
 * Show SGE Queue Set statistics.  We display QPL Queues Sets per line.
 *
 * Each call emits one "entry" selected by the iterator value v (entry index
 * plus one): first the groups of up to QPL Ethernet Queue Sets, then the
 * Firmware Event Queue and finally the Forwarded Interrupt Queue.  Entries
 * after the first are separated by a blank line.  Always returns 0.
 */
#define QPL     4

static int sge_qstats_show(struct seq_file *seq, void *v)
{
        struct adapter *adapter = seq->private;
        int eth_entries = DIV_ROUND_UP(adapter->sge.ethqsets, QPL);
        int qs, r = (uintptr_t)v - 1;

        if (r)
                seq_putc(seq, '\n');

        /*
         * Row helpers.  S3() prints a row label followed by one column per
         * Queue Set; it relies on the locals "qs" (column index) and "n"
         * (number of columns) and evaluates its value expression once per
         * column.  T3()/R3() select a field of txq[qs]/rxq[qs] with an
         * explicit format, while T()/R() default to "lu".
         */
        #define S3(fmt, s, v) \
                do { \
                        seq_printf(seq, "%-16s", s); \
                        for (qs = 0; qs < n; ++qs) \
                                seq_printf(seq, " %8" fmt, v); \
                        seq_putc(seq, '\n'); \
                } while (0)
        #define S(s, v)         S3("s", s, v)

        #define T3(fmt, s, v)   S3(fmt, s, txq[qs].v)
        #define T(s, v)         T3("lu", s, v)

        #define R3(fmt, s, v)   S3(fmt, s, rxq[qs].v)
        #define R(s, v)         R3("lu", s, v)

        if (r < eth_entries) {
                /* Base of this group of Queue Sets; n is the group's size. */
                const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[r * QPL];
                const struct sge_eth_txq *txq = &adapter->sge.ethtxq[r * QPL];
                int n = min(QPL, adapter->sge.ethqsets - QPL * r);

                S("QType:", "Ethernet");
                /* Queue Sets without an attached net device show "N/A". */
                S("Interface:",
                  (rxq[qs].rspq.netdev
                   ? rxq[qs].rspq.netdev->name
                   : "N/A"));
                R3("u", "RspQNullInts:", rspq.unhandled_irqs);
                R("RxPackets:", stats.pkts);
                R("RxCSO:", stats.rx_cso);
                R("VLANxtract:", stats.vlan_ex);
                R("LROmerged:", stats.lro_merged);
                R("LROpackets:", stats.lro_pkts);
                R("RxDrops:", stats.rx_drops);
                T("TSO:", tso);
                T("TxCSO:", tx_cso);
                T("VLANins:", vlan_ins);
                T("TxQFull:", q.stops);
                T("TxQRestarts:", q.restarts);
                T("TxMapErr:", mapping_err);
                R("FLAllocErr:", fl.alloc_failed);
                R("FLLrgAlcErr:", fl.large_alloc_failed);
                R("FLStarving:", fl.starving);
                return 0;
        }

        /* Past the Ethernet groups: 0 is the FW event queue, 1 the intrq. */
        r -= eth_entries;
        if (r == 0) {
                const struct sge_rspq *evtq = &adapter->sge.fw_evtq;

                seq_printf(seq, "%-8s %16s\n", "QType:", "FW event queue");
                seq_printf(seq, "%-16s %8u\n", "RspQNullInts:",
                           evtq->unhandled_irqs);
                seq_printf(seq, "%-16s %8u\n", "RspQ CIdx:", evtq->cidx);
                seq_printf(seq, "%-16s %8u\n", "RspQ Gen:", evtq->gen);
        } else if (r == 1) {
                const struct sge_rspq *intrq = &adapter->sge.intrq;

                seq_printf(seq, "%-8s %16s\n", "QType:", "Interrupt Queue");
                seq_printf(seq, "%-16s %8u\n", "RspQNullInts:",
                           intrq->unhandled_irqs);
                seq_printf(seq, "%-16s %8u\n", "RspQ CIdx:", intrq->cidx);
                seq_printf(seq, "%-16s %8u\n", "RspQ Gen:", intrq->gen);
        }

        #undef R
        #undef T
        #undef S
        #undef R3
        #undef T3
        #undef S3

        return 0;
}
2122
2123 /*
2124  * Return the number of "entries" in our "file".  We group the multi-Queue
2125  * sections with QPL Queue Sets per "entry".  The sections of the output are:
2126  *
2127  *     Ethernet RX/TX Queue Sets
2128  *     Firmware Event Queue
2129  *     Forwarded Interrupt Queue (if in MSI mode)
2130  */
2131 static int sge_qstats_entries(const struct adapter *adapter)
2132 {
2133         return DIV_ROUND_UP(adapter->sge.ethqsets, QPL) + 1 +
2134                 ((adapter->flags & USING_MSI) != 0);
2135 }
2136
2137 static void *sge_qstats_start(struct seq_file *seq, loff_t *pos)
2138 {
2139         int entries = sge_qstats_entries(seq->private);
2140
2141         return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
2142 }
2143
/*
 * seq_file .stop: nothing to do since sge_qstats_start()/sge_qstats_next()
 * don't acquire any locks or allocate anything.
 */
static void sge_qstats_stop(struct seq_file *seq, void *v)
{
}
2147
2148 static void *sge_qstats_next(struct seq_file *seq, void *v, loff_t *pos)
2149 {
2150         int entries = sge_qstats_entries(seq->private);
2151
2152         (*pos)++;
2153         return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
2154 }
2155
/* seq_file iterator for the "sge_qstats" debugfs node. */
static const struct seq_operations sge_qstats_seq_ops = {
        .start = sge_qstats_start,
        .next  = sge_qstats_next,
        .stop  = sge_qstats_stop,
        .show  = sge_qstats_show
};
2162
2163 static int sge_qstats_open(struct inode *inode, struct file *file)
2164 {
2165         int res = seq_open(file, &sge_qstats_seq_ops);
2166
2167         if (res == 0) {
2168                 struct seq_file *seq = file->private_data;
2169                 seq->private = inode->i_private;
2170         }
2171         return res;
2172 }
2173
/*
 * File operations for the "sge_qstats" debugfs node (seq_file based).
 * Despite the "proc" in its name, this table is registered through
 * debugfs_files[].
 */
static const struct file_operations sge_qstats_proc_fops = {
        .owner   = THIS_MODULE,
        .open    = sge_qstats_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = seq_release,
};
2181
2182 /*
2183  * Show PCI-E SR-IOV Virtual Function Resource Limits.
2184  */
2185 static int resources_show(struct seq_file *seq, void *v)
2186 {
2187         struct adapter *adapter = seq->private;
2188         struct vf_resources *vfres = &adapter->params.vfres;
2189
2190         #define S(desc, fmt, var) \
2191                 seq_printf(seq, "%-60s " fmt "\n", \
2192                            desc " (" #var "):", vfres->var)
2193
2194         S("Virtual Interfaces", "%d", nvi);
2195         S("Egress Queues", "%d", neq);
2196         S("Ethernet Control", "%d", nethctrl);
2197         S("Ingress Queues/w Free Lists/Interrupts", "%d", niqflint);
2198         S("Ingress Queues", "%d", niq);
2199         S("Traffic Class", "%d", tc);
2200         S("Port Access Rights Mask", "%#x", pmask);
2201         S("MAC Address Filters", "%d", nexactf);
2202         S("Firmware Command Read Capabilities", "%#x", r_caps);
2203         S("Firmware Command Write/Execute Capabilities", "%#x", wx_caps);
2204
2205         #undef S
2206
2207         return 0;
2208 }
2209
/*
 * Open the "resources" node.  The whole file is produced by a single
 * resources_show() call, with the inode's private data (the adapter) passed
 * through as the seq_file private pointer.
 */
static int resources_open(struct inode *inode, struct file *file)
{
        return single_open(file, resources_show, inode->i_private);
}
2214
/*
 * File operations for the "resources" debugfs node.  Opened with
 * single_open(), so it must be released with single_release().
 */
static const struct file_operations resources_proc_fops = {
        .owner   = THIS_MODULE,
        .open    = resources_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = single_release,
};
2222
2223 /*
2224  * Show Virtual Interfaces.
2225  */
2226 static int interfaces_show(struct seq_file *seq, void *v)
2227 {
2228         if (v == SEQ_START_TOKEN) {
2229                 seq_puts(seq, "Interface  Port   VIID\n");
2230         } else {
2231                 struct adapter *adapter = seq->private;
2232                 int pidx = (uintptr_t)v - 2;
2233                 struct net_device *dev = adapter->port[pidx];
2234                 struct port_info *pi = netdev_priv(dev);
2235
2236                 seq_printf(seq, "%9s  %4d  %#5x\n",
2237                            dev->name, pi->port_id, pi->viid);
2238         }
2239         return 0;
2240 }
2241
2242 static inline void *interfaces_get_idx(struct adapter *adapter, loff_t pos)
2243 {
2244         return pos <= adapter->params.nports
2245                 ? (void *)(uintptr_t)(pos + 1)
2246                 : NULL;
2247 }
2248
2249 static void *interfaces_start(struct seq_file *seq, loff_t *pos)
2250 {
2251         return *pos
2252                 ? interfaces_get_idx(seq->private, *pos)
2253                 : SEQ_START_TOKEN;
2254 }
2255
2256 static void *interfaces_next(struct seq_file *seq, void *v, loff_t *pos)
2257 {
2258         (*pos)++;
2259         return interfaces_get_idx(seq->private, *pos);
2260 }
2261
/*
 * seq_file .stop: nothing to do since interfaces_start()/interfaces_next()
 * don't acquire any locks or allocate anything.
 */
static void interfaces_stop(struct seq_file *seq, void *v)
{
}
2265
/* seq_file iterator for the "interfaces" debugfs node. */
static const struct seq_operations interfaces_seq_ops = {
        .start = interfaces_start,
        .next  = interfaces_next,
        .stop  = interfaces_stop,
        .show  = interfaces_show
};
2272
2273 static int interfaces_open(struct inode *inode, struct file *file)
2274 {
2275         int res = seq_open(file, &interfaces_seq_ops);
2276
2277         if (res == 0) {
2278                 struct seq_file *seq = file->private_data;
2279                 seq->private = inode->i_private;
2280         }
2281         return res;
2282 }
2283
/*
 * File operations for the "interfaces" debugfs node (seq_file based).
 * Despite the "proc" in its name, this table is registered through
 * debugfs_files[].
 */
static const struct file_operations interfaces_proc_fops = {
        .owner   = THIS_MODULE,
        .open    = interfaces_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = seq_release,
};
2291
/*
 * /sys/kernel/debug/cxgb4vf/ files list.
 *
 * Describes one debugfs node: its name, access mode and file operations.
 */
struct cxgb4vf_debugfs_entry {
        const char *name;               /* name of debugfs node */
        umode_t mode;                   /* file system mode */
        const struct file_operations *fops;     /* handlers for the node */
};
2300
/*
 * The debugfs nodes created for each adapter by setup_debugfs().  All of
 * them are read-only (S_IRUGO).
 */
static struct cxgb4vf_debugfs_entry debugfs_files[] = {
        { "mboxlog",    S_IRUGO, &mboxlog_fops },
        { "sge_qinfo",  S_IRUGO, &sge_qinfo_debugfs_fops },
        { "sge_qstats", S_IRUGO, &sge_qstats_proc_fops },
        { "resources",  S_IRUGO, &resources_proc_fops },
        { "interfaces", S_IRUGO, &interfaces_proc_fops },
};
2308
2309 /*
2310  * Module and device initialization and cleanup code.
2311  * ==================================================
2312  */
2313
2314 /*
2315  * Set up out /sys/kernel/debug/cxgb4vf sub-nodes.  We assume that the
2316  * directory (debugfs_root) has already been set up.
2317  */
2318 static int setup_debugfs(struct adapter *adapter)
2319 {
2320         int i;
2321
2322         BUG_ON(IS_ERR_OR_NULL(adapter->debugfs_root));
2323
2324         /*
2325          * Debugfs support is best effort.
2326          */
2327         for (i = 0; i < ARRAY_SIZE(debugfs_files); i++)
2328                 (void)debugfs_create_file(debugfs_files[i].name,
2329                                   debugfs_files[i].mode,
2330                                   adapter->debugfs_root,
2331                                   (void *)adapter,
2332                                   debugfs_files[i].fops);
2333
2334         return 0;
2335 }
2336
/*
 * Tear down the /sys/kernel/debug/cxgb4vf sub-nodes created above.  We leave
 * it to our caller to tear down the directory (debugfs_root).  Currently
 * this only checks that the directory exists.
 */
static void cleanup_debugfs(struct adapter *adapter)
{
        BUG_ON(IS_ERR_OR_NULL(adapter->debugfs_root));

        /*
         * Unlike our sister routine cleanup_proc(), we don't need to remove
         * individual entries because a call will be made to
         * debugfs_remove_recursive().  We just need to clean up any ancillary
         * persistent state.
         */
        /* nothing to do */
}
2353
2354 /* Figure out how many Ports and Queue Sets we can support.  This depends on
2355  * knowing our Virtual Function Resources and may be called a second time if
2356  * we fall back from MSI-X to MSI Interrupt Mode.
2357  */
2358 static void size_nports_qsets(struct adapter *adapter)
2359 {
2360         struct vf_resources *vfres = &adapter->params.vfres;
2361         unsigned int ethqsets, pmask_nports;
2362
2363         /* The number of "ports" which we support is equal to the number of
2364          * Virtual Interfaces with which we've been provisioned.
2365          */
2366         adapter->params.nports = vfres->nvi;
2367         if (adapter->params.nports > MAX_NPORTS) {
2368                 dev_warn(adapter->pdev_dev, "only using %d of %d maximum"
2369                          " allowed virtual interfaces\n", MAX_NPORTS,
2370                          adapter->params.nports);
2371                 adapter->params.nports = MAX_NPORTS;
2372         }
2373
2374         /* We may have been provisioned with more VIs than the number of
2375          * ports we're allowed to access (our Port Access Rights Mask).
2376          * This is obviously a configuration conflict but we don't want to
2377          * crash the kernel or anything silly just because of that.
2378          */
2379         pmask_nports = hweight32(adapter->params.vfres.pmask);
2380         if (pmask_nports < adapter->params.nports) {
2381                 dev_warn(adapter->pdev_dev, "only using %d of %d provissioned"
2382                          " virtual interfaces; limited by Port Access Rights"
2383                          " mask %#x\n", pmask_nports, adapter->params.nports,
2384                          adapter->params.vfres.pmask);
2385                 adapter->params.nports = pmask_nports;
2386         }
2387
2388         /* We need to reserve an Ingress Queue for the Asynchronous Firmware
2389          * Event Queue.  And if we're using MSI Interrupts, we'll also need to
2390          * reserve an Ingress Queue for a Forwarded Interrupts.
2391          *
2392          * The rest of the FL/Intr-capable ingress queues will be matched up
2393          * one-for-one with Ethernet/Control egress queues in order to form
2394          * "Queue Sets" which will be aportioned between the "ports".  For
2395          * each Queue Set, we'll need the ability to allocate two Egress
2396          * Contexts -- one for the Ingress Queue Free List and one for the TX
2397          * Ethernet Queue.
2398          *
2399          * Note that even if we're currently configured to use MSI-X
2400          * Interrupts (module variable msi == MSI_MSIX) we may get downgraded
2401          * to MSI Interrupts if we can't get enough MSI-X Interrupts.  If that
2402          * happens we'll need to adjust things later.
2403          */
2404         ethqsets = vfres->niqflint - 1 - (msi == MSI_MSI);
2405         if (vfres->nethctrl != ethqsets)
2406                 ethqsets = min(vfres->nethctrl, ethqsets);
2407         if (vfres->neq < ethqsets*2)
2408                 ethqsets = vfres->neq/2;
2409         if (ethqsets > MAX_ETH_QSETS)
2410                 ethqsets = MAX_ETH_QSETS;
2411         adapter->sge.max_ethqsets = ethqsets;
2412
2413         if (adapter->sge.max_ethqsets < adapter->params.nports) {
2414                 dev_warn(adapter->pdev_dev, "only using %d of %d available"
2415                          " virtual interfaces (too few Queue Sets)\n",
2416                          adapter->sge.max_ethqsets, adapter->params.nports);
2417                 adapter->params.nports = adapter->sge.max_ethqsets;
2418         }
2419 }
2420
2421 /*
2422  * Perform early "adapter" initialization.  This is where we discover what
2423  * adapter parameters we're going to be using and initialize basic adapter
2424  * hardware support.
2425  */
2426 static int adap_init0(struct adapter *adapter)
2427 {
2428         struct sge_params *sge_params = &adapter->params.sge;
2429         struct sge *s = &adapter->sge;
2430         int err;
2431         u32 param, val = 0;
2432
2433         /*
2434          * Some environments do not properly handle PCIE FLRs -- e.g. in Linux
2435          * 2.6.31 and later we can't call pci_reset_function() in order to
2436          * issue an FLR because of a self- deadlock on the device semaphore.
2437          * Meanwhile, the OS infrastructure doesn't issue FLRs in all the
2438          * cases where they're needed -- for instance, some versions of KVM
2439          * fail to reset "Assigned Devices" when the VM reboots.  Therefore we
2440          * use the firmware based reset in order to reset any per function
2441          * state.
2442          */
2443         err = t4vf_fw_reset(adapter);
2444         if (err < 0) {
2445                 dev_err(adapter->pdev_dev, "FW reset failed: err=%d\n", err);
2446                 return err;
2447         }
2448
2449         /*
2450          * Grab basic operational parameters.  These will predominantly have
2451          * been set up by the Physical Function Driver or will be hard coded
2452          * into the adapter.  We just have to live with them ...  Note that
2453          * we _must_ get our VPD parameters before our SGE parameters because
2454          * we need to know the adapter's core clock from the VPD in order to
2455          * properly decode the SGE Timer Values.
2456          */
2457         err = t4vf_get_dev_params(adapter);
2458         if (err) {
2459                 dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2460                         " device parameters: err=%d\n", err);
2461                 return err;
2462         }
2463         err = t4vf_get_vpd_params(adapter);
2464         if (err) {
2465                 dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2466                         " VPD parameters: err=%d\n", err);
2467                 return err;
2468         }
2469         err = t4vf_get_sge_params(adapter);
2470         if (err) {
2471                 dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2472                         " SGE parameters: err=%d\n", err);
2473                 return err;
2474         }
2475         err = t4vf_get_rss_glb_config(adapter);
2476         if (err) {
2477                 dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2478                         " RSS parameters: err=%d\n", err);
2479                 return err;
2480         }
2481         if (adapter->params.rss.mode !=
2482             FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL) {
2483                 dev_err(adapter->pdev_dev, "unable to operate with global RSS"
2484                         " mode %d\n", adapter->params.rss.mode);
2485                 return -EINVAL;
2486         }
2487         err = t4vf_sge_init(adapter);
2488         if (err) {
2489                 dev_err(adapter->pdev_dev, "unable to use adapter parameters:"
2490                         " err=%d\n", err);
2491                 return err;
2492         }
2493
2494         /* If we're running on newer firmware, let it know that we're
2495          * prepared to deal with encapsulated CPL messages.  Older
2496          * firmware won't understand this and we'll just get
2497          * unencapsulated messages ...
2498          */
2499         param = FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_PFVF) |
2500                 FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_PFVF_CPLFW4MSG_ENCAP);
2501         val = 1;
2502         (void) t4vf_set_params(adapter, 1, &param, &val);
2503
2504         /*
2505          * Retrieve our RX interrupt holdoff timer values and counter
2506          * threshold values from the SGE parameters.
2507          */
2508         s->timer_val[0] = core_ticks_to_us(adapter,
2509                 TIMERVALUE0_G(sge_params->sge_timer_value_0_and_1));
2510         s->timer_val[1] = core_ticks_to_us(adapter,
2511                 TIMERVALUE1_G(sge_params->sge_timer_value_0_and_1));
2512         s->timer_val[2] = core_ticks_to_us(adapter,
2513                 TIMERVALUE0_G(sge_params->sge_timer_value_2_and_3));
2514         s->timer_val[3] = core_ticks_to_us(adapter,
2515                 TIMERVALUE1_G(sge_params->sge_timer_value_2_and_3));
2516         s->timer_val[4] = core_ticks_to_us(adapter,
2517                 TIMERVALUE0_G(sge_params->sge_timer_value_4_and_5));
2518         s->timer_val[5] = core_ticks_to_us(adapter,
2519                 TIMERVALUE1_G(sge_params->sge_timer_value_4_and_5));
2520
2521         s->counter_val[0] = THRESHOLD_0_G(sge_params->sge_ingress_rx_threshold);
2522         s->counter_val[1] = THRESHOLD_1_G(sge_params->sge_ingress_rx_threshold);
2523         s->counter_val[2] = THRESHOLD_2_G(sge_params->sge_ingress_rx_threshold);
2524         s->counter_val[3] = THRESHOLD_3_G(sge_params->sge_ingress_rx_threshold);
2525
2526         /*
2527          * Grab our Virtual Interface resource allocation, extract the
2528          * features that we're interested in and do a bit of sanity testing on
2529          * what we discover.
2530          */
2531         err = t4vf_get_vfres(adapter);
2532         if (err) {
2533                 dev_err(adapter->pdev_dev, "unable to get virtual interface"
2534                         " resources: err=%d\n", err);
2535                 return err;
2536         }
2537
2538         /* Check for various parameter sanity issues */
2539         if (adapter->params.vfres.pmask == 0) {
2540                 dev_err(adapter->pdev_dev, "no port access configured\n"
2541                         "usable!\n");
2542                 return -EINVAL;
2543         }
2544         if (adapter->params.vfres.nvi == 0) {
2545                 dev_err(adapter->pdev_dev, "no virtual interfaces configured/"
2546                         "usable!\n");
2547                 return -EINVAL;
2548         }
2549
2550         /* Initialize nports and max_ethqsets now that we have our Virtual
2551          * Function Resources.
2552          */
2553         size_nports_qsets(adapter);
2554
2555         return 0;
2556 }
2557
2558 static inline void init_rspq(struct sge_rspq *rspq, u8 timer_idx,
2559                              u8 pkt_cnt_idx, unsigned int size,
2560                              unsigned int iqe_size)
2561 {
2562         rspq->intr_params = (QINTR_TIMER_IDX_V(timer_idx) |
2563                              (pkt_cnt_idx < SGE_NCOUNTERS ?
2564                               QINTR_CNT_EN_F : 0));
2565         rspq->pktcnt_idx = (pkt_cnt_idx < SGE_NCOUNTERS
2566                             ? pkt_cnt_idx
2567                             : 0);
2568         rspq->iqe_len = iqe_size;
2569         rspq->size = size;
2570 }
2571
2572 /*
2573  * Perform default configuration of DMA queues depending on the number and
2574  * type of ports we found and the number of available CPUs.  Most settings can
2575  * be modified by the admin via ethtool and cxgbtool prior to the adapter
2576  * being brought up for the first time.
2577  */
2578 static void cfg_queues(struct adapter *adapter)
2579 {
2580         struct sge *s = &adapter->sge;
2581         int q10g, n10g, qidx, pidx, qs;
2582         size_t iqe_size;
2583
2584         /*
2585          * We should not be called till we know how many Queue Sets we can
2586          * support.  In particular, this means that we need to know what kind
2587          * of interrupts we'll be using ...
2588          */
2589         BUG_ON((adapter->flags & (USING_MSIX|USING_MSI)) == 0);
2590
2591         /*
2592          * Count the number of 10GbE Virtual Interfaces that we have.
2593          */
2594         n10g = 0;
2595         for_each_port(adapter, pidx)
2596                 n10g += is_x_10g_port(&adap2pinfo(adapter, pidx)->link_cfg);
2597
2598         /*
2599          * We default to 1 queue per non-10G port and up to # of cores queues
2600          * per 10G port.
2601          */
2602         if (n10g == 0)
2603                 q10g = 0;
2604         else {
2605                 int n1g = (adapter->params.nports - n10g);
2606                 q10g = (adapter->sge.max_ethqsets - n1g) / n10g;
2607                 if (q10g > num_online_cpus())
2608                         q10g = num_online_cpus();
2609         }
2610
2611         /*
2612          * Allocate the "Queue Sets" to the various Virtual Interfaces.
2613          * The layout will be established in setup_sge_queues() when the
2614          * adapter is brough up for the first time.
2615          */
2616         qidx = 0;
2617         for_each_port(adapter, pidx) {
2618                 struct port_info *pi = adap2pinfo(adapter, pidx);
2619
2620                 pi->first_qset = qidx;
2621                 pi->nqsets = is_x_10g_port(&pi->link_cfg) ? q10g : 1;
2622                 qidx += pi->nqsets;
2623         }
2624         s->ethqsets = qidx;
2625
2626         /*
2627          * The Ingress Queue Entry Size for our various Response Queues needs
2628          * to be big enough to accommodate the largest message we can receive
2629          * from the chip/firmware; which is 64 bytes ...
2630          */
2631         iqe_size = 64;
2632
2633         /*
2634          * Set up default Queue Set parameters ...  Start off with the
2635          * shortest interrupt holdoff timer.
2636          */
2637         for (qs = 0; qs < s->max_ethqsets; qs++) {
2638                 struct sge_eth_rxq *rxq = &s->ethrxq[qs];
2639                 struct sge_eth_txq *txq = &s->ethtxq[qs];
2640
2641                 init_rspq(&rxq->rspq, 0, 0, 1024, iqe_size);
2642                 rxq->fl.size = 72;
2643                 txq->q.size = 1024;
2644         }
2645
2646         /*
2647          * The firmware event queue is used for link state changes and
2648          * notifications of TX DMA completions.
2649          */
2650         init_rspq(&s->fw_evtq, SGE_TIMER_RSTRT_CNTR, 0, 512, iqe_size);
2651
2652         /*
2653          * The forwarded interrupt queue is used when we're in MSI interrupt
2654          * mode.  In this mode all interrupts associated with RX queues will
2655          * be forwarded to a single queue which we'll associate with our MSI
2656          * interrupt vector.  The messages dropped in the forwarded interrupt
2657          * queue will indicate which ingress queue needs servicing ...  This
2658          * queue needs to be large enough to accommodate all of the ingress
2659          * queues which are forwarding their interrupt (+1 to prevent the PIDX
2660          * from equalling the CIDX if every ingress queue has an outstanding
2661          * interrupt).  The queue doesn't need to be any larger because no
2662          * ingress queue will ever have more than one outstanding interrupt at
2663          * any time ...
2664          */
2665         init_rspq(&s->intrq, SGE_TIMER_RSTRT_CNTR, 0, MSIX_ENTRIES + 1,
2666                   iqe_size);
2667 }
2668
2669 /*
2670  * Reduce the number of Ethernet queues across all ports to at most n.
2671  * n provides at least one queue per port.
2672  */
2673 static void reduce_ethqs(struct adapter *adapter, int n)
2674 {
2675         int i;
2676         struct port_info *pi;
2677
2678         /*
2679          * While we have too many active Ether Queue Sets, interate across the
2680          * "ports" and reduce their individual Queue Set allocations.
2681          */
2682         BUG_ON(n < adapter->params.nports);
2683         while (n < adapter->sge.ethqsets)
2684                 for_each_port(adapter, i) {
2685                         pi = adap2pinfo(adapter, i);
2686                         if (pi->nqsets > 1) {
2687                                 pi->nqsets--;
2688                                 adapter->sge.ethqsets--;
2689                                 if (adapter->sge.ethqsets <= n)
2690                                         break;
2691                         }
2692                 }
2693
2694         /*
2695          * Reassign the starting Queue Sets for each of the "ports" ...
2696          */
2697         n = 0;
2698         for_each_port(adapter, i) {
2699                 pi = adap2pinfo(adapter, i);
2700                 pi->first_qset = n;
2701                 n += pi->nqsets;
2702         }
2703 }
2704
2705 /*
2706  * We need to grab enough MSI-X vectors to cover our interrupt needs.  Ideally
2707  * we get a separate MSI-X vector for every "Queue Set" plus any extras we
2708  * need.  Minimally we need one for every Virtual Interface plus those needed
2709  * for our "extras".  Note that this process may lower the maximum number of
2710  * allowed Queue Sets ...
2711  */
2712 static int enable_msix(struct adapter *adapter)
2713 {
2714         int i, want, need, nqsets;
2715         struct msix_entry entries[MSIX_ENTRIES];
2716         struct sge *s = &adapter->sge;
2717
2718         for (i = 0; i < MSIX_ENTRIES; ++i)
2719                 entries[i].entry = i;
2720
2721         /*
2722          * We _want_ enough MSI-X interrupts to cover all of our "Queue Sets"
2723          * plus those needed for our "extras" (for example, the firmware
2724          * message queue).  We _need_ at least one "Queue Set" per Virtual
2725          * Interface plus those needed for our "extras".  So now we get to see
2726          * if the song is right ...
2727          */
2728         want = s->max_ethqsets + MSIX_EXTRAS;
2729         need = adapter->params.nports + MSIX_EXTRAS;
2730
2731         want = pci_enable_msix_range(adapter->pdev, entries, need, want);
2732         if (want < 0)
2733                 return want;
2734
2735         nqsets = want - MSIX_EXTRAS;
2736         if (nqsets < s->max_ethqsets) {
2737                 dev_warn(adapter->pdev_dev, "only enough MSI-X vectors"
2738                          " for %d Queue Sets\n", nqsets);
2739                 s->max_ethqsets = nqsets;
2740                 if (nqsets < s->ethqsets)
2741                         reduce_ethqs(adapter, nqsets);
2742         }
2743         for (i = 0; i < want; ++i)
2744                 adapter->msix_info[i].vec = entries[i].vector;
2745
2746         return 0;
2747 }
2748
2749 static const struct net_device_ops cxgb4vf_netdev_ops   = {
2750         .ndo_open               = cxgb4vf_open,
2751         .ndo_stop               = cxgb4vf_stop,
2752         .ndo_start_xmit         = t4vf_eth_xmit,
2753         .ndo_get_stats          = cxgb4vf_get_stats,
2754         .ndo_set_rx_mode        = cxgb4vf_set_rxmode,
2755         .ndo_set_mac_address    = cxgb4vf_set_mac_addr,
2756         .ndo_validate_addr      = eth_validate_addr,
2757         .ndo_do_ioctl           = cxgb4vf_do_ioctl,
2758         .ndo_change_mtu         = cxgb4vf_change_mtu,
2759         .ndo_fix_features       = cxgb4vf_fix_features,
2760         .ndo_set_features       = cxgb4vf_set_features,
2761 #ifdef CONFIG_NET_POLL_CONTROLLER
2762         .ndo_poll_controller    = cxgb4vf_poll_controller,
2763 #endif
2764 };
2765
2766 /*
2767  * "Probe" a device: initialize a device and construct all kernel and driver
2768  * state needed to manage the device.  This routine is called "init_one" in
2769  * the PF Driver ...
2770  */
2771 static int cxgb4vf_pci_probe(struct pci_dev *pdev,
2772                              const struct pci_device_id *ent)
2773 {
2774         int pci_using_dac;
2775         int err, pidx;
2776         unsigned int pmask;
2777         struct adapter *adapter;
2778         struct port_info *pi;
2779         struct net_device *netdev;
2780
2781         /*
2782          * Print our driver banner the first time we're called to initialize a
2783          * device.
2784          */
2785         pr_info_once("%s - version %s\n", DRV_DESC, DRV_VERSION);
2786
2787         /*
2788          * Initialize generic PCI device state.
2789          */
2790         err = pci_enable_device(pdev);
2791         if (err) {
2792                 dev_err(&pdev->dev, "cannot enable PCI device\n");
2793                 return err;
2794         }
2795
2796         /*
2797          * Reserve PCI resources for the device.  If we can't get them some
2798          * other driver may have already claimed the device ...
2799          */
2800         err = pci_request_regions(pdev, KBUILD_MODNAME);
2801         if (err) {
2802                 dev_err(&pdev->dev, "cannot obtain PCI resources\n");
2803                 goto err_disable_device;
2804         }
2805
2806         /*
2807          * Set up our DMA mask: try for 64-bit address masking first and
2808          * fall back to 32-bit if we can't get 64 bits ...
2809          */
2810         err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
2811         if (err == 0) {
2812                 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
2813                 if (err) {
2814                         dev_err(&pdev->dev, "unable to obtain 64-bit DMA for"
2815                                 " coherent allocations\n");
2816                         goto err_release_regions;
2817                 }
2818                 pci_using_dac = 1;
2819         } else {
2820                 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
2821                 if (err != 0) {
2822                         dev_err(&pdev->dev, "no usable DMA configuration\n");
2823                         goto err_release_regions;
2824                 }
2825                 pci_using_dac = 0;
2826         }
2827
2828         /*
2829          * Enable bus mastering for the device ...
2830          */
2831         pci_set_master(pdev);
2832
2833         /*
2834          * Allocate our adapter data structure and attach it to the device.
2835          */
2836         adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
2837         if (!adapter) {
2838                 err = -ENOMEM;
2839                 goto err_release_regions;
2840         }
2841         pci_set_drvdata(pdev, adapter);
2842         adapter->pdev = pdev;
2843         adapter->pdev_dev = &pdev->dev;
2844
2845         adapter->mbox_log = kzalloc(sizeof(*adapter->mbox_log) +
2846                                     (sizeof(struct mbox_cmd) *
2847                                      T4VF_OS_LOG_MBOX_CMDS),
2848                                     GFP_KERNEL);
2849         if (!adapter->mbox_log) {
2850                 err = -ENOMEM;
2851                 goto err_free_adapter;
2852         }
2853         adapter->mbox_log->size = T4VF_OS_LOG_MBOX_CMDS;
2854
2855         /*
2856          * Initialize SMP data synchronization resources.
2857          */
2858         spin_lock_init(&adapter->stats_lock);
2859         spin_lock_init(&adapter->mbox_lock);
2860         INIT_LIST_HEAD(&adapter->mlist.list);
2861
2862         /*
2863          * Map our I/O registers in BAR0.
2864          */
2865         adapter->regs = pci_ioremap_bar(pdev, 0);
2866         if (!adapter->regs) {
2867                 dev_err(&pdev->dev, "cannot map device registers\n");
2868                 err = -ENOMEM;
2869                 goto err_free_adapter;
2870         }
2871
2872         /* Wait for the device to become ready before proceeding ...
2873          */
2874         err = t4vf_prep_adapter(adapter);
2875         if (err) {
2876                 dev_err(adapter->pdev_dev, "device didn't become ready:"
2877                         " err=%d\n", err);
2878                 goto err_unmap_bar0;
2879         }
2880
2881         /* For T5 and later we want to use the new BAR-based User Doorbells,
2882          * so we need to map BAR2 here ...
2883          */
2884         if (!is_t4(adapter->params.chip)) {
2885                 adapter->bar2 = ioremap_wc(pci_resource_start(pdev, 2),
2886                                            pci_resource_len(pdev, 2));
2887                 if (!adapter->bar2) {
2888                         dev_err(adapter->pdev_dev, "cannot map BAR2 doorbells\n");
2889                         err = -ENOMEM;
2890                         goto err_unmap_bar0;
2891                 }
2892         }
2893         /*
2894          * Initialize adapter level features.
2895          */
2896         adapter->name = pci_name(pdev);
2897         adapter->msg_enable = dflt_msg_enable;
2898         err = adap_init0(adapter);
2899         if (err)
2900                 goto err_unmap_bar;
2901
2902         /*
2903          * Allocate our "adapter ports" and stitch everything together.
2904          */
2905         pmask = adapter->params.vfres.pmask;
2906         for_each_port(adapter, pidx) {
2907                 int port_id, viid;
2908
2909                 /*
2910                  * We simplistically allocate our virtual interfaces
2911                  * sequentially across the port numbers to which we have
2912                  * access rights.  This should be configurable in some manner
2913                  * ...
2914                  */
2915                 if (pmask == 0)
2916                         break;
2917                 port_id = ffs(pmask) - 1;
2918                 pmask &= ~(1 << port_id);
2919                 viid = t4vf_alloc_vi(adapter, port_id);
2920                 if (viid < 0) {
2921                         dev_err(&pdev->dev, "cannot allocate VI for port %d:"
2922                                 " err=%d\n", port_id, viid);
2923                         err = viid;
2924                         goto err_free_dev;
2925                 }
2926
2927                 /*
2928                  * Allocate our network device and stitch things together.
2929                  */
2930                 netdev = alloc_etherdev_mq(sizeof(struct port_info),
2931                                            MAX_PORT_QSETS);
2932                 if (netdev == NULL) {
2933                         t4vf_free_vi(adapter, viid);
2934                         err = -ENOMEM;
2935                         goto err_free_dev;
2936                 }
2937                 adapter->port[pidx] = netdev;
2938                 SET_NETDEV_DEV(netdev, &pdev->dev);
2939                 pi = netdev_priv(netdev);
2940                 pi->adapter = adapter;
2941                 pi->pidx = pidx;
2942                 pi->port_id = port_id;
2943                 pi->viid = viid;
2944
2945                 /*
2946                  * Initialize the starting state of our "port" and register
2947                  * it.
2948                  */
2949                 pi->xact_addr_filt = -1;
2950                 netif_carrier_off(netdev);
2951                 netdev->irq = pdev->irq;
2952
2953                 netdev->hw_features = NETIF_F_SG | TSO_FLAGS |
2954                         NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
2955                         NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_RXCSUM;
2956                 netdev->vlan_features = NETIF_F_SG | TSO_FLAGS |
2957                         NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
2958                         NETIF_F_HIGHDMA;
2959                 netdev->features = netdev->hw_features |
2960                                    NETIF_F_HW_VLAN_CTAG_TX;
2961                 if (pci_using_dac)
2962                         netdev->features |= NETIF_F_HIGHDMA;
2963
2964                 netdev->priv_flags |= IFF_UNICAST_FLT;
2965
2966                 netdev->netdev_ops = &cxgb4vf_netdev_ops;
2967                 netdev->ethtool_ops = &cxgb4vf_ethtool_ops;
2968
2969                 /*
2970                  * Initialize the hardware/software state for the port.
2971                  */
2972                 err = t4vf_port_init(adapter, pidx);
2973                 if (err) {
2974                         dev_err(&pdev->dev, "cannot initialize port %d\n",
2975                                 pidx);
2976                         goto err_free_dev;
2977                 }
2978         }
2979
2980         /* See what interrupts we'll be using.  If we've been configured to
2981          * use MSI-X interrupts, try to enable them but fall back to using
2982          * MSI interrupts if we can't enable MSI-X interrupts.  If we can't
2983          * get MSI interrupts we bail with the error.
2984          */
2985         if (msi == MSI_MSIX && enable_msix(adapter) == 0)
2986                 adapter->flags |= USING_MSIX;
2987         else {
2988                 if (msi == MSI_MSIX) {
2989                         dev_info(adapter->pdev_dev,
2990                                  "Unable to use MSI-X Interrupts; falling "
2991                                  "back to MSI Interrupts\n");
2992
2993                         /* We're going to need a Forwarded Interrupt Queue so
2994                          * that may cut into how many Queue Sets we can
2995                          * support.
2996                          */
2997                         msi = MSI_MSI;
2998                         size_nports_qsets(adapter);
2999                 }
3000                 err = pci_enable_msi(pdev);
3001                 if (err) {
3002                         dev_err(&pdev->dev, "Unable to allocate MSI Interrupts;"
3003                                 " err=%d\n", err);
3004                         goto err_free_dev;
3005                 }
3006                 adapter->flags |= USING_MSI;
3007         }
3008
3009         /* Now that we know how many "ports" we have and what interrupt
3010          * mechanism we're going to use, we can configure our queue resources.
3011          */
3012         cfg_queues(adapter);
3013
3014         /*
3015          * The "card" is now ready to go.  If any errors occur during device
3016          * registration we do not fail the whole "card" but rather proceed
3017          * only with the ports we manage to register successfully.  However we
3018          * must register at least one net device.
3019          */
3020         for_each_port(adapter, pidx) {
3021                 struct port_info *pi = netdev_priv(adapter->port[pidx]);
3022                 netdev = adapter->port[pidx];
3023                 if (netdev == NULL)
3024                         continue;
3025
3026                 netif_set_real_num_tx_queues(netdev, pi->nqsets);
3027                 netif_set_real_num_rx_queues(netdev, pi->nqsets);
3028
3029                 err = register_netdev(netdev);
3030                 if (err) {
3031                         dev_warn(&pdev->dev, "cannot register net device %s,"
3032                                  " skipping\n", netdev->name);
3033                         continue;
3034                 }
3035
3036                 set_bit(pidx, &adapter->registered_device_map);
3037         }
3038         if (adapter->registered_device_map == 0) {
3039                 dev_err(&pdev->dev, "could not register any net devices\n");
3040                 goto err_disable_interrupts;
3041         }
3042
3043         /*
3044          * Set up our debugfs entries.
3045          */
3046         if (!IS_ERR_OR_NULL(cxgb4vf_debugfs_root)) {
3047                 adapter->debugfs_root =
3048                         debugfs_create_dir(pci_name(pdev),
3049                                            cxgb4vf_debugfs_root);
3050                 if (IS_ERR_OR_NULL(adapter->debugfs_root))
3051                         dev_warn(&pdev->dev, "could not create debugfs"
3052                                  " directory");
3053                 else
3054                         setup_debugfs(adapter);
3055         }
3056
3057         /*
3058          * Print a short notice on the existence and configuration of the new
3059          * VF network device ...
3060          */
3061         for_each_port(adapter, pidx) {
3062                 dev_info(adapter->pdev_dev, "%s: Chelsio VF NIC PCIe %s\n",
3063                          adapter->port[pidx]->name,
3064                          (adapter->flags & USING_MSIX) ? "MSI-X" :
3065                          (adapter->flags & USING_MSI)  ? "MSI" : "");
3066         }
3067
3068         /*
3069          * Return success!
3070          */
3071         return 0;
3072
3073         /*
3074          * Error recovery and exit code.  Unwind state that's been created
3075          * so far and return the error.
3076          */
3077 err_disable_interrupts:
3078         if (adapter->flags & USING_MSIX) {
3079                 pci_disable_msix(adapter->pdev);
3080                 adapter->flags &= ~USING_MSIX;
3081         } else if (adapter->flags & USING_MSI) {
3082                 pci_disable_msi(adapter->pdev);
3083                 adapter->flags &= ~USING_MSI;
3084         }
3085
3086 err_free_dev:
3087         for_each_port(adapter, pidx) {
3088                 netdev = adapter->port[pidx];
3089                 if (netdev == NULL)
3090                         continue;
3091                 pi = netdev_priv(netdev);
3092                 t4vf_free_vi(adapter, pi->viid);
3093                 if (test_bit(pidx, &adapter->registered_device_map))
3094                         unregister_netdev(netdev);
3095                 free_netdev(netdev);
3096         }
3097
3098 err_unmap_bar:
3099         if (!is_t4(adapter->params.chip))
3100                 iounmap(adapter->bar2);
3101
3102 err_unmap_bar0:
3103         iounmap(adapter->regs);
3104
3105 err_free_adapter:
3106         kfree(adapter->mbox_log);
3107         kfree(adapter);
3108
3109 err_release_regions:
3110         pci_release_regions(pdev);
3111         pci_clear_master(pdev);
3112
3113 err_disable_device:
3114         pci_disable_device(pdev);
3115
3116         return err;
3117 }
3118
3119 /*
3120  * "Remove" a device: tear down all kernel and driver state created in the
3121  * "probe" routine and quiesce the device (disable interrupts, etc.).  (Note
3122  * that this is called "remove_one" in the PF Driver.)
3123  */
3124 static void cxgb4vf_pci_remove(struct pci_dev *pdev)
3125 {
3126         struct adapter *adapter = pci_get_drvdata(pdev);
3127
3128         /*
3129          * Tear down driver state associated with device.
3130          */
3131         if (adapter) {
3132                 int pidx;
3133
3134                 /*
3135                  * Stop all of our activity.  Unregister network port,
3136                  * disable interrupts, etc.
3137                  */
3138                 for_each_port(adapter, pidx)
3139                         if (test_bit(pidx, &adapter->registered_device_map))
3140                                 unregister_netdev(adapter->port[pidx]);
3141                 t4vf_sge_stop(adapter);
3142                 if (adapter->flags & USING_MSIX) {
3143                         pci_disable_msix(adapter->pdev);
3144                         adapter->flags &= ~USING_MSIX;
3145                 } else if (adapter->flags & USING_MSI) {
3146                         pci_disable_msi(adapter->pdev);
3147                         adapter->flags &= ~USING_MSI;
3148                 }
3149
3150                 /*
3151                  * Tear down our debugfs entries.
3152                  */
3153                 if (!IS_ERR_OR_NULL(adapter->debugfs_root)) {
3154                         cleanup_debugfs(adapter);
3155                         debugfs_remove_recursive(adapter->debugfs_root);
3156                 }
3157
3158                 /*
3159                  * Free all of the various resources which we've acquired ...
3160                  */
3161                 t4vf_free_sge_resources(adapter);
3162                 for_each_port(adapter, pidx) {
3163                         struct net_device *netdev = adapter->port[pidx];
3164                         struct port_info *pi;
3165
3166                         if (netdev == NULL)
3167                                 continue;
3168
3169                         pi = netdev_priv(netdev);
3170                         t4vf_free_vi(adapter, pi->viid);
3171                         free_netdev(netdev);
3172                 }
3173                 iounmap(adapter->regs);
3174                 if (!is_t4(adapter->params.chip))
3175                         iounmap(adapter->bar2);
3176                 kfree(adapter->mbox_log);
3177                 kfree(adapter);
3178         }
3179
3180         /*
3181          * Disable the device and release its PCI resources.
3182          */
3183         pci_disable_device(pdev);
3184         pci_clear_master(pdev);
3185         pci_release_regions(pdev);
3186 }
3187
3188 /*
3189  * "Shutdown" quiesce the device, stopping Ingress Packet and Interrupt
3190  * delivery.
3191  */
3192 static void cxgb4vf_pci_shutdown(struct pci_dev *pdev)
3193 {
3194         struct adapter *adapter;
3195         int pidx;
3196
3197         adapter = pci_get_drvdata(pdev);
3198         if (!adapter)
3199                 return;
3200
3201         /* Disable all Virtual Interfaces.  This will shut down the
3202          * delivery of all ingress packets into the chip for these
3203          * Virtual Interfaces.
3204          */
3205         for_each_port(adapter, pidx)
3206                 if (test_bit(pidx, &adapter->registered_device_map))
3207                         unregister_netdev(adapter->port[pidx]);
3208
3209         /* Free up all Queues which will prevent further DMA and
3210          * Interrupts allowing various internal pathways to drain.
3211          */
3212         t4vf_sge_stop(adapter);
3213         if (adapter->flags & USING_MSIX) {
3214                 pci_disable_msix(adapter->pdev);
3215                 adapter->flags &= ~USING_MSIX;
3216         } else if (adapter->flags & USING_MSI) {
3217                 pci_disable_msi(adapter->pdev);
3218                 adapter->flags &= ~USING_MSI;
3219         }
3220
3221         /*
3222          * Free up all Queues which will prevent further DMA and
3223          * Interrupts allowing various internal pathways to drain.
3224          */
3225         t4vf_free_sge_resources(adapter);
3226         pci_set_drvdata(pdev, NULL);
3227 }
3228
/* Macros needed to support the PCI Device ID Table ...
 *
 * The table is not written out in this file.  The macros below supply
 * its pieces (array opening, one entry per device ID, terminator) and
 * the #include that follows is expected to expand them into
 * cxgb4vf_pci_tbl[].
 *
 * NOTE(review): how t4_pci_id_tbl.h uses these macros -- including
 * CH_PCI_DEVICE_ID_FUNCTION and where the closing ';' comes from -- is
 * not visible from here; confirm against that header.
 */
/* Opens the definition of the cxgb4vf_pci_tbl[] array. */
#define CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN \
	static const struct pci_device_id cxgb4vf_pci_tbl[] = {
/* NOTE(review): presumably selects the device-ID function field used for
 * Virtual Functions -- verify in t4_pci_id_tbl.h.
 */
#define CH_PCI_DEVICE_ID_FUNCTION	0x8

/* One table entry: a Chelsio vendor match on @devid with driver data 0. */
#define CH_PCI_ID_TABLE_ENTRY(devid) \
		{ PCI_VDEVICE(CHELSIO, (devid)), 0 }

/* Zeroed sentinel entry followed by the brace that closes the array. */
#define CH_PCI_DEVICE_ID_TABLE_DEFINE_END { 0, } }

#include "../cxgb4/t4_pci_id_tbl.h"
3241
/* Module metadata, plus export of the PCI ID table (cxgb4vf_pci_tbl) as
 * this module's PCI device table.
 */
MODULE_DESCRIPTION(DRV_DESC);
MODULE_AUTHOR("Chelsio Communications");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRV_VERSION);
MODULE_DEVICE_TABLE(pci, cxgb4vf_pci_tbl);
3247
3248 static struct pci_driver cxgb4vf_driver = {
3249         .name           = KBUILD_MODNAME,
3250         .id_table       = cxgb4vf_pci_tbl,
3251         .probe          = cxgb4vf_pci_probe,
3252         .remove         = cxgb4vf_pci_remove,
3253         .shutdown       = cxgb4vf_pci_shutdown,
3254 };
3255
3256 /*
3257  * Initialize global driver state.
3258  */
3259 static int __init cxgb4vf_module_init(void)
3260 {
3261         int ret;
3262
3263         /*
3264          * Vet our module parameters.
3265          */
3266         if (msi != MSI_MSIX && msi != MSI_MSI) {
3267                 pr_warn("bad module parameter msi=%d; must be %d (MSI-X or MSI) or %d (MSI)\n",
3268                         msi, MSI_MSIX, MSI_MSI);
3269                 return -EINVAL;
3270         }
3271
3272         /* Debugfs support is optional, just warn if this fails */
3273         cxgb4vf_debugfs_root = debugfs_create_dir(KBUILD_MODNAME, NULL);
3274         if (IS_ERR_OR_NULL(cxgb4vf_debugfs_root))
3275                 pr_warn("could not create debugfs entry, continuing\n");
3276
3277         ret = pci_register_driver(&cxgb4vf_driver);
3278         if (ret < 0 && !IS_ERR_OR_NULL(cxgb4vf_debugfs_root))
3279                 debugfs_remove(cxgb4vf_debugfs_root);
3280         return ret;
3281 }
3282
/*
 * Tear down global driver state.
 *
 * The PCI driver is unregistered first; only afterwards is the
 * module-level debugfs directory (cxgb4vf_debugfs_root, set up in
 * cxgb4vf_module_init()) removed.
 */
static void __exit cxgb4vf_module_exit(void)
{
	pci_unregister_driver(&cxgb4vf_driver);
	/*
	 * Passed unchecked even though creation may have failed at init.
	 * NOTE(review): relies on debugfs_remove() tolerating an error or
	 * NULL pointer -- confirm for the target kernel version.
	 */
	debugfs_remove(cxgb4vf_debugfs_root);
}
3291
/* Register the module's load and unload entry points. */
module_init(cxgb4vf_module_init);
module_exit(cxgb4vf_module_exit);