net/mlx5e: Use napi_alloc_skb for RX SKB allocations
[cascardo/linux.git] / drivers / net / ethernet / mellanox / mlx5 / core / en_main.c
index 402994b..9b17bc0 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2015-2016, Mellanox Technologies. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
  * SOFTWARE.
  */
 
+#include <net/tc_act/tc_gact.h>
+#include <net/pkt_cls.h>
 #include <linux/mlx5/fs.h>
+#include <net/vxlan.h>
 #include "en.h"
+#include "en_tc.h"
 #include "eswitch.h"
+#include "vxlan.h"
 
 struct mlx5e_rq_param {
        u32                        rqc[MLX5_ST_SZ_DW(rqc)];
@@ -43,6 +48,7 @@ struct mlx5e_sq_param {
        u32                        sqc[MLX5_ST_SZ_DW(sqc)];
        struct mlx5_wq_param       wq;
        u16                        max_inline;
+       bool                       icosq;
 };
 
 struct mlx5e_cq_param {
@@ -54,8 +60,10 @@ struct mlx5e_cq_param {
 struct mlx5e_channel_param {
        struct mlx5e_rq_param      rq;
        struct mlx5e_sq_param      sq;
+       struct mlx5e_sq_param      icosq;
        struct mlx5e_cq_param      rx_cq;
        struct mlx5e_cq_param      tx_cq;
+       struct mlx5e_cq_param      icosq_cq;
 };
 
 static void mlx5e_update_carrier(struct mlx5e_priv *priv)
@@ -124,6 +132,17 @@ free_out:
        kvfree(out);
 }
 
+static void mlx5e_update_q_counter(struct mlx5e_priv *priv)
+{
+       struct mlx5e_qcounter_stats *qcnt = &priv->stats.qcnt;
+
+       if (!priv->q_counter)
+               return;
+
+       mlx5_core_query_out_of_buffer(priv->mdev, priv->q_counter,
+                                     &qcnt->rx_out_of_buffer);
+}
+
 void mlx5e_update_stats(struct mlx5e_priv *priv)
 {
        struct mlx5_core_dev *mdev = priv->mdev;
@@ -147,15 +166,20 @@ void mlx5e_update_stats(struct mlx5e_priv *priv)
        s->tx_bytes             = 0;
        s->tso_packets          = 0;
        s->tso_bytes            = 0;
+       s->tso_inner_packets    = 0;
+       s->tso_inner_bytes      = 0;
        s->tx_queue_stopped     = 0;
        s->tx_queue_wake        = 0;
        s->tx_queue_dropped     = 0;
+       s->tx_csum_inner        = 0;
        tx_offload_none         = 0;
        s->lro_packets          = 0;
        s->lro_bytes            = 0;
        s->rx_csum_none         = 0;
        s->rx_csum_sw           = 0;
        s->rx_wqe_err           = 0;
+       s->rx_mpwqe_filler      = 0;
+       s->rx_mpwqe_frag        = 0;
        for (i = 0; i < priv->params.num_channels; i++) {
                rq_stats = &priv->channel[i]->rq.stats;
 
@@ -166,6 +190,8 @@ void mlx5e_update_stats(struct mlx5e_priv *priv)
                s->rx_csum_none += rq_stats->csum_none;
                s->rx_csum_sw   += rq_stats->csum_sw;
                s->rx_wqe_err   += rq_stats->wqe_err;
+               s->rx_mpwqe_filler += rq_stats->mpwqe_filler;
+               s->rx_mpwqe_frag   += rq_stats->mpwqe_frag;
 
                for (j = 0; j < priv->params.num_tc; j++) {
                        sq_stats = &priv->channel[i]->sq[j].stats;
@@ -174,9 +200,12 @@ void mlx5e_update_stats(struct mlx5e_priv *priv)
                        s->tx_bytes             += sq_stats->bytes;
                        s->tso_packets          += sq_stats->tso_packets;
                        s->tso_bytes            += sq_stats->tso_bytes;
+                       s->tso_inner_packets    += sq_stats->tso_inner_packets;
+                       s->tso_inner_bytes      += sq_stats->tso_inner_bytes;
                        s->tx_queue_stopped     += sq_stats->stopped;
                        s->tx_queue_wake        += sq_stats->wake;
                        s->tx_queue_dropped     += sq_stats->dropped;
+                       s->tx_csum_inner        += sq_stats->csum_offload_inner;
                        tx_offload_none         += sq_stats->csum_offload_none;
                }
        }
@@ -234,11 +263,13 @@ void mlx5e_update_stats(struct mlx5e_priv *priv)
                MLX5_GET_CTR(out, transmitted_eth_broadcast.octets);
 
        /* Update calculated offload counters */
-       s->tx_csum_offload = s->tx_packets - tx_offload_none;
+       s->tx_csum_offload = s->tx_packets - tx_offload_none - s->tx_csum_inner;
        s->rx_csum_good    = s->rx_packets - s->rx_csum_none -
                               s->rx_csum_sw;
 
        mlx5e_update_pport_counters(priv);
+       mlx5e_update_q_counter(priv);
+
 free_out:
        kvfree(out);
 }
@@ -258,9 +289,14 @@ static void mlx5e_update_stats_work(struct work_struct *work)
        mutex_unlock(&priv->state_lock);
 }
 
-static void __mlx5e_async_event(struct mlx5e_priv *priv,
-                               enum mlx5_dev_event event)
+static void mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv,
+                             enum mlx5_dev_event event, unsigned long param)
 {
+       struct mlx5e_priv *priv = vpriv;
+
+       if (!test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state))
+               return;
+
        switch (event) {
        case MLX5_DEV_EVENT_PORT_UP:
        case MLX5_DEV_EVENT_PORT_DOWN:
@@ -272,17 +308,6 @@ static void __mlx5e_async_event(struct mlx5e_priv *priv,
        }
 }
 
-static void mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv,
-                             enum mlx5_dev_event event, unsigned long param)
-{
-       struct mlx5e_priv *priv = vpriv;
-
-       spin_lock(&priv->async_events_spinlock);
-       if (test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state))
-               __mlx5e_async_event(priv, event);
-       spin_unlock(&priv->async_events_spinlock);
-}
-
 static void mlx5e_enable_async_events(struct mlx5e_priv *priv)
 {
        set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state);
@@ -290,9 +315,8 @@ static void mlx5e_enable_async_events(struct mlx5e_priv *priv)
 
 static void mlx5e_disable_async_events(struct mlx5e_priv *priv)
 {
-       spin_lock_irq(&priv->async_events_spinlock);
        clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state);
-       spin_unlock_irq(&priv->async_events_spinlock);
+       synchronize_irq(mlx5_get_msix_vec(priv->mdev, MLX5_EQ_VEC_ASYNC));
 }
 
 #define MLX5E_HW2SW_MTU(hwmtu) (hwmtu - (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN))
@@ -306,6 +330,7 @@ static int mlx5e_create_rq(struct mlx5e_channel *c,
        struct mlx5_core_dev *mdev = priv->mdev;
        void *rqc = param->rqc;
        void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
+       u32 byte_count;
        int wq_sz;
        int err;
        int i;
@@ -320,32 +345,54 @@ static int mlx5e_create_rq(struct mlx5e_channel *c,
        rq->wq.db = &rq->wq.db[MLX5_RCV_DBR];
 
        wq_sz = mlx5_wq_ll_get_size(&rq->wq);
-       rq->skb = kzalloc_node(wq_sz * sizeof(*rq->skb), GFP_KERNEL,
-                              cpu_to_node(c->cpu));
-       if (!rq->skb) {
-               err = -ENOMEM;
-               goto err_rq_wq_destroy;
-       }
 
-       rq->wqe_sz = (priv->params.lro_en) ? priv->params.lro_wqe_sz :
-                                            MLX5E_SW2HW_MTU(priv->netdev->mtu);
-       rq->wqe_sz = SKB_DATA_ALIGN(rq->wqe_sz + MLX5E_NET_IP_ALIGN);
+       switch (priv->params.rq_wq_type) {
+       case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+               rq->wqe_info = kzalloc_node(wq_sz * sizeof(*rq->wqe_info),
+                                           GFP_KERNEL, cpu_to_node(c->cpu));
+               if (!rq->wqe_info) {
+                       err = -ENOMEM;
+                       goto err_rq_wq_destroy;
+               }
+               rq->handle_rx_cqe = mlx5e_handle_rx_cqe_mpwrq;
+               rq->alloc_wqe = mlx5e_alloc_rx_mpwqe;
+
+               rq->wqe_sz = MLX5_MPWRQ_NUM_STRIDES * MLX5_MPWRQ_STRIDE_SIZE;
+               byte_count = rq->wqe_sz;
+               break;
+       default: /* MLX5_WQ_TYPE_LINKED_LIST */
+               rq->skb = kzalloc_node(wq_sz * sizeof(*rq->skb), GFP_KERNEL,
+                                      cpu_to_node(c->cpu));
+               if (!rq->skb) {
+                       err = -ENOMEM;
+                       goto err_rq_wq_destroy;
+               }
+               rq->handle_rx_cqe = mlx5e_handle_rx_cqe;
+               rq->alloc_wqe = mlx5e_alloc_rx_wqe;
+
+               rq->wqe_sz = (priv->params.lro_en) ?
+                               priv->params.lro_wqe_sz :
+                               MLX5E_SW2HW_MTU(priv->netdev->mtu);
+               rq->wqe_sz = SKB_DATA_ALIGN(rq->wqe_sz);
+               byte_count = rq->wqe_sz;
+               byte_count |= MLX5_HW_START_PADDING;
+       }
 
        for (i = 0; i < wq_sz; i++) {
                struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i);
-               u32 byte_count = rq->wqe_sz - MLX5E_NET_IP_ALIGN;
 
-               wqe->data.lkey       = c->mkey_be;
-               wqe->data.byte_count =
-                       cpu_to_be32(byte_count | MLX5_HW_START_PADDING);
+               wqe->data.byte_count = cpu_to_be32(byte_count);
        }
 
+       rq->wq_type = priv->params.rq_wq_type;
        rq->pdev    = c->pdev;
        rq->netdev  = c->netdev;
        rq->tstamp  = &priv->tstamp;
        rq->channel = c;
        rq->ix      = c->ix;
        rq->priv    = c->priv;
+       rq->mkey_be = c->mkey_be;
+       rq->umr_mkey_be = cpu_to_be32(c->priv->umr_mkey.key);
 
        return 0;
 
@@ -357,7 +404,14 @@ err_rq_wq_destroy:
 
 static void mlx5e_destroy_rq(struct mlx5e_rq *rq)
 {
-       kfree(rq->skb);
+       switch (rq->wq_type) {
+       case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+               kfree(rq->wqe_info);
+               break;
+       default: /* MLX5_WQ_TYPE_LINKED_LIST */
+               kfree(rq->skb);
+       }
+
        mlx5_wq_destroy(&rq->wq_ctrl);
 }
 
@@ -454,6 +508,8 @@ static int mlx5e_open_rq(struct mlx5e_channel *c,
                         struct mlx5e_rq_param *param,
                         struct mlx5e_rq *rq)
 {
+       struct mlx5e_sq *sq = &c->icosq;
+       u16 pi = sq->pc & sq->wq.sz_m1;
        int err;
 
        err = mlx5e_create_rq(c, param, rq);
@@ -469,7 +525,10 @@ static int mlx5e_open_rq(struct mlx5e_channel *c,
                goto err_disable_rq;
 
        set_bit(MLX5E_RQ_STATE_POST_WQES_ENABLE, &rq->state);
-       mlx5e_send_nop(&c->sq[0], true); /* trigger mlx5e_post_rx_wqes() */
+
+       sq->ico_wqe_info[pi].opcode     = MLX5_OPCODE_NOP;
+       sq->ico_wqe_info[pi].num_wqebbs = 1;
+       mlx5e_send_nop(sq, true); /* trigger mlx5e_post_rx_wqes() */
 
        return 0;
 
@@ -535,10 +594,9 @@ static int mlx5e_create_sq(struct mlx5e_channel *c,
 
        void *sqc = param->sqc;
        void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq);
-       int txq_ix;
        int err;
 
-       err = mlx5_alloc_map_uar(mdev, &sq->uar);
+       err = mlx5_alloc_map_uar(mdev, &sq->uar, true);
        if (err)
                return err;
 
@@ -550,8 +608,12 @@ static int mlx5e_create_sq(struct mlx5e_channel *c,
                goto err_unmap_free_uar;
 
        sq->wq.db       = &sq->wq.db[MLX5_SND_DBR];
-       sq->uar_map     = sq->uar.map;
-       sq->uar_bf_map  = sq->uar.bf_map;
+       if (sq->uar.bf_map) {
+               set_bit(MLX5E_SQ_STATE_BF_ENABLE, &sq->state);
+               sq->uar_map = sq->uar.bf_map;
+       } else {
+               sq->uar_map = sq->uar.map;
+       }
        sq->bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2;
        sq->max_inline  = param->max_inline;
 
@@ -559,8 +621,24 @@ static int mlx5e_create_sq(struct mlx5e_channel *c,
        if (err)
                goto err_sq_wq_destroy;
 
-       txq_ix = c->ix + tc * priv->params.num_channels;
-       sq->txq = netdev_get_tx_queue(priv->netdev, txq_ix);
+       if (param->icosq) {
+               u8 wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
+
+               sq->ico_wqe_info = kzalloc_node(sizeof(*sq->ico_wqe_info) *
+                                               wq_sz,
+                                               GFP_KERNEL,
+                                               cpu_to_node(c->cpu));
+               if (!sq->ico_wqe_info) {
+                       err = -ENOMEM;
+                       goto err_free_sq_db;
+               }
+       } else {
+               int txq_ix;
+
+               txq_ix = c->ix + tc * priv->params.num_channels;
+               sq->txq = netdev_get_tx_queue(priv->netdev, txq_ix);
+               priv->txq_to_sq_map[txq_ix] = sq;
+       }
 
        sq->pdev      = c->pdev;
        sq->tstamp    = &priv->tstamp;
@@ -569,10 +647,12 @@ static int mlx5e_create_sq(struct mlx5e_channel *c,
        sq->tc        = tc;
        sq->edge      = (sq->wq.sz_m1 + 1) - MLX5_SEND_WQE_MAX_WQEBBS;
        sq->bf_budget = MLX5E_SQ_BF_BUDGET;
-       priv->txq_to_sq_map[txq_ix] = sq;
 
        return 0;
 
+err_free_sq_db:
+       mlx5e_free_sq_db(sq);
+
 err_sq_wq_destroy:
        mlx5_wq_destroy(&sq->wq_ctrl);
 
@@ -587,6 +667,7 @@ static void mlx5e_destroy_sq(struct mlx5e_sq *sq)
        struct mlx5e_channel *c = sq->channel;
        struct mlx5e_priv *priv = c->priv;
 
+       kfree(sq->ico_wqe_info);
        mlx5e_free_sq_db(sq);
        mlx5_wq_destroy(&sq->wq_ctrl);
        mlx5_unmap_free_uar(priv->mdev, &sq->uar);
@@ -615,10 +696,10 @@ static int mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param)
 
        memcpy(sqc, param->sqc, sizeof(param->sqc));
 
-       MLX5_SET(sqc,  sqc, tis_num_0,          priv->tisn[sq->tc]);
-       MLX5_SET(sqc,  sqc, cqn,                c->sq[sq->tc].cq.mcq.cqn);
+       MLX5_SET(sqc,  sqc, tis_num_0, param->icosq ? 0 : priv->tisn[sq->tc]);
+       MLX5_SET(sqc,  sqc, cqn,                sq->cq.mcq.cqn);
        MLX5_SET(sqc,  sqc, state,              MLX5_SQC_STATE_RST);
-       MLX5_SET(sqc,  sqc, tis_lst_sz,         1);
+       MLX5_SET(sqc,  sqc, tis_lst_sz,         param->icosq ? 0 : 1);
        MLX5_SET(sqc,  sqc, flush_in_error_en,  1);
 
        MLX5_SET(wq,   wq, wq_type,       MLX5_WQ_TYPE_CYCLIC);
@@ -693,9 +774,11 @@ static int mlx5e_open_sq(struct mlx5e_channel *c,
        if (err)
                goto err_disable_sq;
 
-       set_bit(MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, &sq->state);
-       netdev_tx_reset_queue(sq->txq);
-       netif_tx_start_queue(sq->txq);
+       if (sq->txq) {
+               set_bit(MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, &sq->state);
+               netdev_tx_reset_queue(sq->txq);
+               netif_tx_start_queue(sq->txq);
+       }
 
        return 0;
 
@@ -716,15 +799,19 @@ static inline void netif_tx_disable_queue(struct netdev_queue *txq)
 
 static void mlx5e_close_sq(struct mlx5e_sq *sq)
 {
-       clear_bit(MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, &sq->state);
-       napi_synchronize(&sq->channel->napi); /* prevent netif_tx_wake_queue */
-       netif_tx_disable_queue(sq->txq);
+       if (sq->txq) {
+               clear_bit(MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, &sq->state);
+               /* prevent netif_tx_wake_queue */
+               napi_synchronize(&sq->channel->napi);
+               netif_tx_disable_queue(sq->txq);
+
+               /* ensure hw is notified of all pending wqes */
+               if (mlx5e_sq_has_room_for(sq, 1))
+                       mlx5e_send_nop(sq, true);
 
-       /* ensure hw is notified of all pending wqes */
-       if (mlx5e_sq_has_room_for(sq, 1))
-               mlx5e_send_nop(sq, true);
+               mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR);
+       }
 
-       mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR);
        while (sq->cc != sq->pc) /* wait till sq is empty */
                msleep(20);
 
@@ -860,12 +947,10 @@ static int mlx5e_open_cq(struct mlx5e_channel *c,
        if (err)
                goto err_destroy_cq;
 
-       err = mlx5_core_modify_cq_moderation(mdev, &cq->mcq,
-                                            moderation_usecs,
-                                            moderation_frames);
-       if (err)
-               goto err_destroy_cq;
-
+       if (MLX5_CAP_GEN(mdev, cq_moderation))
+               mlx5_core_modify_cq_moderation(mdev, &cq->mcq,
+                                              moderation_usecs,
+                                              moderation_frames);
        return 0;
 
 err_destroy_cq:
@@ -973,17 +1058,21 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
        c->cpu      = cpu;
        c->pdev     = &priv->mdev->pdev->dev;
        c->netdev   = priv->netdev;
-       c->mkey_be  = cpu_to_be32(priv->mr.key);
+       c->mkey_be  = cpu_to_be32(priv->mkey.key);
        c->num_tc   = priv->params.num_tc;
 
        mlx5e_build_channeltc_to_txq_map(priv, ix);
 
        netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64);
 
-       err = mlx5e_open_tx_cqs(c, cparam);
+       err = mlx5e_open_cq(c, &cparam->icosq_cq, &c->icosq.cq, 0, 0);
        if (err)
                goto err_napi_del;
 
+       err = mlx5e_open_tx_cqs(c, cparam);
+       if (err)
+               goto err_close_icosq_cq;
+
        err = mlx5e_open_cq(c, &cparam->rx_cq, &c->rq.cq,
                            priv->params.rx_cq_moderation_usec,
                            priv->params.rx_cq_moderation_pkts);
@@ -992,10 +1081,14 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
 
        napi_enable(&c->napi);
 
-       err = mlx5e_open_sqs(c, cparam);
+       err = mlx5e_open_sq(c, 0, &cparam->icosq, &c->icosq);
        if (err)
                goto err_disable_napi;
 
+       err = mlx5e_open_sqs(c, cparam);
+       if (err)
+               goto err_close_icosq;
+
        err = mlx5e_open_rq(c, &cparam->rq, &c->rq);
        if (err)
                goto err_close_sqs;
@@ -1008,6 +1101,9 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
 err_close_sqs:
        mlx5e_close_sqs(c);
 
+err_close_icosq:
+       mlx5e_close_sq(&c->icosq);
+
 err_disable_napi:
        napi_disable(&c->napi);
        mlx5e_close_cq(&c->rq.cq);
@@ -1015,6 +1111,9 @@ err_disable_napi:
 err_close_tx_cqs:
        mlx5e_close_tx_cqs(c);
 
+err_close_icosq_cq:
+       mlx5e_close_cq(&c->icosq.cq);
+
 err_napi_del:
        netif_napi_del(&c->napi);
        napi_hash_del(&c->napi);
@@ -1027,9 +1126,11 @@ static void mlx5e_close_channel(struct mlx5e_channel *c)
 {
        mlx5e_close_rq(&c->rq);
        mlx5e_close_sqs(c);
+       mlx5e_close_sq(&c->icosq);
        napi_disable(&c->napi);
        mlx5e_close_cq(&c->rq.cq);
        mlx5e_close_tx_cqs(c);
+       mlx5e_close_cq(&c->icosq.cq);
        netif_napi_del(&c->napi);
 
        napi_hash_del(&c->napi);
@@ -1044,27 +1145,58 @@ static void mlx5e_build_rq_param(struct mlx5e_priv *priv,
        void *rqc = param->rqc;
        void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
 
-       MLX5_SET(wq, wq, wq_type,          MLX5_WQ_TYPE_LINKED_LIST);
+       switch (priv->params.rq_wq_type) {
+       case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+               MLX5_SET(wq, wq, log_wqe_num_of_strides,
+                        MLX5_MPWRQ_LOG_NUM_STRIDES - 9);
+               MLX5_SET(wq, wq, log_wqe_stride_size,
+                        MLX5_MPWRQ_LOG_STRIDE_SIZE - 6);
+               MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ);
+               break;
+       default: /* MLX5_WQ_TYPE_LINKED_LIST */
+               MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST);
+       }
+
        MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
        MLX5_SET(wq, wq, log_wq_stride,    ilog2(sizeof(struct mlx5e_rx_wqe)));
        MLX5_SET(wq, wq, log_wq_sz,        priv->params.log_rq_size);
        MLX5_SET(wq, wq, pd,               priv->pdn);
+       MLX5_SET(rqc, rqc, counter_set_id, priv->q_counter);
 
        param->wq.buf_numa_node = dev_to_node(&priv->mdev->pdev->dev);
        param->wq.linear = 1;
 }
 
-static void mlx5e_build_sq_param(struct mlx5e_priv *priv,
-                                struct mlx5e_sq_param *param)
+static void mlx5e_build_drop_rq_param(struct mlx5e_rq_param *param)
+{
+       void *rqc = param->rqc;
+       void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
+
+       MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST);
+       MLX5_SET(wq, wq, log_wq_stride,    ilog2(sizeof(struct mlx5e_rx_wqe)));
+}
+
+static void mlx5e_build_sq_param_common(struct mlx5e_priv *priv,
+                                       struct mlx5e_sq_param *param)
 {
        void *sqc = param->sqc;
        void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
 
-       MLX5_SET(wq, wq, log_wq_sz,     priv->params.log_sq_size);
        MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
        MLX5_SET(wq, wq, pd,            priv->pdn);
 
        param->wq.buf_numa_node = dev_to_node(&priv->mdev->pdev->dev);
+}
+
+static void mlx5e_build_sq_param(struct mlx5e_priv *priv,
+                                struct mlx5e_sq_param *param)
+{
+       void *sqc = param->sqc;
+       void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
+
+       mlx5e_build_sq_param_common(priv, param);
+       MLX5_SET(wq, wq, log_wq_sz,     priv->params.log_sq_size);
+
        param->max_inline = priv->params.tx_max_inline;
 }
 
@@ -1080,8 +1212,18 @@ static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
                                    struct mlx5e_cq_param *param)
 {
        void *cqc = param->cqc;
+       u8 log_cq_size;
 
-       MLX5_SET(cqc, cqc, log_cq_size,  priv->params.log_rq_size);
+       switch (priv->params.rq_wq_type) {
+       case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+               log_cq_size = priv->params.log_rq_size +
+                       MLX5_MPWRQ_LOG_NUM_STRIDES;
+               break;
+       default: /* MLX5_WQ_TYPE_LINKED_LIST */
+               log_cq_size = priv->params.log_rq_size;
+       }
+
+       MLX5_SET(cqc, cqc, log_cq_size, log_cq_size);
 
        mlx5e_build_common_cq_param(priv, param);
 }
@@ -1091,20 +1233,50 @@ static void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv,
 {
        void *cqc = param->cqc;
 
-       MLX5_SET(cqc, cqc, log_cq_size,  priv->params.log_sq_size);
+       MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_sq_size);
+
+       mlx5e_build_common_cq_param(priv, param);
+}
+
+static void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv,
+                                    struct mlx5e_cq_param *param,
+                                    u8 log_wq_size)
+{
+       void *cqc = param->cqc;
+
+       MLX5_SET(cqc, cqc, log_cq_size, log_wq_size);
 
        mlx5e_build_common_cq_param(priv, param);
 }
 
+static void mlx5e_build_icosq_param(struct mlx5e_priv *priv,
+                                   struct mlx5e_sq_param *param,
+                                   u8 log_wq_size)
+{
+       void *sqc = param->sqc;
+       void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
+
+       mlx5e_build_sq_param_common(priv, param);
+
+       MLX5_SET(wq, wq, log_wq_sz, log_wq_size);
+       MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(priv->mdev, reg_umr_sq));
+
+       param->icosq = true;
+}
+
 static void mlx5e_build_channel_param(struct mlx5e_priv *priv,
                                      struct mlx5e_channel_param *cparam)
 {
+       u8 icosq_log_wq_sz = MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;
+
        memset(cparam, 0, sizeof(*cparam));
 
        mlx5e_build_rq_param(priv, &cparam->rq);
        mlx5e_build_sq_param(priv, &cparam->sq);
+       mlx5e_build_icosq_param(priv, &cparam->icosq, icosq_log_wq_sz);
        mlx5e_build_rx_cq_param(priv, &cparam->rx_cq);
        mlx5e_build_tx_cq_param(priv, &cparam->tx_cq);
+       mlx5e_build_ico_cq_param(priv, &cparam->icosq_cq, icosq_log_wq_sz);
 }
 
 static int mlx5e_open_channels(struct mlx5e_priv *priv)
@@ -1410,6 +1582,24 @@ static int mlx5e_set_dev_port_mtu(struct net_device *netdev)
        return 0;
 }
 
+static void mlx5e_netdev_set_tcs(struct net_device *netdev)
+{
+       struct mlx5e_priv *priv = netdev_priv(netdev);
+       int nch = priv->params.num_channels;
+       int ntc = priv->params.num_tc;
+       int tc;
+
+       netdev_reset_tc(netdev);
+
+       if (ntc == 1)
+               return;
+
+       netdev_set_num_tc(netdev, ntc);
+
+       for (tc = 0; tc < ntc; tc++)
+               netdev_set_tc_queue(netdev, tc, nch, tc * nch);
+}
+
 int mlx5e_open_locked(struct net_device *netdev)
 {
        struct mlx5e_priv *priv = netdev_priv(netdev);
@@ -1418,6 +1608,8 @@ int mlx5e_open_locked(struct net_device *netdev)
 
        set_bit(MLX5E_STATE_OPENED, &priv->state);
 
+       mlx5e_netdev_set_tcs(netdev);
+
        num_txqs = priv->params.num_channels * priv->params.num_tc;
        netif_set_real_num_tx_queues(netdev, num_txqs);
        netif_set_real_num_rx_queues(netdev, priv->params.num_channels);
@@ -1440,8 +1632,8 @@ int mlx5e_open_locked(struct net_device *netdev)
                goto err_close_channels;
        }
 
-       mlx5e_update_carrier(priv);
        mlx5e_redirect_rqts(priv);
+       mlx5e_update_carrier(priv);
        mlx5e_timestamp_init(priv);
 
        schedule_delayed_work(&priv->update_stats_work, 0);
@@ -1480,8 +1672,8 @@ int mlx5e_close_locked(struct net_device *netdev)
        clear_bit(MLX5E_STATE_OPENED, &priv->state);
 
        mlx5e_timestamp_cleanup(priv);
-       mlx5e_redirect_rqts(priv);
        netif_carrier_off(priv->netdev);
+       mlx5e_redirect_rqts(priv);
        mlx5e_close_channels(priv);
 
        return 0;
@@ -1563,8 +1755,7 @@ static int mlx5e_open_drop_rq(struct mlx5e_priv *priv)
 
        memset(&cq_param, 0, sizeof(cq_param));
        memset(&rq_param, 0, sizeof(rq_param));
-       mlx5e_build_rx_cq_param(priv, &cq_param);
-       mlx5e_build_rq_param(priv, &rq_param);
+       mlx5e_build_drop_rq_param(&rq_param);
 
        err = mlx5e_create_drop_cq(priv, cq, &cq_param);
        if (err)
@@ -1612,7 +1803,7 @@ static int mlx5e_create_tis(struct mlx5e_priv *priv, int tc)
 
        memset(in, 0, sizeof(in));
 
-       MLX5_SET(tisc, tisc, prio,  tc);
+       MLX5_SET(tisc, tisc, prio, tc << 1);
        MLX5_SET(tisc, tisc, transport_domain, priv->tdn);
 
        return mlx5_core_create_tis(mdev, in, sizeof(in), &priv->tisn[tc]);
@@ -1628,7 +1819,7 @@ static int mlx5e_create_tises(struct mlx5e_priv *priv)
        int err;
        int tc;
 
-       for (tc = 0; tc < priv->params.num_tc; tc++) {
+       for (tc = 0; tc < MLX5E_MAX_NUM_TC; tc++) {
                err = mlx5e_create_tis(priv, tc);
                if (err)
                        goto err_close_tises;
@@ -1647,7 +1838,7 @@ static void mlx5e_destroy_tises(struct mlx5e_priv *priv)
 {
        int tc;
 
-       for (tc = 0; tc < priv->params.num_tc; tc++)
+       for (tc = 0; tc < MLX5E_MAX_NUM_TC; tc++)
                mlx5e_destroy_tis(priv, tc);
 }
 
@@ -1824,6 +2015,58 @@ static void mlx5e_destroy_tirs(struct mlx5e_priv *priv)
                mlx5e_destroy_tir(priv, i);
 }
 
+static int mlx5e_setup_tc(struct net_device *netdev, u8 tc)
+{
+       struct mlx5e_priv *priv = netdev_priv(netdev);
+       bool was_opened;
+       int err = 0;
+
+       if (tc && tc != MLX5E_MAX_NUM_TC)
+               return -EINVAL;
+
+       mutex_lock(&priv->state_lock);
+
+       was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
+       if (was_opened)
+               mlx5e_close_locked(priv->netdev);
+
+       priv->params.num_tc = tc ? tc : 1;
+
+       if (was_opened)
+               err = mlx5e_open_locked(priv->netdev);
+
+       mutex_unlock(&priv->state_lock);
+
+       return err;
+}
+
+static int mlx5e_ndo_setup_tc(struct net_device *dev, u32 handle,
+                             __be16 proto, struct tc_to_netdev *tc)
+{
+       struct mlx5e_priv *priv = netdev_priv(dev);
+
+       if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS))
+               goto mqprio;
+
+       switch (tc->type) {
+       case TC_SETUP_CLSFLOWER:
+               switch (tc->cls_flower->command) {
+               case TC_CLSFLOWER_REPLACE:
+                       return mlx5e_configure_flower(priv, proto, tc->cls_flower);
+               case TC_CLSFLOWER_DESTROY:
+                       return mlx5e_delete_flower(priv, tc->cls_flower);
+               }
+       default:
+               return -EOPNOTSUPP;
+       }
+
+mqprio:
+       if (tc->type != TC_SETUP_MQPRIO)
+               return -EINVAL;
+
+       return mlx5e_setup_tc(dev, tc->tc);
+}
+
 static struct rtnl_link_stats64 *
 mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
 {
@@ -1881,7 +2124,8 @@ static int mlx5e_set_features(struct net_device *netdev,
        if (changes & NETIF_F_LRO) {
                bool was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
 
-               if (was_opened)
+               if (was_opened && (priv->params.rq_wq_type ==
+                                  MLX5_WQ_TYPE_LINKED_LIST))
                        mlx5e_close_locked(priv->netdev);
 
                priv->params.lro_en = !!(features & NETIF_F_LRO);
@@ -1890,7 +2134,8 @@ static int mlx5e_set_features(struct net_device *netdev,
                        mlx5_core_warn(priv->mdev, "lro modify failed, %d\n",
                                       err);
 
-               if (was_opened)
+               if (was_opened && (priv->params.rq_wq_type ==
+                                  MLX5_WQ_TYPE_LINKED_LIST))
                        err = mlx5e_open_locked(priv->netdev);
        }
 
@@ -1903,6 +2148,13 @@ static int mlx5e_set_features(struct net_device *netdev,
                        mlx5e_disable_vlan_filter(priv);
        }
 
+       if ((changes & NETIF_F_HW_TC) && !(features & NETIF_F_HW_TC) &&
+           mlx5e_tc_num_filters(priv)) {
+               netdev_err(netdev,
+                          "Active offloaded tc filters, can't turn hw_tc_offload off\n");
+               return -EINVAL;
+       }
+
        return err;
 }
 
@@ -2026,10 +2278,84 @@ static int mlx5e_get_vf_stats(struct net_device *dev,
                                            vf_stats);
 }
 
+static void mlx5e_add_vxlan_port(struct net_device *netdev,
+                                sa_family_t sa_family, __be16 port)
+{
+       struct mlx5e_priv *priv = netdev_priv(netdev);
+
+       if (!mlx5e_vxlan_allowed(priv->mdev))
+               return;
+
+       mlx5e_vxlan_add_port(priv, be16_to_cpu(port));
+}
+
+static void mlx5e_del_vxlan_port(struct net_device *netdev,
+                                sa_family_t sa_family, __be16 port)
+{
+       struct mlx5e_priv *priv = netdev_priv(netdev);
+
+       if (!mlx5e_vxlan_allowed(priv->mdev))
+               return;
+
+       mlx5e_vxlan_del_port(priv, be16_to_cpu(port));
+}
+
+static netdev_features_t mlx5e_vxlan_features_check(struct mlx5e_priv *priv,
+                                                   struct sk_buff *skb,
+                                                   netdev_features_t features)
+{
+       struct udphdr *udph;
+       u16 proto;
+       u16 port = 0;
+
+       switch (vlan_get_protocol(skb)) {
+       case htons(ETH_P_IP):
+               proto = ip_hdr(skb)->protocol;
+               break;
+       case htons(ETH_P_IPV6):
+               proto = ipv6_hdr(skb)->nexthdr;
+               break;
+       default:
+               goto out;
+       }
+
+       if (proto == IPPROTO_UDP) {
+               udph = udp_hdr(skb);
+               port = be16_to_cpu(udph->dest);
+       }
+
+       /* Verify if UDP port is being offloaded by HW */
+       if (port && mlx5e_vxlan_lookup_port(priv, port))
+               return features;
+
+out:
+       /* Disable CSUM and GSO if the udp dport is not offloaded by HW */
+       return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
+}
+
+static netdev_features_t mlx5e_features_check(struct sk_buff *skb,
+                                             struct net_device *netdev,
+                                             netdev_features_t features)
+{
+       struct mlx5e_priv *priv = netdev_priv(netdev);
+
+       features = vlan_features_check(skb, features);
+       features = vxlan_features_check(skb, features);
+
+       /* Validate if the tunneled packet is being offloaded by HW */
+       if (skb->encapsulation &&
+           (features & NETIF_F_CSUM_MASK || features & NETIF_F_GSO_MASK))
+               return mlx5e_vxlan_features_check(priv, skb, features);
+
+       return features;
+}
+
 static const struct net_device_ops mlx5e_netdev_ops_basic = {
        .ndo_open                = mlx5e_open,
        .ndo_stop                = mlx5e_close,
        .ndo_start_xmit          = mlx5e_xmit,
+       .ndo_setup_tc            = mlx5e_ndo_setup_tc,
+       .ndo_select_queue        = mlx5e_select_queue,
        .ndo_get_stats64         = mlx5e_get_stats,
        .ndo_set_rx_mode         = mlx5e_set_rx_mode,
        .ndo_set_mac_address     = mlx5e_set_mac,
@@ -2044,6 +2370,8 @@ static const struct net_device_ops mlx5e_netdev_ops_sriov = {
        .ndo_open                = mlx5e_open,
        .ndo_stop                = mlx5e_close,
        .ndo_start_xmit          = mlx5e_xmit,
+       .ndo_setup_tc            = mlx5e_ndo_setup_tc,
+       .ndo_select_queue        = mlx5e_select_queue,
        .ndo_get_stats64         = mlx5e_get_stats,
        .ndo_set_rx_mode         = mlx5e_set_rx_mode,
        .ndo_set_mac_address     = mlx5e_set_mac,
@@ -2052,6 +2380,9 @@ static const struct net_device_ops mlx5e_netdev_ops_sriov = {
        .ndo_set_features        = mlx5e_set_features,
        .ndo_change_mtu          = mlx5e_change_mtu,
        .ndo_do_ioctl            = mlx5e_ioctl,
+       .ndo_add_vxlan_port      = mlx5e_add_vxlan_port,
+       .ndo_del_vxlan_port      = mlx5e_del_vxlan_port,
+       .ndo_features_check      = mlx5e_features_check,
        .ndo_set_vf_mac          = mlx5e_set_vf_mac,
        .ndo_set_vf_vlan         = mlx5e_set_vf_vlan,
        .ndo_get_vf_config       = mlx5e_get_vf_config,
@@ -2078,6 +2409,8 @@ static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)
        }
        if (!MLX5_CAP_ETH(mdev, self_lb_en_modifiable))
                mlx5_core_warn(mdev, "Self loop back prevention is not supported\n");
+       if (!MLX5_CAP_GEN(mdev, cq_moderation))
+               mlx5_core_warn(mdev, "CQ modiration is not supported\n");
 
        return 0;
 }
@@ -2091,15 +2424,51 @@ u16 mlx5e_get_max_inline_cap(struct mlx5_core_dev *mdev)
               2 /*sizeof(mlx5e_tx_wqe.inline_hdr_start)*/;
 }
 
-void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len,
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+static void mlx5e_ets_init(struct mlx5e_priv *priv)
+{
+       int i;
+
+       priv->params.ets.ets_cap = mlx5_max_tc(priv->mdev) + 1;
+       for (i = 0; i < priv->params.ets.ets_cap; i++) {
+               priv->params.ets.tc_tx_bw[i] = MLX5E_MAX_BW_ALLOC;
+               priv->params.ets.tc_tsa[i] = IEEE_8021QAZ_TSA_VENDOR;
+               priv->params.ets.prio_tc[i] = i;
+       }
+
+       /* tclass[prio=0]=1, tclass[prio=1]=0, tclass[prio=i]=i (for i>1) */
+       priv->params.ets.prio_tc[0] = 1;
+       priv->params.ets.prio_tc[1] = 0;
+}
+#endif
+
+void mlx5e_build_default_indir_rqt(struct mlx5_core_dev *mdev,
+                                  u32 *indirection_rqt, int len,
                                   int num_channels)
 {
+       int node = mdev->priv.numa_node;
+       int node_num_of_cores;
        int i;
 
+       if (node == -1)
+               node = first_online_node;
+
+       node_num_of_cores = cpumask_weight(cpumask_of_node(node));
+
+       if (node_num_of_cores)
+               num_channels = min_t(int, num_channels, node_num_of_cores);
+
        for (i = 0; i < len; i++)
                indirection_rqt[i] = i % num_channels;
 }
 
+static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
+{
+       return MLX5_CAP_GEN(mdev, striding_rq) &&
+               MLX5_CAP_GEN(mdev, umr_ptr_rlky) &&
+               MLX5_CAP_ETH(mdev, reg_umr_sq);
+}
+
 static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev,
                                    struct net_device *netdev,
                                    int num_channels)
@@ -2108,8 +2477,21 @@ static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev,
 
        priv->params.log_sq_size           =
                MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
-       priv->params.log_rq_size           =
-               MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;
+       priv->params.rq_wq_type = mlx5e_check_fragmented_striding_rq_cap(mdev) ?
+               MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ :
+               MLX5_WQ_TYPE_LINKED_LIST;
+
+       switch (priv->params.rq_wq_type) {
+       case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+               priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW;
+               priv->params.lro_en = true;
+               break;
+       default: /* MLX5_WQ_TYPE_LINKED_LIST */
+               priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;
+       }
+
+       priv->params.min_rx_wqes = mlx5_min_rx_wqes(priv->params.rq_wq_type,
+                                           BIT(priv->params.log_rq_size));
        priv->params.rx_cq_moderation_usec =
                MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC;
        priv->params.rx_cq_moderation_pkts =
@@ -2119,16 +2501,13 @@ static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev,
        priv->params.tx_cq_moderation_pkts =
                MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS;
        priv->params.tx_max_inline         = mlx5e_get_max_inline_cap(mdev);
-       priv->params.min_rx_wqes           =
-               MLX5E_PARAMS_DEFAULT_MIN_RX_WQES;
        priv->params.num_tc                = 1;
-       priv->params.default_vlan_prio     = 0;
        priv->params.rss_hfunc             = ETH_RSS_HASH_XOR;
 
        netdev_rss_key_fill(priv->params.toeplitz_hash_key,
                            sizeof(priv->params.toeplitz_hash_key));
 
-       mlx5e_build_default_indir_rqt(priv->params.indirection_rqt,
+       mlx5e_build_default_indir_rqt(mdev, priv->params.indirection_rqt,
                                      MLX5E_INDIR_RQT_SIZE, num_channels);
 
        priv->params.lro_wqe_sz            =
@@ -2137,9 +2516,11 @@ static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev,
        priv->mdev                         = mdev;
        priv->netdev                       = netdev;
        priv->params.num_channels          = num_channels;
-       priv->default_vlan_prio            = priv->params.default_vlan_prio;
 
-       spin_lock_init(&priv->async_events_spinlock);
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+       mlx5e_ets_init(priv);
+#endif
+
        mutex_init(&priv->state_lock);
 
        INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work);
@@ -2166,10 +2547,14 @@ static void mlx5e_build_netdev(struct net_device *netdev)
 
        SET_NETDEV_DEV(netdev, &mdev->pdev->dev);
 
-       if (MLX5_CAP_GEN(mdev, vport_group_manager))
+       if (MLX5_CAP_GEN(mdev, vport_group_manager)) {
                netdev->netdev_ops = &mlx5e_netdev_ops_sriov;
-       else
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+               netdev->dcbnl_ops = &mlx5e_dcbnl_ops;
+#endif
+       } else {
                netdev->netdev_ops = &mlx5e_netdev_ops_basic;
+       }
 
        netdev->watchdog_timeo    = 15 * HZ;
 
@@ -2192,10 +2577,27 @@ static void mlx5e_build_netdev(struct net_device *netdev)
        netdev->hw_features      |= NETIF_F_HW_VLAN_CTAG_RX;
        netdev->hw_features      |= NETIF_F_HW_VLAN_CTAG_FILTER;
 
+       if (mlx5e_vxlan_allowed(mdev)) {
+               netdev->hw_features     |= NETIF_F_GSO_UDP_TUNNEL;
+               netdev->hw_enc_features |= NETIF_F_IP_CSUM;
+               netdev->hw_enc_features |= NETIF_F_RXCSUM;
+               netdev->hw_enc_features |= NETIF_F_TSO;
+               netdev->hw_enc_features |= NETIF_F_TSO6;
+               netdev->hw_enc_features |= NETIF_F_RXHASH;
+               netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL;
+       }
+
        netdev->features          = netdev->hw_features;
        if (!priv->params.lro_en)
                netdev->features  &= ~NETIF_F_LRO;
 
+#define FT_CAP(f) MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive.f)
+       if (FT_CAP(flow_modify_en) &&
+           FT_CAP(modify_root) &&
+           FT_CAP(identified_miss_table_mode) &&
+           FT_CAP(flow_table_modify))
+               priv->netdev->hw_features      |= NETIF_F_HW_TC;
+
        netdev->features         |= NETIF_F_HIGHDMA;
 
        netdev->priv_flags       |= IFF_UNICAST_FLT;
@@ -2204,7 +2606,7 @@ static void mlx5e_build_netdev(struct net_device *netdev)
 }
 
 static int mlx5e_create_mkey(struct mlx5e_priv *priv, u32 pdn,
-                            struct mlx5_core_mr *mr)
+                            struct mlx5_core_mkey *mkey)
 {
        struct mlx5_core_dev *mdev = priv->mdev;
        struct mlx5_create_mkey_mbox_in *in;
@@ -2220,7 +2622,7 @@ static int mlx5e_create_mkey(struct mlx5e_priv *priv, u32 pdn,
        in->seg.flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64);
        in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
 
-       err = mlx5_core_create_mkey(mdev, mr, in, sizeof(*in), NULL, NULL,
+       err = mlx5_core_create_mkey(mdev, mkey, in, sizeof(*in), NULL, NULL,
                                    NULL);
 
        kvfree(in);
@@ -2228,6 +2630,61 @@ static int mlx5e_create_mkey(struct mlx5e_priv *priv, u32 pdn,
        return err;
 }
 
+static void mlx5e_create_q_counter(struct mlx5e_priv *priv)
+{
+       struct mlx5_core_dev *mdev = priv->mdev;
+       int err;
+
+       err = mlx5_core_alloc_q_counter(mdev, &priv->q_counter);
+       if (err) {
+               mlx5_core_warn(mdev, "alloc queue counter failed, %d\n", err);
+               priv->q_counter = 0;
+       }
+}
+
+static void mlx5e_destroy_q_counter(struct mlx5e_priv *priv)
+{
+       if (!priv->q_counter)
+               return;
+
+       mlx5_core_dealloc_q_counter(priv->mdev, priv->q_counter);
+}
+
+static int mlx5e_create_umr_mkey(struct mlx5e_priv *priv)
+{
+       struct mlx5_core_dev *mdev = priv->mdev;
+       struct mlx5_create_mkey_mbox_in *in;
+       struct mlx5_mkey_seg *mkc;
+       int inlen = sizeof(*in);
+       u64 npages =
+               mlx5e_get_max_num_channels(mdev) * MLX5_CHANNEL_MAX_NUM_MTTS;
+       int err;
+
+       in = mlx5_vzalloc(inlen);
+       if (!in)
+               return -ENOMEM;
+
+       mkc = &in->seg;
+       mkc->status = MLX5_MKEY_STATUS_FREE;
+       mkc->flags = MLX5_PERM_UMR_EN |
+                    MLX5_PERM_LOCAL_READ |
+                    MLX5_PERM_LOCAL_WRITE |
+                    MLX5_ACCESS_MODE_MTT;
+
+       mkc->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
+       mkc->flags_pd = cpu_to_be32(priv->pdn);
+       mkc->len = cpu_to_be64(npages << PAGE_SHIFT);
+       mkc->xlt_oct_size = cpu_to_be32(mlx5e_get_mtt_octw(npages));
+       mkc->log2_page_size = PAGE_SHIFT;
+
+       err = mlx5_core_create_mkey(mdev, &priv->umr_mkey, in, inlen, NULL,
+                                   NULL, NULL);
+
+       kvfree(in);
+
+       return err;
+}
+
 static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev)
 {
        struct net_device *netdev;
@@ -2238,7 +2695,9 @@ static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev)
        if (mlx5e_check_required_hca_cap(mdev))
                return NULL;
 
-       netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv), nch, nch);
+       netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv),
+                                   nch * MLX5E_MAX_NUM_TC,
+                                   nch);
        if (!netdev) {
                mlx5_core_err(mdev, "alloc_etherdev_mqs() failed\n");
                return NULL;
@@ -2251,7 +2710,7 @@ static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev)
 
        priv = netdev_priv(netdev);
 
-       err = mlx5_alloc_map_uar(mdev, &priv->cq_uar);
+       err = mlx5_alloc_map_uar(mdev, &priv->cq_uar, false);
        if (err) {
                mlx5_core_err(mdev, "alloc_map uar failed, %d\n", err);
                goto err_free_netdev;
@@ -2269,16 +2728,22 @@ static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev)
                goto err_dealloc_pd;
        }
 
-       err = mlx5e_create_mkey(priv, priv->pdn, &priv->mr);
+       err = mlx5e_create_mkey(priv, priv->pdn, &priv->mkey);
        if (err) {
                mlx5_core_err(mdev, "create mkey failed, %d\n", err);
                goto err_dealloc_transport_domain;
        }
 
+       err = mlx5e_create_umr_mkey(priv);
+       if (err) {
+               mlx5_core_err(mdev, "create umr mkey failed, %d\n", err);
+               goto err_destroy_mkey;
+       }
+
        err = mlx5e_create_tises(priv);
        if (err) {
                mlx5_core_warn(mdev, "create tises failed, %d\n", err);
-               goto err_destroy_mkey;
+               goto err_destroy_umr_mkey;
        }
 
        err = mlx5e_open_drop_rq(priv);
@@ -2311,20 +2776,39 @@ static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev)
                goto err_destroy_tirs;
        }
 
+       mlx5e_create_q_counter(priv);
+
        mlx5e_init_eth_addr(priv);
 
+       mlx5e_vxlan_init(priv);
+
+       err = mlx5e_tc_init(priv);
+       if (err)
+               goto err_dealloc_q_counters;
+
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+       mlx5e_dcbnl_ieee_setets_core(priv, &priv->params.ets);
+#endif
+
        err = register_netdev(netdev);
        if (err) {
                mlx5_core_err(mdev, "register_netdev failed, %d\n", err);
-               goto err_destroy_flow_tables;
+               goto err_tc_cleanup;
        }
 
+       if (mlx5e_vxlan_allowed(mdev))
+               vxlan_get_rx_port(netdev);
+
        mlx5e_enable_async_events(priv);
        schedule_work(&priv->set_rx_mode_work);
 
        return priv;
 
-err_destroy_flow_tables:
+err_tc_cleanup:
+       mlx5e_tc_cleanup(priv);
+
+err_dealloc_q_counters:
+       mlx5e_destroy_q_counter(priv);
        mlx5e_destroy_flow_tables(priv);
 
 err_destroy_tirs:
@@ -2342,8 +2826,11 @@ err_close_drop_rq:
 err_destroy_tises:
        mlx5e_destroy_tises(priv);
 
+err_destroy_umr_mkey:
+       mlx5_core_destroy_mkey(mdev, &priv->umr_mkey);
+
 err_destroy_mkey:
-       mlx5_core_destroy_mkey(mdev, &priv->mr);
+       mlx5_core_destroy_mkey(mdev, &priv->mkey);
 
 err_dealloc_transport_domain:
        mlx5_core_dealloc_transport_domain(mdev, priv->tdn);
@@ -2371,13 +2858,17 @@ static void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, void *vpriv)
        mlx5e_disable_async_events(priv);
        flush_scheduled_work();
        unregister_netdev(netdev);
+       mlx5e_tc_cleanup(priv);
+       mlx5e_vxlan_cleanup(priv);
+       mlx5e_destroy_q_counter(priv);
        mlx5e_destroy_flow_tables(priv);
        mlx5e_destroy_tirs(priv);
        mlx5e_destroy_rqt(priv, MLX5E_SINGLE_RQ_RQT);
        mlx5e_destroy_rqt(priv, MLX5E_INDIRECTION_RQT);
        mlx5e_close_drop_rq(priv);
        mlx5e_destroy_tises(priv);
-       mlx5_core_destroy_mkey(priv->mdev, &priv->mr);
+       mlx5_core_destroy_mkey(priv->mdev, &priv->umr_mkey);
+       mlx5_core_destroy_mkey(priv->mdev, &priv->mkey);
        mlx5_core_dealloc_transport_domain(priv->mdev, priv->tdn);
        mlx5_core_dealloc_pd(priv->mdev, priv->pdn);
        mlx5_unmap_free_uar(priv->mdev, &priv->cq_uar);