dpif-netdev: log port/core affinity

[cascardo/ovs.git] / lib / dpif-netdev.c
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c

index 477e32c..f4033e4 100644 (file)
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -117,7 +117,7 @@ struct netdev_flow_key {
   * If dp_netdev_input is not called from a pmd thread, a mutex is used.
   */
  
-#define EM_FLOW_HASH_SHIFT 10
+#define EM_FLOW_HASH_SHIFT 13
  #define EM_FLOW_HASH_ENTRIES (1u << EM_FLOW_HASH_SHIFT)
  #define EM_FLOW_HASH_MASK (EM_FLOW_HASH_ENTRIES - 1)
  #define EM_FLOW_HASH_SEGS 2
@@ -295,7 +295,7 @@ struct dp_netdev_flow {
      const struct cmap_node node; /* In owning dp_netdev_pmd_thread's */
                                   /* 'flow_table'. */
      const ovs_u128 ufid;         /* Unique flow identifier. */
-    const int pmd_id;            /* The 'core_id' of pmd thread owning this */
+    const unsigned pmd_id;       /* The 'core_id' of pmd thread owning this */
                                   /* flow. */
  
      /* Number of references.
@@ -413,8 +413,10 @@ struct dp_netdev_pmd_thread {
      pthread_t thread;
      int index;                      /* Idx of this pmd thread among pmd*/
                                      /* threads on same numa node. */
-    int core_id;                    /* CPU core id of this pmd thread. */
+    unsigned core_id;               /* CPU core id of this pmd thread. */
      int numa_id;                    /* numa node id of this pmd thread. */
+    int tx_qid;                     /* Queue id used by this pmd thread to
+                                     * send packets on all netdevs. */
  
      /* Only a pmd thread can write on its own 'cycles' and 'stats'.
       * The main thread keeps 'stats_zero' and 'cycles_zero' as base
@@ -458,11 +460,11 @@ static void dp_netdev_disable_upcall(struct dp_netdev *);
  void dp_netdev_pmd_reload_done(struct dp_netdev_pmd_thread *pmd);
  static void dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd,
                                      struct dp_netdev *dp, int index,
-                                    int core_id, int numa_id);
+                                    unsigned core_id, int numa_id);
  static void dp_netdev_destroy_pmd(struct dp_netdev_pmd_thread *pmd);
  static void dp_netdev_set_nonpmd(struct dp_netdev *dp);
  static struct dp_netdev_pmd_thread *dp_netdev_get_pmd(struct dp_netdev *dp,
-                                                      int core_id);
+                                                      unsigned core_id);
  static struct dp_netdev_pmd_thread *
  dp_netdev_pmd_get_next(struct dp_netdev *dp, struct cmap_position *pos);
  static void dp_netdev_destroy_all_pmds(struct dp_netdev *dp);
@@ -580,8 +582,8 @@ pmd_info_show_stats(struct ds *reply,
      if (pmd->numa_id != OVS_NUMA_UNSPEC) {
          ds_put_format(reply, " numa_id %d", pmd->numa_id);
      }
-    if (pmd->core_id != OVS_CORE_UNSPEC) {
-        ds_put_format(reply, " core_id %d", pmd->core_id);
+    if (pmd->core_id != OVS_CORE_UNSPEC && pmd->core_id != NON_PMD_CORE_ID) {
+        ds_put_format(reply, " core_id %u", pmd->core_id);
      }
      ds_put_cstr(reply, ":\n");
  
@@ -829,8 +831,6 @@ create_dp_netdev(const char *name, const struct dpif_class *class,
      ovs_mutex_init_recursive(&dp->non_pmd_mutex);
      ovsthread_key_create(&dp->per_pmd_key, NULL);
  
-    /* Reserves the core NON_PMD_CORE_ID for all non-pmd threads. */
-    ovs_numa_try_pin_core_specific(NON_PMD_CORE_ID);
      dp_netdev_set_nonpmd(dp);
      dp->n_dpdk_rxqs = NR_QUEUE;
  
@@ -1069,8 +1069,9 @@ do_add_port(struct dp_netdev *dp, const char *devname, const char *type,
              return ENOENT;
          }
          /* There can only be ovs_numa_get_n_cores() pmd threads,
-         * so creates a txq for each. */
-        error = netdev_set_multiq(netdev, n_cores, dp->n_dpdk_rxqs);
+         * so creates a txq for each, and one extra for the
+         * non-pmd threads. */
+        error = netdev_set_multiq(netdev, n_cores + 1, dp->n_dpdk_rxqs);
          if (error && (error != EOPNOTSUPP)) {
              VLOG_ERR("%s, cannot set multiq", devname);
              return errno;
@@ -1769,7 +1770,7 @@ dp_netdev_pmd_find_flow(const struct dp_netdev_pmd_thread *pmd,
      if (ufidp) {
          CMAP_FOR_EACH_WITH_HASH (netdev_flow, node, dp_netdev_flow_hash(ufidp),
                                   &pmd->flow_table) {
-            if (ovs_u128_equal(&netdev_flow->ufid, ufidp)) {
+            if (ovs_u128_equals(&netdev_flow->ufid, ufidp)) {
                  return netdev_flow;
              }
          }
@@ -1942,7 +1943,8 @@ dpif_netdev_flow_get(const struct dpif *dpif, const struct dpif_flow_get *get)
      struct dp_netdev *dp = get_dp_netdev(dpif);
      struct dp_netdev_flow *netdev_flow;
      struct dp_netdev_pmd_thread *pmd;
-    int pmd_id = get->pmd_id == PMD_ID_NULL ? NON_PMD_CORE_ID : get->pmd_id;
+    unsigned pmd_id = get->pmd_id == PMD_ID_NULL
+                      ? NON_PMD_CORE_ID : get->pmd_id;
      int error = 0;
  
      pmd = dp_netdev_get_pmd(dp, pmd_id);
@@ -1982,7 +1984,7 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd,
      memset(&flow->stats, 0, sizeof flow->stats);
      flow->dead = false;
      flow->batch = NULL;
-    *CONST_CAST(int *, &flow->pmd_id) = pmd->core_id;
+    *CONST_CAST(unsigned *, &flow->pmd_id) = pmd->core_id;
      *CONST_CAST(struct flow *, &flow->flow) = match->flow;
      *CONST_CAST(ovs_u128 *, &flow->ufid) = *ufid;
      ovs_refcount_init(&flow->ref_cnt);
@@ -2025,7 +2027,8 @@ dpif_netdev_flow_put(struct dpif *dpif, const struct dpif_flow_put *put)
      struct dp_netdev_pmd_thread *pmd;
      struct match match;
      ovs_u128 ufid;
-    int pmd_id = put->pmd_id == PMD_ID_NULL ? NON_PMD_CORE_ID : put->pmd_id;
+    unsigned pmd_id = put->pmd_id == PMD_ID_NULL
+                      ? NON_PMD_CORE_ID : put->pmd_id;
      int error;
  
      error = dpif_netdev_flow_from_nlattrs(put->key, put->key_len, &match.flow);
@@ -2120,7 +2123,8 @@ dpif_netdev_flow_del(struct dpif *dpif, const struct dpif_flow_del *del)
      struct dp_netdev *dp = get_dp_netdev(dpif);
      struct dp_netdev_flow *netdev_flow;
      struct dp_netdev_pmd_thread *pmd;
-    int pmd_id = del->pmd_id == PMD_ID_NULL ? NON_PMD_CORE_ID : del->pmd_id;
+    unsigned pmd_id = del->pmd_id == PMD_ID_NULL
+                      ? NON_PMD_CORE_ID : del->pmd_id;
      int error = 0;
  
      pmd = dp_netdev_get_pmd(dp, pmd_id);
@@ -2401,7 +2405,8 @@ dpif_netdev_pmd_set(struct dpif *dpif, unsigned int n_rxqs, const char *cmask)
                  }
  
                  /* Sets the new rx queue config.  */
-                err = netdev_set_multiq(port->netdev, ovs_numa_get_n_cores(),
+                err = netdev_set_multiq(port->netdev,
+                                        ovs_numa_get_n_cores() + 1,
                                          n_rxqs);
                  if (err && (err != EOPNOTSUPP)) {
                      VLOG_ERR("Failed to set dpdk interface %s rx_queue to:"
@@ -2508,7 +2513,7 @@ dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd,
                             struct dp_netdev_port *port,
                             struct netdev_rxq *rxq)
  {
-    struct dp_packet *packets[NETDEV_MAX_RX_BATCH];
+    struct dp_packet *packets[NETDEV_MAX_BURST];
      int error, cnt;
  
      cycles_count_start(pmd);
@@ -2659,6 +2664,11 @@ reload:
      emc_cache_init(&pmd->flow_cache);
      poll_cnt = pmd_load_queues(pmd, &poll_list, poll_cnt);
  
+    for (i = 0; i < poll_cnt; i++) {    /* Log port/core affinity. */
+        VLOG_INFO("Core %u processing port '%s'", pmd->core_id,
+                  netdev_get_name(poll_list[i].port->netdev));
+    }
+
      /* Signal here to make sure the pmd finishes
       * reloading the updated configuration. */
      dp_netdev_pmd_reload_done(pmd);
@@ -2745,7 +2755,7 @@ dp_netdev_pmd_reload_done(struct dp_netdev_pmd_thread *pmd)
   *
   * Caller must unrefs the returned reference.  */
  static struct dp_netdev_pmd_thread *
-dp_netdev_get_pmd(struct dp_netdev *dp, int core_id)
+dp_netdev_get_pmd(struct dp_netdev *dp, unsigned core_id)
  {
      struct dp_netdev_pmd_thread *pmd;
      const struct cmap_node *pnode;
@@ -2805,14 +2815,25 @@ dp_netdev_pmd_get_next(struct dp_netdev *dp, struct cmap_position *pos)
      return next;
  }
  
+/* Returns the tx queue id for 'core_id': the core id itself, except that
+ * NON_PMD_CORE_ID maps to ovs_numa_get_n_cores(), the index of the one
+ * extra txq requested beyond the per-core ones. */
+static int
+core_id_to_qid(unsigned core_id)
+{
+    return core_id == NON_PMD_CORE_ID ? ovs_numa_get_n_cores()
+                                      : (int) core_id;
+}
+
  /* Configures the 'pmd' based on the input argument. */
  static void
  dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp,
-                        int index, int core_id, int numa_id)
+                        int index, unsigned core_id, int numa_id)
  {
      pmd->dp = dp;
      pmd->index = index;
      pmd->core_id = core_id;
+    pmd->tx_qid = core_id_to_qid(core_id);
      pmd->numa_id = numa_id;
  
      ovs_refcount_init(&pmd->ref_cnt);
@@ -2921,7 +2942,7 @@ dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, int numa_id)
          can_have = dp->pmd_cmask ? n_unpinned : MIN(n_unpinned, NR_PMD_THREADS);
          for (i = 0; i < can_have; i++) {
              struct dp_netdev_pmd_thread *pmd = xzalloc(sizeof *pmd);
-            int core_id = ovs_numa_get_unpinned_core_on_numa(numa_id);
+            unsigned core_id = ovs_numa_get_unpinned_core_on_numa(numa_id);
  
              dp_netdev_configure_pmd(pmd, dp, i, core_id, numa_id);
              /* Each thread will distribute all devices rx-queues among
@@ -3015,16 +3036,24 @@ dp_netdev_upcall(struct dp_netdev_pmd_thread *pmd, struct dp_packet *packet_,
  }
  
  static inline uint32_t
-dpif_netdev_packet_get_dp_hash(struct dp_packet *packet,
-                               const struct miniflow *mf)
+dpif_netdev_packet_get_rss_hash(struct dp_packet *packet,
+                                const struct miniflow *mf)
  {
-    uint32_t hash;
+    uint32_t hash, recirc_depth;
  
      hash = dp_packet_get_rss_hash(packet);
      if (OVS_UNLIKELY(!hash)) {
          hash = miniflow_hash_5tuple(mf, 0);
          dp_packet_set_rss_hash(packet, hash);
      }
+
+    /* The RSS hash must account for the recirculation depth to avoid
+     * collisions in the exact match cache. */
+    recirc_depth = *recirc_depth_get_unsafe();
+    if (OVS_UNLIKELY(recirc_depth)) {
+        hash = hash_finish(hash, recirc_depth);
+        dp_packet_set_rss_hash(packet, hash);
+    }
      return hash;
  }
  
@@ -3035,7 +3064,7 @@ struct packet_batch {
  
      struct dp_netdev_flow *flow;
  
-    struct dp_packet *packets[NETDEV_MAX_RX_BATCH];
+    struct dp_packet *packets[NETDEV_MAX_BURST];
  };
  
  static inline void
@@ -3066,7 +3095,6 @@ packet_batch_execute(struct packet_batch *batch,
      struct dp_netdev_actions *actions;
      struct dp_netdev_flow *flow = batch->flow;
  
-    flow->batch = NULL;
      dp_netdev_flow_used(flow, batch->packet_count, batch->byte_count,
                          batch->tcp_flags, now);
  
@@ -3127,9 +3155,14 @@ emc_processing(struct dp_netdev_pmd_thread *pmd, struct dp_packet **packets,
              continue;
          }
  
+        if (i != cnt - 1) {
+            /* Prefetch next packet data */
+            OVS_PREFETCH(dp_packet_data(packets[i+1]));
+        }
+
          miniflow_extract(packets[i], &key.mf);
          key.len = 0; /* Not computed yet. */
-        key.hash = dpif_netdev_packet_get_dp_hash(packets[i], &key.mf);
+        key.hash = dpif_netdev_packet_get_rss_hash(packets[i], &key.mf);
  
          flow = emc_lookup(flow_cache, &key);
          if (OVS_LIKELY(flow)) {
@@ -3159,7 +3192,7 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd,
      const size_t PKT_ARRAY_SIZE = cnt;
  #else
      /* Sparse or MSVC doesn't like variable length array. */
-    enum { PKT_ARRAY_SIZE = NETDEV_MAX_RX_BATCH };
+    enum { PKT_ARRAY_SIZE = NETDEV_MAX_BURST };
  #endif
      struct dpcls_rule *rules[PKT_ARRAY_SIZE];
      struct dp_netdev *dp = pmd->dp;
@@ -3285,7 +3318,7 @@ dp_netdev_input(struct dp_netdev_pmd_thread *pmd,
      const size_t PKT_ARRAY_SIZE = cnt;
  #else
      /* Sparse or MSVC doesn't like variable length array. */
-    enum { PKT_ARRAY_SIZE = NETDEV_MAX_RX_BATCH };
+    enum { PKT_ARRAY_SIZE = NETDEV_MAX_BURST };
  #endif
      struct netdev_flow_key keys[PKT_ARRAY_SIZE];
      struct packet_batch batches[PKT_ARRAY_SIZE];
@@ -3298,6 +3331,10 @@ dp_netdev_input(struct dp_netdev_pmd_thread *pmd,
          fast_path_processing(pmd, packets, newcnt, keys, batches, &n_batches);
      }
  
+    for (i = 0; i < n_batches; i++) {
+        batches[i].flow->batch = NULL;
+    }
+
      for (i = 0; i < n_batches; i++) {
          packet_batch_execute(&batches[i], pmd, now);
      }
@@ -3317,7 +3354,7 @@ dpif_netdev_register_upcall_cb(struct dpif *dpif, upcall_callback *cb,
  }
  
  static void
-dp_netdev_drop_packets(struct dp_packet ** packets, int cnt, bool may_steal)
+dp_netdev_drop_packets(struct dp_packet **packets, int cnt, bool may_steal)
  {
      if (may_steal) {
          int i;
@@ -3375,14 +3412,14 @@ dp_execute_cb(void *aux_, struct dp_packet **packets, int cnt,
      case OVS_ACTION_ATTR_OUTPUT:
          p = dp_netdev_lookup_port(dp, u32_to_odp(nl_attr_get_u32(a)));
          if (OVS_LIKELY(p)) {
-            netdev_send(p->netdev, pmd->core_id, packets, cnt, may_steal);
+            netdev_send(p->netdev, pmd->tx_qid, packets, cnt, may_steal);
              return;
          }
          break;
  
      case OVS_ACTION_ATTR_TUNNEL_PUSH:
          if (*depth < MAX_RECIRC_DEPTH) {
-            struct dp_packet *tnl_pkt[NETDEV_MAX_RX_BATCH];
+            struct dp_packet *tnl_pkt[NETDEV_MAX_BURST];
              int err;
  
              if (!may_steal) {
@@ -3408,7 +3445,7 @@ dp_execute_cb(void *aux_, struct dp_packet **packets, int cnt,
  
              p = dp_netdev_lookup_port(dp, portno);
              if (p) {
-                struct dp_packet *tnl_pkt[NETDEV_MAX_RX_BATCH];
+                struct dp_packet *tnl_pkt[NETDEV_MAX_BURST];
                  int err;
  
                  if (!may_steal) {
@@ -3470,7 +3507,7 @@ dp_execute_cb(void *aux_, struct dp_packet **packets, int cnt,
  
      case OVS_ACTION_ATTR_RECIRC:
          if (*depth < MAX_RECIRC_DEPTH) {
-            struct dp_packet *recirc_pkts[NETDEV_MAX_RX_BATCH];
+            struct dp_packet *recirc_pkts[NETDEV_MAX_BURST];
  
              if (!may_steal) {
                 dp_netdev_clone_pkt_batch(recirc_pkts, packets, cnt);
@@ -3829,7 +3866,7 @@ dpcls_lookup(const struct dpcls *cls, const struct netdev_flow_key keys[],
  #if !defined(__CHECKER__) && !defined(_WIN32)
      const int N_MAPS = DIV_ROUND_UP(cnt, MAP_BITS);
  #else
-    enum { N_MAPS = DIV_ROUND_UP(NETDEV_MAX_RX_BATCH, MAP_BITS) };
+    enum { N_MAPS = DIV_ROUND_UP(NETDEV_MAX_BURST, MAP_BITS) };
  #endif
      map_type maps[N_MAPS];
      struct dpcls_subtable *subtable;