net/mlx5_core: Flow counters infrastructure
authorAmir Vadai <amirva@mellanox.com>
Fri, 13 May 2016 12:55:41 +0000 (12:55 +0000)
committerDavid S. Miller <davem@davemloft.net>
Mon, 16 May 2016 17:43:51 +0000 (13:43 -0400)
If a counter has the aging flag set when created, it is added to a list
of counters that will be queried periodically from a workqueue.  The query
result and last-use timestamp are cached.
Adding and deleting counters must be very efficient, since thousands of
such operations might be issued per second.
There is only a single reference to counters without aging, therefore
no need for locks.
But, counters with aging enabled are stored in a list. In order to make
code as lockless as possible, all the list manipulation and access to
hardware is done from a single context - the periodic counters query
thread.

The hardware supports multiple counters per FTE, however currently we
are using one counter for each FTE.

Signed-off-by: Amir Vadai <amirva@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/net/ethernet/mellanox/mlx5/core/Makefile
drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c [new file with mode: 0644]
include/linux/mlx5/driver.h
include/linux/mlx5/fs.h

index b531d4f..9ea7b58 100644 (file)
@@ -2,7 +2,7 @@ obj-$(CONFIG_MLX5_CORE)         += mlx5_core.o
 
 mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
                health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o   \
-               mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o
+               mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o fs_counters.o
 
 mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o \
                en_main.o en_fs.o en_ethtool.o en_tx.o en_rx.o \
index 9420def..8b5f0b2 100644 (file)
@@ -1771,6 +1771,7 @@ void mlx5_cleanup_fs(struct mlx5_core_dev *dev)
        cleanup_single_prio_root_ns(dev, dev->priv.fdb_root_ns);
        cleanup_single_prio_root_ns(dev, dev->priv.esw_egress_root_ns);
        cleanup_single_prio_root_ns(dev, dev->priv.esw_ingress_root_ns);
+       mlx5_cleanup_fc_stats(dev);
 }
 
 static int init_fdb_root_ns(struct mlx5_core_dev *dev)
@@ -1827,10 +1828,14 @@ int mlx5_init_fs(struct mlx5_core_dev *dev)
 {
        int err = 0;
 
+       err = mlx5_init_fc_stats(dev);
+       if (err)
+               return err;
+
        if (MLX5_CAP_GEN(dev, nic_flow_table)) {
                err = init_root_ns(dev);
                if (err)
-                       return err;
+                       goto err;
        }
        if (MLX5_CAP_GEN(dev, eswitch_flow_table)) {
                err = init_fdb_root_ns(dev);
index 1989048..aa41a73 100644 (file)
@@ -169,6 +169,9 @@ struct mlx5_flow_root_namespace {
        struct mutex                    chain_lock;
 };
 
+int mlx5_init_fc_stats(struct mlx5_core_dev *dev);
+void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev);
+
 int mlx5_init_fs(struct mlx5_core_dev *dev);
 void mlx5_cleanup_fs(struct mlx5_core_dev *dev);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
new file mode 100644 (file)
index 0000000..164dc37
--- /dev/null
@@ -0,0 +1,226 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/fs.h>
+#include "mlx5_core.h"
+#include "fs_core.h"
+#include "fs_cmd.h"
+
+#define MLX5_FC_STATS_PERIOD msecs_to_jiffies(1000)
+
+/* locking scheme:
+ *
+ * It is the responsibility of the user to prevent concurrent calls or bad
+ * ordering to mlx5_fc_create(), mlx5_fc_destroy() and accessing a reference
+ * to struct mlx5_fc.
+ * e.g en_tc.c is protected by RTNL lock of its caller, and will never call a
+ * dump (access to struct mlx5_fc) after a counter is destroyed.
+ *
+ * access to counter list:
+ * - create (user context)
+ *   - mlx5_fc_create() only adds to an addlist to be used by
+ *     mlx5_fc_stats_work(). addlist is protected by a spinlock.
+ *   - spawn thread to do the actual add
+ *
+ * - destroy (user context)
+ *   - mark a counter as deleted
+ *   - spawn thread to do the actual del
+ *
+ * - dump (user context)
+ *   user should not call dump after destroy
+ *
+ * - query (single thread workqueue context)
+ *   destroy/dump - no conflict (see destroy)
+ *   query/dump - packets and bytes might be inconsistent (since update is not
+ *                atomic)
+ *   query/create - no conflict (see create)
+ *   every create/destroy spawns the work, but the thread only queries the
+ *   hardware once the query period has elapsed.
+ */
+
+/* Periodic flow counter maintenance, executed from the fc_stats delayed work.
+ *
+ * Splices newly created counters from addlist onto the main list, frees
+ * counters that were marked as deleted, and refreshes the cached statistics
+ * of the remaining counters once next_query has been reached.
+ */
+static void mlx5_fc_stats_work(struct work_struct *work)
+{
+       struct mlx5_core_dev *dev = container_of(work, struct mlx5_core_dev,
+                                                priv.fc_stats.work.work);
+       struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+       unsigned long now = jiffies;
+       struct mlx5_fc *fc, *next;
+       bool too_early;
+
+       /* addlist is also written by mlx5_fc_create(), hence the lock */
+       spin_lock(&fc_stats->addlist_lock);
+       list_splice_tail_init(&fc_stats->addlist, &fc_stats->list);
+       /* re-arm only while there is at least one counter to look after */
+       if (!list_empty(&fc_stats->list))
+               queue_delayed_work(fc_stats->wq, &fc_stats->work,
+                                  MLX5_FC_STATS_PERIOD);
+       spin_unlock(&fc_stats->addlist_lock);
+
+       /* next_query is written only after the loop, so test it once */
+       too_early = time_before(now, fc_stats->next_query);
+
+       list_for_each_entry_safe(fc, next, &fc_stats->list, list) {
+               u64 packets, bytes;
+
+               if (fc->deleted) {
+                       list_del(&fc->list);
+                       mlx5_cmd_fc_free(dev, fc->id);
+                       kfree(fc);
+                       continue;
+               }
+
+               if (too_early)
+                       continue;
+
+               if (mlx5_cmd_fc_query(dev, fc->id, &packets, &bytes)) {
+                       pr_err("Error querying stats for counter id %d\n",
+                              fc->id);
+                       continue;
+               }
+
+               /* an unchanged packet count leaves cache and lastuse as is */
+               if (packets != fc->cache.packets) {
+                       fc->cache.lastuse = jiffies;
+                       fc->cache.packets = packets;
+                       fc->cache.bytes   = bytes;
+               }
+       }
+
+       if (!too_early)
+               fc_stats->next_query = now + MLX5_FC_STATS_PERIOD;
+}
+
+/* Allocate a hardware flow counter together with its software descriptor.
+ *
+ * @dev:   device on which the counter is allocated
+ * @aging: when true, the counter is put on addlist and the stats work is
+ *         kicked so that the periodic query picks the counter up
+ *
+ * Returns the new counter, or an ERR_PTR() value on failure.
+ */
+struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging)
+{
+       struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+       struct mlx5_fc *fc;
+       int ret;
+
+       fc = kzalloc(sizeof(*fc), GFP_KERNEL);
+       if (!fc)
+               return ERR_PTR(-ENOMEM);
+
+       ret = mlx5_cmd_fc_alloc(dev, &fc->id);
+       if (ret) {
+               kfree(fc);
+               return ERR_PTR(ret);
+       }
+
+       if (!aging)
+               return fc;
+
+       fc->aging = true;
+
+       spin_lock(&fc_stats->addlist_lock);
+       list_add(&fc->list, &fc_stats->addlist);
+       spin_unlock(&fc_stats->addlist_lock);
+
+       /* run the stats work right away so it splices the new counter in */
+       mod_delayed_work(fc_stats->wq, &fc_stats->work, 0);
+
+       return fc;
+}
+
+/* Release a counter obtained from mlx5_fc_create(); a NULL counter is a no-op.
+ *
+ * A counter without aging is freed here. A counter with aging is only
+ * flagged as deleted and the stats work is kicked; that work does the
+ * actual unlink and free.
+ */
+void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter)
+{
+       struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+
+       if (!counter)
+               return;
+
+       if (!counter->aging) {
+               mlx5_cmd_fc_free(dev, counter->id);
+               kfree(counter);
+               return;
+       }
+
+       counter->deleted = true;
+       mod_delayed_work(fc_stats->wq, &fc_stats->work, 0);
+}
+
+/* Initialise the per-device flow counter statistics state: both counter
+ * lists, the addlist lock, the query deadline, the "mlx5_fc" workqueue and
+ * the delayed work that runs mlx5_fc_stats_work().
+ *
+ * Returns 0 on success, or -ENOMEM if the workqueue cannot be created.
+ */
+int mlx5_init_fc_stats(struct mlx5_core_dev *dev)
+{
+       struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+
+       INIT_LIST_HEAD(&fc_stats->list);
+       INIT_LIST_HEAD(&fc_stats->addlist);
+       spin_lock_init(&fc_stats->addlist_lock);
+
+       /* Start with a deadline that has already been reached so the first
+        * run of the work queries the hardware. A zero next_query compares
+        * as "in the future" while jiffies is still in the upper half of
+        * its range, which would suppress all queries until jiffies wraps.
+        */
+       fc_stats->next_query = jiffies;
+
+       fc_stats->wq = create_singlethread_workqueue("mlx5_fc");
+       if (!fc_stats->wq)
+               return -ENOMEM;
+
+       INIT_DELAYED_WORK(&fc_stats->work, mlx5_fc_stats_work);
+
+       return 0;
+}
+
+/* Tear down the flow counter statistics state of @dev.
+ *
+ * Stops the stats work, destroys its workqueue, and then frees every
+ * counter still present on either addlist or the main list.
+ */
+void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev)
+{
+       struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+       struct mlx5_fc *fc, *next;
+
+       cancel_delayed_work_sync(&fc_stats->work);
+       destroy_workqueue(fc_stats->wq);
+       fc_stats->wq = NULL;
+
+       /* gather counters the work never got to splice in */
+       list_splice_tail_init(&fc_stats->addlist, &fc_stats->list);
+
+       list_for_each_entry_safe(fc, next, &fc_stats->list, list) {
+               list_del(&fc->list);
+               mlx5_cmd_fc_free(dev, fc->id);
+               kfree(fc);
+       }
+}
+
+/* Report cached statistics of @counter without touching the hardware.
+ *
+ * @bytes and @packets receive the difference between the cached totals and
+ * the totals recorded by the previous call; @lastuse receives the cached
+ * last-use timestamp. The cached totals are then recorded as the new
+ * baseline for the next call.
+ */
+void mlx5_fc_query_cached(struct mlx5_fc *counter,
+                         u64 *bytes, u64 *packets, u64 *lastuse)
+{
+       /* work on a local copy of the cache */
+       struct mlx5_fc_cache snap = counter->cache;
+
+       *bytes = snap.bytes - counter->lastbytes;
+       *packets = snap.packets - counter->lastpackets;
+       *lastuse = snap.lastuse;
+
+       counter->lastbytes = snap.bytes;
+       counter->lastpackets = snap.packets;
+}
index 9613143..07b504f 100644 (file)
@@ -41,6 +41,7 @@
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <linux/radix-tree.h>
+#include <linux/workqueue.h>
 
 #include <linux/mlx5/device.h>
 #include <linux/mlx5/doorbell.h>
@@ -457,6 +458,17 @@ struct mlx5_irq_info {
        char name[MLX5_MAX_IRQ_NAME];
 };
 
+struct mlx5_fc_stats { /* per-device flow counter polling state */
+       struct list_head list; /* counters walked by mlx5_fc_stats_work() */
+       struct list_head addlist; /* new counters awaiting splice onto list */
+       /* protects addlist against concurrent add (create) and splice (work) */
+       spinlock_t addlist_lock;
+
+       struct workqueue_struct *wq; /* workqueue on which work is queued */
+       struct delayed_work work; /* runs mlx5_fc_stats_work() */
+       unsigned long next_query; /* jiffies of the next hardware query */
+};
+
 struct mlx5_eswitch;
 
 struct mlx5_priv {
@@ -520,6 +532,8 @@ struct mlx5_priv {
        struct mlx5_flow_root_namespace *fdb_root_ns;
        struct mlx5_flow_root_namespace *esw_egress_root_ns;
        struct mlx5_flow_root_namespace *esw_ingress_root_ns;
+
+       struct mlx5_fc_stats            fc_stats;
 };
 
 enum mlx5_device_state {
index c8b9ede..4b7a107 100644 (file)
@@ -127,4 +127,9 @@ int mlx5_modify_rule_destination(struct mlx5_flow_rule *rule,
                                 struct mlx5_flow_destination *dest);
 
 struct mlx5_fc *mlx5_flow_rule_counter(struct mlx5_flow_rule *rule);
+struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging);
+void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter);
+void mlx5_fc_query_cached(struct mlx5_fc *counter,
+                         u64 *bytes, u64 *packets, u64 *lastuse);
+
 #endif