#include "hmap.h"
#include "list.h"
#include "ovs-thread.h"
-#include "vlog.h"
+#include "openvswitch/vlog.h"
VLOG_DEFINE_THIS_MODULE(ovs_numa);
-#define MAX_CPU_SOCKETS 128
+/* ovs-numa module
+ * ===============
+ *
+ * This module stores the affinity information of numa nodes and cpu cores.
+ * It also provides functions to keep track of which cpu cores have
+ * threads pinned to them.
+ *
+ * It is assumed that the numa node ids and cpu core ids all start from 0 and
+ * range continuously. So, for example, if 'ovs_numa_get_n_cores()' returns N,
+ * user can assume core ids from 0 to N-1 are all valid and there is a
+ * 'struct cpu_core' for each id.
+ *
+ * NOTE, this module should only be used by the main thread.
+ *
+ * NOTE, the assumption above will fail when cpu hotplug is used. In that
+ * case ovs-numa will not function correctly. This is a known limitation,
+ * tracked by the TODO entry below.
+ *
+ * TODO: Fix ovs-numa when cpu hotplug is used.
+ */
-/* Cpu socket. */
-struct cpu_socket {
- struct hmap_node hmap_node; /* In the 'all_cpu_sockets'. */
- struct list cores; /* List of cpu cores on the socket. */
- int socket_id; /* Socket id. */
+/* Number of numa node ids (0 to MAX_NUMA_NODES - 1) that discovery probes. */
+#define MAX_NUMA_NODES 128
+
+/* A numa node together with the cpu cores found on it. */
+struct numa_node {
+ struct hmap_node hmap_node; /* In the 'all_numa_nodes'. */
+ struct ovs_list cores; /* List of cpu cores on the numa node. */
+ int numa_id; /* Numa node id. */
};
-/* Cpu core on a cpu socket. */
+/* Cpu core on a numa node, with the state used to track thread pinning. */
struct cpu_core {
struct hmap_node hmap_node;/* In the 'all_cpu_cores'. */
- struct list list_node; /* In 'cpu_socket->cores' list. */
- struct cpu_socket *socket; /* Socket containing the core. */
+ struct ovs_list list_node; /* In 'numa_node->cores' list. */
+ struct numa_node *numa; /* Numa node containing the core. */
int core_id; /* Core id. */
+ bool available; /* If the core can be pinned; true after discovery, later set from the cpu mask. */
bool pinned; /* If a thread has been pinned to the core. */
};
-/* Contains all 'struct cpu_socket's. */
-static struct hmap all_cpu_sockets = HMAP_INITIALIZER(&all_cpu_sockets);
+/* Contains all 'struct numa_node's, hashed on the numa node id. */
+static struct hmap all_numa_nodes = HMAP_INITIALIZER(&all_numa_nodes);
/* Contains all 'struct cpu_core's. */
static struct hmap all_cpu_cores = HMAP_INITIALIZER(&all_cpu_cores);
-/* True if socket and core info are correctly extracted. */
-static bool found_sockets_and_cores;
+/* True if discovery found at least one numa node and at least one cpu
+ * core. While false, the id validity checks always fail. */
+static bool found_numa_and_core;
/* Returns true if 'str' contains all digits. Returns false otherwise. */
static bool
return str[strspn(str, "0123456789")] == '\0';
}
-/* Discovers all cpu sockets and the corresponding cpu cores for each socket.
- * Constructs the 'struct cpu_socket' and 'struct cpu_core'. */
+/* Discovers all numa nodes and the corresponding cpu cores.
+ * Constructs the 'struct numa_node' and 'struct cpu_core'. */
static void
-discover_sockets_and_cores(void)
+discover_numa_and_core(void)
{
int n_cpus = 0;
int i;
- for (i = 0; i < MAX_CPU_SOCKETS; i++) {
+ for (i = 0; i < MAX_NUMA_NODES; i++) {
DIR *dir;
char* path;
path = xasprintf("/sys/devices/system/node/node%d", i);
dir = opendir(path);
- /* Creates 'struct cpu_socket' if the 'dir' is non-null. */
+ /* Creates 'struct numa_node' if the 'dir' is non-null. */
if (dir) {
- struct cpu_socket *s = xzalloc(sizeof *s);
+ struct numa_node *n = xzalloc(sizeof *n);
struct dirent *subdir;
- hmap_insert(&all_cpu_sockets, &s->hmap_node, hash_int(i, 0));
- list_init(&s->cores);
- s->socket_id = i;
+ hmap_insert(&all_numa_nodes, &n->hmap_node, hash_int(i, 0));
+ list_init(&n->cores);
+ n->numa_id = i;
while ((subdir = readdir(dir)) != NULL) {
if (!strncmp(subdir->d_name, "cpu", 3)
core_id = strtoul(subdir->d_name + 3, NULL, 10);
hmap_insert(&all_cpu_cores, &c->hmap_node,
hash_int(core_id, 0));
- list_insert(&s->cores, &c->list_node);
+ list_insert(&n->cores, &c->list_node);
c->core_id = core_id;
+ c->numa = n;
+ c->available = true;
n_cpus++;
}
}
- VLOG_INFO("Discovered %"PRIuSIZE" CPU cores on CPU socket %d",
- list_size(&s->cores), s->socket_id);
+ VLOG_INFO("Discovered %"PRIuSIZE" CPU cores on NUMA node %d",
+ list_size(&n->cores), n->numa_id);
free(path);
closedir(dir);
} else {
}
}
- VLOG_INFO("Discovered %"PRIuSIZE" CPU Sockets and %d CPU cores",
- hmap_count(&all_cpu_sockets), n_cpus);
- if (hmap_count(&all_cpu_sockets) && hmap_count(&all_cpu_cores)) {
- found_sockets_and_cores = true;
+ VLOG_INFO("Discovered %"PRIuSIZE" NUMA nodes and %d CPU cores",
+ hmap_count(&all_numa_nodes), n_cpus);
+ if (hmap_count(&all_numa_nodes) && hmap_count(&all_cpu_cores)) {
+ found_numa_and_core = true;
+ }
+}
+
+/* Returns the 'struct cpu_core' whose id is 'core_id', or NULL if 'core_id'
+ * is not valid or no core with that id is in 'all_cpu_cores'.
+ *
+ * The hash bucket is walked and the stored id compared explicitly, so an
+ * empty bucket yields NULL rather than a pointer computed from a null
+ * hmap node. */
+static struct cpu_core*
+get_core_by_core_id(int core_id)
+{
+    struct cpu_core *core;
+
+    if (!ovs_numa_core_id_is_valid(core_id)) {
+        return NULL;
+    }
+
+    HMAP_FOR_EACH_WITH_HASH (core, hmap_node, hash_int(core_id, 0),
+                             &all_cpu_cores) {
+        if (core->core_id == core_id) {
+            return core;
+        }
+    }
+
+    return NULL;
+}
+
+/* Returns the 'struct numa_node' whose id is 'numa_id', or NULL if
+ * 'numa_id' is not valid or no node with that id is in 'all_numa_nodes'.
+ *
+ * The hash bucket is walked and the stored id compared explicitly, so an
+ * empty bucket yields NULL rather than a pointer computed from a null
+ * hmap node. */
+static struct numa_node*
+get_numa_by_numa_id(int numa_id)
+{
+    struct numa_node *numa;
+
+    if (ovs_numa_numa_id_is_valid(numa_id)) {
+        HMAP_FOR_EACH_WITH_HASH (numa, hmap_node, hash_int(numa_id, 0),
+                                 &all_numa_nodes) {
+            if (numa->numa_id == numa_id) {
+                return numa;
+            }
+        }
}
+
+    return NULL;
}
+\f
/* Extracts the numa node and core info from the 'sysfs'. */
void
ovs_numa_init(void)
static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
if (ovsthread_once_start(&once)) {
- discover_sockets_and_cores();
+ discover_numa_and_core();
ovsthread_once_done(&once);
}
}
+/* Returns true if 'numa_id' is non-negative and smaller than the number of
+ * discovered numa nodes. Always returns false if the numa node and core
+ * info was not extracted. */
bool
-ovs_numa_cpu_socket_id_is_valid(int sid)
+ovs_numa_numa_id_is_valid(int numa_id)
+{
+    /* Ids start from 0, so a negative id can never name a numa node. */
+    return found_numa_and_core
+           && numa_id >= 0 && numa_id < ovs_numa_get_n_numas();
+}
+
+/* Returns true if 'core_id' is non-negative and smaller than the number of
+ * discovered cpu cores. Always returns false if the numa node and core
+ * info was not extracted. */
+bool
+ovs_numa_core_id_is_valid(int core_id)
{
- return sid < ovs_numa_get_n_sockets();
+    /* Ids start from 0, so a negative id can never name a cpu core. */
+    return found_numa_and_core
+           && core_id >= 0 && core_id < ovs_numa_get_n_cores();
}
+/* Returns true if a core is found for 'core_id' and that core is marked as
+ * pinned. Returns false otherwise. */
bool
-ovs_numa_cpu_core_id_is_valid(int cid)
+ovs_numa_core_is_pinned(int core_id)
{
- return cid < ovs_numa_get_n_cores();
+    const struct cpu_core *found = get_core_by_core_id(core_id);
+
+    return found ? found->pinned : false;
}
-/* Returns the number of cpu sockets. */
+/* Returns how many numa nodes were discovered, or OVS_NUMA_UNSPEC if the
+ * numa node and core info was not extracted. */
int
-ovs_numa_get_n_sockets(void)
+ovs_numa_get_n_numas(void)
{
- return found_sockets_and_cores ? hmap_count(&all_cpu_sockets)
- : OVS_SOCKET_UNSPEC;
+    if (!found_numa_and_core) {
+        return OVS_NUMA_UNSPEC;
+    }
+
+    return hmap_count(&all_numa_nodes);
}
/* Returns the number of cpu cores. */
int
ovs_numa_get_n_cores(void)
{
- return found_sockets_and_cores ? hmap_count(&all_cpu_cores)
- : OVS_CORE_UNSPEC;
+    /* Without extracted numa node and core info there is no count to
+     * report. */
+    if (!found_numa_and_core) {
+        return OVS_CORE_UNSPEC;
+    }
+
+    return hmap_count(&all_cpu_cores);
}
-/* Returns the number of cpu cores on socket. */
+/* Maps 'core_id' to the id of the numa node that contains the core.
+ * Returns OVS_NUMA_UNSPEC if no core is found for 'core_id'. */
int
-ovs_numa_get_n_cores_on_socket(int socket_id)
+ovs_numa_get_numa_id(int core_id)
{
- if (found_sockets_and_cores) {
- struct cpu_socket *socket;
- ovs_assert(ovs_numa_cpu_socket_id_is_valid(socket_id));
- socket = CONTAINER_OF(hmap_first_with_hash(&all_cpu_sockets,
- hash_int(socket_id, 0)),
- struct cpu_socket, hmap_node);
+    const struct cpu_core *found = get_core_by_core_id(core_id);
+
+    return found ? found->numa->numa_id : OVS_NUMA_UNSPEC;
+}
+
+/* Counts the cpu cores that belong to numa node 'numa_id'. Returns
+ * OVS_CORE_UNSPEC if no numa node is found for 'numa_id'. */
+int
+ovs_numa_get_n_cores_on_numa(int numa_id)
+{
+    struct numa_node *node = get_numa_by_numa_id(numa_id);
- return list_size(&socket->cores);
+
+    if (node != NULL) {
+        return list_size(&node->cores);
}
return OVS_CORE_UNSPEC;
}
-/* Returns the number of unpinned cpu cores on socket. */
+/* Returns the number of cpu cores that are available and unpinned
+ * on numa node. Returns OVS_CORE_UNSPEC if 'numa_id' is invalid. */
int
-ovs_numa_get_n_unpinned_cores_on_socket(int socket_id)
+ovs_numa_get_n_unpinned_cores_on_numa(int numa_id)
{
- if (found_sockets_and_cores) {
- struct cpu_socket *socket;
+ struct numa_node *numa = get_numa_by_numa_id(numa_id);
+
+ if (numa) {
struct cpu_core *core;
int count = 0;
- ovs_assert(ovs_numa_cpu_socket_id_is_valid(socket_id));
- socket = CONTAINER_OF(hmap_first_with_hash(&all_cpu_sockets,
- hash_int(socket_id, 0)),
- struct cpu_socket, hmap_node);
- LIST_FOR_EACH(core, list_node, &socket->cores) {
- if (!core->pinned) {
+ LIST_FOR_EACH(core, list_node, &numa->cores) {
+ if (core->available && !core->pinned) {
count++;
}
}
-
return count;
}
}
/* Given 'core_id', tries to pin that core. Returns true, if succeeds.
- * False, if the core has already been pinned. */
+ * Returns false if no core is found for 'core_id', or if the core is not
+ * available or has already been pinned. */
bool
ovs_numa_try_pin_core_specific(int core_id)
{
- struct cpu_core *core;
- ovs_assert(ovs_numa_cpu_core_id_is_valid(core_id));
-
- core = CONTAINER_OF(hmap_first_with_hash(&all_cpu_cores,
- hash_int(core_id, 0)),
- struct cpu_core, hmap_node);
- if (!core->pinned) {
- core->pinned = true;
- return true;
+    struct cpu_core *target = get_core_by_core_id(core_id);
+
+    /* Only an existing, available and still unpinned core can be claimed. */
+    if (target && target->available && !target->pinned) {
+        target->pinned = true;
+        return true;
}
return false;
}
-/* Searches through all cores for an unpinned core. Returns the core_id
- * if found and set the 'core->pinned' to true. Otherwise, returns -1. */
+/* Searches through all cores for an unpinned and available core. Returns
+ * the 'core_id' if found and sets the 'core->pinned' to true. Otherwise,
+ * returns OVS_CORE_UNSPEC. */
int
ovs_numa_get_unpinned_core_any(void)
{
struct cpu_core *core;
HMAP_FOR_EACH(core, hmap_node, &all_cpu_cores) {
- if (!core->pinned) {
+ if (core->available && !core->pinned) {
core->pinned = true;
return core->core_id;
}
return OVS_CORE_UNSPEC;
}
-/* Searches through all cores on socket with 'socket_id' for an unpinned core.
- * Returns the core_id if found and sets the 'core->pinned' to true.
- * Otherwise, returns -1. */
+/* Walks the cores of numa node 'numa_id' and claims the first one that is
+ * available and not yet pinned: marks it pinned and returns its core id.
+ * Returns OVS_CORE_UNSPEC if there is no such core or no such numa node. */
int
-ovs_numa_get_unpinned_core_on_socket(int socket_id)
+ovs_numa_get_unpinned_core_on_numa(int numa_id)
{
- struct cpu_socket *socket;
- struct cpu_core *core;
- ovs_assert(ovs_numa_cpu_socket_id_is_valid(socket_id));
- socket = CONTAINER_OF(hmap_first_with_hash(&all_cpu_sockets,
- hash_int(socket_id, 0)),
- struct cpu_socket, hmap_node);
- LIST_FOR_EACH(core, list_node, &socket->cores) {
- if (!core->pinned) {
- core->pinned = true;
- return core->core_id;
+    struct numa_node *node = get_numa_by_numa_id(numa_id);
+    struct cpu_core *candidate;
+
+    if (node != NULL) {
+        LIST_FOR_EACH(candidate, list_node, &node->cores) {
+            /* Skip cores that are masked out or already taken. */
+            if (!candidate->available || candidate->pinned) {
+                continue;
+            }
+
+            candidate->pinned = true;
+            return candidate->core_id;
}
}
return OVS_CORE_UNSPEC;
}
-/* Resets the 'core->pinned' for the core with 'core_id'. */
+/* Clears the pinned flag of the core with 'core_id'. Does nothing if no
+ * core is found for 'core_id'. */
void
ovs_numa_unpin_core(int core_id)
{
- struct cpu_core *core;
+    struct cpu_core *target = get_core_by_core_id(core_id);
+
+    if (!target) {
+        return;
+    }
+
+    target->pinned = false;
+}
+
+/* Builds a dump holding one 'struct ovs_numa_info' (numa id and core id)
+ * for every core on numa node 'numa_id'. Returns NULL if no numa node is
+ * found for 'numa_id'. The dump and its entries are allocated here and can
+ * be released with ovs_numa_dump_destroy(). */
+struct ovs_numa_dump *
+ovs_numa_dump_cores_on_numa(int numa_id)
+{
+    struct numa_node *node = get_numa_by_numa_id(numa_id);
+    struct ovs_numa_dump *result;
+    struct cpu_core *core;
+
+    if (!node) {
+        return NULL;
+    }
+
+    result = xmalloc(sizeof *result);
+    list_init(&result->dump);
+    LIST_FOR_EACH(core, list_node, &node->cores) {
+        struct ovs_numa_info *entry = xmalloc(sizeof *entry);
+
+        entry->numa_id = node->numa_id;
+        entry->core_id = core->core_id;
+        list_insert(&result->dump, &entry->list_node);
+    }
+
+    return result;
+}
+
+/* Frees 'dump' and every 'struct ovs_numa_info' linked on it. Does nothing
+ * if 'dump' is NULL, which is what ovs_numa_dump_cores_on_numa() returns
+ * when it finds no numa node for the requested id. */
+void
+ovs_numa_dump_destroy(struct ovs_numa_dump *dump)
+{
+    struct ovs_numa_info *iter, *next;
+
+    if (!dump) {
+        return;
+    }
+
+    /* The _SAFE variant is needed because each entry is freed while the
+     * list is being walked. */
+    LIST_FOR_EACH_SAFE (iter, next, list_node, &dump->dump) {
+        list_remove(&iter->list_node);
+        free(iter);
+    }
+
+    free(dump);
+}
+
+/* Reads the cpu mask configuration from 'cmask' and sets the 'available'
+ * of corresponding cores.
+ *
+ * 'cmask' is read as hex digits from its last character to its first: the
+ * last character describes core ids 0 to 3, the one before it core ids 4
+ * to 7, and so on, with bit 'j' of a digit describing the j-th core of its
+ * group. A character that is not a hex digit is reported with a warning
+ * and treated as 0. For unspecified cores, sets 'available' to false.
+ *
+ * If 'cmask' is NULL, all cores become available. Does nothing if the
+ * numa node and core info was not extracted. */
+void
+ovs_numa_set_cpu_mask(const char *cmask)
+{
+    int core_id = 0;
+    int i;
+
+    if (!found_numa_and_core) {
+        return;
+    }
+
+    /* If no mask specified, resets the 'available' to true for all cores. */
+    if (!cmask) {
+        struct cpu_core *core;
+
+        HMAP_FOR_EACH(core, hmap_node, &all_cpu_cores) {
+            core->available = true;
+        }
+
+        return;
+    }
- ovs_assert(ovs_numa_cpu_core_id_is_valid(core_id));
+
+    for (i = strlen(cmask) - 1; i >= 0; i--) {
+        /* <ctype.h> functions require a value representable as
+         * 'unsigned char', so a plain 'char' must be converted first. */
+        char hex = toupper((unsigned char) cmask[i]);
+        int bin, j;
- core = CONTAINER_OF(hmap_first_with_hash(&all_cpu_cores,
- hash_int(core_id, 0)),
- struct cpu_core, hmap_node);
- core->pinned = false;
+
+        if (hex >= '0' && hex <= '9') {
+            bin = hex - '0';
+        } else if (hex >= 'A' && hex <= 'F') {
+            bin = hex - 'A' + 10;
+        } else {
+            bin = 0;
+            VLOG_WARN("Invalid cpu mask: %c", cmask[i]);
+        }
+
+        for (j = 0; j < 4; j++) {
+            struct cpu_core *core = get_core_by_core_id(core_id++);
+
+            /* Tolerate a missing core instead of writing through a
+             * pointer derived from a failed lookup. */
+            if (core) {
+                core->available = (bin >> j) & 0x1;
+            }
+
+            /* Every core has been covered; ignore the rest of the mask. */
+            if (core_id >= hmap_count(&all_cpu_cores)) {
+                return;
+            }
+        }
+    }
+
+    /* For unspecified cores, sets 'available' to false. */
+    while (core_id < hmap_count(&all_cpu_cores)) {
+        struct cpu_core *core = get_core_by_core_id(core_id++);
+
+        if (core) {
+            core->available = false;
+        }
+    }
}
#endif /* __linux__ */