/*
 * Copyright (c) 2014 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <config.h>

#include "ovs-numa.h"

#include <ctype.h>
#include <errno.h>
#ifdef __linux__
#include <dirent.h>
#include <stddef.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>
#endif /* __linux__ */

#include "hash.h"
#include "hmap.h"
#include "openvswitch/list.h"
#include "ovs-thread.h"
#include "openvswitch/vlog.h"
#include "util.h"
VLOG_DEFINE_THIS_MODULE(ovs_numa);
/* ovs-numa module
 * ===============
 *
 * This module stores the affinity information of numa nodes and cpu cores.
 * It also provides functions to bookkeep the pin of threads on cpu cores.
 *
 * It is assumed that the numa node ids and cpu core ids all start from 0 and
 * range continuously.  So, for example, if 'ovs_numa_get_n_cores()' returns N,
 * user can assume core ids from 0 to N-1 are all valid and there is a
 * 'struct cpu_core' for each id.
 *
 * NOTE, this module should only be used by the main thread.
 *
 * NOTE, the assumption above will fail when cpu hotplug is used.  In that
 * case ovs-numa will not function correctly.  For now, add a TODO entry
 * for addressing it in the future.
 *
 * TODO: Fix ovs-numa when cpu hotplug is used.
 */
58 #define MAX_NUMA_NODES 128
62 struct hmap_node hmap_node; /* In the 'all_numa_nodes'. */
63 struct ovs_list cores; /* List of cpu cores on the numa node. */
64 int numa_id; /* numa node id. */
67 /* Cpu core on a numa node. */
69 struct hmap_node hmap_node;/* In the 'all_cpu_cores'. */
70 struct ovs_list list_node; /* In 'numa_node->cores' list. */
71 struct numa_node *numa; /* numa node containing the core. */
72 unsigned core_id; /* Core id. */
73 bool available; /* If the core can be pinned. */
74 bool pinned; /* If a thread has been pinned to the core. */
77 /* Contains all 'struct numa_node's. */
78 static struct hmap all_numa_nodes = HMAP_INITIALIZER(&all_numa_nodes);
79 /* Contains all 'struct cpu_core's. */
80 static struct hmap all_cpu_cores = HMAP_INITIALIZER(&all_cpu_cores);
81 /* True if numa node and core info are correctly extracted. */
82 static bool found_numa_and_core;
/* Returns true if 'str' contains all digits.  Returns false otherwise.
 * Note: an empty string trivially satisfies the condition. */
static bool
contain_all_digits(const char *str)
{
    /* strspn() returns the length of the leading run of digits; the string
     * is all digits exactly when that run ends at the terminating NUL. */
    return str[strspn(str, "0123456789")] == '\0';
}
93 /* Discovers all numa nodes and the corresponding cpu cores.
94 * Constructs the 'struct numa_node' and 'struct cpu_core'. */
96 discover_numa_and_core(void)
102 bool numa_supported = true;
104 /* Check if NUMA supported on this system. */
105 dir = opendir("/sys/devices/system/node");
107 if (!dir && errno == ENOENT) {
108 numa_supported = false;
114 for (i = 0; i < MAX_NUMA_NODES; i++) {
117 if (numa_supported) {
118 /* Constructs the path to node /sys/devices/system/nodeX. */
119 path = xasprintf("/sys/devices/system/node/node%d", i);
121 path = xasprintf("/sys/devices/system/cpu/");
126 /* Creates 'struct numa_node' if the 'dir' is non-null. */
128 struct numa_node *n = xzalloc(sizeof *n);
129 struct dirent *subdir;
131 hmap_insert(&all_numa_nodes, &n->hmap_node, hash_int(i, 0));
132 ovs_list_init(&n->cores);
135 while ((subdir = readdir(dir)) != NULL) {
136 if (!strncmp(subdir->d_name, "cpu", 3)
137 && contain_all_digits(subdir->d_name + 3)){
138 struct cpu_core *c = xzalloc(sizeof *c);
141 core_id = strtoul(subdir->d_name + 3, NULL, 10);
142 hmap_insert(&all_cpu_cores, &c->hmap_node,
143 hash_int(core_id, 0));
144 ovs_list_insert(&n->cores, &c->list_node);
145 c->core_id = core_id;
151 VLOG_INFO("Discovered %"PRIuSIZE" CPU cores on NUMA node %d",
152 ovs_list_size(&n->cores), n->numa_id);
154 } else if (errno != ENOENT) {
155 VLOG_WARN("opendir(%s) failed (%s)", path,
156 ovs_strerror(errno));
160 if (!dir || !numa_supported) {
165 VLOG_INFO("Discovered %"PRIuSIZE" NUMA nodes and %d CPU cores",
166 hmap_count(&all_numa_nodes), n_cpus);
167 if (hmap_count(&all_numa_nodes) && hmap_count(&all_cpu_cores)) {
168 found_numa_and_core = true;
170 #endif /* __linux__ */
173 /* Gets 'struct cpu_core' by 'core_id'. */
174 static struct cpu_core*
175 get_core_by_core_id(unsigned core_id)
177 struct cpu_core *core = NULL;
179 if (ovs_numa_core_id_is_valid(core_id)) {
180 core = CONTAINER_OF(hmap_first_with_hash(&all_cpu_cores,
181 hash_int(core_id, 0)),
182 struct cpu_core, hmap_node);
188 /* Gets 'struct numa_node' by 'numa_id'. */
189 static struct numa_node*
190 get_numa_by_numa_id(int numa_id)
192 struct numa_node *numa = NULL;
194 if (ovs_numa_numa_id_is_valid(numa_id)) {
195 numa = CONTAINER_OF(hmap_first_with_hash(&all_numa_nodes,
196 hash_int(numa_id, 0)),
197 struct numa_node, hmap_node);
204 /* Extracts the numa node and core info from the 'sysfs'. */
208 static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
210 if (ovsthread_once_start(&once)) {
211 discover_numa_and_core();
212 ovsthread_once_done(&once);
217 ovs_numa_numa_id_is_valid(int numa_id)
219 return found_numa_and_core && numa_id < ovs_numa_get_n_numas();
223 ovs_numa_core_id_is_valid(unsigned core_id)
225 return found_numa_and_core && core_id < ovs_numa_get_n_cores();
229 ovs_numa_core_is_pinned(unsigned core_id)
231 struct cpu_core *core = get_core_by_core_id(core_id);
240 /* Returns the number of numa nodes. */
242 ovs_numa_get_n_numas(void)
244 return found_numa_and_core ? hmap_count(&all_numa_nodes)
248 /* Returns the number of cpu cores. */
250 ovs_numa_get_n_cores(void)
252 return found_numa_and_core ? hmap_count(&all_cpu_cores)
256 /* Given 'core_id', returns the corresponding numa node id. Returns
257 * OVS_NUMA_UNSPEC if 'core_id' is invalid. */
259 ovs_numa_get_numa_id(unsigned core_id)
261 struct cpu_core *core = get_core_by_core_id(core_id);
264 return core->numa->numa_id;
267 return OVS_NUMA_UNSPEC;
270 /* Returns the number of cpu cores on numa node. Returns OVS_CORE_UNSPEC
271 * if 'numa_id' is invalid. */
273 ovs_numa_get_n_cores_on_numa(int numa_id)
275 struct numa_node *numa = get_numa_by_numa_id(numa_id);
278 return ovs_list_size(&numa->cores);
281 return OVS_CORE_UNSPEC;
284 /* Returns the number of cpu cores that are available and unpinned
285 * on numa node. Returns OVS_CORE_UNSPEC if 'numa_id' is invalid. */
287 ovs_numa_get_n_unpinned_cores_on_numa(int numa_id)
289 struct numa_node *numa = get_numa_by_numa_id(numa_id);
292 struct cpu_core *core;
295 LIST_FOR_EACH(core, list_node, &numa->cores) {
296 if (core->available && !core->pinned) {
303 return OVS_CORE_UNSPEC;
306 /* Given 'core_id', tries to pin that core. Returns true, if succeeds.
307 * False, if the core has already been pinned, or if it is invalid or
310 ovs_numa_try_pin_core_specific(unsigned core_id)
312 struct cpu_core *core = get_core_by_core_id(core_id);
315 if (core->available && !core->pinned) {
324 /* Searches through all cores for an unpinned and available core. Returns
325 * the 'core_id' if found and sets the 'core->pinned' to true. Otherwise,
326 * returns OVS_CORE_UNSPEC. */
328 ovs_numa_get_unpinned_core_any(void)
330 struct cpu_core *core;
332 HMAP_FOR_EACH(core, hmap_node, &all_cpu_cores) {
333 if (core->available && !core->pinned) {
335 return core->core_id;
339 return OVS_CORE_UNSPEC;
342 /* Searches through all cores on numa node with 'numa_id' for an
343 * unpinned and available core. Returns the core_id if found and
344 * sets the 'core->pinned' to true. Otherwise, returns OVS_CORE_UNSPEC. */
346 ovs_numa_get_unpinned_core_on_numa(int numa_id)
348 struct numa_node *numa = get_numa_by_numa_id(numa_id);
351 struct cpu_core *core;
353 LIST_FOR_EACH(core, list_node, &numa->cores) {
354 if (core->available && !core->pinned) {
356 return core->core_id;
361 return OVS_CORE_UNSPEC;
364 /* Unpins the core with 'core_id'. */
366 ovs_numa_unpin_core(unsigned core_id)
368 struct cpu_core *core = get_core_by_core_id(core_id);
371 core->pinned = false;
375 /* Given the 'numa_id', returns dump of all cores on the numa node. */
376 struct ovs_numa_dump *
377 ovs_numa_dump_cores_on_numa(int numa_id)
379 struct ovs_numa_dump *dump = xmalloc(sizeof *dump);
380 struct numa_node *numa = get_numa_by_numa_id(numa_id);
382 ovs_list_init(&dump->dump);
385 struct cpu_core *core;
387 LIST_FOR_EACH(core, list_node, &numa->cores) {
388 struct ovs_numa_info *info = xmalloc(sizeof *info);
390 info->numa_id = numa->numa_id;
391 info->core_id = core->core_id;
392 ovs_list_insert(&dump->dump, &info->list_node);
400 ovs_numa_dump_destroy(struct ovs_numa_dump *dump)
402 struct ovs_numa_info *iter;
408 LIST_FOR_EACH_POP (iter, list_node, &dump->dump) {
415 /* Reads the cpu mask configuration from 'cmask' and sets the
416 * 'available' of corresponding cores. For unspecified cores,
417 * sets 'available' to false. */
419 ovs_numa_set_cpu_mask(const char *cmask)
424 if (!found_numa_and_core) {
428 /* If no mask specified, resets the 'available' to true for all cores. */
430 struct cpu_core *core;
432 HMAP_FOR_EACH(core, hmap_node, &all_cpu_cores) {
433 core->available = true;
439 for (i = strlen(cmask) - 1; i >= 0; i--) {
440 char hex = toupper(cmask[i]);
443 if (hex >= '0' && hex <= '9') {
445 } else if (hex >= 'A' && hex <= 'F') {
446 bin = hex - 'A' + 10;
449 VLOG_WARN("Invalid cpu mask: %c", cmask[i]);
452 for (j = 0; j < 4; j++) {
453 struct cpu_core *core;
455 core = CONTAINER_OF(hmap_first_with_hash(&all_cpu_cores,
456 hash_int(core_id++, 0)),
457 struct cpu_core, hmap_node);
458 core->available = (bin >> j) & 0x1;
460 if (core_id >= hmap_count(&all_cpu_cores)) {
466 /* For unspecified cores, sets 'available' to false. */
467 while (core_id < hmap_count(&all_cpu_cores)) {
468 struct cpu_core *core;
470 core = CONTAINER_OF(hmap_first_with_hash(&all_cpu_cores,
471 hash_int(core_id++, 0)),
472 struct cpu_core, hmap_node);
473 core->available = false;