2 * Copyright (c) 2011, 2012, 2013, 2014 Nicira, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
19 #include "route-table.h"
21 #include <arpa/inet.h>
22 #include <sys/socket.h>
23 #include <linux/rtnetlink.h>
29 #include "netlink-notifier.h"
30 #include "netlink-socket.h"
32 #include "rtnetlink-link.h"
/* Names this file's logging module 'route_table'; presumably this is what the
 * VLOG_DBG_RL() calls in route_table_parse() log under -- confirm against the
 * vlog header. */
35 VLOG_DEFINE_THIS_MODULE(route_table);
/* Members of 'struct route_data', the type of the 'rd' fields used throughout
 * this file (the struct's opening line is not visible in this excerpt).
 * Values of this type are hashed and compared bytewise by hash_route_data()
 * and route_node_lookup(). */
38 /* Copied from struct rtmsg. */
39 unsigned char rtm_dst_len;
41 /* Extracted from Netlink attributes. */
42 uint32_t rta_dst; /* Destination in host byte order. 0 if missing. */
43 int rta_oif; /* Output interface index. */
46 /* A digested version of a route message sent down by the kernel to indicate
47 * that a route has changed. */
/* Filled in by route_table_parse() and consumed by route_table_handle_msg(),
 * which only acts on messages that have 'relevant' set and whose 'nlmsg_type'
 * is RTM_NEWROUTE. */
48 struct route_table_msg {
49 bool relevant; /* Should this message be processed? */
50 int nlmsg_type; /* e.g. RTM_NEWROUTE, RTM_DELROUTE. */
51 struct route_data rd; /* Data parsed from this message. */
/* Members of 'struct route_node' (opening line not visible in this excerpt):
 * one entry of 'route_map', allocated and inserted by
 * route_table_handle_msg() under the hash from hash_route_data(). */
55 struct hmap_node node; /* Node in route_map. */
56 struct route_data rd; /* Data associated with this node. */
/* Members of 'struct name_node' (opening line not visible in this excerpt):
 * one entry of 'name_map', allocated by name_table_reset() and keyed by
 * hash_int(ifi_index, 0). */
60 struct hmap_node node; /* Node in name_map. */
61 uint32_t ifi_index; /* Kernel interface index. */
63 char ifname[IFNAMSIZ]; /* Interface name. */
/* Locked by the public entry points that touch the tables
 * (route_table_get_name(), route_table_register(), route_table_unregister(),
 * route_table_run() and route_table_wait()). */
66 static struct ovs_mutex route_table_mutex = OVS_MUTEX_INITIALIZER;
/* Rate limit handed to the VLOG_DBG_RL() calls in route_table_parse(). */
67 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
69 /* Global change number for route-table, which should be incremented
70 * every time route_table_reset() is called. */
71 static uint64_t rt_change_seq;
/* route_table_register() and route_table_unregister() perform their set-up
 * and tear-down only when this counter is zero. */
73 static unsigned int register_count = 0;
/* Notifier handle created by nln_create() in route_table_register(). */
74 static struct nln *nln = NULL;
/* Passed to nln_create() alongside route_table_parse(); presumably the buffer
 * that parsed notifications are written into -- confirm against the nln API. */
75 static struct route_table_msg rtmsg;
/* Registration of route_table_change() on 'nln'. */
76 static struct nln_notifier *route_notifier = NULL;
/* Registration of name_table_change() made by name_table_init(). */
77 static struct nln_notifier *name_notifier = NULL;
/* Each flag is cleared by the change callbacks and set again by the matching
 * *_reset() function; a false value means the table must be re-dumped. */
79 static bool route_table_valid = false;
80 static bool name_table_valid = false;
/* Holds 'struct route_node' entries hashed with hash_route_data(). */
81 static struct hmap route_map;
/* Holds 'struct name_node' entries hashed with hash_int(ifi_index, 0). */
82 static struct hmap name_map;
/* Forward declarations of this file's static helpers: the routing-table
 * functions first, then the interface-name table functions. */
84 static int route_table_reset(void);
/* Callers must already hold route_table_mutex (see OVS_REQUIRES). */
85 static bool route_table_get_ifindex(ovs_be32 ip, int *)
86 OVS_REQUIRES(route_table_mutex);
87 static void route_table_handle_msg(const struct route_table_msg *);
88 static bool route_table_parse(struct ofpbuf *, struct route_table_msg *);
89 static void route_table_change(const struct route_table_msg *, void *);
90 static struct route_node *route_node_lookup(const struct route_data *);
91 static struct route_node *route_node_lookup_by_ip(uint32_t ip);
92 static void route_map_clear(void);
93 static uint32_t hash_route_data(const struct route_data *);
95 static void name_table_init(void);
96 static void name_table_uninit(void);
97 static int name_table_reset(void);
98 static void name_table_change(const struct rtnetlink_link_change *, void *);
99 static void name_map_clear(void);
100 static struct name_node *name_node_lookup(int ifi_index);
102 /* Populates 'name' with the name of the interface traffic destined for 'ip'
103 * is likely to egress out of (see route_table_get_ifindex).
105 * Returns true if successful, otherwise false. */
/* 'ip' is handed unchanged to route_table_get_ifindex(), which converts it
 * with ntohl(), so it is expected in network byte order.
 *
 * NOTE(review): this excerpt omits the function's short lines (braces, the
 * body of the name_table_valid check, the test on 'nn' and the returns). */
107 route_table_get_name(ovs_be32 ip, char name[IFNAMSIZ])
108 OVS_EXCLUDED(route_table_mutex)
/* The mutex is held across both table lookups and released on each of the
 * two exits below. */
112 ovs_mutex_lock(&route_table_mutex);
/* A false flag means the interface-name table is stale. */
114 if (!name_table_valid) {
/* Resolve 'ip' to an interface index, then the index to a name. */
118 if (route_table_get_ifindex(ip, &ifindex)) {
119 struct name_node *nn;
121 nn = name_node_lookup(ifindex);
/* The copy into the caller's buffer is bounded by IFNAMSIZ. */
123 ovs_strlcpy(name, nn->ifname, IFNAMSIZ);
124 ovs_mutex_unlock(&route_table_mutex);
129 ovs_mutex_unlock(&route_table_mutex);
133 /* Populates 'ifindex' with the interface index traffic destined for 'ip' is
134 * likely to egress. There is no hard guarantee that traffic destined for 'ip'
135 * will egress out the specified interface. 'ifindex' may refer to an
136 * interface which is not physical (such as a bridge port).
138 * Returns true if successful, otherwise false. */
/* The caller must hold route_table_mutex (OVS_REQUIRES below).
 *
 * NOTE(review): this excerpt omits the function's short lines (braces, the
 * body of the route_table_valid check, the test on 'rn' and the returns). */
140 route_table_get_ifindex(ovs_be32 ip_, int *ifindex)
141 OVS_REQUIRES(route_table_mutex)
143 struct route_node *rn;
/* Stored routes keep their destination in host byte order, so convert the
 * network-order argument once before comparing. */
144 uint32_t ip = ntohl(ip_);
/* A false flag means the cached routing table is stale. */
148 if (!route_table_valid) {
/* First preference: the matching non-default route with the greatest prefix
 * length. */
152 rn = route_node_lookup_by_ip(ip);
155 *ifindex = rn->rd.rta_oif;
159 /* Choose a default route. */
/* A default route is an entry whose destination and prefix length are both
 * zero. */
160 HMAP_FOR_EACH(rn, node, &route_map) {
161 if (rn->rd.rta_dst == 0 && rn->rd.rtm_dst_len == 0) {
162 *ifindex = rn->rd.rta_oif;
/* Returns the current value of the file-scope counter 'rt_change_seq'. */
171 route_table_get_change_seq(void)
173 return rt_change_seq;
176 /* Users of the route_table module should register themselves with this
177 * function before making any other route_table function calls. */
/* NOTE(review): this excerpt omits the function's short lines (braces, the
 * counter update and some of the set-up calls). */
179 route_table_register(void)
180 OVS_EXCLUDED(route_table_mutex)
182 ovs_mutex_lock(&route_table_mutex);
/* Shared state is created only when the registration count is zero. */
183 if (!register_count) {
185 ovs_assert(!route_notifier);
/* Create the notifier for the IPv4 route multicast group, with
 * route_table_parse() as the parser and the file-scope 'rtmsg' as its
 * argument. */
187 nln = nln_create(NETLINK_ROUTE, RTNLGRP_IPV4_ROUTE,
188 (nln_parse_func *) route_table_parse, &rtmsg);
/* route_table_change() is the callback; it only marks the table invalid. */
191 nln_notifier_create(nln, (nln_notify_func *) route_table_change,
194 hmap_init(&route_map);
200 ovs_mutex_unlock(&route_table_mutex);
203 /* Users of the route_table module should unregister themselves with this
204 * function when they will no longer be making any more route_table function
 * calls. */
/* NOTE(review): this excerpt omits the function's short lines (braces, the
 * counter update and some of the tear-down calls). */
207 route_table_unregister(void)
208 OVS_EXCLUDED(route_table_mutex)
210 ovs_mutex_lock(&route_table_mutex);
/* Tear-down runs only when the registration count is zero. */
213 if (!register_count) {
214 nln_notifier_destroy(route_notifier);
/* Reset the pointer so the assertion in route_table_register() holds on a
 * later re-registration. */
215 route_notifier = NULL;
220 hmap_destroy(&route_map);
223 ovs_mutex_unlock(&route_table_mutex);
226 /* Run periodically to update the locally maintained routing table. */
/* NOTE(review): this excerpt omits the function's short lines, including the
 * body of the route_table_valid check. */
228 route_table_run(void)
229 OVS_EXCLUDED(route_table_mutex)
231 ovs_mutex_lock(&route_table_mutex);
/* Presumably delivers pending link notifications, which reach
 * name_table_change() and invalidate both tables -- confirm against
 * rtnetlink-link. */
233 rtnetlink_link_run();
/* A false flag means a change callback fired since the last dump. */
236 if (!route_table_valid) {
240 ovs_mutex_unlock(&route_table_mutex);
243 /* Causes poll_block() to wake up when route_table updates are required. */
/* Wait-side counterpart of route_table_run(): rtnetlink_link_wait() is
 * called under route_table_mutex, mirroring the rtnetlink_link_run() call
 * there.
 *
 * NOTE(review): this excerpt omits the function's short lines. */
245 route_table_wait(void)
246 OVS_EXCLUDED(route_table_mutex)
248 ovs_mutex_lock(&route_table_mutex);
250 rtnetlink_link_wait();
253 ovs_mutex_unlock(&route_table_mutex);
/* Requests a dump of the kernel's AF_INET routes over NETLINK_ROUTE and feeds
 * every reply that parses through route_table_handle_msg().  Returns the
 * result of nl_dump_done().
 *
 * NOTE(review): this excerpt omits the function's short lines (braces, the
 * 'dump' declaration and any clearing of the old table). */
257 route_table_reset(void)
/* This local 'rtmsg' shadows the file-scope variable of the same name and
 * has a different type. */
260 struct rtgenmsg *rtmsg;
261 uint64_t reply_stub[NL_DUMP_BUFSIZE / 8];
262 struct ofpbuf request, reply, buf;
/* The table is marked valid before the dump runs, so a failed dump does not
 * leave the flag false. */
265 route_table_valid = true;
268 ofpbuf_init(&request, 0);
/* Build an RTM_GETROUTE request whose body is a zeroed rtgenmsg with the
 * family set to AF_INET. */
270 nl_msg_put_nlmsghdr(&request, sizeof *rtmsg, RTM_GETROUTE, NLM_F_REQUEST);
272 rtmsg = ofpbuf_put_zeros(&request, sizeof *rtmsg);
273 rtmsg->rtgen_family = AF_INET;
275 nl_dump_start(&dump, NETLINK_ROUTE, &request);
/* The request buffer is released right after the dump has been started. */
276 ofpbuf_uninit(&request);
/* Replies are received into 'buf', which starts out on the stack stub. */
278 ofpbuf_use_stub(&buf, reply_stub, sizeof reply_stub);
279 while (nl_dump_next(&dump, &reply, &buf)) {
280 struct route_table_msg msg;
/* Replies that fail to parse are skipped. */
282 if (route_table_parse(&reply, &msg)) {
283 route_table_handle_msg(&msg);
288 return nl_dump_done(&dump);
/* Digests the rtnetlink route message in 'buf' into '*change'.  Used both as
 * the nln parse callback (see route_table_register()) and directly by
 * route_table_reset().
 *
 * NOTE(review): this excerpt omits the function's short lines (braces, the
 * 'parsed' declaration, the else branch and the returns). */
293 route_table_parse(struct ofpbuf *buf, struct route_table_msg *change)
/* The destination attribute may be absent; the output interface must be
 * present for the message to parse. */
297 static const struct nl_policy policy[] = {
298 [RTA_DST] = { .type = NL_A_U32, .optional = true },
299 [RTA_OIF] = { .type = NL_A_U32, .optional = false },
302 struct nlattr *attrs[ARRAY_SIZE(policy)];
/* Attributes start after the netlink header and the fixed rtmsg header. */
304 parsed = nl_policy_parse(buf, NLMSG_HDRLEN + sizeof(struct rtmsg),
305 policy, attrs, ARRAY_SIZE(policy));
308 const struct rtmsg *rtm;
309 const struct nlmsghdr *nlmsg;
311 nlmsg = ofpbuf_data(buf);
312 rtm = ofpbuf_at(buf, NLMSG_HDRLEN, sizeof *rtm);
314 if (rtm->rtm_family != AF_INET) {
315 VLOG_DBG_RL(&rl, "received non AF_INET rtnetlink route message");
/* Zero the whole struct, including any padding inside 'rd': route entries
 * are later hashed and compared bytewise (hash_route_data(),
 * route_node_lookup()).  This also leaves rta_dst at 0 when RTA_DST is
 * missing. */
319 memset(change, 0, sizeof *change);
320 change->relevant = true;
/* The two checks below mark the message as not to be processed. */
322 if (rtm->rtm_scope == RT_SCOPE_NOWHERE) {
323 change->relevant = false;
326 if (rtm->rtm_type != RTN_UNICAST &&
327 rtm->rtm_type != RTN_LOCAL) {
328 change->relevant = false;
331 change->nlmsg_type = nlmsg->nlmsg_type;
332 change->rd.rtm_dst_len = rtm->rtm_dst_len;
333 change->rd.rta_oif = nl_attr_get_u32(attrs[RTA_OIF]);
/* The destination is stored in host byte order. */
335 if (attrs[RTA_DST]) {
336 change->rd.rta_dst = ntohl(nl_attr_get_be32(attrs[RTA_DST]));
340 VLOG_DBG_RL(&rl, "received unparseable rtnetlink route message");
/* Notifier callback registered in route_table_register().  Both arguments
 * are unused: the function only marks the cached routing table invalid. */
347 route_table_change(const struct route_table_msg *change OVS_UNUSED,
348 void *aux OVS_UNUSED)
350 route_table_valid = false;
/* Adds the route described by 'change' to 'route_map' if it is relevant, is
 * an RTM_NEWROUTE message and no identical entry is already present.  Every
 * other message is ignored by the visible code. */
354 route_table_handle_msg(const struct route_table_msg *change)
356 if (change->relevant && change->nlmsg_type == RTM_NEWROUTE &&
357 !route_node_lookup(&change->rd)) {
358 struct route_node *rn;
/* Bytewise copy into a zeroed node, then insert under the same hash that
 * route_node_lookup() searches with. */
360 rn = xzalloc(sizeof *rn);
361 memcpy(&rn->rd, &change->rd, sizeof change->rd);
363 hmap_insert(&route_map, &rn->node, hash_route_data(&rn->rd));
/* Searches 'route_map' for an entry whose route data is bytewise identical
 * to '*rd'.
 *
 * NOTE(review): this excerpt omits the function's short lines (braces and
 * the returns). */
367 static struct route_node *
368 route_node_lookup(const struct route_data *rd)
370 struct route_node *rn;
/* Only the bucket for rd's hash is walked; memcmp() covers the whole struct,
 * so any padding bytes take part in the comparison as well. */
372 HMAP_FOR_EACH_WITH_HASH(rn, node, hash_route_data(rd), &route_map) {
373 if (!memcmp(&rn->rd, rd, sizeof *rd)) {
381 static struct route_node *
382 route_node_lookup_by_ip(uint32_t ip)
385 struct route_node *rn, *rn_ret;
390 HMAP_FOR_EACH(rn, node, &route_map) {
391 uint32_t mask = 0xffffffff << (32 - rn->rd.rtm_dst_len);
393 if (rn->rd.rta_dst == 0 && rn->rd.rtm_dst_len == 0) {
398 if (rn->rd.rtm_dst_len > dst_len &&
399 (ip & mask) == (rn->rd.rta_dst & mask)) {
401 dst_len = rn->rd.rtm_dst_len;
/* Removes every entry from 'route_map'.
 *
 * NOTE(review): this excerpt omits the function's short lines, so any release
 * of the removed nodes is not visible here. */
409 route_map_clear(void)
411 struct route_node *rn, *rn_next;
/* The _SAFE iterator is used because entries are removed during the walk. */
413 HMAP_FOR_EACH_SAFE(rn, rn_next, node, &route_map) {
414 hmap_remove(&route_map, &rn->node);
/* Hashes every byte of '*rd' with basis 0.  Because the whole struct is
 * hashed, callers need its bytes fully initialized; route_table_parse()
 * zeroes the message before filling it in. */
420 hash_route_data(const struct route_data *rd)
422 return hash_bytes(rd, sizeof *rd, 0);
/* Sets up the interface-name table: initializes 'name_map', registers
 * name_table_change() for link notifications and marks the table invalid. */
428 name_table_init(void)
430 hmap_init(&name_map);
431 name_notifier = rtnetlink_link_notifier_create(name_table_change, NULL);
432 name_table_valid = false;
/* Tears down the interface-name table: destroys the link notifier created
 * by name_table_init() and then 'name_map'.
 *
 * NOTE(review): this excerpt omits the function's short lines, so any
 * clearing of the map's entries is not visible here. */
436 name_table_uninit(void)
438 rtnetlink_link_notifier_destroy(name_notifier);
439 name_notifier = NULL;
441 hmap_destroy(&name_map);
/* Requests a dump of the kernel's links over NETLINK_ROUTE and records an
 * (interface index, name) entry in 'name_map' for every RTM_NEWLINK reply
 * whose index is not already present.  Returns the result of nl_dump_done().
 *
 * NOTE(review): this excerpt omits the function's short lines (braces, the
 * 'dump' declaration and any clearing of the old table). */
445 name_table_reset(void)
/* This local 'rtmsg' shadows the file-scope variable of the same name and
 * has a different type. */
448 struct rtgenmsg *rtmsg;
449 uint64_t reply_stub[NL_DUMP_BUFSIZE / 8];
450 struct ofpbuf request, reply, buf;
/* The table is marked valid before the dump runs, so a failed dump does not
 * leave the flag false. */
452 name_table_valid = true;
455 ofpbuf_init(&request, 0);
/* Build an RTM_GETLINK request whose body is a zeroed rtgenmsg with the
 * family set to AF_INET. */
456 nl_msg_put_nlmsghdr(&request, sizeof *rtmsg, RTM_GETLINK, NLM_F_REQUEST);
457 rtmsg = ofpbuf_put_zeros(&request, sizeof *rtmsg);
458 rtmsg->rtgen_family = AF_INET;
460 nl_dump_start(&dump, NETLINK_ROUTE, &request);
461 ofpbuf_uninit(&request);
/* Replies are received into 'buf', which starts out on the stack stub. */
463 ofpbuf_use_stub(&buf, reply_stub, sizeof reply_stub);
464 while (nl_dump_next(&dump, &reply, &buf)) {
465 struct rtnetlink_link_change change;
/* Skip replies that do not parse, are not RTM_NEWLINK, or repeat an index
 * that is already in the table. */
467 if (rtnetlink_link_parse(&reply, &change)
468 && change.nlmsg_type == RTM_NEWLINK
469 && !name_node_lookup(change.ifi_index)) {
470 struct name_node *nn;
472 nn = xzalloc(sizeof *nn);
473 nn->ifi_index = change.ifi_index;
/* The name copy is bounded by IFNAMSIZ, the size of nn->ifname. */
474 ovs_strlcpy(nn->ifname, change.ifname, IFNAMSIZ);
/* Same hash as name_node_lookup() uses. */
475 hmap_insert(&name_map, &nn->node, hash_int(nn->ifi_index, 0));
479 return nl_dump_done(&dump);
/* Link-notifier callback registered in name_table_init().  Both arguments
 * are unused: any link change invalidates the name table and the routing
 * table together. */
483 name_table_change(const struct rtnetlink_link_change *change OVS_UNUSED,
484 void *aux OVS_UNUSED)
486 /* Changes to interface status can cause routing table changes that some
487 * versions of the linux kernel do not advertise for some reason. */
488 route_table_valid = false;
489 name_table_valid = false;
/* Searches 'name_map' for the entry whose interface index is 'ifi_index'.
 *
 * NOTE(review): this excerpt omits the function's short lines (braces and
 * the returns). */
492 static struct name_node *
493 name_node_lookup(int ifi_index)
495 struct name_node *nn;
/* Only the bucket for hash_int(ifi_index, 0) is walked; name_table_reset()
 * inserts entries under the same hash. */
497 HMAP_FOR_EACH_WITH_HASH(nn, node, hash_int(ifi_index, 0), &name_map) {
/* NOTE(review): nn->ifi_index is uint32_t while the parameter is int, so
 * this comparison is mixed-sign. */
498 if (nn->ifi_index == ifi_index) {
509 struct name_node *nn, *nn_next;
511 HMAP_FOR_EACH_SAFE(nn, nn_next, node, &name_map) {
512 hmap_remove(&name_map, &nn->node);