1 /* Copyright (c) 2015, 2016 Nicira, Inc.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
18 #include "byte-order.h"
24 #include "openflow/openflow.h"
25 #include "openvswitch/dynamic-string.h"
26 #include "openvswitch/hmap.h"
27 #include "openvswitch/list.h"
28 #include "openvswitch/match.h"
29 #include "openvswitch/ofp-actions.h"
30 #include "openvswitch/ofp-msgs.h"
31 #include "openvswitch/ofp-parse.h"
32 #include "openvswitch/ofp-print.h"
33 #include "openvswitch/ofp-util.h"
34 #include "openvswitch/ofpbuf.h"
35 #include "openvswitch/vlog.h"
36 #include "ovn-controller.h"
37 #include "ovn/lib/actions.h"
38 #include "poll-loop.h"
41 #include "socket-util.h"
43 #include "vswitch-idl.h"
45 VLOG_DEFINE_THIS_MODULE(ofctrl);
47 /* An OpenFlow flow. */
49 struct hmap_node match_hmap_node; /* For match based hashing. */
50 struct hindex_node uuid_hindex_node; /* For uuid based hashing. */
51 struct ovs_list list_node; /* For handling lists of flows. */
58 /* Data. UUID is used for disambiguation. */
60 struct ofpact *ofpacts;
64 static uint32_t ovn_flow_match_hash(const struct ovn_flow *);
65 static void ovn_flow_lookup(struct hmap *, const struct ovn_flow *target,
66 struct ovs_list *answers);
67 static char *ovn_flow_to_string(const struct ovn_flow *);
68 static void ovn_flow_log(const struct ovn_flow *, const char *action);
69 static void ovn_flow_destroy(struct ovn_flow *);
71 /* OpenFlow connection to the switch. */
72 static struct rconn *swconn;
74 /* Last seen sequence number for 'swconn'. When this differs from
75 * rconn_get_connection_seqno(swconn), 'swconn' has reconnected. */
76 static unsigned int seqno;
78 /* Connection state machine. */
81 STATE(S_TLV_TABLE_REQUESTED) \
82 STATE(S_TLV_TABLE_MOD_SENT) \
83 STATE(S_CLEAR_FLOWS) \
86 #define STATE(NAME) NAME,
91 /* An in-flight update to the switch's flow table.
93 * When we receive a barrier reply from the switch with the given 'xid', we
94 * know that the switch is caught up to northbound database sequence number
95 * 'nb_cfg' (and make that available to the client via ofctrl_get_cur_cfg(), so
96 * that it can store it into our Chassis record's nb_cfg column). */
97 struct ofctrl_flow_update {
98 struct ovs_list list_node; /* In 'flow_updates'. */
99 ovs_be32 xid; /* OpenFlow transaction ID for barrier. */
100 int64_t nb_cfg; /* Northbound database sequence number. */
103 static struct ofctrl_flow_update *
104 ofctrl_flow_update_from_list_node(const struct ovs_list *list_node)
106 return CONTAINER_OF(list_node, struct ofctrl_flow_update, list_node);
109 /* Currently in-flight updates. */
110 static struct ovs_list flow_updates;
112 /* nb_cfg of latest committed flow update. */
113 static int64_t cur_cfg;
116 static enum ofctrl_state state;
118 /* Transaction IDs for messages in flight to the switch. */
119 static ovs_be32 xid, xid2;
121 /* Counter for in-flight OpenFlow messages on 'swconn'. We only send a new
122 * round of flow table modifications to the switch when the counter falls to
123 * zero, to avoid unbounded buffering. */
124 static struct rconn_packet_counter *tx_counter;
126 /* Flow table of "struct ovn_flow"s, that holds the flow table currently
127 * installed in the switch. */
128 static struct hmap installed_flows;
130 /* A reference to the group_table. */
131 static struct group_table *groups;
133 /* MFF_* field ID for our Geneve option. In S_TLV_TABLE_MOD_SENT, this is
134 * the option we requested (we don't know whether we obtained it yet). In
135 * S_CLEAR_FLOWS or S_UPDATE_FLOWS, this is really the option we have. */
136 static enum mf_field_id mff_ovn_geneve;
138 static ovs_be32 queue_msg(struct ofpbuf *);
140 static void ovn_flow_table_destroy(void);
141 static struct ofpbuf *encode_flow_mod(struct ofputil_flow_mod *);
143 static void ovn_group_table_clear(struct group_table *group_table,
145 static struct ofpbuf *encode_group_mod(const struct ofputil_group_mod *);
147 static void ofctrl_recv(const struct ofp_header *, enum ofptype);
149 static struct hmap match_flow_table = HMAP_INITIALIZER(&match_flow_table);
150 static struct hindex uuid_flow_table = HINDEX_INITIALIZER(&uuid_flow_table);
155 swconn = rconn_create(5, 0, DSCP_DEFAULT, 1 << OFP13_VERSION);
156 tx_counter = rconn_packet_counter_create();
157 hmap_init(&installed_flows);
158 ovs_list_init(&flow_updates);
161 /* S_NEW, for a new connection.
163 * Sends NXT_TLV_TABLE_REQUEST and transitions to
164 * S_TLV_TABLE_REQUESTED. */
169 struct ofpbuf *buf = ofpraw_alloc(OFPRAW_NXT_TLV_TABLE_REQUEST,
170 rconn_get_version(swconn), 0);
171 xid = queue_msg(buf);
172 state = S_TLV_TABLE_REQUESTED;
176 recv_S_NEW(const struct ofp_header *oh OVS_UNUSED,
177 enum ofptype type OVS_UNUSED)
182 /* S_TLV_TABLE_REQUESTED, when NXT_TLV_TABLE_REQUEST has been sent
183 * and we're waiting for a reply.
185 * If we receive an NXT_TLV_TABLE_REPLY:
187 * - If it contains our tunnel metadata option, assign its field ID to
188 * mff_ovn_geneve and transition to S_CLEAR_FLOWS.
190 * - Otherwise, if there is an unused tunnel metadata field ID, send
191 * NXT_TLV_TABLE_MOD and OFPT_BARRIER_REQUEST, and transition to
192 * S_TLV_TABLE_MOD_SENT.
194 * - Otherwise, log an error, disable Geneve, and transition to
197 * If we receive an OFPT_ERROR:
199 * - Log an error, disable Geneve, and transition to S_CLEAR_FLOWS. */
202 run_S_TLV_TABLE_REQUESTED(void)
207 recv_S_TLV_TABLE_REQUESTED(const struct ofp_header *oh, enum ofptype type)
209 if (oh->xid != xid) {
210 ofctrl_recv(oh, type);
211 } else if (type == OFPTYPE_NXT_TLV_TABLE_REPLY) {
212 struct ofputil_tlv_table_reply reply;
213 enum ofperr error = ofputil_decode_tlv_table_reply(oh, &reply);
215 VLOG_ERR("failed to decode TLV table request (%s)",
216 ofperr_to_string(error));
220 const struct ofputil_tlv_map *map;
221 uint64_t md_free = UINT64_MAX;
222 BUILD_ASSERT(TUN_METADATA_NUM_OPTS == 64);
224 LIST_FOR_EACH (map, list_node, &reply.mappings) {
225 if (map->option_class == OVN_GENEVE_CLASS
226 && map->option_type == OVN_GENEVE_TYPE
227 && map->option_len == OVN_GENEVE_LEN) {
228 if (map->index >= TUN_METADATA_NUM_OPTS) {
229 VLOG_ERR("desired Geneve tunnel option 0x%"PRIx16","
230 "%"PRIu8",%"PRIu8" already in use with "
231 "unsupported index %"PRIu16,
232 map->option_class, map->option_type,
233 map->option_len, map->index);
236 mff_ovn_geneve = MFF_TUN_METADATA0 + map->index;
237 state = S_CLEAR_FLOWS;
242 if (map->index < TUN_METADATA_NUM_OPTS) {
243 md_free &= ~(UINT64_C(1) << map->index);
247 VLOG_DBG("OVN Geneve option not found");
249 VLOG_ERR("no Geneve options free for use by OVN");
253 unsigned int index = rightmost_1bit_idx(md_free);
254 mff_ovn_geneve = MFF_TUN_METADATA0 + index;
255 struct ofputil_tlv_map tm;
256 tm.option_class = OVN_GENEVE_CLASS;
257 tm.option_type = OVN_GENEVE_TYPE;
258 tm.option_len = OVN_GENEVE_LEN;
261 struct ofputil_tlv_table_mod ttm;
262 ttm.command = NXTTMC_ADD;
263 ovs_list_init(&ttm.mappings);
264 ovs_list_push_back(&ttm.mappings, &tm.list_node);
266 xid = queue_msg(ofputil_encode_tlv_table_mod(OFP13_VERSION, &ttm));
267 xid2 = queue_msg(ofputil_encode_barrier_request(OFP13_VERSION));
268 state = S_TLV_TABLE_MOD_SENT;
269 } else if (type == OFPTYPE_ERROR) {
270 VLOG_ERR("switch refused to allocate Geneve option (%s)",
271 ofperr_to_string(ofperr_decode_msg(oh, NULL)));
274 char *s = ofp_to_string(oh, ntohs(oh->length), 1);
275 VLOG_ERR("unexpected reply to TLV table request (%s)",
284 state = S_CLEAR_FLOWS;
287 /* S_TLV_TABLE_MOD_SENT, when NXT_TLV_TABLE_MOD and OFPT_BARRIER_REQUEST
288 * have been sent and we're waiting for a reply to one or the other.
290 * If we receive an OFPT_ERROR:
292 * - If the error is NXTTMFC_ALREADY_MAPPED or NXTTMFC_DUP_ENTRY, we
293 * raced with some other controller. Transition to S_NEW.
295 * - Otherwise, log an error, disable Geneve, and transition to
298 * If we receive OFPT_BARRIER_REPLY:
300 * - Set the tunnel metadata field ID to the one that we requested.
301 * Transition to S_CLEAR_FLOWS.
305 run_S_TLV_TABLE_MOD_SENT(void)
310 recv_S_TLV_TABLE_MOD_SENT(const struct ofp_header *oh, enum ofptype type)
312 if (oh->xid != xid && oh->xid != xid2) {
313 ofctrl_recv(oh, type);
314 } else if (oh->xid == xid2 && type == OFPTYPE_BARRIER_REPLY) {
315 state = S_CLEAR_FLOWS;
316 } else if (oh->xid == xid && type == OFPTYPE_ERROR) {
317 enum ofperr error = ofperr_decode_msg(oh, NULL);
318 if (error == OFPERR_NXTTMFC_ALREADY_MAPPED ||
319 error == OFPERR_NXTTMFC_DUP_ENTRY) {
320 VLOG_INFO("raced with another controller adding "
321 "Geneve option (%s); trying again",
322 ofperr_to_string(error));
325 VLOG_ERR("error adding Geneve option (%s)",
326 ofperr_to_string(error));
330 char *s = ofp_to_string(oh, ntohs(oh->length), 1);
331 VLOG_ERR("unexpected reply to Geneve option allocation request (%s)",
339 state = S_CLEAR_FLOWS;
342 /* S_CLEAR_FLOWS, after we've established a Geneve metadata field ID and it's
343 * time to set up some flows.
345 * Sends an OFPT_FLOW_MOD to clear all flows, then transitions to
349 run_S_CLEAR_FLOWS(void)
351 /* Send a flow_mod to delete all flows. */
352 struct ofputil_flow_mod fm = {
353 .match = MATCH_CATCHALL_INITIALIZER,
354 .table_id = OFPTT_ALL,
355 .command = OFPFC_DELETE,
357 queue_msg(encode_flow_mod(&fm));
358 VLOG_DBG("clearing all flows");
360 /* Send a group_mod to delete all groups. */
361 struct ofputil_group_mod gm;
362 memset(&gm, 0, sizeof gm);
363 gm.command = OFPGC11_DELETE;
364 gm.group_id = OFPG_ALL;
365 gm.command_bucket_id = OFPG15_BUCKET_ALL;
366 ovs_list_init(&gm.buckets);
367 queue_msg(encode_group_mod(&gm));
368 ofputil_bucket_list_destroy(&gm.buckets);
370 /* Clear installed_flows, to match the state of the switch. */
371 ovn_flow_table_clear();
373 /* Clear existing groups, to match the state of the switch. */
375 ovn_group_table_clear(groups, true);
378 /* All flow updates are irrelevant now. */
379 struct ofctrl_flow_update *fup, *next;
380 LIST_FOR_EACH_SAFE (fup, next, list_node, &flow_updates) {
381 ovs_list_remove(&fup->list_node);
385 state = S_UPDATE_FLOWS;
389 recv_S_CLEAR_FLOWS(const struct ofp_header *oh, enum ofptype type)
391 ofctrl_recv(oh, type);
394 /* S_UPDATE_FLOWS, for maintaining the flow table over time.
396 * Compare the installed flows to the ones we want. Send OFPT_FLOW_MOD as
399 * This is a terminal state. We only transition out of it if the connection
static void
run_S_UPDATE_FLOWS(void)
{
    /* Nothing to do here.
     *
     * Being in this state enables ofctrl_put() to work, however. */
}
411 recv_S_UPDATE_FLOWS(const struct ofp_header *oh, enum ofptype type)
413 if (type == OFPTYPE_BARRIER_REPLY && !ovs_list_is_empty(&flow_updates)) {
414 struct ofctrl_flow_update *fup = ofctrl_flow_update_from_list_node(
415 ovs_list_front(&flow_updates));
416 if (fup->xid == oh->xid) {
417 if (fup->nb_cfg >= cur_cfg) {
418 cur_cfg = fup->nb_cfg;
420 ovs_list_remove(&fup->list_node);
424 ofctrl_recv(oh, type);
428 /* Runs the OpenFlow state machine against 'br_int', which is local to the
429 * hypervisor on which we are running. Attempts to negotiate a Geneve option
430 * field for class OVN_GENEVE_CLASS, type OVN_GENEVE_TYPE. If successful,
431 * returns the MFF_* field ID for the option, otherwise returns 0. */
433 ofctrl_run(const struct ovsrec_bridge *br_int)
437 target = xasprintf("unix:%s/%s.mgmt", ovs_rundir(), br_int->name);
438 if (strcmp(target, rconn_get_target(swconn))) {
439 VLOG_INFO("%s: connecting to switch", target);
440 rconn_connect(swconn, target, target);
444 rconn_disconnect(swconn);
449 if (!rconn_is_connected(swconn)) {
452 if (seqno != rconn_get_connection_seqno(swconn)) {
453 seqno = rconn_get_connection_seqno(swconn);
457 bool progress = true;
458 for (int i = 0; progress && i < 50; i++) {
459 /* Allow the state machine to run. */
460 enum ofctrl_state old_state = state;
462 #define STATE(NAME) case NAME: run_##NAME(); break;
469 /* Try to process a received packet. */
470 struct ofpbuf *msg = rconn_recv(swconn);
472 const struct ofp_header *oh = msg->data;
476 error = ofptype_decode(&type, oh);
479 #define STATE(NAME) case NAME: recv_##NAME(oh, type); break;
486 char *s = ofp_to_string(oh, ntohs(oh->length), 1);
487 VLOG_WARN("could not decode OpenFlow message (%s): %s",
488 ofperr_to_string(error), s);
495 /* If we did some work, plan to go around again. */
496 progress = old_state != state || msg;
499 /* We bailed out to limit the amount of work we do in one go, to allow
500 * other code a chance to run. We were still making progress at that
501 * point, so ensure that we come back again without waiting. */
502 poll_immediate_wake();
505 return (state == S_CLEAR_FLOWS || state == S_UPDATE_FLOWS
506 ? mff_ovn_geneve : 0);
512 rconn_run_wait(swconn);
513 rconn_recv_wait(swconn);
519 rconn_destroy(swconn);
520 ovn_flow_table_destroy();
521 rconn_packet_counter_destroy(tx_counter);
525 ofctrl_get_cur_cfg(void)
531 queue_msg(struct ofpbuf *msg)
533 const struct ofp_header *oh = msg->data;
534 ovs_be32 xid = oh->xid;
535 rconn_send(swconn, msg, tx_counter);
540 log_openflow_rl(struct vlog_rate_limit *rl, enum vlog_level level,
541 const struct ofp_header *oh, const char *title)
543 if (!vlog_should_drop(&this_module, level, rl)) {
544 char *s = ofp_to_string(oh, ntohs(oh->length), 2);
545 vlog(&this_module, level, "%s: %s", title, s);
551 ofctrl_recv(const struct ofp_header *oh, enum ofptype type)
553 if (type == OFPTYPE_ECHO_REQUEST) {
554 queue_msg(make_echo_reply(oh));
555 } else if (type == OFPTYPE_ERROR) {
556 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(30, 300);
557 log_openflow_rl(&rl, VLL_INFO, oh, "OpenFlow error");
558 } else if (type != OFPTYPE_ECHO_REPLY &&
559 type != OFPTYPE_BARRIER_REPLY &&
560 type != OFPTYPE_PACKET_IN &&
561 type != OFPTYPE_PORT_STATUS &&
562 type != OFPTYPE_FLOW_REMOVED) {
563 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(30, 300);
564 log_openflow_rl(&rl, VLL_DBG, oh, "OpenFlow packet ignored");
568 /* Flow table interfaces to the rest of ovn-controller. */
571 log_ovn_flow_rl(struct vlog_rate_limit *rl, enum vlog_level level,
572 const struct ovn_flow *flow, const char *title)
574 if (!vlog_should_drop(&this_module, level, rl)) {
575 char *s = ovn_flow_to_string(flow);
576 vlog(&this_module, level, "%s for parent "UUID_FMT": %s",
577 title, UUID_ARGS(&flow->uuid), s);
582 /* Adds a flow to the collection associated with 'uuid'. The flow has the
583 * specified 'match' and 'actions' to the OpenFlow table numbered 'table_id'
584 * with the given 'priority'. The caller retains ownership of 'match' and
587 * Any number of flows may be associated with a given UUID. The flows with a
588 * given UUID must have a unique (table_id, priority, match) tuple. A
589 * duplicate within a UUID generally indicates a bug in the ovn-controller code
590 * that generated it, so this function logs a warning.
592 * (table_id, priority, match) tuples should also be unique for flows with
593 * different UUIDs, but it doesn't necessarily indicate a bug in
594 * ovn-controller, for two reasons. First, these duplicates could be caused by
595 * logical flows generated by ovn-northd, which aren't ovn-controller's fault;
596 * perhaps something should warn about these but the root cause is different.
597 * Second, these duplicates might be transient, that is, they might go away
598 * before the next call to ofctrl_run() if a call to ofctrl_remove_flows()
599 * removes one or the other.
601 * This just assembles the desired flow tables in memory. Nothing is actually
602 * sent to the switch until a later call to ofctrl_run(). */
604 ofctrl_add_flow(uint8_t table_id, uint16_t priority,
605 const struct match *match, const struct ofpbuf *actions,
606 const struct uuid *uuid)
608 /* Structure that uses table_id+priority+various things as hashes. */
609 struct ovn_flow *f = xmalloc(sizeof *f);
610 f->table_id = table_id;
611 f->priority = priority;
613 f->ofpacts = xmemdup(actions->data, actions->size);
614 f->ofpacts_len = actions->size;
616 f->match_hmap_node.hash = ovn_flow_match_hash(f);
617 f->uuid_hindex_node.hash = uuid_hash(&f->uuid);
619 /* Check to see if other flows exist with the same key (table_id, priority,
620 * match criteria) and uuid. If so, discard this flow and log a
622 struct ovs_list existing;
623 ovn_flow_lookup(&match_flow_table, f, &existing);
625 LIST_FOR_EACH (d, list_node, &existing) {
626 if (uuid_equals(&f->uuid, &d->uuid)) {
627 /* Duplicate flows with the same UUID indicate some kind of bug
628 * (see the function-level comment), but we distinguish two
631 * - If the actions for the duplicate flow are the same, then
632 * it's benign; it's hard to imagine how there could be a
633 * real problem. Log at INFO level.
635 * - If the actions are different, then one or the other set of
636 * actions must be wrong or (perhaps more likely) we've got a
637 * new set of actions replacing an old set but the caller
638 * neglected to use ofctrl_remove_flows() or
639 * ofctrl_set_flow() to do it properly. Log at WARN level to
640 * get some attention.
642 if (ofpacts_equal(f->ofpacts, f->ofpacts_len,
643 d->ofpacts, d->ofpacts_len)) {
644 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
645 log_ovn_flow_rl(&rl, VLL_INFO, f, "duplicate flow");
647 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
648 log_ovn_flow_rl(&rl, VLL_WARN, f,
649 "duplicate flow with modified action");
651 /* It seems likely that the newer actions are the correct
654 d->ofpacts = f->ofpacts;
655 d->ofpacts_len = f->ofpacts_len;
663 /* Otherwise, add the flow. */
664 hmap_insert(&match_flow_table, &f->match_hmap_node,
665 f->match_hmap_node.hash);
666 hindex_insert(&uuid_flow_table, &f->uuid_hindex_node,
667 f->uuid_hindex_node.hash);
670 /* Removes a bundle of flows from the flow table. */
672 ofctrl_remove_flows(const struct uuid *uuid)
674 struct ovn_flow *f, *next;
675 HINDEX_FOR_EACH_WITH_HASH_SAFE (f, next, uuid_hindex_node, uuid_hash(uuid),
677 if (uuid_equals(&f->uuid, uuid)) {
678 hmap_remove(&match_flow_table, &f->match_hmap_node);
679 hindex_remove(&uuid_flow_table, &f->uuid_hindex_node);
/* Shortcut to remove all flows matching the supplied UUID and add this
 * flow.  Equivalent to calling ofctrl_remove_flows() for 'uuid' followed by
 * ofctrl_add_flow() with the same arguments. */
void
ofctrl_set_flow(uint8_t table_id, uint16_t priority,
                const struct match *match, const struct ofpbuf *actions,
                const struct uuid *uuid)
{
    ofctrl_remove_flows(uuid);
    ofctrl_add_flow(table_id, priority, match, actions, uuid);
}
698 /* Duplicate an ovn_flow structure. */
700 ofctrl_dup_flow(struct ovn_flow *src)
702 struct ovn_flow *dst = xmalloc(sizeof *dst);
703 dst->table_id = src->table_id;
704 dst->priority = src->priority;
705 dst->match = src->match;
706 dst->ofpacts = xmemdup(src->ofpacts, src->ofpacts_len);
707 dst->ofpacts_len = src->ofpacts_len;
708 dst->uuid = src->uuid;
709 dst->match_hmap_node.hash = ovn_flow_match_hash(dst);
710 dst->uuid_hindex_node.hash = uuid_hash(&src->uuid);
714 /* Returns a hash of the match key in 'f'. */
716 ovn_flow_match_hash(const struct ovn_flow *f)
718 return hash_2words((f->table_id << 16) | f->priority,
719 match_hash(&f->match, 0));
722 /* Compare two flows and return -1, 0, 1 based on whether a if less than,
723 * equal to or greater than b. */
725 ovn_flow_compare_flows(struct ovn_flow *a, struct ovn_flow *b)
727 return uuid_compare_3way(&a->uuid, &b->uuid);
730 /* Given a list of ovn_flows, goes through the list and returns
731 * a single flow, in a deterministic way. */
732 static struct ovn_flow *
733 ovn_flow_select_from_list(struct ovs_list *flows)
735 struct ovn_flow *candidate;
736 struct ovn_flow *answer = NULL;
737 LIST_FOR_EACH (candidate, list_node, flows) {
738 if (!answer || ovn_flow_compare_flows(candidate, answer) < 0) {
745 /* Initializes and files in the supplied list with ovn_flows from 'flow_table'
746 * whose key is identical to 'target''s key. */
748 ovn_flow_lookup(struct hmap *flow_table, const struct ovn_flow *target,
749 struct ovs_list *answer)
753 ovs_list_init(answer);
754 HMAP_FOR_EACH_WITH_HASH (f, match_hmap_node, target->match_hmap_node.hash,
756 if (f->table_id == target->table_id
757 && f->priority == target->priority
758 && match_equal(&f->match, &target->match)) {
759 ovs_list_push_back(answer, &f->list_node);
765 ovn_flow_to_string(const struct ovn_flow *f)
767 struct ds s = DS_EMPTY_INITIALIZER;
768 ds_put_format(&s, "table_id=%"PRIu8", ", f->table_id);
769 ds_put_format(&s, "priority=%"PRIu16", ", f->priority);
770 match_format(&f->match, &s, OFP_DEFAULT_PRIORITY);
771 ds_put_cstr(&s, ", actions=");
772 ofpacts_format(f->ofpacts, f->ofpacts_len, &s);
773 return ds_steal_cstr(&s);
777 ovn_flow_log(const struct ovn_flow *f, const char *action)
779 if (VLOG_IS_DBG_ENABLED()) {
780 char *s = ovn_flow_to_string(f);
781 VLOG_DBG("%s flow: %s", action, s);
787 ovn_flow_destroy(struct ovn_flow *f)
795 /* Flow tables of struct ovn_flow. */
798 ovn_flow_table_clear(void)
800 struct ovn_flow *f, *next;
801 HMAP_FOR_EACH_SAFE (f, next, match_hmap_node, &match_flow_table) {
802 hmap_remove(&match_flow_table, &f->match_hmap_node);
803 hindex_remove(&uuid_flow_table, &f->uuid_hindex_node);
809 ovn_flow_table_destroy(void)
811 ovn_flow_table_clear();
812 hmap_destroy(&match_flow_table);
813 hindex_destroy(&uuid_flow_table);
816 /* Flow table update. */
818 static struct ofpbuf *
819 encode_flow_mod(struct ofputil_flow_mod *fm)
821 fm->buffer_id = UINT32_MAX;
822 fm->out_port = OFPP_ANY;
823 fm->out_group = OFPG_ANY;
824 return ofputil_encode_flow_mod(fm, OFPUTIL_P_OF13_OXM);
828 add_flow_mod(struct ofputil_flow_mod *fm, struct ovs_list *msgs)
830 struct ofpbuf *msg = encode_flow_mod(fm);
831 ovs_list_push_back(msgs, &msg->list_node);
836 /* Finds and returns a group_info in 'existing_groups' whose key is identical
837 * to 'target''s key, or NULL if there is none. */
838 static struct group_info *
839 ovn_group_lookup(struct hmap *exisiting_groups,
840 const struct group_info *target)
842 struct group_info *e;
844 HMAP_FOR_EACH_WITH_HASH(e, hmap_node, target->hmap_node.hash,
846 if (e->group_id == target->group_id) {
853 /* Clear either desired_groups or existing_groups in group_table. */
855 ovn_group_table_clear(struct group_table *group_table, bool existing)
857 struct group_info *g, *next;
858 struct hmap *target_group = existing
859 ? &group_table->existing_groups
860 : &group_table->desired_groups;
862 HMAP_FOR_EACH_SAFE (g, next, hmap_node, target_group) {
863 hmap_remove(target_group, &g->hmap_node);
864 bitmap_set0(group_table->group_ids, g->group_id);
865 ds_destroy(&g->group);
870 static struct ofpbuf *
871 encode_group_mod(const struct ofputil_group_mod *gm)
873 return ofputil_encode_group_mod(OFP13_VERSION, gm);
877 add_group_mod(const struct ofputil_group_mod *gm, struct ovs_list *msgs)
879 struct ofpbuf *msg = encode_group_mod(gm);
880 ovs_list_push_back(msgs, &msg->list_node);
884 /* Replaces the flow table on the switch, if possible, by the flows added
885 * with ofctrl_add_flow().
887 * Replaces the group table on the switch, if possible, by the groups in
888 * 'group_table->desired_groups'. Regardless of whether the group table
889 * is updated, this deletes all the groups from the
890 * 'group_table->desired_groups' and frees them. (The hmap itself isn't
893 * This should be called after ofctrl_run() within the main loop. */
895 ofctrl_put(struct group_table *group_table, int64_t nb_cfg)
898 groups = group_table;
901 /* The flow table can be updated if the connection to the switch is up and
902 * in the correct state and not backlogged with existing flow_mods. (Our
903 * criteria for being backlogged appear very conservative, but the socket
904 * between ovn-controller and OVS provides some buffering.) */
905 if (state != S_UPDATE_FLOWS
906 || rconn_packet_counter_n_packets(tx_counter)) {
907 ovn_group_table_clear(group_table, false);
911 /* OpenFlow messages to send to the switch to bring it up-to-date. */
912 struct ovs_list msgs = OVS_LIST_INITIALIZER(&msgs);
914 /* Iterate through all the desired groups. If there are new ones,
915 * add them to the switch. */
916 struct group_info *desired;
917 HMAP_FOR_EACH(desired, hmap_node, &group_table->desired_groups) {
918 if (!ovn_group_lookup(&group_table->existing_groups, desired)) {
919 /* Create and install new group. */
920 struct ofputil_group_mod gm;
921 enum ofputil_protocol usable_protocols;
923 struct ds group_string = DS_EMPTY_INITIALIZER;
924 ds_put_format(&group_string, "group_id=%u,%s",
925 desired->group_id, ds_cstr(&desired->group));
927 error = parse_ofp_group_mod_str(&gm, OFPGC11_ADD,
928 ds_cstr(&group_string),
931 add_group_mod(&gm, &msgs);
933 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
934 VLOG_ERR_RL(&rl, "new group %s %s", error,
935 ds_cstr(&group_string));
938 ds_destroy(&group_string);
939 ofputil_bucket_list_destroy(&gm.buckets);
943 /* Iterate through all of the installed flows. If any of them are no
944 * longer desired, delete them; if any of them should have different
945 * actions, update them. */
946 struct ovn_flow *i, *next;
947 HMAP_FOR_EACH_SAFE (i, next, match_hmap_node, &installed_flows) {
948 struct ovs_list matches;
949 ovn_flow_lookup(&match_flow_table, i, &matches);
950 if (ovs_list_is_empty(&matches)) {
951 /* Installed flow is no longer desirable. Delete it from the
952 * switch and from installed_flows. */
953 struct ofputil_flow_mod fm = {
955 .priority = i->priority,
956 .table_id = i->table_id,
957 .command = OFPFC_DELETE_STRICT,
959 add_flow_mod(&fm, &msgs);
960 ovn_flow_log(i, "removing installed");
962 hmap_remove(&installed_flows, &i->match_hmap_node);
965 /* Since we still have desired flows that match this key,
966 * select one and compare both its actions and uuid.
967 * If the actions aren't the same, queue an update to the
968 * actions of the installed flow. If the uuid has changed,
969 * update that as well. */
970 struct ovn_flow *d = ovn_flow_select_from_list(&matches);
971 if (!uuid_equals(&i->uuid, &d->uuid)) {
972 /* Update installed flow's UUID. */
975 if (!ofpacts_equal(i->ofpacts, i->ofpacts_len,
976 d->ofpacts, d->ofpacts_len)) {
977 /* Update actions in installed flow. */
978 struct ofputil_flow_mod fm = {
980 .priority = i->priority,
981 .table_id = i->table_id,
982 .ofpacts = d->ofpacts,
983 .ofpacts_len = d->ofpacts_len,
984 .command = OFPFC_MODIFY_STRICT,
986 add_flow_mod(&fm, &msgs);
987 ovn_flow_log(i, "updating installed");
989 /* Replace 'i''s actions by 'd''s. */
991 i->ofpacts = xmemdup(d->ofpacts, d->ofpacts_len);
992 i->ofpacts_len = d->ofpacts_len;
997 /* Iterate through the desired flows and add those that aren't found
998 * in the installed flow table. */
1000 HMAP_FOR_EACH (c, match_hmap_node, &match_flow_table) {
1001 struct ovs_list matches;
1002 ovn_flow_lookup(&installed_flows, c, &matches);
1003 if (ovs_list_is_empty(&matches)) {
1004 /* We have a key that isn't in the installed flows, so
1005 * look back into the desired flow list for all flows
1006 * that match this key, and select the one to be installed. */
1007 struct ovs_list candidates;
1008 ovn_flow_lookup(&match_flow_table, c, &candidates);
1009 struct ovn_flow *d = ovn_flow_select_from_list(&candidates);
1010 /* Send flow_mod to add flow. */
1011 struct ofputil_flow_mod fm = {
1013 .priority = d->priority,
1014 .table_id = d->table_id,
1015 .ofpacts = d->ofpacts,
1016 .ofpacts_len = d->ofpacts_len,
1017 .command = OFPFC_ADD,
1019 add_flow_mod(&fm, &msgs);
1020 ovn_flow_log(d, "adding installed");
1022 /* Copy 'd' from 'match_flow_table' to installed_flows. */
1023 struct ovn_flow *new_node = ofctrl_dup_flow(d);
1024 hmap_insert(&installed_flows, &new_node->match_hmap_node,
1025 new_node->match_hmap_node.hash);
1029 /* Iterate through the installed groups from previous runs. If they
1030 * are not needed delete them. */
1031 struct group_info *installed, *next_group;
1032 HMAP_FOR_EACH_SAFE(installed, next_group, hmap_node,
1033 &group_table->existing_groups) {
1034 if (!ovn_group_lookup(&group_table->desired_groups, installed)) {
1035 /* Delete the group. */
1036 struct ofputil_group_mod gm;
1037 enum ofputil_protocol usable_protocols;
1039 struct ds group_string = DS_EMPTY_INITIALIZER;
1040 ds_put_format(&group_string, "group_id=%u", installed->group_id);
1042 error = parse_ofp_group_mod_str(&gm, OFPGC11_DELETE,
1043 ds_cstr(&group_string),
1046 add_group_mod(&gm, &msgs);
1048 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
1049 VLOG_ERR_RL(&rl, "Error deleting group %d: %s",
1050 installed->group_id, error);
1053 ds_destroy(&group_string);
1054 ofputil_bucket_list_destroy(&gm.buckets);
1056 /* Remove 'installed' from 'group_table->existing_groups' */
1057 hmap_remove(&group_table->existing_groups, &installed->hmap_node);
1058 ds_destroy(&installed->group);
1060 /* Dealloc group_id. */
1061 bitmap_set0(group_table->group_ids, installed->group_id);
1066 /* Move the contents of desired_groups to existing_groups. */
1067 HMAP_FOR_EACH_SAFE(desired, next_group, hmap_node,
1068 &group_table->desired_groups) {
1069 hmap_remove(&group_table->desired_groups, &desired->hmap_node);
1070 if (!ovn_group_lookup(&group_table->existing_groups, desired)) {
1071 hmap_insert(&group_table->existing_groups, &desired->hmap_node,
1072 desired->hmap_node.hash);
1074 ds_destroy(&desired->group);
1079 if (!ovs_list_is_empty(&msgs)) {
1080 /* Add a barrier to the list of messages. */
1081 struct ofpbuf *barrier = ofputil_encode_barrier_request(OFP13_VERSION);
1082 const struct ofp_header *oh = barrier->data;
1083 ovs_be32 xid = oh->xid;
1084 ovs_list_push_back(&msgs, &barrier->list_node);
1086 /* Queue the messages. */
1088 LIST_FOR_EACH_POP (msg, list_node, &msgs) {
1092 /* Track the flow update. */
1093 struct ofctrl_flow_update *fup, *prev;
1094 LIST_FOR_EACH_REVERSE_SAFE (fup, prev, list_node, &flow_updates) {
1095 if (nb_cfg < fup->nb_cfg) {
1096 /* This ofctrl_flow_update is for a configuration later than
1097 * 'nb_cfg'. This should not normally happen, because it means
1098 * that 'nb_cfg' in the SB_Global table of the southbound
1099 * database decreased, and it should normally be monotonically
1101 VLOG_WARN("nb_cfg regressed from %"PRId64" to %"PRId64,
1102 fup->nb_cfg, nb_cfg);
1103 ovs_list_remove(&fup->list_node);
1105 } else if (nb_cfg == fup->nb_cfg) {
1106 /* This ofctrl_flow_update is for the same configuration as
1107 * 'nb_cfg'. Probably, some change to the physical topology
1108 * means that we had to revise the OpenFlow flow table even
1109 * though the logical topology did not change. Update fup->xid,
1110 * so that we don't send a notification that we're up-to-date
1111 * until we're really caught up. */
1112 VLOG_DBG("advanced xid target for nb_cfg=%"PRId64, nb_cfg);
1120 /* Add a flow update. */
1121 fup = xmalloc(sizeof *fup);
1122 ovs_list_push_back(&flow_updates, &fup->list_node);
1124 fup->nb_cfg = nb_cfg;
1126 } else if (!ovs_list_is_empty(&flow_updates)) {
1127 /* Getting up-to-date with 'nb_cfg' didn't require any extra flow table
1128 * changes, so whenever we get up-to-date with the most recent flow
1129 * table update, we're also up-to-date with 'nb_cfg'. */
1130 struct ofctrl_flow_update *fup = ofctrl_flow_update_from_list_node(
1131 ovs_list_back(&flow_updates));
1132 fup->nb_cfg = nb_cfg;
1134 /* We were completely up-to-date before and still are. */