/* Copyright (c) 2015 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
21 #include "ofp-actions.h"
23 #include "ovn-controller.h"
24 #include "ovn/lib/ovn-sb-idl.h"
25 #include "openvswitch/vlog.h"
30 #include "vswitch-idl.h"
32 VLOG_DEFINE_THIS_MODULE(physical);
35 physical_register_ovs_idl(struct ovsdb_idl *ovs_idl)
37 ovsdb_idl_add_table(ovs_idl, &ovsrec_table_bridge);
38 ovsdb_idl_add_column(ovs_idl, &ovsrec_bridge_col_ports);
40 ovsdb_idl_add_table(ovs_idl, &ovsrec_table_port);
41 ovsdb_idl_add_column(ovs_idl, &ovsrec_port_col_name);
42 ovsdb_idl_add_column(ovs_idl, &ovsrec_port_col_interfaces);
43 ovsdb_idl_add_column(ovs_idl, &ovsrec_port_col_external_ids);
45 ovsdb_idl_add_table(ovs_idl, &ovsrec_table_interface);
46 ovsdb_idl_add_column(ovs_idl, &ovsrec_interface_col_name);
47 ovsdb_idl_add_column(ovs_idl, &ovsrec_interface_col_ofport);
48 ovsdb_idl_add_column(ovs_idl, &ovsrec_interface_col_external_ids);
51 /* Maps from a chassis to the OpenFlow port number of the tunnel that can be
52 * used to reach that chassis. */
53 struct chassis_tunnel {
54 struct hmap_node hmap_node;
55 const char *chassis_id;
57 enum chassis_tunnel_type { GENEVE, STT } type;
60 static struct chassis_tunnel *
61 chassis_tunnel_find(struct hmap *tunnels, const char *chassis_id)
63 struct chassis_tunnel *tun;
64 HMAP_FOR_EACH_WITH_HASH (tun, hmap_node, hash_string(chassis_id, 0),
66 if (!strcmp(tun->chassis_id, chassis_id)) {
74 put_load(uint64_t value, enum mf_field_id dst, int ofs, int n_bits,
75 struct ofpbuf *ofpacts)
77 struct ofpact_set_field *sf = ofpact_put_SET_FIELD(ofpacts);
78 sf->field = mf_from_id(dst);
79 sf->flow_has_vlan = false;
81 ovs_be64 n_value = htonll(value);
82 bitwise_copy(&n_value, 8, 0, &sf->value, sf->field->n_bytes, ofs, n_bits);
83 bitwise_one(&sf->mask, sf->field->n_bytes, ofs, n_bits);
87 put_move(enum mf_field_id src, int src_ofs,
88 enum mf_field_id dst, int dst_ofs,
90 struct ofpbuf *ofpacts)
92 struct ofpact_reg_move *move = ofpact_put_REG_MOVE(ofpacts);
93 move->src.field = mf_from_id(src);
94 move->src.ofs = src_ofs;
95 move->src.n_bits = n_bits;
96 move->dst.field = mf_from_id(dst);
97 move->dst.ofs = dst_ofs;
98 move->dst.n_bits = n_bits;
102 put_resubmit(uint8_t table_id, struct ofpbuf *ofpacts)
104 struct ofpact_resubmit *resubmit = ofpact_put_RESUBMIT(ofpacts);
105 resubmit->in_port = OFPP_IN_PORT;
106 resubmit->table_id = table_id;
110 put_encapsulation(enum mf_field_id mff_ovn_geneve,
111 const struct chassis_tunnel *tun,
112 const struct sbrec_datapath_binding *datapath,
113 uint16_t outport, struct ofpbuf *ofpacts)
115 if (tun->type == GENEVE) {
116 put_load(datapath->tunnel_key, MFF_TUN_ID, 0, 24, ofpacts);
117 put_load(outport, mff_ovn_geneve, 0, 32, ofpacts);
118 put_move(MFF_LOG_INPORT, 0, mff_ovn_geneve, 16, 15, ofpacts);
119 } else if (tun->type == STT) {
120 put_load(datapath->tunnel_key | (outport << 24), MFF_TUN_ID, 0, 64,
122 put_move(MFF_LOG_INPORT, 0, MFF_TUN_ID, 40, 15, ofpacts);
129 put_stack(enum mf_field_id field, struct ofpact_stack *stack)
131 stack->subfield.field = mf_from_id(field);
132 stack->subfield.ofs = 0;
133 stack->subfield.n_bits = stack->subfield.field->n_bits;
137 physical_run(struct controller_ctx *ctx, enum mf_field_id mff_ovn_geneve,
138 const struct ovsrec_bridge *br_int, const char *this_chassis_id,
139 struct hmap *flow_table)
141 struct simap lport_to_ofport = SIMAP_INITIALIZER(&lport_to_ofport);
142 struct hmap tunnels = HMAP_INITIALIZER(&tunnels);
143 struct simap localnet_to_ofport = SIMAP_INITIALIZER(&localnet_to_ofport);
145 for (int i = 0; i < br_int->n_ports; i++) {
146 const struct ovsrec_port *port_rec = br_int->ports[i];
147 if (!strcmp(port_rec->name, br_int->name)) {
151 const char *chassis_id = smap_get(&port_rec->external_ids,
153 if (chassis_id && !strcmp(chassis_id, this_chassis_id)) {
157 const char *localnet = smap_get(&port_rec->external_ids,
160 for (int j = 0; j < port_rec->n_interfaces; j++) {
161 const struct ovsrec_interface *iface_rec = port_rec->interfaces[j];
163 /* Get OpenFlow port number. */
164 if (!iface_rec->n_ofport) {
167 int64_t ofport = iface_rec->ofport[0];
168 if (ofport < 1 || ofport > ofp_to_u16(OFPP_MAX)) {
172 /* Record as patch to local net, chassis, or local logical port. */
173 if (!strcmp(iface_rec->type, "patch") && localnet) {
174 simap_put(&localnet_to_ofport, localnet, ofport);
176 } else if (chassis_id) {
177 enum chassis_tunnel_type tunnel_type;
178 if (!strcmp(iface_rec->type, "geneve")) {
179 tunnel_type = GENEVE;
180 if (!mff_ovn_geneve) {
183 } else if (!strcmp(iface_rec->type, "stt")) {
189 struct chassis_tunnel *tun = xmalloc(sizeof *tun);
190 hmap_insert(&tunnels, &tun->hmap_node,
191 hash_string(chassis_id, 0));
192 tun->chassis_id = chassis_id;
193 tun->ofport = u16_to_ofp(ofport);
194 tun->type = tunnel_type;
197 const char *iface_id = smap_get(&iface_rec->external_ids,
200 simap_put(&lport_to_ofport, iface_id, ofport);
206 struct ofpbuf ofpacts;
207 ofpbuf_init(&ofpacts, 0);
209 struct binding_elem {
210 struct ovs_list list_elem;
211 const struct sbrec_port_binding *binding;
213 struct localnet_bindings {
215 struct ovs_list bindings;
217 /* Maps from network name to "struct localnet_bindings". */
218 struct shash localnet_inputs = SHASH_INITIALIZER(&localnet_inputs);
220 /* Contains bare "struct hmap_node"s whose hash values are the tunnel_key
221 * of datapaths with at least one local port binding. */
222 struct hmap local_datapaths = HMAP_INITIALIZER(&local_datapaths);
224 /* Set up flows in table 0 for physical-to-logical translation and in table
225 * 64 for logical-to-physical translation. */
226 const struct sbrec_port_binding *binding;
227 SBREC_PORT_BINDING_FOR_EACH (binding, ctx->ovnsb_idl) {
228 /* Find the OpenFlow port for the logical port, as 'ofport'. This is
231 * - If the port is a VIF on the chassis we're managing, the
232 * OpenFlow port for the VIF. 'tun' will be NULL.
234 * In this or the next case, for a container nested inside a VM
235 * and accessible via a VLAN, 'tag' is the VLAN ID; otherwise
238 * - If the port is on a remote chassis, the OpenFlow port for a
239 * tunnel to the VIF's remote chassis. 'tun' identifies that
242 * - If the port is a "localnet" port for a network that is
243 * attached to the chassis we're managing, the OpenFlow port for
244 * the localnet port (a patch port).
249 if (!strcmp(binding->type, "localnet")) {
250 const char *network = smap_get(&binding->options, "network_name");
254 ofport = u16_to_ofp(simap_get(&localnet_to_ofport, network));
255 } else if (binding->parent_port) {
256 ofport = u16_to_ofp(simap_get(&lport_to_ofport,
257 binding->parent_port));
258 if (ofport && binding->tag) {
262 ofport = u16_to_ofp(simap_get(&lport_to_ofport,
263 binding->logical_port));
266 const struct chassis_tunnel *tun = NULL;
268 if (!binding->chassis) {
271 tun = chassis_tunnel_find(&tunnels, binding->chassis->name);
275 ofport = tun->ofport;
280 /* Packets that arrive from a vif can belong to a VM or
281 * to a container located inside that VM. Packets that
282 * arrive from containers have a tag (vlan) associated with them.
285 /* Table 0, Priority 150 and 100.
286 * ==============================
288 * Priority 150 is for traffic belonging to containers. For such
289 * traffic, match on the tags and then strip the tag.
290 * Priority 100 is for traffic belonging to VMs or locally connected
293 * For both types of traffic: set MFF_LOG_INPORT to the logical
294 * input port, MFF_LOG_DATAPATH to the logical datapath, and
295 * resubmit into the logical ingress pipeline starting at table
297 if (!strcmp(binding->type, "localnet")) {
298 /* The same OpenFlow port may correspond to localnet ports
299 * attached to more than one logical datapath, so keep track of
300 * all associated bindings and add a flow at the end. */
303 = smap_get(&binding->options, "network_name");
304 struct localnet_bindings *ln_bindings;
306 ln_bindings = shash_find_data(&localnet_inputs, network);
308 ln_bindings = xmalloc(sizeof *ln_bindings);
309 ln_bindings->ofport = ofport;
310 list_init(&ln_bindings->bindings);
311 shash_add(&localnet_inputs, network, ln_bindings);
314 struct binding_elem *b = xmalloc(sizeof *b);
315 b->binding = binding;
316 list_insert(&ln_bindings->bindings, &b->list_elem);
318 struct hmap_node *ld;
319 ld = hmap_first_with_hash(&local_datapaths,
320 binding->datapath->tunnel_key);
322 ld = xmalloc(sizeof *ld);
323 hmap_insert(&local_datapaths, ld,
324 binding->datapath->tunnel_key);
327 ofpbuf_clear(&ofpacts);
328 match_init_catchall(&match);
329 match_set_in_port(&match, ofport);
331 match_set_dl_vlan(&match, htons(tag));
334 /* Set MFF_LOG_DATAPATH and MFF_LOG_INPORT. */
335 put_load(binding->datapath->tunnel_key, MFF_LOG_DATAPATH, 0, 64,
337 put_load(binding->tunnel_key, MFF_LOG_INPORT, 0, 32,
342 ofpact_put_STRIP_VLAN(&ofpacts);
345 /* Resubmit to first logical ingress pipeline table. */
346 put_resubmit(OFTABLE_LOG_INGRESS_PIPELINE, &ofpacts);
347 ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG,
348 tag ? 150 : 100, &match, &ofpacts);
351 /* Table 33, priority 100.
352 * =======================
354 * Implements output to local hypervisor. Each flow matches a
355 * logical output port on the local hypervisor, and resubmits to
359 match_init_catchall(&match);
360 ofpbuf_clear(&ofpacts);
362 /* Match MFF_LOG_DATAPATH, MFF_LOG_OUTPORT. */
363 match_set_metadata(&match, htonll(binding->datapath->tunnel_key));
364 match_set_reg(&match, MFF_LOG_OUTPORT - MFF_REG0,
365 binding->tunnel_key);
367 /* Resubmit to table 34. */
368 put_resubmit(OFTABLE_DROP_LOOPBACK, &ofpacts);
369 ofctrl_add_flow(flow_table, OFTABLE_LOCAL_OUTPUT, 100, &match,
372 /* Table 64, Priority 100.
373 * =======================
375 * Deliver the packet to the local vif. */
376 match_init_catchall(&match);
377 ofpbuf_clear(&ofpacts);
378 match_set_metadata(&match, htonll(binding->datapath->tunnel_key));
379 match_set_reg(&match, MFF_LOG_OUTPORT - MFF_REG0,
380 binding->tunnel_key);
382 /* For containers sitting behind a local vif, tag the packets
383 * before delivering them. */
384 struct ofpact_vlan_vid *vlan_vid;
385 vlan_vid = ofpact_put_SET_VLAN_VID(&ofpacts);
386 vlan_vid->vlan_vid = tag;
387 vlan_vid->push_vlan_if_needed = true;
389 /* A packet might need to hair-pin back into its ingress
390 * OpenFlow port (to a different logical port, which we already
391 * checked back in table 34), so set the in_port to zero. */
392 put_stack(MFF_IN_PORT, ofpact_put_STACK_PUSH(&ofpacts));
393 put_load(0, MFF_IN_PORT, 0, 16, &ofpacts);
395 ofpact_put_OUTPUT(&ofpacts)->port = ofport;
397 /* Revert the tag added to the packets headed to containers
398 * in the previous step. If we don't do this, the packets
399 * that are to be broadcasted to a VM in the same logical
400 * switch will also contain the tag. Also revert the zero'd
402 ofpact_put_STRIP_VLAN(&ofpacts);
403 put_stack(MFF_IN_PORT, ofpact_put_STACK_POP(&ofpacts));
405 ofctrl_add_flow(flow_table, OFTABLE_LOG_TO_PHY, 100,
408 /* Table 32, priority 100.
409 * =======================
411 * Implements output to remote hypervisors. Each flow matches an
412 * output port that includes a logical port on a remote hypervisor,
413 * and tunnels the packet to that hypervisor.
416 match_init_catchall(&match);
417 ofpbuf_clear(&ofpacts);
419 /* Match MFF_LOG_DATAPATH, MFF_LOG_OUTPORT. */
420 match_set_metadata(&match, htonll(binding->datapath->tunnel_key));
421 match_set_reg(&match, MFF_LOG_OUTPORT - MFF_REG0,
422 binding->tunnel_key);
424 put_encapsulation(mff_ovn_geneve, tun, binding->datapath,
425 binding->tunnel_key, &ofpacts);
427 /* Output to tunnel. */
428 ofpact_put_OUTPUT(&ofpacts)->port = ofport;
429 ofctrl_add_flow(flow_table, OFTABLE_REMOTE_OUTPUT, 100,
433 /* Table 34, Priority 100.
434 * =======================
436 * Drop packets whose logical inport and outport are the same. */
437 match_init_catchall(&match);
438 ofpbuf_clear(&ofpacts);
439 match_set_metadata(&match, htonll(binding->datapath->tunnel_key));
440 match_set_reg(&match, MFF_LOG_INPORT - MFF_REG0, binding->tunnel_key);
441 match_set_reg(&match, MFF_LOG_OUTPORT - MFF_REG0, binding->tunnel_key);
442 ofctrl_add_flow(flow_table, OFTABLE_DROP_LOOPBACK, 100,
446 /* Handle output to multicast groups, in tables 32 and 33. */
447 const struct sbrec_multicast_group *mc;
448 SBREC_MULTICAST_GROUP_FOR_EACH (mc, ctx->ovnsb_idl) {
449 struct sset remote_chassis = SSET_INITIALIZER(&remote_chassis);
452 match_init_catchall(&match);
453 match_set_metadata(&match, htonll(mc->datapath->tunnel_key));
454 match_set_reg(&match, MFF_LOG_OUTPORT - MFF_REG0, mc->tunnel_key);
456 /* Go through all of the ports in the multicast group:
458 * - For local ports, add actions to 'ofpacts' to set the output
461 * - For remote ports, add the chassis to 'remote_chassis'. */
462 ofpbuf_clear(&ofpacts);
463 for (size_t i = 0; i < mc->n_ports; i++) {
464 struct sbrec_port_binding *port = mc->ports[i];
466 if (port->datapath != mc->datapath) {
467 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
468 VLOG_WARN_RL(&rl, UUID_FMT": multicast group contains ports "
470 UUID_ARGS(&mc->header_.uuid));
474 if (simap_contains(&lport_to_ofport, port->logical_port)) {
475 put_load(port->tunnel_key, MFF_LOG_OUTPORT, 0, 32, &ofpacts);
476 put_resubmit(OFTABLE_DROP_LOOPBACK, &ofpacts);
477 } else if (port->chassis) {
478 sset_add(&remote_chassis, port->chassis->name);
479 } else if (!strcmp(port->type, "localnet")) {
480 const char *network = smap_get(&port->options, "network_name");
484 if (!simap_contains(&localnet_to_ofport, network)) {
487 put_load(port->tunnel_key, MFF_LOG_OUTPORT, 0, 32, &ofpacts);
488 put_resubmit(OFTABLE_DROP_LOOPBACK, &ofpacts);
492 /* Table 33, priority 100.
493 * =======================
495 * Handle output to the local logical ports in the multicast group, if
497 bool local_ports = ofpacts.size > 0;
499 ofctrl_add_flow(flow_table, OFTABLE_LOCAL_OUTPUT, 100,
503 /* Table 32, priority 100.
504 * =======================
506 * Handle output to the remote chassis in the multicast group, if
508 if (!sset_is_empty(&remote_chassis)) {
509 ofpbuf_clear(&ofpacts);
512 const struct chassis_tunnel *prev = NULL;
513 SSET_FOR_EACH (chassis, &remote_chassis) {
514 const struct chassis_tunnel *tun
515 = chassis_tunnel_find(&tunnels, chassis);
520 if (!prev || tun->type != prev->type) {
521 put_encapsulation(mff_ovn_geneve, tun,
522 mc->datapath, mc->tunnel_key, &ofpacts);
525 ofpact_put_OUTPUT(&ofpacts)->port = tun->ofport;
530 put_resubmit(OFTABLE_LOCAL_OUTPUT, &ofpacts);
532 ofctrl_add_flow(flow_table, OFTABLE_REMOTE_OUTPUT, 100,
536 sset_destroy(&remote_chassis);
539 /* Table 0, priority 100.
540 * ======================
542 * For packets that arrive from a remote hypervisor (by matching a tunnel
543 * in_port), set MFF_LOG_DATAPATH, MFF_LOG_INPORT, and MFF_LOG_OUTPORT from
544 * the tunnel key data, then resubmit to table 33 to handle packets to the
545 * local hypervisor. */
547 struct chassis_tunnel *tun;
548 HMAP_FOR_EACH (tun, hmap_node, &tunnels) {
549 struct match match = MATCH_CATCHALL_INITIALIZER;
550 match_set_in_port(&match, tun->ofport);
552 ofpbuf_clear(&ofpacts);
553 if (tun->type == GENEVE) {
554 put_move(MFF_TUN_ID, 0, MFF_LOG_DATAPATH, 0, 24, &ofpacts);
555 put_move(mff_ovn_geneve, 16, MFF_LOG_INPORT, 0, 15,
557 put_move(mff_ovn_geneve, 0, MFF_LOG_OUTPORT, 0, 16,
559 } else if (tun->type == STT) {
560 put_move(MFF_TUN_ID, 40, MFF_LOG_INPORT, 0, 15, &ofpacts);
561 put_move(MFF_TUN_ID, 24, MFF_LOG_OUTPORT, 0, 16, &ofpacts);
562 put_move(MFF_TUN_ID, 0, MFF_LOG_DATAPATH, 0, 24, &ofpacts);
566 put_resubmit(OFTABLE_LOCAL_OUTPUT, &ofpacts);
568 ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 100, &match, &ofpacts);
571 /* Table 32, Priority 0.
572 * =======================
574 * Resubmit packets that are not directed at tunnels or part of a
575 * multicast group to the local output table. */
577 match_init_catchall(&match);
578 ofpbuf_clear(&ofpacts);
579 put_resubmit(OFTABLE_LOCAL_OUTPUT, &ofpacts);
580 ofctrl_add_flow(flow_table, OFTABLE_REMOTE_OUTPUT, 0, &match, &ofpacts);
582 /* Table 34, Priority 0.
583 * =======================
585 * Resubmit packets that don't output to the ingress port (already checked
586 * in table 33) to the logical egress pipeline, clearing the logical
587 * registers (for consistent behavior with packets that get tunneled). */
588 match_init_catchall(&match);
589 ofpbuf_clear(&ofpacts);
590 #define MFF_LOG_REG(ID) put_load(0, ID, 0, 32, &ofpacts);
593 put_resubmit(OFTABLE_LOG_EGRESS_PIPELINE, &ofpacts);
594 ofctrl_add_flow(flow_table, OFTABLE_DROP_LOOPBACK, 0, &match, &ofpacts);
596 ofpbuf_uninit(&ofpacts);
597 simap_destroy(&lport_to_ofport);
598 struct chassis_tunnel *tun_next;
599 HMAP_FOR_EACH_SAFE (tun, tun_next, hmap_node, &tunnels) {
600 hmap_remove(&tunnels, &tun->hmap_node);
603 hmap_destroy(&tunnels);
605 /* Table 0, Priority 100
606 * =====================
608 * We have now determined the full set of port bindings associated with
609 * each "localnet" network. Only create flows for datapaths that have
610 * another local binding. Otherwise, we know it would just be dropped.
612 struct shash_node *ln_bindings_node, *ln_bindings_node_next;
613 SHASH_FOR_EACH_SAFE (ln_bindings_node, ln_bindings_node_next,
615 struct localnet_bindings *ln_bindings = ln_bindings_node->data;
618 match_init_catchall(&match);
619 match_set_in_port(&match, ln_bindings->ofport);
621 struct ofpbuf ofpacts;
622 ofpbuf_init(&ofpacts, 0);
624 struct binding_elem *b;
625 LIST_FOR_EACH_POP (b, list_elem, &ln_bindings->bindings) {
626 struct hmap_node *ld;
627 ld = hmap_first_with_hash(&local_datapaths,
628 b->binding->datapath->tunnel_key);
630 /* Set MFF_LOG_DATAPATH and MFF_LOG_INPORT. */
631 put_load(b->binding->datapath->tunnel_key, MFF_LOG_DATAPATH,
633 put_load(b->binding->tunnel_key, MFF_LOG_INPORT, 0, 32,
635 put_resubmit(OFTABLE_LOG_INGRESS_PIPELINE, &ofpacts);
642 ofctrl_add_flow(flow_table, 0, 100, &match, &ofpacts);
645 ofpbuf_uninit(&ofpacts);
647 shash_delete(&localnet_inputs, ln_bindings_node);
650 shash_destroy(&localnet_inputs);
652 struct hmap_node *node;
653 while ((node = hmap_first(&local_datapaths))) {
654 hmap_remove(&local_datapaths, node);
657 hmap_destroy(&local_datapaths);
659 simap_destroy(&localnet_to_ofport);