1 /* Copyright (c) 2015 Nicira, Inc.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
21 #include "ofp-actions.h"
23 #include "ovn-controller.h"
24 #include "ovn/lib/ovn-sb-idl.h"
25 #include "openvswitch/vlog.h"
30 #include "vswitch-idl.h"
32 VLOG_DEFINE_THIS_MODULE(physical);
/* Registers with 'ovs_idl' the Open vSwitch tables and columns listed below,
 * using ovsdb_idl_add_table() and ovsdb_idl_add_column():
 *
 *   - Bridge: ports.
 *   - Port: name, interfaces, external_ids.
 *   - Interface: name, ofport, external_ids.
 *
 * NOTE(review): physical_run() also reads a bridge's 'name' and an
 * interface's 'type', neither of which is added here -- presumably they are
 * registered elsewhere; confirm. */
35 physical_register_ovs_idl(struct ovsdb_idl *ovs_idl)
37 ovsdb_idl_add_table(ovs_idl, &ovsrec_table_bridge);
38 ovsdb_idl_add_column(ovs_idl, &ovsrec_bridge_col_ports);
40 ovsdb_idl_add_table(ovs_idl, &ovsrec_table_port);
41 ovsdb_idl_add_column(ovs_idl, &ovsrec_port_col_name);
42 ovsdb_idl_add_column(ovs_idl, &ovsrec_port_col_interfaces);
43 ovsdb_idl_add_column(ovs_idl, &ovsrec_port_col_external_ids);
45 ovsdb_idl_add_table(ovs_idl, &ovsrec_table_interface);
46 ovsdb_idl_add_column(ovs_idl, &ovsrec_interface_col_name);
47 ovsdb_idl_add_column(ovs_idl, &ovsrec_interface_col_ofport);
48 ovsdb_idl_add_column(ovs_idl, &ovsrec_interface_col_external_ids);
51 /* Maps from a chassis to the OpenFlow port number of the tunnel that can be
52 * used to reach that chassis. */
/* One entry of the 'tunnels' hmap that physical_run() builds.
 *
 * NOTE(review): physical_run() also assigns and reads an 'ofport' member
 * (tun->ofport); its declaration is not visible in this extract. */
53 struct chassis_tunnel {
/* Node in the 'tunnels' hmap; inserted with hash_string(chassis_id, 0). */
54 struct hmap_node hmap_node;
/* Chassis name used as the lookup key.  physical_run() stores the pointer
 * obtained from the port's external_ids as-is, without copying it. */
55 const char *chassis_id;
/* Encapsulation of the tunnel; the code tests for GENEVE, STT and VXLAN. */
57 enum chassis_tunnel_type type;
/* Looks up in 'tunnels' the chassis_tunnel whose 'chassis_id' string equals
 * 'chassis_id'.  Only entries whose hash is hash_string(chassis_id, 0) are
 * visited, and each candidate is confirmed with strcmp().
 *
 * NOTE(review): the statements that return the match (or the no-match result)
 * are not visible in this extract; callers test the result before using
 * it -- confirm the no-match value is NULL. */
60 static struct chassis_tunnel *
61 chassis_tunnel_find(struct hmap *tunnels, const char *chassis_id)
63 struct chassis_tunnel *tun;
64 HMAP_FOR_EACH_WITH_HASH (tun, hmap_node, hash_string(chassis_id, 0),
66 if (!strcmp(tun->chassis_id, chassis_id)) {
/* Appends to 'ofpacts' a SET_FIELD action on field 'dst' that covers the
 * 'n_bits' bits starting at bit offset 'ofs'.
 *
 * The bits are taken from 'value' (starting at its bit 0) and the same bit
 * range is set to all-ones in the action's mask.  'flow_has_vlan' is always
 * set to false. */
74 put_load(uint64_t value, enum mf_field_id dst, int ofs, int n_bits,
75 struct ofpbuf *ofpacts)
77 struct ofpact_set_field *sf = ofpact_put_SET_FIELD(ofpacts);
78 sf->field = mf_from_id(dst);
79 sf->flow_has_vlan = false;
/* 'value' is converted to network byte order first, so the copy below reads
 * from an 8-byte big-endian buffer. */
81 ovs_be64 n_value = htonll(value);
82 bitwise_copy(&n_value, 8, 0, &sf->value, sf->field->n_bytes, ofs, n_bits);
83 bitwise_one(&sf->mask, sf->field->n_bytes, ofs, n_bits);
/* Appends to 'ofpacts' a REG_MOVE action that copies 'n_bits' bits from field
 * 'src', starting at bit offset 'src_ofs', into field 'dst', starting at bit
 * offset 'dst_ofs'.
 *
 * NOTE(review): the parameter line declaring 'n_bits' is not visible in this
 * extract; the body uses it as the width of both source and destination. */
87 put_move(enum mf_field_id src, int src_ofs,
88 enum mf_field_id dst, int dst_ofs,
90 struct ofpbuf *ofpacts)
92 struct ofpact_reg_move *move = ofpact_put_REG_MOVE(ofpacts);
93 move->src.field = mf_from_id(src);
94 move->src.ofs = src_ofs;
95 move->src.n_bits = n_bits;
96 move->dst.field = mf_from_id(dst);
97 move->dst.ofs = dst_ofs;
98 move->dst.n_bits = n_bits;
/* Appends to 'ofpacts' a RESUBMIT action that targets table 'table_id'.  The
 * action's in_port is set to OFPP_IN_PORT. */
102 put_resubmit(uint8_t table_id, struct ofpbuf *ofpacts)
104 struct ofpact_resubmit *resubmit = ofpact_put_RESUBMIT(ofpacts);
105 resubmit->in_port = OFPP_IN_PORT;
106 resubmit->table_id = table_id;
/* Appends to 'ofpacts' the actions that fill in the tunnel metadata for a
 * packet sent over 'tun', for logical datapath 'datapath' and logical output
 * port 'outport'.  The layout depends on tun->type:
 *
 *   - GENEVE: datapath->tunnel_key goes into bits 0-23 of MFF_TUN_ID,
 *     'outport' is loaded into bits 0-31 of 'mff_ovn_geneve', and 15 bits of
 *     MFF_LOG_INPORT are then moved into 'mff_ovn_geneve' at bit 16.
 *
 *   - STT: all 64 bits of MFF_TUN_ID are loaded with
 *     datapath->tunnel_key | (outport << 24), and 15 bits of MFF_LOG_INPORT
 *     are then moved into MFF_TUN_ID at bit 40.
 *
 *   - VXLAN: only datapath->tunnel_key is loaded, into bits 0-23 of
 *     MFF_TUN_ID.
 *
 * NOTE(review): handling of any other tunnel type is not visible in this
 * extract. */
110 put_encapsulation(enum mf_field_id mff_ovn_geneve,
111 const struct chassis_tunnel *tun,
112 const struct sbrec_datapath_binding *datapath,
113 uint16_t outport, struct ofpbuf *ofpacts)
115 if (tun->type == GENEVE) {
116 put_load(datapath->tunnel_key, MFF_TUN_ID, 0, 24, ofpacts);
117 put_load(outport, mff_ovn_geneve, 0, 32, ofpacts);
118 put_move(MFF_LOG_INPORT, 0, mff_ovn_geneve, 16, 15, ofpacts);
119 } else if (tun->type == STT) {
/* 'outport' is a uint16_t, so without the cast it is promoted to int before
 * the shift: any value >= 128 would overflow a 32-bit int (undefined
 * behavior) and, in practice, sign-extend into the upper bits of the 64-bit
 * tunnel id.  Widening to uint64_t first keeps the shift well defined. */
120 put_load(datapath->tunnel_key | ((uint64_t) outport << 24), MFF_TUN_ID, 0, 64,
122 put_move(MFF_LOG_INPORT, 0, MFF_TUN_ID, 40, 15, ofpacts);
123 } else if (tun->type == VXLAN) {
124 put_load(datapath->tunnel_key, MFF_TUN_ID, 0, 24, ofpacts);
/* Points the subfield of 'stack' at the whole of 'field': bit offset 0 and a
 * width equal to the field's own n_bits.  'stack' is supplied by the caller,
 * which passes the result of ofpact_put_STACK_PUSH() or
 * ofpact_put_STACK_POP(). */
131 put_stack(enum mf_field_id field, struct ofpact_stack *stack)
133 stack->subfield.field = mf_from_id(field);
134 stack->subfield.ofs = 0;
135 stack->subfield.n_bits = stack->subfield.field->n_bits;
/* Adds to 'flow_table', through ofctrl_add_flow(), the flows that translate
 * between OpenFlow ports/tunnels and logical ports.  Flows are added to
 * OFTABLE_PHY_TO_LOG, OFTABLE_REMOTE_OUTPUT, OFTABLE_LOCAL_OUTPUT,
 * OFTABLE_DROP_LOOPBACK and OFTABLE_LOG_TO_PHY.
 *
 * Parameters, as used by the visible code:
 *
 *   - 'ctx': provides 'ovnsb_idl', from which port bindings and multicast
 *     groups are iterated.
 *   - 'mff_ovn_geneve': field that carries the logical ports for Geneve
 *     tunnels; it is tested for zero when a "geneve" interface is found.
 *   - 'br_int': bridge whose ports and interfaces are scanned.
 *   - 'this_chassis_id': compared against the chassis id found in each
 *     port's external_ids.
 *   - 'ct_zones': maps a logical port name to a zone id, which is loaded
 *     into MFF_LOG_CT_ZONE.
 *   - 'flow_table': receives every flow.
 *
 * Outline:
 *
 *   1. Scan 'br_int' and fill two temporary tables: 'localvif_to_ofport'
 *      (name -> OpenFlow port) and 'tunnels' (chassis -> tunnel).
 *   2. For each port binding, add the per-port flows.
 *   3. For each multicast group, add the local and remote output flows.
 *   4. For each tunnel, add the flows that accept packets arriving on it.
 *   5. Add the priority-0 fallback flows and release the temporary tables.
 *
 * NOTE(review): in the localnet untagged path, one ofctrl_add_flow() call
 * passes a literal 0 as the table where the neighbouring call passes
 * OFTABLE_PHY_TO_LOG -- confirm they denote the same table and consider using
 * the named constant. */
139 physical_run(struct controller_ctx *ctx, enum mf_field_id mff_ovn_geneve,
140 const struct ovsrec_bridge *br_int, const char *this_chassis_id,
141 const struct simap *ct_zones, struct hmap *flow_table)
/* Temporary lookup tables, filled from the ports of 'br_int' below and torn
 * down at the end of the function. */
143 struct simap localvif_to_ofport = SIMAP_INITIALIZER(&localvif_to_ofport);
144 struct hmap tunnels = HMAP_INITIALIZER(&tunnels);
/* Step 1: classify every interface of every port on 'br_int'. */
146 for (int i = 0; i < br_int->n_ports; i++) {
147 const struct ovsrec_port *port_rec = br_int->ports[i];
148 if (!strcmp(port_rec->name, br_int->name)) {
152 const char *chassis_id = smap_get(&port_rec->external_ids,
154 if (chassis_id && !strcmp(chassis_id, this_chassis_id)) {
158 const char *localnet = smap_get(&port_rec->external_ids,
159 "ovn-localnet-port");
160 const char *logpatch = smap_get(&port_rec->external_ids,
161 "ovn-logical-patch-port");
163 for (int j = 0; j < port_rec->n_interfaces; j++) {
164 const struct ovsrec_interface *iface_rec = port_rec->interfaces[j];
166 /* Get OpenFlow port number. */
167 if (!iface_rec->n_ofport) {
170 int64_t ofport = iface_rec->ofport[0];
171 if (ofport < 1 || ofport > ofp_to_u16(OFPP_MAX)) {
175 /* Record as patch to local net, logical patch port, chassis, or
176 * local logical port. */
177 bool is_patch = !strcmp(iface_rec->type, "patch");
178 if (is_patch && localnet) {
179 /* localnet patch ports can be handled just like VIFs. */
180 simap_put(&localvif_to_ofport, localnet, ofport);
182 } else if (is_patch && logpatch) {
183 /* Logical patch ports can be handled just like VIFs. */
184 simap_put(&localvif_to_ofport, logpatch, ofport);
186 } else if (chassis_id) {
187 enum chassis_tunnel_type tunnel_type;
188 if (!strcmp(iface_rec->type, "geneve")) {
189 tunnel_type = GENEVE;
190 if (!mff_ovn_geneve) {
193 } else if (!strcmp(iface_rec->type, "stt")) {
195 } else if (!strcmp(iface_rec->type, "vxlan")) {
/* Record the tunnel, keyed by the remote chassis id.  'chassis_id' points
 * into the port's external_ids and is stored as-is, not copied. */
201 struct chassis_tunnel *tun = xmalloc(sizeof *tun);
202 hmap_insert(&tunnels, &tun->hmap_node,
203 hash_string(chassis_id, 0));
204 tun->chassis_id = chassis_id;
205 tun->ofport = u16_to_ofp(ofport);
206 tun->type = tunnel_type;
209 const char *iface_id = smap_get(&iface_rec->external_ids,
212 simap_put(&localvif_to_ofport, iface_id, ofport);
/* Scratch action buffer; it is cleared and reused for each flow built in the
 * rest of the function. */
218 struct ofpbuf ofpacts;
219 ofpbuf_init(&ofpacts, 0);
221 /* Set up flows in table 0 for physical-to-logical translation and in table
222 * 64 for logical-to-physical translation. */
223 const struct sbrec_port_binding *binding;
224 SBREC_PORT_BINDING_FOR_EACH (binding, ctx->ovnsb_idl) {
225 /* Find the OpenFlow port for the logical port, as 'ofport'. This is
228 * - If the port is a VIF on the chassis we're managing, the
229 * OpenFlow port for the VIF. 'tun' will be NULL.
231 * The same logic handles logical patch ports, as well as
232 * localnet patch ports.
234 * For a container nested inside a VM and accessible via a VLAN,
235 * 'tag' is the VLAN ID; otherwise 'tag' is 0.
237 * For a localnet patch port, if a VLAN ID was configured, 'tag'
238 * is set to that VLAN ID; otherwise 'tag' is 0.
240 * - If the port is on a remote chassis, the OpenFlow port for a
241 * tunnel to the VIF's remote chassis. 'tun' identifies that
247 if (binding->parent_port && *binding->parent_port) {
251 ofport = u16_to_ofp(simap_get(&localvif_to_ofport,
252 binding->parent_port));
257 ofport = u16_to_ofp(simap_get(&localvif_to_ofport,
258 binding->logical_port));
259 if (!strcmp(binding->type, "localnet") && ofport && binding->tag) {
264 const struct chassis_tunnel *tun = NULL;
266 if (!binding->chassis) {
269 tun = chassis_tunnel_find(&tunnels, binding->chassis->name);
273 ofport = tun->ofport;
278 int zone_id = simap_get(ct_zones, binding->logical_port);
279 /* Packets that arrive from a vif can belong to a VM or
280 * to a container located inside that VM. Packets that
281 * arrive from containers have a tag (vlan) associated with them.
284 /* Table 0, Priority 150 and 100.
285 * ==============================
287 * Priority 150 is for tagged traffic. This may be containers in a
288 * VM or a VLAN on a local network. For such traffic, match on the
289 * tags and then strip the tag.
291 * Priority 100 is for traffic belonging to VMs or untagged locally
292 * connected networks.
294 * For both types of traffic: set MFF_LOG_INPORT to the logical
295 * input port, MFF_LOG_DATAPATH to the logical datapath, and
296 * resubmit into the logical ingress pipeline starting at table
298 ofpbuf_clear(&ofpacts);
299 match_init_catchall(&match);
300 match_set_in_port(&match, ofport);
302 /* Match a VLAN tag and strip it, including stripping priority tags
303 * (e.g. VLAN ID 0). In the latter case we'll add a second flow
304 * for frames that lack any 802.1Q header later. */
305 if (tag || !strcmp(binding->type, "localnet")) {
306 match_set_dl_vlan(&match, htons(tag));
307 ofpact_put_STRIP_VLAN(&ofpacts);
310 /* Remember the size with just strip vlan added so far,
311 * as we're going to remove this with ofpbuf_pull() later. */
312 uint32_t ofpacts_orig_size = ofpacts.size;
315 put_load(zone_id, MFF_LOG_CT_ZONE, 0, 32, &ofpacts);
318 /* Set MFF_LOG_DATAPATH and MFF_LOG_INPORT. */
319 put_load(binding->datapath->tunnel_key, MFF_LOG_DATAPATH, 0, 64,
321 put_load(binding->tunnel_key, MFF_LOG_INPORT, 0, 32,
324 /* Resubmit to first logical ingress pipeline table. */
325 put_resubmit(OFTABLE_LOG_INGRESS_PIPELINE, &ofpacts);
326 ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG,
327 tag ? 150 : 100, &match, &ofpacts);
329 if (!tag && !strcmp(binding->type, "localnet")) {
330 /* Add a second flow for frames that lack any 802.1Q
331 * header. For these, drop the OFPACT_STRIP_VLAN
333 ofpbuf_pull(&ofpacts, ofpacts_orig_size);
334 match_set_dl_tci_masked(&match, 0, htons(VLAN_CFI));
335 ofctrl_add_flow(flow_table, 0, 100, &match, &ofpacts);
338 /* Table 33, priority 100.
339 * =======================
341 * Implements output to local hypervisor. Each flow matches a
342 * logical output port on the local hypervisor, and resubmits to
346 match_init_catchall(&match);
347 ofpbuf_clear(&ofpacts);
349 /* Match MFF_LOG_DATAPATH, MFF_LOG_OUTPORT. */
350 match_set_metadata(&match, htonll(binding->datapath->tunnel_key));
351 match_set_reg(&match, MFF_LOG_OUTPORT - MFF_REG0,
352 binding->tunnel_key);
355 put_load(zone_id, MFF_LOG_CT_ZONE, 0, 32, &ofpacts);
358 /* Resubmit to table 34. */
359 put_resubmit(OFTABLE_DROP_LOOPBACK, &ofpacts);
360 ofctrl_add_flow(flow_table, OFTABLE_LOCAL_OUTPUT, 100, &match,
363 /* Table 64, Priority 100.
364 * =======================
366 * Deliver the packet to the local vif. */
367 match_init_catchall(&match);
368 ofpbuf_clear(&ofpacts);
369 match_set_metadata(&match, htonll(binding->datapath->tunnel_key));
370 match_set_reg(&match, MFF_LOG_OUTPORT - MFF_REG0,
371 binding->tunnel_key);
373 /* For containers sitting behind a local vif, tag the packets
374 * before delivering them. */
375 struct ofpact_vlan_vid *vlan_vid;
376 vlan_vid = ofpact_put_SET_VLAN_VID(&ofpacts);
377 vlan_vid->vlan_vid = tag;
378 vlan_vid->push_vlan_if_needed = true;
380 /* A packet might need to hair-pin back into its ingress
381 * OpenFlow port (to a different logical port, which we already
382 * checked back in table 34), so set the in_port to zero. */
383 put_stack(MFF_IN_PORT, ofpact_put_STACK_PUSH(&ofpacts));
384 put_load(0, MFF_IN_PORT, 0, 16, &ofpacts);
386 ofpact_put_OUTPUT(&ofpacts)->port = ofport;
388 /* Revert the tag added to the packets headed to containers
389 * in the previous step. If we don't do this, the packets
390 * that are to be broadcasted to a VM in the same logical
391 * switch will also contain the tag. Also revert the zero'd
393 ofpact_put_STRIP_VLAN(&ofpacts);
394 put_stack(MFF_IN_PORT, ofpact_put_STACK_POP(&ofpacts));
396 ofctrl_add_flow(flow_table, OFTABLE_LOG_TO_PHY, 100,
399 /* Table 32, priority 100.
400 * =======================
402 * Implements output to remote hypervisors. Each flow matches an
403 * output port that includes a logical port on a remote hypervisor,
404 * and tunnels the packet to that hypervisor.
407 match_init_catchall(&match);
408 ofpbuf_clear(&ofpacts);
410 /* Match MFF_LOG_DATAPATH, MFF_LOG_OUTPORT. */
411 match_set_metadata(&match, htonll(binding->datapath->tunnel_key));
412 match_set_reg(&match, MFF_LOG_OUTPORT - MFF_REG0,
413 binding->tunnel_key);
415 put_encapsulation(mff_ovn_geneve, tun, binding->datapath,
416 binding->tunnel_key, &ofpacts);
418 /* Output to tunnel. */
419 ofpact_put_OUTPUT(&ofpacts)->port = ofport;
420 ofctrl_add_flow(flow_table, OFTABLE_REMOTE_OUTPUT, 100,
424 /* Table 34, Priority 100.
425 * =======================
427 * Drop packets whose logical inport and outport are the same. */
428 match_init_catchall(&match);
429 ofpbuf_clear(&ofpacts);
430 match_set_metadata(&match, htonll(binding->datapath->tunnel_key));
431 match_set_reg(&match, MFF_LOG_INPORT - MFF_REG0, binding->tunnel_key);
432 match_set_reg(&match, MFF_LOG_OUTPORT - MFF_REG0, binding->tunnel_key);
433 ofctrl_add_flow(flow_table, OFTABLE_DROP_LOOPBACK, 100,
437 /* Handle output to multicast groups, in tables 32 and 33. */
438 const struct sbrec_multicast_group *mc;
/* Second scratch buffer: it accumulates the actions of the
 * OFTABLE_REMOTE_OUTPUT flow of each multicast group, while 'ofpacts' holds
 * the actions of the OFTABLE_LOCAL_OUTPUT flow. */
439 struct ofpbuf remote_ofpacts;
440 ofpbuf_init(&remote_ofpacts, 0);
441 SBREC_MULTICAST_GROUP_FOR_EACH (mc, ctx->ovnsb_idl) {
442 struct sset remote_chassis = SSET_INITIALIZER(&remote_chassis);
445 match_init_catchall(&match);
446 match_set_metadata(&match, htonll(mc->datapath->tunnel_key));
447 match_set_reg(&match, MFF_LOG_OUTPORT - MFF_REG0, mc->tunnel_key);
449 /* Go through all of the ports in the multicast group:
451 * - For remote ports, add the chassis to 'remote_chassis'.
453 * - For local ports (other than logical patch ports), add actions
454 * to 'ofpacts' to set the output port and resubmit.
456 * - For logical patch ports, add actions to 'remote_ofpacts'
457 * instead. (If we put them in 'ofpacts', then the output
458 * would happen on every hypervisor in the multicast group,
459 * effectively duplicating the packet.)
461 ofpbuf_clear(&ofpacts);
462 ofpbuf_clear(&remote_ofpacts);
463 for (size_t i = 0; i < mc->n_ports; i++) {
464 struct sbrec_port_binding *port = mc->ports[i];
466 if (port->datapath != mc->datapath) {
467 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
468 VLOG_WARN_RL(&rl, UUID_FMT": multicast group contains ports "
470 UUID_ARGS(&mc->header_.uuid));
474 int zone_id = simap_get(ct_zones, port->logical_port);
476 put_load(zone_id, MFF_LOG_CT_ZONE, 0, 32, &ofpacts);
479 if (!strcmp(port->type, "patch")) {
480 put_load(port->tunnel_key, MFF_LOG_OUTPORT, 0, 32,
482 put_resubmit(OFTABLE_DROP_LOOPBACK, &remote_ofpacts);
483 } else if (simap_contains(&localvif_to_ofport,
484 (port->parent_port && *port->parent_port)
485 ? port->parent_port : port->logical_port)) {
486 put_load(port->tunnel_key, MFF_LOG_OUTPORT, 0, 32, &ofpacts);
487 put_resubmit(OFTABLE_DROP_LOOPBACK, &ofpacts);
488 } else if (port->chassis) {
489 sset_add(&remote_chassis, port->chassis->name);
493 /* Table 33, priority 100.
494 * =======================
496 * Handle output to the local logical ports in the multicast group, if
498 bool local_ports = ofpacts.size > 0;
500 /* Following delivery to local logical ports, restore the multicast
501 * group as the logical output port. */
502 put_load(mc->tunnel_key, MFF_LOG_OUTPORT, 0, 32, &ofpacts);
504 ofctrl_add_flow(flow_table, OFTABLE_LOCAL_OUTPUT, 100,
508 /* Table 32, priority 100.
509 * =======================
511 * Handle output to the remote chassis in the multicast group, if
513 if (!sset_is_empty(&remote_chassis) || remote_ofpacts.size > 0) {
514 if (remote_ofpacts.size > 0) {
515 /* Following delivery to logical patch ports, restore the
516 * multicast group as the logical output port. */
517 put_load(mc->tunnel_key, MFF_LOG_OUTPORT, 0, 32,
/* Encapsulation actions are appended for the first tunnel and whenever a
 * tunnel's type differs from that of 'prev'; every tunnel gets its own output
 * action.  NOTE(review): the statement that updates 'prev' is not visible in
 * this extract. */
522 const struct chassis_tunnel *prev = NULL;
523 SSET_FOR_EACH (chassis, &remote_chassis) {
524 const struct chassis_tunnel *tun
525 = chassis_tunnel_find(&tunnels, chassis);
530 if (!prev || tun->type != prev->type) {
531 put_encapsulation(mff_ovn_geneve, tun, mc->datapath,
532 mc->tunnel_key, &remote_ofpacts);
535 ofpact_put_OUTPUT(&remote_ofpacts)->port = tun->ofport;
538 if (remote_ofpacts.size) {
540 put_resubmit(OFTABLE_LOCAL_OUTPUT, &remote_ofpacts);
542 ofctrl_add_flow(flow_table, OFTABLE_REMOTE_OUTPUT, 100,
543 &match, &remote_ofpacts);
546 sset_destroy(&remote_chassis);
548 ofpbuf_uninit(&remote_ofpacts);
550 /* Table 0, priority 100.
551 * ======================
553 * Process packets that arrive from a remote hypervisor (by matching
554 * on tunnel in_port). */
556 /* Add flows for Geneve and STT encapsulations. These
557 * encapsulations have metadata about the ingress and egress logical
558 * ports. We set MFF_LOG_DATAPATH, MFF_LOG_INPORT, and
559 * MFF_LOG_OUTPORT from the tunnel key data, then resubmit to table
560 * 33 to handle packets to the local hypervisor. */
561 struct chassis_tunnel *tun;
562 HMAP_FOR_EACH (tun, hmap_node, &tunnels) {
563 struct match match = MATCH_CATCHALL_INITIALIZER;
564 match_set_in_port(&match, tun->ofport);
/* The bit offsets and widths below mirror the layout written by
 * put_encapsulation() for the same tunnel type. */
566 ofpbuf_clear(&ofpacts);
567 if (tun->type == GENEVE) {
568 put_move(MFF_TUN_ID, 0, MFF_LOG_DATAPATH, 0, 24, &ofpacts);
569 put_move(mff_ovn_geneve, 16, MFF_LOG_INPORT, 0, 15,
571 put_move(mff_ovn_geneve, 0, MFF_LOG_OUTPORT, 0, 16,
573 } else if (tun->type == STT) {
574 put_move(MFF_TUN_ID, 40, MFF_LOG_INPORT, 0, 15, &ofpacts);
575 put_move(MFF_TUN_ID, 24, MFF_LOG_OUTPORT, 0, 16, &ofpacts);
576 put_move(MFF_TUN_ID, 0, MFF_LOG_DATAPATH, 0, 24, &ofpacts);
577 } else if (tun->type == VXLAN) {
578 /* We'll handle VXLAN later. */
584 put_resubmit(OFTABLE_LOCAL_OUTPUT, &ofpacts);
586 ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 100, &match, &ofpacts);
589 /* Add flows for VXLAN encapsulations. Due to the limited amount of
590 * metadata, we only support VXLAN for connections to gateways. The
591 * VNI is used to populate MFF_LOG_DATAPATH. The gateway's logical
592 * port is set to MFF_LOG_INPORT. Then the packet is resubmitted to
593 * table 16 to determine the logical egress port.
595 * xxx Due to resubmitting to table 16, broadcasts will be re-sent to
596 * xxx all logical ports, including non-local ones which could cause
597 * xxx duplicate packets to be received by multiply-connected gateways. */
598 HMAP_FOR_EACH (tun, hmap_node, &tunnels) {
599 if (tun->type != VXLAN) {
603 SBREC_PORT_BINDING_FOR_EACH (binding, ctx->ovnsb_idl) {
604 struct match match = MATCH_CATCHALL_INITIALIZER;
606 if (!binding->chassis ||
607 strcmp(tun->chassis_id, binding->chassis->name)) {
611 match_set_in_port(&match, tun->ofport);
612 match_set_tun_id(&match, htonll(binding->datapath->tunnel_key));
614 ofpbuf_clear(&ofpacts);
615 put_move(MFF_TUN_ID, 0, MFF_LOG_DATAPATH, 0, 24, &ofpacts);
616 put_load(binding->tunnel_key, MFF_LOG_INPORT, 0, 15, &ofpacts);
617 put_resubmit(OFTABLE_LOG_INGRESS_PIPELINE, &ofpacts);
619 ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 100, &match,
624 /* Table 32, Priority 0.
625 * =======================
627 * Resubmit packets that are not directed at tunnels or part of a
628 * multicast group to the local output table. */
630 match_init_catchall(&match);
631 ofpbuf_clear(&ofpacts);
632 put_resubmit(OFTABLE_LOCAL_OUTPUT, &ofpacts);
633 ofctrl_add_flow(flow_table, OFTABLE_REMOTE_OUTPUT, 0, &match, &ofpacts);
635 /* Table 34, Priority 0.
636 * =======================
638 * Resubmit packets that don't output to the ingress port (already checked
639 * in table 33) to the logical egress pipeline, clearing the logical
640 * registers (for consistent behavior with packets that get tunneled). */
641 match_init_catchall(&match);
642 ofpbuf_clear(&ofpacts);
/* Each MFF_LOG_REG(ID) expands to a put_load() that writes 0 into bits 0-31
 * of register ID, appending the action to 'ofpacts'. */
643 #define MFF_LOG_REG(ID) put_load(0, ID, 0, 32, &ofpacts);
646 put_resubmit(OFTABLE_LOG_EGRESS_PIPELINE, &ofpacts);
647 ofctrl_add_flow(flow_table, OFTABLE_DROP_LOOPBACK, 0, &match, &ofpacts);
/* Tear down the scratch buffer and the temporary lookup tables built at the
 * top of the function. */
649 ofpbuf_uninit(&ofpacts);
650 simap_destroy(&localvif_to_ofport);
651 struct chassis_tunnel *tun_next;
652 HMAP_FOR_EACH_SAFE (tun, tun_next, hmap_node, &tunnels) {
653 hmap_remove(&tunnels, &tun->hmap_node);
656 hmap_destroy(&tunnels);