X-Git-Url: http://git.cascardo.eti.br/?a=blobdiff_plain;f=lib%2Flearning-switch.c;h=cb0e49bc63b83a80056d7231268c7fbad1f2d77d;hb=f25d0cf3c366;hp=22fa70d5d3b13dad3fd27a555151d167d1ba5ac1;hpb=aaaa7553a9b7fef47436e96fb0177981b09e4a83;p=cascardo%2Fovs.git diff --git a/lib/learning-switch.c b/lib/learning-switch.c index 22fa70d5d..cb0e49bc6 100644 --- a/lib/learning-switch.c +++ b/lib/learning-switch.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2009, 2010 Nicira Networks. + * Copyright (c) 2008, 2009, 2010, 2011, 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,30 +23,32 @@ #include #include +#include "byte-order.h" +#include "classifier.h" #include "flow.h" +#include "hmap.h" #include "mac-learning.h" #include "ofpbuf.h" +#include "ofp-actions.h" +#include "ofp-errors.h" #include "ofp-parse.h" #include "ofp-print.h" #include "ofp-util.h" #include "openflow/openflow.h" #include "poll-loop.h" -#include "queue.h" #include "rconn.h" -#include "stp.h" +#include "shash.h" +#include "simap.h" #include "timeval.h" #include "vconn.h" #include "vlog.h" -#include "xtoxll.h" -VLOG_DEFINE_THIS_MODULE(learning_switch) +VLOG_DEFINE_THIS_MODULE(learning_switch); -enum port_state { - P_DISABLED = 1 << 0, - P_LISTENING = 1 << 1, - P_LEARNING = 1 << 2, - P_FORWARDING = 1 << 3, - P_BLOCKING = 1 << 4 +struct lswitch_port { + struct hmap_node hmap_node; /* Hash node for port number. */ + uint16_t port_no; /* OpenFlow port number, in host byte order. */ + uint32_t queue_id; /* OpenFlow queue number. */ }; struct lswitch { @@ -55,28 +57,20 @@ struct lswitch { * Otherwise, the switch processes every packet. */ int max_idle; + enum ofputil_protocol protocol; unsigned long long int datapath_id; - uint32_t capabilities; time_t last_features_request; struct mac_learning *ml; /* NULL to act as hub instead of switch. */ - uint32_t wildcards; /* Wildcards to apply to flows. */ + struct flow_wildcards wc; /* Wildcards to apply to flows. */ bool action_normal; /* Use OFPP_NORMAL? */ - uint32_t queue; /* OpenFlow queue to use, or UINT32_MAX. */ + + /* Queue distribution. */ + uint32_t default_queue; /* Default OpenFlow queue, or UINT32_MAX. */ + struct hmap queue_numbers; /* Map from port number to lswitch_port. */ + struct shash queue_names; /* Map from port name to lswitch_port. */ /* Number of outgoing queued packets on the rconn. */ struct rconn_packet_counter *queued; - - /* Spanning tree protocol implementation. - * - * We implement STP states by, whenever a port's STP state changes, - * querying all the flows on the switch and then deleting any of them that - * are inappropriate for a port's STP state. */ - long long int next_query; /* Next time at which to query all flows. */ - long long int last_query; /* Last time we sent a query. */ - long long int last_reply; /* Last time we received a query reply. */ - unsigned int port_states[STP_MAX_PORTS]; - uint32_t query_xid; /* XID used for query. */ - int n_flows, n_no_recv, n_no_send; }; /* The log messages here could actually be useful in debugging, so keep the @@ -85,74 +79,108 @@ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(30, 300); static void queue_tx(struct lswitch *, struct rconn *, struct ofpbuf *); static void send_features_request(struct lswitch *, struct rconn *); -static void send_default_flows(struct lswitch *sw, struct rconn *rconn, - FILE *default_flows); -static void schedule_query(struct lswitch *, long long int delay); -static bool may_learn(const struct lswitch *, uint16_t port_no); -static bool may_recv(const struct lswitch *, uint16_t port_no, - bool any_actions); -static bool may_send(const struct lswitch *, uint16_t port_no); - -typedef void packet_handler_func(struct lswitch *, struct rconn *, void *); -static packet_handler_func process_switch_features; -static packet_handler_func process_packet_in; -static packet_handler_func process_echo_request; -static packet_handler_func process_port_status; -static packet_handler_func process_phy_port; -static packet_handler_func process_stats_reply; - -/* Creates and returns a new learning switch. - * - * If 'learn_macs' is true, the new switch will learn the ports on which MAC - * addresses appear. Otherwise, the new switch will flood all packets. - * - * If 'max_idle' is nonnegative, the new switch will set up flows that expire - * after the given number of seconds (or never expire, if 'max_idle' is - * OFP_FLOW_PERMANENT). Otherwise, the new switch will process every packet. - * - * The caller may provide the file stream 'default_flows' that defines - * default flows that should be pushed when a switch connects. Each - * line is a flow entry in the format described for "add-flows" command - * in the Flow Syntax section of the ovs-ofct(8) man page. The caller - * is responsible for closing the stream. + +static enum ofperr process_switch_features(struct lswitch *, + struct ofp_switch_features *); +static void process_packet_in(struct lswitch *, struct rconn *, + const struct ofp_header *); +static void process_echo_request(struct lswitch *, struct rconn *, + const struct ofp_header *); + +/* Creates and returns a new learning switch whose configuration is given by + * 'cfg'. * * 'rconn' is used to send out an OpenFlow features request. */ struct lswitch * -lswitch_create(struct rconn *rconn, bool learn_macs, - bool exact_flows, int max_idle, bool action_normal, - FILE *default_flows) +lswitch_create(struct rconn *rconn, const struct lswitch_config *cfg) { + enum ofputil_protocol protocol; struct lswitch *sw; - size_t i; sw = xzalloc(sizeof *sw); - sw->max_idle = max_idle; + sw->max_idle = cfg->max_idle; sw->datapath_id = 0; sw->last_features_request = time_now() - 1; - sw->ml = learn_macs ? mac_learning_create() : NULL; - sw->action_normal = action_normal; - if (exact_flows) { - /* Exact match. */ - sw->wildcards = 0; - } else { - /* We cannot wildcard all fields. - * We need in_port to detect moves. - * We need both SA and DA to do learning. */ - sw->wildcards = (OFPFW_DL_TYPE | OFPFW_NW_SRC_MASK | OFPFW_NW_DST_MASK - | OFPFW_NW_PROTO | OFPFW_TP_SRC | OFPFW_TP_DST); + sw->ml = (cfg->mode == LSW_LEARN + ? mac_learning_create(MAC_ENTRY_DEFAULT_IDLE_TIME) + : NULL); + sw->action_normal = cfg->mode == LSW_NORMAL; + + flow_wildcards_init_exact(&sw->wc); + if (cfg->wildcards) { + uint32_t ofpfw; + + if (cfg->wildcards == UINT32_MAX) { + /* Try to wildcard as many fields as possible, but we cannot + * wildcard all fields. We need in_port to detect moves. We need + * Ethernet source and dest and VLAN VID to do L2 learning. */ + ofpfw = (OFPFW10_DL_TYPE | OFPFW10_DL_VLAN_PCP + | OFPFW10_NW_SRC_ALL | OFPFW10_NW_DST_ALL + | OFPFW10_NW_TOS | OFPFW10_NW_PROTO + | OFPFW10_TP_SRC | OFPFW10_TP_DST); + } else { + ofpfw = cfg->wildcards; + } + + ofputil_wildcard_from_ofpfw10(ofpfw, &sw->wc); } - sw->queue = UINT32_MAX; - sw->queued = rconn_packet_counter_create(); - sw->next_query = LLONG_MIN; - sw->last_query = LLONG_MIN; - sw->last_reply = LLONG_MIN; - for (i = 0; i < STP_MAX_PORTS; i++) { - sw->port_states[i] = P_DISABLED; + + sw->default_queue = cfg->default_queue; + hmap_init(&sw->queue_numbers); + shash_init(&sw->queue_names); + if (cfg->port_queues) { + struct simap_node *node; + + SIMAP_FOR_EACH (node, cfg->port_queues) { + struct lswitch_port *port = xmalloc(sizeof *port); + hmap_node_nullify(&port->hmap_node); + port->queue_id = node->data; + shash_add(&sw->queue_names, node->name, port); + } } + + sw->queued = rconn_packet_counter_create(); send_features_request(sw, rconn); - if (default_flows) { - send_default_flows(sw, rconn, default_flows); + + protocol = ofputil_protocol_from_ofp_version(rconn_get_version(rconn)); + if (cfg->default_flows) { + enum ofputil_protocol usable_protocols; + struct ofpbuf *msg = NULL; + int error = 0; + size_t i; + + /* If the initial protocol isn't good enough for default_flows, then + * pick one that will work and encode messages to set up that + * protocol. + * + * This could be improved by actually negotiating a mutually acceptable + * flow format with the switch, but that would require an asynchronous + * state machine. This version ought to work fine in practice. */ + usable_protocols = ofputil_flow_mod_usable_protocols( + cfg->default_flows, cfg->n_default_flows); + if (!(protocol & usable_protocols)) { + enum ofputil_protocol want = rightmost_1bit(usable_protocols); + while (!error) { + msg = ofputil_encode_set_protocol(protocol, want, &protocol); + if (!msg) { + break; + } + error = rconn_send(rconn, msg, NULL); + } + } + + for (i = 0; !error && i < cfg->n_default_flows; i++) { + msg = ofputil_encode_flow_mod(&cfg->default_flows[i], protocol); + error = rconn_send(rconn, msg, NULL); + } + + if (error) { + VLOG_INFO_RL(&rl, "%s: failed to queue default flows (%s)", + rconn_get_name(rconn), strerror(error)); + } } + sw->protocol = protocol; + return sw; } @@ -161,100 +189,27 @@ void lswitch_destroy(struct lswitch *sw) { if (sw) { + struct lswitch_port *node, *next; + + HMAP_FOR_EACH_SAFE (node, next, hmap_node, &sw->queue_numbers) { + hmap_remove(&sw->queue_numbers, &node->hmap_node); + free(node); + } + shash_destroy(&sw->queue_names); mac_learning_destroy(sw->ml); rconn_packet_counter_destroy(sw->queued); free(sw); } } -/* Sets 'queue' as the OpenFlow queue used by packets and flows set up by 'sw'. - * Specify UINT32_MAX to avoid specifying a particular queue, which is also the - * default if this function is never called for 'sw'. */ -void -lswitch_set_queue(struct lswitch *sw, uint32_t queue) -{ - sw->queue = queue; -} - /* Takes care of necessary 'sw' activity, except for receiving packets (which * the caller must do). */ void -lswitch_run(struct lswitch *sw, struct rconn *rconn) +lswitch_run(struct lswitch *sw) { - long long int now = time_msec(); - if (sw->ml) { mac_learning_run(sw->ml, NULL); } - - /* If we're waiting for more replies, keeping waiting for up to 10 s. */ - if (sw->last_reply != LLONG_MIN) { - if (now - sw->last_reply > 10000) { - VLOG_ERR_RL(&rl, "%016llx: No more flow stat replies last 10 s", - sw->datapath_id); - sw->last_reply = LLONG_MIN; - sw->last_query = LLONG_MIN; - schedule_query(sw, 0); - } else { - return; - } - } - - /* If we're waiting for any reply at all, keep waiting for up to 10 s. */ - if (sw->last_query != LLONG_MIN) { - if (now - sw->last_query > 10000) { - VLOG_ERR_RL(&rl, "%016llx: No flow stat replies in last 10 s", - sw->datapath_id); - sw->last_query = LLONG_MIN; - schedule_query(sw, 0); - } else { - return; - } - } - - /* If it's time to send another query, do so. */ - if (sw->next_query != LLONG_MIN && now >= sw->next_query) { - sw->next_query = LLONG_MIN; - if (!rconn_is_connected(rconn)) { - schedule_query(sw, 1000); - } else { - struct ofp_stats_request *osr; - struct ofp_flow_stats_request *ofsr; - struct ofpbuf *b; - int error; - - VLOG_DBG("%016llx: Sending flow stats request to implement STP", - sw->datapath_id); - - sw->last_query = now; - sw->query_xid = random_uint32(); - sw->n_flows = 0; - sw->n_no_recv = 0; - sw->n_no_send = 0; - osr = make_openflow_xid(sizeof *osr + sizeof *ofsr, - OFPT_STATS_REQUEST, sw->query_xid, &b); - osr->type = htons(OFPST_FLOW); - osr->flags = htons(0); - ofsr = (struct ofp_flow_stats_request *) osr->body; - ofsr->match.wildcards = htonl(OFPFW_ALL); - ofsr->table_id = 0xff; - ofsr->out_port = htons(OFPP_NONE); - - error = rconn_send(rconn, b, NULL); - if (error) { - VLOG_WARN_RL(&rl, "%016llx: sending flow stats request " - "failed: %s", sw->datapath_id, strerror(error)); - ofpbuf_delete(b); - schedule_query(sw, 1000); - } - } - } -} - -static void -wait_timeout(long long int started) -{ - poll_timer_wait_until(started + 10000); } void @@ -263,12 +218,6 @@ lswitch_wait(struct lswitch *sw) if (sw->ml) { mac_learning_wait(sw->ml); } - - if (sw->last_reply != LLONG_MIN) { - wait_timeout(sw->last_reply); - } else if (sw->last_query != LLONG_MIN) { - wait_timeout(sw->last_query); - } } /* Processes 'msg', which should be an OpenFlow received on 'rconn', according @@ -279,48 +228,9 @@ void lswitch_process_packet(struct lswitch *sw, struct rconn *rconn, const struct ofpbuf *msg) { - struct processor { - uint8_t type; - size_t min_size; - packet_handler_func *handler; - }; - static const struct processor processors[] = { - { - OFPT_ECHO_REQUEST, - sizeof(struct ofp_header), - process_echo_request - }, - { - OFPT_FEATURES_REPLY, - sizeof(struct ofp_switch_features), - process_switch_features - }, - { - OFPT_PACKET_IN, - offsetof(struct ofp_packet_in, data), - process_packet_in - }, - { - OFPT_PORT_STATUS, - sizeof(struct ofp_port_status), - process_port_status - }, - { - OFPT_STATS_REPLY, - offsetof(struct ofp_stats_reply, body), - process_stats_reply - }, - { - OFPT_FLOW_REMOVED, - sizeof(struct ofp_flow_removed), - NULL - }, - }; - const size_t n_processors = ARRAY_SIZE(processors); - const struct processor *p; - struct ofp_header *oh; - - oh = msg->data; + const struct ofp_header *oh = msg->data; + const struct ofputil_msg_type *type; + if (sw->datapath_id == 0 && oh->type != OFPT_ECHO_REQUEST && oh->type != OFPT_FEATURES_REPLY) { @@ -328,27 +238,77 @@ lswitch_process_packet(struct lswitch *sw, struct rconn *rconn, return; } - for (p = processors; p < &processors[n_processors]; p++) { - if (oh->type == p->type) { - if (msg->size < p->min_size) { - VLOG_WARN_RL(&rl, "%016llx: %s: too short (%zu bytes) for " - "type %"PRIu8" (min %zu)", sw->datapath_id, - rconn_get_name(rconn), msg->size, oh->type, - p->min_size); - return; - } - if (p->handler) { - (p->handler)(sw, rconn, msg->data); - } - return; + ofputil_decode_msg_type(oh, &type); + switch (ofputil_msg_type_code(type)) { + case OFPUTIL_OFPT_ECHO_REQUEST: + process_echo_request(sw, rconn, msg->data); + break; + + case OFPUTIL_OFPT_FEATURES_REPLY: + process_switch_features(sw, msg->data); + break; + + case OFPUTIL_OFPT_PACKET_IN: + case OFPUTIL_NXT_PACKET_IN: + process_packet_in(sw, rconn, msg->data); + break; + + case OFPUTIL_OFPT_FLOW_REMOVED: + /* Nothing to do. */ + break; + + case OFPUTIL_MSG_INVALID: + case OFPUTIL_OFPT_HELLO: + case OFPUTIL_OFPT_ERROR: + case OFPUTIL_OFPT_ECHO_REPLY: + case OFPUTIL_OFPT_FEATURES_REQUEST: + case OFPUTIL_OFPT_GET_CONFIG_REQUEST: + case OFPUTIL_OFPT_GET_CONFIG_REPLY: + case OFPUTIL_OFPT_SET_CONFIG: + case OFPUTIL_OFPT_PORT_STATUS: + case OFPUTIL_OFPT_PACKET_OUT: + case OFPUTIL_OFPT_FLOW_MOD: + case OFPUTIL_OFPT_PORT_MOD: + case OFPUTIL_OFPT_BARRIER_REQUEST: + case OFPUTIL_OFPT_BARRIER_REPLY: + case OFPUTIL_OFPT_QUEUE_GET_CONFIG_REQUEST: + case OFPUTIL_OFPT_QUEUE_GET_CONFIG_REPLY: + case OFPUTIL_OFPST_DESC_REQUEST: + case OFPUTIL_OFPST_FLOW_REQUEST: + case OFPUTIL_OFPST_AGGREGATE_REQUEST: + case OFPUTIL_OFPST_TABLE_REQUEST: + case OFPUTIL_OFPST_PORT_REQUEST: + case OFPUTIL_OFPST_QUEUE_REQUEST: + case OFPUTIL_OFPST_PORT_DESC_REQUEST: + case OFPUTIL_OFPST_DESC_REPLY: + case OFPUTIL_OFPST_FLOW_REPLY: + case OFPUTIL_OFPST_QUEUE_REPLY: + case OFPUTIL_OFPST_PORT_REPLY: + case OFPUTIL_OFPST_TABLE_REPLY: + case OFPUTIL_OFPST_AGGREGATE_REPLY: + case OFPUTIL_OFPST_PORT_DESC_REPLY: + case OFPUTIL_NXT_ROLE_REQUEST: + case OFPUTIL_NXT_ROLE_REPLY: + case OFPUTIL_NXT_FLOW_MOD_TABLE_ID: + case OFPUTIL_NXT_SET_FLOW_FORMAT: + case OFPUTIL_NXT_SET_PACKET_IN_FORMAT: + case OFPUTIL_NXT_FLOW_MOD: + case OFPUTIL_NXT_FLOW_REMOVED: + case OFPUTIL_NXT_FLOW_AGE: + case OFPUTIL_NXT_SET_ASYNC_CONFIG: + case OFPUTIL_NXT_SET_CONTROLLER_ID: + case OFPUTIL_NXST_FLOW_REQUEST: + case OFPUTIL_NXST_AGGREGATE_REQUEST: + case OFPUTIL_NXST_FLOW_REPLY: + case OFPUTIL_NXST_AGGREGATE_REPLY: + default: + if (VLOG_IS_DBG_ENABLED()) { + char *s = ofp_to_string(msg->data, msg->size, 2); + VLOG_DBG_RL(&rl, "%016llx: OpenFlow packet ignored: %s", + sw->datapath_id, s); + free(s); } } - if (VLOG_IS_DBG_ENABLED()) { - char *p = ofp_to_string(msg->data, msg->size, 2); - VLOG_DBG_RL(&rl, "%016llx: OpenFlow packet ignored: %s", - sw->datapath_id, p); - free(p); - } } static void @@ -372,53 +332,6 @@ send_features_request(struct lswitch *sw, struct rconn *rconn) } } -static void -send_default_flows(struct lswitch *sw, struct rconn *rconn, - FILE *default_flows) -{ - char line[1024]; - - while (fgets(line, sizeof line, default_flows)) { - struct ofpbuf *b; - struct ofp_flow_mod *ofm; - uint16_t priority, idle_timeout, hard_timeout; - uint64_t cookie; - struct ofp_match match; - - char *comment; - - /* Delete comments. */ - comment = strchr(line, '#'); - if (comment) { - *comment = '\0'; - } - - /* Drop empty lines. */ - if (line[strspn(line, " \t\n")] == '\0') { - continue; - } - - /* Parse and send. str_to_flow() will expand and reallocate the data - * in 'buffer', so we can't keep pointers to across the str_to_flow() - * call. */ - make_openflow(sizeof *ofm, OFPT_FLOW_MOD, &b); - parse_ofp_str(line, &match, b, - NULL, NULL, &priority, &idle_timeout, &hard_timeout, - &cookie); - ofm = b->data; - ofm->match = match; - ofm->command = htons(OFPFC_ADD); - ofm->cookie = htonll(cookie); - ofm->idle_timeout = htons(idle_timeout); - ofm->hard_timeout = htons(hard_timeout); - ofm->buffer_id = htonl(UINT32_MAX); - ofm->priority = htons(priority); - - update_openflow_length(b); - queue_tx(sw, rconn, b); - } -} - static void queue_tx(struct lswitch *sw, struct rconn *rconn, struct ofpbuf *b) { @@ -435,46 +348,49 @@ queue_tx(struct lswitch *sw, struct rconn *rconn, struct ofpbuf *b) } } -static void -schedule_query(struct lswitch *sw, long long int delay) +static enum ofperr +process_switch_features(struct lswitch *sw, struct ofp_switch_features *osf) { - long long int now = time_msec(); - if (sw->next_query == LLONG_MIN || sw->next_query > now + delay) { - sw->next_query = now + delay; - } -} - -static void -process_switch_features(struct lswitch *sw, struct rconn *rconn, void *osf_) -{ - struct ofp_switch_features *osf = osf_; - size_t n_ports = ((ntohs(osf->header.length) - - offsetof(struct ofp_switch_features, ports)) - / sizeof *osf->ports); - size_t i; - - sw->datapath_id = ntohll(osf->datapath_id); - sw->capabilities = ntohl(osf->capabilities); - for (i = 0; i < n_ports; i++) { - process_phy_port(sw, rconn, &osf->ports[i]); - } - if (sw->capabilities & OFPC_STP) { - schedule_query(sw, 1000); + struct ofputil_switch_features features; + struct ofputil_phy_port port; + enum ofperr error; + struct ofpbuf b; + + error = ofputil_decode_switch_features(osf, &features, &b); + if (error) { + VLOG_ERR("received invalid switch feature reply (%s)", + ofperr_to_string(error)); + return error; + } + + sw->datapath_id = features.datapath_id; + + while (!ofputil_pull_phy_port(osf->header.version, &b, &port)) { + struct lswitch_port *lp = shash_find_data(&sw->queue_names, port.name); + if (lp && hmap_node_is_null(&lp->hmap_node)) { + lp->port_no = port.port_no; + hmap_insert(&sw->queue_numbers, &lp->hmap_node, + hash_int(lp->port_no, 0)); + } } + return 0; } static uint16_t -lswitch_choose_destination(struct lswitch *sw, const flow_t *flow) +lswitch_choose_destination(struct lswitch *sw, const struct flow *flow) { uint16_t out_port; /* Learn the source MAC. */ - if (may_learn(sw, flow->in_port) && sw->ml) { - if (mac_learning_learn(sw->ml, flow->dl_src, 0, flow->in_port, - GRAT_ARP_LOCK_NONE)) { + if (mac_learning_may_learn(sw->ml, flow->dl_src, 0)) { + struct mac_entry *mac = mac_learning_insert(sw->ml, flow->dl_src, 0); + if (mac_entry_is_new(mac) || mac->port.i != flow->in_port) { VLOG_DBG_RL(&rl, "%016llx: learned that "ETH_ADDR_FMT" is on " "port %"PRIu16, sw->datapath_id, ETH_ADDR_ARGS(flow->dl_src), flow->in_port); + + mac->port.i = flow->in_port; + mac_learning_changed(sw->ml, mac); } } @@ -483,16 +399,13 @@ lswitch_choose_destination(struct lswitch *sw, const flow_t *flow) return OFPP_NONE; } - if (!may_recv(sw, flow->in_port, false)) { - /* STP prevents receiving anything on this port. */ - return OFPP_NONE; - } - out_port = OFPP_FLOOD; if (sw->ml) { - int learned_port = mac_learning_lookup(sw->ml, flow->dl_dst, 0, NULL); - if (learned_port >= 0 && may_send(sw, learned_port)) { - out_port = learned_port; + struct mac_entry *mac; + + mac = mac_learning_lookup(sw->ml, flow->dl_dst, 0, NULL); + if (mac) { + out_port = mac->port.i; if (out_port == flow->in_port) { /* Don't send a packet back out its input port. */ return OFPP_NONE; @@ -508,254 +421,122 @@ lswitch_choose_destination(struct lswitch *sw, const flow_t *flow) return out_port; } +static uint32_t +get_queue_id(const struct lswitch *sw, uint16_t in_port) +{ + const struct lswitch_port *port; + + HMAP_FOR_EACH_WITH_HASH (port, hmap_node, hash_int(in_port, 0), + &sw->queue_numbers) { + if (port->port_no == in_port) { + return port->queue_id; + } + } + + return sw->default_queue; +} + static void -process_packet_in(struct lswitch *sw, struct rconn *rconn, void *opi_) +process_packet_in(struct lswitch *sw, struct rconn *rconn, + const struct ofp_header *oh) { - struct ofp_packet_in *opi = opi_; - uint16_t in_port = ntohs(opi->in_port); + struct ofputil_packet_in pi; + uint32_t queue_id; uint16_t out_port; - struct ofp_action_header actions[2]; - size_t actions_len; + uint64_t ofpacts_stub[64 / 8]; + struct ofpbuf ofpacts; + + struct ofputil_packet_out po; + enum ofperr error; - size_t pkt_ofs, pkt_len; struct ofpbuf pkt; - flow_t flow; + struct flow flow; + + error = ofputil_decode_packet_in(&pi, oh); + if (error) { + VLOG_WARN_RL(&rl, "failed to decode packet-in: %s", + ofperr_to_string(error)); + return; + } + + /* Ignore packets sent via output to OFPP_CONTROLLER. This library never + * uses such an action. You never know what experiments might be going on, + * though, and it seems best not to interfere with them. */ + if (pi.reason != OFPR_NO_MATCH) { + return; + } /* Extract flow data from 'opi' into 'flow'. */ - pkt_ofs = offsetof(struct ofp_packet_in, data); - pkt_len = ntohs(opi->header.length) - pkt_ofs; - pkt.data = opi->data; - pkt.size = pkt_len; - flow_extract(&pkt, 0, in_port, &flow); + ofpbuf_use_const(&pkt, pi.packet, pi.packet_len); + flow_extract(&pkt, 0, pi.fmd.tun_id, pi.fmd.in_port, &flow); /* Choose output port. */ out_port = lswitch_choose_destination(sw, &flow); /* Make actions. */ - memset(actions, 0, sizeof actions); + queue_id = get_queue_id(sw, pi.fmd.in_port); + ofpbuf_use_stack(&ofpacts, ofpacts_stub, sizeof ofpacts_stub); if (out_port == OFPP_NONE) { - actions_len = 0; - } else if (sw->queue == UINT32_MAX || out_port >= OFPP_MAX) { - struct ofp_action_output *oao = (struct ofp_action_output *) actions; - oao->type = htons(OFPAT_OUTPUT); - oao->len = htons(sizeof *oao); - oao->port = htons(out_port); - actions_len = sizeof *oao; + /* No actions. */ + } else if (queue_id == UINT32_MAX || out_port >= OFPP_MAX) { + ofpact_put_OUTPUT(&ofpacts)->port = out_port; + } else { + struct ofpact_enqueue *enqueue = ofpact_put_ENQUEUE(&ofpacts); + enqueue->port = out_port; + enqueue->queue = queue_id; + } + ofpact_pad(&ofpacts); + + /* Prepare packet_out in case we need one. */ + po.buffer_id = pi.buffer_id; + if (po.buffer_id == UINT32_MAX) { + po.packet = pkt.data; + po.packet_len = pkt.size; } else { - struct ofp_action_enqueue *oae = (struct ofp_action_enqueue *) actions; - oae->type = htons(OFPAT_ENQUEUE); - oae->len = htons(sizeof *oae); - oae->port = htons(out_port); - oae->queue_id = htonl(sw->queue); - actions_len = sizeof *oae; + po.packet = NULL; + po.packet_len = 0; } - assert(actions_len <= sizeof actions); + po.in_port = pi.fmd.in_port; + po.ofpacts = ofpacts.data; + po.ofpacts_len = ofpacts.size; /* Send the packet, and possibly the whole flow, to the output port. */ if (sw->max_idle >= 0 && (!sw->ml || out_port != OFPP_FLOOD)) { + struct ofputil_flow_mod fm; struct ofpbuf *buffer; - struct ofp_flow_mod *ofm; /* The output port is known, or we always flood everything, so add a * new flow. */ - buffer = make_add_flow(&flow, ntohl(opi->buffer_id), - sw->max_idle, actions_len); - ofpbuf_put(buffer, actions, actions_len); - ofm = buffer->data; - ofm->match.wildcards = htonl(sw->wildcards); + memset(&fm, 0, sizeof fm); + cls_rule_init(&flow, &sw->wc, 0, &fm.cr); + fm.table_id = 0xff; + fm.command = OFPFC_ADD; + fm.idle_timeout = sw->max_idle; + fm.buffer_id = pi.buffer_id; + fm.out_port = OFPP_NONE; + fm.ofpacts = ofpacts.data; + fm.ofpacts_len = ofpacts.size; + buffer = ofputil_encode_flow_mod(&fm, sw->protocol); + queue_tx(sw, rconn, buffer); /* If the switch didn't buffer the packet, we need to send a copy. */ - if (ntohl(opi->buffer_id) == UINT32_MAX && actions_len > 0) { - queue_tx(sw, rconn, - make_packet_out(&pkt, UINT32_MAX, in_port, - actions, actions_len / sizeof *actions)); + if (pi.buffer_id == UINT32_MAX && out_port != OFPP_NONE) { + queue_tx(sw, rconn, ofputil_encode_packet_out(&po)); } } else { /* We don't know that MAC, or we don't set up flows. Send along the * packet without setting up a flow. */ - if (ntohl(opi->buffer_id) != UINT32_MAX || actions_len > 0) { - queue_tx(sw, rconn, - make_packet_out(&pkt, ntohl(opi->buffer_id), in_port, - actions, actions_len / sizeof *actions)); + if (pi.buffer_id != UINT32_MAX || out_port != OFPP_NONE) { + queue_tx(sw, rconn, ofputil_encode_packet_out(&po)); } } } static void -process_echo_request(struct lswitch *sw, struct rconn *rconn, void *rq_) +process_echo_request(struct lswitch *sw, struct rconn *rconn, + const struct ofp_header *rq) { - struct ofp_header *rq = rq_; queue_tx(sw, rconn, make_echo_reply(rq)); } - -static void -process_port_status(struct lswitch *sw, struct rconn *rconn, void *ops_) -{ - struct ofp_port_status *ops = ops_; - process_phy_port(sw, rconn, &ops->desc); -} - -static void -process_phy_port(struct lswitch *sw, struct rconn *rconn OVS_UNUSED, - void *opp_) -{ - const struct ofp_phy_port *opp = opp_; - uint16_t port_no = ntohs(opp->port_no); - if (sw->capabilities & OFPC_STP && port_no < STP_MAX_PORTS) { - uint32_t config = ntohl(opp->config); - uint32_t state = ntohl(opp->state); - unsigned int *port_state = &sw->port_states[port_no]; - unsigned int new_port_state; - - if (!(config & (OFPPC_NO_STP | OFPPC_PORT_DOWN)) - && !(state & OFPPS_LINK_DOWN)) - { - switch (state & OFPPS_STP_MASK) { - case OFPPS_STP_LISTEN: - new_port_state = P_LISTENING; - break; - case OFPPS_STP_LEARN: - new_port_state = P_LEARNING; - break; - case OFPPS_STP_FORWARD: - new_port_state = P_FORWARDING; - break; - case OFPPS_STP_BLOCK: - new_port_state = P_BLOCKING; - break; - default: - new_port_state = P_DISABLED; - break; - } - } else { - new_port_state = P_FORWARDING; - } - if (*port_state != new_port_state) { - *port_state = new_port_state; - schedule_query(sw, 1000); - } - } -} - -static unsigned int -get_port_state(const struct lswitch *sw, uint16_t port_no) -{ - return (port_no >= STP_MAX_PORTS || !(sw->capabilities & OFPC_STP) - ? P_FORWARDING - : sw->port_states[port_no]); -} - -static bool -may_learn(const struct lswitch *sw, uint16_t port_no) -{ - return get_port_state(sw, port_no) & (P_LEARNING | P_FORWARDING); -} - -static bool -may_recv(const struct lswitch *sw, uint16_t port_no, bool any_actions) -{ - unsigned int state = get_port_state(sw, port_no); - return !(any_actions - ? state & (P_DISABLED | P_LISTENING | P_BLOCKING) - : state & (P_DISABLED | P_LISTENING | P_BLOCKING | P_LEARNING)); -} - -static bool -may_send(const struct lswitch *sw, uint16_t port_no) -{ - return get_port_state(sw, port_no) & P_FORWARDING; -} - -static void -process_flow_stats(struct lswitch *sw, struct rconn *rconn, - const struct ofp_flow_stats *ofs) -{ - const char *end = (char *) ofs + ntohs(ofs->length); - bool delete = false; - - /* Decide to delete the flow if it matches on an STP-disabled physical - * port. But don't delete it if the flow just drops all received packets, - * because that's a perfectly reasonable thing to do for disabled physical - * ports. */ - if (!(ofs->match.wildcards & htonl(OFPFW_IN_PORT))) { - if (!may_recv(sw, ntohs(ofs->match.in_port), - end > (char *) ofs->actions)) { - delete = true; - sw->n_no_recv++; - } - } - - /* Decide to delete the flow if it forwards to an STP-disabled physical - * port. */ - if (!delete) { - const struct ofp_action_header *a; - size_t len; - - for (a = ofs->actions; (char *) a < end; a += len / 8) { - len = ntohs(a->len); - if (len > end - (char *) a) { - VLOG_DBG_RL(&rl, "%016llx: action exceeds available space " - "(%zu > %td)", - sw->datapath_id, len, end - (char *) a); - break; - } else if (len % 8) { - VLOG_DBG_RL(&rl, "%016llx: action length (%zu) not multiple " - "of 8 bytes", sw->datapath_id, len); - break; - } - - if (a->type == htons(OFPAT_OUTPUT)) { - struct ofp_action_output *oao = (struct ofp_action_output *) a; - if (!may_send(sw, ntohs(oao->port))) { - delete = true; - sw->n_no_send++; - break; - } - } - } - } - - /* Delete the flow. */ - if (delete) { - struct ofp_flow_mod *ofm; - struct ofpbuf *b; - - ofm = make_openflow(offsetof(struct ofp_flow_mod, actions), - OFPT_FLOW_MOD, &b); - ofm->match = ofs->match; - ofm->command = OFPFC_DELETE_STRICT; - rconn_send(rconn, b, NULL); - } -} - -static void -process_stats_reply(struct lswitch *sw, struct rconn *rconn, void *osr_) -{ - struct ofp_stats_reply *osr = osr_; - struct flow_stats_iterator i; - const struct ofp_flow_stats *fs; - - if (sw->last_query == LLONG_MIN - || osr->type != htons(OFPST_FLOW) - || osr->header.xid != sw->query_xid) { - return; - } - for (fs = flow_stats_first(&i, osr); fs; fs = flow_stats_next(&i)) { - sw->n_flows++; - process_flow_stats(sw, rconn, fs); - } - if (!(osr->flags & htons(OFPSF_REPLY_MORE))) { - VLOG_DBG("%016llx: Deleted %d of %d received flows to " - "implement STP, %d because of no-recv, %d because of " - "no-send", sw->datapath_id, - sw->n_no_recv + sw->n_no_send, sw->n_flows, - sw->n_no_recv, sw->n_no_send); - sw->last_query = LLONG_MIN; - sw->last_reply = LLONG_MIN; - } else { - sw->last_reply = time_msec(); - } -} -