2 * Copyright (c) 2014 Red Hat, Inc.
4 * Based on mac-learning implementation.
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at:
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
20 #include "mcast-snooping.h"
26 #include "byte-order.h"
30 #include "poll-loop.h"
33 #include "unaligned.h"
35 #include "vlan-bitmap.h"
36 #include "openvswitch/vlog.h"
/* Coverage counters.  In this file mcast_snooping_learned is bumped with
 * COVERAGE_INC when a group or mrouter entry is created, and
 * mcast_snooping_expired is bumped with COVERAGE_ADD when entries time out. */
38 COVERAGE_DEFINE(mcast_snooping_learned);
39 COVERAGE_DEFINE(mcast_snooping_expired);
/* Forward declarations of lookup helpers that mcast_snooping_add_group()
 * calls before their definitions appear further down. */
41 static struct mcast_port_bundle *
42 mcast_snooping_port_lookup(struct ovs_list *list, void *port);
43 static struct mcast_mrouter_bundle *
44 mcast_snooping_mrouter_lookup(struct mcast_snooping *ms, uint16_t vlan,
/* NOTE(review): the line carrying the remaining parameter(s) of this
 * prototype is missing from this copy; restore it from the definition. */
46 OVS_REQ_RDLOCK(ms->rwlock);
/* Predicate on 'ms'.  The public entry points visible in this file (unref,
 * mdb_flush, flush, run, wait) test !mcast_snooping_enabled(ms) before doing
 * any work.
 * NOTE(review): only the parameter line survives in this copy; the return
 * type and the body are missing, so the exact test is not shown here. */
49 mcast_snooping_enabled(const struct mcast_snooping *ms)
/* Returns the flood_unreg flag stored in 'ms'.  The flag is initialised to
 * true by mcast_snooping_create() and changed through
 * mcast_snooping_set_flood_unreg(). */
55 mcast_snooping_flood_unreg(const struct mcast_snooping *ms)
57 return ms->flood_unreg;
/* Tests whether 'igmp_type', given in network byte order, equals
 * IGMP_HOST_MEMBERSHIP_QUERY.  The constant is converted with htons() so the
 * comparison happens in network byte order. */
61 mcast_snooping_is_query(ovs_be16 igmp_type)
63 return igmp_type == htons(IGMP_HOST_MEMBERSHIP_QUERY);
/* Classifies 'igmp_type' (network byte order) by switching on its host-order
 * value.  The case labels visible here are the three
 * *_HOST_MEMBERSHIP_REPORT constants and IGMP_HOST_LEAVE_MESSAGE.
 * NOTE(review): the statements that follow the case labels and the fall-out
 * path are missing from this copy, so the returned values are not shown. */
67 mcast_snooping_is_membership(ovs_be16 igmp_type)
69 switch (ntohs(igmp_type)) {
70 case IGMP_HOST_MEMBERSHIP_REPORT:
71 case IGMPV2_HOST_MEMBERSHIP_REPORT:
72 case IGMPV3_HOST_MEMBERSHIP_REPORT:
73 case IGMP_HOST_LEAVE_MESSAGE:
79 /* Returns the number of seconds since multicast group 'b' was learned in a
// NOTE(review): the line that finished the sentence above and terminated the
// comment is missing from this copy; restore it before building.  Line
// comments are used for the notes in this region so that no extra comment
// terminator is introduced while the comment above is still open.
// The age is ms->idle_time minus the time remaining until b->expires, which
// assumes b->expires was last set to (refresh time + ms->idle_time).
82 mcast_bundle_age(const struct mcast_snooping *ms,
83 const struct mcast_group_bundle *b)
85 time_t remaining = b->expires - time_now();
86 return ms->idle_time - remaining;
// Hashes the group address and vlan together with ms->secret through
// hash_3words().  The result is the hash used to insert into and search
// ms->table.  ms->secret is set from random_uint32() when the table is
// created.
// NOTE(review): the return type and the line declaring the vlan parameter
// are missing from this copy.
90 mcast_table_hash(const struct mcast_snooping *ms, ovs_be32 grp_ip4,
93 return hash_3words((OVS_FORCE uint32_t) grp_ip4, vlan, ms->secret);
// Maps a pointer to the bundle_node list member back to the
// struct mcast_group_bundle that embeds it.
96 static struct mcast_group_bundle *
97 mcast_group_bundle_from_lru_node(struct ovs_list *list)
99 return CONTAINER_OF(list, struct mcast_group_bundle, bundle_node);
// Maps a pointer to the group_node list member back to the
// struct mcast_group that embeds it.
102 static struct mcast_group *
103 mcast_group_from_lru_node(struct ovs_list *list)
105 return CONTAINER_OF(list, struct mcast_group, group_node);
108 /* Searches 'ms' for and returns an mcast group for destination address
109 * 'dip' in 'vlan'. */
/* Candidates are the table nodes whose stored hash equals
 * mcast_table_hash(ms, dip, vlan); each one is still compared on both vlan
 * and ip4 before it is accepted.
 * NOTE(review): the return type, the declaration of hash and the return
 * statements are missing from this copy, so the not-found result is not
 * shown here. */
111 mcast_snooping_lookup(const struct mcast_snooping *ms, ovs_be32 dip,
113 OVS_REQ_RDLOCK(ms->rwlock)
115 struct mcast_group *grp;
118 hash = mcast_table_hash(ms, dip, vlan);
119 HMAP_FOR_EACH_WITH_HASH (grp, hmap_node, hash, &ms->table) {
120 if (grp->vlan == vlan && grp->ip4 == dip) {
127 /* If the LRU list is not empty, stores the least-recently-used group
128 * in '*grp' and returns true. Otherwise, if the LRU list is empty,
129 * stores NULL in '*grp' and returns false. */
/* The group taken is the head of ms->group_lru; mcast_snooping_add_group()
 * re-queues a group at the tail each time it is refreshed.
 * NOTE(review): the return statements and the empty-list branch are missing
 * from this copy. */
131 group_get_lru(const struct mcast_snooping *ms, struct mcast_group **grp)
132 OVS_REQ_RDLOCK(ms->rwlock)
134 if (!list_is_empty(&ms->group_lru)) {
135 *grp = mcast_group_from_lru_node(ms->group_lru.next);
/* Clamps 'idle_time' (seconds): values below 15 become 15 and values above
 * 3600 become 3600.
 * NOTE(review): the final arm of the conditional expression is missing from
 * this copy; presumably in-range values pass through unchanged -- confirm. */
144 normalize_idle_time(unsigned int idle_time)
146 return (idle_time < 15 ? 15
147 : idle_time > 3600 ? 3600
151 /* Creates and returns a new mcast table with an initial mcast aging
152 * timeout of MCAST_ENTRY_DEFAULT_IDLE_TIME seconds and an initial maximum of
153 * MCAST_DEFAULT_MAX_ENTRIES entries. */
154 struct mcast_snooping *
155 mcast_snooping_create(void)
157 struct mcast_snooping *ms;
159 ms = xmalloc(sizeof *ms);
/* Start with an empty hash table and four empty lists: the group LRU, the
 * mrouter LRU, the flood ports and the report-flood ports. */
160 hmap_init(&ms->table);
161 list_init(&ms->group_lru);
162 list_init(&ms->mrouter_lru);
163 list_init(&ms->fport_list);
164 list_init(&ms->rport_list);
/* Random per-table value that mcast_table_hash() mixes into every hash. */
165 ms->secret = random_uint32();
166 ms->idle_time = MCAST_ENTRY_DEFAULT_IDLE_TIME;
167 ms->max_entries = MCAST_DEFAULT_MAX_ENTRIES;
168 ms->need_revalidate = false;
/* Flooding of unregistered multicast is on by default. */
169 ms->flood_unreg = true;
170 ovs_refcount_init(&ms->ref_cnt);
171 ovs_rwlock_init(&ms->rwlock);
/* NOTE(review): the return statement is missing from this copy. */
/* Takes an additional reference on 'ms_'.  The const qualifier is cast away
 * only so that the reference count inside the object can be updated.
 * NOTE(review): any guard around the increment and the return statement are
 * missing from this copy. */
175 struct mcast_snooping *
176 mcast_snooping_ref(const struct mcast_snooping *ms_)
178 struct mcast_snooping *ms = CONST_CAST(struct mcast_snooping *, ms_);
180 ovs_refcount_ref(&ms->ref_cnt);
185 /* Unreferences (and possibly destroys) mcast snooping table 'ms'. */
187 mcast_snooping_unref(struct mcast_snooping *ms)
/* Nothing is done when mcast_snooping_enabled() rejects 'ms'. */
189 if (!mcast_snooping_enabled(ms)) {
/* Tear-down happens only when ovs_refcount_unref_relaxed() returns 1.
 * NOTE(review): presumably that is the count before the decrement, i.e. the
 * caller held the last reference -- confirm against the refcount API. */
193 if (ovs_refcount_unref_relaxed(&ms->ref_cnt) == 1) {
/* Empty the table and every list first, then destroy the containers. */
194 mcast_snooping_flush(ms);
195 hmap_destroy(&ms->table);
196 ovs_rwlock_destroy(&ms->rwlock);
/* NOTE(review): the statement that releases 'ms' itself is not visible in
 * this copy. */
201 /* Changes the mcast aging timeout of 'ms' to 'idle_time' seconds. */
/* The requested value is first clamped by normalize_idle_time().  When the
 * effective timeout changes, every bundle of every group is visited so that
 * already-learned entries can be adjusted by the difference.
 * NOTE(review): the declaration of delta and the statement inside the inner
 * loop are missing from this copy; presumably the loop shifts b->expires by
 * delta -- confirm against the complete file. */
203 mcast_snooping_set_idle_time(struct mcast_snooping *ms, unsigned int idle_time)
204 OVS_REQ_WRLOCK(ms->rwlock)
206 struct mcast_group *grp;
207 struct mcast_group_bundle *b;
210 idle_time = normalize_idle_time(idle_time);
211 if (idle_time != ms->idle_time) {
/* Signed difference between the new and the old timeout, in seconds. */
212 delta = (int) idle_time - (int) ms->idle_time;
213 LIST_FOR_EACH (grp, group_node, &ms->group_lru) {
214 LIST_FOR_EACH (b, bundle_node, &grp->bundle_lru) {
218 ms->idle_time = idle_time;
222 /* Sets the maximum number of entries in 'ms' to 'max_entries', adjusting it
223 * to be within a reasonable range. */
/* The range enforced here is 10 to 1000 * 1000.  Only the limit is stored;
 * nothing is evicted in this function.  mcast_snooping_run__() drops groups
 * while the table is over the limit.
 * NOTE(review): the parameter line for max_entries and the final arm of the
 * conditional expression are missing from this copy. */
225 mcast_snooping_set_max_entries(struct mcast_snooping *ms,
227 OVS_REQ_WRLOCK(ms->rwlock)
229 ms->max_entries = (max_entries < 10 ? 10
230 : max_entries > 1000 * 1000 ? 1000 * 1000
234 /* Sets whether unregistered multicast packets should be flooded to
235 * all ports or only to ports connected to multicast routers.
237 * Returns true if previous state differs from current state,
238 * false otherwise. */
240 mcast_snooping_set_flood_unreg(struct mcast_snooping *ms, bool enable)
241 OVS_REQ_WRLOCK(ms->rwlock)
/* Remember the old value so the caller can tell whether anything changed. */
243 bool prev = ms->flood_unreg;
244 ms->flood_unreg = enable;
245 return prev != enable;
/* Walks the bundle list of 'grp' looking for the bundle whose port equals
 * 'port' (pointer comparison).  'ms' is marked OVS_UNUSED in the parameter
 * list and appears only in the OVS_REQ_RDLOCK annotation.
 * NOTE(review): the return statements are missing from this copy, so the
 * not-found result is not shown here. */
248 static struct mcast_group_bundle *
249 mcast_group_bundle_lookup(struct mcast_snooping *ms OVS_UNUSED,
250 struct mcast_group *grp, void *port)
251 OVS_REQ_RDLOCK(ms->rwlock)
253 struct mcast_group_bundle *b;
255 LIST_FOR_EACH (b, bundle_node, &grp->bundle_lru) {
256 if (b->port == port) {
263 /* Insert a new bundle to the mcast group or update its
264 * position and expiration if it is already there. */
265 static struct mcast_group_bundle *
266 mcast_group_insert_bundle(struct mcast_snooping *ms OVS_UNUSED,
267 struct mcast_group *grp, void *port, int idle_time)
268 OVS_REQ_WRLOCK(ms->rwlock)
270 struct mcast_group_bundle *b;
272 b = mcast_group_bundle_lookup(ms, grp, port);
/* Existing bundle: unlink it here; it is re-queued at the tail below. */
274 list_remove(&b->bundle_node);
/* New bundle: allocate it and initialise its list node.
 * NOTE(review): the lines that choose between these two paths and that fill
 * in the remaining fields of a new bundle are missing from this copy. */
276 b = xmalloc(sizeof *b);
277 list_init(&b->bundle_node);
/* Refresh the deadline and queue at the tail.  Appending the most recently
 * refreshed bundle last is what lets mcast_snooping_prune_expired() treat
 * the list as sorted on expiration time. */
281 b->expires = time_now() + idle_time;
282 list_push_back(&grp->bundle_lru, &b->bundle_node);
286 /* Returns true if multicast group 'grp' still has bundles associated.
287 * Returns false if there are no bundles. */
289 mcast_group_has_bundles(struct mcast_group *grp)
291 return !list_is_empty(&grp->bundle_lru);
294 /* Delete 'grp' from the 'ms' hash table.
295 * Caller is responsible to clean bundle lru first. */
297 mcast_snooping_flush_group__(struct mcast_snooping *ms,
298 struct mcast_group *grp)
/* Enforce the precondition stated above: no bundle may still be linked. */
300 ovs_assert(list_is_empty(&grp->bundle_lru));
/* Unlink the group from both containers that reference it: the hash table
 * and the group LRU list. */
301 hmap_remove(&ms->table, &grp->hmap_node);
302 list_remove(&grp->group_node);
/* NOTE(review): the statement that releases 'grp' is not visible in this
 * copy. */
306 /* Flush out mcast group and its bundles */
308 mcast_snooping_flush_group(struct mcast_snooping *ms, struct mcast_group *grp)
309 OVS_REQ_WRLOCK(ms->rwlock)
311 struct mcast_group_bundle *b;
/* Pop every bundle off the list of the group so that the emptiness
 * assertion in mcast_snooping_flush_group__() holds.
 * NOTE(review): the loop body is missing from this copy; presumably it
 * releases each popped bundle -- confirm. */
313 LIST_FOR_EACH_POP (b, bundle_node, &grp->bundle_lru) {
316 mcast_snooping_flush_group__(ms, grp);
/* Removing a group is flagged for the next mcast_snooping_run__() result. */
317 ms->need_revalidate = true;
321 /* Deletes the bundle for 'port' from 'grp', returning true if it succeeds,
322 * false if no such bundle was found. */
/* NOTE(review): the return statements and whatever releases the unlinked
 * bundle are missing from this copy.  Removing an element inside the plain
 * LIST_FOR_EACH is only safe if the loop is left right after the removal. */
324 mcast_group_delete_bundle(struct mcast_snooping *ms OVS_UNUSED,
325 struct mcast_group *grp, void *port)
326 OVS_REQ_WRLOCK(ms->rwlock)
328 struct mcast_group_bundle *b;
330 LIST_FOR_EACH (b, bundle_node, &grp->bundle_lru) {
331 if (b->port == port) {
332 list_remove(&b->bundle_node);
/* NOTE(review): the line that ended the doc comment below is missing from
 * this copy, so a compiler would treat everything down to the next comment
 * terminator as comment text.  Restore it before building. */
340 /* If any bundle has expired, delete it. Returns the number of deleted
343 mcast_snooping_prune_expired(struct mcast_snooping *ms,
344 struct mcast_group *grp)
345 OVS_REQ_WRLOCK(ms->rwlock)
348 struct mcast_group_bundle *b, *next_b;
349 time_t timenow = time_now();
352 LIST_FOR_EACH_SAFE (b, next_b, bundle_node, &grp->bundle_lru) {
353 /* This list is sorted on expiration time. */
/* A bundle whose deadline is still in the future is tested for here.
 * NOTE(review): the statement taken in that case is missing; given the
 * sorted-list remark above it presumably ends the scan -- confirm. */
354 if (b->expires > timenow) {
/* Expired bundle: unlink it from the group. */
357 list_remove(&b->bundle_node);
/* A group left without bundles is removed from the table as well. */
362 if (!mcast_group_has_bundles(grp)) {
363 mcast_snooping_flush_group__(ms, grp);
/* NOTE(review): the declaration and update of the expired counter and the
 * condition guarding the two lines below are missing from this copy. */
368 ms->need_revalidate = true;
369 COVERAGE_ADD(mcast_snooping_expired, expired);
/* NOTE(review): the remaining lines of the doc comment below, including its
 * terminator, are missing from this copy, so a compiler would treat
 * everything down to the next comment terminator as comment text.  Restore
 * them before building. */
375 /* Add a multicast group to the mdb. If it exists, then
376 * move to the last position in the LRU list.
379 mcast_snooping_add_group(struct mcast_snooping *ms, ovs_be32 ip4,
380 uint16_t vlan, void *port)
381 OVS_REQ_WRLOCK(ms->rwlock)
384 struct mcast_group *grp;
386 /* Avoid duplicate packets. */
/* Ports already known as an mrouter port for this vlan, or configured as
 * flood ports, are tested for here.
 * NOTE(review): the statement taken when the test matches is missing from
 * this copy; presumably nothing is learned for such ports -- confirm. */
387 if (mcast_snooping_mrouter_lookup(ms, vlan, port)
388 || mcast_snooping_port_lookup(&ms->fport_list, port)) {
393 grp = mcast_snooping_lookup(ms, ip4, vlan);
/* Lines 395-409 of the original handle a group that is not in the table
 * yet (the test on grp is missing from this copy). */
395 uint32_t hash = mcast_table_hash(ms, ip4, vlan);
/* At capacity: evict the least-recently-used group to make room. */
397 if (hmap_count(&ms->table) >= ms->max_entries) {
398 group_get_lru(ms, &grp);
399 mcast_snooping_flush_group(ms, grp);
402 grp = xmalloc(sizeof *grp);
403 hmap_insert(&ms->table, &grp->hmap_node, hash);
/* NOTE(review): the assignments of the group key fields are missing from
 * this copy. */
406 list_init(&grp->bundle_lru);
408 ms->need_revalidate = true;
409 COVERAGE_INC(mcast_snooping_learned);
/* Existing group: unlink it from the LRU list; it is re-queued below. */
411 list_remove(&grp->group_node);
/* Add or refresh the bundle for this port using the current idle time. */
413 mcast_group_insert_bundle(ms, grp, port, ms->idle_time);
415 /* Mark 'grp' as recently used. */
416 list_push_back(&ms->group_lru, &grp->group_node);
/* Processes the IGMPv3 report carried in packet p, received on port in vlan.
 * Each group record is applied to ms either as a leave or as a join.
 * NOTE(review): the return type, several declarations, the loop over the
 * ngrp records, the checks on the dp_packet_at() results and the return
 * statement are missing from this copy. */
421 mcast_snooping_add_report(struct mcast_snooping *ms,
422 const struct dp_packet *p,
423 uint16_t vlan, void *port)
427 const struct igmpv3_header *igmpv3;
428 const struct igmpv3_record *record;
/* Byte offset of the L4 header from the start of the packet data. */
432 offset = (char *) dp_packet_l4(p) - (char *) dp_packet_data(p);
433 igmpv3 = dp_packet_at(p, offset, IGMPV3_HEADER_LEN);
/* Number of group records, converted to host byte order; the records start
 * right after the fixed IGMPv3 header. */
437 ngrp = ntohs(igmpv3->ngrp);
438 offset += IGMPV3_HEADER_LEN;
441 record = dp_packet_at(p, offset, sizeof(struct igmpv3_record));
445 /* Only consider known record types. */
/* NOTE(review): the statement executed for unknown types is not visible.
 * Verify that offset is still advanced past the record on that path;
 * otherwise the same bytes would be parsed again for the next record. */
446 if (record->type < IGMPV3_MODE_IS_INCLUDE
447 || record->type > IGMPV3_BLOCK_OLD_SOURCES) {
/* The group address is read through the 16-bit-aligned accessor. */
450 ip4 = get_16aligned_be32(&record->maddr);
/* NOTE(review): the next line is the remainder of a block comment whose
 * opening and closing lines are missing from this copy. */
452 * If record is INCLUDE MODE and there are no sources, it's equivalent
455 if (ntohs(record->nsrcs) == 0
456 && (record->type == IGMPV3_MODE_IS_INCLUDE
457 || record->type == IGMPV3_CHANGE_TO_INCLUDE_MODE)) {
458 ret = mcast_snooping_leave_group(ms, ip4, vlan, port);
460 ret = mcast_snooping_add_group(ms, ip4, vlan, port);
/* Step over this record: fixed part, source address list, auxiliary data.
 * NOTE(review): RFC 3376 section 4.2.6 defines Aux Data Len in units of
 * 32-bit words, but aux_len is added here unscaled.  That looks like it
 * under-advances whenever a record carries auxiliary data -- confirm and
 * scale by 4 if so. */
465 offset += sizeof(*record)
466 + ntohs(record->nsrcs) * sizeof(ovs_be32) + record->aux_len;
/* Handles a leave for group 'ip4' in 'vlan' seen on 'port': looks the group
 * up and deletes the bundle for 'port' from it.
 * NOTE(review): the return type and the return statements are missing from
 * this copy. */
472 mcast_snooping_leave_group(struct mcast_snooping *ms, ovs_be32 ip4,
473 uint16_t vlan, void *port)
474 OVS_REQ_WRLOCK(ms->rwlock)
476 struct mcast_group *grp;
478 /* Ports flagged to forward Reports usually have more
479 * than one host behind it, so don't leave the group
480 * on the first message and just let it expire */
481 if (mcast_snooping_port_lookup(&ms->rport_list, port)) {
485 grp = mcast_snooping_lookup(ms, ip4, vlan);
/* Revalidation is requested only when a bundle was actually removed. */
486 if (grp && mcast_group_delete_bundle(ms, grp, port)) {
487 ms->need_revalidate = true;
496 /* Returns the number of seconds since the multicast router
497 * was learned in a port. */
/* Computed as MCAST_MROUTER_PORT_IDLE_TIME minus the time remaining until
 * mrouter->expires; mcast_snooping_add_mrouter() sets expires to the refresh
 * time plus that same constant. */
499 mcast_mrouter_age(const struct mcast_snooping *ms OVS_UNUSED,
500 const struct mcast_mrouter_bundle *mrouter)
502 time_t remaining = mrouter->expires - time_now();
503 return MCAST_MROUTER_PORT_IDLE_TIME - remaining;
/* Maps a pointer to the mrouter_node list member back to the
 * struct mcast_mrouter_bundle that embeds it. */
506 static struct mcast_mrouter_bundle *
507 mcast_mrouter_from_lru_node(struct ovs_list *list)
509 return CONTAINER_OF(list, struct mcast_mrouter_bundle, mrouter_node);
512 /* If the LRU list is not empty, stores the least-recently-used mrouter
513 * in '*m' and returns true. Otherwise, if the LRU list is empty,
514 * stores NULL in '*m' and returns false. */
/* The entry taken is the head of ms->mrouter_lru;
 * mcast_snooping_add_mrouter() re-queues refreshed entries at the tail.
 * NOTE(review): the return statements and the empty-list branch are missing
 * from this copy. */
516 mrouter_get_lru(const struct mcast_snooping *ms,
517 struct mcast_mrouter_bundle **m)
518 OVS_REQ_RDLOCK(ms->rwlock)
520 if (!list_is_empty(&ms->mrouter_lru)) {
521 *m = mcast_mrouter_from_lru_node(ms->mrouter_lru.next);
/* Walks ms->mrouter_lru looking for the entry that matches both 'vlan' and
 * the port pointer.
 * NOTE(review): the parameter line for the port and the return statements
 * are missing from this copy, so the not-found result is not shown here. */
529 static struct mcast_mrouter_bundle *
530 mcast_snooping_mrouter_lookup(struct mcast_snooping *ms, uint16_t vlan,
532 OVS_REQ_RDLOCK(ms->rwlock)
534 struct mcast_mrouter_bundle *mrouter;
536 LIST_FOR_EACH (mrouter, mrouter_node, &ms->mrouter_lru) {
537 if (mrouter->vlan == vlan && mrouter->port == port) {
/* Records that a multicast router was seen on a port in 'vlan', creating
 * the entry if needed and refreshing its deadline either way.
 * NOTE(review): the return type, the parameter line for the port, the lines
 * that choose between the existing-entry and new-entry paths and the early
 * return for flood ports are missing from this copy. */
545 mcast_snooping_add_mrouter(struct mcast_snooping *ms, uint16_t vlan,
547 OVS_REQ_WRLOCK(ms->rwlock)
549 struct mcast_mrouter_bundle *mrouter;
551 /* Avoid duplicate packets. */
552 if (mcast_snooping_port_lookup(&ms->fport_list, port)) {
556 mrouter = mcast_snooping_mrouter_lookup(ms, vlan, port);
/* Existing entry: unlink it here; it is re-queued at the tail below. */
558 list_remove(&mrouter->mrouter_node);
/* New entry: allocate, fill in the key, count it and request revalidation. */
560 mrouter = xmalloc(sizeof *mrouter);
561 mrouter->vlan = vlan;
562 mrouter->port = port;
563 COVERAGE_INC(mcast_snooping_learned);
564 ms->need_revalidate = true;
567 mrouter->expires = time_now() + MCAST_MROUTER_PORT_IDLE_TIME;
568 list_push_back(&ms->mrouter_lru, &mrouter->mrouter_node);
/* NOTE(review): this returns the table-wide flag, so it can be true because
 * of an earlier, unrelated change that mcast_snooping_run__() has not yet
 * consumed, not only because this call added an entry. */
569 return ms->need_revalidate;
/* Unlinks 'mrouter' from the list it is on.
 * NOTE(review): the statement that releases the entry is not visible in
 * this copy. */
573 mcast_snooping_flush_mrouter(struct mcast_mrouter_bundle *mrouter)
575 list_remove(&mrouter->mrouter_node);
/* Maps a pointer to the node list member back to the
 * struct mcast_port_bundle that embeds it. */
581 static struct mcast_port_bundle *
582 mcast_port_from_list_node(struct ovs_list *list)
584 return CONTAINER_OF(list, struct mcast_port_bundle, node);
587 /* If the list is not empty, stores its first port bundle in '*f' and
588 * returns true.  Otherwise, stores NULL in '*f' and returns false. */
/* Used for both ms->fport_list and ms->rport_list when flushing.
 * NOTE(review): the return statements and the empty-list branch are missing
 * from this copy. */
590 mcast_snooping_port_get(const struct ovs_list *list,
591 struct mcast_port_bundle **f)
593 if (!list_is_empty(list)) {
594 *f = mcast_port_from_list_node(list->next);
/* Walks 'list' looking for the port bundle whose port equals 'port'
 * (pointer comparison).  Callers pass either the flood-port list or the
 * report-flood-port list.
 * NOTE(review): the return statements are missing from this copy, so the
 * not-found result is not shown here. */
602 static struct mcast_port_bundle *
603 mcast_snooping_port_lookup(struct ovs_list *list, void *port)
605 struct mcast_port_bundle *pbundle;
607 LIST_FOR_EACH (pbundle, node, list) {
608 if (pbundle->port == port) {
/* Allocates a port bundle for 'port' and links it into 'list'.  No
 * duplicate check is done here; the callers in this file look the port up
 * first and only add it when it is absent. */
616 mcast_snooping_add_port(struct ovs_list *list, void *port)
618 struct mcast_port_bundle *pbundle;
620 pbundle = xmalloc(sizeof *pbundle);
621 pbundle->port = port;
622 list_insert(list, &pbundle->node);
/* Unlinks 'pbundle' from the list it is on.
 * NOTE(review): the statement that releases the bundle is not visible in
 * this copy. */
626 mcast_snooping_flush_port(struct mcast_port_bundle *pbundle)
628 list_remove(&pbundle->node);
/* Adds 'port' to, or removes it from, ms->fport_list according to the flood
 * argument.  ms->need_revalidate is set only when the membership of the
 * list actually changes; asking for the state the port is already in does
 * nothing.
 * NOTE(review): the parameter line for flood is missing from this copy. */
635 mcast_snooping_set_port_flood(struct mcast_snooping *ms, void *port,
637 OVS_REQ_WRLOCK(ms->rwlock)
639 struct mcast_port_bundle *fbundle;
641 fbundle = mcast_snooping_port_lookup(&ms->fport_list, port);
642 if (flood && !fbundle) {
643 mcast_snooping_add_port(&ms->fport_list, port);
644 ms->need_revalidate = true;
645 } else if (!flood && fbundle) {
646 mcast_snooping_flush_port(fbundle);
647 ms->need_revalidate = true;
651 /* Flood Reports ports. */
/* Adds 'port' to, or removes it from, ms->rport_list according to the flood
 * argument.  ms->need_revalidate is set only when the membership of the
 * list actually changes.  mcast_snooping_leave_group() consults this list
 * to decide whether a leave on the port should be ignored.
 * NOTE(review): the parameter line for flood is missing from this copy. */
654 mcast_snooping_set_port_flood_reports(struct mcast_snooping *ms, void *port,
656 OVS_REQ_WRLOCK(ms->rwlock)
658 struct mcast_port_bundle *pbundle;
660 pbundle = mcast_snooping_port_lookup(&ms->rport_list, port);
661 if (flood && !pbundle) {
662 mcast_snooping_add_port(&ms->rport_list, port);
663 ms->need_revalidate = true;
664 } else if (!flood && pbundle) {
665 mcast_snooping_flush_port(pbundle);
666 ms->need_revalidate = true;
/* Removes every learned group and every learned mrouter entry from 'ms'.
 * The flood-port and report-flood-port lists are left alone here (compare
 * mcast_snooping_flush__(), which empties them too). */
673 mcast_snooping_mdb_flush__(struct mcast_snooping *ms)
674 OVS_REQ_WRLOCK(ms->rwlock)
676 struct mcast_group *grp;
677 struct mcast_mrouter_bundle *mrouter;
/* Keep taking the head of the group LRU until the list is empty. */
679 while (group_get_lru(ms, &grp)) {
680 mcast_snooping_flush_group(ms, grp);
683 hmap_shrink(&ms->table);
685 while (mrouter_get_lru(ms, &mrouter)) {
686 mcast_snooping_flush_mrouter(mrouter);
/* Locked wrapper around mcast_snooping_mdb_flush__(): does nothing when
 * mcast_snooping_enabled() rejects 'ms', otherwise takes ms->rwlock for
 * writing around the flush. */
691 mcast_snooping_mdb_flush(struct mcast_snooping *ms)
693 if (!mcast_snooping_enabled(ms)) {
697 ovs_rwlock_wrlock(&ms->rwlock);
698 mcast_snooping_mdb_flush__(ms);
699 ovs_rwlock_unlock(&ms->rwlock);
702 /* Flushes mdb and flood ports. */
/* Empties everything 'ms' tracks: groups, mrouter entries, flood ports and
 * report-flood ports.  Each loop keeps taking the first element of its list
 * until the list is empty. */
704 mcast_snooping_flush__(struct mcast_snooping *ms)
705 OVS_REQ_WRLOCK(ms->rwlock)
707 struct mcast_group *grp;
708 struct mcast_mrouter_bundle *mrouter;
709 struct mcast_port_bundle *pbundle;
711 while (group_get_lru(ms, &grp)) {
712 mcast_snooping_flush_group(ms, grp);
715 hmap_shrink(&ms->table);
717 /* flush multicast routers */
718 while (mrouter_get_lru(ms, &mrouter)) {
719 mcast_snooping_flush_mrouter(mrouter);
722 /* flush flood ports */
723 while (mcast_snooping_port_get(&ms->fport_list, &pbundle)) {
724 mcast_snooping_flush_port(pbundle);
727 /* flush flood report ports */
728 while (mcast_snooping_port_get(&ms->rport_list, &pbundle)) {
729 mcast_snooping_flush_port(pbundle);
/* Locked wrapper around mcast_snooping_flush__(): does nothing when
 * mcast_snooping_enabled() rejects 'ms', otherwise takes ms->rwlock for
 * writing around the flush.  Also called by mcast_snooping_unref() when the
 * table is being destroyed. */
734 mcast_snooping_flush(struct mcast_snooping *ms)
736 if (!mcast_snooping_enabled(ms)) {
740 ovs_rwlock_wrlock(&ms->rwlock);
741 mcast_snooping_flush__(ms);
742 ovs_rwlock_unlock(&ms->rwlock);
/* One maintenance pass over 'ms' with the write lock held: trims the table
 * down to max_entries, prunes expired bundles, drops expired mrouter
 * entries, and returns (then clears) the need_revalidate flag.
 * NOTE(review): the return type, the declaration and update of
 * mrouter_expired, and the statements that end the group loop are missing
 * from this copy. */
746 mcast_snooping_run__(struct mcast_snooping *ms)
747 OVS_REQ_WRLOCK(ms->rwlock)
749 bool need_revalidate;
750 struct mcast_group *grp;
751 struct mcast_mrouter_bundle *mrouter;
/* Work from the head of the group LRU.  While the table is over the limit
 * the head group is dropped outright; otherwise its expired bundles are
 * pruned.  A prune result of zero is tested for below; presumably it ends
 * the loop -- confirm. */
754 while (group_get_lru(ms, &grp)) {
755 if (hmap_count(&ms->table) > ms->max_entries) {
756 mcast_snooping_flush_group(ms, grp);
758 if (!mcast_snooping_prune_expired(ms, grp)) {
764 hmap_shrink(&ms->table);
/* Drop mrouter entries from the head of the list while their deadline has
 * been reached. */
767 while (mrouter_get_lru(ms, &mrouter)
768 && time_now() >= mrouter->expires) {
769 mcast_snooping_flush_mrouter(mrouter);
773 if (mrouter_expired) {
774 ms->need_revalidate = true;
775 COVERAGE_ADD(mcast_snooping_expired, mrouter_expired);
/* Hand the accumulated flag to the caller and reset it for the next pass. */
778 need_revalidate = ms->need_revalidate;
779 ms->need_revalidate = false;
780 return need_revalidate;
783 /* Does periodic work required by 'ms'. Returns true if something changed
784 * that may require flow revalidation. */
/* Locked wrapper around mcast_snooping_run__(): the work is done with
 * ms->rwlock held for writing.
 * NOTE(review): the statement taken when mcast_snooping_enabled() rejects
 * 'ms' is missing from this copy. */
786 mcast_snooping_run(struct mcast_snooping *ms)
788 bool need_revalidate;
790 if (!mcast_snooping_enabled(ms)) {
794 ovs_rwlock_wrlock(&ms->rwlock);
795 need_revalidate = mcast_snooping_run__(ms);
796 ovs_rwlock_unlock(&ms->rwlock);
798 return need_revalidate;
/* Arranges the next poll-loop wakeup for 'ms'.  If the table is over
 * max_entries or a revalidation is pending, the poll loop is woken
 * immediately.  Otherwise the wakeup is derived from the deadline of the
 * first bundle of the head group and the deadline of the head mrouter
 * entry, taking the earlier one when both exist.
 * NOTE(review): the else line and any guard around the final
 * poll_timer_wait_until() call are missing from this copy. */
802 mcast_snooping_wait__(struct mcast_snooping *ms)
803 OVS_REQ_RDLOCK(ms->rwlock)
805 if (hmap_count(&ms->table) > ms->max_entries
806 || ms->need_revalidate) {
807 poll_immediate_wake();
809 struct mcast_group *grp;
810 struct mcast_group_bundle *bundle;
811 struct mcast_mrouter_bundle *mrouter;
812 long long int mrouter_msec;
/* Zero doubles as the marker for no group deadline found. */
813 long long int msec = 0;
815 if (!list_is_empty(&ms->group_lru)) {
/* NOTE(review): grp->bundle_lru.next is read without checking that the
 * bundle list is non-empty.  This relies on groups without bundles having
 * been removed already, as mcast_snooping_prune_expired() does -- confirm
 * that every other path keeps that invariant. */
816 grp = mcast_group_from_lru_node(ms->group_lru.next);
817 bundle = mcast_group_bundle_from_lru_node(grp->bundle_lru.next);
/* expires is scaled by 1000 to get the value handed to
 * poll_timer_wait_until(). */
818 msec = bundle->expires * 1000LL;
821 if (!list_is_empty(&ms->mrouter_lru)) {
822 mrouter = mcast_mrouter_from_lru_node(ms->mrouter_lru.next);
823 mrouter_msec = mrouter->expires * 1000LL;
/* With no group deadline the mrouter deadline is used as is. */
824 msec = msec ? MIN(msec, mrouter_msec) : mrouter_msec;
828 poll_timer_wait_until(msec);
/* Locked wrapper around mcast_snooping_wait__(): does nothing when
 * mcast_snooping_enabled() rejects 'ms', otherwise takes ms->rwlock for
 * reading while the wakeup is computed. */
834 mcast_snooping_wait(struct mcast_snooping *ms)
836 if (!mcast_snooping_enabled(ms)) {
840 ovs_rwlock_rdlock(&ms->rwlock);
841 mcast_snooping_wait__(ms);
842 ovs_rwlock_unlock(&ms->rwlock);