2 * Copyright (c) 2014 Red Hat, Inc.
4 * Based on mac-learning implementation.
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at:
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
20 #include "mcast-snooping.h"
26 #include "byte-order.h"
30 #include "poll-loop.h"
33 #include "unaligned.h"
35 #include "vlan-bitmap.h"
/* Coverage counters for this module: one is bumped when a new group or a new
 * mrouter is recorded, the other when groups' bundles or mrouters expire. */
38 COVERAGE_DEFINE(mcast_snooping_learned);
39 COVERAGE_DEFINE(mcast_snooping_expired);
/* Forward declaration of the static mrouter lookup helper.  It is defined
 * further down in this file but is already needed by
 * mcast_snooping_add_group().  The declaration carries the
 * OVS_REQ_RDLOCK annotation on ms->rwlock. */
41 static struct mcast_mrouter_bundle *
42 mcast_snooping_mrouter_lookup(struct mcast_snooping *ms, uint16_t vlan,
44 OVS_REQ_RDLOCK(ms->rwlock);
/* Predicate on 'ms' that the public entry points of this file consult
 * (negated) before taking ms->rwlock or touching the table.
 * NOTE(review): the body is not visible at this point; presumably it only
 * checks that 'ms' is non-null -- confirm. */
47 mcast_snooping_enabled(const struct mcast_snooping *ms)
/* Accessor for the flood_unreg setting of 'ms', i.e. the flag that
 * mcast_snooping_set_flood_unreg() stores. */
53 mcast_snooping_flood_unreg(const struct mcast_snooping *ms)
55 return ms->flood_unreg;
/* Tests whether 'igmp_type', given in network byte order, is the IGMP host
 * membership query type.  The constant is converted with htons() so that the
 * comparison is done in network byte order. */
59 mcast_snooping_is_query(ovs_be16 igmp_type)
61 return igmp_type == htons(IGMP_HOST_MEMBERSHIP_QUERY);
/* Classifies 'igmp_type' (network byte order, converted with ntohs() for the
 * switch) against the membership-related IGMP messages: the host membership
 * report, the IGMPv2 host membership report and the host leave message.
 * NOTE(review): the statements executed for these cases are not visible
 * here; presumably they report a match -- confirm. */
65 mcast_snooping_is_membership(ovs_be16 igmp_type)
67 switch (ntohs(igmp_type)) {
68 case IGMP_HOST_MEMBERSHIP_REPORT:
69 case IGMPV2_HOST_MEMBERSHIP_REPORT:
70 case IGMP_HOST_LEAVE_MESSAGE:
/* The age is derived rather than stored: it is ms->idle_time minus the time
 * still left until b->expires.  This relies on b->expires having been set to
 * the learning time plus the idle time, which is how
 * mcast_group_insert_bundle() sets it. */
76 /* Returns the number of seconds since multicast group 'b' was learned in a
79 mcast_bundle_age(const struct mcast_snooping *ms,
80 const struct mcast_group_bundle *b)
82 time_t remaining = b->expires - time_now();
83 return ms->idle_time - remaining;
/* Computes the hash used to insert and look up a group in ms->table.  It
 * mixes the group IPv4 address, the VLAN and the per-table secret that
 * mcast_snooping_create() draws at random. */
87 mcast_table_hash(const struct mcast_snooping *ms, ovs_be32 grp_ip4,
90 return hash_3words((OVS_FORCE uint32_t) grp_ip4, vlan, ms->secret);
/* Converts a pointer to the bundle_node list member back into a pointer to
 * the struct mcast_group_bundle that embeds it. */
93 static struct mcast_group_bundle *
94 mcast_group_bundle_from_lru_node(struct list *list)
96 return CONTAINER_OF(list, struct mcast_group_bundle, bundle_node);
/* Converts a pointer to the group_node list member back into a pointer to
 * the struct mcast_group that embeds it. */
99 static struct mcast_group *
100 mcast_group_from_lru_node(struct list *list)
102 return CONTAINER_OF(list, struct mcast_group, group_node);
105 /* Searches 'ms' for and returns an mcast group for destination address
106 * 'dip' in 'vlan'. */
108 mcast_snooping_lookup(const struct mcast_snooping *ms, ovs_be32 dip,
110 OVS_REQ_RDLOCK(ms->rwlock)
112 struct mcast_group *grp;
/* Only entries whose stored hash equals the computed one are visited. */
115 hash = mcast_table_hash(ms, dip, vlan);
116 HMAP_FOR_EACH_WITH_HASH (grp, hmap_node, hash, &ms->table) {
/* Equal hashes do not imply equal keys, so compare VLAN and address too.
 * NOTE(review): the value returned when nothing matches is not visible
 * here; callers in this file test the result for null. */
117 if (grp->vlan == vlan && grp->ip4 == dip) {
124 /* If the LRU list is not empty, stores the least-recently-used group
 * in '*grp' and returns true.  Otherwise, if the LRU list is empty,
 * stores NULL in '*grp' and returns false. */
128 group_get_lru(const struct mcast_snooping *ms, struct mcast_group **grp)
129 OVS_REQ_RDLOCK(ms->rwlock)
131 if (!list_is_empty(&ms->group_lru)) {
/* Groups are pushed to the back of group_lru when they are used, so the
 * first node of the list is the least recently used one. */
132 *grp = mcast_group_from_lru_node(ms->group_lru.next);
/* Bounds 'idle_time': values below 15 become 15 and values above 3600
 * become 3600.
 * NOTE(review): the result for in-range values is not visible here;
 * presumably 'idle_time' is returned unchanged -- confirm. */
141 normalize_idle_time(unsigned int idle_time)
143 return (idle_time < 15 ? 15
144 : idle_time > 3600 ? 3600
148 /* Creates and returns a new mcast table with an initial mcast aging
 * timeout of MCAST_ENTRY_DEFAULT_IDLE_TIME seconds and an initial maximum of
 * MCAST_DEFAULT_MAX_ENTRIES entries. */
151 struct mcast_snooping *
152 mcast_snooping_create(void)
154 struct mcast_snooping *ms;
156 ms = xmalloc(sizeof *ms);
/* Start with an empty group table and empty group, mrouter and flood-port
 * lists. */
157 hmap_init(&ms->table);
158 list_init(&ms->group_lru);
159 list_init(&ms->mrouter_lru);
160 list_init(&ms->fport_list);
/* Random per-table value that mcast_table_hash() mixes into every hash. */
161 ms->secret = random_uint32();
162 ms->idle_time = MCAST_ENTRY_DEFAULT_IDLE_TIME;
163 ms->max_entries = MCAST_DEFAULT_MAX_ENTRIES;
164 ms->need_revalidate = false;
/* Flooding of unregistered multicast is on by default. */
165 ms->flood_unreg = true;
166 ovs_refcount_init(&ms->ref_cnt);
167 ovs_rwlock_init(&ms->rwlock);
/* Takes an additional reference on the table 'ms_'.  The const qualifier is
 * cast away only so that the reference count can be updated.
 * NOTE(review): the return statement and any null check are not visible
 * here; presumably the non-const pointer is returned -- confirm. */
171 struct mcast_snooping *
172 mcast_snooping_ref(const struct mcast_snooping *ms_)
174 struct mcast_snooping *ms = CONST_CAST(struct mcast_snooping *, ms_);
176 ovs_refcount_ref(&ms->ref_cnt);
181 /* Unreferences (and possibly destroys) mcast snooping table 'ms'. */
183 mcast_snooping_unref(struct mcast_snooping *ms)
185 if (!mcast_snooping_enabled(ms)) {
/* Teardown runs only when the unref call yields 1.
 * NOTE(review): presumably that is the count before the decrement, i.e.
 * this was the last reference -- confirm against the refcount API. */
189 if (ovs_refcount_unref_relaxed(&ms->ref_cnt) == 1) {
/* Empty the table contents first, then destroy the containers. */
190 mcast_snooping_flush(ms);
191 hmap_destroy(&ms->table);
192 ovs_rwlock_destroy(&ms->rwlock);
197 /* Changes the mcast aging timeout of 'ms' to 'idle_time' seconds. */
199 mcast_snooping_set_idle_time(struct mcast_snooping *ms, unsigned int idle_time)
200 OVS_REQ_WRLOCK(ms->rwlock)
202 struct mcast_group *grp;
203 struct mcast_group_bundle *b;
/* The requested value is bounded by normalize_idle_time() before use. */
206 idle_time = normalize_idle_time(idle_time);
207 if (idle_time != ms->idle_time) {
/* Signed difference between the new and the old timeout. */
208 delta = (int) idle_time - (int) ms->idle_time;
/* Visit every bundle of every group.
 * NOTE(review): the loop body is not visible here; presumably it shifts
 * each bundle expiration by 'delta' -- confirm. */
209 LIST_FOR_EACH (grp, group_node, &ms->group_lru) {
210 LIST_FOR_EACH (b, bundle_node, &grp->bundle_lru) {
214 ms->idle_time = idle_time;
218 /* Sets the maximum number of entries in 'ms' to 'max_entries', adjusting it
219 * to be within a reasonable range. */
221 mcast_snooping_set_max_entries(struct mcast_snooping *ms,
223 OVS_REQ_WRLOCK(ms->rwlock)
/* Lower bound is 10 and upper bound is 1000 * 1000.
 * NOTE(review): the in-range branch is not visible here; presumably
 * 'max_entries' is stored unchanged -- confirm. */
225 ms->max_entries = (max_entries < 10 ? 10
226 : max_entries > 1000 * 1000 ? 1000 * 1000
230 /* Sets whether unregistered multicast packets should be flooded to
 * all ports or only to ports connected to multicast routers.
 *
 * Returns true if previous state differs from current state,
 * false otherwise. */
236 mcast_snooping_set_flood_unreg(struct mcast_snooping *ms, bool enable)
237 OVS_REQ_WRLOCK(ms->rwlock)
/* Remember the old setting so that a change can be reported. */
239 bool prev = ms->flood_unreg;
240 ms->flood_unreg = enable;
241 return prev != enable;
/* Scans the bundle list of 'grp' for the bundle whose port pointer equals
 * 'port'.  'ms' is marked OVS_UNUSED; in the visible code it appears only in
 * the lock annotation.
 * NOTE(review): the return statements are not visible here; the caller in
 * this file uses the result as a bundle pointer. */
244 static struct mcast_group_bundle *
245 mcast_group_bundle_lookup(struct mcast_snooping *ms OVS_UNUSED,
246 struct mcast_group *grp, void *port)
247 OVS_REQ_RDLOCK(ms->rwlock)
249 struct mcast_group_bundle *b;
251 LIST_FOR_EACH (b, bundle_node, &grp->bundle_lru) {
252 if (b->port == port) {
259 /* Insert a new bundle to the mcast group or update its
260 * position and expiration if it is already there. */
261 static struct mcast_group_bundle *
262 mcast_group_insert_bundle(struct mcast_snooping *ms OVS_UNUSED,
263 struct mcast_group *grp, void *port, int idle_time)
264 OVS_REQ_WRLOCK(ms->rwlock)
266 struct mcast_group_bundle *b;
268 b = mcast_group_bundle_lookup(ms, grp, port);
/* An existing bundle is unlinked here and re-appended below; a missing one
 * is allocated instead.
 * NOTE(review): the condition choosing between the two paths and the
 * remaining field initialization are not visible here. */
270 list_remove(&b->bundle_node);
272 b = xmalloc(sizeof *b);
273 list_init(&b->bundle_node);
/* Refresh the deadline and append at the tail, so the most recently
 * refreshed bundle is always last in the list. */
277 b->expires = time_now() + idle_time;
278 list_push_back(&grp->bundle_lru, &b->bundle_node);
282 /* Returns true if 'grp' still has bundles associated with it,
 * false if it has none. */
285 mcast_group_has_bundles(struct mcast_group *grp)
287 return !list_is_empty(&grp->bundle_lru);
290 /* Delete 'grp' from the 'ms' hash table.
291 * Caller is responsible to clean bundle lru first. */
293 mcast_snooping_flush_group__(struct mcast_snooping *ms,
294 struct mcast_group *grp)
/* Enforce the precondition stated above: no bundle may remain. */
296 ovs_assert(list_is_empty(&grp->bundle_lru));
/* Unlink the group from both the hash table and the group LRU list.
 * NOTE(review): releasing the memory of 'grp' is not visible here. */
297 hmap_remove(&ms->table, &grp->hmap_node);
298 list_remove(&grp->group_node);
302 /* Flush out mcast group and its bundles */
304 mcast_snooping_flush_group(struct mcast_snooping *ms, struct mcast_group *grp)
305 OVS_REQ_WRLOCK(ms->rwlock)
307 struct mcast_group_bundle *b, *next_b;
/* The SAFE iterator is used because nodes are unlinked while iterating.
 * NOTE(review): releasing each bundle is not visible here. */
309 LIST_FOR_EACH_SAFE (b, next_b, bundle_node, &grp->bundle_lru) {
310 list_remove(&b->bundle_node);
/* With the bundle list emptied, the group itself can be removed; the
 * change is flagged so that flows get revalidated. */
313 mcast_snooping_flush_group__(ms, grp);
314 ms->need_revalidate = true;
318 /* Delete the bundle of 'port' from 'grp', returning true if it succeeds,
 * false if 'grp' has no bundle for 'port'. */
321 mcast_group_delete_bundle(struct mcast_snooping *ms OVS_UNUSED,
322 struct mcast_group *grp, void *port)
323 OVS_REQ_WRLOCK(ms->rwlock)
325 struct mcast_group_bundle *b;
/* NOTE(review): this is the non-SAFE iterator with a list_remove() inside
 * the loop.  That is only sound if the loop is left right after the
 * removal; the statements following the removal are not visible here --
 * confirm. */
327 LIST_FOR_EACH (b, bundle_node, &grp->bundle_lru) {
328 if (b->port == port) {
329 list_remove(&b->bundle_node);
/* Expiry pass over the bundles of a single group, taken in list order. */
337 /* If any bundle has expired, delete it. Returns the number of deleted
340 mcast_snooping_prune_expired(struct mcast_snooping *ms,
341 struct mcast_group *grp)
342 OVS_REQ_WRLOCK(ms->rwlock)
345 struct mcast_group_bundle *b, *next_b;
346 time_t timenow = time_now();
349 LIST_FOR_EACH_SAFE (b, next_b, bundle_node, &grp->bundle_lru) {
350 /* This list is sorted on expiration time. */
/* A bundle whose deadline is still in the future is not removed.
 * NOTE(review): presumably the loop stops at the first such bundle, given
 * the sorted order -- the statement in this branch is not visible. */
351 if (b->expires > timenow) {
354 list_remove(&b->bundle_node);
/* A group left without bundles is dropped from the table as well. */
359 if (!mcast_group_has_bundles(grp)) {
360 mcast_snooping_flush_group__(ms, grp);
/* NOTE(review): presumably the two statements below run only when at least
 * one bundle expired; the guarding condition is not visible here. */
365 ms->need_revalidate = true;
366 COVERAGE_ADD(mcast_snooping_expired, expired);
/* Records that 'port' joined group 'ip4' in 'vlan', creating the group on
 * first use. */
372 /* Add a multicast group to the mdb. If it exists, then
373 * move to the last position in the LRU list.
376 mcast_snooping_add_group(struct mcast_snooping *ms, ovs_be32 ip4,
377 uint16_t vlan, void *port)
378 OVS_REQ_WRLOCK(ms->rwlock)
381 struct mcast_group *grp;
383 /* Avoid duplicate packets. */
/* Ports already known as mrouter ports or flood ports are special-cased.
 * NOTE(review): the statement taken in that case is not visible here;
 * presumably the function returns without learning -- confirm. */
384 if (mcast_snooping_mrouter_lookup(ms, vlan, port)
385 || mcast_snooping_fport_lookup(ms, vlan, port)) {
390 grp = mcast_snooping_lookup(ms, ip4, vlan);
392 uint32_t hash = mcast_table_hash(ms, ip4, vlan);
/* At capacity: evict the least recently used group to make room. */
394 if (hmap_count(&ms->table) >= ms->max_entries) {
395 group_get_lru(ms, &grp);
396 mcast_snooping_flush_group(ms, grp);
/* Create the new group and index it by the hash computed above. */
399 grp = xmalloc(sizeof *grp);
400 hmap_insert(&ms->table, &grp->hmap_node, hash);
403 list_init(&grp->bundle_lru);
405 ms->need_revalidate = true;
406 COVERAGE_INC(mcast_snooping_learned);
/* NOTE(review): presumably this removal belongs to the path for a group
 * that already existed, so that it can be re-appended below; the branch
 * structure is not visible here. */
408 list_remove(&grp->group_node);
/* Add or refresh the bundle for this port with the current idle time. */
410 mcast_group_insert_bundle(ms, grp, port, ms->idle_time);
412 /* Mark 'grp' as recently used. */
413 list_push_back(&ms->group_lru, &grp->group_node);
/* Handles 'port' leaving group 'ip4' in 'vlan': looks the group up and, if
 * it exists, deletes the bundle for 'port'.  A successful deletion flags the
 * table for revalidation.
 * NOTE(review): the statements after setting the flag (and the return
 * value) are not visible here. */
418 mcast_snooping_leave_group(struct mcast_snooping *ms, ovs_be32 ip4,
419 uint16_t vlan, void *port)
420 OVS_REQ_WRLOCK(ms->rwlock)
422 struct mcast_group *grp;
424 grp = mcast_snooping_lookup(ms, ip4, vlan);
425 if (grp && mcast_group_delete_bundle(ms, grp, port)) {
426 ms->need_revalidate = true;
435 /* Returns the number of seconds since the multicast router
436 * was learned in a port. */
438 mcast_mrouter_age(const struct mcast_snooping *ms OVS_UNUSED,
439 const struct mcast_mrouter_bundle *mrouter)
/* The age is the fixed mrouter idle time minus the time still left until
 * the entry expires; mcast_snooping_add_mrouter() sets the expiration to
 * the current time plus that same constant. */
441 time_t remaining = mrouter->expires - time_now();
442 return MCAST_MROUTER_PORT_IDLE_TIME - remaining;
/* Converts a pointer to the mrouter_node list member back into a pointer to
 * the struct mcast_mrouter_bundle that embeds it. */
445 static struct mcast_mrouter_bundle *
446 mcast_mrouter_from_lru_node(struct list *list)
448 return CONTAINER_OF(list, struct mcast_mrouter_bundle, mrouter_node);
451 /* If the LRU list is not empty, stores the least-recently-used mrouter
 * in '*m' and returns true.  Otherwise, if the LRU list is empty,
 * stores NULL in '*m' and returns false. */
455 mrouter_get_lru(const struct mcast_snooping *ms,
456 struct mcast_mrouter_bundle **m)
457 OVS_REQ_RDLOCK(ms->rwlock)
459 if (!list_is_empty(&ms->mrouter_lru)) {
/* Mrouters are pushed to the back of mrouter_lru when refreshed, so the
 * first node is the one refreshed longest ago. */
460 *m = mcast_mrouter_from_lru_node(ms->mrouter_lru.next);
/* Scans the mrouter list of 'ms' for an entry matching both 'vlan' and
 * 'port' (compared by pointer value).
 * NOTE(review): the return statements are not visible here; callers in this
 * file test the result for null. */
468 static struct mcast_mrouter_bundle *
469 mcast_snooping_mrouter_lookup(struct mcast_snooping *ms, uint16_t vlan,
471 OVS_REQ_RDLOCK(ms->rwlock)
473 struct mcast_mrouter_bundle *mrouter;
475 LIST_FOR_EACH (mrouter, mrouter_node, &ms->mrouter_lru) {
476 if (mrouter->vlan == vlan && mrouter->port == port) {
/* Records (or refreshes) 'port' in 'vlan' as a port connected to a multicast
 * router.  Returns the current value of ms->need_revalidate, which this
 * function sets when a new mrouter entry is created. */
484 mcast_snooping_add_mrouter(struct mcast_snooping *ms, uint16_t vlan,
486 OVS_REQ_WRLOCK(ms->rwlock)
488 struct mcast_mrouter_bundle *mrouter;
490 /* Avoid duplicate packets. */
/* NOTE(review): the statement taken for a flood port is not visible here;
 * presumably the function returns without learning -- confirm. */
491 if (mcast_snooping_fport_lookup(ms, vlan, port)) {
495 mrouter = mcast_snooping_mrouter_lookup(ms, vlan, port);
/* A known mrouter is unlinked so it can be re-appended below; an unknown
 * one is allocated and counted as learned.
 * NOTE(review): the condition selecting between the two paths is not
 * visible here. */
497 list_remove(&mrouter->mrouter_node);
499 mrouter = xmalloc(sizeof *mrouter);
500 mrouter->vlan = vlan;
501 mrouter->port = port;
502 COVERAGE_INC(mcast_snooping_learned);
503 ms->need_revalidate = true;
/* Refresh the deadline and move the entry to the tail of the list. */
506 mrouter->expires = time_now() + MCAST_MROUTER_PORT_IDLE_TIME;
507 list_push_back(&ms->mrouter_lru, &mrouter->mrouter_node);
508 return ms->need_revalidate;
/* Unlinks 'mrouter' from the list it is on.
 * NOTE(review): releasing the memory of 'mrouter' is not visible here. */
512 mcast_snooping_flush_mrouter(struct mcast_mrouter_bundle *mrouter)
514 list_remove(&mrouter->mrouter_node);
/* Converts a pointer to the fport_node list member back into a pointer to
 * the struct mcast_fport_bundle that embeds it. */
520 static struct mcast_fport_bundle *
521 mcast_fport_from_list_node(struct list *list)
523 return CONTAINER_OF(list, struct mcast_fport_bundle, fport_node);
526 /* If the list is not empty, stores the first fport in '*f' and returns
 * true.  Otherwise, if the list is empty, stores NULL in '*f' and returns
 * false. */
529 fport_get(const struct mcast_snooping *ms, struct mcast_fport_bundle **f)
530 OVS_REQ_RDLOCK(ms->rwlock)
532 if (!list_is_empty(&ms->fport_list)) {
533 *f = mcast_fport_from_list_node(ms->fport_list.next);
/* Scans the flood-port list of 'ms' for an entry matching both 'vlan' and
 * 'port' (compared by pointer value).
 * NOTE(review): the return statements are not visible here; callers in this
 * file test the result for null. */
541 struct mcast_fport_bundle *
542 mcast_snooping_fport_lookup(struct mcast_snooping *ms, uint16_t vlan,
544 OVS_REQ_RDLOCK(ms->rwlock)
546 struct mcast_fport_bundle *fport;
548 LIST_FOR_EACH (fport, fport_node, &ms->fport_list) {
549 if (fport->vlan == vlan && fport->port == port) {
/* Allocates a flood-port entry for 'port' in 'vlan' and links it into the
 * flood-port list of 'ms'.  No duplicate check is visible in this function;
 * the caller in this file looks the port up first.
 * NOTE(review): the assignments to the new entry are not visible here. */
557 mcast_snooping_add_fport(struct mcast_snooping *ms, uint16_t vlan, void *port)
558 OVS_REQ_WRLOCK(ms->rwlock)
560 struct mcast_fport_bundle *fport;
562 fport = xmalloc(sizeof *fport);
565 list_insert(&ms->fport_list, &fport->fport_node);
/* Unlinks 'fport' from the list it is on.
 * NOTE(review): releasing the memory of 'fport' is not visible here. */
569 mcast_snooping_flush_fport(struct mcast_fport_bundle *fport)
571 list_remove(&fport->fport_node);
/* Makes the flood-port list agree with 'flood' for 'port' in 'vlan': the
 * port is added when flooding is requested and it is not yet listed, and
 * removed when flooding is turned off and it is listed.  Either change flags
 * the table for revalidation; when the state already matches, neither branch
 * runs. */
576 mcast_snooping_set_port_flood(struct mcast_snooping *ms, uint16_t vlan,
577 void *port, bool flood)
578 OVS_REQ_WRLOCK(ms->rwlock)
580 struct mcast_fport_bundle *fport;
582 fport = mcast_snooping_fport_lookup(ms, vlan, port);
583 if (flood && !fport) {
584 mcast_snooping_add_fport(ms, vlan, port);
585 ms->need_revalidate = true;
586 } else if (!flood && fport) {
587 mcast_snooping_flush_fport(fport);
588 ms->need_revalidate = true;
/* Removes all groups and all mrouters from 'ms'.  Unlike
 * mcast_snooping_flush__(), the flood-port list is left untouched. */
595 mcast_snooping_mdb_flush__(struct mcast_snooping *ms)
596 OVS_REQ_WRLOCK(ms->rwlock)
598 struct mcast_group *grp;
599 struct mcast_mrouter_bundle *mrouter;
/* Keep flushing the head of the group LRU until the list is empty. */
601 while (group_get_lru(ms, &grp)) {
602 mcast_snooping_flush_group(ms, grp);
605 hmap_shrink(&ms->table);
/* Same for the mrouter list. */
607 while (mrouter_get_lru(ms, &mrouter)) {
608 mcast_snooping_flush_mrouter(mrouter);
/* Locking wrapper around mcast_snooping_mdb_flush__(): takes ms->rwlock for
 * writing around the flush.
 * NOTE(review): the statement taken when snooping is not enabled is not
 * visible here; presumably an early return -- confirm. */
613 mcast_snooping_mdb_flush(struct mcast_snooping *ms)
615 if (!mcast_snooping_enabled(ms)) {
619 ovs_rwlock_wrlock(&ms->rwlock);
620 mcast_snooping_mdb_flush__(ms);
621 ovs_rwlock_unlock(&ms->rwlock);
624 /* Flushes mdb and flood ports. */
626 mcast_snooping_flush__(struct mcast_snooping *ms)
627 OVS_REQ_WRLOCK(ms->rwlock)
629 struct mcast_group *grp;
630 struct mcast_mrouter_bundle *mrouter;
631 struct mcast_fport_bundle *fport;
/* Each loop below repeatedly takes the first element of its list and
 * flushes it until that list is empty. */
633 while (group_get_lru(ms, &grp)) {
634 mcast_snooping_flush_group(ms, grp);
637 hmap_shrink(&ms->table);
639 while (mrouter_get_lru(ms, &mrouter)) {
640 mcast_snooping_flush_mrouter(mrouter);
643 while (fport_get(ms, &fport)) {
644 mcast_snooping_flush_fport(fport);
/* Locking wrapper around mcast_snooping_flush__(): takes ms->rwlock for
 * writing around the flush.  Also used by mcast_snooping_unref() when the
 * table is torn down.
 * NOTE(review): the statement taken when snooping is not enabled is not
 * visible here; presumably an early return -- confirm. */
649 mcast_snooping_flush(struct mcast_snooping *ms)
651 if (!mcast_snooping_enabled(ms)) {
655 ovs_rwlock_wrlock(&ms->rwlock);
656 mcast_snooping_flush__(ms);
657 ovs_rwlock_unlock(&ms->rwlock);
/* Periodic maintenance with ms->rwlock held for writing: trims the table
 * when it is over capacity, prunes expired bundles, drops expired mrouters,
 * and returns (then clears) the need_revalidate flag. */
661 mcast_snooping_run__(struct mcast_snooping *ms)
662 OVS_REQ_WRLOCK(ms->rwlock)
664 bool need_revalidate;
665 struct mcast_group *grp;
666 struct mcast_mrouter_bundle *mrouter;
/* Work from the least recently used group onwards. */
669 while (group_get_lru(ms, &grp)) {
/* Over capacity: evict the group outright. */
670 if (hmap_count(&ms->table) > ms->max_entries) {
671 mcast_snooping_flush_group(ms, grp);
/* Otherwise prune its expired bundles.
 * NOTE(review): presumably the loop stops once a group has nothing to
 * prune -- the statement in this branch is not visible here. */
673 if (!mcast_snooping_prune_expired(ms, grp)) {
679 hmap_shrink(&ms->table);
/* Drop mrouters from the head of the list while their deadline has passed.
 * NOTE(review): the counting of dropped entries is not visible here. */
682 while (mrouter_get_lru(ms, &mrouter)
683 && time_now() >= mrouter->expires) {
684 mcast_snooping_flush_mrouter(mrouter);
688 if (mrouter_expired) {
689 ms->need_revalidate = true;
690 COVERAGE_ADD(mcast_snooping_expired, mrouter_expired);
/* Hand the flag to the caller and reset it for the next run. */
693 need_revalidate = ms->need_revalidate;
694 ms->need_revalidate = false;
695 return need_revalidate;
698 /* Does periodic work required by 'ms'. Returns true if something changed
699 * that may require flow revalidation. */
701 mcast_snooping_run(struct mcast_snooping *ms)
703 bool need_revalidate;
/* NOTE(review): the statement taken when snooping is not enabled is not
 * visible here; presumably an early return -- confirm. */
705 if (!mcast_snooping_enabled(ms)) {
/* The real work is done under the write lock because it modifies the
 * table. */
709 ovs_rwlock_wrlock(&ms->rwlock);
710 need_revalidate = mcast_snooping_run__(ms);
711 ovs_rwlock_unlock(&ms->rwlock);
713 return need_revalidate;
/* Arranges the next poll-loop wakeup for 'ms': immediately if the table is
 * over capacity or a revalidation is pending, otherwise at the earliest
 * expiration found at the heads of the group and mrouter lists. */
717 mcast_snooping_wait__(struct mcast_snooping *ms)
718 OVS_REQ_RDLOCK(ms->rwlock)
720 if (hmap_count(&ms->table) > ms->max_entries
721 || ms->need_revalidate) {
722 poll_immediate_wake();
724 struct mcast_group *grp;
725 struct mcast_group_bundle *bundle;
726 struct mcast_mrouter_bundle *mrouter;
727 long long int mrouter_msec;
/* Zero doubles as the marker for no deadline found yet. */
728 long long int msec = 0;
730 if (!list_is_empty(&ms->group_lru)) {
/* First bundle of the first group; the expiration is multiplied by 1000
 * to obtain milliseconds.  The bundle list is dereferenced without an
 * emptiness check.
 * NOTE(review): presumably groups in the LRU always hold at least one
 * bundle -- confirm. */
731 grp = mcast_group_from_lru_node(ms->group_lru.next);
732 bundle = mcast_group_bundle_from_lru_node(grp->bundle_lru.next);
733 msec = bundle->expires * 1000LL;
736 if (!list_is_empty(&ms->mrouter_lru)) {
737 mrouter = mcast_mrouter_from_lru_node(ms->mrouter_lru.next);
738 mrouter_msec = mrouter->expires * 1000LL;
/* Keep the earlier of the two deadlines, or the mrouter one if no group
 * deadline was found. */
739 msec = msec ? MIN(msec, mrouter_msec) : mrouter_msec;
/* NOTE(review): whether this call is guarded by a check that a deadline was
 * found is not visible here. */
743 poll_timer_wait_until(msec);
/* Locking wrapper around mcast_snooping_wait__(): a read lock is enough
 * because the helper only inspects the table.
 * NOTE(review): the statement taken when snooping is not enabled is not
 * visible here; presumably an early return -- confirm. */
749 mcast_snooping_wait(struct mcast_snooping *ms)
751 if (!mcast_snooping_enabled(ms)) {
755 ovs_rwlock_rdlock(&ms->rwlock);
756 mcast_snooping_wait__(ms);
757 ovs_rwlock_unlock(&ms->rwlock);