mlxsw: spectrum_router: Implement next-hop routing
author Jiri Pirko <jiri@mellanox.com>
Tue, 5 Jul 2016 09:27:50 +0000 (11:27 +0200)
committer David S. Miller <davem@davemloft.net>
Tue, 5 Jul 2016 16:06:30 +0000 (09:06 -0700)
Implement next-hop routing offload, including ECMP. To make this
possible, introduce a next-hop group entity. This entity keeps track of
resolved neighbours and updates the HW adjacency table accordingly. Note
that HW next-hops are stored in this adjacency table in the form of MAC
addresses.
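
Conceptually (a minimal userspace sketch, not part of this patch; all
names below are made up for illustration), the group packs the MACs of
its resolved neighbours into one contiguous block of a flat adjacency
table, and ECMP selects an offset within that block:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define ETH_ALEN	6
#define ADJ_TABLE_SIZE	16	/* stand-in for the KVD linear area */

struct adj_entry {			/* one HW adjacency entry: a MAC */
	uint8_t mac[ETH_ALEN];
};

struct nexthop {
	bool resolved;			/* neighbour reachable (NUD_VALID) */
	uint8_t mac[ETH_ALEN];
};

struct nexthop_group {
	unsigned int count;		/* configured nexthops */
	unsigned int adj_index;		/* base of the contiguous block */
	unsigned int ecmp_size;		/* number of offloaded entries */
	struct nexthop nh[4];
};

static struct adj_entry adj_table[ADJ_TABLE_SIZE];

/* Pack the MAC of every resolved nexthop into consecutive adjacency
 * entries starting at grp->adj_index; unresolved ones are skipped.
 */
static void group_refresh(struct nexthop_group *grp)
{
	unsigned int i, adj = grp->adj_index;

	grp->ecmp_size = 0;
	for (i = 0; i < grp->count; i++) {
		if (!grp->nh[i].resolved)
			continue;
		memcpy(adj_table[adj++].mac, grp->nh[i].mac, ETH_ALEN);
		grp->ecmp_size++;
	}
}

/* ECMP: a flow hash selects one entry within the group's block. */
static const struct adj_entry *group_select(const struct nexthop_group *grp,
					    uint32_t flow_hash)
{
	return &adj_table[grp->adj_index + flow_hash % grp->ecmp_size];
}

int main(void)
{
	struct nexthop_group grp = {
		.count = 2,
		.adj_index = 4,		/* pretend this block was allocated */
		.nh = {
			{ .resolved = true,  .mac = { 0, 0, 0, 0, 0, 0x01 } },
			{ .resolved = false, .mac = { 0, 0, 0, 0, 0, 0x02 } },
		},
	};
	const struct adj_entry *e;

	group_refresh(&grp);
	e = group_select(&grp, 0x1234);
	printf("ecmp_size=%u, selected next-hop MAC ends in %02x\n",
	       grp.ecmp_size, e->mac[5]);
	return 0;
}

When no neighbour of the group is resolved (an ECMP size of zero), the
driver instead programs a trap so traffic flows through the kernel; the
sketch does not model that path.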

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/net/ethernet/mellanox/mlxsw/spectrum.h
drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c

index e781128..0fe6051 100644
@@ -222,6 +222,7 @@ struct mlxsw_sp_router {
                struct delayed_work dw;
                unsigned long interval; /* ms */
        } neighs_update;
+       struct list_head nexthop_group_list;
 };
 
 struct mlxsw_sp {
index ed0e6c0..dc13178 100644
@@ -117,6 +117,8 @@ enum mlxsw_sp_fib_entry_type {
        MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
 };
 
+struct mlxsw_sp_nexthop_group;
+
 struct mlxsw_sp_fib_entry {
        struct rhash_head ht_node;
        struct mlxsw_sp_fib_key key;
@@ -124,6 +126,8 @@ struct mlxsw_sp_fib_entry {
        u8 added:1;
        u16 rif; /* used for action local */
        struct mlxsw_sp_vr *vr;
+       struct list_head nexthop_group_node;
+       struct mlxsw_sp_nexthop_group *nh_group;
 };
 
 struct mlxsw_sp_fib {
@@ -563,6 +567,9 @@ struct mlxsw_sp_neigh_entry {
        struct delayed_work dw;
        struct mlxsw_sp_port *mlxsw_sp_port;
        unsigned char ha[ETH_ALEN];
+       struct list_head nexthop_list; /* list of nexthops using
+                                       * this neigh entry
+                                       */
 };
 
 static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
@@ -606,6 +613,7 @@ mlxsw_sp_neigh_entry_create(const void *addr, size_t addr_len,
        neigh_entry->rif = rif;
        neigh_entry->n = n;
        INIT_DELAYED_WORK(&neigh_entry->dw, mlxsw_sp_router_neigh_update_hw);
+       INIT_LIST_HEAD(&neigh_entry->nexthop_list);
        return neigh_entry;
 }
 
@@ -808,6 +816,11 @@ static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
        mlxsw_sp_router_neighs_update_work_schedule(mlxsw_sp);
 }
 
+static void
+mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
+                             struct mlxsw_sp_neigh_entry *neigh_entry,
+                             bool removing);
+
 static void mlxsw_sp_router_neigh_update_hw(struct work_struct *work)
 {
        struct mlxsw_sp_neigh_entry *neigh_entry =
@@ -849,6 +862,7 @@ static void mlxsw_sp_router_neigh_update_hw(struct work_struct *work)
                } else {
                        neigh_entry->offloaded = true;
                }
+               mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, false);
        } else if (removing) {
                mlxsw_reg_rauht_pack4(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_DELETE,
                                      neigh_entry->rif,
@@ -861,6 +875,7 @@ static void mlxsw_sp_router_neigh_update_hw(struct work_struct *work)
                } else {
                        neigh_entry->offloaded = false;
                }
+               mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, true);
        }
 
        neigh_release(n);
@@ -978,6 +993,434 @@ static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
        rhashtable_destroy(&mlxsw_sp->router.neigh_ht);
 }
 
+struct mlxsw_sp_nexthop {
+       struct list_head neigh_list_node; /* member of neigh entry list */
+       struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group
+                                               * this belongs to
+                                               */
+       u8 should_offload:1, /* set indicates this neigh is connected and
+                             * should be put into the KVD linear area of this group.
+                             */
+          offloaded:1, /* set in case the neigh is actually put into the
+                        * KVD linear area of this group.
+                        */
+          update:1; /* set indicates that the MAC of this neigh should be
+                     * updated in HW
+                     */
+       struct mlxsw_sp_neigh_entry *neigh_entry;
+};
+
+struct mlxsw_sp_nexthop_group {
+       struct list_head list; /* node in mlxsw_sp->router.nexthop_group_list */
+       struct list_head fib_list; /* list of fib entries that use this group */
+       u8 adj_index_valid:1;
+       u32 adj_index;
+       u16 ecmp_size;
+       u16 count;
+       struct mlxsw_sp_nexthop nexthops[0];
+};
+
+static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
+                                            struct mlxsw_sp_vr *vr,
+                                            u32 adj_index, u16 ecmp_size,
+                                            u32 new_adj_index,
+                                            u16 new_ecmp_size)
+{
+       char raleu_pl[MLXSW_REG_RALEU_LEN];
+
+       mlxsw_reg_raleu_pack(raleu_pl, vr->proto, vr->id,
+                            adj_index, ecmp_size,
+                            new_adj_index, new_ecmp_size);
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
+}
+
+static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
+                                         struct mlxsw_sp_nexthop_group *nh_grp,
+                                         u32 old_adj_index, u16 old_ecmp_size)
+{
+       struct mlxsw_sp_fib_entry *fib_entry;
+       struct mlxsw_sp_vr *vr = NULL;
+       int err;
+
+       list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
+               if (vr == fib_entry->vr)
+                       continue;
+               vr = fib_entry->vr;
+               err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, vr,
+                                                       old_adj_index,
+                                                       old_ecmp_size,
+                                                       nh_grp->adj_index,
+                                                       nh_grp->ecmp_size);
+               if (err)
+                       return err;
+       }
+       return 0;
+}
+
+static int mlxsw_sp_nexthop_mac_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
+                                      struct mlxsw_sp_nexthop *nh)
+{
+       struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
+       char ratr_pl[MLXSW_REG_RATR_LEN];
+
+       mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
+                           true, adj_index, neigh_entry->rif);
+       mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
+}
+
+static int
+mlxsw_sp_nexthop_group_mac_update(struct mlxsw_sp *mlxsw_sp,
+                                 struct mlxsw_sp_nexthop_group *nh_grp)
+{
+       u32 adj_index = nh_grp->adj_index; /* base */
+       struct mlxsw_sp_nexthop *nh;
+       int i;
+       int err;
+
+       for (i = 0; i < nh_grp->count; i++) {
+               nh = &nh_grp->nexthops[i];
+
+               if (!nh->should_offload) {
+                       nh->offloaded = 0;
+                       continue;
+               }
+
+               if (nh->update) {
+                       err = mlxsw_sp_nexthop_mac_update(mlxsw_sp,
+                                                         adj_index, nh);
+                       if (err)
+                               return err;
+                       nh->update = 0;
+                       nh->offloaded = 1;
+               }
+               adj_index++;
+       }
+       return 0;
+}
+
+static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
+                                    struct mlxsw_sp_fib_entry *fib_entry);
+
+static int
+mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
+                                   struct mlxsw_sp_nexthop_group *nh_grp)
+{
+       struct mlxsw_sp_fib_entry *fib_entry;
+       int err;
+
+       list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
+               err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
+               if (err)
+                       return err;
+       }
+       return 0;
+}
+
+static void
+mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
+                              struct mlxsw_sp_nexthop_group *nh_grp)
+{
+       struct mlxsw_sp_nexthop *nh;
+       bool offload_change = false;
+       u32 adj_index;
+       u16 ecmp_size = 0;
+       bool old_adj_index_valid;
+       u32 old_adj_index;
+       u16 old_ecmp_size;
+       int ret;
+       int i;
+       int err;
+
+       for (i = 0; i < nh_grp->count; i++) {
+               nh = &nh_grp->nexthops[i];
+
+               if (nh->should_offload ^ nh->offloaded) {
+                       offload_change = true;
+                       if (nh->should_offload)
+                               nh->update = 1;
+               }
+               if (nh->should_offload)
+                       ecmp_size++;
+       }
+       if (!offload_change) {
+               /* Nothing was added or removed, so no need to reallocate. Just
+                * update the MACs on the existing adjacency indexes.
+                */
+               err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp);
+               if (err) {
+                       dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
+                       goto set_trap;
+               }
+               return;
+       }
+       if (!ecmp_size)
+               /* No neigh of this group is connected, so we just set the
+                * trap and let everything flow through the kernel.
+                */
+               goto set_trap;
+
+       ret = mlxsw_sp_kvdl_alloc(mlxsw_sp, ecmp_size);
+       if (ret < 0) {
+               /* We ran out of KVD linear space, just set the
+                * trap and let everything flow through kernel.
+                */
+               dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
+               goto set_trap;
+       }
+       adj_index = ret;
+       old_adj_index_valid = nh_grp->adj_index_valid;
+       old_adj_index = nh_grp->adj_index;
+       old_ecmp_size = nh_grp->ecmp_size;
+       nh_grp->adj_index_valid = 1;
+       nh_grp->adj_index = adj_index;
+       nh_grp->ecmp_size = ecmp_size;
+       err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp);
+       if (err) {
+               dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
+               goto set_trap;
+       }
+
+       if (!old_adj_index_valid) {
+               /* The trap was set for fib entries, so we have to call
+                * fib entry update to unset it and use the adjacency index.
+                */
+               err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
+               if (err) {
+                       dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
+                       goto set_trap;
+               }
+               return;
+       }
+
+       err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
+                                            old_adj_index, old_ecmp_size);
+       mlxsw_sp_kvdl_free(mlxsw_sp, old_adj_index);
+       if (err) {
+               dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
+               goto set_trap;
+       }
+       return;
+
+set_trap:
+       old_adj_index_valid = nh_grp->adj_index_valid;
+       nh_grp->adj_index_valid = 0;
+       for (i = 0; i < nh_grp->count; i++) {
+               nh = &nh_grp->nexthops[i];
+               nh->offloaded = 0;
+       }
+       err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
+       if (err)
+               dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
+       if (old_adj_index_valid)
+               mlxsw_sp_kvdl_free(mlxsw_sp, nh_grp->adj_index);
+}
+
+static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
+                                           bool removing)
+{
+       if (!removing && !nh->should_offload)
+               nh->should_offload = 1;
+       else if (removing && nh->offloaded)
+               nh->should_offload = 0;
+       nh->update = 1;
+}
+
+static void
+mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
+                             struct mlxsw_sp_neigh_entry *neigh_entry,
+                             bool removing)
+{
+       struct mlxsw_sp_nexthop *nh;
+
+       /* Take the RTNL mutex here to prevent the lists from changing */
+       rtnl_lock();
+       list_for_each_entry(nh, &neigh_entry->nexthop_list,
+                           neigh_list_node) {
+               __mlxsw_sp_nexthop_neigh_update(nh, removing);
+               mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
+       }
+       rtnl_unlock();
+}
+
+static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp,
+                                struct mlxsw_sp_nexthop_group *nh_grp,
+                                struct mlxsw_sp_nexthop *nh,
+                                struct fib_nh *fib_nh)
+{
+       struct mlxsw_sp_neigh_entry *neigh_entry;
+       u32 gwip = ntohl(fib_nh->nh_gw);
+       struct net_device *dev = fib_nh->nh_dev;
+       struct neighbour *n;
+       u8 nud_state;
+
+       neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, &gwip,
+                                                 sizeof(gwip), dev);
+       if (!neigh_entry) {
+               __be32 gwipn = htonl(gwip);
+
+               n = neigh_create(&arp_tbl, &gwipn, dev);
+               if (IS_ERR(n))
+                       return PTR_ERR(n);
+               neigh_event_send(n, NULL);
+               neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, &gwip,
+                                                         sizeof(gwip), dev);
+               if (!neigh_entry) {
+                       neigh_release(n);
+                       return -EINVAL;
+               }
+       } else {
+               /* Take a reference on the neigh here to ensure that it is
+                * not destroyed before the nexthop entry is finished with it.
+                * The other branch takes its reference in neigh_create().
+                */
+               n = neigh_entry->n;
+               neigh_clone(n);
+       }
+       nh->nh_grp = nh_grp;
+       nh->neigh_entry = neigh_entry;
+       list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
+       read_lock_bh(&n->lock);
+       nud_state = n->nud_state;
+       read_unlock_bh(&n->lock);
+       __mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID));
+
+       return 0;
+}
+
+static void mlxsw_sp_nexthop_fini(struct mlxsw_sp *mlxsw_sp,
+                                 struct mlxsw_sp_nexthop *nh)
+{
+       struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
+
+       list_del(&nh->neigh_list_node);
+       neigh_release(neigh_entry->n);
+}
+
+static struct mlxsw_sp_nexthop_group *
+mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
+{
+       struct mlxsw_sp_nexthop_group *nh_grp;
+       struct mlxsw_sp_nexthop *nh;
+       struct fib_nh *fib_nh;
+       size_t alloc_size;
+       int i;
+       int err;
+
+       alloc_size = sizeof(*nh_grp) +
+                    fi->fib_nhs * sizeof(struct mlxsw_sp_nexthop);
+       nh_grp = kzalloc(alloc_size, GFP_KERNEL);
+       if (!nh_grp)
+               return ERR_PTR(-ENOMEM);
+       INIT_LIST_HEAD(&nh_grp->fib_list);
+       nh_grp->count = fi->fib_nhs;
+       for (i = 0; i < nh_grp->count; i++) {
+               nh = &nh_grp->nexthops[i];
+               fib_nh = &fi->fib_nh[i];
+               err = mlxsw_sp_nexthop_init(mlxsw_sp, nh_grp, nh, fib_nh);
+               if (err)
+                       goto err_nexthop_init;
+       }
+       list_add_tail(&nh_grp->list, &mlxsw_sp->router.nexthop_group_list);
+       mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
+       return nh_grp;
+
+err_nexthop_init:
+       for (i--; i >= 0; i--)
+               mlxsw_sp_nexthop_fini(mlxsw_sp, &nh_grp->nexthops[i]);
+       kfree(nh_grp);
+       return ERR_PTR(err);
+}
+
+static void
+mlxsw_sp_nexthop_group_destroy(struct mlxsw_sp *mlxsw_sp,
+                              struct mlxsw_sp_nexthop_group *nh_grp)
+{
+       struct mlxsw_sp_nexthop *nh;
+       int i;
+
+       list_del(&nh_grp->list);
+       for (i = 0; i < nh_grp->count; i++) {
+               nh = &nh_grp->nexthops[i];
+               mlxsw_sp_nexthop_fini(mlxsw_sp, nh);
+       }
+       kfree(nh_grp);
+}
+
+static bool mlxsw_sp_nexthop_match(struct mlxsw_sp_nexthop *nh,
+                                  struct fib_info *fi)
+{
+       int i;
+
+       for (i = 0; i < fi->fib_nhs; i++) {
+               struct fib_nh *fib_nh = &fi->fib_nh[i];
+               u32 gwip = ntohl(fib_nh->nh_gw);
+
+               if (memcmp(nh->neigh_entry->key.addr,
+                          &gwip, sizeof(u32)) == 0 &&
+                   nh->neigh_entry->key.dev == fib_nh->nh_dev)
+                       return true;
+       }
+       return false;
+}
+
+static bool mlxsw_sp_nexthop_group_match(struct mlxsw_sp_nexthop_group *nh_grp,
+                                        struct fib_info *fi)
+{
+       int i;
+
+       if (nh_grp->count != fi->fib_nhs)
+               return false;
+       for (i = 0; i < nh_grp->count; i++) {
+               struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
+
+               if (!mlxsw_sp_nexthop_match(nh, fi))
+                       return false;
+       }
+       return true;
+}
+
+static struct mlxsw_sp_nexthop_group *
+mlxsw_sp_nexthop_group_find(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
+{
+       struct mlxsw_sp_nexthop_group *nh_grp;
+
+       list_for_each_entry(nh_grp, &mlxsw_sp->router.nexthop_group_list,
+                           list) {
+               if (mlxsw_sp_nexthop_group_match(nh_grp, fi))
+                       return nh_grp;
+       }
+       return NULL;
+}
+
+static int mlxsw_sp_nexthop_group_get(struct mlxsw_sp *mlxsw_sp,
+                                     struct mlxsw_sp_fib_entry *fib_entry,
+                                     struct fib_info *fi)
+{
+       struct mlxsw_sp_nexthop_group *nh_grp;
+
+       nh_grp = mlxsw_sp_nexthop_group_find(mlxsw_sp, fi);
+       if (!nh_grp) {
+               nh_grp = mlxsw_sp_nexthop_group_create(mlxsw_sp, fi);
+               if (IS_ERR(nh_grp))
+                       return PTR_ERR(nh_grp);
+       }
+       list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
+       fib_entry->nh_group = nh_grp;
+       return 0;
+}
+
+static void mlxsw_sp_nexthop_group_put(struct mlxsw_sp *mlxsw_sp,
+                                      struct mlxsw_sp_fib_entry *fib_entry)
+{
+       struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
+
+       list_del(&fib_entry->nexthop_group_node);
+       if (!list_empty(&nh_grp->fib_list))
+               return;
+       mlxsw_sp_nexthop_group_destroy(mlxsw_sp, nh_grp);
+}
+
 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
 {
        char rgcr_pl[MLXSW_REG_RGCR_LEN];
@@ -999,6 +1442,7 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
 {
        int err;
 
+       INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_group_list);
        err = __mlxsw_sp_router_init(mlxsw_sp);
        if (err)
                return err;
@@ -1013,6 +1457,38 @@ void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
        __mlxsw_sp_router_fini(mlxsw_sp);
 }
 
+static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp,
+                                        struct mlxsw_sp_fib_entry *fib_entry,
+                                        enum mlxsw_reg_ralue_op op)
+{
+       char ralue_pl[MLXSW_REG_RALUE_LEN];
+       u32 *p_dip = (u32 *) fib_entry->key.addr;
+       struct mlxsw_sp_vr *vr = fib_entry->vr;
+       enum mlxsw_reg_ralue_trap_action trap_action;
+       u16 trap_id = 0;
+       u32 adjacency_index = 0;
+       u16 ecmp_size = 0;
+
+       /* In case the nexthop group adjacency index is valid, use it
+        * with the provided ECMP size. Otherwise, set up a trap and pass
+        * the traffic to the kernel.
+        */
+       if (fib_entry->nh_group->adj_index_valid) {
+               trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
+               adjacency_index = fib_entry->nh_group->adj_index;
+               ecmp_size = fib_entry->nh_group->ecmp_size;
+       } else {
+               trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
+               trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
+       }
+
+       mlxsw_reg_ralue_pack4(ralue_pl, vr->proto, op, vr->id,
+                             fib_entry->key.prefix_len, *p_dip);
+       mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
+                                       adjacency_index, ecmp_size);
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
+}
+
 static int mlxsw_sp_fib_entry_op4_local(struct mlxsw_sp *mlxsw_sp,
                                        struct mlxsw_sp_fib_entry *fib_entry,
                                        enum mlxsw_reg_ralue_op op)
@@ -1049,7 +1525,7 @@ static int mlxsw_sp_fib_entry_op4(struct mlxsw_sp *mlxsw_sp,
 {
        switch (fib_entry->type) {
        case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
-               return -EINVAL;
+               return mlxsw_sp_fib_entry_op4_remote(mlxsw_sp, fib_entry, op);
        case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
                return mlxsw_sp_fib_entry_op4_local(mlxsw_sp, fib_entry, op);
        case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
@@ -1129,7 +1605,17 @@ mlxsw_sp_router_fib4_entry_init(struct mlxsw_sp *mlxsw_sp,
                fib_entry->rif = r->rif;
                return 0;
        }
-       return -EINVAL;
+       fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
+       return mlxsw_sp_nexthop_group_get(mlxsw_sp, fib_entry, fi);
+}
+
+static void
+mlxsw_sp_router_fib4_entry_fini(struct mlxsw_sp *mlxsw_sp,
+                               struct mlxsw_sp_fib_entry *fib_entry)
+{
+       if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_REMOTE)
+               return;
+       mlxsw_sp_nexthop_group_put(mlxsw_sp, fib_entry);
 }
 
 static int
@@ -1173,6 +1659,7 @@ mlxsw_sp_router_fib4_add_prepare(struct mlxsw_sp_port *mlxsw_sp_port,
        return 0;
 
 err_alloc_info:
+       mlxsw_sp_router_fib4_entry_fini(mlxsw_sp, fib_entry);
 err_fib4_entry_init:
        mlxsw_sp_fib_entry_destroy(fib_entry);
 err_fib_entry_create:
@@ -1207,6 +1694,7 @@ mlxsw_sp_router_fib4_add_commit(struct mlxsw_sp_port *mlxsw_sp_port,
 err_fib_entry_add:
        mlxsw_sp_fib_entry_remove(vr->fib, fib_entry);
 err_fib_entry_insert:
+       mlxsw_sp_router_fib4_entry_fini(mlxsw_sp, fib_entry);
        mlxsw_sp_fib_entry_destroy(fib_entry);
        mlxsw_sp_vr_put(mlxsw_sp, vr);
        return err;
@@ -1243,6 +1731,7 @@ int mlxsw_sp_router_fib4_del(struct mlxsw_sp_port *mlxsw_sp_port,
        }
        mlxsw_sp_fib_entry_del(mlxsw_sp_port->mlxsw_sp, fib_entry);
        mlxsw_sp_fib_entry_remove(vr->fib, fib_entry);
+       mlxsw_sp_router_fib4_entry_fini(mlxsw_sp, fib_entry);
        mlxsw_sp_fib_entry_destroy(fib_entry);
        mlxsw_sp_vr_put(mlxsw_sp, vr);
        return 0;