From 59211bd3b6329c3e5f4a90ac3d7f87ffa7867073 Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Fri, 9 Sep 2016 17:35:20 +0300 Subject: [PATCH] net/mlx5: Split the load/unload flow into hardware and software flows Gather all software context creating/destroying in one function and call it once in the first load and in the last unload. load/unload functions will now receive indication if we need to create/destroy the software contexts. In internal/pci error do the unload/load flows without releasing the software objects. In this way we perserve the sw core state and it help us restoring old driver state after PCI error/shutdown. Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/main.c | 171 +++++++++++------- 1 file changed, 107 insertions(+), 64 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 108d8f2e73e1..966647f74db5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1093,8 +1093,76 @@ static void mlx5_pci_close(struct mlx5_core_dev *dev, struct mlx5_priv *priv) debugfs_remove(priv->dbg_root); } -#define MLX5_IB_MOD "mlx5_ib" -static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) +static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv) +{ + struct pci_dev *pdev = dev->pdev; + int err; + + err = mlx5_query_hca_caps(dev); + if (err) { + dev_err(&pdev->dev, "query hca failed\n"); + goto out; + } + + err = mlx5_query_board_id(dev); + if (err) { + dev_err(&pdev->dev, "query board id failed\n"); + goto out; + } + + err = mlx5_eq_init(dev); + if (err) { + dev_err(&pdev->dev, "failed to initialize eq\n"); + goto out; + } + + MLX5_INIT_DOORBELL_LOCK(&priv->cq_uar_lock); + + err = mlx5_init_cq_table(dev); + if (err) { + dev_err(&pdev->dev, "failed to initialize cq table\n"); + goto err_eq_cleanup; + } + + mlx5_init_qp_table(dev); + + mlx5_init_srq_table(dev); + + mlx5_init_mkey_table(dev); + + err = mlx5_init_rl_table(dev); + if (err) { + dev_err(&pdev->dev, "Failed to init rate limiting\n"); + goto err_tables_cleanup; + } + + return 0; + +err_tables_cleanup: + mlx5_cleanup_mkey_table(dev); + mlx5_cleanup_srq_table(dev); + mlx5_cleanup_qp_table(dev); + mlx5_cleanup_cq_table(dev); + +err_eq_cleanup: + mlx5_eq_cleanup(dev); + +out: + return err; +} + +static void mlx5_cleanup_once(struct mlx5_core_dev *dev) +{ + mlx5_cleanup_rl_table(dev); + mlx5_cleanup_mkey_table(dev); + mlx5_cleanup_srq_table(dev); + mlx5_cleanup_qp_table(dev); + mlx5_cleanup_cq_table(dev); + mlx5_eq_cleanup(dev); +} + +static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, + bool boot) { struct pci_dev *pdev = dev->pdev; int err; @@ -1127,12 +1195,10 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) goto out_err; } - mlx5_pagealloc_init(dev); - err = mlx5_core_enable_hca(dev, 0); if (err) { dev_err(&pdev->dev, "enable hca failed\n"); - goto err_pagealloc_cleanup; + goto err_cmd_cleanup; } err = mlx5_core_set_issi(dev); @@ -1185,34 +1251,21 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) mlx5_start_health_poll(dev); - err = mlx5_query_hca_caps(dev); - if (err) { - dev_err(&pdev->dev, "query hca failed\n"); - goto err_stop_poll; - } - - err = mlx5_query_board_id(dev); - if (err) { - dev_err(&pdev->dev, "query board id failed\n"); + if (boot && mlx5_init_once(dev, priv)) { + dev_err(&pdev->dev, "sw objs init failed\n"); goto err_stop_poll; } err = mlx5_enable_msix(dev); if (err) { dev_err(&pdev->dev, "enable msix failed\n"); - goto err_stop_poll; - } - - err = mlx5_eq_init(dev); - if (err) { - dev_err(&pdev->dev, "failed to initialize eq\n"); - goto disable_msix; + goto err_cleanup_once; } err = mlx5_alloc_uuars(dev, &priv->uuari); if (err) { dev_err(&pdev->dev, "Failed allocating uar, aborting\n"); - goto err_eq_cleanup; + goto err_disable_msix; } err = mlx5_start_eqs(dev); @@ -1228,15 +1281,10 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) } err = mlx5_irq_set_affinity_hints(dev); - if (err) + if (err) { dev_err(&pdev->dev, "Failed to alloc affinity hint cpumask\n"); - - MLX5_INIT_DOORBELL_LOCK(&priv->cq_uar_lock); - - mlx5_init_cq_table(dev); - mlx5_init_qp_table(dev); - mlx5_init_srq_table(dev); - mlx5_init_mkey_table(dev); + goto err_affinity_hints; + } err = mlx5_init_fs(dev); if (err) { @@ -1244,12 +1292,6 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) goto err_fs; } - err = mlx5_init_rl_table(dev); - if (err) { - dev_err(&pdev->dev, "Failed to init rate limiting\n"); - goto err_rl; - } - #ifdef CONFIG_MLX5_CORE_EN err = mlx5_eswitch_init(dev); if (err) { @@ -1281,22 +1323,19 @@ out: return 0; -err_sriov: +err_reg_dev: mlx5_sriov_cleanup(dev); +err_sriov: #ifdef CONFIG_MLX5_CORE_EN mlx5_eswitch_cleanup(dev->priv.eswitch); #endif -err_reg_dev: - mlx5_cleanup_rl_table(dev); -err_rl: mlx5_cleanup_fs(dev); + err_fs: - mlx5_cleanup_mkey_table(dev); - mlx5_cleanup_srq_table(dev); - mlx5_cleanup_qp_table(dev); - mlx5_cleanup_cq_table(dev); mlx5_irq_clear_affinity_hints(dev); + +err_affinity_hints: free_comp_eqs(dev); err_stop_eqs: @@ -1305,12 +1344,13 @@ err_stop_eqs: err_free_uar: mlx5_free_uuars(dev, &priv->uuari); -err_eq_cleanup: - mlx5_eq_cleanup(dev); - -disable_msix: +err_disable_msix: mlx5_disable_msix(dev); +err_cleanup_once: + if (boot) + mlx5_cleanup_once(dev); + err_stop_poll: mlx5_stop_health_poll(dev); if (mlx5_cmd_teardown_hca(dev)) { @@ -1327,8 +1367,7 @@ reclaim_boot_pages: err_disable_hca: mlx5_core_disable_hca(dev, 0); -err_pagealloc_cleanup: - mlx5_pagealloc_cleanup(dev); +err_cmd_cleanup: mlx5_cmd_cleanup(dev); out_err: @@ -1338,7 +1377,8 @@ out_err: return err; } -static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) +static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, + bool cleanup) { int err = 0; @@ -1346,6 +1386,8 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) if (test_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state)) { dev_warn(&dev->pdev->dev, "%s: interface is down, NOP\n", __func__); + if (cleanup) + mlx5_cleanup_once(dev); goto out; } @@ -1356,18 +1398,14 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) #ifdef CONFIG_MLX5_CORE_EN mlx5_eswitch_cleanup(dev->priv.eswitch); #endif - mlx5_cleanup_rl_table(dev); mlx5_cleanup_fs(dev); - mlx5_cleanup_mkey_table(dev); - mlx5_cleanup_srq_table(dev); - mlx5_cleanup_qp_table(dev); - mlx5_cleanup_cq_table(dev); mlx5_irq_clear_affinity_hints(dev); free_comp_eqs(dev); mlx5_stop_eqs(dev); mlx5_free_uuars(dev, &priv->uuari); - mlx5_eq_cleanup(dev); mlx5_disable_msix(dev); + if (cleanup) + mlx5_cleanup_once(dev); mlx5_stop_health_poll(dev); err = mlx5_cmd_teardown_hca(dev); if (err) { @@ -1377,7 +1415,6 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) mlx5_pagealloc_stop(dev); mlx5_reclaim_startup_pages(dev); mlx5_core_disable_hca(dev, 0); - mlx5_pagealloc_cleanup(dev); mlx5_cmd_cleanup(dev); out: @@ -1416,6 +1453,7 @@ static const struct devlink_ops mlx5_devlink_ops = { #endif }; +#define MLX5_IB_MOD "mlx5_ib" static int init_one(struct pci_dev *pdev, const struct pci_device_id *id) { @@ -1462,11 +1500,14 @@ static int init_one(struct pci_dev *pdev, goto close_pci; } - err = mlx5_load_one(dev, priv); + mlx5_pagealloc_init(dev); + + err = mlx5_load_one(dev, priv, true); if (err) { dev_err(&pdev->dev, "mlx5_load_one failed with error code %d\n", err); goto clean_health; } + err = request_module_nowait(MLX5_IB_MOD); if (err) pr_info("failed request module on %s\n", MLX5_IB_MOD); @@ -1478,8 +1519,9 @@ static int init_one(struct pci_dev *pdev, return 0; clean_load: - mlx5_unload_one(dev, priv); + mlx5_unload_one(dev, priv, true); clean_health: + mlx5_pagealloc_cleanup(dev); mlx5_health_cleanup(dev); close_pci: mlx5_pci_close(dev, priv); @@ -1499,12 +1541,13 @@ static void remove_one(struct pci_dev *pdev) devlink_unregister(devlink); mlx5_unregister_device(dev); - if (mlx5_unload_one(dev, priv)) { + if (mlx5_unload_one(dev, priv, true)) { dev_err(&dev->pdev->dev, "mlx5_unload_one failed\n"); mlx5_health_cleanup(dev); return; } + mlx5_pagealloc_cleanup(dev); mlx5_health_cleanup(dev); mlx5_pci_close(dev, priv); pci_set_drvdata(pdev, NULL); @@ -1519,7 +1562,7 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, dev_info(&pdev->dev, "%s was called\n", __func__); mlx5_enter_error_state(dev); - mlx5_unload_one(dev, priv); + mlx5_unload_one(dev, priv, false); pci_save_state(pdev); mlx5_pci_disable_device(dev); return state == pci_channel_io_perm_failure ? @@ -1591,7 +1634,7 @@ static void mlx5_pci_resume(struct pci_dev *pdev) dev_info(&pdev->dev, "%s was called\n", __func__); - err = mlx5_load_one(dev, priv); + err = mlx5_load_one(dev, priv, false); if (err) dev_err(&pdev->dev, "%s: mlx5_load_one failed with error code: %d\n" , __func__, err); @@ -1613,7 +1656,7 @@ static void shutdown(struct pci_dev *pdev) dev_info(&pdev->dev, "Shutdown was called\n"); /* Notify mlx5 clients that the kernel is being shut down */ set_bit(MLX5_INTERFACE_STATE_SHUTDOWN, &dev->intf_state); - mlx5_unload_one(dev, priv); + mlx5_unload_one(dev, priv, false); mlx5_pci_disable_device(dev); } -- 2.20.1