powerpc/powernv: Don't escalate non-existing frozen PE
author: Gavin Shan <gwshan@linux.vnet.ibm.com>
Sun, 4 May 2014 23:29:04 +0000 (09:29 +1000)
committer: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Wed, 11 Jun 2014 07:04:20 +0000 (17:04 +1000)
Commit cb5b242c ("powerpc/eeh: Escalate error on non-existing PE")
escalates the frozen state on a non-existing PE to a fenced PHB. It
was meant to improve kdump reliability. After that, commit 361f2a2a
("powerpc/powernv: Reset PHB in kdump kernel") was introduced to
issue a complete reset on all PHBs to increase the reliability of
the kdump kernel.

Commit cb5b242c is therefore no longer useful, so revert it.

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
arch/powerpc/platforms/powernv/eeh-ioda.c

index 68167cd..5711f6f 100644 (file)
@@ -794,23 +794,17 @@ static int ioda_eeh_next_error(struct eeh_pe **pe)
                        break;
                case OPAL_EEH_PE_ERROR:
                        /*
-                        * If we can't find the corresponding PE, the
-                        * PEEV / PEST would be messy. So we force an
-                        * fenced PHB so that it can be recovered.
-                        *
-                        * If the PE has been marked as isolated, that
-                        * should have been removed permanently or in
-                        * progress with recovery. We needn't report
-                        * it again.
+                        * If we can't find the corresponding PE, we
+                        * just try to unfreeze.
                         */
                        if (ioda_eeh_get_pe(hose,
-                                       be64_to_cpu(frozen_pe_no), pe)) {
-                               *pe = phb_pe;
-                               pr_err("EEH: Escalated fenced PHB#%x "
-                                      "detected for PE#%llx\n",
-                                       hose->global_number,
-                                       be64_to_cpu(frozen_pe_no));
-                               ret = EEH_NEXT_ERR_FENCED_PHB;
+                                           be64_to_cpu(frozen_pe_no), pe)) {
+                               /* Try best to clear it */
+                               pr_info("EEH: Clear non-existing PHB#%x-PE#%llx\n",
+                                       hose->global_number, frozen_pe_no);
+                               opal_pci_eeh_freeze_clear(phb->opal_id, frozen_pe_no,
+                                       OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
+                               ret = EEH_NEXT_ERR_NONE;
                        } else if ((*pe)->state & EEH_PE_ISOLATED) {
                                ret = EEH_NEXT_ERR_NONE;
                        } else {