amd64_edac: correct sys address to chip select mapping

The routine does the reverse mapping of the error address of a CECC back to the node id, DRAM controller and chip select of the DIMM which caused the error. We should lookup the channel using the syndromes _only_ when the DCTs are ganged so fix that. Also, add an early exit when there's an error while scanning for the csrow thus decreasing indentation levels for better readability. Finally, fixup comments. Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
author: Borislav Petkov <borislav.petkov@amd.com> 2009-11-13 15:10:43 +0100
committer: Borislav Petkov <borislav.petkov@amd.com> 2009-12-08 13:38:12 +0100
commit: bdc30a0c8c7427a1c1d2e4d149d372d4d77781ee (patch)
tree: a990bedfacf041586af4d6fdf59389c975114a5d /drivers
parent: bfc04aec7d687282b5e7adb26799d3eb50d05f01 (diff)
1 files changed, 27 insertions, 31 deletions
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 0969a404f84..533f5ff2ec3 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -1645,10 +1645,11 @@ static int f10_translate_sysaddr_to_cs(struct amd64_pvt *pvt, u64 sys_addr,
 }
 
 /*
- * This the F10h reference code from AMD to map a @sys_addr to NodeID,
- * CSROW, Channel.
+ * For reference see "2.8.5 Routing DRAM Requests" in F10 BKDG. This code maps
+ * a @sys_addr to NodeID, DCT (channel) and chip select (CSROW).
  *
- * The @sys_addr is usually an error address received from the hardware.
+ * The @sys_addr is usually an error address received from the hardware
+ * (MCX_ADDR).
  */
 static void f10_map_sysaddr_to_csrow(struct mem_ctl_info *mci,
 				     struct err_regs *info,
@@ -1661,39 +1662,34 @@ static void f10_map_sysaddr_to_csrow(struct mem_ctl_info *mci,
 
 	csrow = f10_translate_sysaddr_to_cs(pvt, sys_addr, &nid, &chan);
 
-	if (csrow >= 0) {
-		error_address_to_page_and_offset(sys_addr, &page, &offset);
+	if (csrow < 0) {
+		edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR);
+		return;
+	}
+
+	error_address_to_page_and_offset(sys_addr, &page, &offset);
 
-		syndrome  = HIGH_SYNDROME(info->nbsl) << 8;
-		syndrome |= LOW_SYNDROME(info->nbsh);
+	syndrome  = HIGH_SYNDROME(info->nbsl) << 8;
+	syndrome |= LOW_SYNDROME(info->nbsh);
+
+	/*
+	 * We need the syndromes for channel detection only when we're
+	 * ganged. Otherwise @chan should already contain the channel at
+	 * this point.
+	 */
+	if (dct_ganging_enabled(pvt) && pvt->nbcfg & K8_NBCFG_CHIPKILL)
+		chan = get_channel_from_ecc_syndrome(mci, syndrome);
 
+	if (chan >= 0)
+		edac_mc_handle_ce(mci, page, offset, syndrome, csrow, chan,
+				  EDAC_MOD_STR);
+	else
 		/*
-		 * Is CHIPKILL on? If so, then we can attempt to use the
-		 * syndrome to isolate which channel the error was on.
+		 * Channel unknown, report all channels on this CSROW as failed.
 		 */
-		if (pvt->nbcfg & K8_NBCFG_CHIPKILL)
-			chan = get_channel_from_ecc_syndrome(mci, syndrome);
-
-		if (chan >= 0) {
+		for (chan = 0; chan < mci->csrows[csrow].nr_channels; chan++)
 			edac_mc_handle_ce(mci, page, offset, syndrome,
-					csrow, chan, EDAC_MOD_STR);
-		} else {
-			/*
-			 * Channel unknown, report all channels on this
-			 * CSROW as failed.
-			 */
-			for (chan = 0; chan < mci->csrows[csrow].nr_channels;
-								chan++) {
-					edac_mc_handle_ce(mci, page, offset,
-							syndrome,
-							csrow, chan,
-							EDAC_MOD_STR);
-			}
-		}
-
-	} else {
-		edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR);
-	}
+					  csrow, chan, EDAC_MOD_STR);
 }
 
 /*
author	Borislav Petkov <borislav.petkov@amd.com>	2009-11-13 15:10:43 +0100
committer	Borislav Petkov <borislav.petkov@amd.com>	2009-12-08 13:38:12 +0100
commit	bdc30a0c8c7427a1c1d2e4d149d372d4d77781ee (patch)
tree	a990bedfacf041586af4d6fdf59389c975114a5d /drivers
parent	bfc04aec7d687282b5e7adb26799d3eb50d05f01 (diff)