From 7af6846b8eda2abf93b527a3ebaa6771b165b569 Mon Sep 17 00:00:00 2001
From: Sonny Rao
Date: Wed, 14 Nov 2012 13:27:17 -0800
Subject: [PATCH] CHROMIUM: pm-check: make pm-check sum function stronger

Add a rotate to the sum function and switch back to 32-bit. The
rotate seems to help in catching more instances of corruption than a
pure checksum: so far we haven't seen sum + ror miss a corruption in
over 2000 known cases, whereas a pure sum misses 10 of those.

There seems to be some kind of optimizer bug when I try to use 64-bit
+ ror, so let's go back to 32-bit and take a speed hit. This code runs
in about 760 msec vs about 540 msec for the pure checksum.

Signed-off-by: Sonny Rao

BUG=chrome-os-partner:16148
TEST=run suspend_stress_test, ensure errors are still detected

Change-Id: I0adaec6f73d9acd752059840f37f707d5d2a2f59
Reviewed-on: https://gerrit.chromium.org/gerrit/38053
Reviewed-by: Doug Anderson
Commit-Ready: Sonny Rao
Tested-by: Sonny Rao
---
 arch/arm/plat-samsung/pm-check.c | 54 ++++++++++++++++++++++----------
 1 file changed, 38 insertions(+), 16 deletions(-)

diff --git a/arch/arm/plat-samsung/pm-check.c b/arch/arm/plat-samsung/pm-check.c
index 290ede2724ff..0a8c553e2202 100644
--- a/arch/arm/plat-samsung/pm-check.c
+++ b/arch/arm/plat-samsung/pm-check.c
@@ -165,9 +165,8 @@ static u32 s3c_pm_xor_mem(u32 val, unsigned char const *ptr, size_t len)
 /**
  * s3c_pm_sum_mem() - Sum all the words in the memory range passed
  *
- * Doesn't quite give you a simple sum.  Since we work 64-bits at a time
- * the carry bit from the lower 32-bits get added to the upper 32-bits, but
- * this is a close enough approximation.
+ * Doesn't quite give you a simple sum.  It keeps a running sum and
+ * does a one-byte rotate on it after each word is added.
  *
  * @val: Initial value to start the sum from; must be 32-bit aligned.
  * @ptr: Pointer to the start of the memory range
@@ -175,24 +174,47 @@ static u32 s3c_pm_xor_mem(u32 val, unsigned char const *ptr, size_t len)
  */
 static u32 s3c_pm_sum_mem(u32 val, unsigned char const *ptr, size_t len)
 {
-	/* using 64-bit quantities helps the compiler to optimize */
-	const u64 *wptr = (const u64 *)ptr;
-	u64 *end_ptr = (u64 *)(ptr + len);
-	u64 result = val;
+	const u32 *wptr = (const u32 *)ptr;
+	u32 *end_ptr = (u32 *)(ptr + len);
+
 	while (wptr < end_ptr) {
 		prefetch(wptr + 128); /* 16 cachelines ahead */
-		result += *wptr++;
-		result += *wptr++;
-		result += *wptr++;
-		result += *wptr++;
-		result += *wptr++;
-		result += *wptr++;
-		result += *wptr++;
-		result += *wptr++;
+		val += *wptr++;
+		val = ror32(val, 8);
+		val += *wptr++;
+		val = ror32(val, 8);
+		val += *wptr++;
+		val = ror32(val, 8);
+		val += *wptr++;
+		val = ror32(val, 8);
+		val += *wptr++;
+		val = ror32(val, 8);
+		val += *wptr++;
+		val = ror32(val, 8);
+		val += *wptr++;
+		val = ror32(val, 8);
+		val += *wptr++;
+		val = ror32(val, 8);
+		val += *wptr++;
+		val = ror32(val, 8);
+		val += *wptr++;
+		val = ror32(val, 8);
+		val += *wptr++;
+		val = ror32(val, 8);
+		val += *wptr++;
+		val = ror32(val, 8);
+		val += *wptr++;
+		val = ror32(val, 8);
+		val += *wptr++;
+		val = ror32(val, 8);
+		val += *wptr++;
+		val = ror32(val, 8);
+		val += *wptr++;
+		val = ror32(val, 8);
 	}
 	BUG_ON(wptr != end_ptr);
-	return (u32)((result >> 32) + (result & 0xffffffff));
+	return val;
 }
 
 /**
-- 
2.20.1
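
As a cross-check on the claim above, here is a minimal userspace sketch
(not part of the patch; ror32() is re-implemented from the kernel's
<linux/bitops.h> definition, and sum_mem()/sum_ror_mem() are illustrative
names) of why the rotate strengthens the check: a pure sum is commutative,
so corruption that swaps or reorders words leaves the checksum unchanged,
while the per-word rotate makes the running sum order-sensitive.

	#include <stddef.h>
	#include <stdint.h>
	#include <stdio.h>

	/* Userspace stand-in for the kernel's ror32(). */
	static uint32_t ror32(uint32_t word, unsigned int shift)
	{
		return (word >> shift) | (word << (32 - shift));
	}

	/* Pure 32-bit sum: commutative, so reordered words are invisible. */
	static uint32_t sum_mem(uint32_t val, const uint32_t *p, size_t n)
	{
		while (n--)
			val += *p++;
		return val;
	}

	/* Sum plus a one-byte rotate per word, as in the patched
	 * s3c_pm_sum_mem(): the result now depends on word order. */
	static uint32_t sum_ror_mem(uint32_t val, const uint32_t *p, size_t n)
	{
		while (n--) {
			val += *p++;
			val = ror32(val, 8);
		}
		return val;
	}

	int main(void)
	{
		/* "bad" is "good" with its first two words swapped -- a
		 * corruption pattern a pure sum cannot see. */
		const uint32_t good[] = { 0x00000001, 0x00000002,
					  0x00000003, 0x00000004 };
		const uint32_t bad[]  = { 0x00000002, 0x00000001,
					  0x00000003, 0x00000004 };

		printf("pure sum:  %08x vs %08x (collision)\n",
		       sum_mem(0, good, 4), sum_mem(0, bad, 4));
		printf("sum + ror: %08x vs %08x (detected)\n",
		       sum_ror_mem(0, good, 4), sum_ror_mem(0, bad, 4));
		return 0;
	}

Built with e.g. "gcc -O2 sketch.c", the pure sums collide (0000000a vs
0000000a) while the rotated sums differ (04030201 vs 04030102), matching
the patch's observation that sum + ror catches corruption a pure sum
misses.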