[CRYPTO] rmd: Use pointer form of endian swapping operations

This patch converts the relevant code in the rmd implementations to
use the pointer form of the endian swapping operations.  This allows
certain architectures to generate more optimised code.  For example,
on sparc64 this more than halves the CPU cycles on a typical hashing
operation.

Based on a patch by David Miller.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
diff --git a/crypto/rmd128.c b/crypto/rmd128.c
index 89a535a..1a481df 100644
--- a/crypto/rmd128.c
+++ b/crypto/rmd128.c
@@ -44,7 +44,7 @@
 #define F4(x, y, z) (y ^ (z & (x ^ y)))	/* z ? x : y */
 
 #define ROUND(a, b, c, d, f, k, x, s)  { \
-	(a) += f((b), (c), (d)) + le32_to_cpu(x) + (k); \
+	(a) += f((b), (c), (d)) + le32_to_cpup(&(x)) + (k);	\
 	(a) = rol32((a), (s)); \
 }
 
@@ -285,7 +285,7 @@
 
 	/* Store state in digest */
 	for (i = 0; i < 4; i++)
-		dst[i] = cpu_to_le32(rctx->state[i]);
+		dst[i] = cpu_to_le32p(&rctx->state[i]);
 
 	/* Wipe context */
 	memset(rctx, 0, sizeof(*rctx));
diff --git a/crypto/rmd160.c b/crypto/rmd160.c
index 136e31f..e9fd5f6 100644
--- a/crypto/rmd160.c
+++ b/crypto/rmd160.c
@@ -47,7 +47,7 @@
 #define F5(x, y, z) (x ^ (y | ~z))
 
 #define ROUND(a, b, c, d, e, f, k, x, s)  { \
-	(a) += f((b), (c), (d)) + le32_to_cpu(x) + (k); \
+	(a) += f((b), (c), (d)) + le32_to_cpup(&(x)) + (k); \
 	(a) = rol32((a), (s)) + (e); \
 	(c) = rol32((c), 10); \
 }
@@ -329,7 +329,7 @@
 
 	/* Store state in digest */
 	for (i = 0; i < 5; i++)
-		dst[i] = cpu_to_le32(rctx->state[i]);
+		dst[i] = cpu_to_le32p(&rctx->state[i]);
 
 	/* Wipe context */
 	memset(rctx, 0, sizeof(*rctx));
diff --git a/crypto/rmd256.c b/crypto/rmd256.c
index 88f2203..b088526 100644
--- a/crypto/rmd256.c
+++ b/crypto/rmd256.c
@@ -44,7 +44,7 @@
 #define F4(x, y, z) (y ^ (z & (x ^ y)))	/* z ? x : y */
 
 #define ROUND(a, b, c, d, f, k, x, s)  { \
-	(a) += f((b), (c), (d)) + le32_to_cpu(x) + (k); \
+	(a) += f((b), (c), (d)) + le32_to_cpup(&(x)) + (k); \
 	(a) = rol32((a), (s)); \
 }
 
@@ -304,7 +304,7 @@
 
 	/* Store state in digest */
 	for (i = 0; i < 8; i++)
-		dst[i] = cpu_to_le32(rctx->state[i]);
+		dst[i] = cpu_to_le32p(&rctx->state[i]);
 
 	/* Wipe context */
 	memset(rctx, 0, sizeof(*rctx));
diff --git a/crypto/rmd320.c b/crypto/rmd320.c
index 5b172f8..dba03ec 100644
--- a/crypto/rmd320.c
+++ b/crypto/rmd320.c
@@ -47,7 +47,7 @@
 #define F5(x, y, z) (x ^ (y | ~z))
 
 #define ROUND(a, b, c, d, e, f, k, x, s)  { \
-	(a) += f((b), (c), (d)) + le32_to_cpu(x) + (k); \
+	(a) += f((b), (c), (d)) + le32_to_cpup(&(x)) + (k); \
 	(a) = rol32((a), (s)) + (e); \
 	(c) = rol32((c), 10); \
 }
@@ -353,7 +353,7 @@
 
 	/* Store state in digest */
 	for (i = 0; i < 10; i++)
-		dst[i] = cpu_to_le32(rctx->state[i]);
+		dst[i] = cpu_to_le32p(&rctx->state[i]);
 
 	/* Wipe context */
 	memset(rctx, 0, sizeof(*rctx));