Blackfin arch: unify/cleanup cache code

 - to be correct wrt end ranges
 - to be optimal with a one-instruction hardware loop

Signed-off-by: Mike Frysinger <vapier.adi@gmail.com>
Signed-off-by: Bryan Wu <cooloney@kernel.org>

diff --git a/arch/blackfin/mach-common/cache.S b/arch/blackfin/mach-common/cache.S
index 85f8c79..db53218 100644
--- a/arch/blackfin/mach-common/cache.S
+++ b/arch/blackfin/mach-common/cache.S
@@ -1,148 +1,91 @@
 /*
- * File:         arch/blackfin/mach-common/cache.S
- * Based on:
- * Author:       LG Soft India
+ * Blackfin cache control code
  *
- * Created:
- * Description:  cache control support
+ * Copyright 2004-2008 Analog Devices Inc.
  *
- * Modified:
- *               Copyright 2004-2006 Analog Devices Inc.
+ * Enter bugs at http://blackfin.uclinux.org/
  *
- * Bugs:         Enter bugs at http://blackfin.uclinux.org/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see the file COPYING, or write
- * to the Free Software Foundation, Inc.,
- * 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ * Licensed under the GPL-2 or later.
  */
 
 #include <linux/linkage.h>
-#include <asm/cplb.h>
-#include <asm/entry.h>
 #include <asm/blackfin.h>
 #include <asm/cache.h>
+#include <asm/page.h>
 
 .text
 
-/*
- * blackfin_cache_flush_range(start, end)
- * Invalidate all cache lines assocoiated with this
- * area of memory.
+/* Since all L1 caches work the same way, we use the same method for flushing
+ * them.  Only the actual flush instruction differs.  We write this in asm as
+ * GCC can be hard to coax into writing nice hardware loops.
  *
- * start:	Start address
- * end:		End address
+ * Also, we assume the following register setup:
+ * R0 = start address
+ * R1 = end address
  */
-ENTRY(_blackfin_icache_flush_range)
+.macro do_flush flushins:req optflushins optnopins label
+	/* end = ((end - 1) & -L1_CACHE_BYTES) + L1_CACHE_BYTES; */
+	R1 += -1;
 	R2 = -L1_CACHE_BYTES;
-	R2 = R0 & R2;
-	P0 = R2;
-	P1 = R1;
-	CSYNC(R3);
-	IFLUSH [P0];
+	R1 = R1 & R2;
+	R1 += L1_CACHE_BYTES;
+	R0 = R0 & R2;	/* start &= -L1_CACHE_BYTES, else count is a line short */
+
+	/* count = (end - start) >> L1_CACHE_SHIFT */
+	R2 = R1 - R0;
+	R2 >>= L1_CACHE_SHIFT;
+	P1 = R2;	/* hardware loop count, in cache lines */
+
+.ifnb \label
+\label :
+.endif
+	P0 = R0;	/* callers entering at the label supply R0 and P1 */
+	LSETUP (1f, 2f) LC1 = P1;
 1:
-	IFLUSH [P0++];
-	CC = P0 < P1 (iu);
-	IF CC JUMP 1b (bp);
-	IFLUSH [P0];
-	SSYNC(R3);
+.ifnb \optflushins
+	\optflushins [P0];
+.endif
+.ifb \optnopins
+2:
+.endif
+	\flushins [P0++];
+.ifnb \optnopins
+2: \optnopins;
+.endif
+
 	RTS;
+.endm
+
+/* Invalidate all instruction cache lines associated with this memory area */
+ENTRY(_blackfin_icache_flush_range)
+	do_flush IFLUSH, , nop	/* per line: IFLUSH [P0++]; the nop ends the loop */
 ENDPROC(_blackfin_icache_flush_range)
 
-/*
- * blackfin_icache_dcache_flush_range(start, end)
- * FLUSH all cache lines assocoiated with this
- * area of memory.
- *
- * start:	Start address
- * end:		End address
- */
-
+/* Flush all cache lines associated with this area of memory. */
 ENTRY(_blackfin_icache_dcache_flush_range)
-	R2 = -L1_CACHE_BYTES;
-	R2 = R0 & R2;
-	P0 = R2;
-	P1 = R1;
-	CSYNC(R3);
-	IFLUSH [P0];
-1:
-	FLUSH [P0];
-	IFLUSH [P0++];
-	CC = P0 < P1 (iu);
-	IF CC JUMP 1b (bp);
-	IFLUSH [P0];
-	FLUSH [P0];
-	SSYNC(R3);
-	RTS;
+	do_flush IFLUSH, FLUSH	/* per line: FLUSH [P0], then IFLUSH [P0++] */
 ENDPROC(_blackfin_icache_dcache_flush_range)
 
 /* Throw away all D-cached data in specified region without any obligation to
- * write them back. However, we must clean the D-cached entries around the
- * boundaries of the start and/or end address is not cache aligned.
- *
- * Start: start address,
- * end  : end address.
+ * write them back.  Since the Blackfin ISA does not have an "invalidate"
+ * instruction, we use flush/invalidate.  Perhaps as a speed optimization we
+ * could bang on the DTEST MMRs ...
  */
-
 ENTRY(_blackfin_dcache_invalidate_range)
-	R2 = -L1_CACHE_BYTES;
-	R2 = R0 & R2;
-	P0 = R2;
-	P1 = R1;
-	CSYNC(R3);
-	FLUSHINV[P0];
-1:
-	FLUSHINV[P0++];
-	CC = P0 < P1 (iu);
-	IF CC JUMP 1b (bp);
-
-	/* If the data crosses a cache line, then we'll be pointing to
-	 * the last cache line, but won't have flushed/invalidated it yet,
-	 * so do one more.
-	 */
-	FLUSHINV[P0];
-	SSYNC(R3);
-	RTS;
+	do_flush FLUSHINV	/* per line: FLUSHINV [P0++], R0 = start, R1 = end */
 ENDPROC(_blackfin_dcache_invalidate_range)
 
+/* Flush all data cache lines associated with this memory area */
 ENTRY(_blackfin_dcache_flush_range)
-	R2 = -L1_CACHE_BYTES;
-	R2 = R0 & R2;
-	P0 = R2;
-	P1 = R1;
-	CSYNC(R3);
-	FLUSH[P0];
-1:
-	FLUSH[P0++];
-	CC = P0 < P1 (iu);
-	IF CC JUMP 1b (bp);
-
-	/* If the data crosses a cache line, then we'll be pointing to
-	 * the last cache line, but won't have flushed it yet, so do
-	 * one more.
-	 */
-	FLUSH[P0];
-	SSYNC(R3);
-	RTS;
+	do_flush FLUSH, , , .Ldfr	/* .Ldfr = loop setup; dflush_page jumps there */
 ENDPROC(_blackfin_dcache_flush_range)
 
+/* Our headers convert the page structure to an address, so just need to flush
+ * its contents like normal.  We know the start address is page aligned (which
+ * is greater than our cache alignment), as is the end address.  So just jump
+ * into the middle of the dcache flush function.
+ */
 ENTRY(_blackfin_dflush_page)
 	P1 = 1 << (PAGE_SHIFT - L1_CACHE_SHIFT);
-	P0 = R0;
-	CSYNC(R3);
-	FLUSH[P0];
-	LSETUP (.Lfl1, .Lfl1) LC0 = P1;
-.Lfl1:	FLUSH [P0++];
-	SSYNC(R3);
-	RTS;
+	jump .Ldfr;	/* P1 = lines per page; the loop setup there does P0 = R0 */
 ENDPROC(_blackfin_dflush_page)