sh: flush_cache_range() cleanup and optimizations.
flush_cache_range() wasn't page-aligning the end of the range.
We can't assume that it will always be page aligned, and we
ended up getting unaligned faults in some rare call paths.
Additionally, we add a small optimization to just purge the
dcache entirely if the range is large enough that walking the
page tables would take longer than the purge itself. We use an
arbitrary value of 64 pages for the large range size, as per sh64.
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
diff --git a/arch/sh/mm/cache-sh4.c b/arch/sh/mm/cache-sh4.c
index 524cea5..94c05d0 100644
--- a/arch/sh/mm/cache-sh4.c
+++ b/arch/sh/mm/cache-sh4.c
@@ -2,7 +2,7 @@
* arch/sh/mm/cache-sh4.c
*
* Copyright (C) 1999, 2000, 2002 Niibe Yutaka
- * Copyright (C) 2001, 2002, 2003, 2004 Paul Mundt
+ * Copyright (C) 2001, 2002, 2003, 2004, 2005 Paul Mundt
* Copyright (C) 2003 Richard Curnow
*
* This file is subject to the terms and conditions of the GNU General Public
@@ -25,6 +25,8 @@
#include <asm/mmu_context.h>
#include <asm/cacheflush.h>
+extern void __flush_cache_4096(unsigned long addr, unsigned long phys,
+ unsigned long exec_offset);
extern void __flush_cache_4096_all(unsigned long start);
static void __flush_cache_4096_all_ex(unsigned long start);
extern void __flush_dcache_all(void);
@@ -112,9 +114,14 @@
{
unsigned long addr, end_addr, entry_offset;
- end_addr = CACHE_OC_ADDRESS_ARRAY + (cpu_data->dcache.sets << cpu_data->dcache.entry_shift) * cpu_data->dcache.ways;
+ end_addr = CACHE_OC_ADDRESS_ARRAY +
+ (cpu_data->dcache.sets << cpu_data->dcache.entry_shift) *
+ cpu_data->dcache.ways;
+
entry_offset = 1 << cpu_data->dcache.entry_shift;
- for (addr = CACHE_OC_ADDRESS_ARRAY; addr < end_addr; addr += entry_offset) {
+ for (addr = CACHE_OC_ADDRESS_ARRAY;
+ addr < end_addr;
+ addr += entry_offset) {
ctrl_outl(0, addr);
}
}
@@ -125,7 +132,8 @@
int i;
entry_offset = 1 << cpu_data->dcache.entry_shift;
- for (i = 0; i < cpu_data->dcache.ways; i++, start += cpu_data->dcache.way_incr) {
+ for (i = 0; i < cpu_data->dcache.ways;
+ i++, start += cpu_data->dcache.way_incr) {
for (addr = CACHE_OC_ADDRESS_ARRAY + start;
addr < CACHE_OC_ADDRESS_ARRAY + 4096 + start;
addr += entry_offset) {
@@ -153,14 +161,14 @@
}
/*
- * Write back the D-cache and purge the I-cache for signal trampoline.
+ * Write back the D-cache and purge the I-cache for signal trampoline.
* .. which happens to be the same behavior as flush_icache_range().
* So, we simply flush out a line.
*/
void flush_cache_sigtramp(unsigned long addr)
{
unsigned long v, index;
- unsigned long flags;
+ unsigned long flags;
int i;
v = addr & ~(L1_CACHE_BYTES-1);
@@ -172,7 +180,8 @@
local_irq_save(flags);
jump_to_P2();
- for(i = 0; i < cpu_data->icache.ways; i++, index += cpu_data->icache.way_incr)
+ for (i = 0; i < cpu_data->icache.ways;
+ i++, index += cpu_data->icache.way_incr)
ctrl_outl(0, index); /* Clear out Valid-bit */
back_to_P1();
local_irq_restore(flags);
@@ -181,8 +190,7 @@
static inline void flush_cache_4096(unsigned long start,
unsigned long phys)
{
- unsigned long flags;
- extern void __flush_cache_4096(unsigned long addr, unsigned long phys, unsigned long exec_offset);
+ unsigned long flags;
/*
* SH7751, SH7751R, and ST40 have no restriction to handle cache.
@@ -191,10 +199,12 @@
if ((cpu_data->flags & CPU_HAS_P2_FLUSH_BUG)
|| start < CACHE_OC_ADDRESS_ARRAY) {
local_irq_save(flags);
- __flush_cache_4096(start | SH_CACHE_ASSOC, P1SEGADDR(phys), 0x20000000);
+ __flush_cache_4096(start | SH_CACHE_ASSOC,
+ P1SEGADDR(phys), 0x20000000);
local_irq_restore(flags);
} else {
- __flush_cache_4096(start | SH_CACHE_ASSOC, P1SEGADDR(phys), 0);
+ __flush_cache_4096(start | SH_CACHE_ASSOC,
+ P1SEGADDR(phys), 0);
}
}
@@ -231,29 +241,22 @@
local_irq_restore(flags);
}
-void flush_cache_all(void)
+void flush_dcache_all(void)
{
if (cpu_data->dcache.ways == 1)
__flush_dcache_all();
else
__flush_dcache_all_ex();
+}
+
+void flush_cache_all(void)
+{
+ flush_dcache_all();
flush_icache_all();
}
void flush_cache_mm(struct mm_struct *mm)
{
- /* Is there any good way? */
- /* XXX: possibly call flush_cache_range for each vm area */
- /*
- * FIXME: Really, the optimal solution here would be able to flush out
- * individual lines created by the specified context, but this isn't
- * feasible for a number of architectures (such as MIPS, and some
- * SPARC) .. is this possible for SuperH?
- *
- * In the meantime, we'll just flush all of the caches.. this
- * seems to be the simplest way to avoid at least a few wasted
- * cache flushes. -Lethal
- */
flush_cache_all();
}
@@ -301,13 +304,30 @@
unsigned long p = start & PAGE_MASK;
pgd_t *dir;
pmd_t *pmd;
+ pud_t *pud;
pte_t *pte;
pte_t entry;
unsigned long phys;
unsigned long d = 0;
+ /*
+ * Don't bother with the lookup and alias check if we have a
+ * wide range to cover, just blow away the dcache in its
+ * entirety instead. -- PFM.
+ */
+ if (((end - start) >> PAGE_SHIFT) >= 64) {
+ flush_dcache_all();
+
+ if (vma->vm_flags & VM_EXEC)
+ flush_icache_all();
+
+ return;
+ }
+
dir = pgd_offset(vma->vm_mm, p);
- pmd = pmd_offset(dir, p);
+ pud = pud_offset(dir, p);
+ pmd = pmd_offset(pud, p);
+ end = PAGE_ALIGN(end);
do {
if (pmd_none(*pmd) || pmd_bad(*pmd)) {
@@ -322,7 +342,7 @@
if ((pte_val(entry) & _PAGE_PRESENT)) {
phys = pte_val(entry)&PTE_PHYS_MASK;
if ((p^phys) & CACHE_ALIAS) {
- d |= 1 << ((p & CACHE_ALIAS)>>12);
+ d |= 1 << ((p & CACHE_ALIAS)>>12);
d |= 1 << ((phys & CACHE_ALIAS)>>12);
if (d == 0x0f)
goto loop_exit;