[PATCH] sparc64: Reduce ptrace cache flushing

We were flushing the D-cache excessively for ptrace() processing
and this makes debugging threads so slow as to be totally unusable.

All process page accesses via ptrace() go via access_process_vm().
This routine, for each process page, uses get_user_pages().  That
in turn does a flush_dcache_page() on the child pages before we
copy in/out the ptrace request data.

Therefore, all we need to do after the data movement is:

1) Flush the D-cache pages if the kernel maps the page to a different
   color than userspace does.
2) If we wrote to the page, we need to flush the I-cache on older cpus.

Previously we just flushed the entire cache at the end of a ptrace()
request, and that was beyond stupid.

Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/arch/sparc64/kernel/ptrace.c b/arch/sparc64/kernel/ptrace.c
index 08bac53..5f080cf 100644
--- a/arch/sparc64/kernel/ptrace.c
+++ b/arch/sparc64/kernel/ptrace.c
@@ -103,6 +103,55 @@
 	/* nothing to do */
 }
 
+/* Cache maintenance after a ptrace access to another task's page.
+ * access_process_vm() obtained the page via get_user_pages(), which
+ * already did a flush_dcache_page() on it, and our caller
+ * (copy_{to,from}_user_page()) then did the memcpy() through kaddr.
+ *
+ * Now, the only thing we have to do is:
+ * 1) flush the D-cache if it's possible that an illegal alias
+ *    has been created
+ * 2) flush the I-cache if this is pre-cheetah and we did a write
+ */
+void flush_ptrace_access(struct vm_area_struct *vma, struct page *page,
+			 unsigned long uaddr, void *kaddr,
+			 unsigned long len, int write)
+{
+	BUG_ON(len > PAGE_SIZE);
+
+#ifdef DCACHE_ALIASING_POSSIBLE
+	/* If bit 13 of the kernel address we used to access the
+	 * user page is the same as the virtual address that page
+	 * is mapped to in the user's address space, we can skip the
+	 * D-cache flush.  Otherwise invalidate it in 32-byte steps.
+	 */
+	if ((uaddr ^ (unsigned long) kaddr) & (1UL << 13)) {
+		unsigned long start = __pa(kaddr);
+		unsigned long end = start + len;
+
+		if (tlb_type == spitfire) {
+			for (; start < end; start += 32)
+				spitfire_put_dcache_tag(start & 0x3fe0, 0x0);
+		} else {
+			for (; start < end; start += 32)
+				__asm__ __volatile__(
+					"stxa %%g0, [%0] %1\n\t"
+					"membar #Sync"
+					: /* no outputs */
+					: "r" (start),
+					"i" (ASI_DCACHE_INVALIDATE));
+		}
+	}
+#endif
+	if (write && tlb_type == spitfire) {
+		unsigned long start = (unsigned long) kaddr;
+		unsigned long end = start + len;
+
+		for (; start < end; start += 32)
+			flushi(start);
+	}
+}
+
 asmlinkage void do_ptrace(struct pt_regs *regs)
 {
 	int request = regs->u_regs[UREG_I0];
@@ -227,7 +276,7 @@
 			pt_error_return(regs, -res);
 		else
 			pt_os_succ_return(regs, tmp64, (void __user *) data);
-		goto flush_and_out;
+		goto out_tsk;
 	}
 
 	case PTRACE_POKETEXT: /* write the word at location addr. */
@@ -253,7 +302,7 @@
 			pt_error_return(regs, -res);
 		else
 			pt_succ_return(regs, res);
-		goto flush_and_out;
+		goto out_tsk;
 	}
 
 	case PTRACE_GETREGS: {
@@ -485,12 +534,12 @@
 					  (char __user *)addr2, data);
 		if (res == data) {
 			pt_succ_return(regs, 0);
-			goto flush_and_out;
+			goto out_tsk;
 		}
 		if (res >= 0)
 			res = -EIO;
 		pt_error_return(regs, -res);
-		goto flush_and_out;
+		goto out_tsk;
 	}
 
 	case PTRACE_WRITETEXT:
@@ -499,12 +548,12 @@
 					   addr, data);
 		if (res == data) {
 			pt_succ_return(regs, 0);
-			goto flush_and_out;
+			goto out_tsk;
 		}
 		if (res >= 0)
 			res = -EIO;
 		pt_error_return(regs, -res);
-		goto flush_and_out;
+		goto out_tsk;
 	}
 	case PTRACE_SYSCALL: /* continue and stop at (return from) syscall */
 		addr = 1;
@@ -571,27 +620,6 @@
 		goto out_tsk;
 	}
 	}
-flush_and_out:
-	{
-		unsigned long va;
-
-		if (tlb_type == cheetah || tlb_type == cheetah_plus) {
-			for (va = 0; va < (1 << 16); va += (1 << 5))
-				spitfire_put_dcache_tag(va, 0x0);
-			/* No need to mess with I-cache on Cheetah. */
-		} else {
-			for (va =  0; va < L1DCACHE_SIZE; va += 32)
-				spitfire_put_dcache_tag(va, 0x0);
-			if (request == PTRACE_PEEKTEXT ||
-			    request == PTRACE_POKETEXT ||
-			    request == PTRACE_READTEXT ||
-			    request == PTRACE_WRITETEXT) {
-				for (va =  0; va < (PAGE_SIZE << 1); va += 32)
-					spitfire_put_icache_tag(va, 0x0);
-				__asm__ __volatile__("flush %g6");
-			}
-		}
-	}
 out_tsk:
 	if (child)
 		put_task_struct(child);
diff --git a/include/asm-sparc64/cacheflush.h b/include/asm-sparc64/cacheflush.h
index 86f0293..51b26e8 100644
--- a/include/asm-sparc64/cacheflush.h
+++ b/include/asm-sparc64/cacheflush.h
@@ -49,16 +49,22 @@
 #define flush_icache_page(vma, pg)	do { } while(0)
 #define flush_icache_user_range(vma,pg,adr,len)	do { } while (0)
 
-#define copy_to_user_page(vma, page, vaddr, dst, src, len) \
-	do {							\
-		flush_cache_page(vma, vaddr, page_to_pfn(page));\
-		memcpy(dst, src, len);				\
+extern void flush_ptrace_access(struct vm_area_struct *, struct page *,
+				unsigned long uaddr, void *kaddr,
+				unsigned long len, int write);
+
+#define copy_to_user_page(vma, page, vaddr, dst, src, len)		\
+	do {								\
+		flush_cache_page(vma, vaddr, page_to_pfn(page));	\
+		memcpy(dst, src, len);					\
+		flush_ptrace_access(vma, page, vaddr, src, len, 0);	\
 	} while (0)
 
-#define copy_from_user_page(vma, page, vaddr, dst, src, len) \
-	do {							\
-		flush_cache_page(vma, vaddr, page_to_pfn(page));\
-		memcpy(dst, src, len);				\
+#define copy_from_user_page(vma, page, vaddr, dst, src, len) 		\
+	do {								\
+		flush_cache_page(vma, vaddr, page_to_pfn(page));	\
+		memcpy(dst, src, len);					\
+		flush_ptrace_access(vma, page, vaddr, dst, len, 1);	\
 	} while (0)
 
 #define flush_dcache_mmap_lock(mapping)		do { } while (0)