drm/radeon/kms: Workaround RV410/R420 CP errata (V3)

Long story short, this fixes sporadic hardlocks with my rv410 during
times of intense 2D acceleration (Flash on Fx3).

V2: Fix indentation and move errata_fini to suspend function so we
don't leak scratch register over suspend/resume cycle.
V3: Move scratch_reg to asic specific structure (aim is to slowly
    move stuff to asic specific structure and avoid polluting
    radeon_device struct with asic specific variables)

Signed-off-by: Corbin Simpson <MostAwesomeDude@gmail.com>
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
diff --git a/drivers/gpu/drm/radeon/r420.c b/drivers/gpu/drm/radeon/r420.c
index c05a727..f46502a 100644
--- a/drivers/gpu/drm/radeon/r420.c
+++ b/drivers/gpu/drm/radeon/r420.c
@@ -30,6 +30,7 @@
 #include "radeon_reg.h"
 #include "radeon.h"
 #include "atom.h"
+#include "r100d.h"
 #include "r420d.h"
 
 int r420_mc_init(struct radeon_device *rdev)
@@ -165,6 +166,34 @@
 	WREG32_PLL(R_00000D_SCLK_CNTL, sclk_cntl);
 }
 
+static void r420_cp_errata_init(struct radeon_device *rdev)
+{
+	/* RV410 and R420 can lock up if CP DMA to host memory happens while
+	 * the 2D engine is busy; the workaround is to queue a RESYNC at the
+	 * beginning of the CP init.  Nothing is queued if setup fails. */
+	if (radeon_scratch_get(rdev, &rdev->config.r300.resync_scratch) ||
+	    radeon_ring_lock(rdev, 8)) {
+		dev_err(rdev->dev, "failed to queue CP RESYNC workaround\n");
+		return;
+	}
+	radeon_ring_write(rdev, PACKET0(R300_CP_RESYNC_ADDR, 1));
+	radeon_ring_write(rdev, rdev->config.r300.resync_scratch);
+	radeon_ring_write(rdev, 0xDEADBEEF);
+	radeon_ring_unlock_commit(rdev);
+}
+
+static void r420_cp_errata_fini(struct radeon_device *rdev)
+{
+	/* Catch the RESYNC dispatched at the very beginning of the CP init.
+	 * The scratch reg is released even when the ring cannot be locked. */
+	if (!radeon_ring_lock(rdev, 8)) {
+		radeon_ring_write(rdev, PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
+		radeon_ring_write(rdev, R300_RB3D_DC_FINISH);
+		radeon_ring_unlock_commit(rdev);
+	}
+	radeon_scratch_free(rdev, rdev->config.r300.resync_scratch);
+}
+
 static int r420_startup(struct radeon_device *rdev)
 {
 	int r;
@@ -196,6 +225,7 @@
 		dev_err(rdev->dev, "failled initializing CP (%d).\n", r);
 		return r;
 	}
+	r420_cp_errata_init(rdev);
 	r = r100_wb_init(rdev);
 	if (r) {
 		dev_err(rdev->dev, "failled initializing WB (%d).\n", r);
@@ -238,6 +268,7 @@
 
 int r420_suspend(struct radeon_device *rdev)
 {
+	r420_cp_errata_fini(rdev);
 	r100_cp_disable(rdev);
 	r100_wb_disable(rdev);
 	r100_irq_disable(rdev);
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index a7e349d..cee8bdc 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -674,6 +674,7 @@
 struct r300_asic {
 	const unsigned	*reg_safe_bm;
 	unsigned	reg_safe_bm_size;
+	u32		resync_scratch;	/* scratch reg for the R420 CP RESYNC errata */
 };
 
 struct r600_asic {