drm/radeon/kms: Schedule host path read cache flush through the ring V2

The R300 family will hard lock if the host path read cache flush is
done through MMIO to HOST_PATH_CNTL, but scheduling the same flush
through the ring appears harmless. This patch removes the hdp_flush
callback and adds a flush after each fence emission, which means a
flush after each IB schedule. Thus we should get the same behavior
without the hard lockup.
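
For reference, a minimal sketch of the two approaches (using the same
register and packet names as the diff below; hdp_cntl stands for the
HOST_PATH_CNTL value saved at startup):

	/* MMIO flush -- hard locks the R300 family: */
	tmp = RREG32(RADEON_HOST_PATH_CNTL);
	WREG32(RADEON_HOST_PATH_CNTL, tmp | RADEON_HDP_READ_BUFFER_INVALIDATE);

	/* Ring-scheduled flush -- toggle the invalidate bit via PACKET0: */
	radeon_ring_write(rdev, PACKET0(RADEON_HOST_PATH_CNTL, 0));
	radeon_ring_write(rdev, hdp_cntl | RADEON_HDP_READ_BUFFER_INVALIDATE);
	radeon_ring_write(rdev, PACKET0(RADEON_HOST_PATH_CNTL, 0));
	radeon_ring_write(rdev, hdp_cntl);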

Tested on the R100, R200, R300, R400, R500, R600 and R700 families.

V2: Adjust fence counts in r600_blit_prepare_copy(), since the flush
adds dwords to every fence emission and the blit path's ring-space
reservation must account for them.

Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Reviewed-by: Alex Deucher <alexdeucher@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 7172746..1a056b7 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -356,6 +356,11 @@
 	/* Wait until IDLE & CLEAN */
 	radeon_ring_write(rdev, PACKET0(0x1720, 0));
 	radeon_ring_write(rdev, (1 << 16) | (1 << 17));
+	radeon_ring_write(rdev, PACKET0(RADEON_HOST_PATH_CNTL, 0));
+	radeon_ring_write(rdev, rdev->config.r100.hdp_cntl |
+				RADEON_HDP_READ_BUFFER_INVALIDATE);
+	radeon_ring_write(rdev, PACKET0(RADEON_HOST_PATH_CNTL, 0));
+	radeon_ring_write(rdev, rdev->config.r100.hdp_cntl);
 	/* Emit fence sequence & fire IRQ */
 	radeon_ring_write(rdev, PACKET0(rdev->fence_drv.scratch_reg, 0));
 	radeon_ring_write(rdev, fence->seq);
@@ -1713,14 +1718,6 @@
 	r100_hdp_reset(rdev);
 }
 
-void r100_hdp_flush(struct radeon_device *rdev)
-{
-	u32 tmp;
-	tmp = RREG32(RADEON_HOST_PATH_CNTL);
-	tmp |= RADEON_HDP_READ_BUFFER_INVALIDATE;
-	WREG32(RADEON_HOST_PATH_CNTL, tmp);
-}
-
 void r100_hdp_reset(struct radeon_device *rdev)
 {
 	uint32_t tmp;
@@ -3313,6 +3310,7 @@
 	}
 	/* Enable IRQ */
 	r100_irq_set(rdev);
+	rdev->config.r100.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL);
 	/* 1M ring buffer */
 	r = r100_cp_init(rdev, 1024 * 1024);
 	if (r) {