V4L/DVB (9804): cx18: Avoid making firmware API calls with the queue lock held

cx18: Avoid making firmware API calls with the queue lock held.  The source
of MPEG stream corruption when not holding the queue lock was found to be that
the MPEG buffer could be retrieved by the user app before it was sync'ed for
the host cpu.  Incoming buffers are now sync'ed before being put on q_full and
releasing the queue lock.  We can thus avoid the sometimes lengthy call to
the firmware for CPU_DE_SET_MDL while holding the queue lock, so we can get
better performance.

Signed-off-by: Andy Walls <awalls@radix.net>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
diff --git a/drivers/media/video/cx18/cx18-mailbox.c b/drivers/media/video/cx18/cx18-mailbox.c
index 9d82205..ca8d5f4 100644
--- a/drivers/media/video/cx18/cx18-mailbox.c
+++ b/drivers/media/video/cx18/cx18-mailbox.c
@@ -163,7 +163,7 @@
 		 * it's filled in).
 		 *
 		 * cx18_queue_get buf() will detect the lost buffers
-		 * and put them back in rotation eventually.
+		 * and send them back to q_free for fw rotation eventually.
 		 */
 		if ((order->flags & CX18_F_EWO_MB_STALE_UPON_RECEIPT) &&
 		    !(id >= s->mdl_offset &&
@@ -174,24 +174,27 @@
 			break;
 		}
 		buf = cx18_queue_get_buf(s, id, mdl_ack->data_used);
+
 		CX18_DEBUG_HI_DMA("DMA DONE for %s (buffer %d)\n", s->name, id);
 		if (buf == NULL) {
 			CX18_WARN("Could not find buf %d for stream %s\n",
 				  id, s->name);
+			/* Put as many buffers as possible back into fw use */
+			cx18_stream_load_fw_queue(s);
 			continue;
 		}
 
-		cx18_buf_sync_for_cpu(s, buf);
 		if (s->type == CX18_ENC_STREAM_TYPE_TS && s->dvb.enabled) {
 			CX18_DEBUG_HI_DMA("TS recv bytesused = %d\n",
 					  buf->bytesused);
-
 			dvb_dmx_swfilter(&s->dvb.demux, buf->buf,
 					 buf->bytesused);
-
+		}
+		/* Put as many buffers as possible back into fw use */
+		cx18_stream_load_fw_queue(s);
+		/* Put back TS buffer, since it was removed from all queues */
+		if (s->type == CX18_ENC_STREAM_TYPE_TS)
 			cx18_stream_put_buf_fw(s, buf);
-		} else
-			set_bit(CX18_F_B_NEED_BUF_SWAP, &buf->b_flags);
 	}
 	wake_up(&cx->dma_waitq);
 	if (s->id != -1)
diff --git a/drivers/media/video/cx18/cx18-queue.c b/drivers/media/video/cx18/cx18-queue.c
index 40379d8..a6b0666 100644
--- a/drivers/media/video/cx18/cx18-queue.c
+++ b/drivers/media/video/cx18/cx18-queue.c
@@ -117,16 +117,18 @@
 		}
 
 		buf->bytesused = bytesused;
+		/* Sync the buffer before we release the qlock */
+		cx18_buf_sync_for_cpu(s, buf);
 		if (s->type == CX18_ENC_STREAM_TYPE_TS) {
 			/*
-			 * TS doesn't use q_full, but for sweeping up lost
-			 * buffers, we want the TS to requeue the buffer just
-			 * before sending the MDL back to the firmware, so we
-			 * pull it off the list here.
+			 * TS doesn't use q_full.  As we pull the buffer off of
+			 * the queue here, the caller will have to put it back.
 			 */
 			list_del_init(&buf->list);
 		} else {
+			/* Move buffer from q_busy to q_full */
 			list_move_tail(&buf->list, &s->q_full.list);
+			set_bit(CX18_F_B_NEED_BUF_SWAP, &buf->b_flags);
 			s->q_full.bytesused += buf->bytesused;
 			atomic_inc(&s->q_full.buffers);
 		}
@@ -135,9 +137,6 @@
 		ret = buf;
 		break;
 	}
-
-	/* Put more buffers into the transfer rotation from q_free, if we can */
-	cx18_stream_load_fw_queue_nolock(s);
 	mutex_unlock(&s->qlock);
 	return ret;
 }
diff --git a/drivers/media/video/cx18/cx18-streams.c b/drivers/media/video/cx18/cx18-streams.c
index d2690cc..9ead459 100644
--- a/drivers/media/video/cx18/cx18-streams.c
+++ b/drivers/media/video/cx18/cx18-streams.c
@@ -419,31 +419,22 @@
 	return q;
 }
 
-/* Must hold s->qlock when calling */
-void cx18_stream_load_fw_queue_nolock(struct cx18_stream *s)
+void cx18_stream_load_fw_queue(struct cx18_stream *s)
 {
+	struct cx18_queue *q;
 	struct cx18_buffer *buf;
-	struct cx18 *cx = s->cx;
 
-	/* Move from q_free to q_busy notifying the firmware: 63 buf limit */
-	while (s->handle != CX18_INVALID_TASK_HANDLE &&
-	       test_bit(CX18_F_S_STREAMING, &s->s_flags) &&
-	       atomic_read(&s->q_busy.buffers) < 63 &&
-	       !list_empty(&s->q_free.list)) {
+	if (atomic_read(&s->q_free.buffers) == 0 ||
+	    atomic_read(&s->q_busy.buffers) >= 63)
+		return;
 
-		/* Move from q_free to q_busy */
-		buf = list_entry(s->q_free.list.next, struct cx18_buffer, list);
-		list_move_tail(&buf->list, &s->q_busy.list);
-		buf->bytesused = buf->readpos = buf->b_flags = buf->skipped = 0;
-		atomic_dec(&s->q_free.buffers);
-		atomic_inc(&s->q_busy.buffers);
-
-		/* Notify firmware */
-		cx18_buf_sync_for_device(s, buf);
-		cx18_vapi(cx, CX18_CPU_DE_SET_MDL, 5, s->handle,
-		  (void __iomem *) &cx->scb->cpu_mdl[buf->id] - cx->enc_mem,
-		  1, buf->id, s->buf_size);
-	}
+	/* Move from q_free to q_busy notifying the firmware, until the limit */
+	do {
+		buf = cx18_dequeue(s, &s->q_free);
+		if (buf == NULL)
+			break;
+		q = cx18_stream_put_buf_fw(s, buf);
+	} while (atomic_read(&s->q_busy.buffers) < 63 && q == &s->q_busy);
 }
 
 int cx18_start_v4l2_encode_stream(struct cx18_stream *s)
@@ -543,8 +534,8 @@
 					&cx->scb->cpu_mdl[buf->id].paddr);
 		cx18_writel(cx, s->buf_size, &cx->scb->cpu_mdl[buf->id].length);
 	}
-	cx18_stream_load_fw_queue_nolock(s);
 	mutex_unlock(&s->qlock);
+	cx18_stream_load_fw_queue(s);
 
 	/* begin_capture */
 	if (cx18_vapi(cx, CX18_CPU_CAPTURE_START, 1, s->handle)) {
diff --git a/drivers/media/video/cx18/cx18-streams.h b/drivers/media/video/cx18/cx18-streams.h
index 635d34b..420e0a1 100644
--- a/drivers/media/video/cx18/cx18-streams.h
+++ b/drivers/media/video/cx18/cx18-streams.h
@@ -29,7 +29,7 @@
 void cx18_streams_cleanup(struct cx18 *cx, int unregister);
 
 /* Capture related */
-void cx18_stream_load_fw_queue_nolock(struct cx18_stream *s);
+void cx18_stream_load_fw_queue(struct cx18_stream *s);
 struct cx18_queue *cx18_stream_put_buf_fw(struct cx18_stream *s,
 					  struct cx18_buffer *buf);
 int cx18_start_v4l2_encode_stream(struct cx18_stream *s);