Merge branch 'for-2.6.25' of git://git.kernel.dk/linux-2.6-block
* 'for-2.6.25' of git://git.kernel.dk/linux-2.6-block:
block: implement drain buffers
__bio_clone: don't calculate hw/phys segment counts
block: allow queue dma_alignment of zero
blktrace: Add blktrace ioctls to SCSI generic devices
diff --git a/block/blktrace.c b/block/blktrace.c
index 9b4da4a..568588c 100644
--- a/block/blktrace.c
+++ b/block/blktrace.c
@@ -235,7 +235,7 @@
kfree(bt);
}
-static int blk_trace_remove(struct request_queue *q)
+int blk_trace_remove(struct request_queue *q)
{
struct blk_trace *bt;
@@ -249,6 +249,7 @@
return 0;
}
+EXPORT_SYMBOL_GPL(blk_trace_remove);
static int blk_dropped_open(struct inode *inode, struct file *filp)
{
@@ -316,18 +317,17 @@
/*
* Setup everything required to start tracing
*/
-int do_blk_trace_setup(struct request_queue *q, struct block_device *bdev,
+int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
struct blk_user_trace_setup *buts)
{
struct blk_trace *old_bt, *bt = NULL;
struct dentry *dir = NULL;
- char b[BDEVNAME_SIZE];
int ret, i;
if (!buts->buf_size || !buts->buf_nr)
return -EINVAL;
- strcpy(buts->name, bdevname(bdev, b));
+ strcpy(buts->name, name);
/*
* some device names have larger paths - convert the slashes
@@ -352,7 +352,7 @@
goto err;
bt->dir = dir;
- bt->dev = bdev->bd_dev;
+ bt->dev = dev;
atomic_set(&bt->dropped, 0);
ret = -EIO;
@@ -399,8 +399,8 @@
return ret;
}
-static int blk_trace_setup(struct request_queue *q, struct block_device *bdev,
- char __user *arg)
+int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
+ char __user *arg)
{
struct blk_user_trace_setup buts;
int ret;
@@ -409,7 +409,7 @@
if (ret)
return -EFAULT;
- ret = do_blk_trace_setup(q, bdev, &buts);
+ ret = do_blk_trace_setup(q, name, dev, &buts);
if (ret)
return ret;
@@ -418,8 +418,9 @@
return 0;
}
+EXPORT_SYMBOL_GPL(blk_trace_setup);
-static int blk_trace_startstop(struct request_queue *q, int start)
+int blk_trace_startstop(struct request_queue *q, int start)
{
struct blk_trace *bt;
int ret;
@@ -452,6 +453,7 @@
return ret;
}
+EXPORT_SYMBOL_GPL(blk_trace_startstop);
/**
* blk_trace_ioctl: - handle the ioctls associated with tracing
@@ -464,6 +466,7 @@
{
struct request_queue *q;
int ret, start = 0;
+ char b[BDEVNAME_SIZE];
q = bdev_get_queue(bdev);
if (!q)
@@ -473,7 +476,8 @@
switch (cmd) {
case BLKTRACESETUP:
- ret = blk_trace_setup(q, bdev, arg);
+ strcpy(b, bdevname(bdev, b));
+ ret = blk_trace_setup(q, b, bdev->bd_dev, arg);
break;
case BLKTRACESTART:
start = 1;
diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c
index cae0a85..b733732 100644
--- a/block/compat_ioctl.c
+++ b/block/compat_ioctl.c
@@ -545,6 +545,7 @@
struct blk_user_trace_setup buts;
struct compat_blk_user_trace_setup cbuts;
struct request_queue *q;
+ char b[BDEVNAME_SIZE];
int ret;
q = bdev_get_queue(bdev);
@@ -554,6 +555,8 @@
if (copy_from_user(&cbuts, arg, sizeof(cbuts)))
return -EFAULT;
+ strcpy(b, bdevname(bdev, b));
+
buts = (struct blk_user_trace_setup) {
.act_mask = cbuts.act_mask,
.buf_size = cbuts.buf_size,
@@ -565,7 +568,7 @@
memcpy(&buts.name, &cbuts.name, 32);
mutex_lock(&bdev->bd_mutex);
- ret = do_blk_trace_setup(q, bdev, &buts);
+ ret = do_blk_trace_setup(q, b, bdev->bd_dev, &buts);
mutex_unlock(&bdev->bd_mutex);
if (ret)
return ret;
diff --git a/block/elevator.c b/block/elevator.c
index f9736fb..8cd5775 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -741,7 +741,21 @@
q->boundary_rq = NULL;
}
- if ((rq->cmd_flags & REQ_DONTPREP) || !q->prep_rq_fn)
+ if (rq->cmd_flags & REQ_DONTPREP)
+ break;
+
+ if (q->dma_drain_size && rq->data_len) {
+ /*
+ * make sure space for the drain appears.  we
+ * know we can do this because max_hw_segments
+ * has been adjusted to be one fewer than the
+ * device can handle
+ */
+ rq->nr_phys_segments++;
+ rq->nr_hw_segments++;
+ }
+
+ if (!q->prep_rq_fn)
break;
ret = q->prep_rq_fn(q, rq);
@@ -754,6 +768,16 @@
* avoid resource deadlock. REQ_STARTED will
* prevent other fs requests from passing this one.
*/
+ if (q->dma_drain_size && rq->data_len &&
+ !(rq->cmd_flags & REQ_DONTPREP)) {
+ /*
+ * remove the space for the drain we added
+ * so that we don't add it again
+ */
+ --rq->nr_phys_segments;
+ --rq->nr_hw_segments;
+ }
+
rq = NULL;
break;
} else if (ret == BLKPREP_KILL) {
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index c16fdfe..1932a56 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -721,6 +721,45 @@
EXPORT_SYMBOL(blk_queue_stack_limits);
/**
+ * blk_queue_dma_drain - Set up a drain buffer for excess dma.
+ *
+ * @q: the request queue for the device
+ * @buf: physically contiguous buffer
+ * @size: size of the buffer in bytes
+ *
+ * Some devices have excess DMA problems and can't simply discard (or
+ * zero fill) the unwanted piece of the transfer. They have to have a
+ * real area of memory to transfer it into. The use case for this is
+ * ATAPI devices in DMA mode. If the packet command causes a transfer
+ * bigger than the transfer size some HBAs will lock up if there
+ * aren't DMA elements to contain the excess transfer. What this API
+ * does is adjust the queue so that the buf is always appended
+ * silently to the scatterlist.
+ *
+ * Note: This routine adjusts max_hw_segments and max_phys_segments to
+ * make room for appending the drain buffer. If you call
+ * blk_queue_max_hw_segments() or blk_queue_max_phys_segments() after
+ * calling this routine, you must set the limit to one fewer than your
+ * device can support, otherwise there won't be room for the drain
+ * buffer.
+ */
+int blk_queue_dma_drain(struct request_queue *q, void *buf,
+ unsigned int size)
+{
+ if (q->max_hw_segments < 2 || q->max_phys_segments < 2)
+ return -EINVAL;
+ /* make room for appending the drain */
+ --q->max_hw_segments;
+ --q->max_phys_segments;
+ q->dma_drain_buffer = buf;
+ q->dma_drain_size = size;
+
+ return 0;
+}
+
+EXPORT_SYMBOL_GPL(blk_queue_dma_drain);
+
+/**
* blk_queue_segment_boundary - set boundary rules for segment merging
* @q: the request queue for the device
* @mask: the memory boundary mask
@@ -1374,6 +1413,16 @@
bvprv = bvec;
} /* segments in rq */
+ if (q->dma_drain_size) {
+ sg->page_link &= ~0x02;
+ sg = sg_next(sg);
+ sg_set_page(sg, virt_to_page(q->dma_drain_buffer),
+ q->dma_drain_size,
+ ((unsigned long)q->dma_drain_buffer) &
+ (PAGE_SIZE - 1));
+ nsegs++;
+ }
+
if (sg)
sg_mark_end(sg);
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 17216b7..aba28f3 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -48,6 +48,7 @@
#include <linux/blkdev.h>
#include <linux/delay.h>
#include <linux/scatterlist.h>
+#include <linux/blktrace_api.h>
#include "scsi.h"
#include <scsi/scsi_dbg.h>
@@ -1067,6 +1068,17 @@
case BLKSECTGET:
return put_user(sdp->device->request_queue->max_sectors * 512,
ip);
+ case BLKTRACESETUP:
+ return blk_trace_setup(sdp->device->request_queue,
+ sdp->disk->disk_name,
+ sdp->device->sdev_gendev.devt,
+ (char *)arg);
+ case BLKTRACESTART:
+ return blk_trace_startstop(sdp->device->request_queue, 1);
+ case BLKTRACESTOP:
+ return blk_trace_startstop(sdp->device->request_queue, 0);
+ case BLKTRACETEARDOWN:
+ return blk_trace_remove(sdp->device->request_queue);
default:
if (read_only)
return -EPERM; /* don't know so take safe approach */
diff --git a/fs/bio.c b/fs/bio.c
index d59ddbf..242e409 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -248,11 +248,13 @@
*/
void __bio_clone(struct bio *bio, struct bio *bio_src)
{
- struct request_queue *q = bdev_get_queue(bio_src->bi_bdev);
-
memcpy(bio->bi_io_vec, bio_src->bi_io_vec,
bio_src->bi_max_vecs * sizeof(struct bio_vec));
+ /*
+ * most users will be overriding ->bi_bdev with a new target,
+ * so we neither set nor calculate new physical/hw segment counts here
+ */
bio->bi_sector = bio_src->bi_sector;
bio->bi_bdev = bio_src->bi_bdev;
bio->bi_flags |= 1 << BIO_CLONED;
@@ -260,8 +262,6 @@
bio->bi_vcnt = bio_src->bi_vcnt;
bio->bi_size = bio_src->bi_size;
bio->bi_idx = bio_src->bi_idx;
- bio_phys_segments(q, bio);
- bio_hw_segments(q, bio);
}
/**
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index b71c390..71e7a84 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -356,6 +356,8 @@
unsigned int max_segment_size;
unsigned long seg_boundary_mask;
+ void *dma_drain_buffer;
+ unsigned int dma_drain_size;
unsigned int dma_alignment;
struct blk_queue_tag *queue_tags;
@@ -692,6 +694,8 @@
extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
extern void blk_queue_hardsect_size(struct request_queue *, unsigned short);
extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b);
+extern int blk_queue_dma_drain(struct request_queue *q, void *buf,
+ unsigned int size);
extern void blk_queue_segment_boundary(struct request_queue *, unsigned long);
extern void blk_queue_prep_rq(struct request_queue *, prep_rq_fn *pfn);
extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *);
@@ -768,12 +772,7 @@
static inline int queue_dma_alignment(struct request_queue *q)
{
- int retval = 511;
-
- if (q && q->dma_alignment)
- retval = q->dma_alignment;
-
- return retval;
+ return q ? q->dma_alignment : 511;
}
/* assumes size > 256 */
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index 7e11d23..06dadba 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -148,7 +148,7 @@
extern void blk_trace_shutdown(struct request_queue *);
extern void __blk_add_trace(struct blk_trace *, sector_t, int, int, u32, int, int, void *);
extern int do_blk_trace_setup(struct request_queue *q,
- struct block_device *bdev, struct blk_user_trace_setup *buts);
+ char *name, dev_t dev, struct blk_user_trace_setup *buts);
/**
@@ -282,6 +282,11 @@
__blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP, !bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r);
}
+extern int blk_trace_setup(request_queue_t *q, char *name, dev_t dev,
+ char __user *arg);
+extern int blk_trace_startstop(request_queue_t *q, int start);
+extern int blk_trace_remove(request_queue_t *q);
+
#else /* !CONFIG_BLK_DEV_IO_TRACE */
#define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY)
#define blk_trace_shutdown(q) do { } while (0)
@@ -290,7 +295,10 @@
#define blk_add_trace_generic(q, rq, rw, what) do { } while (0)
#define blk_add_trace_pdu_int(q, what, bio, pdu) do { } while (0)
#define blk_add_trace_remap(q, bio, dev, f, t) do {} while (0)
-#define do_blk_trace_setup(q, bdev, buts) (-ENOTTY)
+#define do_blk_trace_setup(q, name, dev, buts) (-ENOTTY)
+#define blk_trace_setup(q, name, dev, arg) (-ENOTTY)
+#define blk_trace_startstop(q, start) (-ENOTTY)
+#define blk_trace_remove(q) (-ENOTTY)
#endif /* CONFIG_BLK_DEV_IO_TRACE */
#endif /* __KERNEL__ */
#endif