block: allow large discard requests
Currently we set the bio size to the byte equivalent of the blocks to
be trimmed when submitting the initial DISCARD ioctl. That means it
is subject to the max_hw_sectors limitation of the HBA, which is
much lower than the size of a DISCARD request we can support.
Add a separate max_discard_sectors tunable to limit the size for discard
requests.
We limit the max discard request size in bytes to 32 bits, as that is the
limit for bio->bi_size. This could be much larger if we had a way to pass
that information through the block layer.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index 21f5025..8873b9b 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -385,6 +385,8 @@
while (nr_sects && !ret) {
unsigned int sector_size = q->limits.logical_block_size;
+ unsigned int max_discard_sectors =
+ min(q->limits.max_discard_sectors, UINT_MAX >> 9);
bio = bio_alloc(gfp_mask, 1);
if (!bio)
@@ -411,10 +413,10 @@
* touch many more blocks on disk than the actual payload
* length.
*/
- if (nr_sects > queue_max_hw_sectors(q)) {
- bio->bi_size = queue_max_hw_sectors(q) << 9;
- nr_sects -= queue_max_hw_sectors(q);
- sector += queue_max_hw_sectors(q);
+ if (nr_sects > max_discard_sectors) {
+ bio->bi_size = max_discard_sectors << 9;
+ nr_sects -= max_discard_sectors;
+ sector += max_discard_sectors;
} else {
bio->bi_size = nr_sects << 9;
nr_sects = 0;
diff --git a/block/blk-core.c b/block/blk-core.c
index 80a020d..34504f3 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1436,7 +1436,8 @@
goto end_io;
}
- if (unlikely(nr_sectors > queue_max_hw_sectors(q))) {
+ if (unlikely(!bio_rw_flagged(bio, BIO_RW_DISCARD) &&
+ nr_sectors > queue_max_hw_sectors(q))) {
printk(KERN_ERR "bio too big device %s (%u > %u)\n",
bdevname(bio->bi_bdev, b),
bio_sectors(bio),
diff --git a/block/blk-settings.c b/block/blk-settings.c
index d29498e..e0695bc 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -96,6 +96,7 @@
lim->max_segment_size = MAX_SEGMENT_SIZE;
lim->max_sectors = BLK_DEF_MAX_SECTORS;
lim->max_hw_sectors = INT_MAX;
+ lim->max_discard_sectors = SAFE_MAX_SECTORS;
lim->logical_block_size = lim->physical_block_size = lim->io_min = 512;
lim->bounce_pfn = (unsigned long)(BLK_BOUNCE_ANY >> PAGE_SHIFT);
lim->alignment_offset = 0;
@@ -239,6 +240,18 @@
EXPORT_SYMBOL(blk_queue_max_hw_sectors);
/**
+ * blk_queue_max_discard_sectors - set max sectors for a single discard
+ * @q: the request queue for the device
+ * @max_discard_sectors: maximum number of sectors to discard
+ **/
+void blk_queue_max_discard_sectors(struct request_queue *q,
+ unsigned int max_discard_sectors)
+{
+ q->limits.max_discard_sectors = max_discard_sectors;
+}
+EXPORT_SYMBOL(blk_queue_max_discard_sectors);
+
+/**
* blk_queue_max_phys_segments - set max phys segments for a request for this queue
* @q: the request queue for the device
* @max_segments: max number of segments
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index f62d45e..1a03b71 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -311,6 +311,7 @@
unsigned int alignment_offset;
unsigned int io_min;
unsigned int io_opt;
+ unsigned int max_discard_sectors;
unsigned short logical_block_size;
unsigned short max_hw_segments;
@@ -928,6 +929,8 @@
extern void blk_queue_max_phys_segments(struct request_queue *, unsigned short);
extern void blk_queue_max_hw_segments(struct request_queue *, unsigned short);
extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
+extern void blk_queue_max_discard_sectors(struct request_queue *q,
+ unsigned int max_discard_sectors);
extern void blk_queue_logical_block_size(struct request_queue *, unsigned short);
extern void blk_queue_physical_block_size(struct request_queue *, unsigned short);
extern void blk_queue_alignment_offset(struct request_queue *q,