Google Groups no longer supports new Usenet posts or subscriptions. Historical content remains viewable.
Dismiss

[PATCH] virtio_blk: add block topology support

4 views
Skip to first unread message

Christoph Hellwig

unread,
Jan 29, 2010, 2:10:02 PM1/29/10
to
Allow reading various alignment values from the config page. This
allows the guest to much better align I/O requests depending on the
storage topology.

Note that the formats for the config values appear a bit messed up,
but we follow the formats used by ATA and SCSI so they are expected in
the storage world.

Signed-off-by: Christoph Hellwig <h...@lst.de>

Index: linux-2.6/drivers/block/virtio_blk.c
===================================================================
--- linux-2.6.orig/drivers/block/virtio_blk.c 2010-01-29 11:13:38.509004113 +0100
+++ linux-2.6/drivers/block/virtio_blk.c 2010-01-29 19:36:44.183006458 +0100
@@ -243,10 +243,12 @@ static int index_to_minor(int index)
static int __devinit virtblk_probe(struct virtio_device *vdev)
{
struct virtio_blk *vblk;
+ struct request_queue *q;
int err;
u64 cap;
u32 v, blk_size, sg_elems, opt_io_size;
u16 min_io_size;
+ u8 physical_block_exp, alignment_offset;

if (index_to_minor(index) >= 1 << MINORBITS)
return -ENOSPC;
@@ -293,13 +295,13 @@ static int __devinit virtblk_probe(struc
goto out_mempool;
}

- vblk->disk->queue = blk_init_queue(do_virtblk_request, &vblk->lock);
- if (!vblk->disk->queue) {
+ q = vblk->disk->queue = blk_init_queue(do_virtblk_request, &vblk->lock);
+ if (!q) {
err = -ENOMEM;
goto out_put_disk;
}

- vblk->disk->queue->queuedata = vblk;
+ q->queuedata = vblk;

if (index < 26) {
sprintf(vblk->disk->disk_name, "vd%c", 'a' + index % 26);
@@ -323,10 +325,10 @@ static int __devinit virtblk_probe(struc

/* If barriers are supported, tell block layer that queue is ordered */
if (virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH))
- blk_queue_ordered(vblk->disk->queue, QUEUE_ORDERED_DRAIN_FLUSH,
+ blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH,
virtblk_prepare_flush);
else if (virtio_has_feature(vdev, VIRTIO_BLK_F_BARRIER))
- blk_queue_ordered(vblk->disk->queue, QUEUE_ORDERED_TAG, NULL);
+ blk_queue_ordered(q, QUEUE_ORDERED_TAG, NULL);

/* If disk is read-only in the host, the guest should obey */
if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO))
@@ -345,14 +347,14 @@ static int __devinit virtblk_probe(struc
set_capacity(vblk->disk, cap);

/* We can handle whatever the host told us to handle. */
- blk_queue_max_phys_segments(vblk->disk->queue, vblk->sg_elems-2);
- blk_queue_max_hw_segments(vblk->disk->queue, vblk->sg_elems-2);
+ blk_queue_max_phys_segments(q, vblk->sg_elems-2);
+ blk_queue_max_hw_segments(q, vblk->sg_elems-2);

/* No need to bounce any requests */
- blk_queue_bounce_limit(vblk->disk->queue, BLK_BOUNCE_ANY);
+ blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);

/* No real sector limit. */
- blk_queue_max_sectors(vblk->disk->queue, -1U);
+ blk_queue_max_sectors(q, -1U);

/* Host can optionally specify maximum segment size and number of
* segments. */
@@ -360,29 +362,46 @@ static int __devinit virtblk_probe(struc
offsetof(struct virtio_blk_config, size_max),
&v);
if (!err)
- blk_queue_max_segment_size(vblk->disk->queue, v);
+ blk_queue_max_segment_size(q, v);
else
- blk_queue_max_segment_size(vblk->disk->queue, -1U);
+ blk_queue_max_segment_size(q, -1U);

/* Host can optionally specify the block size of the device */
err = virtio_config_val(vdev, VIRTIO_BLK_F_BLK_SIZE,
offsetof(struct virtio_blk_config, blk_size),
&blk_size);
if (!err)
- blk_queue_logical_block_size(vblk->disk->queue, blk_size);
+ blk_queue_logical_block_size(q, blk_size);

/* Use topology information if available */
- err = virtio_config_val(vdev, VIRTIO_BLK_F_BLK_TOPOLOGY,
+ err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
+ offsetof(struct virtio_blk_config, physical_block_exp),
+ &physical_block_exp);
+ if (!err && physical_block_exp) {
+ u16 physical_block_size =
+ queue_logical_block_size(q) * (1 << physical_block_exp);
+ blk_queue_physical_block_size(q, physical_block_size);
+ }
+
+ err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
+ offsetof(struct virtio_blk_config, alignment_offset),
+ &alignment_offset);
+ if (!err && alignment_offset) {
+ u16 alignment = queue_logical_block_size(q) * alignment_offset;
+ blk_queue_alignment_offset(q, alignment);
+ }
+
+ err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
offsetof(struct virtio_blk_config, min_io_size),
&min_io_size);
- if (!err)
- blk_queue_io_min(vblk->disk->queue, min_io_size);
+ if (!err && min_io_size)
+ blk_queue_io_min(q, min_io_size);

- err = virtio_config_val(vdev, VIRTIO_BLK_F_BLK_TOPOLOGY,
+ err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
offsetof(struct virtio_blk_config, opt_io_size),
&opt_io_size);
- if (!err)
- blk_queue_io_opt(vblk->disk->queue, opt_io_size);
+ if (!err && opt_io_size)
+ blk_queue_io_opt(q, opt_io_size);


add_disk(vblk->disk);
Index: linux-2.6/include/linux/virtio_blk.h
===================================================================
--- linux-2.6.orig/include/linux/virtio_blk.h 2010-01-29 11:11:20.436254184 +0100
+++ linux-2.6/include/linux/virtio_blk.h 2010-01-29 19:35:44.431016843 +0100
@@ -15,6 +15,7 @@
#define VIRTIO_BLK_F_BLK_SIZE 6 /* Block size of disk is available*/
#define VIRTIO_BLK_F_SCSI 7 /* Supports scsi command passthru */
#define VIRTIO_BLK_F_FLUSH 9 /* Cache flush command support */
+#define VIRTIO_BLK_F_TOPOLOGY 10 /* Topology information is available */

struct virtio_blk_config {
/* The capacity (in 512-byte sectors). */
@@ -29,8 +30,20 @@ struct virtio_blk_config {
__u8 heads;
__u8 sectors;
} geometry;
+
/* block size of device (if VIRTIO_BLK_F_BLK_SIZE) */
__u32 blk_size;
+
+ /* the next 4 entries are covered by VIRTIO_BLK_F_TOPOLOGY */
+ /* exponent for physical block per logical block. */
+ __u8 physical_block_exp;
+ /* alignment offset in logical blocks. */
+ __u8 alignment_offset;
+ /* minimum I/O size without performance penalty in bytes. */
+ __u16 min_io_size;
+ /* optimal sustained I/O size. */
+ __u32 opt_io_size;
+
} __attribute__((packed));

/*
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majo...@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/

Rusty Russell

unread,
Jan 30, 2010, 12:10:02 AM1/30/10
to
On Sat, 30 Jan 2010 05:31:58 am Christoph Hellwig wrote:
> Allow reading various alignment values from the config page. This
> allows the guest to much better align I/O requests depending on the
> storage topology.
>
> Note that the formats for the config values appear a bit messed up,
> but we follow the formats used by ATA and SCSI so they are expected in
> the storage world.

I bow to your expertise on that. My only query is the __u16 for min_io_size; is that likely to restrict us?

Also, patch seems to be based on a prior one?

> /* Use topology information if available */
> - err = virtio_config_val(vdev, VIRTIO_BLK_F_BLK_TOPOLOGY,
> + err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
> + offsetof(struct virtio_blk_config, physical_block_exp),
> + &physical_block_exp);

Thanks,
Rusty.

Christoph Hellwig

unread,
Jan 30, 2010, 3:20:01 PM1/30/10
to
On Sat, Jan 30, 2010 at 03:29:49PM +1030, Rusty Russell wrote:
> I bow to your expertise on that. My only query is the __u16 for min_io_size; is that likely to restrict us?

Looks like you caught me there - I wrote the above odd format about the
physical_block exponent, but scsi actually does the min_io and opt_io
size in logical blocks, too. With that in account the u16 as in scsi
is perfectly fine.

> Also, patch seems to be based on a prior one?

Yes, quilt once again totally messed up the patch, sorry.

New version below that uses block based values everywhere (but still the
strange exponent for the physical block size) and actually contains
all the needed hunks. I'll make sure qemu also advertizes the values in
blocks. Btw, I remember you had a virtio protocol documentation
somewhere - is that one only for the base virtio protocol or does it
also include virtio-blk? I'll prepare some patches for it if it
includes the latter.

---

From: Christoph Hellwig <h...@lst.de>
Subject: [PATCH] virtio_blk: add block topology support

Allow reading various alignment values from the config page. This
allows the guest to much better align I/O requests depending on the
storage topology.

Note that the formats for the config values appear a bit messed up,
but we follow the formats used by ATA and SCSI so they are expected in
the storage world.

Signed-off-by: Christoph Hellwig <h...@lst.de>

Index: linux-2.6/drivers/block/virtio_blk.c
===================================================================
--- linux-2.6.orig/drivers/block/virtio_blk.c 2010-01-30 11:26:36.867004166 +0100
+++ linux-2.6/drivers/block/virtio_blk.c 2010-01-30 19:04:39.378003955 +0100


@@ -243,10 +243,12 @@ static int index_to_minor(int index)
static int __devinit virtblk_probe(struct virtio_device *vdev)
{
struct virtio_blk *vblk;
+ struct request_queue *q;
int err;
u64 cap;

- u32 v;
- u32 blk_size, sg_elems;
+ u32 v, blk_size, sg_elems, opt_io_size;
+ u16 min_io_size;

@@ -360,16 +362,45 @@ static int __devinit virtblk_probe(struc


offsetof(struct virtio_blk_config, size_max),
&v);
if (!err)
- blk_queue_max_segment_size(vblk->disk->queue, v);
+ blk_queue_max_segment_size(q, v);
else
- blk_queue_max_segment_size(vblk->disk->queue, -1U);
+ blk_queue_max_segment_size(q, -1U);

/* Host can optionally specify the block size of the device */
err = virtio_config_val(vdev, VIRTIO_BLK_F_BLK_SIZE,
offsetof(struct virtio_blk_config, blk_size),
&blk_size);
if (!err)
- blk_queue_logical_block_size(vblk->disk->queue, blk_size);
+ blk_queue_logical_block_size(q, blk_size);

+ else
+ blk_size = queue_logical_block_size(q);
+
+ /* Use topology information if available */


+ err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
+ offsetof(struct virtio_blk_config, physical_block_exp),
+ &physical_block_exp);

+ if (!err && physical_block_exp)

+ blk_queue_physical_block_size(q,
+ blk_size * (1 << physical_block_exp));
+


+ err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,

+ offsetof(struct virtio_blk_config, alignment_offset),
+ &alignment_offset);
+ if (!err && alignment_offset)

+ blk_queue_alignment_offset(q, blk_size * alignment_offset);
+


+ err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,

+ offsetof(struct virtio_blk_config, min_io_size),
+ &min_io_size);


+ if (!err && min_io_size)

+ blk_queue_io_min(q, blk_size * min_io_size);
+


+ err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,

+ offsetof(struct virtio_blk_config, opt_io_size),
+ &opt_io_size);


+ if (!err && opt_io_size)

+ blk_queue_io_opt(q, blk_size * opt_io_size);
+

add_disk(vblk->disk);
return 0;
@@ -412,7 +443,7 @@ static struct virtio_device_id id_table[
static unsigned int features[] = {
VIRTIO_BLK_F_BARRIER, VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX,
VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
- VIRTIO_BLK_F_SCSI, VIRTIO_BLK_F_FLUSH
+ VIRTIO_BLK_F_SCSI, VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY
};

/*
Index: linux-2.6/include/linux/virtio_blk.h
===================================================================
--- linux-2.6.orig/include/linux/virtio_blk.h 2010-01-29 23:10:26.000000000 +0100
+++ linux-2.6/include/linux/virtio_blk.h 2010-01-30 19:09:46.397253920 +0100


@@ -15,6 +15,7 @@
#define VIRTIO_BLK_F_BLK_SIZE 6 /* Block size of disk is available*/
#define VIRTIO_BLK_F_SCSI 7 /* Supports scsi command passthru */
#define VIRTIO_BLK_F_FLUSH 9 /* Cache flush command support */
+#define VIRTIO_BLK_F_TOPOLOGY 10 /* Topology information is available */

struct virtio_blk_config {
/* The capacity (in 512-byte sectors). */
@@ -29,8 +30,20 @@ struct virtio_blk_config {
__u8 heads;
__u8 sectors;
} geometry;
+
/* block size of device (if VIRTIO_BLK_F_BLK_SIZE) */
__u32 blk_size;
+

+ /* the next 4 entries are guarded by VIRTIO_BLK_F_TOPOLOGY */


+ /* exponent for physical block per logical block. */
+ __u8 physical_block_exp;
+ /* alignment offset in logical blocks. */
+ __u8 alignment_offset;

+ /* minimum I/O size without performance penalty in logical blocks. */
+ __u16 min_io_size;
+ /* optimal sustained I/O size in logical blocks. */


+ __u32 opt_io_size;
+
} __attribute__((packed));

/*

Rusty Russell

unread,
Jan 31, 2010, 8:20:01 PM1/31/10
to
On Sun, 31 Jan 2010 06:49:10 am Christoph Hellwig wrote:
> On Sat, Jan 30, 2010 at 03:29:49PM +1030, Rusty Russell wrote:
> > I bow to your expertise on that. My only query is the __u16 for min_io_size; is that likely to restrict us?
>
> Looks like you caught me there - I wrote the above odd format about the
> physical_block exponent, but scsi actually does the min_io and opt_io
> size in logical blocks, too. With that in account the u16 as in scsi
> is perfectly fine.

Thanks, applied.

Rusty.

Alexey Zaytsev

unread,
Jul 14, 2011, 7:40:01 PM7/14/11
to
On Mon, Feb 1, 2010 at 04:10, Rusty Russell <ru...@rustcorp.com.au> wrote:
> On Sun, 31 Jan 2010 06:49:10 am Christoph Hellwig wrote:
>> On Sat, Jan 30, 2010 at 03:29:49PM +1030, Rusty Russell wrote:
>> > I bow to your expertise on that.  My only query is the __u16 for min_io_size; is that likely to restrict us?
>>
>> Looks like you caught me there - I wrote the above odd format about the
>> physical_block exponent, but scsi actually does the min_io and opt_io
>> size in logical blocks, too.  With that in account the u16 as in scsi
>> is perfectly fine.
>
> Thanks, applied.

Ugh, guys. I know it's already applied long ago, but this kind of
contradicts the virtio specification, doesn't it?


VIRTIO_BLK_F_SECTOR_MAX (10) Maximum total sectors in
an I/O.
[...]

4. If the VIRTIO_BLK_F_SECTOR_MAX feature is negotiated, the sec-
tors_max eld should be read to determine the maximum I/O size for
the driver to use. No requests should be submitted which go beyond this
limit.

struct virtio_blk_config {
u64 capacity ;
u32 si z e_ m ax ;
u32 seg_max ;
struct virtio_blk_geometry {
u16 cylinders ;
u8 heads ;
u8 sectors ;
} geometry ;
u32 blk_size ;
u32 sectors_max ;
};

Rusty Russell

unread,
Jul 21, 2011, 5:40:03 AM7/21/11
to
On Fri, 15 Jul 2011 03:36:42 +0400, Alexey Zaytsev <alexey....@gmail.com> wrote:
> On Mon, Feb 1, 2010 at 04:10, Rusty Russell <ru...@rustcorp.com.au> wrote:
> > On Sun, 31 Jan 2010 06:49:10 am Christoph Hellwig wrote:
> >> On Sat, Jan 30, 2010 at 03:29:49PM +1030, Rusty Russell wrote:
> >> > I bow to your expertise on that.  My only query is the __u16 for min_io_size; is that likely to restrict us?
> >>
> >> Looks like you caught me there - I wrote the above odd format about the
> >> physical_block exponent, but scsi actually does the min_io and opt_io
> >> size in logical blocks, too.  With that in account the u16 as in scsi
> >> is perfectly fine.
> >
> > Thanks, applied.
>
> Ugh, guys. I know it's already applied long ago, but this kind of
> contradicts the virtio specification, doesn't it?

Ugh indeed! The same field is used by two places, as
VIRTIO_BLK_F_SECTOR_MAX never made it into the linux headers.

> VIRTIO_BLK_F_SECTOR_MAX (10) Maximum total sectors in
> an I/O.

The patch made the spec, but as far as I can tell, no implementation.

If that's right, we just remove it from the spec. If it did make it in,
we now have a very ugly case where the layout will have to vary
depending on what options are negotiated.

Once we've resolved this, I'll update the spec...

Thanks,
Rusty.

Alexey Zaytsev

unread,
Jul 29, 2011, 3:20:02 PM7/29/11
to
On Thu, Jul 21, 2011 at 10:53, Rusty Russell <ru...@rustcorp.com.au> wrote:
> On Fri, 15 Jul 2011 03:36:42 +0400, Alexey Zaytsev <alexey....@gmail.com> wrote:
>> On Mon, Feb 1, 2010 at 04:10, Rusty Russell <ru...@rustcorp.com.au> wrote:
>> > On Sun, 31 Jan 2010 06:49:10 am Christoph Hellwig wrote:
>> >> On Sat, Jan 30, 2010 at 03:29:49PM +1030, Rusty Russell wrote:
>> >> > I bow to your expertise on that.  My only query is the __u16 for min_io_size; is that likely to restrict us?
>> >>
>> >> Looks like you caught me there - I wrote the above odd format about the
>> >> physical_block exponent, but scsi actually does the min_io and opt_io
>> >> size in logical blocks, too.  With that in account the u16 as in scsi
>> >> is perfectly fine.
>> >
>> > Thanks, applied.
>>
>> Ugh, guys. I know it's already applied long ago, but this kind of
>> contradicts the virtio specification, doesn't it?
>
> Ugh indeed!  The same field is used by two places, as
> VIRTIO_BLK_F_SECTOR_MAX never made it into the linux headers.
>
>> VIRTIO_BLK_F_SECTOR_MAX (10) Maximum total sectors in
>>    an I/O.
>
> The patch made the spec, but as far as I can tell, no implementation.
>
> If that's right, we just remove it from the spec.  If it did make it in,
> we now have a very ugly case where the layout will have to vary
> depending on what options are negotiated.

VIRTIO_BLK_F_TOPOLOGY and VIRTIO_BLK_F_SECTOR_MAX features use the
same bit, don't they?

I guess you could just remove VIRTIO_BLK_F_SECTOR_MAX from the spec.
Linux did not implement it, and virtualbox does not have virtio block
support at all. Are there any other known users, who might have
implemented the VIRTIO_BLK_F_SECTOR_MAX feature?

Rusty Russell

unread,
Jul 30, 2011, 1:30:02 AM7/30/11
to
On Fri, 29 Jul 2011 22:09:49 +0300, Alexey Zaytsev <alexey....@gmail.com> wrote:
> VIRTIO_BLK_F_TOPOLOGY and VIRTIO_BLK_F_SECTOR_MAX features use the
> same bit, don't they?

Yes, spec bug.

> I guess you could just remove VIRTIO_BLK_F_SECTOR_MAX from the spec.
> Linux did not implement it, and virtualbox does not have virtio block
> support at all. Are there any other known users, who might have
> implemented the VIRTIO_BLK_F_SECTOR_MAX feature?

Did qemu implement it? If not, we'll simply kill it, and pretend it
never happened...

Thanks,
Rusty.

Christoph Hellwig

unread,
Jul 31, 2011, 2:20:03 PM7/31/11
to
On Sat, Jul 30, 2011 at 02:22:42PM +0930, Rusty Russell wrote:
> > I guess you could just remove VIRTIO_BLK_F_SECTOR_MAX from the spec.
> > Linux did not implement it, and virtualbox does not have virtio block
> > support at all. Are there any other known users, who might have
> > implemented the VIRTIO_BLK_F_SECTOR_MAX feature?
>
> Did qemu implement it? If not, we'll simply kill it, and pretend it
> never happened...

It was never added to either qemu or the kernel. It would really help
if we had a copy of the spec in a readable format (that is plain text)
in major implementations of virtio like qemu and the kernel.

Rusty Russell

unread,
Aug 1, 2011, 7:30:04 PM8/1/11
to
On Sun, 31 Jul 2011 20:19:05 +0200, Christoph Hellwig <h...@lst.de> wrote:
> On Sat, Jul 30, 2011 at 02:22:42PM +0930, Rusty Russell wrote:
> > > I guess you could just remove VIRTIO_BLK_F_SECTOR_MAX from the spec.
> > > Linux did not implement it, and virtualbox does not have virtio block
> > > support at all. Are there any other known users, who might have
> > > implemented the VIRTIO_BLK_F_SECTOR_MAX feature?
> >
> > Did qemu implement it? If not, we'll simply kill it, and pretend it
> > never happened...
>
> It was never added to either qemu or the kernel. It would really help
> if we had a copy of the spec in a readable format (that is plain text)
> in major implementations of virtio like qemu and the kernel.

That's a really good idea. I've prepared a patch which puts the latest
version in the kernel source (Documentation/virtio/virtio-spec.txt).

Thanks!
Rusty.

0 new messages