Signed-off-by: Corrado Zoccolo <czoc...@gmail.com>
---
block/cfq-iosched.c | 9 +++++----
1 files changed, 5 insertions(+), 4 deletions(-)
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index b00ca4c..a594388 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -284,7 +284,7 @@ struct cfq_data {
*/
struct cfq_queue oom_cfqq;
- unsigned long last_end_sync_rq;
+ unsigned long last_delayed_sync;
/* List of cfq groups being managed on this device*/
struct hlist_head cfqg_list;
@@ -2264,7 +2264,7 @@ static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq)
* based on the last sync IO we serviced
*/
if (!cfq_cfqq_sync(cfqq) && cfqd->cfq_latency) {
- unsigned long last_sync = jiffies - cfqd->last_end_sync_rq;
+ unsigned long last_sync = jiffies - cfqd->last_delayed_sync;
unsigned int depth;
depth = last_sync / cfqd->cfq_slice[1];
@@ -3272,7 +3272,8 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
if (sync) {
RQ_CIC(rq)->last_end_request = now;
- cfqd->last_end_sync_rq = now;
+ if (time_after(rq->start_time + cfqd->cfq_fifo_expire[1], now))
+ cfqd->last_delayed_sync = now;
}
/*
@@ -3706,7 +3707,7 @@ static void *cfq_init_queue(struct request_queue *q)
cfqd->cfq_latency = 1;
cfqd->cfq_group_isolation = 0;
cfqd->hw_tag = -1;
- cfqd->last_end_sync_rq = jiffies;
+ cfqd->last_delayed_sync = jiffies - HZ;
return cfqd;
}
--
1.6.2.5
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majo...@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Sorry, the check should be reversed (!time_after).
Thanks,
Corrado
--
__________________________________________________________________________
dott. Corrado Zoccolo mailto:czoc...@gmail.com
PhD - Department of Computer Science - University of Pisa, Italy
--------------------------------------------------------------------------
The self-confidence of a warrior is not the self-confidence of the average
man. The average man seeks certainty in the eyes of the onlooker and calls
that self-confidence. The warrior seeks impeccability in his own eyes and
calls that humbleness.
Tales of Power - C. Castaneda
I queued this up for post inclusion into 2.6.33, with the time_after()
fixed.
The patch was word-wrapped, btw.
--
Jens Axboe
> On Sun, Dec 06 2009, Corrado Zoccolo wrote:
>> Hi Jeff,
>> I remember you saw large performance drop on your SAN for sequential
>> writes with low_latency=1. Can you test if Shaohua's and this patch
>> fix allow to recover some bandwidth? I think that enabling the queue
>> depth ramp up only if a sync request was delayed should disable it for
>> fast hardware like yours, so you should not be seeing the slowdown any
>> more.
>
> I queued this up for post inclusion into 2.6.33, with the time_after()
> fixed.
>
> The patch was word-wrapped, btw.
So in what branch can I find this fix? Once I know that I can queue up
some tests.
Cheers,
Jeff
It's in next-2.6.33
--
Jens Axboe
> On Mon, Dec 07 2009, Jeff Moyer wrote:
>> Jens Axboe <jens....@oracle.com> writes:
>>
>> > On Sun, Dec 06 2009, Corrado Zoccolo wrote:
>> >> Hi Jeff,
>> >> I remember you saw large performance drop on your SAN for sequential
>> >> writes with low_latency=1. Can you test if Shaohua's and this patch
>> >> fix allow to recover some bandwidth? I think that enabling the queue
>> >> depth ramp up only if a sync request was delayed should disable it for
>> >> fast hardware like yours, so you should not be seeing the slowdown any
>> >> more.
>> >
>> > I queued this up for post inclusion into 2.6.33, with the time_after()
>> > fixed.
>> >
>> > The patch was word-wrapped, btw.
>>
>> So in what branch can I find this fix? Once I know that I can queue up
>> some tests.
>
> It's in next-2.6.33
next-2.6.33 won't boot for me:
general protection fault: 0000 [#1] SMP
async/0 used greatest stack depth: 4256 bytes left
last sysfs file: /sys/class/firmware/timeout
CPU 1
Modules linked in: ata_piix pata_acpi libata sd_mod scsi_mod ext3 jbd mbcache uh
ci_hcd ohci_hcd ehci_hcd
Pid: 729, comm: async/1 Not tainted 2.6.32 #1 ProLiant DL320 G5p
RIP: 0010:[<ffffffff81199cee>] [<ffffffff81199cee>] cfq_put_cfqg+0x0/0x91
RSP: 0018:ffff8801251b1d48 EFLAGS: 00010002
RAX: ffff880126dcdd28 RBX: ffff8801251fa158 RCX: 0000000000170001
RDX: ffff880125556700 RSI: ffff8801251fa158 RDI: 6b6b6b6b6b6b6b6b
RBP: ffff8801251b1d70 R08: ffff8801255a0448 R09: 000000000000005a
R10: ffff8801255a0448 R11: ffffffff818d6210 R12: ffff880126dcdb18
R13: ffff880126dcdb50 R14: 0000000000000286 R15: ffff880125556760
FS: 0000000000000000(0000) GS:ffff88002f200000(0000) knlGS:0000000000000000
CS: 0010 DS: 0018 ES: 0018 CR0: 000000008005003b
CR2: 0000000000000000 CR3: 00000001256a5000 CR4: 00000000000006a0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Process async/1 (pid: 729, threadinfo ffff8801251b0000, task ffff880125556770)
Stack:
ffffffff8119a856 0000000000000002 ffff880126dcdb18 ffff8801251fa158
<0> ffff880125210000 ffff8801251b1d90 ffffffff8119a9e5 ffff8801251f80d0
<0> ffff880126dcdb18 ffff8801251b1db0 ffffffff8119aa62 ffff880126dcdb18
Call Trace:
[<ffffffff8119a856>] ? cfq_put_queue+0xfa/0x102
[<ffffffff8119a9e5>] cfq_exit_cfqq+0x99/0x9e
[<ffffffff8119aa62>] __cfq_exit_single_io_context+0x78/0x85
[<ffffffff8119aaa9>] cfq_exit_single_io_context+0x3a/0x52
[<ffffffff8119aa6f>] ? cfq_exit_single_io_context+0x0/0x52
[<ffffffff8119b27b>] call_for_each_cic+0x56/0x7c
[<ffffffff8119b225>] ? call_for_each_cic+0x0/0x7c
[<ffffffff8119b2b1>] cfq_exit_io_context+0x10/0x12
[<ffffffff81192d3b>] exit_io_context+0x93/0xbc
[<ffffffff81192d03>] ? exit_io_context+0x5b/0xbc
[<ffffffff810474e5>] do_exit+0x71a/0x747
[<ffffffff810628f1>] ? async_thread+0x0/0x1fa
[<ffffffff8105cd9e>] kthread_stop+0x0/0xb3
[<ffffffff81033fa6>] ? complete+0x1c/0x4b
[<ffffffff8100cafa>] child_rip+0xa/0x20
[<ffffffff8103d667>] ? finish_task_switch+0x0/0xe3
[<ffffffff8100c4bc>] ? restore_args+0x0/0x30
[<ffffffff8105ccf8>] ? kthreadd+0xdf/0x100
[<ffffffff8105cd19>] ? kthread+0x0/0x85
[<ffffffff8100caf0>] ? child_rip+0x0/0x20
Code: 48 c7 43 38 00 00 00 00 48 c7 43 40 00 00 00 00 48 89 3e 48 8b 73 48 e8 fd 9e 00 00 eb 08 48 c7 43 48 00 00 00 00 5b 41 5c c9 c3 <8b> 87 d8 01 00 00 55 48 89 e5 85 c0 7f 04 0f 0b eb fe 48 8d 87
RIP [<ffffffff81199cee>] cfq_put_cfqg+0x0/0x91
RSP <ffff8801251b1d48>
---[ end trace ac909576caca45e8 ]---
Thanks Jeff. Looks like something wrong with my code. I am looking into
it now.
Thanks
Vivek
Hm.., I seem to be accessing cfqq->orig_cfqg, after I have freed cfqq.
Following patch should fix it. Testing it now.
Thanks
Vivek
Index: linux-2.6-block/block/cfq-iosched.c
===================================================================
--- linux-2.6-block.orig/block/cfq-iosched.c
+++ linux-2.6-block/block/cfq-iosched.c
@@ -2368,7 +2368,7 @@ static int cfq_dispatch_requests(struct
static void cfq_put_queue(struct cfq_queue *cfqq)
{
struct cfq_data *cfqd = cfqq->cfqd;
- struct cfq_group *cfqg;
+ struct cfq_group *cfqg, *orig_cfqg;
BUG_ON(atomic_read(&cfqq->ref) <= 0);
@@ -2379,6 +2379,7 @@ static void cfq_put_queue(struct cfq_que
BUG_ON(rb_first(&cfqq->sort_list));
BUG_ON(cfqq->allocated[READ] + cfqq->allocated[WRITE]);
cfqg = cfqq->cfqg;
+ orig_cfqg = cfqq->orig_cfqg;
if (unlikely(cfqd->active_queue == cfqq)) {
__cfq_slice_expired(cfqd, cfqq, 0);
@@ -2388,8 +2389,8 @@ static void cfq_put_queue(struct cfq_que
BUG_ON(cfq_cfqq_on_rr(cfqq));
kmem_cache_free(cfq_pool, cfqq);
cfq_put_cfqg(cfqg);
- if (cfqq->orig_cfqg)
- cfq_put_cfqg(cfqq->orig_cfqg);
+ if (orig_cfqg)
+ cfq_put_cfqg(orig_cfqg);
}
/*
[..]
I tested this patch and it is working. Now system boots fine. Thanks for
loaning the system to me Jeff.
Vivek
> Hi Jeff, I remember you saw large performance drop on your SAN for
> sequential writes with low_latency=1. Can you test if Shaohua's and
> this patch fix allow to recover some bandwidth? I think that enabling
> the queue depth ramp up only if a sync request was delayed should
> disable it for fast hardware like yours, so you should not be seeing
> the slowdown any more.
Average of 10 runs. Low latency set to 0:
Unit information
================
File size = megabytes
Blk Size = bytes
Rate = megabytes per second
CPU% = percentage of CPU used during the test
Latency = milliseconds
Lat% = percent of requests that took longer than X seconds
CPU Eff = Rate divided by CPU% - throughput per cpu load
Sequential Reads
File Blk Num Avg Maximum Lat% Lat% CPU
Identifier Size Size Thr Rate (CPU%) Latency Latency >2s >10s Eff
---------------------------- ------ ----- --- ------ ------ --------- ----------- -------- -------- -----
2.6.32 8192 65536 8 90.08 94.51% 51.316 10268.25 0.00000 0.00000 95
2.6.32 8192 65536 16 99.36 199.8% 89.248 13883.81 0.00000 0.00000 50
Random Reads
File Blk Num Avg Maximum Lat% Lat% CPU
Identifier Size Size Thr Rate (CPU%) Latency Latency >2s >10s Eff
---------------------------- ------ ----- --- ------ ------ --------- ----------- -------- -------- -----
2.6.32 8192 65536 8 81.16 85.90% 45.672 5963.19 0.00000 0.00000 94
2.6.32 8192 65536 16 116.32 230.3% 58.371 6098.36 0.00000 0.00000 51
Sequential Writes
File Blk Num Avg Maximum Lat% Lat% CPU
Identifier Size Size Thr Rate (CPU%) Latency Latency >2s >10s Eff
---------------------------- ------ ----- --- ------ ------ --------- ----------- -------- -------- -----
2.6.32 8192 65536 8 112.17 1085.% 42.623 17114.54 0.00152 0.00000 10
2.6.32 8192 65536 16 111.26 2117.% 84.964 26480.60 0.03202 0.00000 5
Random Writes
File Blk Num Avg Maximum Lat% Lat% CPU
Identifier Size Size Thr Rate (CPU%) Latency Latency >2s >10s Eff
---------------------------- ------ ----- --- ------ ------ --------- ----------- -------- -------- -----
2.6.32 8192 65536 8 162.48 887.1% 6.106 313.54 0.00000 0.00000 18
2.6.32 8192 65536 16 156.38 1767.% 14.077 1254.62 0.00000 0.00000 9
Average of 10 runs. Low latency set to 1:
Unit information
================
File size = megabytes
Blk Size = bytes
Rate = megabytes per second
CPU% = percentage of CPU used during the test
Latency = milliseconds
Lat% = percent of requests that took longer than X seconds
CPU Eff = Rate divided by CPU% - throughput per cpu load
Sequential Reads
File Blk Num Avg Maximum Lat% Lat% CPU
Identifier Size Size Thr Rate (CPU%) Latency Latency >2s >10s Eff
---------------------------- ------ ----- --- ------ ------ --------- ----------- -------- -------- -----
2.6.32 8192 65536 8 92.23 100.1% 52.119 6920.18 0.00000 0.00000 92
2.6.32 8192 65536 16 97.88 217.0% 99.691 7453.18 0.00000 0.00000 45
Random Reads
File Blk Num Avg Maximum Lat% Lat% CPU
Identifier Size Size Thr Rate (CPU%) Latency Latency >2s >10s Eff
---------------------------- ------ ----- --- ------ ------ --------- ----------- -------- -------- -----
2.6.32 8192 65536 8 98.70 107.5% 42.994 3409.08 0.00000 0.00000 92
2.6.32 8192 65536 16 140.41 323.9% 59.616 4525.46 0.00000 0.00000 43
Sequential Writes
File Blk Num Avg Maximum Lat% Lat% CPU
Identifier Size Size Thr Rate (CPU%) Latency Latency >2s >10s Eff
---------------------------- ------ ----- --- ------ ------ --------- ----------- -------- -------- -----
2.6.32 8192 65536 8 112.33 1076.% 42.617 17072.28 0.00076 0.00000 10
2.6.32 8192 65536 16 111.84 2097.% 85.156 28221.77 0.02976 0.00000 5
Random Writes
File Blk Num Avg Maximum Lat% Lat% CPU
Identifier Size Size Thr Rate (CPU%) Latency Latency >2s >10s Eff
---------------------------- ------ ----- --- ------ ------ --------- ----------- -------- -------- -----
2.6.32 8192 65536 8 159.33 870.5% 6.469 765.50 0.00000 0.00000 18
2.6.32 8192 65536 16 141.60 1632.% 15.364 2337.57 0.00000 0.00000 9
Cheers,
Jeff
> The introduction of ramp-up formula for async queue depths has
> slowed down dirty page reclaim, by reducing async write performance.
> This patch makes sure the formula kicks in only when sync request
> was recently delayed.
> @@ -3706,7 +3707,7 @@ static void *cfq_init_queue(struct request_queue *q)
> cfqd->cfq_latency = 1;
> cfqd->cfq_group_isolation = 0;
> cfqd->hw_tag = -1;
> - cfqd->last_end_sync_rq = jiffies;
> + cfqd->last_delayed_sync = jiffies - HZ;
So, umm, what's that about?
Cheers,
Jeff
Previously, when cfq started, it started in a state where writes were
completely throttled. Now, we optimistically prefer to start with a
reasonable max depth (10)
Thanks
Corrado
> Cheers,
> Jeff
>
--
__________________________________________________________________________
dott. Corrado Zoccolo mailto:czoc...@gmail.com
PhD - Department of Computer Science - University of Pisa, Italy
--------------------------------------------------------------------------
The self-confidence of a warrior is not the self-confidence of the average
man. The average man seeks certainty in the eyes of the onlooker and calls
that self-confidence. The warrior seeks impeccability in his own eyes and
calls that humbleness.
Tales of Power - C. Castaneda
> The numbers look good. Now, there is no penalty in having low_latency
> set for sequential writes, and just a small penalty for random ones.
> The fact that random reads are faster with low_latency set is interesting.
> Is the test running with your patched tiobench (so that the number
> of random operations is comparable with sequential ones)?
No, I forgot all about that. The number of random operations defaults
to 4000, which is pretty low. I'll re-run the tests with a number
comparable to the sequential runs. Sorry about that.
> Hi Jeff,
> On Tue, Dec 8, 2009 at 7:00 PM, Jeff Moyer <jmo...@redhat.com> wrote:
>> Corrado Zoccolo <czoc...@gmail.com> writes:
>>
>>> The introduction of ramp-up formula for async queue depths has
>>> slowed down dirty page reclaim, by reducing async write performance.
>>> This patch makes sure the formula kicks in only when sync request
>>> was recently delayed.
>>> @@ -3706,7 +3707,7 @@ static void *cfq_init_queue(struct request_queue *q)
>>> cfqd->cfq_latency = 1;
>>> cfqd->cfq_group_isolation = 0;
>>> cfqd->hw_tag = -1;
>>> - cfqd->last_end_sync_rq = jiffies;
>>> + cfqd->last_delayed_sync = jiffies - HZ;
>>
>> So, umm, what's that about?
>
> Previously, when cfq started, it started in a state where writes were
> completely throttled. Now, we optimistically prefer to start with a
> reasonable max depth (10)
OK. Can we put a comment in there and change the initialization to
cfq_slice_sync * 10?
Thanks,
Jeff
Agree, that would be MUCH easier to understand.
--
Jens Axboe
From f06cd83b45b3a7ee13ae7322197b610085dc70dd Mon Sep 17 00:00:00 2001
From: Corrado Zoccolo <corrado@localhost.(none)>
Date: Wed, 9 Dec 2009 20:40:16 +0100
Subject: [PATCH] cfq-iosched: commenting non-obvious initialization
Added a comment to explain the initialization of last_delayed_sync.
Signed-off-by: Corrado Zoccolo <czoc...@gmail.com>
---
block/cfq-iosched.c | 4 ++++
1 files changed, 4 insertions(+), 0 deletions(-)
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 98b15b9..69ecee7 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -3759,6 +3759,10 @@ static void *cfq_init_queue(struct request_queue *q)
cfqd->cfq_latency = 1;
cfqd->cfq_group_isolation = 0;
cfqd->hw_tag = -1;
+ /*
+ * we optimistically start assuming sync ops weren't delayed in last
+ * second, in order to have larger depth for async operations.
+ */
cfqd->last_delayed_sync = jiffies - HZ;
INIT_RCU_HEAD(&cfqd->rcu);
return cfqd;
--
1.6.2.5
Thanks, that explains the HZ nicely. Applied.
--
Jens Axboe
> On Wed, Dec 9, 2009 at 8:05 PM, Jens Axboe <jens....@oracle.com> wrote:
>> On Wed, Dec 09 2009, Jeff Moyer wrote:
>>> OK. Can we put a comment in there and change the initialization to
>>> cfq_slice_sync * 10?
>>
>> Agree, that would be MUCH easier to understand.
>>
> Sure, we can put a comment there, but I don't like hardcoding a
> constant that depends on how the formula is computed (what if the
> formula is changed, and it doesn't depend on cfq_slice_sync any more,
> or if cfq_slice_sync changes dynamically?).
Then presumably you'd change the initialization of that variable.
> When I wrote it, what I really meant was exactly what you read in the
> C code (assume the last delayed sync happened 1 second ago). Then, the
> effect would be to start with a queue depth of 10 with the current
> formula, but even if we change the formula, 1 second is still
> meaningful (while 10 *cfq_slice_sync, that has the same value, becomes
> misleading). So my proposed fix is just:
Well, given your initial explanation, my suggestion made sense to me.
Given this new explanation, I'm fine with the change below. Thanks for
clarifying.
Acked-by: Jeff Moyer <jmo...@redhat.com>
--
> Cheers,
> Jeff
>
Thanks
Corrado
> Hi Jeff,
> On Wed, Dec 9, 2009 at 7:09 PM, Jeff Moyer <jmo...@redhat.com> wrote:
>> Corrado Zoccolo <czoc...@gmail.com> writes:
>>
>>> The numbers look good. Now, there is no penalty in having low_latency
>>> set for sequential writes, and just a small penalty for random ones.
>>> The fact that random reads are faster with low_latency set is interesting.
>>> Is the test running with your patched tiobench (so that the number
>>> of random operations is comparable with sequential ones)?
>>
>> No, I forgot all about that. The number of random operations defaults
>> to 4000, which is pretty low. I'll re-run the tests with a number
>> comparable to the sequential runs. Sorry about that.
>>
> N.P.
low_latency=1
Random Reads
File Blk Num Avg Maximum Lat% Lat% CPU
Identifier Size Size Thr Rate (CPU%) Latency Latency >2s >10s Eff
---------------------------- ------ ----- --- ------ ------ --------- ----------- -------- -------- -----
2.6.32 8192 65536 8 19.35 26.10% 74.639 3278.74 0.00000 0.00000 74
2.6.32 8192 65536 16 20.40 53.07% 135.695 3705.55 0.00000 0.00000 38
Random Writes
File Blk Num Avg Maximum Lat% Lat% CPU
Identifier Size Size Thr Rate (CPU%) Latency Latency >2s >10s Eff
---------------------------- ------ ----- --- ------ ------ --------- ----------- -------- -------- -----
2.6.32 8192 65536 8 64.64 342.2% 22.708 5960.22 0.00153 0.00000 19
2.6.32 8192 65536 16 61.19 663.7% 47.648 11294.56 0.16098 0.00000 9
low_latency=0
Random Reads
File Blk Num Avg Maximum Lat% Lat% CPU
Identifier Size Size Thr Rate (CPU%) Latency Latency >2s >10s Eff
---------------------------- ------ ----- --- ------ ------ --------- ----------- -------- -------- -----
2.6.32 8192 65536 8 19.71 25.32% 70.891 4340.92 0.00000 0.00000 78
2.6.32 8192 65536 16 22.67 58.94% 121.669 6180.49 0.00229 0.00000 38
Random Writes
File Blk Num Avg Maximum Lat% Lat% CPU
Identifier Size Size Thr Rate (CPU%) Latency Latency >2s >10s Eff
---------------------------- ------ ----- --- ------ ------ --------- ----------- -------- -------- -----
2.6.32 8192 65536 8 64.69 339.5% 22.651 6770.99 0.00152 0.00000 19
2.6.32 8192 65536 16 61.94 663.0% 46.962 10719.13 0.12741 0.00000 9
> if you have time, can you also re-run the test changing:
> iosched/fifo_expire_async to 8 ?
Sure.
Cheers,
Jeff
> Hi Jeff,
> On Wed, Dec 9, 2009 at 7:09 PM, Jeff Moyer <jmo...@redhat.com> wrote:
>> Corrado Zoccolo <czoc...@gmail.com> writes:
>>
>>> The numbers look good. Now, there is no penalty in having low_latency
>>> set for sequential writes, and just a small penalty for random ones.
>>> The fact that random reads are faster with low_latency set is interesting.
>>> Is the test running with your patched tiobench (so that the number
>>> of random operations is comparable with sequential ones)?
>>
>> No, I forgot all about that. The number of random operations defaults
>> to 4000, which is pretty low. I'll re-run the tests with a number
>> comparable to the sequential runs. Sorry about that.
>>
> N.P.
> if you have time, can you also re-run the test changing:
> iosched/fifo_expire_async to 8 ?
> I hope that reducing the expire_async, will make cfq quicker at switching
> between the different threads, allowing more parallelism for seq
> writers on your hw.
> If this is the case, I think I can try to estimate the
> fifo_expire_async in the autotuning patch.
Sorry this took so long. I've been rather busy of late.
Cheers,
Jeff
low_latency=1, fifo_expire_async=8
Unit information
================
File size = megabytes
Blk Size = bytes
Rate = megabytes per second
CPU% = percentage of CPU used during the test
Latency = milliseconds
Lat% = percent of requests that took longer than X seconds
CPU Eff = Rate divided by CPU% - throughput per cpu load
Sequential Reads
File Blk Num Avg Maximum Lat% Lat% CPU
Identifier Size Size Thr Rate (CPU%) Latency Latency >2s >10s Eff
---------------------------- ------ ----- --- ------ ------ --------- ----------- -------- -------- -----
2.6.32 8192 65536 8 88.39 89.74% 16.388 2032.62 0.00000 0.00000 98
2.6.32 8192 65536 16 90.77 185.3% 32.213 2175.99 0.00000 0.00000 49
Random Reads
File Blk Num Avg Maximum Lat% Lat% CPU
Identifier Size Size Thr Rate (CPU%) Latency Latency >2s >10s Eff
---------------------------- ------ ----- --- ------ ------ --------- ----------- -------- -------- -----
2.6.32 8192 65536 8 19.62 25.74% 71.827 3397.26 0.00000 0.00000 76
2.6.32 8192 65536 16 23.82 55.01% 103.361 4075.53 0.00000 0.00000 43
Sequential Writes
File Blk Num Avg Maximum Lat% Lat% CPU
Identifier Size Size Thr Rate (CPU%) Latency Latency >2s >10s Eff
---------------------------- ------ ----- --- ------ ------ --------- ----------- -------- -------- -----
2.6.32 8192 65536 8 108.28 1007.% 12.984 5643.55 0.00076 0.00000 11
2.6.32 8192 65536 16 112.40 2014.% 25.430 8592.98 0.00839 0.00000 6
Random Writes
File Blk Num Avg Maximum Lat% Lat% CPU
Identifier Size Size Thr Rate (CPU%) Latency Latency >2s >10s Eff
---------------------------- ------ ----- --- ------ ------ --------- ----------- -------- -------- -----
2.6.32 8192 65536 8 63.94 337.7% 22.885 6047.22 0.00076 0.00000 19
2.6.32 8192 65536 16 61.94 662.5% 46.997 12759.69 0.15411 0.00000 9
low_latency=0, fifo_expire_async=8
Unit information
================
File size = megabytes
Blk Size = bytes
Rate = megabytes per second
CPU% = percentage of CPU used during the test
Latency = milliseconds
Lat% = percent of requests that took longer than X seconds
CPU Eff = Rate divided by CPU% - throughput per cpu load
Sequential Reads
File Blk Num Avg Maximum Lat% Lat% CPU
Identifier Size Size Thr Rate (CPU%) Latency Latency >2s >10s Eff
---------------------------- ------ ----- --- ------ ------ --------- ----------- -------- -------- -----
2.6.32 8192 65536 8 89.13 88.30% 15.872 3101.39 0.00000 0.00000 101
2.6.32 8192 65536 16 86.78 161.7% 30.794 4909.02 0.00000 0.00000 54
Random Reads
File Blk Num Avg Maximum Lat% Lat% CPU
Identifier Size Size Thr Rate (CPU%) Latency Latency >2s >10s Eff
---------------------------- ------ ----- --- ------ ------ --------- ----------- -------- -------- -----
2.6.32 8192 65536 8 20.21 26.64% 69.863 4285.42 0.00000 0.00000 76
2.6.32 8192 65536 16 20.10 52.75% 139.761 5986.94 0.00076 0.00000 38
Sequential Writes
File Blk Num Avg Maximum Lat% Lat% CPU
Identifier Size Size Thr Rate (CPU%) Latency Latency >2s >10s Eff
---------------------------- ------ ----- --- ------ ------ --------- ----------- -------- -------- -----
2.6.32 8192 65536 8 108.74 1020.% 13.070 5331.78 0.00076 0.00000 11
2.6.32 8192 65536 16 112.18 2020.% 25.559 7903.16 0.00992 0.00000 6
Random Writes
File Blk Num Avg Maximum Lat% Lat% CPU
Identifier Size Size Thr Rate (CPU%) Latency Latency >2s >10s Eff
---------------------------- ------ ----- --- ------ ------ --------- ----------- -------- -------- -----
2.6.32 8192 65536 8 64.53 337.8% 22.671 5388.77 0.00000 0.00000 19
2.6.32 8192 65536 16 61.75 668.9% 47.265 13271.37 0.12894 0.00000 9