Hi Scott,
On Feb 17, 4:50 pm, Scott Hernandez <
scotthernan...@gmail.com> wrote:
> You can make sure writes initially get distributed better by
> pre-splitting chunks and making sure those chunks are assigned to
> different shards before you ever insert data. That way when the first
> data come it not start with a single chunk going to a single shard
> (server).
Hi Scott,
Many thanks for the reply. I understand a lot better now. So, I've
been testing on a PowerEdge R710 (32GB RAM, 8 x 2.4GHz cores) with 2 x
mongod, 3 x config servers and 2 x mongos. /data is xfs, mounted
noatime,nodiratime on 6 x 15K SAS RAID 0 array.
I have a C++ stress test app which I run twice, each connecting to one
mongos. I am sharding on a key which has 10 possible values (0-9), and 5
chunks on each shard. But I am seeing it taking over 10 seconds to do
100K inserts, which is around 10X slower than writing just to a single
mongod on the same box. This seems wrong. Should it be like this, or
am I doing something wrong? There is barely any disk IO going on and
each process is maxing out at only about 60% CPU - and there are
enough cores for every process to have one each.
mongod --dbpath /data/config1/ --port 20000
mongod --dbpath /data/config2/ --port 20001
mongod --dbpath /data/config3/ --port 20002
mongos --port 30000 --configdb localhost:20000,localhost:20001,localhost:20002
mongos --port 30001 --configdb localhost:20000,localhost:20001,localhost:20002
mongod --dbpath /data/shard1/ --port 10000
mongod --dbpath /data/shard2/ --port 10001
mongo localhost:30000/admin
The C++ app does this.
const char *data =
"mongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotestmongotest";
for (int i = 0; i < 10000; ++i) {
for (int j = 0; j < 10; ++j) {
BSONObjBuilder page;
page.genOID();
page.append("ResultID", 100);
page.append("StartDateTime", 33);
page.append("shard", j);
page.append("data", data);
BSONObj pageO = page.obj();
c.insert("chris.test", pageO);
}
}
(complete source at
http://chrishowells.co.uk/people.txt)
[root@man-dt-01850 ~]# time ./people
real 0m11.599s
user 0m0.328s
sys 0m0.153s
I set up sharding like this:
> db.runCommand({addshard : "localhost:10000", allowLocal: true})
{ "shardAdded" : "shard0000", "ok" : 1 }
> db.runCommand({addshard : "localhost:10001", allowLocal: true})
{ "shardAdded" : "shard0001", "ok" : 1 }
> use admin;
switched to db admin
> db.runCommand({"enablesharding" : "chris"})
{ "ok" : 1 }
> db.runCommand({"shardcollection" : "chris.test", "key" : {"shard" : 1}})
{ "collectionsharded" : "chris.test", "ok" : 1 }
> db.runCommand({ split : "chris.test" , middle : { shard : 1 }})
{ "ok" : 1 }
> db.runCommand({ split : "chris.test" , middle : { shard : 2 }})
{ "ok" : 1 }
> db.runCommand({ split : "chris.test" , middle : { shard : 3 }})
{ "ok" : 1 }
> db.runCommand({ split : "chris.test" , middle : { shard : 4 }})
{ "ok" : 1 }
> db.runCommand({ split : "chris.test" , middle : { shard : 5 }})
{ "ok" : 1 }
> db.runCommand({ split : "chris.test" , middle : { shard : 6 }})
{ "ok" : 1 }
> db.runCommand({ split : "chris.test" , middle : { shard : 7 }})
{ "ok" : 1 }
> db.runCommand({ split : "chris.test" , middle : { shard : 8 }})
{ "ok" : 1 }
> db.runCommand({ split : "chris.test" , middle : { shard : 9 }})
{ "ok" : 1 }
> use config;
switched to db config
> db.shards.find()
{ "_id" : "shard0000", "host" : "localhost:10000" }
{ "_id" : "shard0001", "host" : "localhost:10001" }
> db.chunks.find()
{ "_id" : "chris.test-shard_MinKey", "lastmod" : { "t" : 2000, "i" :
0 }, "ns" : "chris.test", "min" : { "shard" : { $minKey : 1 } },
"max" : { "shard" : 1 }, "shard" : "shard0001" }
{ "_id" : "chris.test-shard_1.0", "lastmod" : { "t" : 3000, "i" : 0 },
"ns" : "chris.test", "min" : { "shard" : 1 }, "max" : { "shard" : 2 },
"shard" : "shard0001" }
{ "_id" : "chris.test-shard_2.0", "lastmod" : { "t" : 4000, "i" : 0 },
"ns" : "chris.test", "min" : { "shard" : 2 }, "max" : { "shard" : 3 },
"shard" : "shard0001" }
{ "_id" : "chris.test-shard_3.0", "lastmod" : { "t" : 5000, "i" : 0 },
"ns" : "chris.test", "min" : { "shard" : 3 }, "max" : { "shard" : 4 },
"shard" : "shard0001" }
{ "_id" : "chris.test-shard_4.0", "lastmod" : { "t" : 6000, "i" : 0 },
"ns" : "chris.test", "min" : { "shard" : 4 }, "max" : { "shard" : 5 },
"shard" : "shard0001" }
{ "_id" : "chris.test-shard_5.0", "lastmod" : { "t" : 6000, "i" : 1 },
"ns" : "chris.test", "min" : { "shard" : 5 }, "max" : { "shard" : 6 },
"shard" : "shard0000" }
{ "_id" : "chris.test-shard_6.0", "lastmod" : { "t" : 1000, "i" :
13 }, "ns" : "chris.test", "min" : { "shard" : 6 }, "max" :
{ "shard" : 7 }, "shard" : "shard0000" }
{ "_id" : "chris.test-shard_7.0", "lastmod" : { "t" : 2000, "i" : 2 },
"ns" : "chris.test", "min" : { "shard" : 7 }, "max" : { "shard" : 8 },
"shard" : "shard0000" }
{ "_id" : "chris.test-shard_8.0", "lastmod" : { "t" : 4000, "i" : 2 },
"ns" : "chris.test", "min" : { "shard" : 8 }, "max" : { "shard" : 9 },
"shard" : "shard0000" }
{ "_id" : "chris.test-shard_9.0", "lastmod" : { "t" : 4000, "i" : 3 },
"ns" : "chris.test", "min" : { "shard" : 9 }, "max" : { "shard" :
{ $maxKey : 1 } }, "shard" : "shard0000" }
I see messages like this from the mongods:
Fri Feb 18 13:03:09 [conn5] request split points lookup for chunk
chris.test { : 8.0 } -->> { : 9.0 }
Fri Feb 18 13:03:09 [conn5] warning: chunk is larger than 33554432
bytes because of key { shard: 8 }
Fri Feb 18 13:03:09 [conn5] Finding the split vector for chris.test
over { shard: 1.0 } keyCount: 8924 numSplits: 0 took 229 ms.
Fri Feb 18 13:03:09 [conn5] query admin.$cmd ntoreturn:1 command:
{ splitVector: "chris.test", keyPattern: { shard: 1.0 }, min: { shard:
8.0 }, max: { shard: 9.0 }, maxChunkSizeBytes: 33554432,
maxSplitPoints: 2, maxChunkObjects: 250000 } reslen:69 229ms
Fri Feb 18 13:03:10 [conn5] request split points lookup for chunk
chris.test { : 5.0 } -->> { : 6.0 }
Fri Feb 18 13:03:11 [conn5] warning: chunk is larger than 33554432
bytes because of key { shard: 5 }
Fri Feb 18 13:03:11 [conn5] Finding the split vector for chris.test
over { shard: 1.0 } keyCount: 8924 numSplits: 0 took 231 ms.
Fri Feb 18 13:03:11 [conn5] query admin.$cmd ntoreturn:1 command:
{ splitVector: "chris.test", keyPattern: { shard: 1.0 }, min: { shard:
5.0 }, max: { shard: 6.0 }, maxChunkSizeBytes: 33554432,
maxSplitPoints: 2, maxChunkObjects: 250000 } reslen:69 231ms
After a few runs:
> use chris
switched to db chris
> db.test.find().count()
3800000
> db.printShardingStatus()
--- Sharding Status ---
sharding version: { "_id" : 1, "version" : 3 }
shards:
{ "_id" : "shard0000", "host" : "localhost:10000" }
{ "_id" : "shard0001", "host" : "localhost:10001" }
databases:
{ "_id" : "admin", "partitioned" : false, "primary" :
"config" }
{ "_id" : "chris", "partitioned" : true, "primary" :
"shard0000" }
chris.test chunks:
shard0001 5
shard0000 5
{ "shard" : { $minKey : 1 } } -->> { "shard" :
1 } on : shard0001 { "t" : 2000, "i" : 0 }
{ "shard" : 1 } -->> { "shard" : 2 } on :
shard0001 { "t" : 3000, "i" : 0 }
{ "shard" : 2 } -->> { "shard" : 3 } on :
shard0001 { "t" : 4000, "i" : 0 }
{ "shard" : 3 } -->> { "shard" : 4 } on :
shard0001 { "t" : 5000, "i" : 0 }
{ "shard" : 4 } -->> { "shard" : 5 } on :
shard0001 { "t" : 6000, "i" : 0 }
{ "shard" : 5 } -->> { "shard" : 6 } on :
shard0000 { "t" : 6000, "i" : 1 }
{ "shard" : 6 } -->> { "shard" : 7 } on :
shard0000 { "t" : 1000, "i" : 13 }
{ "shard" : 7 } -->> { "shard" : 8 } on :
shard0000 { "t" : 2000, "i" : 2 }
{ "shard" : 8 } -->> { "shard" : 9 } on :
shard0000 { "t" : 4000, "i" : 2 }
{ "shard" : 9 } -->> { "shard" : { $maxKey :
1 } } on : shard0000 { "t" : 4000, "i" : 3 }
Thanks for any clues.