#Cassandra properties
#host list is in the form> 1.1.1.1:9160,1.1.1.2:9160
kairosdb.datastore.cassandra.host_list= <list of cassandra nodes>
kairosdb.datastore.cassandra.keyspace=kairosdb
kairosdb.datastore.cassandra.replication_factor=3
kairosdb.datastore.cassandra.write_delay=500
kairosdb.datastore.cassandra.write_buffer_max_size=500000
#When reading one row read in 10k
kairosdb.datastore.cassandra.single_row_read_size=10240
#The number of rows to read when doing a multi get
kairosdb.datastore.cassandra.multi_row_size=1000
#The amount of data to read from each row when doing a multi get
kairosdb.datastore.cassandra.multi_row_read_size=1024
#Size of the row key cache size. This can be monitored by querying
#kairosdb.datastore.write_size and filtering on the tag buffer = row_key_index
#Ideally the data written to the row_key_index should stabilize to zero except
#when data rolls to a new row
kairosdb.datastore.cassandra.row_key_cache_size=10240
kairosdb.datastore.cassandra.string_cache_size=5000
#
03-04|17:47:14.521 [Thread-5] WARN [HConnectionManager.java:302] - Could not fullfill request on this host CassandraClient<10.1.31.101:9160-27204>
03-04|17:47:14.521 [Thread-5] WARN [HConnectionManager.java:303] - Exception:
me.prettyprint.hector.api.exceptions.HTimedOutException: TimedOutException(acknowledged_by:1)
at me.prettyprint.cassandra.service.ExceptionsTranslatorImpl.translate(ExceptionsTranslatorImpl.java:42) ~[hector-core-1.1-4.jar:na]
at me.prettyprint.cassandra.connection.HConnectionManager.operateWithFailover(HConnectionManager.java:260) ~[hector-core-1.1-4.jar:na]
at me.prettyprint.cassandra.model.ExecutingKeyspace.doExecuteOperation(ExecutingKeyspace.java:113) [hector-core-1.1-4.jar:na]
at me.prettyprint.cassandra.model.MutatorImpl.execute(MutatorImpl.java:243) [hector-core-1.1-4.jar:na]
at org.kairosdb.datastore.cassandra.WriteBuffer.run(WriteBuffer.java:237) [kairosdb-0.9.4-6.jar:0.9.4-6.20150330114205]
at java.lang.Thread.run(Thread.java:745) [na:1.7.0_91]
Caused by: org.apache.cassandra.thrift.TimedOutException: null
at org.apache.cassandra.thrift.Cassandra$batch_mutate_result.read(Cassandra.java:20849) ~[cassandra-thrift-1.2.5.jar:1.2.5]
at org.apache.thrift.TServiceClient.receiveBase(TServiceClient.java:78) ~[libthrift-0.7.0.jar:0.7.0]
at org.apache.cassandra.thrift.Cassandra$Client.recv_batch_mutate(Cassandra.java:964) ~[cassandra-thrift-1.2.5.jar:1.2.5]
at org.apache.cassandra.thrift.Cassandra$Client.batch_mutate(Cassandra.java:950) ~[cassandra-thrift-1.2.5.jar:1.2.5]
at me.prettyprint.cassandra.model.MutatorImpl$3.execute(MutatorImpl.java:246) ~[hector-core-1.1-4.jar:na]
at me.prettyprint.cassandra.model.MutatorImpl$3.execute(MutatorImpl.java:243) ~[hector-core-1.1-4.jar:na]
at me.prettyprint.cassandra.service.Operation.executeAndSetResult(Operation.java:104) ~[hector-core-1.1-4.jar:na]
at me.prettyprint.cassandra.connection.HConnectionManager.operateWithFailover(HConnectionManager.java:253) ~[hector-core-1.1-4.jar:na]
... 4 common frames omitted
03-04|17:48:00.000 [QuartzScheduler_Worker-3] INFO [WriteBuffer.java:199] - Increasing write buffer data_points size to 37719
03-04|17:48:00.001 [QuartzScheduler_Worker-3] INFO [WriteBuffer.java:199] - Increasing write buffer row_key_index size to 17764
Full Configuration:
#===============================================================================
#Cassandra properties
#host list is in the form> 1.1.1.1:9160,1.1.1.2:9160
kairosdb.datastore.cassandra.host_list=<list of nodes>
kairosdb.datastore.cassandra.keyspace=kairosdb
kairosdb.datastore.cassandra.replication_factor=3
kairosdb.datastore.cassandra.write_delay=500
kairosdb.datastore.cassandra.write_buffer_max_size=500000
#When reading one row read in 10k
kairosdb.datastore.cassandra.single_row_read_size=10240
#The number of rows to read when doing a multi get
kairosdb.datastore.cassandra.multi_row_size=1000
#The amount of data to read from each row when doing a multi get
kairosdb.datastore.cassandra.multi_row_read_size=1024
#Size of the row key cache size. This can be monitored by querying
#kairosdb.datastore.write_size and filtering on the tag buffer = row_key_index
#Ideally the data written to the row_key_index should stabilize to zero except
#when data rolls to a new row
kairosdb.datastore.cassandra.row_key_cache_size=10240
kairosdb.datastore.cassandra.string_cache_size=5000
# Uses Quartz Cron syntax - default is to run every five minutes
kairosdb.datastore.cassandra.increase_buffer_size_schedule=0 */1 * * * ?
#Control the required consistency for cassandra operations.
#Available settings are cassandra version dependent:
kairosdb.datastore.cassandra.read_consistency_level=QUORUM
kairosdb.datastore.cassandra.write_consistency_level=QUORUM
#for cassandra authentication use the following
#kairosdb.datastore.cassandra.auth.[prop name]=[prop value]
#example:
kairosdb.datastore.cassandra.auth.username=user
kairosdb.datastore.cassandra.auth.password=password
#the time to live in seconds for datapoints. After this period the data will be
#deleted automatically. If not set the data will live forever.
#TTLs are added to columns as they're inserted so setting this will not affect
#existing data, only new data.
#kairosdb.datastore.cassandra.datapoint_ttl=31536000
#===============================================================================
# Hector configuration
kairosdb.datastore.cassandra.hector.maxActive=64
#kairosdb.datastore.cassandra.hector.maxWaitTimeWhenExhausted=-1
#kairosdb.datastore.cassandra.hector.useSocketKeepalive=false
kairosdb.datastore.cassandra.hector.useSocketKeepalive=true
#kairosdb.datastore.cassandra.hector.cassandraThriftSocketTimeout=0
kairosdb.datastore.cassandra.hector.retryDownedHosts=true
kairosdb.datastore.cassandra.hector.retryDownedHostsDelayInSeconds=10
#kairosdb.datastore.cassandra.hector.retryDownedHostsQueueSize=-1
#kairosdb.datastore.cassandra.hector.autoDiscoverHosts=false
#kairosdb.datastore.cassandra.hector.autoDiscoveryDelayInSeconds=30
#kairosdb.datastore.cassandra.hector.autoDiscoveryDataCenters=
#kairosdb.datastore.cassandra.hector.runAutoDiscoveryAtStartup=false
#kairosdb.datastore.cassandra.hector.useHostTimeoutTracker=false
#kairosdb.datastore.cassandra.hector.maxFrameSize=2147483647
#kairosdb.datastore.cassandra.hector.loadBalancingPolicy=roundRobin | leastActive | dynamic
#kairosdb.datastore.cassandra.hector.loadBalancingPolicy=dynamic
kairosdb.datastore.cassandra.hector.loadBalancingPolicy=leastActive
#kairosdb.datastore.cassandra.hector.hostTimeoutCounter=10
#kairosdb.datastore.cassandra.hector.hostTimeoutWindow=500
#kairosdb.datastore.cassandra.hector.hostTimeoutSuspensionDurationInSeconds=10
#kairosdb.datastore.cassandra.hector.hostTimeoutUnsuspendCheckDelay=10
#kairosdb.datastore.cassandra.hector.maxConnectTimeMillis=-1
#kairosdb.datastore.cassandra.hector.maxLastSuccessTimeMillis=-1
03-04|17:47:14.521 [Thread-5] WARN [HConnectionManager.java:302] - Could not fullfill request on this host CassandraClient<10.1.31.101:9160-27204>
03-04|17:47:14.521 [Thread-5] WARN [HConnectionManager.java:303] - Exception:
me.prettyprint.hector.api.exceptions.HTimedOutException: TimedOutException(acknowledged_by:1)
at me.prettyprint.cassandra.service.ExceptionsTranslatorImpl.translate(ExceptionsTranslatorImpl.java:42) ~[hector-core-1.1-4.jar:na]
at me.prettyprint.cassandra.connection.HConnectionManager.operateWithFailover(HConnectionManager.java:260) ~[hector-core-1.1-4.jar:na]
at me.prettyprint.cassandra.model.ExecutingKeyspace.doExecuteOperation(ExecutingKeyspace.java:113) [hector-core-1.1-4.jar:na]
at me.prettyprint.cassandra.model.MutatorImpl.execute(MutatorImpl.java:243) [hector-core-1.1-4.jar:na]
at org.kairosdb.datastore.cassandra.WriteBuffer.run(WriteBuffer.java:237) [kairosdb-0.9.4-6.jar:0.9.4-6.20150330114205]
at java.lang.Thread.run(Thread.java:745) [na:1.7.0_91]
Caused by: org.apache.cassandra.thrift.TimedOutException: null
at org.apache.cassandra.thrift.Cassandra$batch_mutate_result.read(Cassandra.java:20849) ~[cassandra-thrift-1.2.5.jar:1.2.5]
at org.apache.thrift.TServiceClient.receiveBase(TServiceClient.java:78) ~[libthrift-0.7.0.jar:0.7.0]
at org.apache.cassandra.thrift.Cassandra$Client.recv_batch_mutate(Cassandra.java:964) ~[cassandra-thrift-1.2.5.jar:1.2.5]
at org.apache.cassandra.thrift.Cassandra$Client.batch_mutate(Cassandra.java:950) ~[cassandra-thrift-1.2.5.jar:1.2.5]
at me.prettyprint.cassandra.model.MutatorImpl$3.execute(MutatorImpl.java:246) ~[hector-core-1.1-4.jar:na]
at me.prettyprint.cassandra.model.MutatorImpl$3.execute(MutatorImpl.java:243) ~[hector-core-1.1-4.jar:na]
at me.prettyprint.cassandra.service.Operation.executeAndSetResult(Operation.java:104) ~[hector-core-1.1-4.jar:na]
at me.prettyprint.cassandra.connection.HConnectionManager.operateWithFailover(HConnectionManager.java:253) ~[hector-core-1.1-4.jar:na]
... 4 common frames omitted
03-04|17:48:00.000 [QuartzScheduler_Worker-3] INFO [WriteBuffer.java:199] - Increasing write buffer data_points size to 37719
03-04|17:48:00.001 [QuartzScheduler_Worker-3] INFO [WriteBuffer.java:199] - Increasing write buffer row_key_index size to 17764
03-07|03:03:02.493 [pool-4-thread-14] WARN [HConnectionManager.java:302] - Could not fullfill request on this host CassandraClient<>
03-07|03:03:02.493 [pool-4-thread-14] WARN [HConnectionManager.java:303] - Exception:
me.prettyprint.hector.api.exceptions.HTimedOutException: TimedOutException(acknowledged_by:1)
at me.prettyprint.cassandra.service.ExceptionsTranslatorImpl.translate(ExceptionsTranslatorImpl.java:42) ~[hector-core-1.1-4.jar:na]
at me.prettyprint.cassandra.connection.HConnectionManager.operateWithFailover(HConnectionManager.java:260) ~[hector-core-1.1-4.jar:na]
at me.prettyprint.cassandra.model.ExecutingKeyspace.doExecuteOperation(ExecutingKeyspace.java:113) [hector-core-1.1-4.jar:na]
at me.prettyprint.cassandra.model.MutatorImpl.execute(MutatorImpl.java:243) [hector-core-1.1-4.jar:na]
at org.kairosdb.datastore.cassandra.WriteBuffer$WriteDataJob.run(WriteBuffer.java:372) [kairosdb-1.1.1-1.jar:1.1.1-1.20151207194217]
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) [na:1.8.0_51]
at java.util.concurrent.FutureTask.run(FutureTask.java:266) [na:1.8.0_51]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) [na:1.8.0_51]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) [na:1.8.0_51]
at java.lang.Thread.run(Thread.java:745) [na:1.8.0_51]
Caused by: org.apache.cassandra.thrift.TimedOutException: null
at org.apache.cassandra.thrift.Cassandra$batch_mutate_result.read(Cassandra.java:20849) ~[cassandra-thrift-1.2.5.jar:1.2.5]
at org.apache.thrift.TServiceClient.receiveBase(TServiceClient.java:78) ~[libthrift-0.7.0.jar:0.7.0]
at org.apache.cassandra.thrift.Cassandra$Client.recv_batch_mutate(Cassandra.java:964) ~[cassandra-thrift-1.2.5.jar:1.2.5]
at org.apache.cassandra.thrift.Cassandra$Client.batch_mutate(Cassandra.java:950) ~[cassandra-thrift-1.2.5.jar:1.2.5]
at me.prettyprint.cassandra.model.MutatorImpl$3.execute(MutatorImpl.java:246) ~[hector-core-1.1-4.jar:na]
at me.prettyprint.cassandra.model.MutatorImpl$3.execute(MutatorImpl.java:243) ~[hector-core-1.1-4.jar:na]
at me.prettyprint.cassandra.service.Operation.executeAndSetResult(Operation.java:104) ~[hector-core-1.1-4.jar:na]
at me.prettyprint.cassandra.connection.HConnectionManager.operateWithFailover(HConnectionManager.java:253) ~[hector-core-1.1-4.jar:na]
... 8 common frames omitted
Thanks, Loic and Brian. I will try to gzip the metrics before posting them to Kairos and, as suggested, increase the number of Cassandra nodes and reduce the replication factor. Also, could you please provide your feedback on the following configuration:
I still have a few more read-related questions:
1. During a query I set the cache time to 3600, but I don't see any content in the cache files created in /tmp. Also, these files are deleted even though the cleanup schedule is set to a longer interval.
2. In my existing setup, a given metric might in the worst case have a tag cardinality of around 50k with a data sample size of 5 million; I see that queries of this type take around 30-50 seconds.
So what changes can I make to reduce the read time for such queries? Do I need to increase kairosdb.datastore.cassandra.hector.maxActive=64?
Thanks
Varun
kairosdb.datastore.cassandra.keyspace=kairosdb
kairosdb.datastore.cassandra.replication_factor=3
kairosdb.datastore.cassandra.write_delay=1000
kairosdb.datastore.cassandra.write_buffer_max_size=2000000
#When reading one row read in 10k
kairosdb.datastore.cassandra.single_row_read_size=10240
#The number of rows to read when doing a multi get
kairosdb.datastore.cassandra.multi_row_size=1000
#The amount of data to read from each row when doing a multi get
kairosdb.datastore.cassandra.multi_row_read_size=1024
#Size of the row key cache size. This can be monitored by querying
#kairosdb.datastore.write_size and filtering on the tag buffer = row_key_index
#Ideally the data written to the row_key_index should stabilize to zero except
#when data rolls to a new row
kairosdb.datastore.cassandra.row_key_cache_size=65536
kairosdb.datastore.cassandra.string_cache_size=20000
# Uses Quartz Cron syntax - default is to run every five minutes
kairosdb.datastore.cassandra.increase_buffer_size_schedule=0 */5 * * * ?
#Control the required consistency for cassandra operations.
#Available settings are cassandra version dependent:
kairosdb.datastore.cassandra.read_consistency_level=ONE
kairosdb.datastore.cassandra.write_consistency_level=LOCAL_QUORUM
04-05|22:46:35.147 [pool-2-thread-10] ERROR [WriteBuffer.java:379] - Error sending data to Cassandra (data_points)
me.prettyprint.hector.api.exceptions.HectorException: All host pools marked down. Retry burden pushed out to client.
at me.prettyprint.cassandra.connection.HConnectionManager.getClientFromLBPolicy(HConnectionManager.java:390) ~[hector-core-1.1-4.jar:na]
at me.prettyprint.cassandra.connection.HConnectionManager.operateWithFailover(HConnectionManager.java:244) ~[hector-core-1.1-4.jar:na]
at me.prettyprint.cassandra.model.ExecutingKeyspace.doExecuteOperation(ExecutingKeyspace.java:113) ~[hector-core-1.1-4.jar:na]
at me.prettyprint.cassandra.model.MutatorImpl.execute(MutatorImpl.java:243) ~[hector-core-1.1-4.jar:na]
at org.kairosdb.datastore.cassandra.WriteBuffer$WriteDataJob.run(WriteBuffer.java:372) ~[kairosdb-1.1.1-1.jar:1.1.1-1.20151207194217]
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) [na:1.8.0_51]
at java.util.concurrent.FutureTask.run(FutureTask.java:266) [na:1.8.0_51]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) [na:1.8.0_51]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) [na:1.8.0_51]
at java.lang.Thread.run(Thread.java:745) [na:1.8.0_51]
04-05|22:46:35.148 [pool-2-thread-10] ERROR [WriteBuffer.java:383] - Reducing write buffer size to 0. You need to increase your cassandra capacity or change the kairosdb.datastore.cassandra.write_buffer_max_size property.