Hello,
I am trying to test the new Kafka indexing service released in druid-0.9.1-rc1
I have set up a Druid cluster on a test machine and I submitted the following supervisor spec:
I see from the overlord console that approximately every 30 seconds a new task is
created. Each task runs for a few seconds and then exits with status FAILED,
then a new one is created and so on.
I have checked the logs of overlord, middleManager and of each peon task, but there is not any error in any of them. The logs of each task located under baseTaskDir/{taskID} are empty, too.
Can you please have a look at the spec and tell me if I'm doing something wrong?
$ cat druid/kafka-supervisor.json
{
"type":"kafka",
"dataSchema" : {
"dataSource" : "content20stats_stage",
"parser" : {
"type" : "string",
"parseSpec" : {
"format" : "csv",
"columns" : [
"timestamp",
"a_attrs",
"a_boxes_ctr_id",
"a_boxes_id",
"a_scrolls",
"n_boximpression",
"n_breakpoint",
"n_click",
"n_doc_type",
"n_fbcomment",
"n_fblike",
"n_fbshare",
"n_gplus",
"n_impression",
"n_info",
"n_mappa",
"n_searchno",
"n_staytime",
"n_twcount",
"s_area",
"s_box",
"s_cat1",
"s_cat2",
"s_cat3",
"s_dest_id",
"s_doc_id",
"s_domain",
"s_link_type",
"s_pag_id",
"s_page",
"s_ref_host",
"s_ref_path",
"s_search",
"s_source",
"s_ua"
],
"dimensionsSpec" : {
"dimensions" : [
"a_attrs",
"a_boxes_ctr_id",
"a_boxes_id",
"a_scrolls",
"n_boximpression",
"n_breakpoint",
"n_click",
"n_doc_type",
"n_fbcomment",
"n_fblike",
"n_fbshare",
"n_gplus",
"n_impression",
"n_info",
"n_mappa",
"n_searchno",
"n_staytime",
"n_twcount",
"s_area",
"s_box",
"s_cat1",
"s_cat2",
"s_cat3",
"s_dest_id",
"s_doc_id",
"s_domain",
"s_link_type",
"s_pag_id",
"s_page",
"s_ref_host",
"s_ref_path",
"s_search",
"s_source",
"s_ua"
]
},
"listDelimiter" : ";",
"timestampSpec" : {
"column" : "timestamp",
"format" : "millis"
}
}
},
"granularitySpec" : {
"queryGranularity" : "MINUTE",
"segmentGranularity" : "HOUR"
},
"metricsSpec" : [{
"name" : "count",
"type" : "count"
}, {
"fieldName" : "n_impression",
"name" : "impressions",
"type" : "longSum"
}, {
"fieldName" : "n_click",
"name" : "clicks",
"type" : "longSum"
}, {
"fieldName" : "n_boximpression",
"name" : "boximpressions",
"type" : "longSum"
}, {
"fieldName" : "n_staytime",
"name" : "totstaytime",
"type" : "longSum"
}, {
"fieldName" : "n_fblike",
"name" : "fblike",
"type" : "longSum"
}, {
"fieldName" : "n_fbshare",
"name" : "fbshare",
"type" : "longSum"
}, {
"fieldName" : "n_fbcomment",
"name" : "fbcomment",
"type" : "longSum"
}, {
"fieldName" : "n_twcount",
"name" : "twcount",
"type" : "longSum"
}, {
"fieldName" : "n_searchno",
"name" : "searchres",
"type" : "longSum"
}
]
},
"ioConfig" : {
"topic": "event",
"consumerProperties": {
"bootstrap.servers": "ip_server1:9092,ip_server2:9092"
},
"taskCount": 1,
"replicas": 1,
"taskDuration": "PT1H"
},
"tuningConfig" : {
"type" : "kafka",
"maxRowsInMemory" : "100000",
"intermediatePersistPeriod" : "PT10M",
"reportParseExceptions" : true
}
}
Here are the runtime.properties of overlord and middleManager
$ cat druid/middleManager/runtime.properties
druid.service=middleManager
druid.port=8083
# Number of tasks per middleManager
#leave default, which is Number of available processors - 1
#druid.worker.capacity=3
# Task launch parameters
druid.indexer.runner.javaOpts=-server -Xmx2g -Duser.timezone=UTC -Dfile.encoding=UTF-8 -Djava.util.logging.manager=org.apache.logging.log4j.jul.LogManager -Dlog-name=peon
druid.indexer.task.baseTaskDir=/data/tmp/druid/task
# HTTP server threads
druid.server.http.numThreads=25
# Processing threads and buffers
druid.processing.buffer.sizeBytes=536870912
druid.processing.numThreads=2
# Hadoop indexing
druid.indexer.task.hadoopWorkingPath=/data/tmp/druid/hadoop-tmp
druid.indexer.task.defaultHadoopCoordinates=["org.apache.hadoop:hadoop-client:2.3.0"]
druid.indexer.task.restoreTasksOnRestart=true
$ cat druid/overlord/runtime.properties
druid.port=8084
druid.service=overlord
druid.indexer.queue.startDelay=PT30S
druid.indexer.runner.type=remote
druid.indexer.storage.type=metadata
Thanks,
Tommaso