Hello,
I have set up the Druid indexing service before, and with the spec below I could index the data without any issues. But the same spec (with minor modifications to bring it in line with the HadoopIndexer) now fails.
I would appreciate it if someone could tell me what's wrong with the spec. The spec is as follows:
{
"dataSchema":{
"dataSource":"daily1",
"parser":{
"type":"hadoopyString",
"parseSpec":{
"format":"csv",
"timestampSpec":{
"column":"trans_date",
"format":"yyyy-MM-dd HH:mm:ss"
},
"columns":[
"country","city","region","geo_region","us_ind","total_impressions","network_impressions","network_impressions_filled","network_impressions_unfilled","network_clicks","paid_impressions","backfill_impressions","backfill_clicks","active_views","amt_yield","trans_date","year_month","matched","year","month"
],
"dimensionsSpec":{
"dimensions":[
"country","city","region","geo_region","us_ind" ],
"dimensionExclusions":[
],
"spatialDimensions":[
]
}
}
},
"metricsSpec":[
{
"type":"doubleSum",
"name":"total_impressions",
"fieldName":"total_impressions"
},
{
"type":"doubleSum",
"name":"network_impressions",
"fieldName":"network_impressions"
},
{
"type":"doubleSum",
"name":"network_impressions_filled",
"fieldName":"network_impressions_filled"
},
{
"type":"doubleSum",
"name":"network_impressions_unfilled",
"fieldName":"network_impressions_unfilled"
},
{
"type":"doubleSum",
"name":"network_clicks",
"fieldName":"network_clicks"
},
{
"type":"doubleSum",
"name":"amt_yield",
"fieldName":"amt_yield"
},
{
"type":"doubleSum",
"name":"paid_impressions",
"fieldName":"paid_impressions"
}
],
"granularitySpec":{
"type":"arbitrary",
"segmentGranularity":"MONTH",
"queryGranularity":"NONE",
"intervals" : [ "2015-10-01/2015-10-31" ]
} },
"ioConfig" : {
"type" : "hadoop",
"inputSpec" : {
"type" : "static",
"paths" : "hdfs://10.50.150.242:8020/input/monthly.csv-2015_10_fixed.csv"
},
"metadataUpdateSpec" : {
"type":"mysql",
"connectURI" : "jdbc:mysql://10.50.150.242:3306/druid",
"password" : "druid",
"segmentTable" : "druid_segments",
"user" : "druid"
},
"segmentOutputPath" : "/druid/data/index/output"
},
"tuningConfig" : {
"type" : "hadoop",
"workingPath": "/tmp",
"partitionsSpec" : {
"type" : "dimension",
"targetPartitionSize" : 5000000,
"maxPartitionSize" : 7500000,
"assumeGrouped" : false,
"numShards" : -1
},
"shardSpecs" : { },
"leaveIntermediate" : false,
"cleanupOnFailure" : true,
"overwriteFiles" : false,
"ignoreInvalidRows" : false,
"jobProperties" : { },
"combineText" : false,
"persistInHeap" : false,
"ingestOffheap" : false,
"bufferSize" : 134217728,
"aggregationBufferRatio" : 0.5,
"rowFlushBoundary" : 300000
} }
A sample data record is shown below:
United States,Denver,Colorado,North America,U.S.,3,3,3,0,0,0,0,0,0,0.0,2015-10-01 00:00:00,201510,TRUE,2015,10
Exception stacktrace:
2016-04-15T07:49:50,055 ERROR [main] io.druid.cli.CliHadoopIndexer - failure!!!!
java.lang.reflect.InvocationTargetException
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) ~[?:1.8.0_20]
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) ~[?:1.8.0_20]
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) ~[?:1.8.0_20]
at java.lang.reflect.Method.invoke(Method.java:483) ~[?:1.8.0_20]
at io.druid.cli.CliHadoopIndexer.run(CliHadoopIndexer.java:120) [druid-services-0.8.3.jar:0.8.3]
at io.druid.cli.Main.main(Main.java:99) [druid-services-0.8.3.jar:0.8.3]
Caused by: java.lang.RuntimeException: java.lang.RuntimeException: No buckets?? seems there is no data to index.
at io.druid.indexer.IndexGeneratorJob.run(IndexGeneratorJob.java:211) ~[druid-indexing-hadoop-0.8.3.jar:0.8.3]
at io.druid.indexer.JobHelper.runJobs(JobHelper.java:321) ~[druid-indexing-hadoop-0.8.3.jar:0.8.3]
at io.druid.indexer.HadoopDruidIndexerJob.run(HadoopDruidIndexerJob.java:96) ~[druid-indexing-hadoop-0.8.3.jar:0.8.3]
at io.druid.indexer.JobHelper.runJobs(JobHelper.java:321) ~[druid-indexing-hadoop-0.8.3.jar:0.8.3]
at io.druid.cli.CliInternalHadoopIndexer.run(CliInternalHadoopIndexer.java:132) ~[druid-services-0.8.3.jar:0.8.3]
at io.druid.cli.Main.main(Main.java:99) ~[druid-services-0.8.3.jar:0.8.3]
... 6 more
Caused by: java.lang.RuntimeException: No buckets?? seems there is no data to index.
at io.druid.indexer.IndexGeneratorJob.run(IndexGeneratorJob.java:160) ~[druid-indexing-hadoop-0.8.3.jar:0.8.3]
at io.druid.indexer.JobHelper.runJobs(JobHelper.java:321) ~[druid-indexing-hadoop-0.8.3.jar:0.8.3]
at io.druid.indexer.HadoopDruidIndexerJob.run(HadoopDruidIndexerJob.java:96) ~[druid-indexing-hadoop-0.8.3.jar:0.8.3]
at io.druid.indexer.JobHelper.runJobs(JobHelper.java:321) ~[druid-indexing-hadoop-0.8.3.jar:0.8.3]
at io.druid.cli.CliInternalHadoopIndexer.run(CliInternalHadoopIndexer.java:132) ~[druid-services-0.8.3.jar:0.8.3]
at io.druid.cli.Main.main(Main.java:99) ~[druid-services-0.8.3.jar:0.8.3]
... 6 more
Thanks
Nalini