year,month,volume,store_id,product_id
2015,jul,2.113,22653,45
{
"type" : "index_hadoop",
"spec" : {
"ioConfig" : {
"type" : "hadoop",
"inputSpec" : {
"type" : "static",
"paths" : "../../data/volume-data.csv"
}
},
"dataSchema" : {
"dataSource" : "volume-data",
"listDelimiter":",",
"granularitySpec" : {
"type" : "uniform",
"segmentGranularity" : "month",
"queryGranularity" : "none",
"intervals" : ["2011-09-12/2016-09-13"]
},
"parser" : {
"type" : "string",
"parseSpec" : {
"format" : "csv",
"columns":["year","month","volume","store_id","product_id"],
"dimensionsSpec" : {
"dimensions" : [
"store_id",
"product_id",
"month",
"year"
]
},
"timestampSpec" : [{
"format" : "YYYY",
"column" : "year"
},
{
"format" : "mmm",
"column" : "month"
}]
}
},
"metricsSpec" : [
{
"name" : "volume",
"type" : "doubleSum",
"fieldName" : "volume"
}
]
},
"tuningConfig" : {
"type" : "hadoop",
"partitionsSpec" : {
"type" : "hashed",
"targetPartitionSize" : 5000000
},
"jobProperties" : {}
}
}
}
I get the following exception in the logs
2016-07-07T07:07:58,660 WARN [Thread-21] org.apache.hadoop.mapred.LocalJobRunner - job_local1102702440_0001
java.lang.Exception: java.lang.IllegalArgumentException: Can not deserialize instance of io.druid.data.input.impl.TimestampSpec out of START_ARRAY token
at [Source: N/A; line: -1, column: -1]
at org.apache.hadoop.mapred.LocalJobRunner$Job.runTasks(LocalJobRunner.java:462) ~[hadoop-mapreduce-client-common-2.3.0.jar:?]
at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:522) [hadoop-mapreduce-client-common-2.3.0.jar:?]
Caused by: java.lang.IllegalArgumentException: Can not deserialize instance of io.druid.data.input.impl.TimestampSpec out of START_ARRAY token
at [Source: N/A; line: -1, column: -1]
at com.fasterxml.jackson.databind.ObjectMapper._convert(ObjectMapper.java:2774) ~[jackson-databind-2.4.6.jar:2.4.6]
at com.fasterxml.jackson.databind.ObjectMapper.convertValue(ObjectMapper.java:2700) ~[jackson-databind-2.4.6.jar:2.4.6]
at io.druid.segment.indexing.DataSchema.getParser(DataSchema.java:101) ~[druid-server-0.9.1.1.jar:0.9.1.1]
at io.druid.indexer.HadoopDruidIndexerConfig.verify(HadoopDruidIndexerConfig.java:567) ~[druid-indexing-hadoop-0.9.1.1.jar:0.9.1.1]
at io.druid.indexer.HadoopDruidIndexerConfig.fromConfiguration(HadoopDruidIndexerConfig.java:209) ~[druid-indexing-hadoop-0.9.1.1.jar:0.9.1.1]
at io.druid.indexer.DetermineHashedPartitionsJob$DetermineHashedPartitionsPartitioner.setConf(DetermineHashedPartitionsJob.java:399) ~[druid-indexing-hadoop-0.9.1.1.jar:0.9.1.1]
Is the only option to merge the month and year column to create a new timestamp column? Or can I directly use the two?