Trying to ingest data from a TSV file fails with an output-path error ("outputPath ... must not exist"), even though we do not specify any output path during ingestion.
2017-04-28T18:07:48,092 INFO [task-runner-0-priority-0] org.apache.hadoop.mapreduce.Job - Task Id : attempt_1492015449829_0019_r_000000_1, Status : FAILED
Error: com.metamx.common.ISE: outputPath[/tmp/druid-indexing/fnd_user/2017-04-28T171836.846Z_843137ceab404e35bf6a5475d1b8b50e/20060101T000000.000Z_20060102T000000.000Z/partitions.json] must not exist.
at io.druid.indexer.Utils.makePathAndOutputStream(Utils.java:63)
at io.druid.indexer.DetermineHashedPartitionsJob$DetermineCardinalityReducer.reduce(DetermineHashedPartitionsJob.java:328)
at io.druid.indexer.DetermineHashedPartitionsJob$DetermineCardinalityReducer.reduce(DetermineHashedPartitionsJob.java:299)
at org.apache.hadoop.mapreduce.Reducer.run(Reducer.java:171)
at io.druid.indexer.DetermineHashedPartitionsJob$DetermineCardinalityReducer.run(DetermineHashedPartitionsJob.java:351)
at org.apache.hadoop.mapred.ReduceTask.runNewReducer(ReduceTask.java:627)
at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:389)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:175)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1807)
at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:169)
2017-04-28T18:07:58,160 INFO [task-runner-0-priority-0] org.apache.hadoop.mapreduce.Job - Task Id : attempt_1492015449829_0019_r_000000_2, Status : FAILED
Error: com.metamx.common.ISE: outputPath[/tmp/druid-indexing/fnd_user/2017-04-28T171836.846Z_843137ceab404e35bf6a5475d1b8b50e/20060101T000000.000Z_20060102T000000.000Z/partitions.json] must not exist.
at io.druid.indexer.Utils.makePathAndOutputStream(Utils.java:63)
at io.druid.indexer.DetermineHashedPartitionsJob$DetermineCardinalityReducer.reduce(DetermineHashedPartitionsJob.java:328)
at io.druid.indexer.DetermineHashedPartitionsJob$DetermineCardinalityReducer.reduce(DetermineHashedPartitionsJob.java:299)
at org.apache.hadoop.mapreduce.Reducer.run(Reducer.java:171)
at io.druid.indexer.DetermineHashedPartitionsJob$DetermineCardinalityReducer.run(DetermineHashedPartitionsJob.java:351)
at org.apache.hadoop.mapred.ReduceTask.runNewReducer(ReduceTask.java:627)
at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:389)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:175)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1807)
at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:169)
2017-04-28T18:08:08,255 INFO [task-runner-0-priority-0] org.apache.hadoop.mapreduce.Job - Job job_1492015449829_0019 failed with state FAILED due to: Task failed task_1492015449829_0019_r_000000
Job failed as tasks failed. failedMaps:0 failedReduces:1
We are using the following ingestion spec:
{
"type" : "index_hadoop",
"spec" : {
"ioConfig" : {
"type" : "hadoop",
"inputSpec" : {
"type" : "static",
"paths" : "hdfs://10.100.205.192:8020/analysis/fnd_user.tsv"
}
},
"dataSchema" : {
"dataSource" : "fnd_user",
"granularitySpec" : {
"type" : "uniform",
"segmentGranularity" : "day",
"queryGranularity" : "day",
"intervals" : ["2006-01-01/2017-04-30"]
},
"parser" : {
"type" : "hadoopyString",
"parseSpec" : {
"format" : "tsv",
"timestampSpec" : {
"format" : "M/dd/y H:m:s a",
"column" : "creation_date"
},
"columns" : ["user_id","user_name","last_update_date","last_updated_by","creation_date","created_by","last_update_login","encrypted_foundation_password","encrypted_user_password","session_number","start_date","end_date","description","last_logon_date","password_date","password_accesses_left","password_lifespan_accesses","password_lifespan_days","employee_id","email_address","fax","customer_id","supplier_id","web_password","user_guid","gcn_code_combination_id","person_party_id"],
"dimensionsSpec" : {
"dimensions" : []
}
}
},
"metricsSpec" : [
{
"name" : "count",
"type" : "count"
}
]
},
"tuningConfig" : {
"type" : "hadoop",
"partitionsSpec" : {
"type" : "hashed",
"targetPartitionSize" : 5000000
},
"jobProperties" : {
"mapreduce.job.classloader" : "true",
"mapreduce.job.classloader.system.classes": "-javax.validation.,java.,javax.,org.apache.commons.logging.,org.apache.log4j.,org.apache.hadoop.",
"mapreduce.map.java.opts":"-Duser.timezone=UTC -Dfile.encoding=UTF-8", "mapreduce.reduce.java.opts":"-Duser.timezone=UTC -Dfile.encoding=UTF-8"
}
}
}
}