We are receiving the following error while running a Hadoop indexing task, with HDFS as deep storage. We are using the Google GCS connector, with GCS as the default filesystem. Attaching the full job file as well as the Hadoop index spec input.
2016-10-03T11:25:05,679 ERROR [task-runner-0-priority-0] io.druid.indexing.overlord.ThreadPoolTaskRunner - Exception while running task[HadoopIndexTask{id=index_hadoop_prism-data-15_2016-10-03T10:59:46.123Z, type=index_hadoop, dataSource=prism-data-15}]
{
"type": "index_hadoop",
"spec": {
"dataSchema": {
"dataSource": "prism-data-15",
"parser": {
"type": "string",
"parseSpec": {
"format": "json",
"dimensionsSpec": {
"dimensions": [
"event_id",
"lang",
"share_clicks",
"ts_bucket",
"old_hash_id",
"ab_test",
"event_name",
"title",
"noti_opened",
"fullstory_time_total",
"ts_back_valid",
"custom_title",
"targeted_city",
"at",
"short_view_event",
"published_dt",
"short_time",
"notification_type",
"variants",
"device_id",
"category",
"toss_opened",
"noti_shown",
"event_source",
"score",
"author",
"bookmark",
"is_video",
"source",
"like_count",
"share_view",
"vid_length",
"content",
"fullstory_view",
"ts_valid",
"targeted_country",
"video_event",
"shortened_url",
"toss_clicked",
"hashId",
"group_id",
"img_url",
"is_deleted"
]
},
"timestampSpec": {
"format": "millis",
"column": "at"
}
}
},
"metricsSpec": [{
"type": "count",
"name": "count"
}, {
"type": "doubleSum",
"name": "fullstory_total_time",
"fieldName": "fullstory_time_total"
}, {
"type": "longSum",
"name": "total_like_count",
"fieldName": "like_count"
}, {
"type": "longMax",
"name": "total_share_views",
"fieldName": "share_views"
}, {
"type": "longMax",
"name": "total_vid_length",
"fieldName": "vid_length"
}, {
"type": "doubleSum",
"name": "total_short_time",
"fieldName": "short_time"
}, {
"type": "hyperUnique",
"name": "distinct_user",
"fieldName": "device_id"
}, {
"type": "hyperUnique",
"name": "distinct_event",
"fieldName": "event_id"
}, {
"type": "hyperUnique",
"name": "distinct_hash_Id",
"fieldName": "hashId"
}, {
"type": "longSum",
"name": "total_bookmark",
"fieldName": "bookmark"
}, {
"type": "longSum",
"name": "total_fullstory_view",
"fieldName": "fullstory_view"
}, {
"type": "longSum",
"name": "total_noti_opened",
"fieldName": "noti_opened"
}, {
"type": "longSum",
"name": "total_noti_shown",
"fieldName": "noti_shown"
}, {
"type": "longSum",
"name": "total_toss_clicked",
"fieldName": "toss_clicked"
}, {
"type": "longSum",
"name": "total_toss_opened",
"fieldName": "toss_opened"
}, {
"type": "longSum",
"name": "total_share_click",
"fieldName": "share_clicks"
}, {
"type": "longSum",
"name": "total_short_views",
"fieldName": "short_view_event"
}, {
"type": "longSum",
"name": "total_video_views",
"fieldName": "video_event"
}, {
"type": "longSum",
"name": "total_ts_valid",
"fieldName": "ts_valid"
}, {
"type": "longSum",
"name": "total_full_ts_valid",
"fieldName": "ts_back_valid"
}, {
"type": "longMax",
"name": "is_ab",
"fieldName": "ab_test"
}, {
"type": "longMax",
"name": "ab_variants",
"fieldName": "variants"
}],
"granularitySpec": {
"type": "uniform",
"segmentGranularity": "DAY",
"queryGranularity": {
"type": "none"
},
"intervals": [
"2016-01-01T00:00:00.000Z/2017-12-30T00:00:00.000Z"
]
}},
"ioConfig": {
"type": "hadoop",
"inputSpec": {
"type": "static",
"paths": "gs://nis-prism/new/2016/08/02/part-*"
}
},
"tuningConfig": {
"type": "hadoop",
"partitionsSpec": {
"type": "hashed",
"targetPartitionSize": 2500000
},
"numBackgroundPersistThreads" : 1,
"overwriteFiles" : true
}
},
"hadoopDependencyCoordinates": ["org.apache.hadoop:hadoop-client:2.7.2"]
}