Hi, my Hadoop ingestion tasks keep failing with the error below (java.lang.OutOfMemoryError: GC overhead limit exceeded).
Does anyone know why this happens? Thanks.
2018-11-10T05:10:38,223 INFO [LocalJobRunner Map Task Executor #0] io.druid.indexer.HadoopDruidIndexerConfig - Running with config:
{
"spec" : {
"dataSchema" : {
"dataSource" : "abandonments",
"parser" : {
"type" : "hadoopyString",
"parseSpec" : {
"format" : "csv",
"columns" : [ "date_id", "client_id", "second_of_day", "session_id", "ad_visitor_id", "ip_address", "referral_url", "referral_type", "referral_name", "natural_search_term", "browser", "os", "destination_url", "country", "state", "city", "language", "time_zone", "screen_resolution", "color_depth", "javascript_version", "user_agent", "mobile_device", "mobile_network", "connection_type", "device_vendor", "device_model", "device_marketing_name", "device_type", "order_number", "product_id", "product_name", "site_location_id", "product_quantity", "backorder_quantity", "base_price", "site_id" ],
"dimensionsSpec" : {
"dimensions" : [ "client_id", "second_of_day", "session_id", "ad_visitor_id", "ip_address", "referral_url", "referral_type", "referral_name", "natural_search_term", "browser", "os", "destination_url", "country", "state", "city", "language", "time_zone", "screen_resolution", "color_depth", "javascript_version", "user_agent", "mobile_device", "mobile_network", "connection_type", "device_vendor", "device_model", "device_marketing_name", "device_type", "order_number", "product_id", "product_name", "site_location_id", "site_id" ]
},
"timestampSpec" : {
"format" : "yyyy-MM-dd'T'HH:mm:ss'Z'",
"column" : "date_id"
}
}
},
"metricsSpec" : [ {
"type" : "longSum",
"name" : "abandonments_product_quantity",
"fieldName" : "product_quantity",
"expression" : null
}, {
"type" : "longSum",
"name" : "abandonments_backorder_quantity",
"fieldName" : "backorder_quantity",
"expression" : null
}, {
"type" : "floatSum",
"name" : "abandonments_base_price_total",
"fieldName" : "base_price",
"expression" : null
}, {
"type" : "count",
"name" : "session_id_count"
} ],
"granularitySpec" : {
"type" : "uniform",
"segmentGranularity" : "HOUR",
"queryGranularity" : {
"type" : "none"
},
"rollup" : true,
"intervals" : [ "2008-09-12T00:00:00.000Z/2020-12-31T00:00:00.000Z" ]
},
"transformSpec" : {
"filter" : null,
"transforms" : [ ]
}
},
"ioConfig" : {
"type" : "hadoop",
"inputSpec" : {
"type" : "static",
"paths" : "hdfs://dev02-ev0100.cm.emm.local:54010/druid_data/abandonments.txt"
},
"metadataUpdateSpec" : null,
"segmentOutputPath" : "hdfs://dev02-ev0100.cm.emm.local:54010/druid/segments"
},
"tuningConfig" : {
"type" : "hadoop",
"workingPath" : "var/druid/hadoop-tmp",
"version" : "2018-11-10T05:10:23.557Z",
"partitionsSpec" : {
"type" : "hashed",
"targetPartitionSize" : 5000000,
"maxPartitionSize" : 7500000,
"assumeGrouped" : false,
"numShards" : -1,
"partitionDimensions" : [ ]
},
"shardSpecs" : { },
"indexSpec" : {
"bitmap" : {
"type" : "concise"
},
"dimensionCompression" : "lz4",
"metricCompression" : "lz4",
"longEncoding" : "longs"
},
"maxRowsInMemory" : 75000,
"leaveIntermediate" : false,
"cleanupOnFailure" : true,
"overwriteFiles" : false,
"ignoreInvalidRows" : false,
"jobProperties" : { },
"combineText" : false,
"useCombiner" : false,
"buildV9Directly" : true,
"numBackgroundPersistThreads" : 0,
"forceExtendableShardSpecs" : false,
"useExplicitVersion" : false,
"allowedHadoopPrefix" : [ ]
},
"uniqueId" : "af69fd25383946baa0c21378b688dd7e"
}
}
2018-11-10T05:10:38,325 INFO [task-runner-0-priority-0] org.apache.hadoop.mapreduce.Job - Job job_local1983385492_0001 running in uber mode : false
2018-11-10T05:10:38,326 INFO [task-runner-0-priority-0] org.apache.hadoop.mapreduce.Job - map 0% reduce 0%
2018-11-10T05:10:38,443 INFO [LocalJobRunner Map Task Executor #0] org.apache.hadoop.conf.Configuration.deprecation - mapred.job.tracker is deprecated. Instead, use mapreduce.jobtracker.address
2018-11-10T05:10:38,696 INFO [LocalJobRunner Map Task Executor #0] org.apache.hadoop.mapred.LocalJobRunner -
2018-11-10T05:10:38,701 INFO [LocalJobRunner Map Task Executor #0] org.apache.hadoop.mapred.MapTask - Starting flush of map output
2018-11-10T05:10:38,701 INFO [LocalJobRunner Map Task Executor #0] org.apache.hadoop.mapred.MapTask - Spilling map output
2018-11-10T05:10:38,701 INFO [LocalJobRunner Map Task Executor #0] org.apache.hadoop.mapred.MapTask - bufstart = 0; bufend = 2048811; bufvoid = 104857600
2018-11-10T05:10:38,701 INFO [LocalJobRunner Map Task Executor #0] org.apache.hadoop.mapred.MapTask - kvstart = 26214396(104857584); kvend = 25783072(103132288); length = 431325/6553600
2018-11-10T05:10:42,740 INFO [LocalJobRunner Map Task Executor #0] org.apache.hadoop.mapred.MapTask - Finished spill 0
2018-11-10T05:10:42,785 INFO [LocalJobRunner Map Task Executor #0] org.apache.hadoop.mapred.Task - Task:attempt_local1983385492_0001_m_000000_0 is done. And is in the process of committing
2018-11-10T05:10:42,799 INFO [LocalJobRunner Map Task Executor #0] org.apache.hadoop.mapred.LocalJobRunner - map
2018-11-10T05:10:42,799 INFO [LocalJobRunner Map Task Executor #0] org.apache.hadoop.mapred.Task - Task 'attempt_local1983385492_0001_m_000000_0' done.
2018-11-10T05:10:42,799 INFO [LocalJobRunner Map Task Executor #0] org.apache.hadoop.mapred.LocalJobRunner - Finishing task: attempt_local1983385492_0001_m_000000_0
2018-11-10T05:10:42,799 INFO [Thread-57] org.apache.hadoop.mapred.LocalJobRunner - map task executor complete.
2018-11-10T05:10:43,331 INFO [task-runner-0-priority-0] org.apache.hadoop.mapreduce.Job - map 100% reduce 0%
2018-11-10T05:12:05,602 WARN [main-SendThread(10.111.232.114:2181)] org.apache.zookeeper.ClientCnxn - Client session timed out, have not heard from server in 27888ms for sessionid 0x166fb9c2fc9000c
2018-11-10T05:12:38,758 INFO [task-runner-0-priority-0] org.apache.hadoop.mapreduce.Job - Job job_local1983385492_0001 failed with state FAILED due to: NA
2018-11-10T05:12:38,756 WARN [Thread-57] org.apache.hadoop.mapred.LocalJobRunner - job_local1983385492_0001
java.lang.OutOfMemoryError: GC overhead limit exceeded
at java.util.concurrent.ConcurrentHashMap.putVal(ConcurrentHashMap.java:1019) ~[?:1.8.0_144]
at java.util.concurrent.ConcurrentHashMap.putAll(ConcurrentHashMap.java:1084) ~[?:1.8.0_144]
at java.util.concurrent.ConcurrentHashMap.<init>(ConcurrentHashMap.java:852) ~[?:1.8.0_144]
at org.apache.hadoop.conf.Configuration.<init>(Configuration.java:713) ~[hadoop-common-2.7.3.jar:?]
at org.apache.hadoop.mapred.JobConf.<init>(JobConf.java:442) ~[hadoop-mapreduce-client-core-2.7.3.jar:?]
at org.apache.hadoop.mapred.LocalJobRunner$Job$ReduceTaskRunnable.<init>(LocalJobRunner.java:294) ~[hadoop-mapreduce-client-common-2.7.3.jar:?]
at org.apache.hadoop.mapred.LocalJobRunner$Job.getReduceTaskRunnables(LocalJobRunner.java:350) ~[hadoop-mapreduce-client-common-2.7.3.jar:?]
at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:526) [hadoop-mapreduce-client-common-2.7.3.jar:?]
2018-11-10T05:12:10,254 WARN [main-SendThread(10.111.232.114:2181)] org.apache.zookeeper.server.ZooKeeperThread - Exception occurred from thread main-SendThread(10.111.232.114:2181)
java.lang.OutOfMemoryError: GC overhead limit exceeded
at org.apache.logging.log4j.core.util.datetime.FixedDateFormat.format(FixedDateFormat.java:270) ~[log4j-core-2.5.jar:2.5]
at org.apache.logging.log4j.core.pattern.DatePatternConverter$FixedFormatter.format(DatePatternConverter.java:72) ~[log4j-core-2.5.jar:2.5]
at org.apache.logging.log4j.core.pattern.DatePatternConverter$CachedTime.<init>(DatePatternConverter.java:103) ~[log4j-core-2.5.jar:2.5]
at org.apache.logging.log4j.core.pattern.DatePatternConverter.format(DatePatternConverter.java:207) ~[log4j-core-2.5.jar:2.5]
at org.apache.logging.log4j.core.pattern.DatePatternConverter.format(DatePatternConverter.java:201) ~[log4j-core-2.5.jar:2.5]
at org.apache.logging.log4j.core.pattern.PatternFormatter.format(PatternFormatter.java:36) ~[log4j-core-2.5.jar:2.5]
at org.apache.logging.log4j.core.layout.PatternLayout$PatternSerializer.toSerializable(PatternLayout.java:292) ~[log4j-core-2.5.jar:2.5]
at org.apache.logging.log4j.core.layout.PatternLayout.toSerializable(PatternLayout.java:206) ~[log4j-core-2.5.jar:2.5]
at org.apache.logging.log4j.core.layout.PatternLayout.toSerializable(PatternLayout.java:56) ~[log4j-core-2.5.jar:2.5]
at org.apache.logging.log4j.core.layout.AbstractStringLayout.toByteArray(AbstractStringLayout.java:148) ~[log4j-core-2.5.jar:2.5]
at org.apache.logging.log4j.core.appender.AbstractOutputStreamAppender.append(AbstractOutputStreamAppender.java:112) ~[log4j-core-2.5.jar:2.5]
at org.apache.logging.log4j.core.config.AppenderControl.tryCallAppender(AppenderControl.java:152) ~[log4j-core-2.5.jar:2.5]
at org.apache.logging.log4j.core.config.AppenderControl.callAppender0(AppenderControl.java:125) ~[log4j-core-2.5.jar:2.5]
at org.apache.logging.log4j.core.config.AppenderControl.callAppenderPreventRecursion(AppenderControl.java:116) ~[log4j-core-2.5.jar:2.5]
at org.apache.logging.log4j.core.config.AppenderControl.callAppender(AppenderControl.java:84) ~[log4j-core-2.5.jar:2.5]
at org.apache.logging.log4j.core.config.LoggerConfig.callAppenders(LoggerConfig.java:390) ~[log4j-core-2.5.jar:2.5]
at org.apache.logging.log4j.core.config.LoggerConfig.processLogEvent(LoggerConfig.java:378) ~[log4j-core-2.5.jar:2.5]
at org.apache.logging.log4j.core.config.LoggerConfig.log(LoggerConfig.java:362) ~[log4j-core-2.5.jar:2.5]
at org.apache.logging.log4j.core.config.LoggerConfig.log(LoggerConfig.java:352) ~[log4j-core-2.5.jar:2.5]
at org.apache.logging.log4j.core.config.AwaitCompletionReliabilityStrategy.log(AwaitCompletionReliabilityStrategy.java:63) ~[log4j-core-2.5.jar:2.5]
at org.apache.logging.log4j.core.Logger.logMessage(Logger.java:147) ~[log4j-core-2.5.jar:2.5]
at org.apache.logging.log4j.spi.AbstractLogger.logMessage(AbstractLogger.java:1016) ~[log4j-api-2.5.jar:2.5]
at org.apache.logging.log4j.spi.AbstractLogger.logIfEnabled(AbstractLogger.java:964) ~[log4j-api-2.5.jar:2.5]
at org.apache.logging.slf4j.Log4jLogger.info(Log4jLogger.java:178) ~[log4j-slf4j-impl-2.5.jar:2.5]
at org.apache.zookeeper.ClientCnxn$SendThread.run(ClientCnxn.java:1156) ~[zookeeper-3.4.10.jar:3.4.10-39d3a4f269333c922ed3db283be479f9deacaa0f]