I am trying to ingest some Snappy-compressed Avro data, but the Hadoop indexing job fails with the error below (the ingestion spec I am using follows the stack trace):
2017-08-07 14:52:45,916 FATAL [main] org.apache.hadoop.mapred.YarnChild: Error running child : java.lang.UnsatisfiedLinkError: org.xerial.snappy.SnappyNative.uncompressedLength(Ljava/lang/Object;II)I
at org.xerial.snappy.SnappyNative.uncompressedLength(Native Method)
at org.xerial.snappy.Snappy.uncompressedLength(Snappy.java:541)
at org.apache.avro.file.SnappyCodec.decompress(SnappyCodec.java:58)
at org.apache.avro.file.DataFileStream$DataBlock.decompressUsing(DataFileStream.java:343)
at org.apache.avro.file.DataFileStream.hasNext(DataFileStream.java:199)
at org.apache.avro.mapreduce.AvroRecordReaderBase.nextKeyValue(AvroRecordReaderBase.java:117)
at org.apache.hadoop.mapreduce.lib.input.DelegatingRecordReader.nextKeyValue(DelegatingRecordReader.java:89)
at org.apache.hadoop.mapred.MapTask$NewTrackingRecordReader.nextKeyValue(MapTask.java:556)
at org.apache.hadoop.mapreduce.task.MapContextImpl.nextKeyValue(MapContextImpl.java:80)
at org.apache.hadoop.mapreduce.lib.map.WrappedMapper$Context.nextKeyValue(WrappedMapper.java:91)
at io.druid.indexer.DetermineHashedPartitionsJob$DetermineCardinalityMapper.run(DetermineHashedPartitionsJob.java:284)
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:787)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:341)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:170)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1866)
at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:164)
{
"type" : "index_hadoop",
"spec" : {
"ioConfig" : {
"type" : "hadoop",
"inputSpec" : {
"type" : "static",
"inputFormat" :"io.druid.data.input.avro.AvroValueInputFormat",
"paths" : "/path/avro/d_2017-06-12T2020Z.7e949067-2691-4aed-b282-e01ea2434ea5.0.avro"
}
},
"dataSchema" : {
"dataSource" : "testavro",
"granularitySpec" : {
"type" : "uniform",
"segmentGranularity" : "day",
"queryGranularity" : "minute",
"intervals" : ["2017-06-04/2017-06-06"]
},
"parser" : {
"type" : "avro_hadoop",
"parseSpec" : {
"format" : "timeAndDims",
"dimensionsSpec" : {
"dimensions" : [
"header.channel",
"header.clientId",
"header.eventType"
]
},
"timestampSpec" : {
"format" : "auto",
"column" : "header.timeEpochMillisUTC"
}
}
},
"metricsSpec" : [
{
"name" : "count",
"type" : "count"
},
{
"name" : "user_unique",
"type" : "hyperUnique",
"fieldName" : "header.channel"
}
]
},
"tuningConfig" : {
"type" : "hadoop",
"partitionsSpec" : {
"type" : "hashed",
"targetPartitionSize" : 5000000
},
"jobProperties" : {
"avro.schema.input.value.path" : "/avro.idm.json",
"hadoop.mapreduce.job.user.classpath.first":"true",
"io.compression.codecs" : "org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.BZip2Codec,org.apache.hadoop.io.compress.SnappyCodec"
}
}
}
}