// Load the event_product_refs table over JDBC into a DataFrame.
// Connection settings (driver, url, user, password) come from the
// externally supplied `dbConf` configuration object.
val jdbcDF = sqlContext.read.format("jdbc").options(
  Map(
    // NOTE(review): the original code also passed "mergeSchema" -> "false"
    // here, but mergeSchema is a Parquet-source option and is silently
    // ignored by the JDBC data source, so it has been removed.
    "driver" -> dbConf.getString("driver"),
    "url" -> dbConf.getString("url"),
    "user" -> dbConf.getString("user"),
    "password" -> dbConf.getString("password"),
    "dbtable" -> "event_product_refs"
  )
).load()

// Persist the table as Parquet on Alluxio. If the post-write footer read
// fails with BlockDoesNotExistException (see the trace pasted below),
// submit the job with:
//   --conf spark.driver.extraJavaOptions="-Dalluxio.user.file.cache.partially.read.block=false"
jdbcDF.write
  .parquet("alluxio://diva/eventProductRefs.parquet")
16/04/28 22:24:02 WARN hadoop.ParquetOutputCommitter: could not write summary file for alluxio://dev17-spark-master01:19998/diva/eventProductRefs.parquet
java.io.IOException: Could not read footer: java.io.IOException: Could not read footer for file FileStatus{path=alluxio://dev17-spark-master01:19998/diva/eventProductRefs.parquet/part-r-00000-a5df7b1b-e9e4-4663-bdbb-e3bf68a32f0c.gz.parquet; isDirectory=false; length=1407788; replication=3; blocksize=536870912; modification_time=1461882237757; access_time=1461882237757; owner=; group=; permission=rw-rw-rw-; isSymlink=false}
at org.apache.parquet.hadoop.ParquetFileReader.readAllFootersInParallel(ParquetFileReader.java:247)
at org.apache.parquet.hadoop.ParquetFileReader.readAllFootersInParallel(ParquetFileReader.java:262)
at org.apache.parquet.hadoop.ParquetOutputCommitter.writeMetaDataFile(ParquetOutputCommitter.java:56)
at org.apache.parquet.hadoop.ParquetOutputCommitter.commitJob(ParquetOutputCommitter.java:48)
at org.apache.spark.sql.execution.datasources.BaseWriterContainer.commitJob(WriterContainer.scala:230)
at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelation$$anonfun$run$1.apply$mcV$sp(InsertIntoHadoopFsRelation.scala:151)
at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelation$$anonfun$run$1.apply(InsertIntoHadoopFsRelation.scala:108)
at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelation$$anonfun$run$1.apply(InsertIntoHadoopFsRelation.scala:108)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:56)
at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelation.run(InsertIntoHadoopFsRelation.scala:108)
at org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult$lzycompute(commands.scala:58)
at org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult(commands.scala:56)
at org.apache.spark.sql.execution.ExecutedCommand.doExecute(commands.scala:70)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$5.apply(SparkPlan.scala:132)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$5.apply(SparkPlan.scala:130)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:130)
at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:55)
at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:55)
at org.apache.spark.sql.execution.datasources.ResolvedDataSource$.apply(ResolvedDataSource.scala:256)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:148)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:139)
at org.apache.spark.sql.DataFrameWriter.parquet(DataFrameWriter.scala:334)
at com.ticketfly.etl.diva.DumpProductRefsToAlluxio$delayedInit$body.apply(DumpProductRefsToAlluxio.scala:28)
at scala.Function0$class.apply$mcV$sp(Function0.scala:40)
at scala.runtime.AbstractFunction0.apply$mcV$sp(AbstractFunction0.scala:12)
at scala.App$$anonfun$main$1.apply(App.scala:71)
at scala.App$$anonfun$main$1.apply(App.scala:71)
at scala.collection.immutable.List.foreach(List.scala:318)
at scala.collection.generic.TraversableForwarder$class.foreach(TraversableForwarder.scala:32)
at scala.App$class.main(App.scala:71)
at com.ticketfly.etl.diva.DumpProductRefsToAlluxio$.main(DumpProductRefsToAlluxio.scala:6)
at com.ticketfly.etl.diva.DumpProductRefsToAlluxio.main(DumpProductRefsToAlluxio.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:731)
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:181)
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:206)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:121)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.io.IOException: Could not read footer for file FileStatus{path=alluxio://dev17-spark-master01:19998/diva/eventProductRefs.parquet/part-r-00000-a5df7b1b-e9e4-4663-bdbb-e3bf68a32f0c.gz.parquet; isDirectory=false; length=1407788; replication=3; blocksize=536870912; modification_time=1461882237757; access_time=1461882237757; owner=; group=; permission=rw-rw-rw-; isSymlink=false}
at org.apache.parquet.hadoop.ParquetFileReader$2.call(ParquetFileReader.java:239)
at org.apache.parquet.hadoop.ParquetFileReader$2.call(ParquetFileReader.java:233)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
Caused by: java.io.IOException: alluxio.exception.BlockDoesNotExistException: TempBlockMeta not found for blockId 167,772,160
at alluxio.client.block.RemoteBlockOutStream.cancel(RemoteBlockOutStream.java:85)
at alluxio.client.file.FileInStream.closeCacheStream(FileInStream.java:288)
at alluxio.client.file.FileInStream.close(FileInStream.java:117)
at alluxio.hadoop.HdfsFileInputStream.close(HdfsFileInputStream.java:114)
at java.io.FilterInputStream.close(FilterInputStream.java:181)
at org.apache.parquet.hadoop.ParquetFileReader.readFooter(ParquetFileReader.java:432)
at org.apache.parquet.hadoop.ParquetFileReader$2.call(ParquetFileReader.java:237)
... 5 more
Caused by: alluxio.exception.BlockDoesNotExistException: TempBlockMeta not found for blockId 167,772,160
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at alluxio.exception.AlluxioException.from(AlluxioException.java:73)
at alluxio.AbstractClient.retryRPC(AbstractClient.java:324)
at alluxio.client.block.BlockWorkerClient.cancelBlock(BlockWorkerClient.java:156)
at alluxio.client.block.RemoteBlockOutStream.cancel(RemoteBlockOutStream.java:83)
... 11 more
...
--
You received this message because you are subscribed to the Google Groups "Alluxio Users" group.
To unsubscribe from this group and stop receiving emails from it, send an email to alluxio-users+unsubscribe@googlegroups.com.
For more options, visit https://groups.google.com/d/optout.
2016-05-06 04:42:37,310 WARN httpclient.RestStorageService (RestStorageService.java:performRequest) - Error Response: HEAD '/alluxio%2Fdiva%2FeventProductRefs.parquet_%24folder%24' -- ResponseCode: 404, ResponseStatus: Not Found, Request Headers: [Content-Type: , Date: Fri, 06 May 2016 04:42:37 GMT, Authorization: AWS xxxxxxxxxxxxxxx:xxxxxxxxxxxxxxxx, User-Agent: JetS3t/0.8.1 (Linux/3.2.0-72-virtual; amd64; en; JVM 1.8.0_74), Host: tfly-spark-dev17.s3.amazonaws.com], Response Headers: [x-amz-request-id: A2D6B4E27047095B, x-amz-id-2: xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx, Content-Type: application/xml, Transfer-Encoding: chunked, Date: Fri, 06 May 2016 04:42:36 GMT, Server: AmazonS3]
Dragiša Krsmanović | Platform Engineer | Ticketfly
--conf spark.driver.extraJavaOptions="-Dalluxio.user.file.cache.partially.read.block=false"
to Spark to enable it. Let me know if you find any problems.
It is recommended to colocate Alluxio and Spark. You can see a bigger performance gain that way.
Thank you.
Pei
To unsubscribe from this group and stop receiving emails from it, send an email to alluxio-users+unsubscribe@googlegroups.com.