Hi,
I'm using Spark Notebook with the default configuration (local mode).
When I display a DataFrame that has been loaded from nontemporary a Hive Table, the following exception is thrown and the DataFrame is not rendered as a table:
java.lang.ExceptionInInitializerError
at sun.misc.Unsafe.ensureClassInitialized(Native Method)
at sun.reflect.UnsafeFieldAccessorFactory.newFieldAccessor(UnsafeFieldAccessorFactory.java:43)
at sun.reflect.ReflectionFactory.newFieldAccessor(ReflectionFactory.java:142)
at java.lang.reflect.Field.acquireFieldAccessor(Field.java:1088)
at java.lang.reflect.Field.getFieldAccessor(Field.java:1069)
at java.lang.reflect.Field.get(Field.java:393)
at notebook.kernel.Repl.getModule$1(Repl.scala:203)
at notebook.kernel.Repl.iws$1(Repl.scala:212)
at notebook.kernel.Repl.liftedTree1$1(Repl.scala:219)
at notebook.kernel.Repl.evaluate(Repl.scala:199)
at notebook.client.ReplCalculator$$anonfun$15$$anon$1$$anonfun$29.apply(ReplCalculator.scala:378)
at notebook.client.ReplCalculator$$anonfun$15$$anon$1$$anonfun$29.apply(ReplCalculator.scala:375)
at scala.concurrent.impl.Future$PromiseCompletingRunnable.liftedTree1$1(Future.scala:24)
at scala.concurrent.impl.Future$PromiseCompletingRunnable.run(Future.scala:24)
at akka.dispatch.TaskInvocation.run(AbstractDispatcher.scala:40)
at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:397)
at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
Caused by: java.lang.NoSuchMethodException: org.apache.spark.io.SnappyCompressionCodec.<init>(org.apache.spark.SparkConf)
at java.lang.Class.getConstructor0(Class.java:3082)
at java.lang.Class.getConstructor(Class.java:1825)
at org.apache.spark.io.CompressionCodec$.createCodec(CompressionCodec.scala:71)
at org.apache.spark.io.CompressionCodec$.createCodec(CompressionCodec.scala:65)
at
org.apache.spark.broadcast.TorrentBroadcast.org$apache$spark$broadcast$TorrentBroadcast$$setConf(TorrentBroadcast.scala:73)
at org.apache.spark.broadcast.TorrentBroadcast.<init>(TorrentBroadcast.scala:80)
at org.apache.spark.broadcast.TorrentBroadcastFactory.newBroadcast(TorrentBroadcastFactory.scala:34)
at org.apache.spark.broadcast.BroadcastManager.newBroadcast(BroadcastManager.scala:63)
at org.apache.spark.SparkContext.broadcast(SparkContext.scala:1326)
at org.apache.spark.sql.execution.datasources.DataSourceStrategy$.apply(DataSourceStrategy.scala:108)
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:58)
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:58)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:396)
at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:59)
at org.apache.spark.sql.catalyst.planning.QueryPlanner.planLater(QueryPlanner.scala:54)
at org.apache.spark.sql.execution.SparkStrategies$EquiJoinSelection$.apply(SparkStrategies.scala:113)
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:58)
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:58)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:396)
at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:59)
at org.apache.spark.sql.catalyst.planning.QueryPlanner.planLater(QueryPlanner.scala:54)
at org.apache.spark.sql.execution.SparkStrategies$EquiJoinSelection$.apply(SparkStrategies.scala:113)
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:58)
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:58)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:396)
at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:59)
at org.apache.spark.sql.catalyst.planning.QueryPlanner.planLater(QueryPlanner.scala:54)
at org.apache.spark.sql.execution.SparkStrategies$EquiJoinSelection$.apply(SparkStrategies.scala:113)
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:58)
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:58)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:396)
at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:59)
at org.apache.spark.sql.catalyst.planning.QueryPlanner.planLater(QueryPlanner.scala:54)
at org.apache.spark.sql.execution.SparkStrategies$EquiJoinSelection$.apply(SparkStrategies.scala:113)
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:58)
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:58)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:396)
at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:59)
at org.apache.spark.sql.catalyst.planning.QueryPlanner.planLater(QueryPlanner.scala:54)
at org.apache.spark.sql.execution.SparkStrategies$BasicOperators$.apply(SparkStrategies.scala:336)
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:58)
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:58)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:396)
at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:59)
at org.apache.spark.sql.execution.QueryExecution.sparkPlan$lzycompute(QueryExecution.scala:47)
at org.apache.spark.sql.execution.QueryExecution.sparkPlan(QueryExecution.scala:45)
at org.apache.spark.sql.execution.QueryExecution.executedPlan$lzycompute(QueryExecution.scala:52)
at org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:52)
at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:55)
at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:55)
at org.apache.spark.sql.DataFrame.toJSON(DataFrame.scala:1724)
at notebook.front.widgets.DataFrameView$class.notebook$front$widgets$DataFrameView$$json(DataFrame.scala:40)
at notebook.front.widgets.DataFrameWidget.notebook$front$widgets$DataFrameView$$json$lzycompute(DataFrame.scala:64)
at notebook.front.widgets.DataFrameWidget.notebook$front$widgets$DataFrameView$$json(DataFrame.scala:64)
at notebook.front.widgets.DataFrameView$class.$init$(DataFrame.scala:41)
at notebook.front.widgets.DataFrameWidget.<init>(DataFrame.scala:69)
at notebook.front.ExtraLowPriorityRenderers$dataFrameAsTable$.render(renderer.scala:13)
at notebook.front.ExtraLowPriorityRenderers$dataFrameAsTable$.render(renderer.scala:12)
at notebook.front.Widget$.fromRenderer(Widget.scala:32)
at $line17.$rendered$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$.<init>(<console>:49)
at $line17.$rendered$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$.<clinit>(<console>)
... 20 more
[debug] application - Termination of op calculator
This happens for all Hive tables (Parquet or JSON format), written using saveAsTable() and read using sqlContext.sql("select * from Person_AdressType").
It does not happen for DataFrames coming from a JDBC source.
In order to use Hive Tables, I added a hive-site.xml to the conf folder and set export HADOOP_CONF_DIR=./conf.
I do not use Hadoop.
Workaround: Use some action to read the DataFrame into Memory and display the table afterwards:
val x = sqlContext.sql("select * from Person_AddressType").cache
val c = x.count
x