Hi,
We are in the process of getting up and running with Tinkerpop 3.1 with Spark and Yarn on a Cloudera Hadoop cluster. We can connect to our cluster by setting the usual environment variables, e.g. SPARK_HOME, JAVA_HOME, HADOOP_GREMLIN_LIBS, HADOOP_HOME, HADOOP_YARN_HOME, and the CLASSPATH. However, the only way we can get Gremlin to recognize the yarn-enabled spark jar is by copying this jar ('spark-assembly-1.5.1-hadoop2.6.0.jar') into the 'ext/spark-gremlin/plugin' directory. This jar is in our $SPARK_HOME/lib directory and is on the CLASSPATH. With this jar copied, we can successfully load and query the example graph in our cluster with Spark-Gremlin and YARN. Without this jar, we get an error when running e.g. g.V().count() (see below).
Is there a recommended configuration to get access to this jar, which would be a cleaner solution than copying it?
Jen
gremlin> g.V().count()
java.lang.ExceptionInInitializerError
java.lang.IllegalStateException: java.lang.ExceptionInInitializerError
at org.apache.tinkerpop.gremlin.process.computer.traversal.step.map.ComputerResultStep.processNextStart(ComputerResultStep.java:82)
at org.apache.tinkerpop.gremlin.process.traversal.step.util.AbstractStep.hasNext(AbstractStep.java:140)
at org.apache.tinkerpop.gremlin.process.traversal.util.DefaultTraversal.hasNext(DefaultTraversal.java:144)
at org.codehaus.groovy.vmplugin.v7.IndyInterface.selectMethod(IndyInterface.java:215)
at org.apache.tinkerpop.gremlin.console.Console$_closure3.doCall(Console.groovy:205)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:497)
at org.codehaus.groovy.reflection.CachedMethod.invoke(CachedMethod.java:90)
at groovy.lang.MetaMethod.doMethodInvoke(MetaMethod.java:324)
at org.codehaus.groovy.runtime.metaclass.ClosureMetaClass.invokeMethod(ClosureMetaClass.java:292)
at groovy.lang.MetaClassImpl.invokeMethod(MetaClassImpl.java:1016)
at org.codehaus.groovy.tools.shell.Groovysh.setLastResult(Groovysh.groovy:441)
at org.codehaus.groovy.vmplugin.v7.IndyInterface.selectMethod(IndyInterface.java:215)
at org.codehaus.groovy.tools.shell.Groovysh.execute(Groovysh.groovy:185)
at org.codehaus.groovy.tools.shell.Shell.leftShift(Shell.groovy:119)
at org.codehaus.groovy.tools.shell.InteractiveShellRunner.super$2$work(InteractiveShellRunner.groovy)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:497)
at org.codehaus.groovy.reflection.CachedMethod.invoke(CachedMethod.java:90)
at groovy.lang.MetaMethod.doMethodInvoke(MetaMethod.java:324)
at groovy.lang.MetaClassImpl.invokeMethod(MetaClassImpl.java:1207)
at org.codehaus.groovy.runtime.ScriptBytecodeAdapter.invokeMethodOnSuperN(ScriptBytecodeAdapter.java:130)
at org.codehaus.groovy.runtime.ScriptBytecodeAdapter.invokeMethodOnSuper0(ScriptBytecodeAdapter.java:150)
at org.codehaus.groovy.tools.shell.ShellRunner.run(ShellRunner.groovy:58)
at org.codehaus.groovy.tools.shell.InteractiveShellRunner.super$2$run(InteractiveShellRunner.groovy)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:497)
at org.codehaus.groovy.reflection.CachedMethod.invoke(CachedMethod.java:90)
at groovy.lang.MetaMethod.doMethodInvoke(MetaMethod.java:324)
at groovy.lang.MetaClassImpl.invokeMethod(MetaClassImpl.java:1207)
at org.codehaus.groovy.runtime.ScriptBytecodeAdapter.invokeMethodOnSuperN(ScriptBytecodeAdapter.java:130)
at org.codehaus.groovy.runtime.ScriptBytecodeAdapter.invokeMethodOnSuper0(ScriptBytecodeAdapter.java:150)
at org.codehaus.groovy.tools.shell.InteractiveShellRunner.run(InteractiveShellRunner.groovy:82)
at org.codehaus.groovy.vmplugin.v7.IndyInterface.selectMethod(IndyInterface.java:215)
at org.apache.tinkerpop.gremlin.console.Console.<init>(Console.groovy:144)
at org.codehaus.groovy.vmplugin.v7.IndyInterface.selectMethod(IndyInterface.java:215)
at org.apache.tinkerpop.gremlin.console.Console.main(Console.groovy:303)
Caused by: java.util.concurrent.ExecutionException: java.lang.ExceptionInInitializerError
at java.util.concurrent.CompletableFuture.reportGet(CompletableFuture.java:357)
at java.util.concurrent.CompletableFuture.get(CompletableFuture.java:1887)
at org.apache.tinkerpop.gremlin.process.computer.traversal.step.map.ComputerResultStep.processNextStart(ComputerResultStep.java:80)
... 44 more
Caused by: java.lang.ExceptionInInitializerError
at org.apache.spark.util.Utils$.getSparkOrYarnConfig(Utils.scala:2042)
at org.apache.spark.storage.BlockManager.<init>(BlockManager.scala:97)
at org.apache.spark.storage.BlockManager.<init>(BlockManager.scala:173)
at org.apache.spark.SparkEnv$.create(SparkEnv.scala:345)
at org.apache.spark.SparkEnv$.createDriverEnv(SparkEnv.scala:193)
at org.apache.spark.SparkContext.createSparkEnv(SparkContext.scala:277)
at org.apache.spark.SparkContext.<init>(SparkContext.scala:450)
at org.apache.spark.SparkContext$.getOrCreate(SparkContext.scala:2256)
at org.apache.spark.SparkContext.getOrCreate(SparkContext.scala)
at org.apache.tinkerpop.gremlin.spark.process.computer.SparkGraphComputer.lambda$submit$21(SparkGraphComputer.java:137)
at org.apache.tinkerpop.gremlin.spark.process.computer.SparkGraphComputer$$Lambda$50/1760378672.get(Unknown Source)
at java.util.concurrent.CompletableFuture$AsyncSupply.run(CompletableFuture.java:1582)
at java.util.concurrent.CompletableFuture$AsyncSupply.exec(CompletableFuture.java:1574)
at java.util.concurrent.ForkJoinTask.doExec(ForkJoinTask.java:289)
at java.util.concurrent.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1056)
at java.util.concurrent.ForkJoinPool.runWorker(ForkJoinPool.java:1689)
at java.util.concurrent.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:157)
Caused by: org.apache.spark.SparkException: Unable to load YARN support
at org.apache.spark.deploy.SparkHadoopUtil$.liftedTree1$1(SparkHadoopUtil.scala:392)
at org.apache.spark.deploy.SparkHadoopUtil$.<init>(SparkHadoopUtil.scala:387)
at org.apache.spark.deploy.SparkHadoopUtil$.<clinit>(SparkHadoopUtil.scala)
... 17 more
Caused by: java.lang.ClassNotFoundException: org.apache.spark.deploy.yarn.YarnSparkHadoopUtil
at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:331)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
at java.lang.Class.forName0(Native Method)
at java.lang.Class.forName(Class.java:348)
at org.apache.spark.util.Utils$.classForName(Utils.scala:173)
at org.apache.spark.deploy.SparkHadoopUtil$.liftedTree1$1(SparkHadoopUtil.scala:388)
... 19 more