java.lang.NoClassDefFoundError: scala/runtime/AbstractPartialFunction$mcJL$sp

Kaushal Shriyan

Jul 24, 2017, 3:19:35 AM
to DataStax Spark Connector for Apache Cassandra
I am facing an issue while connecting Apache Spark to the Apache Cassandra datastore:

[root@bin]# ./spark-shell --jars ../jars/spark-cassandra-connector-assembly-2.0.3-36-g9a50162.jar
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
17/07/23 23:12:56 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
17/07/23 23:13:01 WARN ObjectStore: Failed to get database global_temp, returning NoSuchObjectException
Spark context Web UI available at http://111.23.140.15:4040
Spark context available as 'sc' (master = spark://172.16.214.126:7077, app id = app-20170723231257-0008).
Spark session available as 'spark'.
Welcome to
      ____              __
     / __/__  ___ _____/ /__
    _\ \/ _ \/ _ `/ __/ '_/
   /___/ .__/\_,_/_/ /_/\_\   version 2.2.0
      /_/
Using Scala version 2.11.8 (Java HotSpot(TM) 64-Bit Server VM, Java 1.8.0_131)
Type in expressions to have them evaluated.
Type :help for more information.
scala> sc.stop
scala> import com.datastax.spark.connector._, org.apache.spark.SparkContext, org.apache.spark.SparkContext._, org.apache.spark.SparkConf
import com.datastax.spark.connector._
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import org.apache.spark.SparkConf
scala> val conf = new SparkConf(true).set("spark.cassandra.connection.host","172.16.214.41")
conf: org.apache.spark.SparkConf = org.apache.spark.SparkConf@7d0e43d6
scala> val sc = new SparkContext(conf)
sc: org.apache.spark.SparkContext = org.apache.spark.SparkContext@202b5293
scala> val test_spark_rdd = sc.cassandraTable("test_spark", "test")
test_spark_rdd: com.datastax.spark.connector.rdd.CassandraTableScanRDD[com.datastax.spark.connector.CassandraRow] = CassandraTableScanRDD[0] at RDD at CassandraRDD.scala:16
scala> test_spark_rdd.first
17/07/23 23:15:04 WARN TaskSetManager: Lost task 0.0 in stage 0.0 (TID 0, 172.16.214.41, executor 0): java.lang.NoClassDefFoundError: scala/runtime/AbstractPartialFunction$mcJL$sp
at java.lang.ClassLoader.defineClass1(Native Method)
at java.lang.ClassLoader.defineClass(ClassLoader.java:763)
at java.security.SecureClassLoader.defineClass(SecureClassLoader.java:142)
at java.net.URLClassLoader.defineClass(URLClassLoader.java:467)
at java.net.URLClassLoader.access$100(URLClassLoader.java:73)
at java.net.URLClassLoader$1.run(URLClassLoader.java:368)
at java.net.URLClassLoader$1.run(URLClassLoader.java:362)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(URLClassLoader.java:361)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:335)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
at com.datastax.spark.connector.rdd.CassandraLimit$.limitForIterator(CassandraLimit.scala:21)
at com.datastax.spark.connector.rdd.CassandraTableScanRDD.compute(CassandraTableScanRDD.scala:368)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:108)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.ClassNotFoundException: scala.runtime.AbstractPartialFunction$mcJL$sp
at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:335)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
... 22 more
17/07/23 23:15:04 WARN TaskSetManager: Lost task 0.1 in stage 0.0 (TID 1, 172.16.214.41, executor 0): java.lang.NoClassDefFoundError: com/datastax/spark/connector/rdd/CassandraLimit$$anonfun$limitForIterator$1
at com.datastax.spark.connector.rdd.CassandraLimit$.limitForIterator(CassandraLimit.scala:21)
at com.datastax.spark.connector.rdd.CassandraTableScanRDD.compute(CassandraTableScanRDD.scala:368)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:108)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:748)
17/07/23 23:15:04 ERROR TaskSetManager: Task 0 in stage 0.0 failed 4 times; aborting job
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 0.0 failed 4 times, most recent failure: Lost task 0.3 in stage 0.0 (TID 3, 172.16.214.41, executor 0): java.lang.NoClassDefFoundError: com/datastax/spark/connector/rdd/CassandraLimit$$anonfun$limitForIterator$1
at com.datastax.spark.connector.rdd.CassandraLimit$.limitForIterator(CassandraLimit.scala:21)
at com.datastax.spark.connector.rdd.CassandraTableScanRDD.compute(CassandraTableScanRDD.scala:368)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:108)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:748)
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1499)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1487)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1486)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1486)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:814)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:814)
at scala.Option.foreach(Option.scala:257)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:814)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1714)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1669)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1658)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:630)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2022)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2043)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2062)
at org.apache.spark.rdd.RDD$$anonfun$take$1.apply(RDD.scala:1354)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:362)
at org.apache.spark.rdd.RDD.take(RDD.scala:1327)
at com.datastax.spark.connector.rdd.CassandraRDD.take(CassandraRDD.scala:127)
at com.datastax.spark.connector.rdd.CassandraRDD.take(CassandraRDD.scala:128)
at org.apache.spark.rdd.RDD$$anonfun$first$1.apply(RDD.scala:1368)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:362)
at org.apache.spark.rdd.RDD.first(RDD.scala:1367)
... 52 elided
Caused by: java.lang.NoClassDefFoundError: com/datastax/spark/connector/rdd/CassandraLimit$$anonfun$limitForIterator$1
at com.datastax.spark.connector.rdd.CassandraLimit$.limitForIterator(CassandraLimit.scala:21)
at com.datastax.spark.connector.rdd.CassandraTableScanRDD.compute(CassandraTableScanRDD.scala:368)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:108)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:748)
scala>

I am following https://www.datastax.com/dev/blog/kindling-an-introduction-to-spark-with-cassandra-part-1. Also, can I use Python to test the Spark Cassandra Connector?

Any help will be highly appreciated.

Regards,

Kaushal

Francesco Malerba

Aug 10, 2017, 12:11:39 PM
to DataStax Spark Connector for Apache Cassandra
Hi, I was facing the same issue and solved it by choosing the right version of the connector for my Cassandra/Spark build, i.e. one built against the same Scala version as my Spark distribution (a NoClassDefFoundError on scala/runtime/AbstractPartialFunction$mcJL$sp is a typical symptom of a Scala version mismatch; note the -s_2.11 suffix below, matching Spark's Scala 2.11).
The following command works:

spark-shell --packages datastax:spark-cassandra-connector:2.0.3-s_2.11 --master=spark://master:7077 --conf spark.cassandra.connection.host=172.3.3.11

when executed against the following Docker Compose setup:

version: '2.0'

networks:
  cassandra_stratio_net:
    driver: bridge
    ipam:
      driver: default
      config:
        - subnet: 172.3.3.0/24

services:
  cassandra-1:
    hostname: cassandra-1
    image: sharkcell/cassandra-stratio-lucene:3.0
    command: /bin/bash -c "sleep 1 && echo ' -- Pausing to let system catch up ... -->' && /docker-entrypoint.sh cassandra -f"
    environment:
      - CASSANDRA_CLUSTER_NAME=stratio_cluster
    expose:
      - 7000
      - 7001
      - 7199
      - 9042
      - 9160
    volumes: # uncomment if you desire mounts, also uncomment cluster.sh
      - ./data/cassandra-1:/var/lib/cassandra:rw
    networks:
      cassandra_stratio_net:
        ipv4_address: 172.3.3.11

  cassandra-2:
    hostname: cassandra-2
    image: sharkcell/cassandra-stratio-lucene:3.0
    command: /bin/bash -c "sleep 10 && echo ' -- Pausing to let system catch up ... -->' && /docker-entrypoint.sh cassandra -f"
    environment:
      - CASSANDRA_CLUSTER_NAME=stratio_cluster
      - CASSANDRA_SEEDS=cassandra-1
    links:
      - cassandra-1
    expose:
      - 7000
      - 7001
      - 7199
      - 9042
      - 9160
    volumes: # uncomment if you desire mounts, also uncomment cluster.sh
      - ./data/cassandra-2:/var/lib/cassandra:rw
    networks:
      cassandra_stratio_net:
        ipv4_address: 172.3.3.12

  master:
    image: gettyimages/spark:2.1.0-hadoop-2.7
    command: bin/spark-class org.apache.spark.deploy.master.Master -h master
    hostname: master
    environment:
      MASTER: spark://master:7077
      SPARK_CONF_DIR: /conf
      SPARK_PUBLIC_DNS: 0.0.0.0
    expose:
      - 7001
      - 7002
      - 7003
      - 7004
      - 7005
      - 7006
      - 7077
      - 6066
    volumes:
      - ./conf/spark_master:/conf
    networks:
      cassandra_stratio_net:
        ipv4_address: 172.3.3.14

  worker-1:
    image: gettyimages/spark:2.1.0-hadoop-2.7
    command: /bin/bash -c "sleep 5 && echo ' -- Pausing to let system catch up ... -->' && bin/spark-class org.apache.spark.deploy.worker.Worker spark://master:7077"
    hostname: worker-1
    environment:
      SPARK_CONF_DIR: /conf
      SPARK_WORKER_CORES: 2
      SPARK_WORKER_MEMORY: 1g
      SPARK_WORKER_PORT: 8881
      SPARK_WORKER_WEBUI_PORT: 8081
      SPARK_PUBLIC_DNS: 172.3.3.14
    links:
      - master
    expose:
      - 7001
      - 7002
      - 7003
      - 7004
      - 7005
      - 7006
      - 7077
      - 6066
    volumes:
      - ./conf/spark_master:/conf
    networks:
      cassandra_stratio_net:
        ipv4_address: 172.3.3.15
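
Once the shell is up against this cluster, the read that failed in the original post should go through. A minimal smoke test, reusing the test_spark.test keyspace/table from the first message (substitute your own names), would be:

import com.datastax.spark.connector._ // adds cassandraTable to the SparkContext

// spark-shell already provides sc; with a Scala-matched connector this
// returns a CassandraRow instead of throwing NoClassDefFoundError
val row = sc.cassandraTable("test_spark", "test").first
println(row)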

