/* SimpleApp.scala */
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import org.apache.spark.SparkConf
import com.datastax.spark.connector._,org.apache.spark.SparkContext,org.apache.spark.SparkContext._, org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import org.apache.spark.sql._
import org.apache.spark.SparkConf
import com.datastax.driver.core.utils.UUIDs
import org.apache.spark.sql.functions.udf
import org.apache.spark.sql.cassandra
import org.apache.spark.sql.cassandra._
import com.datastax.spark.connector.cql.CassandraConnectorConf
import com.datastax.spark.connector.rdd.ReadConf
object SimpleApp {
def main(args: Array[String]) {
//val logFile = "/home/goutham/derby.log" // Should be some file on your system
val conf = new SparkConf().setAppName("Simple Application")
val sc = new SparkContext(conf)
//val logData = sc.textFile(logFile, 2).cache()
//val numAs = logData.filter(line => line.contains("a")).count()
//val numBs = logData.filter(line => line.contains("b")).count()
//println(s"Lines with a: $numAs, Lines with b: $numBs")
val timeUUID = udf(() => UUIDs.timeBased().toString)
val sqlcontext = new org.apache.spark.sql.SQLContext(sc)
val df = sqlcontext.read.format("com.databricks.spark.csv").option("wholeFile", "true").option("header", "true").option("parserLib", "UNIVOCITY").option("quote","\"").option("inferSchema", "true").option("escape","\"").option("quoteMode","ALL").load("/home/goutham/Work/data/user.csv").withColumn("user_uuid", timeUUID())
df.createOrReplaceTempView("source_user")
val num = df.count()
println(s" Number of records to be proccessed in the file is $num")
sqlcontext.sql("""CREATE TEMPORARY VIEW Dest_user
|USING org.apache.spark.sql.cassandra
|OPTIONS (
| table "t_user",
| keyspace "ks_payu",
| cluster "Test Cluster",
| pushdown "true"
|)""".stripMargin)
following is the .sbt file :
name := "Simple Project"
version := "1.0"
scalaVersion := "2.11.8"
libraryDependencies += "org.apache.spark" %% "spark-core" % "2.0.2"
libraryDependencies += "com.datastax.spark" %% "spark-cassandra-connector" % "2.0.0"
libraryDependencies += "org.apache.spark" % "spark-sql_2.11" % "2.0.2"
and the error :
Number of records to be proccessed in the file is 10
17/04/12 16:24:08 INFO SparkSqlParser: Parsing command: CREATE TEMPORARY VIEW Dest_user
USING org.apache.spark.sql.cassandra
OPTIONS (
table "t_user",
keyspace "ks_payu",
cluster "Test Cluster",
pushdown "true")
Exception in thread "main" java.lang.ClassNotFoundException: Failed to find data source: org.apache.spark.sql.cassandra. Please find packages at https://cwiki.apache.org/confluence/display/SPARK/Third+Party+Projects
at org.apache.spark.sql.execution.datasources.DataSource.lookupDataSource(DataSource.scala:148)
at org.apache.spark.sql.execution.datasources.DataSource.providingClass$lzycompute(DataSource.scala:79)
at org.apache.spark.sql.execution.datasources.DataSource.providingClass(DataSource.scala:79)
at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:340)
at org.apache.spark.sql.execution.datasources.CreateTempViewUsing.run(ddl.scala:82)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:58)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:56)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:74)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:115)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:115)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:136)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:133)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:114)
at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:86)
at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:86)
at org.apache.spark.sql.Dataset.<init>(Dataset.scala:186)
at org.apache.spark.sql.Dataset.<init>(Dataset.scala:167)
at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:65)
at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:582)
at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:682)
at SimpleApp$.main(simpleApp.scala:61)
at SimpleApp.main(simpleApp.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:736)
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:185)
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:210)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:124)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.lang.ClassNotFoundException: org.apache.spark.sql.cassandra.DefaultSource
at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
at org.apache.spark.sql.execution.datasources.DataSource$$anonfun$5$$anonfun$apply$1.apply(DataSource.scala:132)
at org.apache.spark.sql.execution.datasources.DataSource$$anonfun$5$$anonfun$apply$1.apply(DataSource.scala:132)
at scala.util.Try$.apply(Try.scala:192)
at org.apache.spark.sql.execution.datasources.DataSource$$anonfun$5.apply(DataSource.scala:132)
at org.apache.spark.sql.execution.datasources.DataSource$$anonfun$5.apply(DataSource.scala:132)
at scala.util.Try.orElse(Try.scala:84)
at org.apache.spark.sql.execution.datasources.DataSource.lookupDataSource(DataSource.scala:132)
... 31 more
17/04/12 16:24:08 INFO SparkContext: Invoking stop() from shutdown hook
17/04/12 16:24:08 INFO SparkUI: Stopped Spark web UI at http://10.0.2.15:4040
17/04/12 16:24:08 INFO MapOutputTrackerMasterEndpoint: MapOutputTrackerMasterEndpoint stopped!
17/04/12 16:24:08 INFO MemoryStore: MemoryStore cleared
17/04/12 16:24:08 INFO BlockManager: BlockManager stopped
17/04/12 16:24:08 INFO BlockManagerMaster: BlockManagerMaster stopped
17/04/12 16:24:08 INFO OutputCommitCoordinator$OutputCommitCoordinatorEndpoint: OutputCommitCoordinator stopped!
17/04/12 16:24:08 INFO SparkContext: Successfully stopped SparkContext
17/04/12 16:24:08 INFO ShutdownHookManager: Shutdown hook called
17/04/12 16:24:08 INFO ShutdownHookManager: Deleting directory /tmp/spark-e91df44f-784e-4df4-819a-e8cc7c556e90
kindly let me know if any issue with the code or dependencies ?
regards
Goutham
--
You received this message because you are subscribed to the Google Groups "DataStax Spark Connector for Apache Cassandra" group.
To unsubscribe from this group and stop receiving emails from it, send an email to spark-connector-...@lists.datastax.com.