import com.nicta.scoobi.Scoobi._
import Reduction._
/**
 * Scoobi word-count job.
 *
 * Reads a text file, splits every line on single spaces, counts how often
 * each token occurs and writes the `(word, count)` pairs to a text file,
 * overwriting any previous output.
 */
object WordCount extends ScoobiApp {

  /** Builds the word-count pipeline and runs it. */
  def run(): Unit = {
    // Three slashes leave the URI authority empty so the default namenode is
    // used. With two slashes ("hdfs://user/...") the segment "user" is parsed
    // as the namenode host name instead of the first path component.
    val inputPath  = "hdfs:///user/dust/sample"
    val outputPath = "hdfs:///user/dust/sample_out"

    val lines = fromTextFile(inputPath)

    val counts = lines
      .mapFlatten(_.split(" "))
      .map(word => (word, 1))
      .groupByKey
      .combine(Sum.int)

    // Use the implicit configuration supplied by ScoobiApp rather than a fresh
    // ScoobiConfiguration(), which would not carry the application's settings.
    counts.toTextFile(outputPath, overwrite = true).persist
  }
}
// Project coordinates.
name := "MyApplication"

version := "1.0"

scalaVersion := "2.11.4"

// Scoobi; `%%` appends the Scala binary version to the artifact name.
libraryDependencies += "com.nicta" %% "scoobi" % "0.9.1"

// Sonatype release and snapshot repositories.
resolvers ++= Seq(
  Resolver.sonatypeRepo("releases"),
  Resolver.sonatypeRepo("snapshots")
)
$ ls
build.sbt project src target
$ sbt compile
[info] Set current project to MyApplication (in build file:/home/dust/scala/sample_hadoop/)
[info] Compiling 1 Scala source to /home/dust/scala/sample_hadoop/target/scala-2.11/classes...
[success] Total time: 7 s, completed Dec 1, 2014 5:44:42 AM
$ sbt "run-main WordCount"
[info] Set current project to MyApplication (in build file:/home/dust/scala/sample_hadoop/)
[info] Running WordCount
[WARN] NativeCodeLoader - Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
[INFO] deprecation - mapred.used.genericoptionsparser is deprecated. Instead, use mapreduce.client.genericoptionsparser.used
[INFO] deprecation - mapred.map.child.log.level is deprecated. Instead, use mapreduce.map.log.level
[INFO] deprecation - mapred.reduce.child.log.level is deprecated. Instead, use mapreduce.reduce.log.level
[INFO] deprecation - mapred.job.tracker is deprecated. Instead, use mapreduce.jobtracker.address
[error] (run-main-0) java.lang.AbstractMethodError: com.nicta.scoobi.impl.mapreducer.BridgeStore.com$nicta$scoobi$core$DataSink$$compressionConfigured_$eq(Z)V
java.lang.AbstractMethodError: com.nicta.scoobi.impl.mapreducer.BridgeStore.com$nicta$scoobi$core$DataSink$$compressionConfigured_$eq(Z)V
at com.nicta.scoobi.core.DataSink$class.$init$(DataSink.scala:53)
at com.nicta.scoobi.impl.mapreducer.BridgeStore.<init>(BridgeStore.scala:44)
at com.nicta.scoobi.impl.plan.comp.ProcessNodeImpl$class.com$nicta$scoobi$impl$plan$comp$ProcessNodeImpl$$createBridgeStore(ProcessNode.scala:53)
at com.nicta.scoobi.impl.plan.comp.ProcessNodeImpl$class.bridgeStore(ProcessNode.scala:45)
at com.nicta.scoobi.impl.plan.comp.ParallelDo.bridgeStore$lzycompute(ProcessNode.scala:80)
at com.nicta.scoobi.impl.plan.comp.ParallelDo.bridgeStore(ProcessNode.scala:80)
at com.nicta.scoobi.impl.plan.comp.ProcessNodeImpl$class.bridgeToString(ProcessNode.scala:61)
at com.nicta.scoobi.impl.plan.comp.ParallelDo.bridgeToString(ProcessNode.scala:80)
at com.nicta.scoobi.impl.plan.comp.ParallelDo.<init>(ProcessNode.scala:91)
at com.nicta.scoobi.impl.plan.comp.ParallelDo$.create(ProcessNode.scala:184)
at com.nicta.scoobi.impl.plan.DListImpl$.apply(DListImpl.scala:112)
at com.nicta.scoobi.impl.plan.DListImpl$.apply(DListImpl.scala:111)
at com.nicta.scoobi.application.InputsOutputs$class.fromSource(InputsOutputs.scala:190)
at com.nicta.scoobi.application.InputsOutputs$.fromSource(InputsOutputs.scala:193)
at com.nicta.scoobi.io.text.TextInput$class.fromTextSource(TextInput.scala:93)
at com.nicta.scoobi.Scoobi$.fromTextSource(Scoobi.scala:25)
at com.nicta.scoobi.io.text.TextInput$class.fromTextFile(TextInput.scala:47)
at com.nicta.scoobi.Scoobi$.fromTextFile(Scoobi.scala:25)
at WordCount$.run(WordCount.scala:7)
at com.nicta.scoobi.application.ScoobiApp$$anonfun$main$1.apply$mcV$sp(ScoobiApp.scala:81)
at com.nicta.scoobi.application.ScoobiApp$$anonfun$main$1.apply(ScoobiApp.scala:76)
at com.nicta.scoobi.application.ScoobiApp$$anonfun$main$1.apply(ScoobiApp.scala:76)
at com.nicta.scoobi.application.Hadoop$class.runOnCluster(Hadoop.scala:115)
at WordCount$.runOnCluster(WordCount.scala:4)
at com.nicta.scoobi.application.Hadoop$class.executeOnCluster(Hadoop.scala:69)
at WordCount$.executeOnCluster(WordCount.scala:4)
at com.nicta.scoobi.application.Hadoop$$anonfun$onCluster$1.apply(Hadoop.scala:55)
at com.nicta.scoobi.application.InMemoryHadoop$class.withTimer(InMemory.scala:71)
at WordCount$.withTimer(WordCount.scala:4)
at com.nicta.scoobi.application.InMemoryHadoop$class.showTime(InMemory.scala:79)
at WordCount$.showTime(WordCount.scala:4)
at com.nicta.scoobi.application.Hadoop$class.onCluster(Hadoop.scala:55)
at WordCount$.onCluster(WordCount.scala:4)
at com.nicta.scoobi.application.Hadoop$class.onHadoop(Hadoop.scala:61)
at WordCount$.onHadoop(WordCount.scala:4)
at com.nicta.scoobi.application.ScoobiApp$class.main(ScoobiApp.scala:76)
at WordCount$.main(WordCount.scala:4)
at WordCount.main(WordCount.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
[trace] Stack trace suppressed: run last compile:runMain for the full output.
java.lang.RuntimeException: Nonzero exit code: 1
at scala.sys.package$.error(package.scala:27)
[trace] Stack trace suppressed: run last compile:runMain for the full output.
[error] (compile:runMain) Nonzero exit code: 1
[error] Total time: 3 s, completed Dec 1, 2014 5:45:01 AM