Hi,
I like Cascading because it makes Hadoop easy to use. While learning Cascading, I ran into a small problem.
I use Eclipse to test Cascading. The cascading-core, cascading-hadoop, cascading-xml and cascading-local jars have been added to the build path, and Eclipse shows no errors. I am using the first example from the user guide to check whether it runs on my local laptop (code below). When I run it, the following error is displayed:
Exception in thread "main" java.lang.NoClassDefFoundError: org/jgrapht/ext/EdgeNameProvider
at cascading.scheme.Scheme.<init>(Scheme.java:70)
at cascading.scheme.hadoop.TextLine.<init>(TextLine.java:266)
at Main2.main(Main2.java:46)
Caused by: java.lang.ClassNotFoundException: org.jgrapht.ext.EdgeNameProvider
at java.net.URLClassLoader$1.run(Unknown Source)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(Unknown Source)
at java.lang.ClassLoader.loadClass(Unknown Source)
at sun.misc.Launcher$AppClassLoader.loadClass(Unknown Source)
at java.lang.ClassLoader.loadClass(Unknown Source)
... 3 more
My Eclipse environment is shown in the screenshot below.
![]()
/*
* Copyright (c) 2007-2013 Concurrent, Inc. All Rights Reserved.
*
* Project and contact information:
http://www.cascading.org/ */
import java.util.Properties;
import cascading.flow.Flow;
import cascading.flow.FlowConnector;
import cascading.flow.FlowDef;
import cascading.flow.hadoop.HadoopFlowConnector;
import cascading.operation.Aggregator;
import cascading.operation.Function;
import cascading.operation.aggregator.Count;
import cascading.operation.expression.ExpressionFilter;
import cascading.operation.expression.ExpressionFunction;
import cascading.operation.regex.RegexFilter;
import cascading.operation.regex.RegexGenerator;
import cascading.operation.regex.RegexParser;
import cascading.pipe.CoGroup;
import cascading.pipe.Checkpoint;
import cascading.pipe.Each;
import cascading.pipe.Every;
import cascading.pipe.GroupBy;
import cascading.pipe.HashJoin;
import cascading.pipe.Pipe;
import cascading.pipe.assembly.Rename;
import cascading.pipe.assembly.Retain;
import cascading.pipe.joiner.InnerJoin;
import cascading.property.AppProps;
import cascading.scheme.Scheme;
import cascading.scheme.hadoop.TextDelimited;
import cascading.scheme.hadoop.TextLine;
import cascading.tap.SinkMode;
import cascading.tap.Tap;
import cascading.tap.hadoop.Hfs;
import cascading.tuple.Fields;
/**
 * Word-count example: reads lines of text from an input path, splits them
 * into words with a regular expression, groups by word and counts the
 * occurrences, then writes the ("word", "count") pairs to an output path.
 *
 * <p>Usage: {@code Main2 <inputPath> <outputPath>}
 *
 * <p>NOTE(review): the reported {@code NoClassDefFoundError} for
 * {@code org/jgrapht/ext/EdgeNameProvider} is raised while constructing the
 * first {@code TextLine} scheme, which suggests the JGraphT jar is missing
 * from the runtime classpath -- confirm against the project's build path.
 */
public class Main2 {

    /**
     * Builds and runs the word-count flow.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} is the
     *             output path (replaced if it already exists, per
     *             {@code SinkMode.REPLACE})
     * @throws IllegalArgumentException if fewer than two arguments are given
     */
    public static void main(String[] args) {
        // Fail fast with a usage hint instead of a bare
        // ArrayIndexOutOfBoundsException when arguments are missing.
        if (args == null || args.length < 2) {
            throw new IllegalArgumentException(
                    "Usage: Main2 <inputPath> <outputPath>");
        }
        String inputPath = args[0];
        String outputPath = args[1];

        // define source and sink Taps.
        Scheme sourceScheme = new TextLine(new Fields("line"));
        Tap source = new Hfs(sourceScheme, inputPath);
        Scheme sinkScheme = new TextLine(new Fields("word", "count"));
        Tap sink = new Hfs(sinkScheme, outputPath, SinkMode.REPLACE);

        // the 'head' of the pipe assembly
        Pipe assembly = new Pipe("wordcount");

        // For each input Tuple
        // parse out each word into a new Tuple with the field name "word"
        // regular expressions are optional in Cascading
        String regex = "(?<!\\pL)(?=\\pL)[^ ]*(?<=\\pL)(?!\\pL)";
        Function function = new RegexGenerator(new Fields("word"), regex);
        assembly = new Each(assembly, new Fields("line"), function);

        // group the Tuple stream by the "word" value
        assembly = new GroupBy(assembly, new Fields("word"));

        // For every Tuple group
        // count the number of occurrences of "word" and store result in
        // a field named "count"
        Aggregator count = new Count(new Fields("count"));
        assembly = new Every(assembly, count);

        // initialize app properties, tell Hadoop which jar file to use
        Properties properties = new Properties();
        AppProps.setApplicationJarClass(properties, Main2.class);

        // plan a new Flow from the assembly using the source and sink Taps
        // with the above properties
        FlowConnector flowConnector = new HadoopFlowConnector(properties);
        Flow flow = flowConnector.connect("word-count", source, sink, assembly);

        // execute the flow, block until complete
        flow.complete();
    }
}