/**
 * Sample Cascading job that round-trips a comma-delimited text file through Parquet.
 *
 * <p>{@code write()} turns the text input into Parquet output; {@code read()} then takes
 * that Parquet output, groups it by the {@code Branch} column and writes it back out as
 * delimited text. Both flows are run on a {@code HadoopFlowConnector}.
 */
public class ReadWriteParquet {
    // Delimited text file consumed by write().
    static String textInputPath = "inputOutput/input/in1.txt";
    // Parquet location produced by write() and consumed by read().
    static String parquetOutputPath = "inputOutput/output/parquet-out";
    // Delimited text location produced by read().
    static String textOutputPath = "inputOutput/output/text-out";

    /**
     * Runs the write flow and then the read flow.
     *
     * @param args ignored
     * @throws Exception declared for callers; nothing in this class throws a checked
     *                   exception itself
     */
    public static void main(String[] args) throws Exception {
        write();
        read();
    }

    /**
     * Builds the six-column field declaration shared by both flows, without type information.
     *
     * @return a new {@code Fields} instance naming the record columns in order
     */
    private static Fields recordFields() {
        return new Fields("Name", "College", "Branch", "Age", "Doj", "BigDeci");
    }

    /**
     * Reads the Parquet data at {@code parquetOutputPath}, groups it on {@code Branch},
     * and writes the result as comma-delimited text to {@code textOutputPath}, replacing
     * whatever is already there.
     */
    private static void read() {
        // Job configuration: application jar lookup plus BigDecimal tuple serialization.
        Properties jobProps = new Properties();
        AppProps.setApplicationJarClass(jobProps, ReadWriteParquet.class);
        TupleSerializationProps.addSerialization(jobProps,
                BigDecimalSerialization.class.getName());

        // Raw Scheme/Tap types are kept on purpose so generic checking matches the original.
        Scheme parquetScheme = new ParquetTupleScheme(recordFields());
        Scheme delimitedScheme = new TextDelimited(true, ",");
        Tap parquetSource = new Hfs(parquetScheme, parquetOutputPath);
        Tap textSink = new Hfs(delimitedScheme, textOutputPath, SinkMode.REPLACE);

        // The GroupBy pipe serves as both the source binding and the tail of the assembly.
        Pipe groupedByBranch = new GroupBy(new Pipe("Read Parquet"), new Fields("Branch"));

        FlowDef flowDef = FlowDef.flowDef();
        flowDef.addSource(groupedByBranch, parquetSource);
        flowDef.addTailSink(groupedByBranch, textSink);

        new HadoopFlowConnector(jobProps).connect(flowDef).complete();
    }

    /**
     * Reads the comma-delimited text at {@code textInputPath} using typed fields and
     * writes it as Parquet to {@code parquetOutputPath}, replacing whatever is already there.
     */
    private static void write() {
        // Column types: three strings, an integer, a dd/MM/yyyy date, and a BigDecimal.
        Fields typedFields = recordFields().applyTypes(String.class, String.class,
                String.class, Integer.class, new DateType("dd/MM/yyyy"), BigDecimal.class);

        // Parquet message type handed to the sink scheme.
        String parquetSchema = "message ReadWriteParquet {required Binary Name; required Binary College; required Binary Branch; optional int64 Age; required int64 Doj; required Double BigDeci; }";

        // Raw Scheme/Tap types are kept on purpose so generic checking matches the original.
        Scheme delimitedScheme = new TextDelimited(typedFields, true, ",");
        Scheme parquetScheme = new ParquetTupleScheme(typedFields, typedFields, parquetSchema);
        Tap textSource = new Hfs(delimitedScheme, textInputPath);
        Tap parquetSink = new Hfs(parquetScheme, parquetOutputPath, SinkMode.REPLACE);

        Pipe copyPipe = new Pipe("Write Parquet");

        FlowDef flowDef = FlowDef.flowDef();
        flowDef.addSource(copyPipe, textSource);
        flowDef.addTailSink(copyPipe, parquetSink);

        HadoopFlowConnector connector = new HadoopFlowConnector();
        connector.connect(flowDef).complete();
    }
}
--
You received this message because you are subscribed to the Google Groups "cascading-user" group.
To unsubscribe from this group and stop receiving emails from it, send an email to cascading-use...@googlegroups.com.
To post to this group, send email to cascadi...@googlegroups.com.
Visit this group at http://groups.google.com/group/cascading-user.
To view this discussion on the web visit https://groups.google.com/d/msgid/cascading-user/b9c28380-b34d-4b38-959a-977238ae71b1%40googlegroups.com.
For more options, visit https://groups.google.com/d/optout.