I am attempting to join a TSV source with an avro source.
I am using scalding 0.8.2 and the scalding.avro project (in this specific job, I'm using the PackedAvroSource).
When I attempt to run the job on my cluster, it throws the following exception during initialization:
Exception in thread "main" cascading.flow.planner.PlannerException: could not build flow from assembly: [unable to pack object: cascading.flow.hadoop.HadoopFlowStep]
at cascading.flow.planner.FlowPlanner.handleExceptionDuringPlanning(FlowPlanner.java:533)
at cascading.flow.hadoop.planner.HadoopPlanner.buildFlow(HadoopPlanner.java:237)
at cascading.flow.FlowConnector.connect(FlowConnector.java:454)
at com.twitter.scalding.Job.buildFlow(Job.scala:91)
at com.twitter.scalding.Job.run(Job.scala:124)
at com.twitter.scalding.Tool.start$1(Tool.scala:105)
at com.twitter.scalding.Tool.run(Tool.scala:121)
at com.twitter.scalding.Tool.run(Tool.scala:72)
at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:65)
at com.twitter.scalding.Tool$.main(Tool.scala:128)
at com.twitter.scalding.Tool.main(Tool.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
at java.lang.reflect.Method.invoke(Method.java:597)
at org.apache.hadoop.util.RunJar.main(RunJar.java:156)
Caused by: cascading.flow.FlowException: unable to pack object: cascading.flow.hadoop.HadoopFlowStep
at cascading.flow.hadoop.HadoopFlowStep.pack(HadoopFlowStep.java:195)
at cascading.flow.hadoop.HadoopFlowStep.getInitializedConfig(HadoopFlowStep.java:170)
at cascading.flow.hadoop.HadoopFlowStep.createFlowStepJob(HadoopFlowStep.java:201)
at cascading.flow.hadoop.HadoopFlowStep.createFlowStepJob(HadoopFlowStep.java:69)
at cascading.flow.planner.BaseFlowStep.getFlowStepJob(BaseFlowStep.java:680)
at cascading.flow.BaseFlow.initializeNewJobsMap(BaseFlow.java:1148)
at cascading.flow.BaseFlow.initialize(BaseFlow.java:198)
at cascading.flow.hadoop.planner.HadoopPlanner.buildFlow(HadoopPlanner.java:231)
... 14 more
Caused by: java.io.NotSerializableException: org.apache.avro.Schema$RecordSchema
at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1164)
at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1518)
at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1483)
at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1400)
at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1158)
at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1518)
at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1483)
at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1400)
at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1158)
at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1518)
at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1483)
at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1400)
at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1158)
at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1518)
at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1483)
at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1400)
at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1158)
at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1518)
at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1483)
at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1400)
at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1158)
at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1518)
at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1483)
at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1400)
at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1158)
at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1518)
at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1483)
at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1400)
at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1158)
at java.io.ObjectOutputStream.writeObject(ObjectOutputStream.java:330)
at java.util.ArrayList.writeObject(ArrayList.java:570)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
at java.lang.reflect.Method.invoke(Method.java:597)
at java.io.ObjectStreamClass.invokeWriteObject(ObjectStreamClass.java:940)
at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1469)
at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1400)
at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1158)
at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1518)
at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1483)
at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1400)
at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1158)
at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1518)
at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1483)
at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1400)
at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1158)
at java.io.ObjectOutputStream.writeObject(ObjectOutputStream.java:330)
at java.util.HashMap.writeObject(HashMap.java:1001)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
at java.lang.reflect.Method.invoke(Method.java:597)
at java.io.ObjectStreamClass.invokeWriteObject(ObjectStreamClass.java:940)
at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1469)
at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1400)
at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1158)
at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1518)
at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1483)
at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1400)
at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1158)
at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1518)
at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1483)
at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1400)
at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1158)
at java.io.ObjectOutputStream.writeObject(ObjectOutputStream.java:330)
at cascading.flow.hadoop.util.JavaObjectSerializer.serialize(JavaObjectSerializer.java:57)
at cascading.flow.hadoop.util.HadoopUtil.serializeBase64(HadoopUtil.java:264)
at cascading.flow.hadoop.HadoopFlowStep.pack(HadoopFlowStep.java:191)
... 21 more
I have not made any changes to the Job configuration, everything should be set to the default values.
Any help is greatly appreciated.