---------------------------------------------------------------------------
Py4JJavaError                             Traceback (most recent call last)
<ipython-input-43-034d3b8f0c41> in <module>()
      1 #model = dlClassifier.fit(trainingData)
----> 2 model = dlClassifier.fit(trainingData)

/home/wojciech/Libraries/spark-2.2.0-bin-hadoop2.7/python/pyspark/ml/base.pyc in fit(self, dataset, params)
     62                 return self.copy(params)._fit(dataset)
     63             else:
---> 64                 return self._fit(dataset)
     65         else:
     66             raise ValueError("Params must be either a param map or a list/tuple of param maps, "

/tmp/spark-92b2f0df-6c4d-4bc1-9fd9-9ba8b36fd4ef/userFiles-fccb9f5a-9007-4884-a996-157c390c399b/bigdl-0.3.0-python-api.zip/bigdl/models/ml_pipeline/dl_classifier.py in _fit(self, dataset)
    133
    134     def _fit(self, dataset):
--> 135         jmodel = callBigDlFunc(self.bigdl_type, "fitClassifier", self.value, dataset)
    136         model = DLClassifierModel.of(jmodel, self.featureSize, self.bigdl_type)
    137         return model

/tmp/spark-92b2f0df-6c4d-4bc1-9fd9-9ba8b36fd4ef/userFiles-fccb9f5a-9007-4884-a996-157c390c399b/bigdl-0.3.0-python-api.zip/bigdl/util/common.py in callBigDlFunc(bigdl_type, name, *args)
    456     sc = get_spark_context()
    457     api = getattr(jinstance, name)
--> 458     return callJavaFunc(sc, api, *args)
    459
    460

/tmp/spark-92b2f0df-6c4d-4bc1-9fd9-9ba8b36fd4ef/userFiles-fccb9f5a-9007-4884-a996-157c390c399b/bigdl-0.3.0-python-api.zip/bigdl/util/common.py in callJavaFunc(sc, func, *args)
    491     """ Call Java Function """
    492     args = [_py2java(sc, a) for a in args]
--> 493     result = func(*args)
    494     return _java2py(sc, result)
    495

/home/wojciech/Libraries/spark-2.2.0-bin-hadoop2.7/python/lib/py4j-0.10.4-src.zip/py4j/java_gateway.py in __call__(self, *args)
   1131         answer = self.gateway_client.send_command(command)
   1132         return_value = get_return_value(
-> 1133             answer, self.gateway_client, self.target_id, self.name)
   1134
   1135         for temp_arg in temp_args:

/home/wojciech/Libraries/spark-2.2.0-bin-hadoop2.7/python/pyspark/sql/utils.pyc in deco(*a, **kw)
     61     def deco(*a, **kw):
     62         try:
---> 63             return f(*a, **kw)
     64         except py4j.protocol.Py4JJavaError as e:
     65             s = e.java_exception.toString()

/home/wojciech/Libraries/spark-2.2.0-bin-hadoop2.7/python/lib/py4j-0.10.4-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
    317                 raise Py4JJavaError(
    318                     "An error occurred while calling {0}{1}{2}.\n".
--> 319                     format(target_id, ".", name), value)
    320             else:
    321                 raise Py4JError(

Py4JJavaError: An error occurred while calling o50.fitClassifier.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 134.0 failed 1 times, most recent failure: Lost task 0.0 in stage 134.0 (TID 146, localhost, executor driver): java.util.concurrent.ExecutionException: Layer info: Sequential[6218774b]{
  [input -> (1) -> (2) -> (3) -> output]
  (1): Linear[ac52e52b](29 -> 10)
  (2): Linear[6414a735](10 -> 2)
  (3): LogSoftMax[72891c1]
}/Linear[ac52e52b](29 -> 10)
java.lang.IllegalArgumentException: requirement failed: size mismatch, m1:2500x27 m2:29x10
    at scala.Predef$.require(Predef.scala:224)
    at com.intel.analytics.bigdl.tensor.DenseTensorMath$.addmm(DenseTensorMath.scala:515)
    at com.intel.analytics.bigdl.tensor.DenseTensor.addmm(DenseTensor.scala:1206)
    at com.intel.analytics.bigdl.nn.Linear.updateOutput(Linear.scala:108)
    at com.intel.analytics.bigdl.nn.Linear.updateOutput(Linear.scala:45)
    at com.intel.analytics.bigdl.nn.abstractnn.AbstractModule.forward(AbstractModule.scala:280)
    at com.intel.analytics.bigdl.nn.Sequential.updateOutput(Sequential.scala:39)
    at com.intel.analytics.bigdl.nn.abstractnn.AbstractModule.forward(AbstractModule.scala:280)
    at com.intel.analytics.bigdl.optim.DistriOptimizer$$anonfun$8$$anonfun$9$$anonfun$apply$2.apply$mcI$sp(DistriOptimizer.scala:223)
    at com.intel.analytics.bigdl.optim.DistriOptimizer$$anonfun$8$$anonfun$9$$anonfun$apply$2.apply(DistriOptimizer.scala:216)
    at com.intel.analytics.bigdl.optim.DistriOptimizer$$anonfun$8$$anonfun$9$$anonfun$apply$2.apply(DistriOptimizer.scala:216)
    at com.intel.analytics.bigdl.utils.ThreadPool$$anonfun$1$$anon$4.call(ThreadPool.scala:112)
    at java.util.concurrent.FutureTask.run(FutureTask.java:266)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)
    at java.util.concurrent.FutureTask.report(FutureTask.java:122)
    at java.util.concurrent.FutureTask.get(FutureTask.java:192)
    at com.intel.analytics.bigdl.optim.DistriOptimizer$$anonfun$8$$anonfun$11.apply(DistriOptimizer.scala:235)
    at com.intel.analytics.bigdl.optim.DistriOptimizer$$anonfun$8$$anonfun$11.apply(DistriOptimizer.scala:235)
    at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
    at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
    at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
    at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
    at scala.collection.AbstractTraversable.map(Traversable.scala:104)
    at com.intel.analytics.bigdl.optim.DistriOptimizer$$anonfun$8.apply(DistriOptimizer.scala:235)
    at com.intel.analytics.bigdl.optim.DistriOptimizer$$anonfun$8.apply(DistriOptimizer.scala:175)
    at org.apache.spark.rdd.ZippedPartitionsRDD2.compute(ZippedPartitionsRDD.scala:89)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
    at org.apache.spark.scheduler.Task.run(Task.scala:108)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)
Caused by: Layer info: Sequential[6218774b]{
  [input -> (1) -> (2) -> (3) -> output]
  (1): Linear[ac52e52b](29 -> 10)
  (2): Linear[6414a735](10 -> 2)
  (3): LogSoftMax[72891c1]
}/Linear[ac52e52b](29 -> 10)
java.lang.IllegalArgumentException: requirement failed: size mismatch, m1:2500x27 m2:29x10
    at scala.Predef$.require(Predef.scala:224)
    at com.intel.analytics.bigdl.tensor.DenseTensorMath$.addmm(DenseTensorMath.scala:515)
    at com.intel.analytics.bigdl.tensor.DenseTensor.addmm(DenseTensor.scala:1206)
    at com.intel.analytics.bigdl.nn.Linear.updateOutput(Linear.scala:108)
    at com.intel.analytics.bigdl.nn.Linear.updateOutput(Linear.scala:45)
    at com.intel.analytics.bigdl.nn.abstractnn.AbstractModule.forward(AbstractModule.scala:280)
    at com.intel.analytics.bigdl.nn.Sequential.updateOutput(Sequential.scala:39)
    at com.intel.analytics.bigdl.nn.abstractnn.AbstractModule.forward(AbstractModule.scala:280)
    at com.intel.analytics.bigdl.optim.DistriOptimizer$$anonfun$8$$anonfun$9$$anonfun$apply$2.apply$mcI$sp(DistriOptimizer.scala:223)
    at com.intel.analytics.bigdl.optim.DistriOptimizer$$anonfun$8$$anonfun$9$$anonfun$apply$2.apply(DistriOptimizer.scala:216)
    at com.intel.analytics.bigdl.optim.DistriOptimizer$$anonfun$8$$anonfun$9$$anonfun$apply$2.apply(DistriOptimizer.scala:216)
    at com.intel.analytics.bigdl.utils.ThreadPool$$anonfun$1$$anon$4.call(ThreadPool.scala:112)
    at java.util.concurrent.FutureTask.run(FutureTask.java:266)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)
    at com.intel.analytics.bigdl.nn.abstractnn.AbstractModule.forward(AbstractModule.scala:286)
    at com.intel.analytics.bigdl.nn.Sequential.updateOutput(Sequential.scala:39)
    at com.intel.analytics.bigdl.nn.abstractnn.AbstractModule.forward(AbstractModule.scala:280)
    at com.intel.analytics.bigdl.optim.DistriOptimizer$$anonfun$8$$anonfun$9$$anonfun$apply$2.apply$mcI$sp(DistriOptimizer.scala:223)
    at com.intel.analytics.bigdl.optim.DistriOptimizer$$anonfun$8$$anonfun$9$$anonfun$apply$2.apply(DistriOptimizer.scala:216)
    at com.intel.analytics.bigdl.optim.DistriOptimizer$$anonfun$8$$anonfun$9$$anonfun$apply$2.apply(DistriOptimizer.scala:216)
    at com.intel.analytics.bigdl.utils.ThreadPool$$anonfun$1$$anon$4.call(ThreadPool.scala:112)
    at java.util.concurrent.FutureTask.run(FutureTask.java:266)
    ... 3 more
Driver stacktrace:
    at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1499)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1487)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1486)
    at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
    at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1486)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:814)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:814)
    at scala.Option.foreach(Option.scala:257)
    at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:814)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1714)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1669)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1658)
    at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
    at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:630)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2022)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2119)
    at org.apache.spark.rdd.RDD$$anonfun$reduce$1.apply(RDD.scala:1026)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
    at org.apache.spark.rdd.RDD.withScope(RDD.scala:362)
    at org.apache.spark.rdd.RDD.reduce(RDD.scala:1008)
    at com.intel.analytics.bigdl.optim.DistriOptimizer$.optimize(DistriOptimizer.scala:285)
    at com.intel.analytics.bigdl.optim.DistriOptimizer.optimize(DistriOptimizer.scala:795)
    at org.apache.spark.ml.DLEstimator.internalFit(DLEstimator.scala:133)
    at org.apache.spark.ml.DLEstimator.internalFit(DLEstimator.scala:54)
    at org.apache.spark.ml.DLEstimatorBase.fit(DLEstimatorBase.scala:80)
    at com.intel.analytics.bigdl.python.api.PythonBigDL.fitClassifier(PythonBigDL.scala:2052)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
    at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
    at py4j.Gateway.invoke(Gateway.java:280)
    at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
    at py4j.commands.CallCommand.execute(CallCommand.java:79)
    at py4j.GatewayConnection.run(GatewayConnection.java:214)
    at java.lang.Thread.run(Thread.java:748)
Caused by: java.util.concurrent.ExecutionException: Layer info: Sequential[6218774b]{
  [input -> (1) -> (2) -> (3) -> output]
  (1): Linear[ac52e52b](29 -> 10)
  (2): Linear[6414a735](10 -> 2)
  (3): LogSoftMax[72891c1]
}/Linear[ac52e52b](29 -> 10)
java.lang.IllegalArgumentException: requirement failed: size mismatch, m1:2500x27 m2:29x10
    at scala.Predef$.require(Predef.scala:224)
    at com.intel.analytics.bigdl.tensor.DenseTensorMath$.addmm(DenseTensorMath.scala:515)
    at com.intel.analytics.bigdl.tensor.DenseTensor.addmm(DenseTensor.scala:1206)
    at com.intel.analytics.bigdl.nn.Linear.updateOutput(Linear.scala:108)
    at com.intel.analytics.bigdl.nn.Linear.updateOutput(Linear.scala:45)
    at com.intel.analytics.bigdl.nn.abstractnn.AbstractModule.forward(AbstractModule.scala:280)
    at com.intel.analytics.bigdl.nn.Sequential.updateOutput(Sequential.scala:39)
    at com.intel.analytics.bigdl.nn.abstractnn.AbstractModule.forward(AbstractModule.scala:280)
    at com.intel.analytics.bigdl.optim.DistriOptimizer$$anonfun$8$$anonfun$9$$anonfun$apply$2.apply$mcI$sp(DistriOptimizer.scala:223)
    at com.intel.analytics.bigdl.optim.DistriOptimizer$$anonfun$8$$anonfun$9$$anonfun$apply$2.apply(DistriOptimizer.scala:216)
    at com.intel.analytics.bigdl.optim.DistriOptimizer$$anonfun$8$$anonfun$9$$anonfun$apply$2.apply(DistriOptimizer.scala:216)
    at com.intel.analytics.bigdl.utils.ThreadPool$$anonfun$1$$anon$4.call(ThreadPool.scala:112)
    at java.util.concurrent.FutureTask.run(FutureTask.java:266)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)
    at java.util.concurrent.FutureTask.report(FutureTask.java:122)
    at java.util.concurrent.FutureTask.get(FutureTask.java:192)
    at com.intel.analytics.bigdl.optim.DistriOptimizer$$anonfun$8$$anonfun$11.apply(DistriOptimizer.scala:235)
    at com.intel.analytics.bigdl.optim.DistriOptimizer$$anonfun$8$$anonfun$11.apply(DistriOptimizer.scala:235)
    at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
    at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
    at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
    at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
    at scala.collection.AbstractTraversable.map(Traversable.scala:104)
    at com.intel.analytics.bigdl.optim.DistriOptimizer$$anonfun$8.apply(DistriOptimizer.scala:235)
    at com.intel.analytics.bigdl.optim.DistriOptimizer$$anonfun$8.apply(DistriOptimizer.scala:175)
    at org.apache.spark.rdd.ZippedPartitionsRDD2.compute(ZippedPartitionsRDD.scala:89)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
    at org.apache.spark.scheduler.Task.run(Task.scala:108)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    ... 1 more
Caused by: Layer info: Sequential[6218774b]{
  [input -> (1) -> (2) -> (3) -> output]
  (1): Linear[ac52e52b](29 -> 10)
  (2): Linear[6414a735](10 -> 2)
  (3): LogSoftMax[72891c1]
}/Linear[ac52e52b](29 -> 10)
java.lang.IllegalArgumentException: requirement failed: size mismatch, m1:2500x27 m2:29x10
    at scala.Predef$.require(Predef.scala:224)
    at com.intel.analytics.bigdl.tensor.DenseTensorMath$.addmm(DenseTensorMath.scala:515)
    at com.intel.analytics.bigdl.tensor.DenseTensor.addmm(DenseTensor.scala:1206)
    at com.intel.analytics.bigdl.nn.Linear.updateOutput(Linear.scala:108)
    at com.intel.analytics.bigdl.nn.Linear.updateOutput(Linear.scala:45)
    at com.intel.analytics.bigdl.nn.abstractnn.AbstractModule.forward(AbstractModule.scala:280)
    at com.intel.analytics.bigdl.nn.Sequential.updateOutput(Sequential.scala:39)
    at com.intel.analytics.bigdl.nn.abstractnn.AbstractModule.forward(AbstractModule.scala:280)
    at com.intel.analytics.bigdl.optim.DistriOptimizer$$anonfun$8$$anonfun$9$$anonfun$apply$2.apply$mcI$sp(DistriOptimizer.scala:223)
    at com.intel.analytics.bigdl.optim.DistriOptimizer$$anonfun$8$$anonfun$9$$anonfun$apply$2.apply(DistriOptimizer.scala:216)
    at com.intel.analytics.bigdl.optim.DistriOptimizer$$anonfun$8$$anonfun$9$$anonfun$apply$2.apply(DistriOptimizer.scala:216)
    at com.intel.analytics.bigdl.utils.ThreadPool$$anonfun$1$$anon$4.call(ThreadPool.scala:112)
    at java.util.concurrent.FutureTask.run(FutureTask.java:266)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)
    at com.intel.analytics.bigdl.nn.abstractnn.AbstractModule.forward(AbstractModule.scala:286)
    at com.intel.analytics.bigdl.nn.Sequential.updateOutput(Sequential.scala:39)
    at com.intel.analytics.bigdl.nn.abstractnn.AbstractModule.forward(AbstractModule.scala:280)
    at com.intel.analytics.bigdl.optim.DistriOptimizer$$anonfun$8$$anonfun$9$$anonfun$apply$2.apply$mcI$sp(DistriOptimizer.scala:223)
    at com.intel.analytics.bigdl.optim.DistriOptimizer$$anonfun$8$$anonfun$9$$anonfun$apply$2.apply(DistriOptimizer.scala:216)
    at com.intel.analytics.bigdl.optim.DistriOptimizer$$anonfun$8$$anonfun$9$$anonfun$apply$2.apply(DistriOptimizer.scala:216)
    at com.intel.analytics.bigdl.utils.ThreadPool$$anonfun$1$$anon$4.call(ThreadPool.scala:112)
    at java.util.concurrent.FutureTask.run(FutureTask.java:266)
    ... 3 more
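The decisive line in that wall of text is the size mismatch: the first layer was declared as Linear(29 -> 10), but each mini-batch arrives as a 2500x27 matrix, i.e. the assembled feature vector has 27 elements, not 29. The sketch below is not the original notebook code (the "features" column name and the hidden size of 10 are assumptions); it only shows that the first Linear input and the DLClassifier feature size must both equal the real feature-vector length.

# Sketch only: tie the layer sizes and feature_size to the actual vector length.
from bigdl.util.common import init_engine
from bigdl.nn.layer import Sequential, Linear, LogSoftMax
from bigdl.nn.criterion import ClassNLLCriterion
from bigdl.models.ml_pipeline.dl_classifier import DLClassifier

init_engine()  # assumes the SparkContext was already created with the BigDL conf

n_features = len(trainingData.first()["features"])   # 27 for this DataFrame, not 29

model = Sequential() \
    .add(Linear(n_features, 10)) \
    .add(Linear(10, 2)) \
    .add(LogSoftMax())

dlClassifier = DLClassifier(model, ClassNLLCriterion(), [n_features])
dlModel = dlClassifier.fit(trainingData)   # the 2500x27 batch now matches a 27x10 weight matrix

The second trace below comes from a plain Spark ML MultilayerPerceptronClassifier run rather than from BigDL.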
---------------------------------------------------------------------------
Py4JJavaError                             Traceback (most recent call last)
<ipython-input-28-f4a1abde3ba7> in <module>()
     36
     37 # train the model
---> 38 model = trainer.fit(train)

C:\spark-2.3.3-bin-hadoop2.7\python\pyspark\ml\base.py in fit(self, dataset, params)
    130                 return self.copy(params)._fit(dataset)
    131             else:
--> 132                 return self._fit(dataset)
    133         else:
    134             raise ValueError("Params must be either a param map or a list/tuple of param maps, "

C:\spark-2.3.3-bin-hadoop2.7\python\pyspark\ml\wrapper.py in _fit(self, dataset)
    286
    287     def _fit(self, dataset):
--> 288         java_model = self._fit_java(dataset)
    289         model = self._create_model(java_model)
    290         return self._copyValues(model)

C:\spark-2.3.3-bin-hadoop2.7\python\pyspark\ml\wrapper.py in _fit_java(self, dataset)
    283         """
    284         self._transfer_params_to_java()
--> 285         return self._java_obj.fit(dataset._jdf)
    286
    287     def _fit(self, dataset):

C:\spark-2.3.3-bin-hadoop2.7\python\lib\py4j-0.10.7-src.zip\py4j\java_gateway.py in __call__(self, *args)
   1255         answer = self.gateway_client.send_command(command)
   1256         return_value = get_return_value(
-> 1257             answer, self.gateway_client, self.target_id, self.name)
   1258
   1259         for temp_arg in temp_args:

C:\spark-2.3.3-bin-hadoop2.7\python\pyspark\sql\utils.py in deco(*a, **kw)
     61     def deco(*a, **kw):
     62         try:
---> 63             return f(*a, **kw)
     64         except py4j.protocol.Py4JJavaError as e:
     65             s = e.java_exception.toString()

C:\spark-2.3.3-bin-hadoop2.7\python\lib\py4j-0.10.7-src.zip\py4j\protocol.py in get_return_value(answer, gateway_client, target_id, name)
    326                 raise Py4JJavaError(
    327                     "An error occurred while calling {0}{1}{2}.\n".
--> 328                     format(target_id, ".", name), value)
    329             else:
    330                 raise Py4JError(

Py4JJavaError: An error occurred while calling o686.fit.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 65.0 failed 1 times, most recent failure: Lost task 0.0 in stage 65.0 (TID 386, localhost, executor driver): java.lang.ArrayIndexOutOfBoundsException: 3
    at org.apache.spark.ml.classification.LabelConverter$.encodeLabeledPoint(MultilayerPerceptronClassifier.scala:119)
    at org.apache.spark.ml.classification.MultilayerPerceptronClassifier$$anonfun$3.apply(MultilayerPerceptronClassifier.scala:244)
    at org.apache.spark.ml.classification.MultilayerPerceptronClassifier$$anonfun$3.apply(MultilayerPerceptronClassifier.scala:244)
    at scala.collection.Iterator$$anon$11.next(Iterator.scala:409)
    at scala.collection.Iterator$GroupedIterator.takeDestructively(Iterator.scala:1076)
    at scala.collection.Iterator$GroupedIterator.go(Iterator.scala:1091)
    at scala.collection.Iterator$GroupedIterator.fill(Iterator.scala:1128)
    at scala.collection.Iterator$GroupedIterator.hasNext(Iterator.scala:1132)
    at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
    at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
    at org.apache.spark.storage.memory.MemoryStore.putIteratorAsValues(MemoryStore.scala:216)
    at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1094)
    at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1085)
    at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1020)
    at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1085)
    at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:811)
    at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:335)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:286)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
    at org.apache.spark.scheduler.Task.run(Task.scala:109)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
    at java.lang.Thread.run(Unknown Source)
Driver stacktrace:
    at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1661)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1649)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1648)
    at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
    at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1648)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:831)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:831)
    at scala.Option.foreach(Option.scala:257)
    at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:831)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1882)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1831)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1820)
    at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
    at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:642)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2034)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2055)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2074)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2099)
    at org.apache.spark.rdd.RDD.count(RDD.scala:1168)
    at org.apache.spark.mllib.optimization.LBFGS$.runLBFGS(LBFGS.scala:195)
    at org.apache.spark.mllib.optimization.LBFGS.optimize(LBFGS.scala:142)
    at org.apache.spark.ml.ann.FeedForwardTrainer.train(Layer.scala:854)
    at org.apache.spark.ml.classification.MultilayerPerceptronClassifier.train(MultilayerPerceptronClassifier.scala:266)
    at org.apache.spark.ml.classification.MultilayerPerceptronClassifier.train(MultilayerPerceptronClassifier.scala:143)
    at org.apache.spark.ml.Predictor.fit(Predictor.scala:118)
    at org.apache.spark.ml.Predictor.fit(Predictor.scala:82)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(Unknown Source)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(Unknown Source)
    at java.lang.reflect.Method.invoke(Unknown Source)
    at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
    at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
    at py4j.Gateway.invoke(Gateway.java:282)
    at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
    at py4j.commands.CallCommand.execute(CallCommand.java:79)
    at py4j.GatewayConnection.run(GatewayConnection.java:238)
    at java.lang.Thread.run(Unknown Source)
Caused by: java.lang.ArrayIndexOutOfBoundsException: 3
    at org.apache.spark.ml.classification.LabelConverter$.encodeLabeledPoint(MultilayerPerceptronClassifier.scala:119)
    at org.apache.spark.ml.classification.MultilayerPerceptronClassifier$$anonfun$3.apply(MultilayerPerceptronClassifier.scala:244)
    at org.apache.spark.ml.classification.MultilayerPerceptronClassifier$$anonfun$3.apply(MultilayerPerceptronClassifier.scala:244)
    at scala.collection.Iterator$$anon$11.next(Iterator.scala:409)
    at scala.collection.Iterator$GroupedIterator.takeDestructively(Iterator.scala:1076)
    at scala.collection.Iterator$GroupedIterator.go(Iterator.scala:1091)
    at scala.collection.Iterator$GroupedIterator.fill(Iterator.scala:1128)
    at scala.collection.Iterator$GroupedIterator.hasNext(Iterator.scala:1132)
    at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
    at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
    at org.apache.spark.storage.memory.MemoryStore.putIteratorAsValues(MemoryStore.scala:216)
    at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1094)
    at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1085)
    at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1020)
    at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1085)
    at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:811)
    at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:335)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:286)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
    at org.apache.spark.scheduler.Task.run(Task.scala:109)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
    ... 1 more
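This second trace is a different failure mode: Spark's LabelConverter.encodeLabeledPoint one-hot encodes each label into a vector whose length is the last entry of the layers parameter, so ArrayIndexOutOfBoundsException: 3 means the data contains the label 3.0 while the output layer has 3 or fewer units. With MultilayerPerceptronClassifier the labels must be 0.0 .. numClasses-1 and layers must end with numClasses. A hedged sketch (the train DataFrame, the column names, and the hidden-layer size are assumptions, since the original cell is not quoted):

from pyspark.ml.classification import MultilayerPerceptronClassifier

num_features = len(train.first()["features"])
num_classes = train.select("label").distinct().count()   # labels assumed to be 0.0 .. num_classes - 1

layers = [num_features, 10, num_classes]                  # last entry must cover every label value
trainer = MultilayerPerceptronClassifier(maxIter=100, layers=layers, blockSize=128, seed=1234)
model = trainer.fit(train)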
Thanks a lot for the suggestion. Indeed, that is something we should document, since it can be unexpected for Spark users. Would you like to send a PR for the fix?

Regards,
Yuhao
2017-12-18 9:57 GMT-08:00 Wojciech Krukar <wojciec...@gmail.com>:
Thank you so much. Now the model works perfectly.

Since more users may run into this problem, I did some research on the BigDL documentation; I hope my findings are useful for improving it. Starting with the description of DLEstimator and DLClassifier at https://github.com/intel-analytics/BigDL/blob/master/docs/docs/ProgrammingGuide/MLPipeline.md, there is no explicit remark that the labels must be non-zero. Following the ML Pipeline API link https://github.com/intel-analytics/BigDL/blob/master/docs/docs/ProgrammingGuide/MLPipeline.md, the examples do use properly formatted labels, but there is no note about the required format.

Wojciech
On Thursday, December 14, 2017 at 7:46:26 PM UTC-6, Yuhao Yang wrote:

Hi Wojciech,

I tried the notebook with BigDL 0.3 and Spark 2.1 and got the error "assertion failed: curTarget 0 is out of range 1 to 2", which is caused by the label column containing 0. After correcting this with df = df.withColumn('Class1', when(df.Class == 0, 2.0).otherwise(1.0)).drop(df.Class), the training runs normally.

Cheers,
Yuhao
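Spelled out as a runnable snippet (the DataFrame df and the Class column are the ones from the notebook; only the import is added here), the remap Yuhao describes is:

from pyspark.sql.functions import when

# BigDL's ClassNLLCriterion expects 1-based labels, so the 0/1 values in Class
# are mapped to 2.0/1.0 before fitting the DLClassifier.
df = df.withColumn('Class1', when(df.Class == 0, 2.0).otherwise(1.0)).drop(df.Class)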