Hello all,
I've been trying to figure this out for a week with no success.
I'm simply trying to initialize a SparkContext in Jupyter, but I get the following error when running SparkConf():
```
from pyspark import SparkConf
SparkConf()
```
```
---------------------------------------------------------------------------
OSError Traceback (most recent call last)
<ipython-input-12-0c80a6a098f7> in <module>()
1 #import statements
2 from pyspark import SparkConf
----> 3 SparkConf()
4
/root/david/spark/python/pyspark/conf.pyc in __init__(self, loadDefaults, _jvm, _jconf)
102 else:
103 from pyspark.context import SparkContext
--> 104 SparkContext._ensure_initialized()
105 _jvm = _jvm or SparkContext._jvm
106 self._jconf = _jvm.SparkConf(loadDefaults)
/root/david/spark/python/pyspark/context.pyc in _ensure_initialized(cls, instance, gateway)
241 with SparkContext._lock:
242 if not SparkContext._gateway:
--> 243 SparkContext._gateway = gateway or launch_gateway()
244 SparkContext._jvm = SparkContext._gateway.jvm
245
/root/david/spark/python/pyspark/java_gateway.pyc in launch_gateway()
74 def preexec_func():
75 signal.signal(signal.SIGINT, signal.SIG_IGN)
---> 76 proc = Popen(command, stdin=PIPE, preexec_fn=preexec_func, env=env)
77 else:
78 # preexec_fn not supported on Windows
/mnt/anaconda/lib/python2.7/subprocess.pyc in __init__(self, args, bufsize, executable, stdin, stdout, stderr, preexec_fn, close_fds, shell, cwd, env, universal_newlines, startupinfo, creationflags)
709 p2cread, p2cwrite,
710 c2pread, c2pwrite,
--> 711 errread, errwrite)
712 except Exception:
713 # Preserve original exception in case os.close raises.
/mnt/anaconda/lib/python2.7/subprocess.pyc in _execute_child(self, args, executable, preexec_fn, close_fds, cwd, env, universal_newlines, startupinfo, creationflags, shell, to_close, p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite)
1341 raise
1342 child_exception = pickle.loads(data)
-> 1343 raise child_exception
1344
1345
OSError: [Errno 2] No such file or directory
```
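From the traceback, the failure is in launch_gateway(), where Popen tries to start the gateway command (as far as I can tell, a spark-submit launcher resolved from SPARK_HOME), so Errno 2 looks like the kernel simply can't find that executable. For reference, this is the kind of sanity check I would run inside the notebook kernel to see what it actually has (nothing Spark-specific, just stdlib):

```
# quick sanity check inside the Jupyter kernel: which interpreter is running,
# and whether the Spark-related variables from startJupyter.sh are visible here
import os
import sys

print(sys.executable)                 # the Python binary the kernel runs
print(os.environ.get('SPARK_HOME'))   # should match the export in the script
print(os.environ.get('PYTHONPATH'))   # should contain the pyspark/py4j paths
```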
This is my startJupyter.sh script, which I use to launch Jupyter:
```
#!/bin/bash
if ps -ef | grep $USER | grep python > /dev/null
then
    echo "Jupyter is Running - Restarting"
    echo "Killing jupyter-notebook process"
    running_id=$(ps -ef | grep $USER | grep python)
    stringarray=($running_id)
    echo ${stringarray[1]}
    kill -9 ${stringarray[1]}
    export SPARK_HOME='/usr/lib/spark/'
    export PYTHONPATH=$SPARK_HOME/python/:$PYTHONPATH:$SPARK_HOME/python/lib/py4j-0.9-src.zip
    #jupyter nbextension enable --py widgetsnbextension
    /mnt/anaconda/bin/jupyter notebook &
else
    echo "Jupyter is Not Running"
    echo "Starting Jupyter-NoteBook"
    export SPARK_HOME='/usr/lib/spark/'
    export PYTHONPATH=$SPARK_HOME/python/:$PYTHONPATH:$SPARK_HOME/python/lib/py4j-0.9-src.zip
    #jupyter nbextension enable --py widgetsnbextension
    /mnt/anaconda/bin/jupyter notebook &
fi
```
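In case the exports in the script are not actually reaching the kernel, I understand the same values can also be set from inside the notebook before importing pyspark. Here is a minimal sketch using the same paths the script uses (they are specific to my box, so adjust as needed):

```
# sketch: set the Spark environment from inside the notebook itself, before any
# pyspark import; paths are the ones from startJupyter.sh and may need adjusting
import os
import sys

os.environ['SPARK_HOME'] = '/usr/lib/spark/'
sys.path.insert(0, '/usr/lib/spark/python')
sys.path.insert(0, '/usr/lib/spark/python/lib/py4j-0.9-src.zip')

from pyspark import SparkConf
conf = SparkConf()
print(conf)
```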
When I run the same code in a Python console (not in Jupyter), it works fine:
```
Python 2.7.12 |Anaconda 4.2.0 (64-bit)| (default, Jul 2 2016, 17:42:40)
[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)] on linux2
Type "help", "copyright", "credits" or "license" for more information.
Anaconda is brought to you by Continuum Analytics.
>>> from pyspark import SparkConf
>>> SparkConf()
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel).
<pyspark.conf.SparkConf object at 0x7f482f78b6d0>
```
I've validated both the Python version and the module path in the console and in Jupyter, and they appear to match:
```
>>> import sys
>>> sys.version
'2.7.12 |Anaconda 4.2.0 (64-bit)| (default, Jul 2 2016, 17:42:40) \n[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]'
>>> import inspect
>>> import pyspark
>>> inspect.getfile(pyspark)
'/root/david/spark/python/pyspark/__init__.pyc'
```
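Since the interpreter version and the module path match, the other thing I can think of checking is whether the launcher the gateway would run actually exists where SPARK_HOME points, in both environments. A small sketch (assuming the gateway resolves it as $SPARK_HOME/bin/spark-submit):

```
# run this in both the console and the notebook kernel: does spark-submit
# exist where SPARK_HOME points? (assumption: the gateway launches
# $SPARK_HOME/bin/spark-submit)
import os

spark_home = os.environ.get('SPARK_HOME', '')
launcher = os.path.join(spark_home, 'bin', 'spark-submit')
print(spark_home, os.path.exists(launcher))
```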
I can't think of anything else I could be doing wrong, so any help is appreciated.
Thanks, David
My specs:
```
NAME="Amazon Linux AMI"
VERSION="2017.03"
ID="amzn"
ID_LIKE="rhel fedora"
VERSION_ID="2017.03"
PRETTY_NAME="Amazon Linux AMI 2017.03"
ANSI_COLOR="0;33"
CPE_NAME="cpe:/o:amazon:linux:2017.03:ga"
Amazon Linux AMI release 2017.03
```