Hi,
I see a lot of SAGA-Python-PBSJobScript.* files in the /tmp directory on
Sierra FG, created by Vishal. Could someone please fix SAGA-Python so
that it removes its temporary files after job submission? Vishal, could
you please delete the existing files for now?
(develop)[pmantha@s1 tmp]$ ls -ltr SAGA-Python*
-bash: /bin/ls: Argument list too long
(develop)[pmantha@s1 tmp]$
I also see the error below when running a BigJob script.
PMR - 19:20:29 - Trying to submit pilot job to: pbs+ssh://sierra.futuregrid.org
Traceback (most recent call last):
File "ex.py", line 34, in <module>
pimr.run()
File "/N/u/pmantha/develop/lib/python2.7/site-packages/PilotMapReduce-0.1.0-py2.7.egg/pimr/clustering/kmeans.py",
line 83, in run
mr.MapReduceMain()
File "/N/u/pmantha/develop/lib/python2.7/site-packages/PilotMapReduce-0.1.0-py2.7.egg/pmr/PilotMapReduce.py",
line 412, in MapReduceMain
self.start_pilot_jobs()
File "/N/u/pmantha/develop/lib/python2.7/site-packages/PilotMapReduce-0.1.0-py2.7.egg/pmr/PilotMapReduce.py",
line 187, in start_pilot_jobs
self.pilot_compute_service.create_pilot(pilot_compute_description=pilot_job_desc)
File "/N/u/pmantha/develop/lib/python2.7/site-packages/BigJob2-0.54_21_g7012c7d-py2.7.egg/pilot/impl/pilotcompute_manager.py",
line 82, in create_pilot
bj = self.__start_bigjob(bj_dict)
File "/N/u/pmantha/develop/lib/python2.7/site-packages/BigJob2-0.54_21_g7012c7d-py2.7.egg/pilot/impl/pilotcompute_manager.py",
line 188, in __start_bigjob
pilot_compute_description = bj_dict["pilot_compute_description"]
File "/N/u/pmantha/develop/lib/python2.7/site-packages/BigJob2-0.54_21_g7012c7d-py2.7.egg/bigjob/bigjob_manager.py",
line 388, in start_pilot_job
self.job.run()
File "/N/u/pmantha/develop/lib/python2.7/site-packages/radical.utils-0.5-py2.7.egg/radical/utils/signatures.py",
line 446, in takes_invocation_proxy
return method(*pargs, **pkwargs)
File "/N/u/pmantha/develop/lib/python2.7/site-packages/radical.utils-0.5-py2.7.egg/radical/utils/signatures.py",
line 477, in returns_invocation_proxy
result = method (*args, **kwargs)
File "/N/u/pmantha/develop/lib/python2.7/site-packages/saga_python-0.9-py2.7.egg/saga/job/job.py",
line 366, in run
return self._adaptor.run (ttype=ttype)
File "/N/u/pmantha/develop/lib/python2.7/site-packages/saga_python-0.9-py2.7.egg/saga/adaptors/cpi/decorators.py",
line 51, in wrap_function
return sync_function (self, *args, **kwargs)
File "/N/u/pmantha/develop/lib/python2.7/site-packages/saga_python-0.9-py2.7.egg/saga/adaptors/pbs/pbsjob.py",
line 1049, in run
self._id = self.js._job_run(self)
File "/N/u/pmantha/develop/lib/python2.7/site-packages/saga_python-0.9-py2.7.egg/saga/adaptors/pbs/pbsjob.py",
line 598, in _job_run
log_error_and_raise(message, saga.NoSuccess, self._logger)
File "/N/u/pmantha/develop/lib/python2.7/site-packages/saga_python-0.9-py2.7.egg/saga/adaptors/pbs/pbsjob.py",
line 97, in log_error_and_raise
raise exception(message)
saga.exceptions.NoSuccess: Error running job via 'qsub': sh: echo:
write error: No space left on device
. Commandline was: SCRIPTFILE=`mktemp -t
SAGA-Python-PBSJobScript.XXXXXX` && echo "
#!/bin/bash
#PBS -V
#PBS -o /N/u/pmantha/PilotMapReduce/pimr/clustering/applications/agent/stdout-bj-79ce9a82-7fef-11e3-acfd-002215124496-agent.txt
#PBS -e /N/u/pmantha/PilotMapReduce/pimr/clustering/applications/agent/stderr-bj-79ce9a82-7fef-11e3-acfd-002215124496-agent.txt
#PBS -l walltime=6:40:00
#PBS -l nodes=8:ppn=8
export PBS_O_WORKDIR=/N/u/pmantha/PilotMapReduce/pimr/clustering/applications/agent
cd \$PBS_O_WORKDIR
/usr/bin/env python -c 'import sys
import os
import urllib
import sys
import time
start_time = time.time()
home = os.environ.get(\"HOME\")
#print \"Home: \" + home
if home==None: home = os.getcwd()
BIGJOB_AGENT_DIR= os.path.join(home, \".bigjob\")
if not os.path.exists(BIGJOB_AGENT_DIR): os.mkdir (BIGJOB_AGENT_DIR)
BIGJOB_PYTHON_DIR=BIGJOB_AGENT_DIR+\"/python/\"
if not os.path.exists(BIGJOB_PYTHON_DIR): os.mkdir(BIGJOB_PYTHON_DIR)
BOOTSTRAP_URL=\"
https://raw.github.com/saga-project/BigJob/master/bootstrap/bigjob-bootstrap.py\"
BOOTSTRAP_FILE=BIGJOB_AGENT_DIR+\"/bigjob-bootstrap.py\"
#ensure that BJ in .bigjob is upfront in sys.path
sys.path.insert(0, os.getcwd() + \"/../\")
p = list()
for i in sys.path:
if i.find(\".bigjob/python\")>1:
p.insert(0, i)
for i in p: sys.path.insert(0, i)
print \"Python path: \" + str(sys.path)
print \"Python version: \" + str(sys.version_info)
try: import saga
except: print \"SAGA not found.\";
try: import bigjob.bigjob_agent
except:
print \"BigJob not installed. Attempt to install it.\";
try:
opener = urllib.FancyURLopener({});
opener.retrieve(BOOTSTRAP_URL, BOOTSTRAP_FILE);
except Exception, ex:
print \"Unable to download bootstrap script: \" + str(ex) +
\". Please install BigJob manually.\"
print \"Execute: \" + \"python \" + BOOTSTRAP_FILE + \" \" +
BIGJOB_PYTHON_DIR
os.system(\"/usr/bin/env\")
try:
os.system(\"python \" + BOOTSTRAP_FILE + \" \" + BIGJOB_PYTHON_DIR);
activate_this = os.path.join(BIGJOB_PYTHON_DIR,
\"bin/activate_this.py\");
execfile(activate_this, dict(__file__=activate_this))
except:
print \"BJ installation failed. Trying system-level python
(/usr/bin/python)\";
os.system(\"/usr/bin/python \" + BOOTSTRAP_FILE + \" \" +
BIGJOB_PYTHON_DIR);
activate_this = os.path.join(BIGJOB_PYTHON_DIR,
\"bin/activate_this.py\");
execfile(activate_this, dict(__file__=activate_this))
#try to import BJ once again
try:
import bigjob.bigjob_agent
except Exception, ex:
print \"Unable install BigJob: \" + str(ex) + \". Please
install BigJob manually.\"
# execute bj agent
args = list()
args.append(\"bigjob_agent.py\")
args.append(\"redis://
ILikeBigJob...@gw68.quarry.iu.teragrid.org:6379\")
args.append(\"bigjob:bj-79ce9a82-7fef-11e3-acfd-002215124496:
sierra.futuregrid.org\")
args.append(\"PilotComputeServiceQueue-pcs-7373e840-7fef-11e3-acfd-002215124496\")
print \"Bootstrap time: \" + str(time.time()-start_time)
print \"Starting BigJob Agents with following args: \" + str(args)
bigjob_agent = bigjob.bigjob_agent.bigjob_agent(args)
' " > $SCRIPTFILE && /opt/torque/bin/qsub $SCRIPTFILE
(/N/u/pmantha/develop/lib/python2.7/site-packages/saga_python-0.9-py2.7.egg/saga/adaptors/pbs/pbsjob.py
+97 (log_error_and_raise) : raise exception(message))
thanks
Pradeep