After sorting out all the issues with getting SLURM to work with SCOOP,
I decided to add direct support for SLURM in SCOOP.
My question concerns the valid hostname pattern, which is defined in
utils.py by:
valid_hostname = r"^[^ /\t=\n]+"
The SLURM allocation uses a name followed by numbered extension ranges
(for example: "borgb[013-017],borgd[001-002,004-006]"). My question is
whether '[' and ']' are valid in host names and, if so, whether they are
in common usage. If they are valid, then I need to rethink how I parse
the --host and --hostfile arguments. I could just add a --slurm
argument to force the interpretation, but that seems inelegant.
Anyway, would someone review the appended diff and discuss including it
in the standard distribution? I'm still testing on my end, and I wanted
to get a little feedback before spending lots of time and completely
upgrading my tool chain (which is required to support scoop-0.7.0_rc2
and deap-1.0.0_rc2).
Thanks and best regards,
EBo --
ps: as a note, I used the tip of the repository (which I think is
0.7.0RC2).
=========== diff ========
diff -r 421ff65129f9 scoop/launcher.py
--- a/scoop/launcher.py Mon Dec 09 17:07:04 2013 -0500
+++ b/scoop/launcher.py Sun Jan 12 11:36:55 2014 -0600
@@ -85,7 +85,7 @@
)
)
- if env in ["PBS", "SGE"]:
+ if env in ["SLURM","PBS", "SGE"]:
self.log.info("Detected {0} environment.".format(env))
self.log.info("Deploying {0} worker(s) over {1} "
"host(s).".format(
diff -r 421ff65129f9 scoop/utils.py
--- a/scoop/utils.py Mon Dec 09 17:07:04 2013 -0500
+++ b/scoop/utils.py Sun Jan 12 11:36:55 2014 -0600
@@ -124,7 +124,9 @@
def getEnv():
"""Return the launching environnement"""
- if "PBS_ENVIRONMENT" in os.environ:
+ if "SLURM_NODELIST" in os.environ:
+ return "SLURM"
+ elif "PBS_ENVIRONMENT" in os.environ:
return "PBS"
elif "PE_HOSTFILE" in os.environ:
return "SGE"
@@ -138,6 +140,8 @@
return getHostsFromFile(filename)
elif hostlist:
return getHostsFromList(hostlist)
+ elif "SLURM_NODELIST" in os.environ:
+ return getHostsFromSLURM()
elif "PBS_ENVIRONMENT" in os.environ:
return getHostsFromPBS()
elif "PE_HOSTFILE" in os.environ:
@@ -155,20 +159,32 @@
hosts = []
with open(filename) as f:
for line in f:
- host = hostname_re.search(line.strip())
- if host:
- hostname = host.group()
- n = worker_re.search(line[host.end():])
- if n:
- n = n.group()
- else:
- n = 1
- hosts.append((hostname, int(n)))
+ # check to see if it is a SLURM grouping instead of a
+ # regular list of hosts
+ if re.search('[\[\]]',line):
+ hosts = hosts + parseSLURM(line.strip())
+ else:
+ host = hostname_re.search(line.strip())
+ if host:
+ hostname = host.group()
+ n = worker_re.search(line[host.end():])
+ if n:
+ n = n.group()
+ else:
+ n = 1
+ hosts.append((hostname, int(n)))
return hosts
def getHostsFromList(hostlist):
"""Return the hosts from the command line"""
+ # check to see if it is a SLURM grouping instead of a
+ # regular list of hosts
+ if re.search('[\[\]]',str(hostlist)):
+ return parseSLURM(str(hostlist))
+
# Counter would be more efficient but:
# 1. Won't be Python 2.6 compatible
# 2. Won't be ordered
@@ -179,6 +195,33 @@
return retVal
+def parseSLURM(string):
+ """Return a host list from a SLURM string"""
+ bunchedlist = re.findall('([^ /\t=\n\[,]+)(?=\[)(.*?)(?<=\])',
string)
+
+ hosts = []
+
+ # parse out the name followd by range (ex. borgb[001-002,004-006]
+ for h,n in bunchedlist:
+
+ block = re.findall('([^\[\],]+)', n)
+ for rng in block:
+
+ bmin,bmax = rng.split('-')
+ fill_width = max(len(bmin),len(bmax))
+ for i in range(int(bmin),int(bmax)+1):
+ hostname = str(h)+str(i).zfill(fill_width)
+ hosts.append((hostname, int(1)))
+
+
+ return hosts
+
+
+def getHostsFromSLURM():
+ """Return a host list from a SLURM environment"""
+ return parseSLURM(os.environ["SLURM_NODELIST"])
+
+
def getHostsFromPBS():
"""Return a host list in a PBS environment"""
# See above comment about Counter
@@ -198,7 +241,9 @@
def getWorkerQte(hosts):
"""Return the number of workers to launch depending on the
environment"""
- if "PBS_NP" in os.environ:
+ if "SLURM_NTASKS" in os.environ:
+ return int(os.environ["SLURM_NTASKS"])
+ elif "PBS_NP" in os.environ:
return int(os.environ["PBS_NP"])
elif "NSLOTS" in os.environ:
return int(os.environ["NSLOTS"])