#!/bin/bash
#SBATCH --time 48:00:00 # in hours
#SBATCH --partition public-cpu
#SBATCH --ntasks 9
#SBATCH --mem 10000 # in MB
#SBATCH --job-name [REDACTED]_exabayes
#SBATCH --array 1
#1-4 # run N jobs

# Abort on any error or unset variable; fail pipelines on any stage.
set -euo pipefail

# Work from the project directory; die loudly if it is missing.
cd /home/users/c/cardenac/phylo/[REDACTED]/exabayes || exit 1

# job.list is a CSV with one "<alignment>,<partition>" row per array task.
# Select the row for this task in a single awk pass instead of cat|awk|cut.
JOB_LIST=${PWD}/job.list
ALIGN=$(awk -F, -v n="${SLURM_ARRAY_TASK_ID}" 'NR==n {print $1}' "${JOB_LIST}")
PARTI=$(awk -F, -v n="${SLURM_ARRAY_TASK_ID}" 'NR==n {print $2}' "${JOB_LIST}")
PREFIX=${PARTI%%.*}   # partition file name without its extension

#mkdir ${PREFIX}_results
mkdir -p test_results  # -p: do not fail if the directory already exists (re-runs)
SEED=6669420

# call environment
module load GCC/7.3.0-2.30 OpenMPI/3.1.1

# run exabayes with one MPI rank per requested task
mpirun -np "${SLURM_NTASKS}" /home/users/c/cardenac/exabayes-1.5.1/exabayes \
  -f "data/${ALIGN}" \
  -m DNA \
  -n test_results \
  -s "${SEED}" \
  -c config.nex \
  -w test_results \
  -R 3
I checked the resource utilization (via the `seff` SLURM command) for a 900-thread run, which sat for an hour trying to parse the phylip file:
Job ID: 3420051
Array Job ID: 3420051_1
Cluster: bamboo
User/Group: cardenac/hpc_users
State: CANCELLED (exit code 0)
Nodes: 8
Cores per node: 112
CPU Utilized: 4-13:54:53
CPU Efficiency: 13.69% of 33-10:45:00 core-walltime
Job Wall-clock time: 00:53:31
Memory Utilized: 13.57 GB
Memory Efficiency: 5.43% of 250.00 GB (31.25 GB/node)
I would be happy to share the files with you, but I'd prefer to send them over a private e-mail rather than a public forum.
Anyways, here is some relevant software information:
software:
GCC/7.3.0-2.30
OpenMPI/3.1.1
exabayes v 1.5.1
I'm not sure if it's relevant, but I can also provide the hardware specifications. However, I believe the hardware should be fine, given that I've successfully run ExaBayes on it before.
Thanks for your time!
Problem with pinning! Probably the number of processes and threads
started on this machine exceeds the number of available cores. Thread
pinning is disabled. In the worst case,ExaBayes will run substantially slower (use a tool like htop to monitor,
whether all cores are loaded).
My slurm script contradicts this, as I've requested 15 threads for this test.
#!/bin/bash
#SBATCH --time 48:00:00 # in hours
#SBATCH --partition public-cpu
#SBATCH --ntasks 15
#SBATCH --mem 64000 # in MB
#SBATCH --job-name yggdrasil
#SBATCH --array 1
#1-4 # run N jobs

# Abort on any error or unset variable; fail pipelines on any stage.
set -euo pipefail

# fight variables
cd /home/users/c/cardenac/phylo/[REDACTED]/exabayes || exit 1

# job.list is a CSV with one "<alignment>,<partition>" row per array task.
# Select the row for this task in a single awk pass instead of cat|awk|cut.
JOB_LIST=${PWD}/job.list
ALIGN=$(awk -F, -v n="${SLURM_ARRAY_TASK_ID}" 'NR==n {print $1}' "${JOB_LIST}")
PARTI=$(awk -F, -v n="${SLURM_ARRAY_TASK_ID}" 'NR==n {print $2}' "${JOB_LIST}")
PREFIX=${PARTI%%.*}   # partition file name without its extension

#mkdir ${PREFIX}_results
mkdir -p test_yggdrasil_results  # -p: do not fail if the directory exists (re-runs)
SEED=6669420

# call environment
module load GCC/7.3.0-2.30 OpenMPI/3.1.1

# run the PThreads build (yggdrasil); the MPI invocation is kept commented out.
# -T uses ${SLURM_NTASKS} so the thread count always matches --ntasks above.
# NOTE(review): run name (-n test_results) differs from the output dir
# (-w test_yggdrasil_results) — confirm this mismatch is intentional.
#mpirun -np ${SLURM_NTASKS} /home/users/c/cardenac/exabayes-1.5.1/exabayes \
/home/users/c/cardenac/exabayes-1.5.1/yggdrasil \
  -f "data/${ALIGN}" \
  -m DNA \
  -n test_results \
  -s "${SEED}" \
  -c config.nex \
  -w test_yggdrasil_results \
  -R 3 \
  -T "${SLURM_NTASKS}"
I would like to confirm how to use the generated binary file in ExaBayes — do I just try to "restart" the run using exabayes? If so, why couldn't I use the binary file created by the successful 9-thread MPI run?
I really appreciate your assistance,
Cody