Hi Ward,
I tried --gres-flags=enforce-binding, but it doesn't work.
The first job requested 1 GPU and 6 CPUs. It was allocated CPU IDs 0-5 and GPU ID 0.
The second job also requested 1 GPU and 6 CPUs. It was allocated CPU IDs 6-11 and GPU ID 1, but I expected CPU IDs 16-21.
[zhangyc@ln01 numa]$ sbatch -w g0036 -p gpu_c128 --gpus=1 -n 6 --gres-flags=enforce-binding ./run.sh
Submitted batch job 198106
[zhangyc@ln01 numa]$
[zhangyc@ln01 numa]$ scontrol show job 198106 -d
JobId=198106 JobName=run.sh
UserId=zhangyc(1004) GroupId=zhangyc(1004) MCS_label=N/A
Priority=4294704732 Nice=0 Account=zhangyc QOS=normal WCKey=*
JobState=RUNNING Reason=None Dependency=(null)
Requeue=0 Restarts=0 BatchFlag=1 Reboot=0 ExitCode=0:0
DerivedExitCode=0:0
RunTime=00:00:05 TimeLimit=UNLIMITED TimeMin=N/A
SubmitTime=2022-10-14T18:38:52 EligibleTime=2022-10-14T18:38:52
AccrueTime=2022-10-14T18:38:52
StartTime=2022-10-14T18:38:56 EndTime=Unknown Deadline=N/A
SuspendTime=None SecsPreSuspend=0 LastSchedEval=2022-10-14T18:38:56
Partition=gpu_c128 AllocNode:Sid=ln01:26986
ReqNodeList=g0036 ExcNodeList=(null)
NodeList=g0036
BatchHost=g0036
NumNodes=1 NumCPUs=6 NumTasks=6 CPUs/Task=1 ReqB:S:C:T=0:0:*:*
TRES=cpu=6,mem=60000M,node=1,billing=6,gres/gpu=1
Socks/Node=* NtasksPerN:B:S:C=0:0:*:* CoreSpec=*
JOB_GRES=gpu:1
Nodes=g0036 CPU_IDs=0-5 Mem=60000 GRES=gpu:1(IDX:0)
MinCPUsNode=1 MinMemoryNode=60000M MinTmpDiskNode=0
Features=(null) DelayBoot=00:00:00
OverSubscribe=OK Contiguous=0 Licenses=(null) Network=(null)
Command=./run.sh
WorkDir=/data/run01/zhangyc/numa
StdErr=/data/run01/zhangyc/numa/slurm-198106.out
StdIn=/dev/null
StdOut=/data/run01/zhangyc/numa/slurm-198106.out
Power=
GresEnforceBind=Yes
CpusPerTres=gpu:6
TresPerJob=gpu:1
NtasksPerTRES:0
[zhangyc@ln01 numa]$ sbatch -w g0036 -p gpu_c128 --gpus=1 -n 6 --gres-flags=enforce-binding ./run.sh
Submitted batch job 198107
[zhangyc@ln01 numa]$ scontrol show job 198107 -d
JobId=198107 JobName=run.sh
UserId=zhangyc(1004) GroupId=zhangyc(1004) MCS_label=N/A
Priority=4294704731 Nice=0 Account=zhangyc QOS=normal WCKey=*
JobState=RUNNING Reason=None Dependency=(null)
Requeue=0 Restarts=0 BatchFlag=1 Reboot=0 ExitCode=0:0
DerivedExitCode=0:0
RunTime=00:00:04 TimeLimit=UNLIMITED TimeMin=N/A
SubmitTime=2022-10-14T18:39:05 EligibleTime=2022-10-14T18:39:05
AccrueTime=2022-10-14T18:39:05
StartTime=2022-10-14T18:39:05 EndTime=Unknown Deadline=N/A
SuspendTime=None SecsPreSuspend=0 LastSchedEval=2022-10-14T18:39:05
Partition=gpu_c128 AllocNode:Sid=ln01:26986
ReqNodeList=g0036 ExcNodeList=(null)
NodeList=g0036
BatchHost=g0036
NumNodes=1 NumCPUs=6 NumTasks=6 CPUs/Task=1 ReqB:S:C:T=0:0:*:*
TRES=cpu=6,mem=60000M,node=1,billing=6,gres/gpu=1
Socks/Node=* NtasksPerN:B:S:C=0:0:*:* CoreSpec=*
JOB_GRES=gpu:1
Nodes=g0036 CPU_IDs=6-11 Mem=60000 GRES=gpu:1(IDX:1)
MinCPUsNode=1 MinMemoryNode=60000M MinTmpDiskNode=0
Features=(null) DelayBoot=00:00:00
OverSubscribe=OK Contiguous=0 Licenses=(null) Network=(null)
Command=./run.sh
WorkDir=/data/run01/zhangyc/numa
StdErr=/data/run01/zhangyc/numa/slurm-198107.out
StdIn=/dev/null
StdOut=/data/run01/zhangyc/numa/slurm-198107.out
Power=
GresEnforceBind=Yes
CpusPerTres=gpu:6
TresPerJob=gpu:1
NtasksPerTRES:0