Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F124431380
hpcbind
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Sat, Aug 2, 11:25
Size
15 KB
Mime Type
text/x-shellscript
Expires
Mon, Aug 4, 11:25 (2 d)
Engine
blob
Format
Raw Data
Handle
27806136
Attached To
rLAMMPS lammps
hpcbind
View Options
#!/usr/bin/env bash
################################################################################
# Check if hwloc commands exist
################################################################################
# HPCBIND_HAS_HWLOC stays 1 only when every hwloc tool used by this script can
# be resolved by the shell; a single missing tool turns it to 0.
declare -i HPCBIND_HAS_HWLOC=1
for hpcbind_tool in hwloc-bind hwloc-distrib hwloc-ls hwloc-calc hwloc-ps; do
  if ! command -v "${hpcbind_tool}" >/dev/null 2>&1; then
    HPCBIND_HAS_HWLOC=0
  fi
done
unset hpcbind_tool

if [[ ${HPCBIND_HAS_HWLOC} -eq 0 ]]; then
  echo "hwloc not found, no process binding will occur"
fi
# Get parent cpuset

# Print the cpuset hwloc-ps reports for the process id given as $1, or nothing
# when that pid is not listed.
# The row is selected by an exact match on the first tab-separated field and
# the second field is printed. A plain substring search is not used because a
# pid such as 123 would also match pid 1234 or a cpuset containing "123".
# NOTE(review): assumes hwloc-ps prints the pid in the first field - confirm
# against the installed hwloc version.
hpcbind_parent_cpuset() {
  local pid=$1
  hwloc-ps --cpuset | awk -F '\t' -v pid="${pid}" '$1 == pid { print $2 }'
}

HPCBIND_HWLOC_PARENT_CPUSET=""
if [[ ${HPCBIND_HAS_HWLOC} -eq 1 ]]; then
  MY_PID="$BASHPID"
  HPCBIND_HWLOC_PARENT_CPUSET=$(hpcbind_parent_cpuset "${MY_PID}")
fi
################################################################################
# Check if nvidia-smi exist
################################################################################
# HPCBIND_HAS_NVIDIA is 1 when the shell can resolve nvidia-smi, 0 otherwise.
declare -i HPCBIND_HAS_NVIDIA=0
if command -v nvidia-smi >/dev/null 2>&1; then
  HPCBIND_HAS_NVIDIA=1
fi
################################################################################
# Get visible gpu
################################################################################
declare -i NUM_GPUS=0
declare -i HPCBIND_ENABLE_GPU_MAPPING=0
HPCBIND_VISIBLE_GPUS=""

# Populate NUM_GPUS, GPU_LIST, HPCBIND_VISIBLE_GPUS and
# HPCBIND_ENABLE_GPU_MAPPING.
# Globals read:    HPCBIND_HAS_NVIDIA, CUDA_VISIBLE_DEVICES
# Globals written: NUM_GPUS, GPU_LIST, HPCBIND_VISIBLE_GPUS,
#                  HPCBIND_ENABLE_GPU_MAPPING
# HPCBIND_VISIBLE_GPUS is a space separated list: CUDA_VISIBLE_DEVICES with its
# commas turned into spaces when set and non-empty, otherwise 0..NUM_GPUS-1.
hpcbind_detect_gpus() {
  local -i gpu
  local visible
  NUM_GPUS=0
  HPCBIND_VISIBLE_GPUS=""
  if [[ ${HPCBIND_HAS_NVIDIA} -eq 1 ]]; then
    # Count only the "GPU <n>: ..." rows so that any other line printed by
    # nvidia-smi (for example an error message) is not taken for a device.
    NUM_GPUS=$(nvidia-smi -L | grep -c '^GPU ')
    GPU_LIST=""
    for (( gpu = 0; gpu < NUM_GPUS; gpu++ )); do
      GPU_LIST+="${GPU_LIST:+ }${gpu}"
    done
    visible=${CUDA_VISIBLE_DEVICES:-${GPU_LIST}}
    # Later stages split this list on whitespace, so commas must go here.
    HPCBIND_VISIBLE_GPUS=${visible//,/ }
  fi
  # GPU mapping is only meaningful when at least one GPU was found.
  HPCBIND_ENABLE_GPU_MAPPING=$(( NUM_GPUS > 0 ))
}

hpcbind_detect_gpus
################################################################################
# Get queue id
# supports sbatch, bsub, aprun
################################################################################
HPCBIND_QUEUE_NAME=""
declare -i HPCBIND_QUEUE_INDEX=0
declare -i HPCBIND_QUEUE_GPU_MAPPING=0

# Identify the launcher from its environment and record the task index.
# Globals read:    SLURM_LOCAL_ID, LSB_JOBINDEX, LBS_JOBINDEX, ALPS_APP_PE
# Globals written: HPCBIND_QUEUE_NAME, HPCBIND_QUEUE_INDEX,
#                  HPCBIND_QUEUE_GPU_MAPPING
# The first non-empty variable wins, in the order sbatch, bsub, aprun. When
# none is set the name stays empty and index/mapping stay 0.
hpcbind_detect_queue() {
  # LSB_JOBINDEX is the job index variable documented for LSF. The historical
  # spelling LBS_JOBINDEX is still honoured so existing wrappers that set it
  # keep working.
  local lsf_index=${LSB_JOBINDEX:-${LBS_JOBINDEX:-}}

  HPCBIND_QUEUE_NAME=""
  HPCBIND_QUEUE_INDEX=0
  HPCBIND_QUEUE_GPU_MAPPING=0

  if [[ -n "${SLURM_LOCAL_ID:-}" ]]; then
    HPCBIND_QUEUE_GPU_MAPPING=1
    HPCBIND_QUEUE_NAME="sbatch"
    HPCBIND_QUEUE_INDEX=${SLURM_LOCAL_ID}
  elif [[ -n "${lsf_index}" ]]; then
    HPCBIND_QUEUE_GPU_MAPPING=1
    HPCBIND_QUEUE_NAME="bsub"
    HPCBIND_QUEUE_INDEX=${lsf_index}
  elif [[ -n "${ALPS_APP_PE:-}" ]]; then
    HPCBIND_QUEUE_GPU_MAPPING=1
    HPCBIND_QUEUE_NAME="aprun"
    HPCBIND_QUEUE_INDEX=${ALPS_APP_PE}
  fi
}

hpcbind_detect_queue
################################################################################
# Show help
################################################################################
# Print the usage text on stdout. The command name shown in the text is the
# last path component of $0.
function show_help {
  local cmd
  cmd=${0##*/}
  # Unquoted here-doc delimiter: only ${cmd} is expanded, the rest is literal.
  cat <<EOF
Usage: ${cmd} <options> -- command ...
 Set the process mask, OMP environment variables and CUDA environment
 variables to sane values if possible. Uses hwloc and nvidia-smi if
 available. Will preserve the current process binding, so it is safe
 to use with a queuing system or mpiexec.

Options:
 --no-hwloc-bind Disable binding
 --proc-bind=<LOC> Set the initial process mask for the script
 LOC can be any valid location argument for
 hwloc-calc Default: all
 --distribute=N Distribute the current cpuset into N partitions
 --distribute-partition=I
 Use the i'th partition (zero based)
 --visible-gpus=<L> Comma separated list of gpu ids
 Default: CUDA_VISIBLE_DEVICES or all gpus in
 sequential order
 --gpu-ignore-queue Ignore queue job id when choosing visible GPU
 --no-gpu-mapping Do not set CUDA_VISIBLE_DEVICES
 --openmp=M.m Set env variables for the given OpenMP version
 Default: 4.0
 --openmp-percent=N Integer percentage of cpuset to use for OpenMP
 threads Default: 100
 --openmp-places=<Op> Op=threads|cores|sockets. Default: threads
 --no-openmp-proc-bind Set OMP_PROC_BIND to false and unset OMP_PLACES
 --force-openmp-num-threads=N
 Override logic for selecting OMP_NUM_THREADS
 --force-openmp-proc-bind=<OP>
 Override logic for selecting OMP_PROC_BIND
 --no-openmp-nested Set OMP_NESTED to false
 --show-bindings Show the bindings
 --lstopo Show bindings in lstopo without executing a command
 -v|--verbose Show options and relevant environment variables
 -h|--help Show this message

Sample Usage:
 Split the current process cpuset into 4 and use the 3rd partition
 ${cmd} --distribute=4 --distribute-partition=2 -v -- command ...
 Bind the process to all even cores
 ${cmd} --proc-bind=core:even -v -- command ...
 Bind to the first 64 cores and split the current process cpuset into 4
 ${cmd} --proc-bind=core:0-63 --distribute=4 --distribute-partition=0 -- command ...
 skip GPU 0 when mapping visible devices
 ${cmd} --distribute=4 --distribute-partition=0 --visible-gpus=1,2 -v -- command ...
 Display the current bindings
 ${cmd} --proc-bind=numa:0 --show-bindings -- command
 Display the current bindings using lstopo
 ${cmd} --proc-bind=numa:0.core:odd --lstopo

EOF
}
################################################################################
# Parse command line arguments
################################################################################
# With an empty command line there is nothing to run: print the usage text and
# leave with a success status.
if (( $# == 0 )); then
  show_help
  exit 0
fi
# Option defaults. Integer options carry the -i attribute, so values assigned
# to them are evaluated arithmetically by the shell.
declare -a UNKNOWN_ARGS=()
declare -i HPCBIND_ENABLE_HWLOC_BIND=${HPCBIND_HAS_HWLOC:-0}
declare -i HPCBIND_DISTRIBUTE=1
declare -i HPCBIND_PARTITION=0
HPCBIND_PROC_BIND="all"
HPCBIND_OPENMP_VERSION=4.0
declare -i HPCBIND_OPENMP_PERCENT=100
HPCBIND_OPENMP_PLACES=${OMP_PLACES:-threads}
declare -i HPCBIND_OPENMP_PROC_BIND=1
declare -i HPCBIND_OPENMP_FORCE_NUM_THREADS=-1
HPCBIND_OPENMP_FORCE_PROC_BIND=""
HPCBIND_OPENMP_NESTED=${OMP_NESTED:-true}
declare -i HPCBIND_VERBOSE=0
declare -i HPCBIND_SHOW_BINDINGS=0
declare -i HPCBIND_LSTOPO=0
# Number of leading command line words consumed as hpcbind options, including
# the "--" separator when present.
declare -i HPCBIND_NUM_PARSED_ARGS=0

# Parse hpcbind options from the argument list.
# Arguments: the script's command line ("$@").
# Globals written: the HPCBIND_* option variables above, UNKNOWN_ARGS,
#                  HPCBIND_VISIBLE_GPUS, HPCBIND_QUEUE_GPU_MAPPING,
#                  HPCBIND_ENABLE_GPU_MAPPING, HPCBIND_NUM_PARSED_ARGS.
# Parsing stops after "--"; everything behind it is the command to run and is
# left untouched. Words that match no option are collected in UNKNOWN_ARGS.
# -h/--help prints the usage text and exits the script with status 0.
hpcbind_parse_args() {
  local arg
  local value
  HPCBIND_NUM_PARSED_ARGS=0
  # "$@" is quoted so that an argument containing whitespace stays one word;
  # otherwise the consumed-word count would drift from the real arguments.
  for arg in "$@"; do
    HPCBIND_NUM_PARSED_ARGS=$(( HPCBIND_NUM_PARSED_ARGS + 1 ))
    value=${arg#*=}
    case "${arg}" in
      # disable hwloc binding altogether
      --no-hwloc-bind)
        HPCBIND_ENABLE_HWLOC_BIND=0
        ;;
      --proc-bind=*)
        HPCBIND_PROC_BIND="${value}"
        ;;
      # number of partitions to create
      --distribute=*)
        HPCBIND_DISTRIBUTE="${value}"
        ;;
      # which partition to use
      --distribute-partition=*)
        HPCBIND_PARTITION="${value}"
        ;;
      # comma separated on the command line, space separated internally
      --visible-gpus=*)
        HPCBIND_VISIBLE_GPUS=${value//,/ }
        ;;
      --gpu-ignore-queue)
        HPCBIND_QUEUE_GPU_MAPPING=0
        ;;
      --no-gpu-mapping)
        HPCBIND_ENABLE_GPU_MAPPING=0
        ;;
      --openmp=*)
        HPCBIND_OPENMP_VERSION="${value}"
        ;;
      --openmp-percent=*)
        HPCBIND_OPENMP_PERCENT="${value}"
        ;;
      --openmp-places=*)
        HPCBIND_OPENMP_PLACES="${value}"
        ;;
      --no-openmp-proc-bind)
        HPCBIND_OPENMP_PROC_BIND=0
        ;;
      --force-openmp-proc-bind=*)
        HPCBIND_OPENMP_FORCE_PROC_BIND="${value}"
        ;;
      --force-openmp-num-threads=*)
        HPCBIND_OPENMP_FORCE_NUM_THREADS="${value}"
        ;;
      --no-openmp-nested)
        HPCBIND_OPENMP_NESTED="false"
        ;;
      # showing bindings implies verbose output
      --show-bindings)
        HPCBIND_VERBOSE=1
        HPCBIND_SHOW_BINDINGS=1
        ;;
      # lstopo mode replaces the textual binding listing
      --lstopo)
        HPCBIND_VERBOSE=1
        HPCBIND_SHOW_BINDINGS=0
        HPCBIND_LSTOPO=1
        ;;
      -v|--verbose)
        HPCBIND_VERBOSE=1
        ;;
      -h|--help)
        show_help
        exit 0
        ;;
      # ignore remaining arguments
      --)
        break
        ;;
      # unknown option
      *)
        UNKNOWN_ARGS+=("${arg}")
        ;;
    esac
  done
}

hpcbind_parse_args "$@"
# Drop the parsed options so that "$@" holds only the command to execute.
shift "${HPCBIND_NUM_PARSED_ARGS}"
################################################################################
# Check unknown arguments
################################################################################
# Abort the script with status 1 when UNKNOWN_ARGS holds at least one entry.
# The offending words are reported on stderr. The element count is compared
# arithmetically rather than as a string.
hpcbind_check_unknown_args() {
  if (( ${#UNKNOWN_ARGS[@]} > 0 )); then
    echo "Unknown options: ${UNKNOWN_ARGS[*]}" >&2
    exit 1
  fi
}

hpcbind_check_unknown_args
################################################################################
# Check that visible gpus are valid
################################################################################
# Turn HPCBIND_VISIBLE_GPUS into an array and sanitise its entries.
# Globals read:    HPCBIND_VISIBLE_GPUS (string or array), NUM_GPUS,
#                  HPCBIND_ENABLE_GPU_MAPPING
# Globals written: HPCBIND_VISIBLE_GPUS (array), NUM_GPUS
# The list is split on whitespace and commas. When GPU mapping is enabled,
# every entry that is not a non-negative integer below NUM_GPUS is reported
# and replaced by 0, and NUM_GPUS becomes the number of listed GPUs.
hpcbind_validate_visible_gpus() {
  local joined="${HPCBIND_VISIBLE_GPUS[*]}"
  local -a ids=()
  local -i idx
  local gpu

  joined=${joined//,/ }
  # -d '' makes read consume the whole string, so newline separated lists are
  # split as well; read reports failure at end of input, which is expected.
  read -r -d '' -a ids <<< "${joined}" || true
  HPCBIND_VISIBLE_GPUS=("${ids[@]}")

  if [[ ${HPCBIND_ENABLE_GPU_MAPPING} -eq 1 ]]; then
    for (( idx = 0; idx < ${#HPCBIND_VISIBLE_GPUS[@]}; idx++ )); do
      gpu=${HPCBIND_VISIBLE_GPUS[idx]}
      # The regex check comes first so non-numeric ids never reach the
      # arithmetic comparison; 10# keeps ids with leading zeros decimal.
      if [[ ! ${gpu} =~ ^[0-9]+$ ]] || (( 10#${gpu} >= NUM_GPUS )); then
        echo "Invalid GPU ID ${gpu}, setting to 0"
        HPCBIND_VISIBLE_GPUS[idx]=0
      fi
    done
    NUM_GPUS=${#HPCBIND_VISIBLE_GPUS[@]}
  fi
}

hpcbind_validate_visible_gpus
################################################################################
# Check OpenMP percent
################################################################################
# Force HPCBIND_OPENMP_PERCENT into the range 1..100, printing a notice
# whenever the value had to be changed.
hpcbind_clamp_openmp_percent() {
  if (( HPCBIND_OPENMP_PERCENT < 1 )); then
    echo "OpenMP percent < 1, setting to 1"
    HPCBIND_OPENMP_PERCENT=1
  elif (( HPCBIND_OPENMP_PERCENT > 100 )); then
    echo "OpenMP percent > 100, setting to 100"
    HPCBIND_OPENMP_PERCENT=100
  fi
}

hpcbind_clamp_openmp_percent
################################################################################
# Check distribute
################################################################################
# Sanitise the partitioning options.
# Globals read/written: HPCBIND_DISTRIBUTE, HPCBIND_PARTITION
# A partition count below 1 is reset to 1. A partition index outside
# 0..HPCBIND_DISTRIBUTE-1 is reset to 0. Negative indices are rejected too,
# because the index is later used as an array subscript.
hpcbind_check_distribute() {
  if (( HPCBIND_DISTRIBUTE <= 0 )); then
    echo "Invalid input for distribute, changing distribute to 1"
    HPCBIND_DISTRIBUTE=1
  fi
  if (( HPCBIND_PARTITION < 0 || HPCBIND_PARTITION >= HPCBIND_DISTRIBUTE )); then
    echo "Invalid input for distribute-partition, changing to 0"
    HPCBIND_PARTITION=0
  fi
}

hpcbind_check_distribute
################################################################################
# Find cpuset and num threads
################################################################################
HPCBIND_HWLOC_CPUSET=""
declare -i HPCBIND_NUM_PUS=0

# Print the number of processors of the machine without using hwloc.
# Counts the "processor" entries of /proc/cpuinfo and falls back to getconf
# when that file is unreadable or yields no entry; prints 0 if both fail.
hpcbind_count_system_pus() {
  local count=""
  if [[ -r /proc/cpuinfo ]]; then
    count=$(grep -c '^processor' /proc/cpuinfo)
  fi
  if [[ ! ${count} =~ ^[0-9]+$ ]] || (( count < 1 )); then
    count=$(getconf _NPROCESSORS_ONLN 2>/dev/null)
  fi
  if [[ ! ${count} =~ ^[0-9]+$ ]]; then
    count=0
  fi
  printf '%s\n' "${count}"
}

# Work out the cpuset to bind to and how many PUs it contains.
# Globals read:    HPCBIND_ENABLE_HWLOC_BIND, HPCBIND_HWLOC_PARENT_CPUSET,
#                  HPCBIND_PROC_BIND, HPCBIND_DISTRIBUTE, HPCBIND_PARTITION
# Globals written: BINDING, CPUSETS, HPCBIND_HWLOC_CPUSET, HPCBIND_NUM_PUS
# With hwloc binding enabled, the requested location is resolved with
# hwloc-calc (restricted to the parent cpuset when one is known), split into
# HPCBIND_DISTRIBUTE parts with hwloc-distrib, and the part selected by
# HPCBIND_PARTITION is kept. Otherwise only the system PU count is recorded.
hpcbind_find_cpuset() {
  local -a locations=()
  if [[ ${HPCBIND_ENABLE_HWLOC_BIND} -eq 1 ]]; then
    # --proc-bind may name several locations separated by blanks; each one
    # must reach hwloc-calc as its own argument.
    read -r -a locations <<< "${HPCBIND_PROC_BIND}"
    if [[ -z "${HPCBIND_HWLOC_PARENT_CPUSET}" ]]; then
      BINDING=$(hwloc-calc "${locations[@]}")
    else
      BINDING=$(hwloc-calc --restrict "${HPCBIND_HWLOC_PARENT_CPUSET}" "${locations[@]}")
    fi

    # Split the hwloc-distrib output on any whitespace into CPUSETS; read
    # reports failure at end of input, which is expected with -d ''.
    CPUSETS=()
    read -r -d '' -a CPUSETS \
      < <(hwloc-distrib --restrict "${BINDING}" --at core "${HPCBIND_DISTRIBUTE}") || true
    HPCBIND_HWLOC_CPUSET=${CPUSETS[HPCBIND_PARTITION]:-}
    HPCBIND_NUM_PUS=$(hwloc-ls --restrict "${HPCBIND_HWLOC_CPUSET}" --only pu | wc -l)
  else
    HPCBIND_NUM_PUS=$(hpcbind_count_system_pus)
  fi
}

hpcbind_find_cpuset
declare -i HPCBIND_OPENMP_NUM_THREADS=0

# Derive HPCBIND_OPENMP_NUM_THREADS.
# Globals read:    HPCBIND_NUM_PUS, HPCBIND_OPENMP_PERCENT,
#                  HPCBIND_OPENMP_FORCE_NUM_THREADS
# Globals written: HPCBIND_OPENMP_NUM_THREADS
# The count is HPCBIND_OPENMP_PERCENT percent of the PUs (integer division),
# raised to at least 1 and capped at the PU count. A positive forced value
# overrides the computed one.
hpcbind_compute_openmp_num_threads() {
  HPCBIND_OPENMP_NUM_THREADS=$(( HPCBIND_NUM_PUS * HPCBIND_OPENMP_PERCENT / 100 ))

  if (( HPCBIND_OPENMP_NUM_THREADS < 1 )); then
    HPCBIND_OPENMP_NUM_THREADS=1
  elif (( HPCBIND_OPENMP_NUM_THREADS > HPCBIND_NUM_PUS )); then
    HPCBIND_OPENMP_NUM_THREADS=${HPCBIND_NUM_PUS}
  fi

  if (( HPCBIND_OPENMP_FORCE_NUM_THREADS > 0 )); then
    HPCBIND_OPENMP_NUM_THREADS=${HPCBIND_OPENMP_FORCE_NUM_THREADS}
  fi
}

hpcbind_compute_openmp_num_threads
################################################################################
# Set OpenMP environment variables
################################################################################
# Succeed when the major part of HPCBIND_OPENMP_VERSION is an integer >= 4.
# The comparison is numeric, so "10.0" is newer than "4.0".
hpcbind_openmp_has_places() {
  local major=${HPCBIND_OPENMP_VERSION%%.*}
  [[ ${major} =~ ^[0-9]+$ ]] && (( 10#${major} >= 4 ))
}

# Export OMP_NUM_THREADS, OMP_PROC_BIND, OMP_PLACES and OMP_NESTED.
# Globals read: HPCBIND_OPENMP_NUM_THREADS, HPCBIND_OPENMP_PROC_BIND,
#               HPCBIND_OPENMP_FORCE_PROC_BIND, HPCBIND_OPENMP_PLACES,
#               HPCBIND_OPENMP_VERSION, HPCBIND_OPENMP_NESTED
hpcbind_set_openmp_env() {
  # set OMP_NUM_THREADS
  export OMP_NUM_THREADS="${HPCBIND_OPENMP_NUM_THREADS}"

  # set OMP_PROC_BIND and OMP_PLACES
  if [[ ${HPCBIND_OPENMP_PROC_BIND} -ne 1 ]]; then
    # no openmp proc bind
    unset OMP_PLACES
    unset OMP_PROC_BIND
  elif [[ -n "${HPCBIND_OPENMP_FORCE_PROC_BIND}" ]]; then
    # force proc bind
    export OMP_PLACES="${HPCBIND_OPENMP_PLACES}"
    export OMP_PROC_BIND="${HPCBIND_OPENMP_FORCE_PROC_BIND}"
  elif hpcbind_openmp_has_places; then
    # default proc bind logic for OpenMP >= 4
    export OMP_PLACES="${HPCBIND_OPENMP_PLACES}"
    export OMP_PROC_BIND="spread"
  else
    # default proc bind logic for older OpenMP versions
    export OMP_PROC_BIND="true"
    unset OMP_PLACES
  fi

  # set OMP_NESTED
  export OMP_NESTED="${HPCBIND_OPENMP_NESTED}"
}

hpcbind_set_openmp_env
################################################################################
# Set CUDA environment variables
################################################################################
declare -i MY_TASK_ID=0
declare -i GPU_ID=0

# Export CUDA_VISIBLE_DEVICES with the single GPU chosen for this task.
# Globals read:    HPCBIND_ENABLE_GPU_MAPPING, HPCBIND_QUEUE_GPU_MAPPING,
#                  HPCBIND_QUEUE_INDEX, HPCBIND_DISTRIBUTE, HPCBIND_PARTITION,
#                  NUM_GPUS, HPCBIND_VISIBLE_GPUS (array)
# Globals written: MY_TASK_ID, GPU_ID, CUDA_VISIBLE_DEVICES
# The task id is the partition index, offset by queue index * partition count
# when queue based mapping is active. It is wrapped round-robin over the
# NUM_GPUS entries of HPCBIND_VISIBLE_GPUS. Nothing is changed when GPU
# mapping is disabled or no GPU is listed.
hpcbind_set_cuda_visible_devices() {
  if [[ ${HPCBIND_ENABLE_GPU_MAPPING} -ne 1 ]]; then
    return 0
  fi
  # An empty GPU list would make the modulo below divide by zero.
  if (( NUM_GPUS < 1 )); then
    echo "No visible GPUs to map, CUDA_VISIBLE_DEVICES left unchanged" >&2
    return 0
  fi

  if [[ ${HPCBIND_QUEUE_GPU_MAPPING} -eq 0 ]]; then
    MY_TASK_ID=${HPCBIND_PARTITION}
  else
    MY_TASK_ID=$(( HPCBIND_QUEUE_INDEX * HPCBIND_DISTRIBUTE + HPCBIND_PARTITION ))
  fi

  GPU_ID=$(( MY_TASK_ID % NUM_GPUS ))
  # The remainder takes the sign of the dividend; keep the subscript >= 0.
  if (( GPU_ID < 0 )); then
    GPU_ID=$(( GPU_ID + NUM_GPUS ))
  fi
  export CUDA_VISIBLE_DEVICES="${HPCBIND_VISIBLE_GPUS[GPU_ID]}"
}

hpcbind_set_cuda_visible_devices
################################################################################
# Set hpcbind environment variables
################################################################################
# Export the resolved hpcbind settings so the launched command can see them.
# The queue variables are exported only when a queue system was detected
# (HPCBIND_QUEUE_NAME is non-empty). An empty parent cpuset is published as
# "all". The visible GPU list is published comma separated.
hpcbind_export_env() {
  local gpu_list="${HPCBIND_VISIBLE_GPUS[*]}"

  export HPCBIND_HAS_HWLOC="${HPCBIND_HAS_HWLOC}"
  export HPCBIND_HAS_NVIDIA="${HPCBIND_HAS_NVIDIA}"
  export HPCBIND_NUM_PUS="${HPCBIND_NUM_PUS}"
  export HPCBIND_HWLOC_CPUSET="${HPCBIND_HWLOC_CPUSET}"
  export HPCBIND_HWLOC_DISTRIBUTE="${HPCBIND_DISTRIBUTE}"
  export HPCBIND_HWLOC_DISTRIBUTE_PARTITION="${HPCBIND_PARTITION}"
  export HPCBIND_HWLOC_PARENT_CPUSET="${HPCBIND_HWLOC_PARENT_CPUSET:-all}"
  export HPCBIND_HWLOC_PROC_BIND="${HPCBIND_PROC_BIND}"
  export HPCBIND_NVIDIA_ENABLE_GPU_MAPPING="${HPCBIND_ENABLE_GPU_MAPPING}"
  # Blank separated internally, comma separated for consumers.
  export HPCBIND_NVIDIA_VISIBLE_GPUS="${gpu_list// /,}"
  export HPCBIND_OPENMP_VERSION="${HPCBIND_OPENMP_VERSION}"

  if [[ -n "${HPCBIND_QUEUE_NAME}" ]]; then
    export HPCBIND_QUEUE_INDEX="${HPCBIND_QUEUE_INDEX}"
    export HPCBIND_QUEUE_NAME="${HPCBIND_QUEUE_NAME}"
    export HPCBIND_QUEUE_GPU_MAPPING="${HPCBIND_QUEUE_GPU_MAPPING}"
  fi
}

hpcbind_export_env
################################################################################
# Print verbose
################################################################################
# Print the diagnostic report requested on the command line.
# Globals read:    HPCBIND_VERBOSE, HPCBIND_SHOW_BINDINGS, HPCBIND_HAS_HWLOC,
#                  HPCBIND_HWLOC_CPUSET
# Globals written: MY_ENV
# In verbose mode the sorted environment is listed in three sections:
# variables starting with HPCBIND_, CUDA_ and OMP_. When bindings were
# requested the PUs of the selected cpuset are listed through hwloc-ls, or a
# notice is printed when hwloc is unavailable.
hpcbind_report() {
  if [[ ${HPCBIND_VERBOSE} -eq 1 ]]; then
    MY_ENV=$(env | sort)
    echo "[HPCBIND]"
    grep -E "^HPCBIND_" <<< "${MY_ENV}"
    echo "[CUDA]"
    grep -E "^CUDA_" <<< "${MY_ENV}"
    echo "[OPENMP]"
    grep -E "^OMP_" <<< "${MY_ENV}"
  fi

  if [[ ${HPCBIND_SHOW_BINDINGS} -eq 1 ]]; then
    if [[ ${HPCBIND_HAS_HWLOC} -eq 1 ]]; then
      echo "[BINDINGS]"
      # The cpuset is empty when binding was disabled; --restrict must then
      # be left out or it would take "--only" as its argument.
      if [[ -n "${HPCBIND_HWLOC_CPUSET}" ]]; then
        hwloc-ls --restrict "${HPCBIND_HWLOC_CPUSET}" --only pu
      else
        hwloc-ls --only pu
      fi
    else
      echo "Unable to show bindings, hwloc not available."
    fi
  fi
}

hpcbind_report
################################################################################
# Run command
################################################################################
if
[[
${
HPCBIND_LSTOPO
}
-eq 0
]]
;
then
if
[[
${
HPCBIND_ENABLE_HWLOC_BIND
}
-eq 1
]]
;
then
hwloc-bind
${
HPCBIND_HWLOC_CPUSET
}
--
$@
else
eval
$@
fi
else
if
[[
${
HPCBIND_HAS_HWLOC
}
-eq 1
]]
;
then
if
[[
${
HPCBIND_ENABLE_HWLOC_BIND
}
-eq 1
&&
! -z
${
DISPLAY
}
]]
;
then
echo
"[BINDINGS]"
hwloc-ls --restrict
${
HPCBIND_HWLOC_CPUSET
}
--only pu
hwloc-bind
${
HPCBIND_HWLOC_CPUSET
}
-- lstopo --pid 0
else
hwloc-ls --restrict
${
HPCBIND_HWLOC_CPUSET
}
fi
else
echo
"Unable to show bindings, hwloc not available."
fi
fi
Event Timeline
Log In to Comment