#!/bin/bash
#SBATCH --job-name=nsight_systems    # Arbitrary name of the Slurm job
#SBATCH --output=%x.%j.out           # Standard output file of the job
#SBATCH --error=%x.%j.err            # Standard error file of the job
#SBATCH --ntasks=4                   # Number of MPI processes requested
#SBATCH --ntasks-per-node=4          # Number of MPI tasks per node (= number of GPUs per node)
#SBATCH --gres=gpu:4                 # Number of GPUs per node
#SBATCH --cpus-per-task=10           # Number of CPU cores per task (a quarter of the node here)
# /!\ Important: the following line is misleading, but in Slurm vocabulary
# "multithread" refers to hyperthreading.
#SBATCH --hint=nomultithread         # 1 MPI process per physical core (no hyperthreading)
#SBATCH --time=00:20:00              # Job time hh:mm:ss (20 min here)

# Load the modules of your choice
module load ...

# Load Nsight Systems
module load nvidia-nsight-systems/2021.2.1

# Echo the commands
set -x

# Do not use /tmp
export TMPDIR=$JOBSCRATCH

# To circumvent a bug in the current versions of Nsight Systems,
# it is also necessary to create a symbolic link so that the
# /tmp/nvidia directory points to TMPDIR
ln -s $JOBSCRATCH /tmp/nvidia

# Profiling in OpenACC mode with the generation of one results file
# per process ("report_rank0.qdrep", "report_rank1.qdrep", etc.)
srun nsys profile -t openacc -o "report_rank%q{SLURM_PROCID}" ./my_bin_exe
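
# Once the job has finished, the per-rank reports can be inspected.
# A minimal sketch, assuming the same nvidia-nsight-systems module is
# available where you run these commands (report names follow the -o
# pattern used above):
#
#   # Open one rank's report in the Nsight Systems GUI
#   nsys-ui report_rank0.qdrep
#
#   # Or print a command-line summary of the collected trace
#   nsys stats report_rank0.qdrep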