+ source /gpfswork/rech/six/commun/start-prod ++ module purge ++ unset _mlshdbg ++ '[' 0 = 1 ']' ++ unset _mlre _mlIFS ++ '[' -n x ']' ++ _mlIFS=' ' ++ IFS=' ' ++ for _mlv in ${MODULES_RUN_QUARANTINE:-} ++ '[' LD_LIBRARY_PATH = LD_LIBRARY_PATH -a LD_LIBRARY_PATH = LD_LIBRARY_PATH ']' +++ eval 'echo ${LD_LIBRARY_PATH+x}' ++++ echo x ++ '[' -n x ']' +++ eval 'echo ${LD_LIBRARY_PATH}' ++++ echo /gpfslocalsys/slurm/current/lib/slurm:/gpfslocalsys/slurm/current/lib ++ _mlre='LD_LIBRARY_PATH_modquar='\''/gpfslocalsys/slurm/current/lib/slurm:/gpfslocalsys/slurm/current/lib'\'' ' ++ _mlrv=MODULES_RUNENV_LD_LIBRARY_PATH +++ eval 'echo ${MODULES_RUNENV_LD_LIBRARY_PATH:-}' ++++ echo ++ _mlre='LD_LIBRARY_PATH_modquar='\''/gpfslocalsys/slurm/current/lib/slurm:/gpfslocalsys/slurm/current/lib'\'' LD_LIBRARY_PATH='\'''\'' ' ++ '[' -n 'LD_LIBRARY_PATH_modquar='\''/gpfslocalsys/slurm/current/lib/slurm:/gpfslocalsys/slurm/current/lib'\'' LD_LIBRARY_PATH='\'''\'' ' ']' +++ eval 'LD_LIBRARY_PATH_modquar='\''/gpfslocalsys/slurm/current/lib/slurm:/gpfslocalsys/slurm/current/lib'\''' 'LD_LIBRARY_PATH='\'''\''' /gpfslocalsup/spack_soft/tcl/8.6.8/gcc-4.8.5-5nqkfcnctewdheju62zvqbsonnzszr6m/bin/tclsh /gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/libexec/modulecmd.tcl bash '"$@"' ++++ LD_LIBRARY_PATH_modquar=/gpfslocalsys/slurm/current/lib/slurm:/gpfslocalsys/slurm/current/lib ++++ LD_LIBRARY_PATH= ++++ /gpfslocalsup/spack_soft/tcl/8.6.8/gcc-4.8.5-5nqkfcnctewdheju62zvqbsonnzszr6m/bin/tclsh /gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/libexec/modulecmd.tcl bash purge ++ eval ++ _mlstatus=0 ++ '[' -n x ']' ++ IFS=' ' ++ unset _mlre _mlv _mlrv _mlIFS ++ '[' -n '' ']' ++ unset _mlshdbg ++ return 0 ++ module load pytorch-gpu/py3/1.8.1 ++ unset _mlshdbg ++ '[' 0 = 1 ']' ++ unset _mlre _mlIFS ++ '[' -n x ']' ++ _mlIFS=' ' ++ IFS=' ' ++ for _mlv in ${MODULES_RUN_QUARANTINE:-} ++ '[' LD_LIBRARY_PATH = LD_LIBRARY_PATH -a LD_LIBRARY_PATH = LD_LIBRARY_PATH ']' +++ eval 'echo ${LD_LIBRARY_PATH+x}' ++++ echo x ++ '[' -n x ']' +++ eval 'echo ${LD_LIBRARY_PATH}' ++++ echo /gpfslocalsys/slurm/current/lib/slurm:/gpfslocalsys/slurm/current/lib ++ _mlre='LD_LIBRARY_PATH_modquar='\''/gpfslocalsys/slurm/current/lib/slurm:/gpfslocalsys/slurm/current/lib'\'' ' ++ _mlrv=MODULES_RUNENV_LD_LIBRARY_PATH +++ eval 'echo ${MODULES_RUNENV_LD_LIBRARY_PATH:-}' ++++ echo ++ _mlre='LD_LIBRARY_PATH_modquar='\''/gpfslocalsys/slurm/current/lib/slurm:/gpfslocalsys/slurm/current/lib'\'' LD_LIBRARY_PATH='\'''\'' ' ++ '[' -n 'LD_LIBRARY_PATH_modquar='\''/gpfslocalsys/slurm/current/lib/slurm:/gpfslocalsys/slurm/current/lib'\'' LD_LIBRARY_PATH='\'''\'' ' ']' +++ eval 'LD_LIBRARY_PATH_modquar='\''/gpfslocalsys/slurm/current/lib/slurm:/gpfslocalsys/slurm/current/lib'\''' 'LD_LIBRARY_PATH='\'''\''' /gpfslocalsup/spack_soft/tcl/8.6.8/gcc-4.8.5-5nqkfcnctewdheju62zvqbsonnzszr6m/bin/tclsh /gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/libexec/modulecmd.tcl bash '"$@"' ++++ LD_LIBRARY_PATH_modquar=/gpfslocalsys/slurm/current/lib/slurm:/gpfslocalsys/slurm/current/lib ++++ LD_LIBRARY_PATH= ++++ /gpfslocalsup/spack_soft/tcl/8.6.8/gcc-4.8.5-5nqkfcnctewdheju62zvqbsonnzszr6m/bin/tclsh /gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/libexec/modulecmd.tcl bash load pytorch-gpu/py3/1.8.1 Loading pytorch-gpu/py3/1.8.1 Loading requirement: gcc/8.3.1 cuda/11.2 nccl/2.9.6-1-cuda 
cudnn/8.1.1.33-cuda intel-mkl/2020.4 openmpi/4.1.1-cuda magma/2.5.4-cuda ++ eval 'PSM2_GPUDIRECT=1;' export 'PSM2_GPUDIRECT; MODULES_LMPREREQ=nccl/2.9.6-1-cuda\&cuda/11.2\|cuda/10.2:cudnn/8.1.1.33-cuda\&cuda/11.2\|cuda/10.2:openmpi/4.1.1-cuda\&nvidia-compilers/21.3\|nvidia-compilers/20.11\|nvidia-compilers/20.7\|pgi/20.4\|gcc/8.3.1\&cuda/11.2\|cuda/10.2:magma/2.5.4-cuda\&intel-compilers/19.0.4\|gcc/8.3.1\&cuda/11.2\|cuda/10.2:pytorch-gpu/py3/1.8.1\&gcc/8.3.1\&cuda/11.2\&nccl/2.9.6-1-cuda\&cudnn/8.1.1.33-cuda\&intel-mkl/2020.4\&openmpi/4.1.1-cuda\&magma/2.5.4-cuda;' export 'MODULES_LMPREREQ; CPATH=/gpfslocalsup/spack_soft/magma/2.5.4/gcc-8.3.1-d4zfylwpieuqujdgbuwv5yu6voe4okke/include:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/include:/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/mkl/include:/gpfslocalsup/spack_soft/cudnn/8.1.1.33-11.2/gcc-8.3.1-dgsfwc7e23vzc36jopc5nxyunptm2ieh/include:/gpfslocalsup/spack_soft/nccl/2.9.6-1/gcc-8.3.1-2c3rwcolksekujszxrg7r72uxmekxx2y/include;' export 'CPATH; LD_LIBRARY_PATH=/gpfslocalsup/spack_soft/magma/2.5.4/gcc-8.3.1-d4zfylwpieuqujdgbuwv5yu6voe4okke/lib:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/lib:/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/mkl/lib/intel64_lin:/gpfslocalsup/spack_soft/cudnn/8.1.1.33-11.2/gcc-8.3.1-dgsfwc7e23vzc36jopc5nxyunptm2ieh/lib64:/gpfslocalsup/spack_soft/nccl/2.9.6-1/gcc-8.3.1-2c3rwcolksekujszxrg7r72uxmekxx2y/lib:/gpfslocalsys/cuda/11.2/nvvm/lib64:/gpfslocalsys/cuda/11.2/extras/CUPTI/lib64:/gpfslocalsys/cuda/11.2/lib64:/gpfslocalsys/cuda/11.2/samples/common/lib/linux/x86_64:/gpfslocalsys/cuda/11.2/targets/x86_64-linux/lib:/gpfslocalsys/slurm/current/lib/slurm:/gpfslocalsys/slurm/current/lib;' export 'LD_LIBRARY_PATH; SLURM_MPI_TYPE=pmix;' export 'SLURM_MPI_TYPE; OMPI_MCA_mtl=psm2;' export 'OMPI_MCA_mtl; MANPATH=/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/share/man:/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/man/common:/gpfslocalsys/cuda/11.2/doc/man::/opt/c3/man:/opt/clmgr/man:/opt/sgi/share/man:/opt/clmgr/share/man:/opt/clmgr/lib/cm-cli/man:/gpfslocalsys/slurm/current/share/man:/usr/share/catman:/usr/share/man:/usr/catman:/usr/man;' export 'MANPATH; LIBRARY_PATH=/gpfslocalsup/spack_soft/magma/2.5.4/gcc-8.3.1-d4zfylwpieuqujdgbuwv5yu6voe4okke/lib:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/lib:/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/mkl/lib/intel64_lin:/gpfslocalsup/spack_soft/cudnn/8.1.1.33-11.2/gcc-8.3.1-dgsfwc7e23vzc36jopc5nxyunptm2ieh/lib64:/gpfslocalsup/spack_soft/nccl/2.9.6-1/gcc-8.3.1-2c3rwcolksekujszxrg7r72uxmekxx2y/lib:/gpfslocalsys/cuda/11.2/lib64/stubs:/gpfslocalsys/cuda/11.2/nvvm/lib64:/gpfslocalsys/cuda/11.2/extras/CUPTI/lib64:/gpfslocalsys/cuda/11.2/lib64:/gpfslocalsys/cuda/11.2/samples/common/lib/linux/x86_64:/gpfslocalsys/cuda/11.2/targets/x86_64-linux/lib:/gpfslocalsys/slurm/current/lib/slurm:/gpfslocalsys/slurm/current/lib;' export 'LIBRARY_PATH; MPIF77=/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin/mpif77;' export 'MPIF77; MKLROOT_modshare=/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/mkl:1;' export 
'MKLROOT_modshare; CMAKE_PREFIX_PATH=/gpfslocalsup/spack_soft/magma/2.5.4/gcc-8.3.1-d4zfylwpieuqujdgbuwv5yu6voe4okke/:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/:/gpfslocalsup/spack_soft/cudnn/8.1.1.33-11.2/gcc-8.3.1-dgsfwc7e23vzc36jopc5nxyunptm2ieh/:/gpfslocalsup/spack_soft/nccl/2.9.6-1/gcc-8.3.1-2c3rwcolksekujszxrg7r72uxmekxx2y/;' export 'CMAKE_PREFIX_PATH; LOADEDMODULES=gcc/8.3.1:cuda/11.2:nccl/2.9.6-1-cuda:cudnn/8.1.1.33-cuda:intel-mkl/2020.4:openmpi/4.1.1-cuda:magma/2.5.4-cuda:pytorch-gpu/py3/1.8.1;' export 'LOADEDMODULES; _LMFILES_=/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/gcc/8.3.1:/gpfslocalsup/pub/module-rh/modulefiles/cuda/11.2:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/nccl/2.9.6-1-cuda:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/cudnn/8.1.1.33-cuda:/gpfslocalsup/pub/module-rh/modulefiles/intel-mkl/2020.4:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/openmpi/4.1.1-cuda:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/magma/2.5.4-cuda:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/pytorch-gpu/py3/1.8.1;' export '_LMFILES_; PKG_CONFIG_PATH=/gpfslocalsup/spack_soft/magma/2.5.4/gcc-8.3.1-d4zfylwpieuqujdgbuwv5yu6voe4okke/lib/pkgconfig:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/lib/pkgconfig:/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/mkl/bin/pkgconfig:/gpfslocalsup/spack_soft/nccl/2.9.6-1/gcc-8.3.1-2c3rwcolksekujszxrg7r72uxmekxx2y/lib/pkgconfig;' export 'PKG_CONFIG_PATH; MANPATH_modshare=:1:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/share/man:1:/opt/sgi/share/man:1:/opt/c3/man:1:/gpfslocalsys/slurm/current/share/man:1:/opt/clmgr/share/man:1:/opt/clmgr/lib/cm-cli/man:1:/usr/man:1:/usr/catman:1:/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/man/common:1:/opt/clmgr/man:1:/usr/share/man:1:/gpfslocalsys/cuda/11.2/doc/man:1:/usr/share/catman:1;' export 'MANPATH_modshare; LIBRARY_PATH_modshare=/gpfslocalsys/cuda/11.2/targets/x86_64-linux/lib:1:/gpfslocalsup/spack_soft/magma/2.5.4/gcc-8.3.1-d4zfylwpieuqujdgbuwv5yu6voe4okke/lib:1:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/lib:1:/gpfslocalsys/slurm/current/lib:1:/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/mkl/lib/intel64_lin:1:/gpfslocalsys/cuda/11.2/nvvm/lib64:1:/gpfslocalsys/cuda/11.2/lib64/stubs:1:/gpfslocalsys/cuda/11.2/extras/CUPTI/lib64:1:/gpfslocalsup/spack_soft/cudnn/8.1.1.33-11.2/gcc-8.3.1-dgsfwc7e23vzc36jopc5nxyunptm2ieh/lib64:1:/gpfslocalsup/spack_soft/nccl/2.9.6-1/gcc-8.3.1-2c3rwcolksekujszxrg7r72uxmekxx2y/lib:1:/gpfslocalsys/cuda/11.2/samples/common/lib/linux/x86_64:1:/gpfslocalsys/slurm/current/lib/slurm:1:/gpfslocalsys/cuda/11.2/lib64:1;' export 'LIBRARY_PATH_modshare; MODULES_LMCONFLICT=gcc/8.3.1\&gcc:cuda/11.2\&cuda:nccl/2.9.6-1-cuda\&nccl:cudnn/8.1.1.33-cuda\&cudnn:intel-mkl/2020.4\&intel-mkl:openmpi/4.1.1-cuda\&openmpi\&intel-mpi:magma/2.5.4-cuda\&magma:pytorch-gpu/py3/1.8.1\&python\&tensorflow\&pytorch\&caffe\&anaconda-py2\&anaconda-py3;' export 'MODULES_LMCONFLICT; MPICC=/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin/mpicc;' export 'MPICC; 
NLSPATH_modshare=/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/mkl/lib/intel64_lin/locale/%l_%t/%N:1;' export 'NLSPATH_modshare; OMPI_MCA_pml=cm;' export 'OMPI_MCA_pml; INTEL_LICENSE_FILE=/gpfslocalsys/intel/licenses/site_license.lic;' export 'INTEL_LICENSE_FILE; PKG_CONFIG_PATH_modshare=/gpfslocalsup/spack_soft/magma/2.5.4/gcc-8.3.1-d4zfylwpieuqujdgbuwv5yu6voe4okke/lib/pkgconfig:1:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/lib/pkgconfig:1:/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/mkl/bin/pkgconfig:1:/gpfslocalsup/spack_soft/nccl/2.9.6-1/gcc-8.3.1-2c3rwcolksekujszxrg7r72uxmekxx2y/lib/pkgconfig:1;' export 'PKG_CONFIG_PATH_modshare; MODULES_LMCONFLICT_modshare=pytorch-gpu/py3/1.8.1\&python\&tensorflow\&pytorch\&caffe\&anaconda-py2\&anaconda-py3:1:magma/2.5.4-cuda\&magma:1:cuda/11.2\&cuda:1:cudnn/8.1.1.33-cuda\&cudnn:1:intel-mkl/2020.4\&intel-mkl:1:nccl/2.9.6-1-cuda\&nccl:1:openmpi/4.1.1-cuda\&openmpi\&intel-mpi:1:gcc/8.3.1\&gcc:1;' export 'MODULES_LMCONFLICT_modshare; INTEL_LICENSE_FILE_modshare=/gpfslocalsys/intel/licenses/site_license.lic:1;' export 'INTEL_LICENSE_FILE_modshare; CUDA_INSTALL_PATH=/gpfslocalsys/cuda/11.2;' export 'CUDA_INSTALL_PATH; MODULES_LMNOTUASKED=gcc/8.3.1:cuda/11.2:nccl/2.9.6-1-cuda:cudnn/8.1.1.33-cuda:intel-mkl/2020.4:openmpi/4.1.1-cuda:magma/2.5.4-cuda;' export 'MODULES_LMNOTUASKED; PYTHONUNBUFFERED=1;' export 'PYTHONUNBUFFERED; MKLROOT=/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/mkl;' export 'MKLROOT; MPICXX=/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin/mpic++;' export 'MPICXX; CUDA_PATH=/gpfslocalsys/cuda/11.2;' export 'CUDA_PATH; MODULES_LMNOTUASKED_modshare=gcc/8.3.1:1:nccl/2.9.6-1-cuda:1:cuda/11.2:1:intel-mkl/2020.4:1:magma/2.5.4-cuda:1:cudnn/8.1.1.33-cuda:1:openmpi/4.1.1-cuda:1;' export 'MODULES_LMNOTUASKED_modshare; PSM2_CUDA_MEMCACHE_SIZE=1024;' export 'PSM2_CUDA_MEMCACHE_SIZE; NLSPATH=/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/mkl/lib/intel64_lin/locale/%l_%t/%N;' export 'NLSPATH; MODULES_LMPREREQ_modshare=cudnn/8.1.1.33-cuda\&cuda/11.2\|cuda/10.2:1:pytorch-gpu/py3/1.8.1\&gcc/8.3.1\&cuda/11.2\&nccl/2.9.6-1-cuda\&cudnn/8.1.1.33-cuda\&intel-mkl/2020.4\&openmpi/4.1.1-cuda\&magma/2.5.4-cuda:1:nccl/2.9.6-1-cuda\&cuda/11.2\|cuda/10.2:1:openmpi/4.1.1-cuda\&nvidia-compilers/21.3\|nvidia-compilers/20.11\|nvidia-compilers/20.7\|pgi/20.4\|gcc/8.3.1\&cuda/11.2\|cuda/10.2:1:magma/2.5.4-cuda\&intel-compilers/19.0.4\|gcc/8.3.1\&cuda/11.2\|cuda/10.2:1;' export 'MODULES_LMPREREQ_modshare; CPATH_modshare=/gpfslocalsup/spack_soft/cudnn/8.1.1.33-11.2/gcc-8.3.1-dgsfwc7e23vzc36jopc5nxyunptm2ieh/include:1:/gpfslocalsup/spack_soft/nccl/2.9.6-1/gcc-8.3.1-2c3rwcolksekujszxrg7r72uxmekxx2y/include:1:/gpfslocalsup/spack_soft/magma/2.5.4/gcc-8.3.1-d4zfylwpieuqujdgbuwv5yu6voe4okke/include:1:/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/mkl/include:1:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/include:1;' export 'CPATH_modshare; C_INCLUDE_PATH=/gpfslocalsys/cuda/11.2/include;' export 'C_INCLUDE_PATH; 
LD_LIBRARY_PATH_modshare=/gpfslocalsys/cuda/11.2/targets/x86_64-linux/lib:1:/gpfslocalsup/spack_soft/magma/2.5.4/gcc-8.3.1-d4zfylwpieuqujdgbuwv5yu6voe4okke/lib:1:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/lib:1:/gpfslocalsys/slurm/current/lib:1:/gpfslocalsys/cuda/11.2/nvvm/lib64:1:/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/mkl/lib/intel64_lin:1:/gpfslocalsys/cuda/11.2/extras/CUPTI/lib64:1:/gpfslocalsup/spack_soft/cudnn/8.1.1.33-11.2/gcc-8.3.1-dgsfwc7e23vzc36jopc5nxyunptm2ieh/lib64:1:/gpfslocalsup/spack_soft/nccl/2.9.6-1/gcc-8.3.1-2c3rwcolksekujszxrg7r72uxmekxx2y/lib:1:/gpfslocalsys/slurm/current/lib/slurm:1:/gpfslocalsys/cuda/11.2/samples/common/lib/linux/x86_64:1:/gpfslocalsys/cuda/11.2/lib64:1;' export 'LD_LIBRARY_PATH_modshare; CMAKE_PREFIX_PATH_modshare=/gpfslocalsup/spack_soft/cudnn/8.1.1.33-11.2/gcc-8.3.1-dgsfwc7e23vzc36jopc5nxyunptm2ieh/:1:/gpfslocalsup/spack_soft/nccl/2.9.6-1/gcc-8.3.1-2c3rwcolksekujszxrg7r72uxmekxx2y/:1:/gpfslocalsup/spack_soft/magma/2.5.4/gcc-8.3.1-d4zfylwpieuqujdgbuwv5yu6voe4okke/:1:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/:1;' export 'CMAKE_PREFIX_PATH_modshare; LOADEDMODULES_modshare=gcc/8.3.1:1:nccl/2.9.6-1-cuda:1:pytorch-gpu/py3/1.8.1:1:cuda/11.2:1:intel-mkl/2020.4:1:cudnn/8.1.1.33-cuda:1:openmpi/4.1.1-cuda:1:magma/2.5.4-cuda:1;' export 'LOADEDMODULES_modshare; _LMFILES__modshare=/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/pytorch-gpu/py3/1.8.1:1:/gpfslocalsup/pub/module-rh/modulefiles/cuda/11.2:1:/gpfslocalsup/pub/module-rh/modulefiles/intel-mkl/2020.4:1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/cudnn/8.1.1.33-cuda:1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/openmpi/4.1.1-cuda:1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/magma/2.5.4-cuda:1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/gcc/8.3.1:1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/nccl/2.9.6-1-cuda:1;' export '_LMFILES__modshare; MPIF90=/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin/mpif90;' export 'MPIF90; CUDA_HOME=/gpfslocalsys/cuda/11.2;' export 'CUDA_HOME; PATH=/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin;' export 'PATH; PSM2_CUDA=1;' export 'PSM2_CUDA; 
PATH_modshare=/usr/bin:1:/gpfslocalsup/bin:1:/usr/local/bin:1:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:1:/opt/sgi/bin:1:/gpfslocalsys/slurm/current/bin:1:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:1:/gpfslocalsys/cuda/11.2/bin:1:/opt/clmgr/bin:1:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:1:/opt/sgi/sbin:1:/bin:1:/gpfswork/rech/rcy/uty16tp/miniconda3/bin:1:/opt/clmgr/sbin:1:/gpfslocalsys/bin:1:/gpfslocalsys/cuda/11.2/samples:1:/sbin:1:/gpfslocalsys/cuda/11.2/nvvm/bin:1:/usr/sbin:1:/gpfslocalsys/idrzap/current/bin:1:/usr/local/sbin:1:/usr/lpp/mmfs/bin:1:/opt/c3/bin:1;' export 'PATH_modshare; .' '/gpfslocalsup/pub/anaconda-py3/2021.05/etc/profile.d/conda.sh; conda' 'deactivate; conda' activate 'pytorch-1.8.1+py3.8.8-lts; test' '0;' +++ PSM2_GPUDIRECT=1 +++ export PSM2_GPUDIRECT +++ MODULES_LMPREREQ='nccl/2.9.6-1-cuda&cuda/11.2|cuda/10.2:cudnn/8.1.1.33-cuda&cuda/11.2|cuda/10.2:openmpi/4.1.1-cuda&nvidia-compilers/21.3|nvidia-compilers/20.11|nvidia-compilers/20.7|pgi/20.4|gcc/8.3.1&cuda/11.2|cuda/10.2:magma/2.5.4-cuda&intel-compilers/19.0.4|gcc/8.3.1&cuda/11.2|cuda/10.2:pytorch-gpu/py3/1.8.1&gcc/8.3.1&cuda/11.2&nccl/2.9.6-1-cuda&cudnn/8.1.1.33-cuda&intel-mkl/2020.4&openmpi/4.1.1-cuda&magma/2.5.4-cuda' +++ export MODULES_LMPREREQ +++ CPATH=/gpfslocalsup/spack_soft/magma/2.5.4/gcc-8.3.1-d4zfylwpieuqujdgbuwv5yu6voe4okke/include:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/include:/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/mkl/include:/gpfslocalsup/spack_soft/cudnn/8.1.1.33-11.2/gcc-8.3.1-dgsfwc7e23vzc36jopc5nxyunptm2ieh/include:/gpfslocalsup/spack_soft/nccl/2.9.6-1/gcc-8.3.1-2c3rwcolksekujszxrg7r72uxmekxx2y/include +++ export CPATH +++ LD_LIBRARY_PATH=/gpfslocalsup/spack_soft/magma/2.5.4/gcc-8.3.1-d4zfylwpieuqujdgbuwv5yu6voe4okke/lib:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/lib:/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/mkl/lib/intel64_lin:/gpfslocalsup/spack_soft/cudnn/8.1.1.33-11.2/gcc-8.3.1-dgsfwc7e23vzc36jopc5nxyunptm2ieh/lib64:/gpfslocalsup/spack_soft/nccl/2.9.6-1/gcc-8.3.1-2c3rwcolksekujszxrg7r72uxmekxx2y/lib:/gpfslocalsys/cuda/11.2/nvvm/lib64:/gpfslocalsys/cuda/11.2/extras/CUPTI/lib64:/gpfslocalsys/cuda/11.2/lib64:/gpfslocalsys/cuda/11.2/samples/common/lib/linux/x86_64:/gpfslocalsys/cuda/11.2/targets/x86_64-linux/lib:/gpfslocalsys/slurm/current/lib/slurm:/gpfslocalsys/slurm/current/lib +++ export LD_LIBRARY_PATH +++ SLURM_MPI_TYPE=pmix +++ export SLURM_MPI_TYPE +++ OMPI_MCA_mtl=psm2 +++ export OMPI_MCA_mtl +++ MANPATH=/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/share/man:/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/man/common:/gpfslocalsys/cuda/11.2/doc/man::/opt/c3/man:/opt/clmgr/man:/opt/sgi/share/man:/opt/clmgr/share/man:/opt/clmgr/lib/cm-cli/man:/gpfslocalsys/slurm/current/share/man:/usr/share/catman:/usr/share/man:/usr/catman:/usr/man +++ export MANPATH +++ 
LIBRARY_PATH=/gpfslocalsup/spack_soft/magma/2.5.4/gcc-8.3.1-d4zfylwpieuqujdgbuwv5yu6voe4okke/lib:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/lib:/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/mkl/lib/intel64_lin:/gpfslocalsup/spack_soft/cudnn/8.1.1.33-11.2/gcc-8.3.1-dgsfwc7e23vzc36jopc5nxyunptm2ieh/lib64:/gpfslocalsup/spack_soft/nccl/2.9.6-1/gcc-8.3.1-2c3rwcolksekujszxrg7r72uxmekxx2y/lib:/gpfslocalsys/cuda/11.2/lib64/stubs:/gpfslocalsys/cuda/11.2/nvvm/lib64:/gpfslocalsys/cuda/11.2/extras/CUPTI/lib64:/gpfslocalsys/cuda/11.2/lib64:/gpfslocalsys/cuda/11.2/samples/common/lib/linux/x86_64:/gpfslocalsys/cuda/11.2/targets/x86_64-linux/lib:/gpfslocalsys/slurm/current/lib/slurm:/gpfslocalsys/slurm/current/lib +++ export LIBRARY_PATH +++ MPIF77=/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin/mpif77 +++ export MPIF77 +++ MKLROOT_modshare=/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/mkl:1 +++ export MKLROOT_modshare +++ CMAKE_PREFIX_PATH=/gpfslocalsup/spack_soft/magma/2.5.4/gcc-8.3.1-d4zfylwpieuqujdgbuwv5yu6voe4okke/:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/:/gpfslocalsup/spack_soft/cudnn/8.1.1.33-11.2/gcc-8.3.1-dgsfwc7e23vzc36jopc5nxyunptm2ieh/:/gpfslocalsup/spack_soft/nccl/2.9.6-1/gcc-8.3.1-2c3rwcolksekujszxrg7r72uxmekxx2y/ +++ export CMAKE_PREFIX_PATH +++ LOADEDMODULES=gcc/8.3.1:cuda/11.2:nccl/2.9.6-1-cuda:cudnn/8.1.1.33-cuda:intel-mkl/2020.4:openmpi/4.1.1-cuda:magma/2.5.4-cuda:pytorch-gpu/py3/1.8.1 +++ export LOADEDMODULES +++ _LMFILES_=/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/gcc/8.3.1:/gpfslocalsup/pub/module-rh/modulefiles/cuda/11.2:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/nccl/2.9.6-1-cuda:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/cudnn/8.1.1.33-cuda:/gpfslocalsup/pub/module-rh/modulefiles/intel-mkl/2020.4:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/openmpi/4.1.1-cuda:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/magma/2.5.4-cuda:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/pytorch-gpu/py3/1.8.1 +++ export _LMFILES_ +++ PKG_CONFIG_PATH=/gpfslocalsup/spack_soft/magma/2.5.4/gcc-8.3.1-d4zfylwpieuqujdgbuwv5yu6voe4okke/lib/pkgconfig:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/lib/pkgconfig:/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/mkl/bin/pkgconfig:/gpfslocalsup/spack_soft/nccl/2.9.6-1/gcc-8.3.1-2c3rwcolksekujszxrg7r72uxmekxx2y/lib/pkgconfig +++ export PKG_CONFIG_PATH +++ MANPATH_modshare=:1:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/share/man:1:/opt/sgi/share/man:1:/opt/c3/man:1:/gpfslocalsys/slurm/current/share/man:1:/opt/clmgr/share/man:1:/opt/clmgr/lib/cm-cli/man:1:/usr/man:1:/usr/catman:1:/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/man/common:1:/opt/clmgr/man:1:/usr/share/man:1:/gpfslocalsys/cuda/11.2/doc/man:1:/usr/share/catman:1 +++ export MANPATH_modshare +++ 
LIBRARY_PATH_modshare=/gpfslocalsys/cuda/11.2/targets/x86_64-linux/lib:1:/gpfslocalsup/spack_soft/magma/2.5.4/gcc-8.3.1-d4zfylwpieuqujdgbuwv5yu6voe4okke/lib:1:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/lib:1:/gpfslocalsys/slurm/current/lib:1:/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/mkl/lib/intel64_lin:1:/gpfslocalsys/cuda/11.2/nvvm/lib64:1:/gpfslocalsys/cuda/11.2/lib64/stubs:1:/gpfslocalsys/cuda/11.2/extras/CUPTI/lib64:1:/gpfslocalsup/spack_soft/cudnn/8.1.1.33-11.2/gcc-8.3.1-dgsfwc7e23vzc36jopc5nxyunptm2ieh/lib64:1:/gpfslocalsup/spack_soft/nccl/2.9.6-1/gcc-8.3.1-2c3rwcolksekujszxrg7r72uxmekxx2y/lib:1:/gpfslocalsys/cuda/11.2/samples/common/lib/linux/x86_64:1:/gpfslocalsys/slurm/current/lib/slurm:1:/gpfslocalsys/cuda/11.2/lib64:1 +++ export LIBRARY_PATH_modshare +++ MODULES_LMCONFLICT='gcc/8.3.1&gcc:cuda/11.2&cuda:nccl/2.9.6-1-cuda&nccl:cudnn/8.1.1.33-cuda&cudnn:intel-mkl/2020.4&intel-mkl:openmpi/4.1.1-cuda&openmpi&intel-mpi:magma/2.5.4-cuda&magma:pytorch-gpu/py3/1.8.1&python&tensorflow&pytorch&caffe&anaconda-py2&anaconda-py3' +++ export MODULES_LMCONFLICT +++ MPICC=/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin/mpicc +++ export MPICC +++ NLSPATH_modshare=/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/mkl/lib/intel64_lin/locale/%l_%t/%N:1 +++ export NLSPATH_modshare +++ OMPI_MCA_pml=cm +++ export OMPI_MCA_pml +++ INTEL_LICENSE_FILE=/gpfslocalsys/intel/licenses/site_license.lic +++ export INTEL_LICENSE_FILE +++ PKG_CONFIG_PATH_modshare=/gpfslocalsup/spack_soft/magma/2.5.4/gcc-8.3.1-d4zfylwpieuqujdgbuwv5yu6voe4okke/lib/pkgconfig:1:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/lib/pkgconfig:1:/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/mkl/bin/pkgconfig:1:/gpfslocalsup/spack_soft/nccl/2.9.6-1/gcc-8.3.1-2c3rwcolksekujszxrg7r72uxmekxx2y/lib/pkgconfig:1 +++ export PKG_CONFIG_PATH_modshare +++ MODULES_LMCONFLICT_modshare='pytorch-gpu/py3/1.8.1&python&tensorflow&pytorch&caffe&anaconda-py2&anaconda-py3:1:magma/2.5.4-cuda&magma:1:cuda/11.2&cuda:1:cudnn/8.1.1.33-cuda&cudnn:1:intel-mkl/2020.4&intel-mkl:1:nccl/2.9.6-1-cuda&nccl:1:openmpi/4.1.1-cuda&openmpi&intel-mpi:1:gcc/8.3.1&gcc:1' +++ export MODULES_LMCONFLICT_modshare +++ INTEL_LICENSE_FILE_modshare=/gpfslocalsys/intel/licenses/site_license.lic:1 +++ export INTEL_LICENSE_FILE_modshare +++ CUDA_INSTALL_PATH=/gpfslocalsys/cuda/11.2 +++ export CUDA_INSTALL_PATH +++ MODULES_LMNOTUASKED=gcc/8.3.1:cuda/11.2:nccl/2.9.6-1-cuda:cudnn/8.1.1.33-cuda:intel-mkl/2020.4:openmpi/4.1.1-cuda:magma/2.5.4-cuda +++ export MODULES_LMNOTUASKED +++ PYTHONUNBUFFERED=1 +++ export PYTHONUNBUFFERED +++ MKLROOT=/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/mkl +++ export MKLROOT +++ MPICXX=/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin/mpic++ +++ export MPICXX +++ CUDA_PATH=/gpfslocalsys/cuda/11.2 +++ export CUDA_PATH +++ MODULES_LMNOTUASKED_modshare=gcc/8.3.1:1:nccl/2.9.6-1-cuda:1:cuda/11.2:1:intel-mkl/2020.4:1:magma/2.5.4-cuda:1:cudnn/8.1.1.33-cuda:1:openmpi/4.1.1-cuda:1 +++ export MODULES_LMNOTUASKED_modshare +++ PSM2_CUDA_MEMCACHE_SIZE=1024 +++ export PSM2_CUDA_MEMCACHE_SIZE +++ 
NLSPATH=/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/mkl/lib/intel64_lin/locale/%l_%t/%N +++ export NLSPATH +++ MODULES_LMPREREQ_modshare='cudnn/8.1.1.33-cuda&cuda/11.2|cuda/10.2:1:pytorch-gpu/py3/1.8.1&gcc/8.3.1&cuda/11.2&nccl/2.9.6-1-cuda&cudnn/8.1.1.33-cuda&intel-mkl/2020.4&openmpi/4.1.1-cuda&magma/2.5.4-cuda:1:nccl/2.9.6-1-cuda&cuda/11.2|cuda/10.2:1:openmpi/4.1.1-cuda&nvidia-compilers/21.3|nvidia-compilers/20.11|nvidia-compilers/20.7|pgi/20.4|gcc/8.3.1&cuda/11.2|cuda/10.2:1:magma/2.5.4-cuda&intel-compilers/19.0.4|gcc/8.3.1&cuda/11.2|cuda/10.2:1' +++ export MODULES_LMPREREQ_modshare +++ CPATH_modshare=/gpfslocalsup/spack_soft/cudnn/8.1.1.33-11.2/gcc-8.3.1-dgsfwc7e23vzc36jopc5nxyunptm2ieh/include:1:/gpfslocalsup/spack_soft/nccl/2.9.6-1/gcc-8.3.1-2c3rwcolksekujszxrg7r72uxmekxx2y/include:1:/gpfslocalsup/spack_soft/magma/2.5.4/gcc-8.3.1-d4zfylwpieuqujdgbuwv5yu6voe4okke/include:1:/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/mkl/include:1:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/include:1 +++ export CPATH_modshare +++ C_INCLUDE_PATH=/gpfslocalsys/cuda/11.2/include +++ export C_INCLUDE_PATH +++ LD_LIBRARY_PATH_modshare=/gpfslocalsys/cuda/11.2/targets/x86_64-linux/lib:1:/gpfslocalsup/spack_soft/magma/2.5.4/gcc-8.3.1-d4zfylwpieuqujdgbuwv5yu6voe4okke/lib:1:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/lib:1:/gpfslocalsys/slurm/current/lib:1:/gpfslocalsys/cuda/11.2/nvvm/lib64:1:/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/mkl/lib/intel64_lin:1:/gpfslocalsys/cuda/11.2/extras/CUPTI/lib64:1:/gpfslocalsup/spack_soft/cudnn/8.1.1.33-11.2/gcc-8.3.1-dgsfwc7e23vzc36jopc5nxyunptm2ieh/lib64:1:/gpfslocalsup/spack_soft/nccl/2.9.6-1/gcc-8.3.1-2c3rwcolksekujszxrg7r72uxmekxx2y/lib:1:/gpfslocalsys/slurm/current/lib/slurm:1:/gpfslocalsys/cuda/11.2/samples/common/lib/linux/x86_64:1:/gpfslocalsys/cuda/11.2/lib64:1 +++ export LD_LIBRARY_PATH_modshare +++ CMAKE_PREFIX_PATH_modshare=/gpfslocalsup/spack_soft/cudnn/8.1.1.33-11.2/gcc-8.3.1-dgsfwc7e23vzc36jopc5nxyunptm2ieh/:1:/gpfslocalsup/spack_soft/nccl/2.9.6-1/gcc-8.3.1-2c3rwcolksekujszxrg7r72uxmekxx2y/:1:/gpfslocalsup/spack_soft/magma/2.5.4/gcc-8.3.1-d4zfylwpieuqujdgbuwv5yu6voe4okke/:1:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/:1 +++ export CMAKE_PREFIX_PATH_modshare +++ LOADEDMODULES_modshare=gcc/8.3.1:1:nccl/2.9.6-1-cuda:1:pytorch-gpu/py3/1.8.1:1:cuda/11.2:1:intel-mkl/2020.4:1:cudnn/8.1.1.33-cuda:1:openmpi/4.1.1-cuda:1:magma/2.5.4-cuda:1 +++ export LOADEDMODULES_modshare +++ _LMFILES__modshare=/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/pytorch-gpu/py3/1.8.1:1:/gpfslocalsup/pub/module-rh/modulefiles/cuda/11.2:1:/gpfslocalsup/pub/module-rh/modulefiles/intel-mkl/2020.4:1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/cudnn/8.1.1.33-cuda:1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/openmpi/4.1.1-cuda:1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/magma/2.5.4-cuda:1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/gcc/8.3.1:1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/nccl/2.9.6-1-cuda:1 +++ export _LMFILES__modshare +++ 
MPIF90=/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin/mpif90 +++ export MPIF90 +++ CUDA_HOME=/gpfslocalsys/cuda/11.2 +++ export CUDA_HOME +++ PATH=/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin +++ export PATH +++ PSM2_CUDA=1 +++ export PSM2_CUDA +++ PATH_modshare=/usr/bin:1:/gpfslocalsup/bin:1:/usr/local/bin:1:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:1:/opt/sgi/bin:1:/gpfslocalsys/slurm/current/bin:1:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:1:/gpfslocalsys/cuda/11.2/bin:1:/opt/clmgr/bin:1:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:1:/opt/sgi/sbin:1:/bin:1:/gpfswork/rech/rcy/uty16tp/miniconda3/bin:1:/opt/clmgr/sbin:1:/gpfslocalsys/bin:1:/gpfslocalsys/cuda/11.2/samples:1:/sbin:1:/gpfslocalsys/cuda/11.2/nvvm/bin:1:/usr/sbin:1:/gpfslocalsys/idrzap/current/bin:1:/usr/local/sbin:1:/usr/lpp/mmfs/bin:1:/opt/c3/bin:1 +++ export PATH_modshare +++ . /gpfslocalsup/pub/anaconda-py3/2021.05/etc/profile.d/conda.sh ++++ export CONDA_EXE=/gpfslocalsup/pub/anaconda-py3/2021.05/bin/conda ++++ CONDA_EXE=/gpfslocalsup/pub/anaconda-py3/2021.05/bin/conda ++++ export _CE_M= ++++ _CE_M= ++++ export _CE_CONDA= ++++ _CE_CONDA= ++++ export CONDA_PYTHON_EXE=/gpfslocalsup/pub/anaconda-py3/2021.05/bin/python ++++ CONDA_PYTHON_EXE=/gpfslocalsup/pub/anaconda-py3/2021.05/bin/python ++++ '[' -z x ']' +++ conda deactivate +++ '[' 1 -lt 1 ']' +++ local cmd=deactivate +++ shift +++ case "$cmd" in +++ __conda_activate deactivate +++ '[' -n '' ']' +++ local cmd=deactivate +++ shift +++ local ask_conda +++ CONDA_INTERNAL_OLDPATH=/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin +++ __add_sys_prefix_to_path +++ '[' -n '' ']' ++++ dirname /gpfslocalsup/pub/anaconda-py3/2021.05/bin/conda +++ SYSP=/gpfslocalsup/pub/anaconda-py3/2021.05/bin ++++ dirname /gpfslocalsup/pub/anaconda-py3/2021.05/bin +++ SYSP=/gpfslocalsup/pub/anaconda-py3/2021.05 +++ '[' -n '' ']' +++ 
PATH=/gpfslocalsup/pub/anaconda-py3/2021.05/bin:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin +++ export PATH ++++ PS1= ++++ /gpfslocalsup/pub/anaconda-py3/2021.05/bin/conda shell.posix deactivate +++ ask_conda='export PATH='\''/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin'\'' unset CONDA_PREFIX unset CONDA_DEFAULT_ENV unset CONDA_PROMPT_MODIFIER PS1='\'''\'' export CONDA_SHLVL='\''0'\'' export CONDA_EXE='\''/gpfslocalsup/pub/anaconda-py3/2021.05/bin/conda'\'' export _CE_M='\'''\'' export _CE_CONDA='\'''\'' export CONDA_PYTHON_EXE='\''/gpfslocalsup/pub/anaconda-py3/2021.05/bin/python'\''' +++ rc=0 +++ PATH=/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin +++ eval 'export PATH='\''/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin'\'' unset CONDA_PREFIX unset CONDA_DEFAULT_ENV unset CONDA_PROMPT_MODIFIER PS1='\'''\'' export CONDA_SHLVL='\''0'\'' export CONDA_EXE='\''/gpfslocalsup/pub/anaconda-py3/2021.05/bin/conda'\'' export _CE_M='\'''\'' export _CE_CONDA='\'''\'' export CONDA_PYTHON_EXE='\''/gpfslocalsup/pub/anaconda-py3/2021.05/bin/python'\''' ++++ export 
PATH=/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin ++++ PATH=/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin ++++ unset CONDA_PREFIX ++++ unset CONDA_DEFAULT_ENV ++++ unset CONDA_PROMPT_MODIFIER ++++ PS1= ++++ export CONDA_SHLVL=0 ++++ CONDA_SHLVL=0 ++++ export CONDA_EXE=/gpfslocalsup/pub/anaconda-py3/2021.05/bin/conda ++++ CONDA_EXE=/gpfslocalsup/pub/anaconda-py3/2021.05/bin/conda ++++ export _CE_M= ++++ _CE_M= ++++ export _CE_CONDA= ++++ _CE_CONDA= ++++ export CONDA_PYTHON_EXE=/gpfslocalsup/pub/anaconda-py3/2021.05/bin/python ++++ CONDA_PYTHON_EXE=/gpfslocalsup/pub/anaconda-py3/2021.05/bin/python +++ '[' 0 '!=' 0 ']' +++ __conda_hashr +++ '[' -n '' ']' +++ '[' -n '' ']' +++ hash -r +++ conda activate pytorch-1.8.1+py3.8.8-lts +++ '[' 2 -lt 1 ']' +++ local cmd=activate +++ shift +++ case "$cmd" in +++ __conda_activate activate pytorch-1.8.1+py3.8.8-lts +++ '[' -n '' ']' +++ local cmd=activate +++ shift +++ local ask_conda +++ CONDA_INTERNAL_OLDPATH=/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin +++ __add_sys_prefix_to_path +++ '[' -n '' ']' ++++ dirname /gpfslocalsup/pub/anaconda-py3/2021.05/bin/conda +++ SYSP=/gpfslocalsup/pub/anaconda-py3/2021.05/bin ++++ dirname /gpfslocalsup/pub/anaconda-py3/2021.05/bin +++ SYSP=/gpfslocalsup/pub/anaconda-py3/2021.05 +++ '[' -n '' ']' +++ PATH=/gpfslocalsup/pub/anaconda-py3/2021.05/bin:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin +++ export PATH ++++ PS1= ++++ 
/gpfslocalsup/pub/anaconda-py3/2021.05/bin/conda shell.posix activate pytorch-1.8.1+py3.8.8-lts +++ ask_conda='PS1='\''(pytorch-1.8.1+py3.8.8-lts) '\'' export PATH='\''/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts/bin:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin'\'' export CONDA_PREFIX='\''/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts'\'' export CONDA_SHLVL='\''1'\'' export CONDA_DEFAULT_ENV='\''pytorch-1.8.1+py3.8.8-lts'\'' export CONDA_PROMPT_MODIFIER='\''(pytorch-1.8.1+py3.8.8-lts) '\'' export CONDA_EXE='\''/gpfslocalsup/pub/anaconda-py3/2021.05/bin/conda'\'' export _CE_M='\'''\'' export _CE_CONDA='\'''\'' export CONDA_PYTHON_EXE='\''/gpfslocalsup/pub/anaconda-py3/2021.05/bin/python'\'' . "/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts/etc/conda/activate.d/glib_activate.sh" . "/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts/etc/conda/activate.d/proj4-activate.sh"' +++ rc=0 +++ PATH=/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin +++ eval 'PS1='\''(pytorch-1.8.1+py3.8.8-lts) '\'' export PATH='\''/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts/bin:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin'\'' export CONDA_PREFIX='\''/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts'\'' export CONDA_SHLVL='\''1'\'' export CONDA_DEFAULT_ENV='\''pytorch-1.8.1+py3.8.8-lts'\'' export CONDA_PROMPT_MODIFIER='\''(pytorch-1.8.1+py3.8.8-lts) '\'' export CONDA_EXE='\''/gpfslocalsup/pub/anaconda-py3/2021.05/bin/conda'\'' export _CE_M='\'''\'' export _CE_CONDA='\'''\'' export CONDA_PYTHON_EXE='\''/gpfslocalsup/pub/anaconda-py3/2021.05/bin/python'\'' . "/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts/etc/conda/activate.d/glib_activate.sh" . 
"/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts/etc/conda/activate.d/proj4-activate.sh"' ++++ PS1='(pytorch-1.8.1+py3.8.8-lts) ' ++++ export PATH=/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts/bin:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin ++++ PATH=/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts/bin:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin ++++ export CONDA_PREFIX=/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts ++++ CONDA_PREFIX=/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts ++++ export CONDA_SHLVL=1 ++++ CONDA_SHLVL=1 ++++ export CONDA_DEFAULT_ENV=pytorch-1.8.1+py3.8.8-lts ++++ CONDA_DEFAULT_ENV=pytorch-1.8.1+py3.8.8-lts ++++ export 'CONDA_PROMPT_MODIFIER=(pytorch-1.8.1+py3.8.8-lts) ' ++++ CONDA_PROMPT_MODIFIER='(pytorch-1.8.1+py3.8.8-lts) ' ++++ export CONDA_EXE=/gpfslocalsup/pub/anaconda-py3/2021.05/bin/conda ++++ CONDA_EXE=/gpfslocalsup/pub/anaconda-py3/2021.05/bin/conda ++++ export _CE_M= ++++ _CE_M= ++++ export _CE_CONDA= ++++ _CE_CONDA= ++++ export CONDA_PYTHON_EXE=/gpfslocalsup/pub/anaconda-py3/2021.05/bin/python ++++ CONDA_PYTHON_EXE=/gpfslocalsup/pub/anaconda-py3/2021.05/bin/python ++++ . /gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts/etc/conda/activate.d/glib_activate.sh +++++ export GSETTINGS_SCHEMA_DIR_CONDA_BACKUP= +++++ GSETTINGS_SCHEMA_DIR_CONDA_BACKUP= +++++ export GSETTINGS_SCHEMA_DIR=/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts/share/glib-2.0/schemas +++++ GSETTINGS_SCHEMA_DIR=/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts/share/glib-2.0/schemas ++++ . 
/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts/etc/conda/activate.d/proj4-activate.sh +++++ '[' -n '' ']' +++++ '[' -d /gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts/share/proj ']' +++++ export PROJ_LIB=/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts/share/proj +++++ PROJ_LIB=/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts/share/proj +++++ '[' -f /gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts/share/proj/copyright_and_licenses.csv ']' +++++ export PROJ_NETWORK=ON +++++ PROJ_NETWORK=ON +++ '[' 0 '!=' 0 ']' +++ __conda_hashr +++ '[' -n '' ']' +++ '[' -n '' ']' +++ hash -r +++ test 0 ++ _mlstatus=0 ++ '[' -n x ']' ++ IFS=' ' ++ unset _mlre _mlv _mlrv _mlIFS ++ '[' -n '' ']' ++ unset _mlshdbg ++ return 0 ++ module load nvtop git-lfs github-cli mc ++ unset _mlshdbg ++ '[' 0 = 1 ']' ++ unset _mlre _mlIFS ++ '[' -n x ']' ++ _mlIFS=' ' ++ IFS=' ' ++ for _mlv in ${MODULES_RUN_QUARANTINE:-} ++ '[' LD_LIBRARY_PATH = LD_LIBRARY_PATH -a LD_LIBRARY_PATH = LD_LIBRARY_PATH ']' +++ eval 'echo ${LD_LIBRARY_PATH+x}' ++++ echo x ++ '[' -n x ']' +++ eval 'echo ${LD_LIBRARY_PATH}' ++++ echo /gpfslocalsup/spack_soft/magma/2.5.4/gcc-8.3.1-d4zfylwpieuqujdgbuwv5yu6voe4okke/lib:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/lib:/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/mkl/lib/intel64_lin:/gpfslocalsup/spack_soft/cudnn/8.1.1.33-11.2/gcc-8.3.1-dgsfwc7e23vzc36jopc5nxyunptm2ieh/lib64:/gpfslocalsup/spack_soft/nccl/2.9.6-1/gcc-8.3.1-2c3rwcolksekujszxrg7r72uxmekxx2y/lib:/gpfslocalsys/cuda/11.2/nvvm/lib64:/gpfslocalsys/cuda/11.2/extras/CUPTI/lib64:/gpfslocalsys/cuda/11.2/lib64:/gpfslocalsys/cuda/11.2/samples/common/lib/linux/x86_64:/gpfslocalsys/cuda/11.2/targets/x86_64-linux/lib:/gpfslocalsys/slurm/current/lib/slurm:/gpfslocalsys/slurm/current/lib ++ _mlre='LD_LIBRARY_PATH_modquar='\''/gpfslocalsup/spack_soft/magma/2.5.4/gcc-8.3.1-d4zfylwpieuqujdgbuwv5yu6voe4okke/lib:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/lib:/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/mkl/lib/intel64_lin:/gpfslocalsup/spack_soft/cudnn/8.1.1.33-11.2/gcc-8.3.1-dgsfwc7e23vzc36jopc5nxyunptm2ieh/lib64:/gpfslocalsup/spack_soft/nccl/2.9.6-1/gcc-8.3.1-2c3rwcolksekujszxrg7r72uxmekxx2y/lib:/gpfslocalsys/cuda/11.2/nvvm/lib64:/gpfslocalsys/cuda/11.2/extras/CUPTI/lib64:/gpfslocalsys/cuda/11.2/lib64:/gpfslocalsys/cuda/11.2/samples/common/lib/linux/x86_64:/gpfslocalsys/cuda/11.2/targets/x86_64-linux/lib:/gpfslocalsys/slurm/current/lib/slurm:/gpfslocalsys/slurm/current/lib'\'' ' ++ _mlrv=MODULES_RUNENV_LD_LIBRARY_PATH +++ eval 'echo ${MODULES_RUNENV_LD_LIBRARY_PATH:-}' ++++ echo ++ 
_mlre='LD_LIBRARY_PATH_modquar='\''/gpfslocalsup/spack_soft/magma/2.5.4/gcc-8.3.1-d4zfylwpieuqujdgbuwv5yu6voe4okke/lib:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/lib:/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/mkl/lib/intel64_lin:/gpfslocalsup/spack_soft/cudnn/8.1.1.33-11.2/gcc-8.3.1-dgsfwc7e23vzc36jopc5nxyunptm2ieh/lib64:/gpfslocalsup/spack_soft/nccl/2.9.6-1/gcc-8.3.1-2c3rwcolksekujszxrg7r72uxmekxx2y/lib:/gpfslocalsys/cuda/11.2/nvvm/lib64:/gpfslocalsys/cuda/11.2/extras/CUPTI/lib64:/gpfslocalsys/cuda/11.2/lib64:/gpfslocalsys/cuda/11.2/samples/common/lib/linux/x86_64:/gpfslocalsys/cuda/11.2/targets/x86_64-linux/lib:/gpfslocalsys/slurm/current/lib/slurm:/gpfslocalsys/slurm/current/lib'\'' LD_LIBRARY_PATH='\'''\'' ' ++ '[' -n 'LD_LIBRARY_PATH_modquar='\''/gpfslocalsup/spack_soft/magma/2.5.4/gcc-8.3.1-d4zfylwpieuqujdgbuwv5yu6voe4okke/lib:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/lib:/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/mkl/lib/intel64_lin:/gpfslocalsup/spack_soft/cudnn/8.1.1.33-11.2/gcc-8.3.1-dgsfwc7e23vzc36jopc5nxyunptm2ieh/lib64:/gpfslocalsup/spack_soft/nccl/2.9.6-1/gcc-8.3.1-2c3rwcolksekujszxrg7r72uxmekxx2y/lib:/gpfslocalsys/cuda/11.2/nvvm/lib64:/gpfslocalsys/cuda/11.2/extras/CUPTI/lib64:/gpfslocalsys/cuda/11.2/lib64:/gpfslocalsys/cuda/11.2/samples/common/lib/linux/x86_64:/gpfslocalsys/cuda/11.2/targets/x86_64-linux/lib:/gpfslocalsys/slurm/current/lib/slurm:/gpfslocalsys/slurm/current/lib'\'' LD_LIBRARY_PATH='\'''\'' ' ']' +++ eval 'LD_LIBRARY_PATH_modquar='\''/gpfslocalsup/spack_soft/magma/2.5.4/gcc-8.3.1-d4zfylwpieuqujdgbuwv5yu6voe4okke/lib:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/lib:/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/mkl/lib/intel64_lin:/gpfslocalsup/spack_soft/cudnn/8.1.1.33-11.2/gcc-8.3.1-dgsfwc7e23vzc36jopc5nxyunptm2ieh/lib64:/gpfslocalsup/spack_soft/nccl/2.9.6-1/gcc-8.3.1-2c3rwcolksekujszxrg7r72uxmekxx2y/lib:/gpfslocalsys/cuda/11.2/nvvm/lib64:/gpfslocalsys/cuda/11.2/extras/CUPTI/lib64:/gpfslocalsys/cuda/11.2/lib64:/gpfslocalsys/cuda/11.2/samples/common/lib/linux/x86_64:/gpfslocalsys/cuda/11.2/targets/x86_64-linux/lib:/gpfslocalsys/slurm/current/lib/slurm:/gpfslocalsys/slurm/current/lib'\''' 'LD_LIBRARY_PATH='\'''\''' /gpfslocalsup/spack_soft/tcl/8.6.8/gcc-4.8.5-5nqkfcnctewdheju62zvqbsonnzszr6m/bin/tclsh /gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/libexec/modulecmd.tcl bash '"$@"' ++++ LD_LIBRARY_PATH_modquar=/gpfslocalsup/spack_soft/magma/2.5.4/gcc-8.3.1-d4zfylwpieuqujdgbuwv5yu6voe4okke/lib:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/lib:/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/mkl/lib/intel64_lin:/gpfslocalsup/spack_soft/cudnn/8.1.1.33-11.2/gcc-8.3.1-dgsfwc7e23vzc36jopc5nxyunptm2ieh/lib64:/gpfslocalsup/spack_soft/nccl/2.9.6-1/gcc-8.3.1-2c3rwcolksekujszxrg7r72uxmekxx2y/lib:/gpfslocalsys/cuda/11.2/nvvm/lib64:/gpfslocalsys/cuda/11.2/extras/CUPTI/lib64:/gpfslocalsys/cuda/11.2/lib64:/gpfslocalsys/cuda/11.2/samples/common/lib/linux/x86_64:/gpfslocalsys/cuda/11.2/targets/x86_64-linux/lib:/gpfslocalsys/slurm/current/lib/slurm:/gpfslocalsys/slurm/current/lib ++++ LD_LIBRARY_PATH= ++++ 
/gpfslocalsup/spack_soft/tcl/8.6.8/gcc-4.8.5-5nqkfcnctewdheju62zvqbsonnzszr6m/bin/tclsh /gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/libexec/modulecmd.tcl bash load nvtop git-lfs github-cli mc ++ eval 'MANPATH=/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/share/man:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/share/man:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/share/man:/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/man/common:/gpfslocalsys/cuda/11.2/doc/man::/opt/c3/man:/opt/clmgr/man:/opt/sgi/share/man:/opt/clmgr/share/man:/opt/clmgr/lib/cm-cli/man:/gpfslocalsys/slurm/current/share/man:/usr/share/catman:/usr/share/man:/usr/catman:/usr/man;' export 'MANPATH; CMAKE_PREFIX_PATH=/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/:/gpfslocalsup/spack_soft/git-lfs/2.7.2/gcc-8.3.1-o4avesibx2cgqxwaqvn5kbz247it7x2e/:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/:/gpfslocalsup/spack_soft/magma/2.5.4/gcc-8.3.1-d4zfylwpieuqujdgbuwv5yu6voe4okke/:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/:/gpfslocalsup/spack_soft/cudnn/8.1.1.33-11.2/gcc-8.3.1-dgsfwc7e23vzc36jopc5nxyunptm2ieh/:/gpfslocalsup/spack_soft/nccl/2.9.6-1/gcc-8.3.1-2c3rwcolksekujszxrg7r72uxmekxx2y/;' export 'CMAKE_PREFIX_PATH; LOADEDMODULES=gcc/8.3.1:cuda/11.2:nccl/2.9.6-1-cuda:cudnn/8.1.1.33-cuda:intel-mkl/2020.4:openmpi/4.1.1-cuda:magma/2.5.4-cuda:pytorch-gpu/py3/1.8.1:nvtop/1.1.0:git-lfs/2.7.2:github-cli/1.13.1:mc/4.8.26;' export 'LOADEDMODULES; _LMFILES_=/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/gcc/8.3.1:/gpfslocalsup/pub/module-rh/modulefiles/cuda/11.2:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/nccl/2.9.6-1-cuda:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/cudnn/8.1.1.33-cuda:/gpfslocalsup/pub/module-rh/modulefiles/intel-mkl/2020.4:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/openmpi/4.1.1-cuda:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/magma/2.5.4-cuda:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/pytorch-gpu/py3/1.8.1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/nvtop/1.1.0:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/git-lfs/2.7.2:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/github-cli/1.13.1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/mc/4.8.26;' export '_LMFILES_; MANPATH_modshare=:1:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/share/man:1:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/share/man:1:/opt/sgi/share/man:1:/opt/c3/man:1:/gpfslocalsys/slurm/current/share/man:1:/opt/clmgr/share/man:1:/opt/clmgr/lib/cm-cli/man:1:/usr/man:1:/usr/catman:1:/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/share/man:1:/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/man/common:1:/opt/clmgr/man:1:/usr/share/catman:1:/gpfslocalsys/cuda/11.2/doc/man:1:/usr/share/man:1;' export 'MANPATH_modshare; 
MODULES_LMCONFLICT=gcc/8.3.1\&gcc:cuda/11.2\&cuda:nccl/2.9.6-1-cuda\&nccl:cudnn/8.1.1.33-cuda\&cudnn:intel-mkl/2020.4\&intel-mkl:openmpi/4.1.1-cuda\&openmpi\&intel-mpi:magma/2.5.4-cuda\&magma:pytorch-gpu/py3/1.8.1\&python\&tensorflow\&pytorch\&caffe\&anaconda-py2\&anaconda-py3:nvtop/1.1.0\&nvtop:git-lfs/2.7.2\&git-lfs:github-cli/1.13.1\&github-cli:mc/4.8.26\&mc;' export 'MODULES_LMCONFLICT; MODULES_LMCONFLICT_modshare=git-lfs/2.7.2\&git-lfs:1:cudnn/8.1.1.33-cuda\&cudnn:1:cuda/11.2\&cuda:1:nccl/2.9.6-1-cuda\&nccl:1:magma/2.5.4-cuda\&magma:1:nvtop/1.1.0\&nvtop:1:pytorch-gpu/py3/1.8.1\&python\&tensorflow\&pytorch\&caffe\&anaconda-py2\&anaconda-py3:1:intel-mkl/2020.4\&intel-mkl:1:mc/4.8.26\&mc:1:openmpi/4.1.1-cuda\&openmpi\&intel-mpi:1:github-cli/1.13.1\&github-cli:1:gcc/8.3.1\&gcc:1;' export 'MODULES_LMCONFLICT_modshare; XLOCALEDIR=/gpfslocalsup/spack_soft/libx11/1.6.7/gcc-8.3.1-5blc7mmmrectsgimcul637qqzrcvtwn5/share/X11/locale;' export 'XLOCALEDIR; XLOCALEDIR_modshare=/gpfslocalsup/spack_soft/libx11/1.6.7/gcc-8.3.1-5blc7mmmrectsgimcul637qqzrcvtwn5/share/X11/locale:1;' export 'XLOCALEDIR_modshare; CMAKE_PREFIX_PATH_modshare=/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/:1:/gpfslocalsup/spack_soft/git-lfs/2.7.2/gcc-8.3.1-o4avesibx2cgqxwaqvn5kbz247it7x2e/:1:/gpfslocalsup/spack_soft/cudnn/8.1.1.33-11.2/gcc-8.3.1-dgsfwc7e23vzc36jopc5nxyunptm2ieh/:1:/gpfslocalsup/spack_soft/nccl/2.9.6-1/gcc-8.3.1-2c3rwcolksekujszxrg7r72uxmekxx2y/:1:/gpfslocalsup/spack_soft/magma/2.5.4/gcc-8.3.1-d4zfylwpieuqujdgbuwv5yu6voe4okke/:1:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/:1:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/:1;' export 'CMAKE_PREFIX_PATH_modshare; LOADEDMODULES_modshare=github-cli/1.13.1:1:intel-mkl/2020.4:1:git-lfs/2.7.2:1:nvtop/1.1.0:1:nccl/2.9.6-1-cuda:1:gcc/8.3.1:1:cuda/11.2:1:mc/4.8.26:1:pytorch-gpu/py3/1.8.1:1:magma/2.5.4-cuda:1:openmpi/4.1.1-cuda:1:cudnn/8.1.1.33-cuda:1;' export 'LOADEDMODULES_modshare; _LMFILES__modshare=/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/github-cli/1.13.1:1:/gpfslocalsup/pub/module-rh/modulefiles/cuda/11.2:1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/magma/2.5.4-cuda:1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/openmpi/4.1.1-cuda:1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/git-lfs/2.7.2:1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/nvtop/1.1.0:1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/nccl/2.9.6-1-cuda:1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/gcc/8.3.1:1:/gpfslocalsup/pub/module-rh/modulefiles/intel-mkl/2020.4:1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/mc/4.8.26:1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/pytorch-gpu/py3/1.8.1:1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/cudnn/8.1.1.33-cuda:1;' export '_LMFILES__modshare; 
PATH=/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/bin:/gpfslocalsup/pub/gitlab-cli/1.13.1/bin:/gpfslocalsup/spack_soft/git-lfs/2.7.2/gcc-8.3.1-o4avesibx2cgqxwaqvn5kbz247it7x2e/bin:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/bin:/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts/bin:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin;' export 'PATH; PATH_modshare=/gpfslocalsup/pub/gitlab-cli/1.13.1/bin:1:/usr/bin:1:/gpfslocalsup/bin:1:/usr/local/bin:1:/opt/sgi/bin:1:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/bin:1:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:1:/gpfslocalsys/slurm/current/bin:1:/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts/bin:1:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:1:/gpfslocalsys/cuda/11.2/bin:1:/opt/clmgr/bin:1:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:1:/opt/sgi/sbin:1:/bin:1:/opt/clmgr/sbin:1:/gpfslocalsys/bin:1:/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/bin:1:/gpfslocalsys/cuda/11.2/samples:1:/gpfslocalsup/spack_soft/git-lfs/2.7.2/gcc-8.3.1-o4avesibx2cgqxwaqvn5kbz247it7x2e/bin:1:/sbin:1:/gpfslocalsys/cuda/11.2/nvvm/bin:1:/usr/sbin:1:/gpfslocalsys/idrzap/current/bin:1:/usr/local/sbin:1:/usr/lpp/mmfs/bin:1:/opt/c3/bin:1;' export 'PATH_modshare; test' '0;' +++ MANPATH=/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/share/man:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/share/man:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/share/man:/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/man/common:/gpfslocalsys/cuda/11.2/doc/man::/opt/c3/man:/opt/clmgr/man:/opt/sgi/share/man:/opt/clmgr/share/man:/opt/clmgr/lib/cm-cli/man:/gpfslocalsys/slurm/current/share/man:/usr/share/catman:/usr/share/man:/usr/catman:/usr/man +++ export MANPATH +++ CMAKE_PREFIX_PATH=/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/:/gpfslocalsup/spack_soft/git-lfs/2.7.2/gcc-8.3.1-o4avesibx2cgqxwaqvn5kbz247it7x2e/:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/:/gpfslocalsup/spack_soft/magma/2.5.4/gcc-8.3.1-d4zfylwpieuqujdgbuwv5yu6voe4okke/:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/:/gpfslocalsup/spack_soft/cudnn/8.1.1.33-11.2/gcc-8.3.1-dgsfwc7e23vzc36jopc5nxyunptm2ieh/:/gpfslocalsup/spack_soft/nccl/2.9.6-1/gcc-8.3.1-2c3rwcolksekujszxrg7r72uxmekxx2y/ +++ export CMAKE_PREFIX_PATH +++ LOADEDMODULES=gcc/8.3.1:cuda/11.2:nccl/2.9.6-1-cuda:cudnn/8.1.1.33-cuda:intel-mkl/2020.4:openmpi/4.1.1-cuda:magma/2.5.4-cuda:pytorch-gpu/py3/1.8.1:nvtop/1.1.0:git-lfs/2.7.2:github-cli/1.13.1:mc/4.8.26 +++ export LOADEDMODULES +++ 
_LMFILES_=/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/gcc/8.3.1:/gpfslocalsup/pub/module-rh/modulefiles/cuda/11.2:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/nccl/2.9.6-1-cuda:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/cudnn/8.1.1.33-cuda:/gpfslocalsup/pub/module-rh/modulefiles/intel-mkl/2020.4:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/openmpi/4.1.1-cuda:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/magma/2.5.4-cuda:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/pytorch-gpu/py3/1.8.1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/nvtop/1.1.0:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/git-lfs/2.7.2:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/github-cli/1.13.1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/mc/4.8.26 +++ export _LMFILES_ +++ MANPATH_modshare=:1:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/share/man:1:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/share/man:1:/opt/sgi/share/man:1:/opt/c3/man:1:/gpfslocalsys/slurm/current/share/man:1:/opt/clmgr/share/man:1:/opt/clmgr/lib/cm-cli/man:1:/usr/man:1:/usr/catman:1:/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/share/man:1:/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/man/common:1:/opt/clmgr/man:1:/usr/share/catman:1:/gpfslocalsys/cuda/11.2/doc/man:1:/usr/share/man:1 +++ export MANPATH_modshare +++ MODULES_LMCONFLICT='gcc/8.3.1&gcc:cuda/11.2&cuda:nccl/2.9.6-1-cuda&nccl:cudnn/8.1.1.33-cuda&cudnn:intel-mkl/2020.4&intel-mkl:openmpi/4.1.1-cuda&openmpi&intel-mpi:magma/2.5.4-cuda&magma:pytorch-gpu/py3/1.8.1&python&tensorflow&pytorch&caffe&anaconda-py2&anaconda-py3:nvtop/1.1.0&nvtop:git-lfs/2.7.2&git-lfs:github-cli/1.13.1&github-cli:mc/4.8.26&mc' +++ export MODULES_LMCONFLICT +++ MODULES_LMCONFLICT_modshare='git-lfs/2.7.2&git-lfs:1:cudnn/8.1.1.33-cuda&cudnn:1:cuda/11.2&cuda:1:nccl/2.9.6-1-cuda&nccl:1:magma/2.5.4-cuda&magma:1:nvtop/1.1.0&nvtop:1:pytorch-gpu/py3/1.8.1&python&tensorflow&pytorch&caffe&anaconda-py2&anaconda-py3:1:intel-mkl/2020.4&intel-mkl:1:mc/4.8.26&mc:1:openmpi/4.1.1-cuda&openmpi&intel-mpi:1:github-cli/1.13.1&github-cli:1:gcc/8.3.1&gcc:1' +++ export MODULES_LMCONFLICT_modshare +++ XLOCALEDIR=/gpfslocalsup/spack_soft/libx11/1.6.7/gcc-8.3.1-5blc7mmmrectsgimcul637qqzrcvtwn5/share/X11/locale +++ export XLOCALEDIR +++ XLOCALEDIR_modshare=/gpfslocalsup/spack_soft/libx11/1.6.7/gcc-8.3.1-5blc7mmmrectsgimcul637qqzrcvtwn5/share/X11/locale:1 +++ export XLOCALEDIR_modshare +++ CMAKE_PREFIX_PATH_modshare=/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/:1:/gpfslocalsup/spack_soft/git-lfs/2.7.2/gcc-8.3.1-o4avesibx2cgqxwaqvn5kbz247it7x2e/:1:/gpfslocalsup/spack_soft/cudnn/8.1.1.33-11.2/gcc-8.3.1-dgsfwc7e23vzc36jopc5nxyunptm2ieh/:1:/gpfslocalsup/spack_soft/nccl/2.9.6-1/gcc-8.3.1-2c3rwcolksekujszxrg7r72uxmekxx2y/:1:/gpfslocalsup/spack_soft/magma/2.5.4/gcc-8.3.1-d4zfylwpieuqujdgbuwv5yu6voe4okke/:1:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/:1:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/:1 +++ export CMAKE_PREFIX_PATH_modshare +++ 
LOADEDMODULES_modshare=github-cli/1.13.1:1:intel-mkl/2020.4:1:git-lfs/2.7.2:1:nvtop/1.1.0:1:nccl/2.9.6-1-cuda:1:gcc/8.3.1:1:cuda/11.2:1:mc/4.8.26:1:pytorch-gpu/py3/1.8.1:1:magma/2.5.4-cuda:1:openmpi/4.1.1-cuda:1:cudnn/8.1.1.33-cuda:1 +++ export LOADEDMODULES_modshare +++ _LMFILES__modshare=/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/github-cli/1.13.1:1:/gpfslocalsup/pub/module-rh/modulefiles/cuda/11.2:1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/magma/2.5.4-cuda:1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/openmpi/4.1.1-cuda:1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/git-lfs/2.7.2:1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/nvtop/1.1.0:1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/nccl/2.9.6-1-cuda:1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/gcc/8.3.1:1:/gpfslocalsup/pub/module-rh/modulefiles/intel-mkl/2020.4:1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/mc/4.8.26:1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/pytorch-gpu/py3/1.8.1:1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/cudnn/8.1.1.33-cuda:1 +++ export _LMFILES__modshare +++ PATH=/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/bin:/gpfslocalsup/pub/gitlab-cli/1.13.1/bin:/gpfslocalsup/spack_soft/git-lfs/2.7.2/gcc-8.3.1-o4avesibx2cgqxwaqvn5kbz247it7x2e/bin:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/bin:/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts/bin:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin +++ export PATH +++ PATH_modshare=/gpfslocalsup/pub/gitlab-cli/1.13.1/bin:1:/usr/bin:1:/gpfslocalsup/bin:1:/usr/local/bin:1:/opt/sgi/bin:1:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/bin:1:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:1:/gpfslocalsys/slurm/current/bin:1:/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts/bin:1:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:1:/gpfslocalsys/cuda/11.2/bin:1:/opt/clmgr/bin:1:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:1:/opt/sgi/sbin:1:/bin:1:/opt/clmgr/sbin:1:/gpfslocalsys/bin:1:/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/bin:1:/gpfslocalsys/cuda/11.2/samples:1:/gpfslocalsup/spack_soft/git-lfs/2.7.2/gcc-8.3.1-o4avesibx2cgqxwaqvn5kbz247it7x2e/bin:1:/sbin:1:/gpfslocalsys/cuda/11.2/nvvm/bin:1:/usr/sbin:1:/gpfslocalsys/idrzap/current/bin:1:/usr/local/sbin:1:/usr/lpp/mmfs/bin:1:/opt/c3/bin:1 +++ export PATH_modshare +++ test 0 ++ _mlstatus=0 ++ '[' -n x ']' ++ IFS=' ' ++ unset _mlre _mlv _mlrv _mlIFS ++ '[' -n '' ']' ++ unset _mlshdbg ++ return 0 ++ export GIT_PROMPT_ONLY_IN_REPO=0 ++ 
GIT_PROMPT_ONLY_IN_REPO=0 ++ export GIT_PROMPT_THEME=JZPRod ++ GIT_PROMPT_THEME=JZPRod ++ source /gpfswork/rech/six/commun/envs/.bash-git-prompt/gitprompt.sh +++ _have_find_mmin=1 +++ gp_install_prompt +++ '[' -z '' ']' +++ OLD_GITPROMPT='(pytorch-1.8.1+py3.8.8-lts) ' +++ '[' -z '' ']' ++++ we_are_on_repo +++++ git rev-parse --git-dir ++++ [[ -e '' ]] ++++ echo 0 +++ GIT_PROMPT_OLD_DIR_WAS_GIT=0 +++ '[' -z '' ']' +++ PROMPT_COMMAND=setGitPrompt +++ local setLastCommandStateEntry=setLastCommandState +++ case ";$PROMPT_COMMAND;" in +++ PROMPT_COMMAND='setLastCommandState;setGitPrompt' +++ git_prompt_dir +++ '[' -z '' ']' +++ local SOURCE=/gpfswork/rech/six/commun/envs/.bash-git-prompt/gitprompt.sh +++ '[' -h /gpfswork/rech/six/commun/envs/.bash-git-prompt/gitprompt.sh ']' +++++ dirname /gpfswork/rech/six/commun/envs/.bash-git-prompt/gitprompt.sh ++++ command cd -P /gpfswork/rech/six/commun/envs/.bash-git-prompt ++++ cd -P /gpfswork/rech/six/commun/envs/.bash-git-prompt ++++ pwd +++ __GIT_PROMPT_DIR=/gpfsdswork/projects/rech/six/commun/envs/.bash-git-prompt +++ source /gpfsdswork/projects/rech/six/commun/envs/.bash-git-prompt/git-prompt-help.sh ++ export TRANSFORMERS_CACHE=/gpfswork/rech/six/commun/models ++ TRANSFORMERS_CACHE=/gpfswork/rech/six/commun/models ++ export HF_DATASETS_CACHE=/gpfswork/rech/six/commun/datasets ++ HF_DATASETS_CACHE=/gpfswork/rech/six/commun/datasets ++ export HF_MODULES_CACHE=/gpfswork/rech/six/commun/modules ++ HF_MODULES_CACHE=/gpfswork/rech/six/commun/modules ++ export HF_METRICS_CACHE=/gpfswork/rech/six/commun/metrics ++ HF_METRICS_CACHE=/gpfswork/rech/six/commun/metrics ++ export DATASETS_CUSTOM=/gpfswork/rech/six/commun/datasets-custom ++ DATASETS_CUSTOM=/gpfswork/rech/six/commun/datasets-custom +++ /gpfslocalsup/pub/anaconda-py3/2020.02/bin/conda shell.bash hook ++ __conda_setup='export CONDA_EXE='\''/gpfslocalsup/pub/anaconda-py3/2020.02/bin/conda'\'' export _CE_M='\'''\'' export _CE_CONDA='\'''\'' export CONDA_PYTHON_EXE='\''/gpfslocalsup/pub/anaconda-py3/2020.02/bin/python'\'' # Copyright (C) 2012 Anaconda, Inc # SPDX-License-Identifier: BSD-3-Clause __add_sys_prefix_to_path() { # In dev-mode CONDA_EXE is python.exe and on Windows # it is in a different relative location to condabin. if [ -n "${_CE_CONDA}" ] && [ -n "${WINDIR+x}" ]; then SYSP=$(\dirname "${CONDA_EXE}") else SYSP=$(\dirname "${CONDA_EXE}") SYSP=$(\dirname "${SYSP}") fi if [ -n "${WINDIR+x}" ]; then PATH="${SYSP}/bin:${PATH}" PATH="${SYSP}/Scripts:${PATH}" PATH="${SYSP}/Library/bin:${PATH}" PATH="${SYSP}/Library/usr/bin:${PATH}" PATH="${SYSP}/Library/mingw-w64/bin:${PATH}" PATH="${SYSP}:${PATH}" else PATH="${SYSP}/bin:${PATH}" fi \export PATH } __conda_hashr() { if [ -n "${ZSH_VERSION:+x}" ]; then \rehash elif [ -n "${POSH_VERSION:+x}" ]; then : # pass else \hash -r fi } __conda_activate() { if [ -n "${CONDA_PS1_BACKUP:+x}" ]; then # Handle transition from shell activated with conda <= 4.3 to a subsequent activation # after conda updated to >= 4.4. See issue #6173. PS1="$CONDA_PS1_BACKUP" \unset CONDA_PS1_BACKUP fi \local cmd="$1" shift \local ask_conda CONDA_INTERNAL_OLDPATH="${PATH}" __add_sys_prefix_to_path ask_conda="$(PS1="$PS1" "$CONDA_EXE" $_CE_M $_CE_CONDA shell.posix "$cmd" "$@")" || \return $? rc=$? 
PATH="${CONDA_INTERNAL_OLDPATH}" \eval "$ask_conda" if [ $rc != 0 ]; then \export PATH fi __conda_hashr } __conda_reactivate() { \local ask_conda CONDA_INTERNAL_OLDPATH="${PATH}" __add_sys_prefix_to_path ask_conda="$(PS1="$PS1" "$CONDA_EXE" $_CE_M $_CE_CONDA shell.posix reactivate)" || \return $? PATH="${CONDA_INTERNAL_OLDPATH}" \eval "$ask_conda" __conda_hashr } conda() { if [ "$#" -lt 1 ]; then "$CONDA_EXE" $_CE_M $_CE_CONDA else \local cmd="$1" shift case "$cmd" in activate|deactivate) __conda_activate "$cmd" "$@" ;; install|update|upgrade|remove|uninstall) CONDA_INTERNAL_OLDPATH="${PATH}" __add_sys_prefix_to_path "$CONDA_EXE" $_CE_M $_CE_CONDA "$cmd" "$@" \local t1=$? PATH="${CONDA_INTERNAL_OLDPATH}" if [ $t1 = 0 ]; then __conda_reactivate else return $t1 fi ;; *) CONDA_INTERNAL_OLDPATH="${PATH}" __add_sys_prefix_to_path "$CONDA_EXE" $_CE_M $_CE_CONDA "$cmd" "$@" \local t1=$? PATH="${CONDA_INTERNAL_OLDPATH}" return $t1 ;; esac fi } if [ -z "${CONDA_SHLVL+x}" ]; then \export CONDA_SHLVL=0 # In dev-mode CONDA_EXE is python.exe and on Windows # it is in a different relative location to condabin. if [ -n "${_CE_CONDA:+x}" ] && [ -n "${WINDIR+x}" ]; then PATH="$(\dirname "$CONDA_EXE")/condabin${PATH:+":${PATH}"}" else PATH="$(\dirname "$(\dirname "$CONDA_EXE")")/condabin${PATH:+":${PATH}"}" fi \export PATH # We'\''re not allowing PS1 to be unbound. It must at least be set. # However, we'\''re not exporting it, which can cause problems when starting a second shell # via a first shell (i.e. starting zsh from bash). if [ -z "${PS1+x}" ]; then PS1= fi fi conda activate base' ++ '[' 0 -eq 0 ']' ++ eval 'export CONDA_EXE='\''/gpfslocalsup/pub/anaconda-py3/2020.02/bin/conda'\'' export _CE_M='\'''\'' export _CE_CONDA='\'''\'' export CONDA_PYTHON_EXE='\''/gpfslocalsup/pub/anaconda-py3/2020.02/bin/python'\'' # Copyright (C) 2012 Anaconda, Inc # SPDX-License-Identifier: BSD-3-Clause __add_sys_prefix_to_path() { # In dev-mode CONDA_EXE is python.exe and on Windows # it is in a different relative location to condabin. if [ -n "${_CE_CONDA}" ] && [ -n "${WINDIR+x}" ]; then SYSP=$(\dirname "${CONDA_EXE}") else SYSP=$(\dirname "${CONDA_EXE}") SYSP=$(\dirname "${SYSP}") fi if [ -n "${WINDIR+x}" ]; then PATH="${SYSP}/bin:${PATH}" PATH="${SYSP}/Scripts:${PATH}" PATH="${SYSP}/Library/bin:${PATH}" PATH="${SYSP}/Library/usr/bin:${PATH}" PATH="${SYSP}/Library/mingw-w64/bin:${PATH}" PATH="${SYSP}:${PATH}" else PATH="${SYSP}/bin:${PATH}" fi \export PATH } __conda_hashr() { if [ -n "${ZSH_VERSION:+x}" ]; then \rehash elif [ -n "${POSH_VERSION:+x}" ]; then : # pass else \hash -r fi } __conda_activate() { if [ -n "${CONDA_PS1_BACKUP:+x}" ]; then # Handle transition from shell activated with conda <= 4.3 to a subsequent activation # after conda updated to >= 4.4. See issue #6173. PS1="$CONDA_PS1_BACKUP" \unset CONDA_PS1_BACKUP fi \local cmd="$1" shift \local ask_conda CONDA_INTERNAL_OLDPATH="${PATH}" __add_sys_prefix_to_path ask_conda="$(PS1="$PS1" "$CONDA_EXE" $_CE_M $_CE_CONDA shell.posix "$cmd" "$@")" || \return $? rc=$? PATH="${CONDA_INTERNAL_OLDPATH}" \eval "$ask_conda" if [ $rc != 0 ]; then \export PATH fi __conda_hashr } __conda_reactivate() { \local ask_conda CONDA_INTERNAL_OLDPATH="${PATH}" __add_sys_prefix_to_path ask_conda="$(PS1="$PS1" "$CONDA_EXE" $_CE_M $_CE_CONDA shell.posix reactivate)" || \return $? 
PATH="${CONDA_INTERNAL_OLDPATH}" \eval "$ask_conda" __conda_hashr } conda() { if [ "$#" -lt 1 ]; then "$CONDA_EXE" $_CE_M $_CE_CONDA else \local cmd="$1" shift case "$cmd" in activate|deactivate) __conda_activate "$cmd" "$@" ;; install|update|upgrade|remove|uninstall) CONDA_INTERNAL_OLDPATH="${PATH}" __add_sys_prefix_to_path "$CONDA_EXE" $_CE_M $_CE_CONDA "$cmd" "$@" \local t1=$? PATH="${CONDA_INTERNAL_OLDPATH}" if [ $t1 = 0 ]; then __conda_reactivate else return $t1 fi ;; *) CONDA_INTERNAL_OLDPATH="${PATH}" __add_sys_prefix_to_path "$CONDA_EXE" $_CE_M $_CE_CONDA "$cmd" "$@" \local t1=$? PATH="${CONDA_INTERNAL_OLDPATH}" return $t1 ;; esac fi } if [ -z "${CONDA_SHLVL+x}" ]; then \export CONDA_SHLVL=0 # In dev-mode CONDA_EXE is python.exe and on Windows # it is in a different relative location to condabin. if [ -n "${_CE_CONDA:+x}" ] && [ -n "${WINDIR+x}" ]; then PATH="$(\dirname "$CONDA_EXE")/condabin${PATH:+":${PATH}"}" else PATH="$(\dirname "$(\dirname "$CONDA_EXE")")/condabin${PATH:+":${PATH}"}" fi \export PATH # We'\''re not allowing PS1 to be unbound. It must at least be set. # However, we'\''re not exporting it, which can cause problems when starting a second shell # via a first shell (i.e. starting zsh from bash). if [ -z "${PS1+x}" ]; then PS1= fi fi conda activate base' +++ export CONDA_EXE=/gpfslocalsup/pub/anaconda-py3/2020.02/bin/conda +++ CONDA_EXE=/gpfslocalsup/pub/anaconda-py3/2020.02/bin/conda +++ export _CE_M= +++ _CE_M= +++ export _CE_CONDA= +++ _CE_CONDA= +++ export CONDA_PYTHON_EXE=/gpfslocalsup/pub/anaconda-py3/2020.02/bin/python +++ CONDA_PYTHON_EXE=/gpfslocalsup/pub/anaconda-py3/2020.02/bin/python +++ '[' -z x ']' +++ conda activate base +++ '[' 2 -lt 1 ']' +++ local cmd=activate +++ shift +++ case "$cmd" in +++ __conda_activate activate base +++ '[' -n '' ']' +++ local cmd=activate +++ shift +++ local ask_conda +++ CONDA_INTERNAL_OLDPATH=/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/bin:/gpfslocalsup/pub/gitlab-cli/1.13.1/bin:/gpfslocalsup/spack_soft/git-lfs/2.7.2/gcc-8.3.1-o4avesibx2cgqxwaqvn5kbz247it7x2e/bin:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/bin:/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts/bin:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin +++ __add_sys_prefix_to_path +++ '[' -n '' ']' ++++ dirname /gpfslocalsup/pub/anaconda-py3/2020.02/bin/conda +++ SYSP=/gpfslocalsup/pub/anaconda-py3/2020.02/bin ++++ dirname /gpfslocalsup/pub/anaconda-py3/2020.02/bin +++ SYSP=/gpfslocalsup/pub/anaconda-py3/2020.02 +++ '[' -n '' ']' +++ 
PATH=/gpfslocalsup/pub/anaconda-py3/2020.02/bin:/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/bin:/gpfslocalsup/pub/gitlab-cli/1.13.1/bin:/gpfslocalsup/spack_soft/git-lfs/2.7.2/gcc-8.3.1-o4avesibx2cgqxwaqvn5kbz247it7x2e/bin:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/bin:/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts/bin:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin +++ export PATH ++++ PS1='(pytorch-1.8.1+py3.8.8-lts) ' ++++ /gpfslocalsup/pub/anaconda-py3/2020.02/bin/conda shell.posix activate base +++ ask_conda='. "/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts/etc/conda/deactivate.d/proj4-deactivate.sh" . "/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts/etc/conda/deactivate.d/glib_deactivate.sh" PS1='\''(base) '\'' export PATH='\''/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/bin:/gpfslocalsup/pub/gitlab-cli/1.13.1/bin:/gpfslocalsup/spack_soft/git-lfs/2.7.2/gcc-8.3.1-o4avesibx2cgqxwaqvn5kbz247it7x2e/bin:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/bin:/gpfslocalsup/pub/anaconda-py3/2020.02/bin:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin'\'' export CONDA_PREFIX='\''/gpfslocalsup/pub/anaconda-py3/2020.02'\'' export CONDA_SHLVL='\''2'\'' export CONDA_DEFAULT_ENV='\''base'\'' export CONDA_PROMPT_MODIFIER='\''(base) '\'' export CONDA_EXE='\''/gpfslocalsup/pub/anaconda-py3/2020.02/bin/conda'\'' export _CE_M='\'''\'' export _CE_CONDA='\'''\'' export CONDA_PYTHON_EXE='\''/gpfslocalsup/pub/anaconda-py3/2020.02/bin/python'\'' export CONDA_PREFIX_1='\''/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts'\'' . "/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/activate.d/gdal-activate.sh" . "/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/activate.d/geotiff-activate.sh" . "/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/activate.d/proj4-activate.sh" . 
"/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/activate.d/udunits2-activate.sh"' +++ rc=0 +++ PATH=/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/bin:/gpfslocalsup/pub/gitlab-cli/1.13.1/bin:/gpfslocalsup/spack_soft/git-lfs/2.7.2/gcc-8.3.1-o4avesibx2cgqxwaqvn5kbz247it7x2e/bin:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/bin:/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts/bin:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin +++ eval '. "/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts/etc/conda/deactivate.d/proj4-deactivate.sh" . "/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts/etc/conda/deactivate.d/glib_deactivate.sh" PS1='\''(base) '\'' export PATH='\''/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/bin:/gpfslocalsup/pub/gitlab-cli/1.13.1/bin:/gpfslocalsup/spack_soft/git-lfs/2.7.2/gcc-8.3.1-o4avesibx2cgqxwaqvn5kbz247it7x2e/bin:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/bin:/gpfslocalsup/pub/anaconda-py3/2020.02/bin:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin'\'' export CONDA_PREFIX='\''/gpfslocalsup/pub/anaconda-py3/2020.02'\'' export CONDA_SHLVL='\''2'\'' export CONDA_DEFAULT_ENV='\''base'\'' export CONDA_PROMPT_MODIFIER='\''(base) '\'' export CONDA_EXE='\''/gpfslocalsup/pub/anaconda-py3/2020.02/bin/conda'\'' export _CE_M='\'''\'' export _CE_CONDA='\'''\'' export CONDA_PYTHON_EXE='\''/gpfslocalsup/pub/anaconda-py3/2020.02/bin/python'\'' export CONDA_PREFIX_1='\''/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts'\'' . "/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/activate.d/gdal-activate.sh" . "/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/activate.d/geotiff-activate.sh" . "/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/activate.d/proj4-activate.sh" . "/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/activate.d/udunits2-activate.sh"' ++++ . /gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts/etc/conda/deactivate.d/proj4-deactivate.sh +++++ unset PROJ_LIB +++++ unset PROJ_NETWORK +++++ '[' -n '' ']' ++++ . 
/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts/etc/conda/deactivate.d/glib_deactivate.sh +++++ export GSETTINGS_SCHEMA_DIR= +++++ GSETTINGS_SCHEMA_DIR= +++++ unset GSETTINGS_SCHEMA_DIR_CONDA_BACKUP +++++ '[' -z ']' +++++ unset GSETTINGS_SCHEMA_DIR ++++ PS1='(base) ' ++++ export PATH=/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/bin:/gpfslocalsup/pub/gitlab-cli/1.13.1/bin:/gpfslocalsup/spack_soft/git-lfs/2.7.2/gcc-8.3.1-o4avesibx2cgqxwaqvn5kbz247it7x2e/bin:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/bin:/gpfslocalsup/pub/anaconda-py3/2020.02/bin:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin ++++ PATH=/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/bin:/gpfslocalsup/pub/gitlab-cli/1.13.1/bin:/gpfslocalsup/spack_soft/git-lfs/2.7.2/gcc-8.3.1-o4avesibx2cgqxwaqvn5kbz247it7x2e/bin:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/bin:/gpfslocalsup/pub/anaconda-py3/2020.02/bin:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin ++++ export CONDA_PREFIX=/gpfslocalsup/pub/anaconda-py3/2020.02 ++++ CONDA_PREFIX=/gpfslocalsup/pub/anaconda-py3/2020.02 ++++ export CONDA_SHLVL=2 ++++ CONDA_SHLVL=2 ++++ export CONDA_DEFAULT_ENV=base ++++ CONDA_DEFAULT_ENV=base ++++ export 'CONDA_PROMPT_MODIFIER=(base) ' ++++ CONDA_PROMPT_MODIFIER='(base) ' ++++ export CONDA_EXE=/gpfslocalsup/pub/anaconda-py3/2020.02/bin/conda ++++ CONDA_EXE=/gpfslocalsup/pub/anaconda-py3/2020.02/bin/conda ++++ export _CE_M= ++++ _CE_M= ++++ export _CE_CONDA= ++++ _CE_CONDA= ++++ export CONDA_PYTHON_EXE=/gpfslocalsup/pub/anaconda-py3/2020.02/bin/python ++++ CONDA_PYTHON_EXE=/gpfslocalsup/pub/anaconda-py3/2020.02/bin/python ++++ export CONDA_PREFIX_1=/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts ++++ CONDA_PREFIX_1=/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts ++++ . /gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/activate.d/gdal-activate.sh +++++ [[ -n '' ]] +++++ [[ -n '' ]] +++++ '[' -d /gpfslocalsup/pub/anaconda-py3/2020.02/share/gdal ']' +++++ export GDAL_DATA=/gpfslocalsup/pub/anaconda-py3/2020.02/share/gdal +++++ GDAL_DATA=/gpfslocalsup/pub/anaconda-py3/2020.02/share/gdal +++++ export GDAL_DRIVER_PATH=/gpfslocalsup/pub/anaconda-py3/2020.02/lib/gdalplugins +++++ GDAL_DRIVER_PATH=/gpfslocalsup/pub/anaconda-py3/2020.02/lib/gdalplugins +++++ [[ ! 
-d /gpfslocalsup/pub/anaconda-py3/2020.02/lib/gdalplugins ]] +++++ unset GDAL_DRIVER_PATH +++++ export CPL_ZIP_ENCODING=UTF-8 +++++ CPL_ZIP_ENCODING=UTF-8 ++++ . /gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/activate.d/geotiff-activate.sh +++++ [[ -n '' ]] +++++ '[' -d /gpfslocalsup/pub/anaconda-py3/2020.02/share/epsg_csv ']' +++++ '[' -d /gpfslocalsup/pub/anaconda-py3/2020.02/Library/share/epsg_csv ']' ++++ . /gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/activate.d/proj4-activate.sh +++++ '[' -n '' ']' +++++ '[' -d /gpfslocalsup/pub/anaconda-py3/2020.02/share/proj ']' +++++ export PROJ_LIB=/gpfslocalsup/pub/anaconda-py3/2020.02/share/proj +++++ PROJ_LIB=/gpfslocalsup/pub/anaconda-py3/2020.02/share/proj +++++ '[' -f /gpfslocalsup/pub/anaconda-py3/2020.02/share/proj/copyright_and_licenses.csv ']' +++++ export PROJ_NETWORK=ON +++++ PROJ_NETWORK=ON ++++ . /gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/activate.d/udunits2-activate.sh +++++ [[ -n '' ]] +++++ '[' -d /gpfslocalsup/pub/anaconda-py3/2020.02/share/udunits ']' +++++ export UDUNITS2_XML_PATH=/gpfslocalsup/pub/anaconda-py3/2020.02/share/udunits/udunits2.xml +++++ UDUNITS2_XML_PATH=/gpfslocalsup/pub/anaconda-py3/2020.02/share/udunits/udunits2.xml +++ '[' 0 '!=' 0 ']' +++ __conda_hashr +++ '[' -n '' ']' +++ '[' -n '' ']' +++ hash -r ++ unset __conda_setup ++ export CONDA_ENVS_PATH=/gpfswork/rech/six/commun/conda ++ CONDA_ENVS_PATH=/gpfswork/rech/six/commun/conda ++ conda activate base ++ '[' 2 -lt 1 ']' ++ local cmd=activate ++ shift ++ case "$cmd" in ++ __conda_activate activate base ++ '[' -n '' ']' ++ local cmd=activate ++ shift ++ local ask_conda ++ CONDA_INTERNAL_OLDPATH=/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/bin:/gpfslocalsup/pub/gitlab-cli/1.13.1/bin:/gpfslocalsup/spack_soft/git-lfs/2.7.2/gcc-8.3.1-o4avesibx2cgqxwaqvn5kbz247it7x2e/bin:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/bin:/gpfslocalsup/pub/anaconda-py3/2020.02/bin:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin ++ __add_sys_prefix_to_path ++ '[' -n '' ']' +++ dirname /gpfslocalsup/pub/anaconda-py3/2020.02/bin/conda ++ SYSP=/gpfslocalsup/pub/anaconda-py3/2020.02/bin +++ dirname /gpfslocalsup/pub/anaconda-py3/2020.02/bin ++ SYSP=/gpfslocalsup/pub/anaconda-py3/2020.02 ++ '[' -n '' ']' ++ 
PATH=/gpfslocalsup/pub/anaconda-py3/2020.02/bin:/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/bin:/gpfslocalsup/pub/gitlab-cli/1.13.1/bin:/gpfslocalsup/spack_soft/git-lfs/2.7.2/gcc-8.3.1-o4avesibx2cgqxwaqvn5kbz247it7x2e/bin:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/bin:/gpfslocalsup/pub/anaconda-py3/2020.02/bin:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin ++ export PATH +++ PS1='(base) ' +++ /gpfslocalsup/pub/anaconda-py3/2020.02/bin/conda shell.posix activate base ++ ask_conda='. "/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/deactivate.d/udunits2-deactivate.sh" . "/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/deactivate.d/proj4-deactivate.sh" . "/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/deactivate.d/geotiff-deactivate.sh" . "/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/deactivate.d/gdal-deactivate.sh" PS1='\''(base) '\'' export PATH='\''/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/bin:/gpfslocalsup/pub/gitlab-cli/1.13.1/bin:/gpfslocalsup/spack_soft/git-lfs/2.7.2/gcc-8.3.1-o4avesibx2cgqxwaqvn5kbz247it7x2e/bin:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/bin:/gpfslocalsup/pub/anaconda-py3/2020.02/bin:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin'\'' export CONDA_SHLVL='\''2'\'' export CONDA_PROMPT_MODIFIER='\''(base) '\'' . "/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/activate.d/gdal-activate.sh" . "/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/activate.d/geotiff-activate.sh" . "/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/activate.d/proj4-activate.sh" . 
"/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/activate.d/udunits2-activate.sh"' ++ rc=0 ++ PATH=/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/bin:/gpfslocalsup/pub/gitlab-cli/1.13.1/bin:/gpfslocalsup/spack_soft/git-lfs/2.7.2/gcc-8.3.1-o4avesibx2cgqxwaqvn5kbz247it7x2e/bin:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/bin:/gpfslocalsup/pub/anaconda-py3/2020.02/bin:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin ++ eval '. "/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/deactivate.d/udunits2-deactivate.sh" . "/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/deactivate.d/proj4-deactivate.sh" . "/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/deactivate.d/geotiff-deactivate.sh" . "/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/deactivate.d/gdal-deactivate.sh" PS1='\''(base) '\'' export PATH='\''/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/bin:/gpfslocalsup/pub/gitlab-cli/1.13.1/bin:/gpfslocalsup/spack_soft/git-lfs/2.7.2/gcc-8.3.1-o4avesibx2cgqxwaqvn5kbz247it7x2e/bin:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/bin:/gpfslocalsup/pub/anaconda-py3/2020.02/bin:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin'\'' export CONDA_SHLVL='\''2'\'' export CONDA_PROMPT_MODIFIER='\''(base) '\'' . "/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/activate.d/gdal-activate.sh" . "/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/activate.d/geotiff-activate.sh" . "/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/activate.d/proj4-activate.sh" . "/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/activate.d/udunits2-activate.sh"' +++ . /gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/deactivate.d/udunits2-deactivate.sh ++++ unset UDUNITS2_XML_PATH ++++ [[ -n '' ]] +++ . /gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/deactivate.d/proj4-deactivate.sh ++++ unset PROJ_LIB ++++ unset PROJ_NETWORK ++++ '[' -n '' ']' +++ . /gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/deactivate.d/geotiff-deactivate.sh ++++ unset GEOTIFF_CSV ++++ [[ -n '' ]] +++ . 
/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/deactivate.d/gdal-deactivate.sh ++++ unset GDAL_DATA ++++ [[ -n '' ]] ++++ unset GDAL_DRIVER_PATH ++++ [[ -n '' ]] ++++ unset CPL_ZIP_ENCODING +++ PS1='(base) ' +++ export PATH=/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/bin:/gpfslocalsup/pub/gitlab-cli/1.13.1/bin:/gpfslocalsup/spack_soft/git-lfs/2.7.2/gcc-8.3.1-o4avesibx2cgqxwaqvn5kbz247it7x2e/bin:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/bin:/gpfslocalsup/pub/anaconda-py3/2020.02/bin:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin +++ PATH=/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/bin:/gpfslocalsup/pub/gitlab-cli/1.13.1/bin:/gpfslocalsup/spack_soft/git-lfs/2.7.2/gcc-8.3.1-o4avesibx2cgqxwaqvn5kbz247it7x2e/bin:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/bin:/gpfslocalsup/pub/anaconda-py3/2020.02/bin:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin +++ export CONDA_SHLVL=2 +++ CONDA_SHLVL=2 +++ export 'CONDA_PROMPT_MODIFIER=(base) ' +++ CONDA_PROMPT_MODIFIER='(base) ' +++ . /gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/activate.d/gdal-activate.sh ++++ [[ -n '' ]] ++++ [[ -n '' ]] ++++ '[' -d /gpfslocalsup/pub/anaconda-py3/2020.02/share/gdal ']' ++++ export GDAL_DATA=/gpfslocalsup/pub/anaconda-py3/2020.02/share/gdal ++++ GDAL_DATA=/gpfslocalsup/pub/anaconda-py3/2020.02/share/gdal ++++ export GDAL_DRIVER_PATH=/gpfslocalsup/pub/anaconda-py3/2020.02/lib/gdalplugins ++++ GDAL_DRIVER_PATH=/gpfslocalsup/pub/anaconda-py3/2020.02/lib/gdalplugins ++++ [[ ! -d /gpfslocalsup/pub/anaconda-py3/2020.02/lib/gdalplugins ]] ++++ unset GDAL_DRIVER_PATH ++++ export CPL_ZIP_ENCODING=UTF-8 ++++ CPL_ZIP_ENCODING=UTF-8 +++ . /gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/activate.d/geotiff-activate.sh ++++ [[ -n '' ]] ++++ '[' -d /gpfslocalsup/pub/anaconda-py3/2020.02/share/epsg_csv ']' ++++ '[' -d /gpfslocalsup/pub/anaconda-py3/2020.02/Library/share/epsg_csv ']' +++ . /gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/activate.d/proj4-activate.sh ++++ '[' -n '' ']' ++++ '[' -d /gpfslocalsup/pub/anaconda-py3/2020.02/share/proj ']' ++++ export PROJ_LIB=/gpfslocalsup/pub/anaconda-py3/2020.02/share/proj ++++ PROJ_LIB=/gpfslocalsup/pub/anaconda-py3/2020.02/share/proj ++++ '[' -f /gpfslocalsup/pub/anaconda-py3/2020.02/share/proj/copyright_and_licenses.csv ']' ++++ export PROJ_NETWORK=ON ++++ PROJ_NETWORK=ON +++ . 
/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/activate.d/udunits2-activate.sh ++++ [[ -n '' ]] ++++ '[' -d /gpfslocalsup/pub/anaconda-py3/2020.02/share/udunits ']' ++++ export UDUNITS2_XML_PATH=/gpfslocalsup/pub/anaconda-py3/2020.02/share/udunits/udunits2.xml ++++ UDUNITS2_XML_PATH=/gpfslocalsup/pub/anaconda-py3/2020.02/share/udunits/udunits2.xml ++ '[' 0 '!=' 0 ']' ++ __conda_hashr ++ '[' -n '' ']' ++ '[' -n '' ']' ++ hash -r ++ conda activate hf-prod ++ '[' 2 -lt 1 ']' ++ local cmd=activate ++ shift ++ case "$cmd" in ++ __conda_activate activate hf-prod ++ '[' -n '' ']' ++ local cmd=activate ++ shift ++ local ask_conda ++ CONDA_INTERNAL_OLDPATH=/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/bin:/gpfslocalsup/pub/gitlab-cli/1.13.1/bin:/gpfslocalsup/spack_soft/git-lfs/2.7.2/gcc-8.3.1-o4avesibx2cgqxwaqvn5kbz247it7x2e/bin:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/bin:/gpfslocalsup/pub/anaconda-py3/2020.02/bin:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin ++ __add_sys_prefix_to_path ++ '[' -n '' ']' +++ dirname /gpfslocalsup/pub/anaconda-py3/2020.02/bin/conda ++ SYSP=/gpfslocalsup/pub/anaconda-py3/2020.02/bin +++ dirname /gpfslocalsup/pub/anaconda-py3/2020.02/bin ++ SYSP=/gpfslocalsup/pub/anaconda-py3/2020.02 ++ '[' -n '' ']' ++ PATH=/gpfslocalsup/pub/anaconda-py3/2020.02/bin:/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/bin:/gpfslocalsup/pub/gitlab-cli/1.13.1/bin:/gpfslocalsup/spack_soft/git-lfs/2.7.2/gcc-8.3.1-o4avesibx2cgqxwaqvn5kbz247it7x2e/bin:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/bin:/gpfslocalsup/pub/anaconda-py3/2020.02/bin:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin ++ export PATH +++ PS1='(base) ' +++ /gpfslocalsup/pub/anaconda-py3/2020.02/bin/conda shell.posix activate hf-prod ++ ask_conda='. "/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/deactivate.d/udunits2-deactivate.sh" . "/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/deactivate.d/proj4-deactivate.sh" . "/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/deactivate.d/geotiff-deactivate.sh" . 
"/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/deactivate.d/gdal-deactivate.sh" PS1='\''(/gpfswork/rech/six/commun/conda/hf-prod) '\'' export PATH='\''/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/bin:/gpfslocalsup/pub/gitlab-cli/1.13.1/bin:/gpfslocalsup/spack_soft/git-lfs/2.7.2/gcc-8.3.1-o4avesibx2cgqxwaqvn5kbz247it7x2e/bin:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/bin:/gpfswork/rech/six/commun/conda/hf-prod/bin:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin'\'' export CONDA_PREFIX='\''/gpfswork/rech/six/commun/conda/hf-prod'\'' export CONDA_SHLVL='\''3'\'' export CONDA_DEFAULT_ENV='\''/gpfswork/rech/six/commun/conda/hf-prod'\'' export CONDA_PROMPT_MODIFIER='\''(/gpfswork/rech/six/commun/conda/hf-prod) '\'' export CONDA_EXE='\''/gpfslocalsup/pub/anaconda-py3/2020.02/bin/conda'\'' export _CE_M='\'''\'' export _CE_CONDA='\'''\'' export CONDA_PYTHON_EXE='\''/gpfslocalsup/pub/anaconda-py3/2020.02/bin/python'\'' export CONDA_PREFIX_2='\''/gpfslocalsup/pub/anaconda-py3/2020.02'\''' ++ rc=0 ++ PATH=/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/bin:/gpfslocalsup/pub/gitlab-cli/1.13.1/bin:/gpfslocalsup/spack_soft/git-lfs/2.7.2/gcc-8.3.1-o4avesibx2cgqxwaqvn5kbz247it7x2e/bin:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/bin:/gpfslocalsup/pub/anaconda-py3/2020.02/bin:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin ++ eval '. "/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/deactivate.d/udunits2-deactivate.sh" . "/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/deactivate.d/proj4-deactivate.sh" . "/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/deactivate.d/geotiff-deactivate.sh" . 
"/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/deactivate.d/gdal-deactivate.sh" PS1='\''(/gpfswork/rech/six/commun/conda/hf-prod) '\'' export PATH='\''/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/bin:/gpfslocalsup/pub/gitlab-cli/1.13.1/bin:/gpfslocalsup/spack_soft/git-lfs/2.7.2/gcc-8.3.1-o4avesibx2cgqxwaqvn5kbz247it7x2e/bin:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/bin:/gpfswork/rech/six/commun/conda/hf-prod/bin:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin'\'' export CONDA_PREFIX='\''/gpfswork/rech/six/commun/conda/hf-prod'\'' export CONDA_SHLVL='\''3'\'' export CONDA_DEFAULT_ENV='\''/gpfswork/rech/six/commun/conda/hf-prod'\'' export CONDA_PROMPT_MODIFIER='\''(/gpfswork/rech/six/commun/conda/hf-prod) '\'' export CONDA_EXE='\''/gpfslocalsup/pub/anaconda-py3/2020.02/bin/conda'\'' export _CE_M='\'''\'' export _CE_CONDA='\'''\'' export CONDA_PYTHON_EXE='\''/gpfslocalsup/pub/anaconda-py3/2020.02/bin/python'\'' export CONDA_PREFIX_2='\''/gpfslocalsup/pub/anaconda-py3/2020.02'\''' +++ . /gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/deactivate.d/udunits2-deactivate.sh ++++ unset UDUNITS2_XML_PATH ++++ [[ -n '' ]] +++ . /gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/deactivate.d/proj4-deactivate.sh ++++ unset PROJ_LIB ++++ unset PROJ_NETWORK ++++ '[' -n '' ']' +++ . /gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/deactivate.d/geotiff-deactivate.sh ++++ unset GEOTIFF_CSV ++++ [[ -n '' ]] +++ . 
/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/deactivate.d/gdal-deactivate.sh ++++ unset GDAL_DATA ++++ [[ -n '' ]] ++++ unset GDAL_DRIVER_PATH ++++ [[ -n '' ]] ++++ unset CPL_ZIP_ENCODING +++ PS1='(/gpfswork/rech/six/commun/conda/hf-prod) ' +++ export PATH=/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/bin:/gpfslocalsup/pub/gitlab-cli/1.13.1/bin:/gpfslocalsup/spack_soft/git-lfs/2.7.2/gcc-8.3.1-o4avesibx2cgqxwaqvn5kbz247it7x2e/bin:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/bin:/gpfswork/rech/six/commun/conda/hf-prod/bin:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin +++ PATH=/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/bin:/gpfslocalsup/pub/gitlab-cli/1.13.1/bin:/gpfslocalsup/spack_soft/git-lfs/2.7.2/gcc-8.3.1-o4avesibx2cgqxwaqvn5kbz247it7x2e/bin:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/bin:/gpfswork/rech/six/commun/conda/hf-prod/bin:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin +++ export CONDA_PREFIX=/gpfswork/rech/six/commun/conda/hf-prod +++ CONDA_PREFIX=/gpfswork/rech/six/commun/conda/hf-prod +++ export CONDA_SHLVL=3 +++ CONDA_SHLVL=3 +++ export CONDA_DEFAULT_ENV=/gpfswork/rech/six/commun/conda/hf-prod +++ CONDA_DEFAULT_ENV=/gpfswork/rech/six/commun/conda/hf-prod +++ export 'CONDA_PROMPT_MODIFIER=(/gpfswork/rech/six/commun/conda/hf-prod) ' +++ CONDA_PROMPT_MODIFIER='(/gpfswork/rech/six/commun/conda/hf-prod) ' +++ export CONDA_EXE=/gpfslocalsup/pub/anaconda-py3/2020.02/bin/conda +++ CONDA_EXE=/gpfslocalsup/pub/anaconda-py3/2020.02/bin/conda +++ export _CE_M= +++ _CE_M= +++ export _CE_CONDA= +++ _CE_CONDA= +++ export CONDA_PYTHON_EXE=/gpfslocalsup/pub/anaconda-py3/2020.02/bin/python +++ CONDA_PYTHON_EXE=/gpfslocalsup/pub/anaconda-py3/2020.02/bin/python +++ export CONDA_PREFIX_2=/gpfslocalsup/pub/anaconda-py3/2020.02 +++ CONDA_PREFIX_2=/gpfslocalsup/pub/anaconda-py3/2020.02 ++ '[' 0 '!=' 0 ']' ++ __conda_hashr ++ '[' -n '' ']' ++ '[' -n '' ']' ++ hash -r + ROUND=2 + TESTING=0 + export HF_DATASETS_OFFLINE=1 + HF_DATASETS_OFFLINE=1 + export TRANSFORMERS_OFFLINE=1 + TRANSFORMERS_OFFLINE=1 + OUTPUT_PATH=/gpfsscratch/rech/six/uty16tp/synched_exps/tr4-1B3-full/ + MEGATRON_DEEPSPEED_REPO=/gpfswork/rech/six/commun/code/Megatron-DeepSpeed + [[ 0 == 1 ]] + DATA_PATH=/gpfsscratch/rech/six/commun/datasets-custom/c4_preprocessing/c4_en_train_text_document + pushd /gpfswork/rech/six/commun/code/Megatron-DeepSpeed 
/gpfswork/rech/six/commun/code/Megatron-DeepSpeed /gpfswork/rech/six/uty16tp/code/big_science
++ perl -le '$_=$ENV{"SLURM_JOB_NODELIST"}; s/,.*//; s/-.*//; s/\[//; print'
+ MASTER_ADDR=r12i3n6
+ MASTER_PORT=6000
+ GPUS_PER_NODE=4
+ NNODES=16
+ PP_SIZE=4
+ TP_SIZE=4
+ DP_SIZE=4
+ MICRO_BATCH_SIZE=16
+ GLOBAL_BATCH_SIZE=1024
+ TRAIN_ITER=146_484_375
+ NLAYERS=24
+ NHIDDEN=2048
+ NHEADS=16
+ FFN_HIDDEN_SIZE=8192
+ SEQ_LEN=2048
+ [[ 2 == 1 ]]
+ [[ 2 == 2 ]]
+ SAVE_INTERVAL=1500
+ OPTIMIZER_ARGS=' --optimizer adam --adam-beta1 0.9 --adam-beta2 0.999 --adam-eps 1e-8 --lr 1e-4 --min-lr 1e-5 --lr-decay-style cosine --lr-decay-samples 126_953_125 --lr-warmup-samples 183_105 --clip-grad 1.0 --weight-decay 1e-1 '
+ EXIT_OPTS=' --exit-duration-in-mins 1190 '
+ GPT_ARGS=' --num-layers 24 --hidden-size 2048 --num-attention-heads 16 --ffn-hidden-size 8192 --seq-length 2048 --micro-batch-size 16 --global-batch-size 1024 --train-samples 146_484_375 --tokenizer-type PretrainedFromHF --tokenizer-name-or-path t5-small --loss-scale 12 --clip-grad 1.0 --fp16 --checkpoint-activations --position-embedding-type rotary --optimizer adam --adam-beta1 0.9 --adam-beta2 0.999 --adam-eps 1e-8 --lr 1e-4 --min-lr 1e-5 --lr-decay-style cosine --lr-decay-samples 126_953_125 --lr-warmup-samples 183_105 --clip-grad 1.0 --weight-decay 1e-1 --exit-duration-in-mins 1190 '
+ OUTPUT_ARGS=' --log-interval 200 --save-interval 1500 --eval-interval 1000 --eval-iters 100 --tensorboard-dir /gpfsscratch/rech/six/uty16tp/synched_exps/tr4-1B3-full//tensorboard --tensorboard-queue-size 5 --log-timers-to-tensorboard --log-batch-size-to-tensorboard --log-validation-ppl-to-tensorboard '
+ ZERO_STAGE=1
+ config_json=./ds_config.726697.json
+ cat
+ DEEPSPEED_ARGS=' --deepspeed --deepspeed_config ./ds_config.726697.json --zero-stage 1 --deepspeed-activation-checkpointing '
+ export 'LAUNCHER=python -u -m torch.distributed.launch --nproc_per_node 4 --nnodes 16 --master_addr r12i3n6 --master_port 6000 '
+ LAUNCHER='python -u -m torch.distributed.launch --nproc_per_node 4 --nnodes 16 --master_addr r12i3n6 --master_port 6000 '
++ pwd
+ export 'CMD= /gpfswork/rech/six/commun/code/Megatron-DeepSpeed/pretrain_gpt.py --tensor-model-parallel-size 4 --pipeline-model-parallel-size 4 --num-layers 24 --hidden-size 2048 --num-attention-heads 16 --ffn-hidden-size 8192 --seq-length 2048 --micro-batch-size 16 --global-batch-size 1024 --train-samples 146_484_375 --tokenizer-type PretrainedFromHF --tokenizer-name-or-path t5-small --loss-scale 12 --clip-grad 1.0 --fp16 --checkpoint-activations --position-embedding-type rotary --optimizer adam --adam-beta1 0.9 --adam-beta2 0.999 --adam-eps 1e-8 --lr 1e-4 --min-lr 1e-5 --lr-decay-style cosine --lr-decay-samples 126_953_125 --lr-warmup-samples 183_105 --clip-grad 1.0 --weight-decay 1e-1 --exit-duration-in-mins 1190 --log-interval 200 --save-interval 1500 --eval-interval 1000 --eval-iters 100 --tensorboard-dir /gpfsscratch/rech/six/uty16tp/synched_exps/tr4-1B3-full//tensorboard --tensorboard-queue-size 5 --log-timers-to-tensorboard --log-batch-size-to-tensorboard --log-validation-ppl-to-tensorboard --save /gpfsscratch/rech/six/uty16tp/synched_exps/tr4-1B3-full//checkpoints --load /gpfsscratch/rech/six/uty16tp/synched_exps/tr4-1B3-full//checkpoints --data-path /gpfsscratch/rech/six/commun/datasets-custom/c4_preprocessing/c4_en_train_text_document --data-impl mmap --split 949,50,1 --distributed-backend nccl --deepspeed --deepspeed_config ./ds_config.726697.json --zero-stage 1 --deepspeed-activation-checkpointing '
+ srun --jobid 726697 bash -c '$LAUNCHER --node_rank $SLURM_PROCID $CMD'
+ tee /gpfsscratch/rech/six/uty16tp/synched_exps/tr4-1B3-full//logs/tr3-1B3-modeling-baseline.726697.out
tee: /gpfsscratch/rech/six/uty16tp/synched_exps/tr4-1B3-full//logs/tr3-1B3-modeling-baseline.726697.out: No such file or directory
*****************************************
Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
*****************************************
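The `tee` error above only means the logs/ directory under the output path did not exist when the job started writing; the training output itself still reaches stdout. A minimal sketch of the launch path, with values copied from the trace above (the `mkdir -p` step is an assumption for fixing the tee warning, not part of the script as shown):

    # Sketch only -- reconstructed from the $LAUNCHER / $CMD trace above.
    OUTPUT_PATH=/gpfsscratch/rech/six/uty16tp/synched_exps/tr4-1B3-full

    # Pre-creating the log directory would avoid tee's "No such file or directory".
    mkdir -p $OUTPUT_PATH/logs

    # Each of the 16 srun tasks expands $LAUNCHER and $CMD from the environment;
    # torch.distributed.launch then spawns 4 local ranks (one per GPU), i.e. 16 x 4 = 64 ranks.
    srun --jobid 726697 bash -c '$LAUNCHER --node_rank $SLURM_PROCID $CMD'

The contents of ./ds_config.726697.json are written by the `cat` heredoc and are never echoed into this log. A DeepSpeed config consistent with ZERO_STAGE=1, fp16 and the batch sizes above might look roughly like the sketch below; every key value here is illustrative, not the actual file:

    # Illustrative only -- the real ds_config.726697.json is not shown in this log.
    cat > ds_config.example.json <<EOF
    {
      "train_micro_batch_size_per_gpu": 16,
      "train_batch_size": 1024,
      "gradient_clipping": 1.0,
      "zero_optimization": { "stage": 1 },
      "fp16": { "enabled": true },
      "steps_per_print": 2000,
      "wall_clock_breakdown": false
    }
    EOF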
2021-08-14 06:50:38.397102: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0
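The OMP_NUM_THREADS notice repeated above comes from torch.distributed.launch defaulting every spawned rank to a single OpenMP thread. If CPU-side work (e.g. the dataloader) ever becomes a bottleneck, the variable can be exported in the job script before srun; the value below is purely illustrative and is not set anywhere in this log:

    # Illustrative -- not in the original script: give each rank a few CPU threads instead of 1.
    export OMP_NUM_THREADS=4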
--------------------------------------------------
DeepSpeed C++/CUDA extension op report
--------------------------------------------------
NOTE: Ops not installed will be just-in-time (JIT) compiled at runtime if needed. Op compatibility means that your system meet the required dependencies to JIT install the op.
--------------------------------------------------
JIT compiled ops requires ninja
ninja .................. [OKAY]
--------------------------------------------------
op name ................ installed .. compatible
--------------------------------------------------
cpu_adam ............... [YES] ...... [OKAY]
fused_adam ............. [NO] ....... [OKAY]
fused_lamb ............. [NO] ....... [OKAY]
sparse_attn ............ [NO] ....... [OKAY]
transformer ............ [NO] ....... [OKAY]
stochastic_transformer . [NO] ....... [OKAY]
--------------------------------------------------
 [WARNING]  async_io requires the libraries: ['libaio-dev'] but are missing. Can be fixed by: `apt install libaio-dev`.
async_io ............... [NO] ....... [NO]
transformer_inference .. [NO] ....... [OKAY]
utils .................. [YES] ...... [OKAY]
quantizer .............. [NO] ....... [OKAY]
--------------------------------------------------
DeepSpeed general environment info:
torch install path ............... ['/gpfswork/rech/six/commun/conda/hf-prod/lib/python3.8/site-packages/torch']
torch version .................... 1.8.1
torch cuda version ............... 11.1
nvcc version ..................... 11.2
deepspeed install path ........... ['/gpfsssd/worksf/projects/rech/six/commun/code/deepspeed-big-science/deepspeed']
deepspeed info ................... 0.4.2+unknown, unknown, unknown
deepspeed wheel compiled w. ...... torch 1.8, cuda 10.2
/bin/sh: line 0: type: git: not found
**** Git info for Megatron: git_hash=unknown git_branch=unknown ****
vocab file is un-used. loading tokenizer from pre-trained model
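The extension op report and the "DeepSpeed general environment info" block above are printed by every rank at import time, which is why they recur throughout this log. The same information can be obtained once, from an interactive shell, via DeepSpeed's reporting tool (assuming the job's Python environment is already active):

    # Assumes the same conda/module environment as the job is loaded.
    ds_report

The async_io line stays [NO] until the libaio development package named in the warning is installed, which typically requires admin rights on a shared cluster.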
torch 1.8, cuda 10.2 /bin/sh: line 0: type: git: not found /bin/sh: line 0: type: git: not found **** Git info for Megatron: git_hash=unknown git_branch=unknown ******** Git info for Megatron: git_hash=unknown git_branch=unknown **** DeepSpeed general environment info: /bin/sh: line 0: type: git: not found /bin/sh: line 0: type: git: not found torch install path ............... ['/gpfswork/rech/six/commun/conda/hf-prod/lib/python3.8/site-packages/torch'] torch version .................... 1.8.1 torch cuda version ............... 11.1 nvcc version ..................... 11.2 deepspeed install path ........... ['/gpfsssd/worksf/projects/rech/six/commun/code/deepspeed-big-science/deepspeed'] deepspeed info ................... 0.4.2+unknown, unknown, unknown deepspeed wheel compiled w. ...... torch 1.8, cuda 10.2 DeepSpeed general environment info: torch install path ............... ['/gpfswork/rech/six/commun/conda/hf-prod/lib/python3.8/site-packages/torch'] torch version .................... 1.8.1 torch cuda version ............... 11.1 nvcc version ..................... 11.2 deepspeed install path ........... ['/gpfsssd/worksf/projects/rech/six/commun/code/deepspeed-big-science/deepspeed'] deepspeed info ................... 0.4.2+unknown, unknown, unknown deepspeed wheel compiled w. ...... torch 1.8, cuda 10.2 **** Git info for Megatron: git_hash=unknown git_branch=unknown ******** Git info for Megatron: git_hash=unknown git_branch=unknown **** vocab file is un-used. loading tokenizer from pre-trained model vocab file is un-used. loading tokenizer from pre-trained model using world size: 64, data-parallel-size: 4, tensor-model-parallel size: 4, pipeline-model-parallel size: 4 vocab file is un-used. loading tokenizer from pre-trained model WARNING: overriding default arguments for tokenizer_type:GPT2BPETokenizer with tokenizer_type:PretrainedFromHF using torch.float16 for parameters ... ------------------------ arguments ------------------------ accumulate_allreduce_grads_in_fp32 .............. False adam_beta1 ...................................... 0.9 adam_beta2 ...................................... 0.999 adam_eps ........................................ 1e-08 adlr_autoresume ................................. False adlr_autoresume_interval ........................ 1000 apply_query_key_layer_scaling ................... True apply_residual_connection_post_layernorm ........ False attention_dropout ............................... 0.1 attention_softmax_in_fp32 ....................... False bert_binary_head ................................ True bert_load ....................................... None bf16 ............................................ False bias_dropout_fusion ............................. True bias_gelu_fusion ................................ True biencoder_projection_dim ........................ 0 biencoder_shared_query_context_model ............ False block_data_path ................................. None checkpoint_activations .......................... True checkpoint_in_cpu ............................... False checkpoint_num_layers ........................... 1 clip_grad ....................................... 1.0 consumed_train_samples .......................... 0 consumed_valid_samples .......................... 0 contigious_checkpointing ........................ False cpu_optimizer ................................... False cpu_torch_adam .................................. False data_impl ....................................... 
mmap
data_parallel_size .............................. 4
data_path ....................................... ['/gpfsscratch/rech/six/commun/datasets-custom/c4_preprocessing/c4_en_train_text_document']
dataloader_type ................................. single
DDP_impl ........................................ local
decoder_seq_length .............................. None
deepscale ....................................... False
deepscale_config ................................ None
deepspeed ....................................... True
deepspeed_activation_checkpointing .............. True
deepspeed_config ................................ ./ds_config.726697.json
deepspeed_mpi ................................... False
distribute_checkpointed_activations ............. False
distributed_backend ............................. nccl
embedding_path .................................. None
encoder_seq_length .............................. 2048
eod_mask_loss ................................... False
eval_interval ................................... 1000
eval_iters ...................................... 100
evidence_data_path .............................. None
exit_duration_in_mins ........................... 1190
exit_interval ................................... None
ffn_hidden_size ................................. 8192
finetune ........................................ False
fp16 ............................................ True
fp16_lm_cross_entropy ........................... False
fp32_residual_connection ........................ False
global_batch_size ............................... 1024
hidden_dropout .................................. 0.1
hidden_size ..................................... 2048
hysteresis ...................................... 2
ict_head_size ................................... None
ict_load ........................................ None
img_dim ......................................... 224
indexer_batch_size .............................. 128
indexer_log_interval ............................ 1000
init_method_std ................................. 0.02
init_method_xavier_uniform ...................... False
initial_loss_scale .............................. 4294967296
kv_channels ..................................... 128
layernorm_epsilon ............................... 1e-05
lazy_mpu_init ................................... None
load ............................................ /gpfsscratch/rech/six/uty16tp/synched_exps/tr4-1B3-full//checkpoints
local_rank ...................................... 0
log_batch_size_to_tensorboard ................... True
log_interval .................................... 200
log_learning_rate_to_tensorboard ................ True
log_loss_scale_to_tensorboard ................... True
log_num_zeros_in_grad ........................... False
log_params_norm ................................. False
log_timers_to_tensorboard ....................... True
log_validation_ppl_to_tensorboard ............... True
loss_scale ...................................... 12.0
loss_scale_window ............................... 1000
lr .............................................. 0.0001
lr_decay_iters .................................. None
lr_decay_samples ................................ 126953125
lr_decay_style .................................. cosine
lr_warmup_fraction .............................. None
lr_warmup_iters ................................. 0
lr_warmup_samples ............................... 183105
make_vocab_size_divisible_by .................... 128
mask_prob ....................................... 0.15
masked_softmax_fusion ........................... True
max_position_embeddings ......................... None
memory_centric_tiled_linear ..................... False
merge_file ...................................... None
micro_batch_size ................................ 16
min_loss_scale .................................. 1.0
min_lr .......................................... 1e-05
mmap_warmup ..................................... False
no_load_optim ................................... None
no_load_rng ..................................... None
no_save_optim ................................... None
no_save_rng ..................................... None
num_attention_heads ............................. 16
num_channels .................................... 3
num_classes ..................................... 1000
num_layers ...................................... 24
num_layers_per_virtual_pipeline_stage ........... None
num_workers ..................................... 2
onnx_safe ....................................... None
openai_gelu ..................................... False
optimizer ....................................... adam
override_lr_scheduler ........................... False
params_dtype .................................... torch.float16
partition_activations ........................... False
patch_dim ....................................... 16
pipeline_model_parallel_size .................... 4
position_embedding_type ......................... PositionEmbeddingType.rotary
profile_backward ................................ False
query_in_block_prob ............................. 0.1
rampup_batch_size ............................... None
rank ............................................ 0
remote_device ................................... none
reset_attention_mask ............................ False
reset_position_ids .............................. False
retriever_report_topk_accuracies ................ []
retriever_score_scaling ......................... False
retriever_seq_length ............................ 256
sample_rate ..................................... 1.0
save ............................................ /gpfsscratch/rech/six/uty16tp/synched_exps/tr4-1B3-full//checkpoints
save_interval ................................... 1500
scatter_gather_tensors_in_pipeline .............. True
scattered_embeddings ............................ False
seed ............................................ 1234
seq_length ...................................... 2048
sgd_momentum .................................... 0.9
short_seq_prob .................................. 0.1
split ........................................... 949,50,1
split_transformers .............................. False
synchronize_each_layer .......................... False
tensor_model_parallel_size ...................... 4
tensorboard_dir ................................. /gpfsscratch/rech/six/uty16tp/synched_exps/tr4-1B3-full//tensorboard
tensorboard_log_interval ........................ 1
tensorboard_queue_size .......................... 5
tile_factor ..................................... 1
titles_data_path ................................ None
tokenizer_name_or_path .......................... t5-small
tokenizer_type .................................. PretrainedFromHF
train_iters ..................................... None
train_samples ................................... 146484375
use_checkpoint_lr_scheduler ..................... False
use_contiguous_buffers_in_ddp ................... False
use_cpu_initialization .......................... None
use_one_sent_docs ............................... False
use_pin_memory .................................. False
virtual_pipeline_model_parallel_size ............ None
vocab_extra_ids ................................. 0
vocab_file ...................................... None
weight_decay .................................... 0.1
world_size ...................................... 64
zero_allgather_bucket_size ...................... 0.0
zero_contigious_gradients ....................... False
zero_reduce_bucket_size ......................... 0.0
zero_reduce_scatter ............................. False
zero_stage ...................................... 1
-------------------- end of arguments ---------------------
setting number of micro-batches to constant 16
> building PretrainedFromHF tokenizer ...
 vocab file is un-used. loading tokenizer from pre-trained model
 [WARNING]  async_io requires the libraries: ['libaio-dev'] but are missing. Can be fixed by: `apt install libaio-dev`.
async_io ............... [NO] ....... [NO]
transformer_inference .. [NO] ....... [OKAY]
utils .................. [YES] ...... [OKAY]
quantizer .............. [NO] ....... [OKAY]
--------------------------------------------------
DeepSpeed general environment info:
torch install path ............... ['/gpfswork/rech/six/commun/conda/hf-prod/lib/python3.8/site-packages/torch']
torch version .................... 1.8.1
torch cuda version ............... 11.1
nvcc version ..................... 11.2
deepspeed install path ........... ['/gpfsssd/worksf/projects/rech/six/commun/code/deepspeed-big-science/deepspeed']
deepspeed info ................... 0.4.2+unknown, unknown, unknown
deepspeed wheel compiled w. ...... torch 1.8, cuda 10.2
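The argument dump above is easy to misread in this flattened form, so the following short check (an illustrative Python sketch, not part of the original log) verifies that the parallel layout and batch-size settings it reports are mutually consistent; reading the sample counts as token budgets at sequence length 2048 is an inference, not something the log states.

# Illustrative consistency check of the argument dump above; all constants are
# copied from the log, the script itself is not part of the original output.
tensor_parallel = 4      # tensor_model_parallel_size
pipeline_parallel = 4    # pipeline_model_parallel_size
data_parallel = 4        # data_parallel_size
world_size = 64          # world_size

micro_batch = 16         # micro_batch_size
global_batch = 1024      # global_batch_size
seq_length = 2048        # seq_length / encoder_seq_length

# GPUs factor into tensor- x pipeline- x data-parallel groups.
assert tensor_parallel * pipeline_parallel * data_parallel == world_size

# "setting number of micro-batches to constant 16":
# global batch = micro batch x data-parallel replicas x micro-batches per step.
assert global_batch == micro_batch * data_parallel * 16

# The sample counts are consistent with round token budgets at this sequence
# length (inference only): 300B tokens total, 260B for LR decay, ~375M warmup.
assert 146_484_375 * seq_length == 300_000_000_000           # train_samples
assert 126_953_125 * seq_length == 260_000_000_000           # lr_decay_samples
assert abs(183_105 * seq_length - 375_000_000) < seq_length  # lr_warmup_samples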
/bin/sh: line 0: type: git: not found
**** Git info for Megatron: git_hash=unknown git_branch=unknown ****
--------------------------------------------------
DeepSpeed C++/CUDA extension op report
--------------------------------------------------
NOTE: Ops not installed will be just-in-time (JIT) compiled at runtime if needed. Op compatibility means that your system meet the required dependencies to JIT install the op.
--------------------------------------------------
JIT compiled ops requires ninja
ninja .................. [OKAY]
--------------------------------------------------
op name ................ installed .. compatible
--------------------------------------------------
cpu_adam ............... [YES] ...... [OKAY]
fused_adam ............. [NO] ....... [OKAY]
fused_lamb ............. [NO] ....... [OKAY]
sparse_attn ............ [NO] ....... [OKAY]
transformer ............ [NO] ....... [OKAY]
stochastic_transformer . [NO] ....... [OKAY]
--------------------------------------------------
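The table above is DeepSpeed's op-capability listing: only cpu_adam is pre-built in this environment, and the remaining ops are JIT-compiled with ninja on first use. The same [YES]/[NO] information can be queried programmatically; this is a minimal sketch assuming the op_builder interface of the DeepSpeed 0.4.x line shown in the log (the specific builder classes named below are assumptions, not something this log confirms).

# Minimal sketch: query op compatibility roughly the way the report above is produced.
from deepspeed.ops.op_builder import CPUAdamBuilder, FusedAdamBuilder

for builder in (CPUAdamBuilder(), FusedAdamBuilder()):
    # is_compatible() corresponds to the "compatible" column above; ops that
    # report [NO] under "installed" are compiled with ninja when first loaded.
    print(type(builder).__name__, builder.is_compatible())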
> padded vocab (size: 32100) with 156 dummy tokens (new size: 32256)
> initializing torch distributed ...
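The 156 dummy tokens follow directly from the arguments above: Megatron pads the vocabulary to a multiple of make_vocab_size_divisible_by * tensor_model_parallel_size so that every tensor-parallel shard of the embedding is the same size. A worked check (not part of the log):

import math

# Padding implied by make_vocab_size_divisible_by=128 and
# tensor_model_parallel_size=4: round 32100 up to the next multiple of 512.
orig_vocab = 32100                 # tokenizer vocabulary size reported above
multiple = 128 * 4
padded_vocab = math.ceil(orig_vocab / multiple) * multiple

assert padded_vocab == 32256                  # "new size: 32256"
assert padded_vocab - orig_vocab == 156       # "156 dummy tokens"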
--------------------------------------------------[OKAY] --------------------------------------------------  [WARNING]  async_io requires the libraries: ['libaio-dev'] but are missing. Can be fixed by: `apt install libaio-dev`. [WARNING]  async_io requires the libraries: ['libaio-dev'] but are missing. Can be fixed by: `apt install libaio-dev`. async_ioasync_io .............................. [NO][NO] .............. [NO][NO] transformer_inferencetransformer_inference .. ..[NO] [NO]....... .......[OKAY] [OKAY] utilsutils .................................... [YES][YES] ............ [OKAY][OKAY] quantizer .............. [NO] quantizer....... ..............[OKAY] [NO] ....... -------------------------------------------------- [OKAY] -------------------------------------------------- DeepSpeed general environment info: torch install path ............... ['/gpfswork/rech/six/commun/conda/hf-prod/lib/python3.8/site-packages/torch'] torch version .................... 1.8.1 torch cuda version ............... 11.1 nvcc version ..................... 11.2 deepspeed install path ........... ['/gpfsssd/worksf/projects/rech/six/commun/code/deepspeed-big-science/deepspeed'] deepspeed info ................... 0.4.2+unknown, unknown, unknown deepspeed wheel compiled w. ...... torch 1.8, cuda 10.2 DeepSpeed general environment info: torch install path ............... ['/gpfswork/rech/six/commun/conda/hf-prod/lib/python3.8/site-packages/torch'] torch version .................... 1.8.1 torch cuda version ............... 11.1 nvcc version ..................... 11.2 deepspeed install path ........... ['/gpfsssd/worksf/projects/rech/six/commun/code/deepspeed-big-science/deepspeed'] deepspeed info ................... 0.4.2+unknown, unknown, unknown deepspeed wheel compiled w. ...... torch 1.8, cuda 10.2  [WARNING]  async_io requires the libraries: ['libaio-dev'] but are missing. Can be fixed by: `apt install libaio-dev`. async_io ............... [NO] ....... [NO] transformer_inference .. [NO] ....... [OKAY] utils .................. [YES] ...... [OKAY] quantizer .............. [NO] ....... [OKAY] --------------------------------------------------  [WARNING]  async_io requires the libraries: ['libaio-dev'] but are missing. Can be fixed by: `apt install libaio-dev`. async_io ............... [NO] ....... [NO] transformer_inference .. [NO] ....... [OKAY] utils .................. [YES] ...... [OKAY] quantizer .............. [NO] ....... [OKAY] -------------------------------------------------- DeepSpeed general environment info:DeepSpeed general environment info: torch install pathtorch install path .............................. ['/gpfswork/rech/six/commun/conda/hf-prod/lib/python3.8/site-packages/torch']['/gpfswork/rech/six/commun/conda/hf-prod/lib/python3.8/site-packages/torch'] torch version torch version.................... ....................1.8.1 1.8.1 torch cuda version torch cuda version............... ...............11.1 11.1 nvcc versionnvcc version .......................................... 11.211.2 deepspeed install pathdeepspeed install path ...................... ['/gpfsssd/worksf/projects/rech/six/commun/code/deepspeed-big-science/deepspeed']['/gpfsssd/worksf/projects/rech/six/commun/code/deepspeed-big-science/deepspeed'] deepspeed infodeepspeed info ...................................... 0.4.2+unknown, unknown, unknown0.4.2+unknown, unknown, unknown deepspeed wheel compiled w.deepspeed wheel compiled w. ...... 
......torch 1.8, cuda 10.2 torch 1.8, cuda 10.2 DeepSpeed general environment info:DeepSpeed general environment info: torch install pathtorch install path .............................. ['/gpfswork/rech/six/commun/conda/hf-prod/lib/python3.8/site-packages/torch']['/gpfswork/rech/six/commun/conda/hf-prod/lib/python3.8/site-packages/torch'] torch versiontorch version ........................................ 1.8.11.8.1 torch cuda versiontorch cuda version .............................. 11.111.1 nvcc versionnvcc version .......................................... 11.211.2 deepspeed install pathdeepspeed install path ...................... ['/gpfsssd/worksf/projects/rech/six/commun/code/deepspeed-big-science/deepspeed']['/gpfsssd/worksf/projects/rech/six/commun/code/deepspeed-big-science/deepspeed'] deepspeed infodeepspeed info ...................................... 0.4.2+unknown, unknown, unknown0.4.2+unknown, unknown, unknown deepspeed wheel compiled w.deepspeed wheel compiled w. ............ torch 1.8, cuda 10.2torch 1.8, cuda 10.2  [WARNING]  async_io requires the libraries: ['libaio-dev'] but are missing. Can be fixed by: `apt install libaio-dev`. [WARNING]  async_io requires the libraries: ['libaio-dev'] but are missing. Can be fixed by: `apt install libaio-dev`. async_io ............... async_io[NO] ...................... [NO][NO] ....... [NO] transformer_inference .. transformer_inference[NO] ......... [NO][OKAY] ....... [OKAY] utils .................. utils[YES] ........................ [YES][OKAY] ...... [OKAY] quantizer .............. [NO]quantizer ..................... [OKAY][NO] ....... [OKAY] -------------------------------------------------- --------------------------------------------------  [WARNING]  async_io requires the libraries: ['libaio-dev'] but are missing. Can be fixed by: `apt install libaio-dev`.  [WARNING]  async_io requires the libraries: ['libaio-dev'] but are missing. Can be fixed by: `apt install libaio-dev`. async_io async_io............... ...............[NO] [NO]....... .......[NO] [NO] transformer_inferencetransformer_inference .... [NO][NO] .............. [OKAY][OKAY] utilsutils .................. ..................[YES] [YES]...... ...... [OKAY][OKAY] quantizerquantizer ............................ [NO][NO] .............. [OKAY][OKAY] ---------------------------------------------------------------------------------------------------- DeepSpeed general environment info:DeepSpeed general environment info: torch install path ............... ['/gpfswork/rech/six/commun/conda/hf-prod/lib/python3.8/site-packages/torch'] torch versiontorch install path ................................... 1.8.1 torch cuda version ...............['/gpfswork/rech/six/commun/conda/hf-prod/lib/python3.8/site-packages/torch'] 11.1 nvcc versiontorch version ..................... ....................11.2 1.8.1 deepspeed install path ...........torch cuda version ...............['/gpfsssd/worksf/projects/rech/six/commun/code/deepspeed-big-science/deepspeed'] 11.1 deepspeed infonvcc version ........................................ 0.4.2+unknown, unknown, unknown11.2 deepspeed wheel compiled w.deepspeed install path ................. torch 1.8, cuda 10.2 ['/gpfsssd/worksf/projects/rech/six/commun/code/deepspeed-big-science/deepspeed'] deepspeed info ................... 0.4.2+unknown, unknown, unknown deepspeed wheel compiled w. ...... torch 1.8, cuda 10.2 DeepSpeed general environment info: torch install path ............... 
['/gpfswork/rech/six/commun/conda/hf-prod/lib/python3.8/site-packages/torch'] torch version .................... 1.8.1 torch cuda version ............... 11.1 nvcc version ..................... 11.2 deepspeed install path ........... ['/gpfsssd/worksf/projects/rech/six/commun/code/deepspeed-big-science/deepspeed'] deepspeed info ................... 0.4.2+unknown, unknown, unknown deepspeed wheel compiled w. ...... torch 1.8, cuda 10.2 DeepSpeed general environment info: torch install path ............... ['/gpfswork/rech/six/commun/conda/hf-prod/lib/python3.8/site-packages/torch'] torch version .................... 1.8.1 torch cuda version ............... 11.1 nvcc versionDeepSpeed general environment info: ..................... 11.2 deepspeed install path ........... ['/gpfsssd/worksf/projects/rech/six/commun/code/deepspeed-big-science/deepspeed'] deepspeed info ................... 0.4.2+unknown, unknown, unknown deepspeed wheel compiled w.torch install path ..................... torch 1.8, cuda 10.2 ['/gpfswork/rech/six/commun/conda/hf-prod/lib/python3.8/site-packages/torch'] torch version .................... 1.8.1 torch cuda version ............... 11.1 nvcc version ..................... 11.2 deepspeed install path ........... ['/gpfsssd/worksf/projects/rech/six/commun/code/deepspeed-big-science/deepspeed'] deepspeed info ................... 0.4.2+unknown, unknown, unknown deepspeed wheel compiled w. ...... torch 1.8, cuda 10.2 DeepSpeed general environment info:DeepSpeed general environment info: torch install pathtorch install path .............................. ['/gpfswork/rech/six/commun/conda/hf-prod/lib/python3.8/site-packages/torch']['/gpfswork/rech/six/commun/conda/hf-prod/lib/python3.8/site-packages/torch'] torch versiontorch version ........................................ 1.8.11.8.1 torch cuda versiontorch cuda version .............................. 11.111.1 nvcc versionnvcc version .......................................... 11.211.2 deepspeed install pathdeepspeed install path ...................... ['/gpfsssd/worksf/projects/rech/six/commun/code/deepspeed-big-science/deepspeed']['/gpfsssd/worksf/projects/rech/six/commun/code/deepspeed-big-science/deepspeed'] deepspeed infodeepspeed info ...................................... 0.4.2+unknown, unknown, unknown0.4.2+unknown, unknown, unknown deepspeed wheel compiled w.deepspeed wheel compiled w. ............ torch 1.8, cuda 10.2torch 1.8, cuda 10.2 DeepSpeed general environment info: torch install path ............... ['/gpfswork/rech/six/commun/conda/hf-prod/lib/python3.8/site-packages/torch'] torch version .................... 1.8.1 torch cuda version ............... 11.1 nvcc version ..................... 11.2 deepspeed install path ........... ['/gpfsssd/worksf/projects/rech/six/commun/code/deepspeed-big-science/deepspeed'] deepspeed info ................... 0.4.2+unknown, unknown, unknown deepspeed wheel compiled w. ...... torch 1.8, cuda 10.2 /bin/sh: line 0: type: git: not found /bin/sh: line 0: type: git: not found **** Git info for Megatron: git_hash=unknown git_branch=unknown **** **** Git info for Megatron: git_hash=unknown git_branch=unknown **** DeepSpeed general environment info: DeepSpeed general environment info:torch install path ............... torch install path ...............['/gpfswork/rech/six/commun/conda/hf-prod/lib/python3.8/site-packages/torch'] torch version .................... ['/gpfswork/rech/six/commun/conda/hf-prod/lib/python3.8/site-packages/torch']1.8.1 torch version .................... 
1.8.1 torch cuda versiontorch cuda version .............................. 11.111.1 nvcc versionnvcc version .......................................... 11.211.2 deepspeed install pathdeepspeed install path ........... ['/gpfsssd/worksf/projects/rech/six/commun/code/deepspeed-big-science/deepspeed']........... deepspeed info['/gpfsssd/worksf/projects/rech/six/commun/code/deepspeed-big-science/deepspeed'] ................... deepspeed info0.4.2+unknown, unknown, unknown ................... deepspeed wheel compiled w.0.4.2+unknown, unknown, unknown ...... torch 1.8, cuda 10.2 deepspeed wheel compiled w. ...... torch 1.8, cuda 10.2 DeepSpeed general environment info:DeepSpeed general environment info: torch install pathtorch install path .............................. ['/gpfswork/rech/six/commun/conda/hf-prod/lib/python3.8/site-packages/torch']['/gpfswork/rech/six/commun/conda/hf-prod/lib/python3.8/site-packages/torch'] torch versiontorch version ........................................ 1.8.11.8.1 torch cuda version torch cuda version............... ...............11.1 11.1 nvcc version nvcc version..................... .....................11.2 11.2 deepspeed install pathdeepspeed install path ...................... ['/gpfsssd/worksf/projects/rech/six/commun/code/deepspeed-big-science/deepspeed'] ['/gpfsssd/worksf/projects/rech/six/commun/code/deepspeed-big-science/deepspeed']deepspeed info deepspeed info................... ...................0.4.2+unknown, unknown, unknown 0.4.2+unknown, unknown, unknowndeepspeed wheel compiled w. deepspeed wheel compiled w....... ......torch 1.8, cuda 10.2 torch 1.8, cuda 10.2 vocab file is un-used. loading tokenizer from pre-trained model vocab file is un-used. loading tokenizer from pre-trained model /bin/sh: line 0: type: git: not found /bin/sh: line 0: type: git: not found **** Git info for Megatron: git_hash=unknown git_branch=unknown ******** Git info for Megatron: git_hash=unknown git_branch=unknown **** DeepSpeed general environment info: DeepSpeed general environment info: torch install path ............... ['/gpfswork/rech/six/commun/conda/hf-prod/lib/python3.8/site-packages/torch'] torch version .................... 1.8.1 torch cuda version ............... 11.1 torch install path ............... ['/gpfswork/rech/six/commun/conda/hf-prod/lib/python3.8/site-packages/torch'] torch version .................... 1.8.1 torch cuda version ............... 11.1 nvcc version ..................... 11.2 deepspeed install path ........... ['/gpfsssd/worksf/projects/rech/six/commun/code/deepspeed-big-science/deepspeed'] nvcc version ..................... 11.2 deepspeed install path ........... ['/gpfsssd/worksf/projects/rech/six/commun/code/deepspeed-big-science/deepspeed'] deepspeed info ................... 0.4.2+unknown, unknown, unknown deepspeed wheel compiled w. ...... torch 1.8, cuda 10.2 deepspeed info ................... 0.4.2+unknown, unknown, unknown deepspeed wheel compiled w. ...... torch 1.8, cuda 10.2 DeepSpeed general environment info: torch install path ............... ['/gpfswork/rech/six/commun/conda/hf-prod/lib/python3.8/site-packages/torch'] torch version .................... 1.8.1 /bin/sh: line 0: type: git: not found /bin/sh: line 0: type: git: not found torch cuda version ............... 11.1 nvcc version ..................... 11.2 deepspeed install path ........... ['/gpfsssd/worksf/projects/rech/six/commun/code/deepspeed-big-science/deepspeed'] deepspeed info ................... 0.4.2+unknown, unknown, unknown deepspeed wheel compiled w. 
...... torch 1.8, cuda 10.2 DeepSpeed general environment info: torch install path ............... ['/gpfswork/rech/six/commun/conda/hf-prod/lib/python3.8/site-packages/torch'] torch version .................... 1.8.1 torch cuda version ............... 11.1 nvcc version ..................... 11.2 deepspeed install path ........... ['/gpfsssd/worksf/projects/rech/six/commun/code/deepspeed-big-science/deepspeed'] deepspeed info ................... 0.4.2+unknown, unknown, unknown deepspeed wheel compiled w. ...... torch 1.8, cuda 10.2 **** Git info for Megatron: git_hash=unknown git_branch=unknown **** **** Git info for Megatron: git_hash=unknown git_branch=unknown **** vocab file is un-used. loading tokenizer from pre-trained model vocab file is un-used. loading tokenizer from pre-trained model /bin/sh: line 0: type: git: not found **** Git info for Megatron: git_hash=unknown git_branch=unknown **** vocab file is un-used. loading tokenizer from pre-trained model vocab file is un-used. loading tokenizer from pre-trained model vocab file is un-used. loading tokenizer from pre-trained model /bin/sh: line 0: type: git: not found /bin/sh: line 0: type: git: not found **** Git info for Megatron: git_hash=unknown git_branch=unknown ******** Git info for Megatron: git_hash=unknown git_branch=unknown **** DeepSpeed general environment info:DeepSpeed general environment info: torch install path ...............torch install path ............... ['/gpfswork/rech/six/commun/conda/hf-prod/lib/python3.8/site-packages/torch'] ['/gpfswork/rech/six/commun/conda/hf-prod/lib/python3.8/site-packages/torch'] torch version .................... torch version1.8.1 .................... 1.8.1torch cuda version ............... torch cuda version11.1 ...............nvcc version 11.1..................... nvcc version11.2 .....................deepspeed install path 11.2........... deepspeed install path['/gpfsssd/worksf/projects/rech/six/commun/code/deepspeed-big-science/deepspeed'] ........... deepspeed info ['/gpfsssd/worksf/projects/rech/six/commun/code/deepspeed-big-science/deepspeed']................... 0.4.2+unknown, unknown, unknowndeepspeed info ...................deepspeed wheel compiled w. ......0.4.2+unknown, unknown, unknown torch 1.8, cuda 10.2deepspeed wheel compiled w. ...... torch 1.8, cuda 10.2 vocab file is un-used. loading tokenizer from pre-trained model vocab file is un-used. loading tokenizer from pre-trained model /bin/sh: line 0: type: git: not found /bin/sh: line 0: type: git: not found **** Git info for Megatron: git_hash=unknown git_branch=unknown ******** Git info for Megatron: git_hash=unknown git_branch=unknown **** /bin/sh: line 0: type: git: not found /bin/sh: line 0: type: git: not found **** Git info for Megatron: git_hash=unknown git_branch=unknown ******** Git info for Megatron: git_hash=unknown git_branch=unknown **** vocab file is un-used. loading tokenizer from pre-trained model vocab file is un-used. loading tokenizer from pre-trained model vocab file is un-used. loading tokenizer from pre-trained model vocab file is un-used. loading tokenizer from pre-trained model /bin/sh: line 0: type: git: not found **** Git info for Megatron: git_hash=unknown git_branch=unknown **** /bin/sh: line 0: type: git: not found /bin/sh: line 0: type: git: not found **** Git info for Megatron: git_hash=unknown git_branch=unknown **** **** Git info for Megatron: git_hash=unknown git_branch=unknown **** vocab file is un-used. loading tokenizer from pre-trained model vocab file is un-used. 
loading tokenizer from pre-trained model vocab file is un-used. loading tokenizer from pre-trained model /bin/sh: line 0: type: git: not found /bin/sh: line 0: type: git: not found **** Git info for Megatron: git_hash=unknown git_branch=unknown ******** Git info for Megatron: git_hash=unknown git_branch=unknown **** /bin/sh: line 0: type: git: not found /bin/sh: line 0: type: git: not found **** Git info for Megatron: git_hash=unknown git_branch=unknown **** **** Git info for Megatron: git_hash=unknown git_branch=unknown **** vocab file is un-used. loading tokenizer from pre-trained model vocab file is un-used. loading tokenizer from pre-trained model vocab file is un-used. loading tokenizer from pre-trained model vocab file is un-used. loading tokenizer from pre-trained model /bin/sh: line 0: type: git: not found /bin/sh: line 0: type: git: not found **** Git info for Megatron: git_hash=unknown git_branch=unknown **** **** Git info for Megatron: git_hash=unknown git_branch=unknown **** vocab file is un-used. loading tokenizer from pre-trained model vocab file is un-used. loading tokenizer from pre-trained model /bin/sh: line 0: type: git: not found /bin/sh: line 0: type: git: not found **** Git info for Megatron: git_hash=unknown git_branch=unknown **** **** Git info for Megatron: git_hash=unknown git_branch=unknown **** vocab file is un-used. loading tokenizer from pre-trained model vocab file is un-used. loading tokenizer from pre-trained model > setting tensorboard ... -------------------------------------------------- DeepSpeed C++/CUDA extension op report -------------------------------------------------- NOTE: Ops not installed will be just-in-time (JIT) compiled at runtime if needed. Op compatibility means that your system meet the required dependencies to JIT install the op. -------------------------------------------------- JIT compiled ops requires ninja ninja .................. [OKAY] -------------------------------------------------- op name ................ installed .. compatible -------------------------------------------------- cpu_adam ............... [YES] ...... [OKAY] fused_adam ............. [NO] ....... [OKAY] fused_lamb ............. [NO] ....... [OKAY] sparse_attn ............ [NO] ....... [OKAY] transformer ............ [NO] ....... [OKAY] stochastic_transformer . [NO] ....... [OKAY]  [WARNING]  async_io requires the libraries: ['libaio-dev'] but are missing. Can be fixed by: `apt install libaio-dev`. async_io ............... [NO] ....... [NO] transformer_inference .. [NO] ....... [OKAY] utils .................. [YES] ...... [OKAY] quantizer .............. [NO] ....... [OKAY] -------------------------------------------------- -------------------------------------------------- DeepSpeed C++/CUDA extension op report -------------------------------------------------- NOTE: Ops not installed will be just-in-time (JIT) compiled at runtime if needed. Op compatibility means that your system meet the required dependencies to JIT install the op. -------------------------------------------------- JIT compiled ops requires ninja ninja .................. [OKAY] -------------------------------------------------- op name ................ installed .. compatible -------------------------------------------------- cpu_adam ............... [YES] ...... [OKAY] fused_adam ............. [NO] ....... [OKAY] fused_lamb ............. [NO] ....... [OKAY] sparse_attn ............ [NO] ....... [OKAY] transformer ............ [NO] ....... [OKAY] stochastic_transformer . [NO] ....... 
[OKAY] DeepSpeed general environment info: torch install path ............... ['/gpfswork/rech/six/commun/conda/hf-prod/lib/python3.8/site-packages/torch'] torch version .................... 1.8.1 torch cuda version ............... 11.1 nvcc version ..................... 11.2 deepspeed install path ........... ['/gpfsssd/worksf/projects/rech/six/commun/code/deepspeed-big-science/deepspeed'] deepspeed info ................... 0.4.2+unknown, unknown, unknown deepspeed wheel compiled w. ...... torch 1.8, cuda 10.2  [WARNING]  async_io requires the libraries: ['libaio-dev'] but are missing. Can be fixed by: `apt install libaio-dev`. async_io ............... [NO] ....... [NO] transformer_inference .. [NO] ....... [OKAY] utils .................. [YES] ...... [OKAY] quantizer .............. [NO] ....... [OKAY] -------------------------------------------------- /bin/sh: line 0: type: git: not found **** Git info for Megatron: git_hash=unknown git_branch=unknown **** vocab file is un-used. loading tokenizer from pre-trained model DeepSpeed general environment info: torch install path ............... ['/gpfswork/rech/six/commun/conda/hf-prod/lib/python3.8/site-packages/torch'] torch version .................... 1.8.1 torch cuda version ............... 11.1 nvcc version ..................... 11.2 deepspeed install path ........... ['/gpfsssd/worksf/projects/rech/six/commun/code/deepspeed-big-science/deepspeed'] deepspeed info ................... 0.4.2+unknown, unknown, unknown deepspeed wheel compiled w. ...... torch 1.8, cuda 10.2 /bin/sh: line 0: type: git: not found **** Git info for Megatron: git_hash=unknown git_branch=unknown **** vocab file is un-used. loading tokenizer from pre-trained model > initializing tensor model parallel with size 4 > initializing pipeline model parallel with size 4 > setting random seeds to 1234 ... [2021-08-14 06:51:00,931] [INFO] [checkpointing.py:226:model_parallel_cuda_manual_seed] > initializing model parallel cuda seeds on global rank 0, model parallel rank 0, and data parallel rank 0 with model parallel seed: 3952 and data parallel seed: 1234 > compiling dataset index builder ... make: Entering directory '/gpfsssd/worksf/projects/rech/six/commun/code/Megatron-DeepSpeed/megatron/data' make: Nothing to be done for 'default'. make: Leaving directory '/gpfsssd/worksf/projects/rech/six/commun/code/Megatron-DeepSpeed/megatron/data' >>> done with dataset index builder. Compilation time: 0.127 seconds > compiling and loading fused kernels ... /gpfswork/rech/six/commun/conda/hf-prod/lib/python3.8/site-packages/torch/utils/cpp_extension.py:283: UserWarning: !! WARNING !! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Your compiler (c++) is not compatible with the compiler Pytorch was built with for this platform, which is g++ on linux. Please use g++ to to compile your extension. Alternatively, you may compile PyTorch from source using c++, and then you can also use c++ to compile your extension. See https://github.com/pytorch/pytorch/blob/master/CONTRIBUTING.md for help with compiling PyTorch from source. !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !! WARNING !! warnings.warn(WRONG_COMPILER_WARNING.format( Detected CUDA files, patching ldflags Emitting ninja build file /gpfsssd/worksf/projects/rech/six/commun/code/Megatron-DeepSpeed/megatron/fused_kernels/build/build.ninja... Building extension module scaled_upper_triang_masked_softmax_cuda... Allowing ninja to set a default number of workers... 
(overridable by setting the environment variable MAX_JOBS=N)
ninja: no work to do.
Loading extension module scaled_upper_triang_masked_softmax_cuda...
Detected CUDA files, patching ldflags
Emitting ninja build file /gpfsssd/worksf/projects/rech/six/commun/code/Megatron-DeepSpeed/megatron/fused_kernels/build/build.ninja...
Building extension module scaled_masked_softmax_cuda...
Allowing ninja to set a default number of workers...
(overridable by setting the environment variable MAX_JOBS=N)
ninja: no work to do.
Loading extension module scaled_masked_softmax_cuda...
Detected CUDA files, patching ldflags
Emitting ninja build file /gpfsssd/worksf/projects/rech/six/commun/code/Megatron-DeepSpeed/megatron/fused_kernels/build/build.ninja...
Building extension module fused_mix_prec_layer_norm_cuda...
Allowing ninja to set a default number of workers...
(overridable by setting the environment variable MAX_JOBS=N)
ninja: no work to do.
Loading extension module fused_mix_prec_layer_norm_cuda...
>>> done with compiling and loading fused kernels. Compilation time: 20.044 seconds
time to initialize megatron (seconds): 73.142
[after megatron is initialized] datetime: 2021-08-14 06:51:21
building GPT model ...
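The compiler warning above (repeated once per rank in the raw output) comes from torch.utils.cpp_extension falling back to the default c++ driver. A minimal sketch of one possible way to avoid it, assuming the cluster's g++ is the module-provided gcc/8.3.1; this is not something the logged run itself does:

import os

# torch.utils.cpp_extension picks the C++ compiler from the CXX environment
# variable (defaulting to "c++"); pointing it at g++ before the fused-kernel
# JIT build runs should satisfy the compiler-compatibility check.
os.environ.setdefault("CXX", "g++")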
[2021-08-14 06:51:21,239] [INFO] [utils.py:680:see_memory_usage] Before Building Model /gpfswork/rech/six/commun/conda/hf-prod/lib/python3.8/site-packages/torch/cuda/memory.py:373: FutureWarning: torch.cuda.memory_cached has been renamed to torch.cuda.memory_reserved warnings.warn( /gpfswork/rech/six/commun/conda/hf-prod/lib/python3.8/site-packages/torch/cuda/memory.py:381: FutureWarning: torch.cuda.max_memory_cached has been renamed to torch.cuda.max_memory_reserved warnings.warn( [2021-08-14 06:51:21,241] [INFO] [utils.py:681:see_memory_usage] MA 0.0 GB Max_MA 0.0 GB CA 0.0 GB Max_CA 0 GB [2021-08-14 06:51:21,241] [INFO] [utils.py:689:see_memory_usage] CPU Virtual Memory: used = 39.55 GB, percent = 21.1% SEED_LAYERS=False BASE_SEED=1234 SEED_FN=None Using topology: {ProcessCoord(pipe=0, data=0, model=0): 0, ProcessCoord(pipe=0, data=0, model=1): 1, ProcessCoord(pipe=0, data=0, model=2): 2, ProcessCoord(pipe=0, data=0, model=3): 3, ProcessCoord(pipe=0, data=1, model=0): 4, ProcessCoord(pipe=0, data=1, model=1): 5, ProcessCoord(pipe=0, data=1, model=2): 6, ProcessCoord(pipe=0, data=1, model=3): 7, ProcessCoord(pipe=0, data=2, model=0): 8, ProcessCoord(pipe=0, data=2, model=1): 9, ProcessCoord(pipe=0, data=2, model=2): 10, ProcessCoord(pipe=0, data=2, model=3): 11, ProcessCoord(pipe=0, data=3, model=0): 12, ProcessCoord(pipe=0, data=3, model=1): 13, ProcessCoord(pipe=0, data=3, model=2): 14, ProcessCoord(pipe=0, data=3, model=3): 15, ProcessCoord(pipe=1, data=0, model=0): 16, ProcessCoord(pipe=1, data=0, model=1): 17, ProcessCoord(pipe=1, data=0, model=2): 18, ProcessCoord(pipe=1, data=0, model=3): 19, ProcessCoord(pipe=1, data=1, model=0): 20, ProcessCoord(pipe=1, data=1, model=1): 21, ProcessCoord(pipe=1, data=1, model=2): 22, ProcessCoord(pipe=1, data=1, model=3): 23, ProcessCoord(pipe=1, data=2, model=0): 24, ProcessCoord(pipe=1, data=2, model=1): 25, ProcessCoord(pipe=1, data=2, model=2): 26, ProcessCoord(pipe=1, data=2, model=3): 27, ProcessCoord(pipe=1, data=3, model=0): 28, ProcessCoord(pipe=1, data=3, model=1): 29, ProcessCoord(pipe=1, data=3, model=2): 30, ProcessCoord(pipe=1, data=3, model=3): 31, ProcessCoord(pipe=2, data=0, model=0): 32, ProcessCoord(pipe=2, data=0, model=1): 33, ProcessCoord(pipe=2, data=0, model=2): 34, ProcessCoord(pipe=2, data=0, model=3): 35, ProcessCoord(pipe=2, data=1, model=0): 36, ProcessCoord(pipe=2, data=1, model=1): 37, ProcessCoord(pipe=2, data=1, model=2): 38, ProcessCoord(pipe=2, data=1, model=3): 39, ProcessCoord(pipe=2, data=2, model=0): 40, ProcessCoord(pipe=2, data=2, model=1): 41, ProcessCoord(pipe=2, data=2, model=2): 42, ProcessCoord(pipe=2, data=2, model=3): 43, ProcessCoord(pipe=2, data=3, model=0): 44, ProcessCoord(pipe=2, data=3, model=1): 45, ProcessCoord(pipe=2, data=3, model=2): 46, ProcessCoord(pipe=2, data=3, model=3): 47, ProcessCoord(pipe=3, data=0, model=0): 48, ProcessCoord(pipe=3, data=0, model=1): 49, ProcessCoord(pipe=3, data=0, model=2): 50, ProcessCoord(pipe=3, data=0, model=3): 51, ProcessCoord(pipe=3, data=1, model=0): 52, ProcessCoord(pipe=3, data=1, model=1): 53, ProcessCoord(pipe=3, data=1, model=2): 54, ProcessCoord(pipe=3, data=1, model=3): 55, ProcessCoord(pipe=3, data=2, model=0): 56, ProcessCoord(pipe=3, data=2, model=1): 57, ProcessCoord(pipe=3, data=2, model=2): 58, ProcessCoord(pipe=3, data=2, model=3): 59, ProcessCoord(pipe=3, data=3, model=0): 60, ProcessCoord(pipe=3, data=3, model=1): 61, ProcessCoord(pipe=3, data=3, model=2): 62, ProcessCoord(pipe=3, data=3, model=3): 63} [2021-08-14 06:51:21,773] [INFO] 
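The ProcessCoord table above maps the 64 global ranks onto a (pipe=4, data=4, model=4) grid with the tensor-model dimension varying fastest, then data, then pipe. A small sketch that reproduces the same numbering; the axis order is inferred from the table itself, not taken from the topology code:

from itertools import product

PIPE, DATA, MODEL = 4, 4, 4

def global_rank(pipe, data, model):
    # model-parallel coordinate varies fastest, then data, then pipe
    return (pipe * DATA + data) * MODEL + model

# Spot-check against the log: ProcessCoord(pipe=1, data=2, model=3) is rank 27.
assert global_rank(1, 2, 3) == 27
for pipe, data, model in product(range(PIPE), range(DATA), range(MODEL)):
    print(f"ProcessCoord(pipe={pipe}, data={data}, model={model}): {global_rank(pipe, data, model)}")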
[module.py:360:_partition_layers] Partitioning pipeline stages with method type:transformer
stage=0 layers=9
     0: _to_float16
     1: EmbeddingPipe
     2:
     3: ParallelTransformerLayerPipe
     4: ParallelTransformerLayerPipe
     5: ParallelTransformerLayerPipe
     6: ParallelTransformerLayerPipe
     7: ParallelTransformerLayerPipe
     8: ParallelTransformerLayerPipe
stage=1 layers=6
     9: ParallelTransformerLayerPipe
    10: ParallelTransformerLayerPipe
    11: ParallelTransformerLayerPipe
    12: ParallelTransformerLayerPipe
    13: ParallelTransformerLayerPipe
    14: ParallelTransformerLayerPipe
stage=2 layers=6
    15: ParallelTransformerLayerPipe
    16: ParallelTransformerLayerPipe
    17: ParallelTransformerLayerPipe
    18: ParallelTransformerLayerPipe
    19: ParallelTransformerLayerPipe
    20: ParallelTransformerLayerPipe
stage=3 layers=10
    21: ParallelTransformerLayerPipe
    22: ParallelTransformerLayerPipe
    23: ParallelTransformerLayerPipe
    24: ParallelTransformerLayerPipe
    25: ParallelTransformerLayerPipe
    26: ParallelTransformerLayerPipe
    27:
    28: MixedFusedLayerNorm
    29: EmbeddingPipe
    30: float16_to_fp32
  loss: CrossEntropy
> number of parameters on (tensor, pipeline) model parallel rank (1, 2): 75592704
> number of parameters on (tensor, pipeline) model parallel rank (0, 1): 75592704
> number of parameters on (tensor, pipeline) model parallel rank (2, 2): 75592704
> number of parameters on (tensor, pipeline) model parallel rank (2, 1): 75592704
> number of parameters on (tensor, pipeline) model parallel rank (3, 1): 75592704
> number of parameters on (tensor, pipeline) model parallel rank (1, 1): 75592704
> number of parameters on (tensor, pipeline) model parallel rank (3, 2): 75592704
> number of parameters on (tensor, pipeline) model parallel rank (0, 2): 75592704
> number of parameters on (tensor, pipeline) model parallel rank (1, 0): 92107776
> number of parameters on (tensor, pipeline) model parallel rank (3, 0): 92107776
> number of parameters on (tensor, pipeline) model parallel rank (2, 0): 92107776
> number of parameters on (tensor, pipeline) model parallel rank (1, 3): 92111872
> number of parameters on (tensor, pipeline) model parallel rank (0, 3): 92111872
> number of parameters on (tensor, pipeline) model parallel rank (3, 3): 92111872
> number of parameters on (tensor, pipeline) model parallel rank (2, 3): 92111872
[2021-08-14 06:51:22,106] [INFO] [utils.py:680:see_memory_usage] After Building Model
[2021-08-14 06:51:22,107] [INFO] [utils.py:681:see_memory_usage] MA 0.18 GB Max_MA 0.2 GB CA 0.22 GB Max_CA 0 GB
[2021-08-14 06:51:22,107] [INFO] [utils.py:689:see_memory_usage] CPU Virtual Memory: used = 39.95 GB, percent = 21.3%
> number of parameters on (tensor, pipeline) model parallel rank (0, 0): 92107776
setting training iterations to 143051
> learning rate decay style: cosine
DeepSpeed is enabled.
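For reference, the 64 ranks in the topology map above are laid out as 4 pipeline stages x 4 data-parallel replicas x 4 tensor-parallel shards, with the tensor index varying fastest, and the per-stage parameter counts add up to the totals reported in the engine summary below. A minimal sketch of that bookkeeping (plain arithmetic consistent with this log, not the DeepSpeed implementation; the tied-embedding reading of UNIQUE_PARAMS is an inference from EmbeddingPipe appearing on both the first and last stage):

    # Rank layout implied by the "Using topology" map: pipe (slowest), data, model (fastest).
    PIPE, DATA, MODEL = 4, 4, 4

    def coord_to_rank(pipe: int, data: int, model: int) -> int:
        return (pipe * DATA + data) * MODEL + model

    assert coord_to_rank(0, 0, 0) == 0      # ProcessCoord(pipe=0, data=0, model=0): 0
    assert coord_to_rank(1, 0, 0) == 16     # ProcessCoord(pipe=1, data=0, model=0): 16
    assert coord_to_rank(2, 3, 1) == 45     # ProcessCoord(pipe=2, data=3, model=1): 45
    assert coord_to_rank(3, 3, 3) == 63     # last rank

    # Parameters reported per pipeline stage, per tensor-parallel rank:
    stage_params = [92_107_776, 75_592_704, 75_592_704, 92_111_872]
    total = MODEL * sum(stage_params)
    assert total == 1_341_620_224           # TOTAL_PARAMS in the engine summary below

    # The first and last stages both hold an EmbeddingPipe (tied word embeddings),
    # so UNIQUE_PARAMS counts that shard only once per tensor-parallel rank:
    embed_shard = 92_107_776 - 75_592_704   # = 16_515_072
    assert total - MODEL * embed_shard == 1_275_559_936   # UNIQUE_PARAMS

The first and last stages come out heavier (92.1M vs 75.6M parameters per tensor-parallel rank) precisely because they carry the embedding shard on top of their six transformer layers.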
[2021-08-14 06:51:22,108] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed info: version=0.4.2+unknown, git-hash=unknown, git-branch=unknown [2021-08-14 06:51:22,177] [INFO] [engine.py:179:__init__] DeepSpeed Flops Profiler Enabled: False [2021-08-14 06:51:22,177] [INFO] [engine.py:736:_configure_optimizer] Removing param_group that has no 'params' in the client Optimizer [2021-08-14 06:51:22,177] [INFO] [engine.py:741:_configure_optimizer] Using client Optimizer as basic optimizer [2021-08-14 06:51:22,177] [INFO] [engine.py:750:_configure_optimizer] DeepSpeed Basic Optimizer = FusedAdam [2021-08-14 06:51:22,177] [INFO] [utils.py:43:is_zero_supported_optimizer] Checking ZeRO support for optimizer=FusedAdam type= [2021-08-14 06:51:22,177] [INFO] [logging.py:68:log_dist] [Rank 0] Creating fp16 ZeRO stage 1 optimizer [2021-08-14 06:51:22,177] [INFO] [stage2.py:106:__init__] Reduce bucket size 500000000 [2021-08-14 06:51:22,177] [INFO] [stage2.py:107:__init__] Allgather bucket size 500000000 [2021-08-14 06:51:22,177] [INFO] [stage2.py:108:__init__] CPU Offload: False [2021-08-14 06:51:22,177] [INFO] [stage2.py:109:__init__] Round robin gradient partitioning: False [2021-08-14 06:51:22,416] [INFO] [stage2.py:419:__init__] optimizer state initialized [2021-08-14 06:51:22,416] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Final Optimizer = FusedAdam [2021-08-14 06:51:22,417] [INFO] [engine.py:553:_configure_lr_scheduler] DeepSpeed using client LR scheduler [2021-08-14 06:51:22,417] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed LR Scheduler = [2021-08-14 06:51:22,417] [INFO] [logging.py:68:log_dist] [Rank 0] step=0, skipped=0, lr=[0.0, 0.0], mom=[(0.9, 0.999), (0.9, 0.999)] [2021-08-14 06:51:22,417] [INFO] [config.py:900:print] DeepSpeedEngine configuration: [2021-08-14 06:51:22,417] [INFO] [config.py:904:print] activation_checkpointing_config { "partition_activations": false, "contiguous_memory_optimization": false, "cpu_checkpointing": false, "number_checkpoints": null, "synchronize_checkpoint_boundary": false, "profile": false } [2021-08-14 06:51:22,417] [INFO] [config.py:904:print] aio_config ................... {'block_size': 1048576, 'queue_depth': 8, 'thread_count': 1, 'single_submit': False, 'overlap_events': True} [2021-08-14 06:51:22,417] [INFO] [config.py:904:print] allreduce_always_fp32 ........ False [2021-08-14 06:51:22,417] [INFO] [config.py:904:print] amp_enabled .................. False [2021-08-14 06:51:22,417] [INFO] [config.py:904:print] amp_params ................... False [2021-08-14 06:51:22,417] [INFO] [config.py:904:print] checkpoint_tag_validation_enabled True [2021-08-14 06:51:22,417] [INFO] [config.py:904:print] checkpoint_tag_validation_fail False [2021-08-14 06:51:22,417] [INFO] [config.py:904:print] disable_allgather ............ False [2021-08-14 06:51:22,418] [INFO] [config.py:904:print] dump_state ................... False [2021-08-14 06:51:22,418] [INFO] [config.py:904:print] dynamic_loss_scale_args ...... {'init_scale': 4096, 'scale_window': 500, 'delayed_shift': 2, 'min_scale': 1} [2021-08-14 06:51:22,418] [INFO] [config.py:904:print] eigenvalue_enabled ........... False [2021-08-14 06:51:22,418] [INFO] [config.py:904:print] eigenvalue_gas_boundary_resolution 1 [2021-08-14 06:51:22,418] [INFO] [config.py:904:print] eigenvalue_layer_name ........ bert.encoder.layer [2021-08-14 06:51:22,418] [INFO] [config.py:904:print] eigenvalue_layer_num ......... 0 [2021-08-14 06:51:22,418] [INFO] [config.py:904:print] eigenvalue_max_iter .......... 
100 [2021-08-14 06:51:22,418] [INFO] [config.py:904:print] eigenvalue_stability ......... 1e-06 [2021-08-14 06:51:22,418] [INFO] [config.py:904:print] eigenvalue_tol ............... 0.01 [2021-08-14 06:51:22,418] [INFO] [config.py:904:print] eigenvalue_verbose ........... False [2021-08-14 06:51:22,418] [INFO] [config.py:904:print] elasticity_enabled ........... False [2021-08-14 06:51:22,418] [INFO] [config.py:904:print] flops_profiler_config ........ { "enabled": false, "profile_step": 1, "module_depth": -1, "top_modules": 1, "detailed": true, "output_file": null } [2021-08-14 06:51:22,418] [INFO] [config.py:904:print] fp16_enabled ................. True [2021-08-14 06:51:22,418] [INFO] [config.py:904:print] fp16_mixed_quantize .......... False [2021-08-14 06:51:22,418] [INFO] [config.py:904:print] global_rank .................. 0 [2021-08-14 06:51:22,418] [INFO] [config.py:904:print] gradient_accumulation_steps .. 16 [2021-08-14 06:51:22,418] [INFO] [config.py:904:print] gradient_clipping ............ 1.0 [2021-08-14 06:51:22,418] [INFO] [config.py:904:print] gradient_predivide_factor .... 1.0 [2021-08-14 06:51:22,418] [INFO] [config.py:904:print] initial_dynamic_scale ........ 4096 [2021-08-14 06:51:22,418] [INFO] [config.py:904:print] loss_scale ................... 0 [2021-08-14 06:51:22,418] [INFO] [config.py:904:print] memory_breakdown ............. False [2021-08-14 06:51:22,418] [INFO] [config.py:904:print] optimizer_legacy_fusion ...... False [2021-08-14 06:51:22,418] [INFO] [config.py:904:print] optimizer_name ............... None [2021-08-14 06:51:22,419] [INFO] [config.py:904:print] optimizer_params ............. None [2021-08-14 06:51:22,419] [INFO] [config.py:904:print] pipeline ..................... {'stages': 'auto', 'partition': 'best', 'seed_layers': False, 'activation_checkpoint_interval': 0} [2021-08-14 06:51:22,419] [INFO] [config.py:904:print] pld_enabled .................. False [2021-08-14 06:51:22,419] [INFO] [config.py:904:print] pld_params ................... False [2021-08-14 06:51:22,419] [INFO] [config.py:904:print] prescale_gradients ........... False [2021-08-14 06:51:22,419] [INFO] [config.py:904:print] quantize_change_rate ......... 0.001 [2021-08-14 06:51:22,419] [INFO] [config.py:904:print] quantize_groups .............. 1 [2021-08-14 06:51:22,419] [INFO] [config.py:904:print] quantize_offset .............. 1000 [2021-08-14 06:51:22,419] [INFO] [config.py:904:print] quantize_period .............. 1000 [2021-08-14 06:51:22,419] [INFO] [config.py:904:print] quantize_rounding ............ 0 [2021-08-14 06:51:22,419] [INFO] [config.py:904:print] quantize_start_bits .......... 16 [2021-08-14 06:51:22,419] [INFO] [config.py:904:print] quantize_target_bits ......... 8 [2021-08-14 06:51:22,419] [INFO] [config.py:904:print] quantize_training_enabled .... False [2021-08-14 06:51:22,419] [INFO] [config.py:904:print] quantize_type ................ 0 [2021-08-14 06:51:22,419] [INFO] [config.py:904:print] quantize_verbose ............. False [2021-08-14 06:51:22,419] [INFO] [config.py:904:print] scheduler_name ............... None [2021-08-14 06:51:22,419] [INFO] [config.py:904:print] scheduler_params ............. None [2021-08-14 06:51:22,419] [INFO] [config.py:904:print] sparse_attention ............. None [2021-08-14 06:51:22,419] [INFO] [config.py:904:print] sparse_gradients_enabled ..... False [2021-08-14 06:51:22,419] [INFO] [config.py:904:print] steps_per_print .............. 
2000 [2021-08-14 06:51:22,419] [INFO] [config.py:904:print] tensorboard_enabled .......... False [2021-08-14 06:51:22,419] [INFO] [config.py:904:print] tensorboard_job_name ......... DeepSpeedJobName [2021-08-14 06:51:22,419] [INFO] [config.py:904:print] tensorboard_output_path ...... [2021-08-14 06:51:22,419] [INFO] [config.py:904:print] train_batch_size ............. 1024 [2021-08-14 06:51:22,419] [INFO] [config.py:904:print] train_micro_batch_size_per_gpu 16 [2021-08-14 06:51:22,420] [INFO] [config.py:904:print] use_quantizer_kernel ......... False [2021-08-14 06:51:22,420] [INFO] [config.py:904:print] wall_clock_breakdown ......... False [2021-08-14 06:51:22,420] [INFO] [config.py:904:print] world_size ................... 4 [2021-08-14 06:51:22,420] [INFO] [config.py:904:print] zero_allow_untested_optimizer False [2021-08-14 06:51:22,420] [INFO] [config.py:904:print] zero_config .................. { "stage": 1, "contiguous_gradients": false, "reduce_scatter": true, "reduce_bucket_size": 5.000000e+08, "allgather_partitions": true, "allgather_bucket_size": 5.000000e+08, "overlap_comm": false, "load_from_fp32_weights": true, "elastic_checkpoint": true, "offload_param": null, "offload_optimizer": null, "sub_group_size": 1.000000e+09, "prefetch_bucket_size": 5.000000e+07, "param_persistence_threshold": 1.000000e+05, "max_live_parameters": 1.000000e+09, "max_reuse_distance": 1.000000e+09, "gather_fp16_weights_on_model_save": false, "ignore_unused_parameters": true, "round_robin_gradients": false, "legacy_stage1": false } [2021-08-14 06:51:22,420] [INFO] [config.py:904:print] zero_enabled ................. True [2021-08-14 06:51:22,420] [INFO] [config.py:904:print] zero_optimization_stage ...... 1 [2021-08-14 06:51:22,420] [INFO] [config.py:906:print] json = { "train_micro_batch_size_per_gpu": 16, "train_batch_size": 1.024000e+03, "gradient_clipping": 1.0, "zero_optimization": { "stage": 1 }, "fp16": { "enabled": true, "loss_scale": 0, "loss_scale_window": 500, "hysteresis": 2, "min_loss_scale": 1, "initial_scale_power": 12 }, "steps_per_print": 2.000000e+03, "wall_clock_breakdown": false } [2021-08-14 06:51:22,420] [INFO] [engine.py:76:__init__] CONFIG: micro_batches=16 micro_batch_size=16 [2021-08-14 06:51:22,711] [INFO] [engine.py:134:__init__] RANK=0 STAGE=0 LAYERS=9 [0, 9) STAGE_PARAMS=92107776 (92.108M) TOTAL_PARAMS=1341620224 (1341.620M) UNIQUE_PARAMS=1275559936 (1275.560M) [2021-08-14 06:51:22,711] [INFO] [engine.py:134:__init__] RANK=1 STAGE=0 LAYERS=9 [0, 9) STAGE_PARAMS=92107776 (92.108M) TOTAL_PARAMS=1341620224 (1341.620M) UNIQUE_PARAMS=1275559936 (1275.560M) [2021-08-14 06:51:22,711] [INFO] [engine.py:134:__init__] RANK=2 STAGE=0 LAYERS=9 [0, 9) STAGE_PARAMS=92107776 (92.108M) TOTAL_PARAMS=1341620224 (1341.620M) UNIQUE_PARAMS=1275559936 (1275.560M) [2021-08-14 06:51:22,711] [INFO] [engine.py:134:__init__] RANK=3 STAGE=0 LAYERS=9 [0, 9) STAGE_PARAMS=92107776 (92.108M) TOTAL_PARAMS=1341620224 (1341.620M) UNIQUE_PARAMS=1275559936 (1275.560M) [2021-08-14 06:51:22,711] [INFO] [engine.py:134:__init__] RANK=16 STAGE=1 LAYERS=6 [9, 15) STAGE_PARAMS=75592704 (75.593M) TOTAL_PARAMS=1341620224 (1341.620M) UNIQUE_PARAMS=1275559936 (1275.560M) [2021-08-14 06:51:22,711] [INFO] [engine.py:134:__init__] RANK=17 STAGE=1 LAYERS=6 [9, 15) STAGE_PARAMS=75592704 (75.593M) TOTAL_PARAMS=1341620224 (1341.620M) UNIQUE_PARAMS=1275559936 (1275.560M) [2021-08-14 06:51:22,711] [INFO] [engine.py:134:__init__] RANK=18 STAGE=1 LAYERS=6 [9, 15) STAGE_PARAMS=75592704 (75.593M) TOTAL_PARAMS=1341620224 (1341.620M) 
UNIQUE_PARAMS=1275559936 (1275.560M) [2021-08-14 06:51:22,711] [INFO] [engine.py:134:__init__] RANK=48 STAGE=3 LAYERS=10 [21, 31) STAGE_PARAMS=92111872 (92.112M) TOTAL_PARAMS=1341620224 (1341.620M) UNIQUE_PARAMS=1275559936 (1275.560M) [2021-08-14 06:51:22,711] [INFO] [engine.py:134:__init__] RANK=49 STAGE=3 LAYERS=10 [21, 31) STAGE_PARAMS=92111872 (92.112M) TOTAL_PARAMS=1341620224 (1341.620M) UNIQUE_PARAMS=1275559936 (1275.560M) [2021-08-14 06:51:22,711] [INFO] [engine.py:134:__init__] RANK=50 STAGE=3 LAYERS=10 [21, 31) STAGE_PARAMS=92111872 (92.112M) TOTAL_PARAMS=1341620224 (1341.620M) UNIQUE_PARAMS=1275559936 (1275.560M) [2021-08-14 06:51:22,711] [INFO] [engine.py:134:__init__] RANK=19 STAGE=1 LAYERS=6 [9, 15) STAGE_PARAMS=75592704 (75.593M) TOTAL_PARAMS=1341620224 (1341.620M) UNIQUE_PARAMS=1275559936 (1275.560M) [2021-08-14 06:51:22,711] [INFO] [engine.py:134:__init__] RANK=51 STAGE=3 LAYERS=10 [21, 31) STAGE_PARAMS=92111872 (92.112M) TOTAL_PARAMS=1341620224 (1341.620M) UNIQUE_PARAMS=1275559936 (1275.560M) [2021-08-14 06:51:22,711] [INFO] [engine.py:134:__init__] RANK=35 STAGE=2 LAYERS=6 [15, 21) STAGE_PARAMS=75592704 (75.593M) TOTAL_PARAMS=1341620224 (1341.620M) UNIQUE_PARAMS=1275559936 (1275.560M) [2021-08-14 06:51:22,711] [INFO] [engine.py:134:__init__] RANK=32 STAGE=2 LAYERS=6 [15, 21) STAGE_PARAMS=75592704 (75.593M) TOTAL_PARAMS=1341620224 (1341.620M) UNIQUE_PARAMS=1275559936 (1275.560M) [2021-08-14 06:51:22,711] [INFO] [engine.py:134:__init__] RANK=33 STAGE=2 LAYERS=6 [15, 21) STAGE_PARAMS=75592704 (75.593M) TOTAL_PARAMS=1341620224 (1341.620M) UNIQUE_PARAMS=1275559936 (1275.560M) [2021-08-14 06:51:22,711] [INFO] [engine.py:134:__init__] RANK=34 STAGE=2 LAYERS=6 [15, 21) STAGE_PARAMS=75592704 (75.593M) TOTAL_PARAMS=1341620224 (1341.620M) UNIQUE_PARAMS=1275559936 (1275.560M) > using checkpoint value 0.0001 for learning rate > using checkpoint value 1e-05 for minimum learning rate > using checkpoint value 183105 for warmup iterations > using checkpoint value 126953125 for total number of iterations > using checkpoint value cosine for decay style successfully loaded 4 ZeRO state_dicts for rank 36 successfully loaded 4 ZeRO state_dicts for rank 32 successfully loaded 4 ZeRO state_dicts for rank 46 successfully loaded 4 ZeRO state_dicts for rank 42 successfully loaded 4 ZeRO state_dicts for rank 16 successfully loaded 4 ZeRO state_dicts for rank 34 successfully loaded 4 ZeRO state_dicts for rank 39 successfully loaded 4 ZeRO state_dicts for rank 43 successfully loaded 4 ZeRO state_dicts for rank 58 successfully loaded 4 ZeRO state_dicts for rank 62 successfully loaded 4 ZeRO state_dicts for rank 33 successfully loaded 4 ZeRO state_dicts for rank 18 successfully loaded 4 ZeRO state_dicts for rank 47 successfully loaded 4 ZeRO state_dicts for rank 35 successfully loaded 4 ZeRO state_dicts for rank 24 successfully loaded 4 ZeRO state_dicts for rank 56 successfully loaded 4 ZeRO state_dicts for rank 26 successfully loaded 4 ZeRO state_dicts for rank 30 successfully loaded 4 ZeRO state_dicts for rank 22 successfully loaded 4 ZeRO state_dicts for rank 28 successfully loaded 4 ZeRO state_dicts for rank 27 successfully loaded 4 ZeRO state_dicts for rank 8 successfully loaded 4 ZeRO state_dicts for rank 31 successfully loaded 4 ZeRO state_dicts for rank 59 successfully loaded 4 ZeRO state_dicts for rank 20 successfully loaded 4 ZeRO state_dicts for rank 29 successfully loaded 4 ZeRO state_dicts for rank 57 successfully loaded 4 ZeRO state_dicts for rank 41 successfully loaded 4 ZeRO state_dicts 
for rank 21 successfully loaded 4 ZeRO state_dicts for rank 38 successfully loaded 4 ZeRO state_dicts for rank 45 successfully loaded 4 ZeRO state_dicts for rank 25 successfully loaded 4 ZeRO state_dicts for rank 37 successfully loaded 4 ZeRO state_dicts for rank 17 loading 4 zero partition checkpoints for rank 36 successfully loaded 4 ZeRO state_dicts for rank 63 successfully loaded 4 ZeRO state_dicts for rank 52 successfully loaded 4 ZeRO state_dicts for rank 44 successfully loaded 4 ZeRO state_dicts for rank 4 successfully loaded 4 ZeRO state_dicts for rank 40 successfully loaded 4 ZeRO state_dicts for rank 23 loading 4 zero partition checkpoints for rank 32 successfully loaded 4 ZeRO state_dicts for rank 55 successfully loaded 4 ZeRO state_dicts for rank 60 successfully loaded 4 ZeRO state_dicts for rank 11 successfully loaded 4 ZeRO state_dicts for rank 48 successfully loaded 4 ZeRO state_dicts for rank 6 successfully loaded 4 ZeRO state_dicts for rank 5 successfully loaded 4 ZeRO state_dicts for rank 19 successfully loaded 4 ZeRO state_dicts for rank 51 successfully loaded 4 ZeRO state_dicts for rank 50 successfully loaded 4 ZeRO state_dicts for rank 7 successfully loaded 4 ZeRO state_dicts for rank 49 successfully loaded 4 ZeRO state_dicts for rank 54 successfully loaded 4 ZeRO state_dicts for rank 10 loading 4 zero partition checkpoints for rank 46 successfully loaded 4 ZeRO state_dicts for rank 61 successfully loaded 4 ZeRO state_dicts for rank 0 successfully loaded 4 ZeRO state_dicts for rank 53 successfully loaded 4 ZeRO state_dicts for rank 12 successfully loaded 4 ZeRO state_dicts for rank 15 loading 4 zero partition checkpoints for rank 42 loading 4 zero partition checkpoints for rank 16 successfully loaded 4 ZeRO state_dicts for rank 14 loading 4 zero partition checkpoints for rank 34 loading 4 zero partition checkpoints for rank 39 successfully loaded 4 ZeRO state_dicts for rank 3 successfully loaded 4 ZeRO state_dicts for rank 1 successfully loaded 4 ZeRO state_dicts for rank 2 loading 4 zero partition checkpoints for rank 43 successfully loaded 4 ZeRO state_dicts for rank 9 loading 4 zero partition checkpoints for rank 24 loading 4 zero partition checkpoints for rank 33 loading 4 zero partition checkpoints for rank 35 successfully loaded 4 ZeRO state_dicts for rank 13 loading 4 zero partition checkpoints for rank 47 loading 4 zero partition checkpoints for rank 18 loading 4 zero partition checkpoints for rank 28 loading 4 zero partition checkpoints for rank 26 loading 4 zero partition checkpoints for rank 30 loading 4 zero partition checkpoints for rank 27 loading 4 zero partition checkpoints for rank 22 loading 4 zero partition checkpoints for rank 38 loading 4 zero partition checkpoints for rank 31 loading 4 zero partition checkpoints for rank 37 loading 4 zero partition checkpoints for rank 20 loading 4 zero partition checkpoints for rank 41 loading 4 zero partition checkpoints for rank 25 loading 4 zero partition checkpoints for rank 21 loading 4 zero partition checkpoints for rank 45 loading 4 zero partition checkpoints for rank 29 loading 4 zero partition checkpoints for rank 44 loading 4 zero partition checkpoints for rank 62 loading 4 zero partition checkpoints for rank 17 loading 4 zero partition checkpoints for rank 58 loading 4 zero partition checkpoints for rank 56 loading 4 zero partition checkpoints for rank 40 loading 4 zero partition checkpoints for rank 23 loading 4 zero partition checkpoints for rank 59 loading 4 zero partition checkpoints for rank 8 
loading 4 zero partition checkpoints for rank 19 loading 4 zero partition checkpoints for rank 57 loading 4 zero partition checkpoints for rank 63 loading 4 zero partition checkpoints for rank 52 loading 4 zero partition checkpoints for rank 4 loading 4 zero partition checkpoints for rank 60 loading 4 zero partition checkpoints for rank 55 loading 4 zero partition checkpoints for rank 11 loading 4 zero partition checkpoints for rank 7 loading 4 zero partition checkpoints for rank 6 loading 4 zero partition checkpoints for rank 48 loading 4 zero partition checkpoints for rank 51 loading 4 zero partition checkpoints for rank 5 loading 4 zero partition checkpoints for rank 54 loading 4 zero partition checkpoints for rank 49 loading 4 zero partition checkpoints for rank 50 loading 4 zero partition checkpoints for rank 10 loading 4 zero partition checkpoints for rank 53 loading 4 zero partition checkpoints for rank 61 loading 4 zero partition checkpoints for rank 0 checkpoint version 3.0 loading 4 zero partition checkpoints for rank 12 loading 4 zero partition checkpoints for rank 15 loading 4 zero partition checkpoints for rank 3 loading 4 zero partition checkpoints for rank 1 loading 4 zero partition checkpoints for rank 2 loading 4 zero partition checkpoints for rank 14 loading 4 zero partition checkpoints for rank 9 loading 4 zero partition checkpoints for rank 13 successfully loaded checkpoint from /gpfsscratch/rech/six/uty16tp/synched_exps/tr4-1B3-full//checkpoints at iteration 18691 time (ms) | load-checkpoint: 2609.70 [after model, optimizer, and learning rate scheduler are built] datetime: 2021-08-14 06:51:25 > building train, validation, and test datasets ... > datasets target sizes (minimum size): train: 146484375 validation: 14745600 test: 102400 > building train, validation, and test datasets for GPT ... > building dataset index ... reading sizes... reading pointers... reading document index... creating numpy buffer of mmap... creating memory view of numpy buffer... 
> finished creating indexed dataset in 6.579407 seconds number of documents: 364868892 > dataset split: train: document indices in [0, 346260578) total of 346260578 documents validation: document indices in [346260578, 364504023) total of 18243445 documents test: document indices in [364504023, 364868892) total of 364869 documents > loading doc-idx mapping from /gpfsscratch/rech/six/commun/datasets-custom/c4_preprocessing/c4_en_train_text_document_train_indexmap_146484375ns_2048sl_1234s_doc_idx.npy > loading sample-idx mapping from /gpfsscratch/rech/six/commun/datasets-custom/c4_preprocessing/c4_en_train_text_document_train_indexmap_146484375ns_2048sl_1234s_sample_idx.npy > loading shuffle-idx mapping from /gpfsscratch/rech/six/commun/datasets-custom/c4_preprocessing/c4_en_train_text_document_train_indexmap_146484375ns_2048sl_1234s_shuffle_idx.npy loaded indexed file in 0.066 seconds total number of samples: 171386255 total number of epochs: 2 > loading doc-idx mapping from /gpfsscratch/rech/six/commun/datasets-custom/c4_preprocessing/c4_en_train_text_document_valid_indexmap_14745600ns_2048sl_1234s_doc_idx.npy > loading sample-idx mapping from /gpfsscratch/rech/six/commun/datasets-custom/c4_preprocessing/c4_en_train_text_document_valid_indexmap_14745600ns_2048sl_1234s_sample_idx.npy > loading shuffle-idx mapping from /gpfsscratch/rech/six/commun/datasets-custom/c4_preprocessing/c4_en_train_text_document_valid_indexmap_14745600ns_2048sl_1234s_shuffle_idx.npy loaded indexed file in 0.068 seconds total number of samples: 18059589 total number of epochs: 4 > loading doc-idx mapping from /gpfsscratch/rech/six/commun/datasets-custom/c4_preprocessing/c4_en_train_text_document_test_indexmap_102400ns_2048sl_1234s_doc_idx.npy > loading sample-idx mapping from /gpfsscratch/rech/six/commun/datasets-custom/c4_preprocessing/c4_en_train_text_document_test_indexmap_102400ns_2048sl_1234s_sample_idx.npy > loading shuffle-idx mapping from /gpfsscratch/rech/six/commun/datasets-custom/c4_preprocessing/c4_en_train_text_document_test_indexmap_102400ns_2048sl_1234s_shuffle_idx.npy loaded indexed file in 0.013 seconds total number of samples: 180044 total number of epochs: 2 > finished creating GPT datasets ... [after dataloaders are built] datetime: 2021-08-14 06:51:38 done with setup ... training ... 
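The dataset target sizes above follow from the batch geometry in the config dump (micro-batch 16 x gradient accumulation 16 x data-parallel 4 = global batch 1024) and the 2048-token sequence length visible in the index-map filenames. A quick sanity check of those numbers (the eval-interval/eval-iters values of 1000 and 100 are inferred from the arithmetic, not printed in this excerpt):

    # Sanity-checking the "> datasets target sizes" lines above.
    micro_batch, grad_accum, data_parallel = 16, 16, 4
    global_batch = micro_batch * grad_accum * data_parallel
    assert global_batch == 1024                            # train_batch_size in the config dump

    seq_len = 2048                                         # the "2048sl" in the index-map paths
    train_samples = 146_484_375
    assert train_samples * seq_len == 300_000_000_000      # i.e. a 300B-token training budget
    assert train_samples // global_batch == 143_051        # "setting training iterations to 143051"

    # Validation/test targets are consistent with evaluating 100 iterations every
    # 1000 training steps (both values inferred, not shown in this excerpt):
    assert (143_051 // 1000 + 1) * 100 * global_batch == 14_745_600
    assert 100 * global_batch == 102_400

At those targets the train split is consumed twice and the validation split four times, which matches the "total number of epochs" lines above.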
time (ms) | model-and-optimizer-setup: 4255.71 | train/valid/test-data-iterators-setup: 12196.23 [before the start of training step] datetime: 2021-08-14 06:51:38 [2021-08-14 06:51:38,415] [INFO] [checkpointing.py:408:forward] Activation Checkpointing Information [2021-08-14 06:51:38,415] [INFO] [checkpointing.py:409:forward] ----Partition Activations False, CPU CHECKPOINTING False [2021-08-14 06:51:38,415] [INFO] [checkpointing.py:412:forward] ----contiguous Memory Checkpointing False with 24 total layers [2021-08-14 06:51:38,416] [INFO] [checkpointing.py:415:forward] ----Synchronization False [2021-08-14 06:51:38,416] [INFO] [checkpointing.py:416:forward] ----Profiling time in checkpointing False [Rank 1] (after 18800 iterations) memory (MB) | allocated: 504.96142578125 | max allocated: 7306.77001953125 | reserved: 10410.0 | max reserved: 10410.0 [Rank 33] (after 18800 iterations) memory (MB) | allocated: 434.46240234375 | max allocated: 5347.14501953125 | reserved: 8316.0 | max reserved: 8316.0 [Rank 17] (after 18800 iterations) memory (MB) | allocated: 434.46240234375 | max allocated: 6147.1455078125 | reserved: 9084.0 | max reserved: 9084.0 [Rank 19] (after 18800 iterations) memory (MB) | allocated: 434.46240234375 | max allocated: 6147.1455078125 | reserved: 8732.0 | max reserved: 8732.0 [Rank 35] (after 18800 iterations) memory (MB) | allocated: 434.46240234375 | max allocated: 5347.14501953125 | reserved: 8316.0 | max reserved: 8316.0 [Rank 34] (after 18800 iterations) memory (MB) | allocated: 434.46240234375 | max allocated: 5347.14501953125 | reserved: 8348.0 | max reserved: 8348.0 [Rank 3] (after 18800 iterations) memory (MB) | allocated: 504.96142578125 | max allocated: 7306.77001953125 | reserved: 10410.0 | max reserved: 10410.0 [Rank 2] (after 18800 iterations) memory (MB) | allocated: 504.96142578125 | max allocated: 7306.77001953125 | reserved: 10282.0 | max reserved: 10282.0 [Rank 18] (after 18800 iterations) memory (MB) | allocated: 434.46240234375 | max allocated: 6147.1455078125 | reserved: 9244.0 | max reserved: 9244.0 iteration 18800/ 143051 | consumed samples: 19251200 | elapsed time per iteration (ms): 11082.2 | learning rate: 9.509E-05 | global batch size: 1024 | lm loss: 2.728995E+00 | loss scale: 524288.0 | grad norm: 41017.825 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | [Rank 16] (after 18800 iterations) memory (MB) | allocated: 434.46240234375 | max allocated: 6147.1455078125 | reserved: 9116.0 | max reserved: 9116.0 [Rank 0] (after 18800 iterations) memory (MB) | allocated: 504.96142578125 | max allocated: 7307.27001953125 | reserved: 10282.0 | max reserved: 10282.0 [Rank 32] (after 18800 iterations) memory (MB) | allocated: 434.46240234375 | max allocated: 5347.14501953125 | reserved: 8476.0 | max reserved: 8476.0 [Rank 49] (after 18800 iterations) memory (MB) | allocated: 3011.2392578125 | max allocated: 6739.93017578125 | reserved: 10358.0 | max reserved: 10358.0 [Rank 50] (after 18800 iterations) memory (MB) | allocated: 3011.2392578125 | max allocated: 6739.93017578125 | reserved: 10358.0 | max reserved: 10358.0 [Rank 51] (after 18800 iterations) memory (MB) | allocated: 3011.2392578125 | max allocated: 6739.93017578125 | reserved: 10358.0 | max reserved: 10358.0 [Rank 48] (after 18800 iterations) memory (MB) | allocated: 3011.2392578125 | max allocated: 6739.93017578125 | reserved: 10358.0 | max reserved: 10358.0 time (ms) iteration 19000/ 143051 | consumed samples: 19456000 | elapsed time per iteration (ms): 
10995.8 | learning rate: 9.498E-05 | global batch size: 1024 | lm loss: 2.722016E+00 | loss scale: 1048576.0 | grad norm: 96853.240 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | time (ms) ------------------------------------------------------------------------------------------------- validation loss at iteration 19000 | lm loss value: 2.670474E+00 | lm loss PPL: 1.444681E+01 | ------------------------------------------------------------------------------------------------- iteration 19200/ 143051 | consumed samples: 19660800 | elapsed time per iteration (ms): 12496.3 | learning rate: 9.488E-05 | global batch size: 1024 | lm loss: 2.723232E+00 | loss scale: 1048576.0 | grad norm: 101557.877 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | time (ms) iteration 19400/ 143051 | consumed samples: 19865600 | elapsed time per iteration (ms): 10997.2 | learning rate: 9.477E-05 | global batch size: 1024 | lm loss: 2.724056E+00 | loss scale: 1048576.0 | grad norm: 112444.763 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | time (ms) saving checkpoint at iteration 19500 to /gpfsscratch/rech/six/uty16tp/synched_exps/tr4-1B3-full//checkpoints [2021-08-14 09:25:05,783] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: /gpfsscratch/rech/six/uty16tp/synched_exps/tr4-1B3-full//checkpoints/global_step19500/mp_rank_00_model_states.pt successfully saved checkpoint at iteration 19500 to /gpfsscratch/rech/six/uty16tp/synched_exps/tr4-1B3-full//checkpoints time (ms) | save-checkpoint: 4025.86 iteration 19600/ 143051 | consumed samples: 20070400 | elapsed time per iteration (ms): 11029.4 | learning rate: 9.466E-05 | global batch size: 1024 | lm loss: 2.723614E+00 | loss scale: 2097152.0 | grad norm: 259635.286 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | time (ms) iteration 19800/ 143051 | consumed samples: 20275200 | elapsed time per iteration (ms): 11024.8 | learning rate: 9.456E-05 | global batch size: 1024 | lm loss: 2.724031E+00 | loss scale: 1048576.0 | grad norm: 113610.779 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | time (ms) [2021-08-14 10:57:00,597] [INFO] [logging.py:68:log_dist] [Rank 0] step=20000, skipped=35, lr=[9.444569362952077e-05, 9.444569362952077e-05], mom=[(0.9, 0.999), (0.9, 0.999)] iteration 20000/ 143051 | consumed samples: 20480000 | elapsed time per iteration (ms): 11028.2 | learning rate: 9.445E-05 | global batch size: 1024 | lm loss: 2.724005E+00 | loss scale: 1048576.0 | grad norm: 128319.374 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | time (ms) steps: 20000 loss: 2.7178 iter time (s): 0.006 samples/sec: 185890.315 ------------------------------------------------------------------------------------------------- validation loss at iteration 20000 | lm loss value: 2.675483E+00 | lm loss PPL: 1.451936E+01 | ------------------------------------------------------------------------------------------------- iteration 20200/ 143051 | consumed samples: 20684800 | elapsed time per iteration (ms): 12550.7 | learning rate: 9.434E-05 | global batch size: 1024 | lm loss: 2.725625E+00 | loss scale: 1048576.0 | grad norm: 118072.657 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | time (ms) iteration 20400/ 143051 | consumed samples: 20889600 | elapsed time per iteration (ms): 11010.2 | learning rate: 9.422E-05 | global batch size: 
1024 | lm loss: 2.723615E+00 | loss scale: 1048576.0 | grad norm: 200255.720 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | time (ms) iteration 20600/ 143051 | consumed samples: 21094400 | elapsed time per iteration (ms): 11017.7 | learning rate: 9.411E-05 | global batch size: 1024 | lm loss: 2.727028E+00 | loss scale: 1048576.0 | grad norm: 167987.697 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | time (ms) iteration 20800/ 143051 | consumed samples: 21299200 | elapsed time per iteration (ms): 11026.8 | learning rate: 9.400E-05 | global batch size: 1024 | lm loss: 2.721147E+00 | loss scale: 1048576.0 | grad norm: 129443.145 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | time (ms) iteration 21000/ 143051 | consumed samples: 21504000 | elapsed time per iteration (ms): 11021.6 | learning rate: 9.389E-05 | global batch size: 1024 | lm loss: 2.721672E+00 | loss scale: 1048576.0 | grad norm: 132878.342 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | time (ms) ------------------------------------------------------------------------------------------------- validation loss at iteration 21000 | lm loss value: 2.668458E+00 | lm loss PPL: 1.441772E+01 | ------------------------------------------------------------------------------------------------- saving checkpoint at iteration 21000 to /gpfsscratch/rech/six/uty16tp/synched_exps/tr4-1B3-full//checkpoints [2021-08-14 14:10:50,179] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: /gpfsscratch/rech/six/uty16tp/synched_exps/tr4-1B3-full//checkpoints/global_step21000/mp_rank_00_model_states.pt successfully saved checkpoint at iteration 21000 to /gpfsscratch/rech/six/uty16tp/synched_exps/tr4-1B3-full//checkpoints time (ms) | save-checkpoint: 3184.07 iteration 21200/ 143051 | consumed samples: 21708800 | elapsed time per iteration (ms): 12567.9 | learning rate: 9.377E-05 | global batch size: 1024 | lm loss: 2.718020E+00 | loss scale: 2097152.0 | grad norm: 260125.858 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | time (ms) iteration 21400/ 143051 | consumed samples: 21913600 | elapsed time per iteration (ms): 11039.2 | learning rate: 9.366E-05 | global batch size: 1024 | lm loss: 2.718357E+00 | loss scale: 1048576.0 | grad norm: 119133.556 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | time (ms) iteration 21600/ 143051 | consumed samples: 22118400 | elapsed time per iteration (ms): 11047.4 | learning rate: 9.354E-05 | global batch size: 1024 | lm loss: 2.714213E+00 | loss scale: 1048576.0 | grad norm: 125452.096 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | time (ms) iteration 21800/ 143051 | consumed samples: 22323200 | elapsed time per iteration (ms): 11046.7 | learning rate: 9.342E-05 | global batch size: 1024 | lm loss: 2.714619E+00 | loss scale: 1048576.0 | grad norm: 125570.215 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | time (ms) [2021-08-14 17:14:54,168] [INFO] [logging.py:68:log_dist] [Rank 0] step=22000, skipped=43, lr=[9.33009938419849e-05, 9.33009938419849e-05], mom=[(0.9, 0.999), (0.9, 0.999)] steps: 22000 loss: 2.7238 iter time (s): 0.006 samples/sec: 185859.848 iteration 22000/ 143051 | consumed samples: 22528000 | elapsed time per iteration (ms): 11039.7 | learning rate: 9.330E-05 | global batch size: 1024 | lm loss: 2.711757E+00 | loss 
scale: 1048576.0 | grad norm: 188116.024 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | time (ms) ------------------------------------------------------------------------------------------------- validation loss at iteration 22000 | lm loss value: 2.661935E+00 | lm loss PPL: 1.432398E+01 | ------------------------------------------------------------------------------------------------- iteration 22200/ 143051 | consumed samples: 22732800 | elapsed time per iteration (ms): 12546.7 | learning rate: 9.318E-05 | global batch size: 1024 | lm loss: 2.711640E+00 | loss scale: 1048576.0 | grad norm: 124876.755 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | time (ms) iteration 22400/ 143051 | consumed samples: 22937600 | elapsed time per iteration (ms): 11030.9 | learning rate: 9.306E-05 | global batch size: 1024 | lm loss: 2.709934E+00 | loss scale: 1048576.0 | grad norm: 113756.712 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | time (ms) saving checkpoint at iteration 22500 to /gpfsscratch/rech/six/uty16tp/synched_exps/tr4-1B3-full//checkpoints [2021-08-14 18:51:53,734] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: /gpfsscratch/rech/six/uty16tp/synched_exps/tr4-1B3-full//checkpoints/global_step22500/mp_rank_00_model_states.pt successfully saved checkpoint at iteration 22500 to /gpfsscratch/rech/six/uty16tp/synched_exps/tr4-1B3-full//checkpoints time (ms) | save-checkpoint: 3102.18 iteration 22600/ 143051 | consumed samples: 23142400 | elapsed time per iteration (ms): 11018.7 | learning rate: 9.294E-05 | global batch size: 1024 | lm loss: 3.026591E+00 | loss scale: 16384.0 | grad norm: 1670.748 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | time (ms) iteration 22800/ 143051 | consumed samples: 23347200 | elapsed time per iteration (ms): 10989.2 | learning rate: 9.282E-05 | global batch size: 1024 | lm loss: 2.748755E+00 | loss scale: 16384.0 | grad norm: 1954.184 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | time (ms) iteration 23000/ 143051 | consumed samples: 23552000 | elapsed time per iteration (ms): 10998.2 | learning rate: 9.269E-05 | global batch size: 1024 | lm loss: 2.722062E+00 | loss scale: 32768.0 | grad norm: 4164.056 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | time (ms) ------------------------------------------------------------------------------------------------- validation loss at iteration 23000 | lm loss value: 2.661246E+00 | lm loss PPL: 1.431411E+01 | ------------------------------------------------------------------------------------------------- iteration 23200/ 143051 | consumed samples: 23756800 | elapsed time per iteration (ms): 12507.6 | learning rate: 9.257E-05 | global batch size: 1024 | lm loss: 2.712610E+00 | loss scale: 32768.0 | grad norm: 3871.525 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | time (ms) iteration 23400/ 143051 | consumed samples: 23961600 | elapsed time per iteration (ms): 11000.6 | learning rate: 9.244E-05 | global batch size: 1024 | lm loss: 2.709696E+00 | loss scale: 32768.0 | grad norm: 3150.127 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | time (ms) iteration 23600/ 143051 | consumed samples: 24166400 | elapsed time per iteration (ms): 10980.4 | learning rate: 9.232E-05 | global batch size: 1024 | lm loss: 2.706349E+00 | loss scale: 
65536.0 | grad norm: 6942.192 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | time (ms) iteration 23800/ 143051 | consumed samples: 24371200 | elapsed time per iteration (ms): 10996.9 | learning rate: 9.219E-05 | global batch size: 1024 | lm loss: 2.703401E+00 | loss scale: 65536.0 | grad norm: 7866.693 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | time (ms) [2021-08-14 23:31:48,028] [INFO] [logging.py:68:log_dist] [Rank 0] step=24000, skipped=51, lr=[9.20584436172674e-05, 9.20584436172674e-05], mom=[(0.9, 0.999), (0.9, 0.999)] steps: 24000 loss: 2.6949 iter time (s): 0.005 samples/sec: 186348.103 iteration 24000/ 143051 | consumed samples: 24576000 | elapsed time per iteration (ms): 11000.1 | learning rate: 9.206E-05 | global batch size: 1024 | lm loss: 2.700356E+00 | loss scale: 131072.0 | grad norm: 18542.543 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | time (ms) ------------------------------------------------------------------------------------------------- validation loss at iteration 24000 | lm loss value: 2.649487E+00 | lm loss PPL: 1.414678E+01 | ------------------------------------------------------------------------------------------------- saving checkpoint at iteration 24000 to /gpfsscratch/rech/six/uty16tp/synched_exps/tr4-1B3-full//checkpoints [2021-08-14 23:36:53,103] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: /gpfsscratch/rech/six/uty16tp/synched_exps/tr4-1B3-full//checkpoints/global_step24000/mp_rank_00_model_states.pt successfully saved checkpoint at iteration 24000 to /gpfsscratch/rech/six/uty16tp/synched_exps/tr4-1B3-full//checkpoints time (ms) | save-checkpoint: 3157.45 iteration 24200/ 143051 | consumed samples: 24780800 | elapsed time per iteration (ms): 12529.9 | learning rate: 9.193E-05 | global batch size: 1024 | lm loss: 2.697063E+00 | loss scale: 131072.0 | grad norm: 16316.797 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | time (ms) iteration 24400/ 143051 | consumed samples: 24985600 | elapsed time per iteration (ms): 11012.1 | learning rate: 9.180E-05 | global batch size: 1024 | lm loss: 2.696595E+00 | loss scale: 131072.0 | grad norm: 15553.255 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | time (ms) iteration 24600/ 143051 | consumed samples: 25190400 | elapsed time per iteration (ms): 11049.5 | learning rate: 9.167E-05 | global batch size: 1024 | lm loss: 2.694385E+00 | loss scale: 262144.0 | grad norm: 31189.333 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | time (ms) iteration 24800/ 143051 | consumed samples: 25395200 | elapsed time per iteration (ms): 11028.7 | learning rate: 9.153E-05 | global batch size: 1024 | lm loss: 2.693230E+00 | loss scale: 262144.0 | grad norm: 43832.870 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | time (ms) saving checkpoint at iteration 24998 to /gpfsscratch/rech/six/uty16tp/synched_exps/tr4-1B3-full//checkpoints [2021-08-15 02:40:16,706] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: /gpfsscratch/rech/six/uty16tp/synched_exps/tr4-1B3-full//checkpoints/global_step24998/mp_rank_00_model_states.pt successfully saved checkpoint at iteration 24998 to /gpfsscratch/rech/six/uty16tp/synched_exps/tr4-1B3-full//checkpoints time (ms) | save-checkpoint: 3141.03 [exiting program after 1190.111633213361 minutes] datetime: 2021-08-15 02:40:17
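Two things are worth reading off the iteration lines above. First, the reported perplexity is simply exp(lm loss). Second, the loss-scale column shows DeepSpeed's dynamic fp16 loss scaling: the scale doubles after a window of overflow-free steps and is cut back whenever a step overflows and is skipped, which is what happens between iterations 22400 and 22600 where it collapses from 1048576 to 16384, the lm loss briefly spikes to 3.03, and the skipped counter in the step= lines creeps from 35 (step 20000) to 51 (step 24000). A minimal sketch of that behaviour, simplified from the dynamic_loss_scale_args in the config dump (illustrative only; it ignores the delayed_shift=2 hysteresis and is not DeepSpeed's implementation):

    import math

    # Perplexity is the exponential of the reported lm loss:
    assert abs(math.exp(2.649487) - 14.14678) < 1e-3      # validation at iteration 24000

    class DynamicLossScaler:
        """Simplified dynamic fp16 loss scaler matching the log's qualitative behaviour."""
        def __init__(self, init_scale=4096.0, scale_window=500, min_scale=1.0):
            self.scale, self.window, self.min_scale = init_scale, scale_window, min_scale
            self.good_steps = 0

        def update(self, overflow: bool) -> None:
            if overflow:                       # the step is skipped and the scale backed off
                self.scale = max(self.scale / 2.0, self.min_scale)
                self.good_steps = 0
            else:                              # after scale_window clean steps, grow again
                self.good_steps += 1
                if self.good_steps == self.window:
                    self.scale *= 2.0
                    self.good_steps = 0

At roughly 11 s per 1024-sample iteration of 2048-token sequences, the job is moving on the order of 190k tokens/s across its 64 GPUs; it checkpoints every 1500 iterations (19500, 21000, 22500, 24000) plus once more at 24998 before exiting cleanly after its ~20-hour allocation.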