+ source /gpfswork/rech/six/commun/start-prod ++ module purge ++ unset _mlshdbg ++ '[' 0 = 1 ']' ++ unset _mlre _mlIFS ++ '[' -n x ']' ++ _mlIFS=' ' ++ IFS=' ' ++ for _mlv in ${MODULES_RUN_QUARANTINE:-} ++ '[' LD_LIBRARY_PATH = LD_LIBRARY_PATH -a LD_LIBRARY_PATH = LD_LIBRARY_PATH ']' +++ eval 'echo ${LD_LIBRARY_PATH+x}' ++++ echo x ++ '[' -n x ']' +++ eval 'echo ${LD_LIBRARY_PATH}' ++++ echo /gpfslocalsys/slurm/current/lib/slurm:/gpfslocalsys/slurm/current/lib ++ _mlre='LD_LIBRARY_PATH_modquar='\''/gpfslocalsys/slurm/current/lib/slurm:/gpfslocalsys/slurm/current/lib'\'' ' ++ _mlrv=MODULES_RUNENV_LD_LIBRARY_PATH +++ eval 'echo ${MODULES_RUNENV_LD_LIBRARY_PATH:-}' ++++ echo ++ _mlre='LD_LIBRARY_PATH_modquar='\''/gpfslocalsys/slurm/current/lib/slurm:/gpfslocalsys/slurm/current/lib'\'' LD_LIBRARY_PATH='\'''\'' ' ++ '[' -n 'LD_LIBRARY_PATH_modquar='\''/gpfslocalsys/slurm/current/lib/slurm:/gpfslocalsys/slurm/current/lib'\'' LD_LIBRARY_PATH='\'''\'' ' ']' +++ eval 'LD_LIBRARY_PATH_modquar='\''/gpfslocalsys/slurm/current/lib/slurm:/gpfslocalsys/slurm/current/lib'\''' 'LD_LIBRARY_PATH='\'''\''' /gpfslocalsup/spack_soft/tcl/8.6.8/gcc-4.8.5-5nqkfcnctewdheju62zvqbsonnzszr6m/bin/tclsh /gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/libexec/modulecmd.tcl bash '"$@"' ++++ LD_LIBRARY_PATH_modquar=/gpfslocalsys/slurm/current/lib/slurm:/gpfslocalsys/slurm/current/lib ++++ LD_LIBRARY_PATH= ++++ /gpfslocalsup/spack_soft/tcl/8.6.8/gcc-4.8.5-5nqkfcnctewdheju62zvqbsonnzszr6m/bin/tclsh /gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/libexec/modulecmd.tcl bash purge ++ eval ++ _mlstatus=0 ++ '[' -n x ']' ++ IFS=' ' ++ unset _mlre _mlv _mlrv _mlIFS ++ '[' -n '' ']' ++ unset _mlshdbg ++ return 0 ++ module load pytorch-gpu/py3/1.8.1 ++ unset _mlshdbg ++ '[' 0 = 1 ']' ++ unset _mlre _mlIFS ++ '[' -n x ']' ++ _mlIFS=' ' ++ IFS=' ' ++ for _mlv in ${MODULES_RUN_QUARANTINE:-} ++ '[' LD_LIBRARY_PATH = LD_LIBRARY_PATH -a LD_LIBRARY_PATH = LD_LIBRARY_PATH ']' +++ eval 'echo ${LD_LIBRARY_PATH+x}' ++++ echo x ++ '[' -n x ']' +++ eval 'echo ${LD_LIBRARY_PATH}' ++++ echo /gpfslocalsys/slurm/current/lib/slurm:/gpfslocalsys/slurm/current/lib ++ _mlre='LD_LIBRARY_PATH_modquar='\''/gpfslocalsys/slurm/current/lib/slurm:/gpfslocalsys/slurm/current/lib'\'' ' ++ _mlrv=MODULES_RUNENV_LD_LIBRARY_PATH +++ eval 'echo ${MODULES_RUNENV_LD_LIBRARY_PATH:-}' ++++ echo ++ _mlre='LD_LIBRARY_PATH_modquar='\''/gpfslocalsys/slurm/current/lib/slurm:/gpfslocalsys/slurm/current/lib'\'' LD_LIBRARY_PATH='\'''\'' ' ++ '[' -n 'LD_LIBRARY_PATH_modquar='\''/gpfslocalsys/slurm/current/lib/slurm:/gpfslocalsys/slurm/current/lib'\'' LD_LIBRARY_PATH='\'''\'' ' ']' +++ eval 'LD_LIBRARY_PATH_modquar='\''/gpfslocalsys/slurm/current/lib/slurm:/gpfslocalsys/slurm/current/lib'\''' 'LD_LIBRARY_PATH='\'''\''' /gpfslocalsup/spack_soft/tcl/8.6.8/gcc-4.8.5-5nqkfcnctewdheju62zvqbsonnzszr6m/bin/tclsh /gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/libexec/modulecmd.tcl bash '"$@"' ++++ LD_LIBRARY_PATH_modquar=/gpfslocalsys/slurm/current/lib/slurm:/gpfslocalsys/slurm/current/lib ++++ LD_LIBRARY_PATH= ++++ /gpfslocalsup/spack_soft/tcl/8.6.8/gcc-4.8.5-5nqkfcnctewdheju62zvqbsonnzszr6m/bin/tclsh /gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/libexec/modulecmd.tcl bash load pytorch-gpu/py3/1.8.1 Loading pytorch-gpu/py3/1.8.1 Loading requirement: gcc/8.3.1 cuda/11.2 nccl/2.9.6-1-cuda 
cudnn/8.1.1.33-cuda intel-mkl/2020.4 openmpi/4.1.1-cuda magma/2.5.4-cuda ++ eval 'PSM2_GPUDIRECT=1;' export 'PSM2_GPUDIRECT; MODULES_LMPREREQ=nccl/2.9.6-1-cuda\&cuda/11.2\|cuda/10.2:cudnn/8.1.1.33-cuda\&cuda/11.2\|cuda/10.2:openmpi/4.1.1-cuda\&nvidia-compilers/21.3\|nvidia-compilers/20.11\|nvidia-compilers/20.7\|pgi/20.4\|gcc/8.3.1\&cuda/11.2\|cuda/10.2:magma/2.5.4-cuda\&intel-compilers/19.0.4\|gcc/8.3.1\&cuda/11.2\|cuda/10.2:pytorch-gpu/py3/1.8.1\&gcc/8.3.1\&cuda/11.2\&nccl/2.9.6-1-cuda\&cudnn/8.1.1.33-cuda\&intel-mkl/2020.4\&openmpi/4.1.1-cuda\&magma/2.5.4-cuda;' export 'MODULES_LMPREREQ; CPATH=/gpfslocalsup/spack_soft/magma/2.5.4/gcc-8.3.1-d4zfylwpieuqujdgbuwv5yu6voe4okke/include:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/include:/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/mkl/include:/gpfslocalsup/spack_soft/cudnn/8.1.1.33-11.2/gcc-8.3.1-dgsfwc7e23vzc36jopc5nxyunptm2ieh/include:/gpfslocalsup/spack_soft/nccl/2.9.6-1/gcc-8.3.1-2c3rwcolksekujszxrg7r72uxmekxx2y/include;' export 'CPATH; LD_LIBRARY_PATH=/gpfslocalsup/spack_soft/magma/2.5.4/gcc-8.3.1-d4zfylwpieuqujdgbuwv5yu6voe4okke/lib:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/lib:/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/mkl/lib/intel64_lin:/gpfslocalsup/spack_soft/cudnn/8.1.1.33-11.2/gcc-8.3.1-dgsfwc7e23vzc36jopc5nxyunptm2ieh/lib64:/gpfslocalsup/spack_soft/nccl/2.9.6-1/gcc-8.3.1-2c3rwcolksekujszxrg7r72uxmekxx2y/lib:/gpfslocalsys/cuda/11.2/nvvm/lib64:/gpfslocalsys/cuda/11.2/extras/CUPTI/lib64:/gpfslocalsys/cuda/11.2/lib64:/gpfslocalsys/cuda/11.2/samples/common/lib/linux/x86_64:/gpfslocalsys/cuda/11.2/targets/x86_64-linux/lib:/gpfslocalsys/slurm/current/lib/slurm:/gpfslocalsys/slurm/current/lib;' export 'LD_LIBRARY_PATH; SLURM_MPI_TYPE=pmix;' export 'SLURM_MPI_TYPE; OMPI_MCA_mtl=psm2;' export 'OMPI_MCA_mtl; MANPATH=/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/share/man:/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/man/common:/gpfslocalsys/cuda/11.2/doc/man::/opt/c3/man:/opt/clmgr/man:/opt/sgi/share/man:/opt/clmgr/share/man:/opt/clmgr/lib/cm-cli/man:/gpfslocalsys/slurm/current/share/man:/usr/share/catman:/usr/share/man:/usr/catman:/usr/man;' export 'MANPATH; LIBRARY_PATH=/gpfslocalsup/spack_soft/magma/2.5.4/gcc-8.3.1-d4zfylwpieuqujdgbuwv5yu6voe4okke/lib:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/lib:/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/mkl/lib/intel64_lin:/gpfslocalsup/spack_soft/cudnn/8.1.1.33-11.2/gcc-8.3.1-dgsfwc7e23vzc36jopc5nxyunptm2ieh/lib64:/gpfslocalsup/spack_soft/nccl/2.9.6-1/gcc-8.3.1-2c3rwcolksekujszxrg7r72uxmekxx2y/lib:/gpfslocalsys/cuda/11.2/lib64/stubs:/gpfslocalsys/cuda/11.2/nvvm/lib64:/gpfslocalsys/cuda/11.2/extras/CUPTI/lib64:/gpfslocalsys/cuda/11.2/lib64:/gpfslocalsys/cuda/11.2/samples/common/lib/linux/x86_64:/gpfslocalsys/cuda/11.2/targets/x86_64-linux/lib:/gpfslocalsys/slurm/current/lib/slurm:/gpfslocalsys/slurm/current/lib;' export 'LIBRARY_PATH; MPIF77=/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin/mpif77;' export 'MPIF77; MKLROOT_modshare=/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/mkl:1;' export 
'MKLROOT_modshare; CMAKE_PREFIX_PATH=/gpfslocalsup/spack_soft/magma/2.5.4/gcc-8.3.1-d4zfylwpieuqujdgbuwv5yu6voe4okke/:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/:/gpfslocalsup/spack_soft/cudnn/8.1.1.33-11.2/gcc-8.3.1-dgsfwc7e23vzc36jopc5nxyunptm2ieh/:/gpfslocalsup/spack_soft/nccl/2.9.6-1/gcc-8.3.1-2c3rwcolksekujszxrg7r72uxmekxx2y/;' export 'CMAKE_PREFIX_PATH; LOADEDMODULES=gcc/8.3.1:cuda/11.2:nccl/2.9.6-1-cuda:cudnn/8.1.1.33-cuda:intel-mkl/2020.4:openmpi/4.1.1-cuda:magma/2.5.4-cuda:pytorch-gpu/py3/1.8.1;' export 'LOADEDMODULES; _LMFILES_=/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/gcc/8.3.1:/gpfslocalsup/pub/module-rh/modulefiles/cuda/11.2:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/nccl/2.9.6-1-cuda:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/cudnn/8.1.1.33-cuda:/gpfslocalsup/pub/module-rh/modulefiles/intel-mkl/2020.4:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/openmpi/4.1.1-cuda:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/magma/2.5.4-cuda:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/pytorch-gpu/py3/1.8.1;' export '_LMFILES_; PKG_CONFIG_PATH=/gpfslocalsup/spack_soft/magma/2.5.4/gcc-8.3.1-d4zfylwpieuqujdgbuwv5yu6voe4okke/lib/pkgconfig:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/lib/pkgconfig:/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/mkl/bin/pkgconfig:/gpfslocalsup/spack_soft/nccl/2.9.6-1/gcc-8.3.1-2c3rwcolksekujszxrg7r72uxmekxx2y/lib/pkgconfig;' export 'PKG_CONFIG_PATH; MANPATH_modshare=:1:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/share/man:1:/opt/sgi/share/man:1:/opt/c3/man:1:/gpfslocalsys/slurm/current/share/man:1:/opt/clmgr/share/man:1:/opt/clmgr/lib/cm-cli/man:1:/usr/man:1:/usr/catman:1:/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/man/common:1:/opt/clmgr/man:1:/usr/share/man:1:/gpfslocalsys/cuda/11.2/doc/man:1:/usr/share/catman:1;' export 'MANPATH_modshare; LIBRARY_PATH_modshare=/gpfslocalsys/cuda/11.2/targets/x86_64-linux/lib:1:/gpfslocalsup/spack_soft/magma/2.5.4/gcc-8.3.1-d4zfylwpieuqujdgbuwv5yu6voe4okke/lib:1:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/lib:1:/gpfslocalsys/slurm/current/lib:1:/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/mkl/lib/intel64_lin:1:/gpfslocalsys/cuda/11.2/nvvm/lib64:1:/gpfslocalsys/cuda/11.2/lib64/stubs:1:/gpfslocalsys/cuda/11.2/extras/CUPTI/lib64:1:/gpfslocalsup/spack_soft/cudnn/8.1.1.33-11.2/gcc-8.3.1-dgsfwc7e23vzc36jopc5nxyunptm2ieh/lib64:1:/gpfslocalsup/spack_soft/nccl/2.9.6-1/gcc-8.3.1-2c3rwcolksekujszxrg7r72uxmekxx2y/lib:1:/gpfslocalsys/cuda/11.2/samples/common/lib/linux/x86_64:1:/gpfslocalsys/slurm/current/lib/slurm:1:/gpfslocalsys/cuda/11.2/lib64:1;' export 'LIBRARY_PATH_modshare; MODULES_LMCONFLICT=gcc/8.3.1\&gcc:cuda/11.2\&cuda:nccl/2.9.6-1-cuda\&nccl:cudnn/8.1.1.33-cuda\&cudnn:intel-mkl/2020.4\&intel-mkl:openmpi/4.1.1-cuda\&openmpi\&intel-mpi:magma/2.5.4-cuda\&magma:pytorch-gpu/py3/1.8.1\&python\&tensorflow\&pytorch\&caffe\&anaconda-py2\&anaconda-py3;' export 'MODULES_LMCONFLICT; MPICC=/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin/mpicc;' export 'MPICC; 
NLSPATH_modshare=/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/mkl/lib/intel64_lin/locale/%l_%t/%N:1;' export 'NLSPATH_modshare; OMPI_MCA_pml=cm;' export 'OMPI_MCA_pml; INTEL_LICENSE_FILE=/gpfslocalsys/intel/licenses/site_license.lic;' export 'INTEL_LICENSE_FILE; PKG_CONFIG_PATH_modshare=/gpfslocalsup/spack_soft/magma/2.5.4/gcc-8.3.1-d4zfylwpieuqujdgbuwv5yu6voe4okke/lib/pkgconfig:1:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/lib/pkgconfig:1:/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/mkl/bin/pkgconfig:1:/gpfslocalsup/spack_soft/nccl/2.9.6-1/gcc-8.3.1-2c3rwcolksekujszxrg7r72uxmekxx2y/lib/pkgconfig:1;' export 'PKG_CONFIG_PATH_modshare; MODULES_LMCONFLICT_modshare=pytorch-gpu/py3/1.8.1\&python\&tensorflow\&pytorch\&caffe\&anaconda-py2\&anaconda-py3:1:magma/2.5.4-cuda\&magma:1:cuda/11.2\&cuda:1:cudnn/8.1.1.33-cuda\&cudnn:1:intel-mkl/2020.4\&intel-mkl:1:nccl/2.9.6-1-cuda\&nccl:1:openmpi/4.1.1-cuda\&openmpi\&intel-mpi:1:gcc/8.3.1\&gcc:1;' export 'MODULES_LMCONFLICT_modshare; INTEL_LICENSE_FILE_modshare=/gpfslocalsys/intel/licenses/site_license.lic:1;' export 'INTEL_LICENSE_FILE_modshare; CUDA_INSTALL_PATH=/gpfslocalsys/cuda/11.2;' export 'CUDA_INSTALL_PATH; MODULES_LMNOTUASKED=gcc/8.3.1:cuda/11.2:nccl/2.9.6-1-cuda:cudnn/8.1.1.33-cuda:intel-mkl/2020.4:openmpi/4.1.1-cuda:magma/2.5.4-cuda;' export 'MODULES_LMNOTUASKED; PYTHONUNBUFFERED=1;' export 'PYTHONUNBUFFERED; MKLROOT=/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/mkl;' export 'MKLROOT; MPICXX=/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin/mpic++;' export 'MPICXX; CUDA_PATH=/gpfslocalsys/cuda/11.2;' export 'CUDA_PATH; MODULES_LMNOTUASKED_modshare=gcc/8.3.1:1:nccl/2.9.6-1-cuda:1:cuda/11.2:1:intel-mkl/2020.4:1:magma/2.5.4-cuda:1:cudnn/8.1.1.33-cuda:1:openmpi/4.1.1-cuda:1;' export 'MODULES_LMNOTUASKED_modshare; PSM2_CUDA_MEMCACHE_SIZE=1024;' export 'PSM2_CUDA_MEMCACHE_SIZE; NLSPATH=/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/mkl/lib/intel64_lin/locale/%l_%t/%N;' export 'NLSPATH; MODULES_LMPREREQ_modshare=cudnn/8.1.1.33-cuda\&cuda/11.2\|cuda/10.2:1:pytorch-gpu/py3/1.8.1\&gcc/8.3.1\&cuda/11.2\&nccl/2.9.6-1-cuda\&cudnn/8.1.1.33-cuda\&intel-mkl/2020.4\&openmpi/4.1.1-cuda\&magma/2.5.4-cuda:1:nccl/2.9.6-1-cuda\&cuda/11.2\|cuda/10.2:1:openmpi/4.1.1-cuda\&nvidia-compilers/21.3\|nvidia-compilers/20.11\|nvidia-compilers/20.7\|pgi/20.4\|gcc/8.3.1\&cuda/11.2\|cuda/10.2:1:magma/2.5.4-cuda\&intel-compilers/19.0.4\|gcc/8.3.1\&cuda/11.2\|cuda/10.2:1;' export 'MODULES_LMPREREQ_modshare; CPATH_modshare=/gpfslocalsup/spack_soft/cudnn/8.1.1.33-11.2/gcc-8.3.1-dgsfwc7e23vzc36jopc5nxyunptm2ieh/include:1:/gpfslocalsup/spack_soft/nccl/2.9.6-1/gcc-8.3.1-2c3rwcolksekujszxrg7r72uxmekxx2y/include:1:/gpfslocalsup/spack_soft/magma/2.5.4/gcc-8.3.1-d4zfylwpieuqujdgbuwv5yu6voe4okke/include:1:/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/mkl/include:1:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/include:1;' export 'CPATH_modshare; C_INCLUDE_PATH=/gpfslocalsys/cuda/11.2/include;' export 'C_INCLUDE_PATH; 
LD_LIBRARY_PATH_modshare=/gpfslocalsys/cuda/11.2/targets/x86_64-linux/lib:1:/gpfslocalsup/spack_soft/magma/2.5.4/gcc-8.3.1-d4zfylwpieuqujdgbuwv5yu6voe4okke/lib:1:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/lib:1:/gpfslocalsys/slurm/current/lib:1:/gpfslocalsys/cuda/11.2/nvvm/lib64:1:/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/mkl/lib/intel64_lin:1:/gpfslocalsys/cuda/11.2/extras/CUPTI/lib64:1:/gpfslocalsup/spack_soft/cudnn/8.1.1.33-11.2/gcc-8.3.1-dgsfwc7e23vzc36jopc5nxyunptm2ieh/lib64:1:/gpfslocalsup/spack_soft/nccl/2.9.6-1/gcc-8.3.1-2c3rwcolksekujszxrg7r72uxmekxx2y/lib:1:/gpfslocalsys/slurm/current/lib/slurm:1:/gpfslocalsys/cuda/11.2/samples/common/lib/linux/x86_64:1:/gpfslocalsys/cuda/11.2/lib64:1;' export 'LD_LIBRARY_PATH_modshare; CMAKE_PREFIX_PATH_modshare=/gpfslocalsup/spack_soft/cudnn/8.1.1.33-11.2/gcc-8.3.1-dgsfwc7e23vzc36jopc5nxyunptm2ieh/:1:/gpfslocalsup/spack_soft/nccl/2.9.6-1/gcc-8.3.1-2c3rwcolksekujszxrg7r72uxmekxx2y/:1:/gpfslocalsup/spack_soft/magma/2.5.4/gcc-8.3.1-d4zfylwpieuqujdgbuwv5yu6voe4okke/:1:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/:1;' export 'CMAKE_PREFIX_PATH_modshare; LOADEDMODULES_modshare=gcc/8.3.1:1:nccl/2.9.6-1-cuda:1:pytorch-gpu/py3/1.8.1:1:cuda/11.2:1:intel-mkl/2020.4:1:cudnn/8.1.1.33-cuda:1:openmpi/4.1.1-cuda:1:magma/2.5.4-cuda:1;' export 'LOADEDMODULES_modshare; _LMFILES__modshare=/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/pytorch-gpu/py3/1.8.1:1:/gpfslocalsup/pub/module-rh/modulefiles/cuda/11.2:1:/gpfslocalsup/pub/module-rh/modulefiles/intel-mkl/2020.4:1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/cudnn/8.1.1.33-cuda:1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/openmpi/4.1.1-cuda:1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/magma/2.5.4-cuda:1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/gcc/8.3.1:1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/nccl/2.9.6-1-cuda:1;' export '_LMFILES__modshare; MPIF90=/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin/mpif90;' export 'MPIF90; CUDA_HOME=/gpfslocalsys/cuda/11.2;' export 'CUDA_HOME; PATH=/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin;' export 'PATH; PSM2_CUDA=1;' export 'PSM2_CUDA; 
PATH_modshare=/usr/bin:1:/gpfslocalsup/bin:1:/usr/local/bin:1:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:1:/opt/sgi/bin:1:/gpfslocalsys/slurm/current/bin:1:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:1:/gpfslocalsys/cuda/11.2/bin:1:/opt/clmgr/bin:1:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:1:/opt/sgi/sbin:1:/bin:1:/gpfswork/rech/rcy/uty16tp/miniconda3/bin:1:/opt/clmgr/sbin:1:/gpfslocalsys/bin:1:/gpfslocalsys/cuda/11.2/samples:1:/sbin:1:/gpfslocalsys/cuda/11.2/nvvm/bin:1:/usr/sbin:1:/gpfslocalsys/idrzap/current/bin:1:/usr/local/sbin:1:/usr/lpp/mmfs/bin:1:/opt/c3/bin:1;' export 'PATH_modshare; .' '/gpfslocalsup/pub/anaconda-py3/2021.05/etc/profile.d/conda.sh; conda' 'deactivate; conda' activate 'pytorch-1.8.1+py3.8.8-lts; test' '0;' +++ PSM2_GPUDIRECT=1 +++ export PSM2_GPUDIRECT +++ MODULES_LMPREREQ='nccl/2.9.6-1-cuda&cuda/11.2|cuda/10.2:cudnn/8.1.1.33-cuda&cuda/11.2|cuda/10.2:openmpi/4.1.1-cuda&nvidia-compilers/21.3|nvidia-compilers/20.11|nvidia-compilers/20.7|pgi/20.4|gcc/8.3.1&cuda/11.2|cuda/10.2:magma/2.5.4-cuda&intel-compilers/19.0.4|gcc/8.3.1&cuda/11.2|cuda/10.2:pytorch-gpu/py3/1.8.1&gcc/8.3.1&cuda/11.2&nccl/2.9.6-1-cuda&cudnn/8.1.1.33-cuda&intel-mkl/2020.4&openmpi/4.1.1-cuda&magma/2.5.4-cuda' +++ export MODULES_LMPREREQ +++ CPATH=/gpfslocalsup/spack_soft/magma/2.5.4/gcc-8.3.1-d4zfylwpieuqujdgbuwv5yu6voe4okke/include:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/include:/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/mkl/include:/gpfslocalsup/spack_soft/cudnn/8.1.1.33-11.2/gcc-8.3.1-dgsfwc7e23vzc36jopc5nxyunptm2ieh/include:/gpfslocalsup/spack_soft/nccl/2.9.6-1/gcc-8.3.1-2c3rwcolksekujszxrg7r72uxmekxx2y/include +++ export CPATH +++ LD_LIBRARY_PATH=/gpfslocalsup/spack_soft/magma/2.5.4/gcc-8.3.1-d4zfylwpieuqujdgbuwv5yu6voe4okke/lib:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/lib:/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/mkl/lib/intel64_lin:/gpfslocalsup/spack_soft/cudnn/8.1.1.33-11.2/gcc-8.3.1-dgsfwc7e23vzc36jopc5nxyunptm2ieh/lib64:/gpfslocalsup/spack_soft/nccl/2.9.6-1/gcc-8.3.1-2c3rwcolksekujszxrg7r72uxmekxx2y/lib:/gpfslocalsys/cuda/11.2/nvvm/lib64:/gpfslocalsys/cuda/11.2/extras/CUPTI/lib64:/gpfslocalsys/cuda/11.2/lib64:/gpfslocalsys/cuda/11.2/samples/common/lib/linux/x86_64:/gpfslocalsys/cuda/11.2/targets/x86_64-linux/lib:/gpfslocalsys/slurm/current/lib/slurm:/gpfslocalsys/slurm/current/lib +++ export LD_LIBRARY_PATH +++ SLURM_MPI_TYPE=pmix +++ export SLURM_MPI_TYPE +++ OMPI_MCA_mtl=psm2 +++ export OMPI_MCA_mtl +++ MANPATH=/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/share/man:/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/man/common:/gpfslocalsys/cuda/11.2/doc/man::/opt/c3/man:/opt/clmgr/man:/opt/sgi/share/man:/opt/clmgr/share/man:/opt/clmgr/lib/cm-cli/man:/gpfslocalsys/slurm/current/share/man:/usr/share/catman:/usr/share/man:/usr/catman:/usr/man +++ export MANPATH +++ 
LIBRARY_PATH=/gpfslocalsup/spack_soft/magma/2.5.4/gcc-8.3.1-d4zfylwpieuqujdgbuwv5yu6voe4okke/lib:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/lib:/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/mkl/lib/intel64_lin:/gpfslocalsup/spack_soft/cudnn/8.1.1.33-11.2/gcc-8.3.1-dgsfwc7e23vzc36jopc5nxyunptm2ieh/lib64:/gpfslocalsup/spack_soft/nccl/2.9.6-1/gcc-8.3.1-2c3rwcolksekujszxrg7r72uxmekxx2y/lib:/gpfslocalsys/cuda/11.2/lib64/stubs:/gpfslocalsys/cuda/11.2/nvvm/lib64:/gpfslocalsys/cuda/11.2/extras/CUPTI/lib64:/gpfslocalsys/cuda/11.2/lib64:/gpfslocalsys/cuda/11.2/samples/common/lib/linux/x86_64:/gpfslocalsys/cuda/11.2/targets/x86_64-linux/lib:/gpfslocalsys/slurm/current/lib/slurm:/gpfslocalsys/slurm/current/lib +++ export LIBRARY_PATH +++ MPIF77=/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin/mpif77 +++ export MPIF77 +++ MKLROOT_modshare=/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/mkl:1 +++ export MKLROOT_modshare +++ CMAKE_PREFIX_PATH=/gpfslocalsup/spack_soft/magma/2.5.4/gcc-8.3.1-d4zfylwpieuqujdgbuwv5yu6voe4okke/:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/:/gpfslocalsup/spack_soft/cudnn/8.1.1.33-11.2/gcc-8.3.1-dgsfwc7e23vzc36jopc5nxyunptm2ieh/:/gpfslocalsup/spack_soft/nccl/2.9.6-1/gcc-8.3.1-2c3rwcolksekujszxrg7r72uxmekxx2y/ +++ export CMAKE_PREFIX_PATH +++ LOADEDMODULES=gcc/8.3.1:cuda/11.2:nccl/2.9.6-1-cuda:cudnn/8.1.1.33-cuda:intel-mkl/2020.4:openmpi/4.1.1-cuda:magma/2.5.4-cuda:pytorch-gpu/py3/1.8.1 +++ export LOADEDMODULES +++ _LMFILES_=/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/gcc/8.3.1:/gpfslocalsup/pub/module-rh/modulefiles/cuda/11.2:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/nccl/2.9.6-1-cuda:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/cudnn/8.1.1.33-cuda:/gpfslocalsup/pub/module-rh/modulefiles/intel-mkl/2020.4:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/openmpi/4.1.1-cuda:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/magma/2.5.4-cuda:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/pytorch-gpu/py3/1.8.1 +++ export _LMFILES_ +++ PKG_CONFIG_PATH=/gpfslocalsup/spack_soft/magma/2.5.4/gcc-8.3.1-d4zfylwpieuqujdgbuwv5yu6voe4okke/lib/pkgconfig:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/lib/pkgconfig:/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/mkl/bin/pkgconfig:/gpfslocalsup/spack_soft/nccl/2.9.6-1/gcc-8.3.1-2c3rwcolksekujszxrg7r72uxmekxx2y/lib/pkgconfig +++ export PKG_CONFIG_PATH +++ MANPATH_modshare=:1:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/share/man:1:/opt/sgi/share/man:1:/opt/c3/man:1:/gpfslocalsys/slurm/current/share/man:1:/opt/clmgr/share/man:1:/opt/clmgr/lib/cm-cli/man:1:/usr/man:1:/usr/catman:1:/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/man/common:1:/opt/clmgr/man:1:/usr/share/man:1:/gpfslocalsys/cuda/11.2/doc/man:1:/usr/share/catman:1 +++ export MANPATH_modshare +++ 
LIBRARY_PATH_modshare=/gpfslocalsys/cuda/11.2/targets/x86_64-linux/lib:1:/gpfslocalsup/spack_soft/magma/2.5.4/gcc-8.3.1-d4zfylwpieuqujdgbuwv5yu6voe4okke/lib:1:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/lib:1:/gpfslocalsys/slurm/current/lib:1:/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/mkl/lib/intel64_lin:1:/gpfslocalsys/cuda/11.2/nvvm/lib64:1:/gpfslocalsys/cuda/11.2/lib64/stubs:1:/gpfslocalsys/cuda/11.2/extras/CUPTI/lib64:1:/gpfslocalsup/spack_soft/cudnn/8.1.1.33-11.2/gcc-8.3.1-dgsfwc7e23vzc36jopc5nxyunptm2ieh/lib64:1:/gpfslocalsup/spack_soft/nccl/2.9.6-1/gcc-8.3.1-2c3rwcolksekujszxrg7r72uxmekxx2y/lib:1:/gpfslocalsys/cuda/11.2/samples/common/lib/linux/x86_64:1:/gpfslocalsys/slurm/current/lib/slurm:1:/gpfslocalsys/cuda/11.2/lib64:1 +++ export LIBRARY_PATH_modshare +++ MODULES_LMCONFLICT='gcc/8.3.1&gcc:cuda/11.2&cuda:nccl/2.9.6-1-cuda&nccl:cudnn/8.1.1.33-cuda&cudnn:intel-mkl/2020.4&intel-mkl:openmpi/4.1.1-cuda&openmpi&intel-mpi:magma/2.5.4-cuda&magma:pytorch-gpu/py3/1.8.1&python&tensorflow&pytorch&caffe&anaconda-py2&anaconda-py3' +++ export MODULES_LMCONFLICT +++ MPICC=/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin/mpicc +++ export MPICC +++ NLSPATH_modshare=/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/mkl/lib/intel64_lin/locale/%l_%t/%N:1 +++ export NLSPATH_modshare +++ OMPI_MCA_pml=cm +++ export OMPI_MCA_pml +++ INTEL_LICENSE_FILE=/gpfslocalsys/intel/licenses/site_license.lic +++ export INTEL_LICENSE_FILE +++ PKG_CONFIG_PATH_modshare=/gpfslocalsup/spack_soft/magma/2.5.4/gcc-8.3.1-d4zfylwpieuqujdgbuwv5yu6voe4okke/lib/pkgconfig:1:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/lib/pkgconfig:1:/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/mkl/bin/pkgconfig:1:/gpfslocalsup/spack_soft/nccl/2.9.6-1/gcc-8.3.1-2c3rwcolksekujszxrg7r72uxmekxx2y/lib/pkgconfig:1 +++ export PKG_CONFIG_PATH_modshare +++ MODULES_LMCONFLICT_modshare='pytorch-gpu/py3/1.8.1&python&tensorflow&pytorch&caffe&anaconda-py2&anaconda-py3:1:magma/2.5.4-cuda&magma:1:cuda/11.2&cuda:1:cudnn/8.1.1.33-cuda&cudnn:1:intel-mkl/2020.4&intel-mkl:1:nccl/2.9.6-1-cuda&nccl:1:openmpi/4.1.1-cuda&openmpi&intel-mpi:1:gcc/8.3.1&gcc:1' +++ export MODULES_LMCONFLICT_modshare +++ INTEL_LICENSE_FILE_modshare=/gpfslocalsys/intel/licenses/site_license.lic:1 +++ export INTEL_LICENSE_FILE_modshare +++ CUDA_INSTALL_PATH=/gpfslocalsys/cuda/11.2 +++ export CUDA_INSTALL_PATH +++ MODULES_LMNOTUASKED=gcc/8.3.1:cuda/11.2:nccl/2.9.6-1-cuda:cudnn/8.1.1.33-cuda:intel-mkl/2020.4:openmpi/4.1.1-cuda:magma/2.5.4-cuda +++ export MODULES_LMNOTUASKED +++ PYTHONUNBUFFERED=1 +++ export PYTHONUNBUFFERED +++ MKLROOT=/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/mkl +++ export MKLROOT +++ MPICXX=/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin/mpic++ +++ export MPICXX +++ CUDA_PATH=/gpfslocalsys/cuda/11.2 +++ export CUDA_PATH +++ MODULES_LMNOTUASKED_modshare=gcc/8.3.1:1:nccl/2.9.6-1-cuda:1:cuda/11.2:1:intel-mkl/2020.4:1:magma/2.5.4-cuda:1:cudnn/8.1.1.33-cuda:1:openmpi/4.1.1-cuda:1 +++ export MODULES_LMNOTUASKED_modshare +++ PSM2_CUDA_MEMCACHE_SIZE=1024 +++ export PSM2_CUDA_MEMCACHE_SIZE +++ 
NLSPATH=/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/mkl/lib/intel64_lin/locale/%l_%t/%N +++ export NLSPATH +++ MODULES_LMPREREQ_modshare='cudnn/8.1.1.33-cuda&cuda/11.2|cuda/10.2:1:pytorch-gpu/py3/1.8.1&gcc/8.3.1&cuda/11.2&nccl/2.9.6-1-cuda&cudnn/8.1.1.33-cuda&intel-mkl/2020.4&openmpi/4.1.1-cuda&magma/2.5.4-cuda:1:nccl/2.9.6-1-cuda&cuda/11.2|cuda/10.2:1:openmpi/4.1.1-cuda&nvidia-compilers/21.3|nvidia-compilers/20.11|nvidia-compilers/20.7|pgi/20.4|gcc/8.3.1&cuda/11.2|cuda/10.2:1:magma/2.5.4-cuda&intel-compilers/19.0.4|gcc/8.3.1&cuda/11.2|cuda/10.2:1' +++ export MODULES_LMPREREQ_modshare +++ CPATH_modshare=/gpfslocalsup/spack_soft/cudnn/8.1.1.33-11.2/gcc-8.3.1-dgsfwc7e23vzc36jopc5nxyunptm2ieh/include:1:/gpfslocalsup/spack_soft/nccl/2.9.6-1/gcc-8.3.1-2c3rwcolksekujszxrg7r72uxmekxx2y/include:1:/gpfslocalsup/spack_soft/magma/2.5.4/gcc-8.3.1-d4zfylwpieuqujdgbuwv5yu6voe4okke/include:1:/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/mkl/include:1:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/include:1 +++ export CPATH_modshare +++ C_INCLUDE_PATH=/gpfslocalsys/cuda/11.2/include +++ export C_INCLUDE_PATH +++ LD_LIBRARY_PATH_modshare=/gpfslocalsys/cuda/11.2/targets/x86_64-linux/lib:1:/gpfslocalsup/spack_soft/magma/2.5.4/gcc-8.3.1-d4zfylwpieuqujdgbuwv5yu6voe4okke/lib:1:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/lib:1:/gpfslocalsys/slurm/current/lib:1:/gpfslocalsys/cuda/11.2/nvvm/lib64:1:/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/mkl/lib/intel64_lin:1:/gpfslocalsys/cuda/11.2/extras/CUPTI/lib64:1:/gpfslocalsup/spack_soft/cudnn/8.1.1.33-11.2/gcc-8.3.1-dgsfwc7e23vzc36jopc5nxyunptm2ieh/lib64:1:/gpfslocalsup/spack_soft/nccl/2.9.6-1/gcc-8.3.1-2c3rwcolksekujszxrg7r72uxmekxx2y/lib:1:/gpfslocalsys/slurm/current/lib/slurm:1:/gpfslocalsys/cuda/11.2/samples/common/lib/linux/x86_64:1:/gpfslocalsys/cuda/11.2/lib64:1 +++ export LD_LIBRARY_PATH_modshare +++ CMAKE_PREFIX_PATH_modshare=/gpfslocalsup/spack_soft/cudnn/8.1.1.33-11.2/gcc-8.3.1-dgsfwc7e23vzc36jopc5nxyunptm2ieh/:1:/gpfslocalsup/spack_soft/nccl/2.9.6-1/gcc-8.3.1-2c3rwcolksekujszxrg7r72uxmekxx2y/:1:/gpfslocalsup/spack_soft/magma/2.5.4/gcc-8.3.1-d4zfylwpieuqujdgbuwv5yu6voe4okke/:1:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/:1 +++ export CMAKE_PREFIX_PATH_modshare +++ LOADEDMODULES_modshare=gcc/8.3.1:1:nccl/2.9.6-1-cuda:1:pytorch-gpu/py3/1.8.1:1:cuda/11.2:1:intel-mkl/2020.4:1:cudnn/8.1.1.33-cuda:1:openmpi/4.1.1-cuda:1:magma/2.5.4-cuda:1 +++ export LOADEDMODULES_modshare +++ _LMFILES__modshare=/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/pytorch-gpu/py3/1.8.1:1:/gpfslocalsup/pub/module-rh/modulefiles/cuda/11.2:1:/gpfslocalsup/pub/module-rh/modulefiles/intel-mkl/2020.4:1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/cudnn/8.1.1.33-cuda:1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/openmpi/4.1.1-cuda:1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/magma/2.5.4-cuda:1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/gcc/8.3.1:1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/nccl/2.9.6-1-cuda:1 +++ export _LMFILES__modshare +++ 
MPIF90=/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin/mpif90 +++ export MPIF90 +++ CUDA_HOME=/gpfslocalsys/cuda/11.2 +++ export CUDA_HOME +++ PATH=/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin +++ export PATH +++ PSM2_CUDA=1 +++ export PSM2_CUDA +++ PATH_modshare=/usr/bin:1:/gpfslocalsup/bin:1:/usr/local/bin:1:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:1:/opt/sgi/bin:1:/gpfslocalsys/slurm/current/bin:1:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:1:/gpfslocalsys/cuda/11.2/bin:1:/opt/clmgr/bin:1:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:1:/opt/sgi/sbin:1:/bin:1:/gpfswork/rech/rcy/uty16tp/miniconda3/bin:1:/opt/clmgr/sbin:1:/gpfslocalsys/bin:1:/gpfslocalsys/cuda/11.2/samples:1:/sbin:1:/gpfslocalsys/cuda/11.2/nvvm/bin:1:/usr/sbin:1:/gpfslocalsys/idrzap/current/bin:1:/usr/local/sbin:1:/usr/lpp/mmfs/bin:1:/opt/c3/bin:1 +++ export PATH_modshare +++ . /gpfslocalsup/pub/anaconda-py3/2021.05/etc/profile.d/conda.sh ++++ export CONDA_EXE=/gpfslocalsup/pub/anaconda-py3/2021.05/bin/conda ++++ CONDA_EXE=/gpfslocalsup/pub/anaconda-py3/2021.05/bin/conda ++++ export _CE_M= ++++ _CE_M= ++++ export _CE_CONDA= ++++ _CE_CONDA= ++++ export CONDA_PYTHON_EXE=/gpfslocalsup/pub/anaconda-py3/2021.05/bin/python ++++ CONDA_PYTHON_EXE=/gpfslocalsup/pub/anaconda-py3/2021.05/bin/python ++++ '[' -z x ']' +++ conda deactivate +++ '[' 1 -lt 1 ']' +++ local cmd=deactivate +++ shift +++ case "$cmd" in +++ __conda_activate deactivate +++ '[' -n '' ']' +++ local cmd=deactivate +++ shift +++ local ask_conda +++ CONDA_INTERNAL_OLDPATH=/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin +++ __add_sys_prefix_to_path +++ '[' -n '' ']' ++++ dirname /gpfslocalsup/pub/anaconda-py3/2021.05/bin/conda +++ SYSP=/gpfslocalsup/pub/anaconda-py3/2021.05/bin ++++ dirname /gpfslocalsup/pub/anaconda-py3/2021.05/bin +++ SYSP=/gpfslocalsup/pub/anaconda-py3/2021.05 +++ '[' -n '' ']' +++ 
PATH=/gpfslocalsup/pub/anaconda-py3/2021.05/bin:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin +++ export PATH ++++ PS1= ++++ /gpfslocalsup/pub/anaconda-py3/2021.05/bin/conda shell.posix deactivate +++ ask_conda='export PATH='\''/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin'\'' unset CONDA_PREFIX unset CONDA_DEFAULT_ENV unset CONDA_PROMPT_MODIFIER PS1='\'''\'' export CONDA_SHLVL='\''0'\'' export CONDA_EXE='\''/gpfslocalsup/pub/anaconda-py3/2021.05/bin/conda'\'' export _CE_M='\'''\'' export _CE_CONDA='\'''\'' export CONDA_PYTHON_EXE='\''/gpfslocalsup/pub/anaconda-py3/2021.05/bin/python'\''' +++ rc=0 +++ PATH=/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin +++ eval 'export PATH='\''/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin'\'' unset CONDA_PREFIX unset CONDA_DEFAULT_ENV unset CONDA_PROMPT_MODIFIER PS1='\'''\'' export CONDA_SHLVL='\''0'\'' export CONDA_EXE='\''/gpfslocalsup/pub/anaconda-py3/2021.05/bin/conda'\'' export _CE_M='\'''\'' export _CE_CONDA='\'''\'' export CONDA_PYTHON_EXE='\''/gpfslocalsup/pub/anaconda-py3/2021.05/bin/python'\''' ++++ export 
PATH=/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin ++++ PATH=/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin ++++ unset CONDA_PREFIX ++++ unset CONDA_DEFAULT_ENV ++++ unset CONDA_PROMPT_MODIFIER ++++ PS1= ++++ export CONDA_SHLVL=0 ++++ CONDA_SHLVL=0 ++++ export CONDA_EXE=/gpfslocalsup/pub/anaconda-py3/2021.05/bin/conda ++++ CONDA_EXE=/gpfslocalsup/pub/anaconda-py3/2021.05/bin/conda ++++ export _CE_M= ++++ _CE_M= ++++ export _CE_CONDA= ++++ _CE_CONDA= ++++ export CONDA_PYTHON_EXE=/gpfslocalsup/pub/anaconda-py3/2021.05/bin/python ++++ CONDA_PYTHON_EXE=/gpfslocalsup/pub/anaconda-py3/2021.05/bin/python +++ '[' 0 '!=' 0 ']' +++ __conda_hashr +++ '[' -n '' ']' +++ '[' -n '' ']' +++ hash -r +++ conda activate pytorch-1.8.1+py3.8.8-lts +++ '[' 2 -lt 1 ']' +++ local cmd=activate +++ shift +++ case "$cmd" in +++ __conda_activate activate pytorch-1.8.1+py3.8.8-lts +++ '[' -n '' ']' +++ local cmd=activate +++ shift +++ local ask_conda +++ CONDA_INTERNAL_OLDPATH=/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin +++ __add_sys_prefix_to_path +++ '[' -n '' ']' ++++ dirname /gpfslocalsup/pub/anaconda-py3/2021.05/bin/conda +++ SYSP=/gpfslocalsup/pub/anaconda-py3/2021.05/bin ++++ dirname /gpfslocalsup/pub/anaconda-py3/2021.05/bin +++ SYSP=/gpfslocalsup/pub/anaconda-py3/2021.05 +++ '[' -n '' ']' +++ PATH=/gpfslocalsup/pub/anaconda-py3/2021.05/bin:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin +++ export PATH ++++ PS1= ++++ 
/gpfslocalsup/pub/anaconda-py3/2021.05/bin/conda shell.posix activate pytorch-1.8.1+py3.8.8-lts +++ ask_conda='PS1='\''(pytorch-1.8.1+py3.8.8-lts) '\'' export PATH='\''/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts/bin:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin'\'' export CONDA_PREFIX='\''/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts'\'' export CONDA_SHLVL='\''1'\'' export CONDA_DEFAULT_ENV='\''pytorch-1.8.1+py3.8.8-lts'\'' export CONDA_PROMPT_MODIFIER='\''(pytorch-1.8.1+py3.8.8-lts) '\'' export CONDA_EXE='\''/gpfslocalsup/pub/anaconda-py3/2021.05/bin/conda'\'' export _CE_M='\'''\'' export _CE_CONDA='\'''\'' export CONDA_PYTHON_EXE='\''/gpfslocalsup/pub/anaconda-py3/2021.05/bin/python'\'' . "/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts/etc/conda/activate.d/glib_activate.sh" . "/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts/etc/conda/activate.d/proj4-activate.sh"' +++ rc=0 +++ PATH=/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin +++ eval 'PS1='\''(pytorch-1.8.1+py3.8.8-lts) '\'' export PATH='\''/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts/bin:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin'\'' export CONDA_PREFIX='\''/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts'\'' export CONDA_SHLVL='\''1'\'' export CONDA_DEFAULT_ENV='\''pytorch-1.8.1+py3.8.8-lts'\'' export CONDA_PROMPT_MODIFIER='\''(pytorch-1.8.1+py3.8.8-lts) '\'' export CONDA_EXE='\''/gpfslocalsup/pub/anaconda-py3/2021.05/bin/conda'\'' export _CE_M='\'''\'' export _CE_CONDA='\'''\'' export CONDA_PYTHON_EXE='\''/gpfslocalsup/pub/anaconda-py3/2021.05/bin/python'\'' . "/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts/etc/conda/activate.d/glib_activate.sh" . 
"/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts/etc/conda/activate.d/proj4-activate.sh"' ++++ PS1='(pytorch-1.8.1+py3.8.8-lts) ' ++++ export PATH=/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts/bin:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin ++++ PATH=/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts/bin:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin ++++ export CONDA_PREFIX=/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts ++++ CONDA_PREFIX=/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts ++++ export CONDA_SHLVL=1 ++++ CONDA_SHLVL=1 ++++ export CONDA_DEFAULT_ENV=pytorch-1.8.1+py3.8.8-lts ++++ CONDA_DEFAULT_ENV=pytorch-1.8.1+py3.8.8-lts ++++ export 'CONDA_PROMPT_MODIFIER=(pytorch-1.8.1+py3.8.8-lts) ' ++++ CONDA_PROMPT_MODIFIER='(pytorch-1.8.1+py3.8.8-lts) ' ++++ export CONDA_EXE=/gpfslocalsup/pub/anaconda-py3/2021.05/bin/conda ++++ CONDA_EXE=/gpfslocalsup/pub/anaconda-py3/2021.05/bin/conda ++++ export _CE_M= ++++ _CE_M= ++++ export _CE_CONDA= ++++ _CE_CONDA= ++++ export CONDA_PYTHON_EXE=/gpfslocalsup/pub/anaconda-py3/2021.05/bin/python ++++ CONDA_PYTHON_EXE=/gpfslocalsup/pub/anaconda-py3/2021.05/bin/python ++++ . /gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts/etc/conda/activate.d/glib_activate.sh +++++ export GSETTINGS_SCHEMA_DIR_CONDA_BACKUP= +++++ GSETTINGS_SCHEMA_DIR_CONDA_BACKUP= +++++ export GSETTINGS_SCHEMA_DIR=/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts/share/glib-2.0/schemas +++++ GSETTINGS_SCHEMA_DIR=/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts/share/glib-2.0/schemas ++++ . 
/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts/etc/conda/activate.d/proj4-activate.sh +++++ '[' -n '' ']' +++++ '[' -d /gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts/share/proj ']' +++++ export PROJ_LIB=/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts/share/proj +++++ PROJ_LIB=/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts/share/proj +++++ '[' -f /gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts/share/proj/copyright_and_licenses.csv ']' +++++ export PROJ_NETWORK=ON +++++ PROJ_NETWORK=ON +++ '[' 0 '!=' 0 ']' +++ __conda_hashr +++ '[' -n '' ']' +++ '[' -n '' ']' +++ hash -r +++ test 0 ++ _mlstatus=0 ++ '[' -n x ']' ++ IFS=' ' ++ unset _mlre _mlv _mlrv _mlIFS ++ '[' -n '' ']' ++ unset _mlshdbg ++ return 0 ++ module load nvtop git-lfs github-cli mc ++ unset _mlshdbg ++ '[' 0 = 1 ']' ++ unset _mlre _mlIFS ++ '[' -n x ']' ++ _mlIFS=' ' ++ IFS=' ' ++ for _mlv in ${MODULES_RUN_QUARANTINE:-} ++ '[' LD_LIBRARY_PATH = LD_LIBRARY_PATH -a LD_LIBRARY_PATH = LD_LIBRARY_PATH ']' +++ eval 'echo ${LD_LIBRARY_PATH+x}' ++++ echo x ++ '[' -n x ']' +++ eval 'echo ${LD_LIBRARY_PATH}' ++++ echo /gpfslocalsup/spack_soft/magma/2.5.4/gcc-8.3.1-d4zfylwpieuqujdgbuwv5yu6voe4okke/lib:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/lib:/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/mkl/lib/intel64_lin:/gpfslocalsup/spack_soft/cudnn/8.1.1.33-11.2/gcc-8.3.1-dgsfwc7e23vzc36jopc5nxyunptm2ieh/lib64:/gpfslocalsup/spack_soft/nccl/2.9.6-1/gcc-8.3.1-2c3rwcolksekujszxrg7r72uxmekxx2y/lib:/gpfslocalsys/cuda/11.2/nvvm/lib64:/gpfslocalsys/cuda/11.2/extras/CUPTI/lib64:/gpfslocalsys/cuda/11.2/lib64:/gpfslocalsys/cuda/11.2/samples/common/lib/linux/x86_64:/gpfslocalsys/cuda/11.2/targets/x86_64-linux/lib:/gpfslocalsys/slurm/current/lib/slurm:/gpfslocalsys/slurm/current/lib ++ _mlre='LD_LIBRARY_PATH_modquar='\''/gpfslocalsup/spack_soft/magma/2.5.4/gcc-8.3.1-d4zfylwpieuqujdgbuwv5yu6voe4okke/lib:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/lib:/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/mkl/lib/intel64_lin:/gpfslocalsup/spack_soft/cudnn/8.1.1.33-11.2/gcc-8.3.1-dgsfwc7e23vzc36jopc5nxyunptm2ieh/lib64:/gpfslocalsup/spack_soft/nccl/2.9.6-1/gcc-8.3.1-2c3rwcolksekujszxrg7r72uxmekxx2y/lib:/gpfslocalsys/cuda/11.2/nvvm/lib64:/gpfslocalsys/cuda/11.2/extras/CUPTI/lib64:/gpfslocalsys/cuda/11.2/lib64:/gpfslocalsys/cuda/11.2/samples/common/lib/linux/x86_64:/gpfslocalsys/cuda/11.2/targets/x86_64-linux/lib:/gpfslocalsys/slurm/current/lib/slurm:/gpfslocalsys/slurm/current/lib'\'' ' ++ _mlrv=MODULES_RUNENV_LD_LIBRARY_PATH +++ eval 'echo ${MODULES_RUNENV_LD_LIBRARY_PATH:-}' ++++ echo ++ 
_mlre='LD_LIBRARY_PATH_modquar='\''/gpfslocalsup/spack_soft/magma/2.5.4/gcc-8.3.1-d4zfylwpieuqujdgbuwv5yu6voe4okke/lib:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/lib:/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/mkl/lib/intel64_lin:/gpfslocalsup/spack_soft/cudnn/8.1.1.33-11.2/gcc-8.3.1-dgsfwc7e23vzc36jopc5nxyunptm2ieh/lib64:/gpfslocalsup/spack_soft/nccl/2.9.6-1/gcc-8.3.1-2c3rwcolksekujszxrg7r72uxmekxx2y/lib:/gpfslocalsys/cuda/11.2/nvvm/lib64:/gpfslocalsys/cuda/11.2/extras/CUPTI/lib64:/gpfslocalsys/cuda/11.2/lib64:/gpfslocalsys/cuda/11.2/samples/common/lib/linux/x86_64:/gpfslocalsys/cuda/11.2/targets/x86_64-linux/lib:/gpfslocalsys/slurm/current/lib/slurm:/gpfslocalsys/slurm/current/lib'\'' LD_LIBRARY_PATH='\'''\'' ' ++ '[' -n 'LD_LIBRARY_PATH_modquar='\''/gpfslocalsup/spack_soft/magma/2.5.4/gcc-8.3.1-d4zfylwpieuqujdgbuwv5yu6voe4okke/lib:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/lib:/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/mkl/lib/intel64_lin:/gpfslocalsup/spack_soft/cudnn/8.1.1.33-11.2/gcc-8.3.1-dgsfwc7e23vzc36jopc5nxyunptm2ieh/lib64:/gpfslocalsup/spack_soft/nccl/2.9.6-1/gcc-8.3.1-2c3rwcolksekujszxrg7r72uxmekxx2y/lib:/gpfslocalsys/cuda/11.2/nvvm/lib64:/gpfslocalsys/cuda/11.2/extras/CUPTI/lib64:/gpfslocalsys/cuda/11.2/lib64:/gpfslocalsys/cuda/11.2/samples/common/lib/linux/x86_64:/gpfslocalsys/cuda/11.2/targets/x86_64-linux/lib:/gpfslocalsys/slurm/current/lib/slurm:/gpfslocalsys/slurm/current/lib'\'' LD_LIBRARY_PATH='\'''\'' ' ']' +++ eval 'LD_LIBRARY_PATH_modquar='\''/gpfslocalsup/spack_soft/magma/2.5.4/gcc-8.3.1-d4zfylwpieuqujdgbuwv5yu6voe4okke/lib:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/lib:/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/mkl/lib/intel64_lin:/gpfslocalsup/spack_soft/cudnn/8.1.1.33-11.2/gcc-8.3.1-dgsfwc7e23vzc36jopc5nxyunptm2ieh/lib64:/gpfslocalsup/spack_soft/nccl/2.9.6-1/gcc-8.3.1-2c3rwcolksekujszxrg7r72uxmekxx2y/lib:/gpfslocalsys/cuda/11.2/nvvm/lib64:/gpfslocalsys/cuda/11.2/extras/CUPTI/lib64:/gpfslocalsys/cuda/11.2/lib64:/gpfslocalsys/cuda/11.2/samples/common/lib/linux/x86_64:/gpfslocalsys/cuda/11.2/targets/x86_64-linux/lib:/gpfslocalsys/slurm/current/lib/slurm:/gpfslocalsys/slurm/current/lib'\''' 'LD_LIBRARY_PATH='\'''\''' /gpfslocalsup/spack_soft/tcl/8.6.8/gcc-4.8.5-5nqkfcnctewdheju62zvqbsonnzszr6m/bin/tclsh /gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/libexec/modulecmd.tcl bash '"$@"' ++++ LD_LIBRARY_PATH_modquar=/gpfslocalsup/spack_soft/magma/2.5.4/gcc-8.3.1-d4zfylwpieuqujdgbuwv5yu6voe4okke/lib:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/lib:/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/mkl/lib/intel64_lin:/gpfslocalsup/spack_soft/cudnn/8.1.1.33-11.2/gcc-8.3.1-dgsfwc7e23vzc36jopc5nxyunptm2ieh/lib64:/gpfslocalsup/spack_soft/nccl/2.9.6-1/gcc-8.3.1-2c3rwcolksekujszxrg7r72uxmekxx2y/lib:/gpfslocalsys/cuda/11.2/nvvm/lib64:/gpfslocalsys/cuda/11.2/extras/CUPTI/lib64:/gpfslocalsys/cuda/11.2/lib64:/gpfslocalsys/cuda/11.2/samples/common/lib/linux/x86_64:/gpfslocalsys/cuda/11.2/targets/x86_64-linux/lib:/gpfslocalsys/slurm/current/lib/slurm:/gpfslocalsys/slurm/current/lib ++++ LD_LIBRARY_PATH= ++++ 
/gpfslocalsup/spack_soft/tcl/8.6.8/gcc-4.8.5-5nqkfcnctewdheju62zvqbsonnzszr6m/bin/tclsh /gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/libexec/modulecmd.tcl bash load nvtop git-lfs github-cli mc ++ eval 'MANPATH=/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/share/man:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/share/man:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/share/man:/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/man/common:/gpfslocalsys/cuda/11.2/doc/man::/opt/c3/man:/opt/clmgr/man:/opt/sgi/share/man:/opt/clmgr/share/man:/opt/clmgr/lib/cm-cli/man:/gpfslocalsys/slurm/current/share/man:/usr/share/catman:/usr/share/man:/usr/catman:/usr/man;' export 'MANPATH; CMAKE_PREFIX_PATH=/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/:/gpfslocalsup/spack_soft/git-lfs/2.7.2/gcc-8.3.1-o4avesibx2cgqxwaqvn5kbz247it7x2e/:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/:/gpfslocalsup/spack_soft/magma/2.5.4/gcc-8.3.1-d4zfylwpieuqujdgbuwv5yu6voe4okke/:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/:/gpfslocalsup/spack_soft/cudnn/8.1.1.33-11.2/gcc-8.3.1-dgsfwc7e23vzc36jopc5nxyunptm2ieh/:/gpfslocalsup/spack_soft/nccl/2.9.6-1/gcc-8.3.1-2c3rwcolksekujszxrg7r72uxmekxx2y/;' export 'CMAKE_PREFIX_PATH; LOADEDMODULES=gcc/8.3.1:cuda/11.2:nccl/2.9.6-1-cuda:cudnn/8.1.1.33-cuda:intel-mkl/2020.4:openmpi/4.1.1-cuda:magma/2.5.4-cuda:pytorch-gpu/py3/1.8.1:nvtop/1.1.0:git-lfs/2.7.2:github-cli/1.13.1:mc/4.8.26;' export 'LOADEDMODULES; _LMFILES_=/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/gcc/8.3.1:/gpfslocalsup/pub/module-rh/modulefiles/cuda/11.2:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/nccl/2.9.6-1-cuda:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/cudnn/8.1.1.33-cuda:/gpfslocalsup/pub/module-rh/modulefiles/intel-mkl/2020.4:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/openmpi/4.1.1-cuda:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/magma/2.5.4-cuda:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/pytorch-gpu/py3/1.8.1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/nvtop/1.1.0:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/git-lfs/2.7.2:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/github-cli/1.13.1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/mc/4.8.26;' export '_LMFILES_; MANPATH_modshare=:1:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/share/man:1:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/share/man:1:/opt/sgi/share/man:1:/opt/c3/man:1:/gpfslocalsys/slurm/current/share/man:1:/opt/clmgr/share/man:1:/opt/clmgr/lib/cm-cli/man:1:/usr/man:1:/usr/catman:1:/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/share/man:1:/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/man/common:1:/opt/clmgr/man:1:/usr/share/catman:1:/gpfslocalsys/cuda/11.2/doc/man:1:/usr/share/man:1;' export 'MANPATH_modshare; 
MODULES_LMCONFLICT=gcc/8.3.1\&gcc:cuda/11.2\&cuda:nccl/2.9.6-1-cuda\&nccl:cudnn/8.1.1.33-cuda\&cudnn:intel-mkl/2020.4\&intel-mkl:openmpi/4.1.1-cuda\&openmpi\&intel-mpi:magma/2.5.4-cuda\&magma:pytorch-gpu/py3/1.8.1\&python\&tensorflow\&pytorch\&caffe\&anaconda-py2\&anaconda-py3:nvtop/1.1.0\&nvtop:git-lfs/2.7.2\&git-lfs:github-cli/1.13.1\&github-cli:mc/4.8.26\&mc;' export 'MODULES_LMCONFLICT; MODULES_LMCONFLICT_modshare=git-lfs/2.7.2\&git-lfs:1:cudnn/8.1.1.33-cuda\&cudnn:1:cuda/11.2\&cuda:1:nccl/2.9.6-1-cuda\&nccl:1:magma/2.5.4-cuda\&magma:1:nvtop/1.1.0\&nvtop:1:pytorch-gpu/py3/1.8.1\&python\&tensorflow\&pytorch\&caffe\&anaconda-py2\&anaconda-py3:1:intel-mkl/2020.4\&intel-mkl:1:mc/4.8.26\&mc:1:openmpi/4.1.1-cuda\&openmpi\&intel-mpi:1:github-cli/1.13.1\&github-cli:1:gcc/8.3.1\&gcc:1;' export 'MODULES_LMCONFLICT_modshare; XLOCALEDIR=/gpfslocalsup/spack_soft/libx11/1.6.7/gcc-8.3.1-5blc7mmmrectsgimcul637qqzrcvtwn5/share/X11/locale;' export 'XLOCALEDIR; XLOCALEDIR_modshare=/gpfslocalsup/spack_soft/libx11/1.6.7/gcc-8.3.1-5blc7mmmrectsgimcul637qqzrcvtwn5/share/X11/locale:1;' export 'XLOCALEDIR_modshare; CMAKE_PREFIX_PATH_modshare=/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/:1:/gpfslocalsup/spack_soft/git-lfs/2.7.2/gcc-8.3.1-o4avesibx2cgqxwaqvn5kbz247it7x2e/:1:/gpfslocalsup/spack_soft/cudnn/8.1.1.33-11.2/gcc-8.3.1-dgsfwc7e23vzc36jopc5nxyunptm2ieh/:1:/gpfslocalsup/spack_soft/nccl/2.9.6-1/gcc-8.3.1-2c3rwcolksekujszxrg7r72uxmekxx2y/:1:/gpfslocalsup/spack_soft/magma/2.5.4/gcc-8.3.1-d4zfylwpieuqujdgbuwv5yu6voe4okke/:1:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/:1:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/:1;' export 'CMAKE_PREFIX_PATH_modshare; LOADEDMODULES_modshare=github-cli/1.13.1:1:intel-mkl/2020.4:1:git-lfs/2.7.2:1:nvtop/1.1.0:1:nccl/2.9.6-1-cuda:1:gcc/8.3.1:1:cuda/11.2:1:mc/4.8.26:1:pytorch-gpu/py3/1.8.1:1:magma/2.5.4-cuda:1:openmpi/4.1.1-cuda:1:cudnn/8.1.1.33-cuda:1;' export 'LOADEDMODULES_modshare; _LMFILES__modshare=/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/github-cli/1.13.1:1:/gpfslocalsup/pub/module-rh/modulefiles/cuda/11.2:1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/magma/2.5.4-cuda:1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/openmpi/4.1.1-cuda:1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/git-lfs/2.7.2:1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/nvtop/1.1.0:1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/nccl/2.9.6-1-cuda:1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/gcc/8.3.1:1:/gpfslocalsup/pub/module-rh/modulefiles/intel-mkl/2020.4:1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/mc/4.8.26:1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/pytorch-gpu/py3/1.8.1:1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/cudnn/8.1.1.33-cuda:1;' export '_LMFILES__modshare; 
PATH=/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/bin:/gpfslocalsup/pub/gitlab-cli/1.13.1/bin:/gpfslocalsup/spack_soft/git-lfs/2.7.2/gcc-8.3.1-o4avesibx2cgqxwaqvn5kbz247it7x2e/bin:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/bin:/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts/bin:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin;' export 'PATH; PATH_modshare=/gpfslocalsup/pub/gitlab-cli/1.13.1/bin:1:/usr/bin:1:/gpfslocalsup/bin:1:/usr/local/bin:1:/opt/sgi/bin:1:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/bin:1:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:1:/gpfslocalsys/slurm/current/bin:1:/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts/bin:1:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:1:/gpfslocalsys/cuda/11.2/bin:1:/opt/clmgr/bin:1:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:1:/opt/sgi/sbin:1:/bin:1:/opt/clmgr/sbin:1:/gpfslocalsys/bin:1:/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/bin:1:/gpfslocalsys/cuda/11.2/samples:1:/gpfslocalsup/spack_soft/git-lfs/2.7.2/gcc-8.3.1-o4avesibx2cgqxwaqvn5kbz247it7x2e/bin:1:/sbin:1:/gpfslocalsys/cuda/11.2/nvvm/bin:1:/usr/sbin:1:/gpfslocalsys/idrzap/current/bin:1:/usr/local/sbin:1:/usr/lpp/mmfs/bin:1:/opt/c3/bin:1;' export 'PATH_modshare; test' '0;' +++ MANPATH=/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/share/man:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/share/man:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/share/man:/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/man/common:/gpfslocalsys/cuda/11.2/doc/man::/opt/c3/man:/opt/clmgr/man:/opt/sgi/share/man:/opt/clmgr/share/man:/opt/clmgr/lib/cm-cli/man:/gpfslocalsys/slurm/current/share/man:/usr/share/catman:/usr/share/man:/usr/catman:/usr/man +++ export MANPATH +++ CMAKE_PREFIX_PATH=/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/:/gpfslocalsup/spack_soft/git-lfs/2.7.2/gcc-8.3.1-o4avesibx2cgqxwaqvn5kbz247it7x2e/:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/:/gpfslocalsup/spack_soft/magma/2.5.4/gcc-8.3.1-d4zfylwpieuqujdgbuwv5yu6voe4okke/:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/:/gpfslocalsup/spack_soft/cudnn/8.1.1.33-11.2/gcc-8.3.1-dgsfwc7e23vzc36jopc5nxyunptm2ieh/:/gpfslocalsup/spack_soft/nccl/2.9.6-1/gcc-8.3.1-2c3rwcolksekujszxrg7r72uxmekxx2y/ +++ export CMAKE_PREFIX_PATH +++ LOADEDMODULES=gcc/8.3.1:cuda/11.2:nccl/2.9.6-1-cuda:cudnn/8.1.1.33-cuda:intel-mkl/2020.4:openmpi/4.1.1-cuda:magma/2.5.4-cuda:pytorch-gpu/py3/1.8.1:nvtop/1.1.0:git-lfs/2.7.2:github-cli/1.13.1:mc/4.8.26 +++ export LOADEDMODULES +++ 
_LMFILES_=/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/gcc/8.3.1:/gpfslocalsup/pub/module-rh/modulefiles/cuda/11.2:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/nccl/2.9.6-1-cuda:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/cudnn/8.1.1.33-cuda:/gpfslocalsup/pub/module-rh/modulefiles/intel-mkl/2020.4:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/openmpi/4.1.1-cuda:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/magma/2.5.4-cuda:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/pytorch-gpu/py3/1.8.1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/nvtop/1.1.0:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/git-lfs/2.7.2:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/github-cli/1.13.1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/mc/4.8.26 +++ export _LMFILES_ +++ MANPATH_modshare=:1:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/share/man:1:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/share/man:1:/opt/sgi/share/man:1:/opt/c3/man:1:/gpfslocalsys/slurm/current/share/man:1:/opt/clmgr/share/man:1:/opt/clmgr/lib/cm-cli/man:1:/usr/man:1:/usr/catman:1:/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/share/man:1:/gpfslocalsys/intel/parallel_studio_xe_2020_update4_cluster_edition/compilers_and_libraries_2020.4.304/linux/man/common:1:/opt/clmgr/man:1:/usr/share/catman:1:/gpfslocalsys/cuda/11.2/doc/man:1:/usr/share/man:1 +++ export MANPATH_modshare +++ MODULES_LMCONFLICT='gcc/8.3.1&gcc:cuda/11.2&cuda:nccl/2.9.6-1-cuda&nccl:cudnn/8.1.1.33-cuda&cudnn:intel-mkl/2020.4&intel-mkl:openmpi/4.1.1-cuda&openmpi&intel-mpi:magma/2.5.4-cuda&magma:pytorch-gpu/py3/1.8.1&python&tensorflow&pytorch&caffe&anaconda-py2&anaconda-py3:nvtop/1.1.0&nvtop:git-lfs/2.7.2&git-lfs:github-cli/1.13.1&github-cli:mc/4.8.26&mc' +++ export MODULES_LMCONFLICT +++ MODULES_LMCONFLICT_modshare='git-lfs/2.7.2&git-lfs:1:cudnn/8.1.1.33-cuda&cudnn:1:cuda/11.2&cuda:1:nccl/2.9.6-1-cuda&nccl:1:magma/2.5.4-cuda&magma:1:nvtop/1.1.0&nvtop:1:pytorch-gpu/py3/1.8.1&python&tensorflow&pytorch&caffe&anaconda-py2&anaconda-py3:1:intel-mkl/2020.4&intel-mkl:1:mc/4.8.26&mc:1:openmpi/4.1.1-cuda&openmpi&intel-mpi:1:github-cli/1.13.1&github-cli:1:gcc/8.3.1&gcc:1' +++ export MODULES_LMCONFLICT_modshare +++ XLOCALEDIR=/gpfslocalsup/spack_soft/libx11/1.6.7/gcc-8.3.1-5blc7mmmrectsgimcul637qqzrcvtwn5/share/X11/locale +++ export XLOCALEDIR +++ XLOCALEDIR_modshare=/gpfslocalsup/spack_soft/libx11/1.6.7/gcc-8.3.1-5blc7mmmrectsgimcul637qqzrcvtwn5/share/X11/locale:1 +++ export XLOCALEDIR_modshare +++ CMAKE_PREFIX_PATH_modshare=/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/:1:/gpfslocalsup/spack_soft/git-lfs/2.7.2/gcc-8.3.1-o4avesibx2cgqxwaqvn5kbz247it7x2e/:1:/gpfslocalsup/spack_soft/cudnn/8.1.1.33-11.2/gcc-8.3.1-dgsfwc7e23vzc36jopc5nxyunptm2ieh/:1:/gpfslocalsup/spack_soft/nccl/2.9.6-1/gcc-8.3.1-2c3rwcolksekujszxrg7r72uxmekxx2y/:1:/gpfslocalsup/spack_soft/magma/2.5.4/gcc-8.3.1-d4zfylwpieuqujdgbuwv5yu6voe4okke/:1:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/:1:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/:1 +++ export CMAKE_PREFIX_PATH_modshare +++ 
LOADEDMODULES_modshare=github-cli/1.13.1:1:intel-mkl/2020.4:1:git-lfs/2.7.2:1:nvtop/1.1.0:1:nccl/2.9.6-1-cuda:1:gcc/8.3.1:1:cuda/11.2:1:mc/4.8.26:1:pytorch-gpu/py3/1.8.1:1:magma/2.5.4-cuda:1:openmpi/4.1.1-cuda:1:cudnn/8.1.1.33-cuda:1 +++ export LOADEDMODULES_modshare +++ _LMFILES__modshare=/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/github-cli/1.13.1:1:/gpfslocalsup/pub/module-rh/modulefiles/cuda/11.2:1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/magma/2.5.4-cuda:1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/openmpi/4.1.1-cuda:1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/git-lfs/2.7.2:1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/nvtop/1.1.0:1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/nccl/2.9.6-1-cuda:1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/gcc/8.3.1:1:/gpfslocalsup/pub/module-rh/modulefiles/intel-mkl/2020.4:1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/mc/4.8.26:1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/pytorch-gpu/py3/1.8.1:1:/gpfslocalsup/pub/modules-idris-env4/modulefiles/linux-rhel8-skylake_avx512/cudnn/8.1.1.33-cuda:1 +++ export _LMFILES__modshare +++ PATH=/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/bin:/gpfslocalsup/pub/gitlab-cli/1.13.1/bin:/gpfslocalsup/spack_soft/git-lfs/2.7.2/gcc-8.3.1-o4avesibx2cgqxwaqvn5kbz247it7x2e/bin:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/bin:/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts/bin:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin +++ export PATH +++ PATH_modshare=/gpfslocalsup/pub/gitlab-cli/1.13.1/bin:1:/usr/bin:1:/gpfslocalsup/bin:1:/usr/local/bin:1:/opt/sgi/bin:1:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/bin:1:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:1:/gpfslocalsys/slurm/current/bin:1:/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts/bin:1:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:1:/gpfslocalsys/cuda/11.2/bin:1:/opt/clmgr/bin:1:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:1:/opt/sgi/sbin:1:/bin:1:/opt/clmgr/sbin:1:/gpfslocalsys/bin:1:/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/bin:1:/gpfslocalsys/cuda/11.2/samples:1:/gpfslocalsup/spack_soft/git-lfs/2.7.2/gcc-8.3.1-o4avesibx2cgqxwaqvn5kbz247it7x2e/bin:1:/sbin:1:/gpfslocalsys/cuda/11.2/nvvm/bin:1:/usr/sbin:1:/gpfslocalsys/idrzap/current/bin:1:/usr/local/sbin:1:/usr/lpp/mmfs/bin:1:/opt/c3/bin:1 +++ export PATH_modshare +++ test 0 ++ _mlstatus=0 ++ '[' -n x ']' ++ IFS=' ' ++ unset _mlre _mlv _mlrv _mlIFS ++ '[' -n '' ']' ++ unset _mlshdbg ++ return 0 ++ export GIT_PROMPT_ONLY_IN_REPO=0 ++ 
GIT_PROMPT_ONLY_IN_REPO=0 ++ export GIT_PROMPT_THEME=JZPRod ++ GIT_PROMPT_THEME=JZPRod ++ source /gpfswork/rech/six/commun/envs/.bash-git-prompt/gitprompt.sh +++ _have_find_mmin=1 +++ gp_install_prompt +++ '[' -z '' ']' +++ OLD_GITPROMPT='(pytorch-1.8.1+py3.8.8-lts) ' +++ '[' -z '' ']' ++++ we_are_on_repo +++++ git rev-parse --git-dir ++++ [[ -e '' ]] ++++ echo 0 +++ GIT_PROMPT_OLD_DIR_WAS_GIT=0 +++ '[' -z '' ']' +++ PROMPT_COMMAND=setGitPrompt +++ local setLastCommandStateEntry=setLastCommandState +++ case ";$PROMPT_COMMAND;" in +++ PROMPT_COMMAND='setLastCommandState;setGitPrompt' +++ git_prompt_dir +++ '[' -z '' ']' +++ local SOURCE=/gpfswork/rech/six/commun/envs/.bash-git-prompt/gitprompt.sh +++ '[' -h /gpfswork/rech/six/commun/envs/.bash-git-prompt/gitprompt.sh ']' +++++ dirname /gpfswork/rech/six/commun/envs/.bash-git-prompt/gitprompt.sh ++++ command cd -P /gpfswork/rech/six/commun/envs/.bash-git-prompt ++++ cd -P /gpfswork/rech/six/commun/envs/.bash-git-prompt ++++ pwd +++ __GIT_PROMPT_DIR=/gpfsdswork/projects/rech/six/commun/envs/.bash-git-prompt +++ source /gpfsdswork/projects/rech/six/commun/envs/.bash-git-prompt/git-prompt-help.sh ++ export TRANSFORMERS_CACHE=/gpfswork/rech/six/commun/models ++ TRANSFORMERS_CACHE=/gpfswork/rech/six/commun/models ++ export HF_DATASETS_CACHE=/gpfswork/rech/six/commun/datasets ++ HF_DATASETS_CACHE=/gpfswork/rech/six/commun/datasets ++ export HF_MODULES_CACHE=/gpfswork/rech/six/commun/modules ++ HF_MODULES_CACHE=/gpfswork/rech/six/commun/modules ++ export HF_METRICS_CACHE=/gpfswork/rech/six/commun/metrics ++ HF_METRICS_CACHE=/gpfswork/rech/six/commun/metrics ++ export DATASETS_CUSTOM=/gpfswork/rech/six/commun/datasets-custom ++ DATASETS_CUSTOM=/gpfswork/rech/six/commun/datasets-custom +++ /gpfslocalsup/pub/anaconda-py3/2020.02/bin/conda shell.bash hook ++ __conda_setup='export CONDA_EXE='\''/gpfslocalsup/pub/anaconda-py3/2020.02/bin/conda'\'' export _CE_M='\'''\'' export _CE_CONDA='\'''\'' export CONDA_PYTHON_EXE='\''/gpfslocalsup/pub/anaconda-py3/2020.02/bin/python'\'' # Copyright (C) 2012 Anaconda, Inc # SPDX-License-Identifier: BSD-3-Clause __add_sys_prefix_to_path() { # In dev-mode CONDA_EXE is python.exe and on Windows # it is in a different relative location to condabin. if [ -n "${_CE_CONDA}" ] && [ -n "${WINDIR+x}" ]; then SYSP=$(\dirname "${CONDA_EXE}") else SYSP=$(\dirname "${CONDA_EXE}") SYSP=$(\dirname "${SYSP}") fi if [ -n "${WINDIR+x}" ]; then PATH="${SYSP}/bin:${PATH}" PATH="${SYSP}/Scripts:${PATH}" PATH="${SYSP}/Library/bin:${PATH}" PATH="${SYSP}/Library/usr/bin:${PATH}" PATH="${SYSP}/Library/mingw-w64/bin:${PATH}" PATH="${SYSP}:${PATH}" else PATH="${SYSP}/bin:${PATH}" fi \export PATH } __conda_hashr() { if [ -n "${ZSH_VERSION:+x}" ]; then \rehash elif [ -n "${POSH_VERSION:+x}" ]; then : # pass else \hash -r fi } __conda_activate() { if [ -n "${CONDA_PS1_BACKUP:+x}" ]; then # Handle transition from shell activated with conda <= 4.3 to a subsequent activation # after conda updated to >= 4.4. See issue #6173. PS1="$CONDA_PS1_BACKUP" \unset CONDA_PS1_BACKUP fi \local cmd="$1" shift \local ask_conda CONDA_INTERNAL_OLDPATH="${PATH}" __add_sys_prefix_to_path ask_conda="$(PS1="$PS1" "$CONDA_EXE" $_CE_M $_CE_CONDA shell.posix "$cmd" "$@")" || \return $? rc=$? 
PATH="${CONDA_INTERNAL_OLDPATH}" \eval "$ask_conda" if [ $rc != 0 ]; then \export PATH fi __conda_hashr } __conda_reactivate() { \local ask_conda CONDA_INTERNAL_OLDPATH="${PATH}" __add_sys_prefix_to_path ask_conda="$(PS1="$PS1" "$CONDA_EXE" $_CE_M $_CE_CONDA shell.posix reactivate)" || \return $? PATH="${CONDA_INTERNAL_OLDPATH}" \eval "$ask_conda" __conda_hashr } conda() { if [ "$#" -lt 1 ]; then "$CONDA_EXE" $_CE_M $_CE_CONDA else \local cmd="$1" shift case "$cmd" in activate|deactivate) __conda_activate "$cmd" "$@" ;; install|update|upgrade|remove|uninstall) CONDA_INTERNAL_OLDPATH="${PATH}" __add_sys_prefix_to_path "$CONDA_EXE" $_CE_M $_CE_CONDA "$cmd" "$@" \local t1=$? PATH="${CONDA_INTERNAL_OLDPATH}" if [ $t1 = 0 ]; then __conda_reactivate else return $t1 fi ;; *) CONDA_INTERNAL_OLDPATH="${PATH}" __add_sys_prefix_to_path "$CONDA_EXE" $_CE_M $_CE_CONDA "$cmd" "$@" \local t1=$? PATH="${CONDA_INTERNAL_OLDPATH}" return $t1 ;; esac fi } if [ -z "${CONDA_SHLVL+x}" ]; then \export CONDA_SHLVL=0 # In dev-mode CONDA_EXE is python.exe and on Windows # it is in a different relative location to condabin. if [ -n "${_CE_CONDA:+x}" ] && [ -n "${WINDIR+x}" ]; then PATH="$(\dirname "$CONDA_EXE")/condabin${PATH:+":${PATH}"}" else PATH="$(\dirname "$(\dirname "$CONDA_EXE")")/condabin${PATH:+":${PATH}"}" fi \export PATH # We'\''re not allowing PS1 to be unbound. It must at least be set. # However, we'\''re not exporting it, which can cause problems when starting a second shell # via a first shell (i.e. starting zsh from bash). if [ -z "${PS1+x}" ]; then PS1= fi fi conda activate base' ++ '[' 0 -eq 0 ']' ++ eval 'export CONDA_EXE='\''/gpfslocalsup/pub/anaconda-py3/2020.02/bin/conda'\'' export _CE_M='\'''\'' export _CE_CONDA='\'''\'' export CONDA_PYTHON_EXE='\''/gpfslocalsup/pub/anaconda-py3/2020.02/bin/python'\'' # Copyright (C) 2012 Anaconda, Inc # SPDX-License-Identifier: BSD-3-Clause __add_sys_prefix_to_path() { # In dev-mode CONDA_EXE is python.exe and on Windows # it is in a different relative location to condabin. if [ -n "${_CE_CONDA}" ] && [ -n "${WINDIR+x}" ]; then SYSP=$(\dirname "${CONDA_EXE}") else SYSP=$(\dirname "${CONDA_EXE}") SYSP=$(\dirname "${SYSP}") fi if [ -n "${WINDIR+x}" ]; then PATH="${SYSP}/bin:${PATH}" PATH="${SYSP}/Scripts:${PATH}" PATH="${SYSP}/Library/bin:${PATH}" PATH="${SYSP}/Library/usr/bin:${PATH}" PATH="${SYSP}/Library/mingw-w64/bin:${PATH}" PATH="${SYSP}:${PATH}" else PATH="${SYSP}/bin:${PATH}" fi \export PATH } __conda_hashr() { if [ -n "${ZSH_VERSION:+x}" ]; then \rehash elif [ -n "${POSH_VERSION:+x}" ]; then : # pass else \hash -r fi } __conda_activate() { if [ -n "${CONDA_PS1_BACKUP:+x}" ]; then # Handle transition from shell activated with conda <= 4.3 to a subsequent activation # after conda updated to >= 4.4. See issue #6173. PS1="$CONDA_PS1_BACKUP" \unset CONDA_PS1_BACKUP fi \local cmd="$1" shift \local ask_conda CONDA_INTERNAL_OLDPATH="${PATH}" __add_sys_prefix_to_path ask_conda="$(PS1="$PS1" "$CONDA_EXE" $_CE_M $_CE_CONDA shell.posix "$cmd" "$@")" || \return $? rc=$? PATH="${CONDA_INTERNAL_OLDPATH}" \eval "$ask_conda" if [ $rc != 0 ]; then \export PATH fi __conda_hashr } __conda_reactivate() { \local ask_conda CONDA_INTERNAL_OLDPATH="${PATH}" __add_sys_prefix_to_path ask_conda="$(PS1="$PS1" "$CONDA_EXE" $_CE_M $_CE_CONDA shell.posix reactivate)" || \return $? 
PATH="${CONDA_INTERNAL_OLDPATH}" \eval "$ask_conda" __conda_hashr } conda() { if [ "$#" -lt 1 ]; then "$CONDA_EXE" $_CE_M $_CE_CONDA else \local cmd="$1" shift case "$cmd" in activate|deactivate) __conda_activate "$cmd" "$@" ;; install|update|upgrade|remove|uninstall) CONDA_INTERNAL_OLDPATH="${PATH}" __add_sys_prefix_to_path "$CONDA_EXE" $_CE_M $_CE_CONDA "$cmd" "$@" \local t1=$? PATH="${CONDA_INTERNAL_OLDPATH}" if [ $t1 = 0 ]; then __conda_reactivate else return $t1 fi ;; *) CONDA_INTERNAL_OLDPATH="${PATH}" __add_sys_prefix_to_path "$CONDA_EXE" $_CE_M $_CE_CONDA "$cmd" "$@" \local t1=$? PATH="${CONDA_INTERNAL_OLDPATH}" return $t1 ;; esac fi } if [ -z "${CONDA_SHLVL+x}" ]; then \export CONDA_SHLVL=0 # In dev-mode CONDA_EXE is python.exe and on Windows # it is in a different relative location to condabin. if [ -n "${_CE_CONDA:+x}" ] && [ -n "${WINDIR+x}" ]; then PATH="$(\dirname "$CONDA_EXE")/condabin${PATH:+":${PATH}"}" else PATH="$(\dirname "$(\dirname "$CONDA_EXE")")/condabin${PATH:+":${PATH}"}" fi \export PATH # We'\''re not allowing PS1 to be unbound. It must at least be set. # However, we'\''re not exporting it, which can cause problems when starting a second shell # via a first shell (i.e. starting zsh from bash). if [ -z "${PS1+x}" ]; then PS1= fi fi conda activate base' +++ export CONDA_EXE=/gpfslocalsup/pub/anaconda-py3/2020.02/bin/conda +++ CONDA_EXE=/gpfslocalsup/pub/anaconda-py3/2020.02/bin/conda +++ export _CE_M= +++ _CE_M= +++ export _CE_CONDA= +++ _CE_CONDA= +++ export CONDA_PYTHON_EXE=/gpfslocalsup/pub/anaconda-py3/2020.02/bin/python +++ CONDA_PYTHON_EXE=/gpfslocalsup/pub/anaconda-py3/2020.02/bin/python +++ '[' -z x ']' +++ conda activate base +++ '[' 2 -lt 1 ']' +++ local cmd=activate +++ shift +++ case "$cmd" in +++ __conda_activate activate base +++ '[' -n '' ']' +++ local cmd=activate +++ shift +++ local ask_conda +++ CONDA_INTERNAL_OLDPATH=/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/bin:/gpfslocalsup/pub/gitlab-cli/1.13.1/bin:/gpfslocalsup/spack_soft/git-lfs/2.7.2/gcc-8.3.1-o4avesibx2cgqxwaqvn5kbz247it7x2e/bin:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/bin:/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts/bin:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin +++ __add_sys_prefix_to_path +++ '[' -n '' ']' ++++ dirname /gpfslocalsup/pub/anaconda-py3/2020.02/bin/conda +++ SYSP=/gpfslocalsup/pub/anaconda-py3/2020.02/bin ++++ dirname /gpfslocalsup/pub/anaconda-py3/2020.02/bin +++ SYSP=/gpfslocalsup/pub/anaconda-py3/2020.02 +++ '[' -n '' ']' +++ 
PATH=/gpfslocalsup/pub/anaconda-py3/2020.02/bin:/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/bin:/gpfslocalsup/pub/gitlab-cli/1.13.1/bin:/gpfslocalsup/spack_soft/git-lfs/2.7.2/gcc-8.3.1-o4avesibx2cgqxwaqvn5kbz247it7x2e/bin:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/bin:/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts/bin:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin +++ export PATH ++++ PS1='(pytorch-1.8.1+py3.8.8-lts) ' ++++ /gpfslocalsup/pub/anaconda-py3/2020.02/bin/conda shell.posix activate base +++ ask_conda='. "/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts/etc/conda/deactivate.d/proj4-deactivate.sh" . "/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts/etc/conda/deactivate.d/glib_deactivate.sh" PS1='\''(base) '\'' export PATH='\''/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/bin:/gpfslocalsup/pub/gitlab-cli/1.13.1/bin:/gpfslocalsup/spack_soft/git-lfs/2.7.2/gcc-8.3.1-o4avesibx2cgqxwaqvn5kbz247it7x2e/bin:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/bin:/gpfslocalsup/pub/anaconda-py3/2020.02/bin:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin'\'' export CONDA_PREFIX='\''/gpfslocalsup/pub/anaconda-py3/2020.02'\'' export CONDA_SHLVL='\''2'\'' export CONDA_DEFAULT_ENV='\''base'\'' export CONDA_PROMPT_MODIFIER='\''(base) '\'' export CONDA_EXE='\''/gpfslocalsup/pub/anaconda-py3/2020.02/bin/conda'\'' export _CE_M='\'''\'' export _CE_CONDA='\'''\'' export CONDA_PYTHON_EXE='\''/gpfslocalsup/pub/anaconda-py3/2020.02/bin/python'\'' export CONDA_PREFIX_1='\''/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts'\'' . "/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/activate.d/gdal-activate.sh" . "/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/activate.d/geotiff-activate.sh" . "/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/activate.d/proj4-activate.sh" . 
"/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/activate.d/udunits2-activate.sh"' +++ rc=0 +++ PATH=/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/bin:/gpfslocalsup/pub/gitlab-cli/1.13.1/bin:/gpfslocalsup/spack_soft/git-lfs/2.7.2/gcc-8.3.1-o4avesibx2cgqxwaqvn5kbz247it7x2e/bin:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/bin:/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts/bin:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin +++ eval '. "/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts/etc/conda/deactivate.d/proj4-deactivate.sh" . "/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts/etc/conda/deactivate.d/glib_deactivate.sh" PS1='\''(base) '\'' export PATH='\''/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/bin:/gpfslocalsup/pub/gitlab-cli/1.13.1/bin:/gpfslocalsup/spack_soft/git-lfs/2.7.2/gcc-8.3.1-o4avesibx2cgqxwaqvn5kbz247it7x2e/bin:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/bin:/gpfslocalsup/pub/anaconda-py3/2020.02/bin:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin'\'' export CONDA_PREFIX='\''/gpfslocalsup/pub/anaconda-py3/2020.02'\'' export CONDA_SHLVL='\''2'\'' export CONDA_DEFAULT_ENV='\''base'\'' export CONDA_PROMPT_MODIFIER='\''(base) '\'' export CONDA_EXE='\''/gpfslocalsup/pub/anaconda-py3/2020.02/bin/conda'\'' export _CE_M='\'''\'' export _CE_CONDA='\'''\'' export CONDA_PYTHON_EXE='\''/gpfslocalsup/pub/anaconda-py3/2020.02/bin/python'\'' export CONDA_PREFIX_1='\''/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts'\'' . "/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/activate.d/gdal-activate.sh" . "/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/activate.d/geotiff-activate.sh" . "/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/activate.d/proj4-activate.sh" . "/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/activate.d/udunits2-activate.sh"' ++++ . /gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts/etc/conda/deactivate.d/proj4-deactivate.sh +++++ unset PROJ_LIB +++++ unset PROJ_NETWORK +++++ '[' -n '' ']' ++++ . 
/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts/etc/conda/deactivate.d/glib_deactivate.sh +++++ export GSETTINGS_SCHEMA_DIR= +++++ GSETTINGS_SCHEMA_DIR= +++++ unset GSETTINGS_SCHEMA_DIR_CONDA_BACKUP +++++ '[' -z ']' +++++ unset GSETTINGS_SCHEMA_DIR ++++ PS1='(base) ' ++++ export PATH=/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/bin:/gpfslocalsup/pub/gitlab-cli/1.13.1/bin:/gpfslocalsup/spack_soft/git-lfs/2.7.2/gcc-8.3.1-o4avesibx2cgqxwaqvn5kbz247it7x2e/bin:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/bin:/gpfslocalsup/pub/anaconda-py3/2020.02/bin:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin ++++ PATH=/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/bin:/gpfslocalsup/pub/gitlab-cli/1.13.1/bin:/gpfslocalsup/spack_soft/git-lfs/2.7.2/gcc-8.3.1-o4avesibx2cgqxwaqvn5kbz247it7x2e/bin:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/bin:/gpfslocalsup/pub/anaconda-py3/2020.02/bin:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin ++++ export CONDA_PREFIX=/gpfslocalsup/pub/anaconda-py3/2020.02 ++++ CONDA_PREFIX=/gpfslocalsup/pub/anaconda-py3/2020.02 ++++ export CONDA_SHLVL=2 ++++ CONDA_SHLVL=2 ++++ export CONDA_DEFAULT_ENV=base ++++ CONDA_DEFAULT_ENV=base ++++ export 'CONDA_PROMPT_MODIFIER=(base) ' ++++ CONDA_PROMPT_MODIFIER='(base) ' ++++ export CONDA_EXE=/gpfslocalsup/pub/anaconda-py3/2020.02/bin/conda ++++ CONDA_EXE=/gpfslocalsup/pub/anaconda-py3/2020.02/bin/conda ++++ export _CE_M= ++++ _CE_M= ++++ export _CE_CONDA= ++++ _CE_CONDA= ++++ export CONDA_PYTHON_EXE=/gpfslocalsup/pub/anaconda-py3/2020.02/bin/python ++++ CONDA_PYTHON_EXE=/gpfslocalsup/pub/anaconda-py3/2020.02/bin/python ++++ export CONDA_PREFIX_1=/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts ++++ CONDA_PREFIX_1=/gpfslocalsup/pub/anaconda-py3/2021.05/envs/pytorch-1.8.1+py3.8.8-lts ++++ . /gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/activate.d/gdal-activate.sh +++++ [[ -n '' ]] +++++ [[ -n '' ]] +++++ '[' -d /gpfslocalsup/pub/anaconda-py3/2020.02/share/gdal ']' +++++ export GDAL_DATA=/gpfslocalsup/pub/anaconda-py3/2020.02/share/gdal +++++ GDAL_DATA=/gpfslocalsup/pub/anaconda-py3/2020.02/share/gdal +++++ export GDAL_DRIVER_PATH=/gpfslocalsup/pub/anaconda-py3/2020.02/lib/gdalplugins +++++ GDAL_DRIVER_PATH=/gpfslocalsup/pub/anaconda-py3/2020.02/lib/gdalplugins +++++ [[ ! 
-d /gpfslocalsup/pub/anaconda-py3/2020.02/lib/gdalplugins ]] +++++ unset GDAL_DRIVER_PATH +++++ export CPL_ZIP_ENCODING=UTF-8 +++++ CPL_ZIP_ENCODING=UTF-8 ++++ . /gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/activate.d/geotiff-activate.sh +++++ [[ -n '' ]] +++++ '[' -d /gpfslocalsup/pub/anaconda-py3/2020.02/share/epsg_csv ']' +++++ '[' -d /gpfslocalsup/pub/anaconda-py3/2020.02/Library/share/epsg_csv ']' ++++ . /gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/activate.d/proj4-activate.sh +++++ '[' -n '' ']' +++++ '[' -d /gpfslocalsup/pub/anaconda-py3/2020.02/share/proj ']' +++++ export PROJ_LIB=/gpfslocalsup/pub/anaconda-py3/2020.02/share/proj +++++ PROJ_LIB=/gpfslocalsup/pub/anaconda-py3/2020.02/share/proj +++++ '[' -f /gpfslocalsup/pub/anaconda-py3/2020.02/share/proj/copyright_and_licenses.csv ']' +++++ export PROJ_NETWORK=ON +++++ PROJ_NETWORK=ON ++++ . /gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/activate.d/udunits2-activate.sh +++++ [[ -n '' ]] +++++ '[' -d /gpfslocalsup/pub/anaconda-py3/2020.02/share/udunits ']' +++++ export UDUNITS2_XML_PATH=/gpfslocalsup/pub/anaconda-py3/2020.02/share/udunits/udunits2.xml +++++ UDUNITS2_XML_PATH=/gpfslocalsup/pub/anaconda-py3/2020.02/share/udunits/udunits2.xml +++ '[' 0 '!=' 0 ']' +++ __conda_hashr +++ '[' -n '' ']' +++ '[' -n '' ']' +++ hash -r ++ unset __conda_setup ++ export CONDA_ENVS_PATH=/gpfswork/rech/six/commun/conda ++ CONDA_ENVS_PATH=/gpfswork/rech/six/commun/conda ++ conda activate base ++ '[' 2 -lt 1 ']' ++ local cmd=activate ++ shift ++ case "$cmd" in ++ __conda_activate activate base ++ '[' -n '' ']' ++ local cmd=activate ++ shift ++ local ask_conda ++ CONDA_INTERNAL_OLDPATH=/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/bin:/gpfslocalsup/pub/gitlab-cli/1.13.1/bin:/gpfslocalsup/spack_soft/git-lfs/2.7.2/gcc-8.3.1-o4avesibx2cgqxwaqvn5kbz247it7x2e/bin:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/bin:/gpfslocalsup/pub/anaconda-py3/2020.02/bin:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin ++ __add_sys_prefix_to_path ++ '[' -n '' ']' +++ dirname /gpfslocalsup/pub/anaconda-py3/2020.02/bin/conda ++ SYSP=/gpfslocalsup/pub/anaconda-py3/2020.02/bin +++ dirname /gpfslocalsup/pub/anaconda-py3/2020.02/bin ++ SYSP=/gpfslocalsup/pub/anaconda-py3/2020.02 ++ '[' -n '' ']' ++ 
PATH=/gpfslocalsup/pub/anaconda-py3/2020.02/bin:/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/bin:/gpfslocalsup/pub/gitlab-cli/1.13.1/bin:/gpfslocalsup/spack_soft/git-lfs/2.7.2/gcc-8.3.1-o4avesibx2cgqxwaqvn5kbz247it7x2e/bin:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/bin:/gpfslocalsup/pub/anaconda-py3/2020.02/bin:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin ++ export PATH +++ PS1='(base) ' +++ /gpfslocalsup/pub/anaconda-py3/2020.02/bin/conda shell.posix activate base ++ ask_conda='. "/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/deactivate.d/udunits2-deactivate.sh" . "/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/deactivate.d/proj4-deactivate.sh" . "/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/deactivate.d/geotiff-deactivate.sh" . "/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/deactivate.d/gdal-deactivate.sh" PS1='\''(base) '\'' export PATH='\''/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/bin:/gpfslocalsup/pub/gitlab-cli/1.13.1/bin:/gpfslocalsup/spack_soft/git-lfs/2.7.2/gcc-8.3.1-o4avesibx2cgqxwaqvn5kbz247it7x2e/bin:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/bin:/gpfslocalsup/pub/anaconda-py3/2020.02/bin:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin'\'' export CONDA_SHLVL='\''2'\'' export CONDA_PROMPT_MODIFIER='\''(base) '\'' . "/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/activate.d/gdal-activate.sh" . "/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/activate.d/geotiff-activate.sh" . "/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/activate.d/proj4-activate.sh" . 
"/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/activate.d/udunits2-activate.sh"' ++ rc=0 ++ PATH=/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/bin:/gpfslocalsup/pub/gitlab-cli/1.13.1/bin:/gpfslocalsup/spack_soft/git-lfs/2.7.2/gcc-8.3.1-o4avesibx2cgqxwaqvn5kbz247it7x2e/bin:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/bin:/gpfslocalsup/pub/anaconda-py3/2020.02/bin:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin ++ eval '. "/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/deactivate.d/udunits2-deactivate.sh" . "/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/deactivate.d/proj4-deactivate.sh" . "/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/deactivate.d/geotiff-deactivate.sh" . "/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/deactivate.d/gdal-deactivate.sh" PS1='\''(base) '\'' export PATH='\''/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/bin:/gpfslocalsup/pub/gitlab-cli/1.13.1/bin:/gpfslocalsup/spack_soft/git-lfs/2.7.2/gcc-8.3.1-o4avesibx2cgqxwaqvn5kbz247it7x2e/bin:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/bin:/gpfslocalsup/pub/anaconda-py3/2020.02/bin:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin'\'' export CONDA_SHLVL='\''2'\'' export CONDA_PROMPT_MODIFIER='\''(base) '\'' . "/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/activate.d/gdal-activate.sh" . "/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/activate.d/geotiff-activate.sh" . "/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/activate.d/proj4-activate.sh" . "/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/activate.d/udunits2-activate.sh"' +++ . /gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/deactivate.d/udunits2-deactivate.sh ++++ unset UDUNITS2_XML_PATH ++++ [[ -n '' ]] +++ . /gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/deactivate.d/proj4-deactivate.sh ++++ unset PROJ_LIB ++++ unset PROJ_NETWORK ++++ '[' -n '' ']' +++ . /gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/deactivate.d/geotiff-deactivate.sh ++++ unset GEOTIFF_CSV ++++ [[ -n '' ]] +++ . 
/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/deactivate.d/gdal-deactivate.sh ++++ unset GDAL_DATA ++++ [[ -n '' ]] ++++ unset GDAL_DRIVER_PATH ++++ [[ -n '' ]] ++++ unset CPL_ZIP_ENCODING +++ PS1='(base) ' +++ export PATH=/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/bin:/gpfslocalsup/pub/gitlab-cli/1.13.1/bin:/gpfslocalsup/spack_soft/git-lfs/2.7.2/gcc-8.3.1-o4avesibx2cgqxwaqvn5kbz247it7x2e/bin:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/bin:/gpfslocalsup/pub/anaconda-py3/2020.02/bin:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin +++ PATH=/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/bin:/gpfslocalsup/pub/gitlab-cli/1.13.1/bin:/gpfslocalsup/spack_soft/git-lfs/2.7.2/gcc-8.3.1-o4avesibx2cgqxwaqvn5kbz247it7x2e/bin:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/bin:/gpfslocalsup/pub/anaconda-py3/2020.02/bin:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin +++ export CONDA_SHLVL=2 +++ CONDA_SHLVL=2 +++ export 'CONDA_PROMPT_MODIFIER=(base) ' +++ CONDA_PROMPT_MODIFIER='(base) ' +++ . /gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/activate.d/gdal-activate.sh ++++ [[ -n '' ]] ++++ [[ -n '' ]] ++++ '[' -d /gpfslocalsup/pub/anaconda-py3/2020.02/share/gdal ']' ++++ export GDAL_DATA=/gpfslocalsup/pub/anaconda-py3/2020.02/share/gdal ++++ GDAL_DATA=/gpfslocalsup/pub/anaconda-py3/2020.02/share/gdal ++++ export GDAL_DRIVER_PATH=/gpfslocalsup/pub/anaconda-py3/2020.02/lib/gdalplugins ++++ GDAL_DRIVER_PATH=/gpfslocalsup/pub/anaconda-py3/2020.02/lib/gdalplugins ++++ [[ ! -d /gpfslocalsup/pub/anaconda-py3/2020.02/lib/gdalplugins ]] ++++ unset GDAL_DRIVER_PATH ++++ export CPL_ZIP_ENCODING=UTF-8 ++++ CPL_ZIP_ENCODING=UTF-8 +++ . /gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/activate.d/geotiff-activate.sh ++++ [[ -n '' ]] ++++ '[' -d /gpfslocalsup/pub/anaconda-py3/2020.02/share/epsg_csv ']' ++++ '[' -d /gpfslocalsup/pub/anaconda-py3/2020.02/Library/share/epsg_csv ']' +++ . /gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/activate.d/proj4-activate.sh ++++ '[' -n '' ']' ++++ '[' -d /gpfslocalsup/pub/anaconda-py3/2020.02/share/proj ']' ++++ export PROJ_LIB=/gpfslocalsup/pub/anaconda-py3/2020.02/share/proj ++++ PROJ_LIB=/gpfslocalsup/pub/anaconda-py3/2020.02/share/proj ++++ '[' -f /gpfslocalsup/pub/anaconda-py3/2020.02/share/proj/copyright_and_licenses.csv ']' ++++ export PROJ_NETWORK=ON ++++ PROJ_NETWORK=ON +++ . 
/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/activate.d/udunits2-activate.sh ++++ [[ -n '' ]] ++++ '[' -d /gpfslocalsup/pub/anaconda-py3/2020.02/share/udunits ']' ++++ export UDUNITS2_XML_PATH=/gpfslocalsup/pub/anaconda-py3/2020.02/share/udunits/udunits2.xml ++++ UDUNITS2_XML_PATH=/gpfslocalsup/pub/anaconda-py3/2020.02/share/udunits/udunits2.xml ++ '[' 0 '!=' 0 ']' ++ __conda_hashr ++ '[' -n '' ']' ++ '[' -n '' ']' ++ hash -r ++ conda activate hf-prod ++ '[' 2 -lt 1 ']' ++ local cmd=activate ++ shift ++ case "$cmd" in ++ __conda_activate activate hf-prod ++ '[' -n '' ']' ++ local cmd=activate ++ shift ++ local ask_conda ++ CONDA_INTERNAL_OLDPATH=/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/bin:/gpfslocalsup/pub/gitlab-cli/1.13.1/bin:/gpfslocalsup/spack_soft/git-lfs/2.7.2/gcc-8.3.1-o4avesibx2cgqxwaqvn5kbz247it7x2e/bin:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/bin:/gpfslocalsup/pub/anaconda-py3/2020.02/bin:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin ++ __add_sys_prefix_to_path ++ '[' -n '' ']' +++ dirname /gpfslocalsup/pub/anaconda-py3/2020.02/bin/conda ++ SYSP=/gpfslocalsup/pub/anaconda-py3/2020.02/bin +++ dirname /gpfslocalsup/pub/anaconda-py3/2020.02/bin ++ SYSP=/gpfslocalsup/pub/anaconda-py3/2020.02 ++ '[' -n '' ']' ++ PATH=/gpfslocalsup/pub/anaconda-py3/2020.02/bin:/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/bin:/gpfslocalsup/pub/gitlab-cli/1.13.1/bin:/gpfslocalsup/spack_soft/git-lfs/2.7.2/gcc-8.3.1-o4avesibx2cgqxwaqvn5kbz247it7x2e/bin:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/bin:/gpfslocalsup/pub/anaconda-py3/2020.02/bin:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin ++ export PATH +++ PS1='(base) ' +++ /gpfslocalsup/pub/anaconda-py3/2020.02/bin/conda shell.posix activate hf-prod ++ ask_conda='. "/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/deactivate.d/udunits2-deactivate.sh" . "/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/deactivate.d/proj4-deactivate.sh" . "/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/deactivate.d/geotiff-deactivate.sh" . 
"/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/deactivate.d/gdal-deactivate.sh" PS1='\''(/gpfswork/rech/six/commun/conda/hf-prod) '\'' export PATH='\''/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/bin:/gpfslocalsup/pub/gitlab-cli/1.13.1/bin:/gpfslocalsup/spack_soft/git-lfs/2.7.2/gcc-8.3.1-o4avesibx2cgqxwaqvn5kbz247it7x2e/bin:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/bin:/gpfswork/rech/six/commun/conda/hf-prod/bin:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin'\'' export CONDA_PREFIX='\''/gpfswork/rech/six/commun/conda/hf-prod'\'' export CONDA_SHLVL='\''3'\'' export CONDA_DEFAULT_ENV='\''/gpfswork/rech/six/commun/conda/hf-prod'\'' export CONDA_PROMPT_MODIFIER='\''(/gpfswork/rech/six/commun/conda/hf-prod) '\'' export CONDA_EXE='\''/gpfslocalsup/pub/anaconda-py3/2020.02/bin/conda'\'' export _CE_M='\'''\'' export _CE_CONDA='\'''\'' export CONDA_PYTHON_EXE='\''/gpfslocalsup/pub/anaconda-py3/2020.02/bin/python'\'' export CONDA_PREFIX_2='\''/gpfslocalsup/pub/anaconda-py3/2020.02'\''' ++ rc=0 ++ PATH=/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/bin:/gpfslocalsup/pub/gitlab-cli/1.13.1/bin:/gpfslocalsup/spack_soft/git-lfs/2.7.2/gcc-8.3.1-o4avesibx2cgqxwaqvn5kbz247it7x2e/bin:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/bin:/gpfslocalsup/pub/anaconda-py3/2020.02/bin:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin ++ eval '. "/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/deactivate.d/udunits2-deactivate.sh" . "/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/deactivate.d/proj4-deactivate.sh" . "/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/deactivate.d/geotiff-deactivate.sh" . 
"/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/deactivate.d/gdal-deactivate.sh" PS1='\''(/gpfswork/rech/six/commun/conda/hf-prod) '\'' export PATH='\''/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/bin:/gpfslocalsup/pub/gitlab-cli/1.13.1/bin:/gpfslocalsup/spack_soft/git-lfs/2.7.2/gcc-8.3.1-o4avesibx2cgqxwaqvn5kbz247it7x2e/bin:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/bin:/gpfswork/rech/six/commun/conda/hf-prod/bin:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin'\'' export CONDA_PREFIX='\''/gpfswork/rech/six/commun/conda/hf-prod'\'' export CONDA_SHLVL='\''3'\'' export CONDA_DEFAULT_ENV='\''/gpfswork/rech/six/commun/conda/hf-prod'\'' export CONDA_PROMPT_MODIFIER='\''(/gpfswork/rech/six/commun/conda/hf-prod) '\'' export CONDA_EXE='\''/gpfslocalsup/pub/anaconda-py3/2020.02/bin/conda'\'' export _CE_M='\'''\'' export _CE_CONDA='\'''\'' export CONDA_PYTHON_EXE='\''/gpfslocalsup/pub/anaconda-py3/2020.02/bin/python'\'' export CONDA_PREFIX_2='\''/gpfslocalsup/pub/anaconda-py3/2020.02'\''' +++ . /gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/deactivate.d/udunits2-deactivate.sh ++++ unset UDUNITS2_XML_PATH ++++ [[ -n '' ]] +++ . /gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/deactivate.d/proj4-deactivate.sh ++++ unset PROJ_LIB ++++ unset PROJ_NETWORK ++++ '[' -n '' ']' +++ . /gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/deactivate.d/geotiff-deactivate.sh ++++ unset GEOTIFF_CSV ++++ [[ -n '' ]] +++ . 
/gpfslocalsup/pub/anaconda-py3/2020.02/etc/conda/deactivate.d/gdal-deactivate.sh ++++ unset GDAL_DATA ++++ [[ -n '' ]] ++++ unset GDAL_DRIVER_PATH ++++ [[ -n '' ]] ++++ unset CPL_ZIP_ENCODING +++ PS1='(/gpfswork/rech/six/commun/conda/hf-prod) ' +++ export PATH=/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/bin:/gpfslocalsup/pub/gitlab-cli/1.13.1/bin:/gpfslocalsup/spack_soft/git-lfs/2.7.2/gcc-8.3.1-o4avesibx2cgqxwaqvn5kbz247it7x2e/bin:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/bin:/gpfswork/rech/six/commun/conda/hf-prod/bin:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin +++ PATH=/gpfslocalsup/spack_soft/mc/4.8.26/gcc-8.3.1-srs5eoj2p6imxmzcvqwx7hvvkt5prwbe/bin:/gpfslocalsup/pub/gitlab-cli/1.13.1/bin:/gpfslocalsup/spack_soft/git-lfs/2.7.2/gcc-8.3.1-o4avesibx2cgqxwaqvn5kbz247it7x2e/bin:/gpfslocalsup/spack_soft/nvtop/1.1.0/gcc-8.3.1-jmbzz7ikh5wgjxgjk62hdddcuiggxerz/bin:/gpfswork/rech/six/commun/conda/hf-prod/bin:/gpfslocalsup/spack_soft/openmpi/4.1.1/gcc-8.3.1-buyiit4vlnfnuq6vgvlsmlkgexrh6myv/bin:/gpfslocalsys/cuda/11.2/samples:/gpfslocalsys/cuda/11.2/nvvm/bin:/gpfslocalsys/cuda/11.2/bin:/gpfswork/rech/rcy/uty16tp/miniconda3/condabin:/gpfslocalsup/spack_soft/environment-modules/4.3.1/gcc-4.8.5-ism7cdy4xverxywj27jvjstqwk5oxe2v/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/usr/lpp/mmfs/bin:/sbin:/bin:/gpfslocalsys/slurm/current/bin:/gpfslocalsup/bin:/gpfslocalsys/bin:/gpfslocalsys/idrzap/current/bin +++ export CONDA_PREFIX=/gpfswork/rech/six/commun/conda/hf-prod +++ CONDA_PREFIX=/gpfswork/rech/six/commun/conda/hf-prod +++ export CONDA_SHLVL=3 +++ CONDA_SHLVL=3 +++ export CONDA_DEFAULT_ENV=/gpfswork/rech/six/commun/conda/hf-prod +++ CONDA_DEFAULT_ENV=/gpfswork/rech/six/commun/conda/hf-prod +++ export 'CONDA_PROMPT_MODIFIER=(/gpfswork/rech/six/commun/conda/hf-prod) ' +++ CONDA_PROMPT_MODIFIER='(/gpfswork/rech/six/commun/conda/hf-prod) ' +++ export CONDA_EXE=/gpfslocalsup/pub/anaconda-py3/2020.02/bin/conda +++ CONDA_EXE=/gpfslocalsup/pub/anaconda-py3/2020.02/bin/conda +++ export _CE_M= +++ _CE_M= +++ export _CE_CONDA= +++ _CE_CONDA= +++ export CONDA_PYTHON_EXE=/gpfslocalsup/pub/anaconda-py3/2020.02/bin/python +++ CONDA_PYTHON_EXE=/gpfslocalsup/pub/anaconda-py3/2020.02/bin/python +++ export CONDA_PREFIX_2=/gpfslocalsup/pub/anaconda-py3/2020.02 +++ CONDA_PREFIX_2=/gpfslocalsup/pub/anaconda-py3/2020.02 ++ '[' 0 '!=' 0 ']' ++ __conda_hashr ++ '[' -n '' ']' ++ '[' -n '' ']' ++ hash -r + ROUND=2 + TESTING=0 + export HF_DATASETS_OFFLINE=1 + HF_DATASETS_OFFLINE=1 + export TRANSFORMERS_OFFLINE=1 + TRANSFORMERS_OFFLINE=1 + OUTPUT_PATH=/gpfsscratch/rech/six/uty16tp/synched_exps/tr4-1B3-full/ + MEGATRON_DEEPSPEED_REPO=/gpfswork/rech/six/commun/code/Megatron-DeepSpeed + [[ 0 == 1 ]] + DATA_PATH=/gpfsscratch/rech/six/commun/datasets-custom/c4_preprocessing/c4_en_train_text_document + pushd /gpfswork/rech/six/commun/code/Megatron-DeepSpeed 
/gpfswork/rech/six/commun/code/Megatron-DeepSpeed /gpfswork/rech/six/uty16tp/code/big_science
++ perl -le '$_=$ENV{"SLURM_JOB_NODELIST"}; s/,.*//; s/-.*//; s/\[//; print'
+ MASTER_ADDR=r11i2n4
+ MASTER_PORT=6000
+ GPUS_PER_NODE=4
+ NNODES=16
+ PP_SIZE=4
+ TP_SIZE=4
+ DP_SIZE=4
+ MICRO_BATCH_SIZE=16
+ GLOBAL_BATCH_SIZE=1024
+ TRAIN_ITER=146_484_375
+ NLAYERS=24
+ NHIDDEN=2048
+ NHEADS=16
+ FFN_HIDDEN_SIZE=8192
+ SEQ_LEN=2048
+ [[ 2 == 1 ]]
+ [[ 2 == 2 ]]
+ SAVE_INTERVAL=1500
+ OPTIMIZER_ARGS=' --optimizer adam --adam-beta1 0.9 --adam-beta2 0.999 --adam-eps 1e-8 --lr 1e-4 --min-lr 1e-5 --lr-decay-style cosine --lr-decay-samples 126_953_125 --lr-warmup-samples 183_105 --clip-grad 1.0 --weight-decay 1e-1 '
+ EXIT_OPTS=' --exit-duration-in-mins 1190 '
+ GPT_ARGS=' --num-layers 24 --hidden-size 2048 --num-attention-heads 16 --ffn-hidden-size 8192 --seq-length 2048 --micro-batch-size 16 --global-batch-size 1024 --train-samples 146_484_375 --tokenizer-type PretrainedFromHF --tokenizer-name-or-path t5-small --loss-scale 12 --clip-grad 1.0 --fp16 --checkpoint-activations --position-embedding-type rotary --optimizer adam --adam-beta1 0.9 --adam-beta2 0.999 --adam-eps 1e-8 --lr 1e-4 --min-lr 1e-5 --lr-decay-style cosine --lr-decay-samples 126_953_125 --lr-warmup-samples 183_105 --clip-grad 1.0 --weight-decay 1e-1 --exit-duration-in-mins 1190 '
+ OUTPUT_ARGS=' --log-interval 200 --save-interval 1500 --eval-interval 1000 --eval-iters 100 --tensorboard-dir /gpfsscratch/rech/six/uty16tp/synched_exps/tr4-1B3-full//tensorboard --tensorboard-queue-size 5 --log-timers-to-tensorboard --log-batch-size-to-tensorboard --log-validation-ppl-to-tensorboard '
+ ZERO_STAGE=1
+ config_json=./ds_config.716258.json
+ cat
+ DEEPSPEED_ARGS=' --deepspeed --deepspeed_config ./ds_config.716258.json --zero-stage 1 --deepspeed-activation-checkpointing '
+ export 'LAUNCHER=python -u -m torch.distributed.launch --nproc_per_node 4 --nnodes 16 --master_addr r11i2n4 --master_port 6000 '
+ LAUNCHER='python -u -m torch.distributed.launch --nproc_per_node 4 --nnodes 16 --master_addr r11i2n4 --master_port 6000 '
++ pwd
+ export 'CMD= /gpfswork/rech/six/commun/code/Megatron-DeepSpeed/pretrain_gpt.py --tensor-model-parallel-size 4 --pipeline-model-parallel-size 4 --num-layers 24 --hidden-size 2048 --num-attention-heads 16 --ffn-hidden-size 8192 --seq-length 2048 --micro-batch-size 16 --global-batch-size 1024 --train-samples 146_484_375 --tokenizer-type PretrainedFromHF --tokenizer-name-or-path t5-small --loss-scale 12 --clip-grad 1.0 --fp16 --checkpoint-activations --position-embedding-type rotary --optimizer adam --adam-beta1 0.9 --adam-beta2 0.999 --adam-eps 1e-8 --lr 1e-4 --min-lr 1e-5 --lr-decay-style cosine --lr-decay-samples 126_953_125 --lr-warmup-samples 183_105 --clip-grad 1.0 --weight-decay 1e-1 --exit-duration-in-mins 1190 --log-interval 200 --save-interval 1500 --eval-interval 1000 --eval-iters 100 --tensorboard-dir /gpfsscratch/rech/six/uty16tp/synched_exps/tr4-1B3-full//tensorboard --tensorboard-queue-size 5 --log-timers-to-tensorboard --log-batch-size-to-tensorboard --log-validation-ppl-to-tensorboard --save /gpfsscratch/rech/six/uty16tp/synched_exps/tr4-1B3-full//checkpoints --load /gpfsscratch/rech/six/uty16tp/synched_exps/tr4-1B3-full//checkpoints --data-path /gpfsscratch/rech/six/commun/datasets-custom/c4_preprocessing/c4_en_train_text_document --data-impl mmap --split 949,50,1 --distributed-backend nccl --deepspeed --deepspeed_config ./ds_config.716258.json --zero-stage 1 --deepspeed-activation-checkpointing '
+ srun --jobid 716258 bash -c '$LAUNCHER --node_rank $SLURM_PROCID $CMD'
+ tee /gpfsscratch/rech/six/uty16tp/synched_exps/tr4-1B3-full//logs/tr3-1B3-modeling-baseline.716258.out
tee: /gpfsscratch/rech/six/uty16tp/synched_exps/tr4-1B3-full//logs/tr3-1B3-modeling-baseline.716258.out: No such file or directory
*****************************************
Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
*****************************************
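The tee error above means the logs directory had not been created before the pipeline started, so the combined stdout/stderr of this run was never written to the intended file, and the banner that follows it is torch.distributed.launch defaulting OMP_NUM_THREADS to 1 on every rank. A minimal sketch of how the launch step could be hardened is shown here; it is not the original job script, and the thread count, the LOGS_PATH variable name and the use of SLURM_JOB_ID are assumptions.
# Sketch only, reusing the LAUNCHER and CMD variables built in the trace above.
LOGS_PATH=/gpfsscratch/rech/six/uty16tp/synched_exps/tr4-1B3-full/logs
mkdir -p "$LOGS_PATH"                 # tee does not create missing parent directories
export OMP_NUM_THREADS=8              # assumed value; silences the launcher's default of 1
srun --jobid "$SLURM_JOB_ID" bash -c '$LAUNCHER --node_rank $SLURM_PROCID $CMD' 2>&1 | tee -a "$LOGS_PATH/tr3-1B3-modeling-baseline.$SLURM_JOB_ID.out"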
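The cat step in the trace writes ./ds_config.716258.json from a heredoc, so the actual DeepSpeed configuration never appears in the log. Purely as an illustration of the shape such a ZeRO stage 1 file usually takes, consistent with the batch-size and fp16 flags on the command line (the keys and values below are an assumption, not the real contents of ds_config.716258.json):
# Hypothetical sketch of a ZeRO stage 1 config; not the file used by this job.
cat <<EOT > ./ds_config.$SLURM_JOB_ID.json
{
  "train_micro_batch_size_per_gpu": 16,
  "train_batch_size": 1024,
  "gradient_clipping": 1.0,
  "zero_optimization": { "stage": 1 },
  "fp16": { "enabled": true },
  "steps_per_print": 2000,
  "wall_clock_breakdown": false
}
EOT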
2021-08-12 15:09:28.693683: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0
--------------------------------------------------
DeepSpeed C++/CUDA extension op report
--------------------------------------------------
NOTE: Ops not installed will be just-in-time (JIT) compiled at runtime if needed. Op compatibility means that your system meet the required dependencies to JIT install the op.
--------------------------------------------------
JIT compiled ops requires ninja
--------------------------------------------------
ninja .................. [OKAY]
--------------------------------------------------
op name ................ installed .. compatible
--------------------------------------------------
cpu_adam ............... [YES] ...... [OKAY]
fused_adam ............. [NO] ....... [OKAY]
fused_lamb ............. [NO] ....... [OKAY]
sparse_attn ............ [NO] ....... [OKAY]
transformer ............ [NO] ....... [OKAY]
stochastic_transformer . [NO] ....... [OKAY]
--------------------------------------------------
 [WARNING]  async_io requires the libraries: ['libaio-dev'] but are missing. Can be fixed by: `apt install libaio-dev`.
async_io ............... [NO] ....... [NO]
transformer_inference .. [NO] ....... [OKAY]
utils .................. [YES] ...... [OKAY]
quantizer .............. [NO] ....... [OKAY]
--------------------------------------------------
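Every rank prints the same extension op report: only cpu_adam is pre-installed, the remaining ops are left to JIT compilation via ninja, and async_io is unavailable because the libaio development headers are missing. A hedged way to inspect and optionally pre-build these ops outside the job is sketched below; the DS_BUILD_* switches are DeepSpeed's documented build flags, and whether libaio-dev can be installed on this cluster is an assumption.
# Reproduce the op report from a login shell; ds_report ships with DeepSpeed.
ds_report
# Optionally pre-build selected ops at install time instead of relying on JIT + ninja.
# async_io would additionally need the libaio headers (apt install libaio-dev on Debian/Ubuntu).
DS_BUILD_FUSED_ADAM=1 DS_BUILD_FUSED_LAMB=1 pip install deepspeed --no-cache-dir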
[NO] ....... [OKAY] utils .................. [YES] ...... [OKAY] quantizer .............. [NO] ....... [OKAY] --------------------------------------------------  [WARNING]  async_io requires the libraries: ['libaio-dev'] but are missing. Can be fixed by: `apt install libaio-dev`. async_io ............... [NO] ....... [NO] transformer_inference .. [NO] ....... [OKAY] utils .................. [YES] ...... [OKAY] quantizer .............. [NO] ....... [OKAY] --------------------------------------------------  [WARNING]  async_io requires the libraries: ['libaio-dev'] but are missing. Can be fixed by: `apt install libaio-dev`. async_io ............... [NO] ....... [NO] transformer_inference .. [NO] ....... [OKAY] utils .................. [YES] ...... [OKAY] quantizer .............. [NO] ....... [OKAY] --------------------------------------------------  [WARNING]  async_io requires the libraries: ['libaio-dev'] but are missing. Can be fixed by: `apt install libaio-dev`. async_io ............... [NO] ....... [NO] transformer_inference .. [NO] ....... [OKAY] utils .................. [YES] ...... [OKAY] quantizer .............. [NO] ....... [OKAY] --------------------------------------------------  [WARNING]  async_io requires the libraries: ['libaio-dev'] but are missing. Can be fixed by: `apt install libaio-dev`. [WARNING]  async_io requires the libraries: ['libaio-dev'] but are missing. Can be fixed by: `apt install libaio-dev`. async_io ............... [NO] ....... [NO] async_iotransformer_inference ................. [NO][NO] .............. [NO][OKAY] utils .................. [YES] transformer_inference...... ..[OKAY] [NO] ....... [OKAY]quantizer .............. [NO] ....... [OKAY] utils .................. [YES]-------------------------------------------------- ...... [OKAY] quantizer .............. [NO] ....... [OKAY] --------------------------------------------------  [WARNING]  async_io requires the libraries: ['libaio-dev'] but are missing. Can be fixed by: `apt install libaio-dev`. async_io ............... [NO] ....... [NO] transformer_inference .. [NO] ....... [OKAY] utils .................. [YES] ...... [OKAY] quantizer .............. [NO] ....... [OKAY] -------------------------------------------------- DeepSpeed general environment info: DeepSpeed general environment info:torch install path ............... torch install path['/gpfswork/rech/six/commun/conda/hf-prod/lib/python3.8/site-packages/torch'] ............... torch version .................... 1.8.1['/gpfswork/rech/six/commun/conda/hf-prod/lib/python3.8/site-packages/torch'] torch cuda versiontorch version ................................... 11.11.8.1 nvcc version .....................torch cuda version 11.2............... 11.1deepspeed install path nvcc version........... ..................... ['/gpfsssd/worksf/projects/rech/six/commun/code/deepspeed-big-science/deepspeed']11.2 deepspeed infodeepspeed install path .............................. 0.4.2+unknown, unknown, unknown ['/gpfsssd/worksf/projects/rech/six/commun/code/deepspeed-big-science/deepspeed']deepspeed wheel compiled w. deepspeed info...... ...................torch 1.8, cuda 10.2 0.4.2+unknown, unknown, unknown deepspeed wheel compiled w. ...... torch 1.8, cuda 10.2  [WARNING]  async_io requires the libraries: ['libaio-dev'] but are missing. Can be fixed by: `apt install libaio-dev`. async_io ............... [NO] ....... [NO] transformer_inference .. [NO] ....... [OKAY] utils .................. [YES] ...... [OKAY] quantizer .............. [NO] ....... 
[OKAY] --------------------------------------------------  [WARNING]  async_io requires the libraries: ['libaio-dev'] but are missing. Can be fixed by: `apt install libaio-dev`. async_io ............... [NO] ....... [NO] transformer_inference .. [NO] ....... [OKAY] utils .................. [YES] ...... [OKAY] quantizer .............. [NO] ....... [OKAY] --------------------------------------------------  [WARNING]  async_io requires the libraries: ['libaio-dev'] but are missing. Can be fixed by: `apt install libaio-dev`. async_io ............... [NO] ....... [NO] transformer_inference .. [NO] ....... [OKAY] utils .................. [YES] ...... [OKAY] quantizer .............. [NO] ....... [OKAY] -------------------------------------------------- DeepSpeed general environment info:DeepSpeed general environment info: torch install pathtorch install path .............................. ['/gpfswork/rech/six/commun/conda/hf-prod/lib/python3.8/site-packages/torch']['/gpfswork/rech/six/commun/conda/hf-prod/lib/python3.8/site-packages/torch'] torch versiontorch version ........................................ 1.8.11.8.1 torch cuda versiontorch cuda version .............................. 11.111.1 nvcc versionnvcc version ..................... 11.2 deepspeed install path ........... ['/gpfsssd/worksf/projects/rech/six/commun/code/deepspeed-big-science/deepspeed'] deepspeed info ................... 0.4.2+unknown, unknown, unknown deepspeed wheel compiled w...................... ......11.2 torch 1.8, cuda 10.2deepspeed install path ........... ['/gpfsssd/worksf/projects/rech/six/commun/code/deepspeed-big-science/deepspeed'] deepspeed info ................... 0.4.2+unknown, unknown, unknown deepspeed wheel compiled w. ...... torch 1.8, cuda 10.2  [WARNING]  async_io requires the libraries: ['libaio-dev'] but are missing. Can be fixed by: `apt install libaio-dev`. async_io [WARNING]  async_io requires the libraries: ['libaio-dev'] but are missing. Can be fixed by: `apt install libaio-dev`. ............... [NO] ....... [NO]  [WARNING]  async_io requires the libraries: ['libaio-dev'] but are missing. Can be fixed by: `apt install libaio-dev`. transformer_inference ..async_io [NO]............... .......[NO] [OKAY]....... [NO] utils ..................async_io [YES] ..................... [NO][OKAY] .......transformer_inference [NO].. quantizer[NO] ..................... [NO][OKAY] ....... [OKAY] transformer_inference utils.. --------------------------------------------------.................. [NO][YES] ............. [OKAY][OKAY] quantizer .............. utils[NO] ......................... [YES][OKAY] ...... [OKAY] -------------------------------------------------- quantizer .............. [NO] ....... [OKAY] --------------------------------------------------  [WARNING]  async_io requires the libraries: ['libaio-dev'] but are missing. Can be fixed by: `apt install libaio-dev`. async_io ............... [NO] ....... [NO] transformer_inference .. [NO] ....... [OKAY] utils .................. [YES] ...... [OKAY] quantizer .............. [NO] ....... [OKAY] -------------------------------------------------- DeepSpeed general environment info:DeepSpeed general environment info: torch install pathtorch install path .............................. ['/gpfswork/rech/six/commun/conda/hf-prod/lib/python3.8/site-packages/torch']['/gpfswork/rech/six/commun/conda/hf-prod/lib/python3.8/site-packages/torch'] torch versiontorch version ........................................ 
1.8.1
torch cuda version ............... 11.1
nvcc version ..................... 11.2
deepspeed install path ........... ['/gpfsssd/worksf/projects/rech/six/commun/code/deepspeed-big-science/deepspeed']
deepspeed info ................... 0.4.2+unknown, unknown, unknown
deepspeed wheel compiled w. ...... torch 1.8, cuda 10.2
/bin/sh: line 0: type: git: not found
**** Git info for Megatron: git_hash=unknown git_branch=unknown ****
vocab file is un-used. loading tokenizer from pre-trained model
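Every rank prints the same "DeepSpeed general environment info" report, so in the raw log the block above repeats once per process. The same version lines can be reproduced from the activated environment with a quick check (a minimal sketch, assuming the pytorch-gpu/py3/1.8.1 module and the hf-prod conda env loaded at the top of this log are active):

    # torch / CUDA / DeepSpeed versions, matching the report above
    python -c 'import torch, deepspeed; print(torch.__version__, torch.version.cuda, deepspeed.__version__)'
    # nvcc version (the 11.2 line comes from the CUDA toolkit on PATH)
    nvcc --version | grep release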
--------------------------------------------------
DeepSpeed C++/CUDA extension op report
--------------------------------------------------
NOTE: Ops not installed will be just-in-time (JIT) compiled at runtime if needed. Op compatibility means that your system meet the required dependencies to JIT install the op.
--------------------------------------------------
JIT compiled ops requires ninja
ninja .................. [OKAY]
--------------------------------------------------
op name ................ installed .. compatible
--------------------------------------------------
cpu_adam ............... [YES] ...... [OKAY]
fused_adam ............. [NO] ....... [OKAY]
fused_lamb ............. [NO] ....... [OKAY]
sparse_attn ............ [NO] ....... [OKAY]
transformer ............ [NO] ....... [OKAY]
stochastic_transformer . [NO] ....... [OKAY]
 [WARNING]  async_io requires the libraries: ['libaio-dev'] but are missing. Can be fixed by: `apt install libaio-dev`.
async_io ............... [NO] ....... [NO]
transformer_inference .. [NO] ....... [OKAY]
utils .................. [YES] ...... [OKAY]
quantizer .............. [NO] ....... [OKAY]
--------------------------------------------------
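The extension op report and the async_io warning above are printed by every process at start-up; the same information can be regenerated on demand with DeepSpeed's ds_report utility. The libaio hint only matters if the async_io op (used for NVMe-style offloading) is actually needed; for this run it stays [NO] and the ops in use are JIT-compiled as advertised. A minimal sketch (the apt line is the upstream suggestion and assumes root, which the compute nodes here do not provide):

    # re-generate the extension op report shown above
    ds_report
    # upstream fix for the async_io warning (requires root; not applicable on these nodes)
    # apt install libaio-dev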
Can be fixed by: `apt install libaio-dev`. async_io ............... [NO] ....... [NO] transformer_inference .. [NO] ....... [OKAY] utils .................. [YES] ...... [OKAY] quantizer .............. [NO] ....... [OKAY] -------------------------------------------------- DeepSpeed general environment info: DeepSpeed general environment info: torch install path ............... torch install path ............... ['/gpfswork/rech/six/commun/conda/hf-prod/lib/python3.8/site-packages/torch'] ['/gpfswork/rech/six/commun/conda/hf-prod/lib/python3.8/site-packages/torch']torch version .................... torch version1.8.1 .................... 1.8.1torch cuda version ............... torch cuda version11.1 ...............nvcc version 11.1..................... 11.2nvcc version deepspeed install path..................... ...........11.2 deepspeed install path['/gpfsssd/worksf/projects/rech/six/commun/code/deepspeed-big-science/deepspeed'] ........... deepspeed info ['/gpfsssd/worksf/projects/rech/six/commun/code/deepspeed-big-science/deepspeed']................... deepspeed info0.4.2+unknown, unknown, unknown ...................deepspeed wheel compiled w. 0.4.2+unknown, unknown, unknown...... deepspeed wheel compiled w.torch 1.8, cuda 10.2 ...... torch 1.8, cuda 10.2 /bin/sh: line 0: type: git: not found /bin/sh: line 0: type: git: not found **** Git info for Megatron: git_hash=unknown git_branch=unknown **** **** Git info for Megatron: git_hash=unknown git_branch=unknown **** vocab file is un-used. loading tokenizer from pre-trained model vocab file is un-used. loading tokenizer from pre-trained model /bin/sh: line 0: type: git: not found **** Git info for Megatron: git_hash=unknown git_branch=unknown **** vocab file is un-used. loading tokenizer from pre-trained model /bin/sh: line 0: type: git: not found /bin/sh: line 0: type: git: not found **** Git info for Megatron: git_hash=unknown git_branch=unknown **** **** Git info for Megatron: git_hash=unknown git_branch=unknown **** using world size: 64, data-parallel-size: 4, tensor-model-parallel size: 4, pipeline-model-parallel size: 4 WARNING: overriding default arguments for tokenizer_type:GPT2BPETokenizer with tokenizer_type:PretrainedFromHF using torch.float16 for parameters ... ------------------------ arguments ------------------------ vocab file is un-used. loading tokenizer from pre-trained model accumulate_allreduce_grads_in_fp32 .............. False adam_beta1 ...................................... 0.9 adam_beta2 ...................................... 0.999 adam_eps ........................................ 1e-08 adlr_autoresume ................................. False adlr_autoresume_interval ........................ 1000 apply_query_key_layer_scaling ................... True apply_residual_connection_post_layernorm ........ False attention_dropout ............................... 0.1 attention_softmax_in_fp32 ....................... False bert_binary_head ................................ True bert_load ....................................... None bf16 ............................................ False bias_dropout_fusion ............................. True bias_gelu_fusion ................................ True biencoder_projection_dim ........................ 0 biencoder_shared_query_context_model ............ False block_data_path ................................. None checkpoint_activations .......................... True checkpoint_in_cpu ............................... False checkpoint_num_layers ........................... 
1 clip_grad ....................................... 1.0 consumed_train_samples .......................... 0 consumed_valid_samples .......................... 0 contigious_checkpointing ........................ False cpu_optimizer ................................... False cpu_torch_adam .................................. False data_impl ....................................... mmap data_parallel_size .............................. 4 data_path ....................................... ['/gpfsscratch/rech/six/commun/datasets-custom/c4_preprocessing/c4_en_train_text_document'] dataloader_type ................................. single DDP_impl ........................................ local decoder_seq_length .............................. None deepscale ....................................... False deepscale_config ................................ None deepspeed ....................................... True deepspeed_activation_checkpointing .............. True deepspeed_config ................................ ./ds_config.716258.json deepspeed_mpi ................................... False distribute_checkpointed_activations ............. False distributed_backend ............................. nccl embedding_path .................................. None encoder_seq_length .............................. 2048 eod_mask_loss ................................... False eval_interval ................................... 1000 eval_iters ...................................... 100 evidence_data_path .............................. None exit_duration_in_mins ........................... 1190 exit_interval ................................... None ffn_hidden_size ................................. 8192 finetune ........................................ False fp16 ............................................ True fp16_lm_cross_entropy ........................... False fp32_residual_connection ........................ False global_batch_size ............................... 1024 hidden_dropout .................................. 0.1 hidden_size ..................................... 2048 hysteresis ...................................... 2 ict_head_size ................................... None ict_load ........................................ None img_dim ......................................... 224 indexer_batch_size .............................. 128 indexer_log_interval ............................ 1000 init_method_std ................................. 0.02 init_method_xavier_uniform ...................... False initial_loss_scale .............................. 4294967296 kv_channels ..................................... 128 layernorm_epsilon ............................... 1e-05 lazy_mpu_init ................................... None load ............................................ /gpfsscratch/rech/six/uty16tp/synched_exps/tr4-1B3-full//checkpoints local_rank ...................................... 0 log_batch_size_to_tensorboard ................... True log_interval .................................... 200 log_learning_rate_to_tensorboard ................ True log_loss_scale_to_tensorboard ................... True log_num_zeros_in_grad ........................... False log_params_norm ................................. False log_timers_to_tensorboard ....................... True log_validation_ppl_to_tensorboard ............... True loss_scale ...................................... 12.0 loss_scale_window ............................... 1000 lr .............................................. 
0.0001 lr_decay_iters .................................. None lr_decay_samples ................................ 126953125 lr_decay_style .................................. cosine lr_warmup_fraction .............................. None lr_warmup_iters ................................. 0 lr_warmup_samples ............................... 183105 make_vocab_size_divisible_by .................... 128 mask_prob ....................................... 0.15 masked_softmax_fusion ........................... True max_position_embeddings ......................... None memory_centric_tiled_linear ..................... False merge_file ...................................... None micro_batch_size ................................ 16 min_loss_scale .................................. 1.0 min_lr .......................................... 1e-05 mmap_warmup ..................................... False no_load_optim ................................... None no_load_rng ..................................... None no_save_optim ................................... None no_save_rng ..................................... None num_attention_heads ............................. 16 num_channels .................................... 3 num_classes ..................................... 1000 num_layers ...................................... 24 num_layers_per_virtual_pipeline_stage ........... None num_workers ..................................... 2 onnx_safe ....................................... None openai_gelu ..................................... False optimizer ....................................... adam override_lr_scheduler ........................... False params_dtype .................................... torch.float16 partition_activations ........................... False patch_dim ....................................... 16 pipeline_model_parallel_size .................... 4 position_embedding_type ......................... PositionEmbeddingType.rotary profile_backward ................................ False query_in_block_prob ............................. 0.1 rampup_batch_size ............................... None rank ............................................ 0 remote_device ................................... none reset_attention_mask ............................ False reset_position_ids .............................. False retriever_report_topk_accuracies ................ [] retriever_score_scaling ......................... False retriever_seq_length ............................ 256 sample_rate ..................................... 1.0 save ............................................ /gpfsscratch/rech/six/uty16tp/synched_exps/tr4-1B3-full//checkpoints save_interval ................................... 1500 scatter_gather_tensors_in_pipeline .............. True scattered_embeddings ............................ False seed ............................................ 1234 seq_length ...................................... 2048 sgd_momentum .................................... 0.9 short_seq_prob .................................. 0.1 split ........................................... 949,50,1 split_transformers .............................. False synchronize_each_layer .......................... False tensor_model_parallel_size ...................... 4 tensorboard_dir ................................. /gpfsscratch/rech/six/uty16tp/synched_exps/tr4-1B3-full//tensorboard tensorboard_log_interval ........................ 1 tensorboard_queue_size .......................... 
5 tile_factor ..................................... 1 titles_data_path ................................ None tokenizer_name_or_path .......................... t5-small tokenizer_type .................................. PretrainedFromHF train_iters ..................................... None train_samples ................................... 146484375 use_checkpoint_lr_scheduler ..................... False use_contiguous_buffers_in_ddp ................... False use_cpu_initialization .......................... None use_one_sent_docs ............................... False use_pin_memory .................................. False virtual_pipeline_model_parallel_size ............ None vocab_extra_ids ................................. 0 vocab_file ...................................... None weight_decay .................................... 0.1 world_size ...................................... 64 zero_allgather_bucket_size ...................... 0.0 zero_contigious_gradients ....................... False zero_reduce_bucket_size ......................... 0.0 zero_reduce_scatter ............................. False zero_stage ...................................... 1 -------------------- end of arguments --------------------- setting number of micro-batches to constant 16 > building PretrainedFromHF tokenizer ... vocab file is un-used. loading tokenizer from pre-trained model DeepSpeed general environment info:DeepSpeed general environment info:DeepSpeed general environment info: torch install pathtorch install pathtorch install path ............................................. ['/gpfswork/rech/six/commun/conda/hf-prod/lib/python3.8/site-packages/torch']['/gpfswork/rech/six/commun/conda/hf-prod/lib/python3.8/site-packages/torch']['/gpfswork/rech/six/commun/conda/hf-prod/lib/python3.8/site-packages/torch'] torch version ....................torch versiontorch version 1.8.1........................................ 1.8.11.8.1torch cuda version ...............torch cuda version torch cuda version 11.1 ............... ...............nvcc version 11.1 11.1 ..................... nvcc versionnvcc version11.2 .......................................... deepspeed install path 11.2 11.2 ........... deepspeed install path deepspeed install path ......................['/gpfsssd/worksf/projects/rech/six/commun/code/deepspeed-big-science/deepspeed'] ['/gpfsssd/worksf/projects/rech/six/commun/code/deepspeed-big-science/deepspeed']deepspeed info['/gpfsssd/worksf/projects/rech/six/commun/code/deepspeed-big-science/deepspeed'] ...................deepspeed infodeepspeed info 0.4.2+unknown, unknown, unknown...................................... 0.4.2+unknown, unknown, unknowndeepspeed wheel compiled w.0.4.2+unknown, unknown, unknown ......deepspeed wheel compiled w.deepspeed wheel compiled w. torch 1.8, cuda 10.2............ torch 1.8, cuda 10.2torch 1.8, cuda 10.2 /bin/sh: line 0: type: git: not found **** Git info for Megatron: git_hash=unknown git_branch=unknown **** DeepSpeed general environment info: torch install path ............... ['/gpfswork/rech/six/commun/conda/hf-prod/lib/python3.8/site-packages/torch'] torch version .................... 1.8.1 torch cuda version ............... 11.1 nvcc version ..................... 11.2 deepspeed install path ........... ['/gpfsssd/worksf/projects/rech/six/commun/code/deepspeed-big-science/deepspeed'] deepspeed info ................... 0.4.2+unknown, unknown, unknown deepspeed wheel compiled w. ...... 
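The parallel layout and batch arithmetic implied by the arguments above are internally consistent; a quick sanity-check sketch in plain Python, using only values copied from this dump (the training-iteration figure is confirmed further down in the log):

    # Sanity-check the parallel layout and batch arithmetic from the arguments dump.
    world_size = 64
    tensor_mp = 4            # tensor_model_parallel_size
    pipeline_mp = 4          # pipeline_model_parallel_size
    data_parallel = world_size // (tensor_mp * pipeline_mp)
    assert data_parallel == 4        # matches data_parallel_size above

    global_batch = 1024              # global_batch_size
    micro_batch = 16                 # micro_batch_size
    micro_batches = global_batch // (micro_batch * data_parallel)
    assert micro_batches == 16       # "setting number of micro-batches to constant 16",
                                     # and gradient_accumulation_steps=16 further down

    train_samples = 146_484_375
    train_iters = train_samples // global_batch
    assert train_iters == 143_051    # "setting training iterations to 143051" later in the log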
> setting tensorboard ...
> padded vocab (size: 32100) with 156 dummy tokens (new size: 32256)
> initializing torch distributed ...
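The padded-vocab line above follows from make_vocab_size_divisible_by and the tensor-parallel size in the arguments dump; a minimal sketch of that arithmetic, assuming only the vocab size 32100 reported by the log (the t5-small tokenizer named in tokenizer_name_or_path):

    # Reproduce "padded vocab (size: 32100) with 156 dummy tokens (new size: 32256)".
    # Megatron pads the tokenizer vocab so it divides evenly across tensor-parallel ranks.
    orig_vocab_size = 32100
    make_vocab_size_divisible_by = 128
    tensor_mp = 4                                     # tensor_model_parallel_size

    multiple = make_vocab_size_divisible_by * tensor_mp          # 512
    padded = ((orig_vocab_size + multiple - 1) // multiple) * multiple
    assert padded == 32256
    assert padded - orig_vocab_size == 156                       # the dummy tokens
    assert padded % tensor_mp == 0                               # 8064 embedding rows per TP rank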
> initializing tensor model parallel with size 4
> initializing pipeline model parallel with size 4
> setting random seeds to 1234 ...
[2021-08-12 15:09:44,110] [INFO] [checkpointing.py:226:model_parallel_cuda_manual_seed] > initializing model parallel cuda seeds on global rank 0, model parallel rank 0, and data parallel rank 0 with model parallel seed: 3952 and data parallel seed: 1234
> compiling dataset index builder ...
make: Entering directory '/gpfsssd/worksf/projects/rech/six/commun/code/Megatron-DeepSpeed/megatron/data'
make: Nothing to be done for 'default'.
make: Leaving directory '/gpfsssd/worksf/projects/rech/six/commun/code/Megatron-DeepSpeed/megatron/data'
>>> done with dataset index builder. Compilation time: 0.119 seconds
> compiling and loading fused kernels ...
/gpfswork/rech/six/commun/conda/hf-prod/lib/python3.8/site-packages/torch/utils/cpp_extension.py:283: UserWarning: !! WARNING !!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
Your compiler (c++) is not compatible with the compiler Pytorch was built with for this platform, which is g++ on linux. Please use g++ to to compile your extension. Alternatively, you may compile PyTorch from source using c++, and then you can also use c++ to compile your extension. See https://github.com/pytorch/pytorch/blob/master/CONTRIBUTING.md for help with compiling PyTorch from source.
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!! WARNING !!
warnings.warn(WRONG_COMPILER_WARNING.format(
Detected CUDA files, patching ldflags
Emitting ninja build file /gpfsssd/worksf/projects/rech/six/commun/code/Megatron-DeepSpeed/megatron/fused_kernels/build/build.ninja...
Building extension module scaled_upper_triang_masked_softmax_cuda...
Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)
ninja: no work to do.
Loading extension module scaled_upper_triang_masked_softmax_cuda...
Detected CUDA files, patching ldflags
Emitting ninja build file /gpfsssd/worksf/projects/rech/six/commun/code/Megatron-DeepSpeed/megatron/fused_kernels/build/build.ninja...
Building extension module scaled_masked_softmax_cuda...
Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)
ninja: no work to do.
Loading extension module scaled_masked_softmax_cuda...
Detected CUDA files, patching ldflags
Emitting ninja build file /gpfsssd/worksf/projects/rech/six/commun/code/Megatron-DeepSpeed/megatron/fused_kernels/build/build.ninja...
Building extension module fused_mix_prec_layer_norm_cuda...
Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)
ninja: no work to do.
Loading extension module fused_mix_prec_layer_norm_cuda...
>>> done with compiling and loading fused kernels. Compilation time: 19.148 seconds
time to initialize megatron (seconds): 43.434
[after megatron is initialized] datetime: 2021-08-12 15:10:03
building GPT model ...
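The "Building/Loading extension module ..." lines above come from PyTorch's ninja-based JIT extension loader (torch.utils.cpp_extension), which Megatron-DeepSpeed uses for its fused softmax and layer-norm kernels. A minimal, self-contained sketch of that mechanism with a hypothetical toy operator (not Megatron's actual kernel sources; it needs a C++ toolchain and ninja available at runtime):

    import torch
    from torch.utils.cpp_extension import load_inline

    # Toy C++ op, JIT-compiled through the same machinery; Megatron instead passes
    # its real .cpp/.cu files to torch.utils.cpp_extension.load.
    cpp_source = """
    #include <torch/extension.h>
    torch::Tensor scaled_add(torch::Tensor x, torch::Tensor y, double scale) {
      return x + scale * y;
    }
    """

    ext = load_inline(
        name="toy_fused_op",
        cpp_sources=[cpp_source],
        functions=["scaled_add"],   # auto-generates the pybind11 binding
        verbose=True,               # prints the "Emitting ninja build file ..." /
                                    # "Building/Loading extension module ..." lines seen above
    )

    a, b = torch.randn(4), torch.randn(4)
    print(ext.scaled_add(a, b, 0.5))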
[2021-08-12 15:10:03,535] [INFO] [utils.py:680:see_memory_usage] Before Building Model /gpfswork/rech/six/commun/conda/hf-prod/lib/python3.8/site-packages/torch/cuda/memory.py:373: FutureWarning: torch.cuda.memory_cached has been renamed to torch.cuda.memory_reserved warnings.warn( /gpfswork/rech/six/commun/conda/hf-prod/lib/python3.8/site-packages/torch/cuda/memory.py:381: FutureWarning: torch.cuda.max_memory_cached has been renamed to torch.cuda.max_memory_reserved warnings.warn( [2021-08-12 15:10:03,538] [INFO] [utils.py:681:see_memory_usage] MA 0.0 GB Max_MA 0.0 GB CA 0.0 GB Max_CA 0 GB [2021-08-12 15:10:03,538] [INFO] [utils.py:689:see_memory_usage] CPU Virtual Memory: used = 40.14 GB, percent = 21.4% SEED_LAYERS=False BASE_SEED=1234 SEED_FN=None Using topology: {ProcessCoord(pipe=0, data=0, model=0): 0, ProcessCoord(pipe=0, data=0, model=1): 1, ProcessCoord(pipe=0, data=0, model=2): 2, ProcessCoord(pipe=0, data=0, model=3): 3, ProcessCoord(pipe=0, data=1, model=0): 4, ProcessCoord(pipe=0, data=1, model=1): 5, ProcessCoord(pipe=0, data=1, model=2): 6, ProcessCoord(pipe=0, data=1, model=3): 7, ProcessCoord(pipe=0, data=2, model=0): 8, ProcessCoord(pipe=0, data=2, model=1): 9, ProcessCoord(pipe=0, data=2, model=2): 10, ProcessCoord(pipe=0, data=2, model=3): 11, ProcessCoord(pipe=0, data=3, model=0): 12, ProcessCoord(pipe=0, data=3, model=1): 13, ProcessCoord(pipe=0, data=3, model=2): 14, ProcessCoord(pipe=0, data=3, model=3): 15, ProcessCoord(pipe=1, data=0, model=0): 16, ProcessCoord(pipe=1, data=0, model=1): 17, ProcessCoord(pipe=1, data=0, model=2): 18, ProcessCoord(pipe=1, data=0, model=3): 19, ProcessCoord(pipe=1, data=1, model=0): 20, ProcessCoord(pipe=1, data=1, model=1): 21, ProcessCoord(pipe=1, data=1, model=2): 22, ProcessCoord(pipe=1, data=1, model=3): 23, ProcessCoord(pipe=1, data=2, model=0): 24, ProcessCoord(pipe=1, data=2, model=1): 25, ProcessCoord(pipe=1, data=2, model=2): 26, ProcessCoord(pipe=1, data=2, model=3): 27, ProcessCoord(pipe=1, data=3, model=0): 28, ProcessCoord(pipe=1, data=3, model=1): 29, ProcessCoord(pipe=1, data=3, model=2): 30, ProcessCoord(pipe=1, data=3, model=3): 31, ProcessCoord(pipe=2, data=0, model=0): 32, ProcessCoord(pipe=2, data=0, model=1): 33, ProcessCoord(pipe=2, data=0, model=2): 34, ProcessCoord(pipe=2, data=0, model=3): 35, ProcessCoord(pipe=2, data=1, model=0): 36, ProcessCoord(pipe=2, data=1, model=1): 37, ProcessCoord(pipe=2, data=1, model=2): 38, ProcessCoord(pipe=2, data=1, model=3): 39, ProcessCoord(pipe=2, data=2, model=0): 40, ProcessCoord(pipe=2, data=2, model=1): 41, ProcessCoord(pipe=2, data=2, model=2): 42, ProcessCoord(pipe=2, data=2, model=3): 43, ProcessCoord(pipe=2, data=3, model=0): 44, ProcessCoord(pipe=2, data=3, model=1): 45, ProcessCoord(pipe=2, data=3, model=2): 46, ProcessCoord(pipe=2, data=3, model=3): 47, ProcessCoord(pipe=3, data=0, model=0): 48, ProcessCoord(pipe=3, data=0, model=1): 49, ProcessCoord(pipe=3, data=0, model=2): 50, ProcessCoord(pipe=3, data=0, model=3): 51, ProcessCoord(pipe=3, data=1, model=0): 52, ProcessCoord(pipe=3, data=1, model=1): 53, ProcessCoord(pipe=3, data=1, model=2): 54, ProcessCoord(pipe=3, data=1, model=3): 55, ProcessCoord(pipe=3, data=2, model=0): 56, ProcessCoord(pipe=3, data=2, model=1): 57, ProcessCoord(pipe=3, data=2, model=2): 58, ProcessCoord(pipe=3, data=2, model=3): 59, ProcessCoord(pipe=3, data=3, model=0): 60, ProcessCoord(pipe=3, data=3, model=1): 61, ProcessCoord(pipe=3, data=3, model=2): 62, ProcessCoord(pipe=3, data=3, model=3): 63} [2021-08-12 15:10:04,071] [INFO] 
[module.py:360:_partition_layers] Partitioning pipeline stages with method type:transformer
stage=0 layers=9
     0: _to_float16
     1: EmbeddingPipe
     2:
     3: ParallelTransformerLayerPipe
     4: ParallelTransformerLayerPipe
     5: ParallelTransformerLayerPipe
     6: ParallelTransformerLayerPipe
     7: ParallelTransformerLayerPipe
     8: ParallelTransformerLayerPipe
stage=1 layers=6
     9: ParallelTransformerLayerPipe
    10: ParallelTransformerLayerPipe
    11: ParallelTransformerLayerPipe
    12: ParallelTransformerLayerPipe
    13: ParallelTransformerLayerPipe
    14: ParallelTransformerLayerPipe
stage=2 layers=6
    15: ParallelTransformerLayerPipe
    16: ParallelTransformerLayerPipe
    17: ParallelTransformerLayerPipe
    18: ParallelTransformerLayerPipe
    19: ParallelTransformerLayerPipe
    20: ParallelTransformerLayerPipe
stage=3 layers=10
    21: ParallelTransformerLayerPipe
    22: ParallelTransformerLayerPipe
    23: ParallelTransformerLayerPipe
    24: ParallelTransformerLayerPipe
    25: ParallelTransformerLayerPipe
    26: ParallelTransformerLayerPipe
    27:
    28: MixedFusedLayerNorm
    29: EmbeddingPipe
    30: float16_to_fp32
  loss: CrossEntropy
> number of parameters on (tensor, pipeline) model parallel rank (1, 1): 75592704
> number of parameters on (tensor, pipeline) model parallel rank (3, 1): 75592704
> number of parameters on (tensor, pipeline) model parallel rank (2, 1): 75592704
> number of parameters on (tensor, pipeline) model parallel rank (2, 2): 75592704
> number of parameters on (tensor, pipeline) model parallel rank (3, 2): 75592704
> number of parameters on (tensor, pipeline) model parallel rank (0, 1): 75592704
> number of parameters on (tensor, pipeline) model parallel rank (1, 2): 75592704
> number of parameters on (tensor, pipeline) model parallel rank (0, 2): 75592704
> number of parameters on (tensor, pipeline) model parallel rank (0, 3): 92111872
> number of parameters on (tensor, pipeline) model parallel rank (2, 3): 92111872
> number of parameters on (tensor, pipeline) model parallel rank (1, 3): 92111872
> number of parameters on (tensor, pipeline) model parallel rank (2, 0): 92107776
> number of parameters on (tensor, pipeline) model parallel rank (3, 3): 92111872
> number of parameters on (tensor, pipeline) model parallel rank (1, 0): 92107776
> number of parameters on (tensor, pipeline) model parallel rank (3, 0): 92107776
[2021-08-12 15:10:04,418] [INFO] [utils.py:680:see_memory_usage] After Building Model
[2021-08-12 15:10:04,418] [INFO] [utils.py:681:see_memory_usage] MA 0.18 GB Max_MA 0.2 GB CA 0.22 GB Max_CA 0 GB
[2021-08-12 15:10:04,419] [INFO] [utils.py:689:see_memory_usage] CPU Virtual Memory: used = 40.54 GB, percent = 21.6%
> number of parameters on (tensor, pipeline) model parallel rank (0, 0): 92107776
setting training iterations to 143051
> learning rate decay style: cosine
DeepSpeed is enabled.
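The topology map and the per-rank parameter counts above can be cross-checked directly: the global rank enumerates the model (tensor) coordinate fastest, then data, then pipe, and the per-stage counts follow from hidden_size and the padded vocab. A plain-Python sketch of both checks, using only numbers reported in the log:

    # 1) Rank layout: in the "Using topology" dict above, the model (tensor-parallel)
    #    coordinate varies fastest, then data, then pipe.
    def coord_to_rank(pipe, data, model, tp=4, dp=4):
        return model + tp * data + tp * dp * pipe

    assert coord_to_rank(0, 0, 1) == 1
    assert coord_to_rank(0, 1, 0) == 4
    assert coord_to_rank(1, 0, 0) == 16
    assert coord_to_rank(3, 3, 3) == 63

    # 2) Per-rank parameter counts, with hidden=2048, 24 transformer layers split
    #    6/6/6/6 over 4 pipeline stages, and the padded vocab 32256 sharded over 4 TP ranks.
    hidden, padded_vocab, tp = 2048, 32256, 4

    middle_stage = 75_592_704                        # ranks (*, 1) and (*, 2): 6 layers each
    embedding_shard = padded_vocab // tp * hidden    # word-embedding rows held by one TP rank
    assert embedding_shard == 16_515_072

    # stage 0 = 6 layers + the word-embedding shard (EmbeddingPipe)
    assert middle_stage + embedding_shard == 92_107_776                 # ranks (*, 0)
    # stage 3 = 6 layers + tied embedding shard + final MixedFusedLayerNorm (weight + bias)
    assert middle_stage + embedding_shard + 2 * hidden == 92_111_872    # ranks (*, 3)
    # per transformer layer, per tensor-parallel rank
    assert middle_stage // 6 == 12_598_784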
[2021-08-12 15:10:04,420] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed info: version=0.4.2+unknown, git-hash=unknown, git-branch=unknown [2021-08-12 15:10:04,489] [INFO] [engine.py:179:__init__] DeepSpeed Flops Profiler Enabled: False [2021-08-12 15:10:04,490] [INFO] [engine.py:736:_configure_optimizer] Removing param_group that has no 'params' in the client Optimizer [2021-08-12 15:10:04,490] [INFO] [engine.py:741:_configure_optimizer] Using client Optimizer as basic optimizer [2021-08-12 15:10:04,490] [INFO] [engine.py:750:_configure_optimizer] DeepSpeed Basic Optimizer = FusedAdam [2021-08-12 15:10:04,490] [INFO] [utils.py:43:is_zero_supported_optimizer] Checking ZeRO support for optimizer=FusedAdam type= [2021-08-12 15:10:04,490] [INFO] [logging.py:68:log_dist] [Rank 0] Creating fp16 ZeRO stage 1 optimizer [2021-08-12 15:10:04,490] [INFO] [stage2.py:106:__init__] Reduce bucket size 500000000 [2021-08-12 15:10:04,490] [INFO] [stage2.py:107:__init__] Allgather bucket size 500000000 [2021-08-12 15:10:04,490] [INFO] [stage2.py:108:__init__] CPU Offload: False [2021-08-12 15:10:04,490] [INFO] [stage2.py:109:__init__] Round robin gradient partitioning: False [2021-08-12 15:10:04,706] [INFO] [stage2.py:419:__init__] optimizer state initialized [2021-08-12 15:10:04,706] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Final Optimizer = FusedAdam [2021-08-12 15:10:04,707] [INFO] [engine.py:553:_configure_lr_scheduler] DeepSpeed using client LR scheduler [2021-08-12 15:10:04,707] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed LR Scheduler = [2021-08-12 15:10:04,707] [INFO] [logging.py:68:log_dist] [Rank 0] step=0, skipped=0, lr=[0.0, 0.0], mom=[(0.9, 0.999), (0.9, 0.999)] [2021-08-12 15:10:04,707] [INFO] [config.py:900:print] DeepSpeedEngine configuration: [2021-08-12 15:10:04,707] [INFO] [config.py:904:print] activation_checkpointing_config { "partition_activations": false, "contiguous_memory_optimization": false, "cpu_checkpointing": false, "number_checkpoints": null, "synchronize_checkpoint_boundary": false, "profile": false } [2021-08-12 15:10:04,707] [INFO] [config.py:904:print] aio_config ................... {'block_size': 1048576, 'queue_depth': 8, 'thread_count': 1, 'single_submit': False, 'overlap_events': True} [2021-08-12 15:10:04,707] [INFO] [config.py:904:print] allreduce_always_fp32 ........ False [2021-08-12 15:10:04,707] [INFO] [config.py:904:print] amp_enabled .................. False [2021-08-12 15:10:04,707] [INFO] [config.py:904:print] amp_params ................... False [2021-08-12 15:10:04,707] [INFO] [config.py:904:print] checkpoint_tag_validation_enabled True [2021-08-12 15:10:04,707] [INFO] [config.py:904:print] checkpoint_tag_validation_fail False [2021-08-12 15:10:04,707] [INFO] [config.py:904:print] disable_allgather ............ False [2021-08-12 15:10:04,707] [INFO] [config.py:904:print] dump_state ................... False [2021-08-12 15:10:04,707] [INFO] [config.py:904:print] dynamic_loss_scale_args ...... {'init_scale': 4096, 'scale_window': 500, 'delayed_shift': 2, 'min_scale': 1} [2021-08-12 15:10:04,707] [INFO] [config.py:904:print] eigenvalue_enabled ........... False [2021-08-12 15:10:04,707] [INFO] [config.py:904:print] eigenvalue_gas_boundary_resolution 1 [2021-08-12 15:10:04,707] [INFO] [config.py:904:print] eigenvalue_layer_name ........ bert.encoder.layer [2021-08-12 15:10:04,708] [INFO] [config.py:904:print] eigenvalue_layer_num ......... 0 [2021-08-12 15:10:04,708] [INFO] [config.py:904:print] eigenvalue_max_iter .......... 
100 [2021-08-12 15:10:04,708] [INFO] [config.py:904:print] eigenvalue_stability ......... 1e-06 [2021-08-12 15:10:04,708] [INFO] [config.py:904:print] eigenvalue_tol ............... 0.01 [2021-08-12 15:10:04,708] [INFO] [config.py:904:print] eigenvalue_verbose ........... False [2021-08-12 15:10:04,708] [INFO] [config.py:904:print] elasticity_enabled ........... False [2021-08-12 15:10:04,708] [INFO] [config.py:904:print] flops_profiler_config ........ { "enabled": false, "profile_step": 1, "module_depth": -1, "top_modules": 1, "detailed": true, "output_file": null } [2021-08-12 15:10:04,708] [INFO] [config.py:904:print] fp16_enabled ................. True [2021-08-12 15:10:04,708] [INFO] [config.py:904:print] fp16_mixed_quantize .......... False [2021-08-12 15:10:04,708] [INFO] [config.py:904:print] global_rank .................. 0 [2021-08-12 15:10:04,708] [INFO] [config.py:904:print] gradient_accumulation_steps .. 16 [2021-08-12 15:10:04,708] [INFO] [config.py:904:print] gradient_clipping ............ 1.0 [2021-08-12 15:10:04,708] [INFO] [config.py:904:print] gradient_predivide_factor .... 1.0 [2021-08-12 15:10:04,708] [INFO] [config.py:904:print] initial_dynamic_scale ........ 4096 [2021-08-12 15:10:04,708] [INFO] [config.py:904:print] loss_scale ................... 0 [2021-08-12 15:10:04,708] [INFO] [config.py:904:print] memory_breakdown ............. False [2021-08-12 15:10:04,708] [INFO] [config.py:904:print] optimizer_legacy_fusion ...... False [2021-08-12 15:10:04,708] [INFO] [config.py:904:print] optimizer_name ............... None [2021-08-12 15:10:04,708] [INFO] [config.py:904:print] optimizer_params ............. None [2021-08-12 15:10:04,708] [INFO] [config.py:904:print] pipeline ..................... {'stages': 'auto', 'partition': 'best', 'seed_layers': False, 'activation_checkpoint_interval': 0} [2021-08-12 15:10:04,708] [INFO] [config.py:904:print] pld_enabled .................. False [2021-08-12 15:10:04,708] [INFO] [config.py:904:print] pld_params ................... False [2021-08-12 15:10:04,708] [INFO] [config.py:904:print] prescale_gradients ........... False [2021-08-12 15:10:04,708] [INFO] [config.py:904:print] quantize_change_rate ......... 0.001 [2021-08-12 15:10:04,708] [INFO] [config.py:904:print] quantize_groups .............. 1 [2021-08-12 15:10:04,708] [INFO] [config.py:904:print] quantize_offset .............. 1000 [2021-08-12 15:10:04,708] [INFO] [config.py:904:print] quantize_period .............. 1000 [2021-08-12 15:10:04,708] [INFO] [config.py:904:print] quantize_rounding ............ 0 [2021-08-12 15:10:04,708] [INFO] [config.py:904:print] quantize_start_bits .......... 16 [2021-08-12 15:10:04,708] [INFO] [config.py:904:print] quantize_target_bits ......... 8 [2021-08-12 15:10:04,708] [INFO] [config.py:904:print] quantize_training_enabled .... False [2021-08-12 15:10:04,708] [INFO] [config.py:904:print] quantize_type ................ 0 [2021-08-12 15:10:04,708] [INFO] [config.py:904:print] quantize_verbose ............. False [2021-08-12 15:10:04,708] [INFO] [config.py:904:print] scheduler_name ............... None [2021-08-12 15:10:04,708] [INFO] [config.py:904:print] scheduler_params ............. None [2021-08-12 15:10:04,708] [INFO] [config.py:904:print] sparse_attention ............. None [2021-08-12 15:10:04,708] [INFO] [config.py:904:print] sparse_gradients_enabled ..... False [2021-08-12 15:10:04,709] [INFO] [config.py:904:print] steps_per_print .............. 
2000 [2021-08-12 15:10:04,709] [INFO] [config.py:904:print] tensorboard_enabled .......... False [2021-08-12 15:10:04,709] [INFO] [config.py:904:print] tensorboard_job_name ......... DeepSpeedJobName [2021-08-12 15:10:04,709] [INFO] [config.py:904:print] tensorboard_output_path ...... [2021-08-12 15:10:04,709] [INFO] [config.py:904:print] train_batch_size ............. 1024 [2021-08-12 15:10:04,709] [INFO] [config.py:904:print] train_micro_batch_size_per_gpu 16 [2021-08-12 15:10:04,709] [INFO] [config.py:904:print] use_quantizer_kernel ......... False [2021-08-12 15:10:04,709] [INFO] [config.py:904:print] wall_clock_breakdown ......... False [2021-08-12 15:10:04,709] [INFO] [config.py:904:print] world_size ................... 4 [2021-08-12 15:10:04,709] [INFO] [config.py:904:print] zero_allow_untested_optimizer False [2021-08-12 15:10:04,709] [INFO] [config.py:904:print] zero_config .................. { "stage": 1, "contiguous_gradients": false, "reduce_scatter": true, "reduce_bucket_size": 5.000000e+08, "allgather_partitions": true, "allgather_bucket_size": 5.000000e+08, "overlap_comm": false, "load_from_fp32_weights": true, "elastic_checkpoint": true, "offload_param": null, "offload_optimizer": null, "sub_group_size": 1.000000e+09, "prefetch_bucket_size": 5.000000e+07, "param_persistence_threshold": 1.000000e+05, "max_live_parameters": 1.000000e+09, "max_reuse_distance": 1.000000e+09, "gather_fp16_weights_on_model_save": false, "ignore_unused_parameters": true, "round_robin_gradients": false, "legacy_stage1": false } [2021-08-12 15:10:04,709] [INFO] [config.py:904:print] zero_enabled ................. True [2021-08-12 15:10:04,709] [INFO] [config.py:904:print] zero_optimization_stage ...... 1 [2021-08-12 15:10:04,709] [INFO] [config.py:906:print] json = { "train_micro_batch_size_per_gpu": 16, "train_batch_size": 1.024000e+03, "gradient_clipping": 1.0, "zero_optimization": { "stage": 1 }, "fp16": { "enabled": true, "loss_scale": 0, "loss_scale_window": 500, "hysteresis": 2, "min_loss_scale": 1, "initial_scale_power": 12 }, "steps_per_print": 2.000000e+03, "wall_clock_breakdown": false } [2021-08-12 15:10:04,709] [INFO] [engine.py:76:__init__] CONFIG: micro_batches=16 micro_batch_size=16 [2021-08-12 15:10:04,999] [INFO] [engine.py:134:__init__] RANK=0 STAGE=0 LAYERS=9 [0, 9) STAGE_PARAMS=92107776 (92.108M) TOTAL_PARAMS=1341620224 (1341.620M) UNIQUE_PARAMS=1275559936 (1275.560M) [2021-08-12 15:10:04,999] [INFO] [engine.py:134:__init__] RANK=2 STAGE=0 LAYERS=9 [0, 9) STAGE_PARAMS=92107776 (92.108M) TOTAL_PARAMS=1341620224 (1341.620M) UNIQUE_PARAMS=1275559936 (1275.560M) [2021-08-12 15:10:04,999] [INFO] [engine.py:134:__init__] RANK=1 STAGE=0 LAYERS=9 [0, 9) STAGE_PARAMS=92107776 (92.108M) TOTAL_PARAMS=1341620224 (1341.620M) UNIQUE_PARAMS=1275559936 (1275.560M) [2021-08-12 15:10:04,999] [INFO] [engine.py:134:__init__] RANK=3 STAGE=0 LAYERS=9 [0, 9) STAGE_PARAMS=92107776 (92.108M) TOTAL_PARAMS=1341620224 (1341.620M) UNIQUE_PARAMS=1275559936 (1275.560M) [2021-08-12 15:10:04,999] [INFO] [engine.py:134:__init__] RANK=32 STAGE=2 LAYERS=6 [15, 21) STAGE_PARAMS=75592704 (75.593M) TOTAL_PARAMS=1341620224 (1341.620M) UNIQUE_PARAMS=1275559936 (1275.560M) [2021-08-12 15:10:04,999] [INFO] [engine.py:134:__init__] RANK=35 STAGE=2 LAYERS=6 [15, 21) STAGE_PARAMS=75592704 (75.593M) TOTAL_PARAMS=1341620224 (1341.620M) UNIQUE_PARAMS=1275559936 (1275.560M) [2021-08-12 15:10:04,999] [INFO] [engine.py:134:__init__] RANK=33 STAGE=2 LAYERS=6 [15, 21) STAGE_PARAMS=75592704 (75.593M) TOTAL_PARAMS=1341620224 
(1341.620M) UNIQUE_PARAMS=1275559936 (1275.560M) [2021-08-12 15:10:04,999] [INFO] [engine.py:134:__init__] RANK=34 STAGE=2 LAYERS=6 [15, 21) STAGE_PARAMS=75592704 (75.593M) TOTAL_PARAMS=1341620224 (1341.620M) UNIQUE_PARAMS=1275559936 (1275.560M) [2021-08-12 15:10:04,999] [INFO] [engine.py:134:__init__] RANK=16 STAGE=1 LAYERS=6 [9, 15) STAGE_PARAMS=75592704 (75.593M) TOTAL_PARAMS=1341620224 (1341.620M) UNIQUE_PARAMS=1275559936 (1275.560M) [2021-08-12 15:10:04,999] [INFO] [engine.py:134:__init__] RANK=17 STAGE=1 LAYERS=6 [9, 15) STAGE_PARAMS=75592704 (75.593M) TOTAL_PARAMS=1341620224 (1341.620M) UNIQUE_PARAMS=1275559936 (1275.560M) [2021-08-12 15:10:04,999] [INFO] [engine.py:134:__init__] RANK=19 STAGE=1 LAYERS=6 [9, 15) STAGE_PARAMS=75592704 (75.593M) TOTAL_PARAMS=1341620224 (1341.620M) UNIQUE_PARAMS=1275559936 (1275.560M) [2021-08-12 15:10:04,999] [INFO] [engine.py:134:__init__] RANK=18 STAGE=1 LAYERS=6 [9, 15) STAGE_PARAMS=75592704 (75.593M) TOTAL_PARAMS=1341620224 (1341.620M) UNIQUE_PARAMS=1275559936 (1275.560M) [2021-08-12 15:10:04,999] [INFO] [engine.py:134:__init__] RANK=51 STAGE=3 LAYERS=10 [21, 31) STAGE_PARAMS=92111872 (92.112M) TOTAL_PARAMS=1341620224 (1341.620M) UNIQUE_PARAMS=1275559936 (1275.560M) [2021-08-12 15:10:04,999] [INFO] [engine.py:134:__init__] RANK=48 STAGE=3 LAYERS=10 [21, 31) STAGE_PARAMS=92111872 (92.112M) TOTAL_PARAMS=1341620224 (1341.620M) UNIQUE_PARAMS=1275559936 (1275.560M) [2021-08-12 15:10:04,999] [INFO] [engine.py:134:__init__] RANK=50 STAGE=3 LAYERS=10 [21, 31) STAGE_PARAMS=92111872 (92.112M) TOTAL_PARAMS=1341620224 (1341.620M) UNIQUE_PARAMS=1275559936 (1275.560M) [2021-08-12 15:10:04,999] [INFO] [engine.py:134:__init__] RANK=49 STAGE=3 LAYERS=10 [21, 31) STAGE_PARAMS=92111872 (92.112M) TOTAL_PARAMS=1341620224 (1341.620M) UNIQUE_PARAMS=1275559936 (1275.560M) > using checkpoint value 0.0001 for learning rate > using checkpoint value 1e-05 for minimum learning rate > using checkpoint value 183105 for warmup iterations > using checkpoint value 126953125 for total number of iterations > using checkpoint value cosine for decay style successfully loaded 4 ZeRO state_dicts for rank 28 successfully loaded 4 ZeRO state_dicts for rank 16 successfully loaded 4 ZeRO state_dicts for rank 27 successfully loaded 4 ZeRO state_dicts for rank 19 successfully loaded 4 ZeRO state_dicts for rank 40 successfully loaded 4 ZeRO state_dicts for rank 37 successfully loaded 4 ZeRO state_dicts for rank 24 successfully loaded 4 ZeRO state_dicts for rank 31 successfully loaded 4 ZeRO state_dicts for rank 39 successfully loaded 4 ZeRO state_dicts for rank 38 successfully loaded 4 ZeRO state_dicts for rank 42 successfully loaded 4 ZeRO state_dicts for rank 34 successfully loaded 4 ZeRO state_dicts for rank 43 successfully loaded 4 ZeRO state_dicts for rank 23 successfully loaded 4 ZeRO state_dicts for rank 44 successfully loaded 4 ZeRO state_dicts for rank 35 successfully loaded 4 ZeRO state_dicts for rank 32 successfully loaded 4 ZeRO state_dicts for rank 46 successfully loaded 4 ZeRO state_dicts for rank 36 successfully loaded 4 ZeRO state_dicts for rank 26 successfully loaded 4 ZeRO state_dicts for rank 45 successfully loaded 4 ZeRO state_dicts for rank 33 successfully loaded 4 ZeRO state_dicts for rank 41 successfully loaded 4 ZeRO state_dicts for rank 20 successfully loaded 4 ZeRO state_dicts for rank 47 successfully loaded 4 ZeRO state_dicts for rank 18 successfully loaded 4 ZeRO state_dicts for rank 22 successfully loaded 4 ZeRO state_dicts for rank 30 successfully loaded 4 ZeRO 
state_dicts for rank 29 successfully loaded 4 ZeRO state_dicts for rank 25 successfully loaded 4 ZeRO state_dicts for rank 17 successfully loaded 4 ZeRO state_dicts for rank 21 successfully loaded 4 ZeRO state_dicts for rank 60 loading 4 zero partition checkpoints for rank 28 loading 4 zero partition checkpoints for rank 16 successfully loaded 4 ZeRO state_dicts for rank 52 loading 4 zero partition checkpoints for rank 27 loading 4 zero partition checkpoints for rank 31 loading 4 zero partition checkpoints for rank 40 successfully loaded 4 ZeRO state_dicts for rank 54 loading 4 zero partition checkpoints for rank 19 loading 4 zero partition checkpoints for rank 23 loading 4 zero partition checkpoints for rank 42 loading 4 zero partition checkpoints for rank 37 loading 4 zero partition checkpoints for rank 43 loading 4 zero partition checkpoints for rank 24 successfully loaded 4 ZeRO state_dicts for rank 55 loading 4 zero partition checkpoints for rank 38 loading 4 zero partition checkpoints for rank 39 loading 4 zero partition checkpoints for rank 32 loading 4 zero partition checkpoints for rank 45 loading 4 zero partition checkpoints for rank 36 loading 4 zero partition checkpoints for rank 34 loading 4 zero partition checkpoints for rank 44 loading 4 zero partition checkpoints for rank 41 loading 4 zero partition checkpoints for rank 18 loading 4 zero partition checkpoints for rank 35 loading 4 zero partition checkpoints for rank 46 loading 4 zero partition checkpoints for rank 26 loading 4 zero partition checkpoints for rank 33 loading 4 zero partition checkpoints for rank 20 loading 4 zero partition checkpoints for rank 47 successfully loaded 4 ZeRO state_dicts for rank 6 successfully loaded 4 ZeRO state_dicts for rank 10 successfully loaded 4 ZeRO state_dicts for rank 58 loading 4 zero partition checkpoints for rank 22 successfully loaded 4 ZeRO state_dicts for rank 53 loading 4 zero partition checkpoints for rank 30 successfully loaded 4 ZeRO state_dicts for rank 7 successfully loaded 4 ZeRO state_dicts for rank 49 successfully loaded 4 ZeRO state_dicts for rank 51 successfully loaded 4 ZeRO state_dicts for rank 62 successfully loaded 4 ZeRO state_dicts for rank 56 successfully loaded 4 ZeRO state_dicts for rank 57 successfully loaded 4 ZeRO state_dicts for rank 9 successfully loaded 4 ZeRO state_dicts for rank 61 successfully loaded 4 ZeRO state_dicts for rank 59 loading 4 zero partition checkpoints for rank 25 successfully loaded 4 ZeRO state_dicts for rank 11 loading 4 zero partition checkpoints for rank 21 loading 4 zero partition checkpoints for rank 29 successfully loaded 4 ZeRO state_dicts for rank 50 loading 4 zero partition checkpoints for rank 17 successfully loaded 4 ZeRO state_dicts for rank 63 successfully loaded 4 ZeRO state_dicts for rank 48 successfully loaded 4 ZeRO state_dicts for rank 4 successfully loaded 4 ZeRO state_dicts for rank 8 successfully loaded 4 ZeRO state_dicts for rank 12 successfully loaded 4 ZeRO state_dicts for rank 0 successfully loaded 4 ZeRO state_dicts for rank 14 successfully loaded 4 ZeRO state_dicts for rank 2 successfully loaded 4 ZeRO state_dicts for rank 3 successfully loaded 4 ZeRO state_dicts for rank 15 successfully loaded 4 ZeRO state_dicts for rank 1 successfully loaded 4 ZeRO state_dicts for rank 13 successfully loaded 4 ZeRO state_dicts for rank 5 loading 4 zero partition checkpoints for rank 60 loading 4 zero partition checkpoints for rank 52 loading 4 zero partition checkpoints for rank 54 loading 4 zero partition checkpoints for 
rank 55 loading 4 zero partition checkpoints for rank 6 loading 4 zero partition checkpoints for rank 10 loading 4 zero partition checkpoints for rank 58 loading 4 zero partition checkpoints for rank 56 loading 4 zero partition checkpoints for rank 49 loading 4 zero partition checkpoints for rank 53 loading 4 zero partition checkpoints for rank 7 loading 4 zero partition checkpoints for rank 11 loading 4 zero partition checkpoints for rank 62 loading 4 zero partition checkpoints for rank 51 loading 4 zero partition checkpoints for rank 9 loading 4 zero partition checkpoints for rank 63 loading 4 zero partition checkpoints for rank 57 loading 4 zero partition checkpoints for rank 61 loading 4 zero partition checkpoints for rank 4 loading 4 zero partition checkpoints for rank 59 loading 4 zero partition checkpoints for rank 8 loading 4 zero partition checkpoints for rank 50 loading 4 zero partition checkpoints for rank 48 loading 4 zero partition checkpoints for rank 2 loading 4 zero partition checkpoints for rank 14 loading 4 zero partition checkpoints for rank 12 loading 4 zero partition checkpoints for rank 0 loading 4 zero partition checkpoints for rank 15 checkpoint version 3.0 loading 4 zero partition checkpoints for rank 3 loading 4 zero partition checkpoints for rank 1 loading 4 zero partition checkpoints for rank 5 loading 4 zero partition checkpoints for rank 13 successfully loaded checkpoint from /gpfsscratch/rech/six/uty16tp/synched_exps/tr4-1B3-full//checkpoints at iteration 6273 time (ms) | load-checkpoint: 2506.60 [after model, optimizer, and learning rate scheduler are built] datetime: 2021-08-12 15:10:07 > building train, validation, and test datasets ... > datasets target sizes (minimum size): train: 146484375 validation: 14745600 test: 102400 > building train, validation, and test datasets for GPT ... > building dataset index ... reading sizes... reading pointers... reading document index... creating numpy buffer of mmap... creating memory view of numpy buffer... 
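The resume point above (iteration 6273) and the dataset target sizes just printed line up with the batch geometry from the DeepSpeed config echoed earlier: a global batch of 1024 sequences = 16 micro-batch x 16 gradient-accumulation steps x 4-way data parallelism. A small illustrative cross-check follows; note that the 300B-token reading of the train target is an inference from 146,484,375 samples x 2048-token sequences, not something the log states:

    # Illustrative arithmetic only; numbers are copied from the log above.
    micro_batch_per_gpu = 16      # train_micro_batch_size_per_gpu
    grad_accum_steps    = 16      # gradient_accumulation_steps
    data_parallel_size  = 4       # from the pipe=4 x data=4 x model=4 topology

    global_batch = micro_batch_per_gpu * grad_accum_steps * data_parallel_size
    assert global_batch == 1024   # matches train_batch_size

    train_samples, seq_len = 146_484_375, 2048   # target size / *_2048sl_* index files
    print(train_samples * seq_len)               # 300_000_000_000 tokens (inferred budget)
    print(train_samples // global_batch)         # 143_051 -> "setting training iterations to 143051"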
> finished creating indexed dataset in 0.043899 seconds
number of documents: 364868892
> dataset split:
    train: document indices in [0, 346260578) total of 346260578 documents
    validation: document indices in [346260578, 364504023) total of 18243445 documents
    test: document indices in [364504023, 364868892) total of 364869 documents
> loading doc-idx mapping from /gpfsscratch/rech/six/commun/datasets-custom/c4_preprocessing/c4_en_train_text_document_train_indexmap_146484375ns_2048sl_1234s_doc_idx.npy
> loading sample-idx mapping from /gpfsscratch/rech/six/commun/datasets-custom/c4_preprocessing/c4_en_train_text_document_train_indexmap_146484375ns_2048sl_1234s_sample_idx.npy
> loading shuffle-idx mapping from /gpfsscratch/rech/six/commun/datasets-custom/c4_preprocessing/c4_en_train_text_document_train_indexmap_146484375ns_2048sl_1234s_shuffle_idx.npy
loaded indexed file in 0.081 seconds
total number of samples: 171386255
total number of epochs: 2
> loading doc-idx mapping from /gpfsscratch/rech/six/commun/datasets-custom/c4_preprocessing/c4_en_train_text_document_valid_indexmap_14745600ns_2048sl_1234s_doc_idx.npy
> loading sample-idx mapping from /gpfsscratch/rech/six/commun/datasets-custom/c4_preprocessing/c4_en_train_text_document_valid_indexmap_14745600ns_2048sl_1234s_sample_idx.npy
> loading shuffle-idx mapping from /gpfsscratch/rech/six/commun/datasets-custom/c4_preprocessing/c4_en_train_text_document_valid_indexmap_14745600ns_2048sl_1234s_shuffle_idx.npy
loaded indexed file in 0.066 seconds
total number of samples: 18059589
total number of epochs: 4
> loading doc-idx mapping from /gpfsscratch/rech/six/commun/datasets-custom/c4_preprocessing/c4_en_train_text_document_test_indexmap_102400ns_2048sl_1234s_doc_idx.npy
> loading sample-idx mapping from /gpfsscratch/rech/six/commun/datasets-custom/c4_preprocessing/c4_en_train_text_document_test_indexmap_102400ns_2048sl_1234s_sample_idx.npy
> loading shuffle-idx mapping from /gpfsscratch/rech/six/commun/datasets-custom/c4_preprocessing/c4_en_train_text_document_test_indexmap_102400ns_2048sl_1234s_shuffle_idx.npy
loaded indexed file in 0.012 seconds
total number of samples: 180044
total number of epochs: 2
> finished creating GPT datasets ...
[after dataloaders are built] datetime: 2021-08-12 15:10:14
done with setup ...
training ...
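For each split, three memory-mapped index files are loaded above: a doc-idx (epoch-ordered document ids), a sample-idx (document/offset boundaries per 2048-token sample) and a shuffle-idx (shuffled sample order). The sketch below shows the idea of how a sample id resolves through these mappings; it is a simplification for illustration, not Megatron-LM's GPT dataset implementation, and the exact array layouts may differ:

    import numpy as np

    # Paths taken verbatim from the log; array layout per the simplification above.
    prefix = ("/gpfsscratch/rech/six/commun/datasets-custom/c4_preprocessing/"
              "c4_en_train_text_document_train_indexmap_146484375ns_2048sl_1234s")

    doc_idx     = np.load(prefix + "_doc_idx.npy", mmap_mode="r")      # document order over epochs
    sample_idx  = np.load(prefix + "_sample_idx.npy", mmap_mode="r")   # (doc position, token offset) per sample boundary
    shuffle_idx = np.load(prefix + "_shuffle_idx.npy", mmap_mode="r")  # shuffled sample order

    def locate(sample_id):
        """Map a training sample id to the documents and offsets it spans."""
        i = shuffle_idx[sample_id]
        doc_f, offset_f = sample_idx[i]
        doc_l, offset_l = sample_idx[i + 1]
        return doc_idx[doc_f:doc_l + 1], offset_f, offset_l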
time (ms) | model-and-optimizer-setup: 4166.30 | train/valid/test-data-iterators-setup: 5702.72 [before the start of training step] datetime: 2021-08-12 15:10:14 [2021-08-12 15:10:14,167] [INFO] [checkpointing.py:408:forward] Activation Checkpointing Information [2021-08-12 15:10:14,168] [INFO] [checkpointing.py:409:forward] ----Partition Activations False, CPU CHECKPOINTING False [2021-08-12 15:10:14,168] [INFO] [checkpointing.py:412:forward] ----contiguous Memory Checkpointing False with 24 total layers [2021-08-12 15:10:14,168] [INFO] [checkpointing.py:415:forward] ----Synchronization False [2021-08-12 15:10:14,168] [INFO] [checkpointing.py:416:forward] ----Profiling time in checkpointing False iteration 6400/ 143051 | consumed samples: 6553600 | elapsed time per iteration (ms): 11099.8 | learning rate: 9.944E-05 | global batch size: 1024 | lm loss: 2.964395E+00 | loss scale: 524288.0 | grad norm: 49615.756 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | [Rank 2] (after 6400 iterations) memory (MB) | allocated: 504.96142578125 | max allocated: 7306.77001953125 | reserved: 10410.0 | max reserved: 10410.0 [Rank 0] (after 6400 iterations) memory (MB) | allocated: 504.96142578125 | max allocated: 7306.77001953125 | reserved: 10538.0 | max reserved: 10538.0 [Rank 16] (after 6400 iterations) memory (MB) | allocated: 434.46240234375 | max allocated: 6147.1455078125 | reserved: 9116.0 | max reserved: 9116.0 [Rank 32] (after 6400 iterations) memory (MB) | allocated: 434.46240234375 | max allocated: 5347.14501953125 | reserved: 8348.0 | max reserved: 8348.0 [Rank 35] (after 6400 iterations) memory (MB) | allocated: 434.46240234375 | max allocated: 5347.14501953125 | reserved: 8348.0 | max reserved: 8348.0 [Rank 19] (after 6400 iterations) memory (MB) | allocated: 434.46240234375 | max allocated: 6147.1455078125 | reserved: 9116.0 | max reserved: 9116.0 [Rank 3] (after 6400 iterations) memory (MB) | allocated: 504.96142578125 | max allocated: 7306.77001953125 | reserved: 10282.0 | max reserved: 10282.0 [Rank 33] (after 6400 iterations) memory (MB) | allocated: 434.46240234375 | max allocated: 5347.14501953125 | reserved: 8348.0 | max reserved: 8348.0 [Rank 1] (after 6400 iterations) memory (MB) | allocated: 504.96142578125 | max allocated: 7306.77001953125 | reserved: 10410.0 | max reserved: 10410.0 [Rank 17] (after 6400 iterations) memory (MB) | allocated: 434.46240234375 | max allocated: 6147.1455078125 | reserved: 9244.0 | max reserved: 9244.0 [Rank 49] (after 6400 iterations) memory (MB) | allocated: 3011.2392578125 | max allocated: 6739.93017578125 | reserved: 10358.0 | max reserved: 10358.0[Rank 51] (after 6400 iterations) memory (MB) | allocated: 3011.2392578125 | max allocated: 6739.93017578125 | reserved: 10358.0 | max reserved: 10358.0 [Rank 50] (after 6400 iterations) memory (MB) | allocated: 3011.2392578125 | max allocated: 6739.93017578125 | reserved: 10358.0 | max reserved: 10358.0 [Rank 48] (after 6400 iterations) memory (MB) | allocated: 3011.2392578125 | max allocated: 6739.93017578125 | reserved: 10358.0 | max reserved: 10358.0 [Rank 34] (after 6400 iterations) memory (MB) | allocated: 434.46240234375 | max allocated: 5347.14501953125 | reserved: 7964.0 | max reserved: 7964.0 [Rank 18] (after 6400 iterations) memory (MB) | allocated: 434.46240234375 | max allocated: 6147.1455078125 | reserved: 9244.0 | max reserved: 9244.0 time (ms) iteration 6600/ 143051 | consumed samples: 6758400 | elapsed time per iteration (ms): 11005.2 | learning rate: 
9.941E-05 | global batch size: 1024 | lm loss: 2.957889E+00 | loss scale: 524288.0 | grad norm: 71210.393 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | time (ms) iteration 6800/ 143051 | consumed samples: 6963200 | elapsed time per iteration (ms): 11012.5 | learning rate: 9.937E-05 | global batch size: 1024 | lm loss: 2.955018E+00 | loss scale: 1048576.0 | grad norm: 161792.607 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | time (ms) iteration 7000/ 143051 | consumed samples: 7168000 | elapsed time per iteration (ms): 11026.2 | learning rate: 9.933E-05 | global batch size: 1024 | lm loss: 2.952314E+00 | loss scale: 1048576.0 | grad norm: 141289.402 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | time (ms) ------------------------------------------------------------------------------------------------ validation loss at iteration 7000 | lm loss value: 2.894326E+00 | lm loss PPL: 1.807131E+01 | ------------------------------------------------------------------------------------------------ iteration 7200/ 143051 | consumed samples: 7372800 | elapsed time per iteration (ms): 12515.4 | learning rate: 9.929E-05 | global batch size: 1024 | lm loss: 2.949153E+00 | loss scale: 1048576.0 | grad norm: 201758.259 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | time (ms) iteration 7400/ 143051 | consumed samples: 7577600 | elapsed time per iteration (ms): 11008.0 | learning rate: 9.925E-05 | global batch size: 1024 | lm loss: 2.944497E+00 | loss scale: 2097152.0 | grad norm: 317430.133 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | time (ms) saving checkpoint at iteration 7500 to /gpfsscratch/rech/six/uty16tp/synched_exps/tr4-1B3-full//checkpoints [2021-08-12 19:00:40,973] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: /gpfsscratch/rech/six/uty16tp/synched_exps/tr4-1B3-full//checkpoints/global_step7500/mp_rank_00_model_states.pt successfully saved checkpoint at iteration 7500 to /gpfsscratch/rech/six/uty16tp/synched_exps/tr4-1B3-full//checkpoints time (ms) | save-checkpoint: 3446.84 iteration 7600/ 143051 | consumed samples: 7782400 | elapsed time per iteration (ms): 11045.3 | learning rate: 9.921E-05 | global batch size: 1024 | lm loss: 2.938465E+00 | loss scale: 2097152.0 | grad norm: 341684.535 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | time (ms) iteration 7800/ 143051 | consumed samples: 7987200 | elapsed time per iteration (ms): 11062.8 | learning rate: 9.916E-05 | global batch size: 1024 | lm loss: 2.934677E+00 | loss scale: 2097152.0 | grad norm: 309021.920 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | time (ms) [2021-08-12 20:32:48,895] [INFO] [logging.py:68:log_dist] [Rank 0] step=8000, skipped=8, lr=[9.911837837186685e-05, 9.911837837186685e-05], mom=[(0.9, 0.999), (0.9, 0.999)] iteration 8000/ 143051 | consumed samples: 8192000 | elapsed time per iteration (ms): 11050.4 | learning rate: 9.912E-05 | global batch size: 1024 | lm loss: 2.924052E+00 | loss scale: 2097152.0 | grad norm: 374151.717 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | time (ms) steps: 8000 loss: 2.9296 iter time (s): 0.006 samples/sec: 185583.220 ------------------------------------------------------------------------------------------------ validation loss at iteration 8000 | lm loss value: 2.864850E+00 | lm loss 
PPL: 1.754642E+01 | ------------------------------------------------------------------------------------------------ iteration 8200/ 143051 | consumed samples: 8396800 | elapsed time per iteration (ms): 12540.5 | learning rate: 9.907E-05 | global batch size: 1024 | lm loss: 2.920811E+00 | loss scale: 2097152.0 | grad norm: 296746.615 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | time (ms) iteration 8400/ 143051 | consumed samples: 8601600 | elapsed time per iteration (ms): 11014.8 | learning rate: 9.903E-05 | global batch size: 1024 | lm loss: 3.857497E+00 | loss scale: 16384.0 | grad norm: 11963.927 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | time (ms) iteration 8600/ 143051 | consumed samples: 8806400 | elapsed time per iteration (ms): 11013.2 | learning rate: 9.898E-05 | global batch size: 1024 | lm loss: 4.257671E+00 | loss scale: 16384.0 | grad norm: 1960.374 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | time (ms) iteration 8800/ 143051 | consumed samples: 9011200 | elapsed time per iteration (ms): 11029.9 | learning rate: 9.893E-05 | global batch size: 1024 | lm loss: 2.975549E+00 | loss scale: 16384.0 | grad norm: 1738.907 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | time (ms) iteration 9000/ 143051 | consumed samples: 9216000 | elapsed time per iteration (ms): 11038.2 | learning rate: 9.888E-05 | global batch size: 1024 | lm loss: 2.930870E+00 | loss scale: 32768.0 | grad norm: 3816.081 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | time (ms) ------------------------------------------------------------------------------------------------ validation loss at iteration 9000 | lm loss value: 2.863462E+00 | lm loss PPL: 1.752209E+01 | ------------------------------------------------------------------------------------------------ saving checkpoint at iteration 9000 to /gpfsscratch/rech/six/uty16tp/synched_exps/tr4-1B3-full//checkpoints [2021-08-12 23:46:42,880] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: /gpfsscratch/rech/six/uty16tp/synched_exps/tr4-1B3-full//checkpoints/global_step9000/mp_rank_00_model_states.pt successfully saved checkpoint at iteration 9000 to /gpfsscratch/rech/six/uty16tp/synched_exps/tr4-1B3-full//checkpoints time (ms) | save-checkpoint: 3105.20 iteration 9200/ 143051 | consumed samples: 9420800 | elapsed time per iteration (ms): 12568.6 | learning rate: 9.883E-05 | global batch size: 1024 | lm loss: 2.914299E+00 | loss scale: 32768.0 | grad norm: 4057.599 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | time (ms) iteration 9400/ 143051 | consumed samples: 9625600 | elapsed time per iteration (ms): 11026.1 | learning rate: 9.878E-05 | global batch size: 1024 | lm loss: 2.902115E+00 | loss scale: 65536.0 | grad norm: 8103.395 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | time (ms) iteration 9600/ 143051 | consumed samples: 9830400 | elapsed time per iteration (ms): 11059.4 | learning rate: 9.872E-05 | global batch size: 1024 | lm loss: 2.896519E+00 | loss scale: 65536.0 | grad norm: 7686.411 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | time (ms) iteration 9800/ 143051 | consumed samples: 10035200 | elapsed time per iteration (ms): 11040.7 | learning rate: 9.867E-05 | global batch size: 1024 | lm loss: 2.886656E+00 | loss scale: 65536.0 | grad norm: 
9158.341 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | time (ms) [2021-08-13 02:50:46,343] [INFO] [logging.py:68:log_dist] [Rank 0] step=10000, skipped=15, lr=[9.861386012799114e-05, 9.861386012799114e-05], mom=[(0.9, 0.999), (0.9, 0.999)] iteration 10000/ 143051 | consumed samples: 10240000 | elapsed time per iteration (ms): 11055.8 | learning rate: 9.861E-05 | global batch size: 1024 | lm loss: 2.883275E+00 | loss scale: 131072.0 | grad norm: 17703.174 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | time (ms) steps: 10000 loss: 2.8931 iter time (s): 0.006 samples/sec: 185511.173 ------------------------------------------------------------------------------------------------- validation loss at iteration 10000 | lm loss value: 2.824834E+00 | lm loss PPL: 1.685814E+01 | ------------------------------------------------------------------------------------------------- iteration 10200/ 143051 | consumed samples: 10444800 | elapsed time per iteration (ms): 12571.6 | learning rate: 9.856E-05 | global batch size: 1024 | lm loss: 2.876481E+00 | loss scale: 131072.0 | grad norm: 20278.337 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | time (ms) iteration 10400/ 143051 | consumed samples: 10649600 | elapsed time per iteration (ms): 11057.7 | learning rate: 9.850E-05 | global batch size: 1024 | lm loss: 2.870816E+00 | loss scale: 262144.0 | grad norm: 42463.558 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | time (ms) saving checkpoint at iteration 10500 to /gpfsscratch/rech/six/uty16tp/synched_exps/tr4-1B3-full//checkpoints [2021-08-13 04:27:58,699] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: /gpfsscratch/rech/six/uty16tp/synched_exps/tr4-1B3-full//checkpoints/global_step10500/mp_rank_00_model_states.pt successfully saved checkpoint at iteration 10500 to /gpfsscratch/rech/six/uty16tp/synched_exps/tr4-1B3-full//checkpoints time (ms) | save-checkpoint: 3000.24 iteration 10600/ 143051 | consumed samples: 10854400 | elapsed time per iteration (ms): 11058.2 | learning rate: 9.844E-05 | global batch size: 1024 | lm loss: 2.865280E+00 | loss scale: 262144.0 | grad norm: 30747.913 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | time (ms) iteration 10800/ 143051 | consumed samples: 11059200 | elapsed time per iteration (ms): 11062.7 | learning rate: 9.838E-05 | global batch size: 1024 | lm loss: 2.859862E+00 | loss scale: 262144.0 | grad norm: 35158.853 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | time (ms) iteration 11000/ 143051 | consumed samples: 11264000 | elapsed time per iteration (ms): 11044.8 | learning rate: 9.832E-05 | global batch size: 1024 | lm loss: 2.854428E+00 | loss scale: 524288.0 | grad norm: 88177.074 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | time (ms) ------------------------------------------------------------------------------------------------- validation loss at iteration 11000 | lm loss value: 2.800371E+00 | lm loss PPL: 1.645076E+01 | ------------------------------------------------------------------------------------------------- iteration 11200/ 143051 | consumed samples: 11468800 | elapsed time per iteration (ms): 12555.7 | learning rate: 9.826E-05 | global batch size: 1024 | lm loss: 2.851350E+00 | loss scale: 524288.0 | grad norm: 72002.312 | num zeros: 0.0 | number of skipped iterations: 0 | number 
of nan iterations: 0 | time (ms) iteration 11400/ 143051 | consumed samples: 11673600 | elapsed time per iteration (ms): 11024.1 | learning rate: 9.819E-05 | global batch size: 1024 | lm loss: 2.846467E+00 | loss scale: 1048576.0 | grad norm: 141012.706 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | time (ms) iteration 11600/ 143051 | consumed samples: 11878400 | elapsed time per iteration (ms): 11031.4 | learning rate: 9.813E-05 | global batch size: 1024 | lm loss: 2.841996E+00 | loss scale: 1048576.0 | grad norm: 141865.915 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | time (ms) iteration 11800/ 143051 | consumed samples: 12083200 | elapsed time per iteration (ms): 11056.6 | learning rate: 9.806E-05 | global batch size: 1024 | lm loss: 2.837910E+00 | loss scale: 1048576.0 | grad norm: 141413.805 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | time (ms) [2021-08-13 09:09:04,724] [INFO] [logging.py:68:log_dist] [Rank 0] step=12000, skipped=17, lr=[9.799612185090217e-05, 9.799612185090217e-05], mom=[(0.9, 0.999), (0.9, 0.999)] iteration 12000/ 143051 | consumed samples: 12288000 | elapsed time per iteration (ms): 11029.2 | learning rate: 9.800E-05 | global batch size: 1024 | lm loss: 2.834040E+00 | loss scale: 1048576.0 | grad norm: 167157.523 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | time (ms) steps: 12000 loss: 2.8596 iter time (s): 0.006 samples/sec: 185852.220 ------------------------------------------------------------------------------------------------- validation loss at iteration 12000 | lm loss value: 2.778867E+00 | lm loss PPL: 1.610076E+01 | ------------------------------------------------------------------------------------------------- saving checkpoint at iteration 12000 to /gpfsscratch/rech/six/uty16tp/synched_exps/tr4-1B3-full//checkpoints [2021-08-13 09:14:11,076] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: /gpfsscratch/rech/six/uty16tp/synched_exps/tr4-1B3-full//checkpoints/global_step12000/mp_rank_00_model_states.pt successfully saved checkpoint at iteration 12000 to /gpfsscratch/rech/six/uty16tp/synched_exps/tr4-1B3-full//checkpoints time (ms) | save-checkpoint: 3048.71 iteration 12200/ 143051 | consumed samples: 12492800 | elapsed time per iteration (ms): 12557.0 | learning rate: 9.793E-05 | global batch size: 1024 | lm loss: 2.829142E+00 | loss scale: 1048576.0 | grad norm: 156483.211 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | time (ms) iteration 12400/ 143051 | consumed samples: 12697600 | elapsed time per iteration (ms): 11032.7 | learning rate: 9.786E-05 | global batch size: 1024 | lm loss: 2.826435E+00 | loss scale: 2097152.0 | grad norm: 299692.596 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | time (ms) saving checkpoint at iteration 12572 to /gpfsscratch/rech/six/uty16tp/synched_exps/tr4-1B3-full//checkpoints [2021-08-13 10:59:22,464] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: /gpfsscratch/rech/six/uty16tp/synched_exps/tr4-1B3-full//checkpoints/global_step12572/mp_rank_00_model_states.pt successfully saved checkpoint at iteration 12572 to /gpfsscratch/rech/six/uty16tp/synched_exps/tr4-1B3-full//checkpoints time (ms) | save-checkpoint: 3198.90 [exiting program after 1190.0082771499951 minutes] datetime: 2021-08-13 10:59:23
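A few closing observations that follow directly from the numbers above. Throughput is roughly 1024 samples / ~11.0 s per iteration, i.e. ~93 samples/s or ~190k tokens/s across the 64 GPUs, and the reported "lm loss PPL" is simply exp(lm loss). The loss-scale drop from 2,097,152 to 16,384 around iteration 8400, together with the cumulative skipped counter rising from 8 (step 8000) to 15 (step 10000), is the dynamic fp16 loss scaler backing off after overflows and then recovering (init_scale 4096, scale_window 500 in the config). Finally, the job checkpointed at iteration 12572 and exited after ~19.8 hours of wall clock, consistent with stopping just short of a ~20 h job time limit; the limit itself is an assumption, not stated in the log. Illustrative checks:

    import math

    # Illustrative sanity checks on figures reported in the log; not part of the run.
    iter_ms, global_batch, seq_len = 11026.2, 1024, 2048
    samples_per_s = global_batch / (iter_ms / 1000)   # ~92.9 samples/s
    tokens_per_s  = samples_per_s * seq_len           # ~190k tokens/s over 64 GPUs

    # "lm loss PPL" is exp(lm loss), e.g. at iteration 7000:
    assert abs(math.exp(2.894326) - 18.07131) < 1e-3

    # Loss-scale drop 2_097_152 -> 16_384 corresponds to 7 overflow halvings,
    # matching the skipped counter going 8 (step 8000) -> 15 (step 10000):
    assert 2_097_152 // 2**7 == 16_384

    # The run saved at iteration 12572 and exited after ~19.8 h of wall clock:
    print(1190.0082771499951 / 60)                    # ~19.83 hours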