daze-unlv's picture
Training in progress, epoch 3
26e3773 verified
[1/4] /home/ludaze/usr/local/cuda/cuda-11.8/bin/nvcc --generate-dependencies-with-compile --dependency-output fast_lsh_cumulation_cuda.cuda.o.d -DTORCH_EXTENSION_NAME=fast_lsh_cumulation -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -isystem /home/ludaze/.conda/envs/axolotl/lib/python3.10/site-packages/torch/include -isystem /home/ludaze/.conda/envs/axolotl/lib/python3.10/site-packages/torch/include/torch/csrc/api/include -isystem /home/ludaze/.conda/envs/axolotl/lib/python3.10/site-packages/torch/include/TH -isystem /home/ludaze/.conda/envs/axolotl/lib/python3.10/site-packages/torch/include/THC -isystem /home/ludaze/usr/local/cuda/cuda-11.8/include -isystem /home/ludaze/.conda/envs/axolotl/include/python3.10 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_86,code=sm_86 --compiler-options '-fPIC' -std=c++17 -c /home/ludaze/.conda/envs/axolotl/lib/python3.10/site-packages/transformers/kernels/yoso/fast_lsh_cumulation_cuda.cu -o fast_lsh_cumulation_cuda.cuda.o
[2/4] c++ -MMD -MF fast_lsh_cumulation_torch.o.d -DTORCH_EXTENSION_NAME=fast_lsh_cumulation -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -isystem /home/ludaze/.conda/envs/axolotl/lib/python3.10/site-packages/torch/include -isystem /home/ludaze/.conda/envs/axolotl/lib/python3.10/site-packages/torch/include/torch/csrc/api/include -isystem /home/ludaze/.conda/envs/axolotl/lib/python3.10/site-packages/torch/include/TH -isystem /home/ludaze/.conda/envs/axolotl/lib/python3.10/site-packages/torch/include/THC -isystem /home/ludaze/usr/local/cuda/cuda-11.8/include -isystem /home/ludaze/.conda/envs/axolotl/include/python3.10 -D_GLIBCXX_USE_CXX11_ABI=0 -fPIC -std=c++17 -c /home/ludaze/.conda/envs/axolotl/lib/python3.10/site-packages/transformers/kernels/yoso/fast_lsh_cumulation_torch.cpp -o fast_lsh_cumulation_torch.o
[3/4] /home/ludaze/usr/local/cuda/cuda-11.8/bin/nvcc --generate-dependencies-with-compile --dependency-output fast_lsh_cumulation.cuda.o.d -DTORCH_EXTENSION_NAME=fast_lsh_cumulation -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -isystem /home/ludaze/.conda/envs/axolotl/lib/python3.10/site-packages/torch/include -isystem /home/ludaze/.conda/envs/axolotl/lib/python3.10/site-packages/torch/include/torch/csrc/api/include -isystem /home/ludaze/.conda/envs/axolotl/lib/python3.10/site-packages/torch/include/TH -isystem /home/ludaze/.conda/envs/axolotl/lib/python3.10/site-packages/torch/include/THC -isystem /home/ludaze/usr/local/cuda/cuda-11.8/include -isystem /home/ludaze/.conda/envs/axolotl/include/python3.10 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_86,code=sm_86 --compiler-options '-fPIC' -std=c++17 -c /home/ludaze/.conda/envs/axolotl/lib/python3.10/site-packages/transformers/kernels/yoso/fast_lsh_cumulation.cu -o fast_lsh_cumulation.cuda.o
[4/4] c++ fast_lsh_cumulation_torch.o fast_lsh_cumulation.cuda.o fast_lsh_cumulation_cuda.cuda.o -shared -L/home/ludaze/.conda/envs/axolotl/lib/python3.10/site-packages/torch/lib -lc10 -lc10_cuda -ltorch_cpu -ltorch_cuda -ltorch -ltorch_python -L/home/ludaze/usr/local/cuda/cuda-11.8/lib64 -lcudart -o fast_lsh_cumulation.so
{'loss': 1.3875, 'grad_norm': 0.290032297372818, 'learning_rate': 4.708318749270797e-05, 'epoch': 0.18}
{'loss': 1.387, 'grad_norm': 0.24705088138580322, 'learning_rate': 4.416637498541594e-05, 'epoch': 0.35}
{'loss': 1.387, 'grad_norm': 0.2913823425769806, 'learning_rate': 4.124956247812391e-05, 'epoch': 0.53}
{'loss': 1.3867, 'grad_norm': 0.2294648140668869, 'learning_rate': 3.8332749970831875e-05, 'epoch': 0.7}
{'loss': 1.3868, 'grad_norm': 0.25805965065956116, 'learning_rate': 3.5415937463539846e-05, 'epoch': 0.88}
[0 0 3 ... 0 1 2] [0 0 2 ... 0 1 0]
{'eval_loss': 1.3863803148269653, 'eval_accuracy': 0.24671288548888357, 'eval_runtime': 9.3297, 'eval_samples_per_second': 448.352, 'eval_steps_per_second': 7.074, 'epoch': 1.0}
{'loss': 1.3868, 'grad_norm': 0.3866879940032959, 'learning_rate': 3.249912495624781e-05, 'epoch': 1.05}
{'loss': 1.3864, 'grad_norm': 0.2645956873893738, 'learning_rate': 2.9582312448955786e-05, 'epoch': 1.23}
{'loss': 1.3864, 'grad_norm': 0.22899721562862396, 'learning_rate': 2.666549994166375e-05, 'epoch': 1.4}
{'loss': 1.3867, 'grad_norm': 0.2155468761920929, 'learning_rate': 2.374868743437172e-05, 'epoch': 1.58}
{'loss': 1.3865, 'grad_norm': 0.1302017718553543, 'learning_rate': 2.083187492707969e-05, 'epoch': 1.75}
{'loss': 1.3866, 'grad_norm': 0.19115537405014038, 'learning_rate': 1.7915062419787655e-05, 'epoch': 1.93}
[3 2 1 ... 2 0 1] [0 0 2 ... 0 1 0]
{'eval_loss': 1.3862448930740356, 'eval_accuracy': 0.24695194836241932, 'eval_runtime': 9.364, 'eval_samples_per_second': 446.711, 'eval_steps_per_second': 7.048, 'epoch': 2.0}
{'loss': 1.3865, 'grad_norm': 0.16356079280376434, 'learning_rate': 1.4998249912495627e-05, 'epoch': 2.1}
{'loss': 1.3865, 'grad_norm': 0.19154764711856842, 'learning_rate': 1.2081437405203595e-05, 'epoch': 2.28}
{'loss': 1.3865, 'grad_norm': 0.4934549629688263, 'learning_rate': 9.164624897911563e-06, 'epoch': 2.45}
{'loss': 1.3863, 'grad_norm': 0.3885808289051056, 'learning_rate': 6.247812390619531e-06, 'epoch': 2.63}
{'loss': 1.3863, 'grad_norm': 0.15357373654842377, 'learning_rate': 3.3309998833275e-06, 'epoch': 2.8}
{'loss': 1.3863, 'grad_norm': 0.18562105298042297, 'learning_rate': 4.141873760354685e-07, 'epoch': 2.98}