[1/4] /home/ludaze/usr/local/cuda/cuda-11.8/bin/nvcc --generate-dependencies-with-compile --dependency-output fast_lsh_cumulation_cuda.cuda.o.d -DTORCH_EXTENSION_NAME=fast_lsh_cumulation -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -isystem /home/ludaze/.conda/envs/axolotl/lib/python3.10/site-packages/torch/include -isystem /home/ludaze/.conda/envs/axolotl/lib/python3.10/site-packages/torch/include/torch/csrc/api/include -isystem /home/ludaze/.conda/envs/axolotl/lib/python3.10/site-packages/torch/include/TH -isystem /home/ludaze/.conda/envs/axolotl/lib/python3.10/site-packages/torch/include/THC -isystem /home/ludaze/usr/local/cuda/cuda-11.8/include -isystem /home/ludaze/.conda/envs/axolotl/include/python3.10 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_86,code=sm_86 --compiler-options '-fPIC' -std=c++17 -c /home/ludaze/.conda/envs/axolotl/lib/python3.10/site-packages/transformers/kernels/yoso/fast_lsh_cumulation_cuda.cu -o fast_lsh_cumulation_cuda.cuda.o [2/4] c++ -MMD -MF fast_lsh_cumulation_torch.o.d -DTORCH_EXTENSION_NAME=fast_lsh_cumulation -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -isystem /home/ludaze/.conda/envs/axolotl/lib/python3.10/site-packages/torch/include -isystem /home/ludaze/.conda/envs/axolotl/lib/python3.10/site-packages/torch/include/torch/csrc/api/include -isystem /home/ludaze/.conda/envs/axolotl/lib/python3.10/site-packages/torch/include/TH -isystem /home/ludaze/.conda/envs/axolotl/lib/python3.10/site-packages/torch/include/THC -isystem /home/ludaze/usr/local/cuda/cuda-11.8/include -isystem /home/ludaze/.conda/envs/axolotl/include/python3.10 -D_GLIBCXX_USE_CXX11_ABI=0 -fPIC -std=c++17 -c /home/ludaze/.conda/envs/axolotl/lib/python3.10/site-packages/transformers/kernels/yoso/fast_lsh_cumulation_torch.cpp -o fast_lsh_cumulation_torch.o [3/4] /home/ludaze/usr/local/cuda/cuda-11.8/bin/nvcc --generate-dependencies-with-compile --dependency-output fast_lsh_cumulation.cuda.o.d -DTORCH_EXTENSION_NAME=fast_lsh_cumulation -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -isystem /home/ludaze/.conda/envs/axolotl/lib/python3.10/site-packages/torch/include -isystem /home/ludaze/.conda/envs/axolotl/lib/python3.10/site-packages/torch/include/torch/csrc/api/include -isystem /home/ludaze/.conda/envs/axolotl/lib/python3.10/site-packages/torch/include/TH -isystem /home/ludaze/.conda/envs/axolotl/lib/python3.10/site-packages/torch/include/THC -isystem /home/ludaze/usr/local/cuda/cuda-11.8/include -isystem /home/ludaze/.conda/envs/axolotl/include/python3.10 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_86,code=sm_86 --compiler-options '-fPIC' -std=c++17 -c /home/ludaze/.conda/envs/axolotl/lib/python3.10/site-packages/transformers/kernels/yoso/fast_lsh_cumulation.cu -o fast_lsh_cumulation.cuda.o [4/4] c++ fast_lsh_cumulation_torch.o fast_lsh_cumulation.cuda.o fast_lsh_cumulation_cuda.cuda.o -shared -L/home/ludaze/.conda/envs/axolotl/lib/python3.10/site-packages/torch/lib -lc10 -lc10_cuda -ltorch_cpu -ltorch_cuda -ltorch -ltorch_python -L/home/ludaze/usr/local/cuda/cuda-11.8/lib64 -lcudart -o fast_lsh_cumulation.so {'loss': 1.3875, 'grad_norm': 0.290032297372818, 'learning_rate': 4.708318749270797e-05, 'epoch': 0.18} {'loss': 1.387, 'grad_norm': 0.24705088138580322, 'learning_rate': 4.416637498541594e-05, 'epoch': 0.35} {'loss': 1.387, 'grad_norm': 0.2913823425769806, 'learning_rate': 4.124956247812391e-05, 'epoch': 0.53} {'loss': 1.3867, 'grad_norm': 0.2294648140668869, 'learning_rate': 3.8332749970831875e-05, 'epoch': 0.7} {'loss': 1.3868, 'grad_norm': 0.25805965065956116, 'learning_rate': 3.5415937463539846e-05, 'epoch': 0.88} [0 0 3 ... 0 1 2] [0 0 2 ... 0 1 0] {'eval_loss': 1.3863803148269653, 'eval_accuracy': 0.24671288548888357, 'eval_runtime': 9.3297, 'eval_samples_per_second': 448.352, 'eval_steps_per_second': 7.074, 'epoch': 1.0} {'loss': 1.3868, 'grad_norm': 0.3866879940032959, 'learning_rate': 3.249912495624781e-05, 'epoch': 1.05} {'loss': 1.3864, 'grad_norm': 0.2645956873893738, 'learning_rate': 2.9582312448955786e-05, 'epoch': 1.23} {'loss': 1.3864, 'grad_norm': 0.22899721562862396, 'learning_rate': 2.666549994166375e-05, 'epoch': 1.4} {'loss': 1.3867, 'grad_norm': 0.2155468761920929, 'learning_rate': 2.374868743437172e-05, 'epoch': 1.58} {'loss': 1.3865, 'grad_norm': 0.1302017718553543, 'learning_rate': 2.083187492707969e-05, 'epoch': 1.75} {'loss': 1.3866, 'grad_norm': 0.19115537405014038, 'learning_rate': 1.7915062419787655e-05, 'epoch': 1.93} [3 2 1 ... 2 0 1] [0 0 2 ... 0 1 0] {'eval_loss': 1.3862448930740356, 'eval_accuracy': 0.24695194836241932, 'eval_runtime': 9.364, 'eval_samples_per_second': 446.711, 'eval_steps_per_second': 7.048, 'epoch': 2.0} {'loss': 1.3865, 'grad_norm': 0.16356079280376434, 'learning_rate': 1.4998249912495627e-05, 'epoch': 2.1} {'loss': 1.3865, 'grad_norm': 0.19154764711856842, 'learning_rate': 1.2081437405203595e-05, 'epoch': 2.28} {'loss': 1.3865, 'grad_norm': 0.4934549629688263, 'learning_rate': 9.164624897911563e-06, 'epoch': 2.45} {'loss': 1.3863, 'grad_norm': 0.3885808289051056, 'learning_rate': 6.247812390619531e-06, 'epoch': 2.63} {'loss': 1.3863, 'grad_norm': 0.15357373654842377, 'learning_rate': 3.3309998833275e-06, 'epoch': 2.8} {'loss': 1.3863, 'grad_norm': 0.18562105298042297, 'learning_rate': 4.141873760354685e-07, 'epoch': 2.98}