Adil1567 committed on
Commit
af12fdb
·
verified ·
1 Parent(s): 5869bd5

Training in progress, epoch 1

Browse files
logs/training_log.txt CHANGED
@@ -1,39 +1,35 @@
1
- 2025-01-08 17:40:36,980 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmp76e8z5da/test.c -o /tmp/tmp76e8z5da/test.o
2
- 2025-01-08 17:40:37,011 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmp76e8z5da/test.o -laio -o /tmp/tmp76e8z5da/a.out
3
- 2025-01-08 17:40:37,024 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmp0z9hrob6/test.c -o /tmp/tmp0z9hrob6/test.o
4
- 2025-01-08 17:40:37,042 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmp0z9hrob6/test.o -laio -o /tmp/tmp0z9hrob6/a.out
5
- 2025-01-08 17:40:37,093 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmpivmpgbne/test.c -o /tmp/tmpivmpgbne/test.o
6
- 2025-01-08 17:40:37,096 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmp20zdp2x1/test.c -o /tmp/tmp20zdp2x1/test.o
7
- 2025-01-08 17:40:37,111 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmpivmpgbne/test.o -laio -o /tmp/tmpivmpgbne/a.out
8
- 2025-01-08 17:40:37,121 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmp20zdp2x1/test.o -laio -o /tmp/tmp20zdp2x1/a.out
9
- 2025-01-08 17:40:37,424 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmpb15bvnyi/test.c -o /tmp/tmpb15bvnyi/test.o
10
- 2025-01-08 17:40:37,452 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmpb15bvnyi/test.o -L/usr/local/cuda -L/usr/local/cuda/lib64 -lcufile -o /tmp/tmpb15bvnyi/a.out
11
- 2025-01-08 17:40:37,477 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmpaqf_tb6f/test.c -o /tmp/tmpaqf_tb6f/test.o
12
- 2025-01-08 17:40:37,505 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmpaqf_tb6f/test.o -L/usr/local/cuda -L/usr/local/cuda/lib64 -lcufile -o /tmp/tmpaqf_tb6f/a.out
13
- 2025-01-08 17:40:37,520 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmpkcvvfuyn/test.c -o /tmp/tmpkcvvfuyn/test.o
14
- 2025-01-08 17:40:37,542 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmpcde9wj__/test.c -o /tmp/tmpcde9wj__/test.o
15
- 2025-01-08 17:40:37,544 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmpkcvvfuyn/test.o -L/usr/local/cuda -L/usr/local/cuda/lib64 -lcufile -o /tmp/tmpkcvvfuyn/a.out
16
- 2025-01-08 17:40:37,564 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmpcde9wj__/test.o -L/usr/local/cuda -L/usr/local/cuda/lib64 -lcufile -o /tmp/tmpcde9wj__/a.out
17
- 2025-01-08 17:45:22,791 - INFO - Training started
18
- 2025-01-08 17:45:22,791 - INFO - Total steps: 2
19
- 2025-01-08 17:48:31,433 - INFO - Loss improved from inf to 1.97711
20
- 2025-01-08 17:48:31,433 - INFO - Loss improved from inf to 1.97711
21
- 2025-01-08 17:48:31,433 - INFO - Loss improved from inf to 1.97711
22
- 2025-01-08 17:48:31,435 - INFO - Step 1/2 (50.0%), epoch: 1.0000, step_time: 572.94s, elapsed_time: 572.94s
23
- 2025-01-08 17:48:31,436 - INFO - Evaluation Results:
24
- eval_loss: 1.9771
25
- eval_runtime: 24.9754
26
  eval_samples_per_second: 0.3200
27
  eval_steps_per_second: 0.0800
28
  epoch: 1.0000
29
- elapsed_time: 572.94s
30
- step_time: 572.94s
31
- 2025-01-08 17:48:31,436 - INFO - Loss improved from inf to 1.97711
32
- 2025-01-08 17:51:58,678 - INFO - Saving model to mistral-sft-lora-fsdp2/checkpoint-1/pytorch_model_fsdp_0
33
- 2025-01-08 17:52:02,292 - INFO - Model saved to mistral-sft-lora-fsdp2/checkpoint-1/pytorch_model_fsdp_0
34
- 2025-01-08 17:52:08,611 - INFO - Saving Optimizer state to mistral-sft-lora-fsdp2/checkpoint-1/optimizer_0
35
- 2025-01-08 17:52:14,794 - INFO - Optimizer state saved in mistral-sft-lora-fsdp2/checkpoint-1/optimizer_0
36
- 2025-01-08 17:56:48,201 - INFO - Saving model to mistral-sft-lora-fsdp2/checkpoint-2/pytorch_model_fsdp_0
37
- 2025-01-08 17:56:51,286 - INFO - Model saved to mistral-sft-lora-fsdp2/checkpoint-2/pytorch_model_fsdp_0
38
- 2025-01-08 17:56:57,344 - INFO - Saving Optimizer state to mistral-sft-lora-fsdp2/checkpoint-2/optimizer_0
39
- 2025-01-08 17:57:03,316 - INFO - Optimizer state saved in mistral-sft-lora-fsdp2/checkpoint-2/optimizer_0
 
1
+ 2025-01-08 18:29:22,070 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmpw2j4jae_/test.c -o /tmp/tmpw2j4jae_/test.o
2
+ 2025-01-08 18:29:22,097 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmpw2j4jae_/test.o -laio -o /tmp/tmpw2j4jae_/a.out
3
+ 2025-01-08 18:29:22,252 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmp3xs2q_w0/test.c -o /tmp/tmp3xs2q_w0/test.o
4
+ 2025-01-08 18:29:22,279 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmp3xs2q_w0/test.o -laio -o /tmp/tmp3xs2q_w0/a.out
5
+ 2025-01-08 18:29:22,281 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmpoehuhgbl/test.c -o /tmp/tmpoehuhgbl/test.o
6
+ 2025-01-08 18:29:22,307 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmpoehuhgbl/test.o -laio -o /tmp/tmpoehuhgbl/a.out
7
+ 2025-01-08 18:29:22,311 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmp5eog_7fp/test.c -o /tmp/tmp5eog_7fp/test.o
8
+ 2025-01-08 18:29:22,334 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmp5eog_7fp/test.o -laio -o /tmp/tmp5eog_7fp/a.out
9
+ 2025-01-08 18:29:22,519 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmp7o4d197o/test.c -o /tmp/tmp7o4d197o/test.o
10
+ 2025-01-08 18:29:22,545 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmp7o4d197o/test.o -L/usr/local/cuda -L/usr/local/cuda/lib64 -lcufile -o /tmp/tmp7o4d197o/a.out
11
+ 2025-01-08 18:29:22,683 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmpskkmpgdv/test.c -o /tmp/tmpskkmpgdv/test.o
12
+ 2025-01-08 18:29:22,710 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmpskkmpgdv/test.o -L/usr/local/cuda -L/usr/local/cuda/lib64 -lcufile -o /tmp/tmpskkmpgdv/a.out
13
+ 2025-01-08 18:29:22,759 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmp0i19mv7y/test.c -o /tmp/tmp0i19mv7y/test.o
14
+ 2025-01-08 18:29:22,778 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmptzck4dvd/test.c -o /tmp/tmptzck4dvd/test.o
15
+ 2025-01-08 18:29:22,785 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmp0i19mv7y/test.o -L/usr/local/cuda -L/usr/local/cuda/lib64 -lcufile -o /tmp/tmp0i19mv7y/a.out
16
+ 2025-01-08 18:29:22,795 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmptzck4dvd/test.o -L/usr/local/cuda -L/usr/local/cuda/lib64 -lcufile -o /tmp/tmptzck4dvd/a.out
17
+ 2025-01-08 18:34:07,128 - INFO - Training started
18
+ 2025-01-08 18:34:07,129 - INFO - Total steps: 2
19
+ 2025-01-08 18:37:14,796 - INFO - Loss improved from inf to 1.98041
20
+ 2025-01-08 18:37:14,796 - INFO - Loss improved from inf to 1.98041
21
+ 2025-01-08 18:37:14,797 - INFO - Loss improved from inf to 1.98041
22
+ 2025-01-08 18:37:14,798 - INFO - Step 1/2 (50.0%), epoch: 1.0000, step_time: 571.32s, elapsed_time: 571.32s
23
+ 2025-01-08 18:37:14,799 - INFO - Evaluation Results:
24
+ eval_loss: 1.9804
25
+ eval_runtime: 24.9974
26
  eval_samples_per_second: 0.3200
27
  eval_steps_per_second: 0.0800
28
  epoch: 1.0000
29
+ elapsed_time: 571.32s
30
+ step_time: 571.32s
31
+ 2025-01-08 18:37:14,799 - INFO - Loss improved from inf to 1.98041
32
+ 2025-01-08 18:40:40,756 - INFO - Saving model to mistral-sft-lora-fsdp2/checkpoint-1/pytorch_model_fsdp_0
33
+ 2025-01-08 18:40:44,085 - INFO - Model saved to mistral-sft-lora-fsdp2/checkpoint-1/pytorch_model_fsdp_0
34
+ 2025-01-08 18:40:50,139 - INFO - Saving Optimizer state to mistral-sft-lora-fsdp2/checkpoint-1/optimizer_0
35
+ 2025-01-08 18:40:56,423 - INFO - Optimizer state saved in mistral-sft-lora-fsdp2/checkpoint-1/optimizer_0
 
 
 
 
runs/Jan08_18-27-42_gpu-server/events.out.tfevents.1736361245.gpu-server.1036251.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05f4fd9fe7a80d6e1c4c5a86f03cb20099d95e1a97be4d604cd0ff2fd23d5717
3
+ size 5873
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f9f5c7aa7013d36b5ee68c81864e29537e133a0502614df790ce001f4e88afd1
3
  size 5560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9099e841f56ef2dbca18d7a3750f478bbc35b0d6001546c8b501b0c4a1d3ff0
3
  size 5560