Training in progress, epoch 1
Browse files
logs/training_log.txt
CHANGED
@@ -1,39 +1,35 @@
|
|
1 |
-
2025-01-08
|
2 |
-
2025-01-08
|
3 |
-
2025-01-08
|
4 |
-
2025-01-08
|
5 |
-
2025-01-08
|
6 |
-
2025-01-08
|
7 |
-
2025-01-08
|
8 |
-
2025-01-08
|
9 |
-
2025-01-08
|
10 |
-
2025-01-08
|
11 |
-
2025-01-08
|
12 |
-
2025-01-08
|
13 |
-
2025-01-08
|
14 |
-
2025-01-08
|
15 |
-
2025-01-08
|
16 |
-
2025-01-08
|
17 |
-
2025-01-08
|
18 |
-
2025-01-08
|
19 |
-
2025-01-08
|
20 |
-
2025-01-08
|
21 |
-
2025-01-08
|
22 |
-
2025-01-08
|
23 |
-
2025-01-08
|
24 |
-
eval_loss: 1.
|
25 |
-
eval_runtime: 24.
|
26 |
eval_samples_per_second: 0.3200
|
27 |
eval_steps_per_second: 0.0800
|
28 |
epoch: 1.0000
|
29 |
-
elapsed_time:
|
30 |
-
step_time:
|
31 |
-
2025-01-08
|
32 |
-
2025-01-08
|
33 |
-
2025-01-08
|
34 |
-
2025-01-08
|
35 |
-
2025-01-08
|
36 |
-
2025-01-08 17:56:48,201 - INFO - Saving model to mistral-sft-lora-fsdp2/checkpoint-2/pytorch_model_fsdp_0
|
37 |
-
2025-01-08 17:56:51,286 - INFO - Model saved to mistral-sft-lora-fsdp2/checkpoint-2/pytorch_model_fsdp_0
|
38 |
-
2025-01-08 17:56:57,344 - INFO - Saving Optimizer state to mistral-sft-lora-fsdp2/checkpoint-2/optimizer_0
|
39 |
-
2025-01-08 17:57:03,316 - INFO - Optimizer state saved in mistral-sft-lora-fsdp2/checkpoint-2/optimizer_0
|
|
|
1 |
+
2025-01-08 18:29:22,070 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmpw2j4jae_/test.c -o /tmp/tmpw2j4jae_/test.o
|
2 |
+
2025-01-08 18:29:22,097 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmpw2j4jae_/test.o -laio -o /tmp/tmpw2j4jae_/a.out
|
3 |
+
2025-01-08 18:29:22,252 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmp3xs2q_w0/test.c -o /tmp/tmp3xs2q_w0/test.o
|
4 |
+
2025-01-08 18:29:22,279 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmp3xs2q_w0/test.o -laio -o /tmp/tmp3xs2q_w0/a.out
|
5 |
+
2025-01-08 18:29:22,281 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmpoehuhgbl/test.c -o /tmp/tmpoehuhgbl/test.o
|
6 |
+
2025-01-08 18:29:22,307 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmpoehuhgbl/test.o -laio -o /tmp/tmpoehuhgbl/a.out
|
7 |
+
2025-01-08 18:29:22,311 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmp5eog_7fp/test.c -o /tmp/tmp5eog_7fp/test.o
|
8 |
+
2025-01-08 18:29:22,334 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmp5eog_7fp/test.o -laio -o /tmp/tmp5eog_7fp/a.out
|
9 |
+
2025-01-08 18:29:22,519 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmp7o4d197o/test.c -o /tmp/tmp7o4d197o/test.o
|
10 |
+
2025-01-08 18:29:22,545 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmp7o4d197o/test.o -L/usr/local/cuda -L/usr/local/cuda/lib64 -lcufile -o /tmp/tmp7o4d197o/a.out
|
11 |
+
2025-01-08 18:29:22,683 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmpskkmpgdv/test.c -o /tmp/tmpskkmpgdv/test.o
|
12 |
+
2025-01-08 18:29:22,710 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmpskkmpgdv/test.o -L/usr/local/cuda -L/usr/local/cuda/lib64 -lcufile -o /tmp/tmpskkmpgdv/a.out
|
13 |
+
2025-01-08 18:29:22,759 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmp0i19mv7y/test.c -o /tmp/tmp0i19mv7y/test.o
|
14 |
+
2025-01-08 18:29:22,778 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmptzck4dvd/test.c -o /tmp/tmptzck4dvd/test.o
|
15 |
+
2025-01-08 18:29:22,785 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmp0i19mv7y/test.o -L/usr/local/cuda -L/usr/local/cuda/lib64 -lcufile -o /tmp/tmp0i19mv7y/a.out
|
16 |
+
2025-01-08 18:29:22,795 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmptzck4dvd/test.o -L/usr/local/cuda -L/usr/local/cuda/lib64 -lcufile -o /tmp/tmptzck4dvd/a.out
|
17 |
+
2025-01-08 18:34:07,128 - INFO - Training started
|
18 |
+
2025-01-08 18:34:07,129 - INFO - Total steps: 2
|
19 |
+
2025-01-08 18:37:14,796 - INFO - Loss improved from inf to 1.98041
|
20 |
+
2025-01-08 18:37:14,796 - INFO - Loss improved from inf to 1.98041
|
21 |
+
2025-01-08 18:37:14,797 - INFO - Loss improved from inf to 1.98041
|
22 |
+
2025-01-08 18:37:14,798 - INFO - Step 1/2 (50.0%), epoch: 1.0000, step_time: 571.32s, elapsed_time: 571.32s
|
23 |
+
2025-01-08 18:37:14,799 - INFO - Evaluation Results:
|
24 |
+
eval_loss: 1.9804
|
25 |
+
eval_runtime: 24.9974
|
26 |
eval_samples_per_second: 0.3200
|
27 |
eval_steps_per_second: 0.0800
|
28 |
epoch: 1.0000
|
29 |
+
elapsed_time: 571.32s
|
30 |
+
step_time: 571.32s
|
31 |
+
2025-01-08 18:37:14,799 - INFO - Loss improved from inf to 1.98041
|
32 |
+
2025-01-08 18:40:40,756 - INFO - Saving model to mistral-sft-lora-fsdp2/checkpoint-1/pytorch_model_fsdp_0
|
33 |
+
2025-01-08 18:40:44,085 - INFO - Model saved to mistral-sft-lora-fsdp2/checkpoint-1/pytorch_model_fsdp_0
|
34 |
+
2025-01-08 18:40:50,139 - INFO - Saving Optimizer state to mistral-sft-lora-fsdp2/checkpoint-1/optimizer_0
|
35 |
+
2025-01-08 18:40:56,423 - INFO - Optimizer state saved in mistral-sft-lora-fsdp2/checkpoint-1/optimizer_0
|
|
|
|
|
|
|
|
runs/Jan08_18-27-42_gpu-server/events.out.tfevents.1736361245.gpu-server.1036251.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:05f4fd9fe7a80d6e1c4c5a86f03cb20099d95e1a97be4d604cd0ff2fd23d5717
|
3 |
+
size 5873
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5560
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c9099e841f56ef2dbca18d7a3750f478bbc35b0d6001546c8b501b0c4a1d3ff0
|
3 |
size 5560
|