Commit
·
90e0792
1
Parent(s):
0fe8392
Model save
Browse files- all_results.json +7 -7
- eval_results.json +4 -4
- model-00001-of-00003.safetensors +1 -1
- model-00002-of-00003.safetensors +1 -1
- model-00003-of-00003.safetensors +1 -1
- runs/Nov10_00-16-16_ip-26-0-155-187/events.out.tfevents.1699575456.ip-26-0-155-187.230765.0 +3 -0
- runs/Nov10_00-16-16_ip-26-0-155-187/events.out.tfevents.1699587432.ip-26-0-155-187.230765.1 +3 -0
- train_results.json +3 -3
- trainer_state.json +16 -16
- training_args.bin +1 -1
all_results.json
CHANGED
@@ -1,13 +1,13 @@
|
|
1 |
{
|
2 |
"epoch": 0.67,
|
3 |
-
"eval_loss": 0.
|
4 |
-
"eval_runtime":
|
5 |
"eval_samples": 23110,
|
6 |
-
"eval_samples_per_second": 69.
|
7 |
-
"eval_steps_per_second": 0.
|
8 |
-
"train_loss": 0.
|
9 |
-
"train_runtime":
|
10 |
"train_samples": 207865,
|
11 |
-
"train_samples_per_second": 17.
|
12 |
"train_steps_per_second": 0.035
|
13 |
}
|
|
|
1 |
{
|
2 |
"epoch": 0.67,
|
3 |
+
"eval_loss": 0.932437002658844,
|
4 |
+
"eval_runtime": 331.0457,
|
5 |
"eval_samples": 23110,
|
6 |
+
"eval_samples_per_second": 69.809,
|
7 |
+
"eval_steps_per_second": 0.547,
|
8 |
+
"train_loss": 0.9717322877224754,
|
9 |
+
"train_runtime": 11645.5121,
|
10 |
"train_samples": 207865,
|
11 |
+
"train_samples_per_second": 17.849,
|
12 |
"train_steps_per_second": 0.035
|
13 |
}
|
eval_results.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"epoch": 0.67,
|
3 |
-
"eval_loss": 0.
|
4 |
-
"eval_runtime":
|
5 |
"eval_samples": 23110,
|
6 |
-
"eval_samples_per_second": 69.
|
7 |
-
"eval_steps_per_second": 0.
|
8 |
}
|
|
|
1 |
{
|
2 |
"epoch": 0.67,
|
3 |
+
"eval_loss": 0.932437002658844,
|
4 |
+
"eval_runtime": 331.0457,
|
5 |
"eval_samples": 23110,
|
6 |
+
"eval_samples_per_second": 69.809,
|
7 |
+
"eval_steps_per_second": 0.547
|
8 |
}
|
model-00001-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4943162336
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:68b1e135ad66cee90fef5c9335e8c80b8e60b16254ff5f4e88d3369ebcce96a4
|
3 |
size 4943162336
|
model-00002-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4999819336
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8df5a386b05f0393662aaa5d39d8dd052a22a366fe6f5cd42a0c7bc940898d6e
|
3 |
size 4999819336
|
model-00003-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4540516344
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:202c8e90c2ac2219a17c1bce35622a063977658808c635d326433237022d2ea3
|
3 |
size 4540516344
|
runs/Nov10_00-16-16_ip-26-0-155-187/events.out.tfevents.1699575456.ip-26-0-155-187.230765.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3189372cc8c6459ee460d3d31cbaddfb99a40696a85cc9050a23dea2a8e6a339
|
3 |
+
size 13430
|
runs/Nov10_00-16-16_ip-26-0-155-187/events.out.tfevents.1699587432.ip-26-0-155-187.230765.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e2868aa7d42743dcea1cc11cb77830e050620d5fc59cb3c0b74befa9a82dd81e
|
3 |
+
size 359
|
train_results.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"epoch": 0.67,
|
3 |
-
"train_loss": 0.
|
4 |
-
"train_runtime":
|
5 |
"train_samples": 207865,
|
6 |
-
"train_samples_per_second": 17.
|
7 |
"train_steps_per_second": 0.035
|
8 |
}
|
|
|
1 |
{
|
2 |
"epoch": 0.67,
|
3 |
+
"train_loss": 0.9717322877224754,
|
4 |
+
"train_runtime": 11645.5121,
|
5 |
"train_samples": 207865,
|
6 |
+
"train_samples_per_second": 17.849,
|
7 |
"train_steps_per_second": 0.035
|
8 |
}
|
trainer_state.json
CHANGED
@@ -17,25 +17,25 @@
|
|
17 |
{
|
18 |
"epoch": 0.01,
|
19 |
"learning_rate": 1.999251652147735e-05,
|
20 |
-
"loss": 1.
|
21 |
"step": 5
|
22 |
},
|
23 |
{
|
24 |
"epoch": 0.02,
|
25 |
"learning_rate": 1.997007728639956e-05,
|
26 |
-
"loss": 1.
|
27 |
"step": 10
|
28 |
},
|
29 |
{
|
30 |
"epoch": 0.04,
|
31 |
"learning_rate": 1.9932715879473385e-05,
|
32 |
-
"loss": 1.
|
33 |
"step": 15
|
34 |
},
|
35 |
{
|
36 |
"epoch": 0.05,
|
37 |
"learning_rate": 1.9880488219356086e-05,
|
38 |
-
"loss": 1.
|
39 |
"step": 20
|
40 |
},
|
41 |
{
|
@@ -59,7 +59,7 @@
|
|
59 |
{
|
60 |
"epoch": 0.1,
|
61 |
"learning_rate": 1.9524809490566878e-05,
|
62 |
-
"loss": 0.
|
63 |
"step": 40
|
64 |
},
|
65 |
{
|
@@ -71,7 +71,7 @@
|
|
71 |
{
|
72 |
"epoch": 0.12,
|
73 |
"learning_rate": 1.926084840336821e-05,
|
74 |
-
"loss": 0.
|
75 |
"step": 50
|
76 |
},
|
77 |
{
|
@@ -113,7 +113,7 @@
|
|
113 |
{
|
114 |
"epoch": 0.21,
|
115 |
"learning_rate": 1.791386494010081e-05,
|
116 |
-
"loss": 0.
|
117 |
"step": 85
|
118 |
},
|
119 |
{
|
@@ -125,7 +125,7 @@
|
|
125 |
{
|
126 |
"epoch": 0.23,
|
127 |
"learning_rate": 1.7417625312098453e-05,
|
128 |
-
"loss": 0.
|
129 |
"step": 95
|
130 |
},
|
131 |
{
|
@@ -143,13 +143,13 @@
|
|
143 |
{
|
144 |
"epoch": 0.27,
|
145 |
"learning_rate": 1.659103377877423e-05,
|
146 |
-
"loss": 0.
|
147 |
"step": 110
|
148 |
},
|
149 |
{
|
150 |
"epoch": 0.28,
|
151 |
"learning_rate": 1.629520819706912e-05,
|
152 |
-
"loss": 0.
|
153 |
"step": 115
|
154 |
},
|
155 |
{
|
@@ -340,9 +340,9 @@
|
|
340 |
},
|
341 |
{
|
342 |
"epoch": 0.67,
|
343 |
-
"eval_loss": 0.
|
344 |
-
"eval_runtime": 337.
|
345 |
-
"eval_samples_per_second": 68.
|
346 |
"eval_steps_per_second": 0.537,
|
347 |
"step": 272
|
348 |
},
|
@@ -350,9 +350,9 @@
|
|
350 |
"epoch": 0.67,
|
351 |
"step": 272,
|
352 |
"total_flos": 455322233733120.0,
|
353 |
-
"train_loss": 0.
|
354 |
-
"train_runtime":
|
355 |
-
"train_samples_per_second": 17.
|
356 |
"train_steps_per_second": 0.035
|
357 |
}
|
358 |
],
|
|
|
17 |
{
|
18 |
"epoch": 0.01,
|
19 |
"learning_rate": 1.999251652147735e-05,
|
20 |
+
"loss": 1.6995,
|
21 |
"step": 5
|
22 |
},
|
23 |
{
|
24 |
"epoch": 0.02,
|
25 |
"learning_rate": 1.997007728639956e-05,
|
26 |
+
"loss": 1.1502,
|
27 |
"step": 10
|
28 |
},
|
29 |
{
|
30 |
"epoch": 0.04,
|
31 |
"learning_rate": 1.9932715879473385e-05,
|
32 |
+
"loss": 1.0714,
|
33 |
"step": 15
|
34 |
},
|
35 |
{
|
36 |
"epoch": 0.05,
|
37 |
"learning_rate": 1.9880488219356086e-05,
|
38 |
+
"loss": 1.0487,
|
39 |
"step": 20
|
40 |
},
|
41 |
{
|
|
|
59 |
{
|
60 |
"epoch": 0.1,
|
61 |
"learning_rate": 1.9524809490566878e-05,
|
62 |
+
"loss": 0.9805,
|
63 |
"step": 40
|
64 |
},
|
65 |
{
|
|
|
71 |
{
|
72 |
"epoch": 0.12,
|
73 |
"learning_rate": 1.926084840336821e-05,
|
74 |
+
"loss": 0.9814,
|
75 |
"step": 50
|
76 |
},
|
77 |
{
|
|
|
113 |
{
|
114 |
"epoch": 0.21,
|
115 |
"learning_rate": 1.791386494010081e-05,
|
116 |
+
"loss": 0.9642,
|
117 |
"step": 85
|
118 |
},
|
119 |
{
|
|
|
125 |
{
|
126 |
"epoch": 0.23,
|
127 |
"learning_rate": 1.7417625312098453e-05,
|
128 |
+
"loss": 0.9444,
|
129 |
"step": 95
|
130 |
},
|
131 |
{
|
|
|
143 |
{
|
144 |
"epoch": 0.27,
|
145 |
"learning_rate": 1.659103377877423e-05,
|
146 |
+
"loss": 0.9498,
|
147 |
"step": 110
|
148 |
},
|
149 |
{
|
150 |
"epoch": 0.28,
|
151 |
"learning_rate": 1.629520819706912e-05,
|
152 |
+
"loss": 0.9464,
|
153 |
"step": 115
|
154 |
},
|
155 |
{
|
|
|
340 |
},
|
341 |
{
|
342 |
"epoch": 0.67,
|
343 |
+
"eval_loss": 0.9322898387908936,
|
344 |
+
"eval_runtime": 337.0923,
|
345 |
+
"eval_samples_per_second": 68.557,
|
346 |
"eval_steps_per_second": 0.537,
|
347 |
"step": 272
|
348 |
},
|
|
|
350 |
"epoch": 0.67,
|
351 |
"step": 272,
|
352 |
"total_flos": 455322233733120.0,
|
353 |
+
"train_loss": 0.9717322877224754,
|
354 |
+
"train_runtime": 11645.5121,
|
355 |
+
"train_samples_per_second": 17.849,
|
356 |
"train_steps_per_second": 0.035
|
357 |
}
|
358 |
],
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5624
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3c5e5f1c2e60e8566b60a42c429bdeebcc5f5392a53938ef2fe0c39224dde9fc
|
3 |
size 5624
|