Training in progress, epoch 1
Browse files- all_results.json +9 -9
- eval_results.json +3 -3
- predict_results.json +3 -3
- pytorch_model.bin +1 -1
- runs/Mar25_21-10-32_kogecha/events.out.tfevents.1711371554.kogecha +3 -0
- runs/Mar26_15-40-21_kogecha/events.out.tfevents.1711435232.kogecha +3 -0
- train_results.json +3 -3
- trainer_state.json +18 -18
- training_args.bin +1 -1
all_results.json
CHANGED
@@ -6,23 +6,23 @@
|
|
6 |
"eval_rouge2": 3.1211,
|
7 |
"eval_rougeL": 10.9379,
|
8 |
"eval_rougeLsum": 15.8203,
|
9 |
-
"eval_runtime":
|
10 |
"eval_samples": 286,
|
11 |
-
"eval_samples_per_second": 0.
|
12 |
-
"eval_steps_per_second": 0.
|
13 |
"predict_gen_len": 86.58898305084746,
|
14 |
"predict_loss": 3.0853381156921387,
|
15 |
"predict_rouge1": 18.1172,
|
16 |
"predict_rouge2": 3.4127,
|
17 |
"predict_rougeL": 11.2062,
|
18 |
"predict_rougeLsum": 12.5441,
|
19 |
-
"predict_runtime":
|
20 |
"predict_samples": 236,
|
21 |
-
"predict_samples_per_second": 1.
|
22 |
-
"predict_steps_per_second": 1.
|
23 |
"train_loss": 0.4568321267587167,
|
24 |
-
"train_runtime":
|
25 |
"train_samples": 2025,
|
26 |
-
"train_samples_per_second": 3.
|
27 |
-
"train_steps_per_second": 3.
|
28 |
}
|
|
|
6 |
"eval_rouge2": 3.1211,
|
7 |
"eval_rougeL": 10.9379,
|
8 |
"eval_rougeLsum": 15.8203,
|
9 |
+
"eval_runtime": 321.129,
|
10 |
"eval_samples": 286,
|
11 |
+
"eval_samples_per_second": 0.891,
|
12 |
+
"eval_steps_per_second": 0.891,
|
13 |
"predict_gen_len": 86.58898305084746,
|
14 |
"predict_loss": 3.0853381156921387,
|
15 |
"predict_rouge1": 18.1172,
|
16 |
"predict_rouge2": 3.4127,
|
17 |
"predict_rougeL": 11.2062,
|
18 |
"predict_rougeLsum": 12.5441,
|
19 |
+
"predict_runtime": 225.8529,
|
20 |
"predict_samples": 236,
|
21 |
+
"predict_samples_per_second": 1.045,
|
22 |
+
"predict_steps_per_second": 1.045,
|
23 |
"train_loss": 0.4568321267587167,
|
24 |
+
"train_runtime": 2567.7496,
|
25 |
"train_samples": 2025,
|
26 |
+
"train_samples_per_second": 3.943,
|
27 |
+
"train_steps_per_second": 3.943
|
28 |
}
|
eval_results.json
CHANGED
@@ -6,8 +6,8 @@
|
|
6 |
"eval_rouge2": 3.1211,
|
7 |
"eval_rougeL": 10.9379,
|
8 |
"eval_rougeLsum": 15.8203,
|
9 |
-
"eval_runtime":
|
10 |
"eval_samples": 286,
|
11 |
-
"eval_samples_per_second": 0.
|
12 |
-
"eval_steps_per_second": 0.
|
13 |
}
|
|
|
6 |
"eval_rouge2": 3.1211,
|
7 |
"eval_rougeL": 10.9379,
|
8 |
"eval_rougeLsum": 15.8203,
|
9 |
+
"eval_runtime": 321.129,
|
10 |
"eval_samples": 286,
|
11 |
+
"eval_samples_per_second": 0.891,
|
12 |
+
"eval_steps_per_second": 0.891
|
13 |
}
|
predict_results.json
CHANGED
@@ -5,8 +5,8 @@
|
|
5 |
"predict_rouge2": 3.4127,
|
6 |
"predict_rougeL": 11.2062,
|
7 |
"predict_rougeLsum": 12.5441,
|
8 |
-
"predict_runtime":
|
9 |
"predict_samples": 236,
|
10 |
-
"predict_samples_per_second": 1.
|
11 |
-
"predict_steps_per_second": 1.
|
12 |
}
|
|
|
5 |
"predict_rouge2": 3.4127,
|
6 |
"predict_rougeL": 11.2062,
|
7 |
"predict_rougeLsum": 12.5441,
|
8 |
+
"predict_runtime": 225.8529,
|
9 |
"predict_samples": 236,
|
10 |
+
"predict_samples_per_second": 1.045,
|
11 |
+
"predict_steps_per_second": 1.045
|
12 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 501807853
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:479a16662531508b9f6eeda30775691b89ca046731b6cce103ef89e37dd889c1
|
3 |
size 501807853
|
runs/Mar25_21-10-32_kogecha/events.out.tfevents.1711371554.kogecha
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9b1959bdf24bdb87157ee62a6e57719d9bec11ec5ca7920cd98b3ee9a570bb2e
|
3 |
+
size 565
|
runs/Mar26_15-40-21_kogecha/events.out.tfevents.1711435232.kogecha
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e5e0e85d9e8010a68e7db22ca0f54222a780cd88f4c034241da7ec79c9785fac
|
3 |
+
size 6326
|
train_results.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"epoch": 5.0,
|
3 |
"train_loss": 0.4568321267587167,
|
4 |
-
"train_runtime":
|
5 |
"train_samples": 2025,
|
6 |
-
"train_samples_per_second": 3.
|
7 |
-
"train_steps_per_second": 3.
|
8 |
}
|
|
|
1 |
{
|
2 |
"epoch": 5.0,
|
3 |
"train_loss": 0.4568321267587167,
|
4 |
+
"train_runtime": 2567.7496,
|
5 |
"train_samples": 2025,
|
6 |
+
"train_samples_per_second": 3.943,
|
7 |
+
"train_steps_per_second": 3.943
|
8 |
}
|
trainer_state.json
CHANGED
@@ -39,9 +39,9 @@
|
|
39 |
"eval_rouge2": 1.2888,
|
40 |
"eval_rougeL": 11.0128,
|
41 |
"eval_rougeLsum": 15.2802,
|
42 |
-
"eval_runtime":
|
43 |
-
"eval_samples_per_second": 1.
|
44 |
-
"eval_steps_per_second": 1.
|
45 |
"step": 2025
|
46 |
},
|
47 |
{
|
@@ -76,9 +76,9 @@
|
|
76 |
"eval_rouge2": 2.8091,
|
77 |
"eval_rougeL": 10.9133,
|
78 |
"eval_rougeLsum": 15.4068,
|
79 |
-
"eval_runtime":
|
80 |
-
"eval_samples_per_second": 0.
|
81 |
-
"eval_steps_per_second": 0.
|
82 |
"step": 4050
|
83 |
},
|
84 |
{
|
@@ -113,9 +113,9 @@
|
|
113 |
"eval_rouge2": 3.0026,
|
114 |
"eval_rougeL": 10.272,
|
115 |
"eval_rougeLsum": 14.0716,
|
116 |
-
"eval_runtime":
|
117 |
-
"eval_samples_per_second": 1.
|
118 |
-
"eval_steps_per_second": 1.
|
119 |
"step": 6075
|
120 |
},
|
121 |
{
|
@@ -150,9 +150,9 @@
|
|
150 |
"eval_rouge2": 2.9937,
|
151 |
"eval_rougeL": 10.8765,
|
152 |
"eval_rougeLsum": 15.6203,
|
153 |
-
"eval_runtime":
|
154 |
-
"eval_samples_per_second": 0.
|
155 |
-
"eval_steps_per_second": 0.
|
156 |
"step": 8100
|
157 |
},
|
158 |
{
|
@@ -187,9 +187,9 @@
|
|
187 |
"eval_rouge2": 3.1211,
|
188 |
"eval_rougeL": 10.9379,
|
189 |
"eval_rougeLsum": 15.8203,
|
190 |
-
"eval_runtime":
|
191 |
-
"eval_samples_per_second": 0.
|
192 |
-
"eval_steps_per_second": 0.
|
193 |
"step": 10125
|
194 |
},
|
195 |
{
|
@@ -197,9 +197,9 @@
|
|
197 |
"step": 10125,
|
198 |
"total_flos": 1931007009669120.0,
|
199 |
"train_loss": 0.4568321267587167,
|
200 |
-
"train_runtime":
|
201 |
-
"train_samples_per_second": 3.
|
202 |
-
"train_steps_per_second": 3.
|
203 |
}
|
204 |
],
|
205 |
"max_steps": 10125,
|
|
|
39 |
"eval_rouge2": 1.2888,
|
40 |
"eval_rougeL": 11.0128,
|
41 |
"eval_rougeLsum": 15.2802,
|
42 |
+
"eval_runtime": 269.5917,
|
43 |
+
"eval_samples_per_second": 1.061,
|
44 |
+
"eval_steps_per_second": 1.061,
|
45 |
"step": 2025
|
46 |
},
|
47 |
{
|
|
|
76 |
"eval_rouge2": 2.8091,
|
77 |
"eval_rougeL": 10.9133,
|
78 |
"eval_rougeLsum": 15.4068,
|
79 |
+
"eval_runtime": 324.1284,
|
80 |
+
"eval_samples_per_second": 0.882,
|
81 |
+
"eval_steps_per_second": 0.882,
|
82 |
"step": 4050
|
83 |
},
|
84 |
{
|
|
|
113 |
"eval_rouge2": 3.0026,
|
114 |
"eval_rougeL": 10.272,
|
115 |
"eval_rougeLsum": 14.0716,
|
116 |
+
"eval_runtime": 281.1095,
|
117 |
+
"eval_samples_per_second": 1.017,
|
118 |
+
"eval_steps_per_second": 1.017,
|
119 |
"step": 6075
|
120 |
},
|
121 |
{
|
|
|
150 |
"eval_rouge2": 2.9937,
|
151 |
"eval_rougeL": 10.8765,
|
152 |
"eval_rougeLsum": 15.6203,
|
153 |
+
"eval_runtime": 326.4072,
|
154 |
+
"eval_samples_per_second": 0.876,
|
155 |
+
"eval_steps_per_second": 0.876,
|
156 |
"step": 8100
|
157 |
},
|
158 |
{
|
|
|
187 |
"eval_rouge2": 3.1211,
|
188 |
"eval_rougeL": 10.9379,
|
189 |
"eval_rougeLsum": 15.8203,
|
190 |
+
"eval_runtime": 320.7013,
|
191 |
+
"eval_samples_per_second": 0.892,
|
192 |
+
"eval_steps_per_second": 0.892,
|
193 |
"step": 10125
|
194 |
},
|
195 |
{
|
|
|
197 |
"step": 10125,
|
198 |
"total_flos": 1931007009669120.0,
|
199 |
"train_loss": 0.4568321267587167,
|
200 |
+
"train_runtime": 2567.7496,
|
201 |
+
"train_samples_per_second": 3.943,
|
202 |
+
"train_steps_per_second": 3.943
|
203 |
}
|
204 |
],
|
205 |
"max_steps": 10125,
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4475
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c1f06036d74c60635455d9d063b586bccc9945de86b85390e3d282dc4ada8f2c
|
3 |
size 4475
|