Training in progress, epoch 1
Browse files- all_results.json +9 -9
- eval_results.json +3 -3
- predict_results.json +3 -3
- pytorch_model.bin +1 -1
- runs/Mar26_15-40-21_kogecha/events.out.tfevents.1711438179.kogecha +3 -0
- runs/Mar26_17-48-18_kogecha/events.out.tfevents.1711442909.kogecha +3 -0
- train_results.json +3 -3
- trainer_state.json +18 -18
- training_args.bin +1 -1
all_results.json
CHANGED
@@ -6,23 +6,23 @@
|
|
6 |
"eval_rouge2": 3.1211,
|
7 |
"eval_rougeL": 10.9379,
|
8 |
"eval_rougeLsum": 15.8203,
|
9 |
-
"eval_runtime":
|
10 |
"eval_samples": 286,
|
11 |
-
"eval_samples_per_second": 0.
|
12 |
-
"eval_steps_per_second": 0.
|
13 |
"predict_gen_len": 86.58898305084746,
|
14 |
"predict_loss": 3.0853381156921387,
|
15 |
"predict_rouge1": 18.1172,
|
16 |
"predict_rouge2": 3.4127,
|
17 |
"predict_rougeL": 11.2062,
|
18 |
"predict_rougeLsum": 12.5441,
|
19 |
-
"predict_runtime":
|
20 |
"predict_samples": 236,
|
21 |
-
"predict_samples_per_second":
|
22 |
-
"predict_steps_per_second":
|
23 |
"train_loss": 0.4568321267587167,
|
24 |
-
"train_runtime":
|
25 |
"train_samples": 2025,
|
26 |
-
"train_samples_per_second": 3.
|
27 |
-
"train_steps_per_second": 3.
|
28 |
}
|
|
|
6 |
"eval_rouge2": 3.1211,
|
7 |
"eval_rougeL": 10.9379,
|
8 |
"eval_rougeLsum": 15.8203,
|
9 |
+
"eval_runtime": 317.8178,
|
10 |
"eval_samples": 286,
|
11 |
+
"eval_samples_per_second": 0.9,
|
12 |
+
"eval_steps_per_second": 0.9,
|
13 |
"predict_gen_len": 86.58898305084746,
|
14 |
"predict_loss": 3.0853381156921387,
|
15 |
"predict_rouge1": 18.1172,
|
16 |
"predict_rouge2": 3.4127,
|
17 |
"predict_rougeL": 11.2062,
|
18 |
"predict_rougeLsum": 12.5441,
|
19 |
+
"predict_runtime": 262.8252,
|
20 |
"predict_samples": 236,
|
21 |
+
"predict_samples_per_second": 0.898,
|
22 |
+
"predict_steps_per_second": 0.898,
|
23 |
"train_loss": 0.4568321267587167,
|
24 |
+
"train_runtime": 2611.1815,
|
25 |
"train_samples": 2025,
|
26 |
+
"train_samples_per_second": 3.878,
|
27 |
+
"train_steps_per_second": 3.878
|
28 |
}
|
eval_results.json
CHANGED
@@ -6,8 +6,8 @@
|
|
6 |
"eval_rouge2": 3.1211,
|
7 |
"eval_rougeL": 10.9379,
|
8 |
"eval_rougeLsum": 15.8203,
|
9 |
-
"eval_runtime":
|
10 |
"eval_samples": 286,
|
11 |
-
"eval_samples_per_second": 0.
|
12 |
-
"eval_steps_per_second": 0.
|
13 |
}
|
|
|
6 |
"eval_rouge2": 3.1211,
|
7 |
"eval_rougeL": 10.9379,
|
8 |
"eval_rougeLsum": 15.8203,
|
9 |
+
"eval_runtime": 317.8178,
|
10 |
"eval_samples": 286,
|
11 |
+
"eval_samples_per_second": 0.9,
|
12 |
+
"eval_steps_per_second": 0.9
|
13 |
}
|
predict_results.json
CHANGED
@@ -5,8 +5,8 @@
|
|
5 |
"predict_rouge2": 3.4127,
|
6 |
"predict_rougeL": 11.2062,
|
7 |
"predict_rougeLsum": 12.5441,
|
8 |
-
"predict_runtime":
|
9 |
"predict_samples": 236,
|
10 |
-
"predict_samples_per_second":
|
11 |
-
"predict_steps_per_second":
|
12 |
}
|
|
|
5 |
"predict_rouge2": 3.4127,
|
6 |
"predict_rougeL": 11.2062,
|
7 |
"predict_rougeLsum": 12.5441,
|
8 |
+
"predict_runtime": 262.8252,
|
9 |
"predict_samples": 236,
|
10 |
+
"predict_samples_per_second": 0.898,
|
11 |
+
"predict_steps_per_second": 0.898
|
12 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 501807853
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:479a16662531508b9f6eeda30775691b89ca046731b6cce103ef89e37dd889c1
|
3 |
size 501807853
|
runs/Mar26_15-40-21_kogecha/events.out.tfevents.1711438179.kogecha
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d059a5c52765c4c18d2bacb1ca15570de6e765fa72cb103d2da1a5aed2ecd957
|
3 |
+
size 565
|
runs/Mar26_17-48-18_kogecha/events.out.tfevents.1711442909.kogecha
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:897d6d9ae1ab575031ec746d70a6311affa2d4a23606fc7b83434961c2b803fe
|
3 |
+
size 6326
|
train_results.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"epoch": 5.0,
|
3 |
"train_loss": 0.4568321267587167,
|
4 |
-
"train_runtime":
|
5 |
"train_samples": 2025,
|
6 |
-
"train_samples_per_second": 3.
|
7 |
-
"train_steps_per_second": 3.
|
8 |
}
|
|
|
1 |
{
|
2 |
"epoch": 5.0,
|
3 |
"train_loss": 0.4568321267587167,
|
4 |
+
"train_runtime": 2611.1815,
|
5 |
"train_samples": 2025,
|
6 |
+
"train_samples_per_second": 3.878,
|
7 |
+
"train_steps_per_second": 3.878
|
8 |
}
|
trainer_state.json
CHANGED
@@ -39,9 +39,9 @@
|
|
39 |
"eval_rouge2": 1.2888,
|
40 |
"eval_rougeL": 11.0128,
|
41 |
"eval_rougeLsum": 15.2802,
|
42 |
-
"eval_runtime":
|
43 |
-
"eval_samples_per_second": 1.
|
44 |
-
"eval_steps_per_second": 1.
|
45 |
"step": 2025
|
46 |
},
|
47 |
{
|
@@ -76,9 +76,9 @@
|
|
76 |
"eval_rouge2": 2.8091,
|
77 |
"eval_rougeL": 10.9133,
|
78 |
"eval_rougeLsum": 15.4068,
|
79 |
-
"eval_runtime":
|
80 |
-
"eval_samples_per_second": 0.
|
81 |
-
"eval_steps_per_second": 0.
|
82 |
"step": 4050
|
83 |
},
|
84 |
{
|
@@ -113,9 +113,9 @@
|
|
113 |
"eval_rouge2": 3.0026,
|
114 |
"eval_rougeL": 10.272,
|
115 |
"eval_rougeLsum": 14.0716,
|
116 |
-
"eval_runtime":
|
117 |
-
"eval_samples_per_second": 1.
|
118 |
-
"eval_steps_per_second": 1.
|
119 |
"step": 6075
|
120 |
},
|
121 |
{
|
@@ -150,9 +150,9 @@
|
|
150 |
"eval_rouge2": 2.9937,
|
151 |
"eval_rougeL": 10.8765,
|
152 |
"eval_rougeLsum": 15.6203,
|
153 |
-
"eval_runtime":
|
154 |
-
"eval_samples_per_second": 0.
|
155 |
-
"eval_steps_per_second": 0.
|
156 |
"step": 8100
|
157 |
},
|
158 |
{
|
@@ -187,9 +187,9 @@
|
|
187 |
"eval_rouge2": 3.1211,
|
188 |
"eval_rougeL": 10.9379,
|
189 |
"eval_rougeLsum": 15.8203,
|
190 |
-
"eval_runtime":
|
191 |
-
"eval_samples_per_second": 0.
|
192 |
-
"eval_steps_per_second": 0.
|
193 |
"step": 10125
|
194 |
},
|
195 |
{
|
@@ -197,9 +197,9 @@
|
|
197 |
"step": 10125,
|
198 |
"total_flos": 1931007009669120.0,
|
199 |
"train_loss": 0.4568321267587167,
|
200 |
-
"train_runtime":
|
201 |
-
"train_samples_per_second": 3.
|
202 |
-
"train_steps_per_second": 3.
|
203 |
}
|
204 |
],
|
205 |
"max_steps": 10125,
|
|
|
39 |
"eval_rouge2": 1.2888,
|
40 |
"eval_rougeL": 11.0128,
|
41 |
"eval_rougeLsum": 15.2802,
|
42 |
+
"eval_runtime": 267.3657,
|
43 |
+
"eval_samples_per_second": 1.07,
|
44 |
+
"eval_steps_per_second": 1.07,
|
45 |
"step": 2025
|
46 |
},
|
47 |
{
|
|
|
76 |
"eval_rouge2": 2.8091,
|
77 |
"eval_rougeL": 10.9133,
|
78 |
"eval_rougeLsum": 15.4068,
|
79 |
+
"eval_runtime": 356.3388,
|
80 |
+
"eval_samples_per_second": 0.803,
|
81 |
+
"eval_steps_per_second": 0.803,
|
82 |
"step": 4050
|
83 |
},
|
84 |
{
|
|
|
113 |
"eval_rouge2": 3.0026,
|
114 |
"eval_rougeL": 10.272,
|
115 |
"eval_rougeLsum": 14.0716,
|
116 |
+
"eval_runtime": 279.7235,
|
117 |
+
"eval_samples_per_second": 1.022,
|
118 |
+
"eval_steps_per_second": 1.022,
|
119 |
"step": 6075
|
120 |
},
|
121 |
{
|
|
|
150 |
"eval_rouge2": 2.9937,
|
151 |
"eval_rougeL": 10.8765,
|
152 |
"eval_rougeLsum": 15.6203,
|
153 |
+
"eval_runtime": 323.891,
|
154 |
+
"eval_samples_per_second": 0.883,
|
155 |
+
"eval_steps_per_second": 0.883,
|
156 |
"step": 8100
|
157 |
},
|
158 |
{
|
|
|
187 |
"eval_rouge2": 3.1211,
|
188 |
"eval_rougeL": 10.9379,
|
189 |
"eval_rougeLsum": 15.8203,
|
190 |
+
"eval_runtime": 317.0639,
|
191 |
+
"eval_samples_per_second": 0.902,
|
192 |
+
"eval_steps_per_second": 0.902,
|
193 |
"step": 10125
|
194 |
},
|
195 |
{
|
|
|
197 |
"step": 10125,
|
198 |
"total_flos": 1931007009669120.0,
|
199 |
"train_loss": 0.4568321267587167,
|
200 |
+
"train_runtime": 2611.1815,
|
201 |
+
"train_samples_per_second": 3.878,
|
202 |
+
"train_steps_per_second": 3.878
|
203 |
}
|
204 |
],
|
205 |
"max_steps": 10125,
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4475
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fe408de49090e9d0c183138d8685c145d59b62f9884c078cb6fbadb736db474a
|
3 |
size 4475
|