GlycerinLOL
committed on
Commit
•
5c423b3
1
Parent(s):
39c8ef4
End of training
Browse files
- all_results.json +5 -5
- train_results.json +5 -5
- trainer_state.json +69 -25
all_results.json
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
{
|
2 |
-
"epoch":
|
3 |
-
"train_loss": 0.
|
4 |
-
"train_runtime":
|
5 |
-
"train_samples_per_second":
|
6 |
-
"train_steps_per_second": 0.
|
7 |
}
|
|
|
1 |
{
|
2 |
+
"epoch": 7.99,
|
3 |
+
"train_loss": 0.637062493348733,
|
4 |
+
"train_runtime": 6322.7563,
|
5 |
+
"train_samples_per_second": 63.264,
|
6 |
+
"train_steps_per_second": 0.493
|
7 |
}
|
train_results.json
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
{
|
2 |
-
"epoch":
|
3 |
-
"train_loss": 0.
|
4 |
-
"train_runtime":
|
5 |
-
"train_samples_per_second":
|
6 |
-
"train_steps_per_second": 0.
|
7 |
}
|
|
|
1 |
{
|
2 |
+
"epoch": 7.99,
|
3 |
+
"train_loss": 0.637062493348733,
|
4 |
+
"train_runtime": 6322.7563,
|
5 |
+
"train_samples_per_second": 63.264,
|
6 |
+
"train_steps_per_second": 0.493
|
7 |
}
|
trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -113,37 +113,81 @@
|
|
113 |
"step": 2000
|
114 |
},
|
115 |
{
|
116 |
-
"epoch":
|
117 |
-
"eval_f1": 0.
|
118 |
-
"eval_gen_len": 26.
|
119 |
-
"eval_loss": 1.
|
120 |
-
"eval_precision": 0.
|
121 |
-
"eval_recall": 0.
|
122 |
-
"eval_rouge1": 0.
|
123 |
-
"eval_rouge2": 0.
|
124 |
-
"eval_rougeL": 0.
|
125 |
-
"eval_rougeLsum": 0.
|
126 |
-
"eval_runtime":
|
127 |
-
"eval_samples_per_second": 5.
|
128 |
-
"eval_steps_per_second": 0.
|
129 |
"step": 2340
|
130 |
},
|
131 |
{
|
132 |
-
"epoch":
|
133 |
-
"
|
134 |
-
"
|
135 |
-
"
|
136 |
-
|
137 |
-
|
138 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
139 |
}
|
140 |
],
|
141 |
"logging_steps": 500,
|
142 |
-
"max_steps":
|
143 |
"num_input_tokens_seen": 0,
|
144 |
-
"num_train_epochs":
|
145 |
"save_steps": 500,
|
146 |
-
"total_flos":
|
147 |
"train_batch_size": 32,
|
148 |
"trial_name": null,
|
149 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 7.994241842610364,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 3120,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
113 |
"step": 2000
|
114 |
},
|
115 |
{
|
116 |
+
"epoch": 6.0,
|
117 |
+
"eval_f1": 0.9077,
|
118 |
+
"eval_gen_len": 26.438909090909092,
|
119 |
+
"eval_loss": 1.6896997690200806,
|
120 |
+
"eval_precision": 0.9089,
|
121 |
+
"eval_recall": 0.9069,
|
122 |
+
"eval_rouge1": 0.4592,
|
123 |
+
"eval_rouge2": 0.2114,
|
124 |
+
"eval_rougeL": 0.3762,
|
125 |
+
"eval_rougeLsum": 0.3759,
|
126 |
+
"eval_runtime": 506.0324,
|
127 |
+
"eval_samples_per_second": 5.434,
|
128 |
+
"eval_steps_per_second": 0.34,
|
129 |
"step": 2340
|
130 |
},
|
131 |
{
|
132 |
+
"epoch": 6.41,
|
133 |
+
"learning_rate": 3.974358974358974e-06,
|
134 |
+
"loss": 1.7833,
|
135 |
+
"step": 2500
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 7.0,
|
139 |
+
"eval_f1": 0.9079,
|
140 |
+
"eval_gen_len": 26.374545454545455,
|
141 |
+
"eval_loss": 1.6819266080856323,
|
142 |
+
"eval_precision": 0.9092,
|
143 |
+
"eval_recall": 0.9071,
|
144 |
+
"eval_rouge1": 0.4598,
|
145 |
+
"eval_rouge2": 0.2115,
|
146 |
+
"eval_rougeL": 0.3764,
|
147 |
+
"eval_rougeLsum": 0.376,
|
148 |
+
"eval_runtime": 506.9661,
|
149 |
+
"eval_samples_per_second": 5.424,
|
150 |
+
"eval_steps_per_second": 0.339,
|
151 |
+
"step": 2731
|
152 |
+
},
|
153 |
+
{
|
154 |
+
"epoch": 7.69,
|
155 |
+
"learning_rate": 7.692307692307694e-07,
|
156 |
+
"loss": 1.7683,
|
157 |
+
"step": 3000
|
158 |
+
},
|
159 |
+
{
|
160 |
+
"epoch": 7.99,
|
161 |
+
"eval_f1": 0.9081,
|
162 |
+
"eval_gen_len": 26.465454545454545,
|
163 |
+
"eval_loss": 1.6796071529388428,
|
164 |
+
"eval_precision": 0.9092,
|
165 |
+
"eval_recall": 0.9073,
|
166 |
+
"eval_rouge1": 0.4613,
|
167 |
+
"eval_rouge2": 0.2127,
|
168 |
+
"eval_rougeL": 0.3775,
|
169 |
+
"eval_rougeLsum": 0.3772,
|
170 |
+
"eval_runtime": 504.4764,
|
171 |
+
"eval_samples_per_second": 5.451,
|
172 |
+
"eval_steps_per_second": 0.341,
|
173 |
+
"step": 3120
|
174 |
+
},
|
175 |
+
{
|
176 |
+
"epoch": 7.99,
|
177 |
+
"step": 3120,
|
178 |
+
"total_flos": 5.768064442218578e+17,
|
179 |
+
"train_loss": 0.637062493348733,
|
180 |
+
"train_runtime": 6322.7563,
|
181 |
+
"train_samples_per_second": 63.264,
|
182 |
+
"train_steps_per_second": 0.493
|
183 |
}
|
184 |
],
|
185 |
"logging_steps": 500,
|
186 |
+
"max_steps": 3120,
|
187 |
"num_input_tokens_seen": 0,
|
188 |
+
"num_train_epochs": 8,
|
189 |
"save_steps": 500,
|
190 |
+
"total_flos": 5.768064442218578e+17,
|
191 |
"train_batch_size": 32,
|
192 |
"trial_name": null,
|
193 |
"trial_params": null
|