Training in progress, step 5200
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +69 -5
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2843228158
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:19a0167071d6077d8f26fcfb7304a29ea809d034ec769f851d56de0c8bea89c8
|
3 |
size 2843228158
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1421588461
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b64bf1490035f1c45fec685049f0d6a1e6e790b80c14856fab49fcd0b7114687
|
3 |
size 1421588461
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:493e658da36031d3825842e705fcbf542f533c87882496ee53fda9fcd47858eb
|
3 |
size 14503
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bda94d2ae823993ee852e2514dee8cef8ce45edf13d5b4966c68949e7923c4ab
|
3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:49b37a86f5ceab52bac4fb464c21074a6c79d3fab0788b80ac9b85da08906513
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
-
"best_model_checkpoint": "/home2/s5431786/nlp-final-project/results/roberta-large-e-snli-classification-nli-base/checkpoint-
|
4 |
-
"epoch": 0.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -150,11 +150,75 @@
|
|
150 |
"eval_samples_per_second": 990.544,
|
151 |
"eval_steps_per_second": 15.499,
|
152 |
"step": 3600
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
153 |
}
|
154 |
],
|
155 |
"max_steps": 25752,
|
156 |
"num_train_epochs": 3,
|
157 |
-
"total_flos":
|
158 |
"trial_name": null,
|
159 |
"trial_params": null
|
160 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.9206360860900841,
|
3 |
+
"best_model_checkpoint": "/home2/s5431786/nlp-final-project/results/roberta-large-e-snli-classification-nli-base/checkpoint-5200",
|
4 |
+
"epoch": 0.6057781919850885,
|
5 |
+
"global_step": 5200,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
150 |
"eval_samples_per_second": 990.544,
|
151 |
"eval_steps_per_second": 15.499,
|
152 |
"step": 3600
|
153 |
+
},
|
154 |
+
{
|
155 |
+
"epoch": 0.47,
|
156 |
+
"learning_rate": 8.893476128188358e-06,
|
157 |
+
"loss": 0.3027,
|
158 |
+
"step": 4000
|
159 |
+
},
|
160 |
+
{
|
161 |
+
"epoch": 0.47,
|
162 |
+
"eval_accuracy": 0.9191221296484454,
|
163 |
+
"eval_f1": 0.9190571565466592,
|
164 |
+
"eval_loss": 0.23602379858493805,
|
165 |
+
"eval_runtime": 10.6438,
|
166 |
+
"eval_samples_per_second": 924.674,
|
167 |
+
"eval_steps_per_second": 14.469,
|
168 |
+
"step": 4000
|
169 |
+
},
|
170 |
+
{
|
171 |
+
"epoch": 0.51,
|
172 |
+
"learning_rate": 8.729970568999347e-06,
|
173 |
+
"loss": 0.2968,
|
174 |
+
"step": 4400
|
175 |
+
},
|
176 |
+
{
|
177 |
+
"epoch": 0.51,
|
178 |
+
"eval_accuracy": 0.9187157081893924,
|
179 |
+
"eval_f1": 0.9182261319193824,
|
180 |
+
"eval_loss": 0.23287305235862732,
|
181 |
+
"eval_runtime": 9.9312,
|
182 |
+
"eval_samples_per_second": 991.014,
|
183 |
+
"eval_steps_per_second": 15.507,
|
184 |
+
"step": 4400
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 0.56,
|
188 |
+
"learning_rate": 8.566465009810335e-06,
|
189 |
+
"loss": 0.2888,
|
190 |
+
"step": 4800
|
191 |
+
},
|
192 |
+
{
|
193 |
+
"epoch": 0.56,
|
194 |
+
"eval_accuracy": 0.9196301564722618,
|
195 |
+
"eval_f1": 0.9189361658403055,
|
196 |
+
"eval_loss": 0.24621723592281342,
|
197 |
+
"eval_runtime": 9.9085,
|
198 |
+
"eval_samples_per_second": 993.291,
|
199 |
+
"eval_steps_per_second": 15.542,
|
200 |
+
"step": 4800
|
201 |
+
},
|
202 |
+
{
|
203 |
+
"epoch": 0.61,
|
204 |
+
"learning_rate": 8.402959450621321e-06,
|
205 |
+
"loss": 0.2898,
|
206 |
+
"step": 5200
|
207 |
+
},
|
208 |
+
{
|
209 |
+
"epoch": 0.61,
|
210 |
+
"eval_accuracy": 0.9211542369437107,
|
211 |
+
"eval_f1": 0.9206360860900841,
|
212 |
+
"eval_loss": 0.23345668613910675,
|
213 |
+
"eval_runtime": 10.0954,
|
214 |
+
"eval_samples_per_second": 974.901,
|
215 |
+
"eval_steps_per_second": 15.254,
|
216 |
+
"step": 5200
|
217 |
}
|
218 |
],
|
219 |
"max_steps": 25752,
|
220 |
"num_train_epochs": 3,
|
221 |
+
"total_flos": 3.2222917166779776e+16,
|
222 |
"trial_name": null,
|
223 |
"trial_params": null
|
224 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1421588461
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b64bf1490035f1c45fec685049f0d6a1e6e790b80c14856fab49fcd0b7114687
|
3 |
size 1421588461
|