Training in progress, step 14500
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +23 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 838981
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d72bcd5d67677336ea44ac21b39046210c3ff91ff61c07e085af69880771da09
|
3 |
size 838981
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 242014297
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8a43f747cfa3f540c13e90a6adfa0fd0b4bd0db108e4f2ac14203bd4eef11db2
|
3 |
size 242014297
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14575
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cc4dd445fffcceb22a20bf5eed3ac992f6b1b4353bf8125d39cce5fb62c1155e
|
3 |
size 14575
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 557
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5c90c29bc2f74036c291681a1031c81300dba59e8fc6586a175a7c2aa56888cb
|
3 |
size 557
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 627
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:05d8c2508419cd87dcc10e1d12fb122dbba1aed33cf9d8f5c2664992fcb15521
|
3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": 0.5734578017441343,
|
3 |
"best_model_checkpoint": "results/checkpoint-12000",
|
4 |
-
"epoch": 2.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -566,11 +566,31 @@
|
|
566 |
"eval_samples_per_second": 36.697,
|
567 |
"eval_steps_per_second": 1.835,
|
568 |
"step": 14000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
569 |
}
|
570 |
],
|
571 |
"max_steps": 15000,
|
572 |
"num_train_epochs": 3,
|
573 |
-
"total_flos": 3.
|
574 |
"trial_name": null,
|
575 |
"trial_params": null
|
576 |
}
|
|
|
1 |
{
|
2 |
"best_metric": 0.5734578017441343,
|
3 |
"best_model_checkpoint": "results/checkpoint-12000",
|
4 |
+
"epoch": 2.9,
|
5 |
+
"global_step": 14500,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
566 |
"eval_samples_per_second": 36.697,
|
567 |
"eval_steps_per_second": 1.835,
|
568 |
"step": 14000
|
569 |
+
},
|
570 |
+
{
|
571 |
+
"epoch": 2.9,
|
572 |
+
"learning_rate": 3.550877192982456e-06,
|
573 |
+
"loss": 0.4984,
|
574 |
+
"step": 14500
|
575 |
+
},
|
576 |
+
{
|
577 |
+
"epoch": 2.9,
|
578 |
+
"eval_bleu": 0.13386113820042875,
|
579 |
+
"eval_exact_match": 0.166,
|
580 |
+
"eval_loss": 0.7553720474243164,
|
581 |
+
"eval_rouge1": 0.5732764191737172,
|
582 |
+
"eval_rouge2": 0.4072722620184557,
|
583 |
+
"eval_rougeL": 0.5550480838843836,
|
584 |
+
"eval_rougeLsum": 0.5626680551333753,
|
585 |
+
"eval_runtime": 55.1875,
|
586 |
+
"eval_samples_per_second": 36.24,
|
587 |
+
"eval_steps_per_second": 1.812,
|
588 |
+
"step": 14500
|
589 |
}
|
590 |
],
|
591 |
"max_steps": 15000,
|
592 |
"num_train_epochs": 3,
|
593 |
+
"total_flos": 3.762840964104192e+16,
|
594 |
"trial_name": null,
|
595 |
"trial_params": null
|
596 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 242014297
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8a43f747cfa3f540c13e90a6adfa0fd0b4bd0db108e4f2ac14203bd4eef11db2
|
3 |
size 242014297
|