Training in progress, step 550000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +2 -2
- last-checkpoint/rng_state_1.pth +2 -2
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +77 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 893439185
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6c26dfdb95640433391e289b9d54c29ba637889e22ac54f90b0e4758926e0b34
|
3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 449471589
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:affd4afd21029b52701e8d0046ed5d64853de0037c32a35ed3d7e452fd7c0e84
|
3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:668a667c0141b3007fbd5f23f3195d50d9a952d3b42af4d471955e90b35901bf
|
3 |
+
size 14439
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ed9a47d2a307e89814d3463ee5fc48f3b3365083fd9c82a5f581a3087e2941c8
|
3 |
+
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3e8a638d577539d28ce2053ec5b75e3eed1d5ad4a147b887facb578c40f25088
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:30723df40dcd4e911f2e4b8fee07e2767c8ee7ced5c90fa064aabaf279f01230
|
3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8de7df7ff53e44669a043f69e39b55baa82d81ac1777f09e2f6159ffcb51dd66
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 8.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -4002,11 +4002,85 @@
|
|
4002 |
"eval_samples_per_second": 849.063,
|
4003 |
"eval_steps_per_second": 13.585,
|
4004 |
"step": 540000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4005 |
}
|
4006 |
],
|
4007 |
"max_steps": 1000000,
|
4008 |
"num_train_epochs": 16,
|
4009 |
-
"total_flos": 3.
|
4010 |
"trial_name": null,
|
4011 |
"trial_params": null
|
4012 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 8.398613465267916,
|
5 |
+
"global_step": 550000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
4002 |
"eval_samples_per_second": 849.063,
|
4003 |
"eval_steps_per_second": 13.585,
|
4004 |
"step": 540000
|
4005 |
+
},
|
4006 |
+
{
|
4007 |
+
"epoch": 8.26,
|
4008 |
+
"learning_rate": 7.629795553284005e-05,
|
4009 |
+
"loss": 0.2602,
|
4010 |
+
"step": 541000
|
4011 |
+
},
|
4012 |
+
{
|
4013 |
+
"epoch": 8.28,
|
4014 |
+
"learning_rate": 7.606681437777081e-05,
|
4015 |
+
"loss": 0.2605,
|
4016 |
+
"step": 542000
|
4017 |
+
},
|
4018 |
+
{
|
4019 |
+
"epoch": 8.29,
|
4020 |
+
"learning_rate": 7.583571623538939e-05,
|
4021 |
+
"loss": 0.26,
|
4022 |
+
"step": 543000
|
4023 |
+
},
|
4024 |
+
{
|
4025 |
+
"epoch": 8.31,
|
4026 |
+
"learning_rate": 7.560466363294806e-05,
|
4027 |
+
"loss": 0.2596,
|
4028 |
+
"step": 544000
|
4029 |
+
},
|
4030 |
+
{
|
4031 |
+
"epoch": 8.32,
|
4032 |
+
"learning_rate": 7.537365909720104e-05,
|
4033 |
+
"loss": 0.2595,
|
4034 |
+
"step": 545000
|
4035 |
+
},
|
4036 |
+
{
|
4037 |
+
"epoch": 8.32,
|
4038 |
+
"eval_runtime": 1.1629,
|
4039 |
+
"eval_samples_per_second": 859.911,
|
4040 |
+
"eval_steps_per_second": 13.759,
|
4041 |
+
"step": 545000
|
4042 |
+
},
|
4043 |
+
{
|
4044 |
+
"epoch": 8.34,
|
4045 |
+
"learning_rate": 7.514270515437691e-05,
|
4046 |
+
"loss": 0.2595,
|
4047 |
+
"step": 546000
|
4048 |
+
},
|
4049 |
+
{
|
4050 |
+
"epoch": 8.35,
|
4051 |
+
"learning_rate": 7.491180433015101e-05,
|
4052 |
+
"loss": 0.2594,
|
4053 |
+
"step": 547000
|
4054 |
+
},
|
4055 |
+
{
|
4056 |
+
"epoch": 8.37,
|
4057 |
+
"learning_rate": 7.468095914961777e-05,
|
4058 |
+
"loss": 0.2596,
|
4059 |
+
"step": 548000
|
4060 |
+
},
|
4061 |
+
{
|
4062 |
+
"epoch": 8.38,
|
4063 |
+
"learning_rate": 7.445017213726307e-05,
|
4064 |
+
"loss": 0.2596,
|
4065 |
+
"step": 549000
|
4066 |
+
},
|
4067 |
+
{
|
4068 |
+
"epoch": 8.4,
|
4069 |
+
"learning_rate": 7.421944581693674e-05,
|
4070 |
+
"loss": 0.2594,
|
4071 |
+
"step": 550000
|
4072 |
+
},
|
4073 |
+
{
|
4074 |
+
"epoch": 8.4,
|
4075 |
+
"eval_runtime": 0.9899,
|
4076 |
+
"eval_samples_per_second": 1010.184,
|
4077 |
+
"eval_steps_per_second": 16.163,
|
4078 |
+
"step": 550000
|
4079 |
}
|
4080 |
],
|
4081 |
"max_steps": 1000000,
|
4082 |
"num_train_epochs": 16,
|
4083 |
+
"total_flos": 3.855509100123903e+22,
|
4084 |
"trial_name": null,
|
4085 |
"trial_params": null
|
4086 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 449471589
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:affd4afd21029b52701e8d0046ed5d64853de0037c32a35ed3d7e452fd7c0e84
|
3 |
size 449471589
|