Training in progress, step 420000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +2 -2
- last-checkpoint/rng_state_1.pth +2 -2
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +77 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 893439185
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c2f3d6b44eb31a5904720c6ef9f3e390e0825c4e9bc44d807b8966079fa39c18
|
3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 449471589
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:43bdf8dfd2503753ae90f61b7bb9fe12717af35ea212a47dbf67cd5903f5bff5
|
3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cc2e9e51fe4eee7b642ed745674287b01af0553fd6bc1b8f03d1bfb83d45dd88
|
3 |
+
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d7303688061045aa652b3652b66ace516eee8a80cc0cf60b455e243337822a42
|
3 |
+
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f87a487e7aba37c8dacc6a7f2504b596187c112ee85cf9f5586b459f5ed62ff7
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c8eef20e1504bbd40c9a4046a6d63e017b73406d2bc77b0cb51859e2e7910822
|
3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5ecb3d640c7c2649b2bba280f1d77ce8c7f1955289fab0ee4959a38aa2646819
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 6.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -3040,11 +3040,85 @@
|
|
3040 |
"eval_samples_per_second": 926.22,
|
3041 |
"eval_steps_per_second": 14.82,
|
3042 |
"step": 410000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3043 |
}
|
3044 |
],
|
3045 |
"max_steps": 1000000,
|
3046 |
"num_train_epochs": 16,
|
3047 |
-
"total_flos": 2.
|
3048 |
"trial_name": null,
|
3049 |
"trial_params": null
|
3050 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 6.41348664620459,
|
5 |
+
"global_step": 420000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
3040 |
"eval_samples_per_second": 926.22,
|
3041 |
"eval_steps_per_second": 14.82,
|
3042 |
"step": 410000
|
3043 |
+
},
|
3044 |
+
{
|
3045 |
+
"epoch": 6.28,
|
3046 |
+
"learning_rate": 0.00010576871868792746,
|
3047 |
+
"loss": 0.275,
|
3048 |
+
"step": 411000
|
3049 |
+
},
|
3050 |
+
{
|
3051 |
+
"epoch": 6.29,
|
3052 |
+
"learning_rate": 0.0001055533481490004,
|
3053 |
+
"loss": 0.2746,
|
3054 |
+
"step": 412000
|
3055 |
+
},
|
3056 |
+
{
|
3057 |
+
"epoch": 6.31,
|
3058 |
+
"learning_rate": 0.000105337698162752,
|
3059 |
+
"loss": 0.2741,
|
3060 |
+
"step": 413000
|
3061 |
+
},
|
3062 |
+
{
|
3063 |
+
"epoch": 6.32,
|
3064 |
+
"learning_rate": 0.00010512177108749594,
|
3065 |
+
"loss": 0.2746,
|
3066 |
+
"step": 414000
|
3067 |
+
},
|
3068 |
+
{
|
3069 |
+
"epoch": 6.34,
|
3070 |
+
"learning_rate": 0.00010490556928457616,
|
3071 |
+
"loss": 0.2743,
|
3072 |
+
"step": 415000
|
3073 |
+
},
|
3074 |
+
{
|
3075 |
+
"epoch": 6.34,
|
3076 |
+
"eval_runtime": 1.0107,
|
3077 |
+
"eval_samples_per_second": 989.389,
|
3078 |
+
"eval_steps_per_second": 15.83,
|
3079 |
+
"step": 415000
|
3080 |
+
},
|
3081 |
+
{
|
3082 |
+
"epoch": 6.35,
|
3083 |
+
"learning_rate": 0.00010468909511834088,
|
3084 |
+
"loss": 0.2741,
|
3085 |
+
"step": 416000
|
3086 |
+
},
|
3087 |
+
{
|
3088 |
+
"epoch": 6.37,
|
3089 |
+
"learning_rate": 0.00010447235095611692,
|
3090 |
+
"loss": 0.2738,
|
3091 |
+
"step": 417000
|
3092 |
+
},
|
3093 |
+
{
|
3094 |
+
"epoch": 6.38,
|
3095 |
+
"learning_rate": 0.00010425533916818376,
|
3096 |
+
"loss": 0.2738,
|
3097 |
+
"step": 418000
|
3098 |
+
},
|
3099 |
+
{
|
3100 |
+
"epoch": 6.4,
|
3101 |
+
"learning_rate": 0.00010403806212774747,
|
3102 |
+
"loss": 0.2742,
|
3103 |
+
"step": 419000
|
3104 |
+
},
|
3105 |
+
{
|
3106 |
+
"epoch": 6.41,
|
3107 |
+
"learning_rate": 0.000103820522210915,
|
3108 |
+
"loss": 0.2737,
|
3109 |
+
"step": 420000
|
3110 |
+
},
|
3111 |
+
{
|
3112 |
+
"epoch": 6.41,
|
3113 |
+
"eval_runtime": 1.055,
|
3114 |
+
"eval_samples_per_second": 947.861,
|
3115 |
+
"eval_steps_per_second": 15.166,
|
3116 |
+
"step": 420000
|
3117 |
}
|
3118 |
],
|
3119 |
"max_steps": 1000000,
|
3120 |
"num_train_epochs": 16,
|
3121 |
+
"total_flos": 2.9442070208656875e+22,
|
3122 |
"trial_name": null,
|
3123 |
"trial_params": null
|
3124 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 449471589
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:43bdf8dfd2503753ae90f61b7bb9fe12717af35ea212a47dbf67cd5903f5bff5
|
3 |
size 449471589
|