mikhail-panzo committed
Commit 81af930
1 Parent(s): 1e99ba1

Training in progress, step 7500, checkpoint

last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a584a492538bb24e0beb0120c9e311383859fd9fcaa74b83cda8b091454f3999
+oid sha256:e8d21651e5db2ed67f31319a652131e535151d2fbf4e6b24b3edeb923bb9d99f
 size 577789320
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:59681d4244706e73565370df5bf260b6b7fe82b1324c42724de2186ca2208b75
+oid sha256:d9f742d32e28e8158fb337c29ed9b09546167bb83f74364f94ebdc185bff0685
 size 1155772233
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f8e9e1cb9616c0992e8a53454fc42e93677bd4ae9b2b9c42178c1e894f2ad0e4
+oid sha256:6e7a2dc2fd34210e38a0dafa68768e7a8ac00e877b03b70d54a43bd027bc5930
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:507e305dfe4256b26c68fb0c82345ea0f5163e277ea888ae062da4422c6fa9ea
+oid sha256:9cbb951fc4257a68dab12f51ae2258de85fc85dd8c8f4de0474b3e6fac987a51
 size 1064
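
The four binary files above are Git LFS pointer files: each diff only swaps the sha256 oid while the reported byte size stays the same, i.e. the checkpoint weights, optimizer state, RNG state, and scheduler state were overwritten in place at step 7500. A minimal sketch (not part of this repo) for checking a downloaded blob against such a pointer; the local paths in the last line are hypothetical:

```python
import hashlib
import os

def verify_lfs_pointer(pointer_path: str, blob_path: str) -> bool:
    """Check a local blob against a Git LFS pointer (version/oid/size lines)."""
    fields = {}
    with open(pointer_path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    _, _, expected_oid = fields["oid"].partition(":")  # value looks like "sha256:e8d2..."
    expected_size = int(fields["size"])

    sha = hashlib.sha256()
    with open(blob_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
            sha.update(chunk)

    return sha.hexdigest() == expected_oid and os.path.getsize(blob_path) == expected_size

# Hypothetical paths: the pointer text saved locally and the downloaded checkpoint blob.
print(verify_lfs_pointer("model.safetensors.pointer", "model.safetensors"))
```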
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 0.39598318934440613,
   "best_model_checkpoint": "mikhail_panzo/ceb_b64_le4_s8000/checkpoint-3500",
-  "epoch": 277.2277227722772,
+  "epoch": 297.029702970297,
   "eval_steps": 500,
-  "global_step": 7000,
+  "global_step": 7500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1099,6 +1099,84 @@
       "eval_samples_per_second": 27.369,
       "eval_steps_per_second": 3.497,
       "step": 7000
+    },
+    {
+      "epoch": 279.2079207920792,
+      "grad_norm": 0.5922915935516357,
+      "learning_rate": 1.5850000000000002e-05,
+      "loss": 0.331,
+      "step": 7050
+    },
+    {
+      "epoch": 281.18811881188117,
+      "grad_norm": 0.49854084849357605,
+      "learning_rate": 1.5016666666666668e-05,
+      "loss": 0.3292,
+      "step": 7100
+    },
+    {
+      "epoch": 283.16831683168317,
+      "grad_norm": 0.534227192401886,
+      "learning_rate": 1.4183333333333335e-05,
+      "loss": 0.3295,
+      "step": 7150
+    },
+    {
+      "epoch": 285.1485148514852,
+      "grad_norm": 0.4879334568977356,
+      "learning_rate": 1.3350000000000001e-05,
+      "loss": 0.3295,
+      "step": 7200
+    },
+    {
+      "epoch": 287.1287128712871,
+      "grad_norm": 0.4761298596858978,
+      "learning_rate": 1.2516666666666668e-05,
+      "loss": 0.333,
+      "step": 7250
+    },
+    {
+      "epoch": 289.1089108910891,
+      "grad_norm": 0.5835270881652832,
+      "learning_rate": 1.1683333333333334e-05,
+      "loss": 0.3311,
+      "step": 7300
+    },
+    {
+      "epoch": 291.08910891089107,
+      "grad_norm": 0.5297247767448425,
+      "learning_rate": 1.0866666666666667e-05,
+      "loss": 0.333,
+      "step": 7350
+    },
+    {
+      "epoch": 293.0693069306931,
+      "grad_norm": 0.44668009877204895,
+      "learning_rate": 1.0033333333333333e-05,
+      "loss": 0.3282,
+      "step": 7400
+    },
+    {
+      "epoch": 295.0495049504951,
+      "grad_norm": 0.47231703996658325,
+      "learning_rate": 9.2e-06,
+      "loss": 0.3309,
+      "step": 7450
+    },
+    {
+      "epoch": 297.029702970297,
+      "grad_norm": 0.5559085011482239,
+      "learning_rate": 8.366666666666667e-06,
+      "loss": 0.3345,
+      "step": 7500
+    },
+    {
+      "epoch": 297.029702970297,
+      "eval_loss": 0.40512633323669434,
+      "eval_runtime": 6.9059,
+      "eval_samples_per_second": 26.065,
+      "eval_steps_per_second": 3.33,
+      "step": 7500
     }
   ],
   "logging_steps": 50,
@@ -1118,7 +1196,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 7.563870212869475e+16,
+  "total_flos": 8.103931252887646e+16,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null