AntoineBlanot committed on
Commit
bf395d2
1 Parent(s): e3f67a1

Best checkpoint (step 4176)

Browse files
Files changed (2) hide show
  1. pytorch_model.bin +1 -1
  2. trainer_state.json +3 -106
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:52ff68740dbd51618e05d8e926e72bd32831e0cc00fa4d8693a1c4b6ccd39cc0
3
  size 498673009
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9a8fd5458b8d1577edb8a797dafb789dccb27c5daa17ed565b29753696bc342
3
  size 498673009
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.0,
5
- "global_step": 4638,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1146,114 +1146,11 @@
1146
  "eval_samples_per_second": 1517.169,
1147
  "eval_steps_per_second": 11.869,
1148
  "step": 4176
1149
- },
1150
- {
1151
- "epoch": 2.71,
1152
- "learning_rate": 5.498322951605176e-06,
1153
- "loss": 0.5301,
1154
- "step": 4183
1155
- },
1156
- {
1157
- "epoch": 2.74,
1158
- "learning_rate": 4.935313847628174e-06,
1159
- "loss": 0.5402,
1160
- "step": 4230
1161
- },
1162
- {
1163
- "epoch": 2.77,
1164
- "learning_rate": 4.372304743651174e-06,
1165
- "loss": 0.5167,
1166
- "step": 4277
1167
- },
1168
- {
1169
- "epoch": 2.8,
1170
- "learning_rate": 3.8092956396741735e-06,
1171
- "loss": 0.5004,
1172
- "step": 4324
1173
- },
1174
- {
1175
- "epoch": 2.83,
1176
- "learning_rate": 3.246286535697173e-06,
1177
- "loss": 0.5148,
1178
- "step": 4371
1179
- },
1180
- {
1181
- "epoch": 2.85,
1182
- "eval_accuracy": 0.7637243375237406,
1183
- "eval_b_acc": 0.6382943967754277,
1184
- "eval_f1": 0.7612453985473553,
1185
- "eval_f1_anger": 0.6462075848303392,
1186
- "eval_f1_disgust": 0.4666666666666667,
1187
- "eval_f1_fear": 0.6678592725104352,
1188
- "eval_f1_joy": 0.7563368765331154,
1189
- "eval_f1_neutral": 0.8485270885605427,
1190
- "eval_f1_sadness": 0.6918630502445532,
1191
- "eval_f1_surprise": 0.4760213143872114,
1192
- "eval_loss": 0.6637689471244812,
1193
- "eval_prec": 0.7597715932015663,
1194
- "eval_prec_anger": 0.6741280583029672,
1195
- "eval_prec_disgust": 0.5,
1196
- "eval_prec_fear": 0.681265206812652,
1197
- "eval_prec_joy": 0.7394084732214229,
1198
- "eval_prec_neutral": 0.8400183992640294,
1199
- "eval_prec_sadness": 0.6921708185053381,
1200
- "eval_prec_surprise": 0.5296442687747036,
1201
- "eval_recall": 0.7637243375237406,
1202
- "eval_recall_anger": 0.6205079060852899,
1203
- "eval_recall_disgust": 0.4375,
1204
- "eval_recall_fear": 0.6549707602339181,
1205
- "eval_recall_joy": 0.7740585774058577,
1206
- "eval_recall_neutral": 0.857209913631243,
1207
- "eval_recall_sadness": 0.6915555555555556,
1208
- "eval_recall_surprise": 0.432258064516129,
1209
- "eval_runtime": 14.5644,
1210
- "eval_samples_per_second": 1518.363,
1211
- "eval_steps_per_second": 11.878,
1212
- "step": 4408
1213
- },
1214
- {
1215
- "epoch": 2.86,
1216
- "learning_rate": 2.6832774317201726e-06,
1217
- "loss": 0.5231,
1218
- "step": 4418
1219
- },
1220
- {
1221
- "epoch": 2.89,
1222
- "learning_rate": 2.120268327743172e-06,
1223
- "loss": 0.5223,
1224
- "step": 4465
1225
- },
1226
- {
1227
- "epoch": 2.92,
1228
- "learning_rate": 1.5572592237661715e-06,
1229
- "loss": 0.525,
1230
- "step": 4512
1231
- },
1232
- {
1233
- "epoch": 2.95,
1234
- "learning_rate": 9.942501197891712e-07,
1235
- "loss": 0.5219,
1236
- "step": 4559
1237
- },
1238
- {
1239
- "epoch": 2.98,
1240
- "learning_rate": 4.312410158121706e-07,
1241
- "loss": 0.516,
1242
- "step": 4606
1243
- },
1244
- {
1245
- "epoch": 3.0,
1246
- "step": 4638,
1247
- "total_flos": 3.903427734912e+16,
1248
- "train_loss": 0.6570020180521673,
1249
- "train_runtime": 1377.6596,
1250
- "train_samples_per_second": 430.73,
1251
- "train_steps_per_second": 3.367
1252
  }
1253
  ],
1254
  "max_steps": 4638,
1255
  "num_train_epochs": 3,
1256
- "total_flos": 3.903427734912e+16,
1257
  "trial_name": null,
1258
  "trial_params": null
1259
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.701164294954722,
5
+ "global_step": 4176,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1146
  "eval_samples_per_second": 1517.169,
1147
  "eval_steps_per_second": 11.869,
1148
  "step": 4176
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1149
  }
1150
  ],
1151
  "max_steps": 4638,
1152
  "num_train_epochs": 3,
1153
+ "total_flos": 3.51500575835136e+16,
1154
  "trial_name": null,
1155
  "trial_params": null
1156
  }