lombardata committed on
Commit
0722a99
1 Parent(s): 26e4e72

🍻 cheers

Browse files
Files changed (6) hide show
  1. README.md +10 -6
  2. all_results.json +14 -14
  3. config.json +1 -1
  4. eval_results.json +10 -10
  5. train_results.json +6 -6
  6. trainer_state.json +730 -14
README.md CHANGED
@@ -1,7 +1,11 @@
1
  ---
 
 
2
  license: apache-2.0
3
  base_model: facebook/dinov2-large
4
  tags:
 
 
5
  - generated_from_trainer
6
  metrics:
7
  - accuracy
@@ -15,13 +19,13 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # dinov2-large-2024_01_05-kornia_img-size518_batch-size32_epochs70_freeze
17
 
18
- This model is a fine-tuned version of [facebook/dinov2-large](https://huggingface.co/facebook/dinov2-large) on the None dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 0.0819
21
- - F1 Micro: 0.8564
22
- - F1 Macro: 0.7560
23
- - Roc Auc: 0.9061
24
- - Accuracy: 0.5656
25
  - Learning Rate: 0.0000
26
 
27
  ## Model description
 
1
  ---
2
+ language:
3
+ - eng
4
  license: apache-2.0
5
  base_model: facebook/dinov2-large
6
  tags:
7
+ - multilabel-image-classification
8
+ - multilabel
9
  - generated_from_trainer
10
  metrics:
11
  - accuracy
 
19
 
20
  # dinov2-large-2024_01_05-kornia_img-size518_batch-size32_epochs70_freeze
21
 
22
+ This model is a fine-tuned version of [facebook/dinov2-large](https://huggingface.co/facebook/dinov2-large) on the multilabel_complete_dataset dataset.
23
  It achieves the following results on the evaluation set:
24
+ - Loss: 0.0825
25
+ - F1 Micro: 0.8570
26
+ - F1 Macro: 0.7430
27
+ - Roc Auc: 0.9080
28
+ - Accuracy: 0.5739
29
  - Learning Rate: 0.0000
30
 
31
  ## Model description
all_results.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
- "epoch": 70.0,
3
- "eval_accuracy": 0.5605742296918768,
4
- "eval_f1_macro": 0.7342630546801885,
5
- "eval_f1_micro": 0.8543162417321499,
6
- "eval_loss": 0.08401281386613846,
7
- "eval_roc_auc": 0.9076857807628663,
8
- "eval_runtime": 670.4543,
9
- "eval_samples_per_second": 4.26,
10
- "eval_steps_per_second": 0.134,
11
- "learning_rate": 0.0001,
12
- "train_loss": 0.11672632308896316,
13
- "train_runtime": 200748.2354,
14
- "train_samples_per_second": 3.057,
15
- "train_steps_per_second": 0.096
16
  }
 
1
  {
2
+ "epoch": 114.0,
3
+ "eval_accuracy": 0.5738795518207283,
4
+ "eval_f1_macro": 0.7429818572746157,
5
+ "eval_f1_micro": 0.8570132153593103,
6
+ "eval_loss": 0.08252906054258347,
7
+ "eval_roc_auc": 0.9080498428032193,
8
+ "eval_runtime": 681.2636,
9
+ "eval_samples_per_second": 4.192,
10
+ "eval_steps_per_second": 0.132,
11
+ "learning_rate": 1.0000000000000002e-07,
12
+ "train_loss": 0.033337813025782426,
13
+ "train_runtime": 131818.0427,
14
+ "train_samples_per_second": 9.311,
15
+ "train_steps_per_second": 0.291
16
  }
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "facebook/dinov2-large",
3
  "apply_layernorm": true,
4
  "architectures": [
5
  "NewheadDinov2ForImageClassification"
 
1
  {
2
+ "_name_or_path": "facebook/dinov2-large2024_01_08",
3
  "apply_layernorm": true,
4
  "architectures": [
5
  "NewheadDinov2ForImageClassification"
eval_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "epoch": 70.0,
3
- "eval_accuracy": 0.5605742296918768,
4
- "eval_f1_macro": 0.7342630546801885,
5
- "eval_f1_micro": 0.8543162417321499,
6
- "eval_loss": 0.08401281386613846,
7
- "eval_roc_auc": 0.9076857807628663,
8
- "eval_runtime": 670.4543,
9
- "eval_samples_per_second": 4.26,
10
- "eval_steps_per_second": 0.134,
11
- "learning_rate": 0.0001
12
  }
 
1
  {
2
+ "epoch": 114.0,
3
+ "eval_accuracy": 0.5738795518207283,
4
+ "eval_f1_macro": 0.7429818572746157,
5
+ "eval_f1_micro": 0.8570132153593103,
6
+ "eval_loss": 0.08252906054258347,
7
+ "eval_roc_auc": 0.9080498428032193,
8
+ "eval_runtime": 681.2636,
9
+ "eval_samples_per_second": 4.192,
10
+ "eval_steps_per_second": 0.132,
11
+ "learning_rate": 1.0000000000000002e-07
12
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 70.0,
3
- "learning_rate": 0.0001,
4
- "train_loss": 0.11672632308896316,
5
- "train_runtime": 200748.2354,
6
- "train_samples_per_second": 3.057,
7
- "train_steps_per_second": 0.096
8
  }
 
1
  {
2
+ "epoch": 114.0,
3
+ "learning_rate": 1.0000000000000002e-07,
4
+ "train_loss": 0.033337813025782426,
5
+ "train_runtime": 131818.0427,
6
+ "train_samples_per_second": 9.311,
7
+ "train_steps_per_second": 0.291
8
  }
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.08306006342172623,
3
- "best_model_checkpoint": "/home1/datawork/mcontini/models/multilabel/huggingface/dinov2-large-2024_01_05-kornia_img-size518_batch-size32_epochs70_freeze/checkpoint-19180",
4
- "epoch": 70.0,
5
  "eval_steps": 500,
6
- "global_step": 19180,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1147,21 +1147,737 @@
1147
  "step": 19180
1148
  },
1149
  {
1150
- "epoch": 70.0,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1151
  "learning_rate": 0.0001,
1152
- "step": 19180,
1153
- "total_flos": 9.099793269879256e+20,
1154
- "train_loss": 0.11672632308896316,
1155
- "train_runtime": 200748.2354,
1156
- "train_samples_per_second": 3.057,
1157
- "train_steps_per_second": 0.096
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1158
  }
1159
  ],
1160
  "logging_steps": 500,
1161
- "max_steps": 19180,
1162
- "num_train_epochs": 70,
1163
  "save_steps": 500,
1164
- "total_flos": 9.099793269879256e+20,
1165
  "trial_name": null,
1166
  "trial_params": null
1167
  }
 
1
  {
2
+ "best_metric": 0.08124219626188278,
3
+ "best_model_checkpoint": "/home1/datawork/mcontini/models/multilabel/huggingface/dinov2-large-2024_01_05-kornia_img-size518_batch-size32_epochs70_freeze/checkpoint-28496",
4
+ "epoch": 114.0,
5
  "eval_steps": 500,
6
+ "global_step": 31236,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1147
  "step": 19180
1148
  },
1149
  {
1150
+ "epoch": 71.0,
1151
+ "eval_accuracy": 0.557920446615492,
1152
+ "eval_f1_macro": 0.74841480686329,
1153
+ "eval_f1_micro": 0.8557041347283614,
1154
+ "eval_loss": 0.08348394185304642,
1155
+ "eval_roc_auc": 0.9102149107350765,
1156
+ "eval_runtime": 688.9991,
1157
+ "eval_samples_per_second": 4.16,
1158
+ "eval_steps_per_second": 0.131,
1159
+ "learning_rate": 0.0001,
1160
+ "step": 19454
1161
+ },
1162
+ {
1163
+ "epoch": 71.17,
1164
+ "learning_rate": 0.0001,
1165
+ "loss": 0.0907,
1166
+ "step": 19500
1167
+ },
1168
+ {
1169
+ "epoch": 72.0,
1170
+ "eval_accuracy": 0.5610607117934403,
1171
+ "eval_f1_macro": 0.7445560004670461,
1172
+ "eval_f1_micro": 0.8531667140017041,
1173
+ "eval_loss": 0.08317266404628754,
1174
+ "eval_roc_auc": 0.9036835353705291,
1175
+ "eval_runtime": 686.0467,
1176
+ "eval_samples_per_second": 4.178,
1177
+ "eval_steps_per_second": 0.131,
1178
+ "learning_rate": 0.0001,
1179
+ "step": 19728
1180
+ },
1181
+ {
1182
+ "epoch": 72.99,
1183
+ "learning_rate": 0.0001,
1184
+ "loss": 0.0905,
1185
+ "step": 20000
1186
+ },
1187
+ {
1188
+ "epoch": 73.0,
1189
+ "eval_accuracy": 0.5575715282623867,
1190
+ "eval_f1_macro": 0.7511661878734329,
1191
+ "eval_f1_micro": 0.8557848235754626,
1192
+ "eval_loss": 0.0826636552810669,
1193
+ "eval_roc_auc": 0.9104516775814524,
1194
+ "eval_runtime": 696.2462,
1195
+ "eval_samples_per_second": 4.116,
1196
+ "eval_steps_per_second": 0.129,
1197
+ "learning_rate": 0.0001,
1198
+ "step": 20002
1199
+ },
1200
+ {
1201
+ "epoch": 74.0,
1202
+ "eval_accuracy": 0.5589672016748081,
1203
+ "eval_f1_macro": 0.7519078199055363,
1204
+ "eval_f1_micro": 0.8547880690737834,
1205
+ "eval_loss": 0.08345863223075867,
1206
+ "eval_roc_auc": 0.9089978296204139,
1207
+ "eval_runtime": 684.5044,
1208
+ "eval_samples_per_second": 4.187,
1209
+ "eval_steps_per_second": 0.131,
1210
+ "learning_rate": 0.0001,
1211
+ "step": 20276
1212
+ },
1213
+ {
1214
+ "epoch": 74.82,
1215
+ "learning_rate": 0.0001,
1216
+ "loss": 0.0896,
1217
+ "step": 20500
1218
+ },
1219
+ {
1220
+ "epoch": 75.0,
1221
+ "eval_accuracy": 0.5565247732030705,
1222
+ "eval_f1_macro": 0.7427640048071817,
1223
+ "eval_f1_micro": 0.8535384963511908,
1224
+ "eval_loss": 0.08291995525360107,
1225
+ "eval_roc_auc": 0.9052642407433178,
1226
+ "eval_runtime": 693.0369,
1227
+ "eval_samples_per_second": 4.135,
1228
+ "eval_steps_per_second": 0.13,
1229
+ "learning_rate": 0.0001,
1230
+ "step": 20550
1231
+ },
1232
+ {
1233
+ "epoch": 76.0,
1234
+ "eval_accuracy": 0.5642009769713887,
1235
+ "eval_f1_macro": 0.7449358839801685,
1236
+ "eval_f1_micro": 0.8560516708789666,
1237
+ "eval_loss": 0.08278567343950272,
1238
+ "eval_roc_auc": 0.9091009598641902,
1239
+ "eval_runtime": 689.5027,
1240
+ "eval_samples_per_second": 4.157,
1241
+ "eval_steps_per_second": 0.131,
1242
+ "learning_rate": 0.0001,
1243
+ "step": 20824
1244
+ },
1245
+ {
1246
+ "epoch": 76.64,
1247
+ "learning_rate": 0.0001,
1248
+ "loss": 0.089,
1249
+ "step": 21000
1250
+ },
1251
+ {
1252
+ "epoch": 77.0,
1253
+ "eval_accuracy": 0.5603628750872296,
1254
+ "eval_f1_macro": 0.7507123347472292,
1255
+ "eval_f1_micro": 0.8567585743106928,
1256
+ "eval_loss": 0.08268015086650848,
1257
+ "eval_roc_auc": 0.9102413784270108,
1258
+ "eval_runtime": 691.3385,
1259
+ "eval_samples_per_second": 4.146,
1260
+ "eval_steps_per_second": 0.13,
1261
+ "learning_rate": 0.0001,
1262
+ "step": 21098
1263
+ },
1264
+ {
1265
+ "epoch": 78.0,
1266
+ "eval_accuracy": 0.557920446615492,
1267
+ "eval_f1_macro": 0.7435504236075495,
1268
+ "eval_f1_micro": 0.8528667079277555,
1269
+ "eval_loss": 0.0833190307021141,
1270
+ "eval_roc_auc": 0.9067310989812636,
1271
+ "eval_runtime": 698.1968,
1272
+ "eval_samples_per_second": 4.105,
1273
+ "eval_steps_per_second": 0.129,
1274
+ "learning_rate": 0.0001,
1275
+ "step": 21372
1276
+ },
1277
+ {
1278
+ "epoch": 78.47,
1279
+ "learning_rate": 0.0001,
1280
+ "loss": 0.0892,
1281
+ "step": 21500
1282
+ },
1283
+ {
1284
+ "epoch": 79.0,
1285
+ "eval_accuracy": 0.5589672016748081,
1286
+ "eval_f1_macro": 0.7501683098189544,
1287
+ "eval_f1_micro": 0.8540393754243042,
1288
+ "eval_loss": 0.08301213383674622,
1289
+ "eval_roc_auc": 0.905519599881194,
1290
+ "eval_runtime": 693.3725,
1291
+ "eval_samples_per_second": 4.133,
1292
+ "eval_steps_per_second": 0.13,
1293
  "learning_rate": 0.0001,
1294
+ "step": 21646
1295
+ },
1296
+ {
1297
+ "epoch": 80.0,
1298
+ "eval_accuracy": 0.5600139567341242,
1299
+ "eval_f1_macro": 0.7461247141006309,
1300
+ "eval_f1_micro": 0.8548240635641317,
1301
+ "eval_loss": 0.08266153186559677,
1302
+ "eval_roc_auc": 0.9048757931076342,
1303
+ "eval_runtime": 692.1533,
1304
+ "eval_samples_per_second": 4.141,
1305
+ "eval_steps_per_second": 0.13,
1306
+ "learning_rate": 1e-05,
1307
+ "step": 21920
1308
+ },
1309
+ {
1310
+ "epoch": 80.29,
1311
+ "learning_rate": 1e-05,
1312
+ "loss": 0.0879,
1313
+ "step": 22000
1314
+ },
1315
+ {
1316
+ "epoch": 81.0,
1317
+ "eval_accuracy": 0.560711793440335,
1318
+ "eval_f1_macro": 0.7542778186740821,
1319
+ "eval_f1_micro": 0.8576379185065117,
1320
+ "eval_loss": 0.08230110257863998,
1321
+ "eval_roc_auc": 0.9116332184274908,
1322
+ "eval_runtime": 691.9752,
1323
+ "eval_samples_per_second": 4.142,
1324
+ "eval_steps_per_second": 0.13,
1325
+ "learning_rate": 1e-05,
1326
+ "step": 22194
1327
+ },
1328
+ {
1329
+ "epoch": 82.0,
1330
+ "eval_accuracy": 0.5631542219120725,
1331
+ "eval_f1_macro": 0.7536111063550082,
1332
+ "eval_f1_micro": 0.8576225654801881,
1333
+ "eval_loss": 0.08222728967666626,
1334
+ "eval_roc_auc": 0.9111816666216643,
1335
+ "eval_runtime": 688.2687,
1336
+ "eval_samples_per_second": 4.164,
1337
+ "eval_steps_per_second": 0.131,
1338
+ "learning_rate": 1e-05,
1339
+ "step": 22468
1340
+ },
1341
+ {
1342
+ "epoch": 82.12,
1343
+ "learning_rate": 1e-05,
1344
+ "loss": 0.0867,
1345
+ "step": 22500
1346
+ },
1347
+ {
1348
+ "epoch": 83.0,
1349
+ "eval_accuracy": 0.5624563852058618,
1350
+ "eval_f1_macro": 0.7519980936807484,
1351
+ "eval_f1_micro": 0.8553623024191264,
1352
+ "eval_loss": 0.08224428445100784,
1353
+ "eval_roc_auc": 0.9057647902327958,
1354
+ "eval_runtime": 675.1087,
1355
+ "eval_samples_per_second": 4.245,
1356
+ "eval_steps_per_second": 0.133,
1357
+ "learning_rate": 1e-05,
1358
+ "step": 22742
1359
+ },
1360
+ {
1361
+ "epoch": 83.94,
1362
+ "learning_rate": 1e-05,
1363
+ "loss": 0.0864,
1364
+ "step": 23000
1365
+ },
1366
+ {
1367
+ "epoch": 84.0,
1368
+ "eval_accuracy": 0.5638520586182834,
1369
+ "eval_f1_macro": 0.751088987860079,
1370
+ "eval_f1_micro": 0.8550642905481616,
1371
+ "eval_loss": 0.08214889466762543,
1372
+ "eval_roc_auc": 0.9071555340236915,
1373
+ "eval_runtime": 684.6287,
1374
+ "eval_samples_per_second": 4.186,
1375
+ "eval_steps_per_second": 0.131,
1376
+ "learning_rate": 1e-05,
1377
+ "step": 23016
1378
+ },
1379
+ {
1380
+ "epoch": 85.0,
1381
+ "eval_accuracy": 0.5617585484996511,
1382
+ "eval_f1_macro": 0.7532850236770345,
1383
+ "eval_f1_micro": 0.8560117633751838,
1384
+ "eval_loss": 0.08197268098592758,
1385
+ "eval_roc_auc": 0.9067157953185159,
1386
+ "eval_runtime": 685.9962,
1387
+ "eval_samples_per_second": 4.178,
1388
+ "eval_steps_per_second": 0.131,
1389
+ "learning_rate": 1e-05,
1390
+ "step": 23290
1391
+ },
1392
+ {
1393
+ "epoch": 85.77,
1394
+ "learning_rate": 1e-05,
1395
+ "loss": 0.0865,
1396
+ "step": 23500
1397
+ },
1398
+ {
1399
+ "epoch": 86.0,
1400
+ "eval_accuracy": 0.5600139567341242,
1401
+ "eval_f1_macro": 0.7495768803790436,
1402
+ "eval_f1_micro": 0.8553152949167893,
1403
+ "eval_loss": 0.0821395218372345,
1404
+ "eval_roc_auc": 0.9060269285615214,
1405
+ "eval_runtime": 676.0683,
1406
+ "eval_samples_per_second": 4.239,
1407
+ "eval_steps_per_second": 0.133,
1408
+ "learning_rate": 1e-05,
1409
+ "step": 23564
1410
+ },
1411
+ {
1412
+ "epoch": 87.0,
1413
+ "eval_accuracy": 0.5586182833217027,
1414
+ "eval_f1_macro": 0.7518865385404515,
1415
+ "eval_f1_micro": 0.8558878188883257,
1416
+ "eval_loss": 0.08169202506542206,
1417
+ "eval_roc_auc": 0.9080884949227034,
1418
+ "eval_runtime": 683.932,
1419
+ "eval_samples_per_second": 4.19,
1420
+ "eval_steps_per_second": 0.132,
1421
+ "learning_rate": 1e-05,
1422
+ "step": 23838
1423
+ },
1424
+ {
1425
+ "epoch": 87.59,
1426
+ "learning_rate": 1e-05,
1427
+ "loss": 0.0868,
1428
+ "step": 24000
1429
+ },
1430
+ {
1431
+ "epoch": 88.0,
1432
+ "eval_accuracy": 0.5621074668527565,
1433
+ "eval_f1_macro": 0.7526313975322289,
1434
+ "eval_f1_micro": 0.8558239036198839,
1435
+ "eval_loss": 0.08171343803405762,
1436
+ "eval_roc_auc": 0.9081685470699702,
1437
+ "eval_runtime": 687.6304,
1438
+ "eval_samples_per_second": 4.168,
1439
+ "eval_steps_per_second": 0.131,
1440
+ "learning_rate": 1e-05,
1441
+ "step": 24112
1442
+ },
1443
+ {
1444
+ "epoch": 89.0,
1445
+ "eval_accuracy": 0.5638520586182834,
1446
+ "eval_f1_macro": 0.7535945449960157,
1447
+ "eval_f1_micro": 0.8569978572234127,
1448
+ "eval_loss": 0.08180436491966248,
1449
+ "eval_roc_auc": 0.9082570227224631,
1450
+ "eval_runtime": 685.8987,
1451
+ "eval_samples_per_second": 4.178,
1452
+ "eval_steps_per_second": 0.131,
1453
+ "learning_rate": 1e-05,
1454
+ "step": 24386
1455
+ },
1456
+ {
1457
+ "epoch": 89.42,
1458
+ "learning_rate": 1e-05,
1459
+ "loss": 0.0857,
1460
+ "step": 24500
1461
+ },
1462
+ {
1463
+ "epoch": 90.0,
1464
+ "eval_accuracy": 0.5617585484996511,
1465
+ "eval_f1_macro": 0.7521712775972677,
1466
+ "eval_f1_micro": 0.8557914296976182,
1467
+ "eval_loss": 0.08184907585382462,
1468
+ "eval_roc_auc": 0.9080738403314199,
1469
+ "eval_runtime": 2199.2107,
1470
+ "eval_samples_per_second": 1.303,
1471
+ "eval_steps_per_second": 0.041,
1472
+ "learning_rate": 1e-05,
1473
+ "step": 24660
1474
+ },
1475
+ {
1476
+ "epoch": 91.0,
1477
+ "eval_accuracy": 0.5631542219120725,
1478
+ "eval_f1_macro": 0.7496381590553177,
1479
+ "eval_f1_micro": 0.856868937514099,
1480
+ "eval_loss": 0.08175913989543915,
1481
+ "eval_roc_auc": 0.9081476613926293,
1482
+ "eval_runtime": 685.7142,
1483
+ "eval_samples_per_second": 4.18,
1484
+ "eval_steps_per_second": 0.131,
1485
+ "learning_rate": 1e-05,
1486
+ "step": 24934
1487
+ },
1488
+ {
1489
+ "epoch": 91.24,
1490
+ "learning_rate": 1e-05,
1491
+ "loss": 0.0862,
1492
+ "step": 25000
1493
+ },
1494
+ {
1495
+ "epoch": 92.0,
1496
+ "eval_accuracy": 0.5648988136775994,
1497
+ "eval_f1_macro": 0.7551884126903309,
1498
+ "eval_f1_micro": 0.8566131025957974,
1499
+ "eval_loss": 0.08205177634954453,
1500
+ "eval_roc_auc": 0.9092762668547993,
1501
+ "eval_runtime": 688.0975,
1502
+ "eval_samples_per_second": 4.165,
1503
+ "eval_steps_per_second": 0.131,
1504
+ "learning_rate": 1e-05,
1505
+ "step": 25208
1506
+ },
1507
+ {
1508
+ "epoch": 93.0,
1509
+ "eval_accuracy": 0.5628053035589672,
1510
+ "eval_f1_macro": 0.7579638727848569,
1511
+ "eval_f1_micro": 0.8588642195693407,
1512
+ "eval_loss": 0.08153587579727173,
1513
+ "eval_roc_auc": 0.9129889700545543,
1514
+ "eval_runtime": 686.4355,
1515
+ "eval_samples_per_second": 4.175,
1516
+ "eval_steps_per_second": 0.131,
1517
+ "learning_rate": 1e-05,
1518
+ "step": 25482
1519
+ },
1520
+ {
1521
+ "epoch": 93.07,
1522
+ "learning_rate": 1e-05,
1523
+ "loss": 0.0851,
1524
+ "step": 25500
1525
+ },
1526
+ {
1527
+ "epoch": 94.0,
1528
+ "eval_accuracy": 0.5600139567341242,
1529
+ "eval_f1_macro": 0.7565755790887538,
1530
+ "eval_f1_micro": 0.8570630932439977,
1531
+ "eval_loss": 0.0816139355301857,
1532
+ "eval_roc_auc": 0.911680023691974,
1533
+ "eval_runtime": 686.9681,
1534
+ "eval_samples_per_second": 4.172,
1535
+ "eval_steps_per_second": 0.131,
1536
+ "learning_rate": 1e-05,
1537
+ "step": 25756
1538
+ },
1539
+ {
1540
+ "epoch": 94.89,
1541
+ "learning_rate": 1e-05,
1542
+ "loss": 0.0854,
1543
+ "step": 26000
1544
+ },
1545
+ {
1546
+ "epoch": 95.0,
1547
+ "eval_accuracy": 0.5631542219120725,
1548
+ "eval_f1_macro": 0.7553215815461111,
1549
+ "eval_f1_micro": 0.8563582357226924,
1550
+ "eval_loss": 0.08153104782104492,
1551
+ "eval_roc_auc": 0.9100006738804182,
1552
+ "eval_runtime": 792.6719,
1553
+ "eval_samples_per_second": 3.616,
1554
+ "eval_steps_per_second": 0.114,
1555
+ "learning_rate": 1e-05,
1556
+ "step": 26030
1557
+ },
1558
+ {
1559
+ "epoch": 96.0,
1560
+ "eval_accuracy": 0.5621074668527565,
1561
+ "eval_f1_macro": 0.7584737150271473,
1562
+ "eval_f1_micro": 0.8576208800401539,
1563
+ "eval_loss": 0.081536203622818,
1564
+ "eval_roc_auc": 0.9123942619313182,
1565
+ "eval_runtime": 689.1211,
1566
+ "eval_samples_per_second": 4.159,
1567
+ "eval_steps_per_second": 0.131,
1568
+ "learning_rate": 1e-05,
1569
+ "step": 26304
1570
+ },
1571
+ {
1572
+ "epoch": 96.72,
1573
+ "learning_rate": 1e-05,
1574
+ "loss": 0.0854,
1575
+ "step": 26500
1576
+ },
1577
+ {
1578
+ "epoch": 97.0,
1579
+ "eval_accuracy": 0.5628053035589672,
1580
+ "eval_f1_macro": 0.7578675241138624,
1581
+ "eval_f1_micro": 0.8575591168889387,
1582
+ "eval_loss": 0.0817214846611023,
1583
+ "eval_roc_auc": 0.9107227875201963,
1584
+ "eval_runtime": 791.8215,
1585
+ "eval_samples_per_second": 3.62,
1586
+ "eval_steps_per_second": 0.114,
1587
+ "learning_rate": 1e-05,
1588
+ "step": 26578
1589
+ },
1590
+ {
1591
+ "epoch": 98.0,
1592
+ "eval_accuracy": 0.5638520586182834,
1593
+ "eval_f1_macro": 0.7526862577512524,
1594
+ "eval_f1_micro": 0.8570947651910453,
1595
+ "eval_loss": 0.08161807805299759,
1596
+ "eval_roc_auc": 0.9100232038721354,
1597
+ "eval_runtime": 689.7672,
1598
+ "eval_samples_per_second": 4.155,
1599
+ "eval_steps_per_second": 0.13,
1600
+ "learning_rate": 1e-05,
1601
+ "step": 26852
1602
+ },
1603
+ {
1604
+ "epoch": 98.54,
1605
+ "learning_rate": 1e-05,
1606
+ "loss": 0.0855,
1607
+ "step": 27000
1608
+ },
1609
+ {
1610
+ "epoch": 99.0,
1611
+ "eval_accuracy": 0.5642009769713887,
1612
+ "eval_f1_macro": 0.7556285126609241,
1613
+ "eval_f1_micro": 0.8578035986237239,
1614
+ "eval_loss": 0.08184286206960678,
1615
+ "eval_roc_auc": 0.9086036990034648,
1616
+ "eval_runtime": 685.3364,
1617
+ "eval_samples_per_second": 4.182,
1618
+ "eval_steps_per_second": 0.131,
1619
+ "learning_rate": 1e-05,
1620
+ "step": 27126
1621
+ },
1622
+ {
1623
+ "epoch": 100.0,
1624
+ "eval_accuracy": 0.5631542219120725,
1625
+ "eval_f1_macro": 0.753319281703831,
1626
+ "eval_f1_micro": 0.857062242537103,
1627
+ "eval_loss": 0.08161789923906326,
1628
+ "eval_roc_auc": 0.9080422377630124,
1629
+ "eval_runtime": 680.1579,
1630
+ "eval_samples_per_second": 4.214,
1631
+ "eval_steps_per_second": 0.132,
1632
+ "learning_rate": 1.0000000000000002e-06,
1633
+ "step": 27400
1634
+ },
1635
+ {
1636
+ "epoch": 100.36,
1637
+ "learning_rate": 1.0000000000000002e-06,
1638
+ "loss": 0.0837,
1639
+ "step": 27500
1640
+ },
1641
+ {
1642
+ "epoch": 101.0,
1643
+ "eval_accuracy": 0.5645498953244941,
1644
+ "eval_f1_macro": 0.7553159980269647,
1645
+ "eval_f1_micro": 0.8575286968264686,
1646
+ "eval_loss": 0.08142262697219849,
1647
+ "eval_roc_auc": 0.9092807526598085,
1648
+ "eval_runtime": 693.9432,
1649
+ "eval_samples_per_second": 4.13,
1650
+ "eval_steps_per_second": 0.13,
1651
+ "learning_rate": 1.0000000000000002e-06,
1652
+ "step": 27674
1653
+ },
1654
+ {
1655
+ "epoch": 102.0,
1656
+ "eval_accuracy": 0.5652477320307048,
1657
+ "eval_f1_macro": 0.7559444477928223,
1658
+ "eval_f1_micro": 0.8571749382438806,
1659
+ "eval_loss": 0.08143333345651627,
1660
+ "eval_roc_auc": 0.9098557722819599,
1661
+ "eval_runtime": 695.0171,
1662
+ "eval_samples_per_second": 4.124,
1663
+ "eval_steps_per_second": 0.129,
1664
+ "learning_rate": 1.0000000000000002e-06,
1665
+ "step": 27948
1666
+ },
1667
+ {
1668
+ "epoch": 102.19,
1669
+ "learning_rate": 1.0000000000000002e-06,
1670
+ "loss": 0.085,
1671
+ "step": 28000
1672
+ },
1673
+ {
1674
+ "epoch": 103.0,
1675
+ "eval_accuracy": 0.5645498953244941,
1676
+ "eval_f1_macro": 0.7565514506209431,
1677
+ "eval_f1_micro": 0.8570462613399448,
1678
+ "eval_loss": 0.08157742768526077,
1679
+ "eval_roc_auc": 0.9085338156424723,
1680
+ "eval_runtime": 710.2061,
1681
+ "eval_samples_per_second": 4.035,
1682
+ "eval_steps_per_second": 0.127,
1683
+ "learning_rate": 1.0000000000000002e-06,
1684
+ "step": 28222
1685
+ },
1686
+ {
1687
+ "epoch": 104.0,
1688
+ "eval_accuracy": 0.5645498953244941,
1689
+ "eval_f1_macro": 0.7572978759334409,
1690
+ "eval_f1_micro": 0.857623884617543,
1691
+ "eval_loss": 0.08124219626188278,
1692
+ "eval_roc_auc": 0.9102385369363781,
1693
+ "eval_runtime": 703.8703,
1694
+ "eval_samples_per_second": 4.072,
1695
+ "eval_steps_per_second": 0.128,
1696
+ "learning_rate": 1.0000000000000002e-06,
1697
+ "step": 28496
1698
+ },
1699
+ {
1700
+ "epoch": 104.01,
1701
+ "learning_rate": 1.0000000000000002e-06,
1702
+ "loss": 0.0844,
1703
+ "step": 28500
1704
+ },
1705
+ {
1706
+ "epoch": 105.0,
1707
+ "eval_accuracy": 0.5603628750872296,
1708
+ "eval_f1_macro": 0.7588882568234135,
1709
+ "eval_f1_micro": 0.8571747004736696,
1710
+ "eval_loss": 0.08170615136623383,
1711
+ "eval_roc_auc": 0.9124156957134514,
1712
+ "eval_runtime": 693.4706,
1713
+ "eval_samples_per_second": 4.133,
1714
+ "eval_steps_per_second": 0.13,
1715
+ "learning_rate": 1.0000000000000002e-06,
1716
+ "step": 28770
1717
+ },
1718
+ {
1719
+ "epoch": 105.84,
1720
+ "learning_rate": 1.0000000000000002e-06,
1721
+ "loss": 0.0845,
1722
+ "step": 29000
1723
+ },
1724
+ {
1725
+ "epoch": 106.0,
1726
+ "eval_accuracy": 0.5628053035589672,
1727
+ "eval_f1_macro": 0.7514055460147794,
1728
+ "eval_f1_micro": 0.8562891131532955,
1729
+ "eval_loss": 0.08144387602806091,
1730
+ "eval_roc_auc": 0.9079250010327448,
1731
+ "eval_runtime": 693.924,
1732
+ "eval_samples_per_second": 4.13,
1733
+ "eval_steps_per_second": 0.13,
1734
+ "learning_rate": 1.0000000000000002e-06,
1735
+ "step": 29044
1736
+ },
1737
+ {
1738
+ "epoch": 107.0,
1739
+ "eval_accuracy": 0.5635031402651779,
1740
+ "eval_f1_macro": 0.7490061784836257,
1741
+ "eval_f1_micro": 0.8557823129251702,
1742
+ "eval_loss": 0.08168599754571915,
1743
+ "eval_roc_auc": 0.9057833825242961,
1744
+ "eval_runtime": 697.3665,
1745
+ "eval_samples_per_second": 4.11,
1746
+ "eval_steps_per_second": 0.129,
1747
+ "learning_rate": 1.0000000000000002e-06,
1748
+ "step": 29318
1749
+ },
1750
+ {
1751
+ "epoch": 107.66,
1752
+ "learning_rate": 1.0000000000000002e-06,
1753
+ "loss": 0.0854,
1754
+ "step": 29500
1755
+ },
1756
+ {
1757
+ "epoch": 108.0,
1758
+ "eval_accuracy": 0.5642009769713887,
1759
+ "eval_f1_macro": 0.7569164155873785,
1760
+ "eval_f1_micro": 0.8569341425039335,
1761
+ "eval_loss": 0.08160943537950516,
1762
+ "eval_roc_auc": 0.9094149373671999,
1763
+ "eval_runtime": 692.7895,
1764
+ "eval_samples_per_second": 4.137,
1765
+ "eval_steps_per_second": 0.13,
1766
+ "learning_rate": 1.0000000000000002e-06,
1767
+ "step": 29592
1768
+ },
1769
+ {
1770
+ "epoch": 109.0,
1771
+ "eval_accuracy": 0.5652477320307048,
1772
+ "eval_f1_macro": 0.7558234898859766,
1773
+ "eval_f1_micro": 0.8573669467787115,
1774
+ "eval_loss": 0.08135490119457245,
1775
+ "eval_roc_auc": 0.9106934783376293,
1776
+ "eval_runtime": 695.7903,
1777
+ "eval_samples_per_second": 4.119,
1778
+ "eval_steps_per_second": 0.129,
1779
+ "learning_rate": 1.0000000000000002e-06,
1780
+ "step": 29866
1781
+ },
1782
+ {
1783
+ "epoch": 109.49,
1784
+ "learning_rate": 1.0000000000000002e-06,
1785
+ "loss": 0.0854,
1786
+ "step": 30000
1787
+ },
1788
+ {
1789
+ "epoch": 110.0,
1790
+ "eval_accuracy": 0.5638520586182834,
1791
+ "eval_f1_macro": 0.7564667466516518,
1792
+ "eval_f1_micro": 0.8577973962116557,
1793
+ "eval_loss": 0.08132224529981613,
1794
+ "eval_roc_auc": 0.9118372864958749,
1795
+ "eval_runtime": 682.0266,
1796
+ "eval_samples_per_second": 4.202,
1797
+ "eval_steps_per_second": 0.132,
1798
+ "learning_rate": 1.0000000000000002e-06,
1799
+ "step": 30140
1800
+ },
1801
+ {
1802
+ "epoch": 111.0,
1803
+ "eval_accuracy": 0.5638520586182834,
1804
+ "eval_f1_macro": 0.7579481466413214,
1805
+ "eval_f1_micro": 0.8576381122791322,
1806
+ "eval_loss": 0.0814104825258255,
1807
+ "eval_roc_auc": 0.911498485615307,
1808
+ "eval_runtime": 686.5544,
1809
+ "eval_samples_per_second": 4.174,
1810
+ "eval_steps_per_second": 0.131,
1811
+ "learning_rate": 1.0000000000000002e-07,
1812
+ "step": 30414
1813
+ },
1814
+ {
1815
+ "epoch": 111.31,
1816
+ "learning_rate": 1.0000000000000002e-07,
1817
+ "loss": 0.0851,
1818
+ "step": 30500
1819
+ },
1820
+ {
1821
+ "epoch": 112.0,
1822
+ "eval_accuracy": 0.5631542219120725,
1823
+ "eval_f1_macro": 0.7575801018117111,
1824
+ "eval_f1_micro": 0.8580721134974485,
1825
+ "eval_loss": 0.08170080929994583,
1826
+ "eval_roc_auc": 0.91075603440298,
1827
+ "eval_runtime": 746.778,
1828
+ "eval_samples_per_second": 3.838,
1829
+ "eval_steps_per_second": 0.121,
1830
+ "learning_rate": 1.0000000000000002e-07,
1831
+ "step": 30688
1832
+ },
1833
+ {
1834
+ "epoch": 113.0,
1835
+ "eval_accuracy": 0.5614096301465457,
1836
+ "eval_f1_macro": 0.7562692232145891,
1837
+ "eval_f1_micro": 0.8583059164668487,
1838
+ "eval_loss": 0.081505186855793,
1839
+ "eval_roc_auc": 0.912813663063945,
1840
+ "eval_runtime": 687.4479,
1841
+ "eval_samples_per_second": 4.169,
1842
+ "eval_steps_per_second": 0.131,
1843
+ "learning_rate": 1.0000000000000002e-07,
1844
+ "step": 30962
1845
+ },
1846
+ {
1847
+ "epoch": 113.14,
1848
+ "learning_rate": 1.0000000000000002e-07,
1849
+ "loss": 0.0848,
1850
+ "step": 31000
1851
+ },
1852
+ {
1853
+ "epoch": 114.0,
1854
+ "eval_accuracy": 0.5655966503838102,
1855
+ "eval_f1_macro": 0.7560403631861836,
1856
+ "eval_f1_micro": 0.8563654914408797,
1857
+ "eval_loss": 0.08186182379722595,
1858
+ "eval_roc_auc": 0.9061407756963643,
1859
+ "eval_runtime": 731.2304,
1860
+ "eval_samples_per_second": 3.919,
1861
+ "eval_steps_per_second": 0.123,
1862
+ "learning_rate": 1.0000000000000002e-07,
1863
+ "step": 31236
1864
+ },
1865
+ {
1866
+ "epoch": 114.0,
1867
+ "learning_rate": 1.0000000000000002e-07,
1868
+ "step": 31236,
1869
+ "total_flos": 1.4819663325231928e+21,
1870
+ "train_loss": 0.033337813025782426,
1871
+ "train_runtime": 131818.0427,
1872
+ "train_samples_per_second": 9.311,
1873
+ "train_steps_per_second": 0.291
1874
  }
1875
  ],
1876
  "logging_steps": 500,
1877
+ "max_steps": 38360,
1878
+ "num_train_epochs": 140,
1879
  "save_steps": 500,
1880
+ "total_flos": 1.4819663325231928e+21,
1881
  "trial_name": null,
1882
  "trial_params": null
1883
  }