ylacombe (HF staff) committed
Commit d58c6c2
1 Parent(s): d3e5c2a

End of training

README.md CHANGED
@@ -2,6 +2,8 @@
 license: mit
 base_model: facebook/w2v-bert-2.0
 tags:
+- automatic-speech-recognition
+- librispeech_asr
 - generated_from_trainer
 metrics:
 - wer
@@ -15,11 +17,11 @@ should probably proofread and complete it, then remove this comment. -->
 
 # wav2vec2-bert-CV16-en-libri
 
-This model is a fine-tuned version of [facebook/w2v-bert-2.0](https://huggingface.co/facebook/w2v-bert-2.0) on the None dataset.
+This model is a fine-tuned version of [facebook/w2v-bert-2.0](https://huggingface.co/facebook/w2v-bert-2.0) on the LIBRISPEECH_ASR - CLEAN dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.1490
-- Wer: 0.1163
-- Cer: 0.0298
+- Loss: 0.1331
+- Wer: 0.0997
+- Cer: 0.0264
 
 ## Model description
 
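A model card like the one above is typically exercised through the transformers ASR pipeline. Below is a minimal, hedged sketch; the repo id `ylacombe/wav2vec2-bert-CV16-en-libri` and the audio file name are assumptions inferred from the card, not values recorded in this commit.

```python
# Minimal inference sketch for the fine-tuned checkpoint described above.
# Assumptions: the repo id and the local file "sample.wav" are illustrative.
from transformers import pipeline

asr = pipeline(
    "automatic-speech-recognition",
    model="ylacombe/wav2vec2-bert-CV16-en-libri",  # assumed repo id
)
print(asr("sample.wav")["text"])
```

The pipeline resamples input audio to the feature extractor's 16 kHz rate, matching LibriSpeech.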
all_results.json CHANGED
@@ -1,15 +1,15 @@
 {
-    "epoch": 3.0,
-    "eval_cer": 0.04829169911539767,
-    "eval_loss": 0.22062508761882782,
-    "eval_runtime": 59.6361,
+    "epoch": 5.0,
+    "eval_cer": 0.02643822760039138,
+    "eval_loss": 0.13311129808425903,
+    "eval_runtime": 59.8158,
     "eval_samples": 2528,
-    "eval_samples_per_second": 42.39,
-    "eval_steps_per_second": 1.191,
-    "eval_wer": 0.1923713703515028,
-    "train_loss": 1.5876106701113961,
-    "train_runtime": 5725.9395,
+    "eval_samples_per_second": 42.263,
+    "eval_steps_per_second": 1.187,
+    "eval_wer": 0.09965613856342333,
+    "train_loss": 0.09581804365822763,
+    "train_runtime": 4900.8909,
     "train_samples": 28538,
-    "train_samples_per_second": 14.952,
-    "train_steps_per_second": 0.207
+    "train_samples_per_second": 29.115,
+    "train_steps_per_second": 0.404
 }
eval_results.json CHANGED
@@ -1,10 +1,10 @@
 {
-    "epoch": 3.0,
-    "eval_cer": 0.04829169911539767,
-    "eval_loss": 0.22062508761882782,
-    "eval_runtime": 59.6361,
+    "epoch": 5.0,
+    "eval_cer": 0.02643822760039138,
+    "eval_loss": 0.13311129808425903,
+    "eval_runtime": 59.8158,
     "eval_samples": 2528,
-    "eval_samples_per_second": 42.39,
-    "eval_steps_per_second": 1.191,
-    "eval_wer": 0.1923713703515028
+    "eval_samples_per_second": 42.263,
+    "eval_steps_per_second": 1.187,
+    "eval_wer": 0.09965613856342333
 }
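eval_wer and eval_cer above are word and character error rates over the 2528 evaluation samples; as a sanity check, 2528 / 59.8158 s ≈ 42.26, matching eval_samples_per_second. A small sketch of how such rates are computed with the jiwer package, using hypothetical transcript strings rather than real LibriSpeech decodes:

```python
# Recompute WER/CER from decoded transcripts (hypothetical example strings).
import jiwer

references = ["the cat sat on the mat"]
predictions = ["the cat sat on a mat"]

print("WER:", jiwer.wer(references, predictions))  # 1 substitution / 6 words ≈ 0.167
print("CER:", jiwer.cer(references, predictions))  # character-level analogue
```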
runs/Jan16_13-07-53_vorace/events.out.tfevents.1705415718.vorace.502984.1 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9dd9555772ff7031c1763a0f0080c729251887efd088d16d480fd9fec99c0d7e
+size 405
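The three added lines are a Git LFS pointer (spec version, SHA-256 object id, byte size), not the TensorBoard data itself; the 405-byte event file is fetched separately, e.g. with `git lfs pull`. A hedged sketch of reading scalars back out of the fetched file (the tag names in the comment are typical Trainer names, not verified against this run):

```python
# Read scalar summaries from the added TensorBoard event file.
# Assumes the file has been materialized locally via Git LFS.
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

path = "runs/Jan16_13-07-53_vorace/events.out.tfevents.1705415718.vorace.502984.1"
ea = EventAccumulator(path)
ea.Reload()
for tag in ea.Tags()["scalars"]:  # e.g. "train/loss", "eval/wer" (assumed)
    last = ea.Scalars(tag)[-1]
    print(tag, last.step, last.value)
```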
train_results.json CHANGED
@@ -1,8 +1,8 @@
 {
-    "epoch": 3.0,
-    "train_loss": 1.5876106701113961,
-    "train_runtime": 5725.9395,
+    "epoch": 5.0,
+    "train_loss": 0.09581804365822763,
+    "train_runtime": 4900.8909,
     "train_samples": 28538,
-    "train_samples_per_second": 14.952,
-    "train_steps_per_second": 0.207
+    "train_samples_per_second": 29.115,
+    "train_steps_per_second": 0.404
 }
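The throughput fields in train_results.json are internally consistent with the run length; a quick check using only numbers from this commit (max_steps comes from trainer_state.json below):

```python
# Consistency check on the reported training throughput.
train_samples = 28538
num_train_epochs = 5
train_runtime = 4900.8909  # seconds
max_steps = 1980           # from trainer_state.json below

print(train_samples * num_train_epochs / train_runtime)  # ≈ 29.115 samples/s
print(max_steps / train_runtime)                          # ≈ 0.404 steps/s
```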
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
     "best_metric": null,
     "best_model_checkpoint": null,
-    "epoch": 2.9962168978562422,
+    "epoch": 4.996216897856242,
     "eval_steps": 250,
-    "global_step": 1188,
+    "global_step": 1980,
     "is_hyper_param_search": false,
     "is_local_process_zero": true,
     "is_world_process_zero": true,
@@ -1249,243 +1249,1227 @@
1249
  "step": 1000
1250
  },
1251
  {
1252
- "epoch": 2.53,
1253
  "learning_rate": 3.006e-06,
1254
- "loss": 0.306,
1255
  "step": 1005
1256
  },
1257
  {
1258
  "epoch": 2.55,
1259
- "learning_rate": 3.021e-06,
1260
- "loss": 0.3832,
1261
  "step": 1010
1262
  },
1263
  {
1264
  "epoch": 2.56,
1265
- "learning_rate": 3.036e-06,
1266
- "loss": 0.2945,
1267
  "step": 1015
1268
  },
1269
  {
1270
- "epoch": 2.57,
1271
- "learning_rate": 3.051e-06,
1272
- "loss": 0.2986,
1273
  "step": 1020
1274
  },
1275
  {
1276
  "epoch": 2.59,
1277
- "learning_rate": 3.066e-06,
1278
- "loss": 0.328,
1279
  "step": 1025
1280
  },
1281
  {
1282
  "epoch": 2.6,
1283
- "learning_rate": 3.0810000000000002e-06,
1284
- "loss": 0.341,
1285
  "step": 1030
1286
  },
1287
  {
1288
  "epoch": 2.61,
1289
- "learning_rate": 3.096e-06,
1290
- "loss": 0.2928,
1291
  "step": 1035
1292
  },
1293
  {
1294
- "epoch": 2.62,
1295
- "learning_rate": 3.111e-06,
1296
- "loss": 0.3032,
1297
  "step": 1040
1298
  },
1299
  {
1300
  "epoch": 2.64,
1301
- "learning_rate": 3.1260000000000002e-06,
1302
- "loss": 0.3132,
1303
  "step": 1045
1304
  },
1305
  {
1306
  "epoch": 2.65,
1307
- "learning_rate": 3.141e-06,
1308
- "loss": 0.2795,
1309
  "step": 1050
1310
  },
1311
  {
1312
  "epoch": 2.66,
1313
- "learning_rate": 3.156e-06,
1314
- "loss": 0.3074,
1315
  "step": 1055
1316
  },
1317
  {
1318
- "epoch": 2.67,
1319
- "learning_rate": 3.1710000000000002e-06,
1320
- "loss": 0.3546,
1321
  "step": 1060
1322
  },
1323
  {
1324
  "epoch": 2.69,
1325
- "learning_rate": 3.186e-06,
1326
- "loss": 0.2661,
1327
  "step": 1065
1328
  },
1329
  {
1330
  "epoch": 2.7,
1331
- "learning_rate": 3.2010000000000004e-06,
1332
- "loss": 0.2756,
1333
  "step": 1070
1334
  },
1335
  {
1336
  "epoch": 2.71,
1337
- "learning_rate": 3.216e-06,
1338
- "loss": 0.3041,
1339
  "step": 1075
1340
  },
1341
  {
1342
- "epoch": 2.72,
1343
- "learning_rate": 3.231e-06,
1344
- "loss": 0.3163,
1345
  "step": 1080
1346
  },
1347
  {
1348
  "epoch": 2.74,
1349
- "learning_rate": 3.2460000000000003e-06,
1350
- "loss": 0.2646,
1351
  "step": 1085
1352
  },
1353
  {
1354
  "epoch": 2.75,
1355
- "learning_rate": 3.261e-06,
1356
- "loss": 0.2772,
1357
  "step": 1090
1358
  },
1359
  {
1360
  "epoch": 2.76,
1361
- "learning_rate": 3.276e-06,
1362
- "loss": 0.3198,
1363
  "step": 1095
1364
  },
1365
  {
1366
- "epoch": 2.77,
1367
- "learning_rate": 3.2910000000000003e-06,
1368
- "loss": 0.2586,
1369
  "step": 1100
1370
  },
1371
  {
1372
  "epoch": 2.79,
1373
- "learning_rate": 3.306e-06,
1374
- "loss": 0.2671,
1375
  "step": 1105
1376
  },
1377
  {
1378
  "epoch": 2.8,
1379
- "learning_rate": 3.3210000000000005e-06,
1380
- "loss": 0.3135,
1381
  "step": 1110
1382
  },
1383
  {
1384
  "epoch": 2.81,
1385
- "learning_rate": 3.336e-06,
1386
- "loss": 0.2482,
1387
  "step": 1115
1388
  },
1389
  {
1390
- "epoch": 2.82,
1391
- "learning_rate": 3.3509999999999998e-06,
1392
- "loss": 0.2427,
1393
  "step": 1120
1394
  },
1395
  {
1396
  "epoch": 2.84,
1397
- "learning_rate": 3.366e-06,
1398
- "loss": 0.2821,
1399
  "step": 1125
1400
  },
1401
  {
1402
  "epoch": 2.85,
1403
- "learning_rate": 3.381e-06,
1404
- "loss": 0.2651,
1405
  "step": 1130
1406
  },
1407
  {
1408
- "epoch": 2.86,
1409
- "learning_rate": 3.3959999999999998e-06,
1410
- "loss": 0.2517,
1411
  "step": 1135
1412
  },
1413
  {
1414
  "epoch": 2.88,
1415
- "learning_rate": 3.411e-06,
1416
- "loss": 0.2528,
1417
  "step": 1140
1418
  },
1419
  {
1420
  "epoch": 2.89,
1421
- "learning_rate": 3.426e-06,
1422
- "loss": 0.3223,
1423
  "step": 1145
1424
  },
1425
  {
1426
  "epoch": 2.9,
1427
- "learning_rate": 3.441e-06,
1428
- "loss": 0.2383,
1429
  "step": 1150
1430
  },
1431
  {
1432
- "epoch": 2.91,
1433
- "learning_rate": 3.456e-06,
1434
- "loss": 0.2402,
1435
  "step": 1155
1436
  },
1437
  {
1438
  "epoch": 2.93,
1439
- "learning_rate": 3.471e-06,
1440
- "loss": 0.2866,
1441
  "step": 1160
1442
  },
1443
  {
1444
  "epoch": 2.94,
1445
- "learning_rate": 3.486e-06,
1446
- "loss": 0.2316,
1447
  "step": 1165
1448
  },
1449
  {
1450
  "epoch": 2.95,
1451
- "learning_rate": 3.501e-06,
1452
- "loss": 0.2293,
1453
  "step": 1170
1454
  },
1455
  {
1456
- "epoch": 2.96,
1457
- "learning_rate": 3.516e-06,
1458
- "loss": 0.2607,
1459
  "step": 1175
1460
  },
1461
  {
1462
  "epoch": 2.98,
1463
- "learning_rate": 3.531e-06,
1464
- "loss": 0.2492,
1465
  "step": 1180
1466
  },
1467
  {
1468
  "epoch": 2.99,
1469
- "learning_rate": 3.546e-06,
1470
- "loss": 0.2366,
1471
  "step": 1185
1472
  },
1473
  {
1474
  "epoch": 3.0,
1475
- "step": 1188,
1476
- "total_flos": 3.172956745754896e+19,
1477
- "train_loss": 1.5876106701113961,
1478
- "train_runtime": 5725.9395,
1479
- "train_samples_per_second": 14.952,
1480
- "train_steps_per_second": 0.207
1481
  }
1482
  ],
1483
  "logging_steps": 5,
1484
- "max_steps": 1188,
1485
  "num_input_tokens_seen": 0,
1486
- "num_train_epochs": 3,
1487
  "save_steps": 500,
1488
- "total_flos": 3.172956745754896e+19,
1489
  "train_batch_size": 12,
1490
  "trial_name": null,
1491
  "trial_params": null
 
1249
  "step": 1000
1250
  },
1251
  {
1252
+ "epoch": 2.54,
1253
  "learning_rate": 3.006e-06,
1254
+ "loss": 0.292,
1255
  "step": 1005
1256
  },
1257
  {
1258
  "epoch": 2.55,
1259
+ "learning_rate": 3.0179999999999997e-06,
1260
+ "loss": 0.3656,
1261
  "step": 1010
1262
  },
1263
  {
1264
  "epoch": 2.56,
1265
+ "learning_rate": 3.033e-06,
1266
+ "loss": 0.3029,
1267
  "step": 1015
1268
  },
1269
  {
1270
+ "epoch": 2.58,
1271
+ "learning_rate": 3.048e-06,
1272
+ "loss": 0.2976,
1273
  "step": 1020
1274
  },
1275
  {
1276
  "epoch": 2.59,
1277
+ "learning_rate": 3.063e-06,
1278
+ "loss": 0.3674,
1279
  "step": 1025
1280
  },
1281
  {
1282
  "epoch": 2.6,
1283
+ "learning_rate": 3.078e-06,
1284
+ "loss": 0.3063,
1285
  "step": 1030
1286
  },
1287
  {
1288
  "epoch": 2.61,
1289
+ "learning_rate": 3.093e-06,
1290
+ "loss": 0.2821,
1291
  "step": 1035
1292
  },
1293
  {
1294
+ "epoch": 2.63,
1295
+ "learning_rate": 3.108e-06,
1296
+ "loss": 0.2804,
1297
  "step": 1040
1298
  },
1299
  {
1300
  "epoch": 2.64,
1301
+ "learning_rate": 3.123e-06,
1302
+ "loss": 0.3475,
1303
  "step": 1045
1304
  },
1305
  {
1306
  "epoch": 2.65,
1307
+ "learning_rate": 3.138e-06,
1308
+ "loss": 0.2895,
1309
  "step": 1050
1310
  },
1311
  {
1312
  "epoch": 2.66,
1313
+ "learning_rate": 3.153e-06,
1314
+ "loss": 0.278,
1315
  "step": 1055
1316
  },
1317
  {
1318
+ "epoch": 2.68,
1319
+ "learning_rate": 3.168e-06,
1320
+ "loss": 0.3217,
1321
  "step": 1060
1322
  },
1323
  {
1324
  "epoch": 2.69,
1325
+ "learning_rate": 3.1830000000000003e-06,
1326
+ "loss": 0.2822,
1327
  "step": 1065
1328
  },
1329
  {
1330
  "epoch": 2.7,
1331
+ "learning_rate": 3.198e-06,
1332
+ "loss": 0.2736,
1333
  "step": 1070
1334
  },
1335
  {
1336
  "epoch": 2.71,
1337
+ "learning_rate": 3.213e-06,
1338
+ "loss": 0.3326,
1339
  "step": 1075
1340
  },
1341
  {
1342
+ "epoch": 2.73,
1343
+ "learning_rate": 3.2280000000000003e-06,
1344
+ "loss": 0.2738,
1345
  "step": 1080
1346
  },
1347
  {
1348
  "epoch": 2.74,
1349
+ "learning_rate": 3.243e-06,
1350
+ "loss": 0.2712,
1351
  "step": 1085
1352
  },
1353
  {
1354
  "epoch": 2.75,
1355
+ "learning_rate": 3.258e-06,
1356
+ "loss": 0.2858,
1357
  "step": 1090
1358
  },
1359
  {
1360
  "epoch": 2.76,
1361
+ "learning_rate": 3.2730000000000003e-06,
1362
+ "loss": 0.2962,
1363
  "step": 1095
1364
  },
1365
  {
1366
+ "epoch": 2.78,
1367
+ "learning_rate": 3.288e-06,
1368
+ "loss": 0.2485,
1369
  "step": 1100
1370
  },
1371
  {
1372
  "epoch": 2.79,
1373
+ "learning_rate": 3.3030000000000004e-06,
1374
+ "loss": 0.2566,
1375
  "step": 1105
1376
  },
1377
  {
1378
  "epoch": 2.8,
1379
+ "learning_rate": 3.315e-06,
1380
+ "loss": 0.2978,
1381
  "step": 1110
1382
  },
1383
  {
1384
  "epoch": 2.81,
1385
+ "learning_rate": 3.3300000000000003e-06,
1386
+ "loss": 0.2629,
1387
  "step": 1115
1388
  },
1389
  {
1390
+ "epoch": 2.83,
1391
+ "learning_rate": 3.345e-06,
1392
+ "loss": 0.2559,
1393
  "step": 1120
1394
  },
1395
  {
1396
  "epoch": 2.84,
1397
+ "learning_rate": 3.36e-06,
1398
+ "loss": 0.2628,
1399
  "step": 1125
1400
  },
1401
  {
1402
  "epoch": 2.85,
1403
+ "learning_rate": 3.3750000000000003e-06,
1404
+ "loss": 0.2455,
1405
  "step": 1130
1406
  },
1407
  {
1408
+ "epoch": 2.87,
1409
+ "learning_rate": 3.39e-06,
1410
+ "loss": 0.2501,
1411
  "step": 1135
1412
  },
1413
  {
1414
  "epoch": 2.88,
1415
+ "learning_rate": 3.405e-06,
1416
+ "loss": 0.265,
1417
  "step": 1140
1418
  },
1419
  {
1420
  "epoch": 2.89,
1421
+ "learning_rate": 3.417e-06,
1422
+ "loss": 0.2922,
1423
  "step": 1145
1424
  },
1425
  {
1426
  "epoch": 2.9,
1427
+ "learning_rate": 3.4320000000000003e-06,
1428
+ "loss": 0.2492,
1429
  "step": 1150
1430
  },
1431
  {
1432
+ "epoch": 2.92,
1433
+ "learning_rate": 3.447e-06,
1434
+ "loss": 0.2417,
1435
  "step": 1155
1436
  },
1437
  {
1438
  "epoch": 2.93,
1439
+ "learning_rate": 3.462e-06,
1440
+ "loss": 0.27,
1441
  "step": 1160
1442
  },
1443
  {
1444
  "epoch": 2.94,
1445
+ "learning_rate": 3.4770000000000003e-06,
1446
+ "loss": 0.2335,
1447
  "step": 1165
1448
  },
1449
  {
1450
  "epoch": 2.95,
1451
+ "learning_rate": 3.492e-06,
1452
+ "loss": 0.2327,
1453
  "step": 1170
1454
  },
1455
  {
1456
+ "epoch": 2.97,
1457
+ "learning_rate": 3.507e-06,
1458
+ "loss": 0.2634,
1459
  "step": 1175
1460
  },
1461
  {
1462
  "epoch": 2.98,
1463
+ "learning_rate": 3.5220000000000003e-06,
1464
+ "loss": 0.2389,
1465
  "step": 1180
1466
  },
1467
  {
1468
  "epoch": 2.99,
1469
+ "learning_rate": 3.537e-06,
1470
+ "loss": 0.223,
1471
  "step": 1185
1472
  },
1473
  {
1474
  "epoch": 3.0,
1475
+ "learning_rate": 3.552e-06,
1476
+ "loss": 0.2773,
1477
+ "step": 1190
1478
+ },
1479
+ {
1480
+ "epoch": 3.02,
1481
+ "learning_rate": 3.5670000000000003e-06,
1482
+ "loss": 0.2285,
1483
+ "step": 1195
1484
+ },
1485
+ {
1486
+ "epoch": 3.03,
1487
+ "learning_rate": 3.582e-06,
1488
+ "loss": 0.2183,
1489
+ "step": 1200
1490
+ },
1491
+ {
1492
+ "epoch": 3.04,
1493
+ "learning_rate": 3.5970000000000005e-06,
1494
+ "loss": 0.2967,
1495
+ "step": 1205
1496
+ },
1497
+ {
1498
+ "epoch": 3.05,
1499
+ "learning_rate": 3.612e-06,
1500
+ "loss": 0.2179,
1501
+ "step": 1210
1502
+ },
1503
+ {
1504
+ "epoch": 3.07,
1505
+ "learning_rate": 3.6269999999999997e-06,
1506
+ "loss": 0.231,
1507
+ "step": 1215
1508
+ },
1509
+ {
1510
+ "epoch": 3.08,
1511
+ "learning_rate": 3.642e-06,
1512
+ "loss": 0.2366,
1513
+ "step": 1220
1514
+ },
1515
+ {
1516
+ "epoch": 3.09,
1517
+ "learning_rate": 3.657e-06,
1518
+ "loss": 0.2462,
1519
+ "step": 1225
1520
+ },
1521
+ {
1522
+ "epoch": 3.1,
1523
+ "learning_rate": 3.6719999999999997e-06,
1524
+ "loss": 0.2151,
1525
+ "step": 1230
1526
+ },
1527
+ {
1528
+ "epoch": 3.12,
1529
+ "learning_rate": 3.687e-06,
1530
+ "loss": 0.2162,
1531
+ "step": 1235
1532
+ },
1533
+ {
1534
+ "epoch": 3.13,
1535
+ "learning_rate": 3.702e-06,
1536
+ "loss": 0.2532,
1537
+ "step": 1240
1538
+ },
1539
+ {
1540
+ "epoch": 3.14,
1541
+ "learning_rate": 3.717e-06,
1542
+ "loss": 0.2035,
1543
+ "step": 1245
1544
+ },
1545
+ {
1546
+ "epoch": 3.16,
1547
+ "learning_rate": 3.732e-06,
1548
+ "loss": 0.2089,
1549
+ "step": 1250
1550
+ },
1551
+ {
1552
+ "epoch": 3.16,
1553
+ "eval_cer": 0.044489706264102716,
1554
+ "eval_loss": 0.2078969031572342,
1555
+ "eval_runtime": 113.5687,
1556
+ "eval_samples_per_second": 22.26,
1557
+ "eval_steps_per_second": 0.625,
1558
+ "eval_wer": 0.17657921548650027,
1559
+ "step": 1250
1560
+ },
1561
+ {
1562
+ "epoch": 3.17,
1563
+ "learning_rate": 3.747e-06,
1564
+ "loss": 0.2531,
1565
+ "step": 1255
1566
+ },
1567
+ {
1568
+ "epoch": 3.18,
1569
+ "learning_rate": 3.7620000000000006e-06,
1570
+ "loss": 0.2165,
1571
+ "step": 1260
1572
+ },
1573
+ {
1574
+ "epoch": 3.19,
1575
+ "learning_rate": 3.7770000000000004e-06,
1576
+ "loss": 0.212,
1577
+ "step": 1265
1578
+ },
1579
+ {
1580
+ "epoch": 3.21,
1581
+ "learning_rate": 3.7920000000000003e-06,
1582
+ "loss": 0.2218,
1583
+ "step": 1270
1584
+ },
1585
+ {
1586
+ "epoch": 3.22,
1587
+ "learning_rate": 3.8070000000000006e-06,
1588
+ "loss": 0.2297,
1589
+ "step": 1275
1590
+ },
1591
+ {
1592
+ "epoch": 3.23,
1593
+ "learning_rate": 3.822000000000001e-06,
1594
+ "loss": 0.2095,
1595
+ "step": 1280
1596
+ },
1597
+ {
1598
+ "epoch": 3.24,
1599
+ "learning_rate": 3.837000000000001e-06,
1600
+ "loss": 0.2028,
1601
+ "step": 1285
1602
+ },
1603
+ {
1604
+ "epoch": 3.26,
1605
+ "learning_rate": 3.852e-06,
1606
+ "loss": 0.2619,
1607
+ "step": 1290
1608
+ },
1609
+ {
1610
+ "epoch": 3.27,
1611
+ "learning_rate": 3.8669999999999996e-06,
1612
+ "loss": 0.1918,
1613
+ "step": 1295
1614
+ },
1615
+ {
1616
+ "epoch": 3.28,
1617
+ "learning_rate": 3.8819999999999994e-06,
1618
+ "loss": 0.1945,
1619
+ "step": 1300
1620
+ },
1621
+ {
1622
+ "epoch": 3.29,
1623
+ "learning_rate": 3.897e-06,
1624
+ "loss": 0.2347,
1625
+ "step": 1305
1626
+ },
1627
+ {
1628
+ "epoch": 3.31,
1629
+ "learning_rate": 3.912e-06,
1630
+ "loss": 0.2111,
1631
+ "step": 1310
1632
+ },
1633
+ {
1634
+ "epoch": 3.32,
1635
+ "learning_rate": 3.927e-06,
1636
+ "loss": 0.207,
1637
+ "step": 1315
1638
+ },
1639
+ {
1640
+ "epoch": 3.33,
1641
+ "learning_rate": 3.942e-06,
1642
+ "loss": 0.2281,
1643
+ "step": 1320
1644
+ },
1645
+ {
1646
+ "epoch": 3.34,
1647
+ "learning_rate": 3.9569999999999996e-06,
1648
+ "loss": 0.2334,
1649
+ "step": 1325
1650
+ },
1651
+ {
1652
+ "epoch": 3.36,
1653
+ "learning_rate": 3.971999999999999e-06,
1654
+ "loss": 0.1804,
1655
+ "step": 1330
1656
+ },
1657
+ {
1658
+ "epoch": 3.37,
1659
+ "learning_rate": 3.987e-06,
1660
+ "loss": 0.1943,
1661
+ "step": 1335
1662
+ },
1663
+ {
1664
+ "epoch": 3.38,
1665
+ "learning_rate": 4.002e-06,
1666
+ "loss": 0.2298,
1667
+ "step": 1340
1668
+ },
1669
+ {
1670
+ "epoch": 3.39,
1671
+ "learning_rate": 4.017e-06,
1672
+ "loss": 0.1911,
1673
+ "step": 1345
1674
+ },
1675
+ {
1676
+ "epoch": 3.41,
1677
+ "learning_rate": 4.032e-06,
1678
+ "loss": 0.1967,
1679
+ "step": 1350
1680
+ },
1681
+ {
1682
+ "epoch": 3.42,
1683
+ "learning_rate": 4.0469999999999995e-06,
1684
+ "loss": 0.2311,
1685
+ "step": 1355
1686
+ },
1687
+ {
1688
+ "epoch": 3.43,
1689
+ "learning_rate": 4.062e-06,
1690
+ "loss": 0.201,
1691
+ "step": 1360
1692
+ },
1693
+ {
1694
+ "epoch": 3.45,
1695
+ "learning_rate": 4.077e-06,
1696
+ "loss": 0.1956,
1697
+ "step": 1365
1698
+ },
1699
+ {
1700
+ "epoch": 3.46,
1701
+ "learning_rate": 4.092e-06,
1702
+ "loss": 0.2068,
1703
+ "step": 1370
1704
+ },
1705
+ {
1706
+ "epoch": 3.47,
1707
+ "learning_rate": 4.107e-06,
1708
+ "loss": 0.2211,
1709
+ "step": 1375
1710
+ },
1711
+ {
1712
+ "epoch": 3.48,
1713
+ "learning_rate": 4.122e-06,
1714
+ "loss": 0.187,
1715
+ "step": 1380
1716
+ },
1717
+ {
1718
+ "epoch": 3.5,
1719
+ "learning_rate": 4.137e-06,
1720
+ "loss": 0.1901,
1721
+ "step": 1385
1722
+ },
1723
+ {
1724
+ "epoch": 3.51,
1725
+ "learning_rate": 4.152e-06,
1726
+ "loss": 0.2077,
1727
+ "step": 1390
1728
+ },
1729
+ {
1730
+ "epoch": 3.52,
1731
+ "learning_rate": 4.167e-06,
1732
+ "loss": 0.1781,
1733
+ "step": 1395
1734
+ },
1735
+ {
1736
+ "epoch": 3.53,
1737
+ "learning_rate": 4.182e-06,
1738
+ "loss": 0.1806,
1739
+ "step": 1400
1740
+ },
1741
+ {
1742
+ "epoch": 3.55,
1743
+ "learning_rate": 4.197e-06,
1744
+ "loss": 0.2346,
1745
+ "step": 1405
1746
+ },
1747
+ {
1748
+ "epoch": 3.56,
1749
+ "learning_rate": 4.212e-06,
1750
+ "loss": 0.1864,
1751
+ "step": 1410
1752
+ },
1753
+ {
1754
+ "epoch": 3.57,
1755
+ "learning_rate": 4.227e-06,
1756
+ "loss": 0.1682,
1757
+ "step": 1415
1758
+ },
1759
+ {
1760
+ "epoch": 3.58,
1761
+ "learning_rate": 4.242e-06,
1762
+ "loss": 0.1944,
1763
+ "step": 1420
1764
+ },
1765
+ {
1766
+ "epoch": 3.6,
1767
+ "learning_rate": 4.257e-06,
1768
+ "loss": 0.2108,
1769
+ "step": 1425
1770
+ },
1771
+ {
1772
+ "epoch": 3.61,
1773
+ "learning_rate": 4.272e-06,
1774
+ "loss": 0.1731,
1775
+ "step": 1430
1776
+ },
1777
+ {
1778
+ "epoch": 3.62,
1779
+ "learning_rate": 4.287e-06,
1780
+ "loss": 0.1734,
1781
+ "step": 1435
1782
+ },
1783
+ {
1784
+ "epoch": 3.63,
1785
+ "learning_rate": 4.3020000000000005e-06,
1786
+ "loss": 0.2366,
1787
+ "step": 1440
1788
+ },
1789
+ {
1790
+ "epoch": 3.65,
1791
+ "learning_rate": 4.317e-06,
1792
+ "loss": 0.1858,
1793
+ "step": 1445
1794
+ },
1795
+ {
1796
+ "epoch": 3.66,
1797
+ "learning_rate": 4.332e-06,
1798
+ "loss": 0.1837,
1799
+ "step": 1450
1800
+ },
1801
+ {
1802
+ "epoch": 3.67,
1803
+ "learning_rate": 4.347e-06,
1804
+ "loss": 0.2047,
1805
+ "step": 1455
1806
+ },
1807
+ {
1808
+ "epoch": 3.68,
1809
+ "learning_rate": 4.362e-06,
1810
+ "loss": 0.1838,
1811
+ "step": 1460
1812
+ },
1813
+ {
1814
+ "epoch": 3.7,
1815
+ "learning_rate": 4.377e-06,
1816
+ "loss": 0.1684,
1817
+ "step": 1465
1818
+ },
1819
+ {
1820
+ "epoch": 3.71,
1821
+ "learning_rate": 4.3920000000000005e-06,
1822
+ "loss": 0.1965,
1823
+ "step": 1470
1824
+ },
1825
+ {
1826
+ "epoch": 3.72,
1827
+ "learning_rate": 4.407e-06,
1828
+ "loss": 0.2069,
1829
+ "step": 1475
1830
+ },
1831
+ {
1832
+ "epoch": 3.74,
1833
+ "learning_rate": 4.422e-06,
1834
+ "loss": 0.1751,
1835
+ "step": 1480
1836
+ },
1837
+ {
1838
+ "epoch": 3.75,
1839
+ "learning_rate": 4.437e-06,
1840
+ "loss": 0.1724,
1841
+ "step": 1485
1842
+ },
1843
+ {
1844
+ "epoch": 3.76,
1845
+ "learning_rate": 4.452e-06,
1846
+ "loss": 0.2224,
1847
+ "step": 1490
1848
+ },
1849
+ {
1850
+ "epoch": 3.77,
1851
+ "learning_rate": 4.467000000000001e-06,
1852
+ "loss": 0.1684,
1853
+ "step": 1495
1854
+ },
1855
+ {
1856
+ "epoch": 3.79,
1857
+ "learning_rate": 4.4820000000000005e-06,
1858
+ "loss": 0.1634,
1859
+ "step": 1500
1860
+ },
1861
+ {
1862
+ "epoch": 3.79,
1863
+ "eval_cer": 0.03657820643383454,
1864
+ "eval_loss": 0.16873595118522644,
1865
+ "eval_runtime": 60.2617,
1866
+ "eval_samples_per_second": 41.95,
1867
+ "eval_steps_per_second": 1.178,
1868
+ "eval_wer": 0.14106809305484802,
1869
+ "step": 1500
1870
+ },
1871
+ {
1872
+ "epoch": 3.8,
1873
+ "learning_rate": 4.497e-06,
1874
+ "loss": 0.2137,
1875
+ "step": 1505
1876
+ },
1877
+ {
1878
+ "epoch": 3.81,
1879
+ "learning_rate": 4.5089999999999995e-06,
1880
+ "loss": 0.1759,
1881
+ "step": 1510
1882
+ },
1883
+ {
1884
+ "epoch": 3.82,
1885
+ "learning_rate": 4.524e-06,
1886
+ "loss": 0.1698,
1887
+ "step": 1515
1888
+ },
1889
+ {
1890
+ "epoch": 3.84,
1891
+ "learning_rate": 4.539e-06,
1892
+ "loss": 0.1703,
1893
+ "step": 1520
1894
+ },
1895
+ {
1896
+ "epoch": 3.85,
1897
+ "learning_rate": 4.554e-06,
1898
+ "loss": 0.1841,
1899
+ "step": 1525
1900
+ },
1901
+ {
1902
+ "epoch": 3.86,
1903
+ "learning_rate": 4.569e-06,
1904
+ "loss": 0.1783,
1905
+ "step": 1530
1906
+ },
1907
+ {
1908
+ "epoch": 3.87,
1909
+ "learning_rate": 4.584e-06,
1910
+ "loss": 0.1633,
1911
+ "step": 1535
1912
+ },
1913
+ {
1914
+ "epoch": 3.89,
1915
+ "learning_rate": 4.5989999999999995e-06,
1916
+ "loss": 0.1839,
1917
+ "step": 1540
1918
+ },
1919
+ {
1920
+ "epoch": 3.9,
1921
+ "learning_rate": 4.614e-06,
1922
+ "loss": 0.153,
1923
+ "step": 1545
1924
+ },
1925
+ {
1926
+ "epoch": 3.91,
1927
+ "learning_rate": 4.629e-06,
1928
+ "loss": 0.1553,
1929
+ "step": 1550
1930
+ },
1931
+ {
1932
+ "epoch": 3.92,
1933
+ "learning_rate": 4.644e-06,
1934
+ "loss": 0.1834,
1935
+ "step": 1555
1936
+ },
1937
+ {
1938
+ "epoch": 3.94,
1939
+ "learning_rate": 4.659e-06,
1940
+ "loss": 0.1582,
1941
+ "step": 1560
1942
+ },
1943
+ {
1944
+ "epoch": 3.95,
1945
+ "learning_rate": 4.674e-06,
1946
+ "loss": 0.1592,
1947
+ "step": 1565
1948
+ },
1949
+ {
1950
+ "epoch": 3.96,
1951
+ "learning_rate": 4.689e-06,
1952
+ "loss": 0.1823,
1953
+ "step": 1570
1954
+ },
1955
+ {
1956
+ "epoch": 3.97,
1957
+ "learning_rate": 4.704e-06,
1958
+ "loss": 0.1782,
1959
+ "step": 1575
1960
+ },
1961
+ {
1962
+ "epoch": 3.99,
1963
+ "learning_rate": 4.719e-06,
1964
+ "loss": 0.1543,
1965
+ "step": 1580
1966
+ },
1967
+ {
1968
+ "epoch": 4.0,
1969
+ "learning_rate": 4.734e-06,
1970
+ "loss": 0.1943,
1971
+ "step": 1585
1972
+ },
1973
+ {
1974
+ "epoch": 4.01,
1975
+ "learning_rate": 4.749e-06,
1976
+ "loss": 0.1491,
1977
+ "step": 1590
1978
+ },
1979
+ {
1980
+ "epoch": 4.03,
1981
+ "learning_rate": 4.764e-06,
1982
+ "loss": 0.1656,
1983
+ "step": 1595
1984
+ },
1985
+ {
1986
+ "epoch": 4.04,
1987
+ "learning_rate": 4.779e-06,
1988
+ "loss": 0.1507,
1989
+ "step": 1600
1990
+ },
1991
+ {
1992
+ "epoch": 4.05,
1993
+ "learning_rate": 4.794e-06,
1994
+ "loss": 0.1589,
1995
+ "step": 1605
1996
+ },
1997
+ {
1998
+ "epoch": 4.06,
1999
+ "learning_rate": 4.809e-06,
2000
+ "loss": 0.1443,
2001
+ "step": 1610
2002
+ },
2003
+ {
2004
+ "epoch": 4.08,
2005
+ "learning_rate": 4.824e-06,
2006
+ "loss": 0.1487,
2007
+ "step": 1615
2008
+ },
2009
+ {
2010
+ "epoch": 4.09,
2011
+ "learning_rate": 4.839e-06,
2012
+ "loss": 0.1766,
2013
+ "step": 1620
2014
+ },
2015
+ {
2016
+ "epoch": 4.1,
2017
+ "learning_rate": 4.8540000000000005e-06,
2018
+ "loss": 0.1548,
2019
+ "step": 1625
2020
+ },
2021
+ {
2022
+ "epoch": 4.11,
2023
+ "learning_rate": 4.869e-06,
2024
+ "loss": 0.1494,
2025
+ "step": 1630
2026
+ },
2027
+ {
2028
+ "epoch": 4.13,
2029
+ "learning_rate": 4.884e-06,
2030
+ "loss": 0.1793,
2031
+ "step": 1635
2032
+ },
2033
+ {
2034
+ "epoch": 4.14,
2035
+ "learning_rate": 4.899e-06,
2036
+ "loss": 0.1417,
2037
+ "step": 1640
2038
+ },
2039
+ {
2040
+ "epoch": 4.15,
2041
+ "learning_rate": 4.914e-06,
2042
+ "loss": 0.1399,
2043
+ "step": 1645
2044
+ },
2045
+ {
2046
+ "epoch": 4.16,
2047
+ "learning_rate": 4.929000000000001e-06,
2048
+ "loss": 0.166,
2049
+ "step": 1650
2050
+ },
2051
+ {
2052
+ "epoch": 4.18,
2053
+ "learning_rate": 4.9440000000000004e-06,
2054
+ "loss": 0.1748,
2055
+ "step": 1655
2056
+ },
2057
+ {
2058
+ "epoch": 4.19,
2059
+ "learning_rate": 4.959e-06,
2060
+ "loss": 0.1378,
2061
+ "step": 1660
2062
+ },
2063
+ {
2064
+ "epoch": 4.2,
2065
+ "learning_rate": 4.974e-06,
2066
+ "loss": 0.144,
2067
+ "step": 1665
2068
+ },
2069
+ {
2070
+ "epoch": 4.21,
2071
+ "learning_rate": 4.989e-06,
2072
+ "loss": 0.1794,
2073
+ "step": 1670
2074
+ },
2075
+ {
2076
+ "epoch": 4.23,
2077
+ "learning_rate": 5.004e-06,
2078
+ "loss": 0.146,
2079
+ "step": 1675
2080
+ },
2081
+ {
2082
+ "epoch": 4.24,
2083
+ "learning_rate": 5.0190000000000006e-06,
2084
+ "loss": 0.141,
2085
+ "step": 1680
2086
+ },
2087
+ {
2088
+ "epoch": 4.25,
2089
+ "learning_rate": 5.034e-06,
2090
+ "loss": 0.1757,
2091
+ "step": 1685
2092
+ },
2093
+ {
2094
+ "epoch": 4.26,
2095
+ "learning_rate": 5.049e-06,
2096
+ "loss": 0.1423,
2097
+ "step": 1690
2098
+ },
2099
+ {
2100
+ "epoch": 4.28,
2101
+ "learning_rate": 5.064e-06,
2102
+ "loss": 0.1414,
2103
+ "step": 1695
2104
+ },
2105
+ {
2106
+ "epoch": 4.29,
2107
+ "learning_rate": 5.079e-06,
2108
+ "loss": 0.1556,
2109
+ "step": 1700
2110
+ },
2111
+ {
2112
+ "epoch": 4.3,
2113
+ "learning_rate": 5.094000000000001e-06,
2114
+ "loss": 0.1599,
2115
+ "step": 1705
2116
+ },
2117
+ {
2118
+ "epoch": 4.32,
2119
+ "learning_rate": 5.1090000000000006e-06,
2120
+ "loss": 0.1436,
2121
+ "step": 1710
2122
+ },
2123
+ {
2124
+ "epoch": 4.33,
2125
+ "learning_rate": 5.124e-06,
2126
+ "loss": 0.1346,
2127
+ "step": 1715
2128
+ },
2129
+ {
2130
+ "epoch": 4.34,
2131
+ "learning_rate": 5.139e-06,
2132
+ "loss": 0.1702,
2133
+ "step": 1720
2134
+ },
2135
+ {
2136
+ "epoch": 4.35,
2137
+ "learning_rate": 5.154e-06,
2138
+ "loss": 0.1397,
2139
+ "step": 1725
2140
+ },
2141
+ {
2142
+ "epoch": 4.37,
2143
+ "learning_rate": 5.169e-06,
2144
+ "loss": 0.1373,
2145
+ "step": 1730
2146
+ },
2147
+ {
2148
+ "epoch": 4.38,
2149
+ "learning_rate": 5.184000000000001e-06,
2150
+ "loss": 0.1929,
2151
+ "step": 1735
2152
+ },
2153
+ {
2154
+ "epoch": 4.39,
2155
+ "learning_rate": 5.1990000000000005e-06,
2156
+ "loss": 0.1426,
2157
+ "step": 1740
2158
+ },
2159
+ {
2160
+ "epoch": 4.4,
2161
+ "learning_rate": 5.214e-06,
2162
+ "loss": 0.1481,
2163
+ "step": 1745
2164
+ },
2165
+ {
2166
+ "epoch": 4.42,
2167
+ "learning_rate": 5.229e-06,
2168
+ "loss": 0.163,
2169
+ "step": 1750
2170
+ },
2171
+ {
2172
+ "epoch": 4.42,
2173
+ "eval_cer": 0.029768965035244312,
2174
+ "eval_loss": 0.1489591896533966,
2175
+ "eval_runtime": 60.6618,
2176
+ "eval_samples_per_second": 41.674,
2177
+ "eval_steps_per_second": 1.17,
2178
+ "eval_wer": 0.1163397860417728,
2179
+ "step": 1750
2180
+ },
2181
+ {
2182
+ "epoch": 4.43,
2183
+ "learning_rate": 5.244e-06,
2184
+ "loss": 0.1816,
2185
+ "step": 1755
2186
+ },
2187
+ {
2188
+ "epoch": 4.44,
2189
+ "learning_rate": 5.259000000000001e-06,
2190
+ "loss": 0.1444,
2191
+ "step": 1760
2192
+ },
2193
+ {
2194
+ "epoch": 4.45,
2195
+ "learning_rate": 5.274000000000001e-06,
2196
+ "loss": 0.1423,
2197
+ "step": 1765
2198
+ },
2199
+ {
2200
+ "epoch": 4.47,
2201
+ "learning_rate": 5.2890000000000005e-06,
2202
+ "loss": 0.1605,
2203
+ "step": 1770
2204
+ },
2205
+ {
2206
+ "epoch": 4.48,
2207
+ "learning_rate": 5.304e-06,
2208
+ "loss": 0.1355,
2209
+ "step": 1775
2210
+ },
2211
+ {
2212
+ "epoch": 4.49,
2213
+ "learning_rate": 5.319e-06,
2214
+ "loss": 0.1401,
2215
+ "step": 1780
2216
+ },
2217
+ {
2218
+ "epoch": 4.5,
2219
+ "learning_rate": 5.334000000000001e-06,
2220
+ "loss": 0.1593,
2221
+ "step": 1785
2222
+ },
2223
+ {
2224
+ "epoch": 4.52,
2225
+ "learning_rate": 5.349e-06,
2226
+ "loss": 0.1361,
2227
+ "step": 1790
2228
+ },
2229
+ {
2230
+ "epoch": 4.53,
2231
+ "learning_rate": 5.364e-06,
2232
+ "loss": 0.1363,
2233
+ "step": 1795
2234
+ },
2235
+ {
2236
+ "epoch": 4.54,
2237
+ "learning_rate": 5.379e-06,
2238
+ "loss": 0.1527,
2239
+ "step": 1800
2240
+ },
2241
+ {
2242
+ "epoch": 4.55,
2243
+ "learning_rate": 5.3939999999999995e-06,
2244
+ "loss": 0.1645,
2245
+ "step": 1805
2246
+ },
2247
+ {
2248
+ "epoch": 4.57,
2249
+ "learning_rate": 5.408999999999999e-06,
2250
+ "loss": 0.1285,
2251
+ "step": 1810
2252
+ },
2253
+ {
2254
+ "epoch": 4.58,
2255
+ "learning_rate": 5.424e-06,
2256
+ "loss": 0.1366,
2257
+ "step": 1815
2258
+ },
2259
+ {
2260
+ "epoch": 4.59,
2261
+ "learning_rate": 5.439e-06,
2262
+ "loss": 0.1808,
2263
+ "step": 1820
2264
+ },
2265
+ {
2266
+ "epoch": 4.61,
2267
+ "learning_rate": 5.454e-06,
2268
+ "loss": 0.1428,
2269
+ "step": 1825
2270
+ },
2271
+ {
2272
+ "epoch": 4.62,
2273
+ "learning_rate": 5.469e-06,
2274
+ "loss": 0.1314,
2275
+ "step": 1830
2276
+ },
2277
+ {
2278
+ "epoch": 4.63,
2279
+ "learning_rate": 5.4839999999999995e-06,
2280
+ "loss": 0.1479,
2281
+ "step": 1835
2282
+ },
2283
+ {
2284
+ "epoch": 4.64,
2285
+ "learning_rate": 5.499e-06,
2286
+ "loss": 0.135,
2287
+ "step": 1840
2288
+ },
2289
+ {
2290
+ "epoch": 4.66,
2291
+ "learning_rate": 5.514e-06,
2292
+ "loss": 0.1253,
2293
+ "step": 1845
2294
+ },
2295
+ {
2296
+ "epoch": 4.67,
2297
+ "learning_rate": 5.529e-06,
2298
+ "loss": 0.13,
2299
+ "step": 1850
2300
+ },
2301
+ {
2302
+ "epoch": 4.68,
2303
+ "learning_rate": 5.544e-06,
2304
+ "loss": 0.1484,
2305
+ "step": 1855
2306
+ },
2307
+ {
2308
+ "epoch": 4.69,
2309
+ "learning_rate": 5.559e-06,
2310
+ "loss": 0.1201,
2311
+ "step": 1860
2312
+ },
2313
+ {
2314
+ "epoch": 4.71,
2315
+ "learning_rate": 5.574e-06,
2316
+ "loss": 0.1263,
2317
+ "step": 1865
2318
+ },
2319
+ {
2320
+ "epoch": 4.72,
2321
+ "learning_rate": 5.589e-06,
2322
+ "loss": 0.1493,
2323
+ "step": 1870
2324
+ },
2325
+ {
2326
+ "epoch": 4.73,
2327
+ "learning_rate": 5.604e-06,
2328
+ "loss": 0.1276,
2329
+ "step": 1875
2330
+ },
2331
+ {
2332
+ "epoch": 4.74,
2333
+ "learning_rate": 5.619e-06,
2334
+ "loss": 0.1236,
2335
+ "step": 1880
2336
+ },
2337
+ {
2338
+ "epoch": 4.76,
2339
+ "learning_rate": 5.634e-06,
2340
+ "loss": 0.1718,
2341
+ "step": 1885
2342
+ },
2343
+ {
2344
+ "epoch": 4.77,
2345
+ "learning_rate": 5.649e-06,
2346
+ "loss": 0.1292,
2347
+ "step": 1890
2348
+ },
2349
+ {
2350
+ "epoch": 4.78,
2351
+ "learning_rate": 5.664e-06,
2352
+ "loss": 0.1175,
2353
+ "step": 1895
2354
+ },
2355
+ {
2356
+ "epoch": 4.79,
2357
+ "learning_rate": 5.679e-06,
2358
+ "loss": 0.1317,
2359
+ "step": 1900
2360
+ },
2361
+ {
2362
+ "epoch": 4.81,
2363
+ "learning_rate": 5.694e-06,
2364
+ "loss": 0.1412,
2365
+ "step": 1905
2366
+ },
2367
+ {
2368
+ "epoch": 4.82,
2369
+ "learning_rate": 5.709e-06,
2370
+ "loss": 0.1222,
2371
+ "step": 1910
2372
+ },
2373
+ {
2374
+ "epoch": 4.83,
2375
+ "learning_rate": 5.724e-06,
2376
+ "loss": 0.1193,
2377
+ "step": 1915
2378
+ },
2379
+ {
2380
+ "epoch": 4.84,
2381
+ "learning_rate": 5.7390000000000004e-06,
2382
+ "loss": 0.16,
2383
+ "step": 1920
2384
+ },
2385
+ {
2386
+ "epoch": 4.86,
2387
+ "learning_rate": 5.754e-06,
2388
+ "loss": 0.1228,
2389
+ "step": 1925
2390
+ },
2391
+ {
2392
+ "epoch": 4.87,
2393
+ "learning_rate": 5.769e-06,
2394
+ "loss": 0.1195,
2395
+ "step": 1930
2396
+ },
2397
+ {
2398
+ "epoch": 4.88,
2399
+ "learning_rate": 5.784e-06,
2400
+ "loss": 0.1608,
2401
+ "step": 1935
2402
+ },
2403
+ {
2404
+ "epoch": 4.9,
2405
+ "learning_rate": 5.799e-06,
2406
+ "loss": 0.1234,
2407
+ "step": 1940
2408
+ },
2409
+ {
2410
+ "epoch": 4.91,
2411
+ "learning_rate": 5.814e-06,
2412
+ "loss": 0.1265,
2413
+ "step": 1945
2414
+ },
2415
+ {
2416
+ "epoch": 4.92,
2417
+ "learning_rate": 5.8290000000000004e-06,
2418
+ "loss": 0.1431,
2419
+ "step": 1950
2420
+ },
2421
+ {
2422
+ "epoch": 4.93,
2423
+ "learning_rate": 5.844e-06,
2424
+ "loss": 0.1366,
2425
+ "step": 1955
2426
+ },
2427
+ {
2428
+ "epoch": 4.95,
2429
+ "learning_rate": 5.859e-06,
2430
+ "loss": 0.1091,
2431
+ "step": 1960
2432
+ },
2433
+ {
2434
+ "epoch": 4.96,
2435
+ "learning_rate": 5.874e-06,
2436
+ "loss": 0.139,
2437
+ "step": 1965
2438
+ },
2439
+ {
2440
+ "epoch": 4.97,
2441
+ "learning_rate": 5.886000000000001e-06,
2442
+ "loss": 0.1726,
2443
+ "step": 1970
2444
+ },
2445
+ {
2446
+ "epoch": 4.98,
2447
+ "learning_rate": 5.901000000000001e-06,
2448
+ "loss": 0.1148,
2449
+ "step": 1975
2450
+ },
2451
+ {
2452
+ "epoch": 5.0,
2453
+ "learning_rate": 5.916e-06,
2454
+ "loss": 0.1296,
2455
+ "step": 1980
2456
+ },
2457
+ {
2458
+ "epoch": 5.0,
2459
+ "step": 1980,
2460
+ "total_flos": 5.2867996741439324e+19,
2461
+ "train_loss": 0.09581804365822763,
2462
+ "train_runtime": 4900.8909,
2463
+ "train_samples_per_second": 29.115,
2464
+ "train_steps_per_second": 0.404
2465
  }
2466
  ],
2467
  "logging_steps": 5,
2468
+ "max_steps": 1980,
2469
  "num_input_tokens_seen": 0,
2470
+ "num_train_epochs": 5,
2471
  "save_steps": 500,
2472
+ "total_flos": 5.2867996741439324e+19,
2473
  "train_batch_size": 12,
2474
  "trial_name": null,
2475
  "trial_params": null
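Two back-of-the-envelope notes on the trainer state above. The logged learning_rate climbs steadily, by roughly 3e-9 per step (3.006e-06 at step 1005 up to 5.916e-06 at the final step 1980), so the schedule is still in its warmup ramp when training stops. The step count also implies the effective batch size; the split between data parallelism and gradient accumulation below is an assumption, since the diff does not record it.

```python
# Effective batch size implied by the trainer_state.json numbers above.
train_samples = 28538
num_train_epochs = 5
max_steps = 1980
per_device_train_batch_size = 12  # "train_batch_size" in trainer_state.json

steps_per_epoch = max_steps / num_train_epochs      # 396 optimizer steps per epoch
samples_per_step = train_samples / steps_per_epoch  # ≈ 72 samples per optimizer step
# ≈ 6x the per-device batch, i.e. some combination of devices and
# gradient accumulation (exact split not recorded in this commit).
print(samples_per_step / per_device_train_batch_size)
```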