marinone94 committed on
Commit
46c0759
•
1 Parent(s): 7ac2142

Training in progress, step 3900

Browse files
{checkpoint-3200 → checkpoint-3800}/config.json RENAMED
File without changes
{checkpoint-3200 → checkpoint-3800}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7d0ef9d1762f7ffe1cc3a0e4748e6ebb583dea4f49da96382216e78f5647702c
3
  size 2490337809
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3916c502476103f3ffc7a1308222895c30676a37dc692fc61cb70066e716d9ac
3
  size 2490337809
{checkpoint-3200 → checkpoint-3800}/preprocessor_config.json RENAMED
File without changes
{checkpoint-3300 → checkpoint-3800}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a0e5ce31e623a5f978a7686e5b6636e0db65bea23a7d3b791715106209b73f8c
3
  size 1262063089
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0b145b6b59bd869ef8bc9342a6f9c151b7c459935f0fd7b3877c9afe90a49de
3
  size 1262063089
{checkpoint-3300 → checkpoint-3800}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a716b25bdaec66a312a035315a78027e767ae161c16b37a11eeba450f275b66e
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd0c1e59d01355dc7800f78e2a8c707b3bf501a6ea7d92076b4d2400614623cf
3
+ size 14567
{checkpoint-3300 → checkpoint-3800}/scaler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:011849dafd5feecbd9c7cd405b92e51d3198c6a38da3d9f70b7ac2eb65d83b8f
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29eb923c97bb88614ccb0255ae678634f872dd8aa03ae16319e241eb7a1e8c90
3
  size 559
{checkpoint-3200 → checkpoint-3800}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7d6d7554ac67cd9c5c482a68804a21e6ce04d359c4a6dab9309204dc50f02e7d
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:108cf3add24d85591d96de728715165debaf66f2fb85e7a11bb55ca6e478dd61
3
  size 623
{checkpoint-3300 → checkpoint-3800}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 36.26229508196721,
5
- "global_step": 3300,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1293,11 +1293,206 @@
1293
  "eval_steps_per_second": 0.791,
1294
  "eval_wer": 0.1408458699971615,
1295
  "step": 3300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1296
  }
1297
  ],
1298
  "max_steps": 4550,
1299
  "num_train_epochs": 50,
1300
- "total_flos": 5.103177199568347e+19,
1301
  "trial_name": null,
1302
  "trial_params": null
1303
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 41.75409836065574,
5
+ "global_step": 3800,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1293
  "eval_steps_per_second": 0.791,
1294
  "eval_wer": 0.1408458699971615,
1295
  "step": 3300
1296
+ },
1297
+ {
1298
+ "epoch": 36.48,
1299
+ "learning_rate": 0.00012078431372549021,
1300
+ "loss": 0.772,
1301
+ "step": 3320
1302
+ },
1303
+ {
1304
+ "epoch": 36.7,
1305
+ "learning_rate": 0.00011882352941176471,
1306
+ "loss": 0.7818,
1307
+ "step": 3340
1308
+ },
1309
+ {
1310
+ "epoch": 36.92,
1311
+ "learning_rate": 0.00011696078431372549,
1312
+ "loss": 0.8016,
1313
+ "step": 3360
1314
+ },
1315
+ {
1316
+ "epoch": 37.14,
1317
+ "learning_rate": 0.000115,
1318
+ "loss": 0.8061,
1319
+ "step": 3380
1320
+ },
1321
+ {
1322
+ "epoch": 37.36,
1323
+ "learning_rate": 0.0001130392156862745,
1324
+ "loss": 0.7703,
1325
+ "step": 3400
1326
+ },
1327
+ {
1328
+ "epoch": 37.36,
1329
+ "eval_loss": 0.16011376678943634,
1330
+ "eval_runtime": 187.5367,
1331
+ "eval_samples_per_second": 25.824,
1332
+ "eval_steps_per_second": 0.811,
1333
+ "eval_wer": 0.13692875390292364,
1334
+ "step": 3400
1335
+ },
1336
+ {
1337
+ "epoch": 37.58,
1338
+ "learning_rate": 0.00011107843137254903,
1339
+ "loss": 0.7713,
1340
+ "step": 3420
1341
+ },
1342
+ {
1343
+ "epoch": 37.8,
1344
+ "learning_rate": 0.00010911764705882353,
1345
+ "loss": 0.7712,
1346
+ "step": 3440
1347
+ },
1348
+ {
1349
+ "epoch": 38.02,
1350
+ "learning_rate": 0.00010715686274509805,
1351
+ "loss": 0.8149,
1352
+ "step": 3460
1353
+ },
1354
+ {
1355
+ "epoch": 38.24,
1356
+ "learning_rate": 0.00010519607843137255,
1357
+ "loss": 0.7885,
1358
+ "step": 3480
1359
+ },
1360
+ {
1361
+ "epoch": 38.46,
1362
+ "learning_rate": 0.00010323529411764706,
1363
+ "loss": 0.7474,
1364
+ "step": 3500
1365
+ },
1366
+ {
1367
+ "epoch": 38.46,
1368
+ "eval_loss": 0.1514146625995636,
1369
+ "eval_runtime": 190.8228,
1370
+ "eval_samples_per_second": 25.38,
1371
+ "eval_steps_per_second": 0.797,
1372
+ "eval_wer": 0.1342321884757309,
1373
+ "step": 3500
1374
+ },
1375
+ {
1376
+ "epoch": 38.68,
1377
+ "learning_rate": 0.00010127450980392156,
1378
+ "loss": 0.785,
1379
+ "step": 3520
1380
+ },
1381
+ {
1382
+ "epoch": 38.9,
1383
+ "learning_rate": 9.931372549019609e-05,
1384
+ "loss": 0.7677,
1385
+ "step": 3540
1386
+ },
1387
+ {
1388
+ "epoch": 39.12,
1389
+ "learning_rate": 9.73529411764706e-05,
1390
+ "loss": 0.7849,
1391
+ "step": 3560
1392
+ },
1393
+ {
1394
+ "epoch": 39.34,
1395
+ "learning_rate": 9.539215686274511e-05,
1396
+ "loss": 0.7637,
1397
+ "step": 3580
1398
+ },
1399
+ {
1400
+ "epoch": 39.56,
1401
+ "learning_rate": 9.343137254901961e-05,
1402
+ "loss": 0.7719,
1403
+ "step": 3600
1404
+ },
1405
+ {
1406
+ "epoch": 39.56,
1407
+ "eval_loss": 0.15932896733283997,
1408
+ "eval_runtime": 189.6806,
1409
+ "eval_samples_per_second": 25.532,
1410
+ "eval_steps_per_second": 0.801,
1411
+ "eval_wer": 0.1352540448481408,
1412
+ "step": 3600
1413
+ },
1414
+ {
1415
+ "epoch": 39.78,
1416
+ "learning_rate": 9.147058823529412e-05,
1417
+ "loss": 0.7591,
1418
+ "step": 3620
1419
+ },
1420
+ {
1421
+ "epoch": 39.99,
1422
+ "learning_rate": 8.950980392156862e-05,
1423
+ "loss": 0.7706,
1424
+ "step": 3640
1425
+ },
1426
+ {
1427
+ "epoch": 40.22,
1428
+ "learning_rate": 8.754901960784314e-05,
1429
+ "loss": 0.7805,
1430
+ "step": 3660
1431
+ },
1432
+ {
1433
+ "epoch": 40.44,
1434
+ "learning_rate": 8.558823529411765e-05,
1435
+ "loss": 0.7753,
1436
+ "step": 3680
1437
+ },
1438
+ {
1439
+ "epoch": 40.66,
1440
+ "learning_rate": 8.362745098039217e-05,
1441
+ "loss": 0.7638,
1442
+ "step": 3700
1443
+ },
1444
+ {
1445
+ "epoch": 40.66,
1446
+ "eval_loss": 0.15362653136253357,
1447
+ "eval_runtime": 192.1118,
1448
+ "eval_samples_per_second": 25.209,
1449
+ "eval_steps_per_second": 0.791,
1450
+ "eval_wer": 0.13380641498722678,
1451
+ "step": 3700
1452
+ },
1453
+ {
1454
+ "epoch": 40.87,
1455
+ "learning_rate": 8.166666666666667e-05,
1456
+ "loss": 0.7791,
1457
+ "step": 3720
1458
+ },
1459
+ {
1460
+ "epoch": 41.1,
1461
+ "learning_rate": 7.970588235294118e-05,
1462
+ "loss": 0.7931,
1463
+ "step": 3740
1464
+ },
1465
+ {
1466
+ "epoch": 41.32,
1467
+ "learning_rate": 7.774509803921568e-05,
1468
+ "loss": 0.741,
1469
+ "step": 3760
1470
+ },
1471
+ {
1472
+ "epoch": 41.54,
1473
+ "learning_rate": 7.57843137254902e-05,
1474
+ "loss": 0.7434,
1475
+ "step": 3780
1476
+ },
1477
+ {
1478
+ "epoch": 41.75,
1479
+ "learning_rate": 7.38235294117647e-05,
1480
+ "loss": 0.771,
1481
+ "step": 3800
1482
+ },
1483
+ {
1484
+ "epoch": 41.75,
1485
+ "eval_loss": 0.1530592143535614,
1486
+ "eval_runtime": 186.414,
1487
+ "eval_samples_per_second": 25.98,
1488
+ "eval_steps_per_second": 0.815,
1489
+ "eval_wer": 0.13170593244393983,
1490
+ "step": 3800
1491
  }
1492
  ],
1493
  "max_steps": 4550,
1494
  "num_train_epochs": 50,
1495
+ "total_flos": 5.876357873041098e+19,
1496
  "trial_name": null,
1497
  "trial_params": null
1498
  }
{checkpoint-3200 → checkpoint-3800}/training_args.bin RENAMED
File without changes
{checkpoint-3300 → checkpoint-3900}/config.json RENAMED
File without changes
{checkpoint-3300 → checkpoint-3900}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:806c8e4e7fd28f0571714cc9f9a0055970d60ee26b8981016c52d353bb4430e3
3
  size 2490337809
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:168e59323a9bf241005906b1870612b1b524768b8a27757437012f8cdf781fc3
3
  size 2490337809
{checkpoint-3300 → checkpoint-3900}/preprocessor_config.json RENAMED
File without changes
{checkpoint-3200 → checkpoint-3900}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0ae84798d472963cd276f2962517c928f65dbe900552c2adf1ac952b2b09cb73
3
  size 1262063089
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b689bc4f48a4fe8515030b196fb5bd4de5819b77555cbfd8e1979aa0d04c367
3
  size 1262063089
{checkpoint-3200 → checkpoint-3900}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2cdfe7c6a1cd08c22115fc108dd9766a68df8ce8189a197857ef6e07d3e9f573
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:266a5b6d767d7cc2ccd3807fca6c4cfb051cc7d3796a6836ef75e2cf3c6b3218
3
+ size 14567
{checkpoint-3200 → checkpoint-3900}/scaler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:adc06e1a20206447d0abcf52e13d1dede8b4a0f2043d99c40e3b996eae19e163
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26f1e0a22621c69063b9f7d6715acac2896bf4745746722f322c93153a0c85b5
3
  size 559
{checkpoint-3300 → checkpoint-3900}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa9db20ef4c55522e29abf089521aed25de7e4d0ceb54efd10455bf35f8ac946
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1bc34f9b39344288eae3f0b593523acd1748174f4d473bee33a329e62da9e8a
3
  size 623
{checkpoint-3200 → checkpoint-3900}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 35.16393442622951,
5
- "global_step": 3200,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1254,11 +1254,284 @@
1254
  "eval_steps_per_second": 0.795,
1255
  "eval_wer": 0.1379222253760999,
1256
  "step": 3200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1257
  }
1258
  ],
1259
  "max_steps": 4550,
1260
  "num_train_epochs": 50,
1261
- "total_flos": 4.949777145440599e+19,
1262
  "trial_name": null,
1263
  "trial_params": null
1264
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 42.85245901639344,
5
+ "global_step": 3900,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1254
  "eval_steps_per_second": 0.795,
1255
  "eval_wer": 0.1379222253760999,
1256
  "step": 3200
1257
+ },
1258
+ {
1259
+ "epoch": 35.38,
1260
+ "learning_rate": 0.00013058823529411764,
1261
+ "loss": 0.7963,
1262
+ "step": 3220
1263
+ },
1264
+ {
1265
+ "epoch": 35.6,
1266
+ "learning_rate": 0.00012862745098039216,
1267
+ "loss": 0.7895,
1268
+ "step": 3240
1269
+ },
1270
+ {
1271
+ "epoch": 35.82,
1272
+ "learning_rate": 0.0001266666666666667,
1273
+ "loss": 0.7964,
1274
+ "step": 3260
1275
+ },
1276
+ {
1277
+ "epoch": 36.04,
1278
+ "learning_rate": 0.0001247058823529412,
1279
+ "loss": 0.7931,
1280
+ "step": 3280
1281
+ },
1282
+ {
1283
+ "epoch": 36.26,
1284
+ "learning_rate": 0.0001227450980392157,
1285
+ "loss": 0.7835,
1286
+ "step": 3300
1287
+ },
1288
+ {
1289
+ "epoch": 36.26,
1290
+ "eval_loss": 0.16020993888378143,
1291
+ "eval_runtime": 192.1405,
1292
+ "eval_samples_per_second": 25.206,
1293
+ "eval_steps_per_second": 0.791,
1294
+ "eval_wer": 0.1408458699971615,
1295
+ "step": 3300
1296
+ },
1297
+ {
1298
+ "epoch": 36.48,
1299
+ "learning_rate": 0.00012078431372549021,
1300
+ "loss": 0.772,
1301
+ "step": 3320
1302
+ },
1303
+ {
1304
+ "epoch": 36.7,
1305
+ "learning_rate": 0.00011882352941176471,
1306
+ "loss": 0.7818,
1307
+ "step": 3340
1308
+ },
1309
+ {
1310
+ "epoch": 36.92,
1311
+ "learning_rate": 0.00011696078431372549,
1312
+ "loss": 0.8016,
1313
+ "step": 3360
1314
+ },
1315
+ {
1316
+ "epoch": 37.14,
1317
+ "learning_rate": 0.000115,
1318
+ "loss": 0.8061,
1319
+ "step": 3380
1320
+ },
1321
+ {
1322
+ "epoch": 37.36,
1323
+ "learning_rate": 0.0001130392156862745,
1324
+ "loss": 0.7703,
1325
+ "step": 3400
1326
+ },
1327
+ {
1328
+ "epoch": 37.36,
1329
+ "eval_loss": 0.16011376678943634,
1330
+ "eval_runtime": 187.5367,
1331
+ "eval_samples_per_second": 25.824,
1332
+ "eval_steps_per_second": 0.811,
1333
+ "eval_wer": 0.13692875390292364,
1334
+ "step": 3400
1335
+ },
1336
+ {
1337
+ "epoch": 37.58,
1338
+ "learning_rate": 0.00011107843137254903,
1339
+ "loss": 0.7713,
1340
+ "step": 3420
1341
+ },
1342
+ {
1343
+ "epoch": 37.8,
1344
+ "learning_rate": 0.00010911764705882353,
1345
+ "loss": 0.7712,
1346
+ "step": 3440
1347
+ },
1348
+ {
1349
+ "epoch": 38.02,
1350
+ "learning_rate": 0.00010715686274509805,
1351
+ "loss": 0.8149,
1352
+ "step": 3460
1353
+ },
1354
+ {
1355
+ "epoch": 38.24,
1356
+ "learning_rate": 0.00010519607843137255,
1357
+ "loss": 0.7885,
1358
+ "step": 3480
1359
+ },
1360
+ {
1361
+ "epoch": 38.46,
1362
+ "learning_rate": 0.00010323529411764706,
1363
+ "loss": 0.7474,
1364
+ "step": 3500
1365
+ },
1366
+ {
1367
+ "epoch": 38.46,
1368
+ "eval_loss": 0.1514146625995636,
1369
+ "eval_runtime": 190.8228,
1370
+ "eval_samples_per_second": 25.38,
1371
+ "eval_steps_per_second": 0.797,
1372
+ "eval_wer": 0.1342321884757309,
1373
+ "step": 3500
1374
+ },
1375
+ {
1376
+ "epoch": 38.68,
1377
+ "learning_rate": 0.00010127450980392156,
1378
+ "loss": 0.785,
1379
+ "step": 3520
1380
+ },
1381
+ {
1382
+ "epoch": 38.9,
1383
+ "learning_rate": 9.931372549019609e-05,
1384
+ "loss": 0.7677,
1385
+ "step": 3540
1386
+ },
1387
+ {
1388
+ "epoch": 39.12,
1389
+ "learning_rate": 9.73529411764706e-05,
1390
+ "loss": 0.7849,
1391
+ "step": 3560
1392
+ },
1393
+ {
1394
+ "epoch": 39.34,
1395
+ "learning_rate": 9.539215686274511e-05,
1396
+ "loss": 0.7637,
1397
+ "step": 3580
1398
+ },
1399
+ {
1400
+ "epoch": 39.56,
1401
+ "learning_rate": 9.343137254901961e-05,
1402
+ "loss": 0.7719,
1403
+ "step": 3600
1404
+ },
1405
+ {
1406
+ "epoch": 39.56,
1407
+ "eval_loss": 0.15932896733283997,
1408
+ "eval_runtime": 189.6806,
1409
+ "eval_samples_per_second": 25.532,
1410
+ "eval_steps_per_second": 0.801,
1411
+ "eval_wer": 0.1352540448481408,
1412
+ "step": 3600
1413
+ },
1414
+ {
1415
+ "epoch": 39.78,
1416
+ "learning_rate": 9.147058823529412e-05,
1417
+ "loss": 0.7591,
1418
+ "step": 3620
1419
+ },
1420
+ {
1421
+ "epoch": 39.99,
1422
+ "learning_rate": 8.950980392156862e-05,
1423
+ "loss": 0.7706,
1424
+ "step": 3640
1425
+ },
1426
+ {
1427
+ "epoch": 40.22,
1428
+ "learning_rate": 8.754901960784314e-05,
1429
+ "loss": 0.7805,
1430
+ "step": 3660
1431
+ },
1432
+ {
1433
+ "epoch": 40.44,
1434
+ "learning_rate": 8.558823529411765e-05,
1435
+ "loss": 0.7753,
1436
+ "step": 3680
1437
+ },
1438
+ {
1439
+ "epoch": 40.66,
1440
+ "learning_rate": 8.362745098039217e-05,
1441
+ "loss": 0.7638,
1442
+ "step": 3700
1443
+ },
1444
+ {
1445
+ "epoch": 40.66,
1446
+ "eval_loss": 0.15362653136253357,
1447
+ "eval_runtime": 192.1118,
1448
+ "eval_samples_per_second": 25.209,
1449
+ "eval_steps_per_second": 0.791,
1450
+ "eval_wer": 0.13380641498722678,
1451
+ "step": 3700
1452
+ },
1453
+ {
1454
+ "epoch": 40.87,
1455
+ "learning_rate": 8.166666666666667e-05,
1456
+ "loss": 0.7791,
1457
+ "step": 3720
1458
+ },
1459
+ {
1460
+ "epoch": 41.1,
1461
+ "learning_rate": 7.970588235294118e-05,
1462
+ "loss": 0.7931,
1463
+ "step": 3740
1464
+ },
1465
+ {
1466
+ "epoch": 41.32,
1467
+ "learning_rate": 7.774509803921568e-05,
1468
+ "loss": 0.741,
1469
+ "step": 3760
1470
+ },
1471
+ {
1472
+ "epoch": 41.54,
1473
+ "learning_rate": 7.57843137254902e-05,
1474
+ "loss": 0.7434,
1475
+ "step": 3780
1476
+ },
1477
+ {
1478
+ "epoch": 41.75,
1479
+ "learning_rate": 7.38235294117647e-05,
1480
+ "loss": 0.771,
1481
+ "step": 3800
1482
+ },
1483
+ {
1484
+ "epoch": 41.75,
1485
+ "eval_loss": 0.1530592143535614,
1486
+ "eval_runtime": 186.414,
1487
+ "eval_samples_per_second": 25.98,
1488
+ "eval_steps_per_second": 0.815,
1489
+ "eval_wer": 0.13170593244393983,
1490
+ "step": 3800
1491
+ },
1492
+ {
1493
+ "epoch": 41.97,
1494
+ "learning_rate": 7.186274509803923e-05,
1495
+ "loss": 0.7765,
1496
+ "step": 3820
1497
+ },
1498
+ {
1499
+ "epoch": 42.2,
1500
+ "learning_rate": 6.990196078431373e-05,
1501
+ "loss": 0.7599,
1502
+ "step": 3840
1503
+ },
1504
+ {
1505
+ "epoch": 42.42,
1506
+ "learning_rate": 6.794117647058824e-05,
1507
+ "loss": 0.7782,
1508
+ "step": 3860
1509
+ },
1510
+ {
1511
+ "epoch": 42.63,
1512
+ "learning_rate": 6.598039215686274e-05,
1513
+ "loss": 0.7395,
1514
+ "step": 3880
1515
+ },
1516
+ {
1517
+ "epoch": 42.85,
1518
+ "learning_rate": 6.401960784313726e-05,
1519
+ "loss": 0.7594,
1520
+ "step": 3900
1521
+ },
1522
+ {
1523
+ "epoch": 42.85,
1524
+ "eval_loss": 0.14983513951301575,
1525
+ "eval_runtime": 190.4439,
1526
+ "eval_samples_per_second": 25.43,
1527
+ "eval_steps_per_second": 0.798,
1528
+ "eval_wer": 0.12883905762134545,
1529
+ "step": 3900
1530
  }
1531
  ],
1532
  "max_steps": 4550,
1533
  "num_train_epochs": 50,
1534
+ "total_flos": 6.032084123274907e+19,
1535
  "trial_name": null,
1536
  "trial_params": null
1537
  }
{checkpoint-3300 → checkpoint-3900}/training_args.bin RENAMED
File without changes