marinone94
committed on
Commit
•
46c0759
1
Parent(s):
7ac2142
Training in progress, step 3900
Browse files
- {checkpoint-3200 → checkpoint-3800}/config.json +0 -0
- {checkpoint-3200 → checkpoint-3800}/optimizer.pt +1 -1
- {checkpoint-3200 → checkpoint-3800}/preprocessor_config.json +0 -0
- {checkpoint-3300 → checkpoint-3800}/pytorch_model.bin +1 -1
- {checkpoint-3300 → checkpoint-3800}/rng_state.pth +2 -2
- {checkpoint-3300 → checkpoint-3800}/scaler.pt +1 -1
- {checkpoint-3200 → checkpoint-3800}/scheduler.pt +1 -1
- {checkpoint-3300 → checkpoint-3800}/trainer_state.json +198 -3
- {checkpoint-3200 → checkpoint-3800}/training_args.bin +0 -0
- {checkpoint-3300 → checkpoint-3900}/config.json +0 -0
- {checkpoint-3300 → checkpoint-3900}/optimizer.pt +1 -1
- {checkpoint-3300 → checkpoint-3900}/preprocessor_config.json +0 -0
- {checkpoint-3200 → checkpoint-3900}/pytorch_model.bin +1 -1
- {checkpoint-3200 → checkpoint-3900}/rng_state.pth +2 -2
- {checkpoint-3200 → checkpoint-3900}/scaler.pt +1 -1
- {checkpoint-3300 → checkpoint-3900}/scheduler.pt +1 -1
- {checkpoint-3200 → checkpoint-3900}/trainer_state.json +276 -3
- {checkpoint-3300 → checkpoint-3900}/training_args.bin +0 -0
{checkpoint-3200 → checkpoint-3800}/config.json
RENAMED
File without changes
|
{checkpoint-3200 → checkpoint-3800}/optimizer.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2490337809
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3916c502476103f3ffc7a1308222895c30676a37dc692fc61cb70066e716d9ac
|
3 |
size 2490337809
|
{checkpoint-3200 → checkpoint-3800}/preprocessor_config.json
RENAMED
File without changes
|
{checkpoint-3300 → checkpoint-3800}/pytorch_model.bin
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1262063089
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f0b145b6b59bd869ef8bc9342a6f9c151b7c459935f0fd7b3877c9afe90a49de
|
3 |
size 1262063089
|
{checkpoint-3300 → checkpoint-3800}/rng_state.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fd0c1e59d01355dc7800f78e2a8c707b3bf501a6ea7d92076b4d2400614623cf
|
3 |
+
size 14567
|
{checkpoint-3300 → checkpoint-3800}/scaler.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:29eb923c97bb88614ccb0255ae678634f872dd8aa03ae16319e241eb7a1e8c90
|
3 |
size 559
|
{checkpoint-3200 → checkpoint-3800}/scheduler.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:108cf3add24d85591d96de728715165debaf66f2fb85e7a11bb55ca6e478dd61
|
3 |
size 623
|
{checkpoint-3300 → checkpoint-3800}/trainer_state.json
RENAMED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -1293,11 +1293,206 @@
|
|
1293 |
"eval_steps_per_second": 0.791,
|
1294 |
"eval_wer": 0.1408458699971615,
|
1295 |
"step": 3300
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1296 |
}
|
1297 |
],
|
1298 |
"max_steps": 4550,
|
1299 |
"num_train_epochs": 50,
|
1300 |
-
"total_flos": 5.
|
1301 |
"trial_name": null,
|
1302 |
"trial_params": null
|
1303 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 41.75409836065574,
|
5 |
+
"global_step": 3800,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
1293 |
"eval_steps_per_second": 0.791,
|
1294 |
"eval_wer": 0.1408458699971615,
|
1295 |
"step": 3300
|
1296 |
+
},
|
1297 |
+
{
|
1298 |
+
"epoch": 36.48,
|
1299 |
+
"learning_rate": 0.00012078431372549021,
|
1300 |
+
"loss": 0.772,
|
1301 |
+
"step": 3320
|
1302 |
+
},
|
1303 |
+
{
|
1304 |
+
"epoch": 36.7,
|
1305 |
+
"learning_rate": 0.00011882352941176471,
|
1306 |
+
"loss": 0.7818,
|
1307 |
+
"step": 3340
|
1308 |
+
},
|
1309 |
+
{
|
1310 |
+
"epoch": 36.92,
|
1311 |
+
"learning_rate": 0.00011696078431372549,
|
1312 |
+
"loss": 0.8016,
|
1313 |
+
"step": 3360
|
1314 |
+
},
|
1315 |
+
{
|
1316 |
+
"epoch": 37.14,
|
1317 |
+
"learning_rate": 0.000115,
|
1318 |
+
"loss": 0.8061,
|
1319 |
+
"step": 3380
|
1320 |
+
},
|
1321 |
+
{
|
1322 |
+
"epoch": 37.36,
|
1323 |
+
"learning_rate": 0.0001130392156862745,
|
1324 |
+
"loss": 0.7703,
|
1325 |
+
"step": 3400
|
1326 |
+
},
|
1327 |
+
{
|
1328 |
+
"epoch": 37.36,
|
1329 |
+
"eval_loss": 0.16011376678943634,
|
1330 |
+
"eval_runtime": 187.5367,
|
1331 |
+
"eval_samples_per_second": 25.824,
|
1332 |
+
"eval_steps_per_second": 0.811,
|
1333 |
+
"eval_wer": 0.13692875390292364,
|
1334 |
+
"step": 3400
|
1335 |
+
},
|
1336 |
+
{
|
1337 |
+
"epoch": 37.58,
|
1338 |
+
"learning_rate": 0.00011107843137254903,
|
1339 |
+
"loss": 0.7713,
|
1340 |
+
"step": 3420
|
1341 |
+
},
|
1342 |
+
{
|
1343 |
+
"epoch": 37.8,
|
1344 |
+
"learning_rate": 0.00010911764705882353,
|
1345 |
+
"loss": 0.7712,
|
1346 |
+
"step": 3440
|
1347 |
+
},
|
1348 |
+
{
|
1349 |
+
"epoch": 38.02,
|
1350 |
+
"learning_rate": 0.00010715686274509805,
|
1351 |
+
"loss": 0.8149,
|
1352 |
+
"step": 3460
|
1353 |
+
},
|
1354 |
+
{
|
1355 |
+
"epoch": 38.24,
|
1356 |
+
"learning_rate": 0.00010519607843137255,
|
1357 |
+
"loss": 0.7885,
|
1358 |
+
"step": 3480
|
1359 |
+
},
|
1360 |
+
{
|
1361 |
+
"epoch": 38.46,
|
1362 |
+
"learning_rate": 0.00010323529411764706,
|
1363 |
+
"loss": 0.7474,
|
1364 |
+
"step": 3500
|
1365 |
+
},
|
1366 |
+
{
|
1367 |
+
"epoch": 38.46,
|
1368 |
+
"eval_loss": 0.1514146625995636,
|
1369 |
+
"eval_runtime": 190.8228,
|
1370 |
+
"eval_samples_per_second": 25.38,
|
1371 |
+
"eval_steps_per_second": 0.797,
|
1372 |
+
"eval_wer": 0.1342321884757309,
|
1373 |
+
"step": 3500
|
1374 |
+
},
|
1375 |
+
{
|
1376 |
+
"epoch": 38.68,
|
1377 |
+
"learning_rate": 0.00010127450980392156,
|
1378 |
+
"loss": 0.785,
|
1379 |
+
"step": 3520
|
1380 |
+
},
|
1381 |
+
{
|
1382 |
+
"epoch": 38.9,
|
1383 |
+
"learning_rate": 9.931372549019609e-05,
|
1384 |
+
"loss": 0.7677,
|
1385 |
+
"step": 3540
|
1386 |
+
},
|
1387 |
+
{
|
1388 |
+
"epoch": 39.12,
|
1389 |
+
"learning_rate": 9.73529411764706e-05,
|
1390 |
+
"loss": 0.7849,
|
1391 |
+
"step": 3560
|
1392 |
+
},
|
1393 |
+
{
|
1394 |
+
"epoch": 39.34,
|
1395 |
+
"learning_rate": 9.539215686274511e-05,
|
1396 |
+
"loss": 0.7637,
|
1397 |
+
"step": 3580
|
1398 |
+
},
|
1399 |
+
{
|
1400 |
+
"epoch": 39.56,
|
1401 |
+
"learning_rate": 9.343137254901961e-05,
|
1402 |
+
"loss": 0.7719,
|
1403 |
+
"step": 3600
|
1404 |
+
},
|
1405 |
+
{
|
1406 |
+
"epoch": 39.56,
|
1407 |
+
"eval_loss": 0.15932896733283997,
|
1408 |
+
"eval_runtime": 189.6806,
|
1409 |
+
"eval_samples_per_second": 25.532,
|
1410 |
+
"eval_steps_per_second": 0.801,
|
1411 |
+
"eval_wer": 0.1352540448481408,
|
1412 |
+
"step": 3600
|
1413 |
+
},
|
1414 |
+
{
|
1415 |
+
"epoch": 39.78,
|
1416 |
+
"learning_rate": 9.147058823529412e-05,
|
1417 |
+
"loss": 0.7591,
|
1418 |
+
"step": 3620
|
1419 |
+
},
|
1420 |
+
{
|
1421 |
+
"epoch": 39.99,
|
1422 |
+
"learning_rate": 8.950980392156862e-05,
|
1423 |
+
"loss": 0.7706,
|
1424 |
+
"step": 3640
|
1425 |
+
},
|
1426 |
+
{
|
1427 |
+
"epoch": 40.22,
|
1428 |
+
"learning_rate": 8.754901960784314e-05,
|
1429 |
+
"loss": 0.7805,
|
1430 |
+
"step": 3660
|
1431 |
+
},
|
1432 |
+
{
|
1433 |
+
"epoch": 40.44,
|
1434 |
+
"learning_rate": 8.558823529411765e-05,
|
1435 |
+
"loss": 0.7753,
|
1436 |
+
"step": 3680
|
1437 |
+
},
|
1438 |
+
{
|
1439 |
+
"epoch": 40.66,
|
1440 |
+
"learning_rate": 8.362745098039217e-05,
|
1441 |
+
"loss": 0.7638,
|
1442 |
+
"step": 3700
|
1443 |
+
},
|
1444 |
+
{
|
1445 |
+
"epoch": 40.66,
|
1446 |
+
"eval_loss": 0.15362653136253357,
|
1447 |
+
"eval_runtime": 192.1118,
|
1448 |
+
"eval_samples_per_second": 25.209,
|
1449 |
+
"eval_steps_per_second": 0.791,
|
1450 |
+
"eval_wer": 0.13380641498722678,
|
1451 |
+
"step": 3700
|
1452 |
+
},
|
1453 |
+
{
|
1454 |
+
"epoch": 40.87,
|
1455 |
+
"learning_rate": 8.166666666666667e-05,
|
1456 |
+
"loss": 0.7791,
|
1457 |
+
"step": 3720
|
1458 |
+
},
|
1459 |
+
{
|
1460 |
+
"epoch": 41.1,
|
1461 |
+
"learning_rate": 7.970588235294118e-05,
|
1462 |
+
"loss": 0.7931,
|
1463 |
+
"step": 3740
|
1464 |
+
},
|
1465 |
+
{
|
1466 |
+
"epoch": 41.32,
|
1467 |
+
"learning_rate": 7.774509803921568e-05,
|
1468 |
+
"loss": 0.741,
|
1469 |
+
"step": 3760
|
1470 |
+
},
|
1471 |
+
{
|
1472 |
+
"epoch": 41.54,
|
1473 |
+
"learning_rate": 7.57843137254902e-05,
|
1474 |
+
"loss": 0.7434,
|
1475 |
+
"step": 3780
|
1476 |
+
},
|
1477 |
+
{
|
1478 |
+
"epoch": 41.75,
|
1479 |
+
"learning_rate": 7.38235294117647e-05,
|
1480 |
+
"loss": 0.771,
|
1481 |
+
"step": 3800
|
1482 |
+
},
|
1483 |
+
{
|
1484 |
+
"epoch": 41.75,
|
1485 |
+
"eval_loss": 0.1530592143535614,
|
1486 |
+
"eval_runtime": 186.414,
|
1487 |
+
"eval_samples_per_second": 25.98,
|
1488 |
+
"eval_steps_per_second": 0.815,
|
1489 |
+
"eval_wer": 0.13170593244393983,
|
1490 |
+
"step": 3800
|
1491 |
}
|
1492 |
],
|
1493 |
"max_steps": 4550,
|
1494 |
"num_train_epochs": 50,
|
1495 |
+
"total_flos": 5.876357873041098e+19,
|
1496 |
"trial_name": null,
|
1497 |
"trial_params": null
|
1498 |
}
|
{checkpoint-3200 → checkpoint-3800}/training_args.bin
RENAMED
File without changes
|
{checkpoint-3300 → checkpoint-3900}/config.json
RENAMED
File without changes
|
{checkpoint-3300 → checkpoint-3900}/optimizer.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2490337809
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:168e59323a9bf241005906b1870612b1b524768b8a27757437012f8cdf781fc3
|
3 |
size 2490337809
|
{checkpoint-3300 → checkpoint-3900}/preprocessor_config.json
RENAMED
File without changes
|
{checkpoint-3200 → checkpoint-3900}/pytorch_model.bin
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1262063089
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5b689bc4f48a4fe8515030b196fb5bd4de5819b77555cbfd8e1979aa0d04c367
|
3 |
size 1262063089
|
{checkpoint-3200 → checkpoint-3900}/rng_state.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:266a5b6d767d7cc2ccd3807fca6c4cfb051cc7d3796a6836ef75e2cf3c6b3218
|
3 |
+
size 14567
|
{checkpoint-3200 → checkpoint-3900}/scaler.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:26f1e0a22621c69063b9f7d6715acac2896bf4745746722f322c93153a0c85b5
|
3 |
size 559
|
{checkpoint-3300 → checkpoint-3900}/scheduler.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e1bc34f9b39344288eae3f0b593523acd1748174f4d473bee33a329e62da9e8a
|
3 |
size 623
|
{checkpoint-3200 → checkpoint-3900}/trainer_state.json
RENAMED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -1254,11 +1254,284 @@
|
|
1254 |
"eval_steps_per_second": 0.795,
|
1255 |
"eval_wer": 0.1379222253760999,
|
1256 |
"step": 3200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1257 |
}
|
1258 |
],
|
1259 |
"max_steps": 4550,
|
1260 |
"num_train_epochs": 50,
|
1261 |
-
"total_flos":
|
1262 |
"trial_name": null,
|
1263 |
"trial_params": null
|
1264 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 42.85245901639344,
|
5 |
+
"global_step": 3900,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
1254 |
"eval_steps_per_second": 0.795,
|
1255 |
"eval_wer": 0.1379222253760999,
|
1256 |
"step": 3200
|
1257 |
+
},
|
1258 |
+
{
|
1259 |
+
"epoch": 35.38,
|
1260 |
+
"learning_rate": 0.00013058823529411764,
|
1261 |
+
"loss": 0.7963,
|
1262 |
+
"step": 3220
|
1263 |
+
},
|
1264 |
+
{
|
1265 |
+
"epoch": 35.6,
|
1266 |
+
"learning_rate": 0.00012862745098039216,
|
1267 |
+
"loss": 0.7895,
|
1268 |
+
"step": 3240
|
1269 |
+
},
|
1270 |
+
{
|
1271 |
+
"epoch": 35.82,
|
1272 |
+
"learning_rate": 0.0001266666666666667,
|
1273 |
+
"loss": 0.7964,
|
1274 |
+
"step": 3260
|
1275 |
+
},
|
1276 |
+
{
|
1277 |
+
"epoch": 36.04,
|
1278 |
+
"learning_rate": 0.0001247058823529412,
|
1279 |
+
"loss": 0.7931,
|
1280 |
+
"step": 3280
|
1281 |
+
},
|
1282 |
+
{
|
1283 |
+
"epoch": 36.26,
|
1284 |
+
"learning_rate": 0.0001227450980392157,
|
1285 |
+
"loss": 0.7835,
|
1286 |
+
"step": 3300
|
1287 |
+
},
|
1288 |
+
{
|
1289 |
+
"epoch": 36.26,
|
1290 |
+
"eval_loss": 0.16020993888378143,
|
1291 |
+
"eval_runtime": 192.1405,
|
1292 |
+
"eval_samples_per_second": 25.206,
|
1293 |
+
"eval_steps_per_second": 0.791,
|
1294 |
+
"eval_wer": 0.1408458699971615,
|
1295 |
+
"step": 3300
|
1296 |
+
},
|
1297 |
+
{
|
1298 |
+
"epoch": 36.48,
|
1299 |
+
"learning_rate": 0.00012078431372549021,
|
1300 |
+
"loss": 0.772,
|
1301 |
+
"step": 3320
|
1302 |
+
},
|
1303 |
+
{
|
1304 |
+
"epoch": 36.7,
|
1305 |
+
"learning_rate": 0.00011882352941176471,
|
1306 |
+
"loss": 0.7818,
|
1307 |
+
"step": 3340
|
1308 |
+
},
|
1309 |
+
{
|
1310 |
+
"epoch": 36.92,
|
1311 |
+
"learning_rate": 0.00011696078431372549,
|
1312 |
+
"loss": 0.8016,
|
1313 |
+
"step": 3360
|
1314 |
+
},
|
1315 |
+
{
|
1316 |
+
"epoch": 37.14,
|
1317 |
+
"learning_rate": 0.000115,
|
1318 |
+
"loss": 0.8061,
|
1319 |
+
"step": 3380
|
1320 |
+
},
|
1321 |
+
{
|
1322 |
+
"epoch": 37.36,
|
1323 |
+
"learning_rate": 0.0001130392156862745,
|
1324 |
+
"loss": 0.7703,
|
1325 |
+
"step": 3400
|
1326 |
+
},
|
1327 |
+
{
|
1328 |
+
"epoch": 37.36,
|
1329 |
+
"eval_loss": 0.16011376678943634,
|
1330 |
+
"eval_runtime": 187.5367,
|
1331 |
+
"eval_samples_per_second": 25.824,
|
1332 |
+
"eval_steps_per_second": 0.811,
|
1333 |
+
"eval_wer": 0.13692875390292364,
|
1334 |
+
"step": 3400
|
1335 |
+
},
|
1336 |
+
{
|
1337 |
+
"epoch": 37.58,
|
1338 |
+
"learning_rate": 0.00011107843137254903,
|
1339 |
+
"loss": 0.7713,
|
1340 |
+
"step": 3420
|
1341 |
+
},
|
1342 |
+
{
|
1343 |
+
"epoch": 37.8,
|
1344 |
+
"learning_rate": 0.00010911764705882353,
|
1345 |
+
"loss": 0.7712,
|
1346 |
+
"step": 3440
|
1347 |
+
},
|
1348 |
+
{
|
1349 |
+
"epoch": 38.02,
|
1350 |
+
"learning_rate": 0.00010715686274509805,
|
1351 |
+
"loss": 0.8149,
|
1352 |
+
"step": 3460
|
1353 |
+
},
|
1354 |
+
{
|
1355 |
+
"epoch": 38.24,
|
1356 |
+
"learning_rate": 0.00010519607843137255,
|
1357 |
+
"loss": 0.7885,
|
1358 |
+
"step": 3480
|
1359 |
+
},
|
1360 |
+
{
|
1361 |
+
"epoch": 38.46,
|
1362 |
+
"learning_rate": 0.00010323529411764706,
|
1363 |
+
"loss": 0.7474,
|
1364 |
+
"step": 3500
|
1365 |
+
},
|
1366 |
+
{
|
1367 |
+
"epoch": 38.46,
|
1368 |
+
"eval_loss": 0.1514146625995636,
|
1369 |
+
"eval_runtime": 190.8228,
|
1370 |
+
"eval_samples_per_second": 25.38,
|
1371 |
+
"eval_steps_per_second": 0.797,
|
1372 |
+
"eval_wer": 0.1342321884757309,
|
1373 |
+
"step": 3500
|
1374 |
+
},
|
1375 |
+
{
|
1376 |
+
"epoch": 38.68,
|
1377 |
+
"learning_rate": 0.00010127450980392156,
|
1378 |
+
"loss": 0.785,
|
1379 |
+
"step": 3520
|
1380 |
+
},
|
1381 |
+
{
|
1382 |
+
"epoch": 38.9,
|
1383 |
+
"learning_rate": 9.931372549019609e-05,
|
1384 |
+
"loss": 0.7677,
|
1385 |
+
"step": 3540
|
1386 |
+
},
|
1387 |
+
{
|
1388 |
+
"epoch": 39.12,
|
1389 |
+
"learning_rate": 9.73529411764706e-05,
|
1390 |
+
"loss": 0.7849,
|
1391 |
+
"step": 3560
|
1392 |
+
},
|
1393 |
+
{
|
1394 |
+
"epoch": 39.34,
|
1395 |
+
"learning_rate": 9.539215686274511e-05,
|
1396 |
+
"loss": 0.7637,
|
1397 |
+
"step": 3580
|
1398 |
+
},
|
1399 |
+
{
|
1400 |
+
"epoch": 39.56,
|
1401 |
+
"learning_rate": 9.343137254901961e-05,
|
1402 |
+
"loss": 0.7719,
|
1403 |
+
"step": 3600
|
1404 |
+
},
|
1405 |
+
{
|
1406 |
+
"epoch": 39.56,
|
1407 |
+
"eval_loss": 0.15932896733283997,
|
1408 |
+
"eval_runtime": 189.6806,
|
1409 |
+
"eval_samples_per_second": 25.532,
|
1410 |
+
"eval_steps_per_second": 0.801,
|
1411 |
+
"eval_wer": 0.1352540448481408,
|
1412 |
+
"step": 3600
|
1413 |
+
},
|
1414 |
+
{
|
1415 |
+
"epoch": 39.78,
|
1416 |
+
"learning_rate": 9.147058823529412e-05,
|
1417 |
+
"loss": 0.7591,
|
1418 |
+
"step": 3620
|
1419 |
+
},
|
1420 |
+
{
|
1421 |
+
"epoch": 39.99,
|
1422 |
+
"learning_rate": 8.950980392156862e-05,
|
1423 |
+
"loss": 0.7706,
|
1424 |
+
"step": 3640
|
1425 |
+
},
|
1426 |
+
{
|
1427 |
+
"epoch": 40.22,
|
1428 |
+
"learning_rate": 8.754901960784314e-05,
|
1429 |
+
"loss": 0.7805,
|
1430 |
+
"step": 3660
|
1431 |
+
},
|
1432 |
+
{
|
1433 |
+
"epoch": 40.44,
|
1434 |
+
"learning_rate": 8.558823529411765e-05,
|
1435 |
+
"loss": 0.7753,
|
1436 |
+
"step": 3680
|
1437 |
+
},
|
1438 |
+
{
|
1439 |
+
"epoch": 40.66,
|
1440 |
+
"learning_rate": 8.362745098039217e-05,
|
1441 |
+
"loss": 0.7638,
|
1442 |
+
"step": 3700
|
1443 |
+
},
|
1444 |
+
{
|
1445 |
+
"epoch": 40.66,
|
1446 |
+
"eval_loss": 0.15362653136253357,
|
1447 |
+
"eval_runtime": 192.1118,
|
1448 |
+
"eval_samples_per_second": 25.209,
|
1449 |
+
"eval_steps_per_second": 0.791,
|
1450 |
+
"eval_wer": 0.13380641498722678,
|
1451 |
+
"step": 3700
|
1452 |
+
},
|
1453 |
+
{
|
1454 |
+
"epoch": 40.87,
|
1455 |
+
"learning_rate": 8.166666666666667e-05,
|
1456 |
+
"loss": 0.7791,
|
1457 |
+
"step": 3720
|
1458 |
+
},
|
1459 |
+
{
|
1460 |
+
"epoch": 41.1,
|
1461 |
+
"learning_rate": 7.970588235294118e-05,
|
1462 |
+
"loss": 0.7931,
|
1463 |
+
"step": 3740
|
1464 |
+
},
|
1465 |
+
{
|
1466 |
+
"epoch": 41.32,
|
1467 |
+
"learning_rate": 7.774509803921568e-05,
|
1468 |
+
"loss": 0.741,
|
1469 |
+
"step": 3760
|
1470 |
+
},
|
1471 |
+
{
|
1472 |
+
"epoch": 41.54,
|
1473 |
+
"learning_rate": 7.57843137254902e-05,
|
1474 |
+
"loss": 0.7434,
|
1475 |
+
"step": 3780
|
1476 |
+
},
|
1477 |
+
{
|
1478 |
+
"epoch": 41.75,
|
1479 |
+
"learning_rate": 7.38235294117647e-05,
|
1480 |
+
"loss": 0.771,
|
1481 |
+
"step": 3800
|
1482 |
+
},
|
1483 |
+
{
|
1484 |
+
"epoch": 41.75,
|
1485 |
+
"eval_loss": 0.1530592143535614,
|
1486 |
+
"eval_runtime": 186.414,
|
1487 |
+
"eval_samples_per_second": 25.98,
|
1488 |
+
"eval_steps_per_second": 0.815,
|
1489 |
+
"eval_wer": 0.13170593244393983,
|
1490 |
+
"step": 3800
|
1491 |
+
},
|
1492 |
+
{
|
1493 |
+
"epoch": 41.97,
|
1494 |
+
"learning_rate": 7.186274509803923e-05,
|
1495 |
+
"loss": 0.7765,
|
1496 |
+
"step": 3820
|
1497 |
+
},
|
1498 |
+
{
|
1499 |
+
"epoch": 42.2,
|
1500 |
+
"learning_rate": 6.990196078431373e-05,
|
1501 |
+
"loss": 0.7599,
|
1502 |
+
"step": 3840
|
1503 |
+
},
|
1504 |
+
{
|
1505 |
+
"epoch": 42.42,
|
1506 |
+
"learning_rate": 6.794117647058824e-05,
|
1507 |
+
"loss": 0.7782,
|
1508 |
+
"step": 3860
|
1509 |
+
},
|
1510 |
+
{
|
1511 |
+
"epoch": 42.63,
|
1512 |
+
"learning_rate": 6.598039215686274e-05,
|
1513 |
+
"loss": 0.7395,
|
1514 |
+
"step": 3880
|
1515 |
+
},
|
1516 |
+
{
|
1517 |
+
"epoch": 42.85,
|
1518 |
+
"learning_rate": 6.401960784313726e-05,
|
1519 |
+
"loss": 0.7594,
|
1520 |
+
"step": 3900
|
1521 |
+
},
|
1522 |
+
{
|
1523 |
+
"epoch": 42.85,
|
1524 |
+
"eval_loss": 0.14983513951301575,
|
1525 |
+
"eval_runtime": 190.4439,
|
1526 |
+
"eval_samples_per_second": 25.43,
|
1527 |
+
"eval_steps_per_second": 0.798,
|
1528 |
+
"eval_wer": 0.12883905762134545,
|
1529 |
+
"step": 3900
|
1530 |
}
|
1531 |
],
|
1532 |
"max_steps": 4550,
|
1533 |
"num_train_epochs": 50,
|
1534 |
+
"total_flos": 6.032084123274907e+19,
|
1535 |
"trial_name": null,
|
1536 |
"trial_params": null
|
1537 |
}
|
{checkpoint-3300 → checkpoint-3900}/training_args.bin
RENAMED
File without changes
|