{ "best_metric": 0.753384912959381, "best_model_checkpoint": "../models/t5-picard/checkpoint-2368", "epoch": 99.98963557338682, "global_step": 7400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 0.0001, "loss": 3.3904, "step": 1 }, { "epoch": 0.05, "learning_rate": 0.0001, "loss": 1.8308, "step": 4 }, { "epoch": 0.11, "learning_rate": 0.0001, "loss": 0.8985, "step": 8 }, { "epoch": 0.16, "learning_rate": 0.0001, "loss": 0.7145, "step": 12 }, { "epoch": 0.21, "learning_rate": 0.0001, "loss": 0.4975, "step": 16 }, { "epoch": 0.27, "learning_rate": 0.0001, "loss": 0.4788, "step": 20 }, { "epoch": 0.32, "learning_rate": 0.0001, "loss": 0.4007, "step": 24 }, { "epoch": 0.37, "learning_rate": 0.0001, "loss": 0.3452, "step": 28 }, { "epoch": 0.43, "learning_rate": 0.0001, "loss": 0.3293, "step": 32 }, { "epoch": 0.48, "learning_rate": 0.0001, "loss": 0.3214, "step": 36 }, { "epoch": 0.53, "learning_rate": 0.0001, "loss": 0.3254, "step": 40 }, { "epoch": 0.59, "learning_rate": 0.0001, "loss": 0.2502, "step": 44 }, { "epoch": 0.64, "learning_rate": 0.0001, "loss": 0.261, "step": 48 }, { "epoch": 0.7, "learning_rate": 0.0001, "loss": 0.2192, "step": 52 }, { "epoch": 0.75, "learning_rate": 0.0001, "loss": 0.2354, "step": 56 }, { "epoch": 0.8, "learning_rate": 0.0001, "loss": 0.2376, "step": 60 }, { "epoch": 0.86, "learning_rate": 0.0001, "loss": 0.1848, "step": 64 }, { "epoch": 0.86, "eval_exec": 0.528046421663443, "eval_loss": 0.1530311554670334, "eval_runtime": 253.3827, "eval_samples_per_second": 4.081, "step": 64 }, { "epoch": 0.91, "learning_rate": 0.0001, "loss": 0.1615, "step": 68 }, { "epoch": 0.96, "learning_rate": 0.0001, "loss": 0.1756, "step": 72 }, { "epoch": 1.03, "learning_rate": 0.0001, "loss": 0.2114, "step": 76 }, { "epoch": 1.08, "learning_rate": 0.0001, "loss": 0.1703, "step": 80 }, { "epoch": 1.13, "learning_rate": 0.0001, "loss": 0.1485, "step": 84 }, { "epoch": 1.19, "learning_rate": 0.0001, "loss": 0.119, "step": 88 }, { "epoch": 1.24, "learning_rate": 0.0001, "loss": 0.1273, "step": 92 }, { "epoch": 1.29, "learning_rate": 0.0001, "loss": 0.1625, "step": 96 }, { "epoch": 1.35, "learning_rate": 0.0001, "loss": 0.13, "step": 100 }, { "epoch": 1.4, "learning_rate": 0.0001, "loss": 0.1265, "step": 104 }, { "epoch": 1.45, "learning_rate": 0.0001, "loss": 0.1088, "step": 108 }, { "epoch": 1.51, "learning_rate": 0.0001, "loss": 0.1397, "step": 112 }, { "epoch": 1.56, "learning_rate": 0.0001, "loss": 0.1404, "step": 116 }, { "epoch": 1.62, "learning_rate": 0.0001, "loss": 0.1136, "step": 120 }, { "epoch": 1.67, "learning_rate": 0.0001, "loss": 0.0919, "step": 124 }, { "epoch": 1.72, "learning_rate": 0.0001, "loss": 0.1114, "step": 128 }, { "epoch": 1.72, "eval_exec": 0.5309477756286267, "eval_loss": 0.1567097008228302, "eval_runtime": 282.0248, "eval_samples_per_second": 3.666, "step": 128 }, { "epoch": 1.78, "learning_rate": 0.0001, "loss": 0.131, "step": 132 }, { "epoch": 1.83, "learning_rate": 0.0001, "loss": 0.0972, "step": 136 }, { "epoch": 1.88, "learning_rate": 0.0001, "loss": 0.0924, "step": 140 }, { "epoch": 1.94, "learning_rate": 0.0001, "loss": 0.0915, "step": 144 }, { "epoch": 1.99, "learning_rate": 0.0001, "loss": 0.1105, "step": 148 }, { "epoch": 2.05, "learning_rate": 0.0001, "loss": 0.1299, "step": 152 }, { "epoch": 2.11, "learning_rate": 0.0001, "loss": 0.0861, "step": 156 }, { "epoch": 2.16, "learning_rate": 0.0001, "loss": 0.0779, "step": 160 }, { "epoch": 2.21, "learning_rate": 0.0001, "loss": 0.0677, "step": 164 }, { "epoch": 2.27, "learning_rate": 0.0001, "loss": 0.0879, "step": 168 }, { "epoch": 2.32, "learning_rate": 0.0001, "loss": 0.0953, "step": 172 }, { "epoch": 2.37, "learning_rate": 0.0001, "loss": 0.0766, "step": 176 }, { "epoch": 2.43, "learning_rate": 0.0001, "loss": 0.0621, "step": 180 }, { "epoch": 2.48, "learning_rate": 0.0001, "loss": 0.0793, "step": 184 }, { "epoch": 2.53, "learning_rate": 0.0001, "loss": 0.0929, "step": 188 }, { "epoch": 2.59, "learning_rate": 0.0001, "loss": 0.074, "step": 192 }, { "epoch": 2.59, "eval_exec": 0.6876208897485493, "eval_loss": 0.1364244967699051, "eval_runtime": 267.8144, "eval_samples_per_second": 3.861, "step": 192 }, { "epoch": 2.64, "learning_rate": 0.0001, "loss": 0.0661, "step": 196 }, { "epoch": 2.7, "learning_rate": 0.0001, "loss": 0.059, "step": 200 }, { "epoch": 2.75, "learning_rate": 0.0001, "loss": 0.0857, "step": 204 }, { "epoch": 2.8, "learning_rate": 0.0001, "loss": 0.0743, "step": 208 }, { "epoch": 2.86, "learning_rate": 0.0001, "loss": 0.0755, "step": 212 }, { "epoch": 2.91, "learning_rate": 0.0001, "loss": 0.0572, "step": 216 }, { "epoch": 2.96, "learning_rate": 0.0001, "loss": 0.1111, "step": 220 }, { "epoch": 3.03, "learning_rate": 0.0001, "loss": 0.0819, "step": 224 }, { "epoch": 3.08, "learning_rate": 0.0001, "loss": 0.0695, "step": 228 }, { "epoch": 3.13, "learning_rate": 0.0001, "loss": 0.0553, "step": 232 }, { "epoch": 3.19, "learning_rate": 0.0001, "loss": 0.0473, "step": 236 }, { "epoch": 3.24, "learning_rate": 0.0001, "loss": 0.051, "step": 240 }, { "epoch": 3.29, "learning_rate": 0.0001, "loss": 0.065, "step": 244 }, { "epoch": 3.35, "learning_rate": 0.0001, "loss": 0.0554, "step": 248 }, { "epoch": 3.4, "learning_rate": 0.0001, "loss": 0.0494, "step": 252 }, { "epoch": 3.45, "learning_rate": 0.0001, "loss": 0.0468, "step": 256 }, { "epoch": 3.45, "eval_exec": 0.6876208897485493, "eval_loss": 0.15044596791267395, "eval_runtime": 292.7353, "eval_samples_per_second": 3.532, "step": 256 }, { "epoch": 3.51, "learning_rate": 0.0001, "loss": 0.0588, "step": 260 }, { "epoch": 3.56, "learning_rate": 0.0001, "loss": 0.0591, "step": 264 }, { "epoch": 3.62, "learning_rate": 0.0001, "loss": 0.0498, "step": 268 }, { "epoch": 3.67, "learning_rate": 0.0001, "loss": 0.0414, "step": 272 }, { "epoch": 3.72, "learning_rate": 0.0001, "loss": 0.048, "step": 276 }, { "epoch": 3.78, "learning_rate": 0.0001, "loss": 0.0612, "step": 280 }, { "epoch": 3.83, "learning_rate": 0.0001, "loss": 0.051, "step": 284 }, { "epoch": 3.88, "learning_rate": 0.0001, "loss": 0.042, "step": 288 }, { "epoch": 3.94, "learning_rate": 0.0001, "loss": 0.2851, "step": 292 }, { "epoch": 3.99, "learning_rate": 0.0001, "loss": 0.0504, "step": 296 }, { "epoch": 4.05, "learning_rate": 0.0001, "loss": 0.0575, "step": 300 }, { "epoch": 4.11, "learning_rate": 0.0001, "loss": 0.0389, "step": 304 }, { "epoch": 4.16, "learning_rate": 0.0001, "loss": 0.0352, "step": 308 }, { "epoch": 4.21, "learning_rate": 0.0001, "loss": 0.0335, "step": 312 }, { "epoch": 4.27, "learning_rate": 0.0001, "loss": 0.0431, "step": 316 }, { "epoch": 4.32, "learning_rate": 0.0001, "loss": 0.0417, "step": 320 }, { "epoch": 4.32, "eval_exec": 0.706963249516441, "eval_loss": 0.15714390575885773, "eval_runtime": 277.5044, "eval_samples_per_second": 3.726, "step": 320 }, { "epoch": 4.37, "learning_rate": 0.0001, "loss": 0.0341, "step": 324 }, { "epoch": 4.43, "learning_rate": 0.0001, "loss": 0.0426, "step": 328 }, { "epoch": 4.48, "learning_rate": 0.0001, "loss": 0.0407, "step": 332 }, { "epoch": 4.53, "learning_rate": 0.0001, "loss": 0.0446, "step": 336 }, { "epoch": 4.59, "learning_rate": 0.0001, "loss": 0.0408, "step": 340 }, { "epoch": 4.64, "learning_rate": 0.0001, "loss": 0.0323, "step": 344 }, { "epoch": 4.7, "learning_rate": 0.0001, "loss": 0.032, "step": 348 }, { "epoch": 4.75, "learning_rate": 0.0001, "loss": 0.0428, "step": 352 }, { "epoch": 4.8, "learning_rate": 0.0001, "loss": 0.0452, "step": 356 }, { "epoch": 4.86, "learning_rate": 0.0001, "loss": 0.033, "step": 360 }, { "epoch": 4.91, "learning_rate": 0.0001, "loss": 0.0238, "step": 364 }, { "epoch": 4.96, "learning_rate": 0.0001, "loss": 0.0383, "step": 368 }, { "epoch": 5.03, "learning_rate": 0.0001, "loss": 0.0393, "step": 372 }, { "epoch": 5.08, "learning_rate": 0.0001, "loss": 0.033, "step": 376 }, { "epoch": 5.13, "learning_rate": 0.0001, "loss": 0.0261, "step": 380 }, { "epoch": 5.19, "learning_rate": 0.0001, "loss": 0.0204, "step": 384 }, { "epoch": 5.19, "eval_exec": 0.7040618955512572, "eval_loss": 0.17861126363277435, "eval_runtime": 288.586, "eval_samples_per_second": 3.583, "step": 384 }, { "epoch": 5.24, "learning_rate": 0.0001, "loss": 0.0283, "step": 388 }, { "epoch": 5.29, "learning_rate": 0.0001, "loss": 0.0328, "step": 392 }, { "epoch": 5.35, "learning_rate": 0.0001, "loss": 0.0303, "step": 396 }, { "epoch": 5.4, "learning_rate": 0.0001, "loss": 0.0225, "step": 400 }, { "epoch": 5.45, "learning_rate": 0.0001, "loss": 0.0243, "step": 404 }, { "epoch": 5.51, "learning_rate": 0.0001, "loss": 0.0319, "step": 408 }, { "epoch": 5.56, "learning_rate": 0.0001, "loss": 0.0392, "step": 412 }, { "epoch": 5.62, "learning_rate": 0.0001, "loss": 0.0265, "step": 416 }, { "epoch": 5.67, "learning_rate": 0.0001, "loss": 0.0198, "step": 420 }, { "epoch": 5.72, "learning_rate": 0.0001, "loss": 0.0293, "step": 424 }, { "epoch": 5.78, "learning_rate": 0.0001, "loss": 0.0326, "step": 428 }, { "epoch": 5.83, "learning_rate": 0.0001, "loss": 0.0288, "step": 432 }, { "epoch": 5.88, "learning_rate": 0.0001, "loss": 0.023, "step": 436 }, { "epoch": 5.94, "learning_rate": 0.0001, "loss": 0.0227, "step": 440 }, { "epoch": 5.99, "learning_rate": 0.0001, "loss": 0.0273, "step": 444 }, { "epoch": 6.05, "learning_rate": 0.0001, "loss": 0.029, "step": 448 }, { "epoch": 6.05, "eval_exec": 0.6924564796905223, "eval_loss": 0.17845195531845093, "eval_runtime": 299.4799, "eval_samples_per_second": 3.453, "step": 448 }, { "epoch": 6.11, "learning_rate": 0.0001, "loss": 0.0276, "step": 452 }, { "epoch": 6.16, "learning_rate": 0.0001, "loss": 0.0227, "step": 456 }, { "epoch": 6.21, "learning_rate": 0.0001, "loss": 0.0185, "step": 460 }, { "epoch": 6.27, "learning_rate": 0.0001, "loss": 0.0244, "step": 464 }, { "epoch": 6.32, "learning_rate": 0.0001, "loss": 0.0281, "step": 468 }, { "epoch": 6.37, "learning_rate": 0.0001, "loss": 0.0181, "step": 472 }, { "epoch": 6.43, "learning_rate": 0.0001, "loss": 0.0132, "step": 476 }, { "epoch": 6.48, "learning_rate": 0.0001, "loss": 0.0205, "step": 480 }, { "epoch": 6.53, "learning_rate": 0.0001, "loss": 0.0274, "step": 484 }, { "epoch": 6.59, "learning_rate": 0.0001, "loss": 0.0209, "step": 488 }, { "epoch": 6.64, "learning_rate": 0.0001, "loss": 0.0161, "step": 492 }, { "epoch": 6.7, "learning_rate": 0.0001, "loss": 0.0186, "step": 496 }, { "epoch": 6.75, "learning_rate": 0.0001, "loss": 0.0202, "step": 500 }, { "epoch": 6.8, "learning_rate": 0.0001, "loss": 0.0246, "step": 504 }, { "epoch": 6.86, "learning_rate": 0.0001, "loss": 0.0177, "step": 508 }, { "epoch": 6.91, "learning_rate": 0.0001, "loss": 0.0147, "step": 512 }, { "epoch": 6.91, "eval_exec": 0.7214700193423598, "eval_loss": 0.1890781968832016, "eval_runtime": 285.738, "eval_samples_per_second": 3.619, "step": 512 }, { "epoch": 6.96, "learning_rate": 0.0001, "loss": 0.023, "step": 516 }, { "epoch": 7.03, "learning_rate": 0.0001, "loss": 0.0231, "step": 520 }, { "epoch": 7.08, "learning_rate": 0.0001, "loss": 0.0169, "step": 524 }, { "epoch": 7.13, "learning_rate": 0.0001, "loss": 0.0161, "step": 528 }, { "epoch": 7.19, "learning_rate": 0.0001, "loss": 0.0115, "step": 532 }, { "epoch": 7.24, "learning_rate": 0.0001, "loss": 0.0145, "step": 536 }, { "epoch": 7.29, "learning_rate": 0.0001, "loss": 0.0273, "step": 540 }, { "epoch": 7.35, "learning_rate": 0.0001, "loss": 0.022, "step": 544 }, { "epoch": 7.4, "learning_rate": 0.0001, "loss": 0.0154, "step": 548 }, { "epoch": 7.45, "learning_rate": 0.0001, "loss": 0.0123, "step": 552 }, { "epoch": 7.51, "learning_rate": 0.0001, "loss": 0.0159, "step": 556 }, { "epoch": 7.56, "learning_rate": 0.0001, "loss": 0.0198, "step": 560 }, { "epoch": 7.62, "learning_rate": 0.0001, "loss": 0.0213, "step": 564 }, { "epoch": 7.67, "learning_rate": 0.0001, "loss": 0.0148, "step": 568 }, { "epoch": 7.72, "learning_rate": 0.0001, "loss": 0.0137, "step": 572 }, { "epoch": 7.78, "learning_rate": 0.0001, "loss": 0.0181, "step": 576 }, { "epoch": 7.78, "eval_exec": 0.7040618955512572, "eval_loss": 0.22579778730869293, "eval_runtime": 297.1361, "eval_samples_per_second": 3.48, "step": 576 }, { "epoch": 7.83, "learning_rate": 0.0001, "loss": 0.0196, "step": 580 }, { "epoch": 7.88, "learning_rate": 0.0001, "loss": 0.0141, "step": 584 }, { "epoch": 7.94, "learning_rate": 0.0001, "loss": 0.0118, "step": 588 }, { "epoch": 7.99, "learning_rate": 0.0001, "loss": 0.0192, "step": 592 }, { "epoch": 8.05, "learning_rate": 0.0001, "loss": 0.0181, "step": 596 }, { "epoch": 8.11, "learning_rate": 0.0001, "loss": 0.0108, "step": 600 }, { "epoch": 8.16, "learning_rate": 0.0001, "loss": 0.0112, "step": 604 }, { "epoch": 8.21, "learning_rate": 0.0001, "loss": 0.0092, "step": 608 }, { "epoch": 8.27, "learning_rate": 0.0001, "loss": 0.0153, "step": 612 }, { "epoch": 8.32, "learning_rate": 0.0001, "loss": 0.0169, "step": 616 }, { "epoch": 8.37, "learning_rate": 0.0001, "loss": 0.011, "step": 620 }, { "epoch": 8.43, "learning_rate": 0.0001, "loss": 0.0095, "step": 624 }, { "epoch": 8.48, "learning_rate": 0.0001, "loss": 0.0128, "step": 628 }, { "epoch": 8.53, "learning_rate": 0.0001, "loss": 0.0161, "step": 632 }, { "epoch": 8.59, "learning_rate": 0.0001, "loss": 0.0128, "step": 636 }, { "epoch": 8.64, "learning_rate": 0.0001, "loss": 0.0103, "step": 640 }, { "epoch": 8.64, "eval_exec": 0.6895551257253385, "eval_loss": 0.2176068276166916, "eval_runtime": 294.0528, "eval_samples_per_second": 3.516, "step": 640 }, { "epoch": 8.7, "learning_rate": 0.0001, "loss": 0.0074, "step": 644 }, { "epoch": 8.75, "learning_rate": 0.0001, "loss": 0.0152, "step": 648 }, { "epoch": 8.8, "learning_rate": 0.0001, "loss": 0.0152, "step": 652 }, { "epoch": 8.86, "learning_rate": 0.0001, "loss": 0.0118, "step": 656 }, { "epoch": 8.91, "learning_rate": 0.0001, "loss": 0.0114, "step": 660 }, { "epoch": 8.96, "learning_rate": 0.0001, "loss": 0.0116, "step": 664 }, { "epoch": 9.03, "learning_rate": 0.0001, "loss": 0.015, "step": 668 }, { "epoch": 9.08, "learning_rate": 0.0001, "loss": 0.0127, "step": 672 }, { "epoch": 9.13, "learning_rate": 0.0001, "loss": 0.0098, "step": 676 }, { "epoch": 9.19, "learning_rate": 0.0001, "loss": 0.009, "step": 680 }, { "epoch": 9.24, "learning_rate": 0.0001, "loss": 0.0101, "step": 684 }, { "epoch": 9.29, "learning_rate": 0.0001, "loss": 0.0211, "step": 688 }, { "epoch": 9.35, "learning_rate": 0.0001, "loss": 0.0141, "step": 692 }, { "epoch": 9.4, "learning_rate": 0.0001, "loss": 0.0089, "step": 696 }, { "epoch": 9.45, "learning_rate": 0.0001, "loss": 0.008, "step": 700 }, { "epoch": 9.51, "learning_rate": 0.0001, "loss": 0.0118, "step": 704 }, { "epoch": 9.51, "eval_exec": 0.688588007736944, "eval_loss": 0.2283668965101242, "eval_runtime": 296.2258, "eval_samples_per_second": 3.491, "step": 704 }, { "epoch": 9.56, "learning_rate": 0.0001, "loss": 0.0156, "step": 708 }, { "epoch": 9.62, "learning_rate": 0.0001, "loss": 0.0082, "step": 712 }, { "epoch": 9.67, "learning_rate": 0.0001, "loss": 0.0075, "step": 716 }, { "epoch": 9.72, "learning_rate": 0.0001, "loss": 0.01, "step": 720 }, { "epoch": 9.78, "learning_rate": 0.0001, "loss": 0.0111, "step": 724 }, { "epoch": 9.83, "learning_rate": 0.0001, "loss": 0.0103, "step": 728 }, { "epoch": 9.88, "learning_rate": 0.0001, "loss": 0.0084, "step": 732 }, { "epoch": 9.94, "learning_rate": 0.0001, "loss": 0.0063, "step": 736 }, { "epoch": 9.99, "learning_rate": 0.0001, "loss": 0.0091, "step": 740 }, { "epoch": 10.05, "learning_rate": 0.0001, "loss": 0.0119, "step": 744 }, { "epoch": 10.11, "learning_rate": 0.0001, "loss": 0.0089, "step": 748 }, { "epoch": 10.16, "learning_rate": 0.0001, "loss": 0.008, "step": 752 }, { "epoch": 10.21, "learning_rate": 0.0001, "loss": 0.0079, "step": 756 }, { "epoch": 10.27, "learning_rate": 0.0001, "loss": 0.0103, "step": 760 }, { "epoch": 10.32, "learning_rate": 0.0001, "loss": 0.0134, "step": 764 }, { "epoch": 10.37, "learning_rate": 0.0001, "loss": 0.0088, "step": 768 }, { "epoch": 10.37, "eval_exec": 0.7147001934235977, "eval_loss": 0.22069412469863892, "eval_runtime": 296.2474, "eval_samples_per_second": 3.49, "step": 768 }, { "epoch": 10.43, "learning_rate": 0.0001, "loss": 0.0079, "step": 772 }, { "epoch": 10.48, "learning_rate": 0.0001, "loss": 0.0079, "step": 776 }, { "epoch": 10.53, "learning_rate": 0.0001, "loss": 0.0126, "step": 780 }, { "epoch": 10.59, "learning_rate": 0.0001, "loss": 0.0084, "step": 784 }, { "epoch": 10.64, "learning_rate": 0.0001, "loss": 0.0084, "step": 788 }, { "epoch": 10.7, "learning_rate": 0.0001, "loss": 0.0056, "step": 792 }, { "epoch": 10.75, "learning_rate": 0.0001, "loss": 0.0122, "step": 796 }, { "epoch": 10.8, "learning_rate": 0.0001, "loss": 0.0145, "step": 800 }, { "epoch": 10.86, "learning_rate": 0.0001, "loss": 0.0076, "step": 804 }, { "epoch": 10.91, "learning_rate": 0.0001, "loss": 0.0071, "step": 808 }, { "epoch": 10.96, "learning_rate": 0.0001, "loss": 0.0107, "step": 812 }, { "epoch": 11.03, "learning_rate": 0.0001, "loss": 0.0146, "step": 816 }, { "epoch": 11.08, "learning_rate": 0.0001, "loss": 0.008, "step": 820 }, { "epoch": 11.13, "learning_rate": 0.0001, "loss": 0.0057, "step": 824 }, { "epoch": 11.19, "learning_rate": 0.0001, "loss": 0.0054, "step": 828 }, { "epoch": 11.24, "learning_rate": 0.0001, "loss": 0.0059, "step": 832 }, { "epoch": 11.24, "eval_exec": 0.7098646034816247, "eval_loss": 0.25100022554397583, "eval_runtime": 292.4465, "eval_samples_per_second": 3.536, "step": 832 }, { "epoch": 11.29, "learning_rate": 0.0001, "loss": 0.0096, "step": 836 }, { "epoch": 11.35, "learning_rate": 0.0001, "loss": 0.0079, "step": 840 }, { "epoch": 11.4, "learning_rate": 0.0001, "loss": 0.0059, "step": 844 }, { "epoch": 11.45, "learning_rate": 0.0001, "loss": 0.0099, "step": 848 }, { "epoch": 11.51, "learning_rate": 0.0001, "loss": 0.0133, "step": 852 }, { "epoch": 11.56, "learning_rate": 0.0001, "loss": 0.0087, "step": 856 }, { "epoch": 11.62, "learning_rate": 0.0001, "loss": 0.0073, "step": 860 }, { "epoch": 11.67, "learning_rate": 0.0001, "loss": 0.0048, "step": 864 }, { "epoch": 11.72, "learning_rate": 0.0001, "loss": 0.007, "step": 868 }, { "epoch": 11.78, "learning_rate": 0.0001, "loss": 0.0098, "step": 872 }, { "epoch": 11.83, "learning_rate": 0.0001, "loss": 0.0119, "step": 876 }, { "epoch": 11.88, "learning_rate": 0.0001, "loss": 0.0121, "step": 880 }, { "epoch": 11.94, "learning_rate": 0.0001, "loss": 0.0059, "step": 884 }, { "epoch": 11.99, "learning_rate": 0.0001, "loss": 0.0103, "step": 888 }, { "epoch": 12.05, "learning_rate": 0.0001, "loss": 0.0072, "step": 892 }, { "epoch": 12.11, "learning_rate": 0.0001, "loss": 0.0055, "step": 896 }, { "epoch": 12.11, "eval_exec": 0.7098646034816247, "eval_loss": 0.24743221700191498, "eval_runtime": 295.2581, "eval_samples_per_second": 3.502, "step": 896 }, { "epoch": 12.16, "learning_rate": 0.0001, "loss": 0.0068, "step": 900 }, { "epoch": 12.21, "learning_rate": 0.0001, "loss": 0.0034, "step": 904 }, { "epoch": 12.27, "learning_rate": 0.0001, "loss": 0.0081, "step": 908 }, { "epoch": 12.32, "learning_rate": 0.0001, "loss": 0.0095, "step": 912 }, { "epoch": 12.37, "learning_rate": 0.0001, "loss": 0.0069, "step": 916 }, { "epoch": 12.43, "learning_rate": 0.0001, "loss": 0.0091, "step": 920 }, { "epoch": 12.48, "learning_rate": 0.0001, "loss": 0.0062, "step": 924 }, { "epoch": 12.53, "learning_rate": 0.0001, "loss": 0.0084, "step": 928 }, { "epoch": 12.59, "learning_rate": 0.0001, "loss": 0.0064, "step": 932 }, { "epoch": 12.64, "learning_rate": 0.0001, "loss": 0.0057, "step": 936 }, { "epoch": 12.7, "learning_rate": 0.0001, "loss": 0.0075, "step": 940 }, { "epoch": 12.75, "learning_rate": 0.0001, "loss": 0.0091, "step": 944 }, { "epoch": 12.8, "learning_rate": 0.0001, "loss": 0.0078, "step": 948 }, { "epoch": 12.86, "learning_rate": 0.0001, "loss": 0.0061, "step": 952 }, { "epoch": 12.91, "learning_rate": 0.0001, "loss": 0.0098, "step": 956 }, { "epoch": 12.96, "learning_rate": 0.0001, "loss": 0.0072, "step": 960 }, { "epoch": 12.96, "eval_exec": 0.6924564796905223, "eval_loss": 0.25289151072502136, "eval_runtime": 288.8532, "eval_samples_per_second": 3.58, "step": 960 }, { "epoch": 13.03, "learning_rate": 0.0001, "loss": 0.0111, "step": 964 }, { "epoch": 13.08, "learning_rate": 0.0001, "loss": 0.0048, "step": 968 }, { "epoch": 13.13, "learning_rate": 0.0001, "loss": 0.0054, "step": 972 }, { "epoch": 13.19, "learning_rate": 0.0001, "loss": 0.0043, "step": 976 }, { "epoch": 13.24, "learning_rate": 0.0001, "loss": 0.0118, "step": 980 }, { "epoch": 13.29, "learning_rate": 0.0001, "loss": 0.0069, "step": 984 }, { "epoch": 13.35, "learning_rate": 0.0001, "loss": 0.0057, "step": 988 }, { "epoch": 13.4, "learning_rate": 0.0001, "loss": 0.0054, "step": 992 }, { "epoch": 13.45, "learning_rate": 0.0001, "loss": 0.0035, "step": 996 }, { "epoch": 13.51, "learning_rate": 0.0001, "loss": 0.0041, "step": 1000 }, { "epoch": 13.56, "learning_rate": 0.0001, "loss": 0.0052, "step": 1004 }, { "epoch": 13.62, "learning_rate": 0.0001, "loss": 0.0068, "step": 1008 }, { "epoch": 13.67, "learning_rate": 0.0001, "loss": 0.0039, "step": 1012 }, { "epoch": 13.72, "learning_rate": 0.0001, "loss": 0.0052, "step": 1016 }, { "epoch": 13.78, "learning_rate": 0.0001, "loss": 0.0075, "step": 1020 }, { "epoch": 13.83, "learning_rate": 0.0001, "loss": 0.0078, "step": 1024 }, { "epoch": 13.83, "eval_exec": 0.7311411992263056, "eval_loss": 0.26111745834350586, "eval_runtime": 286.014, "eval_samples_per_second": 3.615, "step": 1024 }, { "epoch": 13.88, "learning_rate": 0.0001, "loss": 0.0059, "step": 1028 }, { "epoch": 13.94, "learning_rate": 0.0001, "loss": 0.0046, "step": 1032 }, { "epoch": 13.99, "learning_rate": 0.0001, "loss": 0.007, "step": 1036 }, { "epoch": 14.05, "learning_rate": 0.0001, "loss": 0.0079, "step": 1040 }, { "epoch": 14.11, "learning_rate": 0.0001, "loss": 0.0056, "step": 1044 }, { "epoch": 14.16, "learning_rate": 0.0001, "loss": 0.005, "step": 1048 }, { "epoch": 14.21, "learning_rate": 0.0001, "loss": 0.0039, "step": 1052 }, { "epoch": 14.27, "learning_rate": 0.0001, "loss": 0.0043, "step": 1056 }, { "epoch": 14.32, "learning_rate": 0.0001, "loss": 0.005, "step": 1060 }, { "epoch": 14.37, "learning_rate": 0.0001, "loss": 0.0047, "step": 1064 }, { "epoch": 14.43, "learning_rate": 0.0001, "loss": 0.0026, "step": 1068 }, { "epoch": 14.48, "learning_rate": 0.0001, "loss": 0.005, "step": 1072 }, { "epoch": 14.53, "learning_rate": 0.0001, "loss": 0.0073, "step": 1076 }, { "epoch": 14.59, "learning_rate": 0.0001, "loss": 0.0069, "step": 1080 }, { "epoch": 14.64, "learning_rate": 0.0001, "loss": 0.0043, "step": 1084 }, { "epoch": 14.7, "learning_rate": 0.0001, "loss": 0.0035, "step": 1088 }, { "epoch": 14.7, "eval_exec": 0.7205029013539652, "eval_loss": 0.2667447030544281, "eval_runtime": 283.0779, "eval_samples_per_second": 3.653, "step": 1088 }, { "epoch": 14.75, "learning_rate": 0.0001, "loss": 0.0076, "step": 1092 }, { "epoch": 14.8, "learning_rate": 0.0001, "loss": 0.0083, "step": 1096 }, { "epoch": 14.86, "learning_rate": 0.0001, "loss": 0.0064, "step": 1100 }, { "epoch": 14.91, "learning_rate": 0.0001, "loss": 0.0042, "step": 1104 }, { "epoch": 14.96, "learning_rate": 0.0001, "loss": 0.0066, "step": 1108 }, { "epoch": 15.03, "learning_rate": 0.0001, "loss": 0.0053, "step": 1112 }, { "epoch": 15.08, "learning_rate": 0.0001, "loss": 0.005, "step": 1116 }, { "epoch": 15.13, "learning_rate": 0.0001, "loss": 0.004, "step": 1120 }, { "epoch": 15.19, "learning_rate": 0.0001, "loss": 0.0038, "step": 1124 }, { "epoch": 15.24, "learning_rate": 0.0001, "loss": 0.005, "step": 1128 }, { "epoch": 15.29, "learning_rate": 0.0001, "loss": 0.0064, "step": 1132 }, { "epoch": 15.35, "learning_rate": 0.0001, "loss": 0.0048, "step": 1136 }, { "epoch": 15.4, "learning_rate": 0.0001, "loss": 0.0034, "step": 1140 }, { "epoch": 15.45, "learning_rate": 0.0001, "loss": 0.0034, "step": 1144 }, { "epoch": 15.51, "learning_rate": 0.0001, "loss": 0.005, "step": 1148 }, { "epoch": 15.56, "learning_rate": 0.0001, "loss": 0.0078, "step": 1152 }, { "epoch": 15.56, "eval_exec": 0.730174081237911, "eval_loss": 0.24596308171749115, "eval_runtime": 290.0696, "eval_samples_per_second": 3.565, "step": 1152 }, { "epoch": 15.62, "learning_rate": 0.0001, "loss": 0.0032, "step": 1156 }, { "epoch": 15.67, "learning_rate": 0.0001, "loss": 0.0025, "step": 1160 }, { "epoch": 15.72, "learning_rate": 0.0001, "loss": 0.0041, "step": 1164 }, { "epoch": 15.78, "learning_rate": 0.0001, "loss": 0.0058, "step": 1168 }, { "epoch": 15.83, "learning_rate": 0.0001, "loss": 0.0082, "step": 1172 }, { "epoch": 15.88, "learning_rate": 0.0001, "loss": 0.0037, "step": 1176 }, { "epoch": 15.94, "learning_rate": 0.0001, "loss": 0.0021, "step": 1180 }, { "epoch": 15.99, "learning_rate": 0.0001, "loss": 0.0046, "step": 1184 }, { "epoch": 16.05, "learning_rate": 0.0001, "loss": 0.0045, "step": 1188 }, { "epoch": 16.11, "learning_rate": 0.0001, "loss": 0.0038, "step": 1192 }, { "epoch": 16.16, "learning_rate": 0.0001, "loss": 0.0032, "step": 1196 }, { "epoch": 16.21, "learning_rate": 0.0001, "loss": 0.0048, "step": 1200 }, { "epoch": 16.27, "learning_rate": 0.0001, "loss": 0.0035, "step": 1204 }, { "epoch": 16.32, "learning_rate": 0.0001, "loss": 0.0044, "step": 1208 }, { "epoch": 16.37, "learning_rate": 0.0001, "loss": 0.0035, "step": 1212 }, { "epoch": 16.43, "learning_rate": 0.0001, "loss": 0.0014, "step": 1216 }, { "epoch": 16.43, "eval_exec": 0.7050290135396519, "eval_loss": 0.28547874093055725, "eval_runtime": 289.8855, "eval_samples_per_second": 3.567, "step": 1216 }, { "epoch": 16.48, "learning_rate": 0.0001, "loss": 0.003, "step": 1220 }, { "epoch": 16.53, "learning_rate": 0.0001, "loss": 0.0044, "step": 1224 }, { "epoch": 16.59, "learning_rate": 0.0001, "loss": 0.0046, "step": 1228 }, { "epoch": 16.64, "learning_rate": 0.0001, "loss": 0.0037, "step": 1232 }, { "epoch": 16.7, "learning_rate": 0.0001, "loss": 0.0025, "step": 1236 }, { "epoch": 16.75, "learning_rate": 0.0001, "loss": 0.0052, "step": 1240 }, { "epoch": 16.8, "learning_rate": 0.0001, "loss": 0.0054, "step": 1244 }, { "epoch": 16.86, "learning_rate": 0.0001, "loss": 0.0045, "step": 1248 }, { "epoch": 16.91, "learning_rate": 0.0001, "loss": 0.004, "step": 1252 }, { "epoch": 16.96, "learning_rate": 0.0001, "loss": 0.0049, "step": 1256 }, { "epoch": 17.03, "learning_rate": 0.0001, "loss": 0.0049, "step": 1260 }, { "epoch": 17.08, "learning_rate": 0.0001, "loss": 0.0049, "step": 1264 }, { "epoch": 17.13, "learning_rate": 0.0001, "loss": 0.0038, "step": 1268 }, { "epoch": 17.19, "learning_rate": 0.0001, "loss": 0.0033, "step": 1272 }, { "epoch": 17.24, "learning_rate": 0.0001, "loss": 0.0029, "step": 1276 }, { "epoch": 17.29, "learning_rate": 0.0001, "loss": 0.0043, "step": 1280 }, { "epoch": 17.29, "eval_exec": 0.695357833655706, "eval_loss": 0.2833567261695862, "eval_runtime": 296.5011, "eval_samples_per_second": 3.487, "step": 1280 }, { "epoch": 17.35, "learning_rate": 0.0001, "loss": 0.004, "step": 1284 }, { "epoch": 17.4, "learning_rate": 0.0001, "loss": 0.0065, "step": 1288 }, { "epoch": 17.45, "learning_rate": 0.0001, "loss": 0.0024, "step": 1292 }, { "epoch": 17.51, "learning_rate": 0.0001, "loss": 0.0046, "step": 1296 }, { "epoch": 17.56, "learning_rate": 0.0001, "loss": 0.0047, "step": 1300 }, { "epoch": 17.62, "learning_rate": 0.0001, "loss": 0.0048, "step": 1304 }, { "epoch": 17.67, "learning_rate": 0.0001, "loss": 0.0029, "step": 1308 }, { "epoch": 17.72, "learning_rate": 0.0001, "loss": 0.0035, "step": 1312 }, { "epoch": 17.78, "learning_rate": 0.0001, "loss": 0.0054, "step": 1316 }, { "epoch": 17.83, "learning_rate": 0.0001, "loss": 0.006, "step": 1320 }, { "epoch": 17.88, "learning_rate": 0.0001, "loss": 0.0042, "step": 1324 }, { "epoch": 17.94, "learning_rate": 0.0001, "loss": 0.0024, "step": 1328 }, { "epoch": 17.99, "learning_rate": 0.0001, "loss": 0.0053, "step": 1332 }, { "epoch": 18.05, "learning_rate": 0.0001, "loss": 0.0199, "step": 1336 }, { "epoch": 18.11, "learning_rate": 0.0001, "loss": 0.008, "step": 1340 }, { "epoch": 18.16, "learning_rate": 0.0001, "loss": 0.0046, "step": 1344 }, { "epoch": 18.16, "eval_exec": 0.7021276595744681, "eval_loss": 0.2530258595943451, "eval_runtime": 289.9942, "eval_samples_per_second": 3.566, "step": 1344 }, { "epoch": 18.21, "learning_rate": 0.0001, "loss": 0.0028, "step": 1348 }, { "epoch": 18.27, "learning_rate": 0.0001, "loss": 0.0039, "step": 1352 }, { "epoch": 18.32, "learning_rate": 0.0001, "loss": 0.0045, "step": 1356 }, { "epoch": 18.37, "learning_rate": 0.0001, "loss": 0.0032, "step": 1360 }, { "epoch": 18.43, "learning_rate": 0.0001, "loss": 0.003, "step": 1364 }, { "epoch": 18.48, "learning_rate": 0.0001, "loss": 0.0031, "step": 1368 }, { "epoch": 18.53, "learning_rate": 0.0001, "loss": 0.0054, "step": 1372 }, { "epoch": 18.59, "learning_rate": 0.0001, "loss": 0.0037, "step": 1376 }, { "epoch": 18.64, "learning_rate": 0.0001, "loss": 0.0039, "step": 1380 }, { "epoch": 18.7, "learning_rate": 0.0001, "loss": 0.0029, "step": 1384 }, { "epoch": 18.75, "learning_rate": 0.0001, "loss": 0.0037, "step": 1388 }, { "epoch": 18.8, "learning_rate": 0.0001, "loss": 0.0043, "step": 1392 }, { "epoch": 18.86, "learning_rate": 0.0001, "loss": 0.0026, "step": 1396 }, { "epoch": 18.91, "learning_rate": 0.0001, "loss": 0.0027, "step": 1400 }, { "epoch": 18.96, "learning_rate": 0.0001, "loss": 0.0036, "step": 1404 }, { "epoch": 19.03, "learning_rate": 0.0001, "loss": 0.004, "step": 1408 }, { "epoch": 19.03, "eval_exec": 0.7330754352030948, "eval_loss": 0.2696512043476105, "eval_runtime": 291.6539, "eval_samples_per_second": 3.545, "step": 1408 }, { "epoch": 19.08, "learning_rate": 0.0001, "loss": 0.0049, "step": 1412 }, { "epoch": 19.13, "learning_rate": 0.0001, "loss": 0.0015, "step": 1416 }, { "epoch": 19.19, "learning_rate": 0.0001, "loss": 0.0018, "step": 1420 }, { "epoch": 19.24, "learning_rate": 0.0001, "loss": 0.0024, "step": 1424 }, { "epoch": 19.29, "learning_rate": 0.0001, "loss": 0.0046, "step": 1428 }, { "epoch": 19.35, "learning_rate": 0.0001, "loss": 0.0035, "step": 1432 }, { "epoch": 19.4, "learning_rate": 0.0001, "loss": 0.0019, "step": 1436 }, { "epoch": 19.45, "learning_rate": 0.0001, "loss": 0.0018, "step": 1440 }, { "epoch": 19.51, "learning_rate": 0.0001, "loss": 0.0045, "step": 1444 }, { "epoch": 19.56, "learning_rate": 0.0001, "loss": 0.0036, "step": 1448 }, { "epoch": 19.62, "learning_rate": 0.0001, "loss": 0.002, "step": 1452 }, { "epoch": 19.67, "learning_rate": 0.0001, "loss": 0.0024, "step": 1456 }, { "epoch": 19.72, "learning_rate": 0.0001, "loss": 0.01, "step": 1460 }, { "epoch": 19.78, "learning_rate": 0.0001, "loss": 0.0051, "step": 1464 }, { "epoch": 19.83, "learning_rate": 0.0001, "loss": 0.0039, "step": 1468 }, { "epoch": 19.88, "learning_rate": 0.0001, "loss": 0.0036, "step": 1472 }, { "epoch": 19.88, "eval_exec": 0.7224371373307543, "eval_loss": 0.2764066755771637, "eval_runtime": 304.8058, "eval_samples_per_second": 3.392, "step": 1472 }, { "epoch": 19.94, "learning_rate": 0.0001, "loss": 0.0024, "step": 1476 }, { "epoch": 19.99, "learning_rate": 0.0001, "loss": 0.0038, "step": 1480 }, { "epoch": 20.05, "learning_rate": 0.0001, "loss": 0.0041, "step": 1484 }, { "epoch": 20.11, "learning_rate": 0.0001, "loss": 0.004, "step": 1488 }, { "epoch": 20.16, "learning_rate": 0.0001, "loss": 0.002, "step": 1492 }, { "epoch": 20.21, "learning_rate": 0.0001, "loss": 0.0036, "step": 1496 }, { "epoch": 20.27, "learning_rate": 0.0001, "loss": 0.0031, "step": 1500 }, { "epoch": 20.32, "learning_rate": 0.0001, "loss": 0.0031, "step": 1504 }, { "epoch": 20.37, "learning_rate": 0.0001, "loss": 0.0025, "step": 1508 }, { "epoch": 20.43, "learning_rate": 0.0001, "loss": 0.0042, "step": 1512 }, { "epoch": 20.48, "learning_rate": 0.0001, "loss": 0.0035, "step": 1516 }, { "epoch": 20.53, "learning_rate": 0.0001, "loss": 0.0074, "step": 1520 }, { "epoch": 20.59, "learning_rate": 0.0001, "loss": 0.0039, "step": 1524 }, { "epoch": 20.64, "learning_rate": 0.0001, "loss": 0.0025, "step": 1528 }, { "epoch": 20.7, "learning_rate": 0.0001, "loss": 0.0031, "step": 1532 }, { "epoch": 20.75, "learning_rate": 0.0001, "loss": 0.0035, "step": 1536 }, { "epoch": 20.75, "eval_exec": 0.7156673114119922, "eval_loss": 0.2632952034473419, "eval_runtime": 295.0708, "eval_samples_per_second": 3.504, "step": 1536 }, { "epoch": 20.8, "learning_rate": 0.0001, "loss": 0.0031, "step": 1540 }, { "epoch": 20.86, "learning_rate": 0.0001, "loss": 0.0032, "step": 1544 }, { "epoch": 20.91, "learning_rate": 0.0001, "loss": 0.0027, "step": 1548 }, { "epoch": 20.96, "learning_rate": 0.0001, "loss": 0.0038, "step": 1552 }, { "epoch": 21.03, "learning_rate": 0.0001, "loss": 0.0034, "step": 1556 }, { "epoch": 21.08, "learning_rate": 0.0001, "loss": 0.0034, "step": 1560 }, { "epoch": 21.13, "learning_rate": 0.0001, "loss": 0.0053, "step": 1564 }, { "epoch": 21.19, "learning_rate": 0.0001, "loss": 0.0015, "step": 1568 }, { "epoch": 21.24, "learning_rate": 0.0001, "loss": 0.0022, "step": 1572 }, { "epoch": 21.29, "learning_rate": 0.0001, "loss": 0.003, "step": 1576 }, { "epoch": 21.35, "learning_rate": 0.0001, "loss": 0.0026, "step": 1580 }, { "epoch": 21.4, "learning_rate": 0.0001, "loss": 0.0028, "step": 1584 }, { "epoch": 21.45, "learning_rate": 0.0001, "loss": 0.0038, "step": 1588 }, { "epoch": 21.51, "learning_rate": 0.0001, "loss": 0.0029, "step": 1592 }, { "epoch": 21.56, "learning_rate": 0.0001, "loss": 0.0039, "step": 1596 }, { "epoch": 21.62, "learning_rate": 0.0001, "loss": 0.0047, "step": 1600 }, { "epoch": 21.62, "eval_exec": 0.7224371373307543, "eval_loss": 0.2584507465362549, "eval_runtime": 296.7113, "eval_samples_per_second": 3.485, "step": 1600 }, { "epoch": 21.67, "learning_rate": 0.0001, "loss": 0.0019, "step": 1604 }, { "epoch": 21.72, "learning_rate": 0.0001, "loss": 0.0027, "step": 1608 }, { "epoch": 21.78, "learning_rate": 0.0001, "loss": 0.0031, "step": 1612 }, { "epoch": 21.83, "learning_rate": 0.0001, "loss": 0.0023, "step": 1616 }, { "epoch": 21.88, "learning_rate": 0.0001, "loss": 0.0023, "step": 1620 }, { "epoch": 21.94, "learning_rate": 0.0001, "loss": 0.0041, "step": 1624 }, { "epoch": 21.99, "learning_rate": 0.0001, "loss": 0.0027, "step": 1628 }, { "epoch": 22.05, "learning_rate": 0.0001, "loss": 0.0027, "step": 1632 }, { "epoch": 22.11, "learning_rate": 0.0001, "loss": 0.0019, "step": 1636 }, { "epoch": 22.16, "learning_rate": 0.0001, "loss": 0.0024, "step": 1640 }, { "epoch": 22.21, "learning_rate": 0.0001, "loss": 0.002, "step": 1644 }, { "epoch": 22.27, "learning_rate": 0.0001, "loss": 0.0027, "step": 1648 }, { "epoch": 22.32, "learning_rate": 0.0001, "loss": 0.0033, "step": 1652 }, { "epoch": 22.37, "learning_rate": 0.0001, "loss": 0.0026, "step": 1656 }, { "epoch": 22.43, "learning_rate": 0.0001, "loss": 0.0027, "step": 1660 }, { "epoch": 22.48, "learning_rate": 0.0001, "loss": 0.0042, "step": 1664 }, { "epoch": 22.48, "eval_exec": 0.7263056092843327, "eval_loss": 0.262117475271225, "eval_runtime": 293.7879, "eval_samples_per_second": 3.52, "step": 1664 }, { "epoch": 22.53, "learning_rate": 0.0001, "loss": 0.0045, "step": 1668 }, { "epoch": 22.59, "learning_rate": 0.0001, "loss": 0.0038, "step": 1672 }, { "epoch": 22.64, "learning_rate": 0.0001, "loss": 0.0029, "step": 1676 }, { "epoch": 22.7, "learning_rate": 0.0001, "loss": 0.0019, "step": 1680 }, { "epoch": 22.75, "learning_rate": 0.0001, "loss": 0.0041, "step": 1684 }, { "epoch": 22.8, "learning_rate": 0.0001, "loss": 0.0178, "step": 1688 }, { "epoch": 22.86, "learning_rate": 0.0001, "loss": 0.0028, "step": 1692 }, { "epoch": 22.91, "learning_rate": 0.0001, "loss": 0.0032, "step": 1696 }, { "epoch": 22.96, "learning_rate": 0.0001, "loss": 0.0035, "step": 1700 }, { "epoch": 23.03, "learning_rate": 0.0001, "loss": 0.0037, "step": 1704 }, { "epoch": 23.08, "learning_rate": 0.0001, "loss": 0.0026, "step": 1708 }, { "epoch": 23.13, "learning_rate": 0.0001, "loss": 0.002, "step": 1712 }, { "epoch": 23.19, "learning_rate": 0.0001, "loss": 0.0023, "step": 1716 }, { "epoch": 23.24, "learning_rate": 0.0001, "loss": 0.0017, "step": 1720 }, { "epoch": 23.29, "learning_rate": 0.0001, "loss": 0.0026, "step": 1724 }, { "epoch": 23.35, "learning_rate": 0.0001, "loss": 0.0026, "step": 1728 }, { "epoch": 23.35, "eval_exec": 0.7205029013539652, "eval_loss": 0.2860746383666992, "eval_runtime": 301.1885, "eval_samples_per_second": 3.433, "step": 1728 }, { "epoch": 23.4, "learning_rate": 0.0001, "loss": 0.0043, "step": 1732 }, { "epoch": 23.45, "learning_rate": 0.0001, "loss": 0.0024, "step": 1736 }, { "epoch": 23.51, "learning_rate": 0.0001, "loss": 0.0026, "step": 1740 }, { "epoch": 23.56, "learning_rate": 0.0001, "loss": 0.0032, "step": 1744 }, { "epoch": 23.62, "learning_rate": 0.0001, "loss": 0.0026, "step": 1748 }, { "epoch": 23.67, "learning_rate": 0.0001, "loss": 0.0022, "step": 1752 }, { "epoch": 23.72, "learning_rate": 0.0001, "loss": 0.0018, "step": 1756 }, { "epoch": 23.78, "learning_rate": 0.0001, "loss": 0.0024, "step": 1760 }, { "epoch": 23.83, "learning_rate": 0.0001, "loss": 0.0022, "step": 1764 }, { "epoch": 23.88, "learning_rate": 0.0001, "loss": 0.0032, "step": 1768 }, { "epoch": 23.94, "learning_rate": 0.0001, "loss": 0.0021, "step": 1772 }, { "epoch": 23.99, "learning_rate": 0.0001, "loss": 0.0033, "step": 1776 }, { "epoch": 24.05, "learning_rate": 0.0001, "loss": 0.0034, "step": 1780 }, { "epoch": 24.11, "learning_rate": 0.0001, "loss": 0.0019, "step": 1784 }, { "epoch": 24.16, "learning_rate": 0.0001, "loss": 0.0029, "step": 1788 }, { "epoch": 24.21, "learning_rate": 0.0001, "loss": 0.0022, "step": 1792 }, { "epoch": 24.21, "eval_exec": 0.7243713733075435, "eval_loss": 0.27234283089637756, "eval_runtime": 296.6131, "eval_samples_per_second": 3.486, "step": 1792 }, { "epoch": 24.27, "learning_rate": 0.0001, "loss": 0.0018, "step": 1796 }, { "epoch": 24.32, "learning_rate": 0.0001, "loss": 0.0022, "step": 1800 }, { "epoch": 24.37, "learning_rate": 0.0001, "loss": 0.0018, "step": 1804 }, { "epoch": 24.43, "learning_rate": 0.0001, "loss": 0.0024, "step": 1808 }, { "epoch": 24.48, "learning_rate": 0.0001, "loss": 0.002, "step": 1812 }, { "epoch": 24.53, "learning_rate": 0.0001, "loss": 0.0025, "step": 1816 }, { "epoch": 24.59, "learning_rate": 0.0001, "loss": 0.0024, "step": 1820 }, { "epoch": 24.64, "learning_rate": 0.0001, "loss": 0.0035, "step": 1824 }, { "epoch": 24.7, "learning_rate": 0.0001, "loss": 0.0023, "step": 1828 }, { "epoch": 24.75, "learning_rate": 0.0001, "loss": 0.0016, "step": 1832 }, { "epoch": 24.8, "learning_rate": 0.0001, "loss": 0.0033, "step": 1836 }, { "epoch": 24.86, "learning_rate": 0.0001, "loss": 0.0035, "step": 1840 }, { "epoch": 24.91, "learning_rate": 0.0001, "loss": 0.0027, "step": 1844 }, { "epoch": 24.96, "learning_rate": 0.0001, "loss": 0.0033, "step": 1848 }, { "epoch": 25.03, "learning_rate": 0.0001, "loss": 0.0029, "step": 1852 }, { "epoch": 25.08, "learning_rate": 0.0001, "loss": 0.0024, "step": 1856 }, { "epoch": 25.08, "eval_exec": 0.718568665377176, "eval_loss": 0.2882576584815979, "eval_runtime": 293.4007, "eval_samples_per_second": 3.524, "step": 1856 }, { "epoch": 25.13, "learning_rate": 0.0001, "loss": 0.0026, "step": 1860 }, { "epoch": 25.19, "learning_rate": 0.0001, "loss": 0.0027, "step": 1864 }, { "epoch": 25.24, "learning_rate": 0.0001, "loss": 0.0033, "step": 1868 }, { "epoch": 25.29, "learning_rate": 0.0001, "loss": 0.0037, "step": 1872 }, { "epoch": 25.35, "learning_rate": 0.0001, "loss": 0.0022, "step": 1876 }, { "epoch": 25.4, "learning_rate": 0.0001, "loss": 0.0028, "step": 1880 }, { "epoch": 25.45, "learning_rate": 0.0001, "loss": 0.0037, "step": 1884 }, { "epoch": 25.51, "learning_rate": 0.0001, "loss": 0.004, "step": 1888 }, { "epoch": 25.56, "learning_rate": 0.0001, "loss": 0.0031, "step": 1892 }, { "epoch": 25.62, "learning_rate": 0.0001, "loss": 0.0025, "step": 1896 }, { "epoch": 25.67, "learning_rate": 0.0001, "loss": 0.0044, "step": 1900 }, { "epoch": 25.72, "learning_rate": 0.0001, "loss": 0.0033, "step": 1904 }, { "epoch": 25.78, "learning_rate": 0.0001, "loss": 0.0035, "step": 1908 }, { "epoch": 25.83, "learning_rate": 0.0001, "loss": 0.002, "step": 1912 }, { "epoch": 25.88, "learning_rate": 0.0001, "loss": 0.0032, "step": 1916 }, { "epoch": 25.94, "learning_rate": 0.0001, "loss": 0.002, "step": 1920 }, { "epoch": 25.94, "eval_exec": 0.6963249516441006, "eval_loss": 0.2879628837108612, "eval_runtime": 294.5537, "eval_samples_per_second": 3.51, "step": 1920 }, { "epoch": 25.99, "learning_rate": 0.0001, "loss": 0.0028, "step": 1924 }, { "epoch": 26.05, "learning_rate": 0.0001, "loss": 0.0032, "step": 1928 }, { "epoch": 26.11, "learning_rate": 0.0001, "loss": 0.0015, "step": 1932 }, { "epoch": 26.16, "learning_rate": 0.0001, "loss": 0.0022, "step": 1936 }, { "epoch": 26.21, "learning_rate": 0.0001, "loss": 0.0022, "step": 1940 }, { "epoch": 26.27, "learning_rate": 0.0001, "loss": 0.0025, "step": 1944 }, { "epoch": 26.32, "learning_rate": 0.0001, "loss": 0.0056, "step": 1948 }, { "epoch": 26.37, "learning_rate": 0.0001, "loss": 0.0033, "step": 1952 }, { "epoch": 26.43, "learning_rate": 0.0001, "loss": 0.0022, "step": 1956 }, { "epoch": 26.48, "learning_rate": 0.0001, "loss": 0.0023, "step": 1960 }, { "epoch": 26.53, "learning_rate": 0.0001, "loss": 0.003, "step": 1964 }, { "epoch": 26.59, "learning_rate": 0.0001, "loss": 0.0022, "step": 1968 }, { "epoch": 26.64, "learning_rate": 0.0001, "loss": 0.0026, "step": 1972 }, { "epoch": 26.7, "learning_rate": 0.0001, "loss": 0.0024, "step": 1976 }, { "epoch": 26.75, "learning_rate": 0.0001, "loss": 0.0033, "step": 1980 }, { "epoch": 26.8, "learning_rate": 0.0001, "loss": 0.0019, "step": 1984 }, { "epoch": 26.8, "eval_exec": 0.7272727272727273, "eval_loss": 0.29901865124702454, "eval_runtime": 296.6574, "eval_samples_per_second": 3.486, "step": 1984 }, { "epoch": 26.86, "learning_rate": 0.0001, "loss": 0.0029, "step": 1988 }, { "epoch": 26.91, "learning_rate": 0.0001, "loss": 0.002, "step": 1992 }, { "epoch": 26.96, "learning_rate": 0.0001, "loss": 0.0048, "step": 1996 }, { "epoch": 27.03, "learning_rate": 0.0001, "loss": 0.0031, "step": 2000 }, { "epoch": 27.08, "learning_rate": 0.0001, "loss": 0.0019, "step": 2004 }, { "epoch": 27.13, "learning_rate": 0.0001, "loss": 0.0012, "step": 2008 }, { "epoch": 27.19, "learning_rate": 0.0001, "loss": 0.0015, "step": 2012 }, { "epoch": 27.24, "learning_rate": 0.0001, "loss": 0.0012, "step": 2016 }, { "epoch": 27.29, "learning_rate": 0.0001, "loss": 0.0028, "step": 2020 }, { "epoch": 27.35, "learning_rate": 0.0001, "loss": 0.0021, "step": 2024 }, { "epoch": 27.4, "learning_rate": 0.0001, "loss": 0.0019, "step": 2028 }, { "epoch": 27.45, "learning_rate": 0.0001, "loss": 0.0014, "step": 2032 }, { "epoch": 27.51, "learning_rate": 0.0001, "loss": 0.0017, "step": 2036 }, { "epoch": 27.56, "learning_rate": 0.0001, "loss": 0.0016, "step": 2040 }, { "epoch": 27.62, "learning_rate": 0.0001, "loss": 0.0009, "step": 2044 }, { "epoch": 27.67, "learning_rate": 0.0001, "loss": 0.0021, "step": 2048 }, { "epoch": 27.67, "eval_exec": 0.7195357833655706, "eval_loss": 0.3057432770729065, "eval_runtime": 300.1856, "eval_samples_per_second": 3.445, "step": 2048 }, { "epoch": 27.72, "learning_rate": 0.0001, "loss": 0.0027, "step": 2052 }, { "epoch": 27.78, "learning_rate": 0.0001, "loss": 0.0023, "step": 2056 }, { "epoch": 27.83, "learning_rate": 0.0001, "loss": 0.0018, "step": 2060 }, { "epoch": 27.88, "learning_rate": 0.0001, "loss": 0.0022, "step": 2064 }, { "epoch": 27.94, "learning_rate": 0.0001, "loss": 0.0019, "step": 2068 }, { "epoch": 27.99, "learning_rate": 0.0001, "loss": 0.002, "step": 2072 }, { "epoch": 28.05, "learning_rate": 0.0001, "loss": 0.0032, "step": 2076 }, { "epoch": 28.11, "learning_rate": 0.0001, "loss": 0.0031, "step": 2080 }, { "epoch": 28.16, "learning_rate": 0.0001, "loss": 0.0018, "step": 2084 }, { "epoch": 28.21, "learning_rate": 0.0001, "loss": 0.0017, "step": 2088 }, { "epoch": 28.27, "learning_rate": 0.0001, "loss": 0.0015, "step": 2092 }, { "epoch": 28.32, "learning_rate": 0.0001, "loss": 0.0029, "step": 2096 }, { "epoch": 28.37, "learning_rate": 0.0001, "loss": 0.0023, "step": 2100 }, { "epoch": 28.43, "learning_rate": 0.0001, "loss": 0.0013, "step": 2104 }, { "epoch": 28.48, "learning_rate": 0.0001, "loss": 0.002, "step": 2108 }, { "epoch": 28.53, "learning_rate": 0.0001, "loss": 0.0021, "step": 2112 }, { "epoch": 28.53, "eval_exec": 0.7282398452611218, "eval_loss": 0.28940409421920776, "eval_runtime": 294.4149, "eval_samples_per_second": 3.512, "step": 2112 }, { "epoch": 28.59, "learning_rate": 0.0001, "loss": 0.0016, "step": 2116 }, { "epoch": 28.64, "learning_rate": 0.0001, "loss": 0.002, "step": 2120 }, { "epoch": 28.7, "learning_rate": 0.0001, "loss": 0.0023, "step": 2124 }, { "epoch": 28.75, "learning_rate": 0.0001, "loss": 0.0031, "step": 2128 }, { "epoch": 28.8, "learning_rate": 0.0001, "loss": 0.0024, "step": 2132 }, { "epoch": 28.86, "learning_rate": 0.0001, "loss": 0.0034, "step": 2136 }, { "epoch": 28.91, "learning_rate": 0.0001, "loss": 0.0021, "step": 2140 }, { "epoch": 28.96, "learning_rate": 0.0001, "loss": 0.0031, "step": 2144 }, { "epoch": 29.03, "learning_rate": 0.0001, "loss": 0.0019, "step": 2148 }, { "epoch": 29.08, "learning_rate": 0.0001, "loss": 0.0028, "step": 2152 }, { "epoch": 29.13, "learning_rate": 0.0001, "loss": 0.0015, "step": 2156 }, { "epoch": 29.19, "learning_rate": 0.0001, "loss": 0.0019, "step": 2160 }, { "epoch": 29.24, "learning_rate": 0.0001, "loss": 0.001, "step": 2164 }, { "epoch": 29.29, "learning_rate": 0.0001, "loss": 0.0025, "step": 2168 }, { "epoch": 29.35, "learning_rate": 0.0001, "loss": 0.003, "step": 2172 }, { "epoch": 29.4, "learning_rate": 0.0001, "loss": 0.0014, "step": 2176 }, { "epoch": 29.4, "eval_exec": 0.7166344294003868, "eval_loss": 0.308432400226593, "eval_runtime": 289.3433, "eval_samples_per_second": 3.574, "step": 2176 }, { "epoch": 29.45, "learning_rate": 0.0001, "loss": 0.0012, "step": 2180 }, { "epoch": 29.51, "learning_rate": 0.0001, "loss": 0.0026, "step": 2184 }, { "epoch": 29.56, "learning_rate": 0.0001, "loss": 0.0044, "step": 2188 }, { "epoch": 29.62, "learning_rate": 0.0001, "loss": 0.0019, "step": 2192 }, { "epoch": 29.67, "learning_rate": 0.0001, "loss": 0.0013, "step": 2196 }, { "epoch": 29.72, "learning_rate": 0.0001, "loss": 0.0021, "step": 2200 }, { "epoch": 29.78, "learning_rate": 0.0001, "loss": 0.002, "step": 2204 }, { "epoch": 29.83, "learning_rate": 0.0001, "loss": 0.003, "step": 2208 }, { "epoch": 29.88, "learning_rate": 0.0001, "loss": 0.0019, "step": 2212 }, { "epoch": 29.94, "learning_rate": 0.0001, "loss": 0.0012, "step": 2216 }, { "epoch": 29.99, "learning_rate": 0.0001, "loss": 0.0025, "step": 2220 }, { "epoch": 30.05, "learning_rate": 0.0001, "loss": 0.0024, "step": 2224 }, { "epoch": 30.11, "learning_rate": 0.0001, "loss": 0.0047, "step": 2228 }, { "epoch": 30.16, "learning_rate": 0.0001, "loss": 0.0027, "step": 2232 }, { "epoch": 30.21, "learning_rate": 0.0001, "loss": 0.0008, "step": 2236 }, { "epoch": 30.27, "learning_rate": 0.0001, "loss": 0.0035, "step": 2240 }, { "epoch": 30.27, "eval_exec": 0.7176015473887815, "eval_loss": 0.28124359250068665, "eval_runtime": 292.5108, "eval_samples_per_second": 3.535, "step": 2240 }, { "epoch": 30.32, "learning_rate": 0.0001, "loss": 0.0022, "step": 2244 }, { "epoch": 30.37, "learning_rate": 0.0001, "loss": 0.0023, "step": 2248 }, { "epoch": 30.43, "learning_rate": 0.0001, "loss": 0.0034, "step": 2252 }, { "epoch": 30.48, "learning_rate": 0.0001, "loss": 0.0024, "step": 2256 }, { "epoch": 30.53, "learning_rate": 0.0001, "loss": 0.0038, "step": 2260 }, { "epoch": 30.59, "learning_rate": 0.0001, "loss": 0.0024, "step": 2264 }, { "epoch": 30.64, "learning_rate": 0.0001, "loss": 0.004, "step": 2268 }, { "epoch": 30.7, "learning_rate": 0.0001, "loss": 0.0012, "step": 2272 }, { "epoch": 30.75, "learning_rate": 0.0001, "loss": 0.0023, "step": 2276 }, { "epoch": 30.8, "learning_rate": 0.0001, "loss": 0.0021, "step": 2280 }, { "epoch": 30.86, "learning_rate": 0.0001, "loss": 0.0017, "step": 2284 }, { "epoch": 30.91, "learning_rate": 0.0001, "loss": 0.001, "step": 2288 }, { "epoch": 30.96, "learning_rate": 0.0001, "loss": 0.0011, "step": 2292 }, { "epoch": 31.03, "learning_rate": 0.0001, "loss": 0.0027, "step": 2296 }, { "epoch": 31.08, "learning_rate": 0.0001, "loss": 0.0013, "step": 2300 }, { "epoch": 31.13, "learning_rate": 0.0001, "loss": 0.0011, "step": 2304 }, { "epoch": 31.13, "eval_exec": 0.7214700193423598, "eval_loss": 0.30306151509284973, "eval_runtime": 285.8576, "eval_samples_per_second": 3.617, "step": 2304 }, { "epoch": 31.19, "learning_rate": 0.0001, "loss": 0.0007, "step": 2308 }, { "epoch": 31.24, "learning_rate": 0.0001, "loss": 0.0014, "step": 2312 }, { "epoch": 31.29, "learning_rate": 0.0001, "loss": 0.003, "step": 2316 }, { "epoch": 31.35, "learning_rate": 0.0001, "loss": 0.003, "step": 2320 }, { "epoch": 31.4, "learning_rate": 0.0001, "loss": 0.002, "step": 2324 }, { "epoch": 31.45, "learning_rate": 0.0001, "loss": 0.0013, "step": 2328 }, { "epoch": 31.51, "learning_rate": 0.0001, "loss": 0.0022, "step": 2332 }, { "epoch": 31.56, "learning_rate": 0.0001, "loss": 0.0022, "step": 2336 }, { "epoch": 31.62, "learning_rate": 0.0001, "loss": 0.0023, "step": 2340 }, { "epoch": 31.67, "learning_rate": 0.0001, "loss": 0.0028, "step": 2344 }, { "epoch": 31.72, "learning_rate": 0.0001, "loss": 0.0016, "step": 2348 }, { "epoch": 31.78, "learning_rate": 0.0001, "loss": 0.0019, "step": 2352 }, { "epoch": 31.83, "learning_rate": 0.0001, "loss": 0.0007, "step": 2356 }, { "epoch": 31.88, "learning_rate": 0.0001, "loss": 0.0009, "step": 2360 }, { "epoch": 31.94, "learning_rate": 0.0001, "loss": 0.0014, "step": 2364 }, { "epoch": 31.99, "learning_rate": 0.0001, "loss": 0.0025, "step": 2368 }, { "epoch": 31.99, "eval_exec": 0.753384912959381, "eval_loss": 0.2954598069190979, "eval_runtime": 301.2457, "eval_samples_per_second": 3.432, "step": 2368 }, { "epoch": 32.05, "learning_rate": 0.0001, "loss": 0.0016, "step": 2372 }, { "epoch": 32.11, "learning_rate": 0.0001, "loss": 0.0021, "step": 2376 }, { "epoch": 32.16, "learning_rate": 0.0001, "loss": 0.0025, "step": 2380 }, { "epoch": 32.21, "learning_rate": 0.0001, "loss": 0.0045, "step": 2384 }, { "epoch": 32.27, "learning_rate": 0.0001, "loss": 0.0028, "step": 2388 }, { "epoch": 32.32, "learning_rate": 0.0001, "loss": 0.0023, "step": 2392 }, { "epoch": 32.37, "learning_rate": 0.0001, "loss": 0.0014, "step": 2396 }, { "epoch": 32.43, "learning_rate": 0.0001, "loss": 0.0014, "step": 2400 }, { "epoch": 32.48, "learning_rate": 0.0001, "loss": 0.0019, "step": 2404 }, { "epoch": 32.53, "learning_rate": 0.0001, "loss": 0.0014, "step": 2408 }, { "epoch": 32.59, "learning_rate": 0.0001, "loss": 0.0008, "step": 2412 }, { "epoch": 32.64, "learning_rate": 0.0001, "loss": 0.0007, "step": 2416 }, { "epoch": 32.7, "learning_rate": 0.0001, "loss": 0.004, "step": 2420 }, { "epoch": 32.75, "learning_rate": 0.0001, "loss": 0.003, "step": 2424 }, { "epoch": 32.8, "learning_rate": 0.0001, "loss": 0.002, "step": 2428 }, { "epoch": 32.86, "learning_rate": 0.0001, "loss": 0.0008, "step": 2432 }, { "epoch": 32.86, "eval_exec": 0.730174081237911, "eval_loss": 0.2934824824333191, "eval_runtime": 296.4371, "eval_samples_per_second": 3.488, "step": 2432 }, { "epoch": 32.91, "learning_rate": 0.0001, "loss": 0.0009, "step": 2436 }, { "epoch": 32.96, "learning_rate": 0.0001, "loss": 0.0023, "step": 2440 }, { "epoch": 33.03, "learning_rate": 0.0001, "loss": 0.0024, "step": 2444 }, { "epoch": 33.08, "learning_rate": 0.0001, "loss": 0.0017, "step": 2448 }, { "epoch": 33.13, "learning_rate": 0.0001, "loss": 0.0032, "step": 2452 }, { "epoch": 33.19, "learning_rate": 0.0001, "loss": 0.0014, "step": 2456 }, { "epoch": 33.24, "learning_rate": 0.0001, "loss": 0.0016, "step": 2460 }, { "epoch": 33.29, "learning_rate": 0.0001, "loss": 0.0016, "step": 2464 }, { "epoch": 33.35, "learning_rate": 0.0001, "loss": 0.001, "step": 2468 }, { "epoch": 33.4, "learning_rate": 0.0001, "loss": 0.0011, "step": 2472 }, { "epoch": 33.45, "learning_rate": 0.0001, "loss": 0.0008, "step": 2476 }, { "epoch": 33.51, "learning_rate": 0.0001, "loss": 0.0025, "step": 2480 }, { "epoch": 33.56, "learning_rate": 0.0001, "loss": 0.0027, "step": 2484 }, { "epoch": 33.62, "learning_rate": 0.0001, "loss": 0.0017, "step": 2488 }, { "epoch": 33.67, "learning_rate": 0.0001, "loss": 0.0013, "step": 2492 }, { "epoch": 33.72, "learning_rate": 0.0001, "loss": 0.0024, "step": 2496 }, { "epoch": 33.72, "eval_exec": 0.695357833655706, "eval_loss": 0.28229427337646484, "eval_runtime": 288.5686, "eval_samples_per_second": 3.583, "step": 2496 }, { "epoch": 33.78, "learning_rate": 0.0001, "loss": 0.0041, "step": 2500 }, { "epoch": 33.83, "learning_rate": 0.0001, "loss": 0.0014, "step": 2504 }, { "epoch": 33.88, "learning_rate": 0.0001, "loss": 0.0017, "step": 2508 }, { "epoch": 33.94, "learning_rate": 0.0001, "loss": 0.0011, "step": 2512 }, { "epoch": 33.99, "learning_rate": 0.0001, "loss": 0.0029, "step": 2516 }, { "epoch": 34.05, "learning_rate": 0.0001, "loss": 0.002, "step": 2520 }, { "epoch": 34.11, "learning_rate": 0.0001, "loss": 0.0015, "step": 2524 }, { "epoch": 34.16, "learning_rate": 0.0001, "loss": 0.0015, "step": 2528 }, { "epoch": 34.21, "learning_rate": 0.0001, "loss": 0.0021, "step": 2532 }, { "epoch": 34.27, "learning_rate": 0.0001, "loss": 0.0029, "step": 2536 }, { "epoch": 34.32, "learning_rate": 0.0001, "loss": 0.0017, "step": 2540 }, { "epoch": 34.37, "learning_rate": 0.0001, "loss": 0.0015, "step": 2544 }, { "epoch": 34.43, "learning_rate": 0.0001, "loss": 0.0025, "step": 2548 }, { "epoch": 34.48, "learning_rate": 0.0001, "loss": 0.0015, "step": 2552 }, { "epoch": 34.53, "learning_rate": 0.0001, "loss": 0.002, "step": 2556 }, { "epoch": 34.59, "learning_rate": 0.0001, "loss": 0.0019, "step": 2560 }, { "epoch": 34.59, "eval_exec": 0.718568665377176, "eval_loss": 0.2955181896686554, "eval_runtime": 294.421, "eval_samples_per_second": 3.512, "step": 2560 }, { "epoch": 34.64, "learning_rate": 0.0001, "loss": 0.0017, "step": 2564 }, { "epoch": 34.7, "learning_rate": 0.0001, "loss": 0.0008, "step": 2568 }, { "epoch": 34.75, "learning_rate": 0.0001, "loss": 0.003, "step": 2572 }, { "epoch": 34.8, "learning_rate": 0.0001, "loss": 0.0018, "step": 2576 }, { "epoch": 34.86, "learning_rate": 0.0001, "loss": 0.0022, "step": 2580 }, { "epoch": 34.91, "learning_rate": 0.0001, "loss": 0.0018, "step": 2584 }, { "epoch": 34.96, "learning_rate": 0.0001, "loss": 0.001, "step": 2588 }, { "epoch": 35.03, "learning_rate": 0.0001, "loss": 0.0016, "step": 2592 }, { "epoch": 35.08, "learning_rate": 0.0001, "loss": 0.0026, "step": 2596 }, { "epoch": 35.13, "learning_rate": 0.0001, "loss": 0.001, "step": 2600 }, { "epoch": 35.19, "learning_rate": 0.0001, "loss": 0.0021, "step": 2604 }, { "epoch": 35.24, "learning_rate": 0.0001, "loss": 0.0018, "step": 2608 }, { "epoch": 35.29, "learning_rate": 0.0001, "loss": 0.0014, "step": 2612 }, { "epoch": 35.35, "learning_rate": 0.0001, "loss": 0.0018, "step": 2616 }, { "epoch": 35.4, "learning_rate": 0.0001, "loss": 0.0014, "step": 2620 }, { "epoch": 35.45, "learning_rate": 0.0001, "loss": 0.0015, "step": 2624 }, { "epoch": 35.45, "eval_exec": 0.7243713733075435, "eval_loss": 0.3102652430534363, "eval_runtime": 296.911, "eval_samples_per_second": 3.483, "step": 2624 }, { "epoch": 35.51, "learning_rate": 0.0001, "loss": 0.0034, "step": 2628 }, { "epoch": 35.56, "learning_rate": 0.0001, "loss": 0.0023, "step": 2632 }, { "epoch": 35.62, "learning_rate": 0.0001, "loss": 0.0012, "step": 2636 }, { "epoch": 35.67, "learning_rate": 0.0001, "loss": 0.0019, "step": 2640 }, { "epoch": 35.72, "learning_rate": 0.0001, "loss": 0.0023, "step": 2644 }, { "epoch": 35.78, "learning_rate": 0.0001, "loss": 0.0014, "step": 2648 }, { "epoch": 35.83, "learning_rate": 0.0001, "loss": 0.0028, "step": 2652 }, { "epoch": 35.88, "learning_rate": 0.0001, "loss": 0.0018, "step": 2656 }, { "epoch": 35.94, "learning_rate": 0.0001, "loss": 0.0028, "step": 2660 }, { "epoch": 35.99, "learning_rate": 0.0001, "loss": 0.0015, "step": 2664 }, { "epoch": 36.05, "learning_rate": 0.0001, "loss": 0.0027, "step": 2668 }, { "epoch": 36.11, "learning_rate": 0.0001, "loss": 0.0017, "step": 2672 }, { "epoch": 36.16, "learning_rate": 0.0001, "loss": 0.0014, "step": 2676 }, { "epoch": 36.21, "learning_rate": 0.0001, "loss": 0.0009, "step": 2680 }, { "epoch": 36.27, "learning_rate": 0.0001, "loss": 0.0017, "step": 2684 }, { "epoch": 36.32, "learning_rate": 0.0001, "loss": 0.003, "step": 2688 }, { "epoch": 36.32, "eval_exec": 0.7030947775628626, "eval_loss": 0.2786959409713745, "eval_runtime": 286.0516, "eval_samples_per_second": 3.615, "step": 2688 }, { "epoch": 36.37, "learning_rate": 0.0001, "loss": 0.0022, "step": 2692 }, { "epoch": 36.43, "learning_rate": 0.0001, "loss": 0.0018, "step": 2696 }, { "epoch": 36.48, "learning_rate": 0.0001, "loss": 0.001, "step": 2700 }, { "epoch": 36.53, "learning_rate": 0.0001, "loss": 0.0026, "step": 2704 }, { "epoch": 36.59, "learning_rate": 0.0001, "loss": 0.002, "step": 2708 }, { "epoch": 36.64, "learning_rate": 0.0001, "loss": 0.0022, "step": 2712 }, { "epoch": 36.7, "learning_rate": 0.0001, "loss": 0.0009, "step": 2716 }, { "epoch": 36.75, "learning_rate": 0.0001, "loss": 0.0021, "step": 2720 }, { "epoch": 36.8, "learning_rate": 0.0001, "loss": 0.002, "step": 2724 }, { "epoch": 36.86, "learning_rate": 0.0001, "loss": 0.0012, "step": 2728 }, { "epoch": 36.91, "learning_rate": 0.0001, "loss": 0.0018, "step": 2732 }, { "epoch": 36.96, "learning_rate": 0.0001, "loss": 0.0013, "step": 2736 }, { "epoch": 37.03, "learning_rate": 0.0001, "loss": 0.0016, "step": 2740 }, { "epoch": 37.08, "learning_rate": 0.0001, "loss": 0.0012, "step": 2744 }, { "epoch": 37.13, "learning_rate": 0.0001, "loss": 0.0013, "step": 2748 }, { "epoch": 37.19, "learning_rate": 0.0001, "loss": 0.001, "step": 2752 }, { "epoch": 37.19, "eval_exec": 0.7195357833655706, "eval_loss": 0.3104759156703949, "eval_runtime": 302.4048, "eval_samples_per_second": 3.419, "step": 2752 }, { "epoch": 37.24, "learning_rate": 0.0001, "loss": 0.0016, "step": 2756 }, { "epoch": 37.29, "learning_rate": 0.0001, "loss": 0.0015, "step": 2760 }, { "epoch": 37.35, "learning_rate": 0.0001, "loss": 0.001, "step": 2764 }, { "epoch": 37.4, "learning_rate": 0.0001, "loss": 0.0018, "step": 2768 }, { "epoch": 37.45, "learning_rate": 0.0001, "loss": 0.0022, "step": 2772 }, { "epoch": 37.51, "learning_rate": 0.0001, "loss": 0.001, "step": 2776 }, { "epoch": 37.56, "learning_rate": 0.0001, "loss": 0.0015, "step": 2780 }, { "epoch": 37.62, "learning_rate": 0.0001, "loss": 0.0015, "step": 2784 }, { "epoch": 37.67, "learning_rate": 0.0001, "loss": 0.0013, "step": 2788 }, { "epoch": 37.72, "learning_rate": 0.0001, "loss": 0.0009, "step": 2792 }, { "epoch": 37.78, "learning_rate": 0.0001, "loss": 0.0029, "step": 2796 }, { "epoch": 37.83, "learning_rate": 0.0001, "loss": 0.0016, "step": 2800 }, { "epoch": 37.88, "learning_rate": 0.0001, "loss": 0.0009, "step": 2804 }, { "epoch": 37.94, "learning_rate": 0.0001, "loss": 0.0013, "step": 2808 }, { "epoch": 37.99, "learning_rate": 0.0001, "loss": 0.0021, "step": 2812 }, { "epoch": 38.05, "learning_rate": 0.0001, "loss": 0.0038, "step": 2816 }, { "epoch": 38.05, "eval_exec": 0.7011605415860735, "eval_loss": 0.3018852472305298, "eval_runtime": 286.3546, "eval_samples_per_second": 3.611, "step": 2816 }, { "epoch": 38.11, "learning_rate": 0.0001, "loss": 0.0018, "step": 2820 }, { "epoch": 38.16, "learning_rate": 0.0001, "loss": 0.0013, "step": 2824 }, { "epoch": 38.21, "learning_rate": 0.0001, "loss": 0.0016, "step": 2828 }, { "epoch": 38.27, "learning_rate": 0.0001, "loss": 0.0017, "step": 2832 }, { "epoch": 38.32, "learning_rate": 0.0001, "loss": 0.0016, "step": 2836 }, { "epoch": 38.37, "learning_rate": 0.0001, "loss": 0.0018, "step": 2840 }, { "epoch": 38.43, "learning_rate": 0.0001, "loss": 0.0006, "step": 2844 }, { "epoch": 38.48, "learning_rate": 0.0001, "loss": 0.001, "step": 2848 }, { "epoch": 38.53, "learning_rate": 0.0001, "loss": 0.0017, "step": 2852 }, { "epoch": 38.59, "learning_rate": 0.0001, "loss": 0.0022, "step": 2856 }, { "epoch": 38.64, "learning_rate": 0.0001, "loss": 0.0012, "step": 2860 }, { "epoch": 38.7, "learning_rate": 0.0001, "loss": 0.0024, "step": 2864 }, { "epoch": 38.75, "learning_rate": 0.0001, "loss": 0.0012, "step": 2868 }, { "epoch": 38.8, "learning_rate": 0.0001, "loss": 0.0027, "step": 2872 }, { "epoch": 38.86, "learning_rate": 0.0001, "loss": 0.001, "step": 2876 }, { "epoch": 38.91, "learning_rate": 0.0001, "loss": 0.0012, "step": 2880 }, { "epoch": 38.91, "eval_exec": 0.7098646034816247, "eval_loss": 0.295253187417984, "eval_runtime": 292.7859, "eval_samples_per_second": 3.532, "step": 2880 }, { "epoch": 38.96, "learning_rate": 0.0001, "loss": 0.0011, "step": 2884 }, { "epoch": 39.03, "learning_rate": 0.0001, "loss": 0.0012, "step": 2888 }, { "epoch": 39.08, "learning_rate": 0.0001, "loss": 0.0012, "step": 2892 }, { "epoch": 39.13, "learning_rate": 0.0001, "loss": 0.0005, "step": 2896 }, { "epoch": 39.19, "learning_rate": 0.0001, "loss": 0.0018, "step": 2900 }, { "epoch": 39.24, "learning_rate": 0.0001, "loss": 0.0015, "step": 2904 }, { "epoch": 39.29, "learning_rate": 0.0001, "loss": 0.0008, "step": 2908 }, { "epoch": 39.35, "learning_rate": 0.0001, "loss": 0.0007, "step": 2912 }, { "epoch": 39.4, "learning_rate": 0.0001, "loss": 0.0009, "step": 2916 }, { "epoch": 39.45, "learning_rate": 0.0001, "loss": 0.0004, "step": 2920 }, { "epoch": 39.51, "learning_rate": 0.0001, "loss": 0.0012, "step": 2924 }, { "epoch": 39.56, "learning_rate": 0.0001, "loss": 0.0007, "step": 2928 }, { "epoch": 39.62, "learning_rate": 0.0001, "loss": 0.0011, "step": 2932 }, { "epoch": 39.67, "learning_rate": 0.0001, "loss": 0.0012, "step": 2936 }, { "epoch": 39.72, "learning_rate": 0.0001, "loss": 0.0008, "step": 2940 }, { "epoch": 39.78, "learning_rate": 0.0001, "loss": 0.0014, "step": 2944 }, { "epoch": 39.78, "eval_exec": 0.7156673114119922, "eval_loss": 0.3224295973777771, "eval_runtime": 296.2398, "eval_samples_per_second": 3.49, "step": 2944 }, { "epoch": 39.83, "learning_rate": 0.0001, "loss": 0.002, "step": 2948 }, { "epoch": 39.88, "learning_rate": 0.0001, "loss": 0.0011, "step": 2952 }, { "epoch": 39.94, "learning_rate": 0.0001, "loss": 0.001, "step": 2956 }, { "epoch": 39.99, "learning_rate": 0.0001, "loss": 0.0019, "step": 2960 }, { "epoch": 40.05, "learning_rate": 0.0001, "loss": 0.0027, "step": 2964 }, { "epoch": 40.11, "learning_rate": 0.0001, "loss": 0.0024, "step": 2968 }, { "epoch": 40.16, "learning_rate": 0.0001, "loss": 0.0027, "step": 2972 }, { "epoch": 40.21, "learning_rate": 0.0001, "loss": 0.0012, "step": 2976 }, { "epoch": 40.27, "learning_rate": 0.0001, "loss": 0.0022, "step": 2980 }, { "epoch": 40.32, "learning_rate": 0.0001, "loss": 0.0019, "step": 2984 }, { "epoch": 40.37, "learning_rate": 0.0001, "loss": 0.0022, "step": 2988 }, { "epoch": 40.43, "learning_rate": 0.0001, "loss": 0.0007, "step": 2992 }, { "epoch": 40.48, "learning_rate": 0.0001, "loss": 0.0017, "step": 2996 }, { "epoch": 40.53, "learning_rate": 0.0001, "loss": 0.0013, "step": 3000 }, { "epoch": 40.59, "learning_rate": 0.0001, "loss": 0.0038, "step": 3004 }, { "epoch": 40.64, "learning_rate": 0.0001, "loss": 0.0011, "step": 3008 }, { "epoch": 40.64, "eval_exec": 0.7040618955512572, "eval_loss": 0.3050314486026764, "eval_runtime": 289.6078, "eval_samples_per_second": 3.57, "step": 3008 }, { "epoch": 40.7, "learning_rate": 0.0001, "loss": 0.0026, "step": 3012 }, { "epoch": 40.75, "learning_rate": 0.0001, "loss": 0.0021, "step": 3016 }, { "epoch": 40.8, "learning_rate": 0.0001, "loss": 0.0015, "step": 3020 }, { "epoch": 40.86, "learning_rate": 0.0001, "loss": 0.0021, "step": 3024 }, { "epoch": 40.91, "learning_rate": 0.0001, "loss": 0.0012, "step": 3028 }, { "epoch": 40.96, "learning_rate": 0.0001, "loss": 0.0032, "step": 3032 }, { "epoch": 41.03, "learning_rate": 0.0001, "loss": 0.0018, "step": 3036 }, { "epoch": 41.08, "learning_rate": 0.0001, "loss": 0.0014, "step": 3040 }, { "epoch": 41.13, "learning_rate": 0.0001, "loss": 0.0009, "step": 3044 }, { "epoch": 41.19, "learning_rate": 0.0001, "loss": 0.0019, "step": 3048 }, { "epoch": 41.24, "learning_rate": 0.0001, "loss": 0.0029, "step": 3052 }, { "epoch": 41.29, "learning_rate": 0.0001, "loss": 0.0022, "step": 3056 }, { "epoch": 41.35, "learning_rate": 0.0001, "loss": 0.0011, "step": 3060 }, { "epoch": 41.4, "learning_rate": 0.0001, "loss": 0.0018, "step": 3064 }, { "epoch": 41.45, "learning_rate": 0.0001, "loss": 0.0005, "step": 3068 }, { "epoch": 41.51, "learning_rate": 0.0001, "loss": 0.0016, "step": 3072 }, { "epoch": 41.51, "eval_exec": 0.7088974854932302, "eval_loss": 0.30448201298713684, "eval_runtime": 294.3306, "eval_samples_per_second": 3.513, "step": 3072 }, { "epoch": 41.56, "learning_rate": 0.0001, "loss": 0.0025, "step": 3076 }, { "epoch": 41.62, "learning_rate": 0.0001, "loss": 0.0011, "step": 3080 }, { "epoch": 41.67, "learning_rate": 0.0001, "loss": 0.0005, "step": 3084 }, { "epoch": 41.72, "learning_rate": 0.0001, "loss": 0.0006, "step": 3088 }, { "epoch": 41.78, "learning_rate": 0.0001, "loss": 0.0012, "step": 3092 }, { "epoch": 41.83, "learning_rate": 0.0001, "loss": 0.0019, "step": 3096 }, { "epoch": 41.88, "learning_rate": 0.0001, "loss": 0.0013, "step": 3100 }, { "epoch": 41.94, "learning_rate": 0.0001, "loss": 0.0007, "step": 3104 }, { "epoch": 41.99, "learning_rate": 0.0001, "loss": 0.0033, "step": 3108 }, { "epoch": 42.05, "learning_rate": 0.0001, "loss": 0.0021, "step": 3112 }, { "epoch": 42.11, "learning_rate": 0.0001, "loss": 0.001, "step": 3116 }, { "epoch": 42.16, "learning_rate": 0.0001, "loss": 0.0023, "step": 3120 }, { "epoch": 42.21, "learning_rate": 0.0001, "loss": 0.001, "step": 3124 }, { "epoch": 42.27, "learning_rate": 0.0001, "loss": 0.0015, "step": 3128 }, { "epoch": 42.32, "learning_rate": 0.0001, "loss": 0.0012, "step": 3132 }, { "epoch": 42.37, "learning_rate": 0.0001, "loss": 0.0007, "step": 3136 }, { "epoch": 42.37, "eval_exec": 0.7243713733075435, "eval_loss": 0.3050415515899658, "eval_runtime": 293.6495, "eval_samples_per_second": 3.521, "step": 3136 }, { "epoch": 42.43, "learning_rate": 0.0001, "loss": 0.0022, "step": 3140 }, { "epoch": 42.48, "learning_rate": 0.0001, "loss": 0.0012, "step": 3144 }, { "epoch": 42.53, "learning_rate": 0.0001, "loss": 0.0015, "step": 3148 }, { "epoch": 42.59, "learning_rate": 0.0001, "loss": 0.0037, "step": 3152 }, { "epoch": 42.64, "learning_rate": 0.0001, "loss": 0.0024, "step": 3156 }, { "epoch": 42.7, "learning_rate": 0.0001, "loss": 0.0012, "step": 3160 }, { "epoch": 42.75, "learning_rate": 0.0001, "loss": 0.0016, "step": 3164 }, { "epoch": 42.8, "learning_rate": 0.0001, "loss": 0.0031, "step": 3168 }, { "epoch": 42.86, "learning_rate": 0.0001, "loss": 0.0013, "step": 3172 }, { "epoch": 42.91, "learning_rate": 0.0001, "loss": 0.0006, "step": 3176 }, { "epoch": 42.96, "learning_rate": 0.0001, "loss": 0.0015, "step": 3180 }, { "epoch": 43.03, "learning_rate": 0.0001, "loss": 0.0014, "step": 3184 }, { "epoch": 43.08, "learning_rate": 0.0001, "loss": 0.0009, "step": 3188 }, { "epoch": 43.13, "learning_rate": 0.0001, "loss": 0.0008, "step": 3192 }, { "epoch": 43.19, "learning_rate": 0.0001, "loss": 0.0013, "step": 3196 }, { "epoch": 43.24, "learning_rate": 0.0001, "loss": 0.0012, "step": 3200 }, { "epoch": 43.24, "eval_exec": 0.7040618955512572, "eval_loss": 0.3184911012649536, "eval_runtime": 290.8614, "eval_samples_per_second": 3.555, "step": 3200 }, { "epoch": 43.29, "learning_rate": 0.0001, "loss": 0.0015, "step": 3204 }, { "epoch": 43.35, "learning_rate": 0.0001, "loss": 0.0027, "step": 3208 }, { "epoch": 43.4, "learning_rate": 0.0001, "loss": 0.0025, "step": 3212 }, { "epoch": 43.45, "learning_rate": 0.0001, "loss": 0.0006, "step": 3216 }, { "epoch": 43.51, "learning_rate": 0.0001, "loss": 0.0027, "step": 3220 }, { "epoch": 43.56, "learning_rate": 0.0001, "loss": 0.0016, "step": 3224 }, { "epoch": 43.62, "learning_rate": 0.0001, "loss": 0.0011, "step": 3228 }, { "epoch": 43.67, "learning_rate": 0.0001, "loss": 0.0007, "step": 3232 }, { "epoch": 43.72, "learning_rate": 0.0001, "loss": 0.0028, "step": 3236 }, { "epoch": 43.78, "learning_rate": 0.0001, "loss": 0.0011, "step": 3240 }, { "epoch": 43.83, "learning_rate": 0.0001, "loss": 0.0008, "step": 3244 }, { "epoch": 43.88, "learning_rate": 0.0001, "loss": 0.0014, "step": 3248 }, { "epoch": 43.94, "learning_rate": 0.0001, "loss": 0.0028, "step": 3252 }, { "epoch": 43.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 3256 }, { "epoch": 44.05, "learning_rate": 0.0001, "loss": 0.0026, "step": 3260 }, { "epoch": 44.11, "learning_rate": 0.0001, "loss": 0.0014, "step": 3264 }, { "epoch": 44.11, "eval_exec": 0.7108317214700194, "eval_loss": 0.2903190851211548, "eval_runtime": 296.1114, "eval_samples_per_second": 3.492, "step": 3264 }, { "epoch": 44.16, "learning_rate": 0.0001, "loss": 0.0008, "step": 3268 }, { "epoch": 44.21, "learning_rate": 0.0001, "loss": 0.0008, "step": 3272 }, { "epoch": 44.27, "learning_rate": 0.0001, "loss": 0.0016, "step": 3276 }, { "epoch": 44.32, "learning_rate": 0.0001, "loss": 0.0017, "step": 3280 }, { "epoch": 44.37, "learning_rate": 0.0001, "loss": 0.0011, "step": 3284 }, { "epoch": 44.43, "learning_rate": 0.0001, "loss": 0.0009, "step": 3288 }, { "epoch": 44.48, "learning_rate": 0.0001, "loss": 0.0006, "step": 3292 }, { "epoch": 44.53, "learning_rate": 0.0001, "loss": 0.0014, "step": 3296 }, { "epoch": 44.59, "learning_rate": 0.0001, "loss": 0.0021, "step": 3300 }, { "epoch": 44.64, "learning_rate": 0.0001, "loss": 0.0008, "step": 3304 }, { "epoch": 44.7, "learning_rate": 0.0001, "loss": 0.0014, "step": 3308 }, { "epoch": 44.75, "learning_rate": 0.0001, "loss": 0.0035, "step": 3312 }, { "epoch": 44.8, "learning_rate": 0.0001, "loss": 0.001, "step": 3316 }, { "epoch": 44.86, "learning_rate": 0.0001, "loss": 0.0013, "step": 3320 }, { "epoch": 44.91, "learning_rate": 0.0001, "loss": 0.0007, "step": 3324 }, { "epoch": 44.96, "learning_rate": 0.0001, "loss": 0.0032, "step": 3328 }, { "epoch": 44.96, "eval_exec": 0.7166344294003868, "eval_loss": 0.29221683740615845, "eval_runtime": 297.9619, "eval_samples_per_second": 3.47, "step": 3328 }, { "epoch": 45.03, "learning_rate": 0.0001, "loss": 0.0013, "step": 3332 }, { "epoch": 45.08, "learning_rate": 0.0001, "loss": 0.0015, "step": 3336 }, { "epoch": 45.13, "learning_rate": 0.0001, "loss": 0.0013, "step": 3340 }, { "epoch": 45.19, "learning_rate": 0.0001, "loss": 0.0012, "step": 3344 }, { "epoch": 45.24, "learning_rate": 0.0001, "loss": 0.0013, "step": 3348 }, { "epoch": 45.29, "learning_rate": 0.0001, "loss": 0.0014, "step": 3352 }, { "epoch": 45.35, "learning_rate": 0.0001, "loss": 0.0006, "step": 3356 }, { "epoch": 45.4, "learning_rate": 0.0001, "loss": 0.0017, "step": 3360 }, { "epoch": 45.45, "learning_rate": 0.0001, "loss": 0.0003, "step": 3364 }, { "epoch": 45.51, "learning_rate": 0.0001, "loss": 0.0008, "step": 3368 }, { "epoch": 45.56, "learning_rate": 0.0001, "loss": 0.0009, "step": 3372 }, { "epoch": 45.62, "learning_rate": 0.0001, "loss": 0.0022, "step": 3376 }, { "epoch": 45.67, "learning_rate": 0.0001, "loss": 0.0009, "step": 3380 }, { "epoch": 45.72, "learning_rate": 0.0001, "loss": 0.0002, "step": 3384 }, { "epoch": 45.78, "learning_rate": 0.0001, "loss": 0.0021, "step": 3388 }, { "epoch": 45.83, "learning_rate": 0.0001, "loss": 0.0023, "step": 3392 }, { "epoch": 45.83, "eval_exec": 0.7166344294003868, "eval_loss": 0.30226799845695496, "eval_runtime": 296.3235, "eval_samples_per_second": 3.489, "step": 3392 }, { "epoch": 45.88, "learning_rate": 0.0001, "loss": 0.0021, "step": 3396 }, { "epoch": 45.94, "learning_rate": 0.0001, "loss": 0.0023, "step": 3400 }, { "epoch": 45.99, "learning_rate": 0.0001, "loss": 0.0016, "step": 3404 }, { "epoch": 46.05, "learning_rate": 0.0001, "loss": 0.0015, "step": 3408 }, { "epoch": 46.11, "learning_rate": 0.0001, "loss": 0.0007, "step": 3412 }, { "epoch": 46.16, "learning_rate": 0.0001, "loss": 0.0014, "step": 3416 }, { "epoch": 46.21, "learning_rate": 0.0001, "loss": 0.0005, "step": 3420 }, { "epoch": 46.27, "learning_rate": 0.0001, "loss": 0.0014, "step": 3424 }, { "epoch": 46.32, "learning_rate": 0.0001, "loss": 0.0016, "step": 3428 }, { "epoch": 46.37, "learning_rate": 0.0001, "loss": 0.0012, "step": 3432 }, { "epoch": 46.43, "learning_rate": 0.0001, "loss": 0.0007, "step": 3436 }, { "epoch": 46.48, "learning_rate": 0.0001, "loss": 0.0027, "step": 3440 }, { "epoch": 46.53, "learning_rate": 0.0001, "loss": 0.0008, "step": 3444 }, { "epoch": 46.59, "learning_rate": 0.0001, "loss": 0.0012, "step": 3448 }, { "epoch": 46.64, "learning_rate": 0.0001, "loss": 0.0006, "step": 3452 }, { "epoch": 46.7, "learning_rate": 0.0001, "loss": 0.0004, "step": 3456 }, { "epoch": 46.7, "eval_exec": 0.7437137330754352, "eval_loss": 0.30926698446273804, "eval_runtime": 297.8667, "eval_samples_per_second": 3.471, "step": 3456 }, { "epoch": 46.75, "learning_rate": 0.0001, "loss": 0.0023, "step": 3460 }, { "epoch": 46.8, "learning_rate": 0.0001, "loss": 0.0011, "step": 3464 }, { "epoch": 46.86, "learning_rate": 0.0001, "loss": 0.0011, "step": 3468 }, { "epoch": 46.91, "learning_rate": 0.0001, "loss": 0.0012, "step": 3472 }, { "epoch": 46.96, "learning_rate": 0.0001, "loss": 0.0013, "step": 3476 }, { "epoch": 47.03, "learning_rate": 0.0001, "loss": 0.0009, "step": 3480 }, { "epoch": 47.08, "learning_rate": 0.0001, "loss": 0.0006, "step": 3484 }, { "epoch": 47.13, "learning_rate": 0.0001, "loss": 0.0015, "step": 3488 }, { "epoch": 47.19, "learning_rate": 0.0001, "loss": 0.0003, "step": 3492 }, { "epoch": 47.24, "learning_rate": 0.0001, "loss": 0.0021, "step": 3496 }, { "epoch": 47.29, "learning_rate": 0.0001, "loss": 0.0012, "step": 3500 }, { "epoch": 47.35, "learning_rate": 0.0001, "loss": 0.0006, "step": 3504 }, { "epoch": 47.4, "learning_rate": 0.0001, "loss": 0.0004, "step": 3508 }, { "epoch": 47.45, "learning_rate": 0.0001, "loss": 0.0006, "step": 3512 }, { "epoch": 47.51, "learning_rate": 0.0001, "loss": 0.0025, "step": 3516 }, { "epoch": 47.56, "learning_rate": 0.0001, "loss": 0.0054, "step": 3520 }, { "epoch": 47.56, "eval_exec": 0.7350096711798839, "eval_loss": 0.27321064472198486, "eval_runtime": 285.4659, "eval_samples_per_second": 3.622, "step": 3520 }, { "epoch": 47.62, "learning_rate": 0.0001, "loss": 0.001, "step": 3524 }, { "epoch": 47.67, "learning_rate": 0.0001, "loss": 0.0009, "step": 3528 }, { "epoch": 47.72, "learning_rate": 0.0001, "loss": 0.0013, "step": 3532 }, { "epoch": 47.78, "learning_rate": 0.0001, "loss": 0.0016, "step": 3536 }, { "epoch": 47.83, "learning_rate": 0.0001, "loss": 0.0008, "step": 3540 }, { "epoch": 47.88, "learning_rate": 0.0001, "loss": 0.0006, "step": 3544 }, { "epoch": 47.94, "learning_rate": 0.0001, "loss": 0.0008, "step": 3548 }, { "epoch": 47.99, "learning_rate": 0.0001, "loss": 0.0013, "step": 3552 }, { "epoch": 48.05, "learning_rate": 0.0001, "loss": 0.0014, "step": 3556 }, { "epoch": 48.11, "learning_rate": 0.0001, "loss": 0.0012, "step": 3560 }, { "epoch": 48.16, "learning_rate": 0.0001, "loss": 0.0013, "step": 3564 }, { "epoch": 48.21, "learning_rate": 0.0001, "loss": 0.0003, "step": 3568 }, { "epoch": 48.27, "learning_rate": 0.0001, "loss": 0.0018, "step": 3572 }, { "epoch": 48.32, "learning_rate": 0.0001, "loss": 0.0005, "step": 3576 }, { "epoch": 48.37, "learning_rate": 0.0001, "loss": 0.0029, "step": 3580 }, { "epoch": 48.43, "learning_rate": 0.0001, "loss": 0.0014, "step": 3584 }, { "epoch": 48.43, "eval_exec": 0.746615087040619, "eval_loss": 0.2905672490596771, "eval_runtime": 293.155, "eval_samples_per_second": 3.527, "step": 3584 }, { "epoch": 48.48, "learning_rate": 0.0001, "loss": 0.0008, "step": 3588 }, { "epoch": 48.53, "learning_rate": 0.0001, "loss": 0.0012, "step": 3592 }, { "epoch": 48.59, "learning_rate": 0.0001, "loss": 0.0022, "step": 3596 }, { "epoch": 48.64, "learning_rate": 0.0001, "loss": 0.0013, "step": 3600 }, { "epoch": 48.7, "learning_rate": 0.0001, "loss": 0.001, "step": 3604 }, { "epoch": 48.75, "learning_rate": 0.0001, "loss": 0.0014, "step": 3608 }, { "epoch": 48.8, "learning_rate": 0.0001, "loss": 0.0011, "step": 3612 }, { "epoch": 48.86, "learning_rate": 0.0001, "loss": 0.0057, "step": 3616 }, { "epoch": 48.91, "learning_rate": 0.0001, "loss": 0.0008, "step": 3620 }, { "epoch": 48.96, "learning_rate": 0.0001, "loss": 0.0012, "step": 3624 }, { "epoch": 49.03, "learning_rate": 0.0001, "loss": 0.002, "step": 3628 }, { "epoch": 49.08, "learning_rate": 0.0001, "loss": 0.0022, "step": 3632 }, { "epoch": 49.13, "learning_rate": 0.0001, "loss": 0.0012, "step": 3636 }, { "epoch": 49.19, "learning_rate": 0.0001, "loss": 0.002, "step": 3640 }, { "epoch": 49.24, "learning_rate": 0.0001, "loss": 0.0007, "step": 3644 }, { "epoch": 49.29, "learning_rate": 0.0001, "loss": 0.0006, "step": 3648 }, { "epoch": 49.29, "eval_exec": 0.7388781431334622, "eval_loss": 0.2956254184246063, "eval_runtime": 295.8069, "eval_samples_per_second": 3.496, "step": 3648 }, { "epoch": 49.35, "learning_rate": 0.0001, "loss": 0.001, "step": 3652 }, { "epoch": 49.4, "learning_rate": 0.0001, "loss": 0.0014, "step": 3656 }, { "epoch": 49.45, "learning_rate": 0.0001, "loss": 0.0007, "step": 3660 }, { "epoch": 49.51, "learning_rate": 0.0001, "loss": 0.003, "step": 3664 }, { "epoch": 49.56, "learning_rate": 0.0001, "loss": 0.0011, "step": 3668 }, { "epoch": 49.62, "learning_rate": 0.0001, "loss": 0.0025, "step": 3672 }, { "epoch": 49.67, "learning_rate": 0.0001, "loss": 0.0009, "step": 3676 }, { "epoch": 49.72, "learning_rate": 0.0001, "loss": 0.0007, "step": 3680 }, { "epoch": 49.78, "learning_rate": 0.0001, "loss": 0.0015, "step": 3684 }, { "epoch": 49.83, "learning_rate": 0.0001, "loss": 0.0007, "step": 3688 }, { "epoch": 49.88, "learning_rate": 0.0001, "loss": 0.0007, "step": 3692 }, { "epoch": 49.94, "learning_rate": 0.0001, "loss": 0.0006, "step": 3696 }, { "epoch": 49.99, "learning_rate": 0.0001, "loss": 0.0014, "step": 3700 }, { "epoch": 50.05, "learning_rate": 0.0001, "loss": 0.0014, "step": 3704 }, { "epoch": 50.11, "learning_rate": 0.0001, "loss": 0.0011, "step": 3708 }, { "epoch": 50.16, "learning_rate": 0.0001, "loss": 0.0015, "step": 3712 }, { "epoch": 50.16, "eval_exec": 0.7437137330754352, "eval_loss": 0.30492928624153137, "eval_runtime": 291.3729, "eval_samples_per_second": 3.549, "step": 3712 }, { "epoch": 50.21, "learning_rate": 0.0001, "loss": 0.0003, "step": 3716 }, { "epoch": 50.27, "learning_rate": 0.0001, "loss": 0.0008, "step": 3720 }, { "epoch": 50.32, "learning_rate": 0.0001, "loss": 0.0006, "step": 3724 }, { "epoch": 50.37, "learning_rate": 0.0001, "loss": 0.0004, "step": 3728 }, { "epoch": 50.43, "learning_rate": 0.0001, "loss": 0.0004, "step": 3732 }, { "epoch": 50.48, "learning_rate": 0.0001, "loss": 0.0005, "step": 3736 }, { "epoch": 50.53, "learning_rate": 0.0001, "loss": 0.0022, "step": 3740 }, { "epoch": 50.59, "learning_rate": 0.0001, "loss": 0.0011, "step": 3744 }, { "epoch": 50.64, "learning_rate": 0.0001, "loss": 0.0003, "step": 3748 }, { "epoch": 50.7, "learning_rate": 0.0001, "loss": 0.0004, "step": 3752 }, { "epoch": 50.75, "learning_rate": 0.0001, "loss": 0.0015, "step": 3756 }, { "epoch": 50.8, "learning_rate": 0.0001, "loss": 0.0013, "step": 3760 }, { "epoch": 50.86, "learning_rate": 0.0001, "loss": 0.0004, "step": 3764 }, { "epoch": 50.91, "learning_rate": 0.0001, "loss": 0.0004, "step": 3768 }, { "epoch": 50.96, "learning_rate": 0.0001, "loss": 0.0015, "step": 3772 }, { "epoch": 51.03, "learning_rate": 0.0001, "loss": 0.0015, "step": 3776 }, { "epoch": 51.03, "eval_exec": 0.7495164410058027, "eval_loss": 0.32364675402641296, "eval_runtime": 300.6402, "eval_samples_per_second": 3.439, "step": 3776 }, { "epoch": 51.08, "learning_rate": 0.0001, "loss": 0.0012, "step": 3780 }, { "epoch": 51.13, "learning_rate": 0.0001, "loss": 0.0004, "step": 3784 }, { "epoch": 51.19, "learning_rate": 0.0001, "loss": 0.0005, "step": 3788 }, { "epoch": 51.24, "learning_rate": 0.0001, "loss": 0.0012, "step": 3792 }, { "epoch": 51.29, "learning_rate": 0.0001, "loss": 0.0007, "step": 3796 }, { "epoch": 51.35, "learning_rate": 0.0001, "loss": 0.0018, "step": 3800 }, { "epoch": 51.4, "learning_rate": 0.0001, "loss": 0.001, "step": 3804 }, { "epoch": 51.45, "learning_rate": 0.0001, "loss": 0.0003, "step": 3808 }, { "epoch": 51.51, "learning_rate": 0.0001, "loss": 0.0028, "step": 3812 }, { "epoch": 51.56, "learning_rate": 0.0001, "loss": 0.0009, "step": 3816 }, { "epoch": 51.62, "learning_rate": 0.0001, "loss": 0.0013, "step": 3820 }, { "epoch": 51.67, "learning_rate": 0.0001, "loss": 0.0005, "step": 3824 }, { "epoch": 51.72, "learning_rate": 0.0001, "loss": 0.0008, "step": 3828 }, { "epoch": 51.78, "learning_rate": 0.0001, "loss": 0.0014, "step": 3832 }, { "epoch": 51.83, "learning_rate": 0.0001, "loss": 0.0016, "step": 3836 }, { "epoch": 51.88, "learning_rate": 0.0001, "loss": 0.0016, "step": 3840 }, { "epoch": 51.88, "eval_exec": 0.7253384912959381, "eval_loss": 0.30479735136032104, "eval_runtime": 283.847, "eval_samples_per_second": 3.643, "step": 3840 }, { "epoch": 51.94, "learning_rate": 0.0001, "loss": 0.0004, "step": 3844 }, { "epoch": 51.99, "learning_rate": 0.0001, "loss": 0.0032, "step": 3848 }, { "epoch": 52.05, "learning_rate": 0.0001, "loss": 0.0029, "step": 3852 }, { "epoch": 52.11, "learning_rate": 0.0001, "loss": 0.0012, "step": 3856 }, { "epoch": 52.16, "learning_rate": 0.0001, "loss": 0.0015, "step": 3860 }, { "epoch": 52.21, "learning_rate": 0.0001, "loss": 0.0006, "step": 3864 }, { "epoch": 52.27, "learning_rate": 0.0001, "loss": 0.001, "step": 3868 }, { "epoch": 52.32, "learning_rate": 0.0001, "loss": 0.0018, "step": 3872 }, { "epoch": 52.37, "learning_rate": 0.0001, "loss": 0.0011, "step": 3876 }, { "epoch": 52.43, "learning_rate": 0.0001, "loss": 0.0004, "step": 3880 }, { "epoch": 52.48, "learning_rate": 0.0001, "loss": 0.0011, "step": 3884 }, { "epoch": 52.53, "learning_rate": 0.0001, "loss": 0.0024, "step": 3888 }, { "epoch": 52.59, "learning_rate": 0.0001, "loss": 0.0015, "step": 3892 }, { "epoch": 52.64, "learning_rate": 0.0001, "loss": 0.0007, "step": 3896 }, { "epoch": 52.7, "learning_rate": 0.0001, "loss": 0.0009, "step": 3900 }, { "epoch": 52.75, "learning_rate": 0.0001, "loss": 0.001, "step": 3904 }, { "epoch": 52.75, "eval_exec": 0.7214700193423598, "eval_loss": 0.3040502965450287, "eval_runtime": 296.1224, "eval_samples_per_second": 3.492, "step": 3904 }, { "epoch": 52.8, "learning_rate": 0.0001, "loss": 0.0022, "step": 3908 }, { "epoch": 52.86, "learning_rate": 0.0001, "loss": 0.0007, "step": 3912 }, { "epoch": 52.91, "learning_rate": 0.0001, "loss": 0.0006, "step": 3916 }, { "epoch": 52.96, "learning_rate": 0.0001, "loss": 0.0008, "step": 3920 }, { "epoch": 53.03, "learning_rate": 0.0001, "loss": 0.0016, "step": 3924 }, { "epoch": 53.08, "learning_rate": 0.0001, "loss": 0.0014, "step": 3928 }, { "epoch": 53.13, "learning_rate": 0.0001, "loss": 0.0012, "step": 3932 }, { "epoch": 53.19, "learning_rate": 0.0001, "loss": 0.0007, "step": 3936 }, { "epoch": 53.24, "learning_rate": 0.0001, "loss": 0.0004, "step": 3940 }, { "epoch": 53.29, "learning_rate": 0.0001, "loss": 0.0011, "step": 3944 }, { "epoch": 53.35, "learning_rate": 0.0001, "loss": 0.0008, "step": 3948 }, { "epoch": 53.4, "learning_rate": 0.0001, "loss": 0.0005, "step": 3952 }, { "epoch": 53.45, "learning_rate": 0.0001, "loss": 0.0009, "step": 3956 }, { "epoch": 53.51, "learning_rate": 0.0001, "loss": 0.0007, "step": 3960 }, { "epoch": 53.56, "learning_rate": 0.0001, "loss": 0.0008, "step": 3964 }, { "epoch": 53.62, "learning_rate": 0.0001, "loss": 0.0004, "step": 3968 }, { "epoch": 53.62, "eval_exec": 0.7330754352030948, "eval_loss": 0.3174877464771271, "eval_runtime": 296.7562, "eval_samples_per_second": 3.484, "step": 3968 }, { "epoch": 53.67, "learning_rate": 0.0001, "loss": 0.0009, "step": 3972 }, { "epoch": 53.72, "learning_rate": 0.0001, "loss": 0.0003, "step": 3976 }, { "epoch": 53.78, "learning_rate": 0.0001, "loss": 0.0019, "step": 3980 }, { "epoch": 53.83, "learning_rate": 0.0001, "loss": 0.0019, "step": 3984 }, { "epoch": 53.88, "learning_rate": 0.0001, "loss": 0.0009, "step": 3988 }, { "epoch": 53.94, "learning_rate": 0.0001, "loss": 0.0006, "step": 3992 }, { "epoch": 53.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 3996 }, { "epoch": 54.05, "learning_rate": 0.0001, "loss": 0.0018, "step": 4000 }, { "epoch": 54.11, "learning_rate": 0.0001, "loss": 0.0005, "step": 4004 }, { "epoch": 54.16, "learning_rate": 0.0001, "loss": 0.0006, "step": 4008 }, { "epoch": 54.21, "learning_rate": 0.0001, "loss": 0.0005, "step": 4012 }, { "epoch": 54.27, "learning_rate": 0.0001, "loss": 0.0011, "step": 4016 }, { "epoch": 54.32, "learning_rate": 0.0001, "loss": 0.0018, "step": 4020 }, { "epoch": 54.37, "learning_rate": 0.0001, "loss": 0.0018, "step": 4024 }, { "epoch": 54.43, "learning_rate": 0.0001, "loss": 0.0012, "step": 4028 }, { "epoch": 54.48, "learning_rate": 0.0001, "loss": 0.0002, "step": 4032 }, { "epoch": 54.48, "eval_exec": 0.7340425531914894, "eval_loss": 0.3035335838794708, "eval_runtime": 288.5745, "eval_samples_per_second": 3.583, "step": 4032 }, { "epoch": 54.53, "learning_rate": 0.0001, "loss": 0.0019, "step": 4036 }, { "epoch": 54.59, "learning_rate": 0.0001, "loss": 0.0017, "step": 4040 }, { "epoch": 54.64, "learning_rate": 0.0001, "loss": 0.0033, "step": 4044 }, { "epoch": 54.7, "learning_rate": 0.0001, "loss": 0.0004, "step": 4048 }, { "epoch": 54.75, "learning_rate": 0.0001, "loss": 0.0012, "step": 4052 }, { "epoch": 54.8, "learning_rate": 0.0001, "loss": 0.0013, "step": 4056 }, { "epoch": 54.86, "learning_rate": 0.0001, "loss": 0.0024, "step": 4060 }, { "epoch": 54.91, "learning_rate": 0.0001, "loss": 0.0004, "step": 4064 }, { "epoch": 54.96, "learning_rate": 0.0001, "loss": 0.0004, "step": 4068 }, { "epoch": 55.03, "learning_rate": 0.0001, "loss": 0.0017, "step": 4072 }, { "epoch": 55.08, "learning_rate": 0.0001, "loss": 0.0014, "step": 4076 }, { "epoch": 55.13, "learning_rate": 0.0001, "loss": 0.0007, "step": 4080 }, { "epoch": 55.19, "learning_rate": 0.0001, "loss": 0.0005, "step": 4084 }, { "epoch": 55.24, "learning_rate": 0.0001, "loss": 0.0029, "step": 4088 }, { "epoch": 55.29, "learning_rate": 0.0001, "loss": 0.0018, "step": 4092 }, { "epoch": 55.35, "learning_rate": 0.0001, "loss": 0.0015, "step": 4096 }, { "epoch": 55.35, "eval_exec": 0.7321083172147002, "eval_loss": 0.3005565404891968, "eval_runtime": 293.428, "eval_samples_per_second": 3.524, "step": 4096 }, { "epoch": 55.4, "learning_rate": 0.0001, "loss": 0.0009, "step": 4100 }, { "epoch": 55.45, "learning_rate": 0.0001, "loss": 0.0007, "step": 4104 }, { "epoch": 55.51, "learning_rate": 0.0001, "loss": 0.0012, "step": 4108 }, { "epoch": 55.56, "learning_rate": 0.0001, "loss": 0.0024, "step": 4112 }, { "epoch": 55.62, "learning_rate": 0.0001, "loss": 0.0009, "step": 4116 }, { "epoch": 55.67, "learning_rate": 0.0001, "loss": 0.0006, "step": 4120 }, { "epoch": 55.72, "learning_rate": 0.0001, "loss": 0.0005, "step": 4124 }, { "epoch": 55.78, "learning_rate": 0.0001, "loss": 0.0013, "step": 4128 }, { "epoch": 55.83, "learning_rate": 0.0001, "loss": 0.0011, "step": 4132 }, { "epoch": 55.88, "learning_rate": 0.0001, "loss": 0.001, "step": 4136 }, { "epoch": 55.94, "learning_rate": 0.0001, "loss": 0.0008, "step": 4140 }, { "epoch": 55.99, "learning_rate": 0.0001, "loss": 0.0005, "step": 4144 }, { "epoch": 56.05, "learning_rate": 0.0001, "loss": 0.0013, "step": 4148 }, { "epoch": 56.11, "learning_rate": 0.0001, "loss": 0.0009, "step": 4152 }, { "epoch": 56.16, "learning_rate": 0.0001, "loss": 0.0008, "step": 4156 }, { "epoch": 56.21, "learning_rate": 0.0001, "loss": 0.0011, "step": 4160 }, { "epoch": 56.21, "eval_exec": 0.723404255319149, "eval_loss": 0.3162212371826172, "eval_runtime": 294.7911, "eval_samples_per_second": 3.508, "step": 4160 }, { "epoch": 56.27, "learning_rate": 0.0001, "loss": 0.0012, "step": 4164 }, { "epoch": 56.32, "learning_rate": 0.0001, "loss": 0.0055, "step": 4168 }, { "epoch": 56.37, "learning_rate": 0.0001, "loss": 0.0005, "step": 4172 }, { "epoch": 56.43, "learning_rate": 0.0001, "loss": 0.0003, "step": 4176 }, { "epoch": 56.48, "learning_rate": 0.0001, "loss": 0.0008, "step": 4180 }, { "epoch": 56.53, "learning_rate": 0.0001, "loss": 0.0013, "step": 4184 }, { "epoch": 56.59, "learning_rate": 0.0001, "loss": 0.0006, "step": 4188 }, { "epoch": 56.64, "learning_rate": 0.0001, "loss": 0.0025, "step": 4192 }, { "epoch": 56.7, "learning_rate": 0.0001, "loss": 0.0003, "step": 4196 }, { "epoch": 56.75, "learning_rate": 0.0001, "loss": 0.0007, "step": 4200 }, { "epoch": 56.8, "learning_rate": 0.0001, "loss": 0.0005, "step": 4204 }, { "epoch": 56.86, "learning_rate": 0.0001, "loss": 0.0007, "step": 4208 }, { "epoch": 56.91, "learning_rate": 0.0001, "loss": 0.0009, "step": 4212 }, { "epoch": 56.96, "learning_rate": 0.0001, "loss": 0.0008, "step": 4216 }, { "epoch": 57.03, "learning_rate": 0.0001, "loss": 0.0014, "step": 4220 }, { "epoch": 57.08, "learning_rate": 0.0001, "loss": 0.003, "step": 4224 }, { "epoch": 57.08, "eval_exec": 0.706963249516441, "eval_loss": 0.3172769248485565, "eval_runtime": 287.3743, "eval_samples_per_second": 3.598, "step": 4224 }, { "epoch": 57.13, "learning_rate": 0.0001, "loss": 0.0007, "step": 4228 }, { "epoch": 57.19, "learning_rate": 0.0001, "loss": 0.0012, "step": 4232 }, { "epoch": 57.24, "learning_rate": 0.0001, "loss": 0.0004, "step": 4236 }, { "epoch": 57.29, "learning_rate": 0.0001, "loss": 0.0013, "step": 4240 }, { "epoch": 57.35, "learning_rate": 0.0001, "loss": 0.0019, "step": 4244 }, { "epoch": 57.4, "learning_rate": 0.0001, "loss": 0.0008, "step": 4248 }, { "epoch": 57.45, "learning_rate": 0.0001, "loss": 0.0011, "step": 4252 }, { "epoch": 57.51, "learning_rate": 0.0001, "loss": 0.0015, "step": 4256 }, { "epoch": 57.56, "learning_rate": 0.0001, "loss": 0.001, "step": 4260 }, { "epoch": 57.62, "learning_rate": 0.0001, "loss": 0.0019, "step": 4264 }, { "epoch": 57.67, "learning_rate": 0.0001, "loss": 0.0013, "step": 4268 }, { "epoch": 57.72, "learning_rate": 0.0001, "loss": 0.0007, "step": 4272 }, { "epoch": 57.78, "learning_rate": 0.0001, "loss": 0.0013, "step": 4276 }, { "epoch": 57.83, "learning_rate": 0.0001, "loss": 0.0011, "step": 4280 }, { "epoch": 57.88, "learning_rate": 0.0001, "loss": 0.0012, "step": 4284 }, { "epoch": 57.94, "learning_rate": 0.0001, "loss": 0.0021, "step": 4288 }, { "epoch": 57.94, "eval_exec": 0.7224371373307543, "eval_loss": 0.2908557057380676, "eval_runtime": 293.2594, "eval_samples_per_second": 3.526, "step": 4288 }, { "epoch": 57.99, "learning_rate": 0.0001, "loss": 0.0016, "step": 4292 }, { "epoch": 58.05, "learning_rate": 0.0001, "loss": 0.0012, "step": 4296 }, { "epoch": 58.11, "learning_rate": 0.0001, "loss": 0.0009, "step": 4300 }, { "epoch": 58.16, "learning_rate": 0.0001, "loss": 0.0032, "step": 4304 }, { "epoch": 58.21, "learning_rate": 0.0001, "loss": 0.0003, "step": 4308 }, { "epoch": 58.27, "learning_rate": 0.0001, "loss": 0.0009, "step": 4312 }, { "epoch": 58.32, "learning_rate": 0.0001, "loss": 0.0006, "step": 4316 }, { "epoch": 58.37, "learning_rate": 0.0001, "loss": 0.0006, "step": 4320 }, { "epoch": 58.43, "learning_rate": 0.0001, "loss": 0.0012, "step": 4324 }, { "epoch": 58.48, "learning_rate": 0.0001, "loss": 0.0003, "step": 4328 }, { "epoch": 58.53, "learning_rate": 0.0001, "loss": 0.0009, "step": 4332 }, { "epoch": 58.59, "learning_rate": 0.0001, "loss": 0.0007, "step": 4336 }, { "epoch": 58.64, "learning_rate": 0.0001, "loss": 0.0018, "step": 4340 }, { "epoch": 58.7, "learning_rate": 0.0001, "loss": 0.0013, "step": 4344 }, { "epoch": 58.75, "learning_rate": 0.0001, "loss": 0.0008, "step": 4348 }, { "epoch": 58.8, "learning_rate": 0.0001, "loss": 0.0008, "step": 4352 }, { "epoch": 58.8, "eval_exec": 0.7166344294003868, "eval_loss": 0.31116729974746704, "eval_runtime": 295.9051, "eval_samples_per_second": 3.494, "step": 4352 }, { "epoch": 58.86, "learning_rate": 0.0001, "loss": 0.0018, "step": 4356 }, { "epoch": 58.91, "learning_rate": 0.0001, "loss": 0.0025, "step": 4360 }, { "epoch": 58.96, "learning_rate": 0.0001, "loss": 0.0006, "step": 4364 }, { "epoch": 59.03, "learning_rate": 0.0001, "loss": 0.001, "step": 4368 }, { "epoch": 59.08, "learning_rate": 0.0001, "loss": 0.001, "step": 4372 }, { "epoch": 59.13, "learning_rate": 0.0001, "loss": 0.0012, "step": 4376 }, { "epoch": 59.19, "learning_rate": 0.0001, "loss": 0.0011, "step": 4380 }, { "epoch": 59.24, "learning_rate": 0.0001, "loss": 0.0014, "step": 4384 }, { "epoch": 59.29, "learning_rate": 0.0001, "loss": 0.0009, "step": 4388 }, { "epoch": 59.35, "learning_rate": 0.0001, "loss": 0.0009, "step": 4392 }, { "epoch": 59.4, "learning_rate": 0.0001, "loss": 0.0005, "step": 4396 }, { "epoch": 59.45, "learning_rate": 0.0001, "loss": 0.0007, "step": 4400 }, { "epoch": 59.51, "learning_rate": 0.0001, "loss": 0.0007, "step": 4404 }, { "epoch": 59.56, "learning_rate": 0.0001, "loss": 0.0014, "step": 4408 }, { "epoch": 59.62, "learning_rate": 0.0001, "loss": 0.0006, "step": 4412 }, { "epoch": 59.67, "learning_rate": 0.0001, "loss": 0.0012, "step": 4416 }, { "epoch": 59.67, "eval_exec": 0.7388781431334622, "eval_loss": 0.3091637194156647, "eval_runtime": 297.2444, "eval_samples_per_second": 3.479, "step": 4416 }, { "epoch": 59.72, "learning_rate": 0.0001, "loss": 0.0023, "step": 4420 }, { "epoch": 59.78, "learning_rate": 0.0001, "loss": 0.0033, "step": 4424 }, { "epoch": 59.83, "learning_rate": 0.0001, "loss": 0.0024, "step": 4428 }, { "epoch": 59.88, "learning_rate": 0.0001, "loss": 0.0016, "step": 4432 }, { "epoch": 59.94, "learning_rate": 0.0001, "loss": 0.0014, "step": 4436 }, { "epoch": 59.99, "learning_rate": 0.0001, "loss": 0.0009, "step": 4440 }, { "epoch": 60.05, "learning_rate": 0.0001, "loss": 0.0014, "step": 4444 }, { "epoch": 60.11, "learning_rate": 0.0001, "loss": 0.0015, "step": 4448 }, { "epoch": 60.16, "learning_rate": 0.0001, "loss": 0.0004, "step": 4452 }, { "epoch": 60.21, "learning_rate": 0.0001, "loss": 0.0009, "step": 4456 }, { "epoch": 60.27, "learning_rate": 0.0001, "loss": 0.0004, "step": 4460 }, { "epoch": 60.32, "learning_rate": 0.0001, "loss": 0.0011, "step": 4464 }, { "epoch": 60.37, "learning_rate": 0.0001, "loss": 0.0007, "step": 4468 }, { "epoch": 60.43, "learning_rate": 0.0001, "loss": 0.0008, "step": 4472 }, { "epoch": 60.48, "learning_rate": 0.0001, "loss": 0.0014, "step": 4476 }, { "epoch": 60.53, "learning_rate": 0.0001, "loss": 0.0021, "step": 4480 }, { "epoch": 60.53, "eval_exec": 0.7243713733075435, "eval_loss": 0.3012126684188843, "eval_runtime": 289.2665, "eval_samples_per_second": 3.575, "step": 4480 }, { "epoch": 60.59, "learning_rate": 0.0001, "loss": 0.001, "step": 4484 }, { "epoch": 60.64, "learning_rate": 0.0001, "loss": 0.0013, "step": 4488 }, { "epoch": 60.7, "learning_rate": 0.0001, "loss": 0.0006, "step": 4492 }, { "epoch": 60.75, "learning_rate": 0.0001, "loss": 0.0006, "step": 4496 }, { "epoch": 60.8, "learning_rate": 0.0001, "loss": 0.001, "step": 4500 }, { "epoch": 60.86, "learning_rate": 0.0001, "loss": 0.0031, "step": 4504 }, { "epoch": 60.91, "learning_rate": 0.0001, "loss": 0.0009, "step": 4508 }, { "epoch": 60.96, "learning_rate": 0.0001, "loss": 0.0009, "step": 4512 }, { "epoch": 61.03, "learning_rate": 0.0001, "loss": 0.0012, "step": 4516 }, { "epoch": 61.08, "learning_rate": 0.0001, "loss": 0.0006, "step": 4520 }, { "epoch": 61.13, "learning_rate": 0.0001, "loss": 0.0007, "step": 4524 }, { "epoch": 61.19, "learning_rate": 0.0001, "loss": 0.001, "step": 4528 }, { "epoch": 61.24, "learning_rate": 0.0001, "loss": 0.0004, "step": 4532 }, { "epoch": 61.29, "learning_rate": 0.0001, "loss": 0.0005, "step": 4536 }, { "epoch": 61.35, "learning_rate": 0.0001, "loss": 0.0029, "step": 4540 }, { "epoch": 61.4, "learning_rate": 0.0001, "loss": 0.0007, "step": 4544 }, { "epoch": 61.4, "eval_exec": 0.7272727272727273, "eval_loss": 0.30137544870376587, "eval_runtime": 286.7428, "eval_samples_per_second": 3.606, "step": 4544 }, { "epoch": 61.45, "learning_rate": 0.0001, "loss": 0.0015, "step": 4548 }, { "epoch": 61.51, "learning_rate": 0.0001, "loss": 0.0017, "step": 4552 }, { "epoch": 61.56, "learning_rate": 0.0001, "loss": 0.0008, "step": 4556 }, { "epoch": 61.62, "learning_rate": 0.0001, "loss": 0.0009, "step": 4560 }, { "epoch": 61.67, "learning_rate": 0.0001, "loss": 0.001, "step": 4564 }, { "epoch": 61.72, "learning_rate": 0.0001, "loss": 0.0009, "step": 4568 }, { "epoch": 61.78, "learning_rate": 0.0001, "loss": 0.0011, "step": 4572 }, { "epoch": 61.83, "learning_rate": 0.0001, "loss": 0.0007, "step": 4576 }, { "epoch": 61.88, "learning_rate": 0.0001, "loss": 0.001, "step": 4580 }, { "epoch": 61.94, "learning_rate": 0.0001, "loss": 0.001, "step": 4584 }, { "epoch": 61.99, "learning_rate": 0.0001, "loss": 0.001, "step": 4588 }, { "epoch": 62.05, "learning_rate": 0.0001, "loss": 0.0026, "step": 4592 }, { "epoch": 62.11, "learning_rate": 0.0001, "loss": 0.0007, "step": 4596 }, { "epoch": 62.16, "learning_rate": 0.0001, "loss": 0.0008, "step": 4600 }, { "epoch": 62.21, "learning_rate": 0.0001, "loss": 0.0003, "step": 4604 }, { "epoch": 62.27, "learning_rate": 0.0001, "loss": 0.0007, "step": 4608 }, { "epoch": 62.27, "eval_exec": 0.723404255319149, "eval_loss": 0.3098083734512329, "eval_runtime": 291.4042, "eval_samples_per_second": 3.548, "step": 4608 }, { "epoch": 62.32, "learning_rate": 0.0001, "loss": 0.0038, "step": 4612 }, { "epoch": 62.37, "learning_rate": 0.0001, "loss": 0.0013, "step": 4616 }, { "epoch": 62.43, "learning_rate": 0.0001, "loss": 0.0008, "step": 4620 }, { "epoch": 62.48, "learning_rate": 0.0001, "loss": 0.0011, "step": 4624 }, { "epoch": 62.53, "learning_rate": 0.0001, "loss": 0.002, "step": 4628 }, { "epoch": 62.59, "learning_rate": 0.0001, "loss": 0.0018, "step": 4632 }, { "epoch": 62.64, "learning_rate": 0.0001, "loss": 0.0005, "step": 4636 }, { "epoch": 62.7, "learning_rate": 0.0001, "loss": 0.0008, "step": 4640 }, { "epoch": 62.75, "learning_rate": 0.0001, "loss": 0.0009, "step": 4644 }, { "epoch": 62.8, "learning_rate": 0.0001, "loss": 0.001, "step": 4648 }, { "epoch": 62.86, "learning_rate": 0.0001, "loss": 0.0004, "step": 4652 }, { "epoch": 62.91, "learning_rate": 0.0001, "loss": 0.0003, "step": 4656 }, { "epoch": 62.96, "learning_rate": 0.0001, "loss": 0.0002, "step": 4660 }, { "epoch": 63.03, "learning_rate": 0.0001, "loss": 0.0011, "step": 4664 }, { "epoch": 63.08, "learning_rate": 0.0001, "loss": 0.001, "step": 4668 }, { "epoch": 63.13, "learning_rate": 0.0001, "loss": 0.0003, "step": 4672 }, { "epoch": 63.13, "eval_exec": 0.7437137330754352, "eval_loss": 0.32493457198143005, "eval_runtime": 288.524, "eval_samples_per_second": 3.584, "step": 4672 }, { "epoch": 63.19, "learning_rate": 0.0001, "loss": 0.0005, "step": 4676 }, { "epoch": 63.24, "learning_rate": 0.0001, "loss": 0.0002, "step": 4680 }, { "epoch": 63.29, "learning_rate": 0.0001, "loss": 0.0012, "step": 4684 }, { "epoch": 63.35, "learning_rate": 0.0001, "loss": 0.0011, "step": 4688 }, { "epoch": 63.4, "learning_rate": 0.0001, "loss": 0.0007, "step": 4692 }, { "epoch": 63.45, "learning_rate": 0.0001, "loss": 0.0002, "step": 4696 }, { "epoch": 63.51, "learning_rate": 0.0001, "loss": 0.0005, "step": 4700 }, { "epoch": 63.56, "learning_rate": 0.0001, "loss": 0.0012, "step": 4704 }, { "epoch": 63.62, "learning_rate": 0.0001, "loss": 0.0003, "step": 4708 }, { "epoch": 63.67, "learning_rate": 0.0001, "loss": 0.0003, "step": 4712 }, { "epoch": 63.72, "learning_rate": 0.0001, "loss": 0.0009, "step": 4716 }, { "epoch": 63.78, "learning_rate": 0.0001, "loss": 0.0009, "step": 4720 }, { "epoch": 63.83, "learning_rate": 0.0001, "loss": 0.0006, "step": 4724 }, { "epoch": 63.88, "learning_rate": 0.0001, "loss": 0.0004, "step": 4728 }, { "epoch": 63.94, "learning_rate": 0.0001, "loss": 0.0004, "step": 4732 }, { "epoch": 63.99, "learning_rate": 0.0001, "loss": 0.0012, "step": 4736 }, { "epoch": 63.99, "eval_exec": 0.7427466150870407, "eval_loss": 0.3356438875198364, "eval_runtime": 295.1379, "eval_samples_per_second": 3.503, "step": 4736 }, { "epoch": 64.05, "learning_rate": 0.0001, "loss": 0.0009, "step": 4740 }, { "epoch": 64.11, "learning_rate": 0.0001, "loss": 0.0004, "step": 4744 }, { "epoch": 64.16, "learning_rate": 0.0001, "loss": 0.001, "step": 4748 }, { "epoch": 64.21, "learning_rate": 0.0001, "loss": 0.0024, "step": 4752 }, { "epoch": 64.27, "learning_rate": 0.0001, "loss": 0.0006, "step": 4756 }, { "epoch": 64.32, "learning_rate": 0.0001, "loss": 0.0008, "step": 4760 }, { "epoch": 64.37, "learning_rate": 0.0001, "loss": 0.0005, "step": 4764 }, { "epoch": 64.43, "learning_rate": 0.0001, "loss": 0.0002, "step": 4768 }, { "epoch": 64.48, "learning_rate": 0.0001, "loss": 0.0007, "step": 4772 }, { "epoch": 64.53, "learning_rate": 0.0001, "loss": 0.0006, "step": 4776 }, { "epoch": 64.59, "learning_rate": 0.0001, "loss": 0.0002, "step": 4780 }, { "epoch": 64.64, "learning_rate": 0.0001, "loss": 0.0002, "step": 4784 }, { "epoch": 64.7, "learning_rate": 0.0001, "loss": 0.0003, "step": 4788 }, { "epoch": 64.75, "learning_rate": 0.0001, "loss": 0.0009, "step": 4792 }, { "epoch": 64.8, "learning_rate": 0.0001, "loss": 0.0008, "step": 4796 }, { "epoch": 64.86, "learning_rate": 0.0001, "loss": 0.0002, "step": 4800 }, { "epoch": 64.86, "eval_exec": 0.7398452611218569, "eval_loss": 0.3491382598876953, "eval_runtime": 296.7752, "eval_samples_per_second": 3.484, "step": 4800 }, { "epoch": 64.91, "learning_rate": 0.0001, "loss": 0.001, "step": 4804 }, { "epoch": 64.96, "learning_rate": 0.0001, "loss": 0.0014, "step": 4808 }, { "epoch": 65.03, "learning_rate": 0.0001, "loss": 0.0013, "step": 4812 }, { "epoch": 65.08, "learning_rate": 0.0001, "loss": 0.0008, "step": 4816 }, { "epoch": 65.13, "learning_rate": 0.0001, "loss": 0.0005, "step": 4820 }, { "epoch": 65.19, "learning_rate": 0.0001, "loss": 0.0011, "step": 4824 }, { "epoch": 65.24, "learning_rate": 0.0001, "loss": 0.002, "step": 4828 }, { "epoch": 65.29, "learning_rate": 0.0001, "loss": 0.0007, "step": 4832 }, { "epoch": 65.35, "learning_rate": 0.0001, "loss": 0.0011, "step": 4836 }, { "epoch": 65.4, "learning_rate": 0.0001, "loss": 0.0008, "step": 4840 }, { "epoch": 65.45, "learning_rate": 0.0001, "loss": 0.0008, "step": 4844 }, { "epoch": 65.51, "learning_rate": 0.0001, "loss": 0.0004, "step": 4848 }, { "epoch": 65.56, "learning_rate": 0.0001, "loss": 0.0007, "step": 4852 }, { "epoch": 65.62, "learning_rate": 0.0001, "loss": 0.0003, "step": 4856 }, { "epoch": 65.67, "learning_rate": 0.0001, "loss": 0.0005, "step": 4860 }, { "epoch": 65.72, "learning_rate": 0.0001, "loss": 0.0006, "step": 4864 }, { "epoch": 65.72, "eval_exec": 0.7272727272727273, "eval_loss": 0.34306177496910095, "eval_runtime": 286.5304, "eval_samples_per_second": 3.609, "step": 4864 }, { "epoch": 65.78, "learning_rate": 0.0001, "loss": 0.0008, "step": 4868 }, { "epoch": 65.83, "learning_rate": 0.0001, "loss": 0.0006, "step": 4872 }, { "epoch": 65.88, "learning_rate": 0.0001, "loss": 0.0006, "step": 4876 }, { "epoch": 65.94, "learning_rate": 0.0001, "loss": 0.0004, "step": 4880 }, { "epoch": 65.99, "learning_rate": 0.0001, "loss": 0.0005, "step": 4884 }, { "epoch": 66.05, "learning_rate": 0.0001, "loss": 0.0005, "step": 4888 }, { "epoch": 66.11, "learning_rate": 0.0001, "loss": 0.0007, "step": 4892 }, { "epoch": 66.16, "learning_rate": 0.0001, "loss": 0.0007, "step": 4896 }, { "epoch": 66.21, "learning_rate": 0.0001, "loss": 0.0005, "step": 4900 }, { "epoch": 66.27, "learning_rate": 0.0001, "loss": 0.0013, "step": 4904 }, { "epoch": 66.32, "learning_rate": 0.0001, "loss": 0.001, "step": 4908 }, { "epoch": 66.37, "learning_rate": 0.0001, "loss": 0.0008, "step": 4912 }, { "epoch": 66.43, "learning_rate": 0.0001, "loss": 0.0004, "step": 4916 }, { "epoch": 66.48, "learning_rate": 0.0001, "loss": 0.0006, "step": 4920 }, { "epoch": 66.53, "learning_rate": 0.0001, "loss": 0.0019, "step": 4924 }, { "epoch": 66.59, "learning_rate": 0.0001, "loss": 0.0013, "step": 4928 }, { "epoch": 66.59, "eval_exec": 0.7050290135396519, "eval_loss": 0.3411843478679657, "eval_runtime": 292.9024, "eval_samples_per_second": 3.53, "step": 4928 }, { "epoch": 66.64, "learning_rate": 0.0001, "loss": 0.0005, "step": 4932 }, { "epoch": 66.7, "learning_rate": 0.0001, "loss": 0.0003, "step": 4936 }, { "epoch": 66.75, "learning_rate": 0.0001, "loss": 0.0011, "step": 4940 }, { "epoch": 66.8, "learning_rate": 0.0001, "loss": 0.0006, "step": 4944 }, { "epoch": 66.86, "learning_rate": 0.0001, "loss": 0.0006, "step": 4948 }, { "epoch": 66.91, "learning_rate": 0.0001, "loss": 0.001, "step": 4952 }, { "epoch": 66.96, "learning_rate": 0.0001, "loss": 0.0002, "step": 4956 }, { "epoch": 67.03, "learning_rate": 0.0001, "loss": 0.0009, "step": 4960 }, { "epoch": 67.08, "learning_rate": 0.0001, "loss": 0.0007, "step": 4964 }, { "epoch": 67.13, "learning_rate": 0.0001, "loss": 0.0002, "step": 4968 }, { "epoch": 67.19, "learning_rate": 0.0001, "loss": 0.0003, "step": 4972 }, { "epoch": 67.24, "learning_rate": 0.0001, "loss": 0.0002, "step": 4976 }, { "epoch": 67.29, "learning_rate": 0.0001, "loss": 0.001, "step": 4980 }, { "epoch": 67.35, "learning_rate": 0.0001, "loss": 0.0014, "step": 4984 }, { "epoch": 67.4, "learning_rate": 0.0001, "loss": 0.0003, "step": 4988 }, { "epoch": 67.45, "learning_rate": 0.0001, "loss": 0.0012, "step": 4992 }, { "epoch": 67.45, "eval_exec": 0.7224371373307543, "eval_loss": 0.33966416120529175, "eval_runtime": 289.6629, "eval_samples_per_second": 3.57, "step": 4992 }, { "epoch": 67.51, "learning_rate": 0.0001, "loss": 0.0002, "step": 4996 }, { "epoch": 67.56, "learning_rate": 0.0001, "loss": 0.0015, "step": 5000 }, { "epoch": 67.62, "learning_rate": 0.0001, "loss": 0.0004, "step": 5004 }, { "epoch": 67.67, "learning_rate": 0.0001, "loss": 0.0016, "step": 5008 }, { "epoch": 67.72, "learning_rate": 0.0001, "loss": 0.0013, "step": 5012 }, { "epoch": 67.78, "learning_rate": 0.0001, "loss": 0.0017, "step": 5016 }, { "epoch": 67.83, "learning_rate": 0.0001, "loss": 0.0003, "step": 5020 }, { "epoch": 67.88, "learning_rate": 0.0001, "loss": 0.0006, "step": 5024 }, { "epoch": 67.94, "learning_rate": 0.0001, "loss": 0.0004, "step": 5028 }, { "epoch": 67.99, "learning_rate": 0.0001, "loss": 0.0011, "step": 5032 }, { "epoch": 68.05, "learning_rate": 0.0001, "loss": 0.0009, "step": 5036 }, { "epoch": 68.11, "learning_rate": 0.0001, "loss": 0.0005, "step": 5040 }, { "epoch": 68.16, "learning_rate": 0.0001, "loss": 0.0006, "step": 5044 }, { "epoch": 68.21, "learning_rate": 0.0001, "loss": 0.0008, "step": 5048 }, { "epoch": 68.27, "learning_rate": 0.0001, "loss": 0.0006, "step": 5052 }, { "epoch": 68.32, "learning_rate": 0.0001, "loss": 0.0007, "step": 5056 }, { "epoch": 68.32, "eval_exec": 0.723404255319149, "eval_loss": 0.31566286087036133, "eval_runtime": 285.389, "eval_samples_per_second": 3.623, "step": 5056 }, { "epoch": 68.37, "learning_rate": 0.0001, "loss": 0.0008, "step": 5060 }, { "epoch": 68.43, "learning_rate": 0.0001, "loss": 0.0008, "step": 5064 }, { "epoch": 68.48, "learning_rate": 0.0001, "loss": 0.0009, "step": 5068 }, { "epoch": 68.53, "learning_rate": 0.0001, "loss": 0.0013, "step": 5072 }, { "epoch": 68.59, "learning_rate": 0.0001, "loss": 0.0004, "step": 5076 }, { "epoch": 68.64, "learning_rate": 0.0001, "loss": 0.0011, "step": 5080 }, { "epoch": 68.7, "learning_rate": 0.0001, "loss": 0.0004, "step": 5084 }, { "epoch": 68.75, "learning_rate": 0.0001, "loss": 0.0023, "step": 5088 }, { "epoch": 68.8, "learning_rate": 0.0001, "loss": 0.001, "step": 5092 }, { "epoch": 68.86, "learning_rate": 0.0001, "loss": 0.0024, "step": 5096 }, { "epoch": 68.91, "learning_rate": 0.0001, "loss": 0.0011, "step": 5100 }, { "epoch": 68.96, "learning_rate": 0.0001, "loss": 0.0012, "step": 5104 }, { "epoch": 69.03, "learning_rate": 0.0001, "loss": 0.001, "step": 5108 }, { "epoch": 69.08, "learning_rate": 0.0001, "loss": 0.0016, "step": 5112 }, { "epoch": 69.13, "learning_rate": 0.0001, "loss": 0.0007, "step": 5116 }, { "epoch": 69.19, "learning_rate": 0.0001, "loss": 0.0009, "step": 5120 }, { "epoch": 69.19, "eval_exec": 0.7108317214700194, "eval_loss": 0.3202356994152069, "eval_runtime": 289.377, "eval_samples_per_second": 3.573, "step": 5120 }, { "epoch": 69.24, "learning_rate": 0.0001, "loss": 0.0003, "step": 5124 }, { "epoch": 69.29, "learning_rate": 0.0001, "loss": 0.0004, "step": 5128 }, { "epoch": 69.35, "learning_rate": 0.0001, "loss": 0.0002, "step": 5132 }, { "epoch": 69.4, "learning_rate": 0.0001, "loss": 0.0002, "step": 5136 }, { "epoch": 69.45, "learning_rate": 0.0001, "loss": 0.0001, "step": 5140 }, { "epoch": 69.51, "learning_rate": 0.0001, "loss": 0.0015, "step": 5144 }, { "epoch": 69.56, "learning_rate": 0.0001, "loss": 0.001, "step": 5148 }, { "epoch": 69.62, "learning_rate": 0.0001, "loss": 0.0016, "step": 5152 }, { "epoch": 69.67, "learning_rate": 0.0001, "loss": 0.0006, "step": 5156 }, { "epoch": 69.72, "learning_rate": 0.0001, "loss": 0.0071, "step": 5160 }, { "epoch": 69.78, "learning_rate": 0.0001, "loss": 0.0006, "step": 5164 }, { "epoch": 69.83, "learning_rate": 0.0001, "loss": 0.0003, "step": 5168 }, { "epoch": 69.88, "learning_rate": 0.0001, "loss": 0.0006, "step": 5172 }, { "epoch": 69.94, "learning_rate": 0.0001, "loss": 0.0003, "step": 5176 }, { "epoch": 69.99, "learning_rate": 0.0001, "loss": 0.0032, "step": 5180 }, { "epoch": 70.05, "learning_rate": 0.0001, "loss": 0.0007, "step": 5184 }, { "epoch": 70.05, "eval_exec": 0.7040618955512572, "eval_loss": 0.31820809841156006, "eval_runtime": 291.8188, "eval_samples_per_second": 3.543, "step": 5184 }, { "epoch": 70.11, "learning_rate": 0.0001, "loss": 0.0002, "step": 5188 }, { "epoch": 70.16, "learning_rate": 0.0001, "loss": 0.0002, "step": 5192 }, { "epoch": 70.21, "learning_rate": 0.0001, "loss": 0.0001, "step": 5196 }, { "epoch": 70.27, "learning_rate": 0.0001, "loss": 0.0006, "step": 5200 }, { "epoch": 70.32, "learning_rate": 0.0001, "loss": 0.0013, "step": 5204 }, { "epoch": 70.37, "learning_rate": 0.0001, "loss": 0.0004, "step": 5208 }, { "epoch": 70.43, "learning_rate": 0.0001, "loss": 0.0002, "step": 5212 }, { "epoch": 70.48, "learning_rate": 0.0001, "loss": 0.0004, "step": 5216 }, { "epoch": 70.53, "learning_rate": 0.0001, "loss": 0.0012, "step": 5220 }, { "epoch": 70.59, "learning_rate": 0.0001, "loss": 0.0002, "step": 5224 }, { "epoch": 70.64, "learning_rate": 0.0001, "loss": 0.0004, "step": 5228 }, { "epoch": 70.7, "learning_rate": 0.0001, "loss": 0.0004, "step": 5232 }, { "epoch": 70.75, "learning_rate": 0.0001, "loss": 0.0017, "step": 5236 }, { "epoch": 70.8, "learning_rate": 0.0001, "loss": 0.0007, "step": 5240 }, { "epoch": 70.86, "learning_rate": 0.0001, "loss": 0.0001, "step": 5244 }, { "epoch": 70.91, "learning_rate": 0.0001, "loss": 0.0004, "step": 5248 }, { "epoch": 70.91, "eval_exec": 0.7176015473887815, "eval_loss": 0.35949328541755676, "eval_runtime": 287.4088, "eval_samples_per_second": 3.598, "step": 5248 }, { "epoch": 70.96, "learning_rate": 0.0001, "loss": 0.0005, "step": 5252 }, { "epoch": 71.03, "learning_rate": 0.0001, "loss": 0.0002, "step": 5256 }, { "epoch": 71.08, "learning_rate": 0.0001, "loss": 0.0009, "step": 5260 }, { "epoch": 71.13, "learning_rate": 0.0001, "loss": 0.0007, "step": 5264 }, { "epoch": 71.19, "learning_rate": 0.0001, "loss": 0.0002, "step": 5268 }, { "epoch": 71.24, "learning_rate": 0.0001, "loss": 0.0007, "step": 5272 }, { "epoch": 71.29, "learning_rate": 0.0001, "loss": 0.0006, "step": 5276 }, { "epoch": 71.35, "learning_rate": 0.0001, "loss": 0.0013, "step": 5280 }, { "epoch": 71.4, "learning_rate": 0.0001, "loss": 0.0002, "step": 5284 }, { "epoch": 71.45, "learning_rate": 0.0001, "loss": 0.0014, "step": 5288 }, { "epoch": 71.51, "learning_rate": 0.0001, "loss": 0.0007, "step": 5292 }, { "epoch": 71.56, "learning_rate": 0.0001, "loss": 0.0004, "step": 5296 }, { "epoch": 71.62, "learning_rate": 0.0001, "loss": 0.005, "step": 5300 }, { "epoch": 71.67, "learning_rate": 0.0001, "loss": 0.0011, "step": 5304 }, { "epoch": 71.72, "learning_rate": 0.0001, "loss": 0.0026, "step": 5308 }, { "epoch": 71.78, "learning_rate": 0.0001, "loss": 0.0009, "step": 5312 }, { "epoch": 71.78, "eval_exec": 0.7205029013539652, "eval_loss": 0.304627001285553, "eval_runtime": 285.1203, "eval_samples_per_second": 3.627, "step": 5312 }, { "epoch": 71.83, "learning_rate": 0.0001, "loss": 0.0005, "step": 5316 }, { "epoch": 71.88, "learning_rate": 0.0001, "loss": 0.0007, "step": 5320 }, { "epoch": 71.94, "learning_rate": 0.0001, "loss": 0.0016, "step": 5324 }, { "epoch": 71.99, "learning_rate": 0.0001, "loss": 0.003, "step": 5328 }, { "epoch": 72.05, "learning_rate": 0.0001, "loss": 0.0007, "step": 5332 }, { "epoch": 72.11, "learning_rate": 0.0001, "loss": 0.0011, "step": 5336 }, { "epoch": 72.16, "learning_rate": 0.0001, "loss": 0.0003, "step": 5340 }, { "epoch": 72.21, "learning_rate": 0.0001, "loss": 0.0002, "step": 5344 }, { "epoch": 72.27, "learning_rate": 0.0001, "loss": 0.0015, "step": 5348 }, { "epoch": 72.32, "learning_rate": 0.0001, "loss": 0.0016, "step": 5352 }, { "epoch": 72.37, "learning_rate": 0.0001, "loss": 0.0009, "step": 5356 }, { "epoch": 72.43, "learning_rate": 0.0001, "loss": 0.0004, "step": 5360 }, { "epoch": 72.48, "learning_rate": 0.0001, "loss": 0.0013, "step": 5364 }, { "epoch": 72.53, "learning_rate": 0.0001, "loss": 0.0005, "step": 5368 }, { "epoch": 72.59, "learning_rate": 0.0001, "loss": 0.0004, "step": 5372 }, { "epoch": 72.64, "learning_rate": 0.0001, "loss": 0.0007, "step": 5376 }, { "epoch": 72.64, "eval_exec": 0.7321083172147002, "eval_loss": 0.3293524980545044, "eval_runtime": 290.4949, "eval_samples_per_second": 3.559, "step": 5376 }, { "epoch": 72.7, "learning_rate": 0.0001, "loss": 0.0003, "step": 5380 }, { "epoch": 72.75, "learning_rate": 0.0001, "loss": 0.0004, "step": 5384 }, { "epoch": 72.8, "learning_rate": 0.0001, "loss": 0.0005, "step": 5388 }, { "epoch": 72.86, "learning_rate": 0.0001, "loss": 0.0012, "step": 5392 }, { "epoch": 72.91, "learning_rate": 0.0001, "loss": 0.0007, "step": 5396 }, { "epoch": 72.96, "learning_rate": 0.0001, "loss": 0.001, "step": 5400 }, { "epoch": 73.03, "learning_rate": 0.0001, "loss": 0.0015, "step": 5404 }, { "epoch": 73.08, "learning_rate": 0.0001, "loss": 0.0008, "step": 5408 }, { "epoch": 73.13, "learning_rate": 0.0001, "loss": 0.0008, "step": 5412 }, { "epoch": 73.19, "learning_rate": 0.0001, "loss": 0.0006, "step": 5416 }, { "epoch": 73.24, "learning_rate": 0.0001, "loss": 0.0056, "step": 5420 }, { "epoch": 73.29, "learning_rate": 0.0001, "loss": 0.0005, "step": 5424 }, { "epoch": 73.35, "learning_rate": 0.0001, "loss": 0.0022, "step": 5428 }, { "epoch": 73.4, "learning_rate": 0.0001, "loss": 0.0008, "step": 5432 }, { "epoch": 73.45, "learning_rate": 0.0001, "loss": 0.0003, "step": 5436 }, { "epoch": 73.51, "learning_rate": 0.0001, "loss": 0.0022, "step": 5440 }, { "epoch": 73.51, "eval_exec": 0.7350096711798839, "eval_loss": 0.31576061248779297, "eval_runtime": 291.4555, "eval_samples_per_second": 3.548, "step": 5440 }, { "epoch": 73.56, "learning_rate": 0.0001, "loss": 0.0005, "step": 5444 }, { "epoch": 73.62, "learning_rate": 0.0001, "loss": 0.0003, "step": 5448 }, { "epoch": 73.67, "learning_rate": 0.0001, "loss": 0.0002, "step": 5452 }, { "epoch": 73.72, "learning_rate": 0.0001, "loss": 0.001, "step": 5456 }, { "epoch": 73.78, "learning_rate": 0.0001, "loss": 0.0006, "step": 5460 }, { "epoch": 73.83, "learning_rate": 0.0001, "loss": 0.0009, "step": 5464 }, { "epoch": 73.88, "learning_rate": 0.0001, "loss": 0.0005, "step": 5468 }, { "epoch": 73.94, "learning_rate": 0.0001, "loss": 0.0003, "step": 5472 }, { "epoch": 73.99, "learning_rate": 0.0001, "loss": 0.0004, "step": 5476 }, { "epoch": 74.05, "learning_rate": 0.0001, "loss": 0.0008, "step": 5480 }, { "epoch": 74.11, "learning_rate": 0.0001, "loss": 0.0006, "step": 5484 }, { "epoch": 74.16, "learning_rate": 0.0001, "loss": 0.0024, "step": 5488 }, { "epoch": 74.21, "learning_rate": 0.0001, "loss": 0.0006, "step": 5492 }, { "epoch": 74.27, "learning_rate": 0.0001, "loss": 0.0004, "step": 5496 }, { "epoch": 74.32, "learning_rate": 0.0001, "loss": 0.0002, "step": 5500 }, { "epoch": 74.37, "learning_rate": 0.0001, "loss": 0.0009, "step": 5504 }, { "epoch": 74.37, "eval_exec": 0.7021276595744681, "eval_loss": 0.3458137512207031, "eval_runtime": 285.7195, "eval_samples_per_second": 3.619, "step": 5504 }, { "epoch": 74.43, "learning_rate": 0.0001, "loss": 0.0013, "step": 5508 }, { "epoch": 74.48, "learning_rate": 0.0001, "loss": 0.0022, "step": 5512 }, { "epoch": 74.53, "learning_rate": 0.0001, "loss": 0.0006, "step": 5516 }, { "epoch": 74.59, "learning_rate": 0.0001, "loss": 0.0013, "step": 5520 }, { "epoch": 74.64, "learning_rate": 0.0001, "loss": 0.0002, "step": 5524 }, { "epoch": 74.7, "learning_rate": 0.0001, "loss": 0.0005, "step": 5528 }, { "epoch": 74.75, "learning_rate": 0.0001, "loss": 0.004, "step": 5532 }, { "epoch": 74.8, "learning_rate": 0.0001, "loss": 0.0006, "step": 5536 }, { "epoch": 74.86, "learning_rate": 0.0001, "loss": 0.0017, "step": 5540 }, { "epoch": 74.91, "learning_rate": 0.0001, "loss": 0.0006, "step": 5544 }, { "epoch": 74.96, "learning_rate": 0.0001, "loss": 0.003, "step": 5548 }, { "epoch": 75.03, "learning_rate": 0.0001, "loss": 0.0015, "step": 5552 }, { "epoch": 75.08, "learning_rate": 0.0001, "loss": 0.0003, "step": 5556 }, { "epoch": 75.13, "learning_rate": 0.0001, "loss": 0.0005, "step": 5560 }, { "epoch": 75.19, "learning_rate": 0.0001, "loss": 0.0001, "step": 5564 }, { "epoch": 75.24, "learning_rate": 0.0001, "loss": 0.0006, "step": 5568 }, { "epoch": 75.24, "eval_exec": 0.7108317214700194, "eval_loss": 0.3360079526901245, "eval_runtime": 289.9247, "eval_samples_per_second": 3.566, "step": 5568 }, { "epoch": 75.29, "learning_rate": 0.0001, "loss": 0.0003, "step": 5572 }, { "epoch": 75.35, "learning_rate": 0.0001, "loss": 0.0003, "step": 5576 }, { "epoch": 75.4, "learning_rate": 0.0001, "loss": 0.0003, "step": 5580 }, { "epoch": 75.45, "learning_rate": 0.0001, "loss": 0.0016, "step": 5584 }, { "epoch": 75.51, "learning_rate": 0.0001, "loss": 0.0007, "step": 5588 }, { "epoch": 75.56, "learning_rate": 0.0001, "loss": 0.0014, "step": 5592 }, { "epoch": 75.62, "learning_rate": 0.0001, "loss": 0.0006, "step": 5596 }, { "epoch": 75.67, "learning_rate": 0.0001, "loss": 0.0009, "step": 5600 }, { "epoch": 75.72, "learning_rate": 0.0001, "loss": 0.0004, "step": 5604 }, { "epoch": 75.78, "learning_rate": 0.0001, "loss": 0.001, "step": 5608 }, { "epoch": 75.83, "learning_rate": 0.0001, "loss": 0.0015, "step": 5612 }, { "epoch": 75.88, "learning_rate": 0.0001, "loss": 0.0008, "step": 5616 }, { "epoch": 75.94, "learning_rate": 0.0001, "loss": 0.0025, "step": 5620 }, { "epoch": 75.99, "learning_rate": 0.0001, "loss": 0.0009, "step": 5624 }, { "epoch": 76.05, "learning_rate": 0.0001, "loss": 0.0009, "step": 5628 }, { "epoch": 76.11, "learning_rate": 0.0001, "loss": 0.001, "step": 5632 }, { "epoch": 76.11, "eval_exec": 0.706963249516441, "eval_loss": 0.3310911953449249, "eval_runtime": 288.5245, "eval_samples_per_second": 3.584, "step": 5632 }, { "epoch": 76.16, "learning_rate": 0.0001, "loss": 0.0007, "step": 5636 }, { "epoch": 76.21, "learning_rate": 0.0001, "loss": 0.0007, "step": 5640 }, { "epoch": 76.27, "learning_rate": 0.0001, "loss": 0.0009, "step": 5644 }, { "epoch": 76.32, "learning_rate": 0.0001, "loss": 0.0008, "step": 5648 }, { "epoch": 76.37, "learning_rate": 0.0001, "loss": 0.0036, "step": 5652 }, { "epoch": 76.43, "learning_rate": 0.0001, "loss": 0.0003, "step": 5656 }, { "epoch": 76.48, "learning_rate": 0.0001, "loss": 0.0017, "step": 5660 }, { "epoch": 76.53, "learning_rate": 0.0001, "loss": 0.0017, "step": 5664 }, { "epoch": 76.59, "learning_rate": 0.0001, "loss": 0.0006, "step": 5668 }, { "epoch": 76.64, "learning_rate": 0.0001, "loss": 0.0008, "step": 5672 }, { "epoch": 76.7, "learning_rate": 0.0001, "loss": 0.0014, "step": 5676 }, { "epoch": 76.75, "learning_rate": 0.0001, "loss": 0.0005, "step": 5680 }, { "epoch": 76.8, "learning_rate": 0.0001, "loss": 0.0005, "step": 5684 }, { "epoch": 76.86, "learning_rate": 0.0001, "loss": 0.0012, "step": 5688 }, { "epoch": 76.91, "learning_rate": 0.0001, "loss": 0.0008, "step": 5692 }, { "epoch": 76.96, "learning_rate": 0.0001, "loss": 0.0008, "step": 5696 }, { "epoch": 76.96, "eval_exec": 0.706963249516441, "eval_loss": 0.3491285741329193, "eval_runtime": 290.4295, "eval_samples_per_second": 3.56, "step": 5696 }, { "epoch": 77.03, "learning_rate": 0.0001, "loss": 0.0008, "step": 5700 }, { "epoch": 77.08, "learning_rate": 0.0001, "loss": 0.0004, "step": 5704 }, { "epoch": 77.13, "learning_rate": 0.0001, "loss": 0.0014, "step": 5708 }, { "epoch": 77.19, "learning_rate": 0.0001, "loss": 0.0009, "step": 5712 }, { "epoch": 77.24, "learning_rate": 0.0001, "loss": 0.0005, "step": 5716 }, { "epoch": 77.29, "learning_rate": 0.0001, "loss": 0.0009, "step": 5720 }, { "epoch": 77.35, "learning_rate": 0.0001, "loss": 0.0009, "step": 5724 }, { "epoch": 77.4, "learning_rate": 0.0001, "loss": 0.0018, "step": 5728 }, { "epoch": 77.45, "learning_rate": 0.0001, "loss": 0.0002, "step": 5732 }, { "epoch": 77.51, "learning_rate": 0.0001, "loss": 0.0004, "step": 5736 }, { "epoch": 77.56, "learning_rate": 0.0001, "loss": 0.0014, "step": 5740 }, { "epoch": 77.62, "learning_rate": 0.0001, "loss": 0.0004, "step": 5744 }, { "epoch": 77.67, "learning_rate": 0.0001, "loss": 0.0003, "step": 5748 }, { "epoch": 77.72, "learning_rate": 0.0001, "loss": 0.0008, "step": 5752 }, { "epoch": 77.78, "learning_rate": 0.0001, "loss": 0.0007, "step": 5756 }, { "epoch": 77.83, "learning_rate": 0.0001, "loss": 0.0005, "step": 5760 }, { "epoch": 77.83, "eval_exec": 0.7108317214700194, "eval_loss": 0.33614587783813477, "eval_runtime": 284.0797, "eval_samples_per_second": 3.64, "step": 5760 }, { "epoch": 77.88, "learning_rate": 0.0001, "loss": 0.0007, "step": 5764 }, { "epoch": 77.94, "learning_rate": 0.0001, "loss": 0.0011, "step": 5768 }, { "epoch": 77.99, "learning_rate": 0.0001, "loss": 0.0003, "step": 5772 }, { "epoch": 78.05, "learning_rate": 0.0001, "loss": 0.0004, "step": 5776 }, { "epoch": 78.11, "learning_rate": 0.0001, "loss": 0.0007, "step": 5780 }, { "epoch": 78.16, "learning_rate": 0.0001, "loss": 0.0005, "step": 5784 }, { "epoch": 78.21, "learning_rate": 0.0001, "loss": 0.0005, "step": 5788 }, { "epoch": 78.27, "learning_rate": 0.0001, "loss": 0.0013, "step": 5792 }, { "epoch": 78.32, "learning_rate": 0.0001, "loss": 0.0003, "step": 5796 }, { "epoch": 78.37, "learning_rate": 0.0001, "loss": 0.0008, "step": 5800 }, { "epoch": 78.43, "learning_rate": 0.0001, "loss": 0.0006, "step": 5804 }, { "epoch": 78.48, "learning_rate": 0.0001, "loss": 0.0003, "step": 5808 }, { "epoch": 78.53, "learning_rate": 0.0001, "loss": 0.0004, "step": 5812 }, { "epoch": 78.59, "learning_rate": 0.0001, "loss": 0.001, "step": 5816 }, { "epoch": 78.64, "learning_rate": 0.0001, "loss": 0.0016, "step": 5820 }, { "epoch": 78.7, "learning_rate": 0.0001, "loss": 0.0002, "step": 5824 }, { "epoch": 78.7, "eval_exec": 0.7117988394584139, "eval_loss": 0.3427754342556, "eval_runtime": 290.913, "eval_samples_per_second": 3.554, "step": 5824 }, { "epoch": 78.75, "learning_rate": 0.0001, "loss": 0.0002, "step": 5828 }, { "epoch": 78.8, "learning_rate": 0.0001, "loss": 0.0028, "step": 5832 }, { "epoch": 78.86, "learning_rate": 0.0001, "loss": 0.0006, "step": 5836 }, { "epoch": 78.91, "learning_rate": 0.0001, "loss": 0.0003, "step": 5840 }, { "epoch": 78.96, "learning_rate": 0.0001, "loss": 0.0008, "step": 5844 }, { "epoch": 79.03, "learning_rate": 0.0001, "loss": 0.0007, "step": 5848 }, { "epoch": 79.08, "learning_rate": 0.0001, "loss": 0.0005, "step": 5852 }, { "epoch": 79.13, "learning_rate": 0.0001, "loss": 0.0003, "step": 5856 }, { "epoch": 79.19, "learning_rate": 0.0001, "loss": 0.0003, "step": 5860 }, { "epoch": 79.24, "learning_rate": 0.0001, "loss": 0.0006, "step": 5864 }, { "epoch": 79.29, "learning_rate": 0.0001, "loss": 0.0005, "step": 5868 }, { "epoch": 79.35, "learning_rate": 0.0001, "loss": 0.0002, "step": 5872 }, { "epoch": 79.4, "learning_rate": 0.0001, "loss": 0.0006, "step": 5876 }, { "epoch": 79.45, "learning_rate": 0.0001, "loss": 0.0004, "step": 5880 }, { "epoch": 79.51, "learning_rate": 0.0001, "loss": 0.0003, "step": 5884 }, { "epoch": 79.56, "learning_rate": 0.0001, "loss": 0.0007, "step": 5888 }, { "epoch": 79.56, "eval_exec": 0.7011605415860735, "eval_loss": 0.34577926993370056, "eval_runtime": 287.6027, "eval_samples_per_second": 3.595, "step": 5888 }, { "epoch": 79.62, "learning_rate": 0.0001, "loss": 0.0004, "step": 5892 }, { "epoch": 79.67, "learning_rate": 0.0001, "loss": 0.0003, "step": 5896 }, { "epoch": 79.72, "learning_rate": 0.0001, "loss": 0.0004, "step": 5900 }, { "epoch": 79.78, "learning_rate": 0.0001, "loss": 0.0007, "step": 5904 }, { "epoch": 79.83, "learning_rate": 0.0001, "loss": 0.0008, "step": 5908 }, { "epoch": 79.88, "learning_rate": 0.0001, "loss": 0.0008, "step": 5912 }, { "epoch": 79.94, "learning_rate": 0.0001, "loss": 0.001, "step": 5916 }, { "epoch": 79.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 5920 }, { "epoch": 80.05, "learning_rate": 0.0001, "loss": 0.0008, "step": 5924 }, { "epoch": 80.11, "learning_rate": 0.0001, "loss": 0.0006, "step": 5928 }, { "epoch": 80.16, "learning_rate": 0.0001, "loss": 0.0004, "step": 5932 }, { "epoch": 80.21, "learning_rate": 0.0001, "loss": 0.0007, "step": 5936 }, { "epoch": 80.27, "learning_rate": 0.0001, "loss": 0.0017, "step": 5940 }, { "epoch": 80.32, "learning_rate": 0.0001, "loss": 0.0006, "step": 5944 }, { "epoch": 80.37, "learning_rate": 0.0001, "loss": 0.0004, "step": 5948 }, { "epoch": 80.43, "learning_rate": 0.0001, "loss": 0.0011, "step": 5952 }, { "epoch": 80.43, "eval_exec": 0.7050290135396519, "eval_loss": 0.32949239015579224, "eval_runtime": 280.7542, "eval_samples_per_second": 3.683, "step": 5952 }, { "epoch": 80.48, "learning_rate": 0.0001, "loss": 0.0003, "step": 5956 }, { "epoch": 80.53, "learning_rate": 0.0001, "loss": 0.0007, "step": 5960 }, { "epoch": 80.59, "learning_rate": 0.0001, "loss": 0.0004, "step": 5964 }, { "epoch": 80.64, "learning_rate": 0.0001, "loss": 0.0011, "step": 5968 }, { "epoch": 80.7, "learning_rate": 0.0001, "loss": 0.0007, "step": 5972 }, { "epoch": 80.75, "learning_rate": 0.0001, "loss": 0.0003, "step": 5976 }, { "epoch": 80.8, "learning_rate": 0.0001, "loss": 0.0004, "step": 5980 }, { "epoch": 80.86, "learning_rate": 0.0001, "loss": 0.0003, "step": 5984 }, { "epoch": 80.91, "learning_rate": 0.0001, "loss": 0.0001, "step": 5988 }, { "epoch": 80.96, "learning_rate": 0.0001, "loss": 0.0008, "step": 5992 }, { "epoch": 81.03, "learning_rate": 0.0001, "loss": 0.0003, "step": 5996 }, { "epoch": 81.08, "learning_rate": 0.0001, "loss": 0.0004, "step": 6000 }, { "epoch": 81.13, "learning_rate": 0.0001, "loss": 0.0002, "step": 6004 }, { "epoch": 81.19, "learning_rate": 0.0001, "loss": 0.0001, "step": 6008 }, { "epoch": 81.24, "learning_rate": 0.0001, "loss": 0.0012, "step": 6012 }, { "epoch": 81.29, "learning_rate": 0.0001, "loss": 0.0004, "step": 6016 }, { "epoch": 81.29, "eval_exec": 0.7040618955512572, "eval_loss": 0.3443618714809418, "eval_runtime": 284.4305, "eval_samples_per_second": 3.635, "step": 6016 }, { "epoch": 81.35, "learning_rate": 0.0001, "loss": 0.0004, "step": 6020 }, { "epoch": 81.4, "learning_rate": 0.0001, "loss": 0.0002, "step": 6024 }, { "epoch": 81.45, "learning_rate": 0.0001, "loss": 0.0002, "step": 6028 }, { "epoch": 81.51, "learning_rate": 0.0001, "loss": 0.0001, "step": 6032 }, { "epoch": 81.56, "learning_rate": 0.0001, "loss": 0.0016, "step": 6036 }, { "epoch": 81.62, "learning_rate": 0.0001, "loss": 0.0001, "step": 6040 }, { "epoch": 81.67, "learning_rate": 0.0001, "loss": 0.0003, "step": 6044 }, { "epoch": 81.72, "learning_rate": 0.0001, "loss": 0.0001, "step": 6048 }, { "epoch": 81.78, "learning_rate": 0.0001, "loss": 0.0003, "step": 6052 }, { "epoch": 81.83, "learning_rate": 0.0001, "loss": 0.001, "step": 6056 }, { "epoch": 81.88, "learning_rate": 0.0001, "loss": 0.0008, "step": 6060 }, { "epoch": 81.94, "learning_rate": 0.0001, "loss": 0.0005, "step": 6064 }, { "epoch": 81.99, "learning_rate": 0.0001, "loss": 0.0005, "step": 6068 }, { "epoch": 82.05, "learning_rate": 0.0001, "loss": 0.0002, "step": 6072 }, { "epoch": 82.11, "learning_rate": 0.0001, "loss": 0.0004, "step": 6076 }, { "epoch": 82.16, "learning_rate": 0.0001, "loss": 0.0001, "step": 6080 }, { "epoch": 82.16, "eval_exec": 0.7253384912959381, "eval_loss": 0.35400378704071045, "eval_runtime": 287.759, "eval_samples_per_second": 3.593, "step": 6080 }, { "epoch": 82.21, "learning_rate": 0.0001, "loss": 0.0003, "step": 6084 }, { "epoch": 82.27, "learning_rate": 0.0001, "loss": 0.001, "step": 6088 }, { "epoch": 82.32, "learning_rate": 0.0001, "loss": 0.0011, "step": 6092 }, { "epoch": 82.37, "learning_rate": 0.0001, "loss": 0.0003, "step": 6096 }, { "epoch": 82.43, "learning_rate": 0.0001, "loss": 0.0002, "step": 6100 }, { "epoch": 82.48, "learning_rate": 0.0001, "loss": 0.0018, "step": 6104 }, { "epoch": 82.53, "learning_rate": 0.0001, "loss": 0.0004, "step": 6108 }, { "epoch": 82.59, "learning_rate": 0.0001, "loss": 0.0007, "step": 6112 }, { "epoch": 82.64, "learning_rate": 0.0001, "loss": 0.001, "step": 6116 }, { "epoch": 82.7, "learning_rate": 0.0001, "loss": 0.0002, "step": 6120 }, { "epoch": 82.75, "learning_rate": 0.0001, "loss": 0.0012, "step": 6124 }, { "epoch": 82.8, "learning_rate": 0.0001, "loss": 0.0005, "step": 6128 }, { "epoch": 82.86, "learning_rate": 0.0001, "loss": 0.0005, "step": 6132 }, { "epoch": 82.91, "learning_rate": 0.0001, "loss": 0.0002, "step": 6136 }, { "epoch": 82.96, "learning_rate": 0.0001, "loss": 0.0005, "step": 6140 }, { "epoch": 83.03, "learning_rate": 0.0001, "loss": 0.0001, "step": 6144 }, { "epoch": 83.03, "eval_exec": 0.690522243713733, "eval_loss": 0.3448183834552765, "eval_runtime": 284.9854, "eval_samples_per_second": 3.628, "step": 6144 }, { "epoch": 83.08, "learning_rate": 0.0001, "loss": 0.0004, "step": 6148 }, { "epoch": 83.13, "learning_rate": 0.0001, "loss": 0.0001, "step": 6152 }, { "epoch": 83.19, "learning_rate": 0.0001, "loss": 0.0005, "step": 6156 }, { "epoch": 83.24, "learning_rate": 0.0001, "loss": 0.0006, "step": 6160 }, { "epoch": 83.29, "learning_rate": 0.0001, "loss": 0.0001, "step": 6164 }, { "epoch": 83.35, "learning_rate": 0.0001, "loss": 0.0006, "step": 6168 }, { "epoch": 83.4, "learning_rate": 0.0001, "loss": 0.0002, "step": 6172 }, { "epoch": 83.45, "learning_rate": 0.0001, "loss": 0.0005, "step": 6176 }, { "epoch": 83.51, "learning_rate": 0.0001, "loss": 0.0006, "step": 6180 }, { "epoch": 83.56, "learning_rate": 0.0001, "loss": 0.0008, "step": 6184 }, { "epoch": 83.62, "learning_rate": 0.0001, "loss": 0.0005, "step": 6188 }, { "epoch": 83.67, "learning_rate": 0.0001, "loss": 0.0, "step": 6192 }, { "epoch": 83.72, "learning_rate": 0.0001, "loss": 0.0002, "step": 6196 }, { "epoch": 83.78, "learning_rate": 0.0001, "loss": 0.0006, "step": 6200 }, { "epoch": 83.83, "learning_rate": 0.0001, "loss": 0.0003, "step": 6204 }, { "epoch": 83.88, "learning_rate": 0.0001, "loss": 0.0002, "step": 6208 }, { "epoch": 83.88, "eval_exec": 0.7156673114119922, "eval_loss": 0.3485656976699829, "eval_runtime": 290.1762, "eval_samples_per_second": 3.563, "step": 6208 }, { "epoch": 83.94, "learning_rate": 0.0001, "loss": 0.0012, "step": 6212 }, { "epoch": 83.99, "learning_rate": 0.0001, "loss": 0.0005, "step": 6216 }, { "epoch": 84.05, "learning_rate": 0.0001, "loss": 0.0005, "step": 6220 }, { "epoch": 84.11, "learning_rate": 0.0001, "loss": 0.0022, "step": 6224 }, { "epoch": 84.16, "learning_rate": 0.0001, "loss": 0.0002, "step": 6228 }, { "epoch": 84.21, "learning_rate": 0.0001, "loss": 0.0011, "step": 6232 }, { "epoch": 84.27, "learning_rate": 0.0001, "loss": 0.0005, "step": 6236 }, { "epoch": 84.32, "learning_rate": 0.0001, "loss": 0.001, "step": 6240 }, { "epoch": 84.37, "learning_rate": 0.0001, "loss": 0.0011, "step": 6244 }, { "epoch": 84.43, "learning_rate": 0.0001, "loss": 0.0008, "step": 6248 }, { "epoch": 84.48, "learning_rate": 0.0001, "loss": 0.0029, "step": 6252 }, { "epoch": 84.53, "learning_rate": 0.0001, "loss": 0.0004, "step": 6256 }, { "epoch": 84.59, "learning_rate": 0.0001, "loss": 0.0007, "step": 6260 }, { "epoch": 84.64, "learning_rate": 0.0001, "loss": 0.0007, "step": 6264 }, { "epoch": 84.7, "learning_rate": 0.0001, "loss": 0.0003, "step": 6268 }, { "epoch": 84.75, "learning_rate": 0.0001, "loss": 0.0002, "step": 6272 }, { "epoch": 84.75, "eval_exec": 0.7243713733075435, "eval_loss": 0.33773335814476013, "eval_runtime": 286.9564, "eval_samples_per_second": 3.603, "step": 6272 }, { "epoch": 84.8, "learning_rate": 0.0001, "loss": 0.0004, "step": 6276 }, { "epoch": 84.86, "learning_rate": 0.0001, "loss": 0.0003, "step": 6280 }, { "epoch": 84.91, "learning_rate": 0.0001, "loss": 0.0007, "step": 6284 }, { "epoch": 84.96, "learning_rate": 0.0001, "loss": 0.001, "step": 6288 }, { "epoch": 85.03, "learning_rate": 0.0001, "loss": 0.0005, "step": 6292 }, { "epoch": 85.08, "learning_rate": 0.0001, "loss": 0.0015, "step": 6296 }, { "epoch": 85.13, "learning_rate": 0.0001, "loss": 0.0002, "step": 6300 }, { "epoch": 85.19, "learning_rate": 0.0001, "loss": 0.0006, "step": 6304 }, { "epoch": 85.24, "learning_rate": 0.0001, "loss": 0.0003, "step": 6308 }, { "epoch": 85.29, "learning_rate": 0.0001, "loss": 0.0001, "step": 6312 }, { "epoch": 85.35, "learning_rate": 0.0001, "loss": 0.0014, "step": 6316 }, { "epoch": 85.4, "learning_rate": 0.0001, "loss": 0.0003, "step": 6320 }, { "epoch": 85.45, "learning_rate": 0.0001, "loss": 0.0008, "step": 6324 }, { "epoch": 85.51, "learning_rate": 0.0001, "loss": 0.0003, "step": 6328 }, { "epoch": 85.56, "learning_rate": 0.0001, "loss": 0.0004, "step": 6332 }, { "epoch": 85.62, "learning_rate": 0.0001, "loss": 0.0009, "step": 6336 }, { "epoch": 85.62, "eval_exec": 0.7243713733075435, "eval_loss": 0.33152279257774353, "eval_runtime": 290.7991, "eval_samples_per_second": 3.556, "step": 6336 }, { "epoch": 85.67, "learning_rate": 0.0001, "loss": 0.0003, "step": 6340 }, { "epoch": 85.72, "learning_rate": 0.0001, "loss": 0.0016, "step": 6344 }, { "epoch": 85.78, "learning_rate": 0.0001, "loss": 0.0027, "step": 6348 }, { "epoch": 85.83, "learning_rate": 0.0001, "loss": 0.0013, "step": 6352 }, { "epoch": 85.88, "learning_rate": 0.0001, "loss": 0.0004, "step": 6356 }, { "epoch": 85.94, "learning_rate": 0.0001, "loss": 0.001, "step": 6360 }, { "epoch": 85.99, "learning_rate": 0.0001, "loss": 0.0005, "step": 6364 }, { "epoch": 86.05, "learning_rate": 0.0001, "loss": 0.0005, "step": 6368 }, { "epoch": 86.11, "learning_rate": 0.0001, "loss": 0.0004, "step": 6372 }, { "epoch": 86.16, "learning_rate": 0.0001, "loss": 0.0001, "step": 6376 }, { "epoch": 86.21, "learning_rate": 0.0001, "loss": 0.0005, "step": 6380 }, { "epoch": 86.27, "learning_rate": 0.0001, "loss": 0.0004, "step": 6384 }, { "epoch": 86.32, "learning_rate": 0.0001, "loss": 0.0009, "step": 6388 }, { "epoch": 86.37, "learning_rate": 0.0001, "loss": 0.0019, "step": 6392 }, { "epoch": 86.43, "learning_rate": 0.0001, "loss": 0.0004, "step": 6396 }, { "epoch": 86.48, "learning_rate": 0.0001, "loss": 0.0004, "step": 6400 }, { "epoch": 86.48, "eval_exec": 0.7147001934235977, "eval_loss": 0.33703288435935974, "eval_runtime": 284.2076, "eval_samples_per_second": 3.638, "step": 6400 }, { "epoch": 86.53, "learning_rate": 0.0001, "loss": 0.0002, "step": 6404 }, { "epoch": 86.59, "learning_rate": 0.0001, "loss": 0.0002, "step": 6408 }, { "epoch": 86.64, "learning_rate": 0.0001, "loss": 0.0007, "step": 6412 }, { "epoch": 86.7, "learning_rate": 0.0001, "loss": 0.0003, "step": 6416 }, { "epoch": 86.75, "learning_rate": 0.0001, "loss": 0.0005, "step": 6420 }, { "epoch": 86.8, "learning_rate": 0.0001, "loss": 0.0004, "step": 6424 }, { "epoch": 86.86, "learning_rate": 0.0001, "loss": 0.0004, "step": 6428 }, { "epoch": 86.91, "learning_rate": 0.0001, "loss": 0.0001, "step": 6432 }, { "epoch": 86.96, "learning_rate": 0.0001, "loss": 0.0012, "step": 6436 }, { "epoch": 87.03, "learning_rate": 0.0001, "loss": 0.0002, "step": 6440 }, { "epoch": 87.08, "learning_rate": 0.0001, "loss": 0.0007, "step": 6444 }, { "epoch": 87.13, "learning_rate": 0.0001, "loss": 0.0003, "step": 6448 }, { "epoch": 87.19, "learning_rate": 0.0001, "loss": 0.0003, "step": 6452 }, { "epoch": 87.24, "learning_rate": 0.0001, "loss": 0.0007, "step": 6456 }, { "epoch": 87.29, "learning_rate": 0.0001, "loss": 0.0014, "step": 6460 }, { "epoch": 87.35, "learning_rate": 0.0001, "loss": 0.0005, "step": 6464 }, { "epoch": 87.35, "eval_exec": 0.7253384912959381, "eval_loss": 0.3449646830558777, "eval_runtime": 294.4474, "eval_samples_per_second": 3.512, "step": 6464 }, { "epoch": 87.4, "learning_rate": 0.0001, "loss": 0.0005, "step": 6468 }, { "epoch": 87.45, "learning_rate": 0.0001, "loss": 0.0009, "step": 6472 }, { "epoch": 87.51, "learning_rate": 0.0001, "loss": 0.0011, "step": 6476 }, { "epoch": 87.56, "learning_rate": 0.0001, "loss": 0.0011, "step": 6480 }, { "epoch": 87.62, "learning_rate": 0.0001, "loss": 0.0004, "step": 6484 }, { "epoch": 87.67, "learning_rate": 0.0001, "loss": 0.0003, "step": 6488 }, { "epoch": 87.72, "learning_rate": 0.0001, "loss": 0.0003, "step": 6492 }, { "epoch": 87.78, "learning_rate": 0.0001, "loss": 0.001, "step": 6496 }, { "epoch": 87.83, "learning_rate": 0.0001, "loss": 0.0007, "step": 6500 }, { "epoch": 87.88, "learning_rate": 0.0001, "loss": 0.0006, "step": 6504 }, { "epoch": 87.94, "learning_rate": 0.0001, "loss": 0.0001, "step": 6508 }, { "epoch": 87.99, "learning_rate": 0.0001, "loss": 0.0017, "step": 6512 }, { "epoch": 88.05, "learning_rate": 0.0001, "loss": 0.0012, "step": 6516 }, { "epoch": 88.11, "learning_rate": 0.0001, "loss": 0.0004, "step": 6520 }, { "epoch": 88.16, "learning_rate": 0.0001, "loss": 0.0006, "step": 6524 }, { "epoch": 88.21, "learning_rate": 0.0001, "loss": 0.0006, "step": 6528 }, { "epoch": 88.21, "eval_exec": 0.7050290135396519, "eval_loss": 0.3305092751979828, "eval_runtime": 286.599, "eval_samples_per_second": 3.608, "step": 6528 }, { "epoch": 88.27, "learning_rate": 0.0001, "loss": 0.0033, "step": 6532 }, { "epoch": 88.32, "learning_rate": 0.0001, "loss": 0.0004, "step": 6536 }, { "epoch": 88.37, "learning_rate": 0.0001, "loss": 0.0009, "step": 6540 }, { "epoch": 88.43, "learning_rate": 0.0001, "loss": 0.0013, "step": 6544 }, { "epoch": 88.48, "learning_rate": 0.0001, "loss": 0.0014, "step": 6548 }, { "epoch": 88.53, "learning_rate": 0.0001, "loss": 0.0011, "step": 6552 }, { "epoch": 88.59, "learning_rate": 0.0001, "loss": 0.0005, "step": 6556 }, { "epoch": 88.64, "learning_rate": 0.0001, "loss": 0.0006, "step": 6560 }, { "epoch": 88.7, "learning_rate": 0.0001, "loss": 0.0003, "step": 6564 }, { "epoch": 88.75, "learning_rate": 0.0001, "loss": 0.0008, "step": 6568 }, { "epoch": 88.8, "learning_rate": 0.0001, "loss": 0.0002, "step": 6572 }, { "epoch": 88.86, "learning_rate": 0.0001, "loss": 0.0012, "step": 6576 }, { "epoch": 88.91, "learning_rate": 0.0001, "loss": 0.0001, "step": 6580 }, { "epoch": 88.96, "learning_rate": 0.0001, "loss": 0.0012, "step": 6584 }, { "epoch": 89.03, "learning_rate": 0.0001, "loss": 0.0008, "step": 6588 }, { "epoch": 89.08, "learning_rate": 0.0001, "loss": 0.0007, "step": 6592 }, { "epoch": 89.08, "eval_exec": 0.7166344294003868, "eval_loss": 0.3231656849384308, "eval_runtime": 284.3494, "eval_samples_per_second": 3.636, "step": 6592 }, { "epoch": 89.13, "learning_rate": 0.0001, "loss": 0.0002, "step": 6596 }, { "epoch": 89.19, "learning_rate": 0.0001, "loss": 0.0001, "step": 6600 }, { "epoch": 89.24, "learning_rate": 0.0001, "loss": 0.0001, "step": 6604 }, { "epoch": 89.29, "learning_rate": 0.0001, "loss": 0.0008, "step": 6608 }, { "epoch": 89.35, "learning_rate": 0.0001, "loss": 0.0006, "step": 6612 }, { "epoch": 89.4, "learning_rate": 0.0001, "loss": 0.0001, "step": 6616 }, { "epoch": 89.45, "learning_rate": 0.0001, "loss": 0.0002, "step": 6620 }, { "epoch": 89.51, "learning_rate": 0.0001, "loss": 0.001, "step": 6624 }, { "epoch": 89.56, "learning_rate": 0.0001, "loss": 0.0007, "step": 6628 }, { "epoch": 89.62, "learning_rate": 0.0001, "loss": 0.0005, "step": 6632 }, { "epoch": 89.67, "learning_rate": 0.0001, "loss": 0.0001, "step": 6636 }, { "epoch": 89.72, "learning_rate": 0.0001, "loss": 0.0016, "step": 6640 }, { "epoch": 89.78, "learning_rate": 0.0001, "loss": 0.0008, "step": 6644 }, { "epoch": 89.83, "learning_rate": 0.0001, "loss": 0.0009, "step": 6648 }, { "epoch": 89.88, "learning_rate": 0.0001, "loss": 0.0007, "step": 6652 }, { "epoch": 89.94, "learning_rate": 0.0001, "loss": 0.0003, "step": 6656 }, { "epoch": 89.94, "eval_exec": 0.7127659574468085, "eval_loss": 0.3296290338039398, "eval_runtime": 289.4346, "eval_samples_per_second": 3.572, "step": 6656 }, { "epoch": 89.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 6660 }, { "epoch": 90.05, "learning_rate": 0.0001, "loss": 0.0002, "step": 6664 }, { "epoch": 90.11, "learning_rate": 0.0001, "loss": 0.0013, "step": 6668 }, { "epoch": 90.16, "learning_rate": 0.0001, "loss": 0.0002, "step": 6672 }, { "epoch": 90.21, "learning_rate": 0.0001, "loss": 0.0003, "step": 6676 }, { "epoch": 90.27, "learning_rate": 0.0001, "loss": 0.0007, "step": 6680 }, { "epoch": 90.32, "learning_rate": 0.0001, "loss": 0.0003, "step": 6684 }, { "epoch": 90.37, "learning_rate": 0.0001, "loss": 0.0028, "step": 6688 }, { "epoch": 90.43, "learning_rate": 0.0001, "loss": 0.0005, "step": 6692 }, { "epoch": 90.48, "learning_rate": 0.0001, "loss": 0.0014, "step": 6696 }, { "epoch": 90.53, "learning_rate": 0.0001, "loss": 0.0005, "step": 6700 }, { "epoch": 90.59, "learning_rate": 0.0001, "loss": 0.0004, "step": 6704 }, { "epoch": 90.64, "learning_rate": 0.0001, "loss": 0.0003, "step": 6708 }, { "epoch": 90.7, "learning_rate": 0.0001, "loss": 0.0001, "step": 6712 }, { "epoch": 90.75, "learning_rate": 0.0001, "loss": 0.0005, "step": 6716 }, { "epoch": 90.8, "learning_rate": 0.0001, "loss": 0.0009, "step": 6720 }, { "epoch": 90.8, "eval_exec": 0.7050290135396519, "eval_loss": 0.33742156624794006, "eval_runtime": 286.2212, "eval_samples_per_second": 3.613, "step": 6720 }, { "epoch": 90.86, "learning_rate": 0.0001, "loss": 0.0012, "step": 6724 }, { "epoch": 90.91, "learning_rate": 0.0001, "loss": 0.0001, "step": 6728 }, { "epoch": 90.96, "learning_rate": 0.0001, "loss": 0.0024, "step": 6732 }, { "epoch": 91.03, "learning_rate": 0.0001, "loss": 0.0014, "step": 6736 }, { "epoch": 91.08, "learning_rate": 0.0001, "loss": 0.0002, "step": 6740 }, { "epoch": 91.13, "learning_rate": 0.0001, "loss": 0.0014, "step": 6744 }, { "epoch": 91.19, "learning_rate": 0.0001, "loss": 0.0004, "step": 6748 }, { "epoch": 91.24, "learning_rate": 0.0001, "loss": 0.0005, "step": 6752 }, { "epoch": 91.29, "learning_rate": 0.0001, "loss": 0.0002, "step": 6756 }, { "epoch": 91.35, "learning_rate": 0.0001, "loss": 0.0002, "step": 6760 }, { "epoch": 91.4, "learning_rate": 0.0001, "loss": 0.0014, "step": 6764 }, { "epoch": 91.45, "learning_rate": 0.0001, "loss": 0.0001, "step": 6768 }, { "epoch": 91.51, "learning_rate": 0.0001, "loss": 0.0002, "step": 6772 }, { "epoch": 91.56, "learning_rate": 0.0001, "loss": 0.0002, "step": 6776 }, { "epoch": 91.62, "learning_rate": 0.0001, "loss": 0.0006, "step": 6780 }, { "epoch": 91.67, "learning_rate": 0.0001, "loss": 0.0011, "step": 6784 }, { "epoch": 91.67, "eval_exec": 0.7030947775628626, "eval_loss": 0.3439423739910126, "eval_runtime": 283.1873, "eval_samples_per_second": 3.651, "step": 6784 }, { "epoch": 91.72, "learning_rate": 0.0001, "loss": 0.0002, "step": 6788 }, { "epoch": 91.78, "learning_rate": 0.0001, "loss": 0.0013, "step": 6792 }, { "epoch": 91.83, "learning_rate": 0.0001, "loss": 0.0005, "step": 6796 }, { "epoch": 91.88, "learning_rate": 0.0001, "loss": 0.0014, "step": 6800 }, { "epoch": 91.94, "learning_rate": 0.0001, "loss": 0.0002, "step": 6804 }, { "epoch": 91.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 6808 }, { "epoch": 92.05, "learning_rate": 0.0001, "loss": 0.0005, "step": 6812 }, { "epoch": 92.11, "learning_rate": 0.0001, "loss": 0.0007, "step": 6816 }, { "epoch": 92.16, "learning_rate": 0.0001, "loss": 0.0014, "step": 6820 }, { "epoch": 92.21, "learning_rate": 0.0001, "loss": 0.0002, "step": 6824 }, { "epoch": 92.27, "learning_rate": 0.0001, "loss": 0.0005, "step": 6828 }, { "epoch": 92.32, "learning_rate": 0.0001, "loss": 0.0006, "step": 6832 }, { "epoch": 92.37, "learning_rate": 0.0001, "loss": 0.0017, "step": 6836 }, { "epoch": 92.43, "learning_rate": 0.0001, "loss": 0.0003, "step": 6840 }, { "epoch": 92.48, "learning_rate": 0.0001, "loss": 0.0004, "step": 6844 }, { "epoch": 92.53, "learning_rate": 0.0001, "loss": 0.0011, "step": 6848 }, { "epoch": 92.53, "eval_exec": 0.7050290135396519, "eval_loss": 0.3420410752296448, "eval_runtime": 289.1079, "eval_samples_per_second": 3.577, "step": 6848 }, { "epoch": 92.59, "learning_rate": 0.0001, "loss": 0.0006, "step": 6852 }, { "epoch": 92.64, "learning_rate": 0.0001, "loss": 0.0005, "step": 6856 }, { "epoch": 92.7, "learning_rate": 0.0001, "loss": 0.0003, "step": 6860 }, { "epoch": 92.75, "learning_rate": 0.0001, "loss": 0.0005, "step": 6864 }, { "epoch": 92.8, "learning_rate": 0.0001, "loss": 0.0003, "step": 6868 }, { "epoch": 92.86, "learning_rate": 0.0001, "loss": 0.001, "step": 6872 }, { "epoch": 92.91, "learning_rate": 0.0001, "loss": 0.0002, "step": 6876 }, { "epoch": 92.96, "learning_rate": 0.0001, "loss": 0.0011, "step": 6880 }, { "epoch": 93.03, "learning_rate": 0.0001, "loss": 0.0007, "step": 6884 }, { "epoch": 93.08, "learning_rate": 0.0001, "loss": 0.0002, "step": 6888 }, { "epoch": 93.13, "learning_rate": 0.0001, "loss": 0.0007, "step": 6892 }, { "epoch": 93.19, "learning_rate": 0.0001, "loss": 0.0006, "step": 6896 }, { "epoch": 93.24, "learning_rate": 0.0001, "loss": 0.0009, "step": 6900 }, { "epoch": 93.29, "learning_rate": 0.0001, "loss": 0.0002, "step": 6904 }, { "epoch": 93.35, "learning_rate": 0.0001, "loss": 0.0014, "step": 6908 }, { "epoch": 93.4, "learning_rate": 0.0001, "loss": 0.0004, "step": 6912 }, { "epoch": 93.4, "eval_exec": 0.7263056092843327, "eval_loss": 0.337966650724411, "eval_runtime": 283.4007, "eval_samples_per_second": 3.649, "step": 6912 }, { "epoch": 93.45, "learning_rate": 0.0001, "loss": 0.0011, "step": 6916 }, { "epoch": 93.51, "learning_rate": 0.0001, "loss": 0.0006, "step": 6920 }, { "epoch": 93.56, "learning_rate": 0.0001, "loss": 0.0004, "step": 6924 }, { "epoch": 93.62, "learning_rate": 0.0001, "loss": 0.001, "step": 6928 }, { "epoch": 93.67, "learning_rate": 0.0001, "loss": 0.0007, "step": 6932 }, { "epoch": 93.72, "learning_rate": 0.0001, "loss": 0.0003, "step": 6936 }, { "epoch": 93.78, "learning_rate": 0.0001, "loss": 0.0006, "step": 6940 }, { "epoch": 93.83, "learning_rate": 0.0001, "loss": 0.0006, "step": 6944 }, { "epoch": 93.88, "learning_rate": 0.0001, "loss": 0.0003, "step": 6948 }, { "epoch": 93.94, "learning_rate": 0.0001, "loss": 0.0002, "step": 6952 }, { "epoch": 93.99, "learning_rate": 0.0001, "loss": 0.001, "step": 6956 }, { "epoch": 94.05, "learning_rate": 0.0001, "loss": 0.0004, "step": 6960 }, { "epoch": 94.11, "learning_rate": 0.0001, "loss": 0.0008, "step": 6964 }, { "epoch": 94.16, "learning_rate": 0.0001, "loss": 0.0002, "step": 6968 }, { "epoch": 94.21, "learning_rate": 0.0001, "loss": 0.0001, "step": 6972 }, { "epoch": 94.27, "learning_rate": 0.0001, "loss": 0.0001, "step": 6976 }, { "epoch": 94.27, "eval_exec": 0.7176015473887815, "eval_loss": 0.364266037940979, "eval_runtime": 284.3795, "eval_samples_per_second": 3.636, "step": 6976 }, { "epoch": 94.32, "learning_rate": 0.0001, "loss": 0.0003, "step": 6980 }, { "epoch": 94.37, "learning_rate": 0.0001, "loss": 0.0002, "step": 6984 }, { "epoch": 94.43, "learning_rate": 0.0001, "loss": 0.0003, "step": 6988 }, { "epoch": 94.48, "learning_rate": 0.0001, "loss": 0.0004, "step": 6992 }, { "epoch": 94.53, "learning_rate": 0.0001, "loss": 0.0003, "step": 6996 }, { "epoch": 94.59, "learning_rate": 0.0001, "loss": 0.0005, "step": 7000 }, { "epoch": 94.64, "learning_rate": 0.0001, "loss": 0.0017, "step": 7004 }, { "epoch": 94.7, "learning_rate": 0.0001, "loss": 0.0006, "step": 7008 }, { "epoch": 94.75, "learning_rate": 0.0001, "loss": 0.0006, "step": 7012 }, { "epoch": 94.8, "learning_rate": 0.0001, "loss": 0.001, "step": 7016 }, { "epoch": 94.86, "learning_rate": 0.0001, "loss": 0.0001, "step": 7020 }, { "epoch": 94.91, "learning_rate": 0.0001, "loss": 0.0004, "step": 7024 }, { "epoch": 94.96, "learning_rate": 0.0001, "loss": 0.0004, "step": 7028 }, { "epoch": 95.03, "learning_rate": 0.0001, "loss": 0.0003, "step": 7032 }, { "epoch": 95.08, "learning_rate": 0.0001, "loss": 0.0006, "step": 7036 }, { "epoch": 95.13, "learning_rate": 0.0001, "loss": 0.0005, "step": 7040 }, { "epoch": 95.13, "eval_exec": 0.730174081237911, "eval_loss": 0.3686355650424957, "eval_runtime": 284.9995, "eval_samples_per_second": 3.628, "step": 7040 }, { "epoch": 95.19, "learning_rate": 0.0001, "loss": 0.0001, "step": 7044 }, { "epoch": 95.24, "learning_rate": 0.0001, "loss": 0.0002, "step": 7048 }, { "epoch": 95.29, "learning_rate": 0.0001, "loss": 0.0028, "step": 7052 }, { "epoch": 95.35, "learning_rate": 0.0001, "loss": 0.0006, "step": 7056 }, { "epoch": 95.4, "learning_rate": 0.0001, "loss": 0.0001, "step": 7060 }, { "epoch": 95.45, "learning_rate": 0.0001, "loss": 0.0004, "step": 7064 }, { "epoch": 95.51, "learning_rate": 0.0001, "loss": 0.0005, "step": 7068 }, { "epoch": 95.56, "learning_rate": 0.0001, "loss": 0.0012, "step": 7072 }, { "epoch": 95.62, "learning_rate": 0.0001, "loss": 0.0004, "step": 7076 }, { "epoch": 95.67, "learning_rate": 0.0001, "loss": 0.0003, "step": 7080 }, { "epoch": 95.72, "learning_rate": 0.0001, "loss": 0.0002, "step": 7084 }, { "epoch": 95.78, "learning_rate": 0.0001, "loss": 0.0002, "step": 7088 }, { "epoch": 95.83, "learning_rate": 0.0001, "loss": 0.001, "step": 7092 }, { "epoch": 95.88, "learning_rate": 0.0001, "loss": 0.0002, "step": 7096 }, { "epoch": 95.94, "learning_rate": 0.0001, "loss": 0.0001, "step": 7100 }, { "epoch": 95.99, "learning_rate": 0.0001, "loss": 0.0001, "step": 7104 }, { "epoch": 95.99, "eval_exec": 0.7176015473887815, "eval_loss": 0.3639739453792572, "eval_runtime": 281.8061, "eval_samples_per_second": 3.669, "step": 7104 }, { "epoch": 96.05, "learning_rate": 0.0001, "loss": 0.0008, "step": 7108 }, { "epoch": 96.11, "learning_rate": 0.0001, "loss": 0.0007, "step": 7112 }, { "epoch": 96.16, "learning_rate": 0.0001, "loss": 0.0006, "step": 7116 }, { "epoch": 96.21, "learning_rate": 0.0001, "loss": 0.0004, "step": 7120 }, { "epoch": 96.27, "learning_rate": 0.0001, "loss": 0.0002, "step": 7124 }, { "epoch": 96.32, "learning_rate": 0.0001, "loss": 0.001, "step": 7128 }, { "epoch": 96.37, "learning_rate": 0.0001, "loss": 0.0004, "step": 7132 }, { "epoch": 96.43, "learning_rate": 0.0001, "loss": 0.0001, "step": 7136 }, { "epoch": 96.48, "learning_rate": 0.0001, "loss": 0.0004, "step": 7140 }, { "epoch": 96.53, "learning_rate": 0.0001, "loss": 0.001, "step": 7144 }, { "epoch": 96.59, "learning_rate": 0.0001, "loss": 0.0009, "step": 7148 }, { "epoch": 96.64, "learning_rate": 0.0001, "loss": 0.0002, "step": 7152 }, { "epoch": 96.7, "learning_rate": 0.0001, "loss": 0.0005, "step": 7156 }, { "epoch": 96.75, "learning_rate": 0.0001, "loss": 0.0005, "step": 7160 }, { "epoch": 96.8, "learning_rate": 0.0001, "loss": 0.0011, "step": 7164 }, { "epoch": 96.86, "learning_rate": 0.0001, "loss": 0.0008, "step": 7168 }, { "epoch": 96.86, "eval_exec": 0.7224371373307543, "eval_loss": 0.35076427459716797, "eval_runtime": 285.1481, "eval_samples_per_second": 3.626, "step": 7168 }, { "epoch": 96.91, "learning_rate": 0.0001, "loss": 0.0005, "step": 7172 }, { "epoch": 96.96, "learning_rate": 0.0001, "loss": 0.0023, "step": 7176 }, { "epoch": 97.03, "learning_rate": 0.0001, "loss": 0.0007, "step": 7180 }, { "epoch": 97.08, "learning_rate": 0.0001, "loss": 0.0004, "step": 7184 }, { "epoch": 97.13, "learning_rate": 0.0001, "loss": 0.0009, "step": 7188 }, { "epoch": 97.19, "learning_rate": 0.0001, "loss": 0.0009, "step": 7192 }, { "epoch": 97.24, "learning_rate": 0.0001, "loss": 0.0002, "step": 7196 }, { "epoch": 97.29, "learning_rate": 0.0001, "loss": 0.0002, "step": 7200 }, { "epoch": 97.35, "learning_rate": 0.0001, "loss": 0.0007, "step": 7204 }, { "epoch": 97.4, "learning_rate": 0.0001, "loss": 0.0007, "step": 7208 }, { "epoch": 97.45, "learning_rate": 0.0001, "loss": 0.0007, "step": 7212 }, { "epoch": 97.51, "learning_rate": 0.0001, "loss": 0.0006, "step": 7216 }, { "epoch": 97.56, "learning_rate": 0.0001, "loss": 0.0004, "step": 7220 }, { "epoch": 97.62, "learning_rate": 0.0001, "loss": 0.0003, "step": 7224 }, { "epoch": 97.67, "learning_rate": 0.0001, "loss": 0.0009, "step": 7228 }, { "epoch": 97.72, "learning_rate": 0.0001, "loss": 0.0002, "step": 7232 }, { "epoch": 97.72, "eval_exec": 0.723404255319149, "eval_loss": 0.34438732266426086, "eval_runtime": 287.451, "eval_samples_per_second": 3.597, "step": 7232 }, { "epoch": 97.78, "learning_rate": 0.0001, "loss": 0.0003, "step": 7236 }, { "epoch": 97.83, "learning_rate": 0.0001, "loss": 0.0005, "step": 7240 }, { "epoch": 97.88, "learning_rate": 0.0001, "loss": 0.0003, "step": 7244 }, { "epoch": 97.94, "learning_rate": 0.0001, "loss": 0.0003, "step": 7248 }, { "epoch": 97.99, "learning_rate": 0.0001, "loss": 0.0004, "step": 7252 }, { "epoch": 98.05, "learning_rate": 0.0001, "loss": 0.0008, "step": 7256 }, { "epoch": 98.11, "learning_rate": 0.0001, "loss": 0.0007, "step": 7260 }, { "epoch": 98.16, "learning_rate": 0.0001, "loss": 0.0004, "step": 7264 }, { "epoch": 98.21, "learning_rate": 0.0001, "loss": 0.0004, "step": 7268 }, { "epoch": 98.27, "learning_rate": 0.0001, "loss": 0.0017, "step": 7272 }, { "epoch": 98.32, "learning_rate": 0.0001, "loss": 0.0006, "step": 7276 }, { "epoch": 98.37, "learning_rate": 0.0001, "loss": 0.0004, "step": 7280 }, { "epoch": 98.43, "learning_rate": 0.0001, "loss": 0.0005, "step": 7284 }, { "epoch": 98.48, "learning_rate": 0.0001, "loss": 0.0001, "step": 7288 }, { "epoch": 98.53, "learning_rate": 0.0001, "loss": 0.0003, "step": 7292 }, { "epoch": 98.59, "learning_rate": 0.0001, "loss": 0.0002, "step": 7296 }, { "epoch": 98.59, "eval_exec": 0.718568665377176, "eval_loss": 0.3525933623313904, "eval_runtime": 283.2747, "eval_samples_per_second": 3.65, "step": 7296 }, { "epoch": 98.64, "learning_rate": 0.0001, "loss": 0.0002, "step": 7300 }, { "epoch": 98.7, "learning_rate": 0.0001, "loss": 0.0005, "step": 7304 }, { "epoch": 98.75, "learning_rate": 0.0001, "loss": 0.0002, "step": 7308 }, { "epoch": 98.8, "learning_rate": 0.0001, "loss": 0.0004, "step": 7312 }, { "epoch": 98.86, "learning_rate": 0.0001, "loss": 0.0002, "step": 7316 }, { "epoch": 98.91, "learning_rate": 0.0001, "loss": 0.0008, "step": 7320 }, { "epoch": 98.96, "learning_rate": 0.0001, "loss": 0.0004, "step": 7324 }, { "epoch": 99.03, "learning_rate": 0.0001, "loss": 0.0003, "step": 7328 }, { "epoch": 99.08, "learning_rate": 0.0001, "loss": 0.0002, "step": 7332 }, { "epoch": 99.13, "learning_rate": 0.0001, "loss": 0.0002, "step": 7336 }, { "epoch": 99.19, "learning_rate": 0.0001, "loss": 0.001, "step": 7340 }, { "epoch": 99.24, "learning_rate": 0.0001, "loss": 0.0001, "step": 7344 }, { "epoch": 99.29, "learning_rate": 0.0001, "loss": 0.0003, "step": 7348 }, { "epoch": 99.35, "learning_rate": 0.0001, "loss": 0.0002, "step": 7352 }, { "epoch": 99.4, "learning_rate": 0.0001, "loss": 0.0002, "step": 7356 }, { "epoch": 99.45, "learning_rate": 0.0001, "loss": 0.0002, "step": 7360 }, { "epoch": 99.45, "eval_exec": 0.7311411992263056, "eval_loss": 0.3651147186756134, "eval_runtime": 287.5898, "eval_samples_per_second": 3.595, "step": 7360 }, { "epoch": 99.51, "learning_rate": 0.0001, "loss": 0.0007, "step": 7364 }, { "epoch": 99.56, "learning_rate": 0.0001, "loss": 0.0002, "step": 7368 }, { "epoch": 99.62, "learning_rate": 0.0001, "loss": 0.0004, "step": 7372 }, { "epoch": 99.67, "learning_rate": 0.0001, "loss": 0.0002, "step": 7376 }, { "epoch": 99.72, "learning_rate": 0.0001, "loss": 0.0008, "step": 7380 }, { "epoch": 99.78, "learning_rate": 0.0001, "loss": 0.0009, "step": 7384 }, { "epoch": 99.83, "learning_rate": 0.0001, "loss": 0.0001, "step": 7388 }, { "epoch": 99.88, "learning_rate": 0.0001, "loss": 0.001, "step": 7392 }, { "epoch": 99.94, "learning_rate": 0.0001, "loss": 0.0002, "step": 7396 }, { "epoch": 99.99, "learning_rate": 0.0001, "loss": 0.0006, "step": 7400 }, { "epoch": 99.99, "step": 7400, "total_flos": 5.036125754774262e+18, "train_loss": 0.00013556757536748224, "train_runtime": 24498.5901, "train_samples_per_second": 61.04, "train_steps_per_second": 0.302 } ], "max_steps": 7400, "num_train_epochs": 100, "total_flos": 5.036125754774262e+18, "trial_name": null, "trial_params": null }