{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 100, "global_step": 915, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "learning_rate": 0.00019781420765027324, "loss": 2.1723, "step": 10 }, { "epoch": 0.11, "learning_rate": 0.00019562841530054644, "loss": 1.7534, "step": 20 }, { "epoch": 0.16, "learning_rate": 0.00019344262295081967, "loss": 1.4381, "step": 30 }, { "epoch": 0.22, "learning_rate": 0.0001912568306010929, "loss": 1.3181, "step": 40 }, { "epoch": 0.27, "learning_rate": 0.00018907103825136615, "loss": 1.2203, "step": 50 }, { "epoch": 0.33, "learning_rate": 0.00018688524590163935, "loss": 1.0679, "step": 60 }, { "epoch": 0.38, "learning_rate": 0.00018469945355191258, "loss": 1.0329, "step": 70 }, { "epoch": 0.44, "learning_rate": 0.0001825136612021858, "loss": 0.9817, "step": 80 }, { "epoch": 0.49, "learning_rate": 0.00018032786885245904, "loss": 0.866, "step": 90 }, { "epoch": 0.55, "learning_rate": 0.00017814207650273224, "loss": 0.8634, "step": 100 }, { "epoch": 0.55, "eval_accuracy": 0.6975, "eval_loss": 0.9265598654747009, "eval_runtime": 17.6341, "eval_samples_per_second": 68.05, "eval_steps_per_second": 8.506, "step": 100 }, { "epoch": 0.6, "learning_rate": 0.00017595628415300547, "loss": 0.6979, "step": 110 }, { "epoch": 0.66, "learning_rate": 0.0001737704918032787, "loss": 0.7132, "step": 120 }, { "epoch": 0.71, "learning_rate": 0.00017158469945355192, "loss": 0.6692, "step": 130 }, { "epoch": 0.77, "learning_rate": 0.00016939890710382515, "loss": 0.6, "step": 140 }, { "epoch": 0.82, "learning_rate": 0.00016721311475409838, "loss": 0.5497, "step": 150 }, { "epoch": 0.87, "learning_rate": 0.0001650273224043716, "loss": 0.4835, "step": 160 }, { "epoch": 0.93, "learning_rate": 0.0001628415300546448, "loss": 0.4699, "step": 170 }, { "epoch": 0.98, "learning_rate": 0.00016065573770491804, "loss": 0.4311, "step": 180 }, { "epoch": 1.04, "learning_rate": 0.00015846994535519127, "loss": 0.4363, "step": 190 }, { "epoch": 1.09, "learning_rate": 0.0001562841530054645, "loss": 0.3225, "step": 200 }, { "epoch": 1.09, "eval_accuracy": 0.7325, "eval_loss": 0.8994238376617432, "eval_runtime": 16.2421, "eval_samples_per_second": 73.882, "eval_steps_per_second": 9.235, "step": 200 }, { "epoch": 1.15, "learning_rate": 0.0001540983606557377, "loss": 0.3329, "step": 210 }, { "epoch": 1.2, "learning_rate": 0.00015191256830601093, "loss": 0.3262, "step": 220 }, { "epoch": 1.26, "learning_rate": 0.00014972677595628418, "loss": 0.3394, "step": 230 }, { "epoch": 1.31, "learning_rate": 0.00014754098360655738, "loss": 0.3319, "step": 240 }, { "epoch": 1.37, "learning_rate": 0.0001453551912568306, "loss": 0.2554, "step": 250 }, { "epoch": 1.42, "learning_rate": 0.00014316939890710384, "loss": 0.2912, "step": 260 }, { "epoch": 1.48, "learning_rate": 0.00014098360655737707, "loss": 0.2801, "step": 270 }, { "epoch": 1.53, "learning_rate": 0.00013879781420765027, "loss": 0.2443, "step": 280 }, { "epoch": 1.58, "learning_rate": 0.0001366120218579235, "loss": 0.278, "step": 290 }, { "epoch": 1.64, "learning_rate": 0.00013442622950819673, "loss": 0.2353, "step": 300 }, { "epoch": 1.64, "eval_accuracy": 0.73, "eval_loss": 0.968270480632782, "eval_runtime": 17.23, "eval_samples_per_second": 69.646, "eval_steps_per_second": 8.706, "step": 300 }, { "epoch": 1.69, "learning_rate": 0.00013224043715846995, "loss": 0.2092, "step": 310 }, { "epoch": 1.75, "learning_rate": 0.00013005464480874316, "loss": 0.1959, "step": 320 }, { "epoch": 1.8, "learning_rate": 0.0001278688524590164, "loss": 0.1841, "step": 330 }, { "epoch": 1.86, "learning_rate": 0.00012568306010928964, "loss": 0.2678, "step": 340 }, { "epoch": 1.91, "learning_rate": 0.00012349726775956284, "loss": 0.2366, "step": 350 }, { "epoch": 1.97, "learning_rate": 0.00012131147540983607, "loss": 0.2198, "step": 360 }, { "epoch": 2.02, "learning_rate": 0.0001191256830601093, "loss": 0.1459, "step": 370 }, { "epoch": 2.08, "learning_rate": 0.00011693989071038251, "loss": 0.0904, "step": 380 }, { "epoch": 2.13, "learning_rate": 0.00011475409836065574, "loss": 0.1093, "step": 390 }, { "epoch": 2.19, "learning_rate": 0.00011256830601092896, "loss": 0.1119, "step": 400 }, { "epoch": 2.19, "eval_accuracy": 0.7491666666666666, "eval_loss": 0.9247021675109863, "eval_runtime": 16.092, "eval_samples_per_second": 74.571, "eval_steps_per_second": 9.321, "step": 400 }, { "epoch": 2.24, "learning_rate": 0.00011038251366120218, "loss": 0.1183, "step": 410 }, { "epoch": 2.3, "learning_rate": 0.00010819672131147543, "loss": 0.1155, "step": 420 }, { "epoch": 2.35, "learning_rate": 0.00010601092896174864, "loss": 0.138, "step": 430 }, { "epoch": 2.4, "learning_rate": 0.00010382513661202187, "loss": 0.1124, "step": 440 }, { "epoch": 2.46, "learning_rate": 0.00010163934426229508, "loss": 0.1535, "step": 450 }, { "epoch": 2.51, "learning_rate": 9.945355191256831e-05, "loss": 0.1895, "step": 460 }, { "epoch": 2.57, "learning_rate": 9.726775956284153e-05, "loss": 0.128, "step": 470 }, { "epoch": 2.62, "learning_rate": 9.508196721311476e-05, "loss": 0.0633, "step": 480 }, { "epoch": 2.68, "learning_rate": 9.289617486338798e-05, "loss": 0.0788, "step": 490 }, { "epoch": 2.73, "learning_rate": 9.071038251366121e-05, "loss": 0.049, "step": 500 }, { "epoch": 2.73, "eval_accuracy": 0.7566666666666667, "eval_loss": 0.9662973880767822, "eval_runtime": 15.9004, "eval_samples_per_second": 75.47, "eval_steps_per_second": 9.434, "step": 500 }, { "epoch": 2.79, "learning_rate": 8.852459016393443e-05, "loss": 0.0829, "step": 510 }, { "epoch": 2.84, "learning_rate": 8.633879781420766e-05, "loss": 0.0821, "step": 520 }, { "epoch": 2.9, "learning_rate": 8.415300546448088e-05, "loss": 0.0554, "step": 530 }, { "epoch": 2.95, "learning_rate": 8.19672131147541e-05, "loss": 0.0756, "step": 540 }, { "epoch": 3.01, "learning_rate": 7.978142076502733e-05, "loss": 0.136, "step": 550 }, { "epoch": 3.06, "learning_rate": 7.759562841530054e-05, "loss": 0.0903, "step": 560 }, { "epoch": 3.11, "learning_rate": 7.540983606557377e-05, "loss": 0.0868, "step": 570 }, { "epoch": 3.17, "learning_rate": 7.3224043715847e-05, "loss": 0.0523, "step": 580 }, { "epoch": 3.22, "learning_rate": 7.103825136612023e-05, "loss": 0.0396, "step": 590 }, { "epoch": 3.28, "learning_rate": 6.885245901639344e-05, "loss": 0.0537, "step": 600 }, { "epoch": 3.28, "eval_accuracy": 0.7566666666666667, "eval_loss": 1.0557572841644287, "eval_runtime": 16.1696, "eval_samples_per_second": 74.213, "eval_steps_per_second": 9.277, "step": 600 }, { "epoch": 3.33, "learning_rate": 6.666666666666667e-05, "loss": 0.064, "step": 610 }, { "epoch": 3.39, "learning_rate": 6.44808743169399e-05, "loss": 0.0206, "step": 620 }, { "epoch": 3.44, "learning_rate": 6.229508196721313e-05, "loss": 0.028, "step": 630 }, { "epoch": 3.5, "learning_rate": 6.010928961748634e-05, "loss": 0.026, "step": 640 }, { "epoch": 3.55, "learning_rate": 5.792349726775956e-05, "loss": 0.0229, "step": 650 }, { "epoch": 3.61, "learning_rate": 5.5737704918032785e-05, "loss": 0.032, "step": 660 }, { "epoch": 3.66, "learning_rate": 5.355191256830602e-05, "loss": 0.023, "step": 670 }, { "epoch": 3.72, "learning_rate": 5.136612021857924e-05, "loss": 0.0269, "step": 680 }, { "epoch": 3.77, "learning_rate": 4.918032786885246e-05, "loss": 0.023, "step": 690 }, { "epoch": 3.83, "learning_rate": 4.6994535519125685e-05, "loss": 0.0274, "step": 700 }, { "epoch": 3.83, "eval_accuracy": 0.7691666666666667, "eval_loss": 1.0343540906906128, "eval_runtime": 16.2987, "eval_samples_per_second": 73.626, "eval_steps_per_second": 9.203, "step": 700 }, { "epoch": 3.88, "learning_rate": 4.4808743169398906e-05, "loss": 0.0197, "step": 710 }, { "epoch": 3.93, "learning_rate": 4.262295081967213e-05, "loss": 0.0195, "step": 720 }, { "epoch": 3.99, "learning_rate": 4.0437158469945356e-05, "loss": 0.0122, "step": 730 }, { "epoch": 4.04, "learning_rate": 3.825136612021858e-05, "loss": 0.0156, "step": 740 }, { "epoch": 4.1, "learning_rate": 3.6065573770491806e-05, "loss": 0.0115, "step": 750 }, { "epoch": 4.15, "learning_rate": 3.387978142076503e-05, "loss": 0.0118, "step": 760 }, { "epoch": 4.21, "learning_rate": 3.1693989071038256e-05, "loss": 0.0123, "step": 770 }, { "epoch": 4.26, "learning_rate": 2.9508196721311478e-05, "loss": 0.0108, "step": 780 }, { "epoch": 4.32, "learning_rate": 2.7322404371584703e-05, "loss": 0.0112, "step": 790 }, { "epoch": 4.37, "learning_rate": 2.5136612021857924e-05, "loss": 0.0102, "step": 800 }, { "epoch": 4.37, "eval_accuracy": 0.7941666666666667, "eval_loss": 0.9259100556373596, "eval_runtime": 16.5205, "eval_samples_per_second": 72.637, "eval_steps_per_second": 9.08, "step": 800 }, { "epoch": 4.43, "learning_rate": 2.295081967213115e-05, "loss": 0.0101, "step": 810 }, { "epoch": 4.48, "learning_rate": 2.0765027322404374e-05, "loss": 0.0099, "step": 820 }, { "epoch": 4.54, "learning_rate": 1.85792349726776e-05, "loss": 0.0097, "step": 830 }, { "epoch": 4.59, "learning_rate": 1.6393442622950818e-05, "loss": 0.011, "step": 840 }, { "epoch": 4.64, "learning_rate": 1.4207650273224044e-05, "loss": 0.0094, "step": 850 }, { "epoch": 4.7, "learning_rate": 1.2021857923497268e-05, "loss": 0.01, "step": 860 }, { "epoch": 4.75, "learning_rate": 9.836065573770493e-06, "loss": 0.0096, "step": 870 }, { "epoch": 4.81, "learning_rate": 7.650273224043716e-06, "loss": 0.0098, "step": 880 }, { "epoch": 4.86, "learning_rate": 5.46448087431694e-06, "loss": 0.0163, "step": 890 }, { "epoch": 4.92, "learning_rate": 3.278688524590164e-06, "loss": 0.0095, "step": 900 }, { "epoch": 4.92, "eval_accuracy": 0.785, "eval_loss": 0.9604464769363403, "eval_runtime": 17.3811, "eval_samples_per_second": 69.04, "eval_steps_per_second": 8.63, "step": 900 }, { "epoch": 4.97, "learning_rate": 1.092896174863388e-06, "loss": 0.0092, "step": 910 }, { "epoch": 5.0, "step": 915, "total_flos": 3.4029172406502605e+18, "train_loss": 0.2756463099698551, "train_runtime": 974.1891, "train_samples_per_second": 45.073, "train_steps_per_second": 0.939 } ], "logging_steps": 10, "max_steps": 915, "num_train_epochs": 5, "save_steps": 1000, "total_flos": 3.4029172406502605e+18, "trial_name": null, "trial_params": null }