{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.01949769498511016, "global_step": 1120, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2e-05, "loss": 4.1497, "step": 10 }, { "epoch": 0.0, "learning_rate": 4e-05, "loss": 3.7626, "step": 20 }, { "epoch": 0.0, "learning_rate": 6e-05, "loss": 3.3202, "step": 30 }, { "epoch": 0.0, "learning_rate": 8e-05, "loss": 3.1832, "step": 40 }, { "epoch": 0.0, "learning_rate": 0.0001, "loss": 3.0717, "step": 50 }, { "epoch": 0.0, "learning_rate": 0.00012, "loss": 2.9981, "step": 60 }, { "epoch": 0.0, "learning_rate": 0.00014000000000000001, "loss": 2.9461, "step": 70 }, { "epoch": 0.0, "learning_rate": 0.00016, "loss": 2.9218, "step": 80 }, { "epoch": 0.0, "learning_rate": 0.00017999999999999998, "loss": 2.9574, "step": 90 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.9917, "step": 100 }, { "epoch": 0.0, "learning_rate": 0.00022, "loss": 2.9242, "step": 110 }, { "epoch": 0.0, "learning_rate": 0.00024, "loss": 2.9225, "step": 120 }, { "epoch": 0.0, "learning_rate": 0.00026000000000000003, "loss": 2.9253, "step": 130 }, { "epoch": 0.0, "learning_rate": 0.00028000000000000003, "loss": 2.8171, "step": 140 }, { "epoch": 0.0, "learning_rate": 0.0003, "loss": 2.9023, "step": 150 }, { "epoch": 0.0, "learning_rate": 0.00032, "loss": 2.809, "step": 160 }, { "epoch": 0.0, "learning_rate": 0.00034, "loss": 2.9079, "step": 170 }, { "epoch": 0.0, "learning_rate": 0.00035999999999999997, "loss": 2.7811, "step": 180 }, { "epoch": 0.0, "learning_rate": 0.00038, "loss": 2.8214, "step": 190 }, { "epoch": 0.0, "learning_rate": 0.0004, "loss": 2.821, "step": 200 }, { "epoch": 0.0, "learning_rate": 0.00042, "loss": 2.7647, "step": 210 }, { "epoch": 0.0, "learning_rate": 0.00044, "loss": 2.776, "step": 220 }, { "epoch": 0.0, "learning_rate": 0.00046, "loss": 2.8615, "step": 230 }, { "epoch": 0.0, "learning_rate": 0.00048, "loss": 2.7913, "step": 240 }, { "epoch": 0.0, "learning_rate": 0.0005, "loss": 2.7515, "step": 250 }, { "epoch": 0.0, "learning_rate": 0.0005200000000000001, "loss": 2.7199, "step": 260 }, { "epoch": 0.0, "learning_rate": 0.00054, "loss": 2.7464, "step": 270 }, { "epoch": 0.0, "learning_rate": 0.0005600000000000001, "loss": 2.7036, "step": 280 }, { "epoch": 0.0, "learning_rate": 0.00058, "loss": 2.8226, "step": 290 }, { "epoch": 0.0, "learning_rate": 0.0006, "loss": 2.6855, "step": 300 }, { "epoch": 0.0, "learning_rate": 0.00062, "loss": 2.7442, "step": 310 }, { "epoch": 0.0, "learning_rate": 0.00064, "loss": 2.7517, "step": 320 }, { "epoch": 0.0, "learning_rate": 0.00066, "loss": 2.6921, "step": 330 }, { "epoch": 0.0, "learning_rate": 0.00068, "loss": 2.7034, "step": 340 }, { "epoch": 0.0, "learning_rate": 0.0007, "loss": 2.7489, "step": 350 }, { "epoch": 0.0, "learning_rate": 0.0007199999999999999, "loss": 2.6452, "step": 360 }, { "epoch": 0.0, "learning_rate": 0.00074, "loss": 2.7452, "step": 370 }, { "epoch": 0.0, "learning_rate": 0.00076, "loss": 2.6834, "step": 380 }, { "epoch": 0.0, "learning_rate": 0.0007800000000000001, "loss": 2.6984, "step": 390 }, { "epoch": 0.0, "learning_rate": 0.0008, "loss": 2.6716, "step": 400 }, { "epoch": 0.0, "learning_rate": 0.00082, "loss": 2.7164, "step": 410 }, { "epoch": 0.0, "learning_rate": 0.00084, "loss": 2.634, "step": 420 }, { "epoch": 0.0, "learning_rate": 0.00086, "loss": 2.6932, "step": 430 }, { "epoch": 0.0, "learning_rate": 0.00088, "loss": 2.623, "step": 440 }, { "epoch": 0.0, "learning_rate": 0.0009000000000000001, "loss": 2.6827, "step": 450 }, { "epoch": 0.0, "learning_rate": 0.00092, "loss": 2.6558, "step": 460 }, { "epoch": 0.0, "learning_rate": 0.00094, "loss": 2.7282, "step": 470 }, { "epoch": 0.0, "learning_rate": 0.00096, "loss": 2.7123, "step": 480 }, { "epoch": 0.0, "learning_rate": 0.00098, "loss": 2.5858, "step": 490 }, { "epoch": 0.0, "learning_rate": 0.001, "loss": 2.7641, "step": 500 }, { "epoch": 0.0, "learning_rate": 0.00102, "loss": 2.6606, "step": 510 }, { "epoch": 0.0, "learning_rate": 0.0010400000000000001, "loss": 2.7237, "step": 520 }, { "epoch": 0.0, "learning_rate": 0.0010600000000000002, "loss": 2.66, "step": 530 }, { "epoch": 0.0, "learning_rate": 0.00108, "loss": 2.6041, "step": 540 }, { "epoch": 0.0, "learning_rate": 0.0011, "loss": 2.608, "step": 550 }, { "epoch": 0.0, "learning_rate": 0.0011200000000000001, "loss": 2.63, "step": 560 }, { "epoch": 0.0, "learning_rate": 0.00114, "loss": 2.6255, "step": 570 }, { "epoch": 0.0, "learning_rate": 0.00116, "loss": 2.6602, "step": 580 }, { "epoch": 0.0, "learning_rate": 0.00118, "loss": 2.6522, "step": 590 }, { "epoch": 0.0, "learning_rate": 0.0012, "loss": 2.7137, "step": 600 }, { "epoch": 0.0, "learning_rate": 0.00122, "loss": 2.6914, "step": 610 }, { "epoch": 0.0, "learning_rate": 0.00124, "loss": 2.6888, "step": 620 }, { "epoch": 0.0, "learning_rate": 0.00126, "loss": 2.6398, "step": 630 }, { "epoch": 0.0, "learning_rate": 0.00128, "loss": 2.6058, "step": 640 }, { "epoch": 0.0, "learning_rate": 0.0013000000000000002, "loss": 2.7215, "step": 650 }, { "epoch": 0.0, "learning_rate": 0.00132, "loss": 2.5945, "step": 660 }, { "epoch": 0.0, "learning_rate": 0.00134, "loss": 2.6325, "step": 670 }, { "epoch": 0.0, "learning_rate": 0.00136, "loss": 2.6191, "step": 680 }, { "epoch": 0.01, "learning_rate": 0.00138, "loss": 2.5328, "step": 690 }, { "epoch": 0.01, "learning_rate": 0.0014, "loss": 2.562, "step": 700 }, { "epoch": 0.01, "learning_rate": 0.00142, "loss": 2.5454, "step": 710 }, { "epoch": 0.01, "learning_rate": 0.0014399999999999999, "loss": 2.5328, "step": 720 }, { "epoch": 0.01, "learning_rate": 0.00146, "loss": 2.5141, "step": 730 }, { "epoch": 0.01, "learning_rate": 0.00148, "loss": 2.5019, "step": 740 }, { "epoch": 0.01, "learning_rate": 0.0015, "loss": 2.4906, "step": 750 }, { "epoch": 0.01, "learning_rate": 0.00152, "loss": 2.5153, "step": 760 }, { "epoch": 0.01, "learning_rate": 0.0015400000000000001, "loss": 2.4857, "step": 770 }, { "epoch": 0.01, "learning_rate": 0.0015600000000000002, "loss": 2.5265, "step": 780 }, { "epoch": 0.01, "learning_rate": 0.00158, "loss": 2.4994, "step": 790 }, { "epoch": 0.01, "learning_rate": 0.0016, "loss": 2.4538, "step": 800 }, { "epoch": 0.01, "learning_rate": 0.0016200000000000001, "loss": 2.5823, "step": 810 }, { "epoch": 0.01, "learning_rate": 0.00164, "loss": 2.4527, "step": 820 }, { "epoch": 0.01, "learning_rate": 0.00166, "loss": 2.4914, "step": 830 }, { "epoch": 0.01, "learning_rate": 0.00168, "loss": 2.4307, "step": 840 }, { "epoch": 0.01, "learning_rate": 0.0017, "loss": 2.4795, "step": 850 }, { "epoch": 0.01, "learning_rate": 0.00172, "loss": 2.4245, "step": 860 }, { "epoch": 0.02, "learning_rate": 0.00174, "loss": 2.4751, "step": 870 }, { "epoch": 0.02, "learning_rate": 0.00176, "loss": 2.4705, "step": 880 }, { "epoch": 0.02, "learning_rate": 0.0017800000000000001, "loss": 2.4574, "step": 890 }, { "epoch": 0.02, "learning_rate": 0.0018000000000000002, "loss": 2.4271, "step": 900 }, { "epoch": 0.02, "learning_rate": 0.00182, "loss": 2.4609, "step": 910 }, { "epoch": 0.02, "learning_rate": 0.00184, "loss": 2.4255, "step": 920 }, { "epoch": 0.02, "learning_rate": 0.00186, "loss": 2.4151, "step": 930 }, { "epoch": 0.02, "learning_rate": 0.00188, "loss": 2.4331, "step": 940 }, { "epoch": 0.02, "learning_rate": 0.0019, "loss": 2.4576, "step": 950 }, { "epoch": 0.02, "learning_rate": 0.00192, "loss": 2.4156, "step": 960 }, { "epoch": 0.02, "learning_rate": 0.0019399999999999999, "loss": 2.417, "step": 970 }, { "epoch": 0.02, "learning_rate": 0.00196, "loss": 2.444, "step": 980 }, { "epoch": 0.02, "learning_rate": 0.00198, "loss": 2.5019, "step": 990 }, { "epoch": 0.02, "learning_rate": 0.002, "loss": 2.4522, "step": 1000 }, { "epoch": 0.02, "learning_rate": 0.00202, "loss": 2.4106, "step": 1010 }, { "epoch": 0.02, "learning_rate": 0.00204, "loss": 2.4295, "step": 1020 }, { "epoch": 0.02, "learning_rate": 0.00206, "loss": 2.4683, "step": 1030 }, { "epoch": 0.02, "learning_rate": 0.0020800000000000003, "loss": 2.4315, "step": 1040 }, { "epoch": 0.02, "learning_rate": 0.0021000000000000003, "loss": 2.41, "step": 1050 }, { "epoch": 0.02, "learning_rate": 0.0021200000000000004, "loss": 2.4551, "step": 1060 }, { "epoch": 0.02, "learning_rate": 0.00214, "loss": 2.4642, "step": 1070 }, { "epoch": 0.02, "learning_rate": 0.00216, "loss": 2.4526, "step": 1080 }, { "epoch": 0.02, "learning_rate": 0.00218, "loss": 2.3934, "step": 1090 }, { "epoch": 0.02, "learning_rate": 0.0022, "loss": 2.3918, "step": 1100 }, { "epoch": 0.02, "learning_rate": 0.00222, "loss": 2.4356, "step": 1110 }, { "epoch": 0.02, "learning_rate": 0.0022400000000000002, "loss": 2.4506, "step": 1120 } ], "max_steps": 172326, "num_train_epochs": 3, "total_flos": 5.5330613854470144e+17, "trial_name": null, "trial_params": null }