{ "best_metric": 1.1649552583694458, "best_model_checkpoint": "./interact_output_20231214_183743/checkpoint-912", "epoch": 3.991247264770241, "eval_steps": 500, "global_step": 912, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 0.0007984276816171436, "loss": 1.8072, "step": 10 }, { "epoch": 0.09, "learning_rate": 0.0007981855684763583, "loss": 1.9931, "step": 20 }, { "epoch": 0.13, "learning_rate": 0.0007974595227250475, "loss": 2.0219, "step": 30 }, { "epoch": 0.18, "learning_rate": 0.0007962504250201388, "loss": 1.9413, "step": 40 }, { "epoch": 0.22, "learning_rate": 0.000794559741936249, "loss": 1.8684, "step": 50 }, { "epoch": 0.26, "learning_rate": 0.0007923895241868038, "loss": 1.7867, "step": 60 }, { "epoch": 0.31, "learning_rate": 0.0007897424041366252, "loss": 1.8186, "step": 70 }, { "epoch": 0.35, "learning_rate": 0.0007866215926090057, "loss": 1.7737, "step": 80 }, { "epoch": 0.39, "learning_rate": 0.0007830308749911415, "loss": 1.6727, "step": 90 }, { "epoch": 0.44, "learning_rate": 0.0007789746066426482, "loss": 1.6249, "step": 100 }, { "epoch": 0.48, "learning_rate": 0.0007744577076127291, "loss": 1.7025, "step": 110 }, { "epoch": 0.53, "learning_rate": 0.0007694856566724036, "loss": 1.6132, "step": 120 }, { "epoch": 0.57, "learning_rate": 0.0007640644846690332, "loss": 1.63, "step": 130 }, { "epoch": 0.61, "learning_rate": 0.0007582007672112082, "loss": 1.6888, "step": 140 }, { "epoch": 0.66, "learning_rate": 0.0007519016166928652, "loss": 1.6102, "step": 150 }, { "epoch": 0.7, "learning_rate": 0.0007451746736663118, "loss": 1.5319, "step": 160 }, { "epoch": 0.74, "learning_rate": 0.000738028097574621, "loss": 1.5352, "step": 170 }, { "epoch": 0.79, "learning_rate": 0.000730470556854638, "loss": 1.4991, "step": 180 }, { "epoch": 0.83, "learning_rate": 0.0007225112184226035, "loss": 1.495, "step": 190 }, { "epoch": 0.88, "learning_rate": 0.0007141597365551446, "loss": 1.4296, "step": 200 }, { "epoch": 0.92, "learning_rate": 0.0007054262411791251, "loss": 1.4373, "step": 210 }, { "epoch": 0.96, "learning_rate": 0.0006963213255845531, "loss": 1.4589, "step": 220 }, { "epoch": 1.0, "eval_loss": 1.5730081796646118, "eval_runtime": 181.0133, "eval_samples_per_second": 14.883, "eval_steps_per_second": 1.862, "step": 228 }, { "epoch": 1.01, "learning_rate": 0.0006868560335754548, "loss": 1.4361, "step": 230 }, { "epoch": 1.05, "learning_rate": 0.000677041846074296, "loss": 1.3813, "step": 240 }, { "epoch": 1.09, "learning_rate": 0.000666890667196201, "loss": 1.3511, "step": 250 }, { "epoch": 1.14, "learning_rate": 0.0006564148098098617, "loss": 1.455, "step": 260 }, { "epoch": 1.18, "learning_rate": 0.0006456269806026464, "loss": 1.4276, "step": 270 }, { "epoch": 1.23, "learning_rate": 0.00063454026466803, "loss": 1.2906, "step": 280 }, { "epoch": 1.27, "learning_rate": 0.0006231681096340324, "loss": 1.3605, "step": 290 }, { "epoch": 1.31, "learning_rate": 0.0006115243093519255, "loss": 1.3765, "step": 300 }, { "epoch": 1.36, "learning_rate": 0.0005996229871649842, "loss": 1.3846, "step": 310 }, { "epoch": 1.4, "learning_rate": 0.0005874785787775835, "loss": 1.3476, "step": 320 }, { "epoch": 1.44, "learning_rate": 0.0005751058147454162, "loss": 1.3307, "step": 330 }, { "epoch": 1.49, "learning_rate": 0.0005625197026080706, "loss": 1.3481, "step": 340 }, { "epoch": 1.53, "learning_rate": 0.00054973550868564, "loss": 1.2677, "step": 350 }, { "epoch": 1.58, "learning_rate": 0.0005367687395614475, "loss": 1.2801, "step": 360 }, { "epoch": 1.62, "learning_rate": 0.0005236351232733387, "loss": 1.2434, "step": 370 }, { "epoch": 1.66, "learning_rate": 0.0005103505902363665, "loss": 1.2472, "step": 380 }, { "epoch": 1.71, "learning_rate": 0.0004969312539199984, "loss": 1.1805, "step": 390 }, { "epoch": 1.75, "learning_rate": 0.0004833933913032899, "loss": 1.2795, "step": 400 }, { "epoch": 1.79, "learning_rate": 0.0004697534231317295, "loss": 1.1841, "step": 410 }, { "epoch": 1.84, "learning_rate": 0.00045602789399970073, "loss": 1.2189, "step": 420 }, { "epoch": 1.88, "learning_rate": 0.0004422334522827224, "loss": 1.2124, "step": 430 }, { "epoch": 1.93, "learning_rate": 0.00042838682994380845, "loss": 1.1371, "step": 440 }, { "epoch": 1.97, "learning_rate": 0.00041450482223843874, "loss": 1.1254, "step": 450 }, { "epoch": 2.0, "eval_loss": 1.318668007850647, "eval_runtime": 181.264, "eval_samples_per_second": 14.862, "eval_steps_per_second": 1.859, "step": 457 }, { "epoch": 2.01, "learning_rate": 0.0004006042673427602, "loss": 1.2324, "step": 460 }, { "epoch": 2.06, "learning_rate": 0.0003867020259297277, "loss": 1.2353, "step": 470 }, { "epoch": 2.1, "learning_rate": 0.00037281496071795675, "loss": 1.2029, "step": 480 }, { "epoch": 2.14, "learning_rate": 0.0003589599160180951, "loss": 1.1946, "step": 490 }, { "epoch": 2.19, "learning_rate": 0.0003451536973015218, "loss": 1.2571, "step": 500 }, { "epoch": 2.23, "learning_rate": 0.0003314130508161583, "loss": 1.1964, "step": 510 }, { "epoch": 2.28, "learning_rate": 0.0003177546432741117, "loss": 1.2171, "step": 520 }, { "epoch": 2.32, "learning_rate": 0.00030419504163579317, "loss": 1.1815, "step": 530 }, { "epoch": 2.36, "learning_rate": 0.00029075069301502925, "loss": 1.1589, "step": 540 }, { "epoch": 2.41, "learning_rate": 0.000277437904729541, "loss": 1.1154, "step": 550 }, { "epoch": 2.45, "learning_rate": 0.0002642728245209895, "loss": 1.1195, "step": 560 }, { "epoch": 2.49, "learning_rate": 0.0002512714209685778, "loss": 1.1485, "step": 570 }, { "epoch": 2.54, "learning_rate": 0.00023844946411996905, "loss": 1.1151, "step": 580 }, { "epoch": 2.58, "learning_rate": 0.0002258225063630134, "loss": 1.1342, "step": 590 }, { "epoch": 2.63, "learning_rate": 0.00021340586356148388, "loss": 1.1106, "step": 600 }, { "epoch": 2.67, "learning_rate": 0.0002012145964777057, "loss": 1.0693, "step": 610 }, { "epoch": 2.71, "learning_rate": 0.00018926349250461, "loss": 1.1118, "step": 620 }, { "epoch": 2.76, "learning_rate": 0.00017756704772937113, "loss": 1.097, "step": 630 }, { "epoch": 2.8, "learning_rate": 0.00016613944935038317, "loss": 1.0072, "step": 640 }, { "epoch": 2.84, "learning_rate": 0.000154994558468902, "loss": 1.0244, "step": 650 }, { "epoch": 2.89, "learning_rate": 0.0001441458932762289, "loss": 1.0308, "step": 660 }, { "epoch": 2.93, "learning_rate": 0.00013360661265682426, "loss": 0.9882, "step": 670 }, { "epoch": 2.98, "learning_rate": 0.00012338950022724405, "loss": 0.9938, "step": 680 }, { "epoch": 3.0, "eval_loss": 1.1857038736343384, "eval_runtime": 181.1024, "eval_samples_per_second": 14.876, "eval_steps_per_second": 1.861, "step": 685 }, { "epoch": 3.02, "learning_rate": 0.00011350694883025702, "loss": 1.0906, "step": 690 }, { "epoch": 3.06, "learning_rate": 0.00010397094550294988, "loss": 1.1792, "step": 700 }, { "epoch": 3.11, "learning_rate": 9.4793056937056e-05, "loss": 1.0951, "step": 710 }, { "epoch": 3.15, "learning_rate": 8.598441544914002e-05, "loss": 1.1168, "step": 720 }, { "epoch": 3.19, "learning_rate": 7.755570547765905e-05, "loss": 1.0971, "step": 730 }, { "epoch": 3.24, "learning_rate": 6.951715062327716e-05, "loss": 1.0359, "step": 740 }, { "epoch": 3.28, "learning_rate": 6.187850124815228e-05, "loss": 1.077, "step": 750 }, { "epoch": 3.33, "learning_rate": 5.4649022649238026e-05, "loss": 1.0996, "step": 760 }, { "epoch": 3.37, "learning_rate": 4.783748381994562e-05, "loss": 1.0043, "step": 770 }, { "epoch": 3.41, "learning_rate": 4.145214681379591e-05, "loss": 1.1422, "step": 780 }, { "epoch": 3.46, "learning_rate": 3.550075672296503e-05, "loss": 1.1366, "step": 790 }, { "epoch": 3.5, "learning_rate": 2.9990532283877747e-05, "loss": 1.0587, "step": 800 }, { "epoch": 3.54, "learning_rate": 2.492815712124332e-05, "loss": 1.1301, "step": 810 }, { "epoch": 3.59, "learning_rate": 2.0319771641155883e-05, "loss": 1.0567, "step": 820 }, { "epoch": 3.63, "learning_rate": 1.617096558309071e-05, "loss": 1.1119, "step": 830 }, { "epoch": 3.68, "learning_rate": 1.2486771239831942e-05, "loss": 1.1186, "step": 840 }, { "epoch": 3.72, "learning_rate": 9.271657353555046e-06, "loss": 1.0765, "step": 850 }, { "epoch": 3.76, "learning_rate": 6.529523695467422e-06, "loss": 1.0678, "step": 860 }, { "epoch": 3.81, "learning_rate": 4.263696335582372e-06, "loss": 1.1022, "step": 870 }, { "epoch": 3.85, "learning_rate": 2.476923608363819e-06, "loss": 1.0498, "step": 880 }, { "epoch": 3.89, "learning_rate": 1.1713727791349433e-06, "loss": 1.0907, "step": 890 }, { "epoch": 3.94, "learning_rate": 3.4862741529444126e-07, "loss": 1.0615, "step": 900 }, { "epoch": 3.98, "learning_rate": 9.685465529235211e-09, "loss": 1.0461, "step": 910 }, { "epoch": 3.99, "eval_loss": 1.1649552583694458, "eval_runtime": 181.1967, "eval_samples_per_second": 14.868, "eval_steps_per_second": 1.86, "step": 912 } ], "logging_steps": 10, "max_steps": 912, "num_train_epochs": 4, "save_steps": 500, "total_flos": 1.1665671520864666e+17, "trial_name": null, "trial_params": null }