{ "best_metric": 0.4947710931301117, "best_model_checkpoint": "zephyr_7B_ruPython/checkpoint-3466", "epoch": 1.9997115245925285, "eval_steps": 500, "global_step": 3466, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 7.692307692307694e-06, "loss": 1.0975, "step": 20 }, { "epoch": 0.02, "learning_rate": 1.5384615384615387e-05, "loss": 1.083, "step": 40 }, { "epoch": 0.03, "learning_rate": 2.307692307692308e-05, "loss": 1.0227, "step": 60 }, { "epoch": 0.05, "learning_rate": 3.0769230769230774e-05, "loss": 0.9524, "step": 80 }, { "epoch": 0.06, "learning_rate": 3.846153846153846e-05, "loss": 0.8904, "step": 100 }, { "epoch": 0.07, "learning_rate": 4.615384615384616e-05, "loss": 0.8482, "step": 120 }, { "epoch": 0.08, "learning_rate": 5.384615384615385e-05, "loss": 0.8015, "step": 140 }, { "epoch": 0.09, "learning_rate": 6.153846153846155e-05, "loss": 0.8032, "step": 160 }, { "epoch": 0.1, "learning_rate": 6.923076923076924e-05, "loss": 0.7839, "step": 180 }, { "epoch": 0.12, "learning_rate": 7.692307692307693e-05, "loss": 0.7964, "step": 200 }, { "epoch": 0.13, "learning_rate": 8.461538461538461e-05, "loss": 0.7531, "step": 220 }, { "epoch": 0.14, "learning_rate": 9.230769230769232e-05, "loss": 0.7508, "step": 240 }, { "epoch": 0.15, "learning_rate": 0.0001, "loss": 0.7531, "step": 260 }, { "epoch": 0.16, "learning_rate": 0.0001076923076923077, "loss": 0.7543, "step": 280 }, { "epoch": 0.17, "learning_rate": 0.00011538461538461538, "loss": 0.7426, "step": 300 }, { "epoch": 0.18, "learning_rate": 0.0001230769230769231, "loss": 0.7222, "step": 320 }, { "epoch": 0.2, "learning_rate": 0.00013076923076923077, "loss": 0.7456, "step": 340 }, { "epoch": 0.21, "learning_rate": 0.00013846153846153847, "loss": 0.7447, "step": 360 }, { "epoch": 0.22, "learning_rate": 0.00014615384615384615, "loss": 0.7297, "step": 380 }, { "epoch": 0.23, "learning_rate": 0.00015384615384615385, "loss": 0.7339, "step": 400 }, { "epoch": 0.24, "learning_rate": 0.00016153846153846155, "loss": 0.7418, "step": 420 }, { "epoch": 0.25, "learning_rate": 0.00016923076923076923, "loss": 0.7315, "step": 440 }, { "epoch": 0.27, "learning_rate": 0.00017692307692307693, "loss": 0.7104, "step": 460 }, { "epoch": 0.28, "learning_rate": 0.00018461538461538463, "loss": 0.717, "step": 480 }, { "epoch": 0.29, "learning_rate": 0.00019230769230769233, "loss": 0.7307, "step": 500 }, { "epoch": 0.3, "learning_rate": 0.0002, "loss": 0.728, "step": 520 }, { "epoch": 0.31, "learning_rate": 0.0001991451164778799, "loss": 0.7171, "step": 540 }, { "epoch": 0.32, "learning_rate": 0.0001982902329557598, "loss": 0.7137, "step": 560 }, { "epoch": 0.33, "learning_rate": 0.0001974353494336397, "loss": 0.6911, "step": 580 }, { "epoch": 0.35, "learning_rate": 0.00019658046591151956, "loss": 0.6992, "step": 600 }, { "epoch": 0.36, "learning_rate": 0.00019572558238939946, "loss": 0.6959, "step": 620 }, { "epoch": 0.37, "learning_rate": 0.00019487069886727932, "loss": 0.7046, "step": 640 }, { "epoch": 0.38, "learning_rate": 0.00019401581534515922, "loss": 0.6976, "step": 660 }, { "epoch": 0.39, "learning_rate": 0.0001931609318230391, "loss": 0.7042, "step": 680 }, { "epoch": 0.4, "learning_rate": 0.000192306048300919, "loss": 0.6908, "step": 700 }, { "epoch": 0.42, "learning_rate": 0.0001914511647787989, "loss": 0.6984, "step": 720 }, { "epoch": 0.43, "learning_rate": 0.0001905962812566788, "loss": 0.6999, "step": 740 }, { "epoch": 0.44, "learning_rate": 0.0001897413977345587, "loss": 0.7094, "step": 760 }, { "epoch": 0.45, "learning_rate": 0.00018888651421243856, "loss": 0.7003, "step": 780 }, { "epoch": 0.46, "learning_rate": 0.00018803163069031845, "loss": 0.6809, "step": 800 }, { "epoch": 0.47, "learning_rate": 0.00018717674716819835, "loss": 0.7135, "step": 820 }, { "epoch": 0.48, "learning_rate": 0.00018632186364607822, "loss": 0.6913, "step": 840 }, { "epoch": 0.5, "learning_rate": 0.0001854669801239581, "loss": 0.6932, "step": 860 }, { "epoch": 0.51, "learning_rate": 0.000184612096601838, "loss": 0.6832, "step": 880 }, { "epoch": 0.52, "learning_rate": 0.0001837572130797179, "loss": 0.6927, "step": 900 }, { "epoch": 0.53, "learning_rate": 0.0001829023295575978, "loss": 0.6729, "step": 920 }, { "epoch": 0.54, "learning_rate": 0.00018204744603547766, "loss": 0.6742, "step": 940 }, { "epoch": 0.55, "learning_rate": 0.00018119256251335756, "loss": 0.6718, "step": 960 }, { "epoch": 0.57, "learning_rate": 0.00018033767899123745, "loss": 0.6868, "step": 980 }, { "epoch": 0.58, "learning_rate": 0.00017948279546911735, "loss": 0.6869, "step": 1000 }, { "epoch": 0.59, "learning_rate": 0.00017862791194699724, "loss": 0.6633, "step": 1020 }, { "epoch": 0.6, "learning_rate": 0.0001777730284248771, "loss": 0.6967, "step": 1040 }, { "epoch": 0.61, "learning_rate": 0.000176918144902757, "loss": 0.6758, "step": 1060 }, { "epoch": 0.62, "learning_rate": 0.0001760632613806369, "loss": 0.6918, "step": 1080 }, { "epoch": 0.63, "learning_rate": 0.0001752083778585168, "loss": 0.6718, "step": 1100 }, { "epoch": 0.65, "learning_rate": 0.00017435349433639666, "loss": 0.6722, "step": 1120 }, { "epoch": 0.66, "learning_rate": 0.00017349861081427656, "loss": 0.684, "step": 1140 }, { "epoch": 0.67, "learning_rate": 0.00017264372729215645, "loss": 0.6619, "step": 1160 }, { "epoch": 0.68, "learning_rate": 0.00017178884377003635, "loss": 0.6789, "step": 1180 }, { "epoch": 0.69, "learning_rate": 0.00017093396024791624, "loss": 0.677, "step": 1200 }, { "epoch": 0.7, "learning_rate": 0.00017007907672579614, "loss": 0.6836, "step": 1220 }, { "epoch": 0.72, "learning_rate": 0.000169266937379782, "loss": 0.6823, "step": 1240 }, { "epoch": 0.73, "learning_rate": 0.0001684120538576619, "loss": 0.6936, "step": 1260 }, { "epoch": 0.74, "learning_rate": 0.0001675571703355418, "loss": 0.6646, "step": 1280 }, { "epoch": 0.75, "learning_rate": 0.0001667022868134217, "loss": 0.6733, "step": 1300 }, { "epoch": 0.76, "learning_rate": 0.00016584740329130159, "loss": 0.6753, "step": 1320 }, { "epoch": 0.77, "learning_rate": 0.00016499251976918145, "loss": 0.677, "step": 1340 }, { "epoch": 0.78, "learning_rate": 0.00016413763624706135, "loss": 0.6692, "step": 1360 }, { "epoch": 0.8, "learning_rate": 0.00016328275272494122, "loss": 0.656, "step": 1380 }, { "epoch": 0.81, "learning_rate": 0.0001624278692028211, "loss": 0.6767, "step": 1400 }, { "epoch": 0.82, "learning_rate": 0.000161572985680701, "loss": 0.6485, "step": 1420 }, { "epoch": 0.83, "learning_rate": 0.0001607181021585809, "loss": 0.679, "step": 1440 }, { "epoch": 0.84, "learning_rate": 0.0001598632186364608, "loss": 0.6701, "step": 1460 }, { "epoch": 0.85, "learning_rate": 0.0001590083351143407, "loss": 0.6632, "step": 1480 }, { "epoch": 0.87, "learning_rate": 0.00015815345159222056, "loss": 0.6707, "step": 1500 }, { "epoch": 0.88, "learning_rate": 0.00015729856807010045, "loss": 0.6738, "step": 1520 }, { "epoch": 0.89, "learning_rate": 0.00015644368454798035, "loss": 0.68, "step": 1540 }, { "epoch": 0.9, "learning_rate": 0.00015558880102586024, "loss": 0.6748, "step": 1560 }, { "epoch": 0.91, "learning_rate": 0.00015473391750374014, "loss": 0.6628, "step": 1580 }, { "epoch": 0.92, "learning_rate": 0.00015387903398162, "loss": 0.6674, "step": 1600 }, { "epoch": 0.93, "learning_rate": 0.0001530241504594999, "loss": 0.6572, "step": 1620 }, { "epoch": 0.95, "learning_rate": 0.0001521692669373798, "loss": 0.6547, "step": 1640 }, { "epoch": 0.96, "learning_rate": 0.0001513143834152597, "loss": 0.6599, "step": 1660 }, { "epoch": 0.97, "learning_rate": 0.00015045949989313955, "loss": 0.6665, "step": 1680 }, { "epoch": 0.98, "learning_rate": 0.00014960461637101945, "loss": 0.6687, "step": 1700 }, { "epoch": 0.99, "learning_rate": 0.00014874973284889934, "loss": 0.6655, "step": 1720 }, { "epoch": 1.0, "eval_loss": 0.5166412591934204, "eval_runtime": 3665.2848, "eval_samples_per_second": 1.081, "eval_steps_per_second": 0.36, "step": 1733 }, { "epoch": 1.0, "learning_rate": 0.00014789484932677924, "loss": 0.673, "step": 1740 }, { "epoch": 1.02, "learning_rate": 0.00014703996580465913, "loss": 0.6603, "step": 1760 }, { "epoch": 1.03, "learning_rate": 0.00014618508228253903, "loss": 0.6548, "step": 1780 }, { "epoch": 1.04, "learning_rate": 0.0001453301987604189, "loss": 0.6708, "step": 1800 }, { "epoch": 1.05, "learning_rate": 0.0001444753152382988, "loss": 0.6668, "step": 1820 }, { "epoch": 1.06, "learning_rate": 0.00014362043171617866, "loss": 0.6901, "step": 1840 }, { "epoch": 1.07, "learning_rate": 0.00014276554819405855, "loss": 0.6648, "step": 1860 }, { "epoch": 1.08, "learning_rate": 0.00014191066467193845, "loss": 0.6431, "step": 1880 }, { "epoch": 1.1, "learning_rate": 0.00014105578114981834, "loss": 0.6653, "step": 1900 }, { "epoch": 1.11, "learning_rate": 0.00014020089762769824, "loss": 0.6693, "step": 1920 }, { "epoch": 1.12, "learning_rate": 0.00013934601410557813, "loss": 0.6732, "step": 1940 }, { "epoch": 1.13, "learning_rate": 0.00013849113058345803, "loss": 0.6434, "step": 1960 }, { "epoch": 1.14, "learning_rate": 0.00013763624706133792, "loss": 0.6429, "step": 1980 }, { "epoch": 1.15, "learning_rate": 0.0001367813635392178, "loss": 0.6619, "step": 2000 }, { "epoch": 1.17, "learning_rate": 0.00013592648001709766, "loss": 0.6561, "step": 2020 }, { "epoch": 1.18, "learning_rate": 0.00013507159649497755, "loss": 0.6518, "step": 2040 }, { "epoch": 1.19, "learning_rate": 0.00013421671297285745, "loss": 0.6358, "step": 2060 }, { "epoch": 1.2, "learning_rate": 0.00013336182945073734, "loss": 0.6711, "step": 2080 }, { "epoch": 1.21, "learning_rate": 0.00013250694592861724, "loss": 0.6543, "step": 2100 }, { "epoch": 1.22, "learning_rate": 0.00013165206240649713, "loss": 0.6517, "step": 2120 }, { "epoch": 1.23, "learning_rate": 0.00013079717888437703, "loss": 0.6549, "step": 2140 }, { "epoch": 1.25, "learning_rate": 0.0001299422953622569, "loss": 0.6558, "step": 2160 }, { "epoch": 1.26, "learning_rate": 0.0001290874118401368, "loss": 0.6585, "step": 2180 }, { "epoch": 1.27, "learning_rate": 0.00012823252831801668, "loss": 0.6444, "step": 2200 }, { "epoch": 1.28, "learning_rate": 0.00012737764479589655, "loss": 0.642, "step": 2220 }, { "epoch": 1.29, "learning_rate": 0.00012652276127377645, "loss": 0.6708, "step": 2240 }, { "epoch": 1.3, "learning_rate": 0.00012566787775165634, "loss": 0.653, "step": 2260 }, { "epoch": 1.32, "learning_rate": 0.00012481299422953623, "loss": 0.6438, "step": 2280 }, { "epoch": 1.33, "learning_rate": 0.00012395811070741613, "loss": 0.6296, "step": 2300 }, { "epoch": 1.34, "learning_rate": 0.00012310322718529602, "loss": 0.6376, "step": 2320 }, { "epoch": 1.35, "learning_rate": 0.0001222483436631759, "loss": 0.6284, "step": 2340 }, { "epoch": 1.36, "learning_rate": 0.00012139346014105579, "loss": 0.6373, "step": 2360 }, { "epoch": 1.37, "learning_rate": 0.00012053857661893568, "loss": 0.6289, "step": 2380 }, { "epoch": 1.38, "learning_rate": 0.00011968369309681558, "loss": 0.6421, "step": 2400 }, { "epoch": 1.4, "learning_rate": 0.00011882880957469544, "loss": 0.6385, "step": 2420 }, { "epoch": 1.41, "learning_rate": 0.00011797392605257534, "loss": 0.6312, "step": 2440 }, { "epoch": 1.42, "learning_rate": 0.00011711904253045522, "loss": 0.6454, "step": 2460 }, { "epoch": 1.43, "learning_rate": 0.00011626415900833511, "loss": 0.6476, "step": 2480 }, { "epoch": 1.44, "learning_rate": 0.00011540927548621501, "loss": 0.6373, "step": 2500 }, { "epoch": 1.45, "learning_rate": 0.0001145543919640949, "loss": 0.647, "step": 2520 }, { "epoch": 1.47, "learning_rate": 0.00011369950844197479, "loss": 0.6319, "step": 2540 }, { "epoch": 1.48, "learning_rate": 0.00011284462491985468, "loss": 0.6506, "step": 2560 }, { "epoch": 1.49, "learning_rate": 0.00011198974139773457, "loss": 0.637, "step": 2580 }, { "epoch": 1.5, "learning_rate": 0.00011113485787561446, "loss": 0.6339, "step": 2600 }, { "epoch": 1.51, "learning_rate": 0.00011027997435349434, "loss": 0.6316, "step": 2620 }, { "epoch": 1.52, "learning_rate": 0.00010942509083137422, "loss": 0.6325, "step": 2640 }, { "epoch": 1.53, "learning_rate": 0.00010857020730925411, "loss": 0.6202, "step": 2660 }, { "epoch": 1.55, "learning_rate": 0.00010771532378713401, "loss": 0.6219, "step": 2680 }, { "epoch": 1.56, "learning_rate": 0.00010686044026501389, "loss": 0.6367, "step": 2700 }, { "epoch": 1.57, "learning_rate": 0.00010600555674289378, "loss": 0.6374, "step": 2720 }, { "epoch": 1.58, "learning_rate": 0.00010515067322077368, "loss": 0.6235, "step": 2740 }, { "epoch": 1.59, "learning_rate": 0.00010429578969865357, "loss": 0.626, "step": 2760 }, { "epoch": 1.6, "learning_rate": 0.00010344090617653345, "loss": 0.6325, "step": 2780 }, { "epoch": 1.62, "learning_rate": 0.00010258602265441335, "loss": 0.6425, "step": 2800 }, { "epoch": 1.63, "learning_rate": 0.00010173113913229324, "loss": 0.6344, "step": 2820 }, { "epoch": 1.64, "learning_rate": 0.00010087625561017311, "loss": 0.6208, "step": 2840 }, { "epoch": 1.65, "learning_rate": 0.000100021372088053, "loss": 0.6339, "step": 2860 }, { "epoch": 1.66, "learning_rate": 9.916648856593289e-05, "loss": 0.6348, "step": 2880 }, { "epoch": 1.67, "learning_rate": 9.831160504381278e-05, "loss": 0.6193, "step": 2900 }, { "epoch": 1.68, "learning_rate": 9.745672152169268e-05, "loss": 0.6283, "step": 2920 }, { "epoch": 1.7, "learning_rate": 9.660183799957256e-05, "loss": 0.6314, "step": 2940 }, { "epoch": 1.71, "learning_rate": 9.574695447745245e-05, "loss": 0.6373, "step": 2960 }, { "epoch": 1.72, "learning_rate": 9.489207095533233e-05, "loss": 0.6437, "step": 2980 }, { "epoch": 1.73, "learning_rate": 9.403718743321223e-05, "loss": 0.6433, "step": 3000 }, { "epoch": 1.74, "learning_rate": 9.318230391109212e-05, "loss": 0.6238, "step": 3020 }, { "epoch": 1.75, "learning_rate": 9.2327420388972e-05, "loss": 0.6237, "step": 3040 }, { "epoch": 1.77, "learning_rate": 9.14725368668519e-05, "loss": 0.6388, "step": 3060 }, { "epoch": 1.78, "learning_rate": 9.06176533447318e-05, "loss": 0.6422, "step": 3080 }, { "epoch": 1.79, "learning_rate": 8.976276982261168e-05, "loss": 0.5997, "step": 3100 }, { "epoch": 1.8, "learning_rate": 8.890788630049156e-05, "loss": 0.6265, "step": 3120 }, { "epoch": 1.81, "learning_rate": 8.805300277837145e-05, "loss": 0.6298, "step": 3140 }, { "epoch": 1.82, "learning_rate": 8.719811925625135e-05, "loss": 0.622, "step": 3160 }, { "epoch": 1.83, "learning_rate": 8.634323573413123e-05, "loss": 0.6232, "step": 3180 }, { "epoch": 1.85, "learning_rate": 8.548835221201111e-05, "loss": 0.6337, "step": 3200 }, { "epoch": 1.86, "learning_rate": 8.4633468689891e-05, "loss": 0.6187, "step": 3220 }, { "epoch": 1.87, "learning_rate": 8.37785851677709e-05, "loss": 0.6446, "step": 3240 }, { "epoch": 1.88, "learning_rate": 8.292370164565079e-05, "loss": 0.6298, "step": 3260 }, { "epoch": 1.89, "learning_rate": 8.206881812353067e-05, "loss": 0.6364, "step": 3280 }, { "epoch": 1.9, "learning_rate": 8.121393460141055e-05, "loss": 0.6314, "step": 3300 }, { "epoch": 1.92, "learning_rate": 8.035905107929045e-05, "loss": 0.633, "step": 3320 }, { "epoch": 1.93, "learning_rate": 7.950416755717034e-05, "loss": 0.6163, "step": 3340 }, { "epoch": 1.94, "learning_rate": 7.864928403505023e-05, "loss": 0.6351, "step": 3360 }, { "epoch": 1.95, "learning_rate": 7.779440051293012e-05, "loss": 0.6038, "step": 3380 }, { "epoch": 1.96, "learning_rate": 7.693951699081e-05, "loss": 0.6245, "step": 3400 }, { "epoch": 1.97, "learning_rate": 7.60846334686899e-05, "loss": 0.6311, "step": 3420 }, { "epoch": 1.98, "learning_rate": 7.522974994656978e-05, "loss": 0.6391, "step": 3440 }, { "epoch": 2.0, "learning_rate": 7.437486642444967e-05, "loss": 0.6205, "step": 3460 }, { "epoch": 2.0, "eval_loss": 0.4947710931301117, "eval_runtime": 3649.3488, "eval_samples_per_second": 1.086, "eval_steps_per_second": 0.362, "step": 3466 } ], "logging_steps": 20, "max_steps": 5199, "num_train_epochs": 3, "save_steps": 500, "total_flos": 3.6485939432993587e+18, "trial_name": null, "trial_params": null }