{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.8449049873931491, "global_step": 1500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 1.2000000000000002e-06, "loss": 1.1041, "step": 10 }, { "epoch": 0.02, "learning_rate": 2.8000000000000003e-06, "loss": 1.0838, "step": 20 }, { "epoch": 0.04, "learning_rate": 4.800000000000001e-06, "loss": 1.0709, "step": 30 }, { "epoch": 0.05, "learning_rate": 6.800000000000001e-06, "loss": 1.0346, "step": 40 }, { "epoch": 0.06, "learning_rate": 8.6e-06, "loss": 0.9823, "step": 50 }, { "epoch": 0.07, "learning_rate": 1.04e-05, "loss": 0.9176, "step": 60 }, { "epoch": 0.09, "learning_rate": 1.2400000000000002e-05, "loss": 0.8376, "step": 70 }, { "epoch": 0.1, "learning_rate": 1.4400000000000001e-05, "loss": 0.7697, "step": 80 }, { "epoch": 0.11, "learning_rate": 1.62e-05, "loss": 0.763, "step": 90 }, { "epoch": 0.12, "learning_rate": 1.8200000000000002e-05, "loss": 0.7829, "step": 100 }, { "epoch": 0.14, "learning_rate": 1.9991449337323645e-05, "loss": 0.7286, "step": 110 }, { "epoch": 0.15, "learning_rate": 1.990594271056007e-05, "loss": 0.7299, "step": 120 }, { "epoch": 0.16, "learning_rate": 1.9828986746472855e-05, "loss": 0.7274, "step": 130 }, { "epoch": 0.17, "learning_rate": 1.974348011970928e-05, "loss": 0.7931, "step": 140 }, { "epoch": 0.18, "learning_rate": 1.966652415562206e-05, "loss": 0.8357, "step": 150 }, { "epoch": 0.2, "learning_rate": 1.958101752885849e-05, "loss": 0.8294, "step": 160 }, { "epoch": 0.21, "learning_rate": 1.9495510902094914e-05, "loss": 0.8006, "step": 170 }, { "epoch": 0.22, "learning_rate": 1.941000427533134e-05, "loss": 0.7768, "step": 180 }, { "epoch": 0.23, "learning_rate": 1.9324497648567767e-05, "loss": 0.7608, "step": 190 }, { "epoch": 0.25, "learning_rate": 1.9238991021804192e-05, "loss": 0.7362, "step": 200 }, { "epoch": 0.26, "learning_rate": 1.9153484395040617e-05, "loss": 0.7534, "step": 210 }, { "epoch": 0.27, "learning_rate": 1.9067977768277045e-05, "loss": 0.7199, "step": 220 }, { "epoch": 0.28, "learning_rate": 1.898247114151347e-05, "loss": 0.7259, "step": 230 }, { "epoch": 0.3, "learning_rate": 1.8896964514749895e-05, "loss": 0.7333, "step": 240 }, { "epoch": 0.31, "learning_rate": 1.881145788798632e-05, "loss": 0.7216, "step": 250 }, { "epoch": 0.32, "learning_rate": 1.8725951261222744e-05, "loss": 0.7263, "step": 260 }, { "epoch": 0.33, "learning_rate": 1.8640444634459173e-05, "loss": 0.6909, "step": 270 }, { "epoch": 0.34, "learning_rate": 1.8554938007695597e-05, "loss": 0.7865, "step": 280 }, { "epoch": 0.36, "learning_rate": 1.8469431380932022e-05, "loss": 0.7109, "step": 290 }, { "epoch": 0.37, "learning_rate": 1.838392475416845e-05, "loss": 0.7012, "step": 300 }, { "epoch": 0.38, "learning_rate": 1.8298418127404875e-05, "loss": 0.741, "step": 310 }, { "epoch": 0.39, "learning_rate": 1.82129115006413e-05, "loss": 0.8036, "step": 320 }, { "epoch": 0.41, "learning_rate": 1.8127404873877728e-05, "loss": 0.7909, "step": 330 }, { "epoch": 0.42, "learning_rate": 1.8041898247114153e-05, "loss": 0.7493, "step": 340 }, { "epoch": 0.43, "learning_rate": 1.7956391620350578e-05, "loss": 0.7676, "step": 350 }, { "epoch": 0.44, "learning_rate": 1.7870884993587006e-05, "loss": 0.7381, "step": 360 }, { "epoch": 0.46, "learning_rate": 1.778537836682343e-05, "loss": 0.7494, "step": 370 }, { "epoch": 0.47, "learning_rate": 1.7699871740059856e-05, "loss": 0.723, "step": 380 }, { "epoch": 0.48, "learning_rate": 1.761436511329628e-05, "loss": 0.7203, "step": 390 }, { "epoch": 0.49, "learning_rate": 1.752885848653271e-05, "loss": 0.6964, "step": 400 }, { "epoch": 0.5, "learning_rate": 1.7443351859769134e-05, "loss": 0.7471, "step": 410 }, { "epoch": 0.52, "learning_rate": 1.735784523300556e-05, "loss": 0.7653, "step": 420 }, { "epoch": 0.53, "learning_rate": 1.7272338606241987e-05, "loss": 0.7678, "step": 430 }, { "epoch": 0.54, "learning_rate": 1.718683197947841e-05, "loss": 0.7468, "step": 440 }, { "epoch": 0.55, "learning_rate": 1.7101325352714836e-05, "loss": 0.7228, "step": 450 }, { "epoch": 0.57, "learning_rate": 1.7015818725951264e-05, "loss": 0.8254, "step": 460 }, { "epoch": 0.58, "learning_rate": 1.693031209918769e-05, "loss": 0.7702, "step": 470 }, { "epoch": 0.59, "learning_rate": 1.6844805472424114e-05, "loss": 0.7854, "step": 480 }, { "epoch": 0.6, "learning_rate": 1.6759298845660542e-05, "loss": 0.7204, "step": 490 }, { "epoch": 0.61, "learning_rate": 1.6673792218896967e-05, "loss": 0.803, "step": 500 }, { "epoch": 0.61, "eval_oasst_export_w_label_accuracy": 0.6076923076923076, "eval_oasst_export_w_label_kendalltau": 0.14663003663003704, "eval_oasst_export_w_label_loss": 0.76123046875, "eval_oasst_export_w_label_neg_score": -1.1142578125, "eval_oasst_export_w_label_pos_score": -0.93017578125, "eval_oasst_export_w_label_runtime": 214.0188, "eval_oasst_export_w_label_samples_per_second": 8.504, "eval_oasst_export_w_label_score_diff": 0.1845703125, "eval_oasst_export_w_label_steps_per_second": 8.504, "step": 500 }, { "epoch": 0.63, "learning_rate": 1.6588285592133392e-05, "loss": 0.7866, "step": 510 }, { "epoch": 0.64, "learning_rate": 1.6502778965369817e-05, "loss": 0.7746, "step": 520 }, { "epoch": 0.65, "learning_rate": 1.641727233860624e-05, "loss": 0.7875, "step": 530 }, { "epoch": 0.66, "learning_rate": 1.633176571184267e-05, "loss": 0.7511, "step": 540 }, { "epoch": 0.68, "learning_rate": 1.6246259085079095e-05, "loss": 0.7669, "step": 550 }, { "epoch": 0.69, "learning_rate": 1.616075245831552e-05, "loss": 0.7047, "step": 560 }, { "epoch": 0.7, "learning_rate": 1.6075245831551948e-05, "loss": 0.7662, "step": 570 }, { "epoch": 0.71, "learning_rate": 1.5989739204788372e-05, "loss": 0.744, "step": 580 }, { "epoch": 0.73, "learning_rate": 1.5904232578024797e-05, "loss": 0.7313, "step": 590 }, { "epoch": 0.74, "learning_rate": 1.5818725951261225e-05, "loss": 0.7611, "step": 600 }, { "epoch": 0.75, "learning_rate": 1.573321932449765e-05, "loss": 0.7027, "step": 610 }, { "epoch": 0.76, "learning_rate": 1.5647712697734075e-05, "loss": 0.7306, "step": 620 }, { "epoch": 0.77, "learning_rate": 1.5562206070970503e-05, "loss": 0.7197, "step": 630 }, { "epoch": 0.79, "learning_rate": 1.5476699444206925e-05, "loss": 0.7277, "step": 640 }, { "epoch": 0.8, "learning_rate": 1.5391192817443353e-05, "loss": 0.7492, "step": 650 }, { "epoch": 0.81, "learning_rate": 1.5305686190679778e-05, "loss": 0.7044, "step": 660 }, { "epoch": 0.82, "learning_rate": 1.5220179563916204e-05, "loss": 0.731, "step": 670 }, { "epoch": 0.84, "learning_rate": 1.513467293715263e-05, "loss": 0.7152, "step": 680 }, { "epoch": 0.85, "learning_rate": 1.5049166310389056e-05, "loss": 0.7563, "step": 690 }, { "epoch": 0.86, "learning_rate": 1.4963659683625482e-05, "loss": 0.7047, "step": 700 }, { "epoch": 0.87, "learning_rate": 1.4878153056861909e-05, "loss": 0.769, "step": 710 }, { "epoch": 0.89, "learning_rate": 1.4792646430098333e-05, "loss": 0.7304, "step": 720 }, { "epoch": 0.9, "learning_rate": 1.470713980333476e-05, "loss": 0.7442, "step": 730 }, { "epoch": 0.91, "learning_rate": 1.4621633176571186e-05, "loss": 0.7396, "step": 740 }, { "epoch": 0.92, "learning_rate": 1.4536126549807611e-05, "loss": 0.7051, "step": 750 }, { "epoch": 0.93, "learning_rate": 1.4450619923044038e-05, "loss": 0.758, "step": 760 }, { "epoch": 0.95, "learning_rate": 1.4365113296280463e-05, "loss": 0.7802, "step": 770 }, { "epoch": 0.96, "learning_rate": 1.4279606669516887e-05, "loss": 0.7661, "step": 780 }, { "epoch": 0.97, "learning_rate": 1.4194100042753314e-05, "loss": 0.7163, "step": 790 }, { "epoch": 0.98, "learning_rate": 1.410859341598974e-05, "loss": 0.7433, "step": 800 }, { "epoch": 1.0, "learning_rate": 1.4023086789226165e-05, "loss": 0.7869, "step": 810 }, { "epoch": 1.01, "learning_rate": 1.3937580162462592e-05, "loss": 0.7031, "step": 820 }, { "epoch": 1.02, "learning_rate": 1.3852073535699018e-05, "loss": 0.7133, "step": 830 }, { "epoch": 1.03, "learning_rate": 1.3766566908935443e-05, "loss": 0.7278, "step": 840 }, { "epoch": 1.05, "learning_rate": 1.368106028217187e-05, "loss": 0.7262, "step": 850 }, { "epoch": 1.06, "learning_rate": 1.3595553655408296e-05, "loss": 0.7666, "step": 860 }, { "epoch": 1.07, "learning_rate": 1.3510047028644721e-05, "loss": 0.7129, "step": 870 }, { "epoch": 1.08, "learning_rate": 1.3424540401881147e-05, "loss": 0.7383, "step": 880 }, { "epoch": 1.09, "learning_rate": 1.3339033775117574e-05, "loss": 0.7385, "step": 890 }, { "epoch": 1.11, "learning_rate": 1.3253527148353999e-05, "loss": 0.7017, "step": 900 }, { "epoch": 1.12, "learning_rate": 1.3168020521590424e-05, "loss": 0.7399, "step": 910 }, { "epoch": 1.13, "learning_rate": 1.3082513894826848e-05, "loss": 0.7196, "step": 920 }, { "epoch": 1.14, "learning_rate": 1.2997007268063275e-05, "loss": 0.7014, "step": 930 }, { "epoch": 1.16, "learning_rate": 1.2911500641299701e-05, "loss": 0.7285, "step": 940 }, { "epoch": 1.17, "learning_rate": 1.2825994014536128e-05, "loss": 0.7521, "step": 950 }, { "epoch": 1.18, "learning_rate": 1.2740487387772553e-05, "loss": 0.7519, "step": 960 }, { "epoch": 1.19, "learning_rate": 1.265498076100898e-05, "loss": 0.6893, "step": 970 }, { "epoch": 1.21, "learning_rate": 1.2569474134245406e-05, "loss": 0.713, "step": 980 }, { "epoch": 1.22, "learning_rate": 1.248396750748183e-05, "loss": 0.7504, "step": 990 }, { "epoch": 1.23, "learning_rate": 1.2398460880718257e-05, "loss": 0.7285, "step": 1000 }, { "epoch": 1.23, "eval_oasst_export_w_label_accuracy": 0.6494505494505495, "eval_oasst_export_w_label_kendalltau": 0.21098901098901038, "eval_oasst_export_w_label_loss": 0.71435546875, "eval_oasst_export_w_label_neg_score": 0.147216796875, "eval_oasst_export_w_label_pos_score": 0.5234375, "eval_oasst_export_w_label_runtime": 213.6968, "eval_oasst_export_w_label_samples_per_second": 8.517, "eval_oasst_export_w_label_score_diff": 0.37646484375, "eval_oasst_export_w_label_steps_per_second": 8.517, "step": 1000 }, { "epoch": 1.24, "learning_rate": 1.2312954253954684e-05, "loss": 0.6864, "step": 1010 }, { "epoch": 1.25, "learning_rate": 1.2227447627191109e-05, "loss": 0.7241, "step": 1020 }, { "epoch": 1.27, "learning_rate": 1.2141941000427535e-05, "loss": 0.7041, "step": 1030 }, { "epoch": 1.28, "learning_rate": 1.2056434373663958e-05, "loss": 0.697, "step": 1040 }, { "epoch": 1.29, "learning_rate": 1.1970927746900385e-05, "loss": 0.7384, "step": 1050 }, { "epoch": 1.3, "learning_rate": 1.1885421120136811e-05, "loss": 0.7321, "step": 1060 }, { "epoch": 1.32, "learning_rate": 1.1799914493373236e-05, "loss": 0.6843, "step": 1070 }, { "epoch": 1.33, "learning_rate": 1.1714407866609663e-05, "loss": 0.7113, "step": 1080 }, { "epoch": 1.34, "learning_rate": 1.1628901239846089e-05, "loss": 0.758, "step": 1090 }, { "epoch": 1.35, "learning_rate": 1.1543394613082516e-05, "loss": 0.7233, "step": 1100 }, { "epoch": 1.37, "learning_rate": 1.145788798631894e-05, "loss": 0.7166, "step": 1110 }, { "epoch": 1.38, "learning_rate": 1.1372381359555367e-05, "loss": 0.724, "step": 1120 }, { "epoch": 1.39, "learning_rate": 1.1286874732791793e-05, "loss": 0.6614, "step": 1130 }, { "epoch": 1.4, "learning_rate": 1.1201368106028218e-05, "loss": 0.6938, "step": 1140 }, { "epoch": 1.41, "learning_rate": 1.1115861479264645e-05, "loss": 0.6812, "step": 1150 }, { "epoch": 1.43, "learning_rate": 1.1030354852501071e-05, "loss": 0.7047, "step": 1160 }, { "epoch": 1.44, "learning_rate": 1.0944848225737496e-05, "loss": 0.7123, "step": 1170 }, { "epoch": 1.45, "learning_rate": 1.0859341598973921e-05, "loss": 0.7204, "step": 1180 }, { "epoch": 1.46, "learning_rate": 1.0773834972210346e-05, "loss": 0.7414, "step": 1190 }, { "epoch": 1.48, "learning_rate": 1.0688328345446772e-05, "loss": 0.721, "step": 1200 }, { "epoch": 1.49, "learning_rate": 1.0602821718683199e-05, "loss": 0.7352, "step": 1210 }, { "epoch": 1.5, "learning_rate": 1.0517315091919624e-05, "loss": 0.7305, "step": 1220 }, { "epoch": 1.51, "learning_rate": 1.043180846515605e-05, "loss": 0.6983, "step": 1230 }, { "epoch": 1.53, "learning_rate": 1.0346301838392477e-05, "loss": 0.745, "step": 1240 }, { "epoch": 1.54, "learning_rate": 1.0260795211628903e-05, "loss": 0.6753, "step": 1250 }, { "epoch": 1.55, "learning_rate": 1.0175288584865328e-05, "loss": 0.6954, "step": 1260 }, { "epoch": 1.56, "learning_rate": 1.0089781958101754e-05, "loss": 0.6928, "step": 1270 }, { "epoch": 1.57, "learning_rate": 1.0004275331338181e-05, "loss": 0.7393, "step": 1280 }, { "epoch": 1.59, "learning_rate": 9.918768704574606e-06, "loss": 0.7032, "step": 1290 }, { "epoch": 1.6, "learning_rate": 9.83326207781103e-06, "loss": 0.7233, "step": 1300 }, { "epoch": 1.61, "learning_rate": 9.747755451047457e-06, "loss": 0.7174, "step": 1310 }, { "epoch": 1.62, "learning_rate": 9.662248824283884e-06, "loss": 0.667, "step": 1320 }, { "epoch": 1.64, "learning_rate": 9.576742197520308e-06, "loss": 0.7421, "step": 1330 }, { "epoch": 1.65, "learning_rate": 9.491235570756735e-06, "loss": 0.7577, "step": 1340 }, { "epoch": 1.66, "learning_rate": 9.40572894399316e-06, "loss": 0.7291, "step": 1350 }, { "epoch": 1.67, "learning_rate": 9.320222317229586e-06, "loss": 0.7378, "step": 1360 }, { "epoch": 1.69, "learning_rate": 9.234715690466011e-06, "loss": 0.7434, "step": 1370 }, { "epoch": 1.7, "learning_rate": 9.149209063702438e-06, "loss": 0.6818, "step": 1380 }, { "epoch": 1.71, "learning_rate": 9.063702436938864e-06, "loss": 0.7174, "step": 1390 }, { "epoch": 1.72, "learning_rate": 8.978195810175289e-06, "loss": 0.726, "step": 1400 }, { "epoch": 1.73, "learning_rate": 8.892689183411715e-06, "loss": 0.7004, "step": 1410 }, { "epoch": 1.75, "learning_rate": 8.80718255664814e-06, "loss": 0.7205, "step": 1420 }, { "epoch": 1.76, "learning_rate": 8.721675929884567e-06, "loss": 0.6822, "step": 1430 }, { "epoch": 1.77, "learning_rate": 8.636169303120993e-06, "loss": 0.6895, "step": 1440 }, { "epoch": 1.78, "learning_rate": 8.550662676357418e-06, "loss": 0.7066, "step": 1450 }, { "epoch": 1.8, "learning_rate": 8.465156049593845e-06, "loss": 0.7274, "step": 1460 }, { "epoch": 1.81, "learning_rate": 8.379649422830271e-06, "loss": 0.646, "step": 1470 }, { "epoch": 1.82, "learning_rate": 8.294142796066696e-06, "loss": 0.6865, "step": 1480 }, { "epoch": 1.83, "learning_rate": 8.20863616930312e-06, "loss": 0.6999, "step": 1490 }, { "epoch": 1.84, "learning_rate": 8.123129542539547e-06, "loss": 0.7139, "step": 1500 }, { "epoch": 1.84, "eval_oasst_export_w_label_accuracy": 0.7082417582417583, "eval_oasst_export_w_label_kendalltau": 0.3408791208791206, "eval_oasst_export_w_label_loss": 0.7021484375, "eval_oasst_export_w_label_neg_score": -0.84716796875, "eval_oasst_export_w_label_pos_score": -0.405029296875, "eval_oasst_export_w_label_runtime": 213.6246, "eval_oasst_export_w_label_samples_per_second": 8.52, "eval_oasst_export_w_label_score_diff": 0.4423828125, "eval_oasst_export_w_label_steps_per_second": 8.52, "step": 1500 } ], "max_steps": 2439, "num_train_epochs": 3, "total_flos": 0.0, "trial_name": null, "trial_params": null }