{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.8449049873931491, "global_step": 1500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 4.5801353078493935e-06, "loss": 0.8726, "step": 10 }, { "epoch": 0.02, "learning_rate": 7.087352805422317e-06, "loss": 0.8672, "step": 20 }, { "epoch": 0.04, "learning_rate": 8.228161798644422e-06, "loss": 0.8565, "step": 30 }, { "epoch": 0.05, "learning_rate": 9.014161010104347e-06, "loss": 0.8213, "step": 40 }, { "epoch": 0.06, "learning_rate": 9.673229499590088e-06, "loss": 0.7977, "step": 50 }, { "epoch": 0.07, "learning_rate": 9.987442444537463e-06, "loss": 0.7521, "step": 60 }, { "epoch": 0.09, "learning_rate": 9.94558392632901e-06, "loss": 0.7423, "step": 70 }, { "epoch": 0.1, "learning_rate": 9.903725408120554e-06, "loss": 0.7026, "step": 80 }, { "epoch": 0.11, "learning_rate": 9.861866889912098e-06, "loss": 0.6808, "step": 90 }, { "epoch": 0.12, "learning_rate": 9.820008371703642e-06, "loss": 0.6901, "step": 100 }, { "epoch": 0.14, "learning_rate": 9.778149853495186e-06, "loss": 0.6514, "step": 110 }, { "epoch": 0.15, "learning_rate": 9.736291335286732e-06, "loss": 0.6467, "step": 120 }, { "epoch": 0.16, "learning_rate": 9.694432817078277e-06, "loss": 0.6641, "step": 130 }, { "epoch": 0.17, "learning_rate": 9.652574298869821e-06, "loss": 0.6616, "step": 140 }, { "epoch": 0.18, "learning_rate": 9.610715780661365e-06, "loss": 0.6617, "step": 150 }, { "epoch": 0.2, "learning_rate": 9.56885726245291e-06, "loss": 0.6295, "step": 160 }, { "epoch": 0.21, "learning_rate": 9.526998744244454e-06, "loss": 0.6071, "step": 170 }, { "epoch": 0.22, "learning_rate": 9.485140226036e-06, "loss": 0.6354, "step": 180 }, { "epoch": 0.23, "learning_rate": 9.443281707827544e-06, "loss": 0.6211, "step": 190 }, { "epoch": 0.25, "learning_rate": 9.401423189619088e-06, "loss": 0.6272, "step": 200 }, { "epoch": 0.26, "learning_rate": 9.359564671410633e-06, "loss": 0.641, "step": 210 }, { "epoch": 0.27, "learning_rate": 9.317706153202177e-06, "loss": 0.6205, "step": 220 }, { "epoch": 0.28, "learning_rate": 9.275847634993721e-06, "loss": 0.6263, "step": 230 }, { "epoch": 0.3, "learning_rate": 9.233989116785267e-06, "loss": 0.6148, "step": 240 }, { "epoch": 0.31, "learning_rate": 9.192130598576812e-06, "loss": 0.6521, "step": 250 }, { "epoch": 0.31, "eval_oasst_export_w_label_accuracy": 0.7346153846153847, "eval_oasst_export_w_label_kendalltau": 0.3859706959706959, "eval_oasst_export_w_label_loss": 0.626953125, "eval_oasst_export_w_label_neg_score": 0.2529296875, "eval_oasst_export_w_label_pos_score": 0.91064453125, "eval_oasst_export_w_label_runtime": 190.5015, "eval_oasst_export_w_label_samples_per_second": 9.554, "eval_oasst_export_w_label_score_diff": 0.65771484375, "eval_oasst_export_w_label_steps_per_second": 2.388, "step": 251 }, { "epoch": 0.32, "learning_rate": 9.150272080368356e-06, "loss": 0.602, "step": 260 }, { "epoch": 0.33, "learning_rate": 9.1084135621599e-06, "loss": 0.5637, "step": 270 }, { "epoch": 0.34, "learning_rate": 9.066555043951444e-06, "loss": 0.5948, "step": 280 }, { "epoch": 0.36, "learning_rate": 9.02469652574299e-06, "loss": 0.6245, "step": 290 }, { "epoch": 0.37, "learning_rate": 8.982838007534535e-06, "loss": 0.5943, "step": 300 }, { "epoch": 0.38, "learning_rate": 8.940979489326079e-06, "loss": 0.5934, "step": 310 }, { "epoch": 0.39, "learning_rate": 8.899120971117623e-06, "loss": 0.5426, "step": 320 }, { "epoch": 0.41, "learning_rate": 8.857262452909168e-06, "loss": 0.6191, "step": 330 }, { "epoch": 0.42, "learning_rate": 8.815403934700712e-06, "loss": 0.6316, "step": 340 }, { "epoch": 0.43, "learning_rate": 8.773545416492258e-06, "loss": 0.611, "step": 350 }, { "epoch": 0.44, "learning_rate": 8.7316868982838e-06, "loss": 0.6434, "step": 360 }, { "epoch": 0.46, "learning_rate": 8.689828380075346e-06, "loss": 0.6327, "step": 370 }, { "epoch": 0.47, "learning_rate": 8.64796986186689e-06, "loss": 0.6021, "step": 380 }, { "epoch": 0.48, "learning_rate": 8.606111343658435e-06, "loss": 0.5986, "step": 390 }, { "epoch": 0.49, "learning_rate": 8.564252825449981e-06, "loss": 0.5798, "step": 400 }, { "epoch": 0.5, "learning_rate": 8.522394307241524e-06, "loss": 0.6055, "step": 410 }, { "epoch": 0.52, "learning_rate": 8.480535789033068e-06, "loss": 0.6172, "step": 420 }, { "epoch": 0.53, "learning_rate": 8.438677270824614e-06, "loss": 0.6657, "step": 430 }, { "epoch": 0.54, "learning_rate": 8.396818752616158e-06, "loss": 0.6206, "step": 440 }, { "epoch": 0.55, "learning_rate": 8.354960234407702e-06, "loss": 0.6177, "step": 450 }, { "epoch": 0.57, "learning_rate": 8.313101716199248e-06, "loss": 0.6201, "step": 460 }, { "epoch": 0.58, "learning_rate": 8.271243197990791e-06, "loss": 0.5299, "step": 470 }, { "epoch": 0.59, "learning_rate": 8.229384679782337e-06, "loss": 0.6733, "step": 480 }, { "epoch": 0.6, "learning_rate": 8.187526161573881e-06, "loss": 0.6546, "step": 490 }, { "epoch": 0.61, "learning_rate": 8.145667643365426e-06, "loss": 0.6234, "step": 500 }, { "epoch": 0.62, "eval_oasst_export_w_label_accuracy": 0.7423076923076923, "eval_oasst_export_w_label_kendalltau": 0.3931501831501834, "eval_oasst_export_w_label_loss": 0.60888671875, "eval_oasst_export_w_label_neg_score": 0.53515625, "eval_oasst_export_w_label_pos_score": 1.1513671875, "eval_oasst_export_w_label_runtime": 190.2556, "eval_oasst_export_w_label_samples_per_second": 9.566, "eval_oasst_export_w_label_score_diff": 0.6162109375, "eval_oasst_export_w_label_steps_per_second": 2.392, "step": 502 }, { "epoch": 0.63, "learning_rate": 8.10380912515697e-06, "loss": 0.6087, "step": 510 }, { "epoch": 0.64, "learning_rate": 8.061950606948514e-06, "loss": 0.6345, "step": 520 }, { "epoch": 0.65, "learning_rate": 8.020092088740058e-06, "loss": 0.5881, "step": 530 }, { "epoch": 0.66, "learning_rate": 7.978233570531604e-06, "loss": 0.5954, "step": 540 }, { "epoch": 0.68, "learning_rate": 7.936375052323149e-06, "loss": 0.61, "step": 550 }, { "epoch": 0.69, "learning_rate": 7.894516534114693e-06, "loss": 0.5808, "step": 560 }, { "epoch": 0.7, "learning_rate": 7.852658015906237e-06, "loss": 0.6579, "step": 570 }, { "epoch": 0.71, "learning_rate": 7.810799497697782e-06, "loss": 0.6155, "step": 580 }, { "epoch": 0.73, "learning_rate": 7.768940979489327e-06, "loss": 0.6049, "step": 590 }, { "epoch": 0.74, "learning_rate": 7.727082461280872e-06, "loss": 0.6135, "step": 600 }, { "epoch": 0.75, "learning_rate": 7.685223943072416e-06, "loss": 0.5793, "step": 610 }, { "epoch": 0.76, "learning_rate": 7.64336542486396e-06, "loss": 0.6091, "step": 620 }, { "epoch": 0.77, "learning_rate": 7.601506906655505e-06, "loss": 0.6196, "step": 630 }, { "epoch": 0.79, "learning_rate": 7.55964838844705e-06, "loss": 0.5951, "step": 640 }, { "epoch": 0.8, "learning_rate": 7.517789870238594e-06, "loss": 0.6366, "step": 650 }, { "epoch": 0.81, "learning_rate": 7.475931352030139e-06, "loss": 0.6109, "step": 660 }, { "epoch": 0.82, "learning_rate": 7.434072833821683e-06, "loss": 0.5869, "step": 670 }, { "epoch": 0.84, "learning_rate": 7.392214315613228e-06, "loss": 0.612, "step": 680 }, { "epoch": 0.85, "learning_rate": 7.350355797404772e-06, "loss": 0.6019, "step": 690 }, { "epoch": 0.86, "learning_rate": 7.308497279196317e-06, "loss": 0.6265, "step": 700 }, { "epoch": 0.87, "learning_rate": 7.2666387609878615e-06, "loss": 0.5994, "step": 710 }, { "epoch": 0.89, "learning_rate": 7.224780242779407e-06, "loss": 0.5607, "step": 720 }, { "epoch": 0.9, "learning_rate": 7.18292172457095e-06, "loss": 0.5985, "step": 730 }, { "epoch": 0.91, "learning_rate": 7.141063206362495e-06, "loss": 0.5915, "step": 740 }, { "epoch": 0.92, "learning_rate": 7.09920468815404e-06, "loss": 0.5442, "step": 750 }, { "epoch": 0.93, "eval_oasst_export_w_label_accuracy": 0.7478021978021978, "eval_oasst_export_w_label_kendalltau": 0.4016483516483517, "eval_oasst_export_w_label_loss": 0.62255859375, "eval_oasst_export_w_label_neg_score": -0.1964111328125, "eval_oasst_export_w_label_pos_score": 0.82470703125, "eval_oasst_export_w_label_runtime": 190.3739, "eval_oasst_export_w_label_samples_per_second": 9.56, "eval_oasst_export_w_label_score_diff": 1.021484375, "eval_oasst_export_w_label_steps_per_second": 2.39, "step": 753 }, { "epoch": 0.93, "learning_rate": 7.057346169945585e-06, "loss": 0.6559, "step": 760 }, { "epoch": 0.95, "learning_rate": 7.01548765173713e-06, "loss": 0.5909, "step": 770 }, { "epoch": 0.96, "learning_rate": 6.973629133528673e-06, "loss": 0.6263, "step": 780 }, { "epoch": 0.97, "learning_rate": 6.931770615320218e-06, "loss": 0.6288, "step": 790 }, { "epoch": 0.98, "learning_rate": 6.889912097111763e-06, "loss": 0.5627, "step": 800 }, { "epoch": 1.0, "learning_rate": 6.848053578903308e-06, "loss": 0.6076, "step": 810 }, { "epoch": 1.01, "learning_rate": 6.806195060694852e-06, "loss": 0.567, "step": 820 }, { "epoch": 1.02, "learning_rate": 6.764336542486396e-06, "loss": 0.565, "step": 830 }, { "epoch": 1.03, "learning_rate": 6.722478024277941e-06, "loss": 0.6091, "step": 840 }, { "epoch": 1.05, "learning_rate": 6.680619506069486e-06, "loss": 0.5579, "step": 850 }, { "epoch": 1.06, "learning_rate": 6.63876098786103e-06, "loss": 0.5943, "step": 860 }, { "epoch": 1.07, "learning_rate": 6.596902469652575e-06, "loss": 0.566, "step": 870 }, { "epoch": 1.08, "learning_rate": 6.555043951444119e-06, "loss": 0.5757, "step": 880 }, { "epoch": 1.09, "learning_rate": 6.513185433235664e-06, "loss": 0.5752, "step": 890 }, { "epoch": 1.11, "learning_rate": 6.471326915027208e-06, "loss": 0.5595, "step": 900 }, { "epoch": 1.12, "learning_rate": 6.429468396818753e-06, "loss": 0.6051, "step": 910 }, { "epoch": 1.13, "learning_rate": 6.387609878610298e-06, "loss": 0.5308, "step": 920 }, { "epoch": 1.14, "learning_rate": 6.345751360401843e-06, "loss": 0.5341, "step": 930 }, { "epoch": 1.16, "learning_rate": 6.303892842193386e-06, "loss": 0.5976, "step": 940 }, { "epoch": 1.17, "learning_rate": 6.262034323984931e-06, "loss": 0.568, "step": 950 }, { "epoch": 1.18, "learning_rate": 6.220175805776476e-06, "loss": 0.5865, "step": 960 }, { "epoch": 1.19, "learning_rate": 6.178317287568021e-06, "loss": 0.5486, "step": 970 }, { "epoch": 1.21, "learning_rate": 6.136458769359566e-06, "loss": 0.5699, "step": 980 }, { "epoch": 1.22, "learning_rate": 6.094600251151109e-06, "loss": 0.5831, "step": 990 }, { "epoch": 1.23, "learning_rate": 6.052741732942654e-06, "loss": 0.5558, "step": 1000 }, { "epoch": 1.23, "eval_oasst_export_w_label_accuracy": 0.743956043956044, "eval_oasst_export_w_label_kendalltau": 0.3847985347985353, "eval_oasst_export_w_label_loss": 0.61865234375, "eval_oasst_export_w_label_neg_score": -0.09515380859375, "eval_oasst_export_w_label_pos_score": 0.93212890625, "eval_oasst_export_w_label_runtime": 190.324, "eval_oasst_export_w_label_samples_per_second": 9.563, "eval_oasst_export_w_label_score_diff": 1.02734375, "eval_oasst_export_w_label_steps_per_second": 2.391, "step": 1004 }, { "epoch": 1.24, "learning_rate": 6.010883214734199e-06, "loss": 0.5286, "step": 1010 }, { "epoch": 1.25, "learning_rate": 5.969024696525744e-06, "loss": 0.5657, "step": 1020 }, { "epoch": 1.27, "learning_rate": 5.927166178317288e-06, "loss": 0.5376, "step": 1030 }, { "epoch": 1.28, "learning_rate": 5.885307660108832e-06, "loss": 0.5375, "step": 1040 }, { "epoch": 1.29, "learning_rate": 5.843449141900377e-06, "loss": 0.5513, "step": 1050 }, { "epoch": 1.3, "learning_rate": 5.801590623691922e-06, "loss": 0.5778, "step": 1060 }, { "epoch": 1.32, "learning_rate": 5.759732105483467e-06, "loss": 0.5445, "step": 1070 }, { "epoch": 1.33, "learning_rate": 5.717873587275011e-06, "loss": 0.5657, "step": 1080 }, { "epoch": 1.34, "learning_rate": 5.676015069066555e-06, "loss": 0.5808, "step": 1090 }, { "epoch": 1.35, "learning_rate": 5.6341565508581e-06, "loss": 0.5723, "step": 1100 }, { "epoch": 1.37, "learning_rate": 5.592298032649645e-06, "loss": 0.5413, "step": 1110 }, { "epoch": 1.38, "learning_rate": 5.550439514441189e-06, "loss": 0.6384, "step": 1120 }, { "epoch": 1.39, "learning_rate": 5.508580996232734e-06, "loss": 0.5336, "step": 1130 }, { "epoch": 1.4, "learning_rate": 5.466722478024279e-06, "loss": 0.5754, "step": 1140 }, { "epoch": 1.41, "learning_rate": 5.424863959815823e-06, "loss": 0.5171, "step": 1150 }, { "epoch": 1.43, "learning_rate": 5.383005441607367e-06, "loss": 0.5536, "step": 1160 }, { "epoch": 1.44, "learning_rate": 5.341146923398912e-06, "loss": 0.5812, "step": 1170 }, { "epoch": 1.45, "learning_rate": 5.299288405190457e-06, "loss": 0.5604, "step": 1180 }, { "epoch": 1.46, "learning_rate": 5.257429886982002e-06, "loss": 0.5433, "step": 1190 }, { "epoch": 1.48, "learning_rate": 5.215571368773545e-06, "loss": 0.5704, "step": 1200 }, { "epoch": 1.49, "learning_rate": 5.17371285056509e-06, "loss": 0.592, "step": 1210 }, { "epoch": 1.5, "learning_rate": 5.131854332356635e-06, "loss": 0.5406, "step": 1220 }, { "epoch": 1.51, "learning_rate": 5.08999581414818e-06, "loss": 0.5597, "step": 1230 }, { "epoch": 1.53, "learning_rate": 5.048137295939725e-06, "loss": 0.6011, "step": 1240 }, { "epoch": 1.54, "learning_rate": 5.006278777731268e-06, "loss": 0.5071, "step": 1250 }, { "epoch": 1.54, "eval_oasst_export_w_label_accuracy": 0.7478021978021978, "eval_oasst_export_w_label_kendalltau": 0.4045054945054951, "eval_oasst_export_w_label_loss": 0.61669921875, "eval_oasst_export_w_label_neg_score": -0.1842041015625, "eval_oasst_export_w_label_pos_score": 1.02734375, "eval_oasst_export_w_label_runtime": 190.3207, "eval_oasst_export_w_label_samples_per_second": 9.563, "eval_oasst_export_w_label_score_diff": 1.2109375, "eval_oasst_export_w_label_steps_per_second": 2.391, "step": 1255 }, { "epoch": 1.55, "learning_rate": 4.964420259522813e-06, "loss": 0.5374, "step": 1260 }, { "epoch": 1.56, "learning_rate": 4.922561741314358e-06, "loss": 0.5613, "step": 1270 }, { "epoch": 1.57, "learning_rate": 4.880703223105903e-06, "loss": 0.5643, "step": 1280 }, { "epoch": 1.59, "learning_rate": 4.838844704897447e-06, "loss": 0.5755, "step": 1290 }, { "epoch": 1.6, "learning_rate": 4.7969861866889915e-06, "loss": 0.5631, "step": 1300 }, { "epoch": 1.61, "learning_rate": 4.755127668480537e-06, "loss": 0.5557, "step": 1310 }, { "epoch": 1.62, "learning_rate": 4.713269150272081e-06, "loss": 0.5173, "step": 1320 }, { "epoch": 1.64, "learning_rate": 4.671410632063625e-06, "loss": 0.5938, "step": 1330 }, { "epoch": 1.65, "learning_rate": 4.6295521138551695e-06, "loss": 0.6386, "step": 1340 }, { "epoch": 1.66, "learning_rate": 4.587693595646715e-06, "loss": 0.5652, "step": 1350 }, { "epoch": 1.67, "learning_rate": 4.545835077438259e-06, "loss": 0.6097, "step": 1360 }, { "epoch": 1.69, "learning_rate": 4.503976559229803e-06, "loss": 0.5369, "step": 1370 }, { "epoch": 1.7, "learning_rate": 4.462118041021348e-06, "loss": 0.539, "step": 1380 }, { "epoch": 1.71, "learning_rate": 4.4202595228128935e-06, "loss": 0.5854, "step": 1390 }, { "epoch": 1.72, "learning_rate": 4.378401004604437e-06, "loss": 0.5574, "step": 1400 }, { "epoch": 1.73, "learning_rate": 4.336542486395982e-06, "loss": 0.5079, "step": 1410 }, { "epoch": 1.75, "learning_rate": 4.294683968187526e-06, "loss": 0.5889, "step": 1420 }, { "epoch": 1.76, "learning_rate": 4.2528254499790715e-06, "loss": 0.5908, "step": 1430 }, { "epoch": 1.77, "learning_rate": 4.210966931770616e-06, "loss": 0.5254, "step": 1440 }, { "epoch": 1.78, "learning_rate": 4.16910841356216e-06, "loss": 0.5566, "step": 1450 }, { "epoch": 1.8, "learning_rate": 4.127249895353705e-06, "loss": 0.5743, "step": 1460 }, { "epoch": 1.81, "learning_rate": 4.0853913771452495e-06, "loss": 0.5208, "step": 1470 }, { "epoch": 1.82, "learning_rate": 4.043532858936794e-06, "loss": 0.5486, "step": 1480 }, { "epoch": 1.83, "learning_rate": 4.001674340728339e-06, "loss": 0.587, "step": 1490 }, { "epoch": 1.84, "learning_rate": 3.959815822519883e-06, "loss": 0.5781, "step": 1500 } ], "max_steps": 2439, "num_train_epochs": 3, "total_flos": 0.0, "trial_name": null, "trial_params": null }