{ "best_metric": 0.8538553714752197, "best_model_checkpoint": "./lora-alpaca/checkpoint-1000", "epoch": 1.9282684149633629, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 2.6999999999999996e-05, "loss": 1.554, "step": 10 }, { "epoch": 0.04, "learning_rate": 5.6999999999999996e-05, "loss": 1.4793, "step": 20 }, { "epoch": 0.06, "learning_rate": 8.4e-05, "loss": 1.2861, "step": 30 }, { "epoch": 0.08, "learning_rate": 0.00011099999999999999, "loss": 1.1305, "step": 40 }, { "epoch": 0.1, "learning_rate": 0.00014099999999999998, "loss": 1.0653, "step": 50 }, { "epoch": 0.12, "learning_rate": 0.00017099999999999998, "loss": 1.0233, "step": 60 }, { "epoch": 0.13, "learning_rate": 0.000201, "loss": 0.9835, "step": 70 }, { "epoch": 0.15, "learning_rate": 0.00023099999999999998, "loss": 0.9385, "step": 80 }, { "epoch": 0.17, "learning_rate": 0.000261, "loss": 0.9179, "step": 90 }, { "epoch": 0.19, "learning_rate": 0.00029099999999999997, "loss": 0.9107, "step": 100 }, { "epoch": 0.21, "learning_rate": 0.0002985557083906465, "loss": 0.914, "step": 110 }, { "epoch": 0.23, "learning_rate": 0.0002964924346629986, "loss": 0.8971, "step": 120 }, { "epoch": 0.25, "learning_rate": 0.00029442916093535074, "loss": 0.9021, "step": 130 }, { "epoch": 0.27, "learning_rate": 0.00029236588720770286, "loss": 0.8939, "step": 140 }, { "epoch": 0.29, "learning_rate": 0.000290302613480055, "loss": 0.8917, "step": 150 }, { "epoch": 0.31, "learning_rate": 0.0002882393397524071, "loss": 0.8834, "step": 160 }, { "epoch": 0.33, "learning_rate": 0.00028617606602475925, "loss": 0.8927, "step": 170 }, { "epoch": 0.35, "learning_rate": 0.0002841127922971114, "loss": 0.8979, "step": 180 }, { "epoch": 0.37, "learning_rate": 0.0002820495185694635, "loss": 0.8812, "step": 190 }, { "epoch": 0.39, "learning_rate": 0.00027998624484181563, "loss": 0.8852, "step": 200 }, { "epoch": 0.39, "eval_loss": 0.8834338188171387, "eval_runtime": 172.9592, "eval_samples_per_second": 11.563, "eval_steps_per_second": 0.289, "step": 200 }, { "epoch": 0.4, "learning_rate": 0.0002779229711141678, "loss": 0.8837, "step": 210 }, { "epoch": 0.42, "learning_rate": 0.00027585969738651994, "loss": 0.8837, "step": 220 }, { "epoch": 0.44, "learning_rate": 0.00027379642365887207, "loss": 0.8703, "step": 230 }, { "epoch": 0.46, "learning_rate": 0.0002717331499312242, "loss": 0.885, "step": 240 }, { "epoch": 0.48, "learning_rate": 0.0002696698762035763, "loss": 0.877, "step": 250 }, { "epoch": 0.5, "learning_rate": 0.00026760660247592845, "loss": 0.8698, "step": 260 }, { "epoch": 0.52, "learning_rate": 0.0002655433287482806, "loss": 0.8698, "step": 270 }, { "epoch": 0.54, "learning_rate": 0.0002634800550206327, "loss": 0.8732, "step": 280 }, { "epoch": 0.56, "learning_rate": 0.00026141678129298484, "loss": 0.8571, "step": 290 }, { "epoch": 0.58, "learning_rate": 0.00025935350756533696, "loss": 0.8597, "step": 300 }, { "epoch": 0.6, "learning_rate": 0.0002572902338376891, "loss": 0.8719, "step": 310 }, { "epoch": 0.62, "learning_rate": 0.0002552269601100413, "loss": 0.8624, "step": 320 }, { "epoch": 0.64, "learning_rate": 0.0002531636863823934, "loss": 0.8577, "step": 330 }, { "epoch": 0.66, "learning_rate": 0.00025110041265474553, "loss": 0.8624, "step": 340 }, { "epoch": 0.67, "learning_rate": 0.00024903713892709766, "loss": 0.8765, "step": 350 }, { "epoch": 0.69, "learning_rate": 0.0002469738651994498, "loss": 0.8595, "step": 360 }, { "epoch": 0.71, "learning_rate": 0.0002449105914718019, "loss": 0.8646, "step": 370 }, { "epoch": 0.73, "learning_rate": 0.00024284731774415404, "loss": 0.87, "step": 380 }, { "epoch": 0.75, "learning_rate": 0.00024078404401650617, "loss": 0.8568, "step": 390 }, { "epoch": 0.77, "learning_rate": 0.0002387207702888583, "loss": 0.8571, "step": 400 }, { "epoch": 0.77, "eval_loss": 0.8650650382041931, "eval_runtime": 175.1777, "eval_samples_per_second": 11.417, "eval_steps_per_second": 0.285, "step": 400 }, { "epoch": 0.79, "learning_rate": 0.00023665749656121043, "loss": 0.8495, "step": 410 }, { "epoch": 0.81, "learning_rate": 0.00023459422283356255, "loss": 0.8572, "step": 420 }, { "epoch": 0.83, "learning_rate": 0.0002325309491059147, "loss": 0.8629, "step": 430 }, { "epoch": 0.85, "learning_rate": 0.00023046767537826684, "loss": 0.8413, "step": 440 }, { "epoch": 0.87, "learning_rate": 0.00022840440165061896, "loss": 0.853, "step": 450 }, { "epoch": 0.89, "learning_rate": 0.0002263411279229711, "loss": 0.8662, "step": 460 }, { "epoch": 0.91, "learning_rate": 0.00022427785419532322, "loss": 0.8495, "step": 470 }, { "epoch": 0.93, "learning_rate": 0.00022221458046767537, "loss": 0.8502, "step": 480 }, { "epoch": 0.94, "learning_rate": 0.0002201513067400275, "loss": 0.8601, "step": 490 }, { "epoch": 0.96, "learning_rate": 0.00021808803301237963, "loss": 0.8503, "step": 500 }, { "epoch": 0.98, "learning_rate": 0.00021602475928473176, "loss": 0.8517, "step": 510 }, { "epoch": 1.0, "learning_rate": 0.00021396148555708389, "loss": 0.8633, "step": 520 }, { "epoch": 1.02, "learning_rate": 0.00021189821182943601, "loss": 0.853, "step": 530 }, { "epoch": 1.04, "learning_rate": 0.00020983493810178817, "loss": 0.8379, "step": 540 }, { "epoch": 1.06, "learning_rate": 0.0002077716643741403, "loss": 0.8396, "step": 550 }, { "epoch": 1.08, "learning_rate": 0.00020570839064649242, "loss": 0.8569, "step": 560 }, { "epoch": 1.1, "learning_rate": 0.00020364511691884455, "loss": 0.8594, "step": 570 }, { "epoch": 1.12, "learning_rate": 0.00020158184319119668, "loss": 0.8461, "step": 580 }, { "epoch": 1.14, "learning_rate": 0.00019951856946354884, "loss": 0.8429, "step": 590 }, { "epoch": 1.16, "learning_rate": 0.00019745529573590096, "loss": 0.8308, "step": 600 }, { "epoch": 1.16, "eval_loss": 0.8592662215232849, "eval_runtime": 175.4031, "eval_samples_per_second": 11.402, "eval_steps_per_second": 0.285, "step": 600 }, { "epoch": 1.18, "learning_rate": 0.0001953920220082531, "loss": 0.8355, "step": 610 }, { "epoch": 1.2, "learning_rate": 0.0001933287482806052, "loss": 0.834, "step": 620 }, { "epoch": 1.21, "learning_rate": 0.00019126547455295732, "loss": 0.8299, "step": 630 }, { "epoch": 1.23, "learning_rate": 0.00018920220082530945, "loss": 0.8431, "step": 640 }, { "epoch": 1.25, "learning_rate": 0.00018713892709766163, "loss": 0.841, "step": 650 }, { "epoch": 1.27, "learning_rate": 0.00018507565337001373, "loss": 0.8374, "step": 660 }, { "epoch": 1.29, "learning_rate": 0.00018301237964236586, "loss": 0.84, "step": 670 }, { "epoch": 1.31, "learning_rate": 0.000180949105914718, "loss": 0.8365, "step": 680 }, { "epoch": 1.33, "learning_rate": 0.00017888583218707011, "loss": 0.8287, "step": 690 }, { "epoch": 1.35, "learning_rate": 0.00017682255845942227, "loss": 0.8334, "step": 700 }, { "epoch": 1.37, "learning_rate": 0.0001747592847317744, "loss": 0.8445, "step": 710 }, { "epoch": 1.39, "learning_rate": 0.00017269601100412653, "loss": 0.8286, "step": 720 }, { "epoch": 1.41, "learning_rate": 0.00017063273727647865, "loss": 0.8266, "step": 730 }, { "epoch": 1.43, "learning_rate": 0.00016856946354883078, "loss": 0.8276, "step": 740 }, { "epoch": 1.45, "learning_rate": 0.00016650618982118294, "loss": 0.8386, "step": 750 }, { "epoch": 1.47, "learning_rate": 0.00016444291609353506, "loss": 0.8349, "step": 760 }, { "epoch": 1.48, "learning_rate": 0.0001623796423658872, "loss": 0.8292, "step": 770 }, { "epoch": 1.5, "learning_rate": 0.00016031636863823932, "loss": 0.8354, "step": 780 }, { "epoch": 1.52, "learning_rate": 0.00015825309491059145, "loss": 0.8306, "step": 790 }, { "epoch": 1.54, "learning_rate": 0.00015618982118294358, "loss": 0.8346, "step": 800 }, { "epoch": 1.54, "eval_loss": 0.856368899345398, "eval_runtime": 175.5494, "eval_samples_per_second": 11.393, "eval_steps_per_second": 0.285, "step": 800 }, { "epoch": 1.56, "learning_rate": 0.00015412654745529573, "loss": 0.8244, "step": 810 }, { "epoch": 1.58, "learning_rate": 0.00015206327372764786, "loss": 0.8129, "step": 820 }, { "epoch": 1.6, "learning_rate": 0.00015, "loss": 0.8216, "step": 830 }, { "epoch": 1.62, "learning_rate": 0.00014793672627235211, "loss": 0.8205, "step": 840 }, { "epoch": 1.64, "learning_rate": 0.00014587345254470424, "loss": 0.8215, "step": 850 }, { "epoch": 1.66, "learning_rate": 0.00014381017881705637, "loss": 0.8467, "step": 860 }, { "epoch": 1.68, "learning_rate": 0.0001417469050894085, "loss": 0.8258, "step": 870 }, { "epoch": 1.7, "learning_rate": 0.00013968363136176065, "loss": 0.8277, "step": 880 }, { "epoch": 1.72, "learning_rate": 0.00013762035763411278, "loss": 0.8249, "step": 890 }, { "epoch": 1.74, "learning_rate": 0.0001355570839064649, "loss": 0.8226, "step": 900 }, { "epoch": 1.75, "learning_rate": 0.00013349381017881704, "loss": 0.8268, "step": 910 }, { "epoch": 1.77, "learning_rate": 0.00013143053645116917, "loss": 0.8321, "step": 920 }, { "epoch": 1.79, "learning_rate": 0.00012936726272352132, "loss": 0.8163, "step": 930 }, { "epoch": 1.81, "learning_rate": 0.00012730398899587345, "loss": 0.8352, "step": 940 }, { "epoch": 1.83, "learning_rate": 0.00012524071526822558, "loss": 0.8106, "step": 950 }, { "epoch": 1.85, "learning_rate": 0.0001231774415405777, "loss": 0.8017, "step": 960 }, { "epoch": 1.87, "learning_rate": 0.00012111416781292983, "loss": 0.8244, "step": 970 }, { "epoch": 1.89, "learning_rate": 0.00011905089408528197, "loss": 0.8363, "step": 980 }, { "epoch": 1.91, "learning_rate": 0.0001169876203576341, "loss": 0.8307, "step": 990 }, { "epoch": 1.93, "learning_rate": 0.00011492434662998623, "loss": 0.8239, "step": 1000 }, { "epoch": 1.93, "eval_loss": 0.8538553714752197, "eval_runtime": 175.4751, "eval_samples_per_second": 11.398, "eval_steps_per_second": 0.285, "step": 1000 } ], "max_steps": 1554, "num_train_epochs": 3, "total_flos": 8.910668799611503e+18, "trial_name": null, "trial_params": null }