{ "best_metric": 0.7959330677986145, "best_model_checkpoint": "./lora-alpaca/checkpoint-800", "epoch": 2.045381911153723, "global_step": 800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 2.9999999999999997e-05, "loss": 1.6613, "step": 10 }, { "epoch": 0.05, "learning_rate": 5.9999999999999995e-05, "loss": 1.6025, "step": 20 }, { "epoch": 0.08, "learning_rate": 8.999999999999999e-05, "loss": 1.4512, "step": 30 }, { "epoch": 0.1, "learning_rate": 0.00011999999999999999, "loss": 1.1653, "step": 40 }, { "epoch": 0.13, "learning_rate": 0.00015, "loss": 0.9588, "step": 50 }, { "epoch": 0.15, "learning_rate": 0.00017999999999999998, "loss": 0.9058, "step": 60 }, { "epoch": 0.18, "learning_rate": 0.00020999999999999998, "loss": 0.8655, "step": 70 }, { "epoch": 0.2, "learning_rate": 0.00023999999999999998, "loss": 0.8493, "step": 80 }, { "epoch": 0.23, "learning_rate": 0.00027, "loss": 0.8462, "step": 90 }, { "epoch": 0.26, "learning_rate": 0.0003, "loss": 0.8353, "step": 100 }, { "epoch": 0.28, "learning_rate": 0.0002972041006523765, "loss": 0.833, "step": 110 }, { "epoch": 0.31, "learning_rate": 0.000294408201304753, "loss": 0.8322, "step": 120 }, { "epoch": 0.33, "learning_rate": 0.0002916123019571295, "loss": 0.8239, "step": 130 }, { "epoch": 0.36, "learning_rate": 0.00028881640260950607, "loss": 0.8174, "step": 140 }, { "epoch": 0.38, "learning_rate": 0.00028602050326188253, "loss": 0.8178, "step": 150 }, { "epoch": 0.41, "learning_rate": 0.00028322460391425904, "loss": 0.8321, "step": 160 }, { "epoch": 0.43, "learning_rate": 0.0002804287045666356, "loss": 0.8213, "step": 170 }, { "epoch": 0.46, "learning_rate": 0.0002776328052190121, "loss": 0.8088, "step": 180 }, { "epoch": 0.49, "learning_rate": 0.00027483690587138857, "loss": 0.8021, "step": 190 }, { "epoch": 0.51, "learning_rate": 0.00027204100652376514, "loss": 0.8297, "step": 200 }, { "epoch": 0.51, "eval_loss": 0.8141016960144043, "eval_runtime": 87.4274, "eval_samples_per_second": 22.876, "eval_steps_per_second": 2.86, "step": 200 }, { "epoch": 0.54, "learning_rate": 0.00026924510717614165, "loss": 0.8148, "step": 210 }, { "epoch": 0.56, "learning_rate": 0.00026644920782851816, "loss": 0.8204, "step": 220 }, { "epoch": 0.59, "learning_rate": 0.00026365330848089467, "loss": 0.8181, "step": 230 }, { "epoch": 0.61, "learning_rate": 0.0002608574091332712, "loss": 0.8146, "step": 240 }, { "epoch": 0.64, "learning_rate": 0.0002580615097856477, "loss": 0.8108, "step": 250 }, { "epoch": 0.66, "learning_rate": 0.0002552656104380242, "loss": 0.8069, "step": 260 }, { "epoch": 0.69, "learning_rate": 0.0002524697110904007, "loss": 0.8219, "step": 270 }, { "epoch": 0.72, "learning_rate": 0.0002496738117427772, "loss": 0.8076, "step": 280 }, { "epoch": 0.74, "learning_rate": 0.00024687791239515373, "loss": 0.8076, "step": 290 }, { "epoch": 0.77, "learning_rate": 0.00024408201304753027, "loss": 0.8173, "step": 300 }, { "epoch": 0.79, "learning_rate": 0.00024128611369990678, "loss": 0.8032, "step": 310 }, { "epoch": 0.82, "learning_rate": 0.0002384902143522833, "loss": 0.8185, "step": 320 }, { "epoch": 0.84, "learning_rate": 0.0002356943150046598, "loss": 0.8046, "step": 330 }, { "epoch": 0.87, "learning_rate": 0.00023289841565703632, "loss": 0.812, "step": 340 }, { "epoch": 0.89, "learning_rate": 0.00023010251630941285, "loss": 0.807, "step": 350 }, { "epoch": 0.92, "learning_rate": 0.00022730661696178936, "loss": 0.8003, "step": 360 }, { "epoch": 0.95, "learning_rate": 0.00022451071761416585, "loss": 0.802, "step": 370 }, { "epoch": 0.97, "learning_rate": 0.00022171481826654239, "loss": 0.8073, "step": 380 }, { "epoch": 1.0, "learning_rate": 0.0002189189189189189, "loss": 0.8055, "step": 390 }, { "epoch": 1.02, "learning_rate": 0.00021612301957129543, "loss": 0.8132, "step": 400 }, { "epoch": 1.02, "eval_loss": 0.8033702373504639, "eval_runtime": 87.6065, "eval_samples_per_second": 22.829, "eval_steps_per_second": 2.854, "step": 400 }, { "epoch": 1.05, "learning_rate": 0.00021332712022367195, "loss": 0.7992, "step": 410 }, { "epoch": 1.07, "learning_rate": 0.00021053122087604843, "loss": 0.7945, "step": 420 }, { "epoch": 1.1, "learning_rate": 0.00020773532152842497, "loss": 0.7917, "step": 430 }, { "epoch": 1.12, "learning_rate": 0.00020493942218080148, "loss": 0.8002, "step": 440 }, { "epoch": 1.15, "learning_rate": 0.00020214352283317796, "loss": 0.7934, "step": 450 }, { "epoch": 1.18, "learning_rate": 0.0001993476234855545, "loss": 0.7982, "step": 460 }, { "epoch": 1.2, "learning_rate": 0.000196551724137931, "loss": 0.7995, "step": 470 }, { "epoch": 1.23, "learning_rate": 0.00019375582479030755, "loss": 0.7935, "step": 480 }, { "epoch": 1.25, "learning_rate": 0.00019095992544268406, "loss": 0.8003, "step": 490 }, { "epoch": 1.28, "learning_rate": 0.00018816402609506054, "loss": 0.7967, "step": 500 }, { "epoch": 1.3, "learning_rate": 0.00018536812674743708, "loss": 0.7962, "step": 510 }, { "epoch": 1.33, "learning_rate": 0.0001825722273998136, "loss": 0.7956, "step": 520 }, { "epoch": 1.36, "learning_rate": 0.00017977632805219013, "loss": 0.7958, "step": 530 }, { "epoch": 1.38, "learning_rate": 0.00017698042870456661, "loss": 0.7944, "step": 540 }, { "epoch": 1.41, "learning_rate": 0.00017418452935694312, "loss": 0.7945, "step": 550 }, { "epoch": 1.43, "learning_rate": 0.00017138863000931966, "loss": 0.802, "step": 560 }, { "epoch": 1.46, "learning_rate": 0.00016859273066169617, "loss": 0.7923, "step": 570 }, { "epoch": 1.48, "learning_rate": 0.00016579683131407266, "loss": 0.8024, "step": 580 }, { "epoch": 1.51, "learning_rate": 0.0001630009319664492, "loss": 0.7902, "step": 590 }, { "epoch": 1.53, "learning_rate": 0.0001602050326188257, "loss": 0.8066, "step": 600 }, { "epoch": 1.53, "eval_loss": 0.7989646196365356, "eval_runtime": 87.5148, "eval_samples_per_second": 22.853, "eval_steps_per_second": 2.857, "step": 600 }, { "epoch": 1.56, "learning_rate": 0.00015740913327120224, "loss": 0.7958, "step": 610 }, { "epoch": 1.59, "learning_rate": 0.00015461323392357873, "loss": 0.7837, "step": 620 }, { "epoch": 1.61, "learning_rate": 0.00015181733457595524, "loss": 0.8008, "step": 630 }, { "epoch": 1.64, "learning_rate": 0.00014902143522833175, "loss": 0.7964, "step": 640 }, { "epoch": 1.66, "learning_rate": 0.0001462255358807083, "loss": 0.7951, "step": 650 }, { "epoch": 1.69, "learning_rate": 0.0001434296365330848, "loss": 0.8012, "step": 660 }, { "epoch": 1.71, "learning_rate": 0.0001406337371854613, "loss": 0.7928, "step": 670 }, { "epoch": 1.74, "learning_rate": 0.00013783783783783782, "loss": 0.7903, "step": 680 }, { "epoch": 1.76, "learning_rate": 0.00013504193849021433, "loss": 0.7949, "step": 690 }, { "epoch": 1.79, "learning_rate": 0.00013224603914259084, "loss": 0.7866, "step": 700 }, { "epoch": 1.82, "learning_rate": 0.00012945013979496738, "loss": 0.7889, "step": 710 }, { "epoch": 1.84, "learning_rate": 0.0001266542404473439, "loss": 0.7928, "step": 720 }, { "epoch": 1.87, "learning_rate": 0.0001238583410997204, "loss": 0.7932, "step": 730 }, { "epoch": 1.89, "learning_rate": 0.00012106244175209691, "loss": 0.7963, "step": 740 }, { "epoch": 1.92, "learning_rate": 0.00011826654240447344, "loss": 0.7979, "step": 750 }, { "epoch": 1.94, "learning_rate": 0.00011547064305684995, "loss": 0.7943, "step": 760 }, { "epoch": 1.97, "learning_rate": 0.00011267474370922644, "loss": 0.7983, "step": 770 }, { "epoch": 1.99, "learning_rate": 0.00010987884436160297, "loss": 0.801, "step": 780 }, { "epoch": 2.02, "learning_rate": 0.0001070829450139795, "loss": 0.7836, "step": 790 }, { "epoch": 2.05, "learning_rate": 0.000104287045666356, "loss": 0.785, "step": 800 }, { "epoch": 2.05, "eval_loss": 0.7959330677986145, "eval_runtime": 88.4745, "eval_samples_per_second": 22.605, "eval_steps_per_second": 2.826, "step": 800 } ], "max_steps": 1173, "num_train_epochs": 3, "total_flos": 1.040223650649342e+18, "trial_name": null, "trial_params": null }