{ "best_metric": 3.0165445804595947, "best_model_checkpoint": "output/lil-uzi-vert/checkpoint-218", "epoch": 1.0, "global_step": 218, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 0.0001370219946819302, "loss": 4.0012, "step": 5 }, { "epoch": 0.05, "learning_rate": 0.0001364889025146963, "loss": 3.8869, "step": 10 }, { "epoch": 0.07, "learning_rate": 0.00013560349006508517, "loss": 3.6034, "step": 15 }, { "epoch": 0.09, "learning_rate": 0.0001343703523221556, "loss": 3.5643, "step": 20 }, { "epoch": 0.11, "learning_rate": 0.00013279588885081002, "loss": 3.3069, "step": 25 }, { "epoch": 0.14, "learning_rate": 0.0001308882705802323, "loss": 3.431, "step": 30 }, { "epoch": 0.16, "learning_rate": 0.00012865739739954807, "loss": 3.5848, "step": 35 }, { "epoch": 0.18, "learning_rate": 0.00012611484678077197, "loss": 3.4069, "step": 40 }, { "epoch": 0.21, "learning_rate": 0.00012327381369567084, "loss": 3.3532, "step": 45 }, { "epoch": 0.23, "learning_rate": 0.00012014904213835435, "loss": 3.4164, "step": 50 }, { "epoch": 0.25, "learning_rate": 0.00011675674860896696, "loss": 3.5117, "step": 55 }, { "epoch": 0.28, "learning_rate": 0.00011311453795557528, "loss": 3.2964, "step": 60 }, { "epoch": 0.3, "learning_rate": 0.00010924131201100248, "loss": 3.2104, "step": 65 }, { "epoch": 0.32, "learning_rate": 0.00010515717149875347, "loss": 3.0675, "step": 70 }, { "epoch": 0.34, "learning_rate": 0.00010088331171710597, "loss": 3.4575, "step": 75 }, { "epoch": 0.37, "learning_rate": 9.644191254273106e-05, "loss": 3.1265, "step": 80 }, { "epoch": 0.39, "learning_rate": 9.185602332468734e-05, "loss": 3.3004, "step": 85 }, { "epoch": 0.41, "learning_rate": 8.714944326614944e-05, "loss": 3.2766, "step": 90 }, { "epoch": 0.44, "learning_rate": 8.234659791464919e-05, "loss": 3.1492, "step": 95 }, { "epoch": 0.46, "learning_rate": 7.747241240180272e-05, "loss": 3.3842, "step": 100 }, { "epoch": 0.48, "learning_rate": 7.255218209036649e-05, "loss": 3.3591, "step": 105 }, { "epoch": 0.5, "learning_rate": 6.76114412999196e-05, "loss": 3.1375, "step": 110 }, { "epoch": 0.53, "learning_rate": 6.267583079244174e-05, "loss": 3.2619, "step": 115 }, { "epoch": 0.55, "learning_rate": 5.777096470549132e-05, "loss": 3.1689, "step": 120 }, { "epoch": 0.57, "learning_rate": 5.2922297623555134e-05, "loss": 3.264, "step": 125 }, { "epoch": 0.6, "learning_rate": 4.815499247742428e-05, "loss": 3.0831, "step": 130 }, { "epoch": 0.62, "learning_rate": 4.349378995715337e-05, "loss": 3.1357, "step": 135 }, { "epoch": 0.64, "learning_rate": 3.896288011630533e-05, "loss": 3.164, "step": 140 }, { "epoch": 0.67, "learning_rate": 3.458577683381209e-05, "loss": 3.2134, "step": 145 }, { "epoch": 0.69, "learning_rate": 3.0385195784951376e-05, "loss": 3.1053, "step": 150 }, { "epoch": 0.71, "learning_rate": 2.63829365547284e-05, "loss": 3.099, "step": 155 }, { "epoch": 0.73, "learning_rate": 2.2599769505454377e-05, "loss": 3.1365, "step": 160 }, { "epoch": 0.76, "learning_rate": 1.905532798564004e-05, "loss": 3.1402, "step": 165 }, { "epoch": 0.78, "learning_rate": 1.5768006439603532e-05, "loss": 3.1203, "step": 170 }, { "epoch": 0.8, "learning_rate": 1.2754864946569404e-05, "loss": 3.2424, "step": 175 }, { "epoch": 0.83, "learning_rate": 1.0031540684667541e-05, "loss": 3.2244, "step": 180 }, { "epoch": 0.85, "learning_rate": 7.612166779304597e-06, "loss": 3.2007, "step": 185 }, { "epoch": 0.87, "learning_rate": 5.5092989570564855e-06, "loss": 3.0923, "step": 190 }, { "epoch": 0.89, "learning_rate": 3.7338503857237188e-06, "loss": 3.0841, "step": 195 }, { "epoch": 0.92, "learning_rate": 2.295035038707367e-06, "loss": 3.1703, "step": 200 }, { "epoch": 0.94, "learning_rate": 1.2003198776252066e-06, "loss": 3.2195, "step": 205 }, { "epoch": 0.96, "learning_rate": 4.5538610132401196e-07, "loss": 3.0425, "step": 210 }, { "epoch": 0.99, "learning_rate": 6.409966239244377e-08, "loss": 3.1853, "step": 215 }, { "epoch": 1.0, "eval_loss": 3.0165445804595947, "eval_runtime": 8.1641, "eval_samples_per_second": 37.236, "eval_steps_per_second": 4.655, "step": 218 } ], "max_steps": 218, "num_train_epochs": 1, "total_flos": 227454713856000.0, "trial_name": null, "trial_params": null }