{ "best_metric": 2.9723432064056396, "best_model_checkpoint": "output/lil-uzi-vert/checkpoint-218", "epoch": 1.0, "global_step": 218, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 0.0001370219946819302, "loss": 2.8388, "step": 5 }, { "epoch": 0.05, "learning_rate": 0.00013648890251469632, "loss": 2.9, "step": 10 }, { "epoch": 0.07, "learning_rate": 0.00013560349006508517, "loss": 2.895, "step": 15 }, { "epoch": 0.09, "learning_rate": 0.0001343703523221556, "loss": 2.8357, "step": 20 }, { "epoch": 0.11, "learning_rate": 0.00013279588885081002, "loss": 2.8199, "step": 25 }, { "epoch": 0.14, "learning_rate": 0.00013088827058023233, "loss": 2.7435, "step": 30 }, { "epoch": 0.16, "learning_rate": 0.00012865739739954807, "loss": 2.8855, "step": 35 }, { "epoch": 0.18, "learning_rate": 0.000126114846780772, "loss": 2.7549, "step": 40 }, { "epoch": 0.21, "learning_rate": 0.00012327381369567087, "loss": 2.84, "step": 45 }, { "epoch": 0.23, "learning_rate": 0.00012014904213835432, "loss": 2.8624, "step": 50 }, { "epoch": 0.25, "learning_rate": 0.00011675674860896702, "loss": 2.8777, "step": 55 }, { "epoch": 0.28, "learning_rate": 0.00011311453795557527, "loss": 2.9297, "step": 60 }, { "epoch": 0.3, "learning_rate": 0.00010924131201100249, "loss": 2.567, "step": 65 }, { "epoch": 0.32, "learning_rate": 0.00010515717149875348, "loss": 2.8801, "step": 70 }, { "epoch": 0.34, "learning_rate": 0.00010088331171710603, "loss": 2.9652, "step": 75 }, { "epoch": 0.37, "learning_rate": 9.644191254273106e-05, "loss": 2.6114, "step": 80 }, { "epoch": 0.39, "learning_rate": 9.185602332468731e-05, "loss": 2.7817, "step": 85 }, { "epoch": 0.41, "learning_rate": 8.714944326614947e-05, "loss": 2.8251, "step": 90 }, { "epoch": 0.44, "learning_rate": 8.234659791464915e-05, "loss": 2.8489, "step": 95 }, { "epoch": 0.46, "learning_rate": 7.747241240180279e-05, "loss": 2.7123, "step": 100 }, { "epoch": 0.48, "learning_rate": 7.255218209036647e-05, "loss": 2.6698, "step": 105 }, { "epoch": 0.5, "learning_rate": 6.761144129991962e-05, "loss": 2.7934, "step": 110 }, { "epoch": 0.53, "learning_rate": 6.267583079244176e-05, "loss": 2.8273, "step": 115 }, { "epoch": 0.55, "learning_rate": 5.777096470549139e-05, "loss": 2.8665, "step": 120 }, { "epoch": 0.57, "learning_rate": 5.2922297623555134e-05, "loss": 2.672, "step": 125 }, { "epoch": 0.6, "learning_rate": 4.815499247742421e-05, "loss": 2.6851, "step": 130 }, { "epoch": 0.62, "learning_rate": 4.3493789957153346e-05, "loss": 2.6811, "step": 135 }, { "epoch": 0.64, "learning_rate": 3.8962880116305346e-05, "loss": 2.8504, "step": 140 }, { "epoch": 0.67, "learning_rate": 3.458577683381216e-05, "loss": 2.7296, "step": 145 }, { "epoch": 0.69, "learning_rate": 3.0385195784951363e-05, "loss": 2.9651, "step": 150 }, { "epoch": 0.71, "learning_rate": 2.638293655472844e-05, "loss": 2.8441, "step": 155 }, { "epoch": 0.73, "learning_rate": 2.259976950545437e-05, "loss": 2.7186, "step": 160 }, { "epoch": 0.76, "learning_rate": 1.9055327985640047e-05, "loss": 2.8246, "step": 165 }, { "epoch": 0.78, "learning_rate": 1.5768006439603586e-05, "loss": 2.5588, "step": 170 }, { "epoch": 0.8, "learning_rate": 1.2754864946569397e-05, "loss": 2.6992, "step": 175 }, { "epoch": 0.83, "learning_rate": 1.0031540684667565e-05, "loss": 2.6534, "step": 180 }, { "epoch": 0.85, "learning_rate": 7.612166779304589e-06, "loss": 2.7594, "step": 185 }, { "epoch": 0.87, "learning_rate": 5.5092989570564855e-06, "loss": 2.5534, "step": 190 }, { "epoch": 0.89, "learning_rate": 3.733850385723696e-06, "loss": 2.955, "step": 195 }, { "epoch": 0.92, "learning_rate": 2.2950350387073973e-06, "loss": 2.8938, "step": 200 }, { "epoch": 0.94, "learning_rate": 1.2003198776252143e-06, "loss": 2.8361, "step": 205 }, { "epoch": 0.96, "learning_rate": 4.553861013240044e-07, "loss": 2.8043, "step": 210 }, { "epoch": 0.99, "learning_rate": 6.409966239244377e-08, "loss": 2.8706, "step": 215 }, { "epoch": 1.0, "eval_loss": 2.9723432064056396, "eval_runtime": 14.4988, "eval_samples_per_second": 20.898, "eval_steps_per_second": 2.621, "step": 218 } ], "max_steps": 872, "num_train_epochs": 4, "total_flos": 227585359872000.0, "trial_name": null, "trial_params": null }