{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.2726859566732313, "eval_steps": 50, "global_step": 900, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 0.00019, "loss": 1.4805, "step": 50 }, { "epoch": 0.02, "eval_loss": 0.935647189617157, "eval_runtime": 1424.1427, "eval_samples_per_second": 9.27, "eval_steps_per_second": 1.159, "step": 50 }, { "epoch": 0.03, "learning_rate": 0.00018, "loss": 0.8948, "step": 100 }, { "epoch": 0.03, "eval_loss": 0.8673275709152222, "eval_runtime": 1447.9268, "eval_samples_per_second": 9.118, "eval_steps_per_second": 1.14, "step": 100 }, { "epoch": 0.05, "learning_rate": 0.00017, "loss": 0.8357, "step": 150 }, { "epoch": 0.05, "eval_loss": 0.8413857221603394, "eval_runtime": 1449.9965, "eval_samples_per_second": 9.105, "eval_steps_per_second": 1.139, "step": 150 }, { "epoch": 0.06, "learning_rate": 0.00016, "loss": 0.8461, "step": 200 }, { "epoch": 0.06, "eval_loss": 0.8236712217330933, "eval_runtime": 1428.6867, "eval_samples_per_second": 9.241, "eval_steps_per_second": 1.156, "step": 200 }, { "epoch": 0.08, "learning_rate": 0.00015000000000000001, "loss": 0.8046, "step": 250 }, { "epoch": 0.08, "eval_loss": 0.7915458679199219, "eval_runtime": 1422.1414, "eval_samples_per_second": 9.283, "eval_steps_per_second": 1.161, "step": 250 }, { "epoch": 0.09, "learning_rate": 0.00014, "loss": 0.744, "step": 300 }, { "epoch": 0.09, "eval_loss": 0.7999407649040222, "eval_runtime": 1408.4646, "eval_samples_per_second": 9.373, "eval_steps_per_second": 1.172, "step": 300 }, { "epoch": 0.11, "learning_rate": 0.00013000000000000002, "loss": 0.7219, "step": 350 }, { "epoch": 0.11, "eval_loss": 0.7075337171554565, "eval_runtime": 1407.3666, "eval_samples_per_second": 9.381, "eval_steps_per_second": 1.173, "step": 350 }, { "epoch": 0.12, "learning_rate": 0.00012, "loss": 0.7027, "step": 400 }, { "epoch": 0.12, "eval_loss": 0.697420060634613, "eval_runtime": 1391.6821, "eval_samples_per_second": 9.486, "eval_steps_per_second": 1.186, "step": 400 }, { "epoch": 0.14, "learning_rate": 0.00011000000000000002, "loss": 0.6982, "step": 450 }, { "epoch": 0.14, "eval_loss": 0.6917020678520203, "eval_runtime": 1383.9747, "eval_samples_per_second": 9.539, "eval_steps_per_second": 1.193, "step": 450 }, { "epoch": 0.15, "learning_rate": 0.0001, "loss": 0.6746, "step": 500 }, { "epoch": 0.15, "eval_loss": 0.6870374083518982, "eval_runtime": 1381.4308, "eval_samples_per_second": 9.557, "eval_steps_per_second": 1.195, "step": 500 }, { "epoch": 0.17, "learning_rate": 9e-05, "loss": 0.6667, "step": 550 }, { "epoch": 0.17, "eval_loss": 0.6836341619491577, "eval_runtime": 1384.702, "eval_samples_per_second": 9.534, "eval_steps_per_second": 1.192, "step": 550 }, { "epoch": 0.18, "learning_rate": 8e-05, "loss": 0.6872, "step": 600 }, { "epoch": 0.18, "eval_loss": 0.6808720231056213, "eval_runtime": 1386.676, "eval_samples_per_second": 9.521, "eval_steps_per_second": 1.191, "step": 600 }, { "epoch": 0.2, "learning_rate": 7e-05, "loss": 0.6793, "step": 650 }, { "epoch": 0.2, "eval_loss": 0.6773844957351685, "eval_runtime": 1607.2067, "eval_samples_per_second": 8.214, "eval_steps_per_second": 1.027, "step": 650 }, { "epoch": 0.21, "learning_rate": 6e-05, "loss": 0.6989, "step": 700 }, { "epoch": 0.21, "eval_loss": 0.6752211451530457, "eval_runtime": 1618.5926, "eval_samples_per_second": 8.156, "eval_steps_per_second": 1.02, "step": 700 }, { "epoch": 0.23, "learning_rate": 5e-05, "loss": 0.6525, "step": 750 }, { "epoch": 0.23, "eval_loss": 0.6731519103050232, "eval_runtime": 1621.8237, "eval_samples_per_second": 8.14, "eval_steps_per_second": 1.018, "step": 750 }, { "epoch": 0.24, "learning_rate": 4e-05, "loss": 0.6722, "step": 800 }, { "epoch": 0.24, "eval_loss": 0.6716203689575195, "eval_runtime": 1622.2693, "eval_samples_per_second": 8.138, "eval_steps_per_second": 1.018, "step": 800 }, { "epoch": 0.26, "learning_rate": 3e-05, "loss": 0.6687, "step": 850 }, { "epoch": 0.26, "eval_loss": 0.6699801683425903, "eval_runtime": 1622.5334, "eval_samples_per_second": 8.137, "eval_steps_per_second": 1.018, "step": 850 }, { "epoch": 0.27, "learning_rate": 2e-05, "loss": 0.6381, "step": 900 }, { "epoch": 0.27, "eval_loss": 0.6691889762878418, "eval_runtime": 1621.8507, "eval_samples_per_second": 8.14, "eval_steps_per_second": 1.018, "step": 900 } ], "logging_steps": 50, "max_steps": 1000, "num_train_epochs": 1, "save_steps": 50, "total_flos": 6.760273113356698e+16, "trial_name": null, "trial_params": null }