{ "best_metric": 1.1753153800964355, "best_model_checkpoint": "/mnt/bn/qingyi-bn-lq/llama/saved_llamainstinwild/checkpoint-124", "epoch": 2.52899936265137, "global_step": 124, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08, "eval_loss": 1.39645254611969, "eval_runtime": 37.7333, "eval_samples_per_second": 53.004, "eval_steps_per_second": 0.848, "step": 4 }, { "epoch": 0.16, "eval_loss": 1.2984137535095215, "eval_runtime": 37.7164, "eval_samples_per_second": 53.027, "eval_steps_per_second": 0.848, "step": 8 }, { "epoch": 0.24, "eval_loss": 1.2540132999420166, "eval_runtime": 37.6847, "eval_samples_per_second": 53.072, "eval_steps_per_second": 0.849, "step": 12 }, { "epoch": 0.33, "eval_loss": 1.2342448234558105, "eval_runtime": 37.6879, "eval_samples_per_second": 53.067, "eval_steps_per_second": 0.849, "step": 16 }, { "epoch": 0.41, "learning_rate": 0.0002664335664335664, "loss": 1.3125, "step": 20 }, { "epoch": 0.41, "eval_loss": 1.2209277153015137, "eval_runtime": 37.7168, "eval_samples_per_second": 53.027, "eval_steps_per_second": 0.848, "step": 20 }, { "epoch": 0.49, "eval_loss": 1.2117934226989746, "eval_runtime": 37.6897, "eval_samples_per_second": 53.065, "eval_steps_per_second": 0.849, "step": 24 }, { "epoch": 0.57, "eval_loss": 1.2057584524154663, "eval_runtime": 37.7691, "eval_samples_per_second": 52.953, "eval_steps_per_second": 0.847, "step": 28 }, { "epoch": 0.65, "eval_loss": 1.2009178400039673, "eval_runtime": 37.7026, "eval_samples_per_second": 53.047, "eval_steps_per_second": 0.849, "step": 32 }, { "epoch": 0.73, "eval_loss": 1.197250485420227, "eval_runtime": 37.6765, "eval_samples_per_second": 53.084, "eval_steps_per_second": 0.849, "step": 36 }, { "epoch": 0.82, "learning_rate": 0.00022447552447552445, "loss": 1.2095, "step": 40 }, { "epoch": 0.82, "eval_loss": 1.1943359375, "eval_runtime": 37.6881, "eval_samples_per_second": 53.067, "eval_steps_per_second": 0.849, "step": 40 }, { "epoch": 0.9, "eval_loss": 1.1920316219329834, "eval_runtime": 37.694, "eval_samples_per_second": 53.059, "eval_steps_per_second": 0.849, "step": 44 }, { "epoch": 0.98, "eval_loss": 1.1898770332336426, "eval_runtime": 37.7089, "eval_samples_per_second": 53.038, "eval_steps_per_second": 0.849, "step": 48 }, { "epoch": 1.06, "eval_loss": 1.1879488229751587, "eval_runtime": 37.6975, "eval_samples_per_second": 53.054, "eval_steps_per_second": 0.849, "step": 52 }, { "epoch": 1.14, "eval_loss": 1.1864111423492432, "eval_runtime": 37.7188, "eval_samples_per_second": 53.024, "eval_steps_per_second": 0.848, "step": 56 }, { "epoch": 1.22, "learning_rate": 0.00018251748251748253, "loss": 1.188, "step": 60 }, { "epoch": 1.22, "eval_loss": 1.1849411725997925, "eval_runtime": 37.7686, "eval_samples_per_second": 52.954, "eval_steps_per_second": 0.847, "step": 60 }, { "epoch": 1.31, "eval_loss": 1.1838239431381226, "eval_runtime": 37.6945, "eval_samples_per_second": 53.058, "eval_steps_per_second": 0.849, "step": 64 }, { "epoch": 1.39, "eval_loss": 1.182841181755066, "eval_runtime": 37.6624, "eval_samples_per_second": 53.103, "eval_steps_per_second": 0.85, "step": 68 }, { "epoch": 1.47, "eval_loss": 1.1818122863769531, "eval_runtime": 37.7233, "eval_samples_per_second": 53.018, "eval_steps_per_second": 0.848, "step": 72 }, { "epoch": 1.55, "eval_loss": 1.1808911561965942, "eval_runtime": 37.7038, "eval_samples_per_second": 53.045, "eval_steps_per_second": 0.849, "step": 76 }, { "epoch": 1.63, "learning_rate": 0.00014055944055944055, "loss": 1.1876, "step": 80 }, { "epoch": 1.63, "eval_loss": 1.1801403760910034, "eval_runtime": 37.7277, "eval_samples_per_second": 53.011, "eval_steps_per_second": 0.848, "step": 80 }, { "epoch": 1.71, "eval_loss": 1.1793811321258545, "eval_runtime": 37.6641, "eval_samples_per_second": 53.101, "eval_steps_per_second": 0.85, "step": 84 }, { "epoch": 1.79, "eval_loss": 1.1788283586502075, "eval_runtime": 37.7097, "eval_samples_per_second": 53.037, "eval_steps_per_second": 0.849, "step": 88 }, { "epoch": 1.88, "eval_loss": 1.1783016920089722, "eval_runtime": 37.7104, "eval_samples_per_second": 53.036, "eval_steps_per_second": 0.849, "step": 92 }, { "epoch": 1.96, "eval_loss": 1.177699327468872, "eval_runtime": 37.6884, "eval_samples_per_second": 53.067, "eval_steps_per_second": 0.849, "step": 96 }, { "epoch": 2.04, "learning_rate": 9.860139860139858e-05, "loss": 1.182, "step": 100 }, { "epoch": 2.04, "eval_loss": 1.1773412227630615, "eval_runtime": 37.7134, "eval_samples_per_second": 53.032, "eval_steps_per_second": 0.849, "step": 100 }, { "epoch": 2.12, "eval_loss": 1.176965594291687, "eval_runtime": 37.6735, "eval_samples_per_second": 53.088, "eval_steps_per_second": 0.849, "step": 104 }, { "epoch": 2.2, "eval_loss": 1.1765486001968384, "eval_runtime": 37.7771, "eval_samples_per_second": 52.942, "eval_steps_per_second": 0.847, "step": 108 }, { "epoch": 2.28, "eval_loss": 1.1761963367462158, "eval_runtime": 37.7211, "eval_samples_per_second": 53.021, "eval_steps_per_second": 0.848, "step": 112 }, { "epoch": 2.37, "eval_loss": 1.1757991313934326, "eval_runtime": 37.7438, "eval_samples_per_second": 52.989, "eval_steps_per_second": 0.848, "step": 116 }, { "epoch": 2.45, "learning_rate": 5.664335664335664e-05, "loss": 1.1785, "step": 120 }, { "epoch": 2.45, "eval_loss": 1.1754933595657349, "eval_runtime": 37.7128, "eval_samples_per_second": 53.032, "eval_steps_per_second": 0.849, "step": 120 }, { "epoch": 2.53, "eval_loss": 1.1753153800964355, "eval_runtime": 37.6929, "eval_samples_per_second": 53.06, "eval_steps_per_second": 0.849, "step": 124 } ], "max_steps": 147, "num_train_epochs": 3, "total_flos": 1.4455053197393265e+18, "trial_name": null, "trial_params": null }