{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "global_step": 8823, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 7.5471698113207555e-06, "loss": 6.6425, "step": 100 }, { "epoch": 0.07, "learning_rate": 1.5094339622641511e-05, "loss": 4.8785, "step": 200 }, { "epoch": 0.1, "learning_rate": 1.9999174617418052e-05, "loss": 2.0576, "step": 300 }, { "epoch": 0.14, "learning_rate": 1.9987722672633802e-05, "loss": 1.7927, "step": 400 }, { "epoch": 0.17, "learning_rate": 1.9962812964571567e-05, "loss": 1.7317, "step": 500 }, { "epoch": 0.2, "learning_rate": 1.9924479057334537e-05, "loss": 1.7022, "step": 600 }, { "epoch": 0.24, "learning_rate": 1.9872772603202818e-05, "loss": 1.6741, "step": 700 }, { "epoch": 0.27, "learning_rate": 1.9807763273035574e-05, "loss": 1.6608, "step": 800 }, { "epoch": 0.31, "learning_rate": 1.9729538662394363e-05, "loss": 1.6597, "step": 900 }, { "epoch": 0.34, "learning_rate": 1.9638204173514217e-05, "loss": 1.6598, "step": 1000 }, { "epoch": 0.37, "learning_rate": 1.953388287328142e-05, "loss": 1.6501, "step": 1100 }, { "epoch": 0.41, "learning_rate": 1.9416715327409453e-05, "loss": 1.637, "step": 1200 }, { "epoch": 0.44, "learning_rate": 1.9286859411036396e-05, "loss": 1.6317, "step": 1300 }, { "epoch": 0.48, "learning_rate": 1.914449009599919e-05, "loss": 1.6292, "step": 1400 }, { "epoch": 0.51, "learning_rate": 1.898979921507119e-05, "loss": 1.6279, "step": 1500 }, { "epoch": 0.54, "learning_rate": 1.8822995203480823e-05, "loss": 1.6225, "step": 1600 }, { "epoch": 0.58, "learning_rate": 1.86443028180596e-05, "loss": 1.6234, "step": 1700 }, { "epoch": 0.61, "learning_rate": 1.8453962834397847e-05, "loss": 1.6073, "step": 1800 }, { "epoch": 0.65, "learning_rate": 1.8252231722416328e-05, "loss": 1.6119, "step": 1900 }, { "epoch": 0.68, "learning_rate": 1.8039381300790812e-05, "loss": 1.5936, "step": 2000 }, { "epoch": 0.71, "learning_rate": 1.781569837069528e-05, "loss": 1.6175, "step": 2100 }, { "epoch": 0.75, "learning_rate": 1.758148432935723e-05, "loss": 1.6093, "step": 2200 }, { "epoch": 0.78, "learning_rate": 1.7337054763945823e-05, "loss": 1.6081, "step": 2300 }, { "epoch": 0.82, "learning_rate": 1.7082739026340097e-05, "loss": 1.6064, "step": 2400 }, { "epoch": 0.85, "learning_rate": 1.6818879789350134e-05, "loss": 1.6016, "step": 2500 }, { "epoch": 0.88, "learning_rate": 1.6545832584989235e-05, "loss": 1.6062, "step": 2600 }, { "epoch": 0.92, "learning_rate": 1.6263965325419206e-05, "loss": 1.6042, "step": 2700 }, { "epoch": 0.95, "learning_rate": 1.5973657807214245e-05, "loss": 1.5967, "step": 2800 }, { "epoch": 0.99, "learning_rate": 1.5675301199611413e-05, "loss": 1.6095, "step": 2900 }, { "epoch": 1.02, "learning_rate": 1.536929751743723e-05, "loss": 1.5958, "step": 3000 }, { "epoch": 1.05, "learning_rate": 1.5056059079420575e-05, "loss": 1.6012, "step": 3100 }, { "epoch": 1.09, "learning_rate": 1.4736007952621852e-05, "loss": 1.5872, "step": 3200 }, { "epoch": 1.12, "learning_rate": 1.4409575383726852e-05, "loss": 1.5967, "step": 3300 }, { "epoch": 1.16, "learning_rate": 1.4077201217971817e-05, "loss": 1.5913, "step": 3400 }, { "epoch": 1.19, "learning_rate": 1.3739333306482481e-05, "loss": 1.5902, "step": 3500 }, { "epoch": 1.22, "learning_rate": 1.3396426902825753e-05, "loss": 1.5908, "step": 3600 }, { "epoch": 1.26, "learning_rate": 1.3048944049587138e-05, "loss": 1.5883, "step": 3700 }, { "epoch": 1.29, "learning_rate": 1.2697352955800396e-05, "loss": 1.5907, "step": 3800 }, { "epoch": 1.33, "learning_rate": 1.2342127366068364e-05, "loss": 1.5864, "step": 3900 }, { "epoch": 1.36, "learning_rate": 1.1983745922224985e-05, "loss": 1.5912, "step": 4000 }, { "epoch": 1.39, "learning_rate": 1.1622691518398636e-05, "loss": 1.59, "step": 4100 }, { "epoch": 1.43, "learning_rate": 1.1259450650345798e-05, "loss": 1.5837, "step": 4200 }, { "epoch": 1.46, "learning_rate": 1.0894512759931785e-05, "loss": 1.5805, "step": 4300 }, { "epoch": 1.5, "learning_rate": 1.0528369575641793e-05, "loss": 1.5916, "step": 4400 }, { "epoch": 1.53, "learning_rate": 1.0161514450010882e-05, "loss": 1.5975, "step": 4500 }, { "epoch": 1.56, "learning_rate": 9.794441694865673e-06, "loss": 1.5885, "step": 4600 }, { "epoch": 1.6, "learning_rate": 9.427645915273446e-06, "loss": 1.5849, "step": 4700 }, { "epoch": 1.63, "learning_rate": 9.061621343096156e-06, "loss": 1.5754, "step": 4800 }, { "epoch": 1.67, "learning_rate": 8.696861171047268e-06, "loss": 1.5906, "step": 4900 }, { "epoch": 1.7, "learning_rate": 8.33385688814881e-06, "loss": 1.5837, "step": 5000 }, { "epoch": 1.73, "learning_rate": 7.97309761748402e-06, "loss": 1.5807, "step": 5100 }, { "epoch": 1.77, "learning_rate": 7.615069457137927e-06, "loss": 1.5907, "step": 5200 }, { "epoch": 1.8, "learning_rate": 7.260254825213902e-06, "loss": 1.5826, "step": 5300 }, { "epoch": 1.84, "learning_rate": 6.909131809808755e-06, "loss": 1.5796, "step": 5400 }, { "epoch": 1.87, "learning_rate": 6.562173524822188e-06, "loss": 1.5814, "step": 5500 }, { "epoch": 1.9, "learning_rate": 6.219847472468641e-06, "loss": 1.5861, "step": 5600 }, { "epoch": 1.94, "learning_rate": 5.882614913350499e-06, "loss": 1.5744, "step": 5700 }, { "epoch": 1.97, "learning_rate": 5.550930244941448e-06, "loss": 1.5797, "step": 5800 }, { "epoch": 2.01, "learning_rate": 5.2252403893173835e-06, "loss": 1.5857, "step": 5900 }, { "epoch": 2.04, "learning_rate": 4.9059841909599456e-06, "loss": 1.5728, "step": 6000 }, { "epoch": 2.07, "learning_rate": 4.593591825444028e-06, "loss": 1.5701, "step": 6100 }, { "epoch": 2.11, "learning_rate": 4.288484219806016e-06, "loss": 1.5846, "step": 6200 }, { "epoch": 2.14, "learning_rate": 3.991072485373858e-06, "loss": 1.5716, "step": 6300 }, { "epoch": 2.18, "learning_rate": 3.7017573638230296e-06, "loss": 1.5798, "step": 6400 }, { "epoch": 2.21, "learning_rate": 3.420928687204965e-06, "loss": 1.5739, "step": 6500 }, { "epoch": 2.24, "learning_rate": 3.1489648526753913e-06, "loss": 1.5845, "step": 6600 }, { "epoch": 2.28, "learning_rate": 2.8862323126304427e-06, "loss": 1.5808, "step": 6700 }, { "epoch": 2.31, "learning_rate": 2.6330850809374685e-06, "loss": 1.5704, "step": 6800 }, { "epoch": 2.35, "learning_rate": 2.389864255925913e-06, "loss": 1.5769, "step": 6900 }, { "epoch": 2.38, "learning_rate": 2.1568975607809895e-06, "loss": 1.5765, "step": 7000 }, { "epoch": 2.41, "learning_rate": 1.934498901959424e-06, "loss": 1.5665, "step": 7100 }, { "epoch": 2.45, "learning_rate": 1.722967946222277e-06, "loss": 1.5822, "step": 7200 }, { "epoch": 2.48, "learning_rate": 1.5225897168548032e-06, "loss": 1.5931, "step": 7300 }, { "epoch": 2.52, "learning_rate": 1.3336342096173239e-06, "loss": 1.5855, "step": 7400 }, { "epoch": 2.55, "learning_rate": 1.1563560289446819e-06, "loss": 1.5736, "step": 7500 }, { "epoch": 2.58, "learning_rate": 9.909940448844412e-07, "loss": 1.5942, "step": 7600 }, { "epoch": 2.62, "learning_rate": 8.377710712360631e-07, "loss": 1.5792, "step": 7700 }, { "epoch": 2.65, "learning_rate": 6.968935653247766e-07, "loss": 1.5715, "step": 7800 }, { "epoch": 2.69, "learning_rate": 5.685513498146533e-07, "loss": 1.5777, "step": 7900 }, { "epoch": 2.72, "learning_rate": 4.529173569357459e-07, "loss": 1.5795, "step": 8000 }, { "epoch": 2.75, "learning_rate": 3.5014739546990087e-07, "loss": 1.5799, "step": 8100 }, { "epoch": 2.79, "learning_rate": 2.603799408092389e-07, "loss": 1.5743, "step": 8200 }, { "epoch": 2.82, "learning_rate": 1.8373594837017505e-07, "loss": 1.5779, "step": 8300 }, { "epoch": 2.86, "learning_rate": 1.2031869061438494e-07, "loss": 1.5689, "step": 8400 }, { "epoch": 2.89, "learning_rate": 7.02136178963242e-08, "loss": 1.5854, "step": 8500 }, { "epoch": 2.92, "learning_rate": 3.3488243324814044e-08, "loss": 1.5654, "step": 8600 }, { "epoch": 2.96, "learning_rate": 1.0192051793809221e-08, "loss": 1.5818, "step": 8700 }, { "epoch": 2.99, "learning_rate": 3.5643330492995953e-10, "loss": 1.5853, "step": 8800 }, { "epoch": 3.0, "step": 8823, "total_flos": 6.175868485067866e+18, "train_loss": 1.6989395520347432, "train_runtime": 13309.6743, "train_samples_per_second": 21.208, "train_steps_per_second": 0.663 } ], "max_steps": 8823, "num_train_epochs": 3, "total_flos": 6.175868485067866e+18, "trial_name": null, "trial_params": null }