{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "global_step": 5538, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 2.945828819068256e-05, "loss": 4.3199, "step": 100 }, { "epoch": 0.07, "learning_rate": 2.8916576381365114e-05, "loss": 2.7819, "step": 200 }, { "epoch": 0.11, "learning_rate": 2.837486457204767e-05, "loss": 2.4102, "step": 300 }, { "epoch": 0.14, "learning_rate": 2.783315276273023e-05, "loss": 2.2734, "step": 400 }, { "epoch": 0.18, "learning_rate": 2.7291440953412788e-05, "loss": 2.1137, "step": 500 }, { "epoch": 0.22, "learning_rate": 2.674972914409534e-05, "loss": 2.0087, "step": 600 }, { "epoch": 0.25, "learning_rate": 2.6208017334777897e-05, "loss": 1.9438, "step": 700 }, { "epoch": 0.29, "learning_rate": 2.5666305525460455e-05, "loss": 1.9278, "step": 800 }, { "epoch": 0.33, "learning_rate": 2.5124593716143014e-05, "loss": 1.8629, "step": 900 }, { "epoch": 0.36, "learning_rate": 2.4582881906825568e-05, "loss": 1.8102, "step": 1000 }, { "epoch": 0.36, "eval_exact_match": 60.84200567644276, "eval_f1": 72.2063882008199, "step": 1000 }, { "epoch": 0.4, "learning_rate": 2.4041170097508126e-05, "loss": 1.8028, "step": 1100 }, { "epoch": 0.43, "learning_rate": 2.3499458288190684e-05, "loss": 1.7879, "step": 1200 }, { "epoch": 0.47, "learning_rate": 2.2957746478873243e-05, "loss": 1.7333, "step": 1300 }, { "epoch": 0.51, "learning_rate": 2.2416034669555797e-05, "loss": 1.6751, "step": 1400 }, { "epoch": 0.54, "learning_rate": 2.1874322860238355e-05, "loss": 1.6925, "step": 1500 }, { "epoch": 0.58, "learning_rate": 2.133261105092091e-05, "loss": 1.6655, "step": 1600 }, { "epoch": 0.61, "learning_rate": 2.0790899241603465e-05, "loss": 1.6675, "step": 1700 }, { "epoch": 0.65, "learning_rate": 2.0249187432286023e-05, "loss": 1.6169, "step": 1800 }, { "epoch": 0.69, "learning_rate": 1.970747562296858e-05, "loss": 1.6765, "step": 1900 }, { "epoch": 0.72, "learning_rate": 1.916576381365114e-05, "loss": 1.6591, "step": 2000 }, { "epoch": 0.72, "eval_exact_match": 64.56953642384106, "eval_f1": 75.74673565914026, "step": 2000 }, { "epoch": 0.76, "learning_rate": 1.8624052004333694e-05, "loss": 1.6145, "step": 2100 }, { "epoch": 0.79, "learning_rate": 1.8082340195016252e-05, "loss": 1.569, "step": 2200 }, { "epoch": 0.83, "learning_rate": 1.754062838569881e-05, "loss": 1.5951, "step": 2300 }, { "epoch": 0.87, "learning_rate": 1.6998916576381368e-05, "loss": 1.5693, "step": 2400 }, { "epoch": 0.9, "learning_rate": 1.6457204767063923e-05, "loss": 1.5276, "step": 2500 }, { "epoch": 0.94, "learning_rate": 1.5915492957746478e-05, "loss": 1.5738, "step": 2600 }, { "epoch": 0.98, "learning_rate": 1.5373781148429036e-05, "loss": 1.5839, "step": 2700 }, { "epoch": 1.01, "learning_rate": 1.4832069339111594e-05, "loss": 1.5627, "step": 2800 }, { "epoch": 1.05, "learning_rate": 1.4290357529794148e-05, "loss": 1.3985, "step": 2900 }, { "epoch": 1.08, "learning_rate": 1.3748645720476707e-05, "loss": 1.401, "step": 3000 }, { "epoch": 1.08, "eval_exact_match": 66.69820245979186, "eval_f1": 77.4518814156339, "step": 3000 }, { "epoch": 1.12, "learning_rate": 1.3206933911159263e-05, "loss": 1.374, "step": 3100 }, { "epoch": 1.16, "learning_rate": 1.2665222101841821e-05, "loss": 1.3751, "step": 3200 }, { "epoch": 1.19, "learning_rate": 1.2123510292524377e-05, "loss": 1.3844, "step": 3300 }, { "epoch": 1.23, "learning_rate": 1.1581798483206934e-05, "loss": 1.3643, "step": 3400 }, { "epoch": 1.26, "learning_rate": 1.104008667388949e-05, "loss": 1.4097, "step": 3500 }, { "epoch": 1.3, "learning_rate": 1.0498374864572048e-05, "loss": 1.3747, "step": 3600 }, { "epoch": 1.34, "learning_rate": 9.956663055254605e-06, "loss": 1.3667, "step": 3700 }, { "epoch": 1.37, "learning_rate": 9.414951245937163e-06, "loss": 1.333, "step": 3800 }, { "epoch": 1.41, "learning_rate": 8.873239436619718e-06, "loss": 1.384, "step": 3900 }, { "epoch": 1.44, "learning_rate": 8.331527627302276e-06, "loss": 1.3544, "step": 4000 }, { "epoch": 1.44, "eval_exact_match": 67.53074739829707, "eval_f1": 78.10232802082328, "step": 4000 }, { "epoch": 1.48, "learning_rate": 7.789815817984832e-06, "loss": 1.3762, "step": 4100 }, { "epoch": 1.52, "learning_rate": 7.248104008667389e-06, "loss": 1.3628, "step": 4200 }, { "epoch": 1.55, "learning_rate": 6.706392199349946e-06, "loss": 1.3639, "step": 4300 }, { "epoch": 1.59, "learning_rate": 6.164680390032503e-06, "loss": 1.397, "step": 4400 }, { "epoch": 1.63, "learning_rate": 5.62296858071506e-06, "loss": 1.3298, "step": 4500 }, { "epoch": 1.66, "learning_rate": 5.081256771397617e-06, "loss": 1.3641, "step": 4600 }, { "epoch": 1.7, "learning_rate": 4.539544962080174e-06, "loss": 1.3164, "step": 4700 }, { "epoch": 1.73, "learning_rate": 3.99783315276273e-06, "loss": 1.3582, "step": 4800 }, { "epoch": 1.77, "learning_rate": 3.456121343445287e-06, "loss": 1.3273, "step": 4900 }, { "epoch": 1.81, "learning_rate": 2.914409534127844e-06, "loss": 1.3176, "step": 5000 }, { "epoch": 1.81, "eval_exact_match": 68.1456953642384, "eval_f1": 78.69175735985112, "step": 5000 }, { "epoch": 1.84, "learning_rate": 2.3726977248104013e-06, "loss": 1.2995, "step": 5100 }, { "epoch": 1.88, "learning_rate": 1.8309859154929577e-06, "loss": 1.3327, "step": 5200 }, { "epoch": 1.91, "learning_rate": 1.2892741061755147e-06, "loss": 1.3254, "step": 5300 }, { "epoch": 1.95, "learning_rate": 7.475622968580716e-07, "loss": 1.36, "step": 5400 }, { "epoch": 1.99, "learning_rate": 2.0585048754062841e-07, "loss": 1.3627, "step": 5500 }, { "epoch": 2.0, "step": 5538, "total_flos": 2.809773228766003e+16, "train_loss": 1.629506731429398, "train_runtime": 1781.7081, "train_samples_per_second": 99.454, "train_steps_per_second": 3.108 } ], "max_steps": 5538, "num_train_epochs": 2, "total_flos": 2.809773228766003e+16, "trial_name": null, "trial_params": null }