{ "best_metric": 0.7640588283538818, "best_model_checkpoint": "./model_20230703_attempt1/checkpoint-400", "epoch": 4.481792717086835, "global_step": 400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.11, "learning_rate": 2.9999999999999997e-05, "loss": 2.5488, "step": 10 }, { "epoch": 0.22, "learning_rate": 5.9999999999999995e-05, "loss": 2.3131, "step": 20 }, { "epoch": 0.34, "learning_rate": 8.999999999999999e-05, "loss": 2.1798, "step": 30 }, { "epoch": 0.45, "learning_rate": 0.00011999999999999999, "loss": 1.6567, "step": 40 }, { "epoch": 0.56, "learning_rate": 0.000147, "loss": 1.2086, "step": 50 }, { "epoch": 0.67, "learning_rate": 0.00017699999999999997, "loss": 0.8952, "step": 60 }, { "epoch": 0.78, "learning_rate": 0.00020699999999999996, "loss": 1.0646, "step": 70 }, { "epoch": 0.9, "learning_rate": 0.000237, "loss": 0.7725, "step": 80 }, { "epoch": 1.01, "learning_rate": 0.000267, "loss": 0.8192, "step": 90 }, { "epoch": 1.12, "learning_rate": 0.00029699999999999996, "loss": 0.7403, "step": 100 }, { "epoch": 1.23, "learning_rate": 0.00029764397905759156, "loss": 0.6473, "step": 110 }, { "epoch": 1.34, "learning_rate": 0.0002950261780104712, "loss": 0.7524, "step": 120 }, { "epoch": 1.46, "learning_rate": 0.00029240837696335075, "loss": 0.6775, "step": 130 }, { "epoch": 1.57, "learning_rate": 0.00028979057591623034, "loss": 0.5884, "step": 140 }, { "epoch": 1.68, "learning_rate": 0.00028717277486910994, "loss": 0.7117, "step": 150 }, { "epoch": 1.79, "learning_rate": 0.00028455497382198953, "loss": 0.5925, "step": 160 }, { "epoch": 1.9, "learning_rate": 0.00028193717277486907, "loss": 0.6675, "step": 170 }, { "epoch": 2.02, "learning_rate": 0.00027931937172774867, "loss": 0.6126, "step": 180 }, { "epoch": 2.13, "learning_rate": 0.00027670157068062826, "loss": 0.557, "step": 190 }, { "epoch": 2.24, "learning_rate": 0.00027408376963350786, "loss": 0.5362, "step": 200 }, { "epoch": 2.24, "eval_loss": 0.8287817239761353, "eval_runtime": 34.4542, "eval_samples_per_second": 11.61, "eval_steps_per_second": 1.451, "step": 200 }, { "epoch": 2.35, "learning_rate": 0.0002714659685863874, "loss": 0.4951, "step": 210 }, { "epoch": 2.46, "learning_rate": 0.000268848167539267, "loss": 0.4904, "step": 220 }, { "epoch": 2.58, "learning_rate": 0.0002662303664921466, "loss": 0.4544, "step": 230 }, { "epoch": 2.69, "learning_rate": 0.0002636125654450262, "loss": 0.5705, "step": 240 }, { "epoch": 2.8, "learning_rate": 0.0002609947643979057, "loss": 0.5654, "step": 250 }, { "epoch": 2.91, "learning_rate": 0.0002583769633507853, "loss": 0.445, "step": 260 }, { "epoch": 3.03, "learning_rate": 0.0002557591623036649, "loss": 0.3859, "step": 270 }, { "epoch": 3.14, "learning_rate": 0.0002531413612565445, "loss": 0.5015, "step": 280 }, { "epoch": 3.25, "learning_rate": 0.00025052356020942404, "loss": 0.4177, "step": 290 }, { "epoch": 3.36, "learning_rate": 0.00024790575916230364, "loss": 0.4322, "step": 300 }, { "epoch": 3.47, "learning_rate": 0.00024528795811518323, "loss": 0.3028, "step": 310 }, { "epoch": 3.59, "learning_rate": 0.00024267015706806283, "loss": 0.3839, "step": 320 }, { "epoch": 3.7, "learning_rate": 0.0002400523560209424, "loss": 0.3121, "step": 330 }, { "epoch": 3.81, "learning_rate": 0.00023743455497382196, "loss": 0.5043, "step": 340 }, { "epoch": 3.92, "learning_rate": 0.00023481675392670153, "loss": 0.3569, "step": 350 }, { "epoch": 4.03, "learning_rate": 0.00023219895287958115, "loss": 0.2791, "step": 360 }, { "epoch": 4.15, "learning_rate": 0.00022958115183246072, "loss": 0.2648, "step": 370 }, { "epoch": 4.26, "learning_rate": 0.00022696335078534028, "loss": 0.2998, "step": 380 }, { "epoch": 4.37, "learning_rate": 0.00022434554973821985, "loss": 0.3416, "step": 390 }, { "epoch": 4.48, "learning_rate": 0.00022172774869109947, "loss": 0.3044, "step": 400 }, { "epoch": 4.48, "eval_loss": 0.7640588283538818, "eval_runtime": 34.4837, "eval_samples_per_second": 11.6, "eval_steps_per_second": 1.45, "step": 400 } ], "max_steps": 1246, "num_train_epochs": 14, "total_flos": 1.32826870923264e+16, "trial_name": null, "trial_params": null }