{ "best_metric": 0.45565962643131613, "best_model_checkpoint": "", "epoch": 4.0, "global_step": 255184, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2e-10, "loss": 0.5804, "step": 1 }, { "epoch": 0.08, "learning_rate": 1e-06, "loss": 0.3416, "step": 5000 }, { "epoch": 0.16, "learning_rate": 2e-06, "loss": 0.2775, "step": 10000 }, { "epoch": 0.24, "learning_rate": 1.967635445659913e-06, "loss": 0.2661, "step": 15000 }, { "epoch": 0.31, "learning_rate": 1.9352708913198264e-06, "loss": 0.2615, "step": 20000 }, { "epoch": 0.39, "learning_rate": 1.9029063369797399e-06, "loss": 0.2637, "step": 25000 }, { "epoch": 0.47, "learning_rate": 1.870541782639653e-06, "loss": 0.2597, "step": 30000 }, { "epoch": 0.55, "learning_rate": 1.8381772282995661e-06, "loss": 0.2546, "step": 35000 }, { "epoch": 0.63, "learning_rate": 1.8058126739594794e-06, "loss": 0.2593, "step": 40000 }, { "epoch": 0.71, "learning_rate": 1.7734481196193926e-06, "loss": 0.2515, "step": 45000 }, { "epoch": 0.78, "learning_rate": 1.741083565279306e-06, "loss": 0.2546, "step": 50000 }, { "epoch": 0.86, "learning_rate": 1.7087190109392193e-06, "loss": 0.2567, "step": 55000 }, { "epoch": 0.94, "learning_rate": 1.6763544565991326e-06, "loss": 0.2528, "step": 60000 }, { "epoch": 1.0, "eval_accuracy": 0.21571615150195908, "eval_auc": 0.9180686985375446, "eval_f1": 0.22147402418434062, "eval_loss": 1.4364598989486694, "eval_precision": 0.908817624677932, "eval_recall": 0.21571615150195908, "eval_runtime": 4.8065, "eval_samples_per_second": 955.789, "step": 63796 }, { "epoch": 1.02, "learning_rate": 1.6439899022590458e-06, "loss": 0.2493, "step": 65000 }, { "epoch": 1.1, "learning_rate": 1.611625347918959e-06, "loss": 0.2448, "step": 70000 }, { "epoch": 1.18, "learning_rate": 1.5792607935788725e-06, "loss": 0.2507, "step": 75000 }, { "epoch": 1.25, "learning_rate": 1.5468962392387856e-06, "loss": 0.2445, "step": 80000 }, { "epoch": 1.33, "learning_rate": 1.5145316848986988e-06, "loss": 0.2481, "step": 85000 }, { "epoch": 1.41, "learning_rate": 1.482167130558612e-06, "loss": 0.2452, "step": 90000 }, { "epoch": 1.49, "learning_rate": 1.4498025762185253e-06, "loss": 0.2442, "step": 95000 }, { "epoch": 1.57, "learning_rate": 1.4174380218784387e-06, "loss": 0.2443, "step": 100000 }, { "epoch": 1.65, "learning_rate": 1.385073467538352e-06, "loss": 0.246, "step": 105000 }, { "epoch": 1.72, "learning_rate": 1.3527089131982652e-06, "loss": 0.2503, "step": 110000 }, { "epoch": 1.8, "learning_rate": 1.3203443588581785e-06, "loss": 0.2492, "step": 115000 }, { "epoch": 1.88, "learning_rate": 1.2879798045180917e-06, "loss": 0.2429, "step": 120000 }, { "epoch": 1.96, "learning_rate": 1.255615250178005e-06, "loss": 0.2491, "step": 125000 }, { "epoch": 2.0, "eval_accuracy": 0.3243360905528951, "eval_auc": 0.9164518557207816, "eval_f1": 0.3794720254407353, "eval_loss": 1.2563140392303467, "eval_precision": 0.9077943374743213, "eval_recall": 0.3243360905528951, "eval_runtime": 4.7493, "eval_samples_per_second": 967.295, "step": 127592 }, { "epoch": 2.04, "learning_rate": 1.2232506958379182e-06, "loss": 0.2508, "step": 130000 }, { "epoch": 2.12, "learning_rate": 1.1908861414978315e-06, "loss": 0.2422, "step": 135000 }, { "epoch": 2.19, "learning_rate": 1.1585215871577447e-06, "loss": 0.2404, "step": 140000 }, { "epoch": 2.27, "learning_rate": 1.126157032817658e-06, "loss": 0.2347, "step": 145000 }, { "epoch": 2.35, "learning_rate": 1.0937924784775714e-06, "loss": 0.2441, "step": 150000 }, { "epoch": 2.43, "learning_rate": 1.0614279241374847e-06, "loss": 0.2456, "step": 155000 }, { "epoch": 2.51, "learning_rate": 1.029063369797398e-06, "loss": 0.2375, "step": 160000 }, { "epoch": 2.59, "learning_rate": 9.966988154573111e-07, "loss": 0.2441, "step": 165000 }, { "epoch": 2.66, "learning_rate": 9.643342611172244e-07, "loss": 0.2376, "step": 170000 }, { "epoch": 2.74, "learning_rate": 9.319697067771376e-07, "loss": 0.2386, "step": 175000 }, { "epoch": 2.82, "learning_rate": 8.996051524370509e-07, "loss": 0.2387, "step": 180000 }, { "epoch": 2.9, "learning_rate": 8.672405980969641e-07, "loss": 0.238, "step": 185000 }, { "epoch": 2.98, "learning_rate": 8.348760437568775e-07, "loss": 0.234, "step": 190000 }, { "epoch": 3.0, "eval_accuracy": 0.32063561166739224, "eval_auc": 0.9156791507926754, "eval_f1": 0.3746009193872616, "eval_loss": 1.388353705406189, "eval_precision": 0.9076853642985949, "eval_recall": 0.32063561166739224, "eval_runtime": 4.7646, "eval_samples_per_second": 964.188, "step": 191388 }, { "epoch": 3.06, "learning_rate": 8.025114894167907e-07, "loss": 0.2366, "step": 195000 }, { "epoch": 3.13, "learning_rate": 7.70146935076704e-07, "loss": 0.2343, "step": 200000 }, { "epoch": 3.21, "learning_rate": 7.377823807366172e-07, "loss": 0.2353, "step": 205000 }, { "epoch": 3.29, "learning_rate": 7.054178263965305e-07, "loss": 0.2353, "step": 210000 }, { "epoch": 3.37, "learning_rate": 6.730532720564438e-07, "loss": 0.2328, "step": 215000 }, { "epoch": 3.45, "learning_rate": 6.40688717716357e-07, "loss": 0.2332, "step": 220000 }, { "epoch": 3.53, "learning_rate": 6.083241633762702e-07, "loss": 0.2374, "step": 225000 }, { "epoch": 3.61, "learning_rate": 5.759596090361835e-07, "loss": 0.2367, "step": 230000 }, { "epoch": 3.68, "learning_rate": 5.435950546960968e-07, "loss": 0.2379, "step": 235000 }, { "epoch": 3.76, "learning_rate": 5.112305003560101e-07, "loss": 0.2354, "step": 240000 }, { "epoch": 3.84, "learning_rate": 4.788659460159234e-07, "loss": 0.235, "step": 245000 }, { "epoch": 3.92, "learning_rate": 4.4650139167583663e-07, "loss": 0.2362, "step": 250000 }, { "epoch": 4.0, "learning_rate": 4.1413683733574987e-07, "loss": 0.24, "step": 255000 }, { "epoch": 4.0, "eval_accuracy": 0.38528515454941226, "eval_auc": 0.9194136352464053, "eval_f1": 0.45565962643131613, "eval_loss": 1.2999894618988037, "eval_precision": 0.9088106767314909, "eval_recall": 0.38528515454941226, "eval_runtime": 4.7332, "eval_samples_per_second": 970.591, "step": 255184 } ], "max_steps": 318980, "num_train_epochs": 5, "total_flos": 4.841807205051138e+18, "trial_name": null, "trial_params": null }