{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.998784511199862, "global_step": 7190, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14, "learning_rate": 9.860917941585536e-05, "loss": 1.7018, "step": 100 }, { "epoch": 0.28, "learning_rate": 9.721835883171071e-05, "loss": 1.5678, "step": 200 }, { "epoch": 0.42, "learning_rate": 9.582753824756607e-05, "loss": 1.5333, "step": 300 }, { "epoch": 0.56, "learning_rate": 9.443671766342142e-05, "loss": 1.5235, "step": 400 }, { "epoch": 0.69, "learning_rate": 9.304589707927678e-05, "loss": 1.4892, "step": 500 }, { "epoch": 0.83, "learning_rate": 9.165507649513213e-05, "loss": 1.4781, "step": 600 }, { "epoch": 0.97, "learning_rate": 9.026425591098748e-05, "loss": 1.4319, "step": 700 }, { "epoch": 1.11, "learning_rate": 8.887343532684285e-05, "loss": 1.3011, "step": 800 }, { "epoch": 1.25, "learning_rate": 8.74826147426982e-05, "loss": 1.2885, "step": 900 }, { "epoch": 1.39, "learning_rate": 8.609179415855355e-05, "loss": 1.2524, "step": 1000 }, { "epoch": 1.53, "learning_rate": 8.47009735744089e-05, "loss": 1.2674, "step": 1100 }, { "epoch": 1.67, "learning_rate": 8.331015299026426e-05, "loss": 1.2533, "step": 1200 }, { "epoch": 1.81, "learning_rate": 8.191933240611962e-05, "loss": 1.268, "step": 1300 }, { "epoch": 1.95, "learning_rate": 8.052851182197498e-05, "loss": 1.2499, "step": 1400 }, { "epoch": 2.09, "learning_rate": 7.913769123783032e-05, "loss": 1.1782, "step": 1500 }, { "epoch": 2.23, "learning_rate": 7.774687065368567e-05, "loss": 1.1113, "step": 1600 }, { "epoch": 2.36, "learning_rate": 7.635605006954103e-05, "loss": 1.1167, "step": 1700 }, { "epoch": 2.5, "learning_rate": 7.496522948539638e-05, "loss": 1.1422, "step": 1800 }, { "epoch": 2.64, "learning_rate": 7.357440890125175e-05, "loss": 1.118, "step": 1900 }, { "epoch": 2.78, "learning_rate": 7.21835883171071e-05, "loss": 1.1133, "step": 2000 }, { "epoch": 2.92, "learning_rate": 7.079276773296244e-05, "loss": 1.1058, "step": 2100 }, { "epoch": 3.06, "learning_rate": 6.94019471488178e-05, "loss": 1.0747, "step": 2200 }, { "epoch": 3.2, "learning_rate": 6.801112656467315e-05, "loss": 1.0059, "step": 2300 }, { "epoch": 3.34, "learning_rate": 6.662030598052852e-05, "loss": 0.9988, "step": 2400 }, { "epoch": 3.48, "learning_rate": 6.522948539638388e-05, "loss": 1.0111, "step": 2500 }, { "epoch": 3.62, "learning_rate": 6.383866481223923e-05, "loss": 1.0237, "step": 2600 }, { "epoch": 3.75, "learning_rate": 6.244784422809457e-05, "loss": 1.0011, "step": 2700 }, { "epoch": 3.89, "learning_rate": 6.105702364394992e-05, "loss": 1.0133, "step": 2800 }, { "epoch": 4.03, "learning_rate": 5.966620305980529e-05, "loss": 0.9724, "step": 2900 }, { "epoch": 4.17, "learning_rate": 5.827538247566065e-05, "loss": 0.9005, "step": 3000 }, { "epoch": 4.31, "learning_rate": 5.6884561891515995e-05, "loss": 0.9264, "step": 3100 }, { "epoch": 4.45, "learning_rate": 5.549374130737135e-05, "loss": 0.933, "step": 3200 }, { "epoch": 4.59, "learning_rate": 5.41029207232267e-05, "loss": 0.9283, "step": 3300 }, { "epoch": 4.73, "learning_rate": 5.2712100139082064e-05, "loss": 0.9033, "step": 3400 }, { "epoch": 4.87, "learning_rate": 5.132127955493742e-05, "loss": 0.9139, "step": 3500 }, { "epoch": 5.01, "learning_rate": 4.993045897079277e-05, "loss": 0.9383, "step": 3600 }, { "epoch": 5.15, "learning_rate": 4.853963838664812e-05, "loss": 0.8268, "step": 3700 }, { "epoch": 5.28, "learning_rate": 4.714881780250348e-05, "loss": 0.841, "step": 3800 }, { "epoch": 5.42, "learning_rate": 4.5757997218358836e-05, "loss": 0.8594, "step": 3900 }, { "epoch": 5.56, "learning_rate": 4.436717663421418e-05, "loss": 0.8346, "step": 4000 }, { "epoch": 5.7, "learning_rate": 4.2976356050069544e-05, "loss": 0.8626, "step": 4100 }, { "epoch": 5.84, "learning_rate": 4.15855354659249e-05, "loss": 0.8691, "step": 4200 }, { "epoch": 5.98, "learning_rate": 4.019471488178025e-05, "loss": 0.8424, "step": 4300 }, { "epoch": 6.12, "learning_rate": 3.880389429763561e-05, "loss": 0.793, "step": 4400 }, { "epoch": 6.26, "learning_rate": 3.741307371349096e-05, "loss": 0.8025, "step": 4500 }, { "epoch": 6.4, "learning_rate": 3.6022253129346316e-05, "loss": 0.8245, "step": 4600 }, { "epoch": 6.54, "learning_rate": 3.463143254520167e-05, "loss": 0.797, "step": 4700 }, { "epoch": 6.68, "learning_rate": 3.3240611961057024e-05, "loss": 0.8063, "step": 4800 }, { "epoch": 6.81, "learning_rate": 3.184979137691238e-05, "loss": 0.8042, "step": 4900 }, { "epoch": 6.95, "learning_rate": 3.0458970792767733e-05, "loss": 0.8068, "step": 5000 }, { "epoch": 7.09, "learning_rate": 2.906815020862309e-05, "loss": 0.7873, "step": 5100 }, { "epoch": 7.23, "learning_rate": 2.767732962447844e-05, "loss": 0.7495, "step": 5200 }, { "epoch": 7.37, "learning_rate": 2.6286509040333796e-05, "loss": 0.7538, "step": 5300 }, { "epoch": 7.51, "learning_rate": 2.4895688456189153e-05, "loss": 0.7631, "step": 5400 }, { "epoch": 7.65, "learning_rate": 2.3504867872044508e-05, "loss": 0.7532, "step": 5500 }, { "epoch": 7.79, "learning_rate": 2.2114047287899862e-05, "loss": 0.7772, "step": 5600 }, { "epoch": 7.93, "learning_rate": 2.0723226703755216e-05, "loss": 0.7691, "step": 5700 }, { "epoch": 8.07, "learning_rate": 1.933240611961057e-05, "loss": 0.7544, "step": 5800 }, { "epoch": 8.21, "learning_rate": 1.7941585535465928e-05, "loss": 0.7243, "step": 5900 }, { "epoch": 8.34, "learning_rate": 1.655076495132128e-05, "loss": 0.7308, "step": 6000 }, { "epoch": 8.48, "learning_rate": 1.5159944367176635e-05, "loss": 0.7115, "step": 6100 }, { "epoch": 8.62, "learning_rate": 1.376912378303199e-05, "loss": 0.7275, "step": 6200 }, { "epoch": 8.76, "learning_rate": 1.2378303198887344e-05, "loss": 0.7279, "step": 6300 }, { "epoch": 8.9, "learning_rate": 1.0987482614742698e-05, "loss": 0.7497, "step": 6400 }, { "epoch": 9.04, "learning_rate": 9.596662030598054e-06, "loss": 0.7163, "step": 6500 }, { "epoch": 9.18, "learning_rate": 8.205841446453408e-06, "loss": 0.7047, "step": 6600 }, { "epoch": 9.32, "learning_rate": 6.815020862308763e-06, "loss": 0.7089, "step": 6700 }, { "epoch": 9.46, "learning_rate": 5.424200278164117e-06, "loss": 0.7078, "step": 6800 }, { "epoch": 9.6, "learning_rate": 4.033379694019471e-06, "loss": 0.7065, "step": 6900 }, { "epoch": 9.73, "learning_rate": 2.6425591098748263e-06, "loss": 0.7017, "step": 7000 }, { "epoch": 9.87, "learning_rate": 1.2517385257301808e-06, "loss": 0.6855, "step": 7100 }, { "epoch": 10.0, "step": 7190, "total_flos": 6.20936877252096e+16, "train_loss": 0.9689414988638463, "train_runtime": 6282.5026, "train_samples_per_second": 18.333, "train_steps_per_second": 1.144 } ], "max_steps": 7190, "num_train_epochs": 10, "total_flos": 6.20936877252096e+16, "trial_name": null, "trial_params": null }