{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.869565217391305, "eval_steps": 500, "global_step": 60, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.17391304347826086, "grad_norm": 0.5795015692710876, "learning_rate": 4e-05, "loss": 2.6851, "step": 1 }, { "epoch": 0.34782608695652173, "grad_norm": 0.6652081608772278, "learning_rate": 8e-05, "loss": 2.5809, "step": 2 }, { "epoch": 0.5217391304347826, "grad_norm": 0.3569735884666443, "learning_rate": 0.00012, "loss": 2.2489, "step": 3 }, { "epoch": 0.6956521739130435, "grad_norm": 0.5530170798301697, "learning_rate": 0.00016, "loss": 2.6054, "step": 4 }, { "epoch": 0.8695652173913043, "grad_norm": 0.9040398597717285, "learning_rate": 0.0002, "loss": 2.6919, "step": 5 }, { "epoch": 1.0869565217391304, "grad_norm": 1.6528335809707642, "learning_rate": 0.00019636363636363636, "loss": 3.4822, "step": 6 }, { "epoch": 1.2608695652173914, "grad_norm": 0.5991894602775574, "learning_rate": 0.00019272727272727274, "loss": 1.5508, "step": 7 }, { "epoch": 1.434782608695652, "grad_norm": 1.1784238815307617, "learning_rate": 0.0001890909090909091, "loss": 2.3071, "step": 8 }, { "epoch": 1.608695652173913, "grad_norm": 1.242445468902588, "learning_rate": 0.00018545454545454545, "loss": 2.4738, "step": 9 }, { "epoch": 1.7826086956521738, "grad_norm": 1.0758299827575684, "learning_rate": 0.00018181818181818183, "loss": 1.5476, "step": 10 }, { "epoch": 1.9565217391304348, "grad_norm": 1.110364317893982, "learning_rate": 0.0001781818181818182, "loss": 2.0381, "step": 11 }, { "epoch": 2.1739130434782608, "grad_norm": 0.5819414258003235, "learning_rate": 0.00017454545454545454, "loss": 1.6068, "step": 12 }, { "epoch": 2.3478260869565215, "grad_norm": 0.6612680554389954, "learning_rate": 0.0001709090909090909, "loss": 1.9076, "step": 13 }, { "epoch": 2.5217391304347827, "grad_norm": 0.5913424491882324, "learning_rate": 0.00016727272727272728, "loss": 1.2975, "step": 14 }, { "epoch": 2.6956521739130435, "grad_norm": 0.8625181317329407, "learning_rate": 0.00016363636363636366, "loss": 1.6797, "step": 15 }, { "epoch": 2.869565217391304, "grad_norm": 0.8947856426239014, "learning_rate": 0.00016, "loss": 1.3828, "step": 16 }, { "epoch": 3.0869565217391304, "grad_norm": 0.6648727655410767, "learning_rate": 0.00015636363636363637, "loss": 2.1334, "step": 17 }, { "epoch": 3.260869565217391, "grad_norm": 0.6269745826721191, "learning_rate": 0.00015272727272727275, "loss": 1.2481, "step": 18 }, { "epoch": 3.4347826086956523, "grad_norm": 0.9015301465988159, "learning_rate": 0.0001490909090909091, "loss": 1.5312, "step": 19 }, { "epoch": 3.608695652173913, "grad_norm": 0.56430983543396, "learning_rate": 0.00014545454545454546, "loss": 1.1648, "step": 20 }, { "epoch": 3.782608695652174, "grad_norm": 0.6650995016098022, "learning_rate": 0.00014181818181818184, "loss": 1.9122, "step": 21 }, { "epoch": 3.9565217391304346, "grad_norm": 1.211255669593811, "learning_rate": 0.0001381818181818182, "loss": 1.905, "step": 22 }, { "epoch": 4.173913043478261, "grad_norm": 0.8868653178215027, "learning_rate": 0.00013454545454545455, "loss": 1.0216, "step": 23 }, { "epoch": 4.3478260869565215, "grad_norm": 1.4661606550216675, "learning_rate": 0.00013090909090909093, "loss": 1.2432, "step": 24 }, { "epoch": 4.521739130434782, "grad_norm": 0.5382258892059326, "learning_rate": 0.00012727272727272728, "loss": 1.2831, "step": 25 }, { "epoch": 4.695652173913043, "grad_norm": 1.1182520389556885, "learning_rate": 0.00012363636363636364, "loss": 1.0676, "step": 26 }, { "epoch": 4.869565217391305, "grad_norm": 0.7625539898872375, "learning_rate": 0.00012, "loss": 1.2568, "step": 27 }, { "epoch": 5.086956521739131, "grad_norm": 1.9197959899902344, "learning_rate": 0.00011636363636363636, "loss": 1.4596, "step": 28 }, { "epoch": 5.260869565217392, "grad_norm": 1.1358859539031982, "learning_rate": 0.00011272727272727272, "loss": 0.6785, "step": 29 }, { "epoch": 5.434782608695652, "grad_norm": 2.8852992057800293, "learning_rate": 0.00010909090909090909, "loss": 0.9755, "step": 30 }, { "epoch": 5.608695652173913, "grad_norm": 2.618217945098877, "learning_rate": 0.00010545454545454545, "loss": 0.9123, "step": 31 }, { "epoch": 5.782608695652174, "grad_norm": 1.0614593029022217, "learning_rate": 0.00010181818181818181, "loss": 1.5262, "step": 32 }, { "epoch": 5.956521739130435, "grad_norm": 2.063232898712158, "learning_rate": 9.818181818181818e-05, "loss": 1.3585, "step": 33 }, { "epoch": 6.173913043478261, "grad_norm": 1.3435719013214111, "learning_rate": 9.454545454545455e-05, "loss": 0.9793, "step": 34 }, { "epoch": 6.3478260869565215, "grad_norm": 1.4425543546676636, "learning_rate": 9.090909090909092e-05, "loss": 0.8326, "step": 35 }, { "epoch": 6.521739130434782, "grad_norm": 1.411342740058899, "learning_rate": 8.727272727272727e-05, "loss": 0.7428, "step": 36 }, { "epoch": 6.695652173913043, "grad_norm": 2.045598268508911, "learning_rate": 8.363636363636364e-05, "loss": 0.816, "step": 37 }, { "epoch": 6.869565217391305, "grad_norm": 1.8088502883911133, "learning_rate": 8e-05, "loss": 0.7132, "step": 38 }, { "epoch": 7.086956521739131, "grad_norm": 2.4303224086761475, "learning_rate": 7.636363636363637e-05, "loss": 1.2982, "step": 39 }, { "epoch": 7.260869565217392, "grad_norm": 1.6079760789871216, "learning_rate": 7.272727272727273e-05, "loss": 0.7633, "step": 40 }, { "epoch": 7.434782608695652, "grad_norm": 1.357133388519287, "learning_rate": 6.90909090909091e-05, "loss": 0.4112, "step": 41 }, { "epoch": 7.608695652173913, "grad_norm": 1.36146080493927, "learning_rate": 6.545454545454546e-05, "loss": 0.4352, "step": 42 }, { "epoch": 7.782608695652174, "grad_norm": 1.250328540802002, "learning_rate": 6.181818181818182e-05, "loss": 1.0597, "step": 43 }, { "epoch": 7.956521739130435, "grad_norm": 2.3408303260803223, "learning_rate": 5.818181818181818e-05, "loss": 0.6463, "step": 44 }, { "epoch": 8.173913043478262, "grad_norm": 1.5889191627502441, "learning_rate": 5.4545454545454546e-05, "loss": 0.6388, "step": 45 }, { "epoch": 8.347826086956522, "grad_norm": 0.8752424120903015, "learning_rate": 5.090909090909091e-05, "loss": 0.7383, "step": 46 }, { "epoch": 8.521739130434783, "grad_norm": 1.2275079488754272, "learning_rate": 4.7272727272727275e-05, "loss": 0.4331, "step": 47 }, { "epoch": 8.695652173913043, "grad_norm": 1.4064780473709106, "learning_rate": 4.3636363636363636e-05, "loss": 0.3424, "step": 48 }, { "epoch": 8.869565217391305, "grad_norm": 1.825239896774292, "learning_rate": 4e-05, "loss": 0.5151, "step": 49 }, { "epoch": 9.08695652173913, "grad_norm": 3.0296473503112793, "learning_rate": 3.6363636363636364e-05, "loss": 0.5569, "step": 50 }, { "epoch": 9.26086956521739, "grad_norm": 1.301712155342102, "learning_rate": 3.272727272727273e-05, "loss": 0.3894, "step": 51 }, { "epoch": 9.434782608695652, "grad_norm": 1.1728489398956299, "learning_rate": 2.909090909090909e-05, "loss": 0.8132, "step": 52 }, { "epoch": 9.608695652173914, "grad_norm": 1.175506830215454, "learning_rate": 2.5454545454545454e-05, "loss": 0.2151, "step": 53 }, { "epoch": 9.782608695652174, "grad_norm": 0.9729976058006287, "learning_rate": 2.1818181818181818e-05, "loss": 0.1576, "step": 54 }, { "epoch": 9.956521739130435, "grad_norm": 2.4422378540039062, "learning_rate": 1.8181818181818182e-05, "loss": 0.4448, "step": 55 }, { "epoch": 10.173913043478262, "grad_norm": 1.0331871509552002, "learning_rate": 1.4545454545454545e-05, "loss": 0.1602, "step": 56 }, { "epoch": 10.347826086956522, "grad_norm": 0.9095517992973328, "learning_rate": 1.0909090909090909e-05, "loss": 0.6064, "step": 57 }, { "epoch": 10.521739130434783, "grad_norm": 1.2574269771575928, "learning_rate": 7.272727272727272e-06, "loss": 0.2496, "step": 58 }, { "epoch": 10.695652173913043, "grad_norm": 1.1020245552062988, "learning_rate": 3.636363636363636e-06, "loss": 0.185, "step": 59 }, { "epoch": 10.869565217391305, "grad_norm": 1.7821040153503418, "learning_rate": 0.0, "loss": 0.5375, "step": 60 } ], "logging_steps": 1, "max_steps": 60, "num_input_tokens_seen": 0, "num_train_epochs": 12, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6160960925368320.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }