{ "best_metric": null, "best_model_checkpoint": null, "epoch": 27.692307692307693, "global_step": 90, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.31, "learning_rate": 6.666666666666667e-06, "loss": 3.768, "step": 1 }, { "epoch": 0.62, "learning_rate": 1.3333333333333333e-05, "loss": 12.368, "step": 2 }, { "epoch": 0.92, "learning_rate": 2e-05, "loss": 11.537, "step": 3 }, { "epoch": 1.23, "learning_rate": 1.999348095389677e-05, "loss": 7.8117, "step": 4 }, { "epoch": 1.54, "learning_rate": 1.9973932315179502e-05, "loss": 5.6659, "step": 5 }, { "epoch": 1.85, "learning_rate": 1.9941379571543597e-05, "loss": 4.2924, "step": 6 }, { "epoch": 2.15, "learning_rate": 1.9895865165556375e-05, "loss": 2.1566, "step": 7 }, { "epoch": 2.46, "learning_rate": 1.9837448439320027e-05, "loss": 2.607, "step": 8 }, { "epoch": 2.77, "learning_rate": 1.976620555710087e-05, "loss": 2.6508, "step": 9 }, { "epoch": 3.08, "learning_rate": 1.9682229406025635e-05, "loss": 3.4377, "step": 10 }, { "epoch": 3.38, "learning_rate": 1.9585629474974413e-05, "loss": 2.3918, "step": 11 }, { "epoch": 3.69, "learning_rate": 1.9476531711828027e-05, "loss": 0.9541, "step": 12 }, { "epoch": 4.0, "learning_rate": 1.935507835925601e-05, "loss": 0.2939, "step": 13 }, { "epoch": 4.31, "learning_rate": 1.9221427769259333e-05, "loss": 0.717, "step": 14 }, { "epoch": 4.62, "learning_rate": 1.9075754196709574e-05, "loss": 0.3357, "step": 15 }, { "epoch": 4.92, "learning_rate": 1.8918247572153822e-05, "loss": 0.2568, "step": 16 }, { "epoch": 5.23, "learning_rate": 1.8749113254181498e-05, "loss": 0.1306, "step": 17 }, { "epoch": 5.54, "learning_rate": 1.8568571761675893e-05, "loss": 0.3485, "step": 18 }, { "epoch": 5.85, "learning_rate": 1.837685848629965e-05, "loss": 0.1659, "step": 19 }, { "epoch": 6.15, "learning_rate": 1.817422338558892e-05, "loss": 0.6525, "step": 20 }, { "epoch": 6.46, "learning_rate": 1.796093065705644e-05, "loss": 0.1003, "step": 21 }, { "epoch": 6.77, "learning_rate": 1.7737258393728363e-05, "loss": 0.3485, "step": 22 }, { "epoch": 7.08, "learning_rate": 1.7503498221564026e-05, "loss": 0.2097, "step": 23 }, { "epoch": 7.38, "learning_rate": 1.725995491923131e-05, "loss": 0.1424, "step": 24 }, { "epoch": 7.69, "learning_rate": 1.7006946020733426e-05, "loss": 0.1512, "step": 25 }, { "epoch": 8.0, "learning_rate": 1.6744801401405138e-05, "loss": 0.0815, "step": 26 }, { "epoch": 8.31, "learning_rate": 1.647386284781828e-05, "loss": 0.0202, "step": 27 }, { "epoch": 8.62, "learning_rate": 1.6194483612157232e-05, "loss": 0.0909, "step": 28 }, { "epoch": 8.92, "learning_rate": 1.590702795164551e-05, "loss": 0.1048, "step": 29 }, { "epoch": 9.23, "learning_rate": 1.5611870653623826e-05, "loss": 0.1186, "step": 30 }, { "epoch": 9.54, "learning_rate": 1.530939654689887e-05, "loss": 0.0219, "step": 31 }, { "epoch": 9.85, "learning_rate": 1.5000000000000002e-05, "loss": 0.0718, "step": 32 }, { "epoch": 10.15, "learning_rate": 1.4684084406997903e-05, "loss": 0.0788, "step": 33 }, { "epoch": 10.46, "learning_rate": 1.4362061661555675e-05, "loss": 0.0534, "step": 34 }, { "epoch": 10.77, "learning_rate": 1.4034351619898088e-05, "loss": 0.0225, "step": 35 }, { "epoch": 11.08, "learning_rate": 1.3701381553399147e-05, "loss": 0.0345, "step": 36 }, { "epoch": 11.38, "learning_rate": 1.3363585591501751e-05, "loss": 0.0125, "step": 37 }, { "epoch": 11.69, "learning_rate": 1.3021404155695728e-05, "loss": 0.0211, "step": 38 }, { "epoch": 12.0, "learning_rate": 1.2675283385292212e-05, "loss": 0.0998, "step": 39 }, { "epoch": 12.31, "learning_rate": 1.2325674555743106e-05, "loss": 0.0139, "step": 40 }, { "epoch": 12.62, "learning_rate": 1.1973033490264e-05, "loss": 0.0052, "step": 41 }, { "epoch": 12.92, "learning_rate": 1.161781996552765e-05, "loss": 0.0069, "step": 42 }, { "epoch": 13.23, "learning_rate": 1.1260497112202895e-05, "loss": 0.1117, "step": 43 }, { "epoch": 13.54, "learning_rate": 1.0901530811120655e-05, "loss": 0.0081, "step": 44 }, { "epoch": 13.85, "learning_rate": 1.0541389085854177e-05, "loss": 0.009, "step": 45 }, { "epoch": 14.15, "learning_rate": 1.0180541492505605e-05, "loss": 0.0019, "step": 46 }, { "epoch": 14.46, "learning_rate": 9.819458507494395e-06, "loss": 0.0309, "step": 47 }, { "epoch": 14.77, "learning_rate": 9.458610914145826e-06, "loss": 0.0016, "step": 48 }, { "epoch": 15.08, "learning_rate": 9.098469188879348e-06, "loss": 0.0158, "step": 49 }, { "epoch": 15.38, "learning_rate": 8.739502887797108e-06, "loss": 0.0013, "step": 50 }, { "epoch": 15.69, "learning_rate": 8.382180034472353e-06, "loss": 0.0056, "step": 51 }, { "epoch": 16.0, "learning_rate": 8.026966509736001e-06, "loss": 0.0311, "step": 52 }, { "epoch": 16.31, "learning_rate": 7.674325444256899e-06, "loss": 0.0198, "step": 53 }, { "epoch": 16.62, "learning_rate": 7.324716614707794e-06, "loss": 0.0032, "step": 54 }, { "epoch": 16.92, "learning_rate": 6.978595844304272e-06, "loss": 0.0011, "step": 55 }, { "epoch": 17.23, "learning_rate": 6.636414408498249e-06, "loss": 0.0038, "step": 56 }, { "epoch": 17.54, "learning_rate": 6.298618446600856e-06, "loss": 0.011, "step": 57 }, { "epoch": 17.85, "learning_rate": 5.965648380101916e-06, "loss": 0.023, "step": 58 }, { "epoch": 18.15, "learning_rate": 5.637938338444325e-06, "loss": 0.0005, "step": 59 }, { "epoch": 18.46, "learning_rate": 5.3159155930021e-06, "loss": 0.0007, "step": 60 }, { "epoch": 18.77, "learning_rate": 5.000000000000003e-06, "loss": 0.0063, "step": 61 }, { "epoch": 19.08, "learning_rate": 4.690603453101134e-06, "loss": 0.0009, "step": 62 }, { "epoch": 19.38, "learning_rate": 4.388129346376177e-06, "loss": 0.0021, "step": 63 }, { "epoch": 19.69, "learning_rate": 4.092972048354491e-06, "loss": 0.02, "step": 64 }, { "epoch": 20.0, "learning_rate": 3.8055163878427703e-06, "loss": 0.001, "step": 65 }, { "epoch": 20.31, "learning_rate": 3.5261371521817247e-06, "loss": 0.0008, "step": 66 }, { "epoch": 20.62, "learning_rate": 3.255198598594862e-06, "loss": 0.0098, "step": 67 }, { "epoch": 20.92, "learning_rate": 2.9930539792665767e-06, "loss": 0.0014, "step": 68 }, { "epoch": 21.23, "learning_rate": 2.740045080768694e-06, "loss": 0.0015, "step": 69 }, { "epoch": 21.54, "learning_rate": 2.496501778435977e-06, "loss": 0.0006, "step": 70 }, { "epoch": 21.85, "learning_rate": 2.2627416062716366e-06, "loss": 0.0064, "step": 71 }, { "epoch": 22.15, "learning_rate": 2.0390693429435626e-06, "loss": 0.0007, "step": 72 }, { "epoch": 22.46, "learning_rate": 1.8257766144110823e-06, "loss": 0.0015, "step": 73 }, { "epoch": 22.77, "learning_rate": 1.6231415137003536e-06, "loss": 0.0089, "step": 74 }, { "epoch": 23.08, "learning_rate": 1.4314282383241097e-06, "loss": 0.0022, "step": 75 }, { "epoch": 23.38, "learning_rate": 1.2508867458185037e-06, "loss": 0.0076, "step": 76 }, { "epoch": 23.69, "learning_rate": 1.0817524278461777e-06, "loss": 0.0004, "step": 77 }, { "epoch": 24.0, "learning_rate": 9.242458032904311e-07, "loss": 0.0005, "step": 78 }, { "epoch": 24.31, "learning_rate": 7.785722307406685e-07, "loss": 0.0004, "step": 79 }, { "epoch": 24.62, "learning_rate": 6.449216407439906e-07, "loss": 0.0049, "step": 80 }, { "epoch": 24.92, "learning_rate": 5.234682881719766e-07, "loss": 0.004, "step": 81 }, { "epoch": 25.23, "learning_rate": 4.1437052502558693e-07, "loss": 0.0037, "step": 82 }, { "epoch": 25.54, "learning_rate": 3.1777059397436693e-07, "loss": 0.0099, "step": 83 }, { "epoch": 25.85, "learning_rate": 2.3379444289913344e-07, "loss": 0.001, "step": 84 }, { "epoch": 26.15, "learning_rate": 1.6255156067997325e-07, "loss": 0.0004, "step": 85 }, { "epoch": 26.46, "learning_rate": 1.041348344436277e-07, "loss": 0.0021, "step": 86 }, { "epoch": 26.77, "learning_rate": 5.862042845640403e-08, "loss": 0.0052, "step": 87 }, { "epoch": 27.08, "learning_rate": 2.606768482050215e-08, "loss": 0.0047, "step": 88 }, { "epoch": 27.38, "learning_rate": 6.5190461032305085e-09, "loss": 0.0004, "step": 89 }, { "epoch": 27.69, "learning_rate": 0.0, "loss": 0.0003, "step": 90 }, { "epoch": 27.69, "step": 90, "total_flos": 9963642317438976.0, "train_loss": 0.7193527906240585, "train_runtime": 2724.9841, "train_samples_per_second": 1.134, "train_steps_per_second": 0.033 } ], "max_steps": 90, "num_train_epochs": 30, "total_flos": 9963642317438976.0, "trial_name": null, "trial_params": null }