| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.0, | |
| "eval_steps": 500, | |
| "global_step": 752, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0026595744680851063, | |
| "grad_norm": 7.823265044222166, | |
| "learning_rate": 0.0, | |
| "loss": 3.0134, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.005319148936170213, | |
| "grad_norm": 7.642957709635029, | |
| "learning_rate": 1.6005307325482135e-07, | |
| "loss": 3.1765, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.007978723404255319, | |
| "grad_norm": 7.334880781186477, | |
| "learning_rate": 2.5367811923406806e-07, | |
| "loss": 3.015, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.010638297872340425, | |
| "grad_norm": 7.622164840160959, | |
| "learning_rate": 3.201061465096427e-07, | |
| "loss": 3.0191, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.013297872340425532, | |
| "grad_norm": 7.559561166288389, | |
| "learning_rate": 3.716317274634347e-07, | |
| "loss": 3.0604, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.015957446808510637, | |
| "grad_norm": 7.537486932594524, | |
| "learning_rate": 4.137311924888894e-07, | |
| "loss": 3.0974, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.018617021276595744, | |
| "grad_norm": 7.852202340999875, | |
| "learning_rate": 4.4932578299236894e-07, | |
| "loss": 3.0015, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.02127659574468085, | |
| "grad_norm": 7.556325866974648, | |
| "learning_rate": 4.80159219764464e-07, | |
| "loss": 3.0507, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.023936170212765957, | |
| "grad_norm": 7.332175625826142, | |
| "learning_rate": 5.073562384681361e-07, | |
| "loss": 3.127, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.026595744680851064, | |
| "grad_norm": 7.725255533578292, | |
| "learning_rate": 5.316848007182561e-07, | |
| "loss": 3.0381, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.02925531914893617, | |
| "grad_norm": 7.457326924140823, | |
| "learning_rate": 5.536926622778005e-07, | |
| "loss": 3.0634, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.031914893617021274, | |
| "grad_norm": 7.693199711944396, | |
| "learning_rate": 5.737842657437107e-07, | |
| "loss": 3.0101, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.034574468085106384, | |
| "grad_norm": 7.313122323445423, | |
| "learning_rate": 5.922667492826867e-07, | |
| "loss": 3.0967, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.03723404255319149, | |
| "grad_norm": 7.381687042192129, | |
| "learning_rate": 6.093788562471904e-07, | |
| "loss": 3.0606, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.0398936170212766, | |
| "grad_norm": 7.501689466289098, | |
| "learning_rate": 6.253098466975028e-07, | |
| "loss": 3.0923, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.0425531914893617, | |
| "grad_norm": 7.764166896561166, | |
| "learning_rate": 6.402122930192854e-07, | |
| "loss": 3.0133, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.04521276595744681, | |
| "grad_norm": 7.632126776388045, | |
| "learning_rate": 6.542109895570008e-07, | |
| "loss": 3.0261, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.047872340425531915, | |
| "grad_norm": 7.5260472985128875, | |
| "learning_rate": 6.674093117229574e-07, | |
| "loss": 3.0122, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.05053191489361702, | |
| "grad_norm": 7.760501268851623, | |
| "learning_rate": 6.798938534903572e-07, | |
| "loss": 2.8592, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.05319148936170213, | |
| "grad_norm": 7.498060611474783, | |
| "learning_rate": 6.917378739730775e-07, | |
| "loss": 2.8595, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.05585106382978723, | |
| "grad_norm": 7.527553046681602, | |
| "learning_rate": 7.030039022264371e-07, | |
| "loss": 2.7323, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.05851063829787234, | |
| "grad_norm": 7.139599749462118, | |
| "learning_rate": 7.13745735532622e-07, | |
| "loss": 3.0506, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.061170212765957445, | |
| "grad_norm": 7.6904167286597165, | |
| "learning_rate": 7.24009993125516e-07, | |
| "loss": 2.8334, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.06382978723404255, | |
| "grad_norm": 7.13831891441032, | |
| "learning_rate": 7.338373389985321e-07, | |
| "loss": 3.0714, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.06648936170212766, | |
| "grad_norm": 7.277657469838149, | |
| "learning_rate": 7.432634549268694e-07, | |
| "loss": 3.0034, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.06914893617021277, | |
| "grad_norm": 7.601860581578553, | |
| "learning_rate": 7.52319822537508e-07, | |
| "loss": 2.8405, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.07180851063829788, | |
| "grad_norm": 7.406160378215662, | |
| "learning_rate": 7.610343577022042e-07, | |
| "loss": 2.8776, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.07446808510638298, | |
| "grad_norm": 7.501158815505739, | |
| "learning_rate": 7.694319295020116e-07, | |
| "loss": 2.8623, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.07712765957446809, | |
| "grad_norm": 7.475182990956878, | |
| "learning_rate": 7.775347880836832e-07, | |
| "loss": 2.8176, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.0797872340425532, | |
| "grad_norm": 6.90076598186568, | |
| "learning_rate": 7.853629199523242e-07, | |
| "loss": 2.6601, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.08244680851063829, | |
| "grad_norm": 7.317267534729387, | |
| "learning_rate": 7.929343449851162e-07, | |
| "loss": 2.5921, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.0851063829787234, | |
| "grad_norm": 6.540702440994457, | |
| "learning_rate": 8.002653662741068e-07, | |
| "loss": 2.7996, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.08776595744680851, | |
| "grad_norm": 7.135880043595326, | |
| "learning_rate": 8.073707815118686e-07, | |
| "loss": 2.5778, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.09042553191489362, | |
| "grad_norm": 6.507937939592382, | |
| "learning_rate": 8.142640628118222e-07, | |
| "loss": 2.7356, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.09308510638297872, | |
| "grad_norm": 6.445617298664252, | |
| "learning_rate": 8.209575104558038e-07, | |
| "loss": 2.7163, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.09574468085106383, | |
| "grad_norm": 6.62462859642164, | |
| "learning_rate": 8.274623849777788e-07, | |
| "loss": 2.6283, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.09840425531914894, | |
| "grad_norm": 6.7066040969467995, | |
| "learning_rate": 8.337890211465859e-07, | |
| "loss": 2.5485, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.10106382978723404, | |
| "grad_norm": 6.6019115107446815, | |
| "learning_rate": 8.399469267451787e-07, | |
| "loss": 2.5864, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.10372340425531915, | |
| "grad_norm": 6.526338473606032, | |
| "learning_rate": 8.459448685167547e-07, | |
| "loss": 2.6546, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.10638297872340426, | |
| "grad_norm": 6.677090538207485, | |
| "learning_rate": 8.517909472278988e-07, | |
| "loss": 2.6135, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.10904255319148937, | |
| "grad_norm": 6.627853354895093, | |
| "learning_rate": 8.574926634616532e-07, | |
| "loss": 2.6117, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.11170212765957446, | |
| "grad_norm": 6.210420944517809, | |
| "learning_rate": 8.630569754812584e-07, | |
| "loss": 2.6221, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.11436170212765957, | |
| "grad_norm": 6.490293150689325, | |
| "learning_rate": 8.684903502843901e-07, | |
| "loss": 2.6191, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.11702127659574468, | |
| "grad_norm": 6.5133887096434036, | |
| "learning_rate": 8.737988087874431e-07, | |
| "loss": 2.6479, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.1196808510638298, | |
| "grad_norm": 6.5629458321401275, | |
| "learning_rate": 8.789879659315709e-07, | |
| "loss": 2.5919, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.12234042553191489, | |
| "grad_norm": 6.263965110944729, | |
| "learning_rate": 8.840630663803374e-07, | |
| "loss": 2.564, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.125, | |
| "grad_norm": 6.2925368290438355, | |
| "learning_rate": 8.890290163779749e-07, | |
| "loss": 2.4828, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.1276595744680851, | |
| "grad_norm": 5.562972488052771, | |
| "learning_rate": 8.938904122533535e-07, | |
| "loss": 2.3536, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.13031914893617022, | |
| "grad_norm": 5.051596119252882, | |
| "learning_rate": 8.986515659847379e-07, | |
| "loss": 2.2057, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.13297872340425532, | |
| "grad_norm": 4.783115795828969, | |
| "learning_rate": 9.033165281816909e-07, | |
| "loss": 2.1078, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.1356382978723404, | |
| "grad_norm": 4.0379873353666, | |
| "learning_rate": 9.078891087910689e-07, | |
| "loss": 2.1146, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.13829787234042554, | |
| "grad_norm": 4.425461679286027, | |
| "learning_rate": 9.123728957923294e-07, | |
| "loss": 2.106, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.14095744680851063, | |
| "grad_norm": 4.219063302168513, | |
| "learning_rate": 9.167712721119934e-07, | |
| "loss": 2.0448, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.14361702127659576, | |
| "grad_norm": 4.177230312333208, | |
| "learning_rate": 9.210874309570255e-07, | |
| "loss": 2.106, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.14627659574468085, | |
| "grad_norm": 3.58662530955563, | |
| "learning_rate": 9.253243897412354e-07, | |
| "loss": 2.1577, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.14893617021276595, | |
| "grad_norm": 3.8076903225375607, | |
| "learning_rate": 9.294850027568331e-07, | |
| "loss": 2.044, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.15159574468085107, | |
| "grad_norm": 3.9757823965828445, | |
| "learning_rate": 9.335719727244254e-07, | |
| "loss": 2.1354, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.15425531914893617, | |
| "grad_norm": 3.8984355429604305, | |
| "learning_rate": 9.375878613385046e-07, | |
| "loss": 2.0297, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.15691489361702127, | |
| "grad_norm": 4.006300970220442, | |
| "learning_rate": 9.415350989114764e-07, | |
| "loss": 1.8268, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.1595744680851064, | |
| "grad_norm": 3.7231660155630126, | |
| "learning_rate": 9.454159932071455e-07, | |
| "loss": 1.8824, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.1622340425531915, | |
| "grad_norm": 3.896921356096762, | |
| "learning_rate": 9.492327375440568e-07, | |
| "loss": 1.9475, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.16489361702127658, | |
| "grad_norm": 3.1704796037774394, | |
| "learning_rate": 9.529874182399376e-07, | |
| "loss": 1.9461, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.1675531914893617, | |
| "grad_norm": 3.624185273266048, | |
| "learning_rate": 9.566820214605051e-07, | |
| "loss": 2.0426, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.1702127659574468, | |
| "grad_norm": 3.438777616799716, | |
| "learning_rate": 9.60318439528928e-07, | |
| "loss": 1.9094, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.17287234042553193, | |
| "grad_norm": 4.546206080990496, | |
| "learning_rate": 9.638984767461214e-07, | |
| "loss": 1.9037, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.17553191489361702, | |
| "grad_norm": 3.092553572071205, | |
| "learning_rate": 9.6742385476669e-07, | |
| "loss": 1.9928, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.17819148936170212, | |
| "grad_norm": 3.3574221590495807, | |
| "learning_rate": 9.708962175706178e-07, | |
| "loss": 1.9752, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.18085106382978725, | |
| "grad_norm": 3.0865121040891714, | |
| "learning_rate": 9.743171360666435e-07, | |
| "loss": 1.9853, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.18351063829787234, | |
| "grad_norm": 3.25288028731065, | |
| "learning_rate": 9.776881123595842e-07, | |
| "loss": 1.8024, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.18617021276595744, | |
| "grad_norm": 2.8739512645435865, | |
| "learning_rate": 9.810105837106252e-07, | |
| "loss": 2.0918, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.18882978723404256, | |
| "grad_norm": 2.8379601865829414, | |
| "learning_rate": 9.842859262167094e-07, | |
| "loss": 1.801, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.19148936170212766, | |
| "grad_norm": 2.634217473181439, | |
| "learning_rate": 9.875154582326002e-07, | |
| "loss": 1.9093, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.19414893617021275, | |
| "grad_norm": 2.674409166489119, | |
| "learning_rate": 9.907004435569156e-07, | |
| "loss": 1.8468, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.19680851063829788, | |
| "grad_norm": 2.5418462429291178, | |
| "learning_rate": 9.938420944014074e-07, | |
| "loss": 2.0187, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.19946808510638298, | |
| "grad_norm": 2.249520992577069, | |
| "learning_rate": 9.969415741609375e-07, | |
| "loss": 1.8433, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.20212765957446807, | |
| "grad_norm": 3.4070896898561567, | |
| "learning_rate": 1e-06, | |
| "loss": 1.6904, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.2047872340425532, | |
| "grad_norm": 2.405587439537431, | |
| "learning_rate": 1e-06, | |
| "loss": 1.9263, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.2074468085106383, | |
| "grad_norm": 2.1351508990882686, | |
| "learning_rate": 9.985207100591716e-07, | |
| "loss": 1.9915, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.21010638297872342, | |
| "grad_norm": 2.253674050573154, | |
| "learning_rate": 9.97041420118343e-07, | |
| "loss": 1.8581, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.2127659574468085, | |
| "grad_norm": 1.9816102720192281, | |
| "learning_rate": 9.955621301775147e-07, | |
| "loss": 1.8838, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.2154255319148936, | |
| "grad_norm": 2.1585154035600502, | |
| "learning_rate": 9.940828402366864e-07, | |
| "loss": 1.7891, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.21808510638297873, | |
| "grad_norm": 2.1927798177222466, | |
| "learning_rate": 9.92603550295858e-07, | |
| "loss": 1.6338, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.22074468085106383, | |
| "grad_norm": 2.1195681092445606, | |
| "learning_rate": 9.911242603550295e-07, | |
| "loss": 1.8233, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.22340425531914893, | |
| "grad_norm": 1.79894840198714, | |
| "learning_rate": 9.896449704142011e-07, | |
| "loss": 1.6456, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.22606382978723405, | |
| "grad_norm": 2.1334819930653004, | |
| "learning_rate": 9.881656804733728e-07, | |
| "loss": 1.7721, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.22872340425531915, | |
| "grad_norm": 2.2522144017174988, | |
| "learning_rate": 9.866863905325444e-07, | |
| "loss": 1.8225, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.23138297872340424, | |
| "grad_norm": 2.1109395194449885, | |
| "learning_rate": 9.852071005917159e-07, | |
| "loss": 1.8103, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.23404255319148937, | |
| "grad_norm": 1.889220101950186, | |
| "learning_rate": 9.837278106508875e-07, | |
| "loss": 1.7531, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.23670212765957446, | |
| "grad_norm": 2.0103297503036797, | |
| "learning_rate": 9.822485207100592e-07, | |
| "loss": 1.4589, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.2393617021276596, | |
| "grad_norm": 2.0087468092299012, | |
| "learning_rate": 9.807692307692306e-07, | |
| "loss": 1.7291, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.24202127659574468, | |
| "grad_norm": 2.1537427838116603, | |
| "learning_rate": 9.792899408284023e-07, | |
| "loss": 1.8277, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.24468085106382978, | |
| "grad_norm": 1.811327456337824, | |
| "learning_rate": 9.77810650887574e-07, | |
| "loss": 1.6806, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.2473404255319149, | |
| "grad_norm": 1.8193216533171808, | |
| "learning_rate": 9.763313609467456e-07, | |
| "loss": 1.5615, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 1.7880991378455267, | |
| "learning_rate": 9.748520710059172e-07, | |
| "loss": 1.6315, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.2526595744680851, | |
| "grad_norm": 1.9732905510612142, | |
| "learning_rate": 9.733727810650887e-07, | |
| "loss": 1.6118, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.2553191489361702, | |
| "grad_norm": 1.8507510310387487, | |
| "learning_rate": 9.718934911242603e-07, | |
| "loss": 1.562, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.2579787234042553, | |
| "grad_norm": 1.937704198597928, | |
| "learning_rate": 9.704142011834318e-07, | |
| "loss": 1.5891, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.26063829787234044, | |
| "grad_norm": 1.8949539814094551, | |
| "learning_rate": 9.689349112426034e-07, | |
| "loss": 1.55, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.2632978723404255, | |
| "grad_norm": 1.7741323445830024, | |
| "learning_rate": 9.67455621301775e-07, | |
| "loss": 1.734, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.26595744680851063, | |
| "grad_norm": 1.7021314190064671, | |
| "learning_rate": 9.659763313609467e-07, | |
| "loss": 1.4889, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.26861702127659576, | |
| "grad_norm": 1.9644914618403917, | |
| "learning_rate": 9.644970414201184e-07, | |
| "loss": 1.7278, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.2712765957446808, | |
| "grad_norm": 1.8634125925152643, | |
| "learning_rate": 9.630177514792898e-07, | |
| "loss": 1.5682, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.27393617021276595, | |
| "grad_norm": 1.8401952841001055, | |
| "learning_rate": 9.615384615384615e-07, | |
| "loss": 1.565, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.2765957446808511, | |
| "grad_norm": 1.804040900318666, | |
| "learning_rate": 9.600591715976331e-07, | |
| "loss": 1.5869, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.27925531914893614, | |
| "grad_norm": 1.706090462740245, | |
| "learning_rate": 9.585798816568048e-07, | |
| "loss": 1.5148, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.28191489361702127, | |
| "grad_norm": 1.728525487149655, | |
| "learning_rate": 9.571005917159762e-07, | |
| "loss": 1.5603, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.2845744680851064, | |
| "grad_norm": 1.7524632420405768, | |
| "learning_rate": 9.556213017751479e-07, | |
| "loss": 1.6348, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.2872340425531915, | |
| "grad_norm": 1.7478581672975904, | |
| "learning_rate": 9.541420118343195e-07, | |
| "loss": 1.6436, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.2898936170212766, | |
| "grad_norm": 1.9985633100646443, | |
| "learning_rate": 9.526627218934911e-07, | |
| "loss": 1.7681, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.2925531914893617, | |
| "grad_norm": 2.079481786216591, | |
| "learning_rate": 9.511834319526627e-07, | |
| "loss": 1.419, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.29521276595744683, | |
| "grad_norm": 1.6892443930394687, | |
| "learning_rate": 9.497041420118342e-07, | |
| "loss": 1.6604, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.2978723404255319, | |
| "grad_norm": 1.610960466459826, | |
| "learning_rate": 9.482248520710058e-07, | |
| "loss": 1.6354, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.300531914893617, | |
| "grad_norm": 1.8544084912738468, | |
| "learning_rate": 9.467455621301774e-07, | |
| "loss": 1.596, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.30319148936170215, | |
| "grad_norm": 1.9170248346565737, | |
| "learning_rate": 9.45266272189349e-07, | |
| "loss": 1.6547, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.3058510638297872, | |
| "grad_norm": 1.7881140714522759, | |
| "learning_rate": 9.437869822485207e-07, | |
| "loss": 1.6039, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.30851063829787234, | |
| "grad_norm": 1.9088889444538937, | |
| "learning_rate": 9.423076923076923e-07, | |
| "loss": 1.7795, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.31117021276595747, | |
| "grad_norm": 2.2553186073976383, | |
| "learning_rate": 9.408284023668639e-07, | |
| "loss": 1.4582, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.31382978723404253, | |
| "grad_norm": 1.843300845507743, | |
| "learning_rate": 9.393491124260355e-07, | |
| "loss": 1.5459, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.31648936170212766, | |
| "grad_norm": 1.7169781655337961, | |
| "learning_rate": 9.378698224852071e-07, | |
| "loss": 1.5621, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.3191489361702128, | |
| "grad_norm": 1.690585106680432, | |
| "learning_rate": 9.363905325443787e-07, | |
| "loss": 1.5449, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.32180851063829785, | |
| "grad_norm": 1.634720340224596, | |
| "learning_rate": 9.349112426035502e-07, | |
| "loss": 1.5937, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.324468085106383, | |
| "grad_norm": 1.905851103523696, | |
| "learning_rate": 9.334319526627219e-07, | |
| "loss": 1.6611, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.3271276595744681, | |
| "grad_norm": 1.5977114642239374, | |
| "learning_rate": 9.319526627218934e-07, | |
| "loss": 1.3517, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.32978723404255317, | |
| "grad_norm": 1.7739378101582597, | |
| "learning_rate": 9.304733727810651e-07, | |
| "loss": 1.6623, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.3324468085106383, | |
| "grad_norm": 1.7433956729666737, | |
| "learning_rate": 9.289940828402366e-07, | |
| "loss": 1.565, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.3351063829787234, | |
| "grad_norm": 1.8292660534852752, | |
| "learning_rate": 9.275147928994083e-07, | |
| "loss": 1.6095, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.3377659574468085, | |
| "grad_norm": 1.8582148418654536, | |
| "learning_rate": 9.260355029585798e-07, | |
| "loss": 1.6689, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.3404255319148936, | |
| "grad_norm": 1.5903483721719576, | |
| "learning_rate": 9.245562130177515e-07, | |
| "loss": 1.4741, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.34308510638297873, | |
| "grad_norm": 1.5937293556222676, | |
| "learning_rate": 9.230769230769231e-07, | |
| "loss": 1.4603, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.34574468085106386, | |
| "grad_norm": 1.8563999128333846, | |
| "learning_rate": 9.215976331360947e-07, | |
| "loss": 1.6907, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.3484042553191489, | |
| "grad_norm": 1.7139518347374663, | |
| "learning_rate": 9.201183431952662e-07, | |
| "loss": 1.6474, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.35106382978723405, | |
| "grad_norm": 1.7465122753927422, | |
| "learning_rate": 9.186390532544378e-07, | |
| "loss": 1.6324, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.3537234042553192, | |
| "grad_norm": 1.6610443253134841, | |
| "learning_rate": 9.171597633136094e-07, | |
| "loss": 1.5045, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.35638297872340424, | |
| "grad_norm": 1.655396079412198, | |
| "learning_rate": 9.15680473372781e-07, | |
| "loss": 1.5022, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.35904255319148937, | |
| "grad_norm": 1.6336073697442315, | |
| "learning_rate": 9.142011834319526e-07, | |
| "loss": 1.5522, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.3617021276595745, | |
| "grad_norm": 1.707143679480118, | |
| "learning_rate": 9.127218934911243e-07, | |
| "loss": 1.551, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.36436170212765956, | |
| "grad_norm": 1.685651756020523, | |
| "learning_rate": 9.112426035502958e-07, | |
| "loss": 1.6122, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.3670212765957447, | |
| "grad_norm": 2.358800653945757, | |
| "learning_rate": 9.097633136094675e-07, | |
| "loss": 1.5604, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.3696808510638298, | |
| "grad_norm": 1.9426261400386715, | |
| "learning_rate": 9.08284023668639e-07, | |
| "loss": 1.4685, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.3723404255319149, | |
| "grad_norm": 1.7028161565048658, | |
| "learning_rate": 9.068047337278106e-07, | |
| "loss": 1.4377, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.375, | |
| "grad_norm": 1.6566345919926695, | |
| "learning_rate": 9.053254437869821e-07, | |
| "loss": 1.4231, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.3776595744680851, | |
| "grad_norm": 1.750830521373255, | |
| "learning_rate": 9.038461538461538e-07, | |
| "loss": 1.5528, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.3803191489361702, | |
| "grad_norm": 1.673773427490192, | |
| "learning_rate": 9.023668639053253e-07, | |
| "loss": 1.5206, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.3829787234042553, | |
| "grad_norm": 1.9158107325263525, | |
| "learning_rate": 9.00887573964497e-07, | |
| "loss": 1.5624, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.38563829787234044, | |
| "grad_norm": 1.863606934304487, | |
| "learning_rate": 8.994082840236686e-07, | |
| "loss": 1.7617, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.3882978723404255, | |
| "grad_norm": 1.743890793561562, | |
| "learning_rate": 8.979289940828402e-07, | |
| "loss": 1.6736, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.39095744680851063, | |
| "grad_norm": 1.7765187119696408, | |
| "learning_rate": 8.964497041420118e-07, | |
| "loss": 1.401, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.39361702127659576, | |
| "grad_norm": 1.9492055399414594, | |
| "learning_rate": 8.949704142011834e-07, | |
| "loss": 1.7687, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.3962765957446808, | |
| "grad_norm": 1.9491511698228168, | |
| "learning_rate": 8.93491124260355e-07, | |
| "loss": 1.5873, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.39893617021276595, | |
| "grad_norm": 1.7732893423967535, | |
| "learning_rate": 8.920118343195265e-07, | |
| "loss": 1.4666, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.4015957446808511, | |
| "grad_norm": 1.628295930467344, | |
| "learning_rate": 8.905325443786981e-07, | |
| "loss": 1.4253, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.40425531914893614, | |
| "grad_norm": 1.9501879161375453, | |
| "learning_rate": 8.890532544378698e-07, | |
| "loss": 1.5748, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.40691489361702127, | |
| "grad_norm": 1.6883577356837587, | |
| "learning_rate": 8.875739644970413e-07, | |
| "loss": 1.3546, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.4095744680851064, | |
| "grad_norm": 1.7177779483356421, | |
| "learning_rate": 8.86094674556213e-07, | |
| "loss": 1.6715, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.4122340425531915, | |
| "grad_norm": 1.806803334100437, | |
| "learning_rate": 8.846153846153846e-07, | |
| "loss": 1.5485, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.4148936170212766, | |
| "grad_norm": 1.7522522193654075, | |
| "learning_rate": 8.831360946745562e-07, | |
| "loss": 1.5091, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.4175531914893617, | |
| "grad_norm": 1.8508800423865754, | |
| "learning_rate": 8.816568047337278e-07, | |
| "loss": 1.7112, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.42021276595744683, | |
| "grad_norm": 1.752024433296569, | |
| "learning_rate": 8.801775147928994e-07, | |
| "loss": 1.3702, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.4228723404255319, | |
| "grad_norm": 2.0875697232783246, | |
| "learning_rate": 8.786982248520711e-07, | |
| "loss": 1.5972, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.425531914893617, | |
| "grad_norm": 1.7852623572002673, | |
| "learning_rate": 8.772189349112425e-07, | |
| "loss": 1.5496, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.42819148936170215, | |
| "grad_norm": 2.0049459574733968, | |
| "learning_rate": 8.757396449704142e-07, | |
| "loss": 1.5256, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.4308510638297872, | |
| "grad_norm": 1.8394745863340762, | |
| "learning_rate": 8.742603550295857e-07, | |
| "loss": 1.5466, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.43351063829787234, | |
| "grad_norm": 1.890821588557376, | |
| "learning_rate": 8.727810650887574e-07, | |
| "loss": 1.4839, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.43617021276595747, | |
| "grad_norm": 1.6481011214712673, | |
| "learning_rate": 8.713017751479289e-07, | |
| "loss": 1.6322, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.43882978723404253, | |
| "grad_norm": 1.6910215297075097, | |
| "learning_rate": 8.698224852071006e-07, | |
| "loss": 1.4294, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.44148936170212766, | |
| "grad_norm": 2.2849926490581978, | |
| "learning_rate": 8.683431952662722e-07, | |
| "loss": 1.5214, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.4441489361702128, | |
| "grad_norm": 1.6530282697158378, | |
| "learning_rate": 8.668639053254438e-07, | |
| "loss": 1.5387, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.44680851063829785, | |
| "grad_norm": 1.8612064349812791, | |
| "learning_rate": 8.653846153846154e-07, | |
| "loss": 1.4698, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.449468085106383, | |
| "grad_norm": 1.844773154127249, | |
| "learning_rate": 8.639053254437869e-07, | |
| "loss": 1.6155, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.4521276595744681, | |
| "grad_norm": 1.7920892424117567, | |
| "learning_rate": 8.624260355029585e-07, | |
| "loss": 1.4435, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.45478723404255317, | |
| "grad_norm": 1.7631889349519279, | |
| "learning_rate": 8.609467455621301e-07, | |
| "loss": 1.5657, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.4574468085106383, | |
| "grad_norm": 1.850684750618834, | |
| "learning_rate": 8.594674556213017e-07, | |
| "loss": 1.4599, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.4601063829787234, | |
| "grad_norm": 1.8936077580680233, | |
| "learning_rate": 8.579881656804734e-07, | |
| "loss": 1.4487, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.4627659574468085, | |
| "grad_norm": 1.8302300226282981, | |
| "learning_rate": 8.565088757396449e-07, | |
| "loss": 1.3982, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.4654255319148936, | |
| "grad_norm": 1.77826681795055, | |
| "learning_rate": 8.550295857988166e-07, | |
| "loss": 1.4513, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.46808510638297873, | |
| "grad_norm": 1.687204688334926, | |
| "learning_rate": 8.535502958579881e-07, | |
| "loss": 1.4119, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.47074468085106386, | |
| "grad_norm": 1.8120179028458203, | |
| "learning_rate": 8.520710059171598e-07, | |
| "loss": 1.6192, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.4734042553191489, | |
| "grad_norm": 1.8795981293168291, | |
| "learning_rate": 8.505917159763313e-07, | |
| "loss": 1.4954, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.47606382978723405, | |
| "grad_norm": 1.7065716786077503, | |
| "learning_rate": 8.491124260355029e-07, | |
| "loss": 1.5966, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.4787234042553192, | |
| "grad_norm": 1.7627718668452295, | |
| "learning_rate": 8.476331360946745e-07, | |
| "loss": 1.4327, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.48138297872340424, | |
| "grad_norm": 1.8665938451163775, | |
| "learning_rate": 8.461538461538461e-07, | |
| "loss": 1.4918, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.48404255319148937, | |
| "grad_norm": 1.819110850294668, | |
| "learning_rate": 8.446745562130177e-07, | |
| "loss": 1.5539, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.4867021276595745, | |
| "grad_norm": 1.8453397847354074, | |
| "learning_rate": 8.431952662721893e-07, | |
| "loss": 1.5331, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.48936170212765956, | |
| "grad_norm": 2.622110865899153, | |
| "learning_rate": 8.417159763313609e-07, | |
| "loss": 1.3705, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.4920212765957447, | |
| "grad_norm": 2.0496831369913378, | |
| "learning_rate": 8.402366863905325e-07, | |
| "loss": 1.434, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.4946808510638298, | |
| "grad_norm": 1.7777460669960974, | |
| "learning_rate": 8.387573964497041e-07, | |
| "loss": 1.507, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.4973404255319149, | |
| "grad_norm": 1.7648525067264564, | |
| "learning_rate": 8.372781065088757e-07, | |
| "loss": 1.5419, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 1.7346580205717035, | |
| "learning_rate": 8.357988165680473e-07, | |
| "loss": 1.4474, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.5026595744680851, | |
| "grad_norm": 1.8941186829293386, | |
| "learning_rate": 8.343195266272189e-07, | |
| "loss": 1.4239, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.5053191489361702, | |
| "grad_norm": 1.793062893259623, | |
| "learning_rate": 8.328402366863904e-07, | |
| "loss": 1.5339, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.5079787234042553, | |
| "grad_norm": 1.867427730668421, | |
| "learning_rate": 8.313609467455621e-07, | |
| "loss": 1.3395, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.5106382978723404, | |
| "grad_norm": 1.836942681632619, | |
| "learning_rate": 8.298816568047336e-07, | |
| "loss": 1.6135, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.5132978723404256, | |
| "grad_norm": 1.6942657192312134, | |
| "learning_rate": 8.284023668639053e-07, | |
| "loss": 1.5308, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.5159574468085106, | |
| "grad_norm": 1.841411465300408, | |
| "learning_rate": 8.269230769230768e-07, | |
| "loss": 1.5345, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.5186170212765957, | |
| "grad_norm": 1.8794098811564628, | |
| "learning_rate": 8.254437869822485e-07, | |
| "loss": 1.6901, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.5212765957446809, | |
| "grad_norm": 1.7012388651957833, | |
| "learning_rate": 8.239644970414202e-07, | |
| "loss": 1.474, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.523936170212766, | |
| "grad_norm": 1.7944418314011599, | |
| "learning_rate": 8.224852071005917e-07, | |
| "loss": 1.32, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.526595744680851, | |
| "grad_norm": 1.7425046897179257, | |
| "learning_rate": 8.210059171597633e-07, | |
| "loss": 1.368, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.5292553191489362, | |
| "grad_norm": 1.7880936710475852, | |
| "learning_rate": 8.195266272189348e-07, | |
| "loss": 1.645, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.5319148936170213, | |
| "grad_norm": 1.6715457627732746, | |
| "learning_rate": 8.180473372781065e-07, | |
| "loss": 1.4261, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.5345744680851063, | |
| "grad_norm": 1.822736509594185, | |
| "learning_rate": 8.16568047337278e-07, | |
| "loss": 1.624, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.5372340425531915, | |
| "grad_norm": 1.6809290356200428, | |
| "learning_rate": 8.150887573964497e-07, | |
| "loss": 1.3937, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.5398936170212766, | |
| "grad_norm": 1.7496410410443377, | |
| "learning_rate": 8.136094674556213e-07, | |
| "loss": 1.5429, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.5425531914893617, | |
| "grad_norm": 1.9199038686131074, | |
| "learning_rate": 8.121301775147929e-07, | |
| "loss": 1.5023, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.5452127659574468, | |
| "grad_norm": 1.6850758554154257, | |
| "learning_rate": 8.106508875739645e-07, | |
| "loss": 1.4991, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.5478723404255319, | |
| "grad_norm": 1.9329660957508767, | |
| "learning_rate": 8.091715976331361e-07, | |
| "loss": 1.5398, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.550531914893617, | |
| "grad_norm": 1.666476222919606, | |
| "learning_rate": 8.076923076923077e-07, | |
| "loss": 1.5637, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.5531914893617021, | |
| "grad_norm": 2.224757697809998, | |
| "learning_rate": 8.062130177514792e-07, | |
| "loss": 1.6452, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.5558510638297872, | |
| "grad_norm": 1.861249667872802, | |
| "learning_rate": 8.047337278106508e-07, | |
| "loss": 1.541, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.5585106382978723, | |
| "grad_norm": 1.7749036870266581, | |
| "learning_rate": 8.032544378698225e-07, | |
| "loss": 1.4811, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.5611702127659575, | |
| "grad_norm": 1.9725453562773687, | |
| "learning_rate": 8.01775147928994e-07, | |
| "loss": 1.4289, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.5638297872340425, | |
| "grad_norm": 1.5771745142242444, | |
| "learning_rate": 8.002958579881657e-07, | |
| "loss": 1.1851, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.5664893617021277, | |
| "grad_norm": 1.7671763938332208, | |
| "learning_rate": 7.988165680473372e-07, | |
| "loss": 1.4632, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.5691489361702128, | |
| "grad_norm": 2.3539254619170147, | |
| "learning_rate": 7.973372781065089e-07, | |
| "loss": 1.4399, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.5718085106382979, | |
| "grad_norm": 1.6811659582751803, | |
| "learning_rate": 7.958579881656804e-07, | |
| "loss": 1.3874, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.574468085106383, | |
| "grad_norm": 1.816100982937805, | |
| "learning_rate": 7.943786982248521e-07, | |
| "loss": 1.3507, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.5771276595744681, | |
| "grad_norm": 1.6054386033989114, | |
| "learning_rate": 7.928994082840237e-07, | |
| "loss": 1.3523, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.5797872340425532, | |
| "grad_norm": 1.747109205347203, | |
| "learning_rate": 7.914201183431952e-07, | |
| "loss": 1.4471, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.5824468085106383, | |
| "grad_norm": 2.544095072667201, | |
| "learning_rate": 7.899408284023668e-07, | |
| "loss": 1.4659, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.5851063829787234, | |
| "grad_norm": 1.9052355208698295, | |
| "learning_rate": 7.884615384615384e-07, | |
| "loss": 1.6808, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.5877659574468085, | |
| "grad_norm": 1.7475037482225553, | |
| "learning_rate": 7.8698224852071e-07, | |
| "loss": 1.4223, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.5904255319148937, | |
| "grad_norm": 1.7030078252678653, | |
| "learning_rate": 7.855029585798816e-07, | |
| "loss": 1.558, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.5930851063829787, | |
| "grad_norm": 1.6935707673119045, | |
| "learning_rate": 7.840236686390532e-07, | |
| "loss": 1.3466, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.5957446808510638, | |
| "grad_norm": 1.9730090137474936, | |
| "learning_rate": 7.825443786982249e-07, | |
| "loss": 1.6373, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.598404255319149, | |
| "grad_norm": 1.7329005942211182, | |
| "learning_rate": 7.810650887573964e-07, | |
| "loss": 1.3348, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.601063829787234, | |
| "grad_norm": 2.033731441401403, | |
| "learning_rate": 7.795857988165681e-07, | |
| "loss": 1.5524, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.6037234042553191, | |
| "grad_norm": 1.7207604873916247, | |
| "learning_rate": 7.781065088757395e-07, | |
| "loss": 1.3862, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.6063829787234043, | |
| "grad_norm": 1.8075753757910789, | |
| "learning_rate": 7.766272189349112e-07, | |
| "loss": 1.5981, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.6090425531914894, | |
| "grad_norm": 2.291978352476086, | |
| "learning_rate": 7.751479289940827e-07, | |
| "loss": 1.4514, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.6117021276595744, | |
| "grad_norm": 1.8644568615293915, | |
| "learning_rate": 7.736686390532544e-07, | |
| "loss": 1.6587, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.6143617021276596, | |
| "grad_norm": 2.594171053250292, | |
| "learning_rate": 7.721893491124259e-07, | |
| "loss": 1.6336, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.6170212765957447, | |
| "grad_norm": 1.5011539788709316, | |
| "learning_rate": 7.707100591715976e-07, | |
| "loss": 1.2387, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.6196808510638298, | |
| "grad_norm": 1.6819405282763624, | |
| "learning_rate": 7.692307692307693e-07, | |
| "loss": 1.5038, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.6223404255319149, | |
| "grad_norm": 1.7251235005494032, | |
| "learning_rate": 7.677514792899408e-07, | |
| "loss": 1.5774, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.625, | |
| "grad_norm": 1.864499827243002, | |
| "learning_rate": 7.662721893491125e-07, | |
| "loss": 1.5276, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.6276595744680851, | |
| "grad_norm": 1.7781078666304035, | |
| "learning_rate": 7.64792899408284e-07, | |
| "loss": 1.5232, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.6303191489361702, | |
| "grad_norm": 1.6599021088795032, | |
| "learning_rate": 7.633136094674556e-07, | |
| "loss": 1.4473, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.6329787234042553, | |
| "grad_norm": 1.6721336663765791, | |
| "learning_rate": 7.618343195266271e-07, | |
| "loss": 1.3851, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.6356382978723404, | |
| "grad_norm": 1.797473310291003, | |
| "learning_rate": 7.603550295857988e-07, | |
| "loss": 1.4871, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.6382978723404256, | |
| "grad_norm": 1.68684289642348, | |
| "learning_rate": 7.588757396449704e-07, | |
| "loss": 1.3971, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.6409574468085106, | |
| "grad_norm": 1.6548030218587813, | |
| "learning_rate": 7.57396449704142e-07, | |
| "loss": 1.4413, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.6436170212765957, | |
| "grad_norm": 1.7764920048747164, | |
| "learning_rate": 7.559171597633136e-07, | |
| "loss": 1.5327, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.6462765957446809, | |
| "grad_norm": 2.3776019048662627, | |
| "learning_rate": 7.544378698224852e-07, | |
| "loss": 1.3973, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.648936170212766, | |
| "grad_norm": 2.180898241246454, | |
| "learning_rate": 7.529585798816568e-07, | |
| "loss": 1.4108, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.651595744680851, | |
| "grad_norm": 1.7308120559219609, | |
| "learning_rate": 7.514792899408284e-07, | |
| "loss": 1.437, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.6542553191489362, | |
| "grad_norm": 1.6797613083347633, | |
| "learning_rate": 7.5e-07, | |
| "loss": 1.4266, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.6569148936170213, | |
| "grad_norm": 1.7244677372074293, | |
| "learning_rate": 7.485207100591716e-07, | |
| "loss": 1.4562, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.6595744680851063, | |
| "grad_norm": 1.831008658275623, | |
| "learning_rate": 7.470414201183431e-07, | |
| "loss": 1.625, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.6622340425531915, | |
| "grad_norm": 1.5987807515924746, | |
| "learning_rate": 7.455621301775148e-07, | |
| "loss": 1.351, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.6648936170212766, | |
| "grad_norm": 1.657627324756177, | |
| "learning_rate": 7.440828402366863e-07, | |
| "loss": 1.3021, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.6675531914893617, | |
| "grad_norm": 1.6806656229564951, | |
| "learning_rate": 7.42603550295858e-07, | |
| "loss": 1.4708, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.6702127659574468, | |
| "grad_norm": 1.6469208307421896, | |
| "learning_rate": 7.411242603550295e-07, | |
| "loss": 1.4309, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.6728723404255319, | |
| "grad_norm": 1.6396856616158755, | |
| "learning_rate": 7.396449704142012e-07, | |
| "loss": 1.501, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.675531914893617, | |
| "grad_norm": 1.6377964159170837, | |
| "learning_rate": 7.381656804733728e-07, | |
| "loss": 1.5208, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.6781914893617021, | |
| "grad_norm": 1.6580558864253538, | |
| "learning_rate": 7.366863905325444e-07, | |
| "loss": 1.4638, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.6808510638297872, | |
| "grad_norm": 1.837851772242258, | |
| "learning_rate": 7.352071005917159e-07, | |
| "loss": 1.3164, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.6835106382978723, | |
| "grad_norm": 2.224825104258165, | |
| "learning_rate": 7.337278106508875e-07, | |
| "loss": 1.6295, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.6861702127659575, | |
| "grad_norm": 1.6131790535172048, | |
| "learning_rate": 7.322485207100591e-07, | |
| "loss": 1.4414, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.6888297872340425, | |
| "grad_norm": 1.549489595607848, | |
| "learning_rate": 7.307692307692307e-07, | |
| "loss": 1.4455, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.6914893617021277, | |
| "grad_norm": 1.761687284810298, | |
| "learning_rate": 7.292899408284023e-07, | |
| "loss": 1.4913, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.6941489361702128, | |
| "grad_norm": 1.6593936380320258, | |
| "learning_rate": 7.27810650887574e-07, | |
| "loss": 1.6427, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.6968085106382979, | |
| "grad_norm": 1.7879593292364175, | |
| "learning_rate": 7.263313609467455e-07, | |
| "loss": 1.6127, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.699468085106383, | |
| "grad_norm": 1.559119726167982, | |
| "learning_rate": 7.248520710059172e-07, | |
| "loss": 1.3617, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.7021276595744681, | |
| "grad_norm": 1.5376887507996986, | |
| "learning_rate": 7.233727810650887e-07, | |
| "loss": 1.3915, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.7047872340425532, | |
| "grad_norm": 1.892877482230423, | |
| "learning_rate": 7.218934911242604e-07, | |
| "loss": 1.3938, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.7074468085106383, | |
| "grad_norm": 2.1615047832844647, | |
| "learning_rate": 7.204142011834318e-07, | |
| "loss": 1.433, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.7101063829787234, | |
| "grad_norm": 1.5754637988987956, | |
| "learning_rate": 7.189349112426035e-07, | |
| "loss": 1.3913, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.7127659574468085, | |
| "grad_norm": 1.4917666655680848, | |
| "learning_rate": 7.17455621301775e-07, | |
| "loss": 1.4024, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.7154255319148937, | |
| "grad_norm": 1.7371252437936426, | |
| "learning_rate": 7.159763313609467e-07, | |
| "loss": 1.5104, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.7180851063829787, | |
| "grad_norm": 1.479255763133087, | |
| "learning_rate": 7.144970414201183e-07, | |
| "loss": 1.3533, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.7207446808510638, | |
| "grad_norm": 1.6094715867178733, | |
| "learning_rate": 7.130177514792899e-07, | |
| "loss": 1.3532, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.723404255319149, | |
| "grad_norm": 1.565198399335246, | |
| "learning_rate": 7.115384615384616e-07, | |
| "loss": 1.3988, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.726063829787234, | |
| "grad_norm": 1.5067122007483011, | |
| "learning_rate": 7.100591715976331e-07, | |
| "loss": 1.3825, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.7287234042553191, | |
| "grad_norm": 1.7140633929936213, | |
| "learning_rate": 7.085798816568048e-07, | |
| "loss": 1.4082, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.7313829787234043, | |
| "grad_norm": 1.540948863934289, | |
| "learning_rate": 7.071005917159762e-07, | |
| "loss": 1.5153, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.7340425531914894, | |
| "grad_norm": 1.7664241501358, | |
| "learning_rate": 7.056213017751479e-07, | |
| "loss": 1.2721, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.7367021276595744, | |
| "grad_norm": 1.5709026992552224, | |
| "learning_rate": 7.041420118343195e-07, | |
| "loss": 1.3492, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.7393617021276596, | |
| "grad_norm": 1.5068566647857482, | |
| "learning_rate": 7.026627218934911e-07, | |
| "loss": 1.362, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.7420212765957447, | |
| "grad_norm": 1.9554416192824882, | |
| "learning_rate": 7.011834319526627e-07, | |
| "loss": 1.6618, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.7446808510638298, | |
| "grad_norm": 1.6405976792740071, | |
| "learning_rate": 6.997041420118343e-07, | |
| "loss": 1.5917, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.7473404255319149, | |
| "grad_norm": 1.7066156854813295, | |
| "learning_rate": 6.982248520710059e-07, | |
| "loss": 1.2984, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "grad_norm": 1.616654607721298, | |
| "learning_rate": 6.967455621301775e-07, | |
| "loss": 1.4085, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.7526595744680851, | |
| "grad_norm": 1.6119917549130687, | |
| "learning_rate": 6.952662721893491e-07, | |
| "loss": 1.4059, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.7553191489361702, | |
| "grad_norm": 1.4894224582399371, | |
| "learning_rate": 6.937869822485208e-07, | |
| "loss": 1.4205, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.7579787234042553, | |
| "grad_norm": 1.7561130701083838, | |
| "learning_rate": 6.923076923076922e-07, | |
| "loss": 1.5931, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.7606382978723404, | |
| "grad_norm": 1.5507572662266917, | |
| "learning_rate": 6.908284023668639e-07, | |
| "loss": 1.3968, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.7632978723404256, | |
| "grad_norm": 1.4671913155048064, | |
| "learning_rate": 6.893491124260354e-07, | |
| "loss": 1.2951, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.7659574468085106, | |
| "grad_norm": 1.5498863732312698, | |
| "learning_rate": 6.878698224852071e-07, | |
| "loss": 1.2232, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.7686170212765957, | |
| "grad_norm": 1.680206076834721, | |
| "learning_rate": 6.863905325443786e-07, | |
| "loss": 1.4992, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.7712765957446809, | |
| "grad_norm": 1.718088751084764, | |
| "learning_rate": 6.849112426035503e-07, | |
| "loss": 1.4422, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.773936170212766, | |
| "grad_norm": 1.5282347438855142, | |
| "learning_rate": 6.834319526627219e-07, | |
| "loss": 1.4063, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.776595744680851, | |
| "grad_norm": 1.9525999050003993, | |
| "learning_rate": 6.819526627218935e-07, | |
| "loss": 1.5957, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.7792553191489362, | |
| "grad_norm": 2.258813866966866, | |
| "learning_rate": 6.804733727810651e-07, | |
| "loss": 1.4431, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.7819148936170213, | |
| "grad_norm": 1.5364750834268603, | |
| "learning_rate": 6.789940828402367e-07, | |
| "loss": 1.3558, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.7845744680851063, | |
| "grad_norm": 1.5393402313754123, | |
| "learning_rate": 6.775147928994082e-07, | |
| "loss": 1.439, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.7872340425531915, | |
| "grad_norm": 1.6455162885770198, | |
| "learning_rate": 6.760355029585798e-07, | |
| "loss": 1.5158, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.7898936170212766, | |
| "grad_norm": 1.6475778661453933, | |
| "learning_rate": 6.745562130177514e-07, | |
| "loss": 1.4278, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.7925531914893617, | |
| "grad_norm": 1.502594611161215, | |
| "learning_rate": 6.730769230769231e-07, | |
| "loss": 1.3064, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.7952127659574468, | |
| "grad_norm": 1.4819306978451936, | |
| "learning_rate": 6.715976331360946e-07, | |
| "loss": 1.4, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.7978723404255319, | |
| "grad_norm": 1.6911681538448085, | |
| "learning_rate": 6.701183431952663e-07, | |
| "loss": 1.3364, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.800531914893617, | |
| "grad_norm": 1.4712764033020207, | |
| "learning_rate": 6.686390532544378e-07, | |
| "loss": 1.3514, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.8031914893617021, | |
| "grad_norm": 1.5453820007555663, | |
| "learning_rate": 6.671597633136095e-07, | |
| "loss": 1.252, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.8058510638297872, | |
| "grad_norm": 1.6870546106387143, | |
| "learning_rate": 6.65680473372781e-07, | |
| "loss": 1.4819, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.8085106382978723, | |
| "grad_norm": 1.539899104888, | |
| "learning_rate": 6.642011834319526e-07, | |
| "loss": 1.4248, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.8111702127659575, | |
| "grad_norm": 1.8570540873303243, | |
| "learning_rate": 6.627218934911242e-07, | |
| "loss": 1.398, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.8138297872340425, | |
| "grad_norm": 1.6462980732890118, | |
| "learning_rate": 6.612426035502958e-07, | |
| "loss": 1.472, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.8164893617021277, | |
| "grad_norm": 5.047207753458083, | |
| "learning_rate": 6.597633136094674e-07, | |
| "loss": 1.4934, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.8191489361702128, | |
| "grad_norm": 1.6578320558708661, | |
| "learning_rate": 6.58284023668639e-07, | |
| "loss": 1.4467, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.8218085106382979, | |
| "grad_norm": 1.650877101009254, | |
| "learning_rate": 6.568047337278106e-07, | |
| "loss": 1.3491, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.824468085106383, | |
| "grad_norm": 1.7139451577038085, | |
| "learning_rate": 6.553254437869822e-07, | |
| "loss": 1.4975, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.8271276595744681, | |
| "grad_norm": 1.6275656326818695, | |
| "learning_rate": 6.538461538461538e-07, | |
| "loss": 1.4493, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.8297872340425532, | |
| "grad_norm": 1.693438289435893, | |
| "learning_rate": 6.523668639053254e-07, | |
| "loss": 1.3593, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.8324468085106383, | |
| "grad_norm": 1.5252049292780119, | |
| "learning_rate": 6.50887573964497e-07, | |
| "loss": 1.4798, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.8351063829787234, | |
| "grad_norm": 1.7006952995622482, | |
| "learning_rate": 6.494082840236686e-07, | |
| "loss": 1.5054, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.8377659574468085, | |
| "grad_norm": 1.7203889834996966, | |
| "learning_rate": 6.479289940828401e-07, | |
| "loss": 1.599, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.8404255319148937, | |
| "grad_norm": 1.665289055188048, | |
| "learning_rate": 6.464497041420118e-07, | |
| "loss": 1.47, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.8430851063829787, | |
| "grad_norm": 1.783255201333473, | |
| "learning_rate": 6.449704142011834e-07, | |
| "loss": 1.3293, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.8457446808510638, | |
| "grad_norm": 1.5474686687545494, | |
| "learning_rate": 6.43491124260355e-07, | |
| "loss": 1.5827, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.848404255319149, | |
| "grad_norm": 1.7096057045749924, | |
| "learning_rate": 6.420118343195266e-07, | |
| "loss": 1.4208, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.851063829787234, | |
| "grad_norm": 1.660091264238197, | |
| "learning_rate": 6.405325443786982e-07, | |
| "loss": 1.3729, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.8537234042553191, | |
| "grad_norm": 2.1515758550003663, | |
| "learning_rate": 6.390532544378699e-07, | |
| "loss": 1.6061, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.8563829787234043, | |
| "grad_norm": 1.6705826372283528, | |
| "learning_rate": 6.375739644970414e-07, | |
| "loss": 1.3534, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.8590425531914894, | |
| "grad_norm": 1.6232024300738965, | |
| "learning_rate": 6.360946745562131e-07, | |
| "loss": 1.425, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.8617021276595744, | |
| "grad_norm": 1.7044169574045285, | |
| "learning_rate": 6.346153846153845e-07, | |
| "loss": 1.5695, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.8643617021276596, | |
| "grad_norm": 1.7606258681853417, | |
| "learning_rate": 6.331360946745562e-07, | |
| "loss": 1.418, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.8670212765957447, | |
| "grad_norm": 1.5280589114761016, | |
| "learning_rate": 6.316568047337277e-07, | |
| "loss": 1.4349, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.8696808510638298, | |
| "grad_norm": 1.7139434884413298, | |
| "learning_rate": 6.301775147928994e-07, | |
| "loss": 1.4371, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.8723404255319149, | |
| "grad_norm": 1.5926203744807812, | |
| "learning_rate": 6.28698224852071e-07, | |
| "loss": 1.4015, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.875, | |
| "grad_norm": 1.6264161761425606, | |
| "learning_rate": 6.272189349112426e-07, | |
| "loss": 1.4729, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.8776595744680851, | |
| "grad_norm": 1.5831228752137032, | |
| "learning_rate": 6.257396449704142e-07, | |
| "loss": 1.421, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.8803191489361702, | |
| "grad_norm": 1.5811866295220025, | |
| "learning_rate": 6.242603550295858e-07, | |
| "loss": 1.3628, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.8829787234042553, | |
| "grad_norm": 1.5679708453260865, | |
| "learning_rate": 6.227810650887574e-07, | |
| "loss": 1.2859, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.8856382978723404, | |
| "grad_norm": 1.6766225130373726, | |
| "learning_rate": 6.213017751479289e-07, | |
| "loss": 1.4369, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.8882978723404256, | |
| "grad_norm": 1.8047128650814857, | |
| "learning_rate": 6.198224852071005e-07, | |
| "loss": 1.5913, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.8909574468085106, | |
| "grad_norm": 1.6456822515106042, | |
| "learning_rate": 6.183431952662722e-07, | |
| "loss": 1.4972, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.8936170212765957, | |
| "grad_norm": 1.552523155961138, | |
| "learning_rate": 6.168639053254437e-07, | |
| "loss": 1.3171, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.8962765957446809, | |
| "grad_norm": 1.788183804411441, | |
| "learning_rate": 6.153846153846154e-07, | |
| "loss": 1.5059, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.898936170212766, | |
| "grad_norm": 1.5907686060024624, | |
| "learning_rate": 6.139053254437869e-07, | |
| "loss": 1.1485, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.901595744680851, | |
| "grad_norm": 1.7254040314022046, | |
| "learning_rate": 6.124260355029586e-07, | |
| "loss": 1.5628, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.9042553191489362, | |
| "grad_norm": 1.6347353623664331, | |
| "learning_rate": 6.109467455621301e-07, | |
| "loss": 1.3704, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.9069148936170213, | |
| "grad_norm": 2.194464251540189, | |
| "learning_rate": 6.094674556213018e-07, | |
| "loss": 1.4758, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.9095744680851063, | |
| "grad_norm": 1.5698776022464798, | |
| "learning_rate": 6.079881656804734e-07, | |
| "loss": 1.3871, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.9122340425531915, | |
| "grad_norm": 1.8859732282362605, | |
| "learning_rate": 6.065088757396449e-07, | |
| "loss": 1.4136, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.9148936170212766, | |
| "grad_norm": 1.7373147056080605, | |
| "learning_rate": 6.050295857988165e-07, | |
| "loss": 1.5494, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.9175531914893617, | |
| "grad_norm": 1.6179407549268443, | |
| "learning_rate": 6.035502958579881e-07, | |
| "loss": 1.3776, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.9202127659574468, | |
| "grad_norm": 1.77670135626407, | |
| "learning_rate": 6.020710059171597e-07, | |
| "loss": 1.3275, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.9228723404255319, | |
| "grad_norm": 1.7482955670467306, | |
| "learning_rate": 6.005917159763313e-07, | |
| "loss": 1.4015, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.925531914893617, | |
| "grad_norm": 1.6887523807534266, | |
| "learning_rate": 5.991124260355029e-07, | |
| "loss": 1.5069, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.9281914893617021, | |
| "grad_norm": 1.514381055516736, | |
| "learning_rate": 5.976331360946746e-07, | |
| "loss": 1.3818, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.9308510638297872, | |
| "grad_norm": 1.4907168186147164, | |
| "learning_rate": 5.961538461538461e-07, | |
| "loss": 1.4495, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.9335106382978723, | |
| "grad_norm": 2.265910373999388, | |
| "learning_rate": 5.946745562130178e-07, | |
| "loss": 1.2853, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.9361702127659575, | |
| "grad_norm": 1.7992082788491501, | |
| "learning_rate": 5.931952662721894e-07, | |
| "loss": 1.539, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.9388297872340425, | |
| "grad_norm": 1.443028062263383, | |
| "learning_rate": 5.917159763313609e-07, | |
| "loss": 1.324, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.9414893617021277, | |
| "grad_norm": 1.6139434859203183, | |
| "learning_rate": 5.902366863905324e-07, | |
| "loss": 1.3336, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.9441489361702128, | |
| "grad_norm": 2.252829785523421, | |
| "learning_rate": 5.887573964497041e-07, | |
| "loss": 1.2986, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.9468085106382979, | |
| "grad_norm": 1.7284412087838827, | |
| "learning_rate": 5.872781065088757e-07, | |
| "loss": 1.4817, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.949468085106383, | |
| "grad_norm": 1.7787571244355151, | |
| "learning_rate": 5.857988165680473e-07, | |
| "loss": 1.5187, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.9521276595744681, | |
| "grad_norm": 2.181835688354598, | |
| "learning_rate": 5.84319526627219e-07, | |
| "loss": 1.5578, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.9547872340425532, | |
| "grad_norm": 1.4634212657053263, | |
| "learning_rate": 5.828402366863905e-07, | |
| "loss": 1.2286, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.9574468085106383, | |
| "grad_norm": 1.687131629579792, | |
| "learning_rate": 5.813609467455622e-07, | |
| "loss": 1.3256, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.9601063829787234, | |
| "grad_norm": 1.629444719409858, | |
| "learning_rate": 5.798816568047337e-07, | |
| "loss": 1.5522, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.9627659574468085, | |
| "grad_norm": 1.6487449612370586, | |
| "learning_rate": 5.784023668639053e-07, | |
| "loss": 1.5252, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.9654255319148937, | |
| "grad_norm": 1.5119623190054727, | |
| "learning_rate": 5.769230769230768e-07, | |
| "loss": 1.4479, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.9680851063829787, | |
| "grad_norm": 1.529900871256959, | |
| "learning_rate": 5.754437869822485e-07, | |
| "loss": 1.4081, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.9707446808510638, | |
| "grad_norm": 1.679158185017686, | |
| "learning_rate": 5.739644970414201e-07, | |
| "loss": 1.3219, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.973404255319149, | |
| "grad_norm": 1.5743852626682602, | |
| "learning_rate": 5.724852071005917e-07, | |
| "loss": 1.4408, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.976063829787234, | |
| "grad_norm": 1.4327135424204693, | |
| "learning_rate": 5.710059171597633e-07, | |
| "loss": 1.4267, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.9787234042553191, | |
| "grad_norm": 1.693248001536766, | |
| "learning_rate": 5.695266272189349e-07, | |
| "loss": 1.459, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.9813829787234043, | |
| "grad_norm": 1.6118417002332202, | |
| "learning_rate": 5.680473372781065e-07, | |
| "loss": 1.3239, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.9840425531914894, | |
| "grad_norm": 1.5994817848229685, | |
| "learning_rate": 5.665680473372781e-07, | |
| "loss": 1.3316, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.9867021276595744, | |
| "grad_norm": 1.734698428678095, | |
| "learning_rate": 5.650887573964497e-07, | |
| "loss": 1.4394, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.9893617021276596, | |
| "grad_norm": 2.8750724783344626, | |
| "learning_rate": 5.636094674556213e-07, | |
| "loss": 1.3439, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.9920212765957447, | |
| "grad_norm": 1.5483975094463054, | |
| "learning_rate": 5.621301775147928e-07, | |
| "loss": 1.3684, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.9946808510638298, | |
| "grad_norm": 1.5202730618700395, | |
| "learning_rate": 5.606508875739645e-07, | |
| "loss": 1.3361, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.9973404255319149, | |
| "grad_norm": 1.6144865765856777, | |
| "learning_rate": 5.59171597633136e-07, | |
| "loss": 1.3195, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 1.6792801473937533, | |
| "learning_rate": 5.576923076923077e-07, | |
| "loss": 1.4384, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 1.002659574468085, | |
| "grad_norm": 1.9175762077814629, | |
| "learning_rate": 5.562130177514792e-07, | |
| "loss": 1.4758, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 1.0053191489361701, | |
| "grad_norm": 1.8048610851481421, | |
| "learning_rate": 5.547337278106509e-07, | |
| "loss": 1.4803, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 1.0079787234042554, | |
| "grad_norm": 1.606071563190404, | |
| "learning_rate": 5.532544378698225e-07, | |
| "loss": 1.485, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 1.0106382978723405, | |
| "grad_norm": 1.5572569044777356, | |
| "learning_rate": 5.517751479289941e-07, | |
| "loss": 1.2355, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.0132978723404256, | |
| "grad_norm": 1.5959684601920348, | |
| "learning_rate": 5.502958579881657e-07, | |
| "loss": 1.2246, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 1.0159574468085106, | |
| "grad_norm": 1.9674075560318893, | |
| "learning_rate": 5.488165680473372e-07, | |
| "loss": 1.5334, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 1.0186170212765957, | |
| "grad_norm": 1.6680206362227628, | |
| "learning_rate": 5.473372781065088e-07, | |
| "loss": 1.4226, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 1.0212765957446808, | |
| "grad_norm": 1.5700791218738284, | |
| "learning_rate": 5.458579881656804e-07, | |
| "loss": 1.3727, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 1.023936170212766, | |
| "grad_norm": 1.5969942768737249, | |
| "learning_rate": 5.44378698224852e-07, | |
| "loss": 1.4911, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 1.0265957446808511, | |
| "grad_norm": 1.5398360114287806, | |
| "learning_rate": 5.428994082840237e-07, | |
| "loss": 1.3769, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 1.0292553191489362, | |
| "grad_norm": 1.5805625597294484, | |
| "learning_rate": 5.414201183431952e-07, | |
| "loss": 1.4166, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 1.0319148936170213, | |
| "grad_norm": 1.5312252431931253, | |
| "learning_rate": 5.399408284023669e-07, | |
| "loss": 1.2332, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 1.0345744680851063, | |
| "grad_norm": 2.185966499141712, | |
| "learning_rate": 5.384615384615384e-07, | |
| "loss": 1.3489, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 1.0372340425531914, | |
| "grad_norm": 1.5033859343676257, | |
| "learning_rate": 5.369822485207101e-07, | |
| "loss": 1.4487, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.0398936170212767, | |
| "grad_norm": 1.6054054860368354, | |
| "learning_rate": 5.355029585798815e-07, | |
| "loss": 1.4788, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 1.0425531914893618, | |
| "grad_norm": 1.6494604615754016, | |
| "learning_rate": 5.340236686390532e-07, | |
| "loss": 1.479, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 1.0452127659574468, | |
| "grad_norm": 1.7222866777780232, | |
| "learning_rate": 5.325443786982249e-07, | |
| "loss": 1.3891, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 1.047872340425532, | |
| "grad_norm": 1.7350078493539867, | |
| "learning_rate": 5.310650887573964e-07, | |
| "loss": 1.5214, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 1.050531914893617, | |
| "grad_norm": 1.677699700420203, | |
| "learning_rate": 5.295857988165681e-07, | |
| "loss": 1.4027, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 1.053191489361702, | |
| "grad_norm": 1.7218061845324277, | |
| "learning_rate": 5.281065088757396e-07, | |
| "loss": 1.5612, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 1.0558510638297873, | |
| "grad_norm": 2.0460338465780015, | |
| "learning_rate": 5.266272189349113e-07, | |
| "loss": 1.7095, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 1.0585106382978724, | |
| "grad_norm": 1.8707733198479073, | |
| "learning_rate": 5.251479289940828e-07, | |
| "loss": 1.3582, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 1.0611702127659575, | |
| "grad_norm": 1.6674094055135629, | |
| "learning_rate": 5.236686390532545e-07, | |
| "loss": 1.4667, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 1.0638297872340425, | |
| "grad_norm": 1.9223542274996348, | |
| "learning_rate": 5.22189349112426e-07, | |
| "loss": 1.3237, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.0664893617021276, | |
| "grad_norm": 1.442702870639783, | |
| "learning_rate": 5.207100591715976e-07, | |
| "loss": 1.3436, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 1.0691489361702127, | |
| "grad_norm": 1.459623592531859, | |
| "learning_rate": 5.192307692307692e-07, | |
| "loss": 1.3075, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 1.071808510638298, | |
| "grad_norm": 1.7736298040913328, | |
| "learning_rate": 5.177514792899408e-07, | |
| "loss": 1.55, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 1.074468085106383, | |
| "grad_norm": 1.492584255658168, | |
| "learning_rate": 5.162721893491124e-07, | |
| "loss": 1.3287, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 1.077127659574468, | |
| "grad_norm": 1.5311371897968131, | |
| "learning_rate": 5.14792899408284e-07, | |
| "loss": 1.2852, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 1.0797872340425532, | |
| "grad_norm": 1.7056998990486645, | |
| "learning_rate": 5.133136094674556e-07, | |
| "loss": 1.3844, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 1.0824468085106382, | |
| "grad_norm": 1.5754295217572547, | |
| "learning_rate": 5.118343195266272e-07, | |
| "loss": 1.4362, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 1.0851063829787233, | |
| "grad_norm": 1.7118767376849466, | |
| "learning_rate": 5.103550295857988e-07, | |
| "loss": 1.4678, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 1.0877659574468086, | |
| "grad_norm": 1.7720130880057632, | |
| "learning_rate": 5.088757396449705e-07, | |
| "loss": 1.407, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 1.0904255319148937, | |
| "grad_norm": 1.6779654968724649, | |
| "learning_rate": 5.07396449704142e-07, | |
| "loss": 1.4306, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.0930851063829787, | |
| "grad_norm": 1.6236129122592553, | |
| "learning_rate": 5.059171597633136e-07, | |
| "loss": 1.3498, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 1.0957446808510638, | |
| "grad_norm": 1.6329048532167492, | |
| "learning_rate": 5.044378698224851e-07, | |
| "loss": 1.4461, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 1.0984042553191489, | |
| "grad_norm": 1.6207024159387382, | |
| "learning_rate": 5.029585798816568e-07, | |
| "loss": 1.3772, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 1.101063829787234, | |
| "grad_norm": 1.5324741841766363, | |
| "learning_rate": 5.014792899408283e-07, | |
| "loss": 1.1312, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 1.1037234042553192, | |
| "grad_norm": 1.7401441557132455, | |
| "learning_rate": 5e-07, | |
| "loss": 1.1982, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 1.1063829787234043, | |
| "grad_norm": 1.7504453773507886, | |
| "learning_rate": 4.985207100591715e-07, | |
| "loss": 1.4541, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 1.1090425531914894, | |
| "grad_norm": 1.699882851098421, | |
| "learning_rate": 4.970414201183432e-07, | |
| "loss": 1.2368, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 1.1117021276595744, | |
| "grad_norm": 1.6218516588828402, | |
| "learning_rate": 4.955621301775147e-07, | |
| "loss": 1.2906, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 1.1143617021276595, | |
| "grad_norm": 1.6649091116123456, | |
| "learning_rate": 4.940828402366864e-07, | |
| "loss": 1.4454, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 1.1170212765957448, | |
| "grad_norm": 1.728282227356823, | |
| "learning_rate": 4.926035502958579e-07, | |
| "loss": 1.4663, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.1196808510638299, | |
| "grad_norm": 1.6435295189184387, | |
| "learning_rate": 4.911242603550296e-07, | |
| "loss": 1.4789, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 1.122340425531915, | |
| "grad_norm": 1.8191659615562332, | |
| "learning_rate": 4.896449704142011e-07, | |
| "loss": 1.3986, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 1.125, | |
| "grad_norm": 1.5470082389400086, | |
| "learning_rate": 4.881656804733728e-07, | |
| "loss": 1.4072, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 1.127659574468085, | |
| "grad_norm": 1.581839768866324, | |
| "learning_rate": 4.866863905325443e-07, | |
| "loss": 1.3122, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 1.1303191489361701, | |
| "grad_norm": 1.4620677635311095, | |
| "learning_rate": 4.852071005917159e-07, | |
| "loss": 1.2643, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 1.1329787234042552, | |
| "grad_norm": 1.6707102916564711, | |
| "learning_rate": 4.837278106508875e-07, | |
| "loss": 1.3747, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 1.1356382978723405, | |
| "grad_norm": 1.5396285202284683, | |
| "learning_rate": 4.822485207100592e-07, | |
| "loss": 1.3101, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 1.1382978723404256, | |
| "grad_norm": 1.8606687901078265, | |
| "learning_rate": 4.807692307692307e-07, | |
| "loss": 1.3172, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 1.1409574468085106, | |
| "grad_norm": 1.6119139560046312, | |
| "learning_rate": 4.792899408284024e-07, | |
| "loss": 1.3865, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 1.1436170212765957, | |
| "grad_norm": 1.715672112601465, | |
| "learning_rate": 4.778106508875739e-07, | |
| "loss": 1.4168, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.1462765957446808, | |
| "grad_norm": 1.6367162736314051, | |
| "learning_rate": 4.7633136094674555e-07, | |
| "loss": 1.6202, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 1.148936170212766, | |
| "grad_norm": 1.6173047746530647, | |
| "learning_rate": 4.748520710059171e-07, | |
| "loss": 1.4345, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 1.1515957446808511, | |
| "grad_norm": 1.591852292459417, | |
| "learning_rate": 4.733727810650887e-07, | |
| "loss": 1.3504, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 1.1542553191489362, | |
| "grad_norm": 1.704091419091409, | |
| "learning_rate": 4.7189349112426035e-07, | |
| "loss": 1.3978, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 1.1569148936170213, | |
| "grad_norm": 1.6750388468322808, | |
| "learning_rate": 4.7041420118343195e-07, | |
| "loss": 1.5323, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 1.1595744680851063, | |
| "grad_norm": 1.550611356946591, | |
| "learning_rate": 4.6893491124260356e-07, | |
| "loss": 1.3516, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 1.1622340425531914, | |
| "grad_norm": 1.6666235759250934, | |
| "learning_rate": 4.674556213017751e-07, | |
| "loss": 1.3193, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 1.1648936170212765, | |
| "grad_norm": 1.6060648830034072, | |
| "learning_rate": 4.659763313609467e-07, | |
| "loss": 1.4802, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 1.1675531914893618, | |
| "grad_norm": 2.7759623465499113, | |
| "learning_rate": 4.644970414201183e-07, | |
| "loss": 1.3673, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 1.1702127659574468, | |
| "grad_norm": 1.6142500584687862, | |
| "learning_rate": 4.630177514792899e-07, | |
| "loss": 1.2367, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.172872340425532, | |
| "grad_norm": 1.6293255382971552, | |
| "learning_rate": 4.6153846153846156e-07, | |
| "loss": 1.4771, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 1.175531914893617, | |
| "grad_norm": 1.6166636037633662, | |
| "learning_rate": 4.600591715976331e-07, | |
| "loss": 1.3891, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 1.178191489361702, | |
| "grad_norm": 1.6156668770120142, | |
| "learning_rate": 4.585798816568047e-07, | |
| "loss": 1.3015, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 1.1808510638297873, | |
| "grad_norm": 1.541456190983287, | |
| "learning_rate": 4.571005917159763e-07, | |
| "loss": 1.325, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 1.1835106382978724, | |
| "grad_norm": 1.5371528822910774, | |
| "learning_rate": 4.556213017751479e-07, | |
| "loss": 1.391, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 1.1861702127659575, | |
| "grad_norm": 1.8047509120352834, | |
| "learning_rate": 4.541420118343195e-07, | |
| "loss": 1.3802, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 1.1888297872340425, | |
| "grad_norm": 1.4772002442457595, | |
| "learning_rate": 4.5266272189349107e-07, | |
| "loss": 1.2972, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 1.1914893617021276, | |
| "grad_norm": 1.4833680602448407, | |
| "learning_rate": 4.5118343195266267e-07, | |
| "loss": 1.3515, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 1.1941489361702127, | |
| "grad_norm": 1.557530779220624, | |
| "learning_rate": 4.497041420118343e-07, | |
| "loss": 1.367, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 1.196808510638298, | |
| "grad_norm": 1.8027220753490893, | |
| "learning_rate": 4.482248520710059e-07, | |
| "loss": 1.5443, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.199468085106383, | |
| "grad_norm": 1.5684441226470547, | |
| "learning_rate": 4.467455621301775e-07, | |
| "loss": 1.3059, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 1.202127659574468, | |
| "grad_norm": 1.593970040483734, | |
| "learning_rate": 4.4526627218934907e-07, | |
| "loss": 1.2474, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 1.2047872340425532, | |
| "grad_norm": 1.7048839620218588, | |
| "learning_rate": 4.437869822485207e-07, | |
| "loss": 1.4689, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 1.2074468085106382, | |
| "grad_norm": 1.6500745120708162, | |
| "learning_rate": 4.423076923076923e-07, | |
| "loss": 1.3768, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 1.2101063829787235, | |
| "grad_norm": 1.6649022378945304, | |
| "learning_rate": 4.408284023668639e-07, | |
| "loss": 1.6992, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 1.2127659574468086, | |
| "grad_norm": 2.150475218838757, | |
| "learning_rate": 4.3934911242603553e-07, | |
| "loss": 1.4338, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 1.2154255319148937, | |
| "grad_norm": 1.4810681098612493, | |
| "learning_rate": 4.378698224852071e-07, | |
| "loss": 1.2523, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 1.2180851063829787, | |
| "grad_norm": 1.5941194592252996, | |
| "learning_rate": 4.363905325443787e-07, | |
| "loss": 1.5144, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 1.2207446808510638, | |
| "grad_norm": 2.9846606692055855, | |
| "learning_rate": 4.349112426035503e-07, | |
| "loss": 1.4394, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 1.2234042553191489, | |
| "grad_norm": 1.5758645515570575, | |
| "learning_rate": 4.334319526627219e-07, | |
| "loss": 1.314, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.226063829787234, | |
| "grad_norm": 2.0348791713600374, | |
| "learning_rate": 4.3195266272189343e-07, | |
| "loss": 1.3581, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 1.2287234042553192, | |
| "grad_norm": 1.65492749945659, | |
| "learning_rate": 4.3047337278106503e-07, | |
| "loss": 1.5053, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 1.2313829787234043, | |
| "grad_norm": 1.6722641251398465, | |
| "learning_rate": 4.289940828402367e-07, | |
| "loss": 1.4641, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 1.2340425531914894, | |
| "grad_norm": 1.5474460973272384, | |
| "learning_rate": 4.275147928994083e-07, | |
| "loss": 1.4182, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 1.2367021276595744, | |
| "grad_norm": 1.7345506046508428, | |
| "learning_rate": 4.260355029585799e-07, | |
| "loss": 1.3139, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 1.2393617021276595, | |
| "grad_norm": 1.7713814803315784, | |
| "learning_rate": 4.2455621301775144e-07, | |
| "loss": 1.4832, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 1.2420212765957448, | |
| "grad_norm": 1.5498103025703653, | |
| "learning_rate": 4.2307692307692304e-07, | |
| "loss": 1.4115, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 1.2446808510638299, | |
| "grad_norm": 1.5577840972729278, | |
| "learning_rate": 4.2159763313609464e-07, | |
| "loss": 1.3256, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 1.247340425531915, | |
| "grad_norm": 1.578861933203747, | |
| "learning_rate": 4.2011834319526624e-07, | |
| "loss": 1.2007, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "grad_norm": 1.6507686385229483, | |
| "learning_rate": 4.1863905325443785e-07, | |
| "loss": 1.3944, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.252659574468085, | |
| "grad_norm": 1.7990714539210155, | |
| "learning_rate": 4.1715976331360945e-07, | |
| "loss": 1.4632, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 1.2553191489361701, | |
| "grad_norm": 1.7618234269198014, | |
| "learning_rate": 4.1568047337278105e-07, | |
| "loss": 1.3313, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 1.2579787234042552, | |
| "grad_norm": 1.5213599490802718, | |
| "learning_rate": 4.1420118343195265e-07, | |
| "loss": 1.5047, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 1.2606382978723405, | |
| "grad_norm": 1.6052633557883167, | |
| "learning_rate": 4.1272189349112425e-07, | |
| "loss": 1.4177, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 1.2632978723404256, | |
| "grad_norm": 1.9773267391803975, | |
| "learning_rate": 4.1124260355029585e-07, | |
| "loss": 1.2606, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 1.2659574468085106, | |
| "grad_norm": 1.7023545368582853, | |
| "learning_rate": 4.097633136094674e-07, | |
| "loss": 1.3522, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 1.2686170212765957, | |
| "grad_norm": 1.657218002450086, | |
| "learning_rate": 4.08284023668639e-07, | |
| "loss": 1.307, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 1.2712765957446808, | |
| "grad_norm": 1.6560677482089055, | |
| "learning_rate": 4.0680473372781066e-07, | |
| "loss": 1.5599, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 1.273936170212766, | |
| "grad_norm": 1.5827603390864668, | |
| "learning_rate": 4.0532544378698226e-07, | |
| "loss": 1.3864, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 1.2765957446808511, | |
| "grad_norm": 1.490492812079521, | |
| "learning_rate": 4.0384615384615386e-07, | |
| "loss": 1.3238, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.2792553191489362, | |
| "grad_norm": 1.4427306337618429, | |
| "learning_rate": 4.023668639053254e-07, | |
| "loss": 1.3381, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 1.2819148936170213, | |
| "grad_norm": 1.8739427128710302, | |
| "learning_rate": 4.00887573964497e-07, | |
| "loss": 1.5195, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 1.2845744680851063, | |
| "grad_norm": 1.4205586135195478, | |
| "learning_rate": 3.994082840236686e-07, | |
| "loss": 1.3342, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 1.2872340425531914, | |
| "grad_norm": 1.4978308888768397, | |
| "learning_rate": 3.979289940828402e-07, | |
| "loss": 1.3198, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 1.2898936170212765, | |
| "grad_norm": 1.453096779169849, | |
| "learning_rate": 3.9644970414201187e-07, | |
| "loss": 1.0572, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 1.2925531914893618, | |
| "grad_norm": 1.9700050592115472, | |
| "learning_rate": 3.949704142011834e-07, | |
| "loss": 1.5476, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 1.2952127659574468, | |
| "grad_norm": 1.52650807341244, | |
| "learning_rate": 3.93491124260355e-07, | |
| "loss": 1.3027, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 1.297872340425532, | |
| "grad_norm": 1.6797022619264115, | |
| "learning_rate": 3.920118343195266e-07, | |
| "loss": 1.4014, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 1.300531914893617, | |
| "grad_norm": 1.4684740172475148, | |
| "learning_rate": 3.905325443786982e-07, | |
| "loss": 1.2891, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 1.3031914893617023, | |
| "grad_norm": 1.7009794386978352, | |
| "learning_rate": 3.8905325443786977e-07, | |
| "loss": 1.498, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.3058510638297873, | |
| "grad_norm": 1.8679273089411261, | |
| "learning_rate": 3.8757396449704137e-07, | |
| "loss": 1.5135, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 1.3085106382978724, | |
| "grad_norm": 2.6124670473785723, | |
| "learning_rate": 3.8609467455621297e-07, | |
| "loss": 1.4419, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 1.3111702127659575, | |
| "grad_norm": 1.531497234704401, | |
| "learning_rate": 3.8461538461538463e-07, | |
| "loss": 1.441, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 1.3138297872340425, | |
| "grad_norm": 1.6983808183380165, | |
| "learning_rate": 3.8313609467455623e-07, | |
| "loss": 1.3176, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 1.3164893617021276, | |
| "grad_norm": 1.7106971746124235, | |
| "learning_rate": 3.816568047337278e-07, | |
| "loss": 1.2673, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 1.3191489361702127, | |
| "grad_norm": 1.7661676163840787, | |
| "learning_rate": 3.801775147928994e-07, | |
| "loss": 1.6258, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 1.3218085106382977, | |
| "grad_norm": 1.6248132891862335, | |
| "learning_rate": 3.78698224852071e-07, | |
| "loss": 1.3813, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 1.324468085106383, | |
| "grad_norm": 1.5079876101311178, | |
| "learning_rate": 3.772189349112426e-07, | |
| "loss": 1.3491, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 1.327127659574468, | |
| "grad_norm": 1.9080784267885529, | |
| "learning_rate": 3.757396449704142e-07, | |
| "loss": 1.4263, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 1.3297872340425532, | |
| "grad_norm": 1.7134136936747053, | |
| "learning_rate": 3.742603550295858e-07, | |
| "loss": 1.4804, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.3324468085106382, | |
| "grad_norm": 2.6890220767611934, | |
| "learning_rate": 3.727810650887574e-07, | |
| "loss": 1.4301, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 1.3351063829787235, | |
| "grad_norm": 1.4491614153026324, | |
| "learning_rate": 3.71301775147929e-07, | |
| "loss": 1.2226, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 1.3377659574468086, | |
| "grad_norm": 1.4673050610910694, | |
| "learning_rate": 3.698224852071006e-07, | |
| "loss": 1.2824, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 1.3404255319148937, | |
| "grad_norm": 1.5811077672143066, | |
| "learning_rate": 3.683431952662722e-07, | |
| "loss": 1.4056, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 1.3430851063829787, | |
| "grad_norm": 1.784207214911482, | |
| "learning_rate": 3.6686390532544374e-07, | |
| "loss": 1.4456, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 1.3457446808510638, | |
| "grad_norm": 1.7545013437687231, | |
| "learning_rate": 3.6538461538461534e-07, | |
| "loss": 1.4255, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 1.3484042553191489, | |
| "grad_norm": 1.5378814658235478, | |
| "learning_rate": 3.63905325443787e-07, | |
| "loss": 1.4752, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 1.351063829787234, | |
| "grad_norm": 1.5663338737224375, | |
| "learning_rate": 3.624260355029586e-07, | |
| "loss": 1.4324, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 1.3537234042553192, | |
| "grad_norm": 1.5530747526395428, | |
| "learning_rate": 3.609467455621302e-07, | |
| "loss": 1.3294, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 1.3563829787234043, | |
| "grad_norm": 1.5555181536643647, | |
| "learning_rate": 3.5946745562130175e-07, | |
| "loss": 1.2615, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.3590425531914894, | |
| "grad_norm": 1.561907923100703, | |
| "learning_rate": 3.5798816568047335e-07, | |
| "loss": 1.4247, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 1.3617021276595744, | |
| "grad_norm": 1.561727561754077, | |
| "learning_rate": 3.5650887573964495e-07, | |
| "loss": 1.442, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 1.3643617021276595, | |
| "grad_norm": 1.571729864924405, | |
| "learning_rate": 3.5502958579881655e-07, | |
| "loss": 1.3471, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 1.3670212765957448, | |
| "grad_norm": 1.6501651767936791, | |
| "learning_rate": 3.535502958579881e-07, | |
| "loss": 1.4957, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 1.3696808510638299, | |
| "grad_norm": 1.7712985007484374, | |
| "learning_rate": 3.5207100591715975e-07, | |
| "loss": 1.3116, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 1.372340425531915, | |
| "grad_norm": 1.6021754882790804, | |
| "learning_rate": 3.5059171597633135e-07, | |
| "loss": 1.3507, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 1.375, | |
| "grad_norm": 1.9744682223829157, | |
| "learning_rate": 3.4911242603550296e-07, | |
| "loss": 1.3187, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 1.377659574468085, | |
| "grad_norm": 1.437548678030046, | |
| "learning_rate": 3.4763313609467456e-07, | |
| "loss": 1.3055, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 1.3803191489361701, | |
| "grad_norm": 1.7376163882785898, | |
| "learning_rate": 3.461538461538461e-07, | |
| "loss": 1.3712, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 1.3829787234042552, | |
| "grad_norm": 1.709895613646418, | |
| "learning_rate": 3.446745562130177e-07, | |
| "loss": 1.4941, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.3856382978723405, | |
| "grad_norm": 1.5064773577923485, | |
| "learning_rate": 3.431952662721893e-07, | |
| "loss": 1.3598, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 1.3882978723404256, | |
| "grad_norm": 1.6991123209979573, | |
| "learning_rate": 3.4171597633136096e-07, | |
| "loss": 1.3859, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 1.3909574468085106, | |
| "grad_norm": 1.611358975201833, | |
| "learning_rate": 3.4023668639053256e-07, | |
| "loss": 1.3624, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 1.3936170212765957, | |
| "grad_norm": 1.5235030722566782, | |
| "learning_rate": 3.387573964497041e-07, | |
| "loss": 1.306, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 1.3962765957446808, | |
| "grad_norm": 1.5097567026286727, | |
| "learning_rate": 3.372781065088757e-07, | |
| "loss": 1.3098, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 1.398936170212766, | |
| "grad_norm": 1.5501867735527708, | |
| "learning_rate": 3.357988165680473e-07, | |
| "loss": 1.2582, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 1.4015957446808511, | |
| "grad_norm": 1.5737400889065642, | |
| "learning_rate": 3.343195266272189e-07, | |
| "loss": 1.4226, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 1.4042553191489362, | |
| "grad_norm": 1.8163702192116935, | |
| "learning_rate": 3.328402366863905e-07, | |
| "loss": 1.45, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 1.4069148936170213, | |
| "grad_norm": 1.6761526127572786, | |
| "learning_rate": 3.313609467455621e-07, | |
| "loss": 1.4133, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 1.4095744680851063, | |
| "grad_norm": 1.7300976770863319, | |
| "learning_rate": 3.298816568047337e-07, | |
| "loss": 1.5036, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.4122340425531914, | |
| "grad_norm": 1.7219520565452116, | |
| "learning_rate": 3.284023668639053e-07, | |
| "loss": 1.4172, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 1.4148936170212765, | |
| "grad_norm": 1.8137826656078981, | |
| "learning_rate": 3.269230769230769e-07, | |
| "loss": 1.5673, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 1.4175531914893618, | |
| "grad_norm": 1.9605494871424245, | |
| "learning_rate": 3.254437869822485e-07, | |
| "loss": 1.4421, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 1.4202127659574468, | |
| "grad_norm": 1.5063443324517625, | |
| "learning_rate": 3.239644970414201e-07, | |
| "loss": 1.3858, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 1.422872340425532, | |
| "grad_norm": 1.5929428001187216, | |
| "learning_rate": 3.224852071005917e-07, | |
| "loss": 1.4245, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 1.425531914893617, | |
| "grad_norm": 1.5090052181328104, | |
| "learning_rate": 3.210059171597633e-07, | |
| "loss": 1.185, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 1.4281914893617023, | |
| "grad_norm": 1.7599894966549008, | |
| "learning_rate": 3.1952662721893493e-07, | |
| "loss": 1.5936, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 1.4308510638297873, | |
| "grad_norm": 1.8274682976599146, | |
| "learning_rate": 3.1804733727810653e-07, | |
| "loss": 1.5133, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 1.4335106382978724, | |
| "grad_norm": 1.6304863965807304, | |
| "learning_rate": 3.165680473372781e-07, | |
| "loss": 1.4513, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 1.4361702127659575, | |
| "grad_norm": 1.865748149954226, | |
| "learning_rate": 3.150887573964497e-07, | |
| "loss": 1.579, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.4388297872340425, | |
| "grad_norm": 1.497890260310679, | |
| "learning_rate": 3.136094674556213e-07, | |
| "loss": 1.3996, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 1.4414893617021276, | |
| "grad_norm": 1.5505684579290944, | |
| "learning_rate": 3.121301775147929e-07, | |
| "loss": 1.4765, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 1.4441489361702127, | |
| "grad_norm": 1.5934674629645669, | |
| "learning_rate": 3.1065088757396443e-07, | |
| "loss": 1.2206, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 1.4468085106382977, | |
| "grad_norm": 2.5003698075483776, | |
| "learning_rate": 3.091715976331361e-07, | |
| "loss": 1.4785, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 1.449468085106383, | |
| "grad_norm": 1.5430363507491573, | |
| "learning_rate": 3.076923076923077e-07, | |
| "loss": 1.3596, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 1.452127659574468, | |
| "grad_norm": 1.6114525579321486, | |
| "learning_rate": 3.062130177514793e-07, | |
| "loss": 1.3768, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 1.4547872340425532, | |
| "grad_norm": 1.51705181171149, | |
| "learning_rate": 3.047337278106509e-07, | |
| "loss": 1.3161, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 1.4574468085106382, | |
| "grad_norm": 1.659706683154854, | |
| "learning_rate": 3.0325443786982244e-07, | |
| "loss": 1.4808, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 1.4601063829787235, | |
| "grad_norm": 1.6484483474446856, | |
| "learning_rate": 3.0177514792899404e-07, | |
| "loss": 1.398, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 1.4627659574468086, | |
| "grad_norm": 1.6054531570011474, | |
| "learning_rate": 3.0029585798816564e-07, | |
| "loss": 1.1421, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.4654255319148937, | |
| "grad_norm": 1.5260699880356663, | |
| "learning_rate": 2.988165680473373e-07, | |
| "loss": 1.4223, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 1.4680851063829787, | |
| "grad_norm": 1.5022650148070196, | |
| "learning_rate": 2.973372781065089e-07, | |
| "loss": 1.3579, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 1.4707446808510638, | |
| "grad_norm": 1.696210632092268, | |
| "learning_rate": 2.9585798816568045e-07, | |
| "loss": 1.4437, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 1.4734042553191489, | |
| "grad_norm": 1.50505509525979, | |
| "learning_rate": 2.9437869822485205e-07, | |
| "loss": 1.3666, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 1.476063829787234, | |
| "grad_norm": 1.6283581586889138, | |
| "learning_rate": 2.9289940828402365e-07, | |
| "loss": 1.3807, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 1.4787234042553192, | |
| "grad_norm": 1.57845733466985, | |
| "learning_rate": 2.9142011834319525e-07, | |
| "loss": 1.4947, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 1.4813829787234043, | |
| "grad_norm": 1.6269594263364617, | |
| "learning_rate": 2.8994082840236686e-07, | |
| "loss": 1.5315, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 1.4840425531914894, | |
| "grad_norm": 1.4901674188093539, | |
| "learning_rate": 2.884615384615384e-07, | |
| "loss": 1.2067, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 1.4867021276595744, | |
| "grad_norm": 1.608926803251607, | |
| "learning_rate": 2.8698224852071006e-07, | |
| "loss": 1.4501, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 1.4893617021276595, | |
| "grad_norm": 1.5736272188001768, | |
| "learning_rate": 2.8550295857988166e-07, | |
| "loss": 1.4938, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.4920212765957448, | |
| "grad_norm": 1.6178988306695008, | |
| "learning_rate": 2.8402366863905326e-07, | |
| "loss": 1.2858, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 1.4946808510638299, | |
| "grad_norm": 1.612098241628475, | |
| "learning_rate": 2.8254437869822486e-07, | |
| "loss": 1.3793, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 1.497340425531915, | |
| "grad_norm": 1.521850228548639, | |
| "learning_rate": 2.810650887573964e-07, | |
| "loss": 1.3616, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "grad_norm": 1.4283693834886921, | |
| "learning_rate": 2.79585798816568e-07, | |
| "loss": 1.2373, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 1.502659574468085, | |
| "grad_norm": 1.4614575118454327, | |
| "learning_rate": 2.781065088757396e-07, | |
| "loss": 1.3506, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 1.5053191489361701, | |
| "grad_norm": 4.833934856122629, | |
| "learning_rate": 2.7662721893491127e-07, | |
| "loss": 1.3368, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 1.5079787234042552, | |
| "grad_norm": 1.5417407593664367, | |
| "learning_rate": 2.7514792899408287e-07, | |
| "loss": 1.3806, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 1.5106382978723403, | |
| "grad_norm": 1.3942611390001125, | |
| "learning_rate": 2.736686390532544e-07, | |
| "loss": 1.2778, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 1.5132978723404256, | |
| "grad_norm": 1.5232973474443783, | |
| "learning_rate": 2.72189349112426e-07, | |
| "loss": 1.5106, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 1.5159574468085106, | |
| "grad_norm": 1.6181295111494955, | |
| "learning_rate": 2.707100591715976e-07, | |
| "loss": 1.3182, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.5186170212765957, | |
| "grad_norm": 1.4905875051329172, | |
| "learning_rate": 2.692307692307692e-07, | |
| "loss": 1.359, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 1.521276595744681, | |
| "grad_norm": 1.5438422326091557, | |
| "learning_rate": 2.6775147928994077e-07, | |
| "loss": 1.4581, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 1.523936170212766, | |
| "grad_norm": 1.6689444553647594, | |
| "learning_rate": 2.662721893491124e-07, | |
| "loss": 1.4416, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 1.5265957446808511, | |
| "grad_norm": 1.732092721800618, | |
| "learning_rate": 2.6479289940828403e-07, | |
| "loss": 1.4653, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 1.5292553191489362, | |
| "grad_norm": 1.5939357125781168, | |
| "learning_rate": 2.6331360946745563e-07, | |
| "loss": 1.3659, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 1.5319148936170213, | |
| "grad_norm": 1.619819379203523, | |
| "learning_rate": 2.6183431952662723e-07, | |
| "loss": 1.4057, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 1.5345744680851063, | |
| "grad_norm": 1.5228031500567076, | |
| "learning_rate": 2.603550295857988e-07, | |
| "loss": 1.3322, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 1.5372340425531914, | |
| "grad_norm": 1.6403075138073668, | |
| "learning_rate": 2.588757396449704e-07, | |
| "loss": 1.3243, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 1.5398936170212765, | |
| "grad_norm": 1.6158463818930031, | |
| "learning_rate": 2.57396449704142e-07, | |
| "loss": 1.3743, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 1.5425531914893615, | |
| "grad_norm": 1.4401607766731626, | |
| "learning_rate": 2.559171597633136e-07, | |
| "loss": 1.3209, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.5452127659574468, | |
| "grad_norm": 1.610458527778034, | |
| "learning_rate": 2.5443786982248524e-07, | |
| "loss": 1.437, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 1.547872340425532, | |
| "grad_norm": 1.4720391313596763, | |
| "learning_rate": 2.529585798816568e-07, | |
| "loss": 1.2406, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 1.550531914893617, | |
| "grad_norm": 1.4693642812943966, | |
| "learning_rate": 2.514792899408284e-07, | |
| "loss": 1.3345, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 1.5531914893617023, | |
| "grad_norm": 1.6024699547818029, | |
| "learning_rate": 2.5e-07, | |
| "loss": 1.4164, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 1.5558510638297873, | |
| "grad_norm": 1.602502091357314, | |
| "learning_rate": 2.485207100591716e-07, | |
| "loss": 1.4412, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 1.5585106382978724, | |
| "grad_norm": 1.7241679714315328, | |
| "learning_rate": 2.470414201183432e-07, | |
| "loss": 1.331, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 1.5611702127659575, | |
| "grad_norm": 1.7371187244572857, | |
| "learning_rate": 2.455621301775148e-07, | |
| "loss": 1.4532, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 1.5638297872340425, | |
| "grad_norm": 1.4995956670676633, | |
| "learning_rate": 2.440828402366864e-07, | |
| "loss": 1.2702, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 1.5664893617021276, | |
| "grad_norm": 1.4659221291046236, | |
| "learning_rate": 2.4260355029585794e-07, | |
| "loss": 1.4754, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 1.5691489361702127, | |
| "grad_norm": 1.5385805721266792, | |
| "learning_rate": 2.411242603550296e-07, | |
| "loss": 1.5509, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.5718085106382977, | |
| "grad_norm": 1.5161262548508925, | |
| "learning_rate": 2.396449704142012e-07, | |
| "loss": 1.3936, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 1.574468085106383, | |
| "grad_norm": 1.4666556990097799, | |
| "learning_rate": 2.3816568047337277e-07, | |
| "loss": 1.3143, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 1.577127659574468, | |
| "grad_norm": 1.6300523417207398, | |
| "learning_rate": 2.3668639053254435e-07, | |
| "loss": 1.3385, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 1.5797872340425532, | |
| "grad_norm": 1.5121985962743036, | |
| "learning_rate": 2.3520710059171598e-07, | |
| "loss": 1.4693, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 1.5824468085106385, | |
| "grad_norm": 1.6977627534281994, | |
| "learning_rate": 2.3372781065088755e-07, | |
| "loss": 1.5827, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 1.5851063829787235, | |
| "grad_norm": 1.4675438168952388, | |
| "learning_rate": 2.3224852071005915e-07, | |
| "loss": 1.4037, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 1.5877659574468086, | |
| "grad_norm": 1.5087620408684652, | |
| "learning_rate": 2.3076923076923078e-07, | |
| "loss": 1.2713, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 1.5904255319148937, | |
| "grad_norm": 1.853798719037303, | |
| "learning_rate": 2.2928994082840236e-07, | |
| "loss": 1.4517, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 1.5930851063829787, | |
| "grad_norm": 1.5624391162454545, | |
| "learning_rate": 2.2781065088757396e-07, | |
| "loss": 1.5716, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 1.5957446808510638, | |
| "grad_norm": 1.5647362537380562, | |
| "learning_rate": 2.2633136094674553e-07, | |
| "loss": 1.2679, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.5984042553191489, | |
| "grad_norm": 1.5028293469540326, | |
| "learning_rate": 2.2485207100591716e-07, | |
| "loss": 1.3477, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 1.601063829787234, | |
| "grad_norm": 1.5616178692766567, | |
| "learning_rate": 2.2337278106508876e-07, | |
| "loss": 1.349, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 1.603723404255319, | |
| "grad_norm": 1.5652068533404448, | |
| "learning_rate": 2.2189349112426034e-07, | |
| "loss": 1.3408, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 1.6063829787234043, | |
| "grad_norm": 1.8312281167867779, | |
| "learning_rate": 2.2041420118343194e-07, | |
| "loss": 1.5744, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 1.6090425531914894, | |
| "grad_norm": 1.5113532834536092, | |
| "learning_rate": 2.1893491124260354e-07, | |
| "loss": 1.5116, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 1.6117021276595744, | |
| "grad_norm": 1.4148002933798485, | |
| "learning_rate": 2.1745562130177514e-07, | |
| "loss": 1.3254, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 1.6143617021276597, | |
| "grad_norm": 1.4128390757612144, | |
| "learning_rate": 2.1597633136094672e-07, | |
| "loss": 1.3424, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 1.6170212765957448, | |
| "grad_norm": 1.664151543039297, | |
| "learning_rate": 2.1449704142011834e-07, | |
| "loss": 1.4507, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 1.6196808510638299, | |
| "grad_norm": 1.5001892924079347, | |
| "learning_rate": 2.1301775147928995e-07, | |
| "loss": 1.3598, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 1.622340425531915, | |
| "grad_norm": 1.7189011247258703, | |
| "learning_rate": 2.1153846153846152e-07, | |
| "loss": 1.4798, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.625, | |
| "grad_norm": 1.4495039913652832, | |
| "learning_rate": 2.1005917159763312e-07, | |
| "loss": 1.1879, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 1.627659574468085, | |
| "grad_norm": 1.4863964571390131, | |
| "learning_rate": 2.0857988165680472e-07, | |
| "loss": 1.4149, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 1.6303191489361701, | |
| "grad_norm": 1.470842696782351, | |
| "learning_rate": 2.0710059171597633e-07, | |
| "loss": 1.5213, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 1.6329787234042552, | |
| "grad_norm": 1.5332931589309218, | |
| "learning_rate": 2.0562130177514793e-07, | |
| "loss": 1.3847, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 1.6356382978723403, | |
| "grad_norm": 1.5012230655181953, | |
| "learning_rate": 2.041420118343195e-07, | |
| "loss": 1.2194, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 1.6382978723404256, | |
| "grad_norm": 1.4592244922211661, | |
| "learning_rate": 2.0266272189349113e-07, | |
| "loss": 1.2863, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 1.6409574468085106, | |
| "grad_norm": 1.6194968573694928, | |
| "learning_rate": 2.011834319526627e-07, | |
| "loss": 1.563, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 1.6436170212765957, | |
| "grad_norm": 1.5398995693701385, | |
| "learning_rate": 1.997041420118343e-07, | |
| "loss": 1.5, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 1.646276595744681, | |
| "grad_norm": 1.803830954994613, | |
| "learning_rate": 1.9822485207100593e-07, | |
| "loss": 1.3459, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 1.648936170212766, | |
| "grad_norm": 1.5731270083148248, | |
| "learning_rate": 1.967455621301775e-07, | |
| "loss": 1.3277, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.6515957446808511, | |
| "grad_norm": 1.6370008858204694, | |
| "learning_rate": 1.952662721893491e-07, | |
| "loss": 1.4752, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 1.6542553191489362, | |
| "grad_norm": 1.3905339157621093, | |
| "learning_rate": 1.9378698224852069e-07, | |
| "loss": 1.1591, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 1.6569148936170213, | |
| "grad_norm": 1.521784820078054, | |
| "learning_rate": 1.9230769230769231e-07, | |
| "loss": 1.348, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 1.6595744680851063, | |
| "grad_norm": 2.480779673395715, | |
| "learning_rate": 1.908284023668639e-07, | |
| "loss": 1.3468, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 1.6622340425531914, | |
| "grad_norm": 1.5047866424190777, | |
| "learning_rate": 1.893491124260355e-07, | |
| "loss": 1.3808, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 1.6648936170212765, | |
| "grad_norm": 1.5186127777273435, | |
| "learning_rate": 1.878698224852071e-07, | |
| "loss": 1.4201, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 1.6675531914893615, | |
| "grad_norm": 1.4407427328000266, | |
| "learning_rate": 1.863905325443787e-07, | |
| "loss": 1.273, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 1.6702127659574468, | |
| "grad_norm": 1.5224116074533014, | |
| "learning_rate": 1.849112426035503e-07, | |
| "loss": 1.2098, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 1.672872340425532, | |
| "grad_norm": 1.527239003211648, | |
| "learning_rate": 1.8343195266272187e-07, | |
| "loss": 1.3724, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 1.675531914893617, | |
| "grad_norm": 1.6525871512419401, | |
| "learning_rate": 1.819526627218935e-07, | |
| "loss": 1.3946, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.6781914893617023, | |
| "grad_norm": 3.0200043340992933, | |
| "learning_rate": 1.804733727810651e-07, | |
| "loss": 1.4742, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 1.6808510638297873, | |
| "grad_norm": 1.5029965510376364, | |
| "learning_rate": 1.7899408284023667e-07, | |
| "loss": 1.3623, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 1.6835106382978724, | |
| "grad_norm": 1.5389625013367383, | |
| "learning_rate": 1.7751479289940827e-07, | |
| "loss": 1.5043, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 1.6861702127659575, | |
| "grad_norm": 1.5608661501656413, | |
| "learning_rate": 1.7603550295857988e-07, | |
| "loss": 1.2883, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 1.6888297872340425, | |
| "grad_norm": 1.6847845057440693, | |
| "learning_rate": 1.7455621301775148e-07, | |
| "loss": 1.4244, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 1.6914893617021276, | |
| "grad_norm": 1.5793904433648327, | |
| "learning_rate": 1.7307692307692305e-07, | |
| "loss": 1.4062, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 1.6941489361702127, | |
| "grad_norm": 1.4350293530642095, | |
| "learning_rate": 1.7159763313609465e-07, | |
| "loss": 1.2754, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 1.6968085106382977, | |
| "grad_norm": 1.902506858522582, | |
| "learning_rate": 1.7011834319526628e-07, | |
| "loss": 1.4541, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 1.699468085106383, | |
| "grad_norm": 1.478754263683889, | |
| "learning_rate": 1.6863905325443786e-07, | |
| "loss": 1.3463, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 1.702127659574468, | |
| "grad_norm": 1.6464724285737642, | |
| "learning_rate": 1.6715976331360946e-07, | |
| "loss": 1.3807, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.7047872340425532, | |
| "grad_norm": 1.6125752749357112, | |
| "learning_rate": 1.6568047337278106e-07, | |
| "loss": 1.2933, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 1.7074468085106385, | |
| "grad_norm": 1.5928623495071816, | |
| "learning_rate": 1.6420118343195266e-07, | |
| "loss": 1.4326, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 1.7101063829787235, | |
| "grad_norm": 1.5193190242572798, | |
| "learning_rate": 1.6272189349112426e-07, | |
| "loss": 1.3588, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 1.7127659574468086, | |
| "grad_norm": 1.5482920311769846, | |
| "learning_rate": 1.6124260355029584e-07, | |
| "loss": 1.3839, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 1.7154255319148937, | |
| "grad_norm": 1.8407335336806905, | |
| "learning_rate": 1.5976331360946747e-07, | |
| "loss": 1.3248, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 1.7180851063829787, | |
| "grad_norm": 1.6055785649743377, | |
| "learning_rate": 1.5828402366863904e-07, | |
| "loss": 1.3872, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 1.7207446808510638, | |
| "grad_norm": 1.6297496194969232, | |
| "learning_rate": 1.5680473372781064e-07, | |
| "loss": 1.389, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 1.7234042553191489, | |
| "grad_norm": 1.577321745146047, | |
| "learning_rate": 1.5532544378698222e-07, | |
| "loss": 1.1947, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 1.726063829787234, | |
| "grad_norm": 1.6447713137577962, | |
| "learning_rate": 1.5384615384615385e-07, | |
| "loss": 1.2652, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 1.728723404255319, | |
| "grad_norm": 1.6234194331407543, | |
| "learning_rate": 1.5236686390532545e-07, | |
| "loss": 1.4239, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.7313829787234043, | |
| "grad_norm": 1.532776130454777, | |
| "learning_rate": 1.5088757396449702e-07, | |
| "loss": 1.3875, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 1.7340425531914894, | |
| "grad_norm": 1.4837535962878305, | |
| "learning_rate": 1.4940828402366865e-07, | |
| "loss": 1.2059, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 1.7367021276595744, | |
| "grad_norm": 1.5395205053467318, | |
| "learning_rate": 1.4792899408284022e-07, | |
| "loss": 1.3513, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 1.7393617021276597, | |
| "grad_norm": 1.4112077844892696, | |
| "learning_rate": 1.4644970414201183e-07, | |
| "loss": 1.3336, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 1.7420212765957448, | |
| "grad_norm": 1.481010800777514, | |
| "learning_rate": 1.4497041420118343e-07, | |
| "loss": 1.4028, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 1.7446808510638299, | |
| "grad_norm": 1.4564408238676725, | |
| "learning_rate": 1.4349112426035503e-07, | |
| "loss": 1.3502, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 1.747340425531915, | |
| "grad_norm": 1.6956227102239596, | |
| "learning_rate": 1.4201183431952663e-07, | |
| "loss": 1.5672, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "grad_norm": 1.5705454639314052, | |
| "learning_rate": 1.405325443786982e-07, | |
| "loss": 1.4109, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 1.752659574468085, | |
| "grad_norm": 1.5656622358755812, | |
| "learning_rate": 1.390532544378698e-07, | |
| "loss": 1.557, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 1.7553191489361701, | |
| "grad_norm": 1.8848625197729474, | |
| "learning_rate": 1.3757396449704143e-07, | |
| "loss": 1.4017, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.7579787234042552, | |
| "grad_norm": 1.4196764538431994, | |
| "learning_rate": 1.36094674556213e-07, | |
| "loss": 1.2331, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 1.7606382978723403, | |
| "grad_norm": 1.4675927168298655, | |
| "learning_rate": 1.346153846153846e-07, | |
| "loss": 1.4689, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 1.7632978723404256, | |
| "grad_norm": 1.6895719453339277, | |
| "learning_rate": 1.331360946745562e-07, | |
| "loss": 1.6055, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 1.7659574468085106, | |
| "grad_norm": 1.6565509018980442, | |
| "learning_rate": 1.3165680473372781e-07, | |
| "loss": 1.346, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 1.7686170212765957, | |
| "grad_norm": 1.6111421234975374, | |
| "learning_rate": 1.301775147928994e-07, | |
| "loss": 1.3318, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 1.771276595744681, | |
| "grad_norm": 1.5477525938145107, | |
| "learning_rate": 1.28698224852071e-07, | |
| "loss": 1.4311, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 1.773936170212766, | |
| "grad_norm": 1.4344548853484294, | |
| "learning_rate": 1.2721893491124262e-07, | |
| "loss": 1.4168, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 1.7765957446808511, | |
| "grad_norm": 2.002400150167084, | |
| "learning_rate": 1.257396449704142e-07, | |
| "loss": 1.5304, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 1.7792553191489362, | |
| "grad_norm": 1.6203137830914942, | |
| "learning_rate": 1.242603550295858e-07, | |
| "loss": 1.4902, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 1.7819148936170213, | |
| "grad_norm": 1.653101321305009, | |
| "learning_rate": 1.227810650887574e-07, | |
| "loss": 1.523, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.7845744680851063, | |
| "grad_norm": 1.4583067028702263, | |
| "learning_rate": 1.2130177514792897e-07, | |
| "loss": 1.3307, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 1.7872340425531914, | |
| "grad_norm": 1.4416958484378999, | |
| "learning_rate": 1.198224852071006e-07, | |
| "loss": 1.2879, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 1.7898936170212765, | |
| "grad_norm": 1.5342015216907867, | |
| "learning_rate": 1.1834319526627217e-07, | |
| "loss": 1.3491, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 1.7925531914893615, | |
| "grad_norm": 1.5120417917571398, | |
| "learning_rate": 1.1686390532544378e-07, | |
| "loss": 1.5533, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 1.7952127659574468, | |
| "grad_norm": 1.6448669091043147, | |
| "learning_rate": 1.1538461538461539e-07, | |
| "loss": 1.4507, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 1.797872340425532, | |
| "grad_norm": 1.5744246355313867, | |
| "learning_rate": 1.1390532544378698e-07, | |
| "loss": 1.4762, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 1.800531914893617, | |
| "grad_norm": 1.407351126310039, | |
| "learning_rate": 1.1242603550295858e-07, | |
| "loss": 1.2665, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 1.8031914893617023, | |
| "grad_norm": 1.4428356495487928, | |
| "learning_rate": 1.1094674556213017e-07, | |
| "loss": 1.4222, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 1.8058510638297873, | |
| "grad_norm": 1.4978022369408812, | |
| "learning_rate": 1.0946745562130177e-07, | |
| "loss": 1.3571, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 1.8085106382978724, | |
| "grad_norm": 1.608694580830846, | |
| "learning_rate": 1.0798816568047336e-07, | |
| "loss": 1.2468, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.8111702127659575, | |
| "grad_norm": 1.3671652219864612, | |
| "learning_rate": 1.0650887573964497e-07, | |
| "loss": 1.1918, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 1.8138297872340425, | |
| "grad_norm": 1.5436563625586248, | |
| "learning_rate": 1.0502958579881656e-07, | |
| "loss": 1.3447, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 1.8164893617021276, | |
| "grad_norm": 2.0668175329496448, | |
| "learning_rate": 1.0355029585798816e-07, | |
| "loss": 1.2851, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 1.8191489361702127, | |
| "grad_norm": 1.4711737418040087, | |
| "learning_rate": 1.0207100591715975e-07, | |
| "loss": 1.4054, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 1.8218085106382977, | |
| "grad_norm": 1.628475068104997, | |
| "learning_rate": 1.0059171597633135e-07, | |
| "loss": 1.2297, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 1.824468085106383, | |
| "grad_norm": 1.6652537635356375, | |
| "learning_rate": 9.911242603550297e-08, | |
| "loss": 1.4249, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 1.827127659574468, | |
| "grad_norm": 1.4549454801379844, | |
| "learning_rate": 9.763313609467456e-08, | |
| "loss": 1.4738, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 1.8297872340425532, | |
| "grad_norm": 1.4571125733944477, | |
| "learning_rate": 9.615384615384616e-08, | |
| "loss": 1.2531, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 1.8324468085106385, | |
| "grad_norm": 1.4934710030590315, | |
| "learning_rate": 9.467455621301774e-08, | |
| "loss": 1.4224, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 1.8351063829787235, | |
| "grad_norm": 1.5068998589001918, | |
| "learning_rate": 9.319526627218935e-08, | |
| "loss": 1.4137, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.8377659574468086, | |
| "grad_norm": 1.5592030646382606, | |
| "learning_rate": 9.171597633136093e-08, | |
| "loss": 1.4923, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 1.8404255319148937, | |
| "grad_norm": 1.5420672523438603, | |
| "learning_rate": 9.023668639053255e-08, | |
| "loss": 1.3542, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 1.8430851063829787, | |
| "grad_norm": 1.4933658760362354, | |
| "learning_rate": 8.875739644970414e-08, | |
| "loss": 1.4062, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 1.8457446808510638, | |
| "grad_norm": 2.1197107348039648, | |
| "learning_rate": 8.727810650887574e-08, | |
| "loss": 1.3514, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 1.8484042553191489, | |
| "grad_norm": 1.420310868366173, | |
| "learning_rate": 8.579881656804733e-08, | |
| "loss": 1.3865, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 1.851063829787234, | |
| "grad_norm": 2.1476526664851083, | |
| "learning_rate": 8.431952662721893e-08, | |
| "loss": 1.1886, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 1.853723404255319, | |
| "grad_norm": 1.3847908910859454, | |
| "learning_rate": 8.284023668639053e-08, | |
| "loss": 1.4107, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 1.8563829787234043, | |
| "grad_norm": 1.6527903429011437, | |
| "learning_rate": 8.136094674556213e-08, | |
| "loss": 1.2876, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 1.8590425531914894, | |
| "grad_norm": 1.5745014854949893, | |
| "learning_rate": 7.988165680473373e-08, | |
| "loss": 1.4558, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 1.8617021276595744, | |
| "grad_norm": 1.5350363492855568, | |
| "learning_rate": 7.840236686390532e-08, | |
| "loss": 1.4523, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.8643617021276597, | |
| "grad_norm": 1.4853786087332579, | |
| "learning_rate": 7.692307692307692e-08, | |
| "loss": 1.3292, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 1.8670212765957448, | |
| "grad_norm": 1.4473821719214552, | |
| "learning_rate": 7.544378698224851e-08, | |
| "loss": 1.2034, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 1.8696808510638299, | |
| "grad_norm": 1.4659266830367277, | |
| "learning_rate": 7.396449704142011e-08, | |
| "loss": 1.2584, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 1.872340425531915, | |
| "grad_norm": 1.4759466915583441, | |
| "learning_rate": 7.248520710059171e-08, | |
| "loss": 1.3187, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 1.875, | |
| "grad_norm": 2.111257320773056, | |
| "learning_rate": 7.100591715976332e-08, | |
| "loss": 1.3323, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 1.877659574468085, | |
| "grad_norm": 1.5831480252428458, | |
| "learning_rate": 6.95266272189349e-08, | |
| "loss": 1.4302, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 1.8803191489361701, | |
| "grad_norm": 1.6086043948043176, | |
| "learning_rate": 6.80473372781065e-08, | |
| "loss": 1.4017, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 1.8829787234042552, | |
| "grad_norm": 2.0849492736061332, | |
| "learning_rate": 6.65680473372781e-08, | |
| "loss": 1.5211, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 1.8856382978723403, | |
| "grad_norm": 1.5043217865201886, | |
| "learning_rate": 6.50887573964497e-08, | |
| "loss": 1.2166, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 1.8882978723404256, | |
| "grad_norm": 1.5612635488662876, | |
| "learning_rate": 6.360946745562131e-08, | |
| "loss": 1.3481, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.8909574468085106, | |
| "grad_norm": 1.4947402449036076, | |
| "learning_rate": 6.21301775147929e-08, | |
| "loss": 1.1988, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 1.8936170212765957, | |
| "grad_norm": 1.7123431001612024, | |
| "learning_rate": 6.065088757396449e-08, | |
| "loss": 1.6215, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 1.896276595744681, | |
| "grad_norm": 1.5722027689056413, | |
| "learning_rate": 5.917159763313609e-08, | |
| "loss": 1.435, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 1.898936170212766, | |
| "grad_norm": 1.5736347184744337, | |
| "learning_rate": 5.7692307692307695e-08, | |
| "loss": 1.4243, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 1.9015957446808511, | |
| "grad_norm": 1.4558857769282714, | |
| "learning_rate": 5.621301775147929e-08, | |
| "loss": 1.3732, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 1.9042553191489362, | |
| "grad_norm": 1.430432818475582, | |
| "learning_rate": 5.4733727810650885e-08, | |
| "loss": 1.2582, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 1.9069148936170213, | |
| "grad_norm": 1.4010572562597123, | |
| "learning_rate": 5.3254437869822486e-08, | |
| "loss": 1.2036, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 1.9095744680851063, | |
| "grad_norm": 1.5030183623430164, | |
| "learning_rate": 5.177514792899408e-08, | |
| "loss": 1.3348, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 1.9122340425531914, | |
| "grad_norm": 1.5264425521471463, | |
| "learning_rate": 5.0295857988165676e-08, | |
| "loss": 1.3486, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 1.9148936170212765, | |
| "grad_norm": 1.6568867880777098, | |
| "learning_rate": 4.881656804733728e-08, | |
| "loss": 1.5102, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.9175531914893615, | |
| "grad_norm": 1.426877705408139, | |
| "learning_rate": 4.733727810650887e-08, | |
| "loss": 1.2843, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 1.9202127659574468, | |
| "grad_norm": 1.5745176121540452, | |
| "learning_rate": 4.585798816568047e-08, | |
| "loss": 1.5892, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 1.922872340425532, | |
| "grad_norm": 1.5299374151207628, | |
| "learning_rate": 4.437869822485207e-08, | |
| "loss": 1.2833, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 1.925531914893617, | |
| "grad_norm": 1.5485924328569498, | |
| "learning_rate": 4.2899408284023664e-08, | |
| "loss": 1.2528, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 1.9281914893617023, | |
| "grad_norm": 1.5650812571579016, | |
| "learning_rate": 4.1420118343195265e-08, | |
| "loss": 1.4623, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 1.9308510638297873, | |
| "grad_norm": 1.4874887834654986, | |
| "learning_rate": 3.9940828402366866e-08, | |
| "loss": 1.331, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 1.9335106382978724, | |
| "grad_norm": 2.2386953559992606, | |
| "learning_rate": 3.846153846153846e-08, | |
| "loss": 1.3, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 1.9361702127659575, | |
| "grad_norm": 1.6479534866842882, | |
| "learning_rate": 3.6982248520710056e-08, | |
| "loss": 1.5275, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 1.9388297872340425, | |
| "grad_norm": 1.3421164941268597, | |
| "learning_rate": 3.550295857988166e-08, | |
| "loss": 1.2612, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 1.9414893617021276, | |
| "grad_norm": 1.4738714329564195, | |
| "learning_rate": 3.402366863905325e-08, | |
| "loss": 1.4314, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.9441489361702127, | |
| "grad_norm": 1.44156180682146, | |
| "learning_rate": 3.254437869822485e-08, | |
| "loss": 1.3338, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 1.9468085106382977, | |
| "grad_norm": 1.5784061294788538, | |
| "learning_rate": 3.106508875739645e-08, | |
| "loss": 1.3386, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 1.949468085106383, | |
| "grad_norm": 1.3371318363907538, | |
| "learning_rate": 2.9585798816568044e-08, | |
| "loss": 1.3028, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 1.952127659574468, | |
| "grad_norm": 1.6852290803170833, | |
| "learning_rate": 2.8106508875739645e-08, | |
| "loss": 1.4942, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 1.9547872340425532, | |
| "grad_norm": 1.4000270483265091, | |
| "learning_rate": 2.6627218934911243e-08, | |
| "loss": 1.2736, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 1.9574468085106385, | |
| "grad_norm": 1.5150814064740574, | |
| "learning_rate": 2.5147928994082838e-08, | |
| "loss": 1.3892, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 1.9601063829787235, | |
| "grad_norm": 1.6902820824629503, | |
| "learning_rate": 2.3668639053254436e-08, | |
| "loss": 1.4473, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 1.9627659574468086, | |
| "grad_norm": 1.5540380607077866, | |
| "learning_rate": 2.2189349112426034e-08, | |
| "loss": 1.4118, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 1.9654255319148937, | |
| "grad_norm": 1.7104646212150858, | |
| "learning_rate": 2.0710059171597633e-08, | |
| "loss": 1.3168, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 1.9680851063829787, | |
| "grad_norm": 2.2605324172049865, | |
| "learning_rate": 1.923076923076923e-08, | |
| "loss": 1.5395, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.9707446808510638, | |
| "grad_norm": 1.5974851484011308, | |
| "learning_rate": 1.775147928994083e-08, | |
| "loss": 1.6206, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 1.9734042553191489, | |
| "grad_norm": 1.5065611553522427, | |
| "learning_rate": 1.6272189349112424e-08, | |
| "loss": 1.282, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 1.976063829787234, | |
| "grad_norm": 1.5885436344655675, | |
| "learning_rate": 1.4792899408284022e-08, | |
| "loss": 1.3356, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 1.978723404255319, | |
| "grad_norm": 1.503074753014641, | |
| "learning_rate": 1.3313609467455622e-08, | |
| "loss": 1.4519, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 1.9813829787234043, | |
| "grad_norm": 1.4858939981761545, | |
| "learning_rate": 1.1834319526627218e-08, | |
| "loss": 1.4708, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 1.9840425531914894, | |
| "grad_norm": 1.5483339923710784, | |
| "learning_rate": 1.0355029585798816e-08, | |
| "loss": 1.4574, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 1.9867021276595744, | |
| "grad_norm": 1.4527735951794787, | |
| "learning_rate": 8.875739644970414e-09, | |
| "loss": 1.2918, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 1.9893617021276597, | |
| "grad_norm": 1.6044461968692099, | |
| "learning_rate": 7.396449704142011e-09, | |
| "loss": 1.4377, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 1.9920212765957448, | |
| "grad_norm": 1.530330381812826, | |
| "learning_rate": 5.917159763313609e-09, | |
| "loss": 1.4002, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 1.9946808510638299, | |
| "grad_norm": 1.5188564969623919, | |
| "learning_rate": 4.437869822485207e-09, | |
| "loss": 1.3378, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.997340425531915, | |
| "grad_norm": 1.470529168569605, | |
| "learning_rate": 2.9585798816568045e-09, | |
| "loss": 1.3217, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 1.4650169982184404, | |
| "learning_rate": 1.4792899408284023e-09, | |
| "loss": 1.3323, | |
| "step": 752 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 752, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 3000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |