{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.2504113796760157,
  "eval_steps": 500,
  "global_step": 428,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0005850733170000365,
      "grad_norm": 0.3784801935906415,
      "learning_rate": 7.999998479354072e-06,
      "loss": 1.1815,
      "step": 1
    },
    {
      "epoch": 0.001170146634000073,
      "grad_norm": 0.10369689761982695,
      "learning_rate": 7.999993917417574e-06,
      "loss": 1.0085,
      "step": 2
    },
    {
      "epoch": 0.0017552199510001097,
      "grad_norm": 0.08165678751844849,
      "learning_rate": 7.99998631419436e-06,
      "loss": 0.934,
      "step": 3
    },
    {
      "epoch": 0.002340293268000146,
      "grad_norm": 0.06369675215202451,
      "learning_rate": 7.999975669690855e-06,
      "loss": 1.0323,
      "step": 4
    },
    {
      "epoch": 0.002925366585000183,
      "grad_norm": 0.08076101376684344,
      "learning_rate": 7.999961983916048e-06,
      "loss": 1.0676,
      "step": 5
    },
    {
      "epoch": 0.0035104399020002195,
      "grad_norm": 0.06288930819751326,
      "learning_rate": 7.999945256881502e-06,
      "loss": 1.0421,
      "step": 6
    },
    {
      "epoch": 0.004095513219000256,
      "grad_norm": 0.06851051488443692,
      "learning_rate": 7.999925488601348e-06,
      "loss": 0.9544,
      "step": 7
    },
    {
      "epoch": 0.004680586536000292,
      "grad_norm": 0.06007707301663692,
      "learning_rate": 7.999902679092287e-06,
      "loss": 1.0349,
      "step": 8
    },
    {
      "epoch": 0.005265659853000329,
      "grad_norm": 0.07674940091671953,
      "learning_rate": 7.99987682837359e-06,
      "loss": 1.1809,
      "step": 9
    },
    {
      "epoch": 0.005850733170000366,
      "grad_norm": 0.06356726822529891,
      "learning_rate": 7.999847936467093e-06,
      "loss": 0.9502,
      "step": 10
    },
    {
      "epoch": 0.006435806487000402,
      "grad_norm": 0.060093989725189235,
      "learning_rate": 7.999816003397203e-06,
      "loss": 1.0007,
      "step": 11
    },
    {
      "epoch": 0.007020879804000439,
      "grad_norm": 0.04797598057222403,
      "learning_rate": 7.999781029190902e-06,
      "loss": 1.0316,
      "step": 12
    },
    {
      "epoch": 0.007605953121000476,
      "grad_norm": 0.08827709210231136,
      "learning_rate": 7.999743013877734e-06,
      "loss": 1.0688,
      "step": 13
    },
    {
      "epoch": 0.008191026438000511,
      "grad_norm": 0.04993529689396162,
      "learning_rate": 7.999701957489811e-06,
      "loss": 0.9695,
      "step": 14
    },
    {
      "epoch": 0.008776099755000549,
      "grad_norm": 0.053685150458545855,
      "learning_rate": 7.999657860061823e-06,
      "loss": 1.0826,
      "step": 15
    },
    {
      "epoch": 0.009361173072000585,
      "grad_norm": 0.04357397086005245,
      "learning_rate": 7.99961072163102e-06,
      "loss": 0.9788,
      "step": 16
    },
    {
      "epoch": 0.009946246389000622,
      "grad_norm": 0.051838589957396514,
      "learning_rate": 7.999560542237226e-06,
      "loss": 1.0222,
      "step": 17
    },
    {
      "epoch": 0.010531319706000658,
      "grad_norm": 0.04948384582310689,
      "learning_rate": 7.999507321922833e-06,
      "loss": 0.9882,
      "step": 18
    },
    {
      "epoch": 0.011116393023000696,
      "grad_norm": 0.05488935540485584,
      "learning_rate": 7.9994510607328e-06,
      "loss": 1.0124,
      "step": 19
    },
    {
      "epoch": 0.011701466340000731,
      "grad_norm": 0.04632979465330071,
      "learning_rate": 7.999391758714659e-06,
      "loss": 0.9646,
      "step": 20
    },
    {
      "epoch": 0.012286539657000767,
      "grad_norm": 0.04102257708769348,
      "learning_rate": 7.999329415918508e-06,
      "loss": 0.8946,
      "step": 21
    },
    {
      "epoch": 0.012871612974000805,
      "grad_norm": 0.045466366302330634,
      "learning_rate": 7.999264032397013e-06,
      "loss": 0.9973,
      "step": 22
    },
    {
      "epoch": 0.01345668629100084,
      "grad_norm": 0.04440538659368297,
      "learning_rate": 7.99919560820541e-06,
      "loss": 1.0596,
      "step": 23
    },
    {
      "epoch": 0.014041759608000878,
      "grad_norm": 0.042500986542824803,
      "learning_rate": 7.999124143401507e-06,
      "loss": 0.9303,
      "step": 24
    },
    {
      "epoch": 0.014626832925000914,
      "grad_norm": 0.05180121412922999,
      "learning_rate": 7.999049638045675e-06,
      "loss": 0.9412,
      "step": 25
    },
    {
      "epoch": 0.015211906242000951,
      "grad_norm": 0.06441302150178038,
      "learning_rate": 7.998972092200859e-06,
      "loss": 0.9986,
      "step": 26
    },
    {
      "epoch": 0.015796979559000987,
      "grad_norm": 0.05297417453471626,
      "learning_rate": 7.998891505932565e-06,
      "loss": 1.0701,
      "step": 27
    },
    {
      "epoch": 0.016382052876001023,
      "grad_norm": 0.04305692283062352,
      "learning_rate": 7.998807879308878e-06,
      "loss": 0.9823,
      "step": 28
    },
    {
      "epoch": 0.016967126193001062,
      "grad_norm": 0.048331091733781785,
      "learning_rate": 7.998721212400443e-06,
      "loss": 0.9145,
      "step": 29
    },
    {
      "epoch": 0.017552199510001098,
      "grad_norm": 0.04249327256576877,
      "learning_rate": 7.998631505280477e-06,
      "loss": 0.9469,
      "step": 30
    },
    {
      "epoch": 0.018137272827001134,
      "grad_norm": 0.042672714299481426,
      "learning_rate": 7.998538758024765e-06,
      "loss": 0.9814,
      "step": 31
    },
    {
      "epoch": 0.01872234614400117,
      "grad_norm": 0.04643044969291229,
      "learning_rate": 7.998442970711661e-06,
      "loss": 0.9342,
      "step": 32
    },
    {
      "epoch": 0.019307419461001205,
      "grad_norm": 0.042411147754236,
      "learning_rate": 7.998344143422087e-06,
      "loss": 0.8851,
      "step": 33
    },
    {
      "epoch": 0.019892492778001244,
      "grad_norm": 0.04407093546490425,
      "learning_rate": 7.998242276239527e-06,
      "loss": 1.0026,
      "step": 34
    },
    {
      "epoch": 0.02047756609500128,
      "grad_norm": 0.044039695433035646,
      "learning_rate": 7.998137369250046e-06,
      "loss": 0.9637,
      "step": 35
    },
    {
      "epoch": 0.021062639412001316,
      "grad_norm": 0.04258725681727216,
      "learning_rate": 7.998029422542267e-06,
      "loss": 0.9249,
      "step": 36
    },
    {
      "epoch": 0.021647712729001352,
      "grad_norm": 0.045486820543060684,
      "learning_rate": 7.997918436207383e-06,
      "loss": 1.005,
      "step": 37
    },
    {
      "epoch": 0.02223278604600139,
      "grad_norm": 0.03843585783476988,
      "learning_rate": 7.997804410339156e-06,
      "loss": 0.9815,
      "step": 38
    },
    {
      "epoch": 0.022817859363001427,
      "grad_norm": 0.05544665751989288,
      "learning_rate": 7.997687345033915e-06,
      "loss": 0.94,
      "step": 39
    },
    {
      "epoch": 0.023402932680001463,
      "grad_norm": 0.041440042889966715,
      "learning_rate": 7.99756724039056e-06,
      "loss": 0.9337,
      "step": 40
    },
    {
      "epoch": 0.0239880059970015,
      "grad_norm": 0.040659761048442974,
      "learning_rate": 7.997444096510552e-06,
      "loss": 0.897,
      "step": 41
    },
    {
      "epoch": 0.024573079314001534,
      "grad_norm": 0.06541743477477353,
      "learning_rate": 7.997317913497925e-06,
      "loss": 0.9325,
      "step": 42
    },
    {
      "epoch": 0.025158152631001574,
      "grad_norm": 0.04101226832374059,
      "learning_rate": 7.997188691459279e-06,
      "loss": 0.9323,
      "step": 43
    },
    {
      "epoch": 0.02574322594800161,
      "grad_norm": 0.03954326036578227,
      "learning_rate": 7.997056430503782e-06,
      "loss": 0.8793,
      "step": 44
    },
    {
      "epoch": 0.026328299265001645,
      "grad_norm": 0.0396268421897809,
      "learning_rate": 7.996921130743165e-06,
      "loss": 0.9849,
      "step": 45
    },
    {
      "epoch": 0.02691337258200168,
      "grad_norm": 0.03855187149117414,
      "learning_rate": 7.996782792291732e-06,
      "loss": 0.9328,
      "step": 46
    },
    {
      "epoch": 0.02749844589900172,
      "grad_norm": 0.03791722568157901,
      "learning_rate": 7.996641415266355e-06,
      "loss": 0.9293,
      "step": 47
    },
    {
      "epoch": 0.028083519216001756,
      "grad_norm": 0.04497262810695708,
      "learning_rate": 7.996496999786465e-06,
      "loss": 0.9884,
      "step": 48
    },
    {
      "epoch": 0.02866859253300179,
      "grad_norm": 0.041089653577607424,
      "learning_rate": 7.996349545974065e-06,
      "loss": 0.9305,
      "step": 49
    },
    {
      "epoch": 0.029253665850001827,
      "grad_norm": 0.04162057876282252,
      "learning_rate": 7.996199053953729e-06,
      "loss": 1.0771,
      "step": 50
    },
    {
      "epoch": 0.029838739167001863,
      "grad_norm": 0.045060287442874644,
      "learning_rate": 7.996045523852587e-06,
      "loss": 0.9819,
      "step": 51
    },
    {
      "epoch": 0.030423812484001903,
      "grad_norm": 0.03950486400361273,
      "learning_rate": 7.995888955800346e-06,
      "loss": 0.8911,
      "step": 52
    },
    {
      "epoch": 0.03100888580100194,
      "grad_norm": 0.040638774400984866,
      "learning_rate": 7.995729349929275e-06,
      "loss": 0.9289,
      "step": 53
    },
    {
      "epoch": 0.031593959118001974,
      "grad_norm": 0.04034434081592363,
      "learning_rate": 7.995566706374206e-06,
      "loss": 0.9529,
      "step": 54
    },
    {
      "epoch": 0.03217903243500201,
      "grad_norm": 0.04315614004870732,
      "learning_rate": 7.995401025272545e-06,
      "loss": 0.9554,
      "step": 55
    },
    {
      "epoch": 0.032764105752002046,
      "grad_norm": 0.03862522293897866,
      "learning_rate": 7.995232306764258e-06,
      "loss": 0.9604,
      "step": 56
    },
    {
      "epoch": 0.03334917906900208,
      "grad_norm": 0.042952485422922036,
      "learning_rate": 7.995060550991879e-06,
      "loss": 0.9731,
      "step": 57
    },
    {
      "epoch": 0.033934252386002124,
      "grad_norm": 0.04399639647859174,
      "learning_rate": 7.994885758100508e-06,
      "loss": 0.868,
      "step": 58
    },
    {
      "epoch": 0.03451932570300216,
      "grad_norm": 0.03794998627596821,
      "learning_rate": 7.99470792823781e-06,
      "loss": 0.9176,
      "step": 59
    },
    {
      "epoch": 0.035104399020002196,
      "grad_norm": 0.046410465067380204,
      "learning_rate": 7.99452706155402e-06,
      "loss": 0.9456,
      "step": 60
    },
    {
      "epoch": 0.03568947233700223,
      "grad_norm": 0.04773216743183266,
      "learning_rate": 7.994343158201927e-06,
      "loss": 1.0049,
      "step": 61
    },
    {
      "epoch": 0.03627454565400227,
      "grad_norm": 0.04395331205981261,
      "learning_rate": 7.994156218336901e-06,
      "loss": 0.9197,
      "step": 62
    },
    {
      "epoch": 0.0368596189710023,
      "grad_norm": 0.04353152162367831,
      "learning_rate": 7.993966242116865e-06,
      "loss": 0.8978,
      "step": 63
    },
    {
      "epoch": 0.03744469228800234,
      "grad_norm": 0.041195331266407585,
      "learning_rate": 7.993773229702312e-06,
      "loss": 0.965,
      "step": 64
    },
    {
      "epoch": 0.038029765605002375,
      "grad_norm": 0.050163343033375746,
      "learning_rate": 7.993577181256304e-06,
      "loss": 0.8876,
      "step": 65
    },
    {
      "epoch": 0.03861483892200241,
      "grad_norm": 0.06660490581550026,
      "learning_rate": 7.993378096944456e-06,
      "loss": 0.9082,
      "step": 66
    },
    {
      "epoch": 0.03919991223900245,
      "grad_norm": 0.03984407119317513,
      "learning_rate": 7.99317597693496e-06,
      "loss": 0.9233,
      "step": 67
    },
    {
      "epoch": 0.03978498555600249,
      "grad_norm": 0.04866369054757869,
      "learning_rate": 7.992970821398567e-06,
      "loss": 0.9349,
      "step": 68
    },
    {
      "epoch": 0.040370058873002525,
      "grad_norm": 0.04990567711730111,
      "learning_rate": 7.99276263050859e-06,
      "loss": 0.985,
      "step": 69
    },
    {
      "epoch": 0.04095513219000256,
      "grad_norm": 0.17411986574945643,
      "learning_rate": 7.992551404440914e-06,
      "loss": 1.0208,
      "step": 70
    },
    {
      "epoch": 0.041540205507002596,
      "grad_norm": 0.04353986815925501,
      "learning_rate": 7.992337143373981e-06,
      "loss": 0.9633,
      "step": 71
    },
    {
      "epoch": 0.04212527882400263,
      "grad_norm": 0.05318272939257367,
      "learning_rate": 7.9921198474888e-06,
      "loss": 1.0015,
      "step": 72
    },
    {
      "epoch": 0.04271035214100267,
      "grad_norm": 0.059411931897509304,
      "learning_rate": 7.991899516968942e-06,
      "loss": 1.0224,
      "step": 73
    },
    {
      "epoch": 0.043295425458002704,
      "grad_norm": 0.05746696657441689,
      "learning_rate": 7.991676152000545e-06,
      "loss": 0.9817,
      "step": 74
    },
    {
      "epoch": 0.04388049877500274,
      "grad_norm": 0.03994063377968752,
      "learning_rate": 7.991449752772307e-06,
      "loss": 0.899,
      "step": 75
    },
    {
      "epoch": 0.04446557209200278,
      "grad_norm": 0.05114807327866483,
      "learning_rate": 7.991220319475492e-06,
      "loss": 0.9627,
      "step": 76
    },
    {
      "epoch": 0.04505064540900282,
      "grad_norm": 0.043413594203031196,
      "learning_rate": 7.990987852303923e-06,
      "loss": 0.9385,
      "step": 77
    },
    {
      "epoch": 0.045635718726002854,
      "grad_norm": 0.044141893159488445,
      "learning_rate": 7.990752351453994e-06,
      "loss": 0.9214,
      "step": 78
    },
    {
      "epoch": 0.04622079204300289,
      "grad_norm": 0.07098639926950194,
      "learning_rate": 7.990513817124652e-06,
      "loss": 0.9762,
      "step": 79
    },
    {
      "epoch": 0.046805865360002925,
      "grad_norm": 0.04487096138718826,
      "learning_rate": 7.990272249517416e-06,
      "loss": 0.9379,
      "step": 80
    },
    {
      "epoch": 0.04739093867700296,
      "grad_norm": 0.040488382771263605,
      "learning_rate": 7.990027648836359e-06,
      "loss": 0.9563,
      "step": 81
    },
    {
      "epoch": 0.047976011994003,
      "grad_norm": 0.04352730030611419,
      "learning_rate": 7.989780015288123e-06,
      "loss": 0.9488,
      "step": 82
    },
    {
      "epoch": 0.04856108531100303,
      "grad_norm": 0.04413441845817798,
      "learning_rate": 7.98952934908191e-06,
      "loss": 1.0336,
      "step": 83
    },
    {
      "epoch": 0.04914615862800307,
      "grad_norm": 0.04193745680850997,
      "learning_rate": 7.989275650429482e-06,
      "loss": 0.8785,
      "step": 84
    },
    {
      "epoch": 0.04973123194500311,
      "grad_norm": 0.04475381091812719,
      "learning_rate": 7.989018919545165e-06,
      "loss": 0.9443,
      "step": 85
    },
    {
      "epoch": 0.05031630526200315,
      "grad_norm": 0.04234754821679888,
      "learning_rate": 7.988759156645845e-06,
      "loss": 0.9564,
      "step": 86
    },
    {
      "epoch": 0.05090137857900318,
      "grad_norm": 0.054324472784088765,
      "learning_rate": 7.988496361950972e-06,
      "loss": 0.9824,
      "step": 87
    },
    {
      "epoch": 0.05148645189600322,
      "grad_norm": 0.051967709927586946,
      "learning_rate": 7.988230535682556e-06,
      "loss": 0.914,
      "step": 88
    },
    {
      "epoch": 0.052071525213003254,
      "grad_norm": 0.03996756018971234,
      "learning_rate": 7.987961678065169e-06,
      "loss": 0.9421,
      "step": 89
    },
    {
      "epoch": 0.05265659853000329,
      "grad_norm": 0.07556612961446735,
      "learning_rate": 7.987689789325939e-06,
      "loss": 0.9791,
      "step": 90
    },
    {
      "epoch": 0.053241671847003326,
      "grad_norm": 0.04557130060143385,
      "learning_rate": 7.987414869694562e-06,
      "loss": 0.9318,
      "step": 91
    },
    {
      "epoch": 0.05382674516400336,
      "grad_norm": 0.0424700455078842,
      "learning_rate": 7.98713691940329e-06,
      "loss": 0.9745,
      "step": 92
    },
    {
      "epoch": 0.0544118184810034,
      "grad_norm": 0.07879025661937863,
      "learning_rate": 7.986855938686935e-06,
      "loss": 0.9614,
      "step": 93
    },
    {
      "epoch": 0.05499689179800344,
      "grad_norm": 0.04227267907542001,
      "learning_rate": 7.986571927782871e-06,
      "loss": 0.9317,
      "step": 94
    },
    {
      "epoch": 0.055581965115003476,
      "grad_norm": 0.042275976377898476,
      "learning_rate": 7.986284886931033e-06,
      "loss": 0.8982,
      "step": 95
    },
    {
      "epoch": 0.05616703843200351,
      "grad_norm": 0.0457764180229334,
      "learning_rate": 7.985994816373913e-06,
      "loss": 0.8803,
      "step": 96
    },
    {
      "epoch": 0.05675211174900355,
      "grad_norm": 0.04305545472943831,
      "learning_rate": 7.985701716356565e-06,
      "loss": 0.9786,
      "step": 97
    },
    {
      "epoch": 0.05733718506600358,
      "grad_norm": 0.045580685701444475,
      "learning_rate": 7.985405587126597e-06,
      "loss": 0.9036,
      "step": 98
    },
    {
      "epoch": 0.05792225838300362,
      "grad_norm": 0.0442896526405325,
      "learning_rate": 7.985106428934183e-06,
      "loss": 0.9871,
      "step": 99
    },
    {
      "epoch": 0.058507331700003655,
      "grad_norm": 0.04396583794537118,
      "learning_rate": 7.984804242032051e-06,
      "loss": 1.0145,
      "step": 100
    },
    {
      "epoch": 0.05909240501700369,
      "grad_norm": 0.04488641679254391,
      "learning_rate": 7.984499026675494e-06,
      "loss": 0.9673,
      "step": 101
    },
    {
      "epoch": 0.05967747833400373,
      "grad_norm": 0.0402258783966036,
      "learning_rate": 7.984190783122351e-06,
      "loss": 1.0458,
      "step": 102
    },
    {
      "epoch": 0.06026255165100377,
      "grad_norm": 0.048502182555142354,
      "learning_rate": 7.983879511633036e-06,
      "loss": 0.8879,
      "step": 103
    },
    {
      "epoch": 0.060847624968003805,
      "grad_norm": 0.04818474096979939,
      "learning_rate": 7.983565212470504e-06,
      "loss": 0.9467,
      "step": 104
    },
    {
      "epoch": 0.06143269828500384,
      "grad_norm": 0.04176125713160911,
      "learning_rate": 7.983247885900283e-06,
      "loss": 0.9266,
      "step": 105
    },
    {
      "epoch": 0.06201777160200388,
      "grad_norm": 0.04212530605033032,
      "learning_rate": 7.982927532190447e-06,
      "loss": 0.9179,
      "step": 106
    },
    {
      "epoch": 0.06260284491900392,
      "grad_norm": 0.037198964746819226,
      "learning_rate": 7.982604151611633e-06,
      "loss": 1.0018,
      "step": 107
    },
    {
      "epoch": 0.06318791823600395,
      "grad_norm": 0.04090799839826158,
      "learning_rate": 7.982277744437035e-06,
      "loss": 0.8756,
      "step": 108
    },
    {
      "epoch": 0.06377299155300399,
      "grad_norm": 0.03857422273289164,
      "learning_rate": 7.981948310942402e-06,
      "loss": 0.8855,
      "step": 109
    },
    {
      "epoch": 0.06435806487000402,
      "grad_norm": 0.05241752689478532,
      "learning_rate": 7.981615851406039e-06,
      "loss": 0.8862,
      "step": 110
    },
    {
      "epoch": 0.06494313818700406,
      "grad_norm": 0.04086973369607326,
      "learning_rate": 7.981280366108814e-06,
      "loss": 0.9221,
      "step": 111
    },
    {
      "epoch": 0.06552821150400409,
      "grad_norm": 0.03931043694281754,
      "learning_rate": 7.98094185533414e-06,
      "loss": 0.9417,
      "step": 112
    },
    {
      "epoch": 0.06611328482100413,
      "grad_norm": 0.04169878931857935,
      "learning_rate": 7.980600319367995e-06,
      "loss": 0.958,
      "step": 113
    },
    {
      "epoch": 0.06669835813800416,
      "grad_norm": 0.044230077430854955,
      "learning_rate": 7.980255758498908e-06,
      "loss": 0.9265,
      "step": 114
    },
    {
      "epoch": 0.0672834314550042,
      "grad_norm": 0.04488148180330816,
      "learning_rate": 7.979908173017968e-06,
      "loss": 0.8908,
      "step": 115
    },
    {
      "epoch": 0.06786850477200425,
      "grad_norm": 0.042038370275589085,
      "learning_rate": 7.979557563218815e-06,
      "loss": 0.8961,
      "step": 116
    },
    {
      "epoch": 0.06845357808900428,
      "grad_norm": 0.04747064970378541,
      "learning_rate": 7.979203929397646e-06,
      "loss": 1.0609,
      "step": 117
    },
    {
      "epoch": 0.06903865140600432,
      "grad_norm": 0.04392999493678844,
      "learning_rate": 7.97884727185321e-06,
      "loss": 0.9001,
      "step": 118
    },
    {
      "epoch": 0.06962372472300435,
      "grad_norm": 0.040693633998808994,
      "learning_rate": 7.978487590886814e-06,
      "loss": 0.8562,
      "step": 119
    },
    {
      "epoch": 0.07020879804000439,
      "grad_norm": 0.08337676841807191,
      "learning_rate": 7.978124886802316e-06,
      "loss": 0.9344,
      "step": 120
    },
    {
      "epoch": 0.07079387135700442,
      "grad_norm": 0.03951441645023813,
      "learning_rate": 7.977759159906134e-06,
      "loss": 0.9182,
      "step": 121
    },
    {
      "epoch": 0.07137894467400446,
      "grad_norm": 0.04427536962304041,
      "learning_rate": 7.977390410507229e-06,
      "loss": 0.9079,
      "step": 122
    },
    {
      "epoch": 0.07196401799100449,
      "grad_norm": 0.047402666476443076,
      "learning_rate": 7.977018638917126e-06,
      "loss": 0.9442,
      "step": 123
    },
    {
      "epoch": 0.07254909130800453,
      "grad_norm": 0.07806155365092546,
      "learning_rate": 7.976643845449897e-06,
      "loss": 0.9453,
      "step": 124
    },
    {
      "epoch": 0.07313416462500458,
      "grad_norm": 0.04187989433422361,
      "learning_rate": 7.97626603042217e-06,
      "loss": 0.9762,
      "step": 125
    },
    {
      "epoch": 0.0737192379420046,
      "grad_norm": 0.038153971027990764,
      "learning_rate": 7.975885194153125e-06,
      "loss": 0.9377,
      "step": 126
    },
    {
      "epoch": 0.07430431125900465,
      "grad_norm": 0.04398811005912341,
      "learning_rate": 7.975501336964492e-06,
      "loss": 0.9655,
      "step": 127
    },
    {
      "epoch": 0.07488938457600468,
      "grad_norm": 0.04310577216463342,
      "learning_rate": 7.975114459180555e-06,
      "loss": 0.9082,
      "step": 128
    },
    {
      "epoch": 0.07547445789300472,
      "grad_norm": 0.04552741227747631,
      "learning_rate": 7.97472456112815e-06,
      "loss": 0.8667,
      "step": 129
    },
    {
      "epoch": 0.07605953121000475,
      "grad_norm": 0.0406705187810207,
      "learning_rate": 7.974331643136666e-06,
      "loss": 0.9286,
      "step": 130
    },
    {
      "epoch": 0.07664460452700479,
      "grad_norm": 0.16819457832404855,
      "learning_rate": 7.973935705538039e-06,
      "loss": 0.9724,
      "step": 131
    },
    {
      "epoch": 0.07722967784400482,
      "grad_norm": 0.04524796652654962,
      "learning_rate": 7.973536748666756e-06,
      "loss": 0.961,
      "step": 132
    },
    {
      "epoch": 0.07781475116100486,
      "grad_norm": 0.051171072191227115,
      "learning_rate": 7.973134772859862e-06,
      "loss": 0.9513,
      "step": 133
    },
    {
      "epoch": 0.0783998244780049,
      "grad_norm": 0.044436153110071305,
      "learning_rate": 7.972729778456946e-06,
      "loss": 0.9363,
      "step": 134
    },
    {
      "epoch": 0.07898489779500494,
      "grad_norm": 0.04485419817375143,
      "learning_rate": 7.97232176580015e-06,
      "loss": 0.8583,
      "step": 135
    },
    {
      "epoch": 0.07956997111200498,
      "grad_norm": 0.044866876801222304,
      "learning_rate": 7.971910735234161e-06,
      "loss": 0.9859,
      "step": 136
    },
    {
      "epoch": 0.080155044429005,
      "grad_norm": 0.03934716109284772,
      "learning_rate": 7.971496687106219e-06,
      "loss": 0.8592,
      "step": 137
    },
    {
      "epoch": 0.08074011774600505,
      "grad_norm": 0.041852675369480244,
      "learning_rate": 7.971079621766117e-06,
      "loss": 0.9353,
      "step": 138
    },
    {
      "epoch": 0.08132519106300508,
      "grad_norm": 0.3430745253576026,
      "learning_rate": 7.97065953956619e-06,
      "loss": 0.9602,
      "step": 139
    },
    {
      "epoch": 0.08191026438000512,
      "grad_norm": 0.44783710464834237,
      "learning_rate": 7.970236440861327e-06,
      "loss": 0.9833,
      "step": 140
    },
    {
      "epoch": 0.08249533769700515,
      "grad_norm": 0.29750847371388817,
      "learning_rate": 7.96981032600896e-06,
      "loss": 0.8244,
      "step": 141
    },
    {
      "epoch": 0.08308041101400519,
      "grad_norm": 0.169443611740874,
      "learning_rate": 7.969381195369076e-06,
      "loss": 0.8983,
      "step": 142
    },
    {
      "epoch": 0.08366548433100524,
      "grad_norm": 0.04111262427570532,
      "learning_rate": 7.968949049304204e-06,
      "loss": 0.9552,
      "step": 143
    },
    {
      "epoch": 0.08425055764800526,
      "grad_norm": 0.04087231642049412,
      "learning_rate": 7.968513888179421e-06,
      "loss": 0.9051,
      "step": 144
    },
    {
      "epoch": 0.08483563096500531,
      "grad_norm": 0.05663350951363164,
      "learning_rate": 7.968075712362356e-06,
      "loss": 0.8366,
      "step": 145
    },
    {
      "epoch": 0.08542070428200534,
      "grad_norm": 0.04202895629977615,
      "learning_rate": 7.967634522223179e-06,
      "loss": 0.8115,
      "step": 146
    },
    {
      "epoch": 0.08600577759900538,
      "grad_norm": 0.04238111014919017,
      "learning_rate": 7.96719031813461e-06,
      "loss": 0.9086,
      "step": 147
    },
    {
      "epoch": 0.08659085091600541,
      "grad_norm": 0.04226501020383857,
      "learning_rate": 7.966743100471913e-06,
      "loss": 0.9286,
      "step": 148
    },
    {
      "epoch": 0.08717592423300545,
      "grad_norm": 0.046325797442375215,
      "learning_rate": 7.9662928696129e-06,
      "loss": 0.9393,
      "step": 149
    },
    {
      "epoch": 0.08776099755000548,
      "grad_norm": 0.05532068772615188,
      "learning_rate": 7.965839625937926e-06,
      "loss": 0.9202,
      "step": 150
    },
    {
      "epoch": 0.08834607086700552,
      "grad_norm": 0.039360829289514745,
      "learning_rate": 7.965383369829894e-06,
      "loss": 0.908,
      "step": 151
    },
    {
      "epoch": 0.08893114418400556,
      "grad_norm": 0.04965498407233415,
      "learning_rate": 7.964924101674252e-06,
      "loss": 0.9406,
      "step": 152
    },
    {
      "epoch": 0.0895162175010056,
      "grad_norm": 0.04177674200968805,
      "learning_rate": 7.964461821858987e-06,
      "loss": 0.8933,
      "step": 153
    },
    {
      "epoch": 0.09010129081800564,
      "grad_norm": 0.04711456314429998,
      "learning_rate": 7.963996530774639e-06,
      "loss": 1.0111,
      "step": 154
    },
    {
      "epoch": 0.09068636413500566,
      "grad_norm": 0.045100723994096155,
      "learning_rate": 7.963528228814285e-06,
      "loss": 0.9806,
      "step": 155
    },
    {
      "epoch": 0.09127143745200571,
      "grad_norm": 0.09448573408012474,
      "learning_rate": 7.96305691637355e-06,
      "loss": 0.9142,
      "step": 156
    },
    {
      "epoch": 0.09185651076900574,
      "grad_norm": 0.04297907772876167,
      "learning_rate": 7.962582593850596e-06,
      "loss": 0.8852,
      "step": 157
    },
    {
      "epoch": 0.09244158408600578,
      "grad_norm": 0.05151683390684187,
      "learning_rate": 7.962105261646138e-06,
      "loss": 0.9975,
      "step": 158
    },
    {
      "epoch": 0.09302665740300581,
      "grad_norm": 0.07242957126413647,
      "learning_rate": 7.961624920163423e-06,
      "loss": 0.9196,
      "step": 159
    },
    {
      "epoch": 0.09361173072000585,
      "grad_norm": 0.08840439035290122,
      "learning_rate": 7.961141569808248e-06,
      "loss": 0.92,
      "step": 160
    },
    {
      "epoch": 0.0941968040370059,
      "grad_norm": 0.11047843778949552,
      "learning_rate": 7.960655210988948e-06,
      "loss": 0.9452,
      "step": 161
    },
    {
      "epoch": 0.09478187735400592,
      "grad_norm": 0.04275273565605597,
      "learning_rate": 7.960165844116399e-06,
      "loss": 0.9641,
      "step": 162
    },
    {
      "epoch": 0.09536695067100597,
      "grad_norm": 0.05662855927390397,
      "learning_rate": 7.959673469604025e-06,
      "loss": 0.9354,
      "step": 163
    },
    {
      "epoch": 0.095952023988006,
      "grad_norm": 0.04769002643125012,
      "learning_rate": 7.959178087867779e-06,
      "loss": 0.9087,
      "step": 164
    },
    {
      "epoch": 0.09653709730500604,
      "grad_norm": 0.050744023655463925,
      "learning_rate": 7.958679699326164e-06,
      "loss": 0.9561,
      "step": 165
    },
    {
      "epoch": 0.09712217062200607,
      "grad_norm": 0.058451109924341715,
      "learning_rate": 7.958178304400222e-06,
      "loss": 0.9881,
      "step": 166
    },
    {
      "epoch": 0.09770724393900611,
      "grad_norm": 0.04338399471073774,
      "learning_rate": 7.95767390351353e-06,
      "loss": 0.9705,
      "step": 167
    },
    {
      "epoch": 0.09829231725600614,
      "grad_norm": 0.04901291931441639,
      "learning_rate": 7.95716649709221e-06,
      "loss": 0.9229,
      "step": 168
    },
    {
      "epoch": 0.09887739057300618,
      "grad_norm": 0.044284928725944674,
      "learning_rate": 7.95665608556492e-06,
      "loss": 0.9493,
      "step": 169
    },
    {
      "epoch": 0.09946246389000622,
      "grad_norm": 0.04914804115067185,
      "learning_rate": 7.956142669362855e-06,
      "loss": 0.9879,
      "step": 170
    },
    {
      "epoch": 0.10004753720700625,
      "grad_norm": 0.04087870881565583,
      "learning_rate": 7.955626248919752e-06,
      "loss": 0.9435,
      "step": 171
    },
    {
      "epoch": 0.1006326105240063,
      "grad_norm": 0.04938743203389444,
      "learning_rate": 7.955106824671888e-06,
      "loss": 0.8813,
      "step": 172
    },
    {
      "epoch": 0.10121768384100632,
      "grad_norm": 0.04440441336302364,
      "learning_rate": 7.95458439705807e-06,
      "loss": 0.954,
      "step": 173
    },
    {
      "epoch": 0.10180275715800637,
      "grad_norm": 0.04052619663535755,
      "learning_rate": 7.954058966519649e-06,
      "loss": 0.8759,
      "step": 174
    },
    {
      "epoch": 0.1023878304750064,
      "grad_norm": 0.04315943430366373,
      "learning_rate": 7.953530533500507e-06,
      "loss": 0.8621,
      "step": 175
    },
    {
      "epoch": 0.10297290379200644,
      "grad_norm": 0.07065437183104553,
      "learning_rate": 7.952999098447072e-06,
      "loss": 0.9796,
      "step": 176
    },
    {
      "epoch": 0.10355797710900647,
      "grad_norm": 0.04286710979601013,
      "learning_rate": 7.952464661808297e-06,
      "loss": 0.9187,
      "step": 177
    },
    {
      "epoch": 0.10414305042600651,
      "grad_norm": 0.044637305084684484,
      "learning_rate": 7.951927224035678e-06,
      "loss": 0.8772,
      "step": 178
    },
    {
      "epoch": 0.10472812374300655,
      "grad_norm": 0.04369651860095562,
      "learning_rate": 7.951386785583244e-06,
      "loss": 0.8969,
      "step": 179
    },
    {
      "epoch": 0.10531319706000658,
      "grad_norm": 0.05048322425196143,
      "learning_rate": 7.950843346907559e-06,
      "loss": 0.8907,
      "step": 180
    },
    {
      "epoch": 0.10589827037700662,
      "grad_norm": 0.03884181381210202,
      "learning_rate": 7.95029690846772e-06,
      "loss": 0.931,
      "step": 181
    },
    {
      "epoch": 0.10648334369400665,
      "grad_norm": 0.05060670376656713,
      "learning_rate": 7.949747470725362e-06,
      "loss": 0.8624,
      "step": 182
    },
    {
      "epoch": 0.1070684170110067,
      "grad_norm": 0.04162121727818916,
      "learning_rate": 7.949195034144653e-06,
      "loss": 0.9141,
      "step": 183
    },
    {
      "epoch": 0.10765349032800672,
      "grad_norm": 0.04729330740672752,
      "learning_rate": 7.94863959919229e-06,
      "loss": 0.921,
      "step": 184
    },
    {
      "epoch": 0.10823856364500677,
      "grad_norm": 0.04459204791909808,
      "learning_rate": 7.948081166337509e-06,
      "loss": 0.8993,
      "step": 185
    },
    {
      "epoch": 0.1088236369620068,
      "grad_norm": 0.04645986236352756,
      "learning_rate": 7.947519736052075e-06,
      "loss": 0.9158,
      "step": 186
    },
    {
      "epoch": 0.10940871027900684,
      "grad_norm": 0.04275967646092167,
      "learning_rate": 7.946955308810285e-06,
      "loss": 0.9387,
      "step": 187
    },
    {
      "epoch": 0.10999378359600688,
      "grad_norm": 0.07421648959437231,
      "learning_rate": 7.94638788508897e-06,
      "loss": 0.8497,
      "step": 188
    },
    {
      "epoch": 0.11057885691300691,
      "grad_norm": 0.04955170539289863,
      "learning_rate": 7.945817465367493e-06,
      "loss": 0.8525,
      "step": 189
    },
    {
      "epoch": 0.11116393023000695,
      "grad_norm": 0.12013696366466048,
      "learning_rate": 7.945244050127744e-06,
      "loss": 0.9616,
      "step": 190
    },
    {
      "epoch": 0.11174900354700698,
      "grad_norm": 0.040829512145039985,
      "learning_rate": 7.944667639854148e-06,
      "loss": 0.8344,
      "step": 191
    },
    {
      "epoch": 0.11233407686400702,
      "grad_norm": 0.05068364591326354,
      "learning_rate": 7.944088235033657e-06,
      "loss": 0.9403,
      "step": 192
    },
    {
      "epoch": 0.11291915018100705,
      "grad_norm": 0.0499939002215986,
      "learning_rate": 7.943505836155753e-06,
      "loss": 0.9475,
      "step": 193
    },
    {
      "epoch": 0.1135042234980071,
      "grad_norm": 0.05407026250866459,
      "learning_rate": 7.94292044371245e-06,
      "loss": 0.9101,
      "step": 194
    },
    {
      "epoch": 0.11408929681500712,
      "grad_norm": 0.06417314751489454,
      "learning_rate": 7.94233205819829e-06,
      "loss": 0.8787,
      "step": 195
    },
    {
      "epoch": 0.11467437013200717,
      "grad_norm": 0.03871241656337873,
      "learning_rate": 7.941740680110343e-06,
      "loss": 0.9059,
      "step": 196
    },
    {
      "epoch": 0.11525944344900721,
      "grad_norm": 0.041501526897382096,
      "learning_rate": 7.941146309948205e-06,
      "loss": 0.8946,
      "step": 197
    },
    {
      "epoch": 0.11584451676600724,
      "grad_norm": 0.037938175729775744,
      "learning_rate": 7.940548948214005e-06,
      "loss": 0.8534,
      "step": 198
    },
    {
      "epoch": 0.11642959008300728,
      "grad_norm": 0.038697315108935856,
      "learning_rate": 7.939948595412394e-06,
      "loss": 0.9671,
      "step": 199
    },
    {
      "epoch": 0.11701466340000731,
      "grad_norm": 0.040128164685532784,
      "learning_rate": 7.939345252050552e-06,
      "loss": 0.9452,
      "step": 200
    },
    {
      "epoch": 0.11759973671700735,
      "grad_norm": 0.04159138776071103,
      "learning_rate": 7.938738918638187e-06,
      "loss": 0.9525,
      "step": 201
    },
    {
      "epoch": 0.11818481003400738,
      "grad_norm": 0.044356876333774484,
      "learning_rate": 7.93812959568753e-06,
      "loss": 0.8863,
      "step": 202
    },
    {
      "epoch": 0.11876988335100742,
      "grad_norm": 0.04110203273326361,
      "learning_rate": 7.93751728371334e-06,
      "loss": 0.8585,
      "step": 203
    },
    {
      "epoch": 0.11935495666800745,
      "grad_norm": 0.03907623380015084,
      "learning_rate": 7.9369019832329e-06,
      "loss": 0.8901,
      "step": 204
    },
    {
      "epoch": 0.1199400299850075,
      "grad_norm": 0.06841747716076531,
      "learning_rate": 7.936283694766016e-06,
      "loss": 0.9382,
      "step": 205
    },
    {
      "epoch": 0.12052510330200754,
      "grad_norm": 0.03864267029597622,
      "learning_rate": 7.935662418835023e-06,
      "loss": 0.9445,
      "step": 206
    },
    {
      "epoch": 0.12111017661900757,
      "grad_norm": 0.04187477888552647,
      "learning_rate": 7.935038155964775e-06,
      "loss": 0.949,
      "step": 207
    },
    {
      "epoch": 0.12169524993600761,
      "grad_norm": 0.03844046778107278,
      "learning_rate": 7.934410906682653e-06,
      "loss": 0.8185,
      "step": 208
    },
    {
      "epoch": 0.12228032325300764,
      "grad_norm": 0.03894778215959397,
      "learning_rate": 7.933780671518558e-06,
      "loss": 0.8226,
      "step": 209
    },
    {
      "epoch": 0.12286539657000768,
      "grad_norm": 0.05316470088962357,
      "learning_rate": 7.933147451004914e-06,
      "loss": 0.9149,
      "step": 210
    },
    {
      "epoch": 0.12345046988700771,
      "grad_norm": 0.04054196015487159,
      "learning_rate": 7.932511245676669e-06,
      "loss": 0.907,
      "step": 211
    },
    {
      "epoch": 0.12403554320400775,
      "grad_norm": 0.04197248747401694,
      "learning_rate": 7.931872056071292e-06,
      "loss": 0.8974,
      "step": 212
    },
    {
      "epoch": 0.12462061652100778,
      "grad_norm": 0.04107563523275673,
      "learning_rate": 7.931229882728771e-06,
      "loss": 0.8758,
      "step": 213
    },
    {
      "epoch": 0.12520568983800784,
      "grad_norm": 0.04898006391812857,
      "learning_rate": 7.930584726191616e-06,
      "loss": 0.9015,
      "step": 214
    },
    {
      "epoch": 0.12579076315500787,
      "grad_norm": 0.053828637223887026,
      "learning_rate": 7.92993658700486e-06,
      "loss": 0.9095,
      "step": 215
    },
    {
      "epoch": 0.1263758364720079,
      "grad_norm": 0.041596264391364365,
      "learning_rate": 7.929285465716051e-06,
      "loss": 0.9324,
      "step": 216
    },
    {
      "epoch": 0.12696090978900793,
      "grad_norm": 0.04270202089537964,
      "learning_rate": 7.928631362875258e-06,
      "loss": 0.9712,
      "step": 217
    },
    {
      "epoch": 0.12754598310600798,
      "grad_norm": 0.04508600982672278,
      "learning_rate": 7.927974279035069e-06,
      "loss": 0.8526,
      "step": 218
    },
    {
      "epoch": 0.128131056423008,
      "grad_norm": 0.04210505656160864,
      "learning_rate": 7.927314214750592e-06,
      "loss": 0.8483,
      "step": 219
    },
    {
      "epoch": 0.12871612974000804,
      "grad_norm": 0.04228456764136032,
      "learning_rate": 7.926651170579451e-06,
      "loss": 1.0049,
      "step": 220
    },
    {
      "epoch": 0.12930120305700807,
      "grad_norm": 0.039866451292078504,
      "learning_rate": 7.92598514708179e-06,
      "loss": 0.9421,
      "step": 221
    },
    {
      "epoch": 0.12988627637400812,
      "grad_norm": 0.037613132725661406,
      "learning_rate": 7.925316144820263e-06,
      "loss": 0.9474,
      "step": 222
    },
    {
      "epoch": 0.13047134969100815,
      "grad_norm": 0.04168086524600805,
      "learning_rate": 7.92464416436005e-06,
      "loss": 0.9058,
      "step": 223
    },
    {
      "epoch": 0.13105642300800818,
      "grad_norm": 0.039621964711338775,
      "learning_rate": 7.923969206268839e-06,
      "loss": 0.9086,
      "step": 224
    },
    {
      "epoch": 0.13164149632500824,
      "grad_norm": 0.05209282643387043,
      "learning_rate": 7.923291271116838e-06,
      "loss": 0.9298,
      "step": 225
    },
    {
      "epoch": 0.13222656964200827,
      "grad_norm": 0.044258185244179175,
      "learning_rate": 7.92261035947677e-06,
      "loss": 0.8925,
      "step": 226
    },
    {
      "epoch": 0.1328116429590083,
      "grad_norm": 0.039844108391859055,
      "learning_rate": 7.92192647192387e-06,
      "loss": 0.8392,
      "step": 227
    },
    {
      "epoch": 0.13339671627600833,
      "grad_norm": 0.044744651789733476,
      "learning_rate": 7.92123960903589e-06,
      "loss": 0.9329,
      "step": 228
    },
    {
      "epoch": 0.13398178959300838,
      "grad_norm": 0.04494192821446448,
      "learning_rate": 7.92054977139309e-06,
      "loss": 0.8606,
      "step": 229
    },
    {
      "epoch": 0.1345668629100084,
      "grad_norm": 0.05863838322698434,
      "learning_rate": 7.919856959578252e-06,
      "loss": 0.9302,
      "step": 230
    },
    {
      "epoch": 0.13515193622700844,
      "grad_norm": 0.04085984556832983,
      "learning_rate": 7.919161174176663e-06,
      "loss": 0.9993,
      "step": 231
    },
    {
      "epoch": 0.1357370095440085,
      "grad_norm": 0.04500674877406008,
      "learning_rate": 7.918462415776125e-06,
      "loss": 0.9377,
      "step": 232
    },
    {
      "epoch": 0.13632208286100853,
      "grad_norm": 0.039173528958006375,
      "learning_rate": 7.917760684966955e-06,
      "loss": 0.813,
      "step": 233
    },
    {
      "epoch": 0.13690715617800855,
      "grad_norm": 0.04298295272921228,
      "learning_rate": 7.91705598234197e-06,
      "loss": 0.8952,
      "step": 234
    },
    {
      "epoch": 0.13749222949500858,
      "grad_norm": 0.04028408438122686,
      "learning_rate": 7.916348308496513e-06,
      "loss": 0.9051,
      "step": 235
    },
    {
      "epoch": 0.13807730281200864,
      "grad_norm": 0.06743695497866435,
      "learning_rate": 7.915637664028423e-06,
      "loss": 0.9475,
      "step": 236
    },
    {
      "epoch": 0.13866237612900867,
      "grad_norm": 0.03949575625475006,
      "learning_rate": 7.914924049538061e-06,
      "loss": 0.9316,
      "step": 237
    },
    {
      "epoch": 0.1392474494460087,
      "grad_norm": 0.04030623921903529,
      "learning_rate": 7.914207465628284e-06,
      "loss": 0.885,
      "step": 238
    },
    {
      "epoch": 0.13983252276300873,
      "grad_norm": 0.047528726409039795,
      "learning_rate": 7.91348791290447e-06,
      "loss": 0.918,
      "step": 239
    },
    {
      "epoch": 0.14041759608000878,
      "grad_norm": 0.04466991571391728,
      "learning_rate": 7.912765391974496e-06,
      "loss": 0.9305,
      "step": 240
    },
    {
      "epoch": 0.1410026693970088,
      "grad_norm": 0.04186868219039162,
      "learning_rate": 7.912039903448752e-06,
      "loss": 0.9415,
      "step": 241
    },
    {
      "epoch": 0.14158774271400884,
      "grad_norm": 0.05801571080351748,
      "learning_rate": 7.91131144794013e-06,
      "loss": 0.8787,
      "step": 242
    },
    {
      "epoch": 0.1421728160310089,
      "grad_norm": 0.045088484806881386,
      "learning_rate": 7.910580026064038e-06,
      "loss": 1.0604,
      "step": 243
    },
    {
      "epoch": 0.14275788934800893,
      "grad_norm": 0.06574863742707004,
      "learning_rate": 7.909845638438377e-06,
      "loss": 0.9216,
      "step": 244
    },
    {
      "epoch": 0.14334296266500896,
      "grad_norm": 0.0372095564143721,
      "learning_rate": 7.909108285683563e-06,
      "loss": 0.9336,
      "step": 245
    },
    {
      "epoch": 0.14392803598200898,
      "grad_norm": 0.04397627871472844,
      "learning_rate": 7.908367968422515e-06,
      "loss": 0.9261,
      "step": 246
    },
    {
      "epoch": 0.14451310929900904,
      "grad_norm": 0.04276892267104892,
      "learning_rate": 7.907624687280654e-06,
      "loss": 0.8673,
      "step": 247
    },
    {
      "epoch": 0.14509818261600907,
      "grad_norm": 0.045304865664725656,
      "learning_rate": 7.906878442885907e-06,
      "loss": 0.9475,
      "step": 248
    },
    {
      "epoch": 0.1456832559330091,
      "grad_norm": 0.04370076931511423,
      "learning_rate": 7.906129235868702e-06,
      "loss": 0.8433,
      "step": 249
    },
    {
      "epoch": 0.14626832925000915,
      "grad_norm": 0.0672588807453601,
      "learning_rate": 7.905377066861973e-06,
      "loss": 0.9124,
      "step": 250
    },
    {
      "epoch": 0.14685340256700918,
      "grad_norm": 0.046912566764935076,
      "learning_rate": 7.904621936501156e-06,
      "loss": 0.9044,
      "step": 251
    },
    {
      "epoch": 0.1474384758840092,
      "grad_norm": 0.07278415346178851,
      "learning_rate": 7.903863845424185e-06,
      "loss": 0.901,
      "step": 252
    },
    {
      "epoch": 0.14802354920100924,
      "grad_norm": 0.04218918671820467,
      "learning_rate": 7.9031027942715e-06,
      "loss": 0.9019,
      "step": 253
    },
    {
      "epoch": 0.1486086225180093,
      "grad_norm": 0.03858325311022567,
      "learning_rate": 7.90233878368604e-06,
      "loss": 0.8601,
      "step": 254
    },
    {
      "epoch": 0.14919369583500933,
      "grad_norm": 0.04209886442727145,
      "learning_rate": 7.90157181431324e-06,
      "loss": 0.8681,
      "step": 255
    },
    {
      "epoch": 0.14977876915200936,
      "grad_norm": 0.049400812285918,
      "learning_rate": 7.90080188680104e-06,
      "loss": 0.9494,
      "step": 256
    },
    {
      "epoch": 0.15036384246900938,
      "grad_norm": 0.047941941474994906,
      "learning_rate": 7.900029001799882e-06,
      "loss": 0.8439,
      "step": 257
    },
    {
      "epoch": 0.15094891578600944,
      "grad_norm": 0.042101153780900284,
      "learning_rate": 7.899253159962694e-06,
      "loss": 0.917,
      "step": 258
    },
    {
      "epoch": 0.15153398910300947,
      "grad_norm": 0.040583990176968884,
      "learning_rate": 7.898474361944915e-06,
      "loss": 0.9328,
      "step": 259
    },
    {
      "epoch": 0.1521190624200095,
      "grad_norm": 0.04000469028913075,
      "learning_rate": 7.897692608404474e-06,
      "loss": 0.9454,
      "step": 260
    },
    {
      "epoch": 0.15270413573700956,
      "grad_norm": 0.04819787772789072,
      "learning_rate": 7.8969079000018e-06,
      "loss": 1.0061,
      "step": 261
    },
    {
      "epoch": 0.15328920905400958,
      "grad_norm": 0.04670811295251141,
      "learning_rate": 7.896120237399817e-06,
      "loss": 0.8678,
      "step": 262
    },
    {
      "epoch": 0.1538742823710096,
      "grad_norm": 0.03876277230266676,
      "learning_rate": 7.895329621263945e-06,
      "loss": 0.9465,
      "step": 263
    },
    {
      "epoch": 0.15445935568800964,
      "grad_norm": 0.06917560168966953,
      "learning_rate": 7.894536052262098e-06,
      "loss": 0.9114,
      "step": 264
    },
    {
      "epoch": 0.1550444290050097,
      "grad_norm": 0.052197549375990714,
      "learning_rate": 7.893739531064688e-06,
      "loss": 0.8966,
      "step": 265
    },
    {
      "epoch": 0.15562950232200973,
      "grad_norm": 0.05089520469658166,
      "learning_rate": 7.892940058344615e-06,
      "loss": 0.8403,
      "step": 266
    },
    {
      "epoch": 0.15621457563900976,
      "grad_norm": 0.04200303069403226,
      "learning_rate": 7.89213763477728e-06,
      "loss": 0.7954,
      "step": 267
    },
    {
      "epoch": 0.1567996489560098,
      "grad_norm": 0.04045997821257577,
      "learning_rate": 7.89133226104057e-06,
      "loss": 0.9484,
      "step": 268
    },
    {
      "epoch": 0.15738472227300984,
      "grad_norm": 0.04355833968928891,
      "learning_rate": 7.890523937814872e-06,
      "loss": 0.8871,
      "step": 269
    },
    {
      "epoch": 0.15796979559000987,
      "grad_norm": 0.038980722774793516,
      "learning_rate": 7.889712665783055e-06,
      "loss": 0.8242,
      "step": 270
    },
    {
      "epoch": 0.1585548689070099,
      "grad_norm": 0.0417431555190703,
      "learning_rate": 7.888898445630486e-06,
      "loss": 0.918,
      "step": 271
    },
    {
      "epoch": 0.15913994222400996,
      "grad_norm": 0.04420422353440596,
      "learning_rate": 7.888081278045022e-06,
      "loss": 0.9345,
      "step": 272
    },
    {
      "epoch": 0.15972501554100998,
      "grad_norm": 0.043453256463112454,
      "learning_rate": 7.88726116371701e-06,
      "loss": 0.8439,
      "step": 273
    },
    {
      "epoch": 0.16031008885801,
      "grad_norm": 0.04018335492133053,
      "learning_rate": 7.88643810333928e-06,
      "loss": 0.9024,
      "step": 274
    },
    {
      "epoch": 0.16089516217501004,
      "grad_norm": 0.03885288803364117,
      "learning_rate": 7.885612097607161e-06,
      "loss": 0.9005,
      "step": 275
    },
    {
      "epoch": 0.1614802354920101,
      "grad_norm": 0.03940189643105726,
      "learning_rate": 7.884783147218464e-06,
      "loss": 0.8726,
      "step": 276
    },
    {
      "epoch": 0.16206530880901013,
      "grad_norm": 0.04322545289358093,
      "learning_rate": 7.88395125287349e-06,
      "loss": 0.9309,
      "step": 277
    },
    {
      "epoch": 0.16265038212601016,
      "grad_norm": 0.03991615308835092,
      "learning_rate": 7.883116415275022e-06,
      "loss": 0.9319,
      "step": 278
    },
    {
      "epoch": 0.1632354554430102,
      "grad_norm": 0.04774591346437184,
      "learning_rate": 7.882278635128339e-06,
      "loss": 0.9976,
      "step": 279
    },
    {
      "epoch": 0.16382052876001024,
      "grad_norm": 0.043726280435400645,
      "learning_rate": 7.881437913141196e-06,
      "loss": 0.9041,
      "step": 280
    },
    {
      "epoch": 0.16440560207701027,
      "grad_norm": 0.05349577122745657,
      "learning_rate": 7.880594250023842e-06,
      "loss": 0.9109,
      "step": 281
    },
    {
      "epoch": 0.1649906753940103,
      "grad_norm": 0.04664425873865413,
      "learning_rate": 7.879747646489002e-06,
      "loss": 0.8872,
      "step": 282
    },
    {
      "epoch": 0.16557574871101036,
      "grad_norm": 0.04689991827127376,
      "learning_rate": 7.878898103251891e-06,
      "loss": 0.946,
      "step": 283
    },
    {
      "epoch": 0.16616082202801039,
      "grad_norm": 0.04419788368438809,
      "learning_rate": 7.87804562103021e-06,
      "loss": 0.8699,
      "step": 284
    },
    {
      "epoch": 0.16674589534501041,
      "grad_norm": 0.0498909584325992,
      "learning_rate": 7.877190200544131e-06,
      "loss": 0.8396,
      "step": 285
    },
    {
      "epoch": 0.16733096866201047,
      "grad_norm": 0.04446094526551824,
      "learning_rate": 7.876331842516323e-06,
      "loss": 0.887,
      "step": 286
    },
    {
      "epoch": 0.1679160419790105,
      "grad_norm": 0.06094680175561847,
      "learning_rate": 7.875470547671926e-06,
      "loss": 0.8834,
      "step": 287
    },
    {
      "epoch": 0.16850111529601053,
      "grad_norm": 0.038876474999689326,
      "learning_rate": 7.874606316738566e-06,
      "loss": 0.8975,
      "step": 288
    },
    {
      "epoch": 0.16908618861301056,
      "grad_norm": 0.04076135396799628,
      "learning_rate": 7.873739150446349e-06,
      "loss": 0.9094,
      "step": 289
    },
    {
      "epoch": 0.16967126193001061,
      "grad_norm": 0.04242085203333459,
      "learning_rate": 7.872869049527855e-06,
      "loss": 0.9346,
      "step": 290
    },
    {
      "epoch": 0.17025633524701064,
      "grad_norm": 0.04192270928126719,
      "learning_rate": 7.871996014718154e-06,
      "loss": 0.916,
      "step": 291
    },
    {
      "epoch": 0.17084140856401067,
      "grad_norm": 0.06296131776401025,
      "learning_rate": 7.871120046754787e-06,
      "loss": 0.7869,
      "step": 292
    },
    {
      "epoch": 0.1714264818810107,
      "grad_norm": 0.04208658542590707,
      "learning_rate": 7.870241146377773e-06,
      "loss": 0.863,
      "step": 293
    },
    {
      "epoch": 0.17201155519801076,
      "grad_norm": 0.04221040890826715,
      "learning_rate": 7.869359314329613e-06,
      "loss": 0.8125,
      "step": 294
    },
    {
      "epoch": 0.17259662851501079,
      "grad_norm": 0.044062682914531,
      "learning_rate": 7.868474551355277e-06,
      "loss": 0.8283,
      "step": 295
    },
    {
      "epoch": 0.17318170183201081,
      "grad_norm": 0.053136044791769796,
      "learning_rate": 7.867586858202221e-06,
      "loss": 0.9321,
      "step": 296
    },
    {
      "epoch": 0.17376677514901087,
      "grad_norm": 0.041998835388755755,
      "learning_rate": 7.866696235620367e-06,
      "loss": 0.9435,
      "step": 297
    },
    {
      "epoch": 0.1743518484660109,
      "grad_norm": 0.04432061620173052,
      "learning_rate": 7.865802684362119e-06,
      "loss": 0.944,
      "step": 298
    },
    {
      "epoch": 0.17493692178301093,
      "grad_norm": 0.03816132379303917,
      "learning_rate": 7.864906205182347e-06,
      "loss": 0.9222,
      "step": 299
    },
    {
      "epoch": 0.17552199510001096,
      "grad_norm": 0.04061878988742196,
      "learning_rate": 7.864006798838405e-06,
      "loss": 0.9344,
      "step": 300
    },
    {
      "epoch": 0.17610706841701101,
      "grad_norm": 0.038725999488975066,
      "learning_rate": 7.863104466090113e-06,
      "loss": 0.9477,
      "step": 301
    },
    {
      "epoch": 0.17669214173401104,
      "grad_norm": 0.0675670307551766,
      "learning_rate": 7.862199207699763e-06,
      "loss": 0.8939,
      "step": 302
    },
    {
      "epoch": 0.17727721505101107,
      "grad_norm": 0.04866290151842816,
      "learning_rate": 7.861291024432122e-06,
      "loss": 0.944,
      "step": 303
    },
    {
      "epoch": 0.17786228836801113,
      "grad_norm": 0.04231401792052211,
      "learning_rate": 7.860379917054426e-06,
      "loss": 0.9108,
      "step": 304
    },
    {
      "epoch": 0.17844736168501116,
      "grad_norm": 0.21645206187990054,
      "learning_rate": 7.859465886336381e-06,
      "loss": 0.9328,
      "step": 305
    },
    {
      "epoch": 0.1790324350020112,
      "grad_norm": 0.04212212891416765,
      "learning_rate": 7.858548933050162e-06,
      "loss": 0.9755,
      "step": 306
    },
    {
      "epoch": 0.17961750831901122,
      "grad_norm": 0.047331820432207364,
      "learning_rate": 7.857629057970417e-06,
      "loss": 0.8702,
      "step": 307
    },
    {
      "epoch": 0.18020258163601127,
      "grad_norm": 0.040260553510288316,
      "learning_rate": 7.856706261874258e-06,
      "loss": 0.8934,
      "step": 308
    },
    {
      "epoch": 0.1807876549530113,
      "grad_norm": 0.04326897093604986,
      "learning_rate": 7.855780545541264e-06,
      "loss": 0.877,
      "step": 309
    },
    {
      "epoch": 0.18137272827001133,
      "grad_norm": 0.03936456606263684,
      "learning_rate": 7.854851909753487e-06,
      "loss": 0.9206,
      "step": 310
    },
    {
      "epoch": 0.18195780158701136,
      "grad_norm": 0.040529920237504666,
      "learning_rate": 7.853920355295438e-06,
      "loss": 0.8469,
      "step": 311
    },
    {
      "epoch": 0.18254287490401142,
      "grad_norm": 0.07984199172508148,
      "learning_rate": 7.852985882954102e-06,
      "loss": 0.856,
      "step": 312
    },
    {
      "epoch": 0.18312794822101144,
      "grad_norm": 0.04341308255910565,
      "learning_rate": 7.85204849351892e-06,
      "loss": 0.8975,
      "step": 313
    },
    {
      "epoch": 0.18371302153801147,
      "grad_norm": 0.05163482815007264,
      "learning_rate": 7.851108187781802e-06,
      "loss": 0.8516,
      "step": 314
    },
    {
      "epoch": 0.18429809485501153,
      "grad_norm": 0.04363599571939765,
      "learning_rate": 7.850164966537124e-06,
      "loss": 0.9088,
      "step": 315
    },
    {
      "epoch": 0.18488316817201156,
      "grad_norm": 0.043024652012164384,
      "learning_rate": 7.84921883058172e-06,
      "loss": 0.8291,
      "step": 316
    },
    {
      "epoch": 0.1854682414890116,
      "grad_norm": 0.041659608271311335,
      "learning_rate": 7.848269780714892e-06,
      "loss": 0.9719,
      "step": 317
    },
    {
      "epoch": 0.18605331480601162,
      "grad_norm": 0.04557805725835434,
      "learning_rate": 7.847317817738394e-06,
      "loss": 0.9638,
      "step": 318
    },
    {
      "epoch": 0.18663838812301167,
      "grad_norm": 0.04388413396186285,
      "learning_rate": 7.846362942456455e-06,
      "loss": 0.93,
      "step": 319
    },
    {
      "epoch": 0.1872234614400117,
      "grad_norm": 0.04052950822615268,
      "learning_rate": 7.845405155675752e-06,
      "loss": 0.8951,
      "step": 320
    },
    {
      "epoch": 0.18780853475701173,
      "grad_norm": 0.03852369264523642,
      "learning_rate": 7.844444458205428e-06,
      "loss": 0.8521,
      "step": 321
    },
    {
      "epoch": 0.1883936080740118,
      "grad_norm": 0.045799254500923765,
      "learning_rate": 7.843480850857083e-06,
      "loss": 0.8966,
      "step": 322
    },
    {
      "epoch": 0.18897868139101182,
      "grad_norm": 0.04373024374645557,
      "learning_rate": 7.842514334444776e-06,
      "loss": 0.973,
      "step": 323
    },
    {
      "epoch": 0.18956375470801184,
      "grad_norm": 0.037372310076302304,
      "learning_rate": 7.841544909785022e-06,
      "loss": 1.0232,
      "step": 324
    },
    {
      "epoch": 0.19014882802501187,
      "grad_norm": 0.044911079932608254,
      "learning_rate": 7.840572577696798e-06,
      "loss": 0.8351,
      "step": 325
    },
    {
      "epoch": 0.19073390134201193,
      "grad_norm": 0.04820487217400144,
      "learning_rate": 7.839597339001529e-06,
      "loss": 0.9381,
      "step": 326
    },
    {
      "epoch": 0.19131897465901196,
      "grad_norm": 0.04460622903204633,
      "learning_rate": 7.8386191945231e-06,
      "loss": 0.9047,
      "step": 327
    },
    {
      "epoch": 0.191904047976012,
      "grad_norm": 0.04344227987010105,
      "learning_rate": 7.837638145087855e-06,
      "loss": 0.8882,
      "step": 328
    },
    {
      "epoch": 0.19248912129301204,
      "grad_norm": 0.041162516007716705,
      "learning_rate": 7.836654191524583e-06,
      "loss": 0.888,
      "step": 329
    },
    {
      "epoch": 0.19307419461001207,
      "grad_norm": 0.050217278727892056,
      "learning_rate": 7.835667334664533e-06,
      "loss": 0.9425,
      "step": 330
    },
    {
      "epoch": 0.1936592679270121,
      "grad_norm": 0.045239661384039016,
      "learning_rate": 7.834677575341407e-06,
      "loss": 0.845,
      "step": 331
    },
    {
      "epoch": 0.19424434124401213,
      "grad_norm": 0.04069762643444013,
      "learning_rate": 7.833684914391354e-06,
      "loss": 0.9045,
      "step": 332
    },
    {
      "epoch": 0.1948294145610122,
      "grad_norm": 0.03659391253836006,
      "learning_rate": 7.832689352652978e-06,
      "loss": 0.8415,
      "step": 333
    },
    {
      "epoch": 0.19541448787801222,
      "grad_norm": 0.0478253523475305,
      "learning_rate": 7.831690890967332e-06,
      "loss": 0.9023,
      "step": 334
    },
    {
      "epoch": 0.19599956119501225,
      "grad_norm": 0.03931532297283958,
      "learning_rate": 7.830689530177923e-06,
      "loss": 0.8757,
      "step": 335
    },
    {
      "epoch": 0.19658463451201227,
      "grad_norm": 0.04480666786944768,
      "learning_rate": 7.8296852711307e-06,
      "loss": 0.8393,
      "step": 336
    },
    {
      "epoch": 0.19716970782901233,
      "grad_norm": 0.0734058976406723,
      "learning_rate": 7.828678114674066e-06,
      "loss": 0.9038,
      "step": 337
| }, | |
| { | |
| "epoch": 0.19775478114601236, | |
| "grad_norm": 0.03967258990394233, | |
| "learning_rate": 7.827668061658871e-06, | |
| "loss": 0.8009, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.1983398544630124, | |
| "grad_norm": 0.0418553799297778, | |
| "learning_rate": 7.82665511293841e-06, | |
| "loss": 0.8865, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.19892492778001244, | |
| "grad_norm": 0.0384561013270465, | |
| "learning_rate": 7.825639269368426e-06, | |
| "loss": 0.872, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.19951000109701247, | |
| "grad_norm": 0.04182210062250734, | |
| "learning_rate": 7.824620531807106e-06, | |
| "loss": 0.8974, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.2000950744140125, | |
| "grad_norm": 0.0402445680829306, | |
| "learning_rate": 7.823598901115085e-06, | |
| "loss": 0.8017, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.20068014773101253, | |
| "grad_norm": 0.03976795416941979, | |
| "learning_rate": 7.822574378155436e-06, | |
| "loss": 0.9298, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.2012652210480126, | |
| "grad_norm": 0.043031163568613286, | |
| "learning_rate": 7.821546963793683e-06, | |
| "loss": 0.9508, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.20185029436501262, | |
| "grad_norm": 0.0485982687560341, | |
| "learning_rate": 7.82051665889779e-06, | |
| "loss": 0.8536, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.20243536768201265, | |
| "grad_norm": 0.04149899672912665, | |
| "learning_rate": 7.819483464338156e-06, | |
| "loss": 0.8767, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.2030204409990127, | |
| "grad_norm": 0.0971131140419449, | |
| "learning_rate": 7.818447380987634e-06, | |
| "loss": 0.9271, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.20360551431601273, | |
| "grad_norm": 0.04183099462603424, | |
| "learning_rate": 7.817408409721506e-06, | |
| "loss": 0.8362, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.20419058763301276, | |
| "grad_norm": 0.03719681766847752, | |
| "learning_rate": 7.8163665514175e-06, | |
| "loss": 0.8544, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.2047756609500128, | |
| "grad_norm": 0.08061763199476867, | |
| "learning_rate": 7.815321806955782e-06, | |
| "loss": 0.8335, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.20536073426701285, | |
| "grad_norm": 0.053441055551360056, | |
| "learning_rate": 7.814274177218955e-06, | |
| "loss": 0.8602, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.20594580758401287, | |
| "grad_norm": 0.1775803458323184, | |
| "learning_rate": 7.81322366309206e-06, | |
| "loss": 0.8998, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.2065308809010129, | |
| "grad_norm": 0.0637002971995377, | |
| "learning_rate": 7.812170265462573e-06, | |
| "loss": 0.9737, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.20711595421801293, | |
| "grad_norm": 0.04274639590829472, | |
| "learning_rate": 7.81111398522041e-06, | |
| "loss": 1.009, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.207701027535013, | |
| "grad_norm": 0.041282961278004064, | |
| "learning_rate": 7.81005482325792e-06, | |
| "loss": 0.9035, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.20828610085201302, | |
| "grad_norm": 0.04195399372218902, | |
| "learning_rate": 7.808992780469889e-06, | |
| "loss": 0.9128, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.20887117416901305, | |
| "grad_norm": 0.03698717701423449, | |
| "learning_rate": 7.807927857753527e-06, | |
| "loss": 0.8562, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.2094562474860131, | |
| "grad_norm": 0.1081591722204418, | |
| "learning_rate": 7.80686005600849e-06, | |
| "loss": 0.8931, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.21004132080301313, | |
| "grad_norm": 0.04152721891763337, | |
| "learning_rate": 7.80578937613686e-06, | |
| "loss": 0.8633, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.21062639412001316, | |
| "grad_norm": 0.04200660652374662, | |
| "learning_rate": 7.804715819043148e-06, | |
| "loss": 0.8773, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.2112114674370132, | |
| "grad_norm": 0.04405111681557889, | |
| "learning_rate": 7.803639385634302e-06, | |
| "loss": 0.8587, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.21179654075401325, | |
| "grad_norm": 0.04571477197647962, | |
| "learning_rate": 7.802560076819694e-06, | |
| "loss": 0.8334, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.21238161407101327, | |
| "grad_norm": 0.041607118224879065, | |
| "learning_rate": 7.80147789351113e-06, | |
| "loss": 0.8739, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.2129666873880133, | |
| "grad_norm": 0.06194034993642153, | |
| "learning_rate": 7.800392836622838e-06, | |
| "loss": 0.8956, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.21355176070501336, | |
| "grad_norm": 0.03798806008002444, | |
| "learning_rate": 7.79930490707148e-06, | |
| "loss": 0.8966, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.2141368340220134, | |
| "grad_norm": 0.04739784630899101, | |
| "learning_rate": 7.798214105776146e-06, | |
| "loss": 0.9552, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.21472190733901342, | |
| "grad_norm": 0.04497114482691456, | |
| "learning_rate": 7.797120433658343e-06, | |
| "loss": 0.8666, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.21530698065601345, | |
| "grad_norm": 0.04176901867071411, | |
| "learning_rate": 7.796023891642011e-06, | |
| "loss": 0.9051, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.2158920539730135, | |
| "grad_norm": 0.056698801623745465, | |
| "learning_rate": 7.794924480653513e-06, | |
| "loss": 0.8745, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.21647712729001353, | |
| "grad_norm": 0.04267067239376988, | |
| "learning_rate": 7.793822201621633e-06, | |
| "loss": 0.9129, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.21706220060701356, | |
| "grad_norm": 0.03999203881276761, | |
| "learning_rate": 7.79271705547758e-06, | |
| "loss": 0.8814, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.2176472739240136, | |
| "grad_norm": 0.04206496870855173, | |
| "learning_rate": 7.79160904315499e-06, | |
| "loss": 0.8936, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.21823234724101365, | |
| "grad_norm": 0.040567992793796616, | |
| "learning_rate": 7.79049816558991e-06, | |
| "loss": 0.8961, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.21881742055801368, | |
| "grad_norm": 0.04165915715538525, | |
| "learning_rate": 7.789384423720815e-06, | |
| "loss": 0.901, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.2194024938750137, | |
| "grad_norm": 0.04311161567240108, | |
| "learning_rate": 7.788267818488597e-06, | |
| "loss": 0.8571, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.21998756719201376, | |
| "grad_norm": 0.04090984120457054, | |
| "learning_rate": 7.78714835083657e-06, | |
| "loss": 0.879, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.2205726405090138, | |
| "grad_norm": 0.03723653818234615, | |
| "learning_rate": 7.786026021710462e-06, | |
| "loss": 0.8687, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.22115771382601382, | |
| "grad_norm": 0.040732056413017595, | |
| "learning_rate": 7.78490083205842e-06, | |
| "loss": 0.9033, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.22174278714301385, | |
| "grad_norm": 0.03755340315603773, | |
| "learning_rate": 7.783772782831008e-06, | |
| "loss": 0.8919, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.2223278604600139, | |
| "grad_norm": 0.04513035465018611, | |
| "learning_rate": 7.782641874981207e-06, | |
| "loss": 0.8766, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.22291293377701393, | |
| "grad_norm": 0.042927929340526826, | |
| "learning_rate": 7.78150810946441e-06, | |
| "loss": 0.8692, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.22349800709401396, | |
| "grad_norm": 0.03548281821425231, | |
| "learning_rate": 7.780371487238428e-06, | |
| "loss": 0.7295, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.22408308041101402, | |
| "grad_norm": 0.03978070402906236, | |
| "learning_rate": 7.779232009263484e-06, | |
| "loss": 0.8555, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.22466815372801405, | |
| "grad_norm": 0.036709565386030436, | |
| "learning_rate": 7.778089676502209e-06, | |
| "loss": 0.7492, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.22525322704501408, | |
| "grad_norm": 0.04547788495521431, | |
| "learning_rate": 7.776944489919649e-06, | |
| "loss": 0.8334, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.2258383003620141, | |
| "grad_norm": 0.04220442842369723, | |
| "learning_rate": 7.775796450483267e-06, | |
| "loss": 0.8244, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.22642337367901416, | |
| "grad_norm": 0.04778060193840243, | |
| "learning_rate": 7.774645559162927e-06, | |
| "loss": 0.8511, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.2270084469960142, | |
| "grad_norm": 0.05014682109980806, | |
| "learning_rate": 7.773491816930904e-06, | |
| "loss": 0.8334, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.22759352031301422, | |
| "grad_norm": 0.04001986651909544, | |
| "learning_rate": 7.772335224761886e-06, | |
| "loss": 0.8224, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.22817859363001425, | |
| "grad_norm": 0.040094874463681825, | |
| "learning_rate": 7.771175783632966e-06, | |
| "loss": 0.9069, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.2287636669470143, | |
| "grad_norm": 0.03678306586668835, | |
| "learning_rate": 7.770013494523641e-06, | |
| "loss": 0.8758, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.22934874026401433, | |
| "grad_norm": 0.04444193067206238, | |
| "learning_rate": 7.768848358415819e-06, | |
| "loss": 0.946, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.22993381358101436, | |
| "grad_norm": 0.042386218395158284, | |
| "learning_rate": 7.767680376293811e-06, | |
| "loss": 0.8395, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.23051888689801442, | |
| "grad_norm": 0.051874603825963005, | |
| "learning_rate": 7.766509549144332e-06, | |
| "loss": 0.8867, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.23110396021501445, | |
| "grad_norm": 0.04115935591406729, | |
| "learning_rate": 7.765335877956498e-06, | |
| "loss": 0.8509, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.23168903353201448, | |
| "grad_norm": 0.045377388296697053, | |
| "learning_rate": 7.764159363721833e-06, | |
| "loss": 0.8783, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.2322741068490145, | |
| "grad_norm": 0.03845331275197764, | |
| "learning_rate": 7.762980007434261e-06, | |
| "loss": 0.8721, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.23285918016601456, | |
| "grad_norm": 0.06130607399623932, | |
| "learning_rate": 7.761797810090103e-06, | |
| "loss": 0.896, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.2334442534830146, | |
| "grad_norm": 0.0653248890033106, | |
| "learning_rate": 7.760612772688086e-06, | |
| "loss": 0.9239, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.23402932680001462, | |
| "grad_norm": 0.04386903271065406, | |
| "learning_rate": 7.759424896229329e-06, | |
| "loss": 0.9055, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.23461440011701468, | |
| "grad_norm": 0.05112057938888221, | |
| "learning_rate": 7.758234181717359e-06, | |
| "loss": 0.8179, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.2351994734340147, | |
| "grad_norm": 0.03919834965990091, | |
| "learning_rate": 7.757040630158094e-06, | |
| "loss": 0.9131, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.23578454675101473, | |
| "grad_norm": 0.04080472273046829, | |
| "learning_rate": 7.75584424255985e-06, | |
| "loss": 0.8772, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.23636962006801476, | |
| "grad_norm": 0.036834620421617906, | |
| "learning_rate": 7.754645019933338e-06, | |
| "loss": 0.8155, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.23695469338501482, | |
| "grad_norm": 0.03654317750178369, | |
| "learning_rate": 7.753442963291668e-06, | |
| "loss": 0.8346, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.23753976670201485, | |
| "grad_norm": 0.04519798929105044, | |
| "learning_rate": 7.752238073650339e-06, | |
| "loss": 0.912, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.23812484001901488, | |
| "grad_norm": 0.03826695253269902, | |
| "learning_rate": 7.751030352027246e-06, | |
| "loss": 0.8772, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.2387099133360149, | |
| "grad_norm": 0.04486993387188878, | |
| "learning_rate": 7.749819799442676e-06, | |
| "loss": 0.8826, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.23929498665301496, | |
| "grad_norm": 0.0776681654995442, | |
| "learning_rate": 7.74860641691931e-06, | |
| "loss": 0.8987, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.239880059970015, | |
| "grad_norm": 0.05169098865704706, | |
| "learning_rate": 7.747390205482216e-06, | |
| "loss": 0.7904, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.24046513328701502, | |
| "grad_norm": 0.042053672534972886, | |
| "learning_rate": 7.746171166158855e-06, | |
| "loss": 0.9188, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.24105020660401508, | |
| "grad_norm": 0.044614916290635534, | |
| "learning_rate": 7.744949299979071e-06, | |
| "loss": 0.9118, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.2416352799210151, | |
| "grad_norm": 0.04594381480411999, | |
| "learning_rate": 7.743724607975105e-06, | |
| "loss": 0.8547, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.24222035323801513, | |
| "grad_norm": 0.037664202042716706, | |
| "learning_rate": 7.742497091181578e-06, | |
| "loss": 0.8446, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.24280542655501516, | |
| "grad_norm": 0.04794778502796113, | |
| "learning_rate": 7.741266750635502e-06, | |
| "loss": 0.897, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.24339049987201522, | |
| "grad_norm": 0.04188595026753968, | |
| "learning_rate": 7.740033587376275e-06, | |
| "loss": 0.9061, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.24397557318901525, | |
| "grad_norm": 0.04407359629305258, | |
| "learning_rate": 7.738797602445671e-06, | |
| "loss": 0.9146, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.24456064650601528, | |
| "grad_norm": 0.04077311126531583, | |
| "learning_rate": 7.73755879688786e-06, | |
| "loss": 0.8515, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.24514571982301533, | |
| "grad_norm": 0.055784116703384304, | |
| "learning_rate": 7.736317171749385e-06, | |
| "loss": 0.851, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.24573079314001536, | |
| "grad_norm": 0.049743142106565615, | |
| "learning_rate": 7.735072728079179e-06, | |
| "loss": 0.8718, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.2463158664570154, | |
| "grad_norm": 0.04470234941233142, | |
| "learning_rate": 7.73382546692855e-06, | |
| "loss": 0.9624, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.24690093977401542, | |
| "grad_norm": 0.040010409109375616, | |
| "learning_rate": 7.732575389351187e-06, | |
| "loss": 0.8925, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.24748601309101548, | |
| "grad_norm": 0.04513853855792064, | |
| "learning_rate": 7.731322496403161e-06, | |
| "loss": 0.8163, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.2480710864080155, | |
| "grad_norm": 0.04305001532204258, | |
| "learning_rate": 7.730066789142922e-06, | |
| "loss": 0.818, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.24865615972501554, | |
| "grad_norm": 0.0381328984157233, | |
| "learning_rate": 7.728808268631291e-06, | |
| "loss": 0.8655, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.24924123304201556, | |
| "grad_norm": 0.04141777979618345, | |
| "learning_rate": 7.727546935931473e-06, | |
| "loss": 0.8447, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.24982630635901562, | |
| "grad_norm": 0.0441532278076972, | |
| "learning_rate": 7.726282792109049e-06, | |
| "loss": 0.7839, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.2504113796760157, | |
| "grad_norm": 0.04369415131884431, | |
| "learning_rate": 7.725015838231966e-06, | |
| "loss": 0.8375, | |
| "step": 428 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 3418, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 428, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1775486329946112.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |