| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 241, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.004149377593360996, |
| "grad_norm": 0.5114469528198242, |
| "learning_rate": 2e-05, |
| "loss": 0.7995174527168274, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.008298755186721992, |
| "grad_norm": 0.5205491185188293, |
| "learning_rate": 2e-05, |
| "loss": 0.8812965750694275, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.012448132780082987, |
| "grad_norm": 0.6057224273681641, |
| "learning_rate": 2e-05, |
| "loss": 0.8402022123336792, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.016597510373443983, |
| "grad_norm": 0.5623906254768372, |
| "learning_rate": 2e-05, |
| "loss": 0.8188848495483398, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.02074688796680498, |
| "grad_norm": 0.574876606464386, |
| "learning_rate": 2e-05, |
| "loss": 0.8380811214447021, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.024896265560165973, |
| "grad_norm": 0.4625989496707916, |
| "learning_rate": 2e-05, |
| "loss": 0.7132218480110168, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.029045643153526972, |
| "grad_norm": 0.5183306336402893, |
| "learning_rate": 2e-05, |
| "loss": 0.8268325328826904, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.03319502074688797, |
| "grad_norm": 0.4928549826145172, |
| "learning_rate": 2e-05, |
| "loss": 0.7686080932617188, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.03734439834024896, |
| "grad_norm": 0.4636511206626892, |
| "learning_rate": 2e-05, |
| "loss": 0.8444753289222717, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.04149377593360996, |
| "grad_norm": 0.5008803606033325, |
| "learning_rate": 2e-05, |
| "loss": 0.6671140789985657, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.04564315352697095, |
| "grad_norm": 0.49685290455818176, |
| "learning_rate": 2e-05, |
| "loss": 0.7625027894973755, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.04979253112033195, |
| "grad_norm": 0.5161386728286743, |
| "learning_rate": 2e-05, |
| "loss": 0.5999635457992554, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.05394190871369295, |
| "grad_norm": 0.46996110677719116, |
| "learning_rate": 2e-05, |
| "loss": 0.7389070987701416, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.058091286307053944, |
| "grad_norm": 0.45131370425224304, |
| "learning_rate": 2e-05, |
| "loss": 0.6111957430839539, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.06224066390041494, |
| "grad_norm": 0.4911205470561981, |
| "learning_rate": 2e-05, |
| "loss": 0.5750669240951538, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.06639004149377593, |
| "grad_norm": 0.46468034386634827, |
| "learning_rate": 2e-05, |
| "loss": 0.6607809066772461, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.07053941908713693, |
| "grad_norm": 0.5140272378921509, |
| "learning_rate": 2e-05, |
| "loss": 0.8089659214019775, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.07468879668049792, |
| "grad_norm": 0.49761149287223816, |
| "learning_rate": 2e-05, |
| "loss": 0.8017055988311768, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.07883817427385892, |
| "grad_norm": 0.45623964071273804, |
| "learning_rate": 2e-05, |
| "loss": 0.725612223148346, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.08298755186721991, |
| "grad_norm": 0.4778558015823364, |
| "learning_rate": 2e-05, |
| "loss": 0.6465242505073547, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.08713692946058091, |
| "grad_norm": 0.4813624620437622, |
| "learning_rate": 2e-05, |
| "loss": 0.6812542676925659, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.0912863070539419, |
| "grad_norm": 0.45828455686569214, |
| "learning_rate": 2e-05, |
| "loss": 0.6355943083763123, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.0954356846473029, |
| "grad_norm": 0.39770182967185974, |
| "learning_rate": 2e-05, |
| "loss": 0.734164297580719, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.0995850622406639, |
| "grad_norm": 0.515662431716919, |
| "learning_rate": 2e-05, |
| "loss": 0.775545060634613, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.1037344398340249, |
| "grad_norm": 0.4875846207141876, |
| "learning_rate": 2e-05, |
| "loss": 0.7608263492584229, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.1078838174273859, |
| "grad_norm": 0.4272926449775696, |
| "learning_rate": 2e-05, |
| "loss": 0.655767560005188, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.11203319502074689, |
| "grad_norm": 0.47189342975616455, |
| "learning_rate": 2e-05, |
| "loss": 0.6984891295433044, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.11618257261410789, |
| "grad_norm": 0.49677926301956177, |
| "learning_rate": 2e-05, |
| "loss": 0.6952549815177917, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.12033195020746888, |
| "grad_norm": 0.5341811776161194, |
| "learning_rate": 2e-05, |
| "loss": 0.6844781041145325, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.12448132780082988, |
| "grad_norm": 0.49139678478240967, |
| "learning_rate": 2e-05, |
| "loss": 0.7043532729148865, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.12863070539419086, |
| "grad_norm": 0.42113780975341797, |
| "learning_rate": 2e-05, |
| "loss": 0.6791371703147888, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.13278008298755187, |
| "grad_norm": 0.490699827671051, |
| "learning_rate": 2e-05, |
| "loss": 0.66917484998703, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.13692946058091288, |
| "grad_norm": 0.48269012570381165, |
| "learning_rate": 2e-05, |
| "loss": 0.6663049459457397, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.14107883817427386, |
| "grad_norm": 0.4833972454071045, |
| "learning_rate": 2e-05, |
| "loss": 0.7479192018508911, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.14522821576763487, |
| "grad_norm": 0.4521920382976532, |
| "learning_rate": 2e-05, |
| "loss": 0.5006750822067261, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.14937759336099585, |
| "grad_norm": 0.4805753231048584, |
| "learning_rate": 2e-05, |
| "loss": 0.7437685132026672, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.15352697095435686, |
| "grad_norm": 0.4702300429344177, |
| "learning_rate": 2e-05, |
| "loss": 0.7820006608963013, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.15767634854771784, |
| "grad_norm": 0.4416898190975189, |
| "learning_rate": 2e-05, |
| "loss": 0.5911201238632202, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.16182572614107885, |
| "grad_norm": 0.46818608045578003, |
| "learning_rate": 2e-05, |
| "loss": 0.6237752437591553, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.16597510373443983, |
| "grad_norm": 0.38742795586586, |
| "learning_rate": 2e-05, |
| "loss": 0.6044095754623413, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.17012448132780084, |
| "grad_norm": 0.4806065857410431, |
| "learning_rate": 2e-05, |
| "loss": 0.6341798901557922, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.17427385892116182, |
| "grad_norm": 0.4329955279827118, |
| "learning_rate": 2e-05, |
| "loss": 0.621407687664032, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.17842323651452283, |
| "grad_norm": 0.46890074014663696, |
| "learning_rate": 2e-05, |
| "loss": 0.7025566697120667, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.1825726141078838, |
| "grad_norm": 0.4821957051753998, |
| "learning_rate": 2e-05, |
| "loss": 0.6547812819480896, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.18672199170124482, |
| "grad_norm": 0.4716266691684723, |
| "learning_rate": 2e-05, |
| "loss": 0.6434807777404785, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.1908713692946058, |
| "grad_norm": 0.5017584562301636, |
| "learning_rate": 2e-05, |
| "loss": 0.6461539268493652, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.1950207468879668, |
| "grad_norm": 0.4837803244590759, |
| "learning_rate": 2e-05, |
| "loss": 0.6638780236244202, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.1991701244813278, |
| "grad_norm": 0.4523409605026245, |
| "learning_rate": 2e-05, |
| "loss": 0.5731872916221619, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.2033195020746888, |
| "grad_norm": 0.46308189630508423, |
| "learning_rate": 2e-05, |
| "loss": 0.6024616956710815, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.2074688796680498, |
| "grad_norm": 0.4565693140029907, |
| "learning_rate": 2e-05, |
| "loss": 0.5795129537582397, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.21161825726141079, |
| "grad_norm": 0.48081323504447937, |
| "learning_rate": 2e-05, |
| "loss": 0.6645175814628601, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.2157676348547718, |
| "grad_norm": 0.4649989902973175, |
| "learning_rate": 2e-05, |
| "loss": 0.6339988112449646, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.21991701244813278, |
| "grad_norm": 0.45999905467033386, |
| "learning_rate": 2e-05, |
| "loss": 0.6070005297660828, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.22406639004149378, |
| "grad_norm": 0.43405112624168396, |
| "learning_rate": 2e-05, |
| "loss": 0.6078118085861206, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.22821576763485477, |
| "grad_norm": 0.557212233543396, |
| "learning_rate": 2e-05, |
| "loss": 0.6502783894538879, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.23236514522821577, |
| "grad_norm": 0.4206949472427368, |
| "learning_rate": 2e-05, |
| "loss": 0.5604119896888733, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.23651452282157676, |
| "grad_norm": 0.4931945502758026, |
| "learning_rate": 2e-05, |
| "loss": 0.5463195443153381, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.24066390041493776, |
| "grad_norm": 0.44888630509376526, |
| "learning_rate": 2e-05, |
| "loss": 0.49333369731903076, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.24481327800829875, |
| "grad_norm": 0.4515199363231659, |
| "learning_rate": 2e-05, |
| "loss": 0.66854327917099, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.24896265560165975, |
| "grad_norm": 0.46686026453971863, |
| "learning_rate": 2e-05, |
| "loss": 0.5279274582862854, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.25311203319502074, |
| "grad_norm": 0.46663975715637207, |
| "learning_rate": 2e-05, |
| "loss": 0.6141489148139954, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.2572614107883817, |
| "grad_norm": 0.45049089193344116, |
| "learning_rate": 2e-05, |
| "loss": 0.6643646955490112, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.26141078838174275, |
| "grad_norm": 0.49262335896492004, |
| "learning_rate": 2e-05, |
| "loss": 0.6589719653129578, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.26556016597510373, |
| "grad_norm": 0.5234288573265076, |
| "learning_rate": 2e-05, |
| "loss": 0.6250555515289307, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.2697095435684647, |
| "grad_norm": 0.4657873809337616, |
| "learning_rate": 2e-05, |
| "loss": 0.5761417150497437, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.27385892116182575, |
| "grad_norm": 2.8522613048553467, |
| "learning_rate": 2e-05, |
| "loss": 0.6810148358345032, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.27800829875518673, |
| "grad_norm": 0.45667174458503723, |
| "learning_rate": 2e-05, |
| "loss": 0.5667203664779663, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.2821576763485477, |
| "grad_norm": 0.48965880274772644, |
| "learning_rate": 2e-05, |
| "loss": 0.6057634949684143, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.2863070539419087, |
| "grad_norm": 0.4700252115726471, |
| "learning_rate": 2e-05, |
| "loss": 0.5498369932174683, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.29045643153526973, |
| "grad_norm": 0.4457707703113556, |
| "learning_rate": 2e-05, |
| "loss": 0.5500881671905518, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.2946058091286307, |
| "grad_norm": 0.5242801904678345, |
| "learning_rate": 2e-05, |
| "loss": 0.6648991703987122, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.2987551867219917, |
| "grad_norm": 0.4845593273639679, |
| "learning_rate": 2e-05, |
| "loss": 0.6495253443717957, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.3029045643153527, |
| "grad_norm": 0.4535577595233917, |
| "learning_rate": 2e-05, |
| "loss": 0.6440762281417847, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.3070539419087137, |
| "grad_norm": 0.4424896240234375, |
| "learning_rate": 2e-05, |
| "loss": 0.5427602529525757, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.3112033195020747, |
| "grad_norm": 0.4791293144226074, |
| "learning_rate": 2e-05, |
| "loss": 0.6312339901924133, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.3153526970954357, |
| "grad_norm": 0.49440717697143555, |
| "learning_rate": 2e-05, |
| "loss": 0.7304765582084656, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.31950207468879666, |
| "grad_norm": 0.47376683354377747, |
| "learning_rate": 2e-05, |
| "loss": 0.5550855994224548, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.3236514522821577, |
| "grad_norm": 0.5386195182800293, |
| "learning_rate": 2e-05, |
| "loss": 0.7627665996551514, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.3278008298755187, |
| "grad_norm": 0.5139470100402832, |
| "learning_rate": 2e-05, |
| "loss": 0.7294001579284668, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.33195020746887965, |
| "grad_norm": 0.5727441310882568, |
| "learning_rate": 2e-05, |
| "loss": 0.6094337105751038, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.3360995850622407, |
| "grad_norm": 0.4475933313369751, |
| "learning_rate": 2e-05, |
| "loss": 0.6689184904098511, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.34024896265560167, |
| "grad_norm": 0.48615196347236633, |
| "learning_rate": 2e-05, |
| "loss": 0.5170673727989197, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.34439834024896265, |
| "grad_norm": 0.4444977939128876, |
| "learning_rate": 2e-05, |
| "loss": 0.5426638126373291, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.34854771784232363, |
| "grad_norm": 0.4532429873943329, |
| "learning_rate": 2e-05, |
| "loss": 0.5246436595916748, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.35269709543568467, |
| "grad_norm": 0.5425305962562561, |
| "learning_rate": 2e-05, |
| "loss": 0.7444034814834595, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.35684647302904565, |
| "grad_norm": 0.4604993164539337, |
| "learning_rate": 2e-05, |
| "loss": 0.6390590071678162, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.36099585062240663, |
| "grad_norm": 0.4503551423549652, |
| "learning_rate": 2e-05, |
| "loss": 0.7437008023262024, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.3651452282157676, |
| "grad_norm": 0.473531037569046, |
| "learning_rate": 2e-05, |
| "loss": 0.5801289677619934, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.36929460580912865, |
| "grad_norm": 0.43614616990089417, |
| "learning_rate": 2e-05, |
| "loss": 0.5945846438407898, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.37344398340248963, |
| "grad_norm": 0.5157416462898254, |
| "learning_rate": 2e-05, |
| "loss": 0.5870503187179565, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.3775933609958506, |
| "grad_norm": 0.4724714756011963, |
| "learning_rate": 2e-05, |
| "loss": 0.7136172652244568, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.3817427385892116, |
| "grad_norm": 0.49608129262924194, |
| "learning_rate": 2e-05, |
| "loss": 0.5707521438598633, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.38589211618257263, |
| "grad_norm": 0.4372619390487671, |
| "learning_rate": 2e-05, |
| "loss": 0.6751445531845093, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.3900414937759336, |
| "grad_norm": 0.8502039909362793, |
| "learning_rate": 2e-05, |
| "loss": 0.7432682514190674, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.3941908713692946, |
| "grad_norm": 0.43237465620040894, |
| "learning_rate": 2e-05, |
| "loss": 0.5463064908981323, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.3983402489626556, |
| "grad_norm": 0.4683166444301605, |
| "learning_rate": 2e-05, |
| "loss": 0.5722454190254211, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.4024896265560166, |
| "grad_norm": 0.49307140707969666, |
| "learning_rate": 2e-05, |
| "loss": 0.7676360011100769, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.4066390041493776, |
| "grad_norm": 0.45873740315437317, |
| "learning_rate": 2e-05, |
| "loss": 0.7670221328735352, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.4107883817427386, |
| "grad_norm": 0.522739589214325, |
| "learning_rate": 2e-05, |
| "loss": 0.6198732256889343, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.4149377593360996, |
| "grad_norm": 0.513500988483429, |
| "learning_rate": 2e-05, |
| "loss": 0.6557285189628601, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.4190871369294606, |
| "grad_norm": 0.5162559747695923, |
| "learning_rate": 2e-05, |
| "loss": 0.6777411699295044, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.42323651452282157, |
| "grad_norm": 0.4742807447910309, |
| "learning_rate": 2e-05, |
| "loss": 0.5189216732978821, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.42738589211618255, |
| "grad_norm": 0.3864991068840027, |
| "learning_rate": 2e-05, |
| "loss": 0.5397198796272278, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.4315352697095436, |
| "grad_norm": 0.44808462262153625, |
| "learning_rate": 2e-05, |
| "loss": 0.5719993710517883, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.43568464730290457, |
| "grad_norm": 0.5047919154167175, |
| "learning_rate": 2e-05, |
| "loss": 0.7246726751327515, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.43983402489626555, |
| "grad_norm": 0.4501510262489319, |
| "learning_rate": 2e-05, |
| "loss": 0.5421350598335266, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.44398340248962653, |
| "grad_norm": 0.5187399983406067, |
| "learning_rate": 2e-05, |
| "loss": 0.6851190328598022, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.44813278008298757, |
| "grad_norm": 0.4442541003227234, |
| "learning_rate": 2e-05, |
| "loss": 0.7323095798492432, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.45228215767634855, |
| "grad_norm": 0.4546023905277252, |
| "learning_rate": 2e-05, |
| "loss": 0.5949406027793884, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.45643153526970953, |
| "grad_norm": 0.43765076994895935, |
| "learning_rate": 2e-05, |
| "loss": 0.5195109248161316, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.4605809128630705, |
| "grad_norm": 0.6012418866157532, |
| "learning_rate": 2e-05, |
| "loss": 0.5891928672790527, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.46473029045643155, |
| "grad_norm": 0.5350989699363708, |
| "learning_rate": 2e-05, |
| "loss": 0.7073556184768677, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.46887966804979253, |
| "grad_norm": 0.40423402190208435, |
| "learning_rate": 2e-05, |
| "loss": 0.6081284284591675, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.4730290456431535, |
| "grad_norm": 0.48459556698799133, |
| "learning_rate": 2e-05, |
| "loss": 0.7626031637191772, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.47717842323651455, |
| "grad_norm": 0.5132282972335815, |
| "learning_rate": 2e-05, |
| "loss": 0.7070454359054565, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.48132780082987553, |
| "grad_norm": 0.40754643082618713, |
| "learning_rate": 2e-05, |
| "loss": 0.7881268858909607, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.4854771784232365, |
| "grad_norm": 0.46227574348449707, |
| "learning_rate": 2e-05, |
| "loss": 0.5589393973350525, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.4896265560165975, |
| "grad_norm": 0.458891898393631, |
| "learning_rate": 2e-05, |
| "loss": 0.6076244711875916, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.49377593360995853, |
| "grad_norm": 0.4314862787723541, |
| "learning_rate": 2e-05, |
| "loss": 0.58890700340271, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.4979253112033195, |
| "grad_norm": 0.4849430322647095, |
| "learning_rate": 2e-05, |
| "loss": 0.7297042012214661, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.5020746887966805, |
| "grad_norm": 0.4734286963939667, |
| "learning_rate": 2e-05, |
| "loss": 0.7929898500442505, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.5062240663900415, |
| "grad_norm": 0.4982983469963074, |
| "learning_rate": 2e-05, |
| "loss": 0.6973749399185181, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.5103734439834025, |
| "grad_norm": 0.4555007517337799, |
| "learning_rate": 2e-05, |
| "loss": 0.6363988518714905, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.5145228215767634, |
| "grad_norm": 0.469707190990448, |
| "learning_rate": 2e-05, |
| "loss": 0.6936283111572266, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.5186721991701245, |
| "grad_norm": 0.45310160517692566, |
| "learning_rate": 2e-05, |
| "loss": 0.8045607209205627, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.5228215767634855, |
| "grad_norm": 0.5117340087890625, |
| "learning_rate": 2e-05, |
| "loss": 0.5602521300315857, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.5269709543568465, |
| "grad_norm": 0.4890298545360565, |
| "learning_rate": 2e-05, |
| "loss": 0.5749447345733643, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.5311203319502075, |
| "grad_norm": 0.4680368900299072, |
| "learning_rate": 2e-05, |
| "loss": 0.6603504419326782, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.5352697095435685, |
| "grad_norm": 0.4364625811576843, |
| "learning_rate": 2e-05, |
| "loss": 0.6615546941757202, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.5394190871369294, |
| "grad_norm": 0.44393712282180786, |
| "learning_rate": 2e-05, |
| "loss": 0.7206588387489319, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.5435684647302904, |
| "grad_norm": 0.4770648777484894, |
| "learning_rate": 2e-05, |
| "loss": 0.5122599005699158, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.5477178423236515, |
| "grad_norm": 0.4254826307296753, |
| "learning_rate": 2e-05, |
| "loss": 0.5919891595840454, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.5518672199170125, |
| "grad_norm": 0.49948850274086, |
| "learning_rate": 2e-05, |
| "loss": 0.7168218493461609, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.5560165975103735, |
| "grad_norm": 0.46940577030181885, |
| "learning_rate": 2e-05, |
| "loss": 0.559630274772644, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.5601659751037344, |
| "grad_norm": 0.38155895471572876, |
| "learning_rate": 2e-05, |
| "loss": 0.35719043016433716, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.5643153526970954, |
| "grad_norm": 0.446111798286438, |
| "learning_rate": 2e-05, |
| "loss": 0.5944488644599915, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.5684647302904564, |
| "grad_norm": 0.44898721575737, |
| "learning_rate": 2e-05, |
| "loss": 0.6778333187103271, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.5726141078838174, |
| "grad_norm": 0.4727020263671875, |
| "learning_rate": 2e-05, |
| "loss": 0.6683153510093689, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.5767634854771784, |
| "grad_norm": 0.4775353968143463, |
| "learning_rate": 2e-05, |
| "loss": 0.7357037663459778, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.5809128630705395, |
| "grad_norm": 0.5201453566551208, |
| "learning_rate": 2e-05, |
| "loss": 0.5672426819801331, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.5850622406639004, |
| "grad_norm": 0.4446447491645813, |
| "learning_rate": 2e-05, |
| "loss": 0.6665009260177612, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.5892116182572614, |
| "grad_norm": 0.44674625992774963, |
| "learning_rate": 2e-05, |
| "loss": 0.6256436705589294, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.5933609958506224, |
| "grad_norm": 0.48278629779815674, |
| "learning_rate": 2e-05, |
| "loss": 0.652278482913971, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.5975103734439834, |
| "grad_norm": 0.4608626067638397, |
| "learning_rate": 2e-05, |
| "loss": 0.687121570110321, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.6016597510373444, |
| "grad_norm": 0.5146644711494446, |
| "learning_rate": 2e-05, |
| "loss": 0.7759085297584534, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.6058091286307054, |
| "grad_norm": 0.4703519344329834, |
| "learning_rate": 2e-05, |
| "loss": 0.6268375515937805, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.6099585062240664, |
| "grad_norm": 0.4373490512371063, |
| "learning_rate": 2e-05, |
| "loss": 0.7350006699562073, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.6141078838174274, |
| "grad_norm": 0.48525917530059814, |
| "learning_rate": 2e-05, |
| "loss": 0.6609182357788086, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.6182572614107884, |
| "grad_norm": 0.509609043598175, |
| "learning_rate": 2e-05, |
| "loss": 0.7720542550086975, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.6224066390041494, |
| "grad_norm": 0.46813687682151794, |
| "learning_rate": 2e-05, |
| "loss": 0.658400297164917, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.6265560165975104, |
| "grad_norm": 0.48811477422714233, |
| "learning_rate": 2e-05, |
| "loss": 0.6340473890304565, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.6307053941908713, |
| "grad_norm": 0.48529860377311707, |
| "learning_rate": 2e-05, |
| "loss": 0.7543718218803406, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.6348547717842323, |
| "grad_norm": 0.4565221965312958, |
| "learning_rate": 2e-05, |
| "loss": 0.5810791254043579, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.6390041493775933, |
| "grad_norm": 0.4667608141899109, |
| "learning_rate": 2e-05, |
| "loss": 0.5940293669700623, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.6431535269709544, |
| "grad_norm": 0.476724773645401, |
| "learning_rate": 2e-05, |
| "loss": 0.5076797604560852, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.6473029045643154, |
| "grad_norm": 0.48997762799263, |
| "learning_rate": 2e-05, |
| "loss": 0.5588229894638062, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.6514522821576764, |
| "grad_norm": 0.4687066674232483, |
| "learning_rate": 2e-05, |
| "loss": 0.7414963245391846, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.6556016597510373, |
| "grad_norm": 0.5096819400787354, |
| "learning_rate": 2e-05, |
| "loss": 0.6766090393066406, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.6597510373443983, |
| "grad_norm": 0.40396353602409363, |
| "learning_rate": 2e-05, |
| "loss": 0.5890622735023499, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.6639004149377593, |
| "grad_norm": 0.46985870599746704, |
| "learning_rate": 2e-05, |
| "loss": 0.5969380140304565, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.6680497925311203, |
| "grad_norm": 0.49084073305130005, |
| "learning_rate": 2e-05, |
| "loss": 0.6371229887008667, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.6721991701244814, |
| "grad_norm": 0.4466313123703003, |
| "learning_rate": 2e-05, |
| "loss": 0.6732550263404846, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.6763485477178424, |
| "grad_norm": 0.4656016528606415, |
| "learning_rate": 2e-05, |
| "loss": 0.7082672119140625, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.6804979253112033, |
| "grad_norm": 0.43604540824890137, |
| "learning_rate": 2e-05, |
| "loss": 0.5961745977401733, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.6846473029045643, |
| "grad_norm": 0.45962008833885193, |
| "learning_rate": 2e-05, |
| "loss": 0.5974591374397278, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.6887966804979253, |
| "grad_norm": 0.4566839635372162, |
| "learning_rate": 2e-05, |
| "loss": 0.5828849673271179, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.6929460580912863, |
| "grad_norm": 0.38006696105003357, |
| "learning_rate": 2e-05, |
| "loss": 0.6747267246246338, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.6970954356846473, |
| "grad_norm": 0.439981609582901, |
| "learning_rate": 2e-05, |
| "loss": 0.7797038555145264, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.7012448132780082, |
| "grad_norm": 0.47687003016471863, |
| "learning_rate": 2e-05, |
| "loss": 0.570720911026001, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.7053941908713693, |
| "grad_norm": 0.4829600155353546, |
| "learning_rate": 2e-05, |
| "loss": 0.5899892449378967, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.7095435684647303, |
| "grad_norm": 0.4642188847064972, |
| "learning_rate": 2e-05, |
| "loss": 0.6866733431816101, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.7136929460580913, |
| "grad_norm": 0.4619278013706207, |
| "learning_rate": 2e-05, |
| "loss": 0.5310846567153931, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.7178423236514523, |
| "grad_norm": 0.40906423330307007, |
| "learning_rate": 2e-05, |
| "loss": 0.6505522131919861, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.7219917012448133, |
| "grad_norm": 0.47687482833862305, |
| "learning_rate": 2e-05, |
| "loss": 0.6477482318878174, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.7261410788381742, |
| "grad_norm": 0.4249359369277954, |
| "learning_rate": 2e-05, |
| "loss": 0.542078971862793, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.7302904564315352, |
| "grad_norm": 0.4437820315361023, |
| "learning_rate": 2e-05, |
| "loss": 0.7326051592826843, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.7344398340248963, |
| "grad_norm": 0.47250184416770935, |
| "learning_rate": 2e-05, |
| "loss": 0.7204862236976624, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.7385892116182573, |
| "grad_norm": 0.45673149824142456, |
| "learning_rate": 2e-05, |
| "loss": 0.6894567608833313, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.7427385892116183, |
| "grad_norm": 0.4065015912055969, |
| "learning_rate": 2e-05, |
| "loss": 0.5020947456359863, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.7468879668049793, |
| "grad_norm": 0.480761855840683, |
| "learning_rate": 2e-05, |
| "loss": 0.652772843837738, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.7510373443983402, |
| "grad_norm": 0.4796382784843445, |
| "learning_rate": 2e-05, |
| "loss": 0.5466834306716919, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.7551867219917012, |
| "grad_norm": 0.427696168422699, |
| "learning_rate": 2e-05, |
| "loss": 0.46073320508003235, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.7593360995850622, |
| "grad_norm": 0.4324597716331482, |
| "learning_rate": 2e-05, |
| "loss": 0.6211638450622559, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.7634854771784232, |
| "grad_norm": 0.47733691334724426, |
| "learning_rate": 2e-05, |
| "loss": 0.6684774160385132, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.7676348547717843, |
| "grad_norm": 0.431084007024765, |
| "learning_rate": 2e-05, |
| "loss": 0.6145834922790527, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.7717842323651453, |
| "grad_norm": 0.5007755160331726, |
| "learning_rate": 2e-05, |
| "loss": 0.6526326537132263, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.7759336099585062, |
| "grad_norm": 0.4393167793750763, |
| "learning_rate": 2e-05, |
| "loss": 0.6100775599479675, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.7800829875518672, |
| "grad_norm": 0.4865422248840332, |
| "learning_rate": 2e-05, |
| "loss": 0.7980203032493591, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.7842323651452282, |
| "grad_norm": 0.4837598502635956, |
| "learning_rate": 2e-05, |
| "loss": 0.5299490690231323, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.7883817427385892, |
| "grad_norm": 0.5101847052574158, |
| "learning_rate": 2e-05, |
| "loss": 0.636174201965332, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.7925311203319502, |
| "grad_norm": 0.481587678194046, |
| "learning_rate": 2e-05, |
| "loss": 0.584964394569397, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.7966804979253111, |
| "grad_norm": 0.4833771288394928, |
| "learning_rate": 2e-05, |
| "loss": 0.660033643245697, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.8008298755186722, |
| "grad_norm": 0.47723522782325745, |
| "learning_rate": 2e-05, |
| "loss": 0.5514160394668579, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.8049792531120332, |
| "grad_norm": 0.46386954188346863, |
| "learning_rate": 2e-05, |
| "loss": 0.5447302460670471, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.8091286307053942, |
| "grad_norm": 0.47975945472717285, |
| "learning_rate": 2e-05, |
| "loss": 0.6700522303581238, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.8132780082987552, |
| "grad_norm": 0.45628130435943604, |
| "learning_rate": 2e-05, |
| "loss": 0.725788950920105, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.8174273858921162, |
| "grad_norm": 0.5276447534561157, |
| "learning_rate": 2e-05, |
| "loss": 0.4795994460582733, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.8215767634854771, |
| "grad_norm": 0.4197767376899719, |
| "learning_rate": 2e-05, |
| "loss": 0.5689822435379028, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.8257261410788381, |
| "grad_norm": 0.4988608956336975, |
| "learning_rate": 2e-05, |
| "loss": 0.5570112466812134, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.8298755186721992, |
| "grad_norm": 0.43889400362968445, |
| "learning_rate": 2e-05, |
| "loss": 0.5546621680259705, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.8340248962655602, |
| "grad_norm": 0.4966701865196228, |
| "learning_rate": 2e-05, |
| "loss": 0.7806369066238403, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.8381742738589212, |
| "grad_norm": 0.444965124130249, |
| "learning_rate": 2e-05, |
| "loss": 0.6175658702850342, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.8423236514522822, |
| "grad_norm": 0.47721561789512634, |
| "learning_rate": 2e-05, |
| "loss": 0.608608603477478, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.8464730290456431, |
| "grad_norm": 0.41363325715065, |
| "learning_rate": 2e-05, |
| "loss": 0.5362960696220398, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.8506224066390041, |
| "grad_norm": 0.4979526102542877, |
| "learning_rate": 2e-05, |
| "loss": 0.6923606395721436, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.8547717842323651, |
| "grad_norm": 0.4715823829174042, |
| "learning_rate": 2e-05, |
| "loss": 0.5849528312683105, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.8589211618257261, |
| "grad_norm": 0.43941834568977356, |
| "learning_rate": 2e-05, |
| "loss": 0.5507952570915222, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.8630705394190872, |
| "grad_norm": 0.6943396925926208, |
| "learning_rate": 2e-05, |
| "loss": 0.6139302253723145, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.8672199170124482, |
| "grad_norm": 0.4135432541370392, |
| "learning_rate": 2e-05, |
| "loss": 0.6495124697685242, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.8713692946058091, |
| "grad_norm": 0.4735243320465088, |
| "learning_rate": 2e-05, |
| "loss": 0.6073355674743652, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.8755186721991701, |
| "grad_norm": 0.5081479549407959, |
| "learning_rate": 2e-05, |
| "loss": 0.5338884592056274, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.8796680497925311, |
| "grad_norm": 0.44402876496315, |
| "learning_rate": 2e-05, |
| "loss": 0.5649405717849731, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.8838174273858921, |
| "grad_norm": 0.4597266614437103, |
| "learning_rate": 2e-05, |
| "loss": 0.851700484752655, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.8879668049792531, |
| "grad_norm": 0.49691715836524963, |
| "learning_rate": 2e-05, |
| "loss": 0.6800894141197205, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.8921161825726142, |
| "grad_norm": 0.4347255825996399, |
| "learning_rate": 2e-05, |
| "loss": 0.6838465332984924, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.8962655601659751, |
| "grad_norm": 0.4532018303871155, |
| "learning_rate": 2e-05, |
| "loss": 0.6527755856513977, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.9004149377593361, |
| "grad_norm": 0.5003204941749573, |
| "learning_rate": 2e-05, |
| "loss": 0.6630940437316895, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.9045643153526971, |
| "grad_norm": 0.4661204218864441, |
| "learning_rate": 2e-05, |
| "loss": 0.693079948425293, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.9087136929460581, |
| "grad_norm": 0.4552728235721588, |
| "learning_rate": 2e-05, |
| "loss": 0.6484197974205017, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.9128630705394191, |
| "grad_norm": 0.4681585133075714, |
| "learning_rate": 2e-05, |
| "loss": 0.6020994186401367, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.91701244813278, |
| "grad_norm": 0.41022825241088867, |
| "learning_rate": 2e-05, |
| "loss": 0.530207097530365, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.921161825726141, |
| "grad_norm": 0.39006152749061584, |
| "learning_rate": 2e-05, |
| "loss": 0.445180743932724, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.9253112033195021, |
| "grad_norm": 0.4057929217815399, |
| "learning_rate": 2e-05, |
| "loss": 0.5387605428695679, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.9294605809128631, |
| "grad_norm": 0.42876264452934265, |
| "learning_rate": 2e-05, |
| "loss": 0.5825240015983582, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.9336099585062241, |
| "grad_norm": 0.48948875069618225, |
| "learning_rate": 2e-05, |
| "loss": 0.6396217942237854, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.9377593360995851, |
| "grad_norm": 0.4649500548839569, |
| "learning_rate": 2e-05, |
| "loss": 0.4400583505630493, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.941908713692946, |
| "grad_norm": 0.43061113357543945, |
| "learning_rate": 2e-05, |
| "loss": 0.5668185353279114, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.946058091286307, |
| "grad_norm": 0.37659695744514465, |
| "learning_rate": 2e-05, |
| "loss": 0.3734014630317688, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.950207468879668, |
| "grad_norm": 0.5160449743270874, |
| "learning_rate": 2e-05, |
| "loss": 0.7836225032806396, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.9543568464730291, |
| "grad_norm": 0.5332698822021484, |
| "learning_rate": 2e-05, |
| "loss": 0.6564600467681885, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.9585062240663901, |
| "grad_norm": 0.48597726225852966, |
| "learning_rate": 2e-05, |
| "loss": 0.7620537281036377, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.9626556016597511, |
| "grad_norm": 0.437928169965744, |
| "learning_rate": 2e-05, |
| "loss": 0.5499407052993774, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.966804979253112, |
| "grad_norm": 0.4861524701118469, |
| "learning_rate": 2e-05, |
| "loss": 0.6248472332954407, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.970954356846473, |
| "grad_norm": 0.4638573229312897, |
| "learning_rate": 2e-05, |
| "loss": 0.5971051454544067, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.975103734439834, |
| "grad_norm": 0.4368666410446167, |
| "learning_rate": 2e-05, |
| "loss": 0.5971348285675049, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.979253112033195, |
| "grad_norm": 0.4261365830898285, |
| "learning_rate": 2e-05, |
| "loss": 0.5625735521316528, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.983402489626556, |
| "grad_norm": 0.47601279616355896, |
| "learning_rate": 2e-05, |
| "loss": 0.518233597278595, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.9875518672199171, |
| "grad_norm": 0.4935397803783417, |
| "learning_rate": 2e-05, |
| "loss": 0.7158107161521912, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.991701244813278, |
| "grad_norm": 0.456167072057724, |
| "learning_rate": 2e-05, |
| "loss": 0.6627569198608398, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.995850622406639, |
| "grad_norm": 0.4805908799171448, |
| "learning_rate": 2e-05, |
| "loss": 0.6887528896331787, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.6356716156005859, |
| "learning_rate": 2e-05, |
| "loss": 0.65900057554245, |
| "step": 241 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 241, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.1048678841008456e+18, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|