|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.22003725936659077, |
|
"eval_steps": 500, |
|
"global_step": 1063, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00020699648105982198, |
|
"grad_norm": 0.3105248212814331, |
|
"learning_rate": 1e-05, |
|
"loss": 0.3097, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.00041399296211964395, |
|
"grad_norm": 0.35843613743782043, |
|
"learning_rate": 2e-05, |
|
"loss": 0.3333, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.000620989443179466, |
|
"grad_norm": 0.31169694662094116, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3367, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0008279859242392879, |
|
"grad_norm": 0.4025513529777527, |
|
"learning_rate": 4e-05, |
|
"loss": 0.3568, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0010349824052991099, |
|
"grad_norm": 0.38164222240448, |
|
"learning_rate": 5e-05, |
|
"loss": 0.375, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.001241978886358932, |
|
"grad_norm": 0.41811275482177734, |
|
"learning_rate": 6e-05, |
|
"loss": 0.3421, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.001448975367418754, |
|
"grad_norm": 0.40563058853149414, |
|
"learning_rate": 7e-05, |
|
"loss": 0.3046, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.0016559718484785758, |
|
"grad_norm": 0.32561907172203064, |
|
"learning_rate": 8e-05, |
|
"loss": 0.3298, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.0018629683295383979, |
|
"grad_norm": 0.2336910218000412, |
|
"learning_rate": 9e-05, |
|
"loss": 0.2331, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0020699648105982197, |
|
"grad_norm": 0.2214404195547104, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2747, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.002276961291658042, |
|
"grad_norm": 0.1755189299583435, |
|
"learning_rate": 0.00011000000000000002, |
|
"loss": 0.215, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.002483957772717864, |
|
"grad_norm": 0.13464422523975372, |
|
"learning_rate": 0.00012, |
|
"loss": 0.1854, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.0026909542537776857, |
|
"grad_norm": 0.1568724364042282, |
|
"learning_rate": 0.00013000000000000002, |
|
"loss": 0.1954, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.002897950734837508, |
|
"grad_norm": 0.1855469048023224, |
|
"learning_rate": 0.00014, |
|
"loss": 0.1788, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.00310494721589733, |
|
"grad_norm": 0.16363286972045898, |
|
"learning_rate": 0.00015000000000000001, |
|
"loss": 0.1816, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.0033119436969571516, |
|
"grad_norm": 0.14675703644752502, |
|
"learning_rate": 0.00016, |
|
"loss": 0.144, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.003518940178016974, |
|
"grad_norm": 0.13585928082466125, |
|
"learning_rate": 0.00017, |
|
"loss": 0.1265, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.0037259366590767957, |
|
"grad_norm": 0.13742923736572266, |
|
"learning_rate": 0.00018, |
|
"loss": 0.1373, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.003932933140136618, |
|
"grad_norm": 0.09649409353733063, |
|
"learning_rate": 0.00019, |
|
"loss": 0.1161, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.004139929621196439, |
|
"grad_norm": 0.08537352085113525, |
|
"learning_rate": 0.0002, |
|
"loss": 0.1086, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.004346926102256262, |
|
"grad_norm": 0.07582477480173111, |
|
"learning_rate": 0.00019995842860112245, |
|
"loss": 0.1047, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.004553922583316084, |
|
"grad_norm": 0.07321921736001968, |
|
"learning_rate": 0.00019991685720224486, |
|
"loss": 0.1095, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.004760919064375905, |
|
"grad_norm": 0.07746334373950958, |
|
"learning_rate": 0.0001998752858033673, |
|
"loss": 0.08, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.004967915545435728, |
|
"grad_norm": 0.06693358719348907, |
|
"learning_rate": 0.00019983371440448973, |
|
"loss": 0.0932, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.00517491202649555, |
|
"grad_norm": 0.09097249805927277, |
|
"learning_rate": 0.00019979214300561214, |
|
"loss": 0.095, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.005381908507555371, |
|
"grad_norm": 0.06965727359056473, |
|
"learning_rate": 0.00019975057160673458, |
|
"loss": 0.0847, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.005588904988615194, |
|
"grad_norm": 0.060345325618982315, |
|
"learning_rate": 0.00019970900020785701, |
|
"loss": 0.0609, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.005795901469675016, |
|
"grad_norm": 0.06169256567955017, |
|
"learning_rate": 0.00019966742880897945, |
|
"loss": 0.0651, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.006002897950734837, |
|
"grad_norm": 0.07536791265010834, |
|
"learning_rate": 0.00019962585741010186, |
|
"loss": 0.0614, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.00620989443179466, |
|
"grad_norm": 0.06803280860185623, |
|
"learning_rate": 0.0001995842860112243, |
|
"loss": 0.0589, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.006416890912854482, |
|
"grad_norm": 0.09668745845556259, |
|
"learning_rate": 0.0001995427146123467, |
|
"loss": 0.0511, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.006623887393914303, |
|
"grad_norm": 0.060853298753499985, |
|
"learning_rate": 0.00019950114321346915, |
|
"loss": 0.0428, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.0068308838749741255, |
|
"grad_norm": 0.04822453856468201, |
|
"learning_rate": 0.00019945957181459156, |
|
"loss": 0.0442, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.007037880356033948, |
|
"grad_norm": 0.07689573615789413, |
|
"learning_rate": 0.000199418000415714, |
|
"loss": 0.0448, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.007244876837093769, |
|
"grad_norm": 0.07483747601509094, |
|
"learning_rate": 0.00019937642901683643, |
|
"loss": 0.0449, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.0074518733181535915, |
|
"grad_norm": 0.06677654385566711, |
|
"learning_rate": 0.00019933485761795884, |
|
"loss": 0.0368, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.007658869799213414, |
|
"grad_norm": 0.06132747232913971, |
|
"learning_rate": 0.00019929328621908128, |
|
"loss": 0.0365, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.007865866280273236, |
|
"grad_norm": 0.04215759411454201, |
|
"learning_rate": 0.00019925171482020371, |
|
"loss": 0.0307, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.008072862761333057, |
|
"grad_norm": 0.05545351654291153, |
|
"learning_rate": 0.00019921014342132612, |
|
"loss": 0.0308, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.008279859242392879, |
|
"grad_norm": 0.06014477089047432, |
|
"learning_rate": 0.00019916857202244856, |
|
"loss": 0.0335, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.008486855723452702, |
|
"grad_norm": 0.06840485334396362, |
|
"learning_rate": 0.000199127000623571, |
|
"loss": 0.0287, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.008693852204512523, |
|
"grad_norm": 0.0705009400844574, |
|
"learning_rate": 0.0001990854292246934, |
|
"loss": 0.0271, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.008900848685572345, |
|
"grad_norm": 0.05818328261375427, |
|
"learning_rate": 0.00019904385782581585, |
|
"loss": 0.0253, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.009107845166632168, |
|
"grad_norm": 0.03936947509646416, |
|
"learning_rate": 0.00019900228642693828, |
|
"loss": 0.0216, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.00931484164769199, |
|
"grad_norm": 0.044559165835380554, |
|
"learning_rate": 0.00019896071502806072, |
|
"loss": 0.0212, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.00952183812875181, |
|
"grad_norm": 0.04012398421764374, |
|
"learning_rate": 0.00019891914362918313, |
|
"loss": 0.014, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.009728834609811634, |
|
"grad_norm": 0.048441193997859955, |
|
"learning_rate": 0.00019887757223030557, |
|
"loss": 0.0144, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.009935831090871455, |
|
"grad_norm": 0.046538762748241425, |
|
"learning_rate": 0.000198836000831428, |
|
"loss": 0.0111, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.010142827571931277, |
|
"grad_norm": 0.04020772501826286, |
|
"learning_rate": 0.0001987944294325504, |
|
"loss": 0.0103, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.0103498240529911, |
|
"grad_norm": 0.030860010534524918, |
|
"learning_rate": 0.00019875285803367285, |
|
"loss": 0.0071, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.010556820534050921, |
|
"grad_norm": 0.0534852109849453, |
|
"learning_rate": 0.0001987112866347953, |
|
"loss": 0.0082, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.010763817015110743, |
|
"grad_norm": 0.061904191970825195, |
|
"learning_rate": 0.0001986697152359177, |
|
"loss": 0.0092, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.010970813496170566, |
|
"grad_norm": 0.06135628744959831, |
|
"learning_rate": 0.0001986281438370401, |
|
"loss": 0.0091, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.011177809977230387, |
|
"grad_norm": 0.060930103063583374, |
|
"learning_rate": 0.00019858657243816254, |
|
"loss": 0.0103, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.011384806458290209, |
|
"grad_norm": 0.038847871124744415, |
|
"learning_rate": 0.00019854500103928498, |
|
"loss": 0.0077, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.011591802939350032, |
|
"grad_norm": 0.01643364317715168, |
|
"learning_rate": 0.0001985034296404074, |
|
"loss": 0.0065, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.011798799420409853, |
|
"grad_norm": 0.03226076811552048, |
|
"learning_rate": 0.00019846185824152983, |
|
"loss": 0.0059, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.012005795901469675, |
|
"grad_norm": 0.045181699097156525, |
|
"learning_rate": 0.00019842028684265227, |
|
"loss": 0.0075, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.012212792382529498, |
|
"grad_norm": 0.0442410409450531, |
|
"learning_rate": 0.00019837871544377468, |
|
"loss": 0.007, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.01241978886358932, |
|
"grad_norm": 0.04646408557891846, |
|
"learning_rate": 0.0001983371440448971, |
|
"loss": 0.009, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.01262678534464914, |
|
"grad_norm": 0.037814535200595856, |
|
"learning_rate": 0.00019829557264601955, |
|
"loss": 0.0062, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.012833781825708964, |
|
"grad_norm": 0.029840698465704918, |
|
"learning_rate": 0.000198254001247142, |
|
"loss": 0.0075, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.013040778306768785, |
|
"grad_norm": 0.009363808669149876, |
|
"learning_rate": 0.0001982124298482644, |
|
"loss": 0.0054, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.013247774787828607, |
|
"grad_norm": 0.02504296600818634, |
|
"learning_rate": 0.00019817085844938683, |
|
"loss": 0.0051, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.01345477126888843, |
|
"grad_norm": 0.03861517831683159, |
|
"learning_rate": 0.00019812928705050927, |
|
"loss": 0.0061, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.013661767749948251, |
|
"grad_norm": 0.04222877696156502, |
|
"learning_rate": 0.00019808771565163168, |
|
"loss": 0.0074, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.013868764231008072, |
|
"grad_norm": 0.03613612800836563, |
|
"learning_rate": 0.00019804614425275412, |
|
"loss": 0.0063, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.014075760712067896, |
|
"grad_norm": 0.02817763015627861, |
|
"learning_rate": 0.00019800457285387655, |
|
"loss": 0.0074, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.014282757193127717, |
|
"grad_norm": 0.01215298566967249, |
|
"learning_rate": 0.00019796300145499896, |
|
"loss": 0.0057, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.014489753674187538, |
|
"grad_norm": 0.01781376637518406, |
|
"learning_rate": 0.0001979214300561214, |
|
"loss": 0.0067, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.014696750155247362, |
|
"grad_norm": 0.02770097553730011, |
|
"learning_rate": 0.00019787985865724384, |
|
"loss": 0.0057, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.014903746636307183, |
|
"grad_norm": 0.029749557375907898, |
|
"learning_rate": 0.00019783828725836625, |
|
"loss": 0.0063, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.015110743117367004, |
|
"grad_norm": 0.02843872457742691, |
|
"learning_rate": 0.00019779671585948869, |
|
"loss": 0.0076, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.015317739598426828, |
|
"grad_norm": 0.019167358055710793, |
|
"learning_rate": 0.00019775514446061112, |
|
"loss": 0.0059, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.015524736079486649, |
|
"grad_norm": 0.01691405475139618, |
|
"learning_rate": 0.00019771357306173353, |
|
"loss": 0.0072, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.015731732560546472, |
|
"grad_norm": 0.00585334375500679, |
|
"learning_rate": 0.00019767200166285594, |
|
"loss": 0.0047, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.015938729041606294, |
|
"grad_norm": 0.019127612933516502, |
|
"learning_rate": 0.00019763043026397838, |
|
"loss": 0.0047, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.016145725522666115, |
|
"grad_norm": 0.026558954268693924, |
|
"learning_rate": 0.00019758885886510082, |
|
"loss": 0.0064, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.016352722003725936, |
|
"grad_norm": 0.0275382362306118, |
|
"learning_rate": 0.00019754728746622325, |
|
"loss": 0.006, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.016559718484785758, |
|
"grad_norm": 0.016686394810676575, |
|
"learning_rate": 0.00019750571606734566, |
|
"loss": 0.0069, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.01676671496584558, |
|
"grad_norm": 0.011664893478155136, |
|
"learning_rate": 0.0001974641446684681, |
|
"loss": 0.0055, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.016973711446905404, |
|
"grad_norm": 0.010350242257118225, |
|
"learning_rate": 0.00019742257326959054, |
|
"loss": 0.0045, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.017180707927965225, |
|
"grad_norm": 0.018541481345891953, |
|
"learning_rate": 0.00019738100187071295, |
|
"loss": 0.0056, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.017387704409025047, |
|
"grad_norm": 0.015899088233709335, |
|
"learning_rate": 0.00019733943047183539, |
|
"loss": 0.0047, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.017594700890084868, |
|
"grad_norm": 0.01706838794052601, |
|
"learning_rate": 0.00019729785907295782, |
|
"loss": 0.0059, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.01780169737114469, |
|
"grad_norm": 0.01610150933265686, |
|
"learning_rate": 0.00019725628767408023, |
|
"loss": 0.0065, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.01800869385220451, |
|
"grad_norm": 0.011388594284653664, |
|
"learning_rate": 0.00019721471627520267, |
|
"loss": 0.0054, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.018215690333264336, |
|
"grad_norm": 0.010438695549964905, |
|
"learning_rate": 0.0001971731448763251, |
|
"loss": 0.005, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.018422686814324157, |
|
"grad_norm": 0.01277916319668293, |
|
"learning_rate": 0.00019713157347744752, |
|
"loss": 0.0045, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.01862968329538398, |
|
"grad_norm": 0.016964582726359367, |
|
"learning_rate": 0.00019709000207856995, |
|
"loss": 0.0064, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.0188366797764438, |
|
"grad_norm": 0.015511998906731606, |
|
"learning_rate": 0.0001970484306796924, |
|
"loss": 0.0046, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.01904367625750362, |
|
"grad_norm": 0.014610686339437962, |
|
"learning_rate": 0.00019700685928081483, |
|
"loss": 0.0068, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.019250672738563443, |
|
"grad_norm": 0.011182552203536034, |
|
"learning_rate": 0.00019696528788193724, |
|
"loss": 0.005, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.019457669219623268, |
|
"grad_norm": 0.00904427282512188, |
|
"learning_rate": 0.00019692371648305967, |
|
"loss": 0.0052, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.01966466570068309, |
|
"grad_norm": 0.013450189493596554, |
|
"learning_rate": 0.0001968821450841821, |
|
"loss": 0.0053, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.01987166218174291, |
|
"grad_norm": 0.011684760451316833, |
|
"learning_rate": 0.00019684057368530452, |
|
"loss": 0.0051, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.020078658662802732, |
|
"grad_norm": 0.009463542141020298, |
|
"learning_rate": 0.00019679900228642693, |
|
"loss": 0.0055, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.020285655143862553, |
|
"grad_norm": 0.007411513477563858, |
|
"learning_rate": 0.00019675743088754937, |
|
"loss": 0.005, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.020492651624922375, |
|
"grad_norm": 0.013031812384724617, |
|
"learning_rate": 0.0001967158594886718, |
|
"loss": 0.0063, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.0206996481059822, |
|
"grad_norm": 0.006821679417043924, |
|
"learning_rate": 0.00019667428808979422, |
|
"loss": 0.0058, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.02090664458704202, |
|
"grad_norm": 0.009770995937287807, |
|
"learning_rate": 0.00019663271669091665, |
|
"loss": 0.0045, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.021113641068101843, |
|
"grad_norm": 0.007675915956497192, |
|
"learning_rate": 0.0001965911452920391, |
|
"loss": 0.0049, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.021320637549161664, |
|
"grad_norm": 0.009076464921236038, |
|
"learning_rate": 0.0001965495738931615, |
|
"loss": 0.0048, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.021527634030221485, |
|
"grad_norm": 0.010678350925445557, |
|
"learning_rate": 0.00019650800249428394, |
|
"loss": 0.0056, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.021734630511281307, |
|
"grad_norm": 0.029563505202531815, |
|
"learning_rate": 0.00019646643109540637, |
|
"loss": 0.0049, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.02194162699234113, |
|
"grad_norm": 0.0072585404850542545, |
|
"learning_rate": 0.00019642485969652878, |
|
"loss": 0.005, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.022148623473400953, |
|
"grad_norm": 0.009124008938670158, |
|
"learning_rate": 0.00019638328829765122, |
|
"loss": 0.0045, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.022355619954460774, |
|
"grad_norm": 0.005743277724832296, |
|
"learning_rate": 0.00019634171689877366, |
|
"loss": 0.0051, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.022562616435520596, |
|
"grad_norm": 0.01300257071852684, |
|
"learning_rate": 0.0001963001454998961, |
|
"loss": 0.0049, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.022769612916580417, |
|
"grad_norm": 0.02877631224691868, |
|
"learning_rate": 0.0001962585741010185, |
|
"loss": 0.0053, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.02297660939764024, |
|
"grad_norm": 0.010237788781523705, |
|
"learning_rate": 0.00019621700270214094, |
|
"loss": 0.0046, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.023183605878700064, |
|
"grad_norm": 0.010189997963607311, |
|
"learning_rate": 0.00019617543130326338, |
|
"loss": 0.0056, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.023390602359759885, |
|
"grad_norm": 0.010226712562143803, |
|
"learning_rate": 0.0001961338599043858, |
|
"loss": 0.005, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.023597598840819706, |
|
"grad_norm": 0.011194237507879734, |
|
"learning_rate": 0.00019609228850550823, |
|
"loss": 0.0054, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.023804595321879528, |
|
"grad_norm": 0.0065891253761947155, |
|
"learning_rate": 0.00019605071710663066, |
|
"loss": 0.0057, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.02401159180293935, |
|
"grad_norm": 0.008131214417517185, |
|
"learning_rate": 0.00019600914570775307, |
|
"loss": 0.0056, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.02421858828399917, |
|
"grad_norm": 0.013916265219449997, |
|
"learning_rate": 0.0001959675743088755, |
|
"loss": 0.0051, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.024425584765058996, |
|
"grad_norm": 0.00658258656039834, |
|
"learning_rate": 0.00019592600290999795, |
|
"loss": 0.0048, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.024632581246118817, |
|
"grad_norm": 0.005407229065895081, |
|
"learning_rate": 0.00019588443151112036, |
|
"loss": 0.0053, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.02483957772717864, |
|
"grad_norm": 0.009581067599356174, |
|
"learning_rate": 0.00019584286011224277, |
|
"loss": 0.0048, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.02504657420823846, |
|
"grad_norm": 0.011583163402974606, |
|
"learning_rate": 0.0001958012887133652, |
|
"loss": 0.0049, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.02525357068929828, |
|
"grad_norm": 0.017572317272424698, |
|
"learning_rate": 0.00019575971731448764, |
|
"loss": 0.0049, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.025460567170358103, |
|
"grad_norm": 0.018644949421286583, |
|
"learning_rate": 0.00019571814591561005, |
|
"loss": 0.0068, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.025667563651417927, |
|
"grad_norm": 0.014408939518034458, |
|
"learning_rate": 0.0001956765745167325, |
|
"loss": 0.0056, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.02587456013247775, |
|
"grad_norm": 0.0033774918410927057, |
|
"learning_rate": 0.00019563500311785493, |
|
"loss": 0.0053, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.02608155661353757, |
|
"grad_norm": 0.005626993719488382, |
|
"learning_rate": 0.00019559343171897736, |
|
"loss": 0.0046, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.02628855309459739, |
|
"grad_norm": 0.018701711669564247, |
|
"learning_rate": 0.00019555186032009977, |
|
"loss": 0.0077, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.026495549575657213, |
|
"grad_norm": 0.016636714339256287, |
|
"learning_rate": 0.0001955102889212222, |
|
"loss": 0.005, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.026702546056717034, |
|
"grad_norm": 0.013526069931685925, |
|
"learning_rate": 0.00019546871752234465, |
|
"loss": 0.0047, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.02690954253777686, |
|
"grad_norm": 0.024728018790483475, |
|
"learning_rate": 0.00019542714612346706, |
|
"loss": 0.0045, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.02711653901883668, |
|
"grad_norm": 0.02217916212975979, |
|
"learning_rate": 0.0001953855747245895, |
|
"loss": 0.0045, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.027323535499896502, |
|
"grad_norm": 0.010518092662096024, |
|
"learning_rate": 0.00019534400332571193, |
|
"loss": 0.0065, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.027530531980956324, |
|
"grad_norm": 0.008342115208506584, |
|
"learning_rate": 0.00019530243192683434, |
|
"loss": 0.0063, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.027737528462016145, |
|
"grad_norm": 0.008312125690281391, |
|
"learning_rate": 0.00019526086052795678, |
|
"loss": 0.0047, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.027944524943075966, |
|
"grad_norm": 0.006928388494998217, |
|
"learning_rate": 0.00019521928912907921, |
|
"loss": 0.005, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.02815152142413579, |
|
"grad_norm": 0.0073064775206148624, |
|
"learning_rate": 0.00019517771773020162, |
|
"loss": 0.0061, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.028358517905195613, |
|
"grad_norm": 0.007849021814763546, |
|
"learning_rate": 0.00019513614633132406, |
|
"loss": 0.0052, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.028565514386255434, |
|
"grad_norm": 0.006330843083560467, |
|
"learning_rate": 0.0001950945749324465, |
|
"loss": 0.006, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.028772510867315255, |
|
"grad_norm": 0.002727820537984371, |
|
"learning_rate": 0.00019505300353356894, |
|
"loss": 0.0047, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.028979507348375077, |
|
"grad_norm": 0.006755925714969635, |
|
"learning_rate": 0.00019501143213469135, |
|
"loss": 0.0043, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.0291865038294349, |
|
"grad_norm": 0.007393544539809227, |
|
"learning_rate": 0.00019496986073581376, |
|
"loss": 0.0047, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.029393500310494723, |
|
"grad_norm": 0.007699685171246529, |
|
"learning_rate": 0.0001949282893369362, |
|
"loss": 0.0053, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.029600496791554545, |
|
"grad_norm": 0.003382055787369609, |
|
"learning_rate": 0.0001948867179380586, |
|
"loss": 0.0042, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.029807493272614366, |
|
"grad_norm": 0.01596757024526596, |
|
"learning_rate": 0.00019484514653918104, |
|
"loss": 0.0078, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.030014489753674187, |
|
"grad_norm": 0.00668082432821393, |
|
"learning_rate": 0.00019480357514030348, |
|
"loss": 0.0052, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.03022148623473401, |
|
"grad_norm": 0.007241010665893555, |
|
"learning_rate": 0.00019476200374142591, |
|
"loss": 0.0054, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.03042848271579383, |
|
"grad_norm": 0.009339329786598682, |
|
"learning_rate": 0.00019472043234254832, |
|
"loss": 0.0066, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.030635479196853655, |
|
"grad_norm": 0.007252382580190897, |
|
"learning_rate": 0.00019467886094367076, |
|
"loss": 0.0046, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.030842475677913476, |
|
"grad_norm": 0.004665658809244633, |
|
"learning_rate": 0.0001946372895447932, |
|
"loss": 0.0044, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.031049472158973298, |
|
"grad_norm": 0.006148173939436674, |
|
"learning_rate": 0.0001945957181459156, |
|
"loss": 0.0049, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.03125646864003312, |
|
"grad_norm": 0.004451967775821686, |
|
"learning_rate": 0.00019455414674703805, |
|
"loss": 0.0061, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.031463465121092944, |
|
"grad_norm": 0.004053746350109577, |
|
"learning_rate": 0.00019451257534816048, |
|
"loss": 0.0047, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.03167046160215276, |
|
"grad_norm": 0.009219355881214142, |
|
"learning_rate": 0.0001944710039492829, |
|
"loss": 0.0048, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.03187745808321259, |
|
"grad_norm": 0.020168175920844078, |
|
"learning_rate": 0.00019442943255040533, |
|
"loss": 0.0063, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.032084454564272405, |
|
"grad_norm": 0.0030542612075805664, |
|
"learning_rate": 0.00019438786115152777, |
|
"loss": 0.0048, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.03229145104533223, |
|
"grad_norm": 0.00561768002808094, |
|
"learning_rate": 0.0001943462897526502, |
|
"loss": 0.0047, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.032498447526392055, |
|
"grad_norm": 0.004265373572707176, |
|
"learning_rate": 0.0001943047183537726, |
|
"loss": 0.0062, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.03270544400745187, |
|
"grad_norm": 0.005523406434804201, |
|
"learning_rate": 0.00019426314695489505, |
|
"loss": 0.0044, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.0329124404885117, |
|
"grad_norm": 0.009324849583208561, |
|
"learning_rate": 0.0001942215755560175, |
|
"loss": 0.0055, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.033119436969571515, |
|
"grad_norm": 0.012468270026147366, |
|
"learning_rate": 0.0001941800041571399, |
|
"loss": 0.0045, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.03332643345063134, |
|
"grad_norm": 0.0018690524157136679, |
|
"learning_rate": 0.00019413843275826233, |
|
"loss": 0.0047, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.03353342993169116, |
|
"grad_norm": 0.009851769544184208, |
|
"learning_rate": 0.00019409686135938477, |
|
"loss": 0.0052, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.03374042641275098, |
|
"grad_norm": 0.0014802832156419754, |
|
"learning_rate": 0.00019405528996050718, |
|
"loss": 0.0045, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.03394742289381081, |
|
"grad_norm": 0.00177583540789783, |
|
"learning_rate": 0.0001940137185616296, |
|
"loss": 0.0046, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.034154419374870626, |
|
"grad_norm": 0.0017039773520082235, |
|
"learning_rate": 0.00019397214716275203, |
|
"loss": 0.0044, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.03436141585593045, |
|
"grad_norm": 0.010193880647420883, |
|
"learning_rate": 0.00019393057576387447, |
|
"loss": 0.0054, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.03456841233699027, |
|
"grad_norm": 0.011056206189095974, |
|
"learning_rate": 0.00019388900436499688, |
|
"loss": 0.0051, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.034775408818050094, |
|
"grad_norm": 0.004655253142118454, |
|
"learning_rate": 0.0001938474329661193, |
|
"loss": 0.0046, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.03498240529910992, |
|
"grad_norm": 0.0035964485723525286, |
|
"learning_rate": 0.00019380586156724175, |
|
"loss": 0.0045, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.035189401780169736, |
|
"grad_norm": 0.0026267431676387787, |
|
"learning_rate": 0.00019376429016836416, |
|
"loss": 0.0047, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.03539639826122956, |
|
"grad_norm": 0.008452721871435642, |
|
"learning_rate": 0.0001937227187694866, |
|
"loss": 0.0059, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.03560339474228938, |
|
"grad_norm": 0.006845233030617237, |
|
"learning_rate": 0.00019368114737060903, |
|
"loss": 0.0051, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.035810391223349204, |
|
"grad_norm": 0.005468891002237797, |
|
"learning_rate": 0.00019363957597173144, |
|
"loss": 0.0059, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.03601738770440902, |
|
"grad_norm": 0.007444227579981089, |
|
"learning_rate": 0.00019359800457285388, |
|
"loss": 0.0044, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.03622438418546885, |
|
"grad_norm": 0.011544904671609402, |
|
"learning_rate": 0.00019355643317397632, |
|
"loss": 0.0053, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.03643138066652867, |
|
"grad_norm": 0.04777868092060089, |
|
"learning_rate": 0.00019351486177509875, |
|
"loss": 0.005, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.03663837714758849, |
|
"grad_norm": 0.004177747759968042, |
|
"learning_rate": 0.00019347329037622116, |
|
"loss": 0.0048, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.036845373628648315, |
|
"grad_norm": 0.0020709133241325617, |
|
"learning_rate": 0.0001934317189773436, |
|
"loss": 0.0045, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.03705237010970813, |
|
"grad_norm": 0.012106567621231079, |
|
"learning_rate": 0.00019339014757846604, |
|
"loss": 0.0043, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.03725936659076796, |
|
"grad_norm": 0.005318734794855118, |
|
"learning_rate": 0.00019334857617958845, |
|
"loss": 0.005, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.03746636307182778, |
|
"grad_norm": 0.04815113916993141, |
|
"learning_rate": 0.00019330700478071089, |
|
"loss": 0.0056, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.0376733595528876, |
|
"grad_norm": 0.029821882024407387, |
|
"learning_rate": 0.00019326543338183332, |
|
"loss": 0.0053, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.037880356033947425, |
|
"grad_norm": 0.010615061037242413, |
|
"learning_rate": 0.00019322386198295573, |
|
"loss": 0.0049, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.03808735251500724, |
|
"grad_norm": 0.00788772851228714, |
|
"learning_rate": 0.00019318229058407817, |
|
"loss": 0.0043, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.03829434899606707, |
|
"grad_norm": 0.0045401486568152905, |
|
"learning_rate": 0.00019314071918520058, |
|
"loss": 0.0056, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.038501345477126886, |
|
"grad_norm": 0.004325198009610176, |
|
"learning_rate": 0.00019309914778632302, |
|
"loss": 0.0059, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.03870834195818671, |
|
"grad_norm": 0.012164851650595665, |
|
"learning_rate": 0.00019305757638744543, |
|
"loss": 0.0046, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.038915338439246536, |
|
"grad_norm": 0.02496037259697914, |
|
"learning_rate": 0.00019301600498856786, |
|
"loss": 0.0068, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.039122334920306354, |
|
"grad_norm": 0.011729522608220577, |
|
"learning_rate": 0.0001929744335896903, |
|
"loss": 0.0047, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.03932933140136618, |
|
"grad_norm": 0.003992550540715456, |
|
"learning_rate": 0.0001929328621908127, |
|
"loss": 0.0047, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.039536327882425996, |
|
"grad_norm": 0.012220533564686775, |
|
"learning_rate": 0.00019289129079193515, |
|
"loss": 0.0059, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.03974332436348582, |
|
"grad_norm": 0.0037318835966289043, |
|
"learning_rate": 0.00019284971939305759, |
|
"loss": 0.0049, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.039950320844545646, |
|
"grad_norm": 0.008259239606559277, |
|
"learning_rate": 0.00019280814799418002, |
|
"loss": 0.0047, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.040157317325605464, |
|
"grad_norm": 0.0029760177712887526, |
|
"learning_rate": 0.00019276657659530243, |
|
"loss": 0.0046, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.04036431380666529, |
|
"grad_norm": 0.007835526019334793, |
|
"learning_rate": 0.00019272500519642487, |
|
"loss": 0.0045, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.04057131028772511, |
|
"grad_norm": 0.019788436591625214, |
|
"learning_rate": 0.0001926834337975473, |
|
"loss": 0.0056, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.04077830676878493, |
|
"grad_norm": 0.0054263221099972725, |
|
"learning_rate": 0.00019264186239866972, |
|
"loss": 0.0047, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.04098530324984475, |
|
"grad_norm": 0.006738686002790928, |
|
"learning_rate": 0.00019260029099979215, |
|
"loss": 0.0053, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.041192299730904575, |
|
"grad_norm": 0.0019211384933441877, |
|
"learning_rate": 0.0001925587196009146, |
|
"loss": 0.0046, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.0413992962119644, |
|
"grad_norm": 0.0064520747400820255, |
|
"learning_rate": 0.000192517148202037, |
|
"loss": 0.0049, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.04160629269302422, |
|
"grad_norm": 0.0034363584127277136, |
|
"learning_rate": 0.00019247557680315944, |
|
"loss": 0.0048, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.04181328917408404, |
|
"grad_norm": 0.021868525072932243, |
|
"learning_rate": 0.00019243400540428187, |
|
"loss": 0.0055, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.04202028565514386, |
|
"grad_norm": 0.011735360138118267, |
|
"learning_rate": 0.0001923924340054043, |
|
"loss": 0.0059, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.042227282136203685, |
|
"grad_norm": 0.008518829010426998, |
|
"learning_rate": 0.00019235086260652672, |
|
"loss": 0.0062, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.0424342786172635, |
|
"grad_norm": 0.007598051335662603, |
|
"learning_rate": 0.00019230929120764916, |
|
"loss": 0.005, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.04264127509832333, |
|
"grad_norm": 0.004902805667370558, |
|
"learning_rate": 0.0001922677198087716, |
|
"loss": 0.0047, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.04284827157938315, |
|
"grad_norm": 0.01092604547739029, |
|
"learning_rate": 0.00019222614840989398, |
|
"loss": 0.0062, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.04305526806044297, |
|
"grad_norm": 0.017829621210694313, |
|
"learning_rate": 0.00019218457701101642, |
|
"loss": 0.0069, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.043262264541502796, |
|
"grad_norm": 0.0056928307749331, |
|
"learning_rate": 0.00019214300561213885, |
|
"loss": 0.0052, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.043469261022562614, |
|
"grad_norm": 0.0032917701173573732, |
|
"learning_rate": 0.0001921014342132613, |
|
"loss": 0.0049, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.04367625750362244, |
|
"grad_norm": 0.00267777475528419, |
|
"learning_rate": 0.0001920598628143837, |
|
"loss": 0.0047, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.04388325398468226, |
|
"grad_norm": 0.005063153337687254, |
|
"learning_rate": 0.00019201829141550614, |
|
"loss": 0.0065, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.04409025046574208, |
|
"grad_norm": 0.007821328938007355, |
|
"learning_rate": 0.00019197672001662857, |
|
"loss": 0.0073, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.044297246946801906, |
|
"grad_norm": 0.0022040351759642363, |
|
"learning_rate": 0.00019193514861775098, |
|
"loss": 0.0042, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.044504243427861724, |
|
"grad_norm": 0.010549996979534626, |
|
"learning_rate": 0.00019189357721887342, |
|
"loss": 0.0048, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.04471123990892155, |
|
"grad_norm": 0.003572909627109766, |
|
"learning_rate": 0.00019185200581999586, |
|
"loss": 0.0048, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.04491823638998137, |
|
"grad_norm": 0.0047572036273777485, |
|
"learning_rate": 0.00019181043442111827, |
|
"loss": 0.0046, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.04512523287104119, |
|
"grad_norm": 0.003330536652356386, |
|
"learning_rate": 0.0001917688630222407, |
|
"loss": 0.0043, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.04533222935210102, |
|
"grad_norm": 0.003489327384158969, |
|
"learning_rate": 0.00019172729162336314, |
|
"loss": 0.0044, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.045539225833160835, |
|
"grad_norm": 0.006631118711084127, |
|
"learning_rate": 0.00019168572022448555, |
|
"loss": 0.0046, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.04574622231422066, |
|
"grad_norm": 0.0016155489720404148, |
|
"learning_rate": 0.000191644148825608, |
|
"loss": 0.0045, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.04595321879528048, |
|
"grad_norm": 0.00484581058844924, |
|
"learning_rate": 0.00019160257742673043, |
|
"loss": 0.0049, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.0461602152763403, |
|
"grad_norm": 0.016010191291570663, |
|
"learning_rate": 0.00019156100602785286, |
|
"loss": 0.0052, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.04636721175740013, |
|
"grad_norm": 0.0038696257397532463, |
|
"learning_rate": 0.00019151943462897527, |
|
"loss": 0.0046, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.046574208238459945, |
|
"grad_norm": 0.003132582874968648, |
|
"learning_rate": 0.0001914778632300977, |
|
"loss": 0.0055, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.04678120471951977, |
|
"grad_norm": 0.003856977680698037, |
|
"learning_rate": 0.00019143629183122015, |
|
"loss": 0.0048, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.04698820120057959, |
|
"grad_norm": 0.0084042027592659, |
|
"learning_rate": 0.00019139472043234256, |
|
"loss": 0.005, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.04719519768163941, |
|
"grad_norm": 0.004033537581562996, |
|
"learning_rate": 0.000191353149033465, |
|
"loss": 0.0053, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.04740219416269923, |
|
"grad_norm": 0.002745938254520297, |
|
"learning_rate": 0.0001913115776345874, |
|
"loss": 0.005, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.047609190643759056, |
|
"grad_norm": 0.0033783107064664364, |
|
"learning_rate": 0.00019127000623570984, |
|
"loss": 0.0045, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.04781618712481888, |
|
"grad_norm": 0.011202207766473293, |
|
"learning_rate": 0.00019122843483683225, |
|
"loss": 0.0054, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.0480231836058787, |
|
"grad_norm": 0.006426738575100899, |
|
"learning_rate": 0.0001911868634379547, |
|
"loss": 0.0049, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.04823018008693852, |
|
"grad_norm": 0.0026592197827994823, |
|
"learning_rate": 0.00019114529203907713, |
|
"loss": 0.0045, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.04843717656799834, |
|
"grad_norm": 0.0044938791543245316, |
|
"learning_rate": 0.00019110372064019954, |
|
"loss": 0.0048, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.048644173049058166, |
|
"grad_norm": 0.004537639208137989, |
|
"learning_rate": 0.00019106214924132197, |
|
"loss": 0.0043, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.04885116953011799, |
|
"grad_norm": 0.007899290882050991, |
|
"learning_rate": 0.0001910205778424444, |
|
"loss": 0.0047, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.04905816601117781, |
|
"grad_norm": 0.003346335142850876, |
|
"learning_rate": 0.00019097900644356682, |
|
"loss": 0.0046, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.049265162492237634, |
|
"grad_norm": 0.012666060589253902, |
|
"learning_rate": 0.00019093743504468926, |
|
"loss": 0.0051, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.04947215897329745, |
|
"grad_norm": 0.005454343743622303, |
|
"learning_rate": 0.0001908958636458117, |
|
"loss": 0.0052, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.04967915545435728, |
|
"grad_norm": 0.0051568858325481415, |
|
"learning_rate": 0.00019085429224693413, |
|
"loss": 0.0054, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.049886151935417095, |
|
"grad_norm": 0.005354621913284063, |
|
"learning_rate": 0.00019081272084805654, |
|
"loss": 0.0045, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.05009314841647692, |
|
"grad_norm": 0.00728578818961978, |
|
"learning_rate": 0.00019077114944917898, |
|
"loss": 0.0056, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.050300144897536744, |
|
"grad_norm": 0.012164080515503883, |
|
"learning_rate": 0.00019072957805030141, |
|
"loss": 0.0055, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.05050714137859656, |
|
"grad_norm": 0.006451157853007317, |
|
"learning_rate": 0.00019068800665142382, |
|
"loss": 0.0045, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.05071413785965639, |
|
"grad_norm": 0.04119205102324486, |
|
"learning_rate": 0.00019064643525254626, |
|
"loss": 0.0048, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.050921134340716205, |
|
"grad_norm": 0.006683278828859329, |
|
"learning_rate": 0.0001906048638536687, |
|
"loss": 0.0054, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.05112813082177603, |
|
"grad_norm": 0.004711155779659748, |
|
"learning_rate": 0.0001905632924547911, |
|
"loss": 0.0052, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.051335127302835855, |
|
"grad_norm": 0.022338053211569786, |
|
"learning_rate": 0.00019052172105591355, |
|
"loss": 0.0051, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.05154212378389567, |
|
"grad_norm": 0.0033723730593919754, |
|
"learning_rate": 0.00019048014965703598, |
|
"loss": 0.0041, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.0517491202649555, |
|
"grad_norm": 0.013185818679630756, |
|
"learning_rate": 0.00019043857825815842, |
|
"loss": 0.0071, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.051956116746015316, |
|
"grad_norm": 0.01329563558101654, |
|
"learning_rate": 0.0001903970068592808, |
|
"loss": 0.0047, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.05216311322707514, |
|
"grad_norm": 0.0038430816493928432, |
|
"learning_rate": 0.00019035543546040324, |
|
"loss": 0.0049, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.05237010970813496, |
|
"grad_norm": 0.006868777330964804, |
|
"learning_rate": 0.00019031386406152568, |
|
"loss": 0.0045, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.05257710618919478, |
|
"grad_norm": 0.002608188660815358, |
|
"learning_rate": 0.0001902722926626481, |
|
"loss": 0.005, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.05278410267025461, |
|
"grad_norm": 0.005354885943233967, |
|
"learning_rate": 0.00019023072126377052, |
|
"loss": 0.0058, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.052991099151314426, |
|
"grad_norm": 0.006729793269187212, |
|
"learning_rate": 0.00019018914986489296, |
|
"loss": 0.0056, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.05319809563237425, |
|
"grad_norm": 0.0042056431993842125, |
|
"learning_rate": 0.0001901475784660154, |
|
"loss": 0.0042, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.05340509211343407, |
|
"grad_norm": 0.006845582276582718, |
|
"learning_rate": 0.0001901060070671378, |
|
"loss": 0.0048, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.053612088594493894, |
|
"grad_norm": 0.004010177683085203, |
|
"learning_rate": 0.00019006443566826025, |
|
"loss": 0.005, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.05381908507555372, |
|
"grad_norm": 0.0019074059091508389, |
|
"learning_rate": 0.00019002286426938268, |
|
"loss": 0.005, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.05402608155661354, |
|
"grad_norm": 0.002619614126160741, |
|
"learning_rate": 0.0001899812928705051, |
|
"loss": 0.0043, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.05423307803767336, |
|
"grad_norm": 0.002715028589591384, |
|
"learning_rate": 0.00018993972147162753, |
|
"loss": 0.0046, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.05444007451873318, |
|
"grad_norm": 0.004556506406515837, |
|
"learning_rate": 0.00018989815007274997, |
|
"loss": 0.0046, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.054647070999793004, |
|
"grad_norm": 0.010189310647547245, |
|
"learning_rate": 0.00018985657867387238, |
|
"loss": 0.0047, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.05485406748085282, |
|
"grad_norm": 0.002120416844263673, |
|
"learning_rate": 0.0001898150072749948, |
|
"loss": 0.0051, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.05506106396191265, |
|
"grad_norm": 0.008936331607401371, |
|
"learning_rate": 0.00018977343587611725, |
|
"loss": 0.005, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.05526806044297247, |
|
"grad_norm": 0.0026866530533879995, |
|
"learning_rate": 0.00018973186447723966, |
|
"loss": 0.0047, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.05547505692403229, |
|
"grad_norm": 0.009859035722911358, |
|
"learning_rate": 0.0001896902930783621, |
|
"loss": 0.0052, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.055682053405092115, |
|
"grad_norm": 0.0024370807223021984, |
|
"learning_rate": 0.00018964872167948453, |
|
"loss": 0.0053, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.05588904988615193, |
|
"grad_norm": 0.006978074554353952, |
|
"learning_rate": 0.00018960715028060697, |
|
"loss": 0.0062, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.05609604636721176, |
|
"grad_norm": 0.0037223445251584053, |
|
"learning_rate": 0.00018956557888172938, |
|
"loss": 0.0047, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.05630304284827158, |
|
"grad_norm": 0.018946697935461998, |
|
"learning_rate": 0.00018952400748285182, |
|
"loss": 0.0054, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.0565100393293314, |
|
"grad_norm": 0.009405361488461494, |
|
"learning_rate": 0.00018948243608397423, |
|
"loss": 0.0043, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.056717035810391225, |
|
"grad_norm": 0.0008082574931904674, |
|
"learning_rate": 0.00018944086468509667, |
|
"loss": 0.0045, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.05692403229145104, |
|
"grad_norm": 0.004950170870870352, |
|
"learning_rate": 0.00018939929328621908, |
|
"loss": 0.0044, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.05713102877251087, |
|
"grad_norm": 0.010343370027840137, |
|
"learning_rate": 0.0001893577218873415, |
|
"loss": 0.0046, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.057338025253570686, |
|
"grad_norm": 0.005096866749227047, |
|
"learning_rate": 0.00018931615048846395, |
|
"loss": 0.005, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.05754502173463051, |
|
"grad_norm": 0.0033187547232955694, |
|
"learning_rate": 0.00018927457908958636, |
|
"loss": 0.0041, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.057752018215690336, |
|
"grad_norm": 0.0031861995812505484, |
|
"learning_rate": 0.0001892330076907088, |
|
"loss": 0.0059, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.057959014696750154, |
|
"grad_norm": 0.0053593809716403484, |
|
"learning_rate": 0.00018919143629183123, |
|
"loss": 0.0047, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.05816601117780998, |
|
"grad_norm": 0.005950809922069311, |
|
"learning_rate": 0.00018914986489295364, |
|
"loss": 0.0046, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.0583730076588698, |
|
"grad_norm": 0.006816082634031773, |
|
"learning_rate": 0.00018910829349407608, |
|
"loss": 0.0048, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.05858000413992962, |
|
"grad_norm": 0.005741049535572529, |
|
"learning_rate": 0.00018906672209519852, |
|
"loss": 0.0048, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.058787000620989446, |
|
"grad_norm": 0.004367714747786522, |
|
"learning_rate": 0.00018902515069632093, |
|
"loss": 0.0044, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.058993997102049264, |
|
"grad_norm": 0.0051370360888540745, |
|
"learning_rate": 0.00018898357929744336, |
|
"loss": 0.0049, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.05920099358310909, |
|
"grad_norm": 0.00557302962988615, |
|
"learning_rate": 0.0001889420078985658, |
|
"loss": 0.005, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.05940799006416891, |
|
"grad_norm": 0.004271439276635647, |
|
"learning_rate": 0.00018890043649968824, |
|
"loss": 0.0042, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.05961498654522873, |
|
"grad_norm": 0.00942255649715662, |
|
"learning_rate": 0.00018885886510081065, |
|
"loss": 0.005, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.05982198302628855, |
|
"grad_norm": 0.001850366359576583, |
|
"learning_rate": 0.00018881729370193309, |
|
"loss": 0.0049, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.060028979507348375, |
|
"grad_norm": 0.005905089899897575, |
|
"learning_rate": 0.00018877572230305552, |
|
"loss": 0.0049, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.0602359759884082, |
|
"grad_norm": 0.006148039363324642, |
|
"learning_rate": 0.00018873415090417793, |
|
"loss": 0.0058, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.06044297246946802, |
|
"grad_norm": 0.004467264749109745, |
|
"learning_rate": 0.00018869257950530037, |
|
"loss": 0.0046, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.06064996895052784, |
|
"grad_norm": 0.0026016500778496265, |
|
"learning_rate": 0.0001886510081064228, |
|
"loss": 0.0046, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.06085696543158766, |
|
"grad_norm": 0.00536306481808424, |
|
"learning_rate": 0.00018860943670754522, |
|
"loss": 0.0046, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.061063961912647485, |
|
"grad_norm": 0.00043015365372411907, |
|
"learning_rate": 0.00018856786530866763, |
|
"loss": 0.0051, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.06127095839370731, |
|
"grad_norm": 0.002035475103184581, |
|
"learning_rate": 0.00018852629390979006, |
|
"loss": 0.0044, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.06147795487476713, |
|
"grad_norm": 0.004757678601890802, |
|
"learning_rate": 0.0001884847225109125, |
|
"loss": 0.0055, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.06168495135582695, |
|
"grad_norm": 0.004153924528509378, |
|
"learning_rate": 0.0001884431511120349, |
|
"loss": 0.0047, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.06189194783688677, |
|
"grad_norm": 0.004693943541496992, |
|
"learning_rate": 0.00018840157971315735, |
|
"loss": 0.006, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.062098944317946596, |
|
"grad_norm": 0.005683131981641054, |
|
"learning_rate": 0.00018836000831427979, |
|
"loss": 0.0054, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.062305940799006414, |
|
"grad_norm": 0.01867171749472618, |
|
"learning_rate": 0.0001883184369154022, |
|
"loss": 0.0047, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.06251293728006624, |
|
"grad_norm": 0.010631178505718708, |
|
"learning_rate": 0.00018827686551652463, |
|
"loss": 0.0052, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.06271993376112606, |
|
"grad_norm": 0.0025544106028974056, |
|
"learning_rate": 0.00018823529411764707, |
|
"loss": 0.0044, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.06292693024218589, |
|
"grad_norm": 0.005773225799202919, |
|
"learning_rate": 0.0001881937227187695, |
|
"loss": 0.0047, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.0631339267232457, |
|
"grad_norm": 0.008623667992651463, |
|
"learning_rate": 0.00018815215131989192, |
|
"loss": 0.0041, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.06334092320430552, |
|
"grad_norm": 0.0023567613679915667, |
|
"learning_rate": 0.00018811057992101435, |
|
"loss": 0.0044, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.06354791968536534, |
|
"grad_norm": 0.005568039603531361, |
|
"learning_rate": 0.0001880690085221368, |
|
"loss": 0.0048, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.06375491616642517, |
|
"grad_norm": 0.006429716479033232, |
|
"learning_rate": 0.0001880274371232592, |
|
"loss": 0.0049, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.06396191264748499, |
|
"grad_norm": 0.0012258924543857574, |
|
"learning_rate": 0.00018798586572438164, |
|
"loss": 0.0046, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.06416890912854481, |
|
"grad_norm": 0.013900945894420147, |
|
"learning_rate": 0.00018794429432550407, |
|
"loss": 0.0054, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.06437590560960464, |
|
"grad_norm": 0.0164532121270895, |
|
"learning_rate": 0.00018790272292662648, |
|
"loss": 0.0052, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.06458290209066446, |
|
"grad_norm": 0.0039049319457262754, |
|
"learning_rate": 0.00018786115152774892, |
|
"loss": 0.0047, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.06478989857172428, |
|
"grad_norm": 0.0025435080751776695, |
|
"learning_rate": 0.00018781958012887136, |
|
"loss": 0.0045, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.06499689505278411, |
|
"grad_norm": 0.0066621373407542706, |
|
"learning_rate": 0.00018777800872999377, |
|
"loss": 0.0057, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.06520389153384393, |
|
"grad_norm": 0.0011938404059037566, |
|
"learning_rate": 0.0001877364373311162, |
|
"loss": 0.0045, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.06541088801490375, |
|
"grad_norm": 0.005898007657378912, |
|
"learning_rate": 0.00018769486593223864, |
|
"loss": 0.0047, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.06561788449596356, |
|
"grad_norm": 0.004224811680614948, |
|
"learning_rate": 0.00018765329453336105, |
|
"loss": 0.0059, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.0658248809770234, |
|
"grad_norm": 0.02106441557407379, |
|
"learning_rate": 0.00018761172313448346, |
|
"loss": 0.0051, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.06603187745808321, |
|
"grad_norm": 0.0010845439974218607, |
|
"learning_rate": 0.0001875701517356059, |
|
"loss": 0.0042, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.06623887393914303, |
|
"grad_norm": 0.007267239037901163, |
|
"learning_rate": 0.00018752858033672834, |
|
"loss": 0.0048, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.06644587042020286, |
|
"grad_norm": 0.0066713071428239346, |
|
"learning_rate": 0.00018748700893785077, |
|
"loss": 0.0049, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.06665286690126268, |
|
"grad_norm": 0.007623916491866112, |
|
"learning_rate": 0.00018744543753897318, |
|
"loss": 0.0052, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.0668598633823225, |
|
"grad_norm": 0.003484464716166258, |
|
"learning_rate": 0.00018740386614009562, |
|
"loss": 0.0045, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.06706685986338232, |
|
"grad_norm": 0.013743946328759193, |
|
"learning_rate": 0.00018736229474121806, |
|
"loss": 0.0049, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.06727385634444215, |
|
"grad_norm": 0.0030819710809737444, |
|
"learning_rate": 0.00018732072334234047, |
|
"loss": 0.0052, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.06748085282550197, |
|
"grad_norm": 0.014786194078624249, |
|
"learning_rate": 0.0001872791519434629, |
|
"loss": 0.0058, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.06768784930656178, |
|
"grad_norm": 0.007248689886182547, |
|
"learning_rate": 0.00018723758054458534, |
|
"loss": 0.0046, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.06789484578762162, |
|
"grad_norm": 0.010181601159274578, |
|
"learning_rate": 0.00018719600914570775, |
|
"loss": 0.0053, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.06810184226868143, |
|
"grad_norm": 0.004160667769610882, |
|
"learning_rate": 0.0001871544377468302, |
|
"loss": 0.0056, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.06830883874974125, |
|
"grad_norm": 0.0021253142040222883, |
|
"learning_rate": 0.00018711286634795263, |
|
"loss": 0.0045, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.06851583523080107, |
|
"grad_norm": 0.0058175004087388515, |
|
"learning_rate": 0.00018707129494907504, |
|
"loss": 0.0058, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.0687228317118609, |
|
"grad_norm": 0.005257429089397192, |
|
"learning_rate": 0.00018702972355019747, |
|
"loss": 0.0049, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.06892982819292072, |
|
"grad_norm": 0.003563474863767624, |
|
"learning_rate": 0.0001869881521513199, |
|
"loss": 0.0043, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.06913682467398054, |
|
"grad_norm": 0.002636804012581706, |
|
"learning_rate": 0.00018694658075244235, |
|
"loss": 0.0046, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.06934382115504037, |
|
"grad_norm": 0.009255892597138882, |
|
"learning_rate": 0.00018690500935356476, |
|
"loss": 0.0053, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.06955081763610019, |
|
"grad_norm": 0.0023986424785107374, |
|
"learning_rate": 0.0001868634379546872, |
|
"loss": 0.0045, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.06975781411716, |
|
"grad_norm": 0.006371774710714817, |
|
"learning_rate": 0.00018682186655580963, |
|
"loss": 0.0046, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.06996481059821984, |
|
"grad_norm": 0.009518152102828026, |
|
"learning_rate": 0.00018678029515693204, |
|
"loss": 0.0049, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.07017180707927965, |
|
"grad_norm": 0.003742037108168006, |
|
"learning_rate": 0.00018673872375805445, |
|
"loss": 0.0064, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.07037880356033947, |
|
"grad_norm": 0.009771923534572124, |
|
"learning_rate": 0.0001866971523591769, |
|
"loss": 0.0045, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.07058580004139929, |
|
"grad_norm": 0.01101437397301197, |
|
"learning_rate": 0.00018665558096029933, |
|
"loss": 0.0055, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.07079279652245912, |
|
"grad_norm": 0.008826150558888912, |
|
"learning_rate": 0.00018661400956142174, |
|
"loss": 0.005, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.07099979300351894, |
|
"grad_norm": 0.006243105512112379, |
|
"learning_rate": 0.00018657243816254417, |
|
"loss": 0.0048, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.07120678948457876, |
|
"grad_norm": 0.0014233957044780254, |
|
"learning_rate": 0.0001865308667636666, |
|
"loss": 0.0045, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.07141378596563859, |
|
"grad_norm": 0.002639338606968522, |
|
"learning_rate": 0.00018648929536478902, |
|
"loss": 0.0053, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.07162078244669841, |
|
"grad_norm": 0.003536937525495887, |
|
"learning_rate": 0.00018644772396591146, |
|
"loss": 0.0048, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.07182777892775823, |
|
"grad_norm": 0.0018274744506925344, |
|
"learning_rate": 0.0001864061525670339, |
|
"loss": 0.0043, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.07203477540881804, |
|
"grad_norm": 0.004306804854422808, |
|
"learning_rate": 0.0001863645811681563, |
|
"loss": 0.0058, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.07224177188987788, |
|
"grad_norm": 0.003877132898196578, |
|
"learning_rate": 0.00018632300976927874, |
|
"loss": 0.0058, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.0724487683709377, |
|
"grad_norm": 0.0018924670293927193, |
|
"learning_rate": 0.00018628143837040118, |
|
"loss": 0.0044, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.07265576485199751, |
|
"grad_norm": 0.005626944359391928, |
|
"learning_rate": 0.00018623986697152361, |
|
"loss": 0.0046, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.07286276133305734, |
|
"grad_norm": 0.006948824506253004, |
|
"learning_rate": 0.00018619829557264602, |
|
"loss": 0.0057, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.07306975781411716, |
|
"grad_norm": 0.002097270218655467, |
|
"learning_rate": 0.00018615672417376846, |
|
"loss": 0.0047, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.07327675429517698, |
|
"grad_norm": 0.0013399182353168726, |
|
"learning_rate": 0.0001861151527748909, |
|
"loss": 0.0047, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.0734837507762368, |
|
"grad_norm": 0.010953530669212341, |
|
"learning_rate": 0.0001860735813760133, |
|
"loss": 0.0061, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.07369074725729663, |
|
"grad_norm": 0.0024627153761684895, |
|
"learning_rate": 0.00018603200997713575, |
|
"loss": 0.0061, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.07389774373835645, |
|
"grad_norm": 0.002271963283419609, |
|
"learning_rate": 0.00018599043857825818, |
|
"loss": 0.0043, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.07410474021941627, |
|
"grad_norm": 0.0036786317359656096, |
|
"learning_rate": 0.0001859488671793806, |
|
"loss": 0.0044, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.0743117367004761, |
|
"grad_norm": 0.006179209798574448, |
|
"learning_rate": 0.00018590729578050303, |
|
"loss": 0.0049, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.07451873318153591, |
|
"grad_norm": 0.0028029060922563076, |
|
"learning_rate": 0.00018586572438162547, |
|
"loss": 0.0051, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.07472572966259573, |
|
"grad_norm": 0.003495444543659687, |
|
"learning_rate": 0.00018582415298274788, |
|
"loss": 0.0055, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.07493272614365556, |
|
"grad_norm": 0.005056194495409727, |
|
"learning_rate": 0.0001857825815838703, |
|
"loss": 0.0052, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.07513972262471538, |
|
"grad_norm": 0.005216763820499182, |
|
"learning_rate": 0.00018574101018499272, |
|
"loss": 0.0064, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.0753467191057752, |
|
"grad_norm": 0.006161578465253115, |
|
"learning_rate": 0.00018569943878611516, |
|
"loss": 0.0051, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.07555371558683502, |
|
"grad_norm": 0.0022837778087705374, |
|
"learning_rate": 0.00018565786738723757, |
|
"loss": 0.0046, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.07576071206789485, |
|
"grad_norm": 0.0017736790468916297, |
|
"learning_rate": 0.00018561629598836, |
|
"loss": 0.0043, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.07596770854895467, |
|
"grad_norm": 0.004538076464086771, |
|
"learning_rate": 0.00018557472458948245, |
|
"loss": 0.0048, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.07617470503001449, |
|
"grad_norm": 0.0018419534899294376, |
|
"learning_rate": 0.00018553315319060488, |
|
"loss": 0.0043, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.07638170151107432, |
|
"grad_norm": 0.004529993515461683, |
|
"learning_rate": 0.0001854915817917273, |
|
"loss": 0.0044, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.07658869799213414, |
|
"grad_norm": 0.0072746858932077885, |
|
"learning_rate": 0.00018545001039284973, |
|
"loss": 0.0045, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.07679569447319395, |
|
"grad_norm": 0.013997476547956467, |
|
"learning_rate": 0.00018540843899397217, |
|
"loss": 0.0048, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.07700269095425377, |
|
"grad_norm": 0.0026135060470551252, |
|
"learning_rate": 0.00018536686759509458, |
|
"loss": 0.0043, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.0772096874353136, |
|
"grad_norm": 0.008647504262626171, |
|
"learning_rate": 0.000185325296196217, |
|
"loss": 0.0043, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.07741668391637342, |
|
"grad_norm": 0.0028080667834728956, |
|
"learning_rate": 0.00018528372479733945, |
|
"loss": 0.0045, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.07762368039743324, |
|
"grad_norm": 0.00593935651704669, |
|
"learning_rate": 0.00018524215339846186, |
|
"loss": 0.0043, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.07783067687849307, |
|
"grad_norm": 0.005520394071936607, |
|
"learning_rate": 0.0001852005819995843, |
|
"loss": 0.0044, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.07803767335955289, |
|
"grad_norm": 0.004040780942887068, |
|
"learning_rate": 0.00018515901060070673, |
|
"loss": 0.0044, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.07824466984061271, |
|
"grad_norm": 0.006418270990252495, |
|
"learning_rate": 0.00018511743920182914, |
|
"loss": 0.0044, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.07845166632167253, |
|
"grad_norm": 0.01350860670208931, |
|
"learning_rate": 0.00018507586780295158, |
|
"loss": 0.0044, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.07865866280273236, |
|
"grad_norm": 0.011060641147196293, |
|
"learning_rate": 0.00018503429640407402, |
|
"loss": 0.004, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.07886565928379217, |
|
"grad_norm": 0.004036550410091877, |
|
"learning_rate": 0.00018499272500519646, |
|
"loss": 0.0038, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.07907265576485199, |
|
"grad_norm": 0.003959359601140022, |
|
"learning_rate": 0.00018495115360631887, |
|
"loss": 0.0038, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.07927965224591182, |
|
"grad_norm": 0.005686459131538868, |
|
"learning_rate": 0.00018490958220744128, |
|
"loss": 0.0047, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.07948664872697164, |
|
"grad_norm": 0.007630357053130865, |
|
"learning_rate": 0.0001848680108085637, |
|
"loss": 0.0042, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.07969364520803146, |
|
"grad_norm": 0.005852843634784222, |
|
"learning_rate": 0.00018482643940968615, |
|
"loss": 0.0036, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.07990064168909129, |
|
"grad_norm": 0.005719276610761881, |
|
"learning_rate": 0.00018478486801080856, |
|
"loss": 0.0033, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.08010763817015111, |
|
"grad_norm": 0.007002281956374645, |
|
"learning_rate": 0.000184743296611931, |
|
"loss": 0.0033, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.08031463465121093, |
|
"grad_norm": 0.007220590952783823, |
|
"learning_rate": 0.00018470172521305343, |
|
"loss": 0.0046, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.08052163113227075, |
|
"grad_norm": 0.005552125629037619, |
|
"learning_rate": 0.00018466015381417584, |
|
"loss": 0.0028, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.08072862761333058, |
|
"grad_norm": 0.011994168162345886, |
|
"learning_rate": 0.00018461858241529828, |
|
"loss": 0.0028, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.0809356240943904, |
|
"grad_norm": 0.011316240765154362, |
|
"learning_rate": 0.00018457701101642072, |
|
"loss": 0.0026, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.08114262057545021, |
|
"grad_norm": 0.006591492332518101, |
|
"learning_rate": 0.00018453543961754313, |
|
"loss": 0.0022, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.08134961705651005, |
|
"grad_norm": 0.007715560495853424, |
|
"learning_rate": 0.00018449386821866556, |
|
"loss": 0.0032, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.08155661353756986, |
|
"grad_norm": 0.009517872706055641, |
|
"learning_rate": 0.000184452296819788, |
|
"loss": 0.0016, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.08176361001862968, |
|
"grad_norm": 0.79290372133255, |
|
"learning_rate": 0.0001844107254209104, |
|
"loss": 0.0117, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.0819706064996895, |
|
"grad_norm": 0.10478183627128601, |
|
"learning_rate": 0.00018436915402203285, |
|
"loss": 0.0041, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.08217760298074933, |
|
"grad_norm": 0.11330251395702362, |
|
"learning_rate": 0.00018432758262315529, |
|
"loss": 0.0349, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.08238459946180915, |
|
"grad_norm": 0.022089608013629913, |
|
"learning_rate": 0.00018428601122427772, |
|
"loss": 0.0028, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.08259159594286897, |
|
"grad_norm": 0.045795392245054245, |
|
"learning_rate": 0.00018424443982540013, |
|
"loss": 0.0027, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.0827985924239288, |
|
"grad_norm": 0.052710726857185364, |
|
"learning_rate": 0.00018420286842652257, |
|
"loss": 0.0033, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.08300558890498862, |
|
"grad_norm": 0.0732388123869896, |
|
"learning_rate": 0.000184161297027645, |
|
"loss": 0.0031, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.08321258538604843, |
|
"grad_norm": 0.03560757264494896, |
|
"learning_rate": 0.00018411972562876742, |
|
"loss": 0.0025, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.08341958186710825, |
|
"grad_norm": 0.039032500237226486, |
|
"learning_rate": 0.00018407815422988985, |
|
"loss": 0.0027, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.08362657834816808, |
|
"grad_norm": 0.018673432990908623, |
|
"learning_rate": 0.0001840365828310123, |
|
"loss": 0.0018, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.0838335748292279, |
|
"grad_norm": 0.06421470642089844, |
|
"learning_rate": 0.0001839950114321347, |
|
"loss": 0.0293, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.08404057131028772, |
|
"grad_norm": 0.05356355383992195, |
|
"learning_rate": 0.0001839534400332571, |
|
"loss": 0.0234, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.08424756779134755, |
|
"grad_norm": 0.047022175043821335, |
|
"learning_rate": 0.00018391186863437955, |
|
"loss": 0.0198, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.08445456427240737, |
|
"grad_norm": 0.017866840586066246, |
|
"learning_rate": 0.00018387029723550199, |
|
"loss": 0.0017, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.08466156075346719, |
|
"grad_norm": 0.010899499990046024, |
|
"learning_rate": 0.0001838287258366244, |
|
"loss": 0.0013, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.084868557234527, |
|
"grad_norm": 0.0167918112128973, |
|
"learning_rate": 0.00018378715443774683, |
|
"loss": 0.0024, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.08507555371558684, |
|
"grad_norm": 0.02067534811794758, |
|
"learning_rate": 0.00018374558303886927, |
|
"loss": 0.0015, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.08528255019664666, |
|
"grad_norm": 0.01670040749013424, |
|
"learning_rate": 0.00018370401163999168, |
|
"loss": 0.0015, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.08548954667770647, |
|
"grad_norm": 0.008924894034862518, |
|
"learning_rate": 0.00018366244024111412, |
|
"loss": 0.0029, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.0856965431587663, |
|
"grad_norm": 0.013602840714156628, |
|
"learning_rate": 0.00018362086884223655, |
|
"loss": 0.0009, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.08590353963982612, |
|
"grad_norm": 0.013082594610750675, |
|
"learning_rate": 0.000183579297443359, |
|
"loss": 0.002, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.08611053612088594, |
|
"grad_norm": 0.012215960770845413, |
|
"learning_rate": 0.0001835377260444814, |
|
"loss": 0.0017, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.08631753260194577, |
|
"grad_norm": 0.16738812625408173, |
|
"learning_rate": 0.00018349615464560384, |
|
"loss": 0.0173, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.08652452908300559, |
|
"grad_norm": 0.006629611365497112, |
|
"learning_rate": 0.00018345458324672627, |
|
"loss": 0.0006, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.08673152556406541, |
|
"grad_norm": 0.006643650587648153, |
|
"learning_rate": 0.00018341301184784868, |
|
"loss": 0.0015, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.08693852204512523, |
|
"grad_norm": 0.0039656031876802444, |
|
"learning_rate": 0.00018337144044897112, |
|
"loss": 0.0003, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.08714551852618506, |
|
"grad_norm": 0.00564931146800518, |
|
"learning_rate": 0.00018332986905009356, |
|
"loss": 0.0004, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.08735251500724488, |
|
"grad_norm": 0.014363352209329605, |
|
"learning_rate": 0.00018328829765121597, |
|
"loss": 0.0006, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.0875595114883047, |
|
"grad_norm": 0.006862149108201265, |
|
"learning_rate": 0.0001832467262523384, |
|
"loss": 0.0004, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.08776650796936453, |
|
"grad_norm": 0.005224172957241535, |
|
"learning_rate": 0.00018320515485346084, |
|
"loss": 0.0003, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.08797350445042434, |
|
"grad_norm": 0.012813829816877842, |
|
"learning_rate": 0.00018316358345458325, |
|
"loss": 0.0005, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.08818050093148416, |
|
"grad_norm": 0.0045601376332342625, |
|
"learning_rate": 0.0001831220120557057, |
|
"loss": 0.0003, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.08838749741254398, |
|
"grad_norm": 0.002229505218565464, |
|
"learning_rate": 0.0001830804406568281, |
|
"loss": 0.0002, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.08859449389360381, |
|
"grad_norm": 0.005202361848205328, |
|
"learning_rate": 0.00018303886925795054, |
|
"loss": 0.0003, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.08880149037466363, |
|
"grad_norm": 0.010837195441126823, |
|
"learning_rate": 0.00018299729785907295, |
|
"loss": 0.0016, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.08900848685572345, |
|
"grad_norm": 0.006401981692761183, |
|
"learning_rate": 0.00018295572646019538, |
|
"loss": 0.0002, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.08921548333678328, |
|
"grad_norm": 0.0025153057649731636, |
|
"learning_rate": 0.00018291415506131782, |
|
"loss": 0.0001, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.0894224798178431, |
|
"grad_norm": 0.009693821892142296, |
|
"learning_rate": 0.00018287258366244023, |
|
"loss": 0.0004, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.08962947629890292, |
|
"grad_norm": 0.0013723783195018768, |
|
"learning_rate": 0.00018283101226356267, |
|
"loss": 0.0001, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.08983647277996273, |
|
"grad_norm": 0.008555575273931026, |
|
"learning_rate": 0.0001827894408646851, |
|
"loss": 0.0018, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.09004346926102257, |
|
"grad_norm": 0.0028277519159018993, |
|
"learning_rate": 0.00018274786946580754, |
|
"loss": 0.0002, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.09025046574208238, |
|
"grad_norm": 0.014325006864964962, |
|
"learning_rate": 0.00018270629806692995, |
|
"loss": 0.0009, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.0904574622231422, |
|
"grad_norm": 0.008406553417444229, |
|
"learning_rate": 0.0001826647266680524, |
|
"loss": 0.0012, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.09066445870420203, |
|
"grad_norm": 0.0018985685892403126, |
|
"learning_rate": 0.00018262315526917483, |
|
"loss": 0.0002, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.09087145518526185, |
|
"grad_norm": 0.008550492115318775, |
|
"learning_rate": 0.00018258158387029724, |
|
"loss": 0.0024, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.09107845166632167, |
|
"grad_norm": 0.0008987212786450982, |
|
"learning_rate": 0.00018254001247141967, |
|
"loss": 0.0001, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.0912854481473815, |
|
"grad_norm": 0.002059886697679758, |
|
"learning_rate": 0.0001824984410725421, |
|
"loss": 0.0001, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.09149244462844132, |
|
"grad_norm": 0.003429644973948598, |
|
"learning_rate": 0.00018245686967366452, |
|
"loss": 0.0001, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.09169944110950114, |
|
"grad_norm": 0.0026945085264742374, |
|
"learning_rate": 0.00018241529827478696, |
|
"loss": 0.0011, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.09190643759056095, |
|
"grad_norm": 0.00299448031000793, |
|
"learning_rate": 0.0001823737268759094, |
|
"loss": 0.0001, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.09211343407162079, |
|
"grad_norm": 0.004376763943582773, |
|
"learning_rate": 0.00018233215547703183, |
|
"loss": 0.0011, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.0923204305526806, |
|
"grad_norm": 0.005078963004052639, |
|
"learning_rate": 0.00018229058407815424, |
|
"loss": 0.0002, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.09252742703374042, |
|
"grad_norm": 0.009306194260716438, |
|
"learning_rate": 0.00018224901267927668, |
|
"loss": 0.0003, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.09273442351480025, |
|
"grad_norm": 0.0030481938738375902, |
|
"learning_rate": 0.00018220744128039912, |
|
"loss": 0.0013, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.09294141999586007, |
|
"grad_norm": 0.013268841430544853, |
|
"learning_rate": 0.0001821658698815215, |
|
"loss": 0.0017, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.09314841647691989, |
|
"grad_norm": 0.006063752807676792, |
|
"learning_rate": 0.00018212429848264394, |
|
"loss": 0.0002, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.09335541295797971, |
|
"grad_norm": 0.00182344822678715, |
|
"learning_rate": 0.00018208272708376637, |
|
"loss": 0.0001, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.09356240943903954, |
|
"grad_norm": 0.009372780099511147, |
|
"learning_rate": 0.0001820411556848888, |
|
"loss": 0.0005, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.09376940592009936, |
|
"grad_norm": 0.003136920742690563, |
|
"learning_rate": 0.00018199958428601122, |
|
"loss": 0.0001, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.09397640240115918, |
|
"grad_norm": 0.030062230303883553, |
|
"learning_rate": 0.00018195801288713366, |
|
"loss": 0.0006, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.09418339888221901, |
|
"grad_norm": 0.004309754353016615, |
|
"learning_rate": 0.0001819164414882561, |
|
"loss": 0.0007, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.09439039536327883, |
|
"grad_norm": 0.010606180876493454, |
|
"learning_rate": 0.0001818748700893785, |
|
"loss": 0.0002, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.09459739184433864, |
|
"grad_norm": 0.0054748812690377235, |
|
"learning_rate": 0.00018183329869050094, |
|
"loss": 0.0005, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.09480438832539846, |
|
"grad_norm": 0.001673020888119936, |
|
"learning_rate": 0.00018179172729162338, |
|
"loss": 0.0001, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.0950113848064583, |
|
"grad_norm": 0.004401384387165308, |
|
"learning_rate": 0.0001817501558927458, |
|
"loss": 0.0001, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.09521838128751811, |
|
"grad_norm": 0.005755012389272451, |
|
"learning_rate": 0.00018170858449386822, |
|
"loss": 0.0012, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.09542537776857793, |
|
"grad_norm": 0.004951901733875275, |
|
"learning_rate": 0.00018166701309499066, |
|
"loss": 0.0012, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.09563237424963776, |
|
"grad_norm": 0.0014370133867487311, |
|
"learning_rate": 0.0001816254416961131, |
|
"loss": 0.0001, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.09583937073069758, |
|
"grad_norm": 0.012078122235834599, |
|
"learning_rate": 0.0001815838702972355, |
|
"loss": 0.0013, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.0960463672117574, |
|
"grad_norm": 0.002765175886452198, |
|
"learning_rate": 0.00018154229889835795, |
|
"loss": 0.0009, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.09625336369281723, |
|
"grad_norm": 0.0017701378092169762, |
|
"learning_rate": 0.00018150072749948038, |
|
"loss": 0.0001, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.09646036017387705, |
|
"grad_norm": 0.003232579445466399, |
|
"learning_rate": 0.0001814591561006028, |
|
"loss": 0.0001, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.09666735665493686, |
|
"grad_norm": 0.004619232844561338, |
|
"learning_rate": 0.00018141758470172523, |
|
"loss": 0.0012, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.09687435313599668, |
|
"grad_norm": 0.008126890286803246, |
|
"learning_rate": 0.00018137601330284767, |
|
"loss": 0.0011, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.09708134961705651, |
|
"grad_norm": 0.004719397984445095, |
|
"learning_rate": 0.00018133444190397008, |
|
"loss": 0.0009, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.09728834609811633, |
|
"grad_norm": 0.005153202451765537, |
|
"learning_rate": 0.00018129287050509251, |
|
"loss": 0.0012, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.09749534257917615, |
|
"grad_norm": 0.01385215763002634, |
|
"learning_rate": 0.00018125129910621492, |
|
"loss": 0.0022, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.09770233906023598, |
|
"grad_norm": 0.004983994178473949, |
|
"learning_rate": 0.00018120972770733736, |
|
"loss": 0.0004, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.0979093355412958, |
|
"grad_norm": 0.007088206708431244, |
|
"learning_rate": 0.00018116815630845977, |
|
"loss": 0.0005, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.09811633202235562, |
|
"grad_norm": 0.004754175432026386, |
|
"learning_rate": 0.0001811265849095822, |
|
"loss": 0.0006, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.09832332850341544, |
|
"grad_norm": 0.004105637315660715, |
|
"learning_rate": 0.00018108501351070465, |
|
"loss": 0.0001, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.09853032498447527, |
|
"grad_norm": 0.002687152475118637, |
|
"learning_rate": 0.00018104344211182706, |
|
"loss": 0.0001, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.09873732146553509, |
|
"grad_norm": 0.0023124783765524626, |
|
"learning_rate": 0.0001810018707129495, |
|
"loss": 0.0001, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.0989443179465949, |
|
"grad_norm": 0.007885076105594635, |
|
"learning_rate": 0.00018096029931407193, |
|
"loss": 0.0006, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.09915131442765474, |
|
"grad_norm": 0.0014087413437664509, |
|
"learning_rate": 0.00018091872791519434, |
|
"loss": 0.0007, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.09935831090871455, |
|
"grad_norm": 0.0055119190365076065, |
|
"learning_rate": 0.00018087715651631678, |
|
"loss": 0.0002, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.09956530738977437, |
|
"grad_norm": 0.0003096537839155644, |
|
"learning_rate": 0.0001808355851174392, |
|
"loss": 0.0, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.09977230387083419, |
|
"grad_norm": 0.001899409806355834, |
|
"learning_rate": 0.00018079401371856165, |
|
"loss": 0.0002, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.09997930035189402, |
|
"grad_norm": 0.005635194014757872, |
|
"learning_rate": 0.00018075244231968406, |
|
"loss": 0.0001, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.10018629683295384, |
|
"grad_norm": 0.005856087896972895, |
|
"learning_rate": 0.0001807108709208065, |
|
"loss": 0.001, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.10039329331401366, |
|
"grad_norm": 0.005273948423564434, |
|
"learning_rate": 0.00018066929952192893, |
|
"loss": 0.0013, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.10060028979507349, |
|
"grad_norm": 0.001652201754041016, |
|
"learning_rate": 0.00018062772812305134, |
|
"loss": 0.0001, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.1008072862761333, |
|
"grad_norm": 0.006849886849522591, |
|
"learning_rate": 0.00018058615672417378, |
|
"loss": 0.0001, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.10101428275719312, |
|
"grad_norm": 0.009754250757396221, |
|
"learning_rate": 0.00018054458532529622, |
|
"loss": 0.0023, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.10122127923825296, |
|
"grad_norm": 0.0038455536123365164, |
|
"learning_rate": 0.00018050301392641863, |
|
"loss": 0.0016, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.10142827571931277, |
|
"grad_norm": 0.0006483698962256312, |
|
"learning_rate": 0.00018046144252754107, |
|
"loss": 0.0, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.10163527220037259, |
|
"grad_norm": 0.008700639940798283, |
|
"learning_rate": 0.0001804198711286635, |
|
"loss": 0.0007, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.10184226868143241, |
|
"grad_norm": 0.004151639994233847, |
|
"learning_rate": 0.00018037829972978594, |
|
"loss": 0.0006, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.10204926516249224, |
|
"grad_norm": 0.003242357401177287, |
|
"learning_rate": 0.00018033672833090832, |
|
"loss": 0.0001, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.10225626164355206, |
|
"grad_norm": 0.0038309101946651936, |
|
"learning_rate": 0.00018029515693203076, |
|
"loss": 0.0002, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.10246325812461188, |
|
"grad_norm": 0.0032492976170033216, |
|
"learning_rate": 0.0001802535855331532, |
|
"loss": 0.0001, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.10267025460567171, |
|
"grad_norm": 0.005621058400720358, |
|
"learning_rate": 0.0001802120141342756, |
|
"loss": 0.0002, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.10287725108673153, |
|
"grad_norm": 0.007613383699208498, |
|
"learning_rate": 0.00018017044273539804, |
|
"loss": 0.0013, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.10308424756779135, |
|
"grad_norm": 0.004469983279705048, |
|
"learning_rate": 0.00018012887133652048, |
|
"loss": 0.0001, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.10329124404885116, |
|
"grad_norm": 0.010518516413867474, |
|
"learning_rate": 0.00018008729993764292, |
|
"loss": 0.0002, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.103498240529911, |
|
"grad_norm": 0.0019439860479906201, |
|
"learning_rate": 0.00018004572853876533, |
|
"loss": 0.0001, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.10370523701097081, |
|
"grad_norm": 0.007315011695027351, |
|
"learning_rate": 0.00018000415713988776, |
|
"loss": 0.0002, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.10391223349203063, |
|
"grad_norm": 0.0027510204818099737, |
|
"learning_rate": 0.0001799625857410102, |
|
"loss": 0.0002, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.10411922997309046, |
|
"grad_norm": 0.007963057607412338, |
|
"learning_rate": 0.0001799210143421326, |
|
"loss": 0.0003, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.10432622645415028, |
|
"grad_norm": 0.004816776607185602, |
|
"learning_rate": 0.00017987944294325505, |
|
"loss": 0.001, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.1045332229352101, |
|
"grad_norm": 0.0010107432026416063, |
|
"learning_rate": 0.00017983787154437749, |
|
"loss": 0.0, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.10474021941626992, |
|
"grad_norm": 0.002849761163815856, |
|
"learning_rate": 0.0001797963001454999, |
|
"loss": 0.0001, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.10494721589732975, |
|
"grad_norm": 0.008465790189802647, |
|
"learning_rate": 0.00017975472874662233, |
|
"loss": 0.0017, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.10515421237838957, |
|
"grad_norm": 0.001012888620607555, |
|
"learning_rate": 0.00017971315734774477, |
|
"loss": 0.0, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.10536120885944938, |
|
"grad_norm": 0.005154603160917759, |
|
"learning_rate": 0.0001796715859488672, |
|
"loss": 0.0001, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.10556820534050922, |
|
"grad_norm": 0.012283824384212494, |
|
"learning_rate": 0.00017963001454998962, |
|
"loss": 0.0004, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.10577520182156903, |
|
"grad_norm": 0.001696570310741663, |
|
"learning_rate": 0.00017958844315111205, |
|
"loss": 0.0001, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.10598219830262885, |
|
"grad_norm": 0.0002515624219086021, |
|
"learning_rate": 0.0001795468717522345, |
|
"loss": 0.0, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.10618919478368868, |
|
"grad_norm": 0.0006335057551041245, |
|
"learning_rate": 0.0001795053003533569, |
|
"loss": 0.0, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.1063961912647485, |
|
"grad_norm": 0.0007280270801857114, |
|
"learning_rate": 0.00017946372895447934, |
|
"loss": 0.0, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.10660318774580832, |
|
"grad_norm": 0.0040188622660934925, |
|
"learning_rate": 0.00017942215755560175, |
|
"loss": 0.0002, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.10681018422686814, |
|
"grad_norm": 0.0054796175099909306, |
|
"learning_rate": 0.00017938058615672419, |
|
"loss": 0.0009, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.10701718070792797, |
|
"grad_norm": 0.0034792469814419746, |
|
"learning_rate": 0.0001793390147578466, |
|
"loss": 0.0001, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.10722417718898779, |
|
"grad_norm": 0.00432013813406229, |
|
"learning_rate": 0.00017929744335896903, |
|
"loss": 0.0002, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.1074311736700476, |
|
"grad_norm": 0.004862105939537287, |
|
"learning_rate": 0.00017925587196009147, |
|
"loss": 0.001, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.10763817015110744, |
|
"grad_norm": 0.002249139128252864, |
|
"learning_rate": 0.00017921430056121388, |
|
"loss": 0.0, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.10784516663216726, |
|
"grad_norm": 0.006818681955337524, |
|
"learning_rate": 0.00017917272916233632, |
|
"loss": 0.0004, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.10805216311322707, |
|
"grad_norm": 0.008624670095741749, |
|
"learning_rate": 0.00017913115776345875, |
|
"loss": 0.0004, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.10825915959428689, |
|
"grad_norm": 0.0007210278417915106, |
|
"learning_rate": 0.00017908958636458116, |
|
"loss": 0.0, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.10846615607534672, |
|
"grad_norm": 0.010820691473782063, |
|
"learning_rate": 0.0001790480149657036, |
|
"loss": 0.0003, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.10867315255640654, |
|
"grad_norm": 0.0023018312640488148, |
|
"learning_rate": 0.00017900644356682604, |
|
"loss": 0.0001, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.10888014903746636, |
|
"grad_norm": 0.00019024198991246521, |
|
"learning_rate": 0.00017896487216794845, |
|
"loss": 0.0, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.10908714551852619, |
|
"grad_norm": 0.005011410918086767, |
|
"learning_rate": 0.00017892330076907088, |
|
"loss": 0.0003, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.10929414199958601, |
|
"grad_norm": 0.007016469724476337, |
|
"learning_rate": 0.00017888172937019332, |
|
"loss": 0.0009, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.10950113848064583, |
|
"grad_norm": 0.008118787780404091, |
|
"learning_rate": 0.00017884015797131576, |
|
"loss": 0.0003, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.10970813496170564, |
|
"grad_norm": 0.005854643415659666, |
|
"learning_rate": 0.00017879858657243817, |
|
"loss": 0.0011, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.10991513144276548, |
|
"grad_norm": 0.0038967933505773544, |
|
"learning_rate": 0.0001787570151735606, |
|
"loss": 0.001, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.1101221279238253, |
|
"grad_norm": 0.003802061313763261, |
|
"learning_rate": 0.00017871544377468304, |
|
"loss": 0.0018, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.11032912440488511, |
|
"grad_norm": 0.004740913398563862, |
|
"learning_rate": 0.00017867387237580545, |
|
"loss": 0.0001, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.11053612088594494, |
|
"grad_norm": 0.008046228438615799, |
|
"learning_rate": 0.0001786323009769279, |
|
"loss": 0.0005, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.11074311736700476, |
|
"grad_norm": 0.0014560514828190207, |
|
"learning_rate": 0.00017859072957805033, |
|
"loss": 0.0, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.11095011384806458, |
|
"grad_norm": 0.009222283028066158, |
|
"learning_rate": 0.00017854915817917274, |
|
"loss": 0.0002, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.11115711032912441, |
|
"grad_norm": 0.005452610552310944, |
|
"learning_rate": 0.00017850758678029515, |
|
"loss": 0.0013, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.11136410681018423, |
|
"grad_norm": 0.0007964425021782517, |
|
"learning_rate": 0.00017846601538141758, |
|
"loss": 0.0, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.11157110329124405, |
|
"grad_norm": 0.007167865987867117, |
|
"learning_rate": 0.00017842444398254002, |
|
"loss": 0.0011, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.11177809977230387, |
|
"grad_norm": 0.01039041206240654, |
|
"learning_rate": 0.00017838287258366243, |
|
"loss": 0.0007, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.1119850962533637, |
|
"grad_norm": 0.0001594142959220335, |
|
"learning_rate": 0.00017834130118478487, |
|
"loss": 0.0, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.11219209273442352, |
|
"grad_norm": 0.0023100003600120544, |
|
"learning_rate": 0.0001782997297859073, |
|
"loss": 0.0007, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.11239908921548333, |
|
"grad_norm": 0.003656044602394104, |
|
"learning_rate": 0.00017825815838702972, |
|
"loss": 0.0014, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.11260608569654317, |
|
"grad_norm": 0.0014201959129422903, |
|
"learning_rate": 0.00017821658698815215, |
|
"loss": 0.0001, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.11281308217760298, |
|
"grad_norm": 0.015617001801729202, |
|
"learning_rate": 0.0001781750155892746, |
|
"loss": 0.0002, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.1130200786586628, |
|
"grad_norm": 0.008762934245169163, |
|
"learning_rate": 0.00017813344419039703, |
|
"loss": 0.0003, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.11322707513972262, |
|
"grad_norm": 0.002274678787216544, |
|
"learning_rate": 0.00017809187279151944, |
|
"loss": 0.0001, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.11343407162078245, |
|
"grad_norm": 0.003864066442474723, |
|
"learning_rate": 0.00017805030139264187, |
|
"loss": 0.0005, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.11364106810184227, |
|
"grad_norm": 0.006032771430909634, |
|
"learning_rate": 0.0001780087299937643, |
|
"loss": 0.0003, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.11384806458290209, |
|
"grad_norm": 0.0011935516959056258, |
|
"learning_rate": 0.00017796715859488672, |
|
"loss": 0.0, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.11405506106396192, |
|
"grad_norm": 0.013128140941262245, |
|
"learning_rate": 0.00017792558719600916, |
|
"loss": 0.0006, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.11426205754502174, |
|
"grad_norm": 0.000645163469016552, |
|
"learning_rate": 0.0001778840157971316, |
|
"loss": 0.0, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.11446905402608155, |
|
"grad_norm": 0.011457535438239574, |
|
"learning_rate": 0.000177842444398254, |
|
"loss": 0.0003, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.11467605050714137, |
|
"grad_norm": 0.003057427005842328, |
|
"learning_rate": 0.00017780087299937644, |
|
"loss": 0.0012, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.1148830469882012, |
|
"grad_norm": 0.0017485780408605933, |
|
"learning_rate": 0.00017775930160049888, |
|
"loss": 0.0006, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.11509004346926102, |
|
"grad_norm": 0.0005046813748776913, |
|
"learning_rate": 0.0001777177302016213, |
|
"loss": 0.0, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.11529703995032084, |
|
"grad_norm": 0.007802332751452923, |
|
"learning_rate": 0.00017767615880274373, |
|
"loss": 0.0009, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.11550403643138067, |
|
"grad_norm": 0.0074394443072378635, |
|
"learning_rate": 0.00017763458740386616, |
|
"loss": 0.0021, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.11571103291244049, |
|
"grad_norm": 0.007590603083372116, |
|
"learning_rate": 0.00017759301600498857, |
|
"loss": 0.0004, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.11591802939350031, |
|
"grad_norm": 0.005805825348943472, |
|
"learning_rate": 0.00017755144460611098, |
|
"loss": 0.0019, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.11612502587456014, |
|
"grad_norm": 0.008781611919403076, |
|
"learning_rate": 0.00017750987320723342, |
|
"loss": 0.0017, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.11633202235561996, |
|
"grad_norm": 0.0010139975929632783, |
|
"learning_rate": 0.00017746830180835586, |
|
"loss": 0.0, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.11653901883667978, |
|
"grad_norm": 0.003940982278436422, |
|
"learning_rate": 0.0001774267304094783, |
|
"loss": 0.0001, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.1167460153177396, |
|
"grad_norm": 0.013145407661795616, |
|
"learning_rate": 0.0001773851590106007, |
|
"loss": 0.0012, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.11695301179879942, |
|
"grad_norm": 0.0011159079149365425, |
|
"learning_rate": 0.00017734358761172314, |
|
"loss": 0.0001, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.11716000827985924, |
|
"grad_norm": 0.01270979829132557, |
|
"learning_rate": 0.00017730201621284558, |
|
"loss": 0.0017, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.11736700476091906, |
|
"grad_norm": 0.004431411158293486, |
|
"learning_rate": 0.000177260444813968, |
|
"loss": 0.0001, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.11757400124197889, |
|
"grad_norm": 0.0041249035857617855, |
|
"learning_rate": 0.00017721887341509042, |
|
"loss": 0.0003, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.11778099772303871, |
|
"grad_norm": 0.0006257002823986113, |
|
"learning_rate": 0.00017717730201621286, |
|
"loss": 0.0, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.11798799420409853, |
|
"grad_norm": 0.004501596093177795, |
|
"learning_rate": 0.00017713573061733527, |
|
"loss": 0.0001, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.11819499068515835, |
|
"grad_norm": 0.00742512010037899, |
|
"learning_rate": 0.0001770941592184577, |
|
"loss": 0.0002, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.11840198716621818, |
|
"grad_norm": 0.010421551764011383, |
|
"learning_rate": 0.00017705258781958015, |
|
"loss": 0.0008, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.118608983647278, |
|
"grad_norm": 0.0010451058624312282, |
|
"learning_rate": 0.00017701101642070256, |
|
"loss": 0.0001, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.11881598012833781, |
|
"grad_norm": 0.0014272347325459123, |
|
"learning_rate": 0.000176969445021825, |
|
"loss": 0.0001, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.11902297660939765, |
|
"grad_norm": 0.007021667901426554, |
|
"learning_rate": 0.00017692787362294743, |
|
"loss": 0.0002, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.11922997309045746, |
|
"grad_norm": 0.0050498368218541145, |
|
"learning_rate": 0.00017688630222406987, |
|
"loss": 0.001, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.11943696957151728, |
|
"grad_norm": 0.0004365240456536412, |
|
"learning_rate": 0.00017684473082519228, |
|
"loss": 0.0, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.1196439660525771, |
|
"grad_norm": 0.007502545602619648, |
|
"learning_rate": 0.00017680315942631471, |
|
"loss": 0.0002, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.11985096253363693, |
|
"grad_norm": 0.00824455451220274, |
|
"learning_rate": 0.00017676158802743715, |
|
"loss": 0.0004, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.12005795901469675, |
|
"grad_norm": 0.003414528677240014, |
|
"learning_rate": 0.00017672001662855956, |
|
"loss": 0.0011, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.12026495549575657, |
|
"grad_norm": 0.004608092829585075, |
|
"learning_rate": 0.00017667844522968197, |
|
"loss": 0.0014, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.1204719519768164, |
|
"grad_norm": 0.006573988124728203, |
|
"learning_rate": 0.0001766368738308044, |
|
"loss": 0.0002, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.12067894845787622, |
|
"grad_norm": 0.006878604646772146, |
|
"learning_rate": 0.00017659530243192685, |
|
"loss": 0.0005, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.12088594493893604, |
|
"grad_norm": 0.0013765916228294373, |
|
"learning_rate": 0.00017655373103304926, |
|
"loss": 0.0001, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.12109294141999585, |
|
"grad_norm": 0.009517376311123371, |
|
"learning_rate": 0.0001765121596341717, |
|
"loss": 0.0005, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.12129993790105568, |
|
"grad_norm": 0.02729586698114872, |
|
"learning_rate": 0.00017647058823529413, |
|
"loss": 0.0013, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.1215069343821155, |
|
"grad_norm": 0.005033944733440876, |
|
"learning_rate": 0.00017642901683641654, |
|
"loss": 0.0011, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.12171393086317532, |
|
"grad_norm": 0.001488934038206935, |
|
"learning_rate": 0.00017638744543753898, |
|
"loss": 0.0001, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.12192092734423515, |
|
"grad_norm": 0.004233523737639189, |
|
"learning_rate": 0.0001763458740386614, |
|
"loss": 0.0013, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.12212792382529497, |
|
"grad_norm": 0.001819688593968749, |
|
"learning_rate": 0.00017630430263978382, |
|
"loss": 0.0002, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.12233492030635479, |
|
"grad_norm": 0.0051133958622813225, |
|
"learning_rate": 0.00017626273124090626, |
|
"loss": 0.0003, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.12254191678741462, |
|
"grad_norm": 0.007632863707840443, |
|
"learning_rate": 0.0001762211598420287, |
|
"loss": 0.0005, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.12274891326847444, |
|
"grad_norm": 0.0009289845474995673, |
|
"learning_rate": 0.00017617958844315113, |
|
"loss": 0.0, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.12295590974953426, |
|
"grad_norm": 0.00543027650564909, |
|
"learning_rate": 0.00017613801704427354, |
|
"loss": 0.0001, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.12316290623059407, |
|
"grad_norm": 0.002607417991384864, |
|
"learning_rate": 0.00017609644564539598, |
|
"loss": 0.0002, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.1233699027116539, |
|
"grad_norm": 0.025557972490787506, |
|
"learning_rate": 0.00017605487424651842, |
|
"loss": 0.0002, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.12357689919271372, |
|
"grad_norm": 0.0016189507441595197, |
|
"learning_rate": 0.00017601330284764083, |
|
"loss": 0.0001, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.12378389567377354, |
|
"grad_norm": 0.004612909164279699, |
|
"learning_rate": 0.00017597173144876327, |
|
"loss": 0.0002, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.12399089215483337, |
|
"grad_norm": 0.00019464526849333197, |
|
"learning_rate": 0.0001759301600498857, |
|
"loss": 0.0, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.12419788863589319, |
|
"grad_norm": 0.0013309603091329336, |
|
"learning_rate": 0.0001758885886510081, |
|
"loss": 0.0007, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.12440488511695301, |
|
"grad_norm": 0.002917417325079441, |
|
"learning_rate": 0.00017584701725213055, |
|
"loss": 0.0001, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.12461188159801283, |
|
"grad_norm": 0.004730269778519869, |
|
"learning_rate": 0.000175805445853253, |
|
"loss": 0.0001, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.12481887807907266, |
|
"grad_norm": 0.0036635478027164936, |
|
"learning_rate": 0.0001757638744543754, |
|
"loss": 0.0001, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.12502587456013248, |
|
"grad_norm": 0.002084661042317748, |
|
"learning_rate": 0.0001757223030554978, |
|
"loss": 0.0001, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.1252328710411923, |
|
"grad_norm": 0.006881284527480602, |
|
"learning_rate": 0.00017568073165662024, |
|
"loss": 0.0002, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.1254398675222521, |
|
"grad_norm": 0.0007496286416426301, |
|
"learning_rate": 0.00017563916025774268, |
|
"loss": 0.0, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.12564686400331193, |
|
"grad_norm": 0.0013991744490340352, |
|
"learning_rate": 0.0001755975888588651, |
|
"loss": 0.0, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.12585386048437178, |
|
"grad_norm": 0.00578208127990365, |
|
"learning_rate": 0.00017555601745998753, |
|
"loss": 0.0016, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.1260608569654316, |
|
"grad_norm": 0.0005476415390148759, |
|
"learning_rate": 0.00017551444606110996, |
|
"loss": 0.0, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.1262678534464914, |
|
"grad_norm": 0.003824407234787941, |
|
"learning_rate": 0.0001754728746622324, |
|
"loss": 0.0001, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.12647484992755123, |
|
"grad_norm": 0.0068860347382724285, |
|
"learning_rate": 0.0001754313032633548, |
|
"loss": 0.0003, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.12668184640861105, |
|
"grad_norm": 0.001763600972481072, |
|
"learning_rate": 0.00017538973186447725, |
|
"loss": 0.0002, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.12688884288967087, |
|
"grad_norm": 0.0029042328242212534, |
|
"learning_rate": 0.00017534816046559969, |
|
"loss": 0.0019, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.12709583937073068, |
|
"grad_norm": 0.026835285127162933, |
|
"learning_rate": 0.0001753065890667221, |
|
"loss": 0.0006, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.12730283585179053, |
|
"grad_norm": 0.0025784820318222046, |
|
"learning_rate": 0.00017526501766784453, |
|
"loss": 0.0002, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.12750983233285035, |
|
"grad_norm": 0.000811999780125916, |
|
"learning_rate": 0.00017522344626896697, |
|
"loss": 0.0, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.12771682881391017, |
|
"grad_norm": 0.0023158304393291473, |
|
"learning_rate": 0.00017518187487008938, |
|
"loss": 0.0001, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.12792382529496998, |
|
"grad_norm": 0.00527742225676775, |
|
"learning_rate": 0.00017514030347121182, |
|
"loss": 0.0011, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.1281308217760298, |
|
"grad_norm": 0.004715193063020706, |
|
"learning_rate": 0.00017509873207233425, |
|
"loss": 0.0026, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.12833781825708962, |
|
"grad_norm": 0.001638007932342589, |
|
"learning_rate": 0.00017505716067345666, |
|
"loss": 0.0001, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.12854481473814944, |
|
"grad_norm": 0.0012813376961275935, |
|
"learning_rate": 0.0001750155892745791, |
|
"loss": 0.0001, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.12875181121920928, |
|
"grad_norm": 0.006484480109065771, |
|
"learning_rate": 0.00017497401787570154, |
|
"loss": 0.0004, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.1289588077002691, |
|
"grad_norm": 0.00035095165367238224, |
|
"learning_rate": 0.00017493244647682398, |
|
"loss": 0.0, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.12916580418132892, |
|
"grad_norm": 0.004927014000713825, |
|
"learning_rate": 0.00017489087507794639, |
|
"loss": 0.0012, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.12937280066238874, |
|
"grad_norm": 0.00287305167876184, |
|
"learning_rate": 0.0001748493036790688, |
|
"loss": 0.0001, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.12957979714344856, |
|
"grad_norm": 0.003079169662669301, |
|
"learning_rate": 0.00017480773228019123, |
|
"loss": 0.0001, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.12978679362450837, |
|
"grad_norm": 0.0018820518162101507, |
|
"learning_rate": 0.00017476616088131367, |
|
"loss": 0.0001, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.12999379010556822, |
|
"grad_norm": 0.004426770843565464, |
|
"learning_rate": 0.00017472458948243608, |
|
"loss": 0.0001, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.13020078658662804, |
|
"grad_norm": 0.008074757643043995, |
|
"learning_rate": 0.00017468301808355852, |
|
"loss": 0.0002, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.13040778306768785, |
|
"grad_norm": 0.004479815252125263, |
|
"learning_rate": 0.00017464144668468095, |
|
"loss": 0.0002, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.13061477954874767, |
|
"grad_norm": 0.0016544251702725887, |
|
"learning_rate": 0.00017459987528580336, |
|
"loss": 0.0, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.1308217760298075, |
|
"grad_norm": 0.0007902836659923196, |
|
"learning_rate": 0.0001745583038869258, |
|
"loss": 0.0, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.1310287725108673, |
|
"grad_norm": 0.011000900529325008, |
|
"learning_rate": 0.00017451673248804824, |
|
"loss": 0.0005, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.13123576899192713, |
|
"grad_norm": 0.00046783004654571414, |
|
"learning_rate": 0.00017447516108917065, |
|
"loss": 0.0, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.13144276547298697, |
|
"grad_norm": 0.003358067711815238, |
|
"learning_rate": 0.00017443358969029308, |
|
"loss": 0.0018, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.1316497619540468, |
|
"grad_norm": 0.0025496368762105703, |
|
"learning_rate": 0.00017439201829141552, |
|
"loss": 0.0015, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.1318567584351066, |
|
"grad_norm": 0.0016015061410143971, |
|
"learning_rate": 0.00017435044689253793, |
|
"loss": 0.0002, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.13206375491616643, |
|
"grad_norm": 0.0038993649650365114, |
|
"learning_rate": 0.00017430887549366037, |
|
"loss": 0.0001, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.13227075139722624, |
|
"grad_norm": 0.0033800469245761633, |
|
"learning_rate": 0.0001742673040947828, |
|
"loss": 0.0001, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.13247774787828606, |
|
"grad_norm": 0.0008187236380763352, |
|
"learning_rate": 0.00017422573269590524, |
|
"loss": 0.0, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.13268474435934588, |
|
"grad_norm": 0.005097914487123489, |
|
"learning_rate": 0.00017418416129702765, |
|
"loss": 0.0004, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.13289174084040573, |
|
"grad_norm": 0.0009978336747735739, |
|
"learning_rate": 0.0001741425898981501, |
|
"loss": 0.0, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.13309873732146554, |
|
"grad_norm": 0.004832749720662832, |
|
"learning_rate": 0.00017410101849927253, |
|
"loss": 0.0012, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.13330573380252536, |
|
"grad_norm": 0.0038694944232702255, |
|
"learning_rate": 0.00017405944710039494, |
|
"loss": 0.0001, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.13351273028358518, |
|
"grad_norm": 0.001419690903276205, |
|
"learning_rate": 0.00017401787570151737, |
|
"loss": 0.0, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.133719726764645, |
|
"grad_norm": 0.006202602293342352, |
|
"learning_rate": 0.0001739763043026398, |
|
"loss": 0.0002, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.13392672324570482, |
|
"grad_norm": 0.0008485604776069522, |
|
"learning_rate": 0.00017393473290376222, |
|
"loss": 0.0, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.13413371972676463, |
|
"grad_norm": 0.0050230189226567745, |
|
"learning_rate": 0.00017389316150488463, |
|
"loss": 0.0022, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.13434071620782448, |
|
"grad_norm": 0.002081549260765314, |
|
"learning_rate": 0.00017385159010600707, |
|
"loss": 0.0001, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.1345477126888843, |
|
"grad_norm": 0.001964141381904483, |
|
"learning_rate": 0.0001738100187071295, |
|
"loss": 0.0002, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.13475470916994411, |
|
"grad_norm": 0.0006888345233164728, |
|
"learning_rate": 0.00017376844730825192, |
|
"loss": 0.0, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.13496170565100393, |
|
"grad_norm": 0.002313450677320361, |
|
"learning_rate": 0.00017372687590937435, |
|
"loss": 0.0, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.13516870213206375, |
|
"grad_norm": 0.007078672293573618, |
|
"learning_rate": 0.0001736853045104968, |
|
"loss": 0.0006, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.13537569861312357, |
|
"grad_norm": 0.005166813265532255, |
|
"learning_rate": 0.0001736437331116192, |
|
"loss": 0.0011, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.1355826950941834, |
|
"grad_norm": 0.007185124326497316, |
|
"learning_rate": 0.00017360216171274164, |
|
"loss": 0.0001, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.13578969157524323, |
|
"grad_norm": 0.005528238136321306, |
|
"learning_rate": 0.00017356059031386407, |
|
"loss": 0.0001, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.13599668805630305, |
|
"grad_norm": 0.0077844299376010895, |
|
"learning_rate": 0.0001735190189149865, |
|
"loss": 0.0011, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.13620368453736287, |
|
"grad_norm": 0.00246329209767282, |
|
"learning_rate": 0.00017347744751610892, |
|
"loss": 0.0008, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.13641068101842269, |
|
"grad_norm": 0.005287639796733856, |
|
"learning_rate": 0.00017343587611723136, |
|
"loss": 0.0002, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.1366176774994825, |
|
"grad_norm": 0.006681959610432386, |
|
"learning_rate": 0.0001733943047183538, |
|
"loss": 0.0013, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.13682467398054232, |
|
"grad_norm": 0.0063599334098398685, |
|
"learning_rate": 0.0001733527333194762, |
|
"loss": 0.0003, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.13703167046160214, |
|
"grad_norm": 0.007015643175691366, |
|
"learning_rate": 0.00017331116192059864, |
|
"loss": 0.0003, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.13723866694266199, |
|
"grad_norm": 0.0003168722032569349, |
|
"learning_rate": 0.00017326959052172108, |
|
"loss": 0.0, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.1374456634237218, |
|
"grad_norm": 0.006562775932252407, |
|
"learning_rate": 0.0001732280191228435, |
|
"loss": 0.0008, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.13765265990478162, |
|
"grad_norm": 0.003267984837293625, |
|
"learning_rate": 0.00017318644772396593, |
|
"loss": 0.0001, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.13785965638584144, |
|
"grad_norm": 0.007215241901576519, |
|
"learning_rate": 0.00017314487632508836, |
|
"loss": 0.0021, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.13806665286690126, |
|
"grad_norm": 0.001962031237781048, |
|
"learning_rate": 0.00017310330492621077, |
|
"loss": 0.0, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 0.13827364934796108, |
|
"grad_norm": 0.007086516357958317, |
|
"learning_rate": 0.0001730617335273332, |
|
"loss": 0.0021, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.1384806458290209, |
|
"grad_norm": 0.0063016172498464584, |
|
"learning_rate": 0.00017302016212845562, |
|
"loss": 0.0001, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 0.13868764231008074, |
|
"grad_norm": 0.007975582964718342, |
|
"learning_rate": 0.00017297859072957806, |
|
"loss": 0.0002, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.13889463879114056, |
|
"grad_norm": 0.0030251971911638975, |
|
"learning_rate": 0.00017293701933070047, |
|
"loss": 0.0002, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 0.13910163527220037, |
|
"grad_norm": 0.00741973053663969, |
|
"learning_rate": 0.0001728954479318229, |
|
"loss": 0.0003, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.1393086317532602, |
|
"grad_norm": 0.002640543272718787, |
|
"learning_rate": 0.00017285387653294534, |
|
"loss": 0.0015, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 0.13951562823432, |
|
"grad_norm": 0.0004313603858463466, |
|
"learning_rate": 0.00017281230513406778, |
|
"loss": 0.0, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.13972262471537983, |
|
"grad_norm": 0.0020118863321840763, |
|
"learning_rate": 0.0001727707337351902, |
|
"loss": 0.0005, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.13992962119643967, |
|
"grad_norm": 0.003337120870128274, |
|
"learning_rate": 0.00017272916233631262, |
|
"loss": 0.0007, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.1401366176774995, |
|
"grad_norm": 0.014386707916855812, |
|
"learning_rate": 0.00017268759093743506, |
|
"loss": 0.0004, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 0.1403436141585593, |
|
"grad_norm": 0.006729326210916042, |
|
"learning_rate": 0.00017264601953855747, |
|
"loss": 0.001, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 0.14055061063961913, |
|
"grad_norm": 0.001671936479397118, |
|
"learning_rate": 0.0001726044481396799, |
|
"loss": 0.0002, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 0.14075760712067895, |
|
"grad_norm": 0.007516622077673674, |
|
"learning_rate": 0.00017256287674080235, |
|
"loss": 0.001, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.14096460360173876, |
|
"grad_norm": 0.0027280249632894993, |
|
"learning_rate": 0.00017252130534192476, |
|
"loss": 0.0006, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 0.14117160008279858, |
|
"grad_norm": 0.010556796565651894, |
|
"learning_rate": 0.0001724797339430472, |
|
"loss": 0.0003, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.14137859656385843, |
|
"grad_norm": 0.0027946115005761385, |
|
"learning_rate": 0.00017243816254416963, |
|
"loss": 0.0001, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 0.14158559304491825, |
|
"grad_norm": 0.00467882351949811, |
|
"learning_rate": 0.00017239659114529204, |
|
"loss": 0.0001, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.14179258952597806, |
|
"grad_norm": 0.004167881328612566, |
|
"learning_rate": 0.00017235501974641448, |
|
"loss": 0.0012, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.14199958600703788, |
|
"grad_norm": 0.0034762704744935036, |
|
"learning_rate": 0.00017231344834753691, |
|
"loss": 0.0007, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 0.1422065824880977, |
|
"grad_norm": 0.0005650786333717406, |
|
"learning_rate": 0.00017227187694865935, |
|
"loss": 0.0, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 0.14241357896915752, |
|
"grad_norm": 0.0043237158097326756, |
|
"learning_rate": 0.00017223030554978176, |
|
"loss": 0.0001, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.14262057545021734, |
|
"grad_norm": 0.0071853832341730595, |
|
"learning_rate": 0.0001721887341509042, |
|
"loss": 0.0004, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 0.14282757193127718, |
|
"grad_norm": 0.01868472993373871, |
|
"learning_rate": 0.00017214716275202664, |
|
"loss": 0.0009, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.143034568412337, |
|
"grad_norm": 0.001339295064099133, |
|
"learning_rate": 0.00017210559135314902, |
|
"loss": 0.0009, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 0.14324156489339682, |
|
"grad_norm": 0.00664726085960865, |
|
"learning_rate": 0.00017206401995427146, |
|
"loss": 0.0002, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.14344856137445663, |
|
"grad_norm": 0.006592089310288429, |
|
"learning_rate": 0.0001720224485553939, |
|
"loss": 0.0001, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 0.14365555785551645, |
|
"grad_norm": 0.0005503061693161726, |
|
"learning_rate": 0.00017198087715651633, |
|
"loss": 0.0, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 0.14386255433657627, |
|
"grad_norm": 0.003913522697985172, |
|
"learning_rate": 0.00017193930575763874, |
|
"loss": 0.0002, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.1440695508176361, |
|
"grad_norm": 0.004871245473623276, |
|
"learning_rate": 0.00017189773435876118, |
|
"loss": 0.0002, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.14427654729869593, |
|
"grad_norm": 0.007188999559730291, |
|
"learning_rate": 0.0001718561629598836, |
|
"loss": 0.0002, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 0.14448354377975575, |
|
"grad_norm": 0.003864140482619405, |
|
"learning_rate": 0.00017181459156100602, |
|
"loss": 0.0013, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 0.14469054026081557, |
|
"grad_norm": 0.005774588789790869, |
|
"learning_rate": 0.00017177302016212846, |
|
"loss": 0.0004, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 0.1448975367418754, |
|
"grad_norm": 0.002636535558849573, |
|
"learning_rate": 0.0001717314487632509, |
|
"loss": 0.0007, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.1451045332229352, |
|
"grad_norm": 0.03907289355993271, |
|
"learning_rate": 0.0001716898773643733, |
|
"loss": 0.0019, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 0.14531152970399502, |
|
"grad_norm": 0.005653630942106247, |
|
"learning_rate": 0.00017164830596549574, |
|
"loss": 0.0003, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 0.14551852618505484, |
|
"grad_norm": 0.003644258715212345, |
|
"learning_rate": 0.00017160673456661818, |
|
"loss": 0.0001, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 0.1457255226661147, |
|
"grad_norm": 0.0028953952714800835, |
|
"learning_rate": 0.00017156516316774062, |
|
"loss": 0.0001, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.1459325191471745, |
|
"grad_norm": 0.005685892421752214, |
|
"learning_rate": 0.00017152359176886303, |
|
"loss": 0.0002, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.14613951562823432, |
|
"grad_norm": 0.00946901086717844, |
|
"learning_rate": 0.00017148202036998547, |
|
"loss": 0.0008, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 0.14634651210929414, |
|
"grad_norm": 0.004027761984616518, |
|
"learning_rate": 0.0001714404489711079, |
|
"loss": 0.0001, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 0.14655350859035396, |
|
"grad_norm": 0.0014218458672985435, |
|
"learning_rate": 0.0001713988775722303, |
|
"loss": 0.0, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.14676050507141378, |
|
"grad_norm": 0.0058472915552556515, |
|
"learning_rate": 0.00017135730617335275, |
|
"loss": 0.0002, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 0.1469675015524736, |
|
"grad_norm": 0.004684192128479481, |
|
"learning_rate": 0.0001713157347744752, |
|
"loss": 0.0002, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.14717449803353344, |
|
"grad_norm": 0.002729298546910286, |
|
"learning_rate": 0.0001712741633755976, |
|
"loss": 0.0001, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 0.14738149451459326, |
|
"grad_norm": 0.003782545682042837, |
|
"learning_rate": 0.00017123259197672, |
|
"loss": 0.0022, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.14758849099565308, |
|
"grad_norm": 0.004307260736823082, |
|
"learning_rate": 0.00017119102057784244, |
|
"loss": 0.0009, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 0.1477954874767129, |
|
"grad_norm": 0.01339892577379942, |
|
"learning_rate": 0.00017114944917896488, |
|
"loss": 0.001, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 0.1480024839577727, |
|
"grad_norm": 0.0017793363658711314, |
|
"learning_rate": 0.0001711078777800873, |
|
"loss": 0.0, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.14820948043883253, |
|
"grad_norm": 0.0005680687027052045, |
|
"learning_rate": 0.00017106630638120973, |
|
"loss": 0.0, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.14841647691989235, |
|
"grad_norm": 0.0010823605116456747, |
|
"learning_rate": 0.00017102473498233216, |
|
"loss": 0.0, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 0.1486234734009522, |
|
"grad_norm": 0.006135303992778063, |
|
"learning_rate": 0.00017098316358345457, |
|
"loss": 0.0002, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 0.148830469882012, |
|
"grad_norm": 0.003215776290744543, |
|
"learning_rate": 0.000170941592184577, |
|
"loss": 0.0008, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 0.14903746636307183, |
|
"grad_norm": 0.00970076397061348, |
|
"learning_rate": 0.00017090002078569945, |
|
"loss": 0.0008, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.14924446284413165, |
|
"grad_norm": 0.0037311650812625885, |
|
"learning_rate": 0.00017085844938682189, |
|
"loss": 0.0007, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 0.14945145932519147, |
|
"grad_norm": 0.0035531132016330957, |
|
"learning_rate": 0.0001708168779879443, |
|
"loss": 0.0007, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 0.14965845580625128, |
|
"grad_norm": 0.0013675455702468753, |
|
"learning_rate": 0.00017077530658906673, |
|
"loss": 0.0002, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 0.14986545228731113, |
|
"grad_norm": 0.003667420009151101, |
|
"learning_rate": 0.00017073373519018917, |
|
"loss": 0.0001, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.15007244876837095, |
|
"grad_norm": 0.0006531656836159527, |
|
"learning_rate": 0.00017069216379131158, |
|
"loss": 0.0, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.15027944524943077, |
|
"grad_norm": 0.0029405278619378805, |
|
"learning_rate": 0.00017065059239243402, |
|
"loss": 0.0001, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 0.15048644173049058, |
|
"grad_norm": 0.0020144616719335318, |
|
"learning_rate": 0.00017060902099355645, |
|
"loss": 0.0001, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 0.1506934382115504, |
|
"grad_norm": 0.003123146714642644, |
|
"learning_rate": 0.00017056744959467886, |
|
"loss": 0.0007, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.15090043469261022, |
|
"grad_norm": 0.005841000005602837, |
|
"learning_rate": 0.0001705258781958013, |
|
"loss": 0.0001, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 0.15110743117367004, |
|
"grad_norm": 0.001898916088975966, |
|
"learning_rate": 0.00017048430679692374, |
|
"loss": 0.0, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.15131442765472988, |
|
"grad_norm": 0.0005505726439878345, |
|
"learning_rate": 0.00017044273539804615, |
|
"loss": 0.0, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.1515214241357897, |
|
"grad_norm": 0.022630905732512474, |
|
"learning_rate": 0.00017040116399916859, |
|
"loss": 0.0003, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.15172842061684952, |
|
"grad_norm": 0.0018513459945097566, |
|
"learning_rate": 0.00017035959260029102, |
|
"loss": 0.0, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 0.15193541709790934, |
|
"grad_norm": 0.006640856619924307, |
|
"learning_rate": 0.00017031802120141343, |
|
"loss": 0.0015, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 0.15214241357896915, |
|
"grad_norm": 0.010431594215333462, |
|
"learning_rate": 0.00017027644980253584, |
|
"loss": 0.0007, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.15234941006002897, |
|
"grad_norm": 0.0009595350711606443, |
|
"learning_rate": 0.00017023487840365828, |
|
"loss": 0.0001, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.1525564065410888, |
|
"grad_norm": 0.0019930503331124783, |
|
"learning_rate": 0.00017019330700478072, |
|
"loss": 0.0001, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 0.15276340302214864, |
|
"grad_norm": 0.0020235483534634113, |
|
"learning_rate": 0.00017015173560590313, |
|
"loss": 0.0012, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 0.15297039950320845, |
|
"grad_norm": 0.000323964050039649, |
|
"learning_rate": 0.00017011016420702556, |
|
"loss": 0.0, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 0.15317739598426827, |
|
"grad_norm": 0.004805979318916798, |
|
"learning_rate": 0.000170068592808148, |
|
"loss": 0.0016, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.1533843924653281, |
|
"grad_norm": 0.0007103011594153941, |
|
"learning_rate": 0.00017002702140927044, |
|
"loss": 0.0, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 0.1535913889463879, |
|
"grad_norm": 0.00901501253247261, |
|
"learning_rate": 0.00016998545001039285, |
|
"loss": 0.0001, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 0.15379838542744773, |
|
"grad_norm": 0.01626206934452057, |
|
"learning_rate": 0.00016994387861151528, |
|
"loss": 0.0001, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 0.15400538190850754, |
|
"grad_norm": 0.006600509863346815, |
|
"learning_rate": 0.00016990230721263772, |
|
"loss": 0.0022, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.1542123783895674, |
|
"grad_norm": 0.0031586415134370327, |
|
"learning_rate": 0.00016986073581376013, |
|
"loss": 0.0003, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.1544193748706272, |
|
"grad_norm": 0.00408458337187767, |
|
"learning_rate": 0.00016981916441488257, |
|
"loss": 0.0014, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 0.15462637135168703, |
|
"grad_norm": 0.006417447701096535, |
|
"learning_rate": 0.000169777593016005, |
|
"loss": 0.0013, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 0.15483336783274684, |
|
"grad_norm": 0.002676580101251602, |
|
"learning_rate": 0.00016973602161712742, |
|
"loss": 0.0009, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.15504036431380666, |
|
"grad_norm": 0.003124868730083108, |
|
"learning_rate": 0.00016969445021824985, |
|
"loss": 0.0001, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 0.15524736079486648, |
|
"grad_norm": 0.005617608781903982, |
|
"learning_rate": 0.0001696528788193723, |
|
"loss": 0.0009, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.1554543572759263, |
|
"grad_norm": 0.0029069185256958008, |
|
"learning_rate": 0.00016961130742049473, |
|
"loss": 0.0008, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 0.15566135375698614, |
|
"grad_norm": 0.008944474160671234, |
|
"learning_rate": 0.00016956973602161714, |
|
"loss": 0.0017, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.15586835023804596, |
|
"grad_norm": 0.004935794975608587, |
|
"learning_rate": 0.00016952816462273957, |
|
"loss": 0.0012, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 0.15607534671910578, |
|
"grad_norm": 0.0005579759599640965, |
|
"learning_rate": 0.000169486593223862, |
|
"loss": 0.0, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 0.1562823432001656, |
|
"grad_norm": 0.00902874581515789, |
|
"learning_rate": 0.00016944502182498442, |
|
"loss": 0.0004, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.15648933968122541, |
|
"grad_norm": 0.00498725613579154, |
|
"learning_rate": 0.00016940345042610683, |
|
"loss": 0.0001, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 0.15669633616228523, |
|
"grad_norm": 0.0004982489626854658, |
|
"learning_rate": 0.00016936187902722927, |
|
"loss": 0.0, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 0.15690333264334505, |
|
"grad_norm": 0.0011680921306833625, |
|
"learning_rate": 0.0001693203076283517, |
|
"loss": 0.0, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 0.1571103291244049, |
|
"grad_norm": 0.0013553223107010126, |
|
"learning_rate": 0.00016927873622947412, |
|
"loss": 0.0, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 0.15731732560546471, |
|
"grad_norm": 0.00549361202865839, |
|
"learning_rate": 0.00016923716483059655, |
|
"loss": 0.0016, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.15752432208652453, |
|
"grad_norm": 0.004852932877838612, |
|
"learning_rate": 0.000169195593431719, |
|
"loss": 0.0002, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 0.15773131856758435, |
|
"grad_norm": 0.0046032629907131195, |
|
"learning_rate": 0.0001691540220328414, |
|
"loss": 0.002, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 0.15793831504864417, |
|
"grad_norm": 0.009385612793266773, |
|
"learning_rate": 0.00016911245063396384, |
|
"loss": 0.0003, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 0.15814531152970399, |
|
"grad_norm": 0.0024257150944322348, |
|
"learning_rate": 0.00016907087923508627, |
|
"loss": 0.0009, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 0.1583523080107638, |
|
"grad_norm": 0.002726235194131732, |
|
"learning_rate": 0.00016902930783620868, |
|
"loss": 0.0012, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.15855930449182365, |
|
"grad_norm": 0.006497920490801334, |
|
"learning_rate": 0.00016898773643733112, |
|
"loss": 0.0005, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 0.15876630097288347, |
|
"grad_norm": 0.012873928062617779, |
|
"learning_rate": 0.00016894616503845356, |
|
"loss": 0.0021, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 0.15897329745394329, |
|
"grad_norm": 0.009931253269314766, |
|
"learning_rate": 0.000168904593639576, |
|
"loss": 0.0004, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.1591802939350031, |
|
"grad_norm": 0.0012783849379047751, |
|
"learning_rate": 0.0001688630222406984, |
|
"loss": 0.0, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 0.15938729041606292, |
|
"grad_norm": 0.0025215751957148314, |
|
"learning_rate": 0.00016882145084182084, |
|
"loss": 0.0001, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.15959428689712274, |
|
"grad_norm": 0.00574857834726572, |
|
"learning_rate": 0.00016877987944294328, |
|
"loss": 0.0002, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 0.15980128337818258, |
|
"grad_norm": 0.0008691879920661449, |
|
"learning_rate": 0.0001687383080440657, |
|
"loss": 0.0, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 0.1600082798592424, |
|
"grad_norm": 0.008225478231906891, |
|
"learning_rate": 0.00016869673664518813, |
|
"loss": 0.0002, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 0.16021527634030222, |
|
"grad_norm": 0.003890304360538721, |
|
"learning_rate": 0.00016865516524631056, |
|
"loss": 0.0014, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.16042227282136204, |
|
"grad_norm": 0.0011641031596809626, |
|
"learning_rate": 0.00016861359384743297, |
|
"loss": 0.0001, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.16062926930242186, |
|
"grad_norm": 0.008769871667027473, |
|
"learning_rate": 0.0001685720224485554, |
|
"loss": 0.0011, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 0.16083626578348167, |
|
"grad_norm": 0.005050954408943653, |
|
"learning_rate": 0.00016853045104967785, |
|
"loss": 0.0002, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 0.1610432622645415, |
|
"grad_norm": 0.002180990530177951, |
|
"learning_rate": 0.00016848887965080026, |
|
"loss": 0.0008, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 0.16125025874560134, |
|
"grad_norm": 0.0015876460820436478, |
|
"learning_rate": 0.00016844730825192267, |
|
"loss": 0.0, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 0.16145725522666116, |
|
"grad_norm": 0.004357179626822472, |
|
"learning_rate": 0.0001684057368530451, |
|
"loss": 0.0003, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.16166425170772097, |
|
"grad_norm": 0.0034056720323860645, |
|
"learning_rate": 0.00016836416545416754, |
|
"loss": 0.0012, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 0.1618712481887808, |
|
"grad_norm": 0.005545208230614662, |
|
"learning_rate": 0.00016832259405528995, |
|
"loss": 0.0001, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 0.1620782446698406, |
|
"grad_norm": 0.0002129770437022671, |
|
"learning_rate": 0.0001682810226564124, |
|
"loss": 0.0, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 0.16228524115090043, |
|
"grad_norm": 0.0036753590684384108, |
|
"learning_rate": 0.00016823945125753482, |
|
"loss": 0.0001, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.16249223763196025, |
|
"grad_norm": 0.0018491502851247787, |
|
"learning_rate": 0.00016819787985865723, |
|
"loss": 0.0008, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.1626992341130201, |
|
"grad_norm": 0.0006519712042063475, |
|
"learning_rate": 0.00016815630845977967, |
|
"loss": 0.0, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 0.1629062305940799, |
|
"grad_norm": 0.011139947921037674, |
|
"learning_rate": 0.0001681147370609021, |
|
"loss": 0.0011, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 0.16311322707513973, |
|
"grad_norm": 0.0020866713020950556, |
|
"learning_rate": 0.00016807316566202455, |
|
"loss": 0.0002, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 0.16332022355619955, |
|
"grad_norm": 0.0034007905051112175, |
|
"learning_rate": 0.00016803159426314696, |
|
"loss": 0.0008, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 0.16352722003725936, |
|
"grad_norm": 0.0017938032979145646, |
|
"learning_rate": 0.0001679900228642694, |
|
"loss": 0.0009, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.16373421651831918, |
|
"grad_norm": 0.005385685246437788, |
|
"learning_rate": 0.00016794845146539183, |
|
"loss": 0.001, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 0.163941212999379, |
|
"grad_norm": 0.010079730302095413, |
|
"learning_rate": 0.00016790688006651424, |
|
"loss": 0.0004, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 0.16414820948043884, |
|
"grad_norm": 0.005826961249113083, |
|
"learning_rate": 0.00016786530866763668, |
|
"loss": 0.0001, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 0.16435520596149866, |
|
"grad_norm": 0.002885566558688879, |
|
"learning_rate": 0.00016782373726875911, |
|
"loss": 0.0001, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 0.16456220244255848, |
|
"grad_norm": 0.004031067714095116, |
|
"learning_rate": 0.00016778216586988152, |
|
"loss": 0.0002, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.1647691989236183, |
|
"grad_norm": 0.0019721402786672115, |
|
"learning_rate": 0.00016774059447100396, |
|
"loss": 0.0001, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 0.16497619540467812, |
|
"grad_norm": 0.002213244093582034, |
|
"learning_rate": 0.0001676990230721264, |
|
"loss": 0.0006, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 0.16518319188573793, |
|
"grad_norm": 0.001942839939147234, |
|
"learning_rate": 0.00016765745167324884, |
|
"loss": 0.0, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 0.16539018836679775, |
|
"grad_norm": 0.003173516597598791, |
|
"learning_rate": 0.00016761588027437125, |
|
"loss": 0.0001, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 0.1655971848478576, |
|
"grad_norm": 0.004877821542322636, |
|
"learning_rate": 0.00016757430887549366, |
|
"loss": 0.0001, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.16580418132891742, |
|
"grad_norm": 0.006676991004496813, |
|
"learning_rate": 0.0001675327374766161, |
|
"loss": 0.0002, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 0.16601117780997723, |
|
"grad_norm": 0.0022598986979573965, |
|
"learning_rate": 0.0001674911660777385, |
|
"loss": 0.0003, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 0.16621817429103705, |
|
"grad_norm": 0.00012318776862230152, |
|
"learning_rate": 0.00016744959467886094, |
|
"loss": 0.0, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 0.16642517077209687, |
|
"grad_norm": 0.006045771297067404, |
|
"learning_rate": 0.00016740802327998338, |
|
"loss": 0.0003, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 0.1666321672531567, |
|
"grad_norm": 0.004370058421045542, |
|
"learning_rate": 0.0001673664518811058, |
|
"loss": 0.0001, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.1668391637342165, |
|
"grad_norm": 0.005490643437951803, |
|
"learning_rate": 0.00016732488048222822, |
|
"loss": 0.0022, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 0.16704616021527635, |
|
"grad_norm": 0.007493430282920599, |
|
"learning_rate": 0.00016728330908335066, |
|
"loss": 0.001, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 0.16725315669633617, |
|
"grad_norm": 0.0006159085314720869, |
|
"learning_rate": 0.0001672417376844731, |
|
"loss": 0.0, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 0.167460153177396, |
|
"grad_norm": 0.002211883431300521, |
|
"learning_rate": 0.0001672001662855955, |
|
"loss": 0.0, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 0.1676671496584558, |
|
"grad_norm": 0.0028680090326815844, |
|
"learning_rate": 0.00016715859488671794, |
|
"loss": 0.0011, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.16787414613951562, |
|
"grad_norm": 0.004992680158466101, |
|
"learning_rate": 0.00016711702348784038, |
|
"loss": 0.0002, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 0.16808114262057544, |
|
"grad_norm": 0.0024819490499794483, |
|
"learning_rate": 0.0001670754520889628, |
|
"loss": 0.0001, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 0.16828813910163526, |
|
"grad_norm": 0.001662694732658565, |
|
"learning_rate": 0.00016703388069008523, |
|
"loss": 0.0001, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 0.1684951355826951, |
|
"grad_norm": 0.0027136337012052536, |
|
"learning_rate": 0.00016699230929120767, |
|
"loss": 0.0015, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 0.16870213206375492, |
|
"grad_norm": 0.0055983890779316425, |
|
"learning_rate": 0.00016695073789233008, |
|
"loss": 0.0006, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.16890912854481474, |
|
"grad_norm": 0.0005543065490201116, |
|
"learning_rate": 0.0001669091664934525, |
|
"loss": 0.0, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 0.16911612502587456, |
|
"grad_norm": 0.006743449252098799, |
|
"learning_rate": 0.00016686759509457495, |
|
"loss": 0.0006, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 0.16932312150693438, |
|
"grad_norm": 0.005361751653254032, |
|
"learning_rate": 0.0001668260236956974, |
|
"loss": 0.0002, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 0.1695301179879942, |
|
"grad_norm": 0.015542850829660892, |
|
"learning_rate": 0.0001667844522968198, |
|
"loss": 0.0001, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 0.169737114469054, |
|
"grad_norm": 0.006788911763578653, |
|
"learning_rate": 0.00016674288089794223, |
|
"loss": 0.0004, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.16994411095011386, |
|
"grad_norm": 0.006434622220695019, |
|
"learning_rate": 0.00016670130949906467, |
|
"loss": 0.0004, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 0.17015110743117368, |
|
"grad_norm": 0.0024506154004484415, |
|
"learning_rate": 0.00016665973810018708, |
|
"loss": 0.0007, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 0.1703581039122335, |
|
"grad_norm": 0.000382046215236187, |
|
"learning_rate": 0.0001666181667013095, |
|
"loss": 0.0, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 0.1705651003932933, |
|
"grad_norm": 0.00432636309415102, |
|
"learning_rate": 0.00016657659530243193, |
|
"loss": 0.0009, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 0.17077209687435313, |
|
"grad_norm": 0.005686972755938768, |
|
"learning_rate": 0.00016653502390355436, |
|
"loss": 0.0002, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.17097909335541295, |
|
"grad_norm": 0.005743528716266155, |
|
"learning_rate": 0.00016649345250467677, |
|
"loss": 0.0003, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 0.1711860898364728, |
|
"grad_norm": 0.002116352552548051, |
|
"learning_rate": 0.0001664518811057992, |
|
"loss": 0.0003, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 0.1713930863175326, |
|
"grad_norm": 0.002352718496695161, |
|
"learning_rate": 0.00016641030970692165, |
|
"loss": 0.0, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 0.17160008279859243, |
|
"grad_norm": 0.0044693113304674625, |
|
"learning_rate": 0.00016636873830804406, |
|
"loss": 0.0004, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 0.17180707927965225, |
|
"grad_norm": 0.0005167116178199649, |
|
"learning_rate": 0.0001663271669091665, |
|
"loss": 0.0, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.17201407576071207, |
|
"grad_norm": 0.005162122659385204, |
|
"learning_rate": 0.00016628559551028893, |
|
"loss": 0.0002, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 0.17222107224177188, |
|
"grad_norm": 0.00015954635455273092, |
|
"learning_rate": 0.00016624402411141134, |
|
"loss": 0.0, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.1724280687228317, |
|
"grad_norm": 0.0030487151816487312, |
|
"learning_rate": 0.00016620245271253378, |
|
"loss": 0.0001, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 0.17263506520389155, |
|
"grad_norm": 0.002151534892618656, |
|
"learning_rate": 0.00016616088131365622, |
|
"loss": 0.0002, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 0.17284206168495136, |
|
"grad_norm": 0.0044494629837572575, |
|
"learning_rate": 0.00016611930991477865, |
|
"loss": 0.0005, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.17304905816601118, |
|
"grad_norm": 0.00033838755916804075, |
|
"learning_rate": 0.00016607773851590106, |
|
"loss": 0.0, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 0.173256054647071, |
|
"grad_norm": 0.0005302856443449855, |
|
"learning_rate": 0.0001660361671170235, |
|
"loss": 0.0, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 0.17346305112813082, |
|
"grad_norm": 0.0013208640739321709, |
|
"learning_rate": 0.00016599459571814594, |
|
"loss": 0.0, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 0.17367004760919064, |
|
"grad_norm": 0.001052051316946745, |
|
"learning_rate": 0.00016595302431926835, |
|
"loss": 0.0, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 0.17387704409025045, |
|
"grad_norm": 0.018445929512381554, |
|
"learning_rate": 0.00016591145292039079, |
|
"loss": 0.0004, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.1740840405713103, |
|
"grad_norm": 0.0025256802327930927, |
|
"learning_rate": 0.00016586988152151322, |
|
"loss": 0.0001, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 0.17429103705237012, |
|
"grad_norm": 0.0014724883949384093, |
|
"learning_rate": 0.00016582831012263563, |
|
"loss": 0.0, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 0.17449803353342994, |
|
"grad_norm": 0.003576815826818347, |
|
"learning_rate": 0.00016578673872375807, |
|
"loss": 0.0001, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 0.17470503001448975, |
|
"grad_norm": 0.0006163385114632547, |
|
"learning_rate": 0.00016574516732488048, |
|
"loss": 0.0, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 0.17491202649554957, |
|
"grad_norm": 0.0011656074784696102, |
|
"learning_rate": 0.00016570359592600292, |
|
"loss": 0.0001, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.1751190229766094, |
|
"grad_norm": 0.0018338944064453244, |
|
"learning_rate": 0.00016566202452712533, |
|
"loss": 0.0001, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 0.1753260194576692, |
|
"grad_norm": 0.005035779904574156, |
|
"learning_rate": 0.00016562045312824776, |
|
"loss": 0.0014, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 0.17553301593872905, |
|
"grad_norm": 0.006770180072635412, |
|
"learning_rate": 0.0001655788817293702, |
|
"loss": 0.0012, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 0.17574001241978887, |
|
"grad_norm": 0.0003650276339612901, |
|
"learning_rate": 0.0001655373103304926, |
|
"loss": 0.0, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 0.1759470089008487, |
|
"grad_norm": 0.00023851868172641844, |
|
"learning_rate": 0.00016549573893161505, |
|
"loss": 0.0, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.1761540053819085, |
|
"grad_norm": 0.014695384539663792, |
|
"learning_rate": 0.00016545416753273748, |
|
"loss": 0.0011, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 0.17636100186296833, |
|
"grad_norm": 0.00036404369166120887, |
|
"learning_rate": 0.00016541259613385992, |
|
"loss": 0.0, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 0.17656799834402814, |
|
"grad_norm": 0.002682497026398778, |
|
"learning_rate": 0.00016537102473498233, |
|
"loss": 0.0012, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 0.17677499482508796, |
|
"grad_norm": 0.007028127089142799, |
|
"learning_rate": 0.00016532945333610477, |
|
"loss": 0.0002, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 0.1769819913061478, |
|
"grad_norm": 0.0012324461713433266, |
|
"learning_rate": 0.0001652878819372272, |
|
"loss": 0.0, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.17718898778720762, |
|
"grad_norm": 0.00292210397310555, |
|
"learning_rate": 0.00016524631053834962, |
|
"loss": 0.0, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 0.17739598426826744, |
|
"grad_norm": 0.0014698312152177095, |
|
"learning_rate": 0.00016520473913947205, |
|
"loss": 0.0002, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 0.17760298074932726, |
|
"grad_norm": 0.0022247894667088985, |
|
"learning_rate": 0.0001651631677405945, |
|
"loss": 0.0, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 0.17780997723038708, |
|
"grad_norm": 0.0006738615338690579, |
|
"learning_rate": 0.0001651215963417169, |
|
"loss": 0.0, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 0.1780169737114469, |
|
"grad_norm": 0.004056425765156746, |
|
"learning_rate": 0.00016508002494283934, |
|
"loss": 0.001, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.17822397019250671, |
|
"grad_norm": 0.006607827264815569, |
|
"learning_rate": 0.00016503845354396177, |
|
"loss": 0.0003, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 0.17843096667356656, |
|
"grad_norm": 0.007498994003981352, |
|
"learning_rate": 0.00016499688214508418, |
|
"loss": 0.0009, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 0.17863796315462638, |
|
"grad_norm": 0.002715140348300338, |
|
"learning_rate": 0.00016495531074620662, |
|
"loss": 0.0001, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 0.1788449596356862, |
|
"grad_norm": 0.006559406872838736, |
|
"learning_rate": 0.00016491373934732906, |
|
"loss": 0.0012, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 0.179051956116746, |
|
"grad_norm": 0.005900564603507519, |
|
"learning_rate": 0.0001648721679484515, |
|
"loss": 0.0001, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.17925895259780583, |
|
"grad_norm": 0.0014680642634630203, |
|
"learning_rate": 0.00016483059654957388, |
|
"loss": 0.0004, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 0.17946594907886565, |
|
"grad_norm": 0.0017862527165561914, |
|
"learning_rate": 0.00016478902515069632, |
|
"loss": 0.0001, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 0.17967294555992547, |
|
"grad_norm": 0.0010660128900781274, |
|
"learning_rate": 0.00016474745375181875, |
|
"loss": 0.0001, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 0.1798799420409853, |
|
"grad_norm": 0.010508016683161259, |
|
"learning_rate": 0.0001647058823529412, |
|
"loss": 0.0004, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 0.18008693852204513, |
|
"grad_norm": 0.0026938568335026503, |
|
"learning_rate": 0.0001646643109540636, |
|
"loss": 0.0001, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.18029393500310495, |
|
"grad_norm": 0.0015470877988263965, |
|
"learning_rate": 0.00016462273955518604, |
|
"loss": 0.0, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 0.18050093148416477, |
|
"grad_norm": 0.003435211256146431, |
|
"learning_rate": 0.00016458116815630847, |
|
"loss": 0.0001, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 0.18070792796522459, |
|
"grad_norm": 0.010342281311750412, |
|
"learning_rate": 0.00016453959675743088, |
|
"loss": 0.0007, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 0.1809149244462844, |
|
"grad_norm": 0.0007751841330900788, |
|
"learning_rate": 0.00016449802535855332, |
|
"loss": 0.0, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 0.18112192092734425, |
|
"grad_norm": 0.0003991715202573687, |
|
"learning_rate": 0.00016445645395967576, |
|
"loss": 0.0, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.18132891740840407, |
|
"grad_norm": 0.004742010496556759, |
|
"learning_rate": 0.00016441488256079817, |
|
"loss": 0.0001, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 0.18153591388946388, |
|
"grad_norm": 0.0010139705846086144, |
|
"learning_rate": 0.0001643733111619206, |
|
"loss": 0.0, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 0.1817429103705237, |
|
"grad_norm": 0.00697368336841464, |
|
"learning_rate": 0.00016433173976304304, |
|
"loss": 0.0003, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 0.18194990685158352, |
|
"grad_norm": 0.0056029148399829865, |
|
"learning_rate": 0.00016429016836416545, |
|
"loss": 0.0003, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 0.18215690333264334, |
|
"grad_norm": 0.0031287583988159895, |
|
"learning_rate": 0.0001642485969652879, |
|
"loss": 0.0001, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.18236389981370316, |
|
"grad_norm": 0.0005836491473019123, |
|
"learning_rate": 0.00016420702556641033, |
|
"loss": 0.0, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 0.182570896294763, |
|
"grad_norm": 0.006221551448106766, |
|
"learning_rate": 0.00016416545416753276, |
|
"loss": 0.0014, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 0.18277789277582282, |
|
"grad_norm": 0.00045936627429910004, |
|
"learning_rate": 0.00016412388276865517, |
|
"loss": 0.0, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 0.18298488925688264, |
|
"grad_norm": 0.0006924067274667323, |
|
"learning_rate": 0.0001640823113697776, |
|
"loss": 0.0, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 0.18319188573794246, |
|
"grad_norm": 0.010869395919144154, |
|
"learning_rate": 0.00016404073997090005, |
|
"loss": 0.0013, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.18339888221900227, |
|
"grad_norm": 0.00480787456035614, |
|
"learning_rate": 0.00016399916857202246, |
|
"loss": 0.0009, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 0.1836058787000621, |
|
"grad_norm": 0.0004685772000811994, |
|
"learning_rate": 0.0001639575971731449, |
|
"loss": 0.0, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 0.1838128751811219, |
|
"grad_norm": 0.00303410436026752, |
|
"learning_rate": 0.0001639160257742673, |
|
"loss": 0.0007, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 0.18401987166218176, |
|
"grad_norm": 0.001141547691076994, |
|
"learning_rate": 0.00016387445437538974, |
|
"loss": 0.0001, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 0.18422686814324157, |
|
"grad_norm": 0.007433968596160412, |
|
"learning_rate": 0.00016383288297651215, |
|
"loss": 0.0001, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.1844338646243014, |
|
"grad_norm": 0.003386344527825713, |
|
"learning_rate": 0.0001637913115776346, |
|
"loss": 0.0003, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 0.1846408611053612, |
|
"grad_norm": 0.0011372484732419252, |
|
"learning_rate": 0.00016374974017875702, |
|
"loss": 0.0, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 0.18484785758642103, |
|
"grad_norm": 0.0039020997937768698, |
|
"learning_rate": 0.00016370816877987943, |
|
"loss": 0.0001, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 0.18505485406748085, |
|
"grad_norm": 0.003088288474828005, |
|
"learning_rate": 0.00016366659738100187, |
|
"loss": 0.0009, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 0.18526185054854066, |
|
"grad_norm": 0.001126794726587832, |
|
"learning_rate": 0.0001636250259821243, |
|
"loss": 0.0001, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.1854688470296005, |
|
"grad_norm": 0.007449139375239611, |
|
"learning_rate": 0.00016358345458324672, |
|
"loss": 0.0004, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 0.18567584351066033, |
|
"grad_norm": 0.005704225040972233, |
|
"learning_rate": 0.00016354188318436916, |
|
"loss": 0.0016, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 0.18588283999172014, |
|
"grad_norm": 0.004983640741556883, |
|
"learning_rate": 0.0001635003117854916, |
|
"loss": 0.0016, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 0.18608983647277996, |
|
"grad_norm": 0.00034120268537662923, |
|
"learning_rate": 0.00016345874038661403, |
|
"loss": 0.0, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 0.18629683295383978, |
|
"grad_norm": 0.007043101824820042, |
|
"learning_rate": 0.00016341716898773644, |
|
"loss": 0.001, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.1865038294348996, |
|
"grad_norm": 0.004050271585583687, |
|
"learning_rate": 0.00016337559758885888, |
|
"loss": 0.0001, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 0.18671082591595942, |
|
"grad_norm": 0.001882696757093072, |
|
"learning_rate": 0.00016333402618998131, |
|
"loss": 0.0005, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 0.18691782239701926, |
|
"grad_norm": 0.002479350659996271, |
|
"learning_rate": 0.00016329245479110372, |
|
"loss": 0.0003, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 0.18712481887807908, |
|
"grad_norm": 0.00246567465364933, |
|
"learning_rate": 0.00016325088339222616, |
|
"loss": 0.0001, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 0.1873318153591389, |
|
"grad_norm": 0.0021426973398774862, |
|
"learning_rate": 0.0001632093119933486, |
|
"loss": 0.0006, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.18753881184019872, |
|
"grad_norm": 0.004363594576716423, |
|
"learning_rate": 0.000163167740594471, |
|
"loss": 0.0002, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 0.18774580832125853, |
|
"grad_norm": 0.004984852857887745, |
|
"learning_rate": 0.00016312616919559345, |
|
"loss": 0.0004, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 0.18795280480231835, |
|
"grad_norm": 0.004489907994866371, |
|
"learning_rate": 0.00016308459779671588, |
|
"loss": 0.0001, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 0.18815980128337817, |
|
"grad_norm": 0.0013233786448836327, |
|
"learning_rate": 0.0001630430263978383, |
|
"loss": 0.0, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 0.18836679776443802, |
|
"grad_norm": 0.00894436426460743, |
|
"learning_rate": 0.0001630014549989607, |
|
"loss": 0.0016, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.18857379424549783, |
|
"grad_norm": 0.001729694427922368, |
|
"learning_rate": 0.00016295988360008314, |
|
"loss": 0.001, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 0.18878079072655765, |
|
"grad_norm": 0.005005873739719391, |
|
"learning_rate": 0.00016291831220120558, |
|
"loss": 0.0005, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 0.18898778720761747, |
|
"grad_norm": 0.0007573101902380586, |
|
"learning_rate": 0.00016287674080232799, |
|
"loss": 0.0, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 0.1891947836886773, |
|
"grad_norm": 0.005315006244927645, |
|
"learning_rate": 0.00016283516940345042, |
|
"loss": 0.0001, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 0.1894017801697371, |
|
"grad_norm": 0.001140634878538549, |
|
"learning_rate": 0.00016279359800457286, |
|
"loss": 0.0001, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.18960877665079692, |
|
"grad_norm": 0.003881396260112524, |
|
"learning_rate": 0.0001627520266056953, |
|
"loss": 0.0001, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 0.18981577313185677, |
|
"grad_norm": 0.001353550935164094, |
|
"learning_rate": 0.0001627104552068177, |
|
"loss": 0.0, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 0.1900227696129166, |
|
"grad_norm": 0.001415180740877986, |
|
"learning_rate": 0.00016266888380794014, |
|
"loss": 0.0, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 0.1902297660939764, |
|
"grad_norm": 0.006398684810847044, |
|
"learning_rate": 0.00016262731240906258, |
|
"loss": 0.0006, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 0.19043676257503622, |
|
"grad_norm": 0.005204816348850727, |
|
"learning_rate": 0.000162585741010185, |
|
"loss": 0.0016, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.19064375905609604, |
|
"grad_norm": 0.0015194268198683858, |
|
"learning_rate": 0.00016254416961130743, |
|
"loss": 0.0007, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 0.19085075553715586, |
|
"grad_norm": 0.002916971454396844, |
|
"learning_rate": 0.00016250259821242987, |
|
"loss": 0.0015, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 0.1910577520182157, |
|
"grad_norm": 0.00017840563668869436, |
|
"learning_rate": 0.00016246102681355228, |
|
"loss": 0.0, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 0.19126474849927552, |
|
"grad_norm": 0.0017515165964141488, |
|
"learning_rate": 0.0001624194554146747, |
|
"loss": 0.0005, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 0.19147174498033534, |
|
"grad_norm": 0.0011207156348973513, |
|
"learning_rate": 0.00016237788401579715, |
|
"loss": 0.0, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.19167874146139516, |
|
"grad_norm": 0.00647772429510951, |
|
"learning_rate": 0.00016233631261691956, |
|
"loss": 0.0004, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 0.19188573794245498, |
|
"grad_norm": 0.0009239514474757016, |
|
"learning_rate": 0.000162294741218042, |
|
"loss": 0.0, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 0.1920927344235148, |
|
"grad_norm": 0.0009718858054839075, |
|
"learning_rate": 0.00016225316981916443, |
|
"loss": 0.0, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 0.1922997309045746, |
|
"grad_norm": 0.0009835285600274801, |
|
"learning_rate": 0.00016221159842028687, |
|
"loss": 0.0001, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 0.19250672738563446, |
|
"grad_norm": 0.003986849449574947, |
|
"learning_rate": 0.00016217002702140928, |
|
"loss": 0.0005, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.19271372386669428, |
|
"grad_norm": 0.0055690668523311615, |
|
"learning_rate": 0.00016212845562253172, |
|
"loss": 0.0002, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 0.1929207203477541, |
|
"grad_norm": 0.006283191032707691, |
|
"learning_rate": 0.00016208688422365413, |
|
"loss": 0.0003, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 0.1931277168288139, |
|
"grad_norm": 0.00035167241003364325, |
|
"learning_rate": 0.00016204531282477656, |
|
"loss": 0.0, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 0.19333471330987373, |
|
"grad_norm": 0.001550202607177198, |
|
"learning_rate": 0.00016200374142589897, |
|
"loss": 0.0001, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 0.19354170979093355, |
|
"grad_norm": 0.0009650330757722259, |
|
"learning_rate": 0.0001619621700270214, |
|
"loss": 0.0, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.19374870627199337, |
|
"grad_norm": 0.006459403783082962, |
|
"learning_rate": 0.00016192059862814385, |
|
"loss": 0.0009, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 0.1939557027530532, |
|
"grad_norm": 0.0006884423783048987, |
|
"learning_rate": 0.00016187902722926626, |
|
"loss": 0.0, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 0.19416269923411303, |
|
"grad_norm": 0.0034009867813438177, |
|
"learning_rate": 0.0001618374558303887, |
|
"loss": 0.0001, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 0.19436969571517285, |
|
"grad_norm": 0.001749175600707531, |
|
"learning_rate": 0.00016179588443151113, |
|
"loss": 0.0012, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 0.19457669219623266, |
|
"grad_norm": 0.005409194156527519, |
|
"learning_rate": 0.00016175431303263354, |
|
"loss": 0.0002, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.19478368867729248, |
|
"grad_norm": 0.0033904362935572863, |
|
"learning_rate": 0.00016171274163375598, |
|
"loss": 0.0001, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 0.1949906851583523, |
|
"grad_norm": 0.005800081882625818, |
|
"learning_rate": 0.00016167117023487842, |
|
"loss": 0.0014, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 0.19519768163941212, |
|
"grad_norm": 0.001085714902728796, |
|
"learning_rate": 0.00016162959883600083, |
|
"loss": 0.0001, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 0.19540467812047196, |
|
"grad_norm": 0.0017082407139241695, |
|
"learning_rate": 0.00016158802743712326, |
|
"loss": 0.0001, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 0.19561167460153178, |
|
"grad_norm": 0.0016056247986853123, |
|
"learning_rate": 0.0001615464560382457, |
|
"loss": 0.0009, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.1958186710825916, |
|
"grad_norm": 0.0005931173800490797, |
|
"learning_rate": 0.00016150488463936814, |
|
"loss": 0.0, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 0.19602566756365142, |
|
"grad_norm": 0.0002708766842260957, |
|
"learning_rate": 0.00016146331324049055, |
|
"loss": 0.0, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 0.19623266404471124, |
|
"grad_norm": 0.003350366372615099, |
|
"learning_rate": 0.00016142174184161299, |
|
"loss": 0.0004, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 0.19643966052577105, |
|
"grad_norm": 0.00030215582228265703, |
|
"learning_rate": 0.00016138017044273542, |
|
"loss": 0.0, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 0.19664665700683087, |
|
"grad_norm": 0.0013855715515092015, |
|
"learning_rate": 0.00016133859904385783, |
|
"loss": 0.0009, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.19685365348789072, |
|
"grad_norm": 0.0005864354898221791, |
|
"learning_rate": 0.00016129702764498027, |
|
"loss": 0.0, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 0.19706064996895054, |
|
"grad_norm": 0.0006372429197654128, |
|
"learning_rate": 0.0001612554562461027, |
|
"loss": 0.0, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 0.19726764645001035, |
|
"grad_norm": 0.0005041586118750274, |
|
"learning_rate": 0.00016121388484722512, |
|
"loss": 0.0, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 0.19747464293107017, |
|
"grad_norm": 0.0023472902830690145, |
|
"learning_rate": 0.00016117231344834753, |
|
"loss": 0.001, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 0.19768163941213, |
|
"grad_norm": 0.00015194782463368028, |
|
"learning_rate": 0.00016113074204946996, |
|
"loss": 0.0, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.1978886358931898, |
|
"grad_norm": 0.001190232578665018, |
|
"learning_rate": 0.0001610891706505924, |
|
"loss": 0.0002, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 0.19809563237424963, |
|
"grad_norm": 0.0018357646185904741, |
|
"learning_rate": 0.0001610475992517148, |
|
"loss": 0.0002, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 0.19830262885530947, |
|
"grad_norm": 0.007886867970228195, |
|
"learning_rate": 0.00016100602785283725, |
|
"loss": 0.0015, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 0.1985096253363693, |
|
"grad_norm": 0.00014407855633180588, |
|
"learning_rate": 0.00016096445645395968, |
|
"loss": 0.0, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 0.1987166218174291, |
|
"grad_norm": 0.0008407345740124583, |
|
"learning_rate": 0.0001609228850550821, |
|
"loss": 0.0001, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.19892361829848892, |
|
"grad_norm": 0.0005690194084309042, |
|
"learning_rate": 0.00016088131365620453, |
|
"loss": 0.0, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 0.19913061477954874, |
|
"grad_norm": 0.0001164446584880352, |
|
"learning_rate": 0.00016083974225732697, |
|
"loss": 0.0, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 0.19933761126060856, |
|
"grad_norm": 0.004800689872354269, |
|
"learning_rate": 0.0001607981708584494, |
|
"loss": 0.0009, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 0.19954460774166838, |
|
"grad_norm": 0.0038551113102585077, |
|
"learning_rate": 0.00016075659945957182, |
|
"loss": 0.0002, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 0.19975160422272822, |
|
"grad_norm": 0.00023845378018449992, |
|
"learning_rate": 0.00016071502806069425, |
|
"loss": 0.0, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.19995860070378804, |
|
"grad_norm": 0.0006543719209730625, |
|
"learning_rate": 0.0001606734566618167, |
|
"loss": 0.0, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 0.20016559718484786, |
|
"grad_norm": 0.0024344087578356266, |
|
"learning_rate": 0.0001606318852629391, |
|
"loss": 0.0011, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 0.20037259366590768, |
|
"grad_norm": 0.0006420607678592205, |
|
"learning_rate": 0.00016059031386406154, |
|
"loss": 0.0, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 0.2005795901469675, |
|
"grad_norm": 0.0016330952057614923, |
|
"learning_rate": 0.00016054874246518397, |
|
"loss": 0.0, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 0.2007865866280273, |
|
"grad_norm": 0.0013299377169460058, |
|
"learning_rate": 0.00016050717106630638, |
|
"loss": 0.0001, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.20099358310908713, |
|
"grad_norm": 0.0023206102196127176, |
|
"learning_rate": 0.00016046559966742882, |
|
"loss": 0.0006, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 0.20120057959014698, |
|
"grad_norm": 0.0030964380130171776, |
|
"learning_rate": 0.00016042402826855126, |
|
"loss": 0.0004, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 0.2014075760712068, |
|
"grad_norm": 0.0015272133750841022, |
|
"learning_rate": 0.00016038245686967367, |
|
"loss": 0.0, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 0.2016145725522666, |
|
"grad_norm": 0.0036174836568534374, |
|
"learning_rate": 0.0001603408854707961, |
|
"loss": 0.0002, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 0.20182156903332643, |
|
"grad_norm": 0.0014752513961866498, |
|
"learning_rate": 0.00016029931407191854, |
|
"loss": 0.0, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.20202856551438625, |
|
"grad_norm": 0.0008594008395448327, |
|
"learning_rate": 0.00016025774267304095, |
|
"loss": 0.0001, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 0.20223556199544607, |
|
"grad_norm": 0.007280942518264055, |
|
"learning_rate": 0.00016021617127416336, |
|
"loss": 0.001, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 0.2024425584765059, |
|
"grad_norm": 0.0013399991439655423, |
|
"learning_rate": 0.0001601745998752858, |
|
"loss": 0.0, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 0.20264955495756573, |
|
"grad_norm": 0.0015200217021629214, |
|
"learning_rate": 0.00016013302847640824, |
|
"loss": 0.0003, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 0.20285655143862555, |
|
"grad_norm": 0.008712020702660084, |
|
"learning_rate": 0.00016009145707753067, |
|
"loss": 0.0002, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.20306354791968537, |
|
"grad_norm": 0.004218498710542917, |
|
"learning_rate": 0.00016004988567865308, |
|
"loss": 0.0006, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 0.20327054440074518, |
|
"grad_norm": 0.0019195530330762267, |
|
"learning_rate": 0.00016000831427977552, |
|
"loss": 0.0008, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 0.203477540881805, |
|
"grad_norm": 0.0017306975787505507, |
|
"learning_rate": 0.00015996674288089796, |
|
"loss": 0.001, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 0.20368453736286482, |
|
"grad_norm": 0.006045056506991386, |
|
"learning_rate": 0.00015992517148202037, |
|
"loss": 0.0002, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 0.20389153384392467, |
|
"grad_norm": 0.002741064177826047, |
|
"learning_rate": 0.0001598836000831428, |
|
"loss": 0.0001, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.20409853032498448, |
|
"grad_norm": 0.0026846020482480526, |
|
"learning_rate": 0.00015984202868426524, |
|
"loss": 0.0003, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 0.2043055268060443, |
|
"grad_norm": 0.009860471822321415, |
|
"learning_rate": 0.00015980045728538765, |
|
"loss": 0.0002, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 0.20451252328710412, |
|
"grad_norm": 0.0001563982223160565, |
|
"learning_rate": 0.0001597588858865101, |
|
"loss": 0.0, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 0.20471951976816394, |
|
"grad_norm": 0.0035680129658430815, |
|
"learning_rate": 0.00015971731448763253, |
|
"loss": 0.0009, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 0.20492651624922376, |
|
"grad_norm": 0.00037079930189065635, |
|
"learning_rate": 0.00015967574308875494, |
|
"loss": 0.0, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.20513351273028357, |
|
"grad_norm": 0.006476435344666243, |
|
"learning_rate": 0.00015963417168987737, |
|
"loss": 0.0004, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 0.20534050921134342, |
|
"grad_norm": 0.0008909539901651442, |
|
"learning_rate": 0.0001595926002909998, |
|
"loss": 0.0001, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 0.20554750569240324, |
|
"grad_norm": 0.005418546497821808, |
|
"learning_rate": 0.00015955102889212225, |
|
"loss": 0.0009, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 0.20575450217346306, |
|
"grad_norm": 0.004925790708512068, |
|
"learning_rate": 0.00015950945749324466, |
|
"loss": 0.0009, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 0.20596149865452287, |
|
"grad_norm": 0.003130319295451045, |
|
"learning_rate": 0.0001594678860943671, |
|
"loss": 0.001, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.2061684951355827, |
|
"grad_norm": 0.0062978435307741165, |
|
"learning_rate": 0.00015942631469548953, |
|
"loss": 0.0002, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 0.2063754916166425, |
|
"grad_norm": 0.006842117290943861, |
|
"learning_rate": 0.00015938474329661194, |
|
"loss": 0.0003, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 0.20658248809770233, |
|
"grad_norm": 0.0006231727893464267, |
|
"learning_rate": 0.00015934317189773435, |
|
"loss": 0.0, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 0.20678948457876217, |
|
"grad_norm": 0.0008572920341975987, |
|
"learning_rate": 0.0001593016004988568, |
|
"loss": 0.0, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 0.206996481059822, |
|
"grad_norm": 0.0018585945945233107, |
|
"learning_rate": 0.00015926002909997922, |
|
"loss": 0.0001, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.2072034775408818, |
|
"grad_norm": 0.002304868074133992, |
|
"learning_rate": 0.00015921845770110163, |
|
"loss": 0.0009, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 0.20741047402194163, |
|
"grad_norm": 0.004541350062936544, |
|
"learning_rate": 0.00015917688630222407, |
|
"loss": 0.0, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 0.20761747050300144, |
|
"grad_norm": 0.003366716904565692, |
|
"learning_rate": 0.0001591353149033465, |
|
"loss": 0.0011, |
|
"step": 1003 |
|
}, |
|
{ |
|
"epoch": 0.20782446698406126, |
|
"grad_norm": 0.002917301142588258, |
|
"learning_rate": 0.00015909374350446892, |
|
"loss": 0.0017, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 0.20803146346512108, |
|
"grad_norm": 0.0004168320447206497, |
|
"learning_rate": 0.00015905217210559136, |
|
"loss": 0.0, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.20823845994618093, |
|
"grad_norm": 0.001863375655375421, |
|
"learning_rate": 0.0001590106007067138, |
|
"loss": 0.001, |
|
"step": 1006 |
|
}, |
|
{ |
|
"epoch": 0.20844545642724074, |
|
"grad_norm": 0.001271730288863182, |
|
"learning_rate": 0.0001589690293078362, |
|
"loss": 0.0001, |
|
"step": 1007 |
|
}, |
|
{ |
|
"epoch": 0.20865245290830056, |
|
"grad_norm": 0.002366506028920412, |
|
"learning_rate": 0.00015892745790895864, |
|
"loss": 0.0009, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 0.20885944938936038, |
|
"grad_norm": 0.0031757184769958258, |
|
"learning_rate": 0.00015888588651008108, |
|
"loss": 0.0001, |
|
"step": 1009 |
|
}, |
|
{ |
|
"epoch": 0.2090664458704202, |
|
"grad_norm": 0.0019097458571195602, |
|
"learning_rate": 0.00015884431511120351, |
|
"loss": 0.0, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.20927344235148002, |
|
"grad_norm": 0.0008379930513910949, |
|
"learning_rate": 0.00015880274371232592, |
|
"loss": 0.0, |
|
"step": 1011 |
|
}, |
|
{ |
|
"epoch": 0.20948043883253983, |
|
"grad_norm": 0.005206478293985128, |
|
"learning_rate": 0.00015876117231344836, |
|
"loss": 0.002, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 0.20968743531359968, |
|
"grad_norm": 0.0008720169425942004, |
|
"learning_rate": 0.0001587196009145708, |
|
"loss": 0.0, |
|
"step": 1013 |
|
}, |
|
{ |
|
"epoch": 0.2098944317946595, |
|
"grad_norm": 0.0041591702029109, |
|
"learning_rate": 0.0001586780295156932, |
|
"loss": 0.001, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 0.21010142827571932, |
|
"grad_norm": 0.0004002148343715817, |
|
"learning_rate": 0.00015863645811681565, |
|
"loss": 0.0, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.21030842475677913, |
|
"grad_norm": 0.00017360522178933024, |
|
"learning_rate": 0.00015859488671793808, |
|
"loss": 0.0, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 0.21051542123783895, |
|
"grad_norm": 0.004276643507182598, |
|
"learning_rate": 0.0001585533153190605, |
|
"loss": 0.0, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 0.21072241771889877, |
|
"grad_norm": 0.0027010890189558268, |
|
"learning_rate": 0.00015851174392018293, |
|
"loss": 0.0014, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 0.2109294141999586, |
|
"grad_norm": 0.0048659988678991795, |
|
"learning_rate": 0.00015847017252130537, |
|
"loss": 0.0005, |
|
"step": 1019 |
|
}, |
|
{ |
|
"epoch": 0.21113641068101843, |
|
"grad_norm": 0.0003106594958808273, |
|
"learning_rate": 0.00015842860112242778, |
|
"loss": 0.0, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.21134340716207825, |
|
"grad_norm": 0.0032943710684776306, |
|
"learning_rate": 0.00015838702972355019, |
|
"loss": 0.0012, |
|
"step": 1021 |
|
}, |
|
{ |
|
"epoch": 0.21155040364313807, |
|
"grad_norm": 0.0022477346938103437, |
|
"learning_rate": 0.00015834545832467262, |
|
"loss": 0.0002, |
|
"step": 1022 |
|
}, |
|
{ |
|
"epoch": 0.2117574001241979, |
|
"grad_norm": 0.0007089116843417287, |
|
"learning_rate": 0.00015830388692579506, |
|
"loss": 0.0, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 0.2119643966052577, |
|
"grad_norm": 0.003983316943049431, |
|
"learning_rate": 0.00015826231552691747, |
|
"loss": 0.0002, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 0.21217139308631752, |
|
"grad_norm": 0.0038651269860565662, |
|
"learning_rate": 0.0001582207441280399, |
|
"loss": 0.0017, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.21237838956737737, |
|
"grad_norm": 0.006369102746248245, |
|
"learning_rate": 0.00015817917272916234, |
|
"loss": 0.0002, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 0.21258538604843719, |
|
"grad_norm": 0.002382291480898857, |
|
"learning_rate": 0.00015813760133028478, |
|
"loss": 0.0004, |
|
"step": 1027 |
|
}, |
|
{ |
|
"epoch": 0.212792382529497, |
|
"grad_norm": 0.0016512110596522689, |
|
"learning_rate": 0.0001580960299314072, |
|
"loss": 0.0002, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 0.21299937901055682, |
|
"grad_norm": 0.008482804521918297, |
|
"learning_rate": 0.00015805445853252963, |
|
"loss": 0.0002, |
|
"step": 1029 |
|
}, |
|
{ |
|
"epoch": 0.21320637549161664, |
|
"grad_norm": 0.005470529198646545, |
|
"learning_rate": 0.00015801288713365207, |
|
"loss": 0.0007, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.21341337197267646, |
|
"grad_norm": 0.004424599930644035, |
|
"learning_rate": 0.00015797131573477448, |
|
"loss": 0.0007, |
|
"step": 1031 |
|
}, |
|
{ |
|
"epoch": 0.21362036845373628, |
|
"grad_norm": 0.0011165774194523692, |
|
"learning_rate": 0.0001579297443358969, |
|
"loss": 0.0, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 0.21382736493479612, |
|
"grad_norm": 0.0008872200851328671, |
|
"learning_rate": 0.00015788817293701935, |
|
"loss": 0.0, |
|
"step": 1033 |
|
}, |
|
{ |
|
"epoch": 0.21403436141585594, |
|
"grad_norm": 0.010052971541881561, |
|
"learning_rate": 0.00015784660153814176, |
|
"loss": 0.0011, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 0.21424135789691576, |
|
"grad_norm": 0.002929918933659792, |
|
"learning_rate": 0.0001578050301392642, |
|
"loss": 0.0015, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.21444835437797558, |
|
"grad_norm": 0.008970585651695728, |
|
"learning_rate": 0.00015776345874038663, |
|
"loss": 0.0003, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 0.2146553508590354, |
|
"grad_norm": 0.06671982258558273, |
|
"learning_rate": 0.00015772188734150904, |
|
"loss": 0.0008, |
|
"step": 1037 |
|
}, |
|
{ |
|
"epoch": 0.2148623473400952, |
|
"grad_norm": 0.0003781789855565876, |
|
"learning_rate": 0.00015768031594263148, |
|
"loss": 0.0, |
|
"step": 1038 |
|
}, |
|
{ |
|
"epoch": 0.21506934382115503, |
|
"grad_norm": 0.0004683547012973577, |
|
"learning_rate": 0.00015763874454375392, |
|
"loss": 0.0, |
|
"step": 1039 |
|
}, |
|
{ |
|
"epoch": 0.21527634030221487, |
|
"grad_norm": 0.005777016282081604, |
|
"learning_rate": 0.00015759717314487635, |
|
"loss": 0.0015, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.2154833367832747, |
|
"grad_norm": 0.0068507413379848, |
|
"learning_rate": 0.00015755560174599876, |
|
"loss": 0.0002, |
|
"step": 1041 |
|
}, |
|
{ |
|
"epoch": 0.2156903332643345, |
|
"grad_norm": 0.0021482266020029783, |
|
"learning_rate": 0.00015751403034712117, |
|
"loss": 0.0004, |
|
"step": 1042 |
|
}, |
|
{ |
|
"epoch": 0.21589732974539433, |
|
"grad_norm": 0.003305132733657956, |
|
"learning_rate": 0.0001574724589482436, |
|
"loss": 0.0009, |
|
"step": 1043 |
|
}, |
|
{ |
|
"epoch": 0.21610432622645415, |
|
"grad_norm": 0.005324844736605883, |
|
"learning_rate": 0.00015743088754936602, |
|
"loss": 0.0012, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 0.21631132270751396, |
|
"grad_norm": 0.004400115925818682, |
|
"learning_rate": 0.00015738931615048846, |
|
"loss": 0.0002, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.21651831918857378, |
|
"grad_norm": 0.0030595625285059214, |
|
"learning_rate": 0.0001573477447516109, |
|
"loss": 0.0005, |
|
"step": 1046 |
|
}, |
|
{ |
|
"epoch": 0.21672531566963363, |
|
"grad_norm": 0.004036907572299242, |
|
"learning_rate": 0.00015730617335273333, |
|
"loss": 0.0, |
|
"step": 1047 |
|
}, |
|
{ |
|
"epoch": 0.21693231215069345, |
|
"grad_norm": 0.005875944159924984, |
|
"learning_rate": 0.00015726460195385574, |
|
"loss": 0.0004, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 0.21713930863175326, |
|
"grad_norm": 0.003494358854368329, |
|
"learning_rate": 0.00015722303055497818, |
|
"loss": 0.0015, |
|
"step": 1049 |
|
}, |
|
{ |
|
"epoch": 0.21734630511281308, |
|
"grad_norm": 0.00041328632505610585, |
|
"learning_rate": 0.00015718145915610062, |
|
"loss": 0.0, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.2175533015938729, |
|
"grad_norm": 0.0010599165689200163, |
|
"learning_rate": 0.00015713988775722303, |
|
"loss": 0.0, |
|
"step": 1051 |
|
}, |
|
{ |
|
"epoch": 0.21776029807493272, |
|
"grad_norm": 0.00022103896480984986, |
|
"learning_rate": 0.00015709831635834546, |
|
"loss": 0.0, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 0.21796729455599254, |
|
"grad_norm": 0.00018703911337070167, |
|
"learning_rate": 0.0001570567449594679, |
|
"loss": 0.0, |
|
"step": 1053 |
|
}, |
|
{ |
|
"epoch": 0.21817429103705238, |
|
"grad_norm": 0.0001905701938085258, |
|
"learning_rate": 0.0001570151735605903, |
|
"loss": 0.0, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 0.2183812875181122, |
|
"grad_norm": 0.01590561680495739, |
|
"learning_rate": 0.00015697360216171275, |
|
"loss": 0.0004, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.21858828399917202, |
|
"grad_norm": 0.007658824324607849, |
|
"learning_rate": 0.00015693203076283519, |
|
"loss": 0.0001, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 0.21879528048023184, |
|
"grad_norm": 0.0036896623205393553, |
|
"learning_rate": 0.00015689045936395762, |
|
"loss": 0.0011, |
|
"step": 1057 |
|
}, |
|
{ |
|
"epoch": 0.21900227696129165, |
|
"grad_norm": 0.006060061044991016, |
|
"learning_rate": 0.00015684888796508003, |
|
"loss": 0.0001, |
|
"step": 1058 |
|
}, |
|
{ |
|
"epoch": 0.21920927344235147, |
|
"grad_norm": 0.010098116472363472, |
|
"learning_rate": 0.00015680731656620247, |
|
"loss": 0.0012, |
|
"step": 1059 |
|
}, |
|
{ |
|
"epoch": 0.2194162699234113, |
|
"grad_norm": 0.0016395826824009418, |
|
"learning_rate": 0.0001567657451673249, |
|
"loss": 0.0007, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.21962326640447113, |
|
"grad_norm": 0.004565931856632233, |
|
"learning_rate": 0.00015672417376844732, |
|
"loss": 0.0011, |
|
"step": 1061 |
|
}, |
|
{ |
|
"epoch": 0.21983026288553095, |
|
"grad_norm": 0.004525905009359121, |
|
"learning_rate": 0.00015668260236956975, |
|
"loss": 0.0002, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 0.22003725936659077, |
|
"grad_norm": 0.0008571099024266005, |
|
"learning_rate": 0.0001566410309706922, |
|
"loss": 0.0, |
|
"step": 1063 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 4831, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5.0739590310395904e+17, |
|
"train_batch_size": 12, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|