|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9998731447418495, |
|
"eval_steps": 500, |
|
"global_step": 3941, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.002537105163009007, |
|
"grad_norm": 4.350327938684374, |
|
"learning_rate": 2.5316455696202533e-07, |
|
"loss": 1.8196, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.005074210326018014, |
|
"grad_norm": 3.678530456221003, |
|
"learning_rate": 5.063291139240507e-07, |
|
"loss": 1.8108, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.00761131548902702, |
|
"grad_norm": 2.945036910011768, |
|
"learning_rate": 7.59493670886076e-07, |
|
"loss": 1.8147, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.010148420652036028, |
|
"grad_norm": 2.3190574498378447, |
|
"learning_rate": 1.0126582278481013e-06, |
|
"loss": 1.7548, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.012685525815045033, |
|
"grad_norm": 2.074674879169714, |
|
"learning_rate": 1.2658227848101267e-06, |
|
"loss": 1.7204, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.01522263097805404, |
|
"grad_norm": 1.748653753253889, |
|
"learning_rate": 1.518987341772152e-06, |
|
"loss": 1.6762, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.01775973614106305, |
|
"grad_norm": 1.6751399170091914, |
|
"learning_rate": 1.7721518987341774e-06, |
|
"loss": 1.6487, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.020296841304072055, |
|
"grad_norm": 1.6363195765660283, |
|
"learning_rate": 2.0253164556962026e-06, |
|
"loss": 1.6116, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.022833946467081062, |
|
"grad_norm": 1.5794976909544993, |
|
"learning_rate": 2.278481012658228e-06, |
|
"loss": 1.5916, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.025371051630090066, |
|
"grad_norm": 1.6233632992191482, |
|
"learning_rate": 2.5316455696202535e-06, |
|
"loss": 1.598, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.027908156793099072, |
|
"grad_norm": 1.5800661094425872, |
|
"learning_rate": 2.7848101265822785e-06, |
|
"loss": 1.5626, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.03044526195610808, |
|
"grad_norm": 1.6072050143283245, |
|
"learning_rate": 3.037974683544304e-06, |
|
"loss": 1.5457, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.03298236711911709, |
|
"grad_norm": 1.6572306247078625, |
|
"learning_rate": 3.2911392405063294e-06, |
|
"loss": 1.5391, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.0355194722821261, |
|
"grad_norm": 1.586848380490154, |
|
"learning_rate": 3.544303797468355e-06, |
|
"loss": 1.5125, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.038056577445135104, |
|
"grad_norm": 1.638449311664989, |
|
"learning_rate": 3.7974683544303802e-06, |
|
"loss": 1.5243, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.04059368260814411, |
|
"grad_norm": 1.6031920359772533, |
|
"learning_rate": 4.050632911392405e-06, |
|
"loss": 1.4856, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.04313078777115312, |
|
"grad_norm": 1.5967439995800559, |
|
"learning_rate": 4.303797468354431e-06, |
|
"loss": 1.5129, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.045667892934162124, |
|
"grad_norm": 1.6075212497819606, |
|
"learning_rate": 4.556962025316456e-06, |
|
"loss": 1.4961, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.04820499809717113, |
|
"grad_norm": 1.6791238021539772, |
|
"learning_rate": 4.8101265822784815e-06, |
|
"loss": 1.4893, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.05074210326018013, |
|
"grad_norm": 1.6210921331693446, |
|
"learning_rate": 5.063291139240507e-06, |
|
"loss": 1.5013, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05327920842318914, |
|
"grad_norm": 1.7652729374280518, |
|
"learning_rate": 5.3164556962025316e-06, |
|
"loss": 1.473, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.055816313586198145, |
|
"grad_norm": 1.6723244529240142, |
|
"learning_rate": 5.569620253164557e-06, |
|
"loss": 1.4791, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.05835341874920715, |
|
"grad_norm": 1.825326138794735, |
|
"learning_rate": 5.8227848101265824e-06, |
|
"loss": 1.4761, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.06089052391221616, |
|
"grad_norm": 1.9131148271572453, |
|
"learning_rate": 6.075949367088608e-06, |
|
"loss": 1.4626, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.06342762907522517, |
|
"grad_norm": 1.6613770739809675, |
|
"learning_rate": 6.329113924050634e-06, |
|
"loss": 1.4601, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.06596473423823418, |
|
"grad_norm": 1.6666458237214428, |
|
"learning_rate": 6.582278481012659e-06, |
|
"loss": 1.4686, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.06850183940124319, |
|
"grad_norm": 1.5745675069520453, |
|
"learning_rate": 6.835443037974684e-06, |
|
"loss": 1.461, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.0710389445642522, |
|
"grad_norm": 1.6507776778175596, |
|
"learning_rate": 7.08860759493671e-06, |
|
"loss": 1.47, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.0735760497272612, |
|
"grad_norm": 1.6009958375778823, |
|
"learning_rate": 7.341772151898735e-06, |
|
"loss": 1.4526, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.07611315489027021, |
|
"grad_norm": 1.6786912574149853, |
|
"learning_rate": 7.5949367088607605e-06, |
|
"loss": 1.4501, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.07865026005327921, |
|
"grad_norm": 1.6698693144659327, |
|
"learning_rate": 7.848101265822786e-06, |
|
"loss": 1.4483, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.08118736521628822, |
|
"grad_norm": 1.7393580296857223, |
|
"learning_rate": 8.10126582278481e-06, |
|
"loss": 1.4252, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.08372447037929723, |
|
"grad_norm": 1.6124831573952214, |
|
"learning_rate": 8.354430379746837e-06, |
|
"loss": 1.4274, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.08626157554230623, |
|
"grad_norm": 1.6899774259466704, |
|
"learning_rate": 8.607594936708861e-06, |
|
"loss": 1.437, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.08879868070531524, |
|
"grad_norm": 1.6821954539953226, |
|
"learning_rate": 8.860759493670886e-06, |
|
"loss": 1.4388, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.09133578586832425, |
|
"grad_norm": 1.8121412852354848, |
|
"learning_rate": 9.113924050632912e-06, |
|
"loss": 1.4151, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.09387289103133326, |
|
"grad_norm": 1.5407350947949157, |
|
"learning_rate": 9.367088607594937e-06, |
|
"loss": 1.4274, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.09640999619434226, |
|
"grad_norm": 1.7381357929095853, |
|
"learning_rate": 9.620253164556963e-06, |
|
"loss": 1.4309, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.09894710135735126, |
|
"grad_norm": 1.6085237968347799, |
|
"learning_rate": 9.87341772151899e-06, |
|
"loss": 1.4173, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.10148420652036026, |
|
"grad_norm": 1.728407830056737, |
|
"learning_rate": 9.999950942931784e-06, |
|
"loss": 1.4312, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.10402131168336927, |
|
"grad_norm": 1.6972786696047149, |
|
"learning_rate": 9.999558492161865e-06, |
|
"loss": 1.422, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.10655841684637828, |
|
"grad_norm": 1.6297612720977512, |
|
"learning_rate": 9.998773621425852e-06, |
|
"loss": 1.3892, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.10909552200938728, |
|
"grad_norm": 1.7368434280409393, |
|
"learning_rate": 9.997596392328971e-06, |
|
"loss": 1.4368, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.11163262717239629, |
|
"grad_norm": 1.8196387241516612, |
|
"learning_rate": 9.996026897273024e-06, |
|
"loss": 1.4129, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.1141697323354053, |
|
"grad_norm": 1.6083388947957715, |
|
"learning_rate": 9.994065259449128e-06, |
|
"loss": 1.4181, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.1167068374984143, |
|
"grad_norm": 1.7315485017229137, |
|
"learning_rate": 9.991711632828049e-06, |
|
"loss": 1.4107, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.11924394266142331, |
|
"grad_norm": 1.6192259621686464, |
|
"learning_rate": 9.988966202148115e-06, |
|
"loss": 1.3933, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.12178104782443232, |
|
"grad_norm": 1.685661277294985, |
|
"learning_rate": 9.985829182900717e-06, |
|
"loss": 1.4305, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.12431815298744132, |
|
"grad_norm": 1.7673237121898477, |
|
"learning_rate": 9.982300821313394e-06, |
|
"loss": 1.407, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.12685525815045035, |
|
"grad_norm": 1.6729684650970384, |
|
"learning_rate": 9.978381394330509e-06, |
|
"loss": 1.3941, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.12939236331345935, |
|
"grad_norm": 1.6737204711157692, |
|
"learning_rate": 9.974071209591507e-06, |
|
"loss": 1.4083, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.13192946847646836, |
|
"grad_norm": 1.5846450496238496, |
|
"learning_rate": 9.96937060540677e-06, |
|
"loss": 1.3913, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.13446657363947737, |
|
"grad_norm": 1.6440175318683266, |
|
"learning_rate": 9.964279950731066e-06, |
|
"loss": 1.4141, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.13700367880248637, |
|
"grad_norm": 1.5435352480418292, |
|
"learning_rate": 9.958799645134585e-06, |
|
"loss": 1.3923, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.13954078396549538, |
|
"grad_norm": 1.6806917695478834, |
|
"learning_rate": 9.952930118771576e-06, |
|
"loss": 1.3882, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.1420778891285044, |
|
"grad_norm": 1.6991483906725386, |
|
"learning_rate": 9.946671832346588e-06, |
|
"loss": 1.3806, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.1446149942915134, |
|
"grad_norm": 1.6444779930069549, |
|
"learning_rate": 9.940025277078304e-06, |
|
"loss": 1.3877, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.1471520994545224, |
|
"grad_norm": 1.584958906864304, |
|
"learning_rate": 9.932990974660992e-06, |
|
"loss": 1.3758, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.1496892046175314, |
|
"grad_norm": 1.6339337045637337, |
|
"learning_rate": 9.925569477223549e-06, |
|
"loss": 1.3942, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.15222630978054041, |
|
"grad_norm": 1.6782688039697937, |
|
"learning_rate": 9.917761367286164e-06, |
|
"loss": 1.3997, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.15476341494354942, |
|
"grad_norm": 1.722985917307532, |
|
"learning_rate": 9.909567257714605e-06, |
|
"loss": 1.3902, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.15730052010655843, |
|
"grad_norm": 1.515629790408513, |
|
"learning_rate": 9.9009877916721e-06, |
|
"loss": 1.3906, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.15983762526956743, |
|
"grad_norm": 1.6832684084973726, |
|
"learning_rate": 9.892023642568871e-06, |
|
"loss": 1.3644, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.16237473043257644, |
|
"grad_norm": 1.7217857604177804, |
|
"learning_rate": 9.882675514009262e-06, |
|
"loss": 1.3673, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.16491183559558545, |
|
"grad_norm": 1.772784930329774, |
|
"learning_rate": 9.872944139736523e-06, |
|
"loss": 1.3751, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.16744894075859446, |
|
"grad_norm": 1.528731449667675, |
|
"learning_rate": 9.862830283575215e-06, |
|
"loss": 1.3678, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.16998604592160346, |
|
"grad_norm": 1.6156407009731812, |
|
"learning_rate": 9.852334739371252e-06, |
|
"loss": 1.3825, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.17252315108461247, |
|
"grad_norm": 1.6415774929326135, |
|
"learning_rate": 9.841458330929598e-06, |
|
"loss": 1.3884, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.17506025624762148, |
|
"grad_norm": 1.6070221223746397, |
|
"learning_rate": 9.830201911949604e-06, |
|
"loss": 1.3934, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.17759736141063048, |
|
"grad_norm": 1.562454926578275, |
|
"learning_rate": 9.818566365957996e-06, |
|
"loss": 1.3645, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.1801344665736395, |
|
"grad_norm": 1.5996614008577792, |
|
"learning_rate": 9.80655260623953e-06, |
|
"loss": 1.3708, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.1826715717366485, |
|
"grad_norm": 1.5048794279696338, |
|
"learning_rate": 9.794161575765311e-06, |
|
"loss": 1.3749, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.1852086768996575, |
|
"grad_norm": 1.5935516523984996, |
|
"learning_rate": 9.78139424711877e-06, |
|
"loss": 1.3886, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.1877457820626665, |
|
"grad_norm": 1.5282445861981415, |
|
"learning_rate": 9.76825162241933e-06, |
|
"loss": 1.373, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.19028288722567552, |
|
"grad_norm": 1.6358363561782086, |
|
"learning_rate": 9.754734733243749e-06, |
|
"loss": 1.3742, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.19281999238868452, |
|
"grad_norm": 1.587601691095452, |
|
"learning_rate": 9.740844640545151e-06, |
|
"loss": 1.3603, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.19535709755169353, |
|
"grad_norm": 1.5280980736395107, |
|
"learning_rate": 9.726582434569744e-06, |
|
"loss": 1.3636, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.1978942027147025, |
|
"grad_norm": 1.5821827900533842, |
|
"learning_rate": 9.711949234771258e-06, |
|
"loss": 1.3536, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.20043130787771152, |
|
"grad_norm": 1.6354511380141648, |
|
"learning_rate": 9.696946189723067e-06, |
|
"loss": 1.3777, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.20296841304072052, |
|
"grad_norm": 1.4961728532518945, |
|
"learning_rate": 9.681574477028039e-06, |
|
"loss": 1.3555, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.20550551820372953, |
|
"grad_norm": 1.6165039622149184, |
|
"learning_rate": 9.66583530322611e-06, |
|
"loss": 1.3736, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.20804262336673854, |
|
"grad_norm": 1.6085071421673924, |
|
"learning_rate": 9.649729903699575e-06, |
|
"loss": 1.3685, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.21057972852974755, |
|
"grad_norm": 1.674024959469941, |
|
"learning_rate": 9.633259542576127e-06, |
|
"loss": 1.3516, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.21311683369275655, |
|
"grad_norm": 1.5457350813422102, |
|
"learning_rate": 9.61642551262963e-06, |
|
"loss": 1.3433, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.21565393885576556, |
|
"grad_norm": 1.5638603110160445, |
|
"learning_rate": 9.599229135178651e-06, |
|
"loss": 1.3596, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.21819104401877457, |
|
"grad_norm": 1.5845605847496684, |
|
"learning_rate": 9.581671759982747e-06, |
|
"loss": 1.3821, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.22072814918178357, |
|
"grad_norm": 1.56321358587459, |
|
"learning_rate": 9.563754765136522e-06, |
|
"loss": 1.3568, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.22326525434479258, |
|
"grad_norm": 1.621089242536098, |
|
"learning_rate": 9.545479556961457e-06, |
|
"loss": 1.3614, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.2258023595078016, |
|
"grad_norm": 1.5746598904044478, |
|
"learning_rate": 9.526847569895529e-06, |
|
"loss": 1.3536, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.2283394646708106, |
|
"grad_norm": 1.632144120218129, |
|
"learning_rate": 9.507860266380625e-06, |
|
"loss": 1.3521, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.2308765698338196, |
|
"grad_norm": 1.6660492103415234, |
|
"learning_rate": 9.488519136747741e-06, |
|
"loss": 1.3455, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.2334136749968286, |
|
"grad_norm": 1.6307872469664786, |
|
"learning_rate": 9.468825699100013e-06, |
|
"loss": 1.3388, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.23595078015983761, |
|
"grad_norm": 1.51760811186189, |
|
"learning_rate": 9.448781499193563e-06, |
|
"loss": 1.36, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.23848788532284662, |
|
"grad_norm": 1.6298958079571104, |
|
"learning_rate": 9.428388110316165e-06, |
|
"loss": 1.346, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.24102499048585563, |
|
"grad_norm": 1.6241168589647443, |
|
"learning_rate": 9.407647133163754e-06, |
|
"loss": 1.3565, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.24356209564886463, |
|
"grad_norm": 1.6330870068463266, |
|
"learning_rate": 9.386560195714796e-06, |
|
"loss": 1.3539, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.24609920081187364, |
|
"grad_norm": 1.5846187793083721, |
|
"learning_rate": 9.365128953102495e-06, |
|
"loss": 1.3443, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.24863630597488265, |
|
"grad_norm": 1.5816319458789425, |
|
"learning_rate": 9.343355087484893e-06, |
|
"loss": 1.3449, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.25117341113789166, |
|
"grad_norm": 1.608667389007063, |
|
"learning_rate": 9.321240307912818e-06, |
|
"loss": 1.3503, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.2537105163009007, |
|
"grad_norm": 1.535736158897923, |
|
"learning_rate": 9.298786350195758e-06, |
|
"loss": 1.3504, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.25624762146390967, |
|
"grad_norm": 1.6105502703548435, |
|
"learning_rate": 9.275994976765602e-06, |
|
"loss": 1.3512, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.2587847266269187, |
|
"grad_norm": 1.457234439212148, |
|
"learning_rate": 9.252867976538312e-06, |
|
"loss": 1.3447, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.2613218317899277, |
|
"grad_norm": 1.632312084639862, |
|
"learning_rate": 9.22940716477351e-06, |
|
"loss": 1.3451, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.2638589369529367, |
|
"grad_norm": 1.5657163405769847, |
|
"learning_rate": 9.205614382931986e-06, |
|
"loss": 1.3678, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.2663960421159457, |
|
"grad_norm": 1.523325498659843, |
|
"learning_rate": 9.181491498531179e-06, |
|
"loss": 1.355, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.26893314727895473, |
|
"grad_norm": 1.5647021825494114, |
|
"learning_rate": 9.157040404998572e-06, |
|
"loss": 1.3455, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.2714702524419637, |
|
"grad_norm": 1.581907280598391, |
|
"learning_rate": 9.132263021523096e-06, |
|
"loss": 1.353, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.27400735760497275, |
|
"grad_norm": 1.4861566014453274, |
|
"learning_rate": 9.107161292904476e-06, |
|
"loss": 1.3428, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.2765444627679817, |
|
"grad_norm": 1.6256210181495103, |
|
"learning_rate": 9.081737189400583e-06, |
|
"loss": 1.3421, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.27908156793099076, |
|
"grad_norm": 1.4876360574590954, |
|
"learning_rate": 9.0559927065728e-06, |
|
"loss": 1.3377, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.28161867309399974, |
|
"grad_norm": 1.5204275847901962, |
|
"learning_rate": 9.029929865129375e-06, |
|
"loss": 1.349, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.2841557782570088, |
|
"grad_norm": 1.5624633516357405, |
|
"learning_rate": 9.003550710766813e-06, |
|
"loss": 1.3552, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.28669288342001775, |
|
"grad_norm": 1.5591567540085947, |
|
"learning_rate": 8.97685731400932e-06, |
|
"loss": 1.3209, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.2892299885830268, |
|
"grad_norm": 1.5373317845285133, |
|
"learning_rate": 8.949851770046272e-06, |
|
"loss": 1.3267, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.29176709374603577, |
|
"grad_norm": 1.5556061129094692, |
|
"learning_rate": 8.922536198567772e-06, |
|
"loss": 1.3379, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.2943041989090448, |
|
"grad_norm": 1.971486780664198, |
|
"learning_rate": 8.894912743598269e-06, |
|
"loss": 1.3272, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.2968413040720538, |
|
"grad_norm": 1.5365700226491938, |
|
"learning_rate": 8.866983573328267e-06, |
|
"loss": 1.333, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.2993784092350628, |
|
"grad_norm": 1.6217713070921793, |
|
"learning_rate": 8.83875087994415e-06, |
|
"loss": 1.3497, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.3019155143980718, |
|
"grad_norm": 1.4917017043884344, |
|
"learning_rate": 8.810216879456114e-06, |
|
"loss": 1.3355, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.30445261956108083, |
|
"grad_norm": 1.5427563058731948, |
|
"learning_rate": 8.781383811524222e-06, |
|
"loss": 1.3339, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.3069897247240898, |
|
"grad_norm": 1.5666645778409243, |
|
"learning_rate": 8.752253939282622e-06, |
|
"loss": 1.332, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.30952682988709884, |
|
"grad_norm": 1.5940427272465527, |
|
"learning_rate": 8.722829549161904e-06, |
|
"loss": 1.3411, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.3120639350501078, |
|
"grad_norm": 1.569355522659196, |
|
"learning_rate": 8.69311295070964e-06, |
|
"loss": 1.321, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.31460104021311686, |
|
"grad_norm": 1.5823744419831982, |
|
"learning_rate": 8.663106476409107e-06, |
|
"loss": 1.3511, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.31713814537612584, |
|
"grad_norm": 1.5626340370876246, |
|
"learning_rate": 8.632812481496195e-06, |
|
"loss": 1.3491, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.31967525053913487, |
|
"grad_norm": 1.6216546055767536, |
|
"learning_rate": 8.602233343774562e-06, |
|
"loss": 1.3294, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.32221235570214385, |
|
"grad_norm": 1.4885399811487754, |
|
"learning_rate": 8.571371463428986e-06, |
|
"loss": 1.3419, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.3247494608651529, |
|
"grad_norm": 1.597124872589071, |
|
"learning_rate": 8.540229262836974e-06, |
|
"loss": 1.3245, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.32728656602816186, |
|
"grad_norm": 1.5069638761813242, |
|
"learning_rate": 8.508809186378631e-06, |
|
"loss": 1.3357, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.3298236711911709, |
|
"grad_norm": 1.5496475251999724, |
|
"learning_rate": 8.477113700244788e-06, |
|
"loss": 1.3297, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.3323607763541799, |
|
"grad_norm": 1.5177410295586948, |
|
"learning_rate": 8.445145292243446e-06, |
|
"loss": 1.3361, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.3348978815171889, |
|
"grad_norm": 1.4375424317665, |
|
"learning_rate": 8.412906471604489e-06, |
|
"loss": 1.3365, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.3374349866801979, |
|
"grad_norm": 1.4733958562961815, |
|
"learning_rate": 8.380399768782742e-06, |
|
"loss": 1.3364, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.3399720918432069, |
|
"grad_norm": 1.5665888162471464, |
|
"learning_rate": 8.347627735259344e-06, |
|
"loss": 1.3572, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.3425091970062159, |
|
"grad_norm": 1.5175787042273947, |
|
"learning_rate": 8.314592943341494e-06, |
|
"loss": 1.311, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.34504630216922494, |
|
"grad_norm": 1.5210307965368668, |
|
"learning_rate": 8.281297985960538e-06, |
|
"loss": 1.3261, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.3475834073322339, |
|
"grad_norm": 1.5365431443148119, |
|
"learning_rate": 8.247745476468449e-06, |
|
"loss": 1.3433, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.35012051249524295, |
|
"grad_norm": 1.5548012069585933, |
|
"learning_rate": 8.213938048432697e-06, |
|
"loss": 1.3134, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.35265761765825193, |
|
"grad_norm": 1.4642811591908687, |
|
"learning_rate": 8.179878355429556e-06, |
|
"loss": 1.3159, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.35519472282126097, |
|
"grad_norm": 1.6713134353309254, |
|
"learning_rate": 8.145569070835799e-06, |
|
"loss": 1.3285, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.35773182798426995, |
|
"grad_norm": 1.5444628338197106, |
|
"learning_rate": 8.111012887618882e-06, |
|
"loss": 1.344, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.360268933147279, |
|
"grad_norm": 1.5042040298049457, |
|
"learning_rate": 8.076212518125556e-06, |
|
"loss": 1.3217, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.36280603831028796, |
|
"grad_norm": 1.5827643194628298, |
|
"learning_rate": 8.041170693868985e-06, |
|
"loss": 1.3284, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.365343143473297, |
|
"grad_norm": 1.4314485322723574, |
|
"learning_rate": 8.005890165314334e-06, |
|
"loss": 1.3188, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.367880248636306, |
|
"grad_norm": 1.5452457890288078, |
|
"learning_rate": 7.970373701662892e-06, |
|
"loss": 1.3123, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.370417353799315, |
|
"grad_norm": 1.5944938106930338, |
|
"learning_rate": 7.934624090634713e-06, |
|
"loss": 1.3131, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.372954458962324, |
|
"grad_norm": 1.5553727991379855, |
|
"learning_rate": 7.8986441382498e-06, |
|
"loss": 1.3318, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.375491564125333, |
|
"grad_norm": 1.5196578480754726, |
|
"learning_rate": 7.862436668607865e-06, |
|
"loss": 1.3164, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.378028669288342, |
|
"grad_norm": 1.5354385242535227, |
|
"learning_rate": 7.826004523666661e-06, |
|
"loss": 1.3292, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.38056577445135104, |
|
"grad_norm": 1.5449910825994637, |
|
"learning_rate": 7.78935056301891e-06, |
|
"loss": 1.3272, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.38310287961436, |
|
"grad_norm": 1.4946907973724173, |
|
"learning_rate": 7.752477663667854e-06, |
|
"loss": 1.3391, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.38563998477736905, |
|
"grad_norm": 1.5791940161814702, |
|
"learning_rate": 7.715388719801437e-06, |
|
"loss": 1.3392, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.38817708994037803, |
|
"grad_norm": 1.4567702862839176, |
|
"learning_rate": 7.67808664256514e-06, |
|
"loss": 1.2971, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.39071419510338706, |
|
"grad_norm": 1.4605769814867744, |
|
"learning_rate": 7.640574359833472e-06, |
|
"loss": 1.3148, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.39325130026639604, |
|
"grad_norm": 1.5566796816874888, |
|
"learning_rate": 7.6028548159801685e-06, |
|
"loss": 1.3315, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.395788405429405, |
|
"grad_norm": 1.5768032029757384, |
|
"learning_rate": 7.564930971647087e-06, |
|
"loss": 1.3238, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.39832551059241406, |
|
"grad_norm": 1.5702550171255043, |
|
"learning_rate": 7.52680580351181e-06, |
|
"loss": 1.3175, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.40086261575542304, |
|
"grad_norm": 1.5687466837527182, |
|
"learning_rate": 7.488482304054019e-06, |
|
"loss": 1.3104, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.40339972091843207, |
|
"grad_norm": 1.5897550883645912, |
|
"learning_rate": 7.449963481320599e-06, |
|
"loss": 1.316, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.40593682608144105, |
|
"grad_norm": 1.5236147067965886, |
|
"learning_rate": 7.411252358689541e-06, |
|
"loss": 1.3273, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.4084739312444501, |
|
"grad_norm": 1.5469446528938424, |
|
"learning_rate": 7.372351974632634e-06, |
|
"loss": 1.3119, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.41101103640745906, |
|
"grad_norm": 1.4722026799112722, |
|
"learning_rate": 7.333265382476971e-06, |
|
"loss": 1.3151, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.4135481415704681, |
|
"grad_norm": 1.5178886141824586, |
|
"learning_rate": 7.293995650165287e-06, |
|
"loss": 1.3245, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.4160852467334771, |
|
"grad_norm": 1.5308435376939995, |
|
"learning_rate": 7.2545458600151615e-06, |
|
"loss": 1.3317, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.4186223518964861, |
|
"grad_norm": 1.5091424984828243, |
|
"learning_rate": 7.214919108477077e-06, |
|
"loss": 1.3044, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.4211594570594951, |
|
"grad_norm": 1.457202507709852, |
|
"learning_rate": 7.175118505891385e-06, |
|
"loss": 1.3339, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.4236965622225041, |
|
"grad_norm": 1.530896247556501, |
|
"learning_rate": 7.135147176244158e-06, |
|
"loss": 1.3044, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.4262336673855131, |
|
"grad_norm": 1.5274463812149695, |
|
"learning_rate": 7.0950082569219955e-06, |
|
"loss": 1.3048, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.42877077254852214, |
|
"grad_norm": 1.507428973101804, |
|
"learning_rate": 7.054704898465772e-06, |
|
"loss": 1.3069, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.4313078777115311, |
|
"grad_norm": 1.5716469315983397, |
|
"learning_rate": 7.0142402643233346e-06, |
|
"loss": 1.3136, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.43384498287454015, |
|
"grad_norm": 1.4220881687524514, |
|
"learning_rate": 6.973617530601209e-06, |
|
"loss": 1.3165, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.43638208803754913, |
|
"grad_norm": 1.5926945403384438, |
|
"learning_rate": 6.932839885815304e-06, |
|
"loss": 1.3301, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.43891919320055817, |
|
"grad_norm": 1.4527595611730801, |
|
"learning_rate": 6.891910530640642e-06, |
|
"loss": 1.3145, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.44145629836356715, |
|
"grad_norm": 1.5069254389998272, |
|
"learning_rate": 6.850832677660134e-06, |
|
"loss": 1.3139, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.4439934035265762, |
|
"grad_norm": 1.4587280578384394, |
|
"learning_rate": 6.809609551112419e-06, |
|
"loss": 1.3085, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.44653050868958516, |
|
"grad_norm": 1.5122830472595903, |
|
"learning_rate": 6.768244386638793e-06, |
|
"loss": 1.3158, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.4490676138525942, |
|
"grad_norm": 1.4912245201929943, |
|
"learning_rate": 6.726740431029243e-06, |
|
"loss": 1.3167, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.4516047190156032, |
|
"grad_norm": 1.5574941259720791, |
|
"learning_rate": 6.685100941967596e-06, |
|
"loss": 1.3118, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.4541418241786122, |
|
"grad_norm": 1.4994130740882026, |
|
"learning_rate": 6.643329187775827e-06, |
|
"loss": 1.307, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.4566789293416212, |
|
"grad_norm": 1.5791237950971593, |
|
"learning_rate": 6.601428447157525e-06, |
|
"loss": 1.3086, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.4592160345046302, |
|
"grad_norm": 1.5319564794342408, |
|
"learning_rate": 6.559402008940539e-06, |
|
"loss": 1.3025, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.4617531396676392, |
|
"grad_norm": 1.5560620624811086, |
|
"learning_rate": 6.517253171818844e-06, |
|
"loss": 1.3146, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.46429024483064824, |
|
"grad_norm": 1.5762189341956727, |
|
"learning_rate": 6.474985244093613e-06, |
|
"loss": 1.307, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.4668273499936572, |
|
"grad_norm": 1.568824162809672, |
|
"learning_rate": 6.432601543413552e-06, |
|
"loss": 1.2996, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.46936445515666625, |
|
"grad_norm": 1.461712822890638, |
|
"learning_rate": 6.390105396514497e-06, |
|
"loss": 1.3013, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.47190156031967523, |
|
"grad_norm": 1.4727912142252813, |
|
"learning_rate": 6.347500138958285e-06, |
|
"loss": 1.3086, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.47443866548268426, |
|
"grad_norm": 1.4842630358439066, |
|
"learning_rate": 6.304789114870953e-06, |
|
"loss": 1.3121, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.47697577064569324, |
|
"grad_norm": 1.5147058669468259, |
|
"learning_rate": 6.261975676680252e-06, |
|
"loss": 1.3109, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.4795128758087023, |
|
"grad_norm": 1.5879467208142688, |
|
"learning_rate": 6.219063184852509e-06, |
|
"loss": 1.3057, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.48204998097171126, |
|
"grad_norm": 1.4622817504218393, |
|
"learning_rate": 6.176055007628859e-06, |
|
"loss": 1.2978, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.4845870861347203, |
|
"grad_norm": 1.4651555100721898, |
|
"learning_rate": 6.132954520760882e-06, |
|
"loss": 1.2936, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.48712419129772927, |
|
"grad_norm": 1.4242680820832143, |
|
"learning_rate": 6.089765107245616e-06, |
|
"loss": 1.311, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.4896612964607383, |
|
"grad_norm": 1.4510357489546541, |
|
"learning_rate": 6.046490157060041e-06, |
|
"loss": 1.2917, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.4921984016237473, |
|
"grad_norm": 1.5389362630585735, |
|
"learning_rate": 6.003133066894987e-06, |
|
"loss": 1.3173, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.4947355067867563, |
|
"grad_norm": 1.5597918071325416, |
|
"learning_rate": 5.959697239888525e-06, |
|
"loss": 1.2978, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.4972726119497653, |
|
"grad_norm": 1.481163850939429, |
|
"learning_rate": 5.916186085358858e-06, |
|
"loss": 1.3125, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.49980971711277433, |
|
"grad_norm": 1.5172196100773179, |
|
"learning_rate": 5.872603018536713e-06, |
|
"loss": 1.3035, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.5023468222757833, |
|
"grad_norm": 1.520182324070576, |
|
"learning_rate": 5.828951460297277e-06, |
|
"loss": 1.2943, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.5048839274387923, |
|
"grad_norm": 1.374020881318329, |
|
"learning_rate": 5.785234836891697e-06, |
|
"loss": 1.3019, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.5074210326018014, |
|
"grad_norm": 1.609172422257604, |
|
"learning_rate": 5.741456579678141e-06, |
|
"loss": 1.2929, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.5099581377648104, |
|
"grad_norm": 1.451921659432821, |
|
"learning_rate": 5.697620124852472e-06, |
|
"loss": 1.2868, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.5124952429278193, |
|
"grad_norm": 1.531522896512812, |
|
"learning_rate": 5.65372891317854e-06, |
|
"loss": 1.2875, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.5150323480908283, |
|
"grad_norm": 1.443649652350418, |
|
"learning_rate": 5.6097863897181075e-06, |
|
"loss": 1.2963, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.5175694532538374, |
|
"grad_norm": 1.5591743411035264, |
|
"learning_rate": 5.565796003560447e-06, |
|
"loss": 1.3121, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.5201065584168464, |
|
"grad_norm": 1.428229068798765, |
|
"learning_rate": 5.521761207551622e-06, |
|
"loss": 1.2979, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.5226436635798554, |
|
"grad_norm": 1.5164415865949983, |
|
"learning_rate": 5.47768545802346e-06, |
|
"loss": 1.3107, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.5251807687428643, |
|
"grad_norm": 1.5292361648846982, |
|
"learning_rate": 5.433572214522275e-06, |
|
"loss": 1.2952, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.5277178739058734, |
|
"grad_norm": 1.4451039662214231, |
|
"learning_rate": 5.389424939537311e-06, |
|
"loss": 1.2922, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.5302549790688824, |
|
"grad_norm": 1.558654012548035, |
|
"learning_rate": 5.345247098228977e-06, |
|
"loss": 1.2942, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.5327920842318914, |
|
"grad_norm": 1.5393309134302235, |
|
"learning_rate": 5.301042158156866e-06, |
|
"loss": 1.2898, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.5353291893949004, |
|
"grad_norm": 1.5206662969722375, |
|
"learning_rate": 5.256813589007571e-06, |
|
"loss": 1.2967, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.5378662945579095, |
|
"grad_norm": 1.5295277898061372, |
|
"learning_rate": 5.212564862322355e-06, |
|
"loss": 1.2987, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.5404033997209184, |
|
"grad_norm": 1.5121887795702076, |
|
"learning_rate": 5.168299451224665e-06, |
|
"loss": 1.2859, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.5429405048839274, |
|
"grad_norm": 1.5405224763949017, |
|
"learning_rate": 5.124020830147525e-06, |
|
"loss": 1.2942, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.5454776100469364, |
|
"grad_norm": 1.5241647102261355, |
|
"learning_rate": 5.079732474560821e-06, |
|
"loss": 1.2967, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.5480147152099455, |
|
"grad_norm": 1.5740459163455902, |
|
"learning_rate": 5.035437860698508e-06, |
|
"loss": 1.2792, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.5505518203729545, |
|
"grad_norm": 1.455514390960437, |
|
"learning_rate": 4.991140465285762e-06, |
|
"loss": 1.2722, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.5530889255359634, |
|
"grad_norm": 1.4543563727275153, |
|
"learning_rate": 4.94684376526608e-06, |
|
"loss": 1.294, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.5556260306989724, |
|
"grad_norm": 1.415880887469612, |
|
"learning_rate": 4.902551237528387e-06, |
|
"loss": 1.2898, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.5581631358619815, |
|
"grad_norm": 1.5027054686198038, |
|
"learning_rate": 4.858266358634109e-06, |
|
"loss": 1.2943, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.5607002410249905, |
|
"grad_norm": 1.495415983271707, |
|
"learning_rate": 4.813992604544319e-06, |
|
"loss": 1.309, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.5632373461879995, |
|
"grad_norm": 1.5256997169566149, |
|
"learning_rate": 4.769733450346885e-06, |
|
"loss": 1.2941, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.5657744513510085, |
|
"grad_norm": 1.4207029137255274, |
|
"learning_rate": 4.725492369983721e-06, |
|
"loss": 1.2808, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.5683115565140175, |
|
"grad_norm": 1.5127789303300487, |
|
"learning_rate": 4.6812728359781064e-06, |
|
"loss": 1.2886, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.5708486616770265, |
|
"grad_norm": 1.4480660719145084, |
|
"learning_rate": 4.637078319162127e-06, |
|
"loss": 1.2848, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.5733857668400355, |
|
"grad_norm": 1.4818074524822986, |
|
"learning_rate": 4.592912288404251e-06, |
|
"loss": 1.2747, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.5759228720030445, |
|
"grad_norm": 1.496021447098999, |
|
"learning_rate": 4.5487782103370445e-06, |
|
"loss": 1.2889, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.5784599771660536, |
|
"grad_norm": 1.4726400774082267, |
|
"learning_rate": 4.504679549085077e-06, |
|
"loss": 1.2956, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.5809970823290626, |
|
"grad_norm": 1.492109044123467, |
|
"learning_rate": 4.460619765993025e-06, |
|
"loss": 1.2974, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.5835341874920715, |
|
"grad_norm": 1.4567515467141523, |
|
"learning_rate": 4.416602319353974e-06, |
|
"loss": 1.29, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.5860712926550805, |
|
"grad_norm": 1.460535915347314, |
|
"learning_rate": 4.3726306641379915e-06, |
|
"loss": 1.2745, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.5886083978180896, |
|
"grad_norm": 1.4651576736560898, |
|
"learning_rate": 4.328708251720924e-06, |
|
"loss": 1.2739, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.5911455029810986, |
|
"grad_norm": 1.6196158147206026, |
|
"learning_rate": 4.2848385296135165e-06, |
|
"loss": 1.3101, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.5936826081441076, |
|
"grad_norm": 1.527439804056797, |
|
"learning_rate": 4.241024941190792e-06, |
|
"loss": 1.2771, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.5962197133071165, |
|
"grad_norm": 1.4872645401772542, |
|
"learning_rate": 4.197270925421796e-06, |
|
"loss": 1.2877, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.5987568184701256, |
|
"grad_norm": 1.4908027336325684, |
|
"learning_rate": 4.153579916599659e-06, |
|
"loss": 1.2969, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.6012939236331346, |
|
"grad_norm": 1.370441167203172, |
|
"learning_rate": 4.109955344072036e-06, |
|
"loss": 1.2745, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.6038310287961436, |
|
"grad_norm": 1.457801692594122, |
|
"learning_rate": 4.066400631971938e-06, |
|
"loss": 1.2714, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.6063681339591526, |
|
"grad_norm": 1.5047248748403204, |
|
"learning_rate": 4.022919198948966e-06, |
|
"loss": 1.2759, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.6089052391221617, |
|
"grad_norm": 1.5232259549425642, |
|
"learning_rate": 3.979514457900982e-06, |
|
"loss": 1.2845, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.6114423442851706, |
|
"grad_norm": 1.4170452963382303, |
|
"learning_rate": 3.936189815706219e-06, |
|
"loss": 1.2833, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.6139794494481796, |
|
"grad_norm": 1.5010818180720833, |
|
"learning_rate": 3.8929486729558775e-06, |
|
"loss": 1.2941, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.6165165546111886, |
|
"grad_norm": 1.4420347497785075, |
|
"learning_rate": 3.849794423687212e-06, |
|
"loss": 1.2775, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.6190536597741977, |
|
"grad_norm": 1.520468191298721, |
|
"learning_rate": 3.8067304551171247e-06, |
|
"loss": 1.2627, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.6215907649372067, |
|
"grad_norm": 1.4753704862458017, |
|
"learning_rate": 3.7637601473763035e-06, |
|
"loss": 1.284, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.6241278701002156, |
|
"grad_norm": 1.469877746697786, |
|
"learning_rate": 3.7208868732439145e-06, |
|
"loss": 1.2927, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.6266649752632246, |
|
"grad_norm": 1.4601548141707599, |
|
"learning_rate": 3.6781139978828606e-06, |
|
"loss": 1.2947, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.6292020804262337, |
|
"grad_norm": 1.5092438879342172, |
|
"learning_rate": 3.6354448785756558e-06, |
|
"loss": 1.2843, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.6317391855892427, |
|
"grad_norm": 1.4368007055488876, |
|
"learning_rate": 3.592882864460905e-06, |
|
"loss": 1.265, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.6342762907522517, |
|
"grad_norm": 1.4672055312297339, |
|
"learning_rate": 3.5504312962704245e-06, |
|
"loss": 1.2709, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.6368133959152606, |
|
"grad_norm": 1.4995451462382032, |
|
"learning_rate": 3.5080935060670345e-06, |
|
"loss": 1.2679, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.6393505010782697, |
|
"grad_norm": 1.458116276283539, |
|
"learning_rate": 3.465872816983008e-06, |
|
"loss": 1.2821, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.6418876062412787, |
|
"grad_norm": 1.4447640379158275, |
|
"learning_rate": 3.4237725429592507e-06, |
|
"loss": 1.2865, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.6444247114042877, |
|
"grad_norm": 1.3965736731366891, |
|
"learning_rate": 3.3817959884851735e-06, |
|
"loss": 1.2698, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.6469618165672967, |
|
"grad_norm": 1.4648194884238146, |
|
"learning_rate": 3.3399464483393272e-06, |
|
"loss": 1.291, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.6494989217303058, |
|
"grad_norm": 1.4271493727093771, |
|
"learning_rate": 3.298227207330792e-06, |
|
"loss": 1.2765, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.6520360268933147, |
|
"grad_norm": 1.5962462881292958, |
|
"learning_rate": 3.256641540041346e-06, |
|
"loss": 1.2905, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.6545731320563237, |
|
"grad_norm": 1.4501719681830862, |
|
"learning_rate": 3.2151927105684423e-06, |
|
"loss": 1.298, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.6571102372193327, |
|
"grad_norm": 1.5186349976521718, |
|
"learning_rate": 3.1738839722690085e-06, |
|
"loss": 1.2742, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.6596473423823418, |
|
"grad_norm": 1.3901740398219145, |
|
"learning_rate": 3.1327185675040907e-06, |
|
"loss": 1.2769, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.6621844475453508, |
|
"grad_norm": 1.4618375024699428, |
|
"learning_rate": 3.0916997273843454e-06, |
|
"loss": 1.2938, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.6647215527083598, |
|
"grad_norm": 1.4675982361039484, |
|
"learning_rate": 3.0508306715164416e-06, |
|
"loss": 1.2913, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.6672586578713687, |
|
"grad_norm": 1.5086185778550512, |
|
"learning_rate": 3.0101146077503386e-06, |
|
"loss": 1.2777, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.6697957630343778, |
|
"grad_norm": 1.4573487737483761, |
|
"learning_rate": 2.9695547319275093e-06, |
|
"loss": 1.2633, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.6723328681973868, |
|
"grad_norm": 1.43323809832072, |
|
"learning_rate": 2.9291542276300866e-06, |
|
"loss": 1.289, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.6748699733603958, |
|
"grad_norm": 1.420082813628849, |
|
"learning_rate": 2.8889162659309832e-06, |
|
"loss": 1.2729, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.6774070785234048, |
|
"grad_norm": 1.424990219399345, |
|
"learning_rate": 2.848844005145004e-06, |
|
"loss": 1.3024, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.6799441836864138, |
|
"grad_norm": 1.4395745448115305, |
|
"learning_rate": 2.808940590580922e-06, |
|
"loss": 1.2845, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.6824812888494228, |
|
"grad_norm": 1.4802086998925903, |
|
"learning_rate": 2.769209154294623e-06, |
|
"loss": 1.2844, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.6850183940124318, |
|
"grad_norm": 1.491623196795251, |
|
"learning_rate": 2.7296528148432565e-06, |
|
"loss": 1.2683, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.6875554991754408, |
|
"grad_norm": 1.416764375906272, |
|
"learning_rate": 2.690274677040462e-06, |
|
"loss": 1.2776, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.6900926043384499, |
|
"grad_norm": 1.519033593874162, |
|
"learning_rate": 2.6510778317126597e-06, |
|
"loss": 1.2807, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.6926297095014589, |
|
"grad_norm": 1.3894691132515595, |
|
"learning_rate": 2.6120653554564624e-06, |
|
"loss": 1.2777, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.6951668146644678, |
|
"grad_norm": 1.4049713206074572, |
|
"learning_rate": 2.573240310397187e-06, |
|
"loss": 1.2736, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.6977039198274768, |
|
"grad_norm": 1.4357642101900112, |
|
"learning_rate": 2.5346057439484923e-06, |
|
"loss": 1.2803, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.7002410249904859, |
|
"grad_norm": 1.490167340198777, |
|
"learning_rate": 2.4961646885732034e-06, |
|
"loss": 1.2744, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.7027781301534949, |
|
"grad_norm": 1.4179312953545702, |
|
"learning_rate": 2.4579201615452812e-06, |
|
"loss": 1.2842, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.7053152353165039, |
|
"grad_norm": 1.6140649717523825, |
|
"learning_rate": 2.4198751647129896e-06, |
|
"loss": 1.2963, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.7078523404795128, |
|
"grad_norm": 1.530468810042779, |
|
"learning_rate": 2.3820326842632894e-06, |
|
"loss": 1.2637, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.7103894456425219, |
|
"grad_norm": 1.412588711043796, |
|
"learning_rate": 2.344395690487441e-06, |
|
"loss": 1.2856, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.7129265508055309, |
|
"grad_norm": 1.5447254908338892, |
|
"learning_rate": 2.3069671375478645e-06, |
|
"loss": 1.2848, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.7154636559685399, |
|
"grad_norm": 1.43691808431636, |
|
"learning_rate": 2.2697499632462695e-06, |
|
"loss": 1.2536, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.7180007611315489, |
|
"grad_norm": 1.5560428170621574, |
|
"learning_rate": 2.2327470887930595e-06, |
|
"loss": 1.3015, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.720537866294558, |
|
"grad_norm": 1.450374747082515, |
|
"learning_rate": 2.195961418578041e-06, |
|
"loss": 1.2744, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.7230749714575669, |
|
"grad_norm": 1.484538648746269, |
|
"learning_rate": 2.159395839942464e-06, |
|
"loss": 1.2664, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.7256120766205759, |
|
"grad_norm": 1.3953379506558543, |
|
"learning_rate": 2.1230532229523865e-06, |
|
"loss": 1.2489, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.7281491817835849, |
|
"grad_norm": 1.4415654155573785, |
|
"learning_rate": 2.086936420173399e-06, |
|
"loss": 1.2719, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.730686286946594, |
|
"grad_norm": 1.4271516629005172, |
|
"learning_rate": 2.051048266446727e-06, |
|
"loss": 1.2652, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.733223392109603, |
|
"grad_norm": 1.4951992832082914, |
|
"learning_rate": 2.0153915786667203e-06, |
|
"loss": 1.26, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.735760497272612, |
|
"grad_norm": 1.4351479751585414, |
|
"learning_rate": 1.9799691555597555e-06, |
|
"loss": 1.2881, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.7382976024356209, |
|
"grad_norm": 1.474899241565124, |
|
"learning_rate": 1.9447837774645513e-06, |
|
"loss": 1.2702, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.74083470759863, |
|
"grad_norm": 1.4426835070499822, |
|
"learning_rate": 1.9098382061139503e-06, |
|
"loss": 1.2699, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.743371812761639, |
|
"grad_norm": 1.4876818985570295, |
|
"learning_rate": 1.8751351844181414e-06, |
|
"loss": 1.2612, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.745908917924648, |
|
"grad_norm": 1.4360645410392319, |
|
"learning_rate": 1.8406774362493662e-06, |
|
"loss": 1.2754, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.748446023087657, |
|
"grad_norm": 1.4473888665732064, |
|
"learning_rate": 1.8064676662281206e-06, |
|
"loss": 1.2902, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.750983128250666, |
|
"grad_norm": 1.4434612838312966, |
|
"learning_rate": 1.7725085595108682e-06, |
|
"loss": 1.273, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.753520233413675, |
|
"grad_norm": 1.558136105535075, |
|
"learning_rate": 1.7388027815792725e-06, |
|
"loss": 1.2787, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.756057338576684, |
|
"grad_norm": 1.4724878594646564, |
|
"learning_rate": 1.705352978030993e-06, |
|
"loss": 1.2627, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.758594443739693, |
|
"grad_norm": 1.4768497018650097, |
|
"learning_rate": 1.672161774372022e-06, |
|
"loss": 1.2911, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.7611315489027021, |
|
"grad_norm": 1.4598692131173956, |
|
"learning_rate": 1.639231775810602e-06, |
|
"loss": 1.2907, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.763668654065711, |
|
"grad_norm": 1.3971487709781405, |
|
"learning_rate": 1.6065655670527546e-06, |
|
"loss": 1.2632, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.76620575922872, |
|
"grad_norm": 1.4196228285690422, |
|
"learning_rate": 1.574165712099392e-06, |
|
"loss": 1.2542, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.768742864391729, |
|
"grad_norm": 1.4395590200787511, |
|
"learning_rate": 1.542034754045067e-06, |
|
"loss": 1.2693, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.7712799695547381, |
|
"grad_norm": 1.4538143237649903, |
|
"learning_rate": 1.5101752148783705e-06, |
|
"loss": 1.2728, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.7738170747177471, |
|
"grad_norm": 1.4483981816763403, |
|
"learning_rate": 1.4785895952839735e-06, |
|
"loss": 1.2671, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.7763541798807561, |
|
"grad_norm": 1.5335192207213328, |
|
"learning_rate": 1.447280374446346e-06, |
|
"loss": 1.2778, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.778891285043765, |
|
"grad_norm": 1.4504666284348766, |
|
"learning_rate": 1.4162500098551608e-06, |
|
"loss": 1.276, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.7814283902067741, |
|
"grad_norm": 1.454412830474016, |
|
"learning_rate": 1.385500937112415e-06, |
|
"loss": 1.2804, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.7839654953697831, |
|
"grad_norm": 1.462536001446098, |
|
"learning_rate": 1.3550355697412386e-06, |
|
"loss": 1.2586, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.7865026005327921, |
|
"grad_norm": 1.4861860594882876, |
|
"learning_rate": 1.3248562989964719e-06, |
|
"loss": 1.2843, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.7890397056958011, |
|
"grad_norm": 1.391241218546658, |
|
"learning_rate": 1.2949654936769622e-06, |
|
"loss": 1.2723, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.79157681085881, |
|
"grad_norm": 1.412674356321388, |
|
"learning_rate": 1.2653654999396436e-06, |
|
"loss": 1.2621, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.7941139160218191, |
|
"grad_norm": 1.406341007739084, |
|
"learning_rate": 1.2360586411153747e-06, |
|
"loss": 1.2897, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.7966510211848281, |
|
"grad_norm": 1.4125498017483746, |
|
"learning_rate": 1.2070472175265857e-06, |
|
"loss": 1.2657, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.7991881263478371, |
|
"grad_norm": 1.575395352386111, |
|
"learning_rate": 1.1783335063067286e-06, |
|
"loss": 1.2974, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.8017252315108461, |
|
"grad_norm": 1.4632409776646316, |
|
"learning_rate": 1.1499197612215269e-06, |
|
"loss": 1.2914, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.8042623366738552, |
|
"grad_norm": 1.420307782085356, |
|
"learning_rate": 1.1218082124920903e-06, |
|
"loss": 1.2583, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.8067994418368641, |
|
"grad_norm": 1.4073518626370982, |
|
"learning_rate": 1.0940010666198575e-06, |
|
"loss": 1.2588, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.8093365469998731, |
|
"grad_norm": 1.427712685491864, |
|
"learning_rate": 1.0665005062134015e-06, |
|
"loss": 1.2641, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.8118736521628821, |
|
"grad_norm": 1.4042233353051128, |
|
"learning_rate": 1.0393086898171234e-06, |
|
"loss": 1.2623, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.8144107573258912, |
|
"grad_norm": 1.4353551061219325, |
|
"learning_rate": 1.0124277517418196e-06, |
|
"loss": 1.2701, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.8169478624889002, |
|
"grad_norm": 1.4714738106408498, |
|
"learning_rate": 9.858598018971599e-07, |
|
"loss": 1.2665, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.8194849676519091, |
|
"grad_norm": 1.3867710691517015, |
|
"learning_rate": 9.596069256260792e-07, |
|
"loss": 1.2811, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.8220220728149181, |
|
"grad_norm": 1.5556697110120234, |
|
"learning_rate": 9.336711835410972e-07, |
|
"loss": 1.2577, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.8245591779779272, |
|
"grad_norm": 1.4677232808017586, |
|
"learning_rate": 9.080546113625738e-07, |
|
"loss": 1.2675, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.8270962831409362, |
|
"grad_norm": 1.4009076826572764, |
|
"learning_rate": 8.827592197589341e-07, |
|
"loss": 1.2573, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.8296333883039452, |
|
"grad_norm": 1.3584163410682717, |
|
"learning_rate": 8.577869941888389e-07, |
|
"loss": 1.2654, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.8321704934669542, |
|
"grad_norm": 1.4763004523041792, |
|
"learning_rate": 8.331398947453512e-07, |
|
"loss": 1.271, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.8347075986299632, |
|
"grad_norm": 1.432107775038367, |
|
"learning_rate": 8.08819856002081e-07, |
|
"loss": 1.2771, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.8372447037929722, |
|
"grad_norm": 1.4636493219573536, |
|
"learning_rate": 7.848287868613441e-07, |
|
"loss": 1.2511, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.8397818089559812, |
|
"grad_norm": 1.501456048501624, |
|
"learning_rate": 7.611685704043281e-07, |
|
"loss": 1.2724, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.8423189141189902, |
|
"grad_norm": 1.4533194309629769, |
|
"learning_rate": 7.378410637432848e-07, |
|
"loss": 1.2761, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.8448560192819993, |
|
"grad_norm": 1.4930040288043631, |
|
"learning_rate": 7.148480978757694e-07, |
|
"loss": 1.2808, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.8473931244450083, |
|
"grad_norm": 1.4327631888495864, |
|
"learning_rate": 6.921914775409211e-07, |
|
"loss": 1.2764, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.8499302296080172, |
|
"grad_norm": 1.4066505744498654, |
|
"learning_rate": 6.698729810778065e-07, |
|
"loss": 1.2724, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.8524673347710262, |
|
"grad_norm": 1.4214214237910756, |
|
"learning_rate": 6.478943602858373e-07, |
|
"loss": 1.2703, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.8550044399340353, |
|
"grad_norm": 1.4609797404161982, |
|
"learning_rate": 6.262573402872707e-07, |
|
"loss": 1.2702, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.8575415450970443, |
|
"grad_norm": 1.4426076897314533, |
|
"learning_rate": 6.04963619391799e-07, |
|
"loss": 1.2652, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.8600786502600533, |
|
"grad_norm": 1.4557782632700174, |
|
"learning_rate": 5.840148689632536e-07, |
|
"loss": 1.2628, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.8626157554230622, |
|
"grad_norm": 1.4465949605683495, |
|
"learning_rate": 5.634127332884143e-07, |
|
"loss": 1.2649, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.8651528605860713, |
|
"grad_norm": 1.4543385042228827, |
|
"learning_rate": 5.431588294479479e-07, |
|
"loss": 1.2863, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.8676899657490803, |
|
"grad_norm": 1.4211990179028964, |
|
"learning_rate": 5.232547471894839e-07, |
|
"loss": 1.2603, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.8702270709120893, |
|
"grad_norm": 1.4970252757505178, |
|
"learning_rate": 5.037020488028322e-07, |
|
"loss": 1.2659, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.8727641760750983, |
|
"grad_norm": 1.476846612856639, |
|
"learning_rate": 4.845022689973567e-07, |
|
"loss": 1.2622, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.8753012812381074, |
|
"grad_norm": 1.3975933791175674, |
|
"learning_rate": 4.656569147815171e-07, |
|
"loss": 1.2675, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.8778383864011163, |
|
"grad_norm": 1.4066115466115592, |
|
"learning_rate": 4.471674653445801e-07, |
|
"loss": 1.2657, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.8803754915641253, |
|
"grad_norm": 1.4093052515025426, |
|
"learning_rate": 4.290353719405199e-07, |
|
"loss": 1.2622, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.8829125967271343, |
|
"grad_norm": 1.517434872609148, |
|
"learning_rate": 4.1126205777410054e-07, |
|
"loss": 1.2658, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.8854497018901434, |
|
"grad_norm": 1.3671982309013966, |
|
"learning_rate": 3.938489178891769e-07, |
|
"loss": 1.26, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.8879868070531524, |
|
"grad_norm": 1.3848746120915914, |
|
"learning_rate": 3.767973190591906e-07, |
|
"loss": 1.252, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.8905239122161613, |
|
"grad_norm": 1.404335518132526, |
|
"learning_rate": 3.6010859967988975e-07, |
|
"loss": 1.2684, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.8930610173791703, |
|
"grad_norm": 1.4696908362694405, |
|
"learning_rate": 3.437840696642797e-07, |
|
"loss": 1.28, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.8955981225421794, |
|
"grad_norm": 1.4394982066957633, |
|
"learning_rate": 3.2782501033980897e-07, |
|
"loss": 1.2596, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.8981352277051884, |
|
"grad_norm": 1.441827806292722, |
|
"learning_rate": 3.1223267434778934e-07, |
|
"loss": 1.2548, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.9006723328681974, |
|
"grad_norm": 1.4029572337771223, |
|
"learning_rate": 2.9700828554508175e-07, |
|
"loss": 1.2714, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.9032094380312063, |
|
"grad_norm": 1.456563644801128, |
|
"learning_rate": 2.82153038908034e-07, |
|
"loss": 1.271, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.9057465431942154, |
|
"grad_norm": 1.4854658320433165, |
|
"learning_rate": 2.6766810043867996e-07, |
|
"loss": 1.2636, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.9082836483572244, |
|
"grad_norm": 1.3976434876269141, |
|
"learning_rate": 2.53554607073227e-07, |
|
"loss": 1.2555, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.9108207535202334, |
|
"grad_norm": 1.4520566750739115, |
|
"learning_rate": 2.3981366659281135e-07, |
|
"loss": 1.2741, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.9133578586832424, |
|
"grad_norm": 1.431635462724296, |
|
"learning_rate": 2.2644635753654832e-07, |
|
"loss": 1.2641, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.9158949638462515, |
|
"grad_norm": 1.4459695881350807, |
|
"learning_rate": 2.1345372911687868e-07, |
|
"loss": 1.2719, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.9184320690092604, |
|
"grad_norm": 1.4523937909973577, |
|
"learning_rate": 2.008368011372136e-07, |
|
"loss": 1.2574, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.9209691741722694, |
|
"grad_norm": 1.448000545062192, |
|
"learning_rate": 1.8859656391188918e-07, |
|
"loss": 1.2678, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.9235062793352784, |
|
"grad_norm": 1.4574912967654818, |
|
"learning_rate": 1.7673397818843696e-07, |
|
"loss": 1.2631, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.9260433844982875, |
|
"grad_norm": 1.469570266612804, |
|
"learning_rate": 1.65249975072172e-07, |
|
"loss": 1.2676, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.9285804896612965, |
|
"grad_norm": 1.4705072480655184, |
|
"learning_rate": 1.5414545595311193e-07, |
|
"loss": 1.2363, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.9311175948243055, |
|
"grad_norm": 1.469943074464241, |
|
"learning_rate": 1.4342129243522241e-07, |
|
"loss": 1.2716, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.9336546999873144, |
|
"grad_norm": 1.4039691988667693, |
|
"learning_rate": 1.3307832626800966e-07, |
|
"loss": 1.2674, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.9361918051503235, |
|
"grad_norm": 1.4357184274036978, |
|
"learning_rate": 1.2311736928044437e-07, |
|
"loss": 1.2662, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.9387289103133325, |
|
"grad_norm": 1.4198942808558601, |
|
"learning_rate": 1.1353920331724666e-07, |
|
"loss": 1.2743, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.9412660154763415, |
|
"grad_norm": 1.472183542066415, |
|
"learning_rate": 1.0434458017751392e-07, |
|
"loss": 1.2505, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.9438031206393505, |
|
"grad_norm": 1.4530560276575668, |
|
"learning_rate": 9.553422155571257e-08, |
|
"loss": 1.2637, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.9463402258023595, |
|
"grad_norm": 1.4342800198901315, |
|
"learning_rate": 8.710881898503276e-08, |
|
"loss": 1.2706, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.9488773309653685, |
|
"grad_norm": 1.4769111711160674, |
|
"learning_rate": 7.906903378310738e-08, |
|
"loss": 1.2717, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.9514144361283775, |
|
"grad_norm": 1.4434198544006103, |
|
"learning_rate": 7.141549700010741e-08, |
|
"loss": 1.2764, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.9539515412913865, |
|
"grad_norm": 1.3967239883734883, |
|
"learning_rate": 6.414880936920665e-08, |
|
"loss": 1.2454, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.9564886464543956, |
|
"grad_norm": 1.4745766429828837, |
|
"learning_rate": 5.726954125943318e-08, |
|
"loss": 1.2747, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.9590257516174046, |
|
"grad_norm": 1.3354472782196753, |
|
"learning_rate": 5.0778232630897536e-08, |
|
"loss": 1.2717, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.9615628567804135, |
|
"grad_norm": 1.5048063707346293, |
|
"learning_rate": 4.4675392992412634e-08, |
|
"loss": 1.2728, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.9640999619434225, |
|
"grad_norm": 1.4399837174477832, |
|
"learning_rate": 3.896150136150134e-08, |
|
"loss": 1.2826, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.9666370671064316, |
|
"grad_norm": 1.4450380885385077, |
|
"learning_rate": 3.3637006226797665e-08, |
|
"loss": 1.2534, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.9691741722694406, |
|
"grad_norm": 1.35673950609508, |
|
"learning_rate": 2.8702325512844908e-08, |
|
"loss": 1.2609, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.9717112774324496, |
|
"grad_norm": 1.43190405721669, |
|
"learning_rate": 2.4157846547292473e-08, |
|
"loss": 1.2787, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.9742483825954585, |
|
"grad_norm": 1.4245067018508633, |
|
"learning_rate": 2.000392603049517e-08, |
|
"loss": 1.2665, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.9767854877584676, |
|
"grad_norm": 1.4112885660620007, |
|
"learning_rate": 1.6240890007510612e-08, |
|
"loss": 1.2785, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.9793225929214766, |
|
"grad_norm": 1.499341122900986, |
|
"learning_rate": 1.286903384251581e-08, |
|
"loss": 1.2539, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.9818596980844856, |
|
"grad_norm": 1.4494246005853764, |
|
"learning_rate": 9.888622195615705e-09, |
|
"loss": 1.2725, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.9843968032474946, |
|
"grad_norm": 1.421431046100693, |
|
"learning_rate": 7.299889002075344e-09, |
|
"loss": 1.2726, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.9869339084105037, |
|
"grad_norm": 1.4209346862475516, |
|
"learning_rate": 5.103037453954573e-09, |
|
"loss": 1.2548, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.9894710135735126, |
|
"grad_norm": 1.3751824191752333, |
|
"learning_rate": 3.2982399841618996e-09, |
|
"loss": 1.2859, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.9920081187365216, |
|
"grad_norm": 1.4627888810880714, |
|
"learning_rate": 1.8856382529192085e-09, |
|
"loss": 1.2842, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.9945452238995306, |
|
"grad_norm": 1.3905016842842302, |
|
"learning_rate": 8.653431366406617e-10, |
|
"loss": 1.2447, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.9970823290625397, |
|
"grad_norm": 1.5470180096733397, |
|
"learning_rate": 2.374347192335424e-10, |
|
"loss": 1.2707, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.9996194342255487, |
|
"grad_norm": 1.4506027900307656, |
|
"learning_rate": 1.9622858088430564e-12, |
|
"loss": 1.2737, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.9998731447418495, |
|
"step": 3941, |
|
"total_flos": 3.7575827488610714e+18, |
|
"train_loss": 1.3289946492206504, |
|
"train_runtime": 13442.1001, |
|
"train_samples_per_second": 37.53, |
|
"train_steps_per_second": 0.293 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 3941, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.7575827488610714e+18, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|