{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.2507682851874616,
  "eval_steps": 204,
  "global_step": 204,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.001229256299938537,
      "grad_norm": 0.19411148130893707,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 1.1612,
      "step": 1
    },
    {
      "epoch": 0.001229256299938537,
      "eval_loss": 2.1468453407287598,
      "eval_runtime": 66.4837,
      "eval_samples_per_second": 10.303,
      "eval_steps_per_second": 5.159,
      "step": 1
    },
    {
      "epoch": 0.002458512599877074,
      "grad_norm": 0.2264145463705063,
      "learning_rate": 4.000000000000001e-06,
      "loss": 1.4401,
      "step": 2
    },
    {
      "epoch": 0.0036877688998156115,
      "grad_norm": 0.2364473193883896,
      "learning_rate": 6e-06,
      "loss": 1.4676,
      "step": 3
    },
    {
      "epoch": 0.004917025199754148,
      "grad_norm": 0.24018821120262146,
      "learning_rate": 8.000000000000001e-06,
      "loss": 1.3851,
      "step": 4
    },
    {
      "epoch": 0.006146281499692686,
      "grad_norm": 0.23238497972488403,
      "learning_rate": 1e-05,
      "loss": 1.213,
      "step": 5
    },
    {
      "epoch": 0.007375537799631223,
      "grad_norm": 0.24634625017642975,
      "learning_rate": 1.2e-05,
      "loss": 1.2627,
      "step": 6
    },
    {
      "epoch": 0.008604794099569761,
      "grad_norm": 0.26495596766471863,
      "learning_rate": 1.4000000000000001e-05,
      "loss": 1.3908,
      "step": 7
    },
    {
      "epoch": 0.009834050399508297,
      "grad_norm": 0.2719455361366272,
      "learning_rate": 1.6000000000000003e-05,
      "loss": 1.3814,
      "step": 8
    },
    {
      "epoch": 0.011063306699446834,
      "grad_norm": 0.26454323530197144,
      "learning_rate": 1.8e-05,
      "loss": 1.2438,
      "step": 9
    },
    {
      "epoch": 0.012292562999385371,
      "grad_norm": 0.3004608750343323,
      "learning_rate": 2e-05,
      "loss": 1.3694,
      "step": 10
    },
    {
      "epoch": 0.013521819299323909,
      "grad_norm": 0.3035408854484558,
      "learning_rate": 2.2000000000000003e-05,
      "loss": 1.4792,
      "step": 11
    },
    {
      "epoch": 0.014751075599262446,
      "grad_norm": 0.4270775020122528,
      "learning_rate": 2.4e-05,
      "loss": 1.1673,
      "step": 12
    },
    {
      "epoch": 0.015980331899200985,
      "grad_norm": 0.4388391971588135,
      "learning_rate": 2.6000000000000002e-05,
      "loss": 1.5171,
      "step": 13
    },
    {
      "epoch": 0.017209588199139522,
      "grad_norm": 0.7133700847625732,
      "learning_rate": 2.8000000000000003e-05,
      "loss": 1.0732,
      "step": 14
    },
    {
      "epoch": 0.01843884449907806,
      "grad_norm": 1.026840329170227,
      "learning_rate": 3e-05,
      "loss": 1.1705,
      "step": 15
    },
    {
      "epoch": 0.019668100799016593,
      "grad_norm": 0.7934454679489136,
      "learning_rate": 3.2000000000000005e-05,
      "loss": 1.3509,
      "step": 16
    },
    {
      "epoch": 0.02089735709895513,
      "grad_norm": 0.8138520121574402,
      "learning_rate": 3.4000000000000007e-05,
      "loss": 1.181,
      "step": 17
    },
    {
      "epoch": 0.022126613398893668,
      "grad_norm": 1.7830528020858765,
      "learning_rate": 3.6e-05,
      "loss": 2.1836,
      "step": 18
    },
    {
      "epoch": 0.023355869698832205,
      "grad_norm": 10.527496337890625,
      "learning_rate": 3.8e-05,
      "loss": 3.3749,
      "step": 19
    },
    {
      "epoch": 0.024585125998770743,
      "grad_norm": 6.364173889160156,
      "learning_rate": 4e-05,
      "loss": 2.9191,
      "step": 20
    },
    {
      "epoch": 0.02581438229870928,
      "grad_norm": 7.087876796722412,
      "learning_rate": 4.2e-05,
      "loss": 3.0788,
      "step": 21
    },
    {
      "epoch": 0.027043638598647817,
      "grad_norm": 5.370169639587402,
      "learning_rate": 4.4000000000000006e-05,
      "loss": 2.7809,
      "step": 22
    },
    {
      "epoch": 0.028272894898586354,
      "grad_norm": 4.118806838989258,
      "learning_rate": 4.600000000000001e-05,
      "loss": 2.7475,
      "step": 23
    },
    {
      "epoch": 0.02950215119852489,
      "grad_norm": 4.46057653427124,
      "learning_rate": 4.8e-05,
      "loss": 2.6906,
      "step": 24
    },
    {
      "epoch": 0.03073140749846343,
      "grad_norm": 3.8601913452148438,
      "learning_rate": 5e-05,
      "loss": 2.652,
      "step": 25
    },
    {
      "epoch": 0.03196066379840197,
      "grad_norm": 0.19972144067287445,
      "learning_rate": 5.2000000000000004e-05,
      "loss": 1.1845,
      "step": 26
    },
    {
      "epoch": 0.03318992009834051,
      "grad_norm": 0.21230019629001617,
      "learning_rate": 5.4000000000000005e-05,
      "loss": 1.2875,
      "step": 27
    },
    {
      "epoch": 0.034419176398279044,
      "grad_norm": 0.22694356739521027,
      "learning_rate": 5.6000000000000006e-05,
      "loss": 1.1829,
      "step": 28
    },
    {
      "epoch": 0.03564843269821758,
      "grad_norm": 0.2587474584579468,
      "learning_rate": 5.8e-05,
      "loss": 1.3909,
      "step": 29
    },
    {
      "epoch": 0.03687768899815612,
      "grad_norm": 0.24409259855747223,
      "learning_rate": 6e-05,
      "loss": 1.1671,
      "step": 30
    },
    {
      "epoch": 0.03810694529809465,
      "grad_norm": 0.26323097944259644,
      "learning_rate": 6.2e-05,
      "loss": 1.2055,
      "step": 31
    },
    {
      "epoch": 0.03933620159803319,
      "grad_norm": 0.2842409908771515,
      "learning_rate": 6.400000000000001e-05,
      "loss": 1.3051,
      "step": 32
    },
    {
      "epoch": 0.040565457897971724,
      "grad_norm": 0.32476744055747986,
      "learning_rate": 6.6e-05,
      "loss": 1.0828,
      "step": 33
    },
    {
      "epoch": 0.04179471419791026,
      "grad_norm": 0.32893380522727966,
      "learning_rate": 6.800000000000001e-05,
      "loss": 1.3896,
      "step": 34
    },
    {
      "epoch": 0.0430239704978488,
      "grad_norm": 0.3359004855155945,
      "learning_rate": 7e-05,
      "loss": 1.3713,
      "step": 35
    },
    {
      "epoch": 0.044253226797787336,
      "grad_norm": 0.5471646189689636,
      "learning_rate": 7.2e-05,
      "loss": 1.2572,
      "step": 36
    },
    {
      "epoch": 0.04548248309772587,
      "grad_norm": 0.5404387712478638,
      "learning_rate": 7.4e-05,
      "loss": 1.2321,
      "step": 37
    },
    {
      "epoch": 0.04671173939766441,
      "grad_norm": 1.0199828147888184,
      "learning_rate": 7.6e-05,
      "loss": 0.8434,
      "step": 38
    },
    {
      "epoch": 0.04794099569760295,
      "grad_norm": 1.5890088081359863,
      "learning_rate": 7.800000000000001e-05,
      "loss": 0.7781,
      "step": 39
    },
    {
      "epoch": 0.049170251997541485,
      "grad_norm": 0.7897126078605652,
      "learning_rate": 8e-05,
      "loss": 1.1982,
      "step": 40
    },
    {
      "epoch": 0.05039950829748002,
      "grad_norm": 0.7874982953071594,
      "learning_rate": 8.2e-05,
      "loss": 1.3432,
      "step": 41
    },
    {
      "epoch": 0.05162876459741856,
      "grad_norm": 1.3902230262756348,
      "learning_rate": 8.4e-05,
      "loss": 1.7817,
      "step": 42
    },
    {
      "epoch": 0.0528580208973571,
      "grad_norm": 4.006369590759277,
      "learning_rate": 8.6e-05,
      "loss": 1.8832,
      "step": 43
    },
    {
      "epoch": 0.054087277197295634,
      "grad_norm": 7.2500996589660645,
      "learning_rate": 8.800000000000001e-05,
      "loss": 1.3256,
      "step": 44
    },
    {
      "epoch": 0.05531653349723417,
      "grad_norm": 5.088122844696045,
      "learning_rate": 9e-05,
      "loss": 1.4272,
      "step": 45
    },
    {
      "epoch": 0.05654578979717271,
      "grad_norm": 2.9680252075195312,
      "learning_rate": 9.200000000000001e-05,
      "loss": 1.8063,
      "step": 46
    },
    {
      "epoch": 0.057775046097111246,
      "grad_norm": 3.4886820316314697,
      "learning_rate": 9.4e-05,
      "loss": 1.7842,
      "step": 47
    },
    {
      "epoch": 0.05900430239704978,
      "grad_norm": 2.635120153427124,
      "learning_rate": 9.6e-05,
      "loss": 1.7775,
      "step": 48
    },
    {
      "epoch": 0.06023355869698832,
      "grad_norm": 2.7715940475463867,
      "learning_rate": 9.8e-05,
      "loss": 1.6795,
      "step": 49
    },
    {
      "epoch": 0.06146281499692686,
      "grad_norm": 4.598182678222656,
      "learning_rate": 0.0001,
      "loss": 2.0141,
      "step": 50
    },
    {
      "epoch": 0.0626920712968654,
      "grad_norm": 0.4595154821872711,
      "learning_rate": 9.999957617159031e-05,
      "loss": 1.1302,
      "step": 51
    },
    {
      "epoch": 0.06392132759680394,
      "grad_norm": 0.3996050953865051,
      "learning_rate": 9.999830469354645e-05,
      "loss": 1.3499,
      "step": 52
    },
    {
      "epoch": 0.06515058389674247,
      "grad_norm": 0.4248620867729187,
      "learning_rate": 9.999618558742398e-05,
      "loss": 1.4393,
      "step": 53
    },
    {
      "epoch": 0.06637984019668101,
      "grad_norm": 0.37063130736351013,
      "learning_rate": 9.999321888914836e-05,
      "loss": 1.4761,
      "step": 54
    },
    {
      "epoch": 0.06760909649661954,
      "grad_norm": 0.3327302038669586,
      "learning_rate": 9.998940464901447e-05,
      "loss": 1.1365,
      "step": 55
    },
    {
      "epoch": 0.06883835279655809,
      "grad_norm": 0.3424387276172638,
      "learning_rate": 9.998474293168562e-05,
      "loss": 1.2037,
      "step": 56
    },
    {
      "epoch": 0.07006760909649662,
      "grad_norm": 0.34453633427619934,
      "learning_rate": 9.997923381619256e-05,
      "loss": 0.9586,
      "step": 57
    },
    {
      "epoch": 0.07129686539643516,
      "grad_norm": 0.3327544033527374,
      "learning_rate": 9.997287739593206e-05,
      "loss": 1.3026,
      "step": 58
    },
    {
      "epoch": 0.0725261216963737,
      "grad_norm": 0.33542299270629883,
      "learning_rate": 9.996567377866537e-05,
      "loss": 1.1601,
      "step": 59
    },
    {
      "epoch": 0.07375537799631224,
      "grad_norm": 0.5743572115898132,
      "learning_rate": 9.99576230865164e-05,
      "loss": 1.3892,
      "step": 60
    },
    {
      "epoch": 0.07498463429625077,
      "grad_norm": 0.4624180495738983,
      "learning_rate": 9.994872545596966e-05,
      "loss": 1.2519,
      "step": 61
    },
    {
      "epoch": 0.0762138905961893,
      "grad_norm": 0.6259918808937073,
      "learning_rate": 9.993898103786786e-05,
      "loss": 1.315,
      "step": 62
    },
    {
      "epoch": 0.07744314689612784,
      "grad_norm": 0.6186118125915527,
      "learning_rate": 9.992838999740947e-05,
      "loss": 0.877,
      "step": 63
    },
    {
      "epoch": 0.07867240319606637,
      "grad_norm": 0.6788893342018127,
      "learning_rate": 9.991695251414583e-05,
      "loss": 0.886,
      "step": 64
    },
    {
      "epoch": 0.07990165949600492,
      "grad_norm": 0.7688488960266113,
      "learning_rate": 9.990466878197817e-05,
      "loss": 0.7427,
      "step": 65
    },
    {
      "epoch": 0.08113091579594345,
      "grad_norm": 0.6739158630371094,
      "learning_rate": 9.989153900915427e-05,
      "loss": 1.091,
      "step": 66
    },
    {
      "epoch": 0.08236017209588199,
      "grad_norm": 1.0515763759613037,
      "learning_rate": 9.987756341826493e-05,
      "loss": 1.4195,
      "step": 67
    },
    {
      "epoch": 0.08358942839582052,
      "grad_norm": 2.324380397796631,
      "learning_rate": 9.98627422462403e-05,
      "loss": 1.8108,
      "step": 68
    },
    {
      "epoch": 0.08481868469575907,
      "grad_norm": 4.131134510040283,
      "learning_rate": 9.98470757443457e-05,
      "loss": 1.2769,
      "step": 69
    },
    {
      "epoch": 0.0860479409956976,
      "grad_norm": 6.158152103424072,
      "learning_rate": 9.983056417817747e-05,
      "loss": 1.609,
      "step": 70
    },
    {
      "epoch": 0.08727719729563614,
      "grad_norm": 2.710057020187378,
      "learning_rate": 9.981320782765846e-05,
      "loss": 1.6382,
      "step": 71
    },
    {
      "epoch": 0.08850645359557467,
      "grad_norm": 2.729590654373169,
      "learning_rate": 9.979500698703323e-05,
      "loss": 1.8179,
      "step": 72
    },
    {
      "epoch": 0.08973570989551322,
      "grad_norm": 2.1861114501953125,
      "learning_rate": 9.977596196486314e-05,
      "loss": 1.7416,
      "step": 73
    },
    {
      "epoch": 0.09096496619545175,
      "grad_norm": 2.614532947540283,
      "learning_rate": 9.975607308402101e-05,
      "loss": 1.8413,
      "step": 74
    },
    {
      "epoch": 0.09219422249539029,
      "grad_norm": 3.3295183181762695,
      "learning_rate": 9.973534068168579e-05,
      "loss": 2.1946,
      "step": 75
    },
    {
      "epoch": 0.09342347879532882,
      "grad_norm": 0.3009834885597229,
      "learning_rate": 9.97137651093367e-05,
      "loss": 1.1058,
      "step": 76
    },
    {
      "epoch": 0.09465273509526737,
      "grad_norm": 0.2889084815979004,
      "learning_rate": 9.969134673274738e-05,
      "loss": 1.0812,
      "step": 77
    },
    {
      "epoch": 0.0958819913952059,
      "grad_norm": 0.26639047265052795,
      "learning_rate": 9.966808593197959e-05,
      "loss": 1.2787,
      "step": 78
    },
    {
      "epoch": 0.09711124769514444,
      "grad_norm": 0.2839871048927307,
      "learning_rate": 9.964398310137688e-05,
      "loss": 1.2314,
      "step": 79
    },
    {
      "epoch": 0.09834050399508297,
      "grad_norm": 0.29856863617897034,
      "learning_rate": 9.961903864955783e-05,
      "loss": 1.1781,
      "step": 80
    },
    {
      "epoch": 0.09956976029502151,
      "grad_norm": 0.3113296329975128,
      "learning_rate": 9.959325299940914e-05,
      "loss": 1.1297,
      "step": 81
    },
    {
      "epoch": 0.10079901659496004,
      "grad_norm": 0.3259466290473938,
      "learning_rate": 9.956662658807842e-05,
      "loss": 1.3892,
      "step": 82
    },
    {
      "epoch": 0.10202827289489859,
      "grad_norm": 0.3366626501083374,
      "learning_rate": 9.95391598669669e-05,
      "loss": 1.1833,
      "step": 83
    },
    {
      "epoch": 0.10325752919483712,
      "grad_norm": 0.3032483458518982,
      "learning_rate": 9.95108533017216e-05,
      "loss": 1.1729,
      "step": 84
    },
    {
      "epoch": 0.10448678549477566,
      "grad_norm": 0.4028280973434448,
      "learning_rate": 9.948170737222762e-05,
      "loss": 1.1019,
      "step": 85
    },
    {
      "epoch": 0.1057160417947142,
      "grad_norm": 0.3796052932739258,
      "learning_rate": 9.945172257259986e-05,
      "loss": 1.3822,
      "step": 86
    },
    {
      "epoch": 0.10694529809465274,
      "grad_norm": 0.3956368565559387,
      "learning_rate": 9.942089941117472e-05,
      "loss": 1.2101,
      "step": 87
    },
    {
      "epoch": 0.10817455439459127,
      "grad_norm": 0.5040555596351624,
      "learning_rate": 9.938923841050147e-05,
      "loss": 1.059,
      "step": 88
    },
    {
      "epoch": 0.10940381069452981,
      "grad_norm": 0.7209507822990417,
      "learning_rate": 9.935674010733336e-05,
      "loss": 0.9387,
      "step": 89
    },
    {
      "epoch": 0.11063306699446834,
      "grad_norm": 0.6711410284042358,
      "learning_rate": 9.932340505261855e-05,
      "loss": 0.9325,
      "step": 90
    },
    {
      "epoch": 0.11186232329440689,
      "grad_norm": 0.670559823513031,
      "learning_rate": 9.928923381149078e-05,
      "loss": 1.1188,
      "step": 91
    },
    {
      "epoch": 0.11309157959434542,
      "grad_norm": 1.4009896516799927,
      "learning_rate": 9.925422696325975e-05,
      "loss": 1.4021,
      "step": 92
    },
    {
      "epoch": 0.11432083589428396,
      "grad_norm": 2.7449545860290527,
      "learning_rate": 9.921838510140135e-05,
      "loss": 1.7181,
      "step": 93
    },
    {
      "epoch": 0.11555009219422249,
      "grad_norm": 3.5462844371795654,
      "learning_rate": 9.918170883354755e-05,
      "loss": 1.4934,
      "step": 94
    },
    {
      "epoch": 0.11677934849416104,
      "grad_norm": 3.204674005508423,
      "learning_rate": 9.914419878147611e-05,
      "loss": 1.2952,
      "step": 95
    },
    {
      "epoch": 0.11800860479409957,
      "grad_norm": 2.583436965942383,
      "learning_rate": 9.910585558110006e-05,
      "loss": 1.418,
      "step": 96
    },
    {
      "epoch": 0.11923786109403811,
      "grad_norm": 3.0214803218841553,
      "learning_rate": 9.906667988245692e-05,
      "loss": 1.8579,
      "step": 97
    },
    {
      "epoch": 0.12046711739397664,
      "grad_norm": 2.359790325164795,
      "learning_rate": 9.902667234969764e-05,
      "loss": 1.2705,
      "step": 98
    },
    {
      "epoch": 0.12169637369391519,
      "grad_norm": 2.093607187271118,
      "learning_rate": 9.898583366107538e-05,
      "loss": 1.4241,
      "step": 99
    },
    {
      "epoch": 0.12292562999385372,
      "grad_norm": 2.613720655441284,
      "learning_rate": 9.8944164508934e-05,
      "loss": 1.7558,
      "step": 100
    },
    {
      "epoch": 0.12415488629379226,
      "grad_norm": 0.29464319348335266,
      "learning_rate": 9.890166559969631e-05,
      "loss": 1.1966,
      "step": 101
    },
    {
      "epoch": 0.1253841425937308,
      "grad_norm": 0.27224430441856384,
      "learning_rate": 9.885833765385212e-05,
      "loss": 1.3172,
      "step": 102
    },
    {
      "epoch": 0.12661339889366932,
      "grad_norm": 0.2738960385322571,
      "learning_rate": 9.881418140594603e-05,
      "loss": 1.2875,
      "step": 103
    },
    {
      "epoch": 0.12784265519360788,
      "grad_norm": 0.274746298789978,
      "learning_rate": 9.876919760456492e-05,
      "loss": 1.3156,
      "step": 104
    },
    {
      "epoch": 0.1290719114935464,
      "grad_norm": 0.3050672113895416,
      "learning_rate": 9.872338701232526e-05,
      "loss": 1.2426,
      "step": 105
    },
    {
      "epoch": 0.13030116779348494,
      "grad_norm": 0.2726648449897766,
      "learning_rate": 9.867675040586034e-05,
      "loss": 1.1997,
      "step": 106
    },
    {
      "epoch": 0.13153042409342347,
      "grad_norm": 0.2615199685096741,
      "learning_rate": 9.862928857580687e-05,
      "loss": 1.1518,
      "step": 107
    },
    {
      "epoch": 0.13275968039336203,
      "grad_norm": 0.27568066120147705,
      "learning_rate": 9.858100232679175e-05,
      "loss": 0.9874,
      "step": 108
    },
    {
      "epoch": 0.13398893669330056,
      "grad_norm": 0.29168951511383057,
      "learning_rate": 9.853189247741833e-05,
      "loss": 1.2147,
      "step": 109
    },
    {
      "epoch": 0.1352181929932391,
      "grad_norm": 0.30630671977996826,
      "learning_rate": 9.848195986025257e-05,
      "loss": 1.2474,
      "step": 110
    },
    {
      "epoch": 0.13644744929317762,
      "grad_norm": 0.3246194124221802,
      "learning_rate": 9.843120532180896e-05,
      "loss": 1.1839,
      "step": 111
    },
    {
      "epoch": 0.13767670559311618,
      "grad_norm": 0.34899017214775085,
      "learning_rate": 9.837962972253612e-05,
      "loss": 1.2389,
      "step": 112
    },
    {
      "epoch": 0.1389059618930547,
      "grad_norm": 0.3848627805709839,
      "learning_rate": 9.83272339368022e-05,
      "loss": 1.1833,
      "step": 113
    },
    {
      "epoch": 0.14013521819299324,
      "grad_norm": 0.4109489917755127,
      "learning_rate": 9.827401885288013e-05,
      "loss": 1.1026,
      "step": 114
    },
    {
      "epoch": 0.14136447449293177,
      "grad_norm": 0.6600728034973145,
      "learning_rate": 9.821998537293245e-05,
      "loss": 1.4073,
      "step": 115
    },
    {
      "epoch": 0.14259373079287033,
      "grad_norm": 0.5556017756462097,
      "learning_rate": 9.816513441299613e-05,
      "loss": 0.6878,
      "step": 116
    },
    {
      "epoch": 0.14382298709280886,
      "grad_norm": 0.5937761068344116,
      "learning_rate": 9.810946690296698e-05,
      "loss": 0.7988,
      "step": 117
    },
    {
      "epoch": 0.1450522433927474,
      "grad_norm": 0.6892157196998596,
      "learning_rate": 9.80529837865839e-05,
      "loss": 1.2152,
      "step": 118
    },
    {
      "epoch": 0.14628149969268592,
      "grad_norm": 1.1046031713485718,
      "learning_rate": 9.799568602141283e-05,
      "loss": 1.4396,
      "step": 119
    },
    {
      "epoch": 0.14751075599262448,
      "grad_norm": 3.366898536682129,
      "learning_rate": 9.793757457883062e-05,
      "loss": 1.6062,
      "step": 120
    },
    {
      "epoch": 0.148740012292563,
      "grad_norm": 4.46527624130249,
      "learning_rate": 9.787865044400848e-05,
      "loss": 1.041,
      "step": 121
    },
    {
      "epoch": 0.14996926859250154,
      "grad_norm": 3.8992013931274414,
      "learning_rate": 9.781891461589531e-05,
      "loss": 1.6166,
      "step": 122
    },
    {
      "epoch": 0.15119852489244007,
      "grad_norm": 2.6794042587280273,
      "learning_rate": 9.775836810720074e-05,
      "loss": 1.5444,
      "step": 123
    },
    {
      "epoch": 0.1524277811923786,
      "grad_norm": 2.1487152576446533,
      "learning_rate": 9.769701194437799e-05,
      "loss": 1.4051,
      "step": 124
    },
    {
      "epoch": 0.15365703749231716,
      "grad_norm": 2.6264848709106445,
      "learning_rate": 9.763484716760649e-05,
      "loss": 1.7286,
      "step": 125
    },
    {
      "epoch": 0.15488629379225569,
      "grad_norm": 0.2960408329963684,
      "learning_rate": 9.757187483077413e-05,
      "loss": 1.1932,
      "step": 126
    },
    {
      "epoch": 0.15611555009219422,
      "grad_norm": 0.2633897364139557,
      "learning_rate": 9.750809600145954e-05,
      "loss": 1.2997,
      "step": 127
    },
    {
      "epoch": 0.15734480639213275,
      "grad_norm": 0.2459549605846405,
      "learning_rate": 9.744351176091393e-05,
      "loss": 1.0985,
      "step": 128
    },
    {
      "epoch": 0.1585740626920713,
      "grad_norm": 0.30462849140167236,
      "learning_rate": 9.737812320404271e-05,
      "loss": 1.4303,
      "step": 129
    },
    {
      "epoch": 0.15980331899200984,
      "grad_norm": 0.27317526936531067,
      "learning_rate": 9.731193143938704e-05,
      "loss": 1.224,
      "step": 130
    },
    {
      "epoch": 0.16103257529194837,
      "grad_norm": 0.26538556814193726,
      "learning_rate": 9.724493758910491e-05,
      "loss": 1.2667,
      "step": 131
    },
    {
      "epoch": 0.1622618315918869,
      "grad_norm": 0.28112831711769104,
      "learning_rate": 9.71771427889522e-05,
      "loss": 1.1212,
      "step": 132
    },
    {
      "epoch": 0.16349108789182545,
      "grad_norm": 0.2989320755004883,
      "learning_rate": 9.71085481882634e-05,
      "loss": 1.0484,
      "step": 133
    },
    {
      "epoch": 0.16472034419176398,
      "grad_norm": 0.2814895212650299,
      "learning_rate": 9.703915494993215e-05,
      "loss": 0.7544,
      "step": 134
    },
    {
      "epoch": 0.16594960049170251,
      "grad_norm": 0.3104398846626282,
      "learning_rate": 9.696896425039146e-05,
      "loss": 1.0323,
      "step": 135
    },
    {
      "epoch": 0.16717885679164105,
      "grad_norm": 0.4948181211948395,
      "learning_rate": 9.689797727959387e-05,
      "loss": 1.2073,
      "step": 136
    },
    {
      "epoch": 0.1684081130915796,
      "grad_norm": 0.4018343985080719,
      "learning_rate": 9.682619524099112e-05,
      "loss": 1.2409,
      "step": 137
    },
    {
      "epoch": 0.16963736939151813,
      "grad_norm": 0.5637558102607727,
      "learning_rate": 9.675361935151395e-05,
      "loss": 1.3184,
      "step": 138
    },
    {
      "epoch": 0.17086662569145666,
      "grad_norm": 0.7405252456665039,
      "learning_rate": 9.66802508415513e-05,
      "loss": 1.0983,
      "step": 139
    },
    {
      "epoch": 0.1720958819913952,
      "grad_norm": 0.6686736345291138,
      "learning_rate": 9.660609095492952e-05,
      "loss": 1.0025,
      "step": 140
    },
    {
      "epoch": 0.17332513829133375,
      "grad_norm": 0.7121345400810242,
      "learning_rate": 9.653114094889127e-05,
      "loss": 0.9337,
      "step": 141
    },
    {
      "epoch": 0.17455439459127228,
      "grad_norm": 1.06205153465271,
      "learning_rate": 9.645540209407425e-05,
      "loss": 1.2931,
      "step": 142
    },
    {
      "epoch": 0.1757836508912108,
      "grad_norm": 2.3874034881591797,
      "learning_rate": 9.637887567448959e-05,
      "loss": 1.5124,
      "step": 143
    },
    {
      "epoch": 0.17701290719114934,
      "grad_norm": 2.6609811782836914,
      "learning_rate": 9.630156298750011e-05,
      "loss": 1.4161,
      "step": 144
    },
    {
      "epoch": 0.1782421634910879,
      "grad_norm": 2.413705587387085,
      "learning_rate": 9.622346534379833e-05,
      "loss": 1.2768,
      "step": 145
    },
    {
      "epoch": 0.17947141979102643,
      "grad_norm": 2.920910120010376,
      "learning_rate": 9.614458406738427e-05,
      "loss": 1.0866,
      "step": 146
    },
    {
      "epoch": 0.18070067609096496,
      "grad_norm": 2.389439582824707,
      "learning_rate": 9.606492049554297e-05,
      "loss": 1.4862,
      "step": 147
    },
    {
      "epoch": 0.1819299323909035,
      "grad_norm": 2.03515887260437,
      "learning_rate": 9.598447597882181e-05,
      "loss": 1.3503,
      "step": 148
    },
    {
      "epoch": 0.18315918869084205,
      "grad_norm": 2.016889810562134,
      "learning_rate": 9.590325188100768e-05,
      "loss": 1.2565,
      "step": 149
    },
    {
      "epoch": 0.18438844499078058,
      "grad_norm": 2.1591711044311523,
      "learning_rate": 9.582124957910375e-05,
      "loss": 1.1261,
      "step": 150
    },
    {
      "epoch": 0.1856177012907191,
      "grad_norm": 0.2707172632217407,
      "learning_rate": 9.573847046330628e-05,
      "loss": 1.1045,
      "step": 151
    },
    {
      "epoch": 0.18684695759065764,
      "grad_norm": 0.25980842113494873,
      "learning_rate": 9.565491593698086e-05,
      "loss": 1.274,
      "step": 152
    },
    {
      "epoch": 0.1880762138905962,
      "grad_norm": 0.25503602623939514,
      "learning_rate": 9.55705874166388e-05,
      "loss": 1.0971,
      "step": 153
    },
    {
      "epoch": 0.18930547019053473,
      "grad_norm": 0.27756351232528687,
      "learning_rate": 9.548548633191299e-05,
      "loss": 1.215,
      "step": 154
    },
    {
      "epoch": 0.19053472649047326,
      "grad_norm": 0.2732703387737274,
      "learning_rate": 9.539961412553375e-05,
      "loss": 1.1326,
      "step": 155
    },
    {
      "epoch": 0.1917639827904118,
      "grad_norm": 0.28855475783348083,
      "learning_rate": 9.531297225330429e-05,
      "loss": 1.2862,
      "step": 156
    },
    {
      "epoch": 0.19299323909035035,
      "grad_norm": 0.3158769905567169,
      "learning_rate": 9.522556218407608e-05,
      "loss": 1.2254,
      "step": 157
    },
    {
      "epoch": 0.19422249539028888,
      "grad_norm": 0.30355289578437805,
      "learning_rate": 9.513738539972394e-05,
      "loss": 1.062,
      "step": 158
    },
    {
      "epoch": 0.1954517516902274,
      "grad_norm": 0.3448358178138733,
      "learning_rate": 9.504844339512095e-05,
      "loss": 0.9856,
      "step": 159
    },
    {
      "epoch": 0.19668100799016594,
      "grad_norm": 0.3306958079338074,
      "learning_rate": 9.495873767811305e-05,
      "loss": 1.2696,
      "step": 160
    },
    {
      "epoch": 0.1979102642901045,
      "grad_norm": 0.4231187105178833,
      "learning_rate": 9.486826976949345e-05,
      "loss": 1.1711,
      "step": 161
    },
    {
      "epoch": 0.19913952059004303,
      "grad_norm": 0.5289990901947021,
      "learning_rate": 9.477704120297697e-05,
      "loss": 1.4088,
      "step": 162
    },
    {
      "epoch": 0.20036877688998156,
      "grad_norm": 0.5111967921257019,
      "learning_rate": 9.468505352517394e-05,
      "loss": 1.1683,
      "step": 163
    },
    {
      "epoch": 0.2015980331899201,
      "grad_norm": 0.7477207779884338,
      "learning_rate": 9.459230829556401e-05,
      "loss": 0.995,
      "step": 164
    },
    {
      "epoch": 0.20282728948985865,
      "grad_norm": 0.7836649417877197,
      "learning_rate": 9.449880708646971e-05,
      "loss": 0.8027,
      "step": 165
    },
    {
      "epoch": 0.20405654578979718,
      "grad_norm": 0.6803653240203857,
      "learning_rate": 9.440455148302977e-05,
      "loss": 0.9725,
      "step": 166
    },
    {
      "epoch": 0.2052858020897357,
      "grad_norm": 0.8779723048210144,
      "learning_rate": 9.430954308317233e-05,
      "loss": 1.1995,
      "step": 167
    },
    {
      "epoch": 0.20651505838967424,
      "grad_norm": 1.3584879636764526,
      "learning_rate": 9.421378349758769e-05,
      "loss": 1.4558,
      "step": 168
    },
    {
      "epoch": 0.2077443146896128,
      "grad_norm": 2.1976521015167236,
      "learning_rate": 9.411727434970121e-05,
      "loss": 1.0717,
      "step": 169
    },
    {
      "epoch": 0.20897357098955133,
      "grad_norm": 3.9302353858947754,
      "learning_rate": 9.402001727564565e-05,
      "loss": 1.5138,
      "step": 170
    },
    {
      "epoch": 0.21020282728948986,
      "grad_norm": 3.9594686031341553,
      "learning_rate": 9.392201392423342e-05,
      "loss": 1.4295,
      "step": 171
    },
    {
      "epoch": 0.2114320835894284,
      "grad_norm": 3.2994837760925293,
      "learning_rate": 9.382326595692868e-05,
      "loss": 1.8676,
      "step": 172
    },
    {
      "epoch": 0.21266133988936695,
      "grad_norm": 2.219341993331909,
      "learning_rate": 9.372377504781924e-05,
      "loss": 1.3185,
      "step": 173
    },
    {
      "epoch": 0.21389059618930548,
      "grad_norm": 2.3389649391174316,
      "learning_rate": 9.362354288358803e-05,
      "loss": 0.9969,
      "step": 174
    },
    {
      "epoch": 0.215119852489244,
      "grad_norm": 3.8493995666503906,
      "learning_rate": 9.35225711634846e-05,
      "loss": 1.2903,
      "step": 175
    },
    {
      "epoch": 0.21634910878918254,
      "grad_norm": 0.24931700527668,
      "learning_rate": 9.34208615992963e-05,
      "loss": 1.051,
      "step": 176
    },
    {
      "epoch": 0.2175783650891211,
      "grad_norm": 0.2944095730781555,
      "learning_rate": 9.331841591531922e-05,
      "loss": 1.3364,
      "step": 177
    },
    {
      "epoch": 0.21880762138905963,
      "grad_norm": 0.26118403673171997,
      "learning_rate": 9.321523584832905e-05,
      "loss": 1.1487,
      "step": 178
    },
    {
      "epoch": 0.22003687768899816,
      "grad_norm": 0.29458168148994446,
      "learning_rate": 9.311132314755149e-05,
      "loss": 1.365,
      "step": 179
    },
    {
      "epoch": 0.2212661339889367,
      "grad_norm": 0.2739919424057007,
      "learning_rate": 9.300667957463278e-05,
      "loss": 1.2595,
      "step": 180
    },
    {
      "epoch": 0.22249539028887522,
      "grad_norm": 0.25647538900375366,
      "learning_rate": 9.290130690360965e-05,
      "loss": 0.9865,
      "step": 181
    },
    {
      "epoch": 0.22372464658881377,
      "grad_norm": 0.27343517541885376,
      "learning_rate": 9.279520692087938e-05,
      "loss": 1.1263,
      "step": 182
    },
    {
      "epoch": 0.2249539028887523,
      "grad_norm": 0.3220975697040558,
      "learning_rate": 9.268838142516943e-05,
      "loss": 1.3404,
      "step": 183
    },
    {
      "epoch": 0.22618315918869084,
      "grad_norm": 0.3012546896934509,
      "learning_rate": 9.258083222750703e-05,
      "loss": 0.934,
      "step": 184
    },
    {
      "epoch": 0.22741241548862937,
      "grad_norm": 0.3433031439781189,
      "learning_rate": 9.247256115118835e-05,
      "loss": 1.1895,
      "step": 185
    },
    {
      "epoch": 0.22864167178856792,
      "grad_norm": 0.3515290915966034,
      "learning_rate": 9.236357003174775e-05,
      "loss": 1.3236,
      "step": 186
    },
    {
      "epoch": 0.22987092808850645,
      "grad_norm": 0.4033795893192291,
      "learning_rate": 9.225386071692654e-05,
      "loss": 1.2089,
      "step": 187
    },
    {
      "epoch": 0.23110018438844498,
      "grad_norm": 0.42729562520980835,
      "learning_rate": 9.214343506664168e-05,
      "loss": 1.1346,
      "step": 188
    },
    {
      "epoch": 0.23232944068838352,
      "grad_norm": 0.6692906618118286,
      "learning_rate": 9.203229495295429e-05,
      "loss": 1.0211,
      "step": 189
    },
    {
      "epoch": 0.23355869698832207,
      "grad_norm": 0.6882857084274292,
      "learning_rate": 9.192044226003789e-05,
      "loss": 0.8235,
      "step": 190
    },
    {
      "epoch": 0.2347879532882606,
      "grad_norm": 0.6821665167808533,
      "learning_rate": 9.18078788841464e-05,
      "loss": 0.8171,
      "step": 191
    },
    {
      "epoch": 0.23601720958819913,
      "grad_norm": 0.7368921041488647,
      "learning_rate": 9.169460673358212e-05,
      "loss": 0.9993,
      "step": 192
    },
    {
      "epoch": 0.23724646588813766,
      "grad_norm": 0.9759008884429932,
      "learning_rate": 9.158062772866325e-05,
      "loss": 1.2029,
      "step": 193
    },
    {
      "epoch": 0.23847572218807622,
      "grad_norm": 2.167100667953491,
      "learning_rate": 9.146594380169143e-05,
      "loss": 1.1393,
      "step": 194
    },
    {
      "epoch": 0.23970497848801475,
      "grad_norm": 2.76292085647583,
      "learning_rate": 9.135055689691888e-05,
      "loss": 0.946,
      "step": 195
    },
    {
      "epoch": 0.24093423478795328,
      "grad_norm": 3.504427671432495,
      "learning_rate": 9.123446897051555e-05,
      "loss": 1.7001,
      "step": 196
    },
    {
      "epoch": 0.2421634910878918,
      "grad_norm": 2.606448173522949,
      "learning_rate": 9.111768199053588e-05,
      "loss": 1.6293,
      "step": 197
    },
    {
      "epoch": 0.24339274738783037,
      "grad_norm": 2.1803855895996094,
      "learning_rate": 9.100019793688549e-05,
      "loss": 1.2392,
      "step": 198
    },
    {
      "epoch": 0.2446220036877689,
      "grad_norm": 2.3470633029937744,
      "learning_rate": 9.088201880128755e-05,
      "loss": 1.0844,
      "step": 199
    },
    {
      "epoch": 0.24585125998770743,
      "grad_norm": 2.47255802154541,
      "learning_rate": 9.076314658724906e-05,
      "loss": 1.19,
      "step": 200
    },
    {
      "epoch": 0.24708051628764596,
      "grad_norm": 0.2115241140127182,
      "learning_rate": 9.064358331002691e-05,
      "loss": 0.9038,
      "step": 201
    },
    {
      "epoch": 0.24830977258758452,
      "grad_norm": 0.2693980038166046,
      "learning_rate": 9.05233309965936e-05,
      "loss": 1.0014,
      "step": 202
    },
    {
      "epoch": 0.24953902888752305,
      "grad_norm": 0.28890225291252136,
      "learning_rate": 9.040239168560303e-05,
      "loss": 1.1698,
      "step": 203
    },
    {
      "epoch": 0.2507682851874616,
      "grad_norm": 0.27143335342407227,
      "learning_rate": 9.028076742735583e-05,
      "loss": 1.1856,
      "step": 204
    },
    {
      "epoch": 0.2507682851874616,
      "eval_loss": 1.0315037965774536,
      "eval_runtime": 65.4064,
      "eval_samples_per_second": 10.473,
      "eval_steps_per_second": 5.244,
      "step": 204
    }
  ],
  "logging_steps": 1,
  "max_steps": 813,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 204,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 4.204971437116621e+16,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}