| { |
| "best_metric": 1.86671543, |
| "best_model_checkpoint": "/home/anubhab-pg/sm745052/swift/exp_output_paligemma/v1-20250508-175335/checkpoint-3500", |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 4944, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0006067961165048543, |
| "grad_norm": 27.07073974609375, |
| "learning_rate": 9.999998990554643e-05, |
| "loss": 4.470662593841553, |
| "memory(GiB)": 29.74, |
| "step": 1, |
| "token_acc": 0.2638888888888889, |
| "train_speed(iter/s)": 0.176069 |
| }, |
| { |
| "epoch": 0.003033980582524272, |
| "grad_norm": 11.808771133422852, |
| "learning_rate": 9.999974763886429e-05, |
| "loss": 3.900416851043701, |
| "memory(GiB)": 29.74, |
| "step": 5, |
| "token_acc": 0.2831050228310502, |
| "train_speed(iter/s)": 0.398445 |
| }, |
| { |
| "epoch": 0.006067961165048544, |
| "grad_norm": 12.929791450500488, |
| "learning_rate": 9.999899055800455e-05, |
| "loss": 2.7944046020507813, |
| "memory(GiB)": 29.74, |
| "step": 10, |
| "token_acc": 0.4377358490566038, |
| "train_speed(iter/s)": 0.483776 |
| }, |
| { |
| "epoch": 0.009101941747572815, |
| "grad_norm": 8.702505111694336, |
| "learning_rate": 9.99977287650631e-05, |
| "loss": 2.662510871887207, |
| "memory(GiB)": 38.25, |
| "step": 15, |
| "token_acc": 0.44649446494464945, |
| "train_speed(iter/s)": 0.514334 |
| }, |
| { |
| "epoch": 0.012135922330097087, |
| "grad_norm": 8.632209777832031, |
| "learning_rate": 9.999596227277707e-05, |
| "loss": 2.5844635009765624, |
| "memory(GiB)": 38.86, |
| "step": 20, |
| "token_acc": 0.4867549668874172, |
| "train_speed(iter/s)": 0.516824 |
| }, |
| { |
| "epoch": 0.01516990291262136, |
| "grad_norm": 7.144984245300293, |
| "learning_rate": 9.999369109897819e-05, |
| "loss": 2.760052490234375, |
| "memory(GiB)": 38.86, |
| "step": 25, |
| "token_acc": 0.42524916943521596, |
| "train_speed(iter/s)": 0.531359 |
| }, |
| { |
| "epoch": 0.01820388349514563, |
| "grad_norm": 13.147393226623535, |
| "learning_rate": 9.999091526659272e-05, |
| "loss": 2.7114631652832033, |
| "memory(GiB)": 38.86, |
| "step": 30, |
| "token_acc": 0.42613636363636365, |
| "train_speed(iter/s)": 0.54308 |
| }, |
| { |
| "epoch": 0.021237864077669904, |
| "grad_norm": 10.967350959777832, |
| "learning_rate": 9.998763480364113e-05, |
| "loss": 2.8917694091796875, |
| "memory(GiB)": 38.86, |
| "step": 35, |
| "token_acc": 0.3957703927492447, |
| "train_speed(iter/s)": 0.55201 |
| }, |
| { |
| "epoch": 0.024271844660194174, |
| "grad_norm": 6.367667198181152, |
| "learning_rate": 9.99838497432379e-05, |
| "loss": 2.7323539733886717, |
| "memory(GiB)": 38.86, |
| "step": 40, |
| "token_acc": 0.4843205574912892, |
| "train_speed(iter/s)": 0.554469 |
| }, |
| { |
| "epoch": 0.027305825242718445, |
| "grad_norm": 7.910516262054443, |
| "learning_rate": 9.997956012359109e-05, |
| "loss": 2.541508674621582, |
| "memory(GiB)": 38.86, |
| "step": 45, |
| "token_acc": 0.4327485380116959, |
| "train_speed(iter/s)": 0.558152 |
| }, |
| { |
| "epoch": 0.03033980582524272, |
| "grad_norm": 6.039285182952881, |
| "learning_rate": 9.997476598800203e-05, |
| "loss": 2.543034553527832, |
| "memory(GiB)": 38.86, |
| "step": 50, |
| "token_acc": 0.45058139534883723, |
| "train_speed(iter/s)": 0.56043 |
| }, |
| { |
| "epoch": 0.03337378640776699, |
| "grad_norm": 8.210753440856934, |
| "learning_rate": 9.99694673848649e-05, |
| "loss": 2.3589075088500975, |
| "memory(GiB)": 38.86, |
| "step": 55, |
| "token_acc": 0.4642857142857143, |
| "train_speed(iter/s)": 0.562231 |
| }, |
| { |
| "epoch": 0.03640776699029126, |
| "grad_norm": 9.309414863586426, |
| "learning_rate": 9.996366436766611e-05, |
| "loss": 2.3582067489624023, |
| "memory(GiB)": 38.86, |
| "step": 60, |
| "token_acc": 0.4788273615635179, |
| "train_speed(iter/s)": 0.567887 |
| }, |
| { |
| "epoch": 0.03944174757281554, |
| "grad_norm": 6.9387993812561035, |
| "learning_rate": 9.995735699498394e-05, |
| "loss": 2.5982736587524413, |
| "memory(GiB)": 38.86, |
| "step": 65, |
| "token_acc": 0.436046511627907, |
| "train_speed(iter/s)": 0.573648 |
| }, |
| { |
| "epoch": 0.04247572815533981, |
| "grad_norm": 7.010188579559326, |
| "learning_rate": 9.995054533048777e-05, |
| "loss": 2.508279228210449, |
| "memory(GiB)": 38.86, |
| "step": 70, |
| "token_acc": 0.4281609195402299, |
| "train_speed(iter/s)": 0.579165 |
| }, |
| { |
| "epoch": 0.04550970873786408, |
| "grad_norm": 8.828622817993164, |
| "learning_rate": 9.994322944293763e-05, |
| "loss": 2.588084411621094, |
| "memory(GiB)": 38.86, |
| "step": 75, |
| "token_acc": 0.4558303886925795, |
| "train_speed(iter/s)": 0.583997 |
| }, |
| { |
| "epoch": 0.04854368932038835, |
| "grad_norm": 8.112404823303223, |
| "learning_rate": 9.993540940618334e-05, |
| "loss": 2.316554832458496, |
| "memory(GiB)": 38.86, |
| "step": 80, |
| "token_acc": 0.4552238805970149, |
| "train_speed(iter/s)": 0.582967 |
| }, |
| { |
| "epoch": 0.05157766990291262, |
| "grad_norm": 8.621855735778809, |
| "learning_rate": 9.992708529916379e-05, |
| "loss": 2.366764259338379, |
| "memory(GiB)": 38.86, |
| "step": 85, |
| "token_acc": 0.44025157232704404, |
| "train_speed(iter/s)": 0.585803 |
| }, |
| { |
| "epoch": 0.05461165048543689, |
| "grad_norm": 8.71721076965332, |
| "learning_rate": 9.991825720590626e-05, |
| "loss": 2.346388244628906, |
| "memory(GiB)": 38.86, |
| "step": 90, |
| "token_acc": 0.4457831325301205, |
| "train_speed(iter/s)": 0.588259 |
| }, |
| { |
| "epoch": 0.05764563106796117, |
| "grad_norm": 6.412728786468506, |
| "learning_rate": 9.990892521552546e-05, |
| "loss": 2.4675243377685545, |
| "memory(GiB)": 38.86, |
| "step": 95, |
| "token_acc": 0.4896755162241888, |
| "train_speed(iter/s)": 0.587726 |
| }, |
| { |
| "epoch": 0.06067961165048544, |
| "grad_norm": 10.379164695739746, |
| "learning_rate": 9.989908942222264e-05, |
| "loss": 2.24587345123291, |
| "memory(GiB)": 38.86, |
| "step": 100, |
| "token_acc": 0.5168067226890757, |
| "train_speed(iter/s)": 0.588352 |
| }, |
| { |
| "epoch": 0.06371359223300971, |
| "grad_norm": 6.4799370765686035, |
| "learning_rate": 9.988874992528468e-05, |
| "loss": 2.652623748779297, |
| "memory(GiB)": 38.86, |
| "step": 105, |
| "token_acc": 0.4127906976744186, |
| "train_speed(iter/s)": 0.589898 |
| }, |
| { |
| "epoch": 0.06674757281553398, |
| "grad_norm": 6.027382850646973, |
| "learning_rate": 9.987790682908306e-05, |
| "loss": 2.2998146057128905, |
| "memory(GiB)": 38.86, |
| "step": 110, |
| "token_acc": 0.4807121661721068, |
| "train_speed(iter/s)": 0.589495 |
| }, |
| { |
| "epoch": 0.06978155339805825, |
| "grad_norm": 6.517679214477539, |
| "learning_rate": 9.986656024307286e-05, |
| "loss": 2.5867145538330076, |
| "memory(GiB)": 38.86, |
| "step": 115, |
| "token_acc": 0.455026455026455, |
| "train_speed(iter/s)": 0.589597 |
| }, |
| { |
| "epoch": 0.07281553398058252, |
| "grad_norm": 7.561508655548096, |
| "learning_rate": 9.985471028179154e-05, |
| "loss": 2.4384201049804686, |
| "memory(GiB)": 38.86, |
| "step": 120, |
| "token_acc": 0.47368421052631576, |
| "train_speed(iter/s)": 0.589834 |
| }, |
| { |
| "epoch": 0.07584951456310679, |
| "grad_norm": 7.263455867767334, |
| "learning_rate": 9.984235706485789e-05, |
| "loss": 2.373090362548828, |
| "memory(GiB)": 38.86, |
| "step": 125, |
| "token_acc": 0.4657039711191336, |
| "train_speed(iter/s)": 0.590074 |
| }, |
| { |
| "epoch": 0.07888349514563107, |
| "grad_norm": 6.084628582000732, |
| "learning_rate": 9.98295007169708e-05, |
| "loss": 2.5036380767822264, |
| "memory(GiB)": 38.86, |
| "step": 130, |
| "token_acc": 0.47941176470588237, |
| "train_speed(iter/s)": 0.592166 |
| }, |
| { |
| "epoch": 0.08191747572815535, |
| "grad_norm": 8.00130844116211, |
| "learning_rate": 9.981614136790796e-05, |
| "loss": 2.153367614746094, |
| "memory(GiB)": 39.22, |
| "step": 135, |
| "token_acc": 0.5410764872521246, |
| "train_speed(iter/s)": 0.591399 |
| }, |
| { |
| "epoch": 0.08495145631067962, |
| "grad_norm": 6.030653953552246, |
| "learning_rate": 9.980227915252459e-05, |
| "loss": 2.2291128158569338, |
| "memory(GiB)": 39.22, |
| "step": 140, |
| "token_acc": 0.4910394265232975, |
| "train_speed(iter/s)": 0.591432 |
| }, |
| { |
| "epoch": 0.08798543689320389, |
| "grad_norm": 6.891486167907715, |
| "learning_rate": 9.978791421075206e-05, |
| "loss": 2.5422630310058594, |
| "memory(GiB)": 39.22, |
| "step": 145, |
| "token_acc": 0.4812286689419795, |
| "train_speed(iter/s)": 0.588377 |
| }, |
| { |
| "epoch": 0.09101941747572816, |
| "grad_norm": 7.838645935058594, |
| "learning_rate": 9.97730466875965e-05, |
| "loss": 2.476850128173828, |
| "memory(GiB)": 39.22, |
| "step": 150, |
| "token_acc": 0.4542372881355932, |
| "train_speed(iter/s)": 0.590615 |
| }, |
| { |
| "epoch": 0.09405339805825243, |
| "grad_norm": 7.769046306610107, |
| "learning_rate": 9.975767673313734e-05, |
| "loss": 2.592838096618652, |
| "memory(GiB)": 39.22, |
| "step": 155, |
| "token_acc": 0.4678362573099415, |
| "train_speed(iter/s)": 0.591754 |
| }, |
| { |
| "epoch": 0.0970873786407767, |
| "grad_norm": 5.977383136749268, |
| "learning_rate": 9.974180450252569e-05, |
| "loss": 2.345209503173828, |
| "memory(GiB)": 39.22, |
| "step": 160, |
| "token_acc": 0.4849624060150376, |
| "train_speed(iter/s)": 0.592414 |
| }, |
| { |
| "epoch": 0.10012135922330097, |
| "grad_norm": 6.340784549713135, |
| "learning_rate": 9.972543015598295e-05, |
| "loss": 2.4988531112670898, |
| "memory(GiB)": 39.22, |
| "step": 165, |
| "token_acc": 0.4491525423728814, |
| "train_speed(iter/s)": 0.592598 |
| }, |
| { |
| "epoch": 0.10315533980582524, |
| "grad_norm": 6.322139263153076, |
| "learning_rate": 9.970855385879908e-05, |
| "loss": 2.7641939163208007, |
| "memory(GiB)": 39.22, |
| "step": 170, |
| "token_acc": 0.42450142450142453, |
| "train_speed(iter/s)": 0.593328 |
| }, |
| { |
| "epoch": 0.10618932038834951, |
| "grad_norm": 8.460200309753418, |
| "learning_rate": 9.969117578133089e-05, |
| "loss": 2.4497074127197265, |
| "memory(GiB)": 39.22, |
| "step": 175, |
| "token_acc": 0.4819672131147541, |
| "train_speed(iter/s)": 0.592932 |
| }, |
| { |
| "epoch": 0.10922330097087378, |
| "grad_norm": 6.508354663848877, |
| "learning_rate": 9.96732960990005e-05, |
| "loss": 2.3542524337768556, |
| "memory(GiB)": 39.22, |
| "step": 180, |
| "token_acc": 0.4444444444444444, |
| "train_speed(iter/s)": 0.594793 |
| }, |
| { |
| "epoch": 0.11225728155339806, |
| "grad_norm": 6.436831474304199, |
| "learning_rate": 9.965491499229332e-05, |
| "loss": 2.355543518066406, |
| "memory(GiB)": 39.22, |
| "step": 185, |
| "token_acc": 0.48639455782312924, |
| "train_speed(iter/s)": 0.595112 |
| }, |
| { |
| "epoch": 0.11529126213592233, |
| "grad_norm": 5.326399803161621, |
| "learning_rate": 9.963603264675648e-05, |
| "loss": 2.626679611206055, |
| "memory(GiB)": 39.22, |
| "step": 190, |
| "token_acc": 0.45058139534883723, |
| "train_speed(iter/s)": 0.59601 |
| }, |
| { |
| "epoch": 0.1183252427184466, |
| "grad_norm": 6.522929668426514, |
| "learning_rate": 9.961664925299677e-05, |
| "loss": 2.417061424255371, |
| "memory(GiB)": 39.22, |
| "step": 195, |
| "token_acc": 0.49050632911392406, |
| "train_speed(iter/s)": 0.595902 |
| }, |
| { |
| "epoch": 0.12135922330097088, |
| "grad_norm": 5.905120849609375, |
| "learning_rate": 9.95967650066788e-05, |
| "loss": 2.5360954284667967, |
| "memory(GiB)": 39.22, |
| "step": 200, |
| "token_acc": 0.4444444444444444, |
| "train_speed(iter/s)": 0.59608 |
| }, |
| { |
| "epoch": 0.12439320388349515, |
| "grad_norm": 7.083728790283203, |
| "learning_rate": 9.957638010852301e-05, |
| "loss": 2.5276988983154296, |
| "memory(GiB)": 39.22, |
| "step": 205, |
| "token_acc": 0.43333333333333335, |
| "train_speed(iter/s)": 0.597234 |
| }, |
| { |
| "epoch": 0.12742718446601942, |
| "grad_norm": 6.791469573974609, |
| "learning_rate": 9.955549476430364e-05, |
| "loss": 2.6791542053222654, |
| "memory(GiB)": 39.22, |
| "step": 210, |
| "token_acc": 0.44481605351170567, |
| "train_speed(iter/s)": 0.597785 |
| }, |
| { |
| "epoch": 0.1304611650485437, |
| "grad_norm": 8.691610336303711, |
| "learning_rate": 9.953410918484667e-05, |
| "loss": 2.5277048110961915, |
| "memory(GiB)": 39.22, |
| "step": 215, |
| "token_acc": 0.4937888198757764, |
| "train_speed(iter/s)": 0.598803 |
| }, |
| { |
| "epoch": 0.13349514563106796, |
| "grad_norm": 5.966423988342285, |
| "learning_rate": 9.951222358602763e-05, |
| "loss": 2.5550731658935546, |
| "memory(GiB)": 39.22, |
| "step": 220, |
| "token_acc": 0.4676470588235294, |
| "train_speed(iter/s)": 0.599676 |
| }, |
| { |
| "epoch": 0.13652912621359223, |
| "grad_norm": 8.491061210632324, |
| "learning_rate": 9.948983818876954e-05, |
| "loss": 2.433759880065918, |
| "memory(GiB)": 39.22, |
| "step": 225, |
| "token_acc": 0.4908424908424908, |
| "train_speed(iter/s)": 0.598817 |
| }, |
| { |
| "epoch": 0.1395631067961165, |
| "grad_norm": 4.885462760925293, |
| "learning_rate": 9.946695321904056e-05, |
| "loss": 2.5523433685302734, |
| "memory(GiB)": 39.22, |
| "step": 230, |
| "token_acc": 0.45478723404255317, |
| "train_speed(iter/s)": 0.597615 |
| }, |
| { |
| "epoch": 0.14259708737864077, |
| "grad_norm": 6.235279083251953, |
| "learning_rate": 9.944356890785177e-05, |
| "loss": 2.3788055419921874, |
| "memory(GiB)": 39.22, |
| "step": 235, |
| "token_acc": 0.4809384164222874, |
| "train_speed(iter/s)": 0.598117 |
| }, |
| { |
| "epoch": 0.14563106796116504, |
| "grad_norm": 5.3688130378723145, |
| "learning_rate": 9.941968549125481e-05, |
| "loss": 2.4541061401367186, |
| "memory(GiB)": 39.22, |
| "step": 240, |
| "token_acc": 0.46153846153846156, |
| "train_speed(iter/s)": 0.596655 |
| }, |
| { |
| "epoch": 0.1486650485436893, |
| "grad_norm": 5.759191036224365, |
| "learning_rate": 9.939530321033955e-05, |
| "loss": 2.168326568603516, |
| "memory(GiB)": 39.22, |
| "step": 245, |
| "token_acc": 0.501628664495114, |
| "train_speed(iter/s)": 0.5967 |
| }, |
| { |
| "epoch": 0.15169902912621358, |
| "grad_norm": 8.470988273620605, |
| "learning_rate": 9.937042231123155e-05, |
| "loss": 2.5771547317504884, |
| "memory(GiB)": 39.22, |
| "step": 250, |
| "token_acc": 0.501628664495114, |
| "train_speed(iter/s)": 0.596782 |
| }, |
| { |
| "epoch": 0.15473300970873785, |
| "grad_norm": 6.000228404998779, |
| "learning_rate": 9.934504304508974e-05, |
| "loss": 2.5160358428955076, |
| "memory(GiB)": 39.22, |
| "step": 255, |
| "token_acc": 0.4469914040114613, |
| "train_speed(iter/s)": 0.596955 |
| }, |
| { |
| "epoch": 0.15776699029126215, |
| "grad_norm": 7.762350082397461, |
| "learning_rate": 9.931916566810371e-05, |
| "loss": 2.245794677734375, |
| "memory(GiB)": 39.22, |
| "step": 260, |
| "token_acc": 0.521594684385382, |
| "train_speed(iter/s)": 0.596759 |
| }, |
| { |
| "epoch": 0.16080097087378642, |
| "grad_norm": 7.007081031799316, |
| "learning_rate": 9.929279044149123e-05, |
| "loss": 2.3080322265625, |
| "memory(GiB)": 39.22, |
| "step": 265, |
| "token_acc": 0.4964788732394366, |
| "train_speed(iter/s)": 0.595878 |
| }, |
| { |
| "epoch": 0.1638349514563107, |
| "grad_norm": 5.466193675994873, |
| "learning_rate": 9.926591763149559e-05, |
| "loss": 2.1369998931884764, |
| "memory(GiB)": 39.22, |
| "step": 270, |
| "token_acc": 0.5296052631578947, |
| "train_speed(iter/s)": 0.596845 |
| }, |
| { |
| "epoch": 0.16686893203883496, |
| "grad_norm": 7.380741596221924, |
| "learning_rate": 9.923854750938291e-05, |
| "loss": 2.2451313018798826, |
| "memory(GiB)": 39.22, |
| "step": 275, |
| "token_acc": 0.5, |
| "train_speed(iter/s)": 0.595956 |
| }, |
| { |
| "epoch": 0.16990291262135923, |
| "grad_norm": 6.371977806091309, |
| "learning_rate": 9.921068035143936e-05, |
| "loss": 2.408839797973633, |
| "memory(GiB)": 39.22, |
| "step": 280, |
| "token_acc": 0.46296296296296297, |
| "train_speed(iter/s)": 0.596145 |
| }, |
| { |
| "epoch": 0.1729368932038835, |
| "grad_norm": 7.335880279541016, |
| "learning_rate": 9.918231643896852e-05, |
| "loss": 2.199435234069824, |
| "memory(GiB)": 39.22, |
| "step": 285, |
| "token_acc": 0.5050167224080268, |
| "train_speed(iter/s)": 0.595974 |
| }, |
| { |
| "epoch": 0.17597087378640777, |
| "grad_norm": 7.418302536010742, |
| "learning_rate": 9.915345605828828e-05, |
| "loss": 2.3224533081054686, |
| "memory(GiB)": 39.22, |
| "step": 290, |
| "token_acc": 0.5035971223021583, |
| "train_speed(iter/s)": 0.596204 |
| }, |
| { |
| "epoch": 0.17900485436893204, |
| "grad_norm": 6.758571624755859, |
| "learning_rate": 9.912409950072821e-05, |
| "loss": 2.4346172332763674, |
| "memory(GiB)": 39.22, |
| "step": 295, |
| "token_acc": 0.48429319371727747, |
| "train_speed(iter/s)": 0.597578 |
| }, |
| { |
| "epoch": 0.1820388349514563, |
| "grad_norm": 10.919794082641602, |
| "learning_rate": 9.909424706262647e-05, |
| "loss": 2.4341407775878907, |
| "memory(GiB)": 39.22, |
| "step": 300, |
| "token_acc": 0.48857142857142855, |
| "train_speed(iter/s)": 0.598081 |
| }, |
| { |
| "epoch": 0.18507281553398058, |
| "grad_norm": 5.728007793426514, |
| "learning_rate": 9.906389904532688e-05, |
| "loss": 2.120174026489258, |
| "memory(GiB)": 39.22, |
| "step": 305, |
| "token_acc": 0.5371024734982333, |
| "train_speed(iter/s)": 0.597458 |
| }, |
| { |
| "epoch": 0.18810679611650485, |
| "grad_norm": 6.871456623077393, |
| "learning_rate": 9.903305575517584e-05, |
| "loss": 2.342795181274414, |
| "memory(GiB)": 39.22, |
| "step": 310, |
| "token_acc": 0.498567335243553, |
| "train_speed(iter/s)": 0.596534 |
| }, |
| { |
| "epoch": 0.19114077669902912, |
| "grad_norm": 8.57703971862793, |
| "learning_rate": 9.900171750351925e-05, |
| "loss": 2.6183086395263673, |
| "memory(GiB)": 39.22, |
| "step": 315, |
| "token_acc": 0.4625, |
| "train_speed(iter/s)": 0.597484 |
| }, |
| { |
| "epoch": 0.1941747572815534, |
| "grad_norm": 7.768932342529297, |
| "learning_rate": 9.89698846066994e-05, |
| "loss": 2.291164207458496, |
| "memory(GiB)": 39.22, |
| "step": 320, |
| "token_acc": 0.5154320987654321, |
| "train_speed(iter/s)": 0.596998 |
| }, |
| { |
| "epoch": 0.19720873786407767, |
| "grad_norm": 6.957777976989746, |
| "learning_rate": 9.893755738605171e-05, |
| "loss": 2.211928367614746, |
| "memory(GiB)": 39.22, |
| "step": 325, |
| "token_acc": 0.5045871559633027, |
| "train_speed(iter/s)": 0.597803 |
| }, |
| { |
| "epoch": 0.20024271844660194, |
| "grad_norm": 6.228968143463135, |
| "learning_rate": 9.890473616790154e-05, |
| "loss": 2.4344671249389647, |
| "memory(GiB)": 39.22, |
| "step": 330, |
| "token_acc": 0.47413793103448276, |
| "train_speed(iter/s)": 0.597936 |
| }, |
| { |
| "epoch": 0.2032766990291262, |
| "grad_norm": 5.3689422607421875, |
| "learning_rate": 9.887142128356092e-05, |
| "loss": 2.6146148681640624, |
| "memory(GiB)": 39.22, |
| "step": 335, |
| "token_acc": 0.45222929936305734, |
| "train_speed(iter/s)": 0.59885 |
| }, |
| { |
| "epoch": 0.20631067961165048, |
| "grad_norm": 5.215574264526367, |
| "learning_rate": 9.88376130693251e-05, |
| "loss": 2.0827293395996094, |
| "memory(GiB)": 39.22, |
| "step": 340, |
| "token_acc": 0.5172413793103449, |
| "train_speed(iter/s)": 0.599013 |
| }, |
| { |
| "epoch": 0.20934466019417475, |
| "grad_norm": 5.897531509399414, |
| "learning_rate": 9.880331186646925e-05, |
| "loss": 2.232925796508789, |
| "memory(GiB)": 39.22, |
| "step": 345, |
| "token_acc": 0.4750830564784053, |
| "train_speed(iter/s)": 0.599063 |
| }, |
| { |
| "epoch": 0.21237864077669902, |
| "grad_norm": 5.433231830596924, |
| "learning_rate": 9.876851802124503e-05, |
| "loss": 2.4904659271240233, |
| "memory(GiB)": 39.22, |
| "step": 350, |
| "token_acc": 0.47770700636942676, |
| "train_speed(iter/s)": 0.598348 |
| }, |
| { |
| "epoch": 0.2154126213592233, |
| "grad_norm": 7.521780967712402, |
| "learning_rate": 9.873323188487697e-05, |
| "loss": 2.5035079956054687, |
| "memory(GiB)": 39.22, |
| "step": 355, |
| "token_acc": 0.4612903225806452, |
| "train_speed(iter/s)": 0.597584 |
| }, |
| { |
| "epoch": 0.21844660194174756, |
| "grad_norm": 7.7608256340026855, |
| "learning_rate": 9.869745381355906e-05, |
| "loss": 2.2622493743896483, |
| "memory(GiB)": 39.22, |
| "step": 360, |
| "token_acc": 0.5156695156695157, |
| "train_speed(iter/s)": 0.59833 |
| }, |
| { |
| "epoch": 0.22148058252427186, |
| "grad_norm": 9.321803092956543, |
| "learning_rate": 9.86611841684511e-05, |
| "loss": 2.4146696090698243, |
| "memory(GiB)": 39.22, |
| "step": 365, |
| "token_acc": 0.47335423197492166, |
| "train_speed(iter/s)": 0.599026 |
| }, |
| { |
| "epoch": 0.22451456310679613, |
| "grad_norm": 9.292396545410156, |
| "learning_rate": 9.862442331567503e-05, |
| "loss": 2.3599546432495115, |
| "memory(GiB)": 39.22, |
| "step": 370, |
| "token_acc": 0.4956268221574344, |
| "train_speed(iter/s)": 0.5998 |
| }, |
| { |
| "epoch": 0.2275485436893204, |
| "grad_norm": 8.419163703918457, |
| "learning_rate": 9.858717162631128e-05, |
| "loss": 2.6148075103759765, |
| "memory(GiB)": 39.22, |
| "step": 375, |
| "token_acc": 0.46048109965635736, |
| "train_speed(iter/s)": 0.599846 |
| }, |
| { |
| "epoch": 0.23058252427184467, |
| "grad_norm": 6.27116584777832, |
| "learning_rate": 9.854942947639501e-05, |
| "loss": 2.4621152877807617, |
| "memory(GiB)": 39.22, |
| "step": 380, |
| "token_acc": 0.505524861878453, |
| "train_speed(iter/s)": 0.600276 |
| }, |
| { |
| "epoch": 0.23361650485436894, |
| "grad_norm": 7.211396217346191, |
| "learning_rate": 9.851119724691225e-05, |
| "loss": 2.5144262313842773, |
| "memory(GiB)": 39.22, |
| "step": 385, |
| "token_acc": 0.4525993883792049, |
| "train_speed(iter/s)": 0.600896 |
| }, |
| { |
| "epoch": 0.2366504854368932, |
| "grad_norm": 6.34926700592041, |
| "learning_rate": 9.84724753237962e-05, |
| "loss": 2.4521541595458984, |
| "memory(GiB)": 39.22, |
| "step": 390, |
| "token_acc": 0.5, |
| "train_speed(iter/s)": 0.600572 |
| }, |
| { |
| "epoch": 0.23968446601941748, |
| "grad_norm": 6.972572326660156, |
| "learning_rate": 9.843326409792317e-05, |
| "loss": 2.6046756744384765, |
| "memory(GiB)": 39.22, |
| "step": 395, |
| "token_acc": 0.44884488448844884, |
| "train_speed(iter/s)": 0.600491 |
| }, |
| { |
| "epoch": 0.24271844660194175, |
| "grad_norm": 11.898480415344238, |
| "learning_rate": 9.839356396510875e-05, |
| "loss": 2.3576316833496094, |
| "memory(GiB)": 39.22, |
| "step": 400, |
| "token_acc": 0.4472843450479233, |
| "train_speed(iter/s)": 0.601068 |
| }, |
| { |
| "epoch": 0.24575242718446602, |
| "grad_norm": 5.818270683288574, |
| "learning_rate": 9.835337532610376e-05, |
| "loss": 2.0870508193969726, |
| "memory(GiB)": 39.22, |
| "step": 405, |
| "token_acc": 0.526813880126183, |
| "train_speed(iter/s)": 0.601138 |
| }, |
| { |
| "epoch": 0.2487864077669903, |
| "grad_norm": 8.206275939941406, |
| "learning_rate": 9.831269858659023e-05, |
| "loss": 2.1485408782958983, |
| "memory(GiB)": 39.61, |
| "step": 410, |
| "token_acc": 0.5371900826446281, |
| "train_speed(iter/s)": 0.598807 |
| }, |
| { |
| "epoch": 0.2518203883495146, |
| "grad_norm": 7.233333587646484, |
| "learning_rate": 9.827153415717729e-05, |
| "loss": 2.37838191986084, |
| "memory(GiB)": 39.61, |
| "step": 415, |
| "token_acc": 0.5067114093959731, |
| "train_speed(iter/s)": 0.598786 |
| }, |
| { |
| "epoch": 0.25485436893203883, |
| "grad_norm": 6.615445613861084, |
| "learning_rate": 9.822988245339701e-05, |
| "loss": 2.3126983642578125, |
| "memory(GiB)": 39.61, |
| "step": 420, |
| "token_acc": 0.514018691588785, |
| "train_speed(iter/s)": 0.599013 |
| }, |
| { |
| "epoch": 0.25788834951456313, |
| "grad_norm": 7.5856523513793945, |
| "learning_rate": 9.818774389570027e-05, |
| "loss": 2.4124004364013674, |
| "memory(GiB)": 39.61, |
| "step": 425, |
| "token_acc": 0.511864406779661, |
| "train_speed(iter/s)": 0.598978 |
| }, |
| { |
| "epoch": 0.2609223300970874, |
| "grad_norm": 4.8371381759643555, |
| "learning_rate": 9.814511890945241e-05, |
| "loss": 2.2959733963012696, |
| "memory(GiB)": 39.61, |
| "step": 430, |
| "token_acc": 0.5327380952380952, |
| "train_speed(iter/s)": 0.59816 |
| }, |
| { |
| "epoch": 0.26395631067961167, |
| "grad_norm": 6.623883247375488, |
| "learning_rate": 9.810200792492904e-05, |
| "loss": 2.1788196563720703, |
| "memory(GiB)": 39.61, |
| "step": 435, |
| "token_acc": 0.5016611295681063, |
| "train_speed(iter/s)": 0.597541 |
| }, |
| { |
| "epoch": 0.2669902912621359, |
| "grad_norm": 6.926652431488037, |
| "learning_rate": 9.805841137731164e-05, |
| "loss": 2.1499845504760744, |
| "memory(GiB)": 39.61, |
| "step": 440, |
| "token_acc": 0.5192307692307693, |
| "train_speed(iter/s)": 0.597116 |
| }, |
| { |
| "epoch": 0.2700242718446602, |
| "grad_norm": 9.03418254852295, |
| "learning_rate": 9.801432970668318e-05, |
| "loss": 2.1190351486206054, |
| "memory(GiB)": 39.61, |
| "step": 445, |
| "token_acc": 0.5272727272727272, |
| "train_speed(iter/s)": 0.597517 |
| }, |
| { |
| "epoch": 0.27305825242718446, |
| "grad_norm": 8.781913757324219, |
| "learning_rate": 9.79697633580237e-05, |
| "loss": 2.4038110733032227, |
| "memory(GiB)": 39.61, |
| "step": 450, |
| "token_acc": 0.48179271708683474, |
| "train_speed(iter/s)": 0.59777 |
| }, |
| { |
| "epoch": 0.27609223300970875, |
| "grad_norm": 5.531435489654541, |
| "learning_rate": 9.792471278120573e-05, |
| "loss": 2.3716163635253906, |
| "memory(GiB)": 39.61, |
| "step": 455, |
| "token_acc": 0.4847560975609756, |
| "train_speed(iter/s)": 0.597608 |
| }, |
| { |
| "epoch": 0.279126213592233, |
| "grad_norm": 5.956150054931641, |
| "learning_rate": 9.787917843098989e-05, |
| "loss": 2.181165313720703, |
| "memory(GiB)": 39.61, |
| "step": 460, |
| "token_acc": 0.5051903114186851, |
| "train_speed(iter/s)": 0.597412 |
| }, |
| { |
| "epoch": 0.2821601941747573, |
| "grad_norm": 7.345389366149902, |
| "learning_rate": 9.783316076702019e-05, |
| "loss": 2.4305038452148438, |
| "memory(GiB)": 39.61, |
| "step": 465, |
| "token_acc": 0.47802197802197804, |
| "train_speed(iter/s)": 0.597506 |
| }, |
| { |
| "epoch": 0.28519417475728154, |
| "grad_norm": 5.4440388679504395, |
| "learning_rate": 9.778666025381943e-05, |
| "loss": 2.178025245666504, |
| "memory(GiB)": 39.61, |
| "step": 470, |
| "token_acc": 0.5167785234899329, |
| "train_speed(iter/s)": 0.597433 |
| }, |
| { |
| "epoch": 0.28822815533980584, |
| "grad_norm": 6.164299011230469, |
| "learning_rate": 9.77396773607845e-05, |
| "loss": 2.1623489379882814, |
| "memory(GiB)": 39.61, |
| "step": 475, |
| "token_acc": 0.4915254237288136, |
| "train_speed(iter/s)": 0.597134 |
| }, |
| { |
| "epoch": 0.2912621359223301, |
| "grad_norm": 6.166046619415283, |
| "learning_rate": 9.769221256218164e-05, |
| "loss": 2.3753950119018556, |
| "memory(GiB)": 39.61, |
| "step": 480, |
| "token_acc": 0.4879518072289157, |
| "train_speed(iter/s)": 0.597494 |
| }, |
| { |
| "epoch": 0.2942961165048544, |
| "grad_norm": 6.958017826080322, |
| "learning_rate": 9.764426633714167e-05, |
| "loss": 2.21927547454834, |
| "memory(GiB)": 39.61, |
| "step": 485, |
| "token_acc": 0.5050847457627119, |
| "train_speed(iter/s)": 0.597922 |
| }, |
| { |
| "epoch": 0.2973300970873786, |
| "grad_norm": 6.639190673828125, |
| "learning_rate": 9.759583916965517e-05, |
| "loss": 2.4649885177612303, |
| "memory(GiB)": 39.61, |
| "step": 490, |
| "token_acc": 0.4845360824742268, |
| "train_speed(iter/s)": 0.597703 |
| }, |
| { |
| "epoch": 0.3003640776699029, |
| "grad_norm": 5.950069904327393, |
| "learning_rate": 9.754693154856751e-05, |
| "loss": 2.612634468078613, |
| "memory(GiB)": 39.61, |
| "step": 495, |
| "token_acc": 0.45478723404255317, |
| "train_speed(iter/s)": 0.597528 |
| }, |
| { |
| "epoch": 0.30339805825242716, |
| "grad_norm": 6.54391622543335, |
| "learning_rate": 9.7497543967574e-05, |
| "loss": 2.3269075393676757, |
| "memory(GiB)": 39.61, |
| "step": 500, |
| "token_acc": 0.47770700636942676, |
| "train_speed(iter/s)": 0.597896 |
| }, |
| { |
| "epoch": 0.30339805825242716, |
| "eval_loss": 1.981583833694458, |
| "eval_runtime": 12.577, |
| "eval_samples_per_second": 7.951, |
| "eval_steps_per_second": 7.951, |
| "eval_token_acc": 0.48756906077348067, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.30643203883495146, |
| "grad_norm": 10.183039665222168, |
| "learning_rate": 9.74476769252149e-05, |
| "loss": 2.1522619247436525, |
| "memory(GiB)": 39.61, |
| "step": 505, |
| "token_acc": 0.49651741293532337, |
| "train_speed(iter/s)": 0.587357 |
| }, |
| { |
| "epoch": 0.3094660194174757, |
| "grad_norm": 7.112940788269043, |
| "learning_rate": 9.739733092487035e-05, |
| "loss": 2.388911247253418, |
| "memory(GiB)": 39.61, |
| "step": 510, |
| "token_acc": 0.501577287066246, |
| "train_speed(iter/s)": 0.587206 |
| }, |
| { |
| "epoch": 0.3125, |
| "grad_norm": 6.210618495941162, |
| "learning_rate": 9.73465064747553e-05, |
| "loss": 2.59771614074707, |
| "memory(GiB)": 39.61, |
| "step": 515, |
| "token_acc": 0.4624624624624625, |
| "train_speed(iter/s)": 0.587584 |
| }, |
| { |
| "epoch": 0.3155339805825243, |
| "grad_norm": 6.931279182434082, |
| "learning_rate": 9.729520408791434e-05, |
| "loss": 2.512074279785156, |
| "memory(GiB)": 39.61, |
| "step": 520, |
| "token_acc": 0.4910394265232975, |
| "train_speed(iter/s)": 0.587423 |
| }, |
| { |
| "epoch": 0.31856796116504854, |
| "grad_norm": 6.450678825378418, |
| "learning_rate": 9.72434242822167e-05, |
| "loss": 2.1714031219482424, |
| "memory(GiB)": 39.61, |
| "step": 525, |
| "token_acc": 0.5186721991701245, |
| "train_speed(iter/s)": 0.586863 |
| }, |
| { |
| "epoch": 0.32160194174757284, |
| "grad_norm": 6.14349365234375, |
| "learning_rate": 9.719116758035074e-05, |
| "loss": 2.5791160583496096, |
| "memory(GiB)": 39.61, |
| "step": 530, |
| "token_acc": 0.46153846153846156, |
| "train_speed(iter/s)": 0.585986 |
| }, |
| { |
| "epoch": 0.3246359223300971, |
| "grad_norm": 6.583944797515869, |
| "learning_rate": 9.71384345098189e-05, |
| "loss": 2.3987077713012694, |
| "memory(GiB)": 39.61, |
| "step": 535, |
| "token_acc": 0.4925373134328358, |
| "train_speed(iter/s)": 0.58625 |
| }, |
| { |
| "epoch": 0.3276699029126214, |
| "grad_norm": 5.9314422607421875, |
| "learning_rate": 9.70852256029323e-05, |
| "loss": 2.235941505432129, |
| "memory(GiB)": 39.61, |
| "step": 540, |
| "token_acc": 0.5250737463126843, |
| "train_speed(iter/s)": 0.586408 |
| }, |
| { |
| "epoch": 0.3307038834951456, |
| "grad_norm": 7.305792331695557, |
| "learning_rate": 9.703154139680533e-05, |
| "loss": 2.417573928833008, |
| "memory(GiB)": 39.61, |
| "step": 545, |
| "token_acc": 0.5104477611940299, |
| "train_speed(iter/s)": 0.586501 |
| }, |
| { |
| "epoch": 0.3337378640776699, |
| "grad_norm": 5.719043731689453, |
| "learning_rate": 9.697738243335028e-05, |
| "loss": 2.2177127838134765, |
| "memory(GiB)": 39.61, |
| "step": 550, |
| "token_acc": 0.5016181229773463, |
| "train_speed(iter/s)": 0.586526 |
| }, |
| { |
| "epoch": 0.33677184466019416, |
| "grad_norm": 6.281179428100586, |
| "learning_rate": 9.692274925927185e-05, |
| "loss": 2.1101545333862304, |
| "memory(GiB)": 39.61, |
| "step": 555, |
| "token_acc": 0.5151515151515151, |
| "train_speed(iter/s)": 0.586182 |
| }, |
| { |
| "epoch": 0.33980582524271846, |
| "grad_norm": 5.763940811157227, |
| "learning_rate": 9.686764242606163e-05, |
| "loss": 2.2045364379882812, |
| "memory(GiB)": 39.61, |
| "step": 560, |
| "token_acc": 0.5030864197530864, |
| "train_speed(iter/s)": 0.585339 |
| }, |
| { |
| "epoch": 0.3428398058252427, |
| "grad_norm": 8.95957088470459, |
| "learning_rate": 9.681206248999257e-05, |
| "loss": 2.5135177612304687, |
| "memory(GiB)": 39.61, |
| "step": 565, |
| "token_acc": 0.48986486486486486, |
| "train_speed(iter/s)": 0.584751 |
| }, |
| { |
| "epoch": 0.345873786407767, |
| "grad_norm": 5.587778568267822, |
| "learning_rate": 9.675601001211326e-05, |
| "loss": 2.392421340942383, |
| "memory(GiB)": 39.61, |
| "step": 570, |
| "token_acc": 0.4461538461538462, |
| "train_speed(iter/s)": 0.585118 |
| }, |
| { |
| "epoch": 0.34890776699029125, |
| "grad_norm": 7.836484432220459, |
| "learning_rate": 9.669948555824242e-05, |
| "loss": 2.324014663696289, |
| "memory(GiB)": 39.61, |
| "step": 575, |
| "token_acc": 0.4639175257731959, |
| "train_speed(iter/s)": 0.584897 |
| }, |
| { |
| "epoch": 0.35194174757281554, |
| "grad_norm": 5.96414041519165, |
| "learning_rate": 9.664248969896303e-05, |
| "loss": 2.302785301208496, |
| "memory(GiB)": 39.61, |
| "step": 580, |
| "token_acc": 0.4904109589041096, |
| "train_speed(iter/s)": 0.585033 |
| }, |
| { |
| "epoch": 0.3549757281553398, |
| "grad_norm": 7.691707611083984, |
| "learning_rate": 9.65850230096167e-05, |
| "loss": 2.4697898864746093, |
| "memory(GiB)": 39.61, |
| "step": 585, |
| "token_acc": 0.4444444444444444, |
| "train_speed(iter/s)": 0.584897 |
| }, |
| { |
| "epoch": 0.3580097087378641, |
| "grad_norm": 8.556262016296387, |
| "learning_rate": 9.652708607029779e-05, |
| "loss": 2.2903860092163084, |
| "memory(GiB)": 40.86, |
| "step": 590, |
| "token_acc": 0.47115384615384615, |
| "train_speed(iter/s)": 0.584145 |
| }, |
| { |
| "epoch": 0.36104368932038833, |
| "grad_norm": 6.732985496520996, |
| "learning_rate": 9.646867946584757e-05, |
| "loss": 2.1200277328491213, |
| "memory(GiB)": 40.86, |
| "step": 595, |
| "token_acc": 0.532608695652174, |
| "train_speed(iter/s)": 0.584433 |
| }, |
| { |
| "epoch": 0.3640776699029126, |
| "grad_norm": 6.632906913757324, |
| "learning_rate": 9.64098037858483e-05, |
| "loss": 2.4770671844482424, |
| "memory(GiB)": 40.86, |
| "step": 600, |
| "token_acc": 0.4965753424657534, |
| "train_speed(iter/s)": 0.584177 |
| }, |
| { |
| "epoch": 0.36711165048543687, |
| "grad_norm": 8.074189186096191, |
| "learning_rate": 9.635045962461735e-05, |
| "loss": 2.0175329208374024, |
| "memory(GiB)": 40.86, |
| "step": 605, |
| "token_acc": 0.5444444444444444, |
| "train_speed(iter/s)": 0.584218 |
| }, |
| { |
| "epoch": 0.37014563106796117, |
| "grad_norm": 10.57684326171875, |
| "learning_rate": 9.62906475812011e-05, |
| "loss": 2.471089172363281, |
| "memory(GiB)": 40.86, |
| "step": 610, |
| "token_acc": 0.47604790419161674, |
| "train_speed(iter/s)": 0.584641 |
| }, |
| { |
| "epoch": 0.3731796116504854, |
| "grad_norm": 9.030044555664062, |
| "learning_rate": 9.623036825936898e-05, |
| "loss": 2.4689071655273436, |
| "memory(GiB)": 40.86, |
| "step": 615, |
| "token_acc": 0.4551282051282051, |
| "train_speed(iter/s)": 0.58472 |
| }, |
| { |
| "epoch": 0.3762135922330097, |
| "grad_norm": 12.650615692138672, |
| "learning_rate": 9.616962226760728e-05, |
| "loss": 2.4379999160766603, |
| "memory(GiB)": 40.86, |
| "step": 620, |
| "token_acc": 0.4965753424657534, |
| "train_speed(iter/s)": 0.583902 |
| }, |
| { |
| "epoch": 0.379247572815534, |
| "grad_norm": 6.823087692260742, |
| "learning_rate": 9.610841021911312e-05, |
| "loss": 2.2892841339111327, |
| "memory(GiB)": 40.86, |
| "step": 625, |
| "token_acc": 0.4925373134328358, |
| "train_speed(iter/s)": 0.584344 |
| }, |
| { |
| "epoch": 0.38228155339805825, |
| "grad_norm": 6.585781097412109, |
| "learning_rate": 9.604673273178819e-05, |
| "loss": 2.1564374923706056, |
| "memory(GiB)": 40.86, |
| "step": 630, |
| "token_acc": 0.5151515151515151, |
| "train_speed(iter/s)": 0.584389 |
| }, |
| { |
| "epoch": 0.38531553398058255, |
| "grad_norm": 7.104307174682617, |
| "learning_rate": 9.59845904282325e-05, |
| "loss": 2.1816734313964843, |
| "memory(GiB)": 40.86, |
| "step": 635, |
| "token_acc": 0.5308219178082192, |
| "train_speed(iter/s)": 0.585012 |
| }, |
| { |
| "epoch": 0.3883495145631068, |
| "grad_norm": 7.516766548156738, |
| "learning_rate": 9.592198393573816e-05, |
| "loss": 2.276702308654785, |
| "memory(GiB)": 40.86, |
| "step": 640, |
| "token_acc": 0.5102739726027398, |
| "train_speed(iter/s)": 0.585101 |
| }, |
| { |
| "epoch": 0.3913834951456311, |
| "grad_norm": 8.942841529846191, |
| "learning_rate": 9.585891388628298e-05, |
| "loss": 2.3461095809936525, |
| "memory(GiB)": 40.86, |
| "step": 645, |
| "token_acc": 0.527972027972028, |
| "train_speed(iter/s)": 0.585177 |
| }, |
| { |
| "epoch": 0.39441747572815533, |
| "grad_norm": 7.309288024902344, |
| "learning_rate": 9.579538091652414e-05, |
| "loss": 2.3102886199951174, |
| "memory(GiB)": 40.86, |
| "step": 650, |
| "token_acc": 0.5029585798816568, |
| "train_speed(iter/s)": 0.584698 |
| }, |
| { |
| "epoch": 0.39745145631067963, |
| "grad_norm": 8.047052383422852, |
| "learning_rate": 9.573138566779171e-05, |
| "loss": 2.2706655502319335, |
| "memory(GiB)": 40.86, |
| "step": 655, |
| "token_acc": 0.48942598187311176, |
| "train_speed(iter/s)": 0.584864 |
| }, |
| { |
| "epoch": 0.40048543689320387, |
| "grad_norm": 5.1699442863464355, |
| "learning_rate": 9.566692878608229e-05, |
| "loss": 2.3724884033203124, |
| "memory(GiB)": 40.86, |
| "step": 660, |
| "token_acc": 0.5276872964169381, |
| "train_speed(iter/s)": 0.584586 |
| }, |
| { |
| "epoch": 0.40351941747572817, |
| "grad_norm": 7.834784030914307, |
| "learning_rate": 9.560201092205231e-05, |
| "loss": 2.149821090698242, |
| "memory(GiB)": 40.86, |
| "step": 665, |
| "token_acc": 0.5422535211267606, |
| "train_speed(iter/s)": 0.584347 |
| }, |
| { |
| "epoch": 0.4065533980582524, |
| "grad_norm": 5.076271057128906, |
| "learning_rate": 9.553663273101162e-05, |
| "loss": 2.2725826263427735, |
| "memory(GiB)": 40.86, |
| "step": 670, |
| "token_acc": 0.48223350253807107, |
| "train_speed(iter/s)": 0.584476 |
| }, |
| { |
| "epoch": 0.4095873786407767, |
| "grad_norm": 5.5801005363464355, |
| "learning_rate": 9.54707948729168e-05, |
| "loss": 2.556637001037598, |
| "memory(GiB)": 40.86, |
| "step": 675, |
| "token_acc": 0.49714285714285716, |
| "train_speed(iter/s)": 0.583956 |
| }, |
| { |
| "epoch": 0.41262135922330095, |
| "grad_norm": 7.065471649169922, |
| "learning_rate": 9.540449801236451e-05, |
| "loss": 2.326729393005371, |
| "memory(GiB)": 40.86, |
| "step": 680, |
| "token_acc": 0.5097402597402597, |
| "train_speed(iter/s)": 0.583876 |
| }, |
| { |
| "epoch": 0.41565533980582525, |
| "grad_norm": 5.337322235107422, |
| "learning_rate": 9.533774281858481e-05, |
| "loss": 2.34055118560791, |
| "memory(GiB)": 40.86, |
| "step": 685, |
| "token_acc": 0.45645645645645644, |
| "train_speed(iter/s)": 0.584332 |
| }, |
| { |
| "epoch": 0.4186893203883495, |
| "grad_norm": 6.009404182434082, |
| "learning_rate": 9.527052996543436e-05, |
| "loss": 2.368490791320801, |
| "memory(GiB)": 40.86, |
| "step": 690, |
| "token_acc": 0.49240121580547114, |
| "train_speed(iter/s)": 0.584492 |
| }, |
| { |
| "epoch": 0.4217233009708738, |
| "grad_norm": 7.1615495681762695, |
| "learning_rate": 9.520286013138959e-05, |
| "loss": 2.2751487731933593, |
| "memory(GiB)": 40.86, |
| "step": 695, |
| "token_acc": 0.51875, |
| "train_speed(iter/s)": 0.584253 |
| }, |
| { |
| "epoch": 0.42475728155339804, |
| "grad_norm": 6.305184841156006, |
| "learning_rate": 9.513473399954001e-05, |
| "loss": 2.2249755859375, |
| "memory(GiB)": 40.86, |
| "step": 700, |
| "token_acc": 0.5359477124183006, |
| "train_speed(iter/s)": 0.583644 |
| }, |
| { |
| "epoch": 0.42779126213592233, |
| "grad_norm": 6.879371166229248, |
| "learning_rate": 9.506615225758111e-05, |
| "loss": 2.1284107208251952, |
| "memory(GiB)": 40.86, |
| "step": 705, |
| "token_acc": 0.46647230320699706, |
| "train_speed(iter/s)": 0.583054 |
| }, |
| { |
| "epoch": 0.4308252427184466, |
| "grad_norm": 5.7029523849487305, |
| "learning_rate": 9.499711559780756e-05, |
| "loss": 2.3587778091430662, |
| "memory(GiB)": 40.86, |
| "step": 710, |
| "token_acc": 0.4859154929577465, |
| "train_speed(iter/s)": 0.583094 |
| }, |
| { |
| "epoch": 0.4338592233009709, |
| "grad_norm": 7.390230178833008, |
| "learning_rate": 9.492762471710612e-05, |
| "loss": 2.6136167526245115, |
| "memory(GiB)": 40.86, |
| "step": 715, |
| "token_acc": 0.46646341463414637, |
| "train_speed(iter/s)": 0.582932 |
| }, |
| { |
| "epoch": 0.4368932038834951, |
| "grad_norm": 5.883137226104736, |
| "learning_rate": 9.485768031694872e-05, |
| "loss": 2.2231393814086915, |
| "memory(GiB)": 40.86, |
| "step": 720, |
| "token_acc": 0.49818181818181817, |
| "train_speed(iter/s)": 0.582775 |
| }, |
| { |
| "epoch": 0.4399271844660194, |
| "grad_norm": 6.680229663848877, |
| "learning_rate": 9.478728310338527e-05, |
| "loss": 2.1992170333862306, |
| "memory(GiB)": 40.86, |
| "step": 725, |
| "token_acc": 0.5133531157270029, |
| "train_speed(iter/s)": 0.582635 |
| }, |
| { |
| "epoch": 0.4429611650485437, |
| "grad_norm": 8.902689933776855, |
| "learning_rate": 9.471643378703662e-05, |
| "loss": 2.0395624160766603, |
| "memory(GiB)": 40.86, |
| "step": 730, |
| "token_acc": 0.5493421052631579, |
| "train_speed(iter/s)": 0.582703 |
| }, |
| { |
| "epoch": 0.44599514563106796, |
| "grad_norm": 5.443286895751953, |
| "learning_rate": 9.464513308308734e-05, |
| "loss": 2.506935882568359, |
| "memory(GiB)": 40.86, |
| "step": 735, |
| "token_acc": 0.47368421052631576, |
| "train_speed(iter/s)": 0.583215 |
| }, |
| { |
| "epoch": 0.44902912621359226, |
| "grad_norm": 6.487564563751221, |
| "learning_rate": 9.457338171127847e-05, |
| "loss": 2.2692995071411133, |
| "memory(GiB)": 40.86, |
| "step": 740, |
| "token_acc": 0.5179153094462541, |
| "train_speed(iter/s)": 0.583207 |
| }, |
| { |
| "epoch": 0.4520631067961165, |
| "grad_norm": 7.125478267669678, |
| "learning_rate": 9.450118039590032e-05, |
| "loss": 2.1293052673339843, |
| "memory(GiB)": 40.86, |
| "step": 745, |
| "token_acc": 0.5464285714285714, |
| "train_speed(iter/s)": 0.583504 |
| }, |
| { |
| "epoch": 0.4550970873786408, |
| "grad_norm": 7.087446212768555, |
| "learning_rate": 9.442852986578514e-05, |
| "loss": 2.4458339691162108, |
| "memory(GiB)": 40.86, |
| "step": 750, |
| "token_acc": 0.49736842105263157, |
| "train_speed(iter/s)": 0.583179 |
| }, |
| { |
| "epoch": 0.45813106796116504, |
| "grad_norm": 7.162069320678711, |
| "learning_rate": 9.435543085429972e-05, |
| "loss": 2.3158668518066405, |
| "memory(GiB)": 40.86, |
| "step": 755, |
| "token_acc": 0.4744744744744745, |
| "train_speed(iter/s)": 0.5828 |
| }, |
| { |
| "epoch": 0.46116504854368934, |
| "grad_norm": 5.414243698120117, |
| "learning_rate": 9.428188409933806e-05, |
| "loss": 2.16876335144043, |
| "memory(GiB)": 40.86, |
| "step": 760, |
| "token_acc": 0.5087719298245614, |
| "train_speed(iter/s)": 0.58285 |
| }, |
| { |
| "epoch": 0.4641990291262136, |
| "grad_norm": 6.282864570617676, |
| "learning_rate": 9.420789034331387e-05, |
| "loss": 2.289217948913574, |
| "memory(GiB)": 40.86, |
| "step": 765, |
| "token_acc": 0.512396694214876, |
| "train_speed(iter/s)": 0.582631 |
| }, |
| { |
| "epoch": 0.4672330097087379, |
| "grad_norm": 8.376455307006836, |
| "learning_rate": 9.413345033315307e-05, |
| "loss": 2.428557777404785, |
| "memory(GiB)": 40.86, |
| "step": 770, |
| "token_acc": 0.49038461538461536, |
| "train_speed(iter/s)": 0.582831 |
| }, |
| { |
| "epoch": 0.4702669902912621, |
| "grad_norm": 6.952515602111816, |
| "learning_rate": 9.405856482028627e-05, |
| "loss": 2.5767995834350588, |
| "memory(GiB)": 40.86, |
| "step": 775, |
| "token_acc": 0.43425076452599387, |
| "train_speed(iter/s)": 0.583315 |
| }, |
| { |
| "epoch": 0.4733009708737864, |
| "grad_norm": 9.879197120666504, |
| "learning_rate": 9.398323456064123e-05, |
| "loss": 2.218907356262207, |
| "memory(GiB)": 40.86, |
| "step": 780, |
| "token_acc": 0.4844961240310077, |
| "train_speed(iter/s)": 0.583721 |
| }, |
| { |
| "epoch": 0.47633495145631066, |
| "grad_norm": 7.553537845611572, |
| "learning_rate": 9.39074603146351e-05, |
| "loss": 2.3447980880737305, |
| "memory(GiB)": 40.86, |
| "step": 785, |
| "token_acc": 0.4934640522875817, |
| "train_speed(iter/s)": 0.583893 |
| }, |
| { |
| "epoch": 0.47936893203883496, |
| "grad_norm": 6.071379661560059, |
| "learning_rate": 9.383124284716691e-05, |
| "loss": 2.241764450073242, |
| "memory(GiB)": 40.86, |
| "step": 790, |
| "token_acc": 0.46984126984126984, |
| "train_speed(iter/s)": 0.584261 |
| }, |
| { |
| "epoch": 0.4824029126213592, |
| "grad_norm": 5.881275653839111, |
| "learning_rate": 9.37545829276097e-05, |
| "loss": 2.361056900024414, |
| "memory(GiB)": 40.86, |
| "step": 795, |
| "token_acc": 0.5249169435215947, |
| "train_speed(iter/s)": 0.584506 |
| }, |
| { |
| "epoch": 0.4854368932038835, |
| "grad_norm": 8.599099159240723, |
| "learning_rate": 9.367748132980287e-05, |
| "loss": 2.1997608184814452, |
| "memory(GiB)": 40.86, |
| "step": 800, |
| "token_acc": 0.48518518518518516, |
| "train_speed(iter/s)": 0.584425 |
| }, |
| { |
| "epoch": 0.48847087378640774, |
| "grad_norm": 5.97467565536499, |
| "learning_rate": 9.359993883204425e-05, |
| "loss": 2.2297504425048826, |
| "memory(GiB)": 40.86, |
| "step": 805, |
| "token_acc": 0.5, |
| "train_speed(iter/s)": 0.583965 |
| }, |
| { |
| "epoch": 0.49150485436893204, |
| "grad_norm": 6.91083288192749, |
| "learning_rate": 9.352195621708239e-05, |
| "loss": 1.9850988388061523, |
| "memory(GiB)": 40.86, |
| "step": 810, |
| "token_acc": 0.5147540983606558, |
| "train_speed(iter/s)": 0.584245 |
| }, |
| { |
| "epoch": 0.4945388349514563, |
| "grad_norm": 8.59461784362793, |
| "learning_rate": 9.344353427210852e-05, |
| "loss": 2.421934127807617, |
| "memory(GiB)": 40.86, |
| "step": 815, |
| "token_acc": 0.5176056338028169, |
| "train_speed(iter/s)": 0.5847 |
| }, |
| { |
| "epoch": 0.4975728155339806, |
| "grad_norm": 6.944448947906494, |
| "learning_rate": 9.336467378874871e-05, |
| "loss": 2.4557096481323244, |
| "memory(GiB)": 40.86, |
| "step": 820, |
| "token_acc": 0.47419354838709676, |
| "train_speed(iter/s)": 0.584583 |
| }, |
| { |
| "epoch": 0.5006067961165048, |
| "grad_norm": 5.55971622467041, |
| "learning_rate": 9.328537556305578e-05, |
| "loss": 2.2306629180908204, |
| "memory(GiB)": 40.86, |
| "step": 825, |
| "token_acc": 0.4852459016393443, |
| "train_speed(iter/s)": 0.585006 |
| }, |
| { |
| "epoch": 0.5036407766990292, |
| "grad_norm": 7.160358905792236, |
| "learning_rate": 9.320564039550134e-05, |
| "loss": 2.429665374755859, |
| "memory(GiB)": 40.86, |
| "step": 830, |
| "token_acc": 0.47262247838616717, |
| "train_speed(iter/s)": 0.585348 |
| }, |
| { |
| "epoch": 0.5066747572815534, |
| "grad_norm": 6.570638656616211, |
| "learning_rate": 9.31254690909677e-05, |
| "loss": 2.442539596557617, |
| "memory(GiB)": 40.86, |
| "step": 835, |
| "token_acc": 0.476027397260274, |
| "train_speed(iter/s)": 0.585696 |
| }, |
| { |
| "epoch": 0.5097087378640777, |
| "grad_norm": 6.370124340057373, |
| "learning_rate": 9.304486245873972e-05, |
| "loss": 2.287601089477539, |
| "memory(GiB)": 40.86, |
| "step": 840, |
| "token_acc": 0.4982456140350877, |
| "train_speed(iter/s)": 0.585975 |
| }, |
| { |
| "epoch": 0.5127427184466019, |
| "grad_norm": 6.999332904815674, |
| "learning_rate": 9.296382131249666e-05, |
| "loss": 2.317913818359375, |
| "memory(GiB)": 40.86, |
| "step": 845, |
| "token_acc": 0.5041782729805014, |
| "train_speed(iter/s)": 0.585835 |
| }, |
| { |
| "epoch": 0.5157766990291263, |
| "grad_norm": 5.257606506347656, |
| "learning_rate": 9.288234647030391e-05, |
| "loss": 2.18968505859375, |
| "memory(GiB)": 40.86, |
| "step": 850, |
| "token_acc": 0.5102040816326531, |
| "train_speed(iter/s)": 0.585922 |
| }, |
| { |
| "epoch": 0.5188106796116505, |
| "grad_norm": 5.611077785491943, |
| "learning_rate": 9.280043875460485e-05, |
| "loss": 2.0620901107788088, |
| "memory(GiB)": 40.86, |
| "step": 855, |
| "token_acc": 0.5365079365079365, |
| "train_speed(iter/s)": 0.585966 |
| }, |
| { |
| "epoch": 0.5218446601941747, |
| "grad_norm": 5.108231067657471, |
| "learning_rate": 9.271809899221246e-05, |
| "loss": 2.4372896194458007, |
| "memory(GiB)": 40.86, |
| "step": 860, |
| "token_acc": 0.4479768786127168, |
| "train_speed(iter/s)": 0.585988 |
| }, |
| { |
| "epoch": 0.524878640776699, |
| "grad_norm": 6.733373641967773, |
| "learning_rate": 9.263532801430094e-05, |
| "loss": 2.1579952239990234, |
| "memory(GiB)": 40.86, |
| "step": 865, |
| "token_acc": 0.4807121661721068, |
| "train_speed(iter/s)": 0.585966 |
| }, |
| { |
| "epoch": 0.5279126213592233, |
| "grad_norm": 5.073429107666016, |
| "learning_rate": 9.255212665639744e-05, |
| "loss": 2.1149240493774415, |
| "memory(GiB)": 40.86, |
| "step": 870, |
| "token_acc": 0.5171232876712328, |
| "train_speed(iter/s)": 0.585975 |
| }, |
| { |
| "epoch": 0.5309466019417476, |
| "grad_norm": 6.175984859466553, |
| "learning_rate": 9.246849575837349e-05, |
| "loss": 1.9833623886108398, |
| "memory(GiB)": 40.86, |
| "step": 875, |
| "token_acc": 0.526813880126183, |
| "train_speed(iter/s)": 0.585474 |
| }, |
| { |
| "epoch": 0.5339805825242718, |
| "grad_norm": 8.124624252319336, |
| "learning_rate": 9.238443616443666e-05, |
| "loss": 2.4017959594726563, |
| "memory(GiB)": 40.86, |
| "step": 880, |
| "token_acc": 0.49032258064516127, |
| "train_speed(iter/s)": 0.585597 |
| }, |
| { |
| "epoch": 0.5370145631067961, |
| "grad_norm": 7.752699375152588, |
| "learning_rate": 9.229994872312193e-05, |
| "loss": 2.387744331359863, |
| "memory(GiB)": 40.86, |
| "step": 885, |
| "token_acc": 0.5106382978723404, |
| "train_speed(iter/s)": 0.585668 |
| }, |
| { |
| "epoch": 0.5400485436893204, |
| "grad_norm": 8.323918342590332, |
| "learning_rate": 9.221503428728316e-05, |
| "loss": 2.1385421752929688, |
| "memory(GiB)": 40.86, |
| "step": 890, |
| "token_acc": 0.5551020408163265, |
| "train_speed(iter/s)": 0.586131 |
| }, |
| { |
| "epoch": 0.5430825242718447, |
| "grad_norm": 6.044275760650635, |
| "learning_rate": 9.212969371408449e-05, |
| "loss": 1.9817846298217774, |
| "memory(GiB)": 40.86, |
| "step": 895, |
| "token_acc": 0.5494880546075085, |
| "train_speed(iter/s)": 0.586393 |
| }, |
| { |
| "epoch": 0.5461165048543689, |
| "grad_norm": 6.398566246032715, |
| "learning_rate": 9.204392786499168e-05, |
| "loss": 2.3052085876464843, |
| "memory(GiB)": 40.86, |
| "step": 900, |
| "token_acc": 0.4840764331210191, |
| "train_speed(iter/s)": 0.586567 |
| }, |
| { |
| "epoch": 0.5491504854368932, |
| "grad_norm": 9.17261028289795, |
| "learning_rate": 9.19577376057634e-05, |
| "loss": 2.37634391784668, |
| "memory(GiB)": 40.86, |
| "step": 905, |
| "token_acc": 0.5249169435215947, |
| "train_speed(iter/s)": 0.586787 |
| }, |
| { |
| "epoch": 0.5521844660194175, |
| "grad_norm": 5.751415729522705, |
| "learning_rate": 9.187112380644254e-05, |
| "loss": 2.2847476959228517, |
| "memory(GiB)": 40.86, |
| "step": 910, |
| "token_acc": 0.51338199513382, |
| "train_speed(iter/s)": 0.586408 |
| }, |
| { |
| "epoch": 0.5552184466019418, |
| "grad_norm": 12.116822242736816, |
| "learning_rate": 9.178408734134736e-05, |
| "loss": 2.5225976943969726, |
| "memory(GiB)": 40.86, |
| "step": 915, |
| "token_acc": 0.48253968253968255, |
| "train_speed(iter/s)": 0.586571 |
| }, |
| { |
| "epoch": 0.558252427184466, |
| "grad_norm": 8.28947925567627, |
| "learning_rate": 9.16966290890627e-05, |
| "loss": 2.215795135498047, |
| "memory(GiB)": 40.86, |
| "step": 920, |
| "token_acc": 0.5543071161048689, |
| "train_speed(iter/s)": 0.586881 |
| }, |
| { |
| "epoch": 0.5612864077669902, |
| "grad_norm": 9.582908630371094, |
| "learning_rate": 9.160874993243113e-05, |
| "loss": 2.299172019958496, |
| "memory(GiB)": 40.86, |
| "step": 925, |
| "token_acc": 0.4763636363636364, |
| "train_speed(iter/s)": 0.587069 |
| }, |
| { |
| "epoch": 0.5643203883495146, |
| "grad_norm": 8.669927597045898, |
| "learning_rate": 9.152045075854398e-05, |
| "loss": 2.457051086425781, |
| "memory(GiB)": 40.86, |
| "step": 930, |
| "token_acc": 0.49828178694158076, |
| "train_speed(iter/s)": 0.587349 |
| }, |
| { |
| "epoch": 0.5673543689320388, |
| "grad_norm": 6.801449298858643, |
| "learning_rate": 9.143173245873247e-05, |
| "loss": 2.1124551773071287, |
| "memory(GiB)": 40.86, |
| "step": 935, |
| "token_acc": 0.5018315018315018, |
| "train_speed(iter/s)": 0.58766 |
| }, |
| { |
| "epoch": 0.5703883495145631, |
| "grad_norm": 7.988888263702393, |
| "learning_rate": 9.134259592855861e-05, |
| "loss": 2.3452516555786134, |
| "memory(GiB)": 40.86, |
| "step": 940, |
| "token_acc": 0.49666666666666665, |
| "train_speed(iter/s)": 0.58765 |
| }, |
| { |
| "epoch": 0.5734223300970874, |
| "grad_norm": 7.102814674377441, |
| "learning_rate": 9.125304206780627e-05, |
| "loss": 2.3180185317993165, |
| "memory(GiB)": 40.86, |
| "step": 945, |
| "token_acc": 0.5050847457627119, |
| "train_speed(iter/s)": 0.587449 |
| }, |
| { |
| "epoch": 0.5764563106796117, |
| "grad_norm": 7.604477405548096, |
| "learning_rate": 9.116307178047198e-05, |
| "loss": 2.3972042083740233, |
| "memory(GiB)": 40.86, |
| "step": 950, |
| "token_acc": 0.46283783783783783, |
| "train_speed(iter/s)": 0.587729 |
| }, |
| { |
| "epoch": 0.5794902912621359, |
| "grad_norm": 6.319246292114258, |
| "learning_rate": 9.10726859747559e-05, |
| "loss": 2.103443908691406, |
| "memory(GiB)": 40.86, |
| "step": 955, |
| "token_acc": 0.5191256830601093, |
| "train_speed(iter/s)": 0.588189 |
| }, |
| { |
| "epoch": 0.5825242718446602, |
| "grad_norm": 8.772871017456055, |
| "learning_rate": 9.098188556305263e-05, |
| "loss": 2.073552703857422, |
| "memory(GiB)": 40.86, |
| "step": 960, |
| "token_acc": 0.552901023890785, |
| "train_speed(iter/s)": 0.588218 |
| }, |
| { |
| "epoch": 0.5855582524271845, |
| "grad_norm": 8.01586627960205, |
| "learning_rate": 9.089067146194196e-05, |
| "loss": 1.8984146118164062, |
| "memory(GiB)": 40.86, |
| "step": 965, |
| "token_acc": 0.616504854368932, |
| "train_speed(iter/s)": 0.58821 |
| }, |
| { |
| "epoch": 0.5885922330097088, |
| "grad_norm": 6.168645858764648, |
| "learning_rate": 9.079904459217966e-05, |
| "loss": 2.379282760620117, |
| "memory(GiB)": 40.86, |
| "step": 970, |
| "token_acc": 0.4649122807017544, |
| "train_speed(iter/s)": 0.588446 |
| }, |
| { |
| "epoch": 0.591626213592233, |
| "grad_norm": 6.704972743988037, |
| "learning_rate": 9.070700587868817e-05, |
| "loss": 2.1655595779418944, |
| "memory(GiB)": 40.86, |
| "step": 975, |
| "token_acc": 0.5521885521885522, |
| "train_speed(iter/s)": 0.588386 |
| }, |
| { |
| "epoch": 0.5946601941747572, |
| "grad_norm": 7.025293827056885, |
| "learning_rate": 9.061455625054725e-05, |
| "loss": 2.193133735656738, |
| "memory(GiB)": 40.86, |
| "step": 980, |
| "token_acc": 0.5197368421052632, |
| "train_speed(iter/s)": 0.588278 |
| }, |
| { |
| "epoch": 0.5976941747572816, |
| "grad_norm": 6.618514537811279, |
| "learning_rate": 9.052169664098461e-05, |
| "loss": 2.0073310852050783, |
| "memory(GiB)": 40.86, |
| "step": 985, |
| "token_acc": 0.55893536121673, |
| "train_speed(iter/s)": 0.588288 |
| }, |
| { |
| "epoch": 0.6007281553398058, |
| "grad_norm": 5.154722690582275, |
| "learning_rate": 9.042842798736654e-05, |
| "loss": 2.2399974822998048, |
| "memory(GiB)": 40.86, |
| "step": 990, |
| "token_acc": 0.5195530726256983, |
| "train_speed(iter/s)": 0.588303 |
| }, |
| { |
| "epoch": 0.6037621359223301, |
| "grad_norm": 6.787222862243652, |
| "learning_rate": 9.03347512311884e-05, |
| "loss": 2.3585285186767577, |
| "memory(GiB)": 40.86, |
| "step": 995, |
| "token_acc": 0.46075085324232085, |
| "train_speed(iter/s)": 0.588665 |
| }, |
| { |
| "epoch": 0.6067961165048543, |
| "grad_norm": 4.932912826538086, |
| "learning_rate": 9.024066731806501e-05, |
| "loss": 2.276376724243164, |
| "memory(GiB)": 40.86, |
| "step": 1000, |
| "token_acc": 0.4921135646687697, |
| "train_speed(iter/s)": 0.58881 |
| }, |
| { |
| "epoch": 0.6067961165048543, |
| "eval_loss": 2.31942081451416, |
| "eval_runtime": 12.0489, |
| "eval_samples_per_second": 8.3, |
| "eval_steps_per_second": 8.3, |
| "eval_token_acc": 0.48575305291723203, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.6098300970873787, |
| "grad_norm": 9.08281421661377, |
| "learning_rate": 9.01461771977214e-05, |
| "loss": 2.333499717712402, |
| "memory(GiB)": 40.86, |
| "step": 1005, |
| "token_acc": 0.4905838041431262, |
| "train_speed(iter/s)": 0.584203 |
| }, |
| { |
| "epoch": 0.6128640776699029, |
| "grad_norm": 7.2298359870910645, |
| "learning_rate": 9.005128182398283e-05, |
| "loss": 2.4393625259399414, |
| "memory(GiB)": 40.86, |
| "step": 1010, |
| "token_acc": 0.48732394366197185, |
| "train_speed(iter/s)": 0.584412 |
| }, |
| { |
| "epoch": 0.6158980582524272, |
| "grad_norm": 5.784246444702148, |
| "learning_rate": 8.995598215476555e-05, |
| "loss": 2.171500587463379, |
| "memory(GiB)": 40.86, |
| "step": 1015, |
| "token_acc": 0.5384615384615384, |
| "train_speed(iter/s)": 0.58417 |
| }, |
| { |
| "epoch": 0.6189320388349514, |
| "grad_norm": 8.403388977050781, |
| "learning_rate": 8.986027915206686e-05, |
| "loss": 2.1093074798583986, |
| "memory(GiB)": 40.86, |
| "step": 1020, |
| "token_acc": 0.5201342281879194, |
| "train_speed(iter/s)": 0.584014 |
| }, |
| { |
| "epoch": 0.6219660194174758, |
| "grad_norm": 7.646571636199951, |
| "learning_rate": 8.976417378195544e-05, |
| "loss": 2.1439834594726563, |
| "memory(GiB)": 40.86, |
| "step": 1025, |
| "token_acc": 0.5295857988165681, |
| "train_speed(iter/s)": 0.583981 |
| }, |
| { |
| "epoch": 0.625, |
| "grad_norm": 6.978275299072266, |
| "learning_rate": 8.966766701456177e-05, |
| "loss": 2.288041687011719, |
| "memory(GiB)": 40.86, |
| "step": 1030, |
| "token_acc": 0.513126491646778, |
| "train_speed(iter/s)": 0.584314 |
| }, |
| { |
| "epoch": 0.6280339805825242, |
| "grad_norm": 6.3236541748046875, |
| "learning_rate": 8.957075982406811e-05, |
| "loss": 2.250352668762207, |
| "memory(GiB)": 40.86, |
| "step": 1035, |
| "token_acc": 0.531986531986532, |
| "train_speed(iter/s)": 0.584575 |
| }, |
| { |
| "epoch": 0.6310679611650486, |
| "grad_norm": 6.21897554397583, |
| "learning_rate": 8.947345318869882e-05, |
| "loss": 2.425637054443359, |
| "memory(GiB)": 40.86, |
| "step": 1040, |
| "token_acc": 0.46859903381642515, |
| "train_speed(iter/s)": 0.584674 |
| }, |
| { |
| "epoch": 0.6341019417475728, |
| "grad_norm": 7.0973358154296875, |
| "learning_rate": 8.937574809071041e-05, |
| "loss": 1.9796913146972657, |
| "memory(GiB)": 40.86, |
| "step": 1045, |
| "token_acc": 0.5555555555555556, |
| "train_speed(iter/s)": 0.584622 |
| }, |
| { |
| "epoch": 0.6371359223300971, |
| "grad_norm": 6.9171833992004395, |
| "learning_rate": 8.927764551638169e-05, |
| "loss": 2.153505325317383, |
| "memory(GiB)": 40.86, |
| "step": 1050, |
| "token_acc": 0.5481727574750831, |
| "train_speed(iter/s)": 0.584749 |
| }, |
| { |
| "epoch": 0.6401699029126213, |
| "grad_norm": 6.10349178314209, |
| "learning_rate": 8.917914645600369e-05, |
| "loss": 2.2978469848632814, |
| "memory(GiB)": 40.86, |
| "step": 1055, |
| "token_acc": 0.5279503105590062, |
| "train_speed(iter/s)": 0.584579 |
| }, |
| { |
| "epoch": 0.6432038834951457, |
| "grad_norm": 8.071660995483398, |
| "learning_rate": 8.908025190386985e-05, |
| "loss": 1.8877496719360352, |
| "memory(GiB)": 40.86, |
| "step": 1060, |
| "token_acc": 0.582089552238806, |
| "train_speed(iter/s)": 0.584349 |
| }, |
| { |
| "epoch": 0.6462378640776699, |
| "grad_norm": 5.858845233917236, |
| "learning_rate": 8.898096285826582e-05, |
| "loss": 2.2511001586914063, |
| "memory(GiB)": 40.86, |
| "step": 1065, |
| "token_acc": 0.4642857142857143, |
| "train_speed(iter/s)": 0.584502 |
| }, |
| { |
| "epoch": 0.6492718446601942, |
| "grad_norm": 7.347677230834961, |
| "learning_rate": 8.888128032145941e-05, |
| "loss": 2.173788833618164, |
| "memory(GiB)": 40.86, |
| "step": 1070, |
| "token_acc": 0.5113636363636364, |
| "train_speed(iter/s)": 0.584862 |
| }, |
| { |
| "epoch": 0.6523058252427184, |
| "grad_norm": 6.6078596115112305, |
| "learning_rate": 8.878120529969061e-05, |
| "loss": 2.1907543182373046, |
| "memory(GiB)": 40.86, |
| "step": 1075, |
| "token_acc": 0.5047318611987381, |
| "train_speed(iter/s)": 0.585062 |
| }, |
| { |
| "epoch": 0.6553398058252428, |
| "grad_norm": 6.744375228881836, |
| "learning_rate": 8.868073880316124e-05, |
| "loss": 2.5921836853027345, |
| "memory(GiB)": 40.86, |
| "step": 1080, |
| "token_acc": 0.4777777777777778, |
| "train_speed(iter/s)": 0.585281 |
| }, |
| { |
| "epoch": 0.658373786407767, |
| "grad_norm": 6.0812153816223145, |
| "learning_rate": 8.857988184602484e-05, |
| "loss": 2.076370620727539, |
| "memory(GiB)": 40.86, |
| "step": 1085, |
| "token_acc": 0.5142118863049095, |
| "train_speed(iter/s)": 0.585629 |
| }, |
| { |
| "epoch": 0.6614077669902912, |
| "grad_norm": 7.485403060913086, |
| "learning_rate": 8.84786354463765e-05, |
| "loss": 2.553455352783203, |
| "memory(GiB)": 40.86, |
| "step": 1090, |
| "token_acc": 0.47151898734177217, |
| "train_speed(iter/s)": 0.58598 |
| }, |
| { |
| "epoch": 0.6644417475728155, |
| "grad_norm": 6.709589958190918, |
| "learning_rate": 8.837700062624245e-05, |
| "loss": 2.1033605575561523, |
| "memory(GiB)": 40.86, |
| "step": 1095, |
| "token_acc": 0.525, |
| "train_speed(iter/s)": 0.586173 |
| }, |
| { |
| "epoch": 0.6674757281553398, |
| "grad_norm": 7.800707817077637, |
| "learning_rate": 8.827497841156986e-05, |
| "loss": 2.4268184661865235, |
| "memory(GiB)": 40.86, |
| "step": 1100, |
| "token_acc": 0.5115511551155115, |
| "train_speed(iter/s)": 0.586144 |
| }, |
| { |
| "epoch": 0.6705097087378641, |
| "grad_norm": 6.759317874908447, |
| "learning_rate": 8.817256983221637e-05, |
| "loss": 2.4730669021606446, |
| "memory(GiB)": 40.86, |
| "step": 1105, |
| "token_acc": 0.45609065155807366, |
| "train_speed(iter/s)": 0.586261 |
| }, |
| { |
| "epoch": 0.6735436893203883, |
| "grad_norm": 7.099054336547852, |
| "learning_rate": 8.806977592193985e-05, |
| "loss": 2.596049118041992, |
| "memory(GiB)": 40.86, |
| "step": 1110, |
| "token_acc": 0.44542772861356933, |
| "train_speed(iter/s)": 0.586514 |
| }, |
| { |
| "epoch": 0.6765776699029126, |
| "grad_norm": 6.84334135055542, |
| "learning_rate": 8.796659771838777e-05, |
| "loss": 2.2642656326293946, |
| "memory(GiB)": 40.86, |
| "step": 1115, |
| "token_acc": 0.5156794425087108, |
| "train_speed(iter/s)": 0.586643 |
| }, |
| { |
| "epoch": 0.6796116504854369, |
| "grad_norm": 6.421635627746582, |
| "learning_rate": 8.786303626308689e-05, |
| "loss": 2.1252628326416017, |
| "memory(GiB)": 40.86, |
| "step": 1120, |
| "token_acc": 0.5263157894736842, |
| "train_speed(iter/s)": 0.586755 |
| }, |
| { |
| "epoch": 0.6826456310679612, |
| "grad_norm": 4.622204303741455, |
| "learning_rate": 8.775909260143266e-05, |
| "loss": 2.2372303009033203, |
| "memory(GiB)": 40.86, |
| "step": 1125, |
| "token_acc": 0.5159574468085106, |
| "train_speed(iter/s)": 0.586869 |
| }, |
| { |
| "epoch": 0.6856796116504854, |
| "grad_norm": 7.592894554138184, |
| "learning_rate": 8.765476778267874e-05, |
| "loss": 2.1323163986206053, |
| "memory(GiB)": 40.86, |
| "step": 1130, |
| "token_acc": 0.4909090909090909, |
| "train_speed(iter/s)": 0.586899 |
| }, |
| { |
| "epoch": 0.6887135922330098, |
| "grad_norm": 7.193599700927734, |
| "learning_rate": 8.755006285992629e-05, |
| "loss": 2.1902294158935547, |
| "memory(GiB)": 40.86, |
| "step": 1135, |
| "token_acc": 0.5234899328859061, |
| "train_speed(iter/s)": 0.586763 |
| }, |
| { |
| "epoch": 0.691747572815534, |
| "grad_norm": 4.904916286468506, |
| "learning_rate": 8.744497889011343e-05, |
| "loss": 2.2312740325927733, |
| "memory(GiB)": 40.86, |
| "step": 1140, |
| "token_acc": 0.48404255319148937, |
| "train_speed(iter/s)": 0.586675 |
| }, |
| { |
| "epoch": 0.6947815533980582, |
| "grad_norm": 8.201228141784668, |
| "learning_rate": 8.733951693400458e-05, |
| "loss": 2.166943168640137, |
| "memory(GiB)": 40.86, |
| "step": 1145, |
| "token_acc": 0.5105633802816901, |
| "train_speed(iter/s)": 0.58693 |
| }, |
| { |
| "epoch": 0.6978155339805825, |
| "grad_norm": 5.049937725067139, |
| "learning_rate": 8.723367805617965e-05, |
| "loss": 2.254404067993164, |
| "memory(GiB)": 40.86, |
| "step": 1150, |
| "token_acc": 0.478125, |
| "train_speed(iter/s)": 0.587058 |
| }, |
| { |
| "epoch": 0.7008495145631068, |
| "grad_norm": 6.745171546936035, |
| "learning_rate": 8.712746332502351e-05, |
| "loss": 2.1543249130249023, |
| "memory(GiB)": 40.86, |
| "step": 1155, |
| "token_acc": 0.5327380952380952, |
| "train_speed(iter/s)": 0.587131 |
| }, |
| { |
| "epoch": 0.7038834951456311, |
| "grad_norm": 10.320196151733398, |
| "learning_rate": 8.702087381271488e-05, |
| "loss": 2.4464441299438477, |
| "memory(GiB)": 40.86, |
| "step": 1160, |
| "token_acc": 0.4897959183673469, |
| "train_speed(iter/s)": 0.587013 |
| }, |
| { |
| "epoch": 0.7069174757281553, |
| "grad_norm": 6.8334503173828125, |
| "learning_rate": 8.691391059521583e-05, |
| "loss": 2.1910587310791017, |
| "memory(GiB)": 40.86, |
| "step": 1165, |
| "token_acc": 0.527972027972028, |
| "train_speed(iter/s)": 0.586856 |
| }, |
| { |
| "epoch": 0.7099514563106796, |
| "grad_norm": 6.28577184677124, |
| "learning_rate": 8.680657475226069e-05, |
| "loss": 1.9499628067016601, |
| "memory(GiB)": 40.86, |
| "step": 1170, |
| "token_acc": 0.6007751937984496, |
| "train_speed(iter/s)": 0.586444 |
| }, |
| { |
| "epoch": 0.7129854368932039, |
| "grad_norm": 6.818657398223877, |
| "learning_rate": 8.669886736734527e-05, |
| "loss": 2.151942825317383, |
| "memory(GiB)": 40.86, |
| "step": 1175, |
| "token_acc": 0.5254777070063694, |
| "train_speed(iter/s)": 0.58655 |
| }, |
| { |
| "epoch": 0.7160194174757282, |
| "grad_norm": 5.253009796142578, |
| "learning_rate": 8.659078952771592e-05, |
| "loss": 2.54516487121582, |
| "memory(GiB)": 40.86, |
| "step": 1180, |
| "token_acc": 0.4984894259818731, |
| "train_speed(iter/s)": 0.586775 |
| }, |
| { |
| "epoch": 0.7190533980582524, |
| "grad_norm": 8.068851470947266, |
| "learning_rate": 8.648234232435845e-05, |
| "loss": 2.3182897567749023, |
| "memory(GiB)": 40.86, |
| "step": 1185, |
| "token_acc": 0.4734982332155477, |
| "train_speed(iter/s)": 0.586773 |
| }, |
| { |
| "epoch": 0.7220873786407767, |
| "grad_norm": 6.965189456939697, |
| "learning_rate": 8.63735268519873e-05, |
| "loss": 2.1850954055786134, |
| "memory(GiB)": 40.86, |
| "step": 1190, |
| "token_acc": 0.5196374622356495, |
| "train_speed(iter/s)": 0.586751 |
| }, |
| { |
| "epoch": 0.725121359223301, |
| "grad_norm": 6.5986409187316895, |
| "learning_rate": 8.626434420903424e-05, |
| "loss": 2.5639453887939454, |
| "memory(GiB)": 40.86, |
| "step": 1195, |
| "token_acc": 0.4631268436578171, |
| "train_speed(iter/s)": 0.586943 |
| }, |
| { |
| "epoch": 0.7281553398058253, |
| "grad_norm": 4.670579433441162, |
| "learning_rate": 8.615479549763756e-05, |
| "loss": 2.406460189819336, |
| "memory(GiB)": 40.86, |
| "step": 1200, |
| "token_acc": 0.48223350253807107, |
| "train_speed(iter/s)": 0.587084 |
| }, |
| { |
| "epoch": 0.7311893203883495, |
| "grad_norm": 6.295917510986328, |
| "learning_rate": 8.604488182363074e-05, |
| "loss": 2.536121940612793, |
| "memory(GiB)": 40.86, |
| "step": 1205, |
| "token_acc": 0.4873417721518987, |
| "train_speed(iter/s)": 0.587357 |
| }, |
| { |
| "epoch": 0.7342233009708737, |
| "grad_norm": 6.125625133514404, |
| "learning_rate": 8.593460429653133e-05, |
| "loss": 2.4063135147094727, |
| "memory(GiB)": 40.86, |
| "step": 1210, |
| "token_acc": 0.48128342245989303, |
| "train_speed(iter/s)": 0.587204 |
| }, |
| { |
| "epoch": 0.7372572815533981, |
| "grad_norm": 6.775357723236084, |
| "learning_rate": 8.582396402952984e-05, |
| "loss": 2.082032585144043, |
| "memory(GiB)": 40.86, |
| "step": 1215, |
| "token_acc": 0.5273311897106109, |
| "train_speed(iter/s)": 0.587489 |
| }, |
| { |
| "epoch": 0.7402912621359223, |
| "grad_norm": 8.1486177444458, |
| "learning_rate": 8.571296213947838e-05, |
| "loss": 1.675777053833008, |
| "memory(GiB)": 40.86, |
| "step": 1220, |
| "token_acc": 0.6021897810218978, |
| "train_speed(iter/s)": 0.587737 |
| }, |
| { |
| "epoch": 0.7433252427184466, |
| "grad_norm": 5.302309036254883, |
| "learning_rate": 8.560159974687952e-05, |
| "loss": 2.1232393264770506, |
| "memory(GiB)": 40.86, |
| "step": 1225, |
| "token_acc": 0.5156695156695157, |
| "train_speed(iter/s)": 0.587809 |
| }, |
| { |
| "epoch": 0.7463592233009708, |
| "grad_norm": 9.10730266571045, |
| "learning_rate": 8.54898779758748e-05, |
| "loss": 2.1305063247680662, |
| "memory(GiB)": 40.86, |
| "step": 1230, |
| "token_acc": 0.53515625, |
| "train_speed(iter/s)": 0.587588 |
| }, |
| { |
| "epoch": 0.7493932038834952, |
| "grad_norm": 6.489813327789307, |
| "learning_rate": 8.537779795423359e-05, |
| "loss": 2.2934566497802735, |
| "memory(GiB)": 40.86, |
| "step": 1235, |
| "token_acc": 0.5161290322580645, |
| "train_speed(iter/s)": 0.587435 |
| }, |
| { |
| "epoch": 0.7524271844660194, |
| "grad_norm": 6.982603549957275, |
| "learning_rate": 8.526536081334152e-05, |
| "loss": 2.2987644195556642, |
| "memory(GiB)": 40.86, |
| "step": 1240, |
| "token_acc": 0.47924528301886793, |
| "train_speed(iter/s)": 0.587522 |
| }, |
| { |
| "epoch": 0.7554611650485437, |
| "grad_norm": 7.774171352386475, |
| "learning_rate": 8.515256768818918e-05, |
| "loss": 2.5817737579345703, |
| "memory(GiB)": 40.86, |
| "step": 1245, |
| "token_acc": 0.5040431266846361, |
| "train_speed(iter/s)": 0.587668 |
| }, |
| { |
| "epoch": 0.758495145631068, |
| "grad_norm": 5.695102691650391, |
| "learning_rate": 8.503941971736062e-05, |
| "loss": 2.298574447631836, |
| "memory(GiB)": 40.86, |
| "step": 1250, |
| "token_acc": 0.5070821529745042, |
| "train_speed(iter/s)": 0.587481 |
| }, |
| { |
| "epoch": 0.7615291262135923, |
| "grad_norm": 5.622751235961914, |
| "learning_rate": 8.492591804302186e-05, |
| "loss": 2.149024772644043, |
| "memory(GiB)": 40.86, |
| "step": 1255, |
| "token_acc": 0.5030674846625767, |
| "train_speed(iter/s)": 0.58761 |
| }, |
| { |
| "epoch": 0.7645631067961165, |
| "grad_norm": 9.05585765838623, |
| "learning_rate": 8.481206381090934e-05, |
| "loss": 2.464432716369629, |
| "memory(GiB)": 40.86, |
| "step": 1260, |
| "token_acc": 0.504950495049505, |
| "train_speed(iter/s)": 0.587385 |
| }, |
| { |
| "epoch": 0.7675970873786407, |
| "grad_norm": 6.5983428955078125, |
| "learning_rate": 8.469785817031841e-05, |
| "loss": 2.203810119628906, |
| "memory(GiB)": 40.86, |
| "step": 1265, |
| "token_acc": 0.5412186379928315, |
| "train_speed(iter/s)": 0.587789 |
| }, |
| { |
| "epoch": 0.7706310679611651, |
| "grad_norm": 4.769191265106201, |
| "learning_rate": 8.458330227409168e-05, |
| "loss": 2.432425308227539, |
| "memory(GiB)": 40.86, |
| "step": 1270, |
| "token_acc": 0.4608433734939759, |
| "train_speed(iter/s)": 0.588056 |
| }, |
| { |
| "epoch": 0.7736650485436893, |
| "grad_norm": 8.539231300354004, |
| "learning_rate": 8.446839727860738e-05, |
| "loss": 2.354892539978027, |
| "memory(GiB)": 40.86, |
| "step": 1275, |
| "token_acc": 0.5155555555555555, |
| "train_speed(iter/s)": 0.588053 |
| }, |
| { |
| "epoch": 0.7766990291262136, |
| "grad_norm": 5.209239959716797, |
| "learning_rate": 8.435314434376773e-05, |
| "loss": 2.296826934814453, |
| "memory(GiB)": 40.86, |
| "step": 1280, |
| "token_acc": 0.5085714285714286, |
| "train_speed(iter/s)": 0.587863 |
| }, |
| { |
| "epoch": 0.7797330097087378, |
| "grad_norm": 7.653853893280029, |
| "learning_rate": 8.423754463298717e-05, |
| "loss": 2.117538261413574, |
| "memory(GiB)": 40.86, |
| "step": 1285, |
| "token_acc": 0.5723076923076923, |
| "train_speed(iter/s)": 0.587922 |
| }, |
| { |
| "epoch": 0.7827669902912622, |
| "grad_norm": 7.506109237670898, |
| "learning_rate": 8.412159931318068e-05, |
| "loss": 2.4975624084472656, |
| "memory(GiB)": 40.86, |
| "step": 1290, |
| "token_acc": 0.46710526315789475, |
| "train_speed(iter/s)": 0.587905 |
| }, |
| { |
| "epoch": 0.7858009708737864, |
| "grad_norm": 5.187159538269043, |
| "learning_rate": 8.400530955475198e-05, |
| "loss": 2.2532814025878904, |
| "memory(GiB)": 40.86, |
| "step": 1295, |
| "token_acc": 0.46987951807228917, |
| "train_speed(iter/s)": 0.587699 |
| }, |
| { |
| "epoch": 0.7888349514563107, |
| "grad_norm": 8.4281587600708, |
| "learning_rate": 8.38886765315817e-05, |
| "loss": 2.3919906616210938, |
| "memory(GiB)": 40.86, |
| "step": 1300, |
| "token_acc": 0.5017182130584192, |
| "train_speed(iter/s)": 0.587556 |
| }, |
| { |
| "epoch": 0.7918689320388349, |
| "grad_norm": 6.116622447967529, |
| "learning_rate": 8.377170142101548e-05, |
| "loss": 2.3181718826293944, |
| "memory(GiB)": 40.86, |
| "step": 1305, |
| "token_acc": 0.5061349693251533, |
| "train_speed(iter/s)": 0.58774 |
| }, |
| { |
| "epoch": 0.7949029126213593, |
| "grad_norm": 8.740164756774902, |
| "learning_rate": 8.365438540385223e-05, |
| "loss": 2.1749797821044923, |
| "memory(GiB)": 40.86, |
| "step": 1310, |
| "token_acc": 0.5187713310580204, |
| "train_speed(iter/s)": 0.587607 |
| }, |
| { |
| "epoch": 0.7979368932038835, |
| "grad_norm": 6.935183048248291, |
| "learning_rate": 8.353672966433206e-05, |
| "loss": 2.314193534851074, |
| "memory(GiB)": 40.86, |
| "step": 1315, |
| "token_acc": 0.47039473684210525, |
| "train_speed(iter/s)": 0.587722 |
| }, |
| { |
| "epoch": 0.8009708737864077, |
| "grad_norm": 7.3493475914001465, |
| "learning_rate": 8.341873539012444e-05, |
| "loss": 2.2399951934814455, |
| "memory(GiB)": 40.86, |
| "step": 1320, |
| "token_acc": 0.5111821086261981, |
| "train_speed(iter/s)": 0.587965 |
| }, |
| { |
| "epoch": 0.804004854368932, |
| "grad_norm": 6.552261829376221, |
| "learning_rate": 8.33004037723161e-05, |
| "loss": 2.223754119873047, |
| "memory(GiB)": 40.86, |
| "step": 1325, |
| "token_acc": 0.5283018867924528, |
| "train_speed(iter/s)": 0.588183 |
| }, |
| { |
| "epoch": 0.8070388349514563, |
| "grad_norm": 6.420342445373535, |
| "learning_rate": 8.318173600539911e-05, |
| "loss": 1.9445220947265625, |
| "memory(GiB)": 40.86, |
| "step": 1330, |
| "token_acc": 0.5394736842105263, |
| "train_speed(iter/s)": 0.588461 |
| }, |
| { |
| "epoch": 0.8100728155339806, |
| "grad_norm": 5.923401355743408, |
| "learning_rate": 8.306273328725878e-05, |
| "loss": 2.1622385025024413, |
| "memory(GiB)": 40.86, |
| "step": 1335, |
| "token_acc": 0.5357142857142857, |
| "train_speed(iter/s)": 0.588601 |
| }, |
| { |
| "epoch": 0.8131067961165048, |
| "grad_norm": 7.1788506507873535, |
| "learning_rate": 8.294339681916154e-05, |
| "loss": 2.1121898651123048, |
| "memory(GiB)": 40.86, |
| "step": 1340, |
| "token_acc": 0.496875, |
| "train_speed(iter/s)": 0.588559 |
| }, |
| { |
| "epoch": 0.8161407766990292, |
| "grad_norm": 6.46894645690918, |
| "learning_rate": 8.282372780574285e-05, |
| "loss": 2.207390022277832, |
| "memory(GiB)": 40.86, |
| "step": 1345, |
| "token_acc": 0.5216049382716049, |
| "train_speed(iter/s)": 0.588706 |
| }, |
| { |
| "epoch": 0.8191747572815534, |
| "grad_norm": 7.959349632263184, |
| "learning_rate": 8.270372745499506e-05, |
| "loss": 2.2782615661621093, |
| "memory(GiB)": 40.86, |
| "step": 1350, |
| "token_acc": 0.5174603174603175, |
| "train_speed(iter/s)": 0.588601 |
| }, |
| { |
| "epoch": 0.8222087378640777, |
| "grad_norm": 7.4319939613342285, |
| "learning_rate": 8.258339697825515e-05, |
| "loss": 1.8879600524902345, |
| "memory(GiB)": 40.86, |
| "step": 1355, |
| "token_acc": 0.5580357142857143, |
| "train_speed(iter/s)": 0.588875 |
| }, |
| { |
| "epoch": 0.8252427184466019, |
| "grad_norm": 7.50739860534668, |
| "learning_rate": 8.246273759019252e-05, |
| "loss": 2.3653688430786133, |
| "memory(GiB)": 40.86, |
| "step": 1360, |
| "token_acc": 0.5179856115107914, |
| "train_speed(iter/s)": 0.588976 |
| }, |
| { |
| "epoch": 0.8282766990291263, |
| "grad_norm": 8.38315486907959, |
| "learning_rate": 8.234175050879684e-05, |
| "loss": 2.0219940185546874, |
| "memory(GiB)": 40.86, |
| "step": 1365, |
| "token_acc": 0.5266903914590747, |
| "train_speed(iter/s)": 0.589206 |
| }, |
| { |
| "epoch": 0.8313106796116505, |
| "grad_norm": 5.579223155975342, |
| "learning_rate": 8.222043695536555e-05, |
| "loss": 2.0323202133178713, |
| "memory(GiB)": 41.25, |
| "step": 1370, |
| "token_acc": 0.5419847328244275, |
| "train_speed(iter/s)": 0.588666 |
| }, |
| { |
| "epoch": 0.8343446601941747, |
| "grad_norm": 7.079959392547607, |
| "learning_rate": 8.20987981544917e-05, |
| "loss": 2.245712661743164, |
| "memory(GiB)": 41.25, |
| "step": 1375, |
| "token_acc": 0.5054545454545455, |
| "train_speed(iter/s)": 0.58865 |
| }, |
| { |
| "epoch": 0.837378640776699, |
| "grad_norm": 5.938848972320557, |
| "learning_rate": 8.197683533405157e-05, |
| "loss": 1.959267807006836, |
| "memory(GiB)": 41.25, |
| "step": 1380, |
| "token_acc": 0.5316901408450704, |
| "train_speed(iter/s)": 0.58891 |
| }, |
| { |
| "epoch": 0.8404126213592233, |
| "grad_norm": 8.333083152770996, |
| "learning_rate": 8.185454972519213e-05, |
| "loss": 2.2188604354858397, |
| "memory(GiB)": 41.25, |
| "step": 1385, |
| "token_acc": 0.5415282392026578, |
| "train_speed(iter/s)": 0.589226 |
| }, |
| { |
| "epoch": 0.8434466019417476, |
| "grad_norm": 5.235838413238525, |
| "learning_rate": 8.173194256231884e-05, |
| "loss": 2.312948226928711, |
| "memory(GiB)": 41.25, |
| "step": 1390, |
| "token_acc": 0.48546511627906974, |
| "train_speed(iter/s)": 0.589378 |
| }, |
| { |
| "epoch": 0.8464805825242718, |
| "grad_norm": 9.581235885620117, |
| "learning_rate": 8.1609015083083e-05, |
| "loss": 2.3604787826538085, |
| "memory(GiB)": 41.25, |
| "step": 1395, |
| "token_acc": 0.4927007299270073, |
| "train_speed(iter/s)": 0.589384 |
| }, |
| { |
| "epoch": 0.8495145631067961, |
| "grad_norm": 6.8221611976623535, |
| "learning_rate": 8.148576852836933e-05, |
| "loss": 2.0327474594116213, |
| "memory(GiB)": 41.25, |
| "step": 1400, |
| "token_acc": 0.569620253164557, |
| "train_speed(iter/s)": 0.589095 |
| }, |
| { |
| "epoch": 0.8525485436893204, |
| "grad_norm": 7.140889644622803, |
| "learning_rate": 8.136220414228347e-05, |
| "loss": 2.5129384994506836, |
| "memory(GiB)": 41.25, |
| "step": 1405, |
| "token_acc": 0.4952076677316294, |
| "train_speed(iter/s)": 0.589242 |
| }, |
| { |
| "epoch": 0.8555825242718447, |
| "grad_norm": 5.594088077545166, |
| "learning_rate": 8.123832317213933e-05, |
| "loss": 2.288181686401367, |
| "memory(GiB)": 41.25, |
| "step": 1410, |
| "token_acc": 0.5228758169934641, |
| "train_speed(iter/s)": 0.589415 |
| }, |
| { |
| "epoch": 0.8586165048543689, |
| "grad_norm": 5.7525811195373535, |
| "learning_rate": 8.111412686844664e-05, |
| "loss": 2.288965606689453, |
| "memory(GiB)": 41.25, |
| "step": 1415, |
| "token_acc": 0.5157068062827225, |
| "train_speed(iter/s)": 0.589323 |
| }, |
| { |
| "epoch": 0.8616504854368932, |
| "grad_norm": 9.362752914428711, |
| "learning_rate": 8.098961648489821e-05, |
| "loss": 1.9993032455444335, |
| "memory(GiB)": 41.25, |
| "step": 1420, |
| "token_acc": 0.5448275862068965, |
| "train_speed(iter/s)": 0.589162 |
| }, |
| { |
| "epoch": 0.8646844660194175, |
| "grad_norm": 6.3312764167785645, |
| "learning_rate": 8.08647932783573e-05, |
| "loss": 2.4338268280029296, |
| "memory(GiB)": 41.25, |
| "step": 1425, |
| "token_acc": 0.4863013698630137, |
| "train_speed(iter/s)": 0.589226 |
| }, |
| { |
| "epoch": 0.8677184466019418, |
| "grad_norm": 5.9260172843933105, |
| "learning_rate": 8.073965850884496e-05, |
| "loss": 2.2075326919555662, |
| "memory(GiB)": 41.25, |
| "step": 1430, |
| "token_acc": 0.5230769230769231, |
| "train_speed(iter/s)": 0.589205 |
| }, |
| { |
| "epoch": 0.870752427184466, |
| "grad_norm": 4.957935810089111, |
| "learning_rate": 8.061421343952731e-05, |
| "loss": 2.123280334472656, |
| "memory(GiB)": 41.25, |
| "step": 1435, |
| "token_acc": 0.5446927374301676, |
| "train_speed(iter/s)": 0.589414 |
| }, |
| { |
| "epoch": 0.8737864077669902, |
| "grad_norm": 5.678249359130859, |
| "learning_rate": 8.048845933670273e-05, |
| "loss": 1.9449834823608398, |
| "memory(GiB)": 41.25, |
| "step": 1440, |
| "token_acc": 0.5700934579439252, |
| "train_speed(iter/s)": 0.589581 |
| }, |
| { |
| "epoch": 0.8768203883495146, |
| "grad_norm": 7.655086040496826, |
| "learning_rate": 8.036239746978914e-05, |
| "loss": 2.4002641677856444, |
| "memory(GiB)": 41.25, |
| "step": 1445, |
| "token_acc": 0.4842105263157895, |
| "train_speed(iter/s)": 0.58949 |
| }, |
| { |
| "epoch": 0.8798543689320388, |
| "grad_norm": 6.851123332977295, |
| "learning_rate": 8.02360291113112e-05, |
| "loss": 2.103730392456055, |
| "memory(GiB)": 41.25, |
| "step": 1450, |
| "token_acc": 0.5627009646302251, |
| "train_speed(iter/s)": 0.589624 |
| }, |
| { |
| "epoch": 0.8828883495145631, |
| "grad_norm": 7.437098979949951, |
| "learning_rate": 8.010935553688741e-05, |
| "loss": 2.1862071990966796, |
| "memory(GiB)": 41.25, |
| "step": 1455, |
| "token_acc": 0.5364238410596026, |
| "train_speed(iter/s)": 0.58987 |
| }, |
| { |
| "epoch": 0.8859223300970874, |
| "grad_norm": 7.451559066772461, |
| "learning_rate": 7.998237802521726e-05, |
| "loss": 2.167529296875, |
| "memory(GiB)": 41.25, |
| "step": 1460, |
| "token_acc": 0.5220338983050847, |
| "train_speed(iter/s)": 0.589733 |
| }, |
| { |
| "epoch": 0.8889563106796117, |
| "grad_norm": 5.745720863342285, |
| "learning_rate": 7.985509785806827e-05, |
| "loss": 1.7958356857299804, |
| "memory(GiB)": 41.25, |
| "step": 1465, |
| "token_acc": 0.6163793103448276, |
| "train_speed(iter/s)": 0.589826 |
| }, |
| { |
| "epoch": 0.8919902912621359, |
| "grad_norm": 5.106093406677246, |
| "learning_rate": 7.97275163202632e-05, |
| "loss": 1.7782585144042968, |
| "memory(GiB)": 41.25, |
| "step": 1470, |
| "token_acc": 0.5852842809364549, |
| "train_speed(iter/s)": 0.589876 |
| }, |
| { |
| "epoch": 0.8950242718446602, |
| "grad_norm": 7.241384506225586, |
| "learning_rate": 7.959963469966687e-05, |
| "loss": 2.27147216796875, |
| "memory(GiB)": 41.25, |
| "step": 1475, |
| "token_acc": 0.52, |
| "train_speed(iter/s)": 0.590043 |
| }, |
| { |
| "epoch": 0.8980582524271845, |
| "grad_norm": 7.773332595825195, |
| "learning_rate": 7.947145428717335e-05, |
| "loss": 2.3339469909667967, |
| "memory(GiB)": 41.25, |
| "step": 1480, |
| "token_acc": 0.4868035190615836, |
| "train_speed(iter/s)": 0.59013 |
| }, |
| { |
| "epoch": 0.9010922330097088, |
| "grad_norm": 6.2095866203308105, |
| "learning_rate": 7.934297637669281e-05, |
| "loss": 2.15749568939209, |
| "memory(GiB)": 41.25, |
| "step": 1485, |
| "token_acc": 0.5335120643431636, |
| "train_speed(iter/s)": 0.590232 |
| }, |
| { |
| "epoch": 0.904126213592233, |
| "grad_norm": 9.049623489379883, |
| "learning_rate": 7.921420226513852e-05, |
| "loss": 2.2805938720703125, |
| "memory(GiB)": 41.25, |
| "step": 1490, |
| "token_acc": 0.48771929824561405, |
| "train_speed(iter/s)": 0.590181 |
| }, |
| { |
| "epoch": 0.9071601941747572, |
| "grad_norm": 5.86360502243042, |
| "learning_rate": 7.90851332524137e-05, |
| "loss": 2.204097557067871, |
| "memory(GiB)": 41.25, |
| "step": 1495, |
| "token_acc": 0.5291970802919708, |
| "train_speed(iter/s)": 0.589939 |
| }, |
| { |
| "epoch": 0.9101941747572816, |
| "grad_norm": 6.702127456665039, |
| "learning_rate": 7.895577064139848e-05, |
| "loss": 2.099565124511719, |
| "memory(GiB)": 41.25, |
| "step": 1500, |
| "token_acc": 0.5468164794007491, |
| "train_speed(iter/s)": 0.590107 |
| }, |
| { |
| "epoch": 0.9101941747572816, |
| "eval_loss": 1.9851206541061401, |
| "eval_runtime": 12.4849, |
| "eval_samples_per_second": 8.01, |
| "eval_steps_per_second": 8.01, |
| "eval_token_acc": 0.5260196905766527, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.9132281553398058, |
| "grad_norm": 7.588992118835449, |
| "learning_rate": 7.882611573793663e-05, |
| "loss": 2.118764877319336, |
| "memory(GiB)": 41.25, |
| "step": 1505, |
| "token_acc": 0.5204795204795205, |
| "train_speed(iter/s)": 0.586984 |
| }, |
| { |
| "epoch": 0.9162621359223301, |
| "grad_norm": 5.986236572265625, |
| "learning_rate": 7.869616985082255e-05, |
| "loss": 2.0279298782348634, |
| "memory(GiB)": 41.25, |
| "step": 1510, |
| "token_acc": 0.5660377358490566, |
| "train_speed(iter/s)": 0.586663 |
| }, |
| { |
| "epoch": 0.9192961165048543, |
| "grad_norm": 7.583939075469971, |
| "learning_rate": 7.856593429178789e-05, |
| "loss": 2.0275857925415037, |
| "memory(GiB)": 41.25, |
| "step": 1515, |
| "token_acc": 0.5351170568561873, |
| "train_speed(iter/s)": 0.586556 |
| }, |
| { |
| "epoch": 0.9223300970873787, |
| "grad_norm": 7.145445823669434, |
| "learning_rate": 7.843541037548838e-05, |
| "loss": 2.181304168701172, |
| "memory(GiB)": 41.25, |
| "step": 1520, |
| "token_acc": 0.5451612903225806, |
| "train_speed(iter/s)": 0.58615 |
| }, |
| { |
| "epoch": 0.9253640776699029, |
| "grad_norm": 9.427350997924805, |
| "learning_rate": 7.830459941949058e-05, |
| "loss": 1.9623226165771483, |
| "memory(GiB)": 41.25, |
| "step": 1525, |
| "token_acc": 0.5575539568345323, |
| "train_speed(iter/s)": 0.585779 |
| }, |
| { |
| "epoch": 0.9283980582524272, |
| "grad_norm": 10.541104316711426, |
| "learning_rate": 7.817350274425856e-05, |
| "loss": 2.2855878829956056, |
| "memory(GiB)": 41.25, |
| "step": 1530, |
| "token_acc": 0.518796992481203, |
| "train_speed(iter/s)": 0.585757 |
| }, |
| { |
| "epoch": 0.9314320388349514, |
| "grad_norm": 8.254549980163574, |
| "learning_rate": 7.804212167314054e-05, |
| "loss": 2.3625198364257813, |
| "memory(GiB)": 41.25, |
| "step": 1535, |
| "token_acc": 0.45938375350140054, |
| "train_speed(iter/s)": 0.585525 |
| }, |
| { |
| "epoch": 0.9344660194174758, |
| "grad_norm": 5.327072620391846, |
| "learning_rate": 7.791045753235555e-05, |
| "loss": 2.1574447631835936, |
| "memory(GiB)": 41.25, |
| "step": 1540, |
| "token_acc": 0.5401234567901234, |
| "train_speed(iter/s)": 0.585161 |
| }, |
| { |
| "epoch": 0.9375, |
| "grad_norm": 6.956089496612549, |
| "learning_rate": 7.777851165098012e-05, |
| "loss": 2.220409965515137, |
| "memory(GiB)": 41.25, |
| "step": 1545, |
| "token_acc": 0.5068027210884354, |
| "train_speed(iter/s)": 0.58532 |
| }, |
| { |
| "epoch": 0.9405339805825242, |
| "grad_norm": 8.668743133544922, |
| "learning_rate": 7.76462853609347e-05, |
| "loss": 2.2191883087158204, |
| "memory(GiB)": 41.25, |
| "step": 1550, |
| "token_acc": 0.5181159420289855, |
| "train_speed(iter/s)": 0.58508 |
| }, |
| { |
| "epoch": 0.9435679611650486, |
| "grad_norm": 6.6736063957214355, |
| "learning_rate": 7.751377999697043e-05, |
| "loss": 2.111481857299805, |
| "memory(GiB)": 41.25, |
| "step": 1555, |
| "token_acc": 0.5555555555555556, |
| "train_speed(iter/s)": 0.585013 |
| }, |
| { |
| "epoch": 0.9466019417475728, |
| "grad_norm": 8.943532943725586, |
| "learning_rate": 7.73809968966554e-05, |
| "loss": 2.2334514617919923, |
| "memory(GiB)": 41.25, |
| "step": 1560, |
| "token_acc": 0.5, |
| "train_speed(iter/s)": 0.585004 |
| }, |
| { |
| "epoch": 0.9496359223300971, |
| "grad_norm": 6.392602443695068, |
| "learning_rate": 7.724793740036142e-05, |
| "loss": 2.3538848876953127, |
| "memory(GiB)": 41.25, |
| "step": 1565, |
| "token_acc": 0.5333333333333333, |
| "train_speed(iter/s)": 0.585018 |
| }, |
| { |
| "epoch": 0.9526699029126213, |
| "grad_norm": 8.349867820739746, |
| "learning_rate": 7.711460285125028e-05, |
| "loss": 1.9792165756225586, |
| "memory(GiB)": 41.25, |
| "step": 1570, |
| "token_acc": 0.5506756756756757, |
| "train_speed(iter/s)": 0.584841 |
| }, |
| { |
| "epoch": 0.9557038834951457, |
| "grad_norm": 6.740106582641602, |
| "learning_rate": 7.698099459526034e-05, |
| "loss": 2.2277217864990235, |
| "memory(GiB)": 41.25, |
| "step": 1575, |
| "token_acc": 0.5065359477124183, |
| "train_speed(iter/s)": 0.584644 |
| }, |
| { |
| "epoch": 0.9587378640776699, |
| "grad_norm": 7.618457317352295, |
| "learning_rate": 7.684711398109284e-05, |
| "loss": 2.152913284301758, |
| "memory(GiB)": 41.25, |
| "step": 1580, |
| "token_acc": 0.5343283582089552, |
| "train_speed(iter/s)": 0.584502 |
| }, |
| { |
| "epoch": 0.9617718446601942, |
| "grad_norm": 5.828027248382568, |
| "learning_rate": 7.67129623601983e-05, |
| "loss": 2.1841548919677733, |
| "memory(GiB)": 41.25, |
| "step": 1585, |
| "token_acc": 0.509493670886076, |
| "train_speed(iter/s)": 0.584655 |
| }, |
| { |
| "epoch": 0.9648058252427184, |
| "grad_norm": 8.393068313598633, |
| "learning_rate": 7.657854108676299e-05, |
| "loss": 2.4885177612304688, |
| "memory(GiB)": 41.25, |
| "step": 1590, |
| "token_acc": 0.48773006134969327, |
| "train_speed(iter/s)": 0.584201 |
| }, |
| { |
| "epoch": 0.9678398058252428, |
| "grad_norm": 6.520992755889893, |
| "learning_rate": 7.644385151769509e-05, |
| "loss": 2.489660453796387, |
| "memory(GiB)": 41.25, |
| "step": 1595, |
| "token_acc": 0.49107142857142855, |
| "train_speed(iter/s)": 0.584289 |
| }, |
| { |
| "epoch": 0.970873786407767, |
| "grad_norm": 5.243824481964111, |
| "learning_rate": 7.630889501261109e-05, |
| "loss": 2.0495643615722656, |
| "memory(GiB)": 41.25, |
| "step": 1600, |
| "token_acc": 0.5572289156626506, |
| "train_speed(iter/s)": 0.584189 |
| }, |
| { |
| "epoch": 0.9739077669902912, |
| "grad_norm": 8.216861724853516, |
| "learning_rate": 7.617367293382211e-05, |
| "loss": 2.7457176208496095, |
| "memory(GiB)": 41.25, |
| "step": 1605, |
| "token_acc": 0.4244791666666667, |
| "train_speed(iter/s)": 0.584051 |
| }, |
| { |
| "epoch": 0.9769417475728155, |
| "grad_norm": 6.738630771636963, |
| "learning_rate": 7.603818664632001e-05, |
| "loss": 2.252565383911133, |
| "memory(GiB)": 41.25, |
| "step": 1610, |
| "token_acc": 0.48986486486486486, |
| "train_speed(iter/s)": 0.584192 |
| }, |
| { |
| "epoch": 0.9799757281553398, |
| "grad_norm": 6.404202938079834, |
| "learning_rate": 7.590243751776374e-05, |
| "loss": 2.2700517654418944, |
| "memory(GiB)": 41.25, |
| "step": 1615, |
| "token_acc": 0.4858757062146893, |
| "train_speed(iter/s)": 0.584132 |
| }, |
| { |
| "epoch": 0.9830097087378641, |
| "grad_norm": 6.124429702758789, |
| "learning_rate": 7.576642691846546e-05, |
| "loss": 2.3936836242675783, |
| "memory(GiB)": 41.25, |
| "step": 1620, |
| "token_acc": 0.5235294117647059, |
| "train_speed(iter/s)": 0.58398 |
| }, |
| { |
| "epoch": 0.9860436893203883, |
| "grad_norm": 7.0240044593811035, |
| "learning_rate": 7.563015622137674e-05, |
| "loss": 2.3892589569091798, |
| "memory(GiB)": 41.25, |
| "step": 1625, |
| "token_acc": 0.46688741721854304, |
| "train_speed(iter/s)": 0.583754 |
| }, |
| { |
| "epoch": 0.9890776699029126, |
| "grad_norm": 6.437112331390381, |
| "learning_rate": 7.549362680207472e-05, |
| "loss": 2.232225036621094, |
| "memory(GiB)": 41.25, |
| "step": 1630, |
| "token_acc": 0.4984025559105431, |
| "train_speed(iter/s)": 0.58366 |
| }, |
| { |
| "epoch": 0.9921116504854369, |
| "grad_norm": 6.010834217071533, |
| "learning_rate": 7.535684003874816e-05, |
| "loss": 2.146392822265625, |
| "memory(GiB)": 41.25, |
| "step": 1635, |
| "token_acc": 0.5261538461538462, |
| "train_speed(iter/s)": 0.58354 |
| }, |
| { |
| "epoch": 0.9951456310679612, |
| "grad_norm": 6.317235946655273, |
| "learning_rate": 7.521979731218356e-05, |
| "loss": 2.3056121826171876, |
| "memory(GiB)": 41.25, |
| "step": 1640, |
| "token_acc": 0.48264984227129337, |
| "train_speed(iter/s)": 0.583292 |
| }, |
| { |
| "epoch": 0.9981796116504854, |
| "grad_norm": 7.453293800354004, |
| "learning_rate": 7.508250000575125e-05, |
| "loss": 2.188218688964844, |
| "memory(GiB)": 41.25, |
| "step": 1645, |
| "token_acc": 0.5512367491166078, |
| "train_speed(iter/s)": 0.583245 |
| }, |
| { |
| "epoch": 1.0012135922330097, |
| "grad_norm": 8.073345184326172, |
| "learning_rate": 7.494494950539143e-05, |
| "loss": 1.7200986862182617, |
| "memory(GiB)": 41.25, |
| "step": 1650, |
| "token_acc": 0.6014760147601476, |
| "train_speed(iter/s)": 0.583281 |
| }, |
| { |
| "epoch": 1.004247572815534, |
| "grad_norm": 6.676420211791992, |
| "learning_rate": 7.480714719960007e-05, |
| "loss": 2.1127391815185548, |
| "memory(GiB)": 41.25, |
| "step": 1655, |
| "token_acc": 0.5288461538461539, |
| "train_speed(iter/s)": 0.583366 |
| }, |
| { |
| "epoch": 1.0072815533980584, |
| "grad_norm": 6.307994842529297, |
| "learning_rate": 7.466909447941508e-05, |
| "loss": 1.8806413650512694, |
| "memory(GiB)": 41.25, |
| "step": 1660, |
| "token_acc": 0.5547703180212014, |
| "train_speed(iter/s)": 0.583445 |
| }, |
| { |
| "epoch": 1.0103155339805825, |
| "grad_norm": 6.221712589263916, |
| "learning_rate": 7.453079273840207e-05, |
| "loss": 2.276551055908203, |
| "memory(GiB)": 41.25, |
| "step": 1665, |
| "token_acc": 0.5133333333333333, |
| "train_speed(iter/s)": 0.583162 |
| }, |
| { |
| "epoch": 1.0133495145631068, |
| "grad_norm": 5.912354469299316, |
| "learning_rate": 7.439224337264043e-05, |
| "loss": 1.9514554977416991, |
| "memory(GiB)": 41.25, |
| "step": 1670, |
| "token_acc": 0.5527950310559007, |
| "train_speed(iter/s)": 0.583074 |
| }, |
| { |
| "epoch": 1.016383495145631, |
| "grad_norm": 7.461360931396484, |
| "learning_rate": 7.425344778070917e-05, |
| "loss": 2.087990951538086, |
| "memory(GiB)": 41.25, |
| "step": 1675, |
| "token_acc": 0.5451713395638629, |
| "train_speed(iter/s)": 0.583019 |
| }, |
| { |
| "epoch": 1.0194174757281553, |
| "grad_norm": 6.206568241119385, |
| "learning_rate": 7.411440736367281e-05, |
| "loss": 2.088376045227051, |
| "memory(GiB)": 41.25, |
| "step": 1680, |
| "token_acc": 0.5496688741721855, |
| "train_speed(iter/s)": 0.58291 |
| }, |
| { |
| "epoch": 1.0224514563106797, |
| "grad_norm": 6.608606338500977, |
| "learning_rate": 7.397512352506727e-05, |
| "loss": 1.6116622924804687, |
| "memory(GiB)": 41.25, |
| "step": 1685, |
| "token_acc": 0.5833333333333334, |
| "train_speed(iter/s)": 0.582982 |
| }, |
| { |
| "epoch": 1.0254854368932038, |
| "grad_norm": 7.508535385131836, |
| "learning_rate": 7.383559767088566e-05, |
| "loss": 1.8518999099731446, |
| "memory(GiB)": 41.25, |
| "step": 1690, |
| "token_acc": 0.5867158671586716, |
| "train_speed(iter/s)": 0.583052 |
| }, |
| { |
| "epoch": 1.0285194174757282, |
| "grad_norm": 6.2956318855285645, |
| "learning_rate": 7.369583120956407e-05, |
| "loss": 2.077930450439453, |
| "memory(GiB)": 41.25, |
| "step": 1695, |
| "token_acc": 0.5295950155763239, |
| "train_speed(iter/s)": 0.583022 |
| }, |
| { |
| "epoch": 1.0315533980582525, |
| "grad_norm": 6.229779243469238, |
| "learning_rate": 7.355582555196745e-05, |
| "loss": 1.6506580352783202, |
| "memory(GiB)": 41.25, |
| "step": 1700, |
| "token_acc": 0.6342182890855457, |
| "train_speed(iter/s)": 0.582892 |
| }, |
| { |
| "epoch": 1.0345873786407767, |
| "grad_norm": 7.167182445526123, |
| "learning_rate": 7.341558211137526e-05, |
| "loss": 2.1481195449829102, |
| "memory(GiB)": 41.25, |
| "step": 1705, |
| "token_acc": 0.49226006191950467, |
| "train_speed(iter/s)": 0.582852 |
| }, |
| { |
| "epoch": 1.037621359223301, |
| "grad_norm": 7.526867866516113, |
| "learning_rate": 7.327510230346726e-05, |
| "loss": 2.0346538543701174, |
| "memory(GiB)": 41.25, |
| "step": 1710, |
| "token_acc": 0.5077399380804953, |
| "train_speed(iter/s)": 0.582708 |
| }, |
| { |
| "epoch": 1.0406553398058251, |
| "grad_norm": 6.285158634185791, |
| "learning_rate": 7.313438754630918e-05, |
| "loss": 2.084914779663086, |
| "memory(GiB)": 41.25, |
| "step": 1715, |
| "token_acc": 0.5326797385620915, |
| "train_speed(iter/s)": 0.58263 |
| }, |
| { |
| "epoch": 1.0436893203883495, |
| "grad_norm": 5.3016252517700195, |
| "learning_rate": 7.299343926033851e-05, |
| "loss": 1.8931154251098632, |
| "memory(GiB)": 41.25, |
| "step": 1720, |
| "token_acc": 0.5520504731861199, |
| "train_speed(iter/s)": 0.582235 |
| }, |
| { |
| "epoch": 1.0467233009708738, |
| "grad_norm": 6.363744258880615, |
| "learning_rate": 7.285225886834997e-05, |
| "loss": 2.1936279296875, |
| "memory(GiB)": 41.25, |
| "step": 1725, |
| "token_acc": 0.49683544303797467, |
| "train_speed(iter/s)": 0.582259 |
| }, |
| { |
| "epoch": 1.049757281553398, |
| "grad_norm": 6.571318626403809, |
| "learning_rate": 7.271084779548136e-05, |
| "loss": 2.0733669281005858, |
| "memory(GiB)": 41.25, |
| "step": 1730, |
| "token_acc": 0.5579937304075235, |
| "train_speed(iter/s)": 0.582202 |
| }, |
| { |
| "epoch": 1.0527912621359223, |
| "grad_norm": 7.151698589324951, |
| "learning_rate": 7.256920746919904e-05, |
| "loss": 2.2026699066162108, |
| "memory(GiB)": 41.25, |
| "step": 1735, |
| "token_acc": 0.5150375939849624, |
| "train_speed(iter/s)": 0.58212 |
| }, |
| { |
| "epoch": 1.0558252427184467, |
| "grad_norm": 6.636294364929199, |
| "learning_rate": 7.242733931928352e-05, |
| "loss": 2.145404052734375, |
| "memory(GiB)": 41.25, |
| "step": 1740, |
| "token_acc": 0.49221183800623053, |
| "train_speed(iter/s)": 0.582037 |
| }, |
| { |
| "epoch": 1.0588592233009708, |
| "grad_norm": 6.21516227722168, |
| "learning_rate": 7.228524477781514e-05, |
| "loss": 1.6696731567382812, |
| "memory(GiB)": 41.25, |
| "step": 1745, |
| "token_acc": 0.6295081967213115, |
| "train_speed(iter/s)": 0.581842 |
| }, |
| { |
| "epoch": 1.0618932038834952, |
| "grad_norm": 6.904699802398682, |
| "learning_rate": 7.214292527915949e-05, |
| "loss": 1.995549201965332, |
| "memory(GiB)": 41.25, |
| "step": 1750, |
| "token_acc": 0.5806451612903226, |
| "train_speed(iter/s)": 0.581695 |
| }, |
| { |
| "epoch": 1.0649271844660193, |
| "grad_norm": 4.713315963745117, |
| "learning_rate": 7.200038225995294e-05, |
| "loss": 2.3474475860595705, |
| "memory(GiB)": 41.25, |
| "step": 1755, |
| "token_acc": 0.4887005649717514, |
| "train_speed(iter/s)": 0.581706 |
| }, |
| { |
| "epoch": 1.0679611650485437, |
| "grad_norm": 8.901693344116211, |
| "learning_rate": 7.185761715908825e-05, |
| "loss": 2.004246139526367, |
| "memory(GiB)": 41.25, |
| "step": 1760, |
| "token_acc": 0.5867158671586716, |
| "train_speed(iter/s)": 0.581409 |
| }, |
| { |
| "epoch": 1.070995145631068, |
| "grad_norm": 6.650726318359375, |
| "learning_rate": 7.171463141769994e-05, |
| "loss": 2.21859130859375, |
| "memory(GiB)": 41.25, |
| "step": 1765, |
| "token_acc": 0.5466666666666666, |
| "train_speed(iter/s)": 0.581411 |
| }, |
| { |
| "epoch": 1.0740291262135921, |
| "grad_norm": 7.826591968536377, |
| "learning_rate": 7.157142647914979e-05, |
| "loss": 2.0319658279418946, |
| "memory(GiB)": 41.25, |
| "step": 1770, |
| "token_acc": 0.5594202898550724, |
| "train_speed(iter/s)": 0.581305 |
| }, |
| { |
| "epoch": 1.0770631067961165, |
| "grad_norm": 6.98701286315918, |
| "learning_rate": 7.14280037890122e-05, |
| "loss": 1.9901140213012696, |
| "memory(GiB)": 41.25, |
| "step": 1775, |
| "token_acc": 0.551829268292683, |
| "train_speed(iter/s)": 0.581264 |
| }, |
| { |
| "epoch": 1.0800970873786409, |
| "grad_norm": 6.480953693389893, |
| "learning_rate": 7.128436479505971e-05, |
| "loss": 2.1239852905273438, |
| "memory(GiB)": 41.25, |
| "step": 1780, |
| "token_acc": 0.5121359223300971, |
| "train_speed(iter/s)": 0.581217 |
| }, |
| { |
| "epoch": 1.083131067961165, |
| "grad_norm": 5.683126449584961, |
| "learning_rate": 7.114051094724831e-05, |
| "loss": 2.0841569900512695, |
| "memory(GiB)": 41.25, |
| "step": 1785, |
| "token_acc": 0.5318352059925093, |
| "train_speed(iter/s)": 0.581099 |
| }, |
| { |
| "epoch": 1.0861650485436893, |
| "grad_norm": 5.394412517547607, |
| "learning_rate": 7.09964436977028e-05, |
| "loss": 1.9973236083984376, |
| "memory(GiB)": 41.25, |
| "step": 1790, |
| "token_acc": 0.541095890410959, |
| "train_speed(iter/s)": 0.580947 |
| }, |
| { |
| "epoch": 1.0891990291262137, |
| "grad_norm": 5.046519756317139, |
| "learning_rate": 7.085216450070218e-05, |
| "loss": 2.029042053222656, |
| "memory(GiB)": 41.25, |
| "step": 1795, |
| "token_acc": 0.5591054313099042, |
| "train_speed(iter/s)": 0.580934 |
| }, |
| { |
| "epoch": 1.0922330097087378, |
| "grad_norm": 6.593071460723877, |
| "learning_rate": 7.070767481266492e-05, |
| "loss": 1.8102890014648438, |
| "memory(GiB)": 41.25, |
| "step": 1800, |
| "token_acc": 0.5718654434250765, |
| "train_speed(iter/s)": 0.581028 |
| }, |
| { |
| "epoch": 1.0952669902912622, |
| "grad_norm": 7.305717945098877, |
| "learning_rate": 7.056297609213432e-05, |
| "loss": 1.9902324676513672, |
| "memory(GiB)": 41.25, |
| "step": 1805, |
| "token_acc": 0.5699300699300699, |
| "train_speed(iter/s)": 0.581249 |
| }, |
| { |
| "epoch": 1.0983009708737863, |
| "grad_norm": 7.886199474334717, |
| "learning_rate": 7.041806979976368e-05, |
| "loss": 2.2953224182128906, |
| "memory(GiB)": 41.25, |
| "step": 1810, |
| "token_acc": 0.5, |
| "train_speed(iter/s)": 0.581276 |
| }, |
| { |
| "epoch": 1.1013349514563107, |
| "grad_norm": 10.443878173828125, |
| "learning_rate": 7.027295739830169e-05, |
| "loss": 2.220531463623047, |
| "memory(GiB)": 41.25, |
| "step": 1815, |
| "token_acc": 0.5054945054945055, |
| "train_speed(iter/s)": 0.581467 |
| }, |
| { |
| "epoch": 1.104368932038835, |
| "grad_norm": 8.019064903259277, |
| "learning_rate": 7.012764035257756e-05, |
| "loss": 2.4718793869018554, |
| "memory(GiB)": 41.25, |
| "step": 1820, |
| "token_acc": 0.47619047619047616, |
| "train_speed(iter/s)": 0.581659 |
| }, |
| { |
| "epoch": 1.1074029126213591, |
| "grad_norm": 7.334555625915527, |
| "learning_rate": 6.998212012948626e-05, |
| "loss": 1.9244306564331055, |
| "memory(GiB)": 41.25, |
| "step": 1825, |
| "token_acc": 0.5625, |
| "train_speed(iter/s)": 0.581498 |
| }, |
| { |
| "epoch": 1.1104368932038835, |
| "grad_norm": 10.03096866607666, |
| "learning_rate": 6.983639819797377e-05, |
| "loss": 2.2340341567993165, |
| "memory(GiB)": 41.25, |
| "step": 1830, |
| "token_acc": 0.5136986301369864, |
| "train_speed(iter/s)": 0.581297 |
| }, |
| { |
| "epoch": 1.1134708737864079, |
| "grad_norm": 8.886280059814453, |
| "learning_rate": 6.969047602902213e-05, |
| "loss": 2.0593013763427734, |
| "memory(GiB)": 41.25, |
| "step": 1835, |
| "token_acc": 0.5460526315789473, |
| "train_speed(iter/s)": 0.581181 |
| }, |
| { |
| "epoch": 1.116504854368932, |
| "grad_norm": 7.363580703735352, |
| "learning_rate": 6.954435509563478e-05, |
| "loss": 1.8324342727661134, |
| "memory(GiB)": 41.25, |
| "step": 1840, |
| "token_acc": 0.5607142857142857, |
| "train_speed(iter/s)": 0.581259 |
| }, |
| { |
| "epoch": 1.1195388349514563, |
| "grad_norm": 8.011999130249023, |
| "learning_rate": 6.939803687282146e-05, |
| "loss": 2.3135982513427735, |
| "memory(GiB)": 41.25, |
| "step": 1845, |
| "token_acc": 0.4666666666666667, |
| "train_speed(iter/s)": 0.581014 |
| }, |
| { |
| "epoch": 1.1225728155339807, |
| "grad_norm": 5.767248630523682, |
| "learning_rate": 6.925152283758348e-05, |
| "loss": 1.8407760620117188, |
| "memory(GiB)": 41.25, |
| "step": 1850, |
| "token_acc": 0.5792880258899676, |
| "train_speed(iter/s)": 0.58124 |
| }, |
| { |
| "epoch": 1.1256067961165048, |
| "grad_norm": 6.498402118682861, |
| "learning_rate": 6.91048144688988e-05, |
| "loss": 2.21679573059082, |
| "memory(GiB)": 41.25, |
| "step": 1855, |
| "token_acc": 0.5419354838709678, |
| "train_speed(iter/s)": 0.581462 |
| }, |
| { |
| "epoch": 1.1286407766990292, |
| "grad_norm": 6.394837379455566, |
| "learning_rate": 6.895791324770701e-05, |
| "loss": 1.947611427307129, |
| "memory(GiB)": 41.25, |
| "step": 1860, |
| "token_acc": 0.534375, |
| "train_speed(iter/s)": 0.581612 |
| }, |
| { |
| "epoch": 1.1316747572815533, |
| "grad_norm": 11.657426834106445, |
| "learning_rate": 6.881082065689453e-05, |
| "loss": 2.234457015991211, |
| "memory(GiB)": 41.25, |
| "step": 1865, |
| "token_acc": 0.543859649122807, |
| "train_speed(iter/s)": 0.581654 |
| }, |
| { |
| "epoch": 1.1347087378640777, |
| "grad_norm": 7.383030414581299, |
| "learning_rate": 6.866353818127942e-05, |
| "loss": 2.1067886352539062, |
| "memory(GiB)": 41.25, |
| "step": 1870, |
| "token_acc": 0.5800711743772242, |
| "train_speed(iter/s)": 0.581704 |
| }, |
| { |
| "epoch": 1.137742718446602, |
| "grad_norm": 6.467532157897949, |
| "learning_rate": 6.851606730759664e-05, |
| "loss": 2.12357234954834, |
| "memory(GiB)": 41.25, |
| "step": 1875, |
| "token_acc": 0.5160256410256411, |
| "train_speed(iter/s)": 0.581655 |
| }, |
| { |
| "epoch": 1.1407766990291262, |
| "grad_norm": 7.949151992797852, |
| "learning_rate": 6.836840952448285e-05, |
| "loss": 2.1536586761474608, |
| "memory(GiB)": 41.25, |
| "step": 1880, |
| "token_acc": 0.5520833333333334, |
| "train_speed(iter/s)": 0.581744 |
| }, |
| { |
| "epoch": 1.1438106796116505, |
| "grad_norm": 7.234400749206543, |
| "learning_rate": 6.82205663224615e-05, |
| "loss": 2.2570121765136717, |
| "memory(GiB)": 41.25, |
| "step": 1885, |
| "token_acc": 0.5168195718654435, |
| "train_speed(iter/s)": 0.581831 |
| }, |
| { |
| "epoch": 1.1468446601941746, |
| "grad_norm": 6.600982189178467, |
| "learning_rate": 6.807253919392773e-05, |
| "loss": 1.9843761444091796, |
| "memory(GiB)": 41.25, |
| "step": 1890, |
| "token_acc": 0.5444839857651246, |
| "train_speed(iter/s)": 0.581694 |
| }, |
| { |
| "epoch": 1.149878640776699, |
| "grad_norm": 7.820127010345459, |
| "learning_rate": 6.792432963313328e-05, |
| "loss": 2.096297836303711, |
| "memory(GiB)": 41.25, |
| "step": 1895, |
| "token_acc": 0.5566037735849056, |
| "train_speed(iter/s)": 0.581753 |
| }, |
| { |
| "epoch": 1.1529126213592233, |
| "grad_norm": 6.915624618530273, |
| "learning_rate": 6.777593913617152e-05, |
| "loss": 2.108437156677246, |
| "memory(GiB)": 41.25, |
| "step": 1900, |
| "token_acc": 0.5073313782991202, |
| "train_speed(iter/s)": 0.581827 |
| }, |
| { |
| "epoch": 1.1559466019417475, |
| "grad_norm": 7.475584030151367, |
| "learning_rate": 6.762736920096218e-05, |
| "loss": 2.277429389953613, |
| "memory(GiB)": 41.25, |
| "step": 1905, |
| "token_acc": 0.5014577259475219, |
| "train_speed(iter/s)": 0.581845 |
| }, |
| { |
| "epoch": 1.1589805825242718, |
| "grad_norm": 7.104306221008301, |
| "learning_rate": 6.747862132723641e-05, |
| "loss": 2.067903518676758, |
| "memory(GiB)": 41.25, |
| "step": 1910, |
| "token_acc": 0.5628930817610063, |
| "train_speed(iter/s)": 0.581719 |
| }, |
| { |
| "epoch": 1.1620145631067962, |
| "grad_norm": 8.869878768920898, |
| "learning_rate": 6.732969701652145e-05, |
| "loss": 2.2940914154052736, |
| "memory(GiB)": 41.25, |
| "step": 1915, |
| "token_acc": 0.5511551155115512, |
| "train_speed(iter/s)": 0.58164 |
| }, |
| { |
| "epoch": 1.1650485436893203, |
| "grad_norm": 7.5197248458862305, |
| "learning_rate": 6.718059777212567e-05, |
| "loss": 2.0857444763183595, |
| "memory(GiB)": 41.25, |
| "step": 1920, |
| "token_acc": 0.5338645418326693, |
| "train_speed(iter/s)": 0.581495 |
| }, |
| { |
| "epoch": 1.1680825242718447, |
| "grad_norm": 6.92659854888916, |
| "learning_rate": 6.703132509912322e-05, |
| "loss": 1.807958221435547, |
| "memory(GiB)": 41.25, |
| "step": 1925, |
| "token_acc": 0.5786350148367952, |
| "train_speed(iter/s)": 0.581481 |
| }, |
| { |
| "epoch": 1.171116504854369, |
| "grad_norm": 7.253981113433838, |
| "learning_rate": 6.688188050433897e-05, |
| "loss": 1.9212162017822265, |
| "memory(GiB)": 41.25, |
| "step": 1930, |
| "token_acc": 0.5470383275261324, |
| "train_speed(iter/s)": 0.581673 |
| }, |
| { |
| "epoch": 1.1741504854368932, |
| "grad_norm": 7.32392692565918, |
| "learning_rate": 6.673226549633325e-05, |
| "loss": 2.0752506256103516, |
| "memory(GiB)": 41.25, |
| "step": 1935, |
| "token_acc": 0.5434782608695652, |
| "train_speed(iter/s)": 0.581879 |
| }, |
| { |
| "epoch": 1.1771844660194175, |
| "grad_norm": 6.774953842163086, |
| "learning_rate": 6.658248158538655e-05, |
| "loss": 2.022067832946777, |
| "memory(GiB)": 41.25, |
| "step": 1940, |
| "token_acc": 0.5303430079155673, |
| "train_speed(iter/s)": 0.582004 |
| }, |
| { |
| "epoch": 1.1802184466019416, |
| "grad_norm": 8.567710876464844, |
| "learning_rate": 6.643253028348443e-05, |
| "loss": 1.9163774490356444, |
| "memory(GiB)": 41.25, |
| "step": 1945, |
| "token_acc": 0.5769230769230769, |
| "train_speed(iter/s)": 0.581884 |
| }, |
| { |
| "epoch": 1.183252427184466, |
| "grad_norm": 7.197096347808838, |
| "learning_rate": 6.628241310430208e-05, |
| "loss": 1.9915233612060548, |
| "memory(GiB)": 41.25, |
| "step": 1950, |
| "token_acc": 0.5397923875432526, |
| "train_speed(iter/s)": 0.58194 |
| }, |
| { |
| "epoch": 1.1862864077669903, |
| "grad_norm": 7.874612808227539, |
| "learning_rate": 6.613213156318921e-05, |
| "loss": 2.039535331726074, |
| "memory(GiB)": 41.25, |
| "step": 1955, |
| "token_acc": 0.5563380281690141, |
| "train_speed(iter/s)": 0.58199 |
| }, |
| { |
| "epoch": 1.1893203883495145, |
| "grad_norm": 6.794829368591309, |
| "learning_rate": 6.598168717715462e-05, |
| "loss": 2.182103729248047, |
| "memory(GiB)": 41.25, |
| "step": 1960, |
| "token_acc": 0.48936170212765956, |
| "train_speed(iter/s)": 0.581769 |
| }, |
| { |
| "epoch": 1.1923543689320388, |
| "grad_norm": 8.138648986816406, |
| "learning_rate": 6.583108146485092e-05, |
| "loss": 2.205635833740234, |
| "memory(GiB)": 41.25, |
| "step": 1965, |
| "token_acc": 0.5301507537688442, |
| "train_speed(iter/s)": 0.581594 |
| }, |
| { |
| "epoch": 1.1953883495145632, |
| "grad_norm": 5.8334197998046875, |
| "learning_rate": 6.568031594655933e-05, |
| "loss": 2.1141899108886717, |
| "memory(GiB)": 41.25, |
| "step": 1970, |
| "token_acc": 0.5442622950819672, |
| "train_speed(iter/s)": 0.581474 |
| }, |
| { |
| "epoch": 1.1984223300970873, |
| "grad_norm": 6.450995922088623, |
| "learning_rate": 6.552939214417411e-05, |
| "loss": 2.0908193588256836, |
| "memory(GiB)": 41.25, |
| "step": 1975, |
| "token_acc": 0.5270758122743683, |
| "train_speed(iter/s)": 0.581538 |
| }, |
| { |
| "epoch": 1.2014563106796117, |
| "grad_norm": 5.936161041259766, |
| "learning_rate": 6.537831158118732e-05, |
| "loss": 2.2035654067993162, |
| "memory(GiB)": 41.25, |
| "step": 1980, |
| "token_acc": 0.5281899109792285, |
| "train_speed(iter/s)": 0.581735 |
| }, |
| { |
| "epoch": 1.204490291262136, |
| "grad_norm": 6.0731282234191895, |
| "learning_rate": 6.522707578267349e-05, |
| "loss": 2.015408515930176, |
| "memory(GiB)": 41.25, |
| "step": 1985, |
| "token_acc": 0.5625, |
| "train_speed(iter/s)": 0.581728 |
| }, |
| { |
| "epoch": 1.2075242718446602, |
| "grad_norm": 8.126087188720703, |
| "learning_rate": 6.507568627527411e-05, |
| "loss": 2.233916091918945, |
| "memory(GiB)": 41.25, |
| "step": 1990, |
| "token_acc": 0.5214899713467048, |
| "train_speed(iter/s)": 0.581678 |
| }, |
| { |
| "epoch": 1.2105582524271845, |
| "grad_norm": 5.780952453613281, |
| "learning_rate": 6.492414458718235e-05, |
| "loss": 2.153764533996582, |
| "memory(GiB)": 41.25, |
| "step": 1995, |
| "token_acc": 0.5223880597014925, |
| "train_speed(iter/s)": 0.581876 |
| }, |
| { |
| "epoch": 1.2135922330097086, |
| "grad_norm": 6.646781921386719, |
| "learning_rate": 6.477245224812745e-05, |
| "loss": 2.137297439575195, |
| "memory(GiB)": 41.25, |
| "step": 2000, |
| "token_acc": 0.5310880829015544, |
| "train_speed(iter/s)": 0.581927 |
| }, |
| { |
| "epoch": 1.2135922330097086, |
| "eval_loss": 2.180868148803711, |
| "eval_runtime": 12.0025, |
| "eval_samples_per_second": 8.332, |
| "eval_steps_per_second": 8.332, |
| "eval_token_acc": 0.5036284470246735, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.216626213592233, |
| "grad_norm": 6.332268238067627, |
| "learning_rate": 6.462061078935951e-05, |
| "loss": 2.0248859405517576, |
| "memory(GiB)": 41.25, |
| "step": 2005, |
| "token_acc": 0.5138888888888888, |
| "train_speed(iter/s)": 0.579819 |
| }, |
| { |
| "epoch": 1.2196601941747574, |
| "grad_norm": 9.277915954589844, |
| "learning_rate": 6.446862174363378e-05, |
| "loss": 2.223433494567871, |
| "memory(GiB)": 41.25, |
| "step": 2010, |
| "token_acc": 0.5347985347985348, |
| "train_speed(iter/s)": 0.579723 |
| }, |
| { |
| "epoch": 1.2226941747572815, |
| "grad_norm": 6.857091903686523, |
| "learning_rate": 6.431648664519544e-05, |
| "loss": 2.093130111694336, |
| "memory(GiB)": 41.25, |
| "step": 2015, |
| "token_acc": 0.5523809523809524, |
| "train_speed(iter/s)": 0.579662 |
| }, |
| { |
| "epoch": 1.2257281553398058, |
| "grad_norm": 7.251791000366211, |
| "learning_rate": 6.416420702976393e-05, |
| "loss": 2.4163230895996093, |
| "memory(GiB)": 41.25, |
| "step": 2020, |
| "token_acc": 0.5063291139240507, |
| "train_speed(iter/s)": 0.579631 |
| }, |
| { |
| "epoch": 1.2287621359223302, |
| "grad_norm": 6.369975566864014, |
| "learning_rate": 6.401178443451751e-05, |
| "loss": 1.8332990646362304, |
| "memory(GiB)": 41.25, |
| "step": 2025, |
| "token_acc": 0.5765124555160143, |
| "train_speed(iter/s)": 0.579685 |
| }, |
| { |
| "epoch": 1.2317961165048543, |
| "grad_norm": 12.884454727172852, |
| "learning_rate": 6.385922039807773e-05, |
| "loss": 1.9554672241210938, |
| "memory(GiB)": 41.25, |
| "step": 2030, |
| "token_acc": 0.5648148148148148, |
| "train_speed(iter/s)": 0.579632 |
| }, |
| { |
| "epoch": 1.2348300970873787, |
| "grad_norm": 9.875422477722168, |
| "learning_rate": 6.370651646049398e-05, |
| "loss": 2.229812812805176, |
| "memory(GiB)": 41.25, |
| "step": 2035, |
| "token_acc": 0.49691358024691357, |
| "train_speed(iter/s)": 0.579766 |
| }, |
| { |
| "epoch": 1.237864077669903, |
| "grad_norm": 5.669778823852539, |
| "learning_rate": 6.355367416322779e-05, |
| "loss": 1.7003231048583984, |
| "memory(GiB)": 41.25, |
| "step": 2040, |
| "token_acc": 0.5830508474576271, |
| "train_speed(iter/s)": 0.579765 |
| }, |
| { |
| "epoch": 1.2408980582524272, |
| "grad_norm": 6.894186019897461, |
| "learning_rate": 6.340069504913737e-05, |
| "loss": 2.091649627685547, |
| "memory(GiB)": 41.25, |
| "step": 2045, |
| "token_acc": 0.5504885993485342, |
| "train_speed(iter/s)": 0.579827 |
| }, |
| { |
| "epoch": 1.2439320388349515, |
| "grad_norm": 8.025986671447754, |
| "learning_rate": 6.324758066246211e-05, |
| "loss": 2.0427883148193358, |
| "memory(GiB)": 41.25, |
| "step": 2050, |
| "token_acc": 0.5252225519287834, |
| "train_speed(iter/s)": 0.579952 |
| }, |
| { |
| "epoch": 1.2469660194174756, |
| "grad_norm": 6.996369361877441, |
| "learning_rate": 6.309433254880675e-05, |
| "loss": 2.1355659484863283, |
| "memory(GiB)": 41.25, |
| "step": 2055, |
| "token_acc": 0.49606299212598426, |
| "train_speed(iter/s)": 0.580029 |
| }, |
| { |
| "epoch": 1.25, |
| "grad_norm": 9.216190338134766, |
| "learning_rate": 6.294095225512603e-05, |
| "loss": 2.045370101928711, |
| "memory(GiB)": 41.25, |
| "step": 2060, |
| "token_acc": 0.5151515151515151, |
| "train_speed(iter/s)": 0.579931 |
| }, |
| { |
| "epoch": 1.2530339805825244, |
| "grad_norm": 8.278094291687012, |
| "learning_rate": 6.278744132970899e-05, |
| "loss": 1.7628780364990235, |
| "memory(GiB)": 41.25, |
| "step": 2065, |
| "token_acc": 0.6045016077170418, |
| "train_speed(iter/s)": 0.57996 |
| }, |
| { |
| "epoch": 1.2560679611650485, |
| "grad_norm": 6.4922027587890625, |
| "learning_rate": 6.263380132216328e-05, |
| "loss": 2.0872188568115235, |
| "memory(GiB)": 41.25, |
| "step": 2070, |
| "token_acc": 0.5318471337579618, |
| "train_speed(iter/s)": 0.580006 |
| }, |
| { |
| "epoch": 1.2591019417475728, |
| "grad_norm": 9.755973815917969, |
| "learning_rate": 6.248003378339958e-05, |
| "loss": 2.043658638000488, |
| "memory(GiB)": 41.25, |
| "step": 2075, |
| "token_acc": 0.521594684385382, |
| "train_speed(iter/s)": 0.579989 |
| }, |
| { |
| "epoch": 1.262135922330097, |
| "grad_norm": 9.78760051727295, |
| "learning_rate": 6.232614026561587e-05, |
| "loss": 2.1496110916137696, |
| "memory(GiB)": 41.25, |
| "step": 2080, |
| "token_acc": 0.5136054421768708, |
| "train_speed(iter/s)": 0.579983 |
| }, |
| { |
| "epoch": 1.2651699029126213, |
| "grad_norm": 6.134158611297607, |
| "learning_rate": 6.217212232228189e-05, |
| "loss": 1.965431022644043, |
| "memory(GiB)": 41.25, |
| "step": 2085, |
| "token_acc": 0.5578231292517006, |
| "train_speed(iter/s)": 0.579816 |
| }, |
| { |
| "epoch": 1.2682038834951457, |
| "grad_norm": 6.624486446380615, |
| "learning_rate": 6.201798150812338e-05, |
| "loss": 2.282021713256836, |
| "memory(GiB)": 41.25, |
| "step": 2090, |
| "token_acc": 0.48556430446194226, |
| "train_speed(iter/s)": 0.579749 |
| }, |
| { |
| "epoch": 1.27123786407767, |
| "grad_norm": 7.1900739669799805, |
| "learning_rate": 6.186371937910637e-05, |
| "loss": 2.047537994384766, |
| "memory(GiB)": 41.25, |
| "step": 2095, |
| "token_acc": 0.4966442953020134, |
| "train_speed(iter/s)": 0.579939 |
| }, |
| { |
| "epoch": 1.2742718446601942, |
| "grad_norm": 6.147539138793945, |
| "learning_rate": 6.170933749242152e-05, |
| "loss": 2.319692039489746, |
| "memory(GiB)": 41.25, |
| "step": 2100, |
| "token_acc": 0.5370370370370371, |
| "train_speed(iter/s)": 0.580086 |
| }, |
| { |
| "epoch": 1.2773058252427185, |
| "grad_norm": 7.209454536437988, |
| "learning_rate": 6.155483740646832e-05, |
| "loss": 2.322870445251465, |
| "memory(GiB)": 41.25, |
| "step": 2105, |
| "token_acc": 0.521865889212828, |
| "train_speed(iter/s)": 0.580276 |
| }, |
| { |
| "epoch": 1.2803398058252426, |
| "grad_norm": 6.588000297546387, |
| "learning_rate": 6.140022068083948e-05, |
| "loss": 2.015561103820801, |
| "memory(GiB)": 41.25, |
| "step": 2110, |
| "token_acc": 0.5344129554655871, |
| "train_speed(iter/s)": 0.5802 |
| }, |
| { |
| "epoch": 1.283373786407767, |
| "grad_norm": 9.121885299682617, |
| "learning_rate": 6.124548887630508e-05, |
| "loss": 2.019037628173828, |
| "memory(GiB)": 41.25, |
| "step": 2115, |
| "token_acc": 0.5336927223719676, |
| "train_speed(iter/s)": 0.580255 |
| }, |
| { |
| "epoch": 1.2864077669902914, |
| "grad_norm": 10.898550987243652, |
| "learning_rate": 6.109064355479692e-05, |
| "loss": 1.740947151184082, |
| "memory(GiB)": 41.25, |
| "step": 2120, |
| "token_acc": 0.5368421052631579, |
| "train_speed(iter/s)": 0.580136 |
| }, |
| { |
| "epoch": 1.2894417475728155, |
| "grad_norm": 6.376506328582764, |
| "learning_rate": 6.093568627939261e-05, |
| "loss": 1.9328853607177734, |
| "memory(GiB)": 41.34, |
| "step": 2125, |
| "token_acc": 0.55, |
| "train_speed(iter/s)": 0.579925 |
| }, |
| { |
| "epoch": 1.2924757281553398, |
| "grad_norm": 7.9046525955200195, |
| "learning_rate": 6.078061861429995e-05, |
| "loss": 2.187295913696289, |
| "memory(GiB)": 41.34, |
| "step": 2130, |
| "token_acc": 0.5116279069767442, |
| "train_speed(iter/s)": 0.580068 |
| }, |
| { |
| "epoch": 1.295509708737864, |
| "grad_norm": 6.604916095733643, |
| "learning_rate": 6.062544212484096e-05, |
| "loss": 2.0762821197509767, |
| "memory(GiB)": 41.34, |
| "step": 2135, |
| "token_acc": 0.5333333333333333, |
| "train_speed(iter/s)": 0.58009 |
| }, |
| { |
| "epoch": 1.2985436893203883, |
| "grad_norm": 7.367359638214111, |
| "learning_rate": 6.047015837743629e-05, |
| "loss": 2.126904106140137, |
| "memory(GiB)": 41.34, |
| "step": 2140, |
| "token_acc": 0.5333333333333333, |
| "train_speed(iter/s)": 0.580247 |
| }, |
| { |
| "epoch": 1.3015776699029127, |
| "grad_norm": 5.810800552368164, |
| "learning_rate": 6.031476893958926e-05, |
| "loss": 1.7963878631591796, |
| "memory(GiB)": 41.34, |
| "step": 2145, |
| "token_acc": 0.5631399317406144, |
| "train_speed(iter/s)": 0.580352 |
| }, |
| { |
| "epoch": 1.3046116504854368, |
| "grad_norm": 6.407706260681152, |
| "learning_rate": 6.015927537987004e-05, |
| "loss": 2.1866846084594727, |
| "memory(GiB)": 41.34, |
| "step": 2150, |
| "token_acc": 0.5258855585831063, |
| "train_speed(iter/s)": 0.5802 |
| }, |
| { |
| "epoch": 1.3076456310679612, |
| "grad_norm": 7.020833969116211, |
| "learning_rate": 6.0003679267899904e-05, |
| "loss": 1.8915981292724608, |
| "memory(GiB)": 41.34, |
| "step": 2155, |
| "token_acc": 0.5571428571428572, |
| "train_speed(iter/s)": 0.580155 |
| }, |
| { |
| "epoch": 1.3106796116504853, |
| "grad_norm": 8.229516983032227, |
| "learning_rate": 5.9847982174335316e-05, |
| "loss": 1.890799331665039, |
| "memory(GiB)": 41.34, |
| "step": 2160, |
| "token_acc": 0.5424836601307189, |
| "train_speed(iter/s)": 0.579946 |
| }, |
| { |
| "epoch": 1.3137135922330097, |
| "grad_norm": 6.056339263916016, |
| "learning_rate": 5.969218567085206e-05, |
| "loss": 2.39956111907959, |
| "memory(GiB)": 41.34, |
| "step": 2165, |
| "token_acc": 0.49453551912568305, |
| "train_speed(iter/s)": 0.580006 |
| }, |
| { |
| "epoch": 1.316747572815534, |
| "grad_norm": 7.4000468254089355, |
| "learning_rate": 5.953629133012949e-05, |
| "loss": 2.256308937072754, |
| "memory(GiB)": 41.34, |
| "step": 2170, |
| "token_acc": 0.5133689839572193, |
| "train_speed(iter/s)": 0.579824 |
| }, |
| { |
| "epoch": 1.3197815533980584, |
| "grad_norm": 6.835947513580322, |
| "learning_rate": 5.938030072583447e-05, |
| "loss": 1.8971139907836914, |
| "memory(GiB)": 41.34, |
| "step": 2175, |
| "token_acc": 0.542319749216301, |
| "train_speed(iter/s)": 0.57985 |
| }, |
| { |
| "epoch": 1.3228155339805825, |
| "grad_norm": 8.275431632995605, |
| "learning_rate": 5.922421543260567e-05, |
| "loss": 1.7686073303222656, |
| "memory(GiB)": 41.34, |
| "step": 2180, |
| "token_acc": 0.5703971119133574, |
| "train_speed(iter/s)": 0.579752 |
| }, |
| { |
| "epoch": 1.3258495145631068, |
| "grad_norm": 7.795175552368164, |
| "learning_rate": 5.906803702603755e-05, |
| "loss": 1.9470417022705078, |
| "memory(GiB)": 41.34, |
| "step": 2185, |
| "token_acc": 0.5381944444444444, |
| "train_speed(iter/s)": 0.57989 |
| }, |
| { |
| "epoch": 1.328883495145631, |
| "grad_norm": 5.923962593078613, |
| "learning_rate": 5.891176708266454e-05, |
| "loss": 2.17016716003418, |
| "memory(GiB)": 41.34, |
| "step": 2190, |
| "token_acc": 0.5444126074498568, |
| "train_speed(iter/s)": 0.579998 |
| }, |
| { |
| "epoch": 1.3319174757281553, |
| "grad_norm": 7.121251106262207, |
| "learning_rate": 5.875540717994503e-05, |
| "loss": 1.586796760559082, |
| "memory(GiB)": 41.34, |
| "step": 2195, |
| "token_acc": 0.6234817813765182, |
| "train_speed(iter/s)": 0.579935 |
| }, |
| { |
| "epoch": 1.3349514563106797, |
| "grad_norm": 7.5099921226501465, |
| "learning_rate": 5.859895889624554e-05, |
| "loss": 1.777475357055664, |
| "memory(GiB)": 41.34, |
| "step": 2200, |
| "token_acc": 0.5938566552901023, |
| "train_speed(iter/s)": 0.580098 |
| }, |
| { |
| "epoch": 1.3379854368932038, |
| "grad_norm": 8.970749855041504, |
| "learning_rate": 5.84424238108247e-05, |
| "loss": 1.670484733581543, |
| "memory(GiB)": 41.34, |
| "step": 2205, |
| "token_acc": 0.5873015873015873, |
| "train_speed(iter/s)": 0.580043 |
| }, |
| { |
| "epoch": 1.3410194174757282, |
| "grad_norm": 6.932069778442383, |
| "learning_rate": 5.8285803503817425e-05, |
| "loss": 2.056923675537109, |
| "memory(GiB)": 41.34, |
| "step": 2210, |
| "token_acc": 0.5124653739612188, |
| "train_speed(iter/s)": 0.580004 |
| }, |
| { |
| "epoch": 1.3440533980582523, |
| "grad_norm": 7.549715518951416, |
| "learning_rate": 5.812909955621886e-05, |
| "loss": 1.9996042251586914, |
| "memory(GiB)": 41.34, |
| "step": 2215, |
| "token_acc": 0.5565749235474006, |
| "train_speed(iter/s)": 0.579986 |
| }, |
| { |
| "epoch": 1.3470873786407767, |
| "grad_norm": 8.340503692626953, |
| "learning_rate": 5.7972313549868415e-05, |
| "loss": 2.207027816772461, |
| "memory(GiB)": 41.34, |
| "step": 2220, |
| "token_acc": 0.4897959183673469, |
| "train_speed(iter/s)": 0.579916 |
| }, |
| { |
| "epoch": 1.350121359223301, |
| "grad_norm": 6.941786766052246, |
| "learning_rate": 5.7815447067433917e-05, |
| "loss": 1.7856271743774415, |
| "memory(GiB)": 41.34, |
| "step": 2225, |
| "token_acc": 0.5862068965517241, |
| "train_speed(iter/s)": 0.579928 |
| }, |
| { |
| "epoch": 1.3531553398058254, |
| "grad_norm": 5.413527488708496, |
| "learning_rate": 5.7658501692395475e-05, |
| "loss": 1.8429689407348633, |
| "memory(GiB)": 41.34, |
| "step": 2230, |
| "token_acc": 0.6061643835616438, |
| "train_speed(iter/s)": 0.579742 |
| }, |
| { |
| "epoch": 1.3561893203883495, |
| "grad_norm": 6.279661655426025, |
| "learning_rate": 5.7501479009029636e-05, |
| "loss": 1.8153335571289062, |
| "memory(GiB)": 41.34, |
| "step": 2235, |
| "token_acc": 0.5572289156626506, |
| "train_speed(iter/s)": 0.57984 |
| }, |
| { |
| "epoch": 1.3592233009708738, |
| "grad_norm": 7.204460620880127, |
| "learning_rate": 5.734438060239331e-05, |
| "loss": 2.255967712402344, |
| "memory(GiB)": 41.34, |
| "step": 2240, |
| "token_acc": 0.513595166163142, |
| "train_speed(iter/s)": 0.579902 |
| }, |
| { |
| "epoch": 1.362257281553398, |
| "grad_norm": 7.191935062408447, |
| "learning_rate": 5.718720805830777e-05, |
| "loss": 2.1052494049072266, |
| "memory(GiB)": 41.34, |
| "step": 2245, |
| "token_acc": 0.533724340175953, |
| "train_speed(iter/s)": 0.579865 |
| }, |
| { |
| "epoch": 1.3652912621359223, |
| "grad_norm": 9.75123119354248, |
| "learning_rate": 5.70299629633427e-05, |
| "loss": 2.176554489135742, |
| "memory(GiB)": 41.34, |
| "step": 2250, |
| "token_acc": 0.5303514376996805, |
| "train_speed(iter/s)": 0.579795 |
| }, |
| { |
| "epoch": 1.3683252427184467, |
| "grad_norm": 8.081015586853027, |
| "learning_rate": 5.687264690480014e-05, |
| "loss": 2.253178024291992, |
| "memory(GiB)": 41.34, |
| "step": 2255, |
| "token_acc": 0.5040431266846361, |
| "train_speed(iter/s)": 0.579802 |
| }, |
| { |
| "epoch": 1.3713592233009708, |
| "grad_norm": 5.86273193359375, |
| "learning_rate": 5.6715261470698434e-05, |
| "loss": 2.2541793823242187, |
| "memory(GiB)": 41.34, |
| "step": 2260, |
| "token_acc": 0.5361842105263158, |
| "train_speed(iter/s)": 0.57974 |
| }, |
| { |
| "epoch": 1.3743932038834952, |
| "grad_norm": 6.653288841247559, |
| "learning_rate": 5.655780824975628e-05, |
| "loss": 2.219985008239746, |
| "memory(GiB)": 41.34, |
| "step": 2265, |
| "token_acc": 0.5471014492753623, |
| "train_speed(iter/s)": 0.579644 |
| }, |
| { |
| "epoch": 1.3774271844660193, |
| "grad_norm": 9.517049789428711, |
| "learning_rate": 5.6400288831376604e-05, |
| "loss": 2.2441757202148436, |
| "memory(GiB)": 41.34, |
| "step": 2270, |
| "token_acc": 0.5112359550561798, |
| "train_speed(iter/s)": 0.579692 |
| }, |
| { |
| "epoch": 1.3804611650485437, |
| "grad_norm": 6.048003673553467, |
| "learning_rate": 5.624270480563059e-05, |
| "loss": 2.186481475830078, |
| "memory(GiB)": 41.34, |
| "step": 2275, |
| "token_acc": 0.5155875299760192, |
| "train_speed(iter/s)": 0.57971 |
| }, |
| { |
| "epoch": 1.383495145631068, |
| "grad_norm": 7.275609970092773, |
| "learning_rate": 5.608505776324158e-05, |
| "loss": 2.20775146484375, |
| "memory(GiB)": 41.34, |
| "step": 2280, |
| "token_acc": 0.5342465753424658, |
| "train_speed(iter/s)": 0.57976 |
| }, |
| { |
| "epoch": 1.3865291262135924, |
| "grad_norm": 7.088268280029297, |
| "learning_rate": 5.592734929556907e-05, |
| "loss": 1.7822921752929688, |
| "memory(GiB)": 41.34, |
| "step": 2285, |
| "token_acc": 0.610223642172524, |
| "train_speed(iter/s)": 0.57981 |
| }, |
| { |
| "epoch": 1.3895631067961165, |
| "grad_norm": 6.6104207038879395, |
| "learning_rate": 5.576958099459254e-05, |
| "loss": 2.022065353393555, |
| "memory(GiB)": 41.34, |
| "step": 2290, |
| "token_acc": 0.5352941176470588, |
| "train_speed(iter/s)": 0.579726 |
| }, |
| { |
| "epoch": 1.3925970873786409, |
| "grad_norm": 7.773556709289551, |
| "learning_rate": 5.5611754452895516e-05, |
| "loss": 1.8300546646118163, |
| "memory(GiB)": 41.34, |
| "step": 2295, |
| "token_acc": 0.577922077922078, |
| "train_speed(iter/s)": 0.579707 |
| }, |
| { |
| "epoch": 1.395631067961165, |
| "grad_norm": 7.439202785491943, |
| "learning_rate": 5.5453871263649395e-05, |
| "loss": 1.970297622680664, |
| "memory(GiB)": 41.84, |
| "step": 2300, |
| "token_acc": 0.6112852664576802, |
| "train_speed(iter/s)": 0.579403 |
| }, |
| { |
| "epoch": 1.3986650485436893, |
| "grad_norm": 9.190638542175293, |
| "learning_rate": 5.5295933020597426e-05, |
| "loss": 2.140420913696289, |
| "memory(GiB)": 41.84, |
| "step": 2305, |
| "token_acc": 0.5065359477124183, |
| "train_speed(iter/s)": 0.579349 |
| }, |
| { |
| "epoch": 1.4016990291262137, |
| "grad_norm": 5.690435409545898, |
| "learning_rate": 5.5137941318038596e-05, |
| "loss": 1.893089485168457, |
| "memory(GiB)": 41.84, |
| "step": 2310, |
| "token_acc": 0.6, |
| "train_speed(iter/s)": 0.579324 |
| }, |
| { |
| "epoch": 1.4047330097087378, |
| "grad_norm": 7.719916343688965, |
| "learning_rate": 5.4979897750811506e-05, |
| "loss": 2.3775409698486327, |
| "memory(GiB)": 41.84, |
| "step": 2315, |
| "token_acc": 0.5171102661596958, |
| "train_speed(iter/s)": 0.579192 |
| }, |
| { |
| "epoch": 1.4077669902912622, |
| "grad_norm": 7.299395561218262, |
| "learning_rate": 5.4821803914278336e-05, |
| "loss": 1.9694931030273437, |
| "memory(GiB)": 41.84, |
| "step": 2320, |
| "token_acc": 0.5427728613569321, |
| "train_speed(iter/s)": 0.579086 |
| }, |
| { |
| "epoch": 1.4108009708737863, |
| "grad_norm": 6.726255893707275, |
| "learning_rate": 5.4663661404308677e-05, |
| "loss": 2.0492481231689452, |
| "memory(GiB)": 41.84, |
| "step": 2325, |
| "token_acc": 0.5476923076923077, |
| "train_speed(iter/s)": 0.579024 |
| }, |
| { |
| "epoch": 1.4138349514563107, |
| "grad_norm": 9.350031852722168, |
| "learning_rate": 5.4505471817263475e-05, |
| "loss": 2.0813602447509765, |
| "memory(GiB)": 41.84, |
| "step": 2330, |
| "token_acc": 0.5481727574750831, |
| "train_speed(iter/s)": 0.578975 |
| }, |
| { |
| "epoch": 1.416868932038835, |
| "grad_norm": 6.127203464508057, |
| "learning_rate": 5.434723674997888e-05, |
| "loss": 1.884780502319336, |
| "memory(GiB)": 41.84, |
| "step": 2335, |
| "token_acc": 0.5686900958466453, |
| "train_speed(iter/s)": 0.579032 |
| }, |
| { |
| "epoch": 1.4199029126213591, |
| "grad_norm": 6.9619646072387695, |
| "learning_rate": 5.418895779975014e-05, |
| "loss": 1.7420536041259767, |
| "memory(GiB)": 41.84, |
| "step": 2340, |
| "token_acc": 0.552901023890785, |
| "train_speed(iter/s)": 0.578878 |
| }, |
| { |
| "epoch": 1.4229368932038835, |
| "grad_norm": 8.211845397949219, |
| "learning_rate": 5.403063656431548e-05, |
| "loss": 1.926046371459961, |
| "memory(GiB)": 41.84, |
| "step": 2345, |
| "token_acc": 0.5566666666666666, |
| "train_speed(iter/s)": 0.578768 |
| }, |
| { |
| "epoch": 1.4259708737864076, |
| "grad_norm": 8.615828514099121, |
| "learning_rate": 5.387227464183999e-05, |
| "loss": 1.8713953018188476, |
| "memory(GiB)": 41.84, |
| "step": 2350, |
| "token_acc": 0.5667870036101083, |
| "train_speed(iter/s)": 0.578908 |
| }, |
| { |
| "epoch": 1.429004854368932, |
| "grad_norm": 8.677647590637207, |
| "learning_rate": 5.371387363089945e-05, |
| "loss": 2.0104761123657227, |
| "memory(GiB)": 41.84, |
| "step": 2355, |
| "token_acc": 0.5653710247349824, |
| "train_speed(iter/s)": 0.578973 |
| }, |
| { |
| "epoch": 1.4320388349514563, |
| "grad_norm": 8.752043724060059, |
| "learning_rate": 5.355543513046419e-05, |
| "loss": 2.0104990005493164, |
| "memory(GiB)": 41.84, |
| "step": 2360, |
| "token_acc": 0.5486111111111112, |
| "train_speed(iter/s)": 0.579051 |
| }, |
| { |
| "epoch": 1.4350728155339807, |
| "grad_norm": 6.938195705413818, |
| "learning_rate": 5.3396960739883037e-05, |
| "loss": 1.974110984802246, |
| "memory(GiB)": 41.84, |
| "step": 2365, |
| "token_acc": 0.5476190476190477, |
| "train_speed(iter/s)": 0.579115 |
| }, |
| { |
| "epoch": 1.4381067961165048, |
| "grad_norm": 6.470673561096191, |
| "learning_rate": 5.323845205886707e-05, |
| "loss": 2.092882537841797, |
| "memory(GiB)": 41.84, |
| "step": 2370, |
| "token_acc": 0.5299684542586751, |
| "train_speed(iter/s)": 0.57924 |
| }, |
| { |
| "epoch": 1.4411407766990292, |
| "grad_norm": 6.7543206214904785, |
| "learning_rate": 5.307991068747353e-05, |
| "loss": 2.317662811279297, |
| "memory(GiB)": 41.84, |
| "step": 2375, |
| "token_acc": 0.5239616613418531, |
| "train_speed(iter/s)": 0.579126 |
| }, |
| { |
| "epoch": 1.4441747572815533, |
| "grad_norm": 7.441592216491699, |
| "learning_rate": 5.292133822608961e-05, |
| "loss": 2.0434192657470702, |
| "memory(GiB)": 41.84, |
| "step": 2380, |
| "token_acc": 0.547945205479452, |
| "train_speed(iter/s)": 0.579147 |
| }, |
| { |
| "epoch": 1.4472087378640777, |
| "grad_norm": 7.122344970703125, |
| "learning_rate": 5.2762736275416416e-05, |
| "loss": 2.2737056732177736, |
| "memory(GiB)": 41.84, |
| "step": 2385, |
| "token_acc": 0.540785498489426, |
| "train_speed(iter/s)": 0.579209 |
| }, |
| { |
| "epoch": 1.450242718446602, |
| "grad_norm": 6.282622337341309, |
| "learning_rate": 5.260410643645263e-05, |
| "loss": 2.0695510864257813, |
| "memory(GiB)": 41.84, |
| "step": 2390, |
| "token_acc": 0.5391849529780565, |
| "train_speed(iter/s)": 0.579338 |
| }, |
| { |
| "epoch": 1.4532766990291262, |
| "grad_norm": 6.010311603546143, |
| "learning_rate": 5.2445450310478525e-05, |
| "loss": 1.819678497314453, |
| "memory(GiB)": 41.84, |
| "step": 2395, |
| "token_acc": 0.5876288659793815, |
| "train_speed(iter/s)": 0.579301 |
| }, |
| { |
| "epoch": 1.4563106796116505, |
| "grad_norm": 8.786865234375, |
| "learning_rate": 5.228676949903973e-05, |
| "loss": 1.9962085723876952, |
| "memory(GiB)": 41.84, |
| "step": 2400, |
| "token_acc": 0.543046357615894, |
| "train_speed(iter/s)": 0.579291 |
| }, |
| { |
| "epoch": 1.4593446601941746, |
| "grad_norm": 6.772591590881348, |
| "learning_rate": 5.2128065603931006e-05, |
| "loss": 1.931478500366211, |
| "memory(GiB)": 41.84, |
| "step": 2405, |
| "token_acc": 0.584717607973422, |
| "train_speed(iter/s)": 0.579183 |
| }, |
| { |
| "epoch": 1.462378640776699, |
| "grad_norm": 7.0186357498168945, |
| "learning_rate": 5.196934022718017e-05, |
| "loss": 1.8834335327148437, |
| "memory(GiB)": 41.84, |
| "step": 2410, |
| "token_acc": 0.5857142857142857, |
| "train_speed(iter/s)": 0.579263 |
| }, |
| { |
| "epoch": 1.4654126213592233, |
| "grad_norm": 7.649616241455078, |
| "learning_rate": 5.18105949710319e-05, |
| "loss": 2.1677167892456053, |
| "memory(GiB)": 41.84, |
| "step": 2415, |
| "token_acc": 0.5331010452961672, |
| "train_speed(iter/s)": 0.579501 |
| }, |
| { |
| "epoch": 1.4684466019417477, |
| "grad_norm": 7.913327693939209, |
| "learning_rate": 5.165183143793149e-05, |
| "loss": 2.4113887786865233, |
| "memory(GiB)": 41.84, |
| "step": 2420, |
| "token_acc": 0.47790055248618785, |
| "train_speed(iter/s)": 0.579573 |
| }, |
| { |
| "epoch": 1.4714805825242718, |
| "grad_norm": 8.196721076965332, |
| "learning_rate": 5.149305123050877e-05, |
| "loss": 1.6590158462524414, |
| "memory(GiB)": 41.84, |
| "step": 2425, |
| "token_acc": 0.5425531914893617, |
| "train_speed(iter/s)": 0.579678 |
| }, |
| { |
| "epoch": 1.4745145631067962, |
| "grad_norm": 5.6772637367248535, |
| "learning_rate": 5.133425595156187e-05, |
| "loss": 2.0934783935546877, |
| "memory(GiB)": 41.84, |
| "step": 2430, |
| "token_acc": 0.49586776859504134, |
| "train_speed(iter/s)": 0.579607 |
| }, |
| { |
| "epoch": 1.4775485436893203, |
| "grad_norm": 9.212677955627441, |
| "learning_rate": 5.1175447204041096e-05, |
| "loss": 2.0111692428588865, |
| "memory(GiB)": 41.84, |
| "step": 2435, |
| "token_acc": 0.5536912751677853, |
| "train_speed(iter/s)": 0.579662 |
| }, |
| { |
| "epoch": 1.4805825242718447, |
| "grad_norm": 6.798145771026611, |
| "learning_rate": 5.101662659103265e-05, |
| "loss": 1.8395654678344726, |
| "memory(GiB)": 41.84, |
| "step": 2440, |
| "token_acc": 0.597972972972973, |
| "train_speed(iter/s)": 0.579718 |
| }, |
| { |
| "epoch": 1.483616504854369, |
| "grad_norm": 5.608346462249756, |
| "learning_rate": 5.0857795715742575e-05, |
| "loss": 2.0497175216674806, |
| "memory(GiB)": 41.84, |
| "step": 2445, |
| "token_acc": 0.5542168674698795, |
| "train_speed(iter/s)": 0.579756 |
| }, |
| { |
| "epoch": 1.4866504854368932, |
| "grad_norm": 7.392420291900635, |
| "learning_rate": 5.0698956181480465e-05, |
| "loss": 2.040939521789551, |
| "memory(GiB)": 41.84, |
| "step": 2450, |
| "token_acc": 0.516728624535316, |
| "train_speed(iter/s)": 0.57972 |
| }, |
| { |
| "epoch": 1.4896844660194175, |
| "grad_norm": 5.091887474060059, |
| "learning_rate": 5.054010959164329e-05, |
| "loss": 2.256111907958984, |
| "memory(GiB)": 41.84, |
| "step": 2455, |
| "token_acc": 0.5181347150259067, |
| "train_speed(iter/s)": 0.57971 |
| }, |
| { |
| "epoch": 1.4927184466019416, |
| "grad_norm": 8.56528091430664, |
| "learning_rate": 5.038125754969933e-05, |
| "loss": 2.1345645904541017, |
| "memory(GiB)": 41.84, |
| "step": 2460, |
| "token_acc": 0.524390243902439, |
| "train_speed(iter/s)": 0.579785 |
| }, |
| { |
| "epoch": 1.495752427184466, |
| "grad_norm": 8.425841331481934, |
| "learning_rate": 5.0222401659171846e-05, |
| "loss": 1.8225021362304688, |
| "memory(GiB)": 41.84, |
| "step": 2465, |
| "token_acc": 0.6041666666666666, |
| "train_speed(iter/s)": 0.579851 |
| }, |
| { |
| "epoch": 1.4987864077669903, |
| "grad_norm": 7.502073287963867, |
| "learning_rate": 5.006354352362296e-05, |
| "loss": 2.2287876129150392, |
| "memory(GiB)": 41.84, |
| "step": 2470, |
| "token_acc": 0.5451505016722408, |
| "train_speed(iter/s)": 0.579885 |
| }, |
| { |
| "epoch": 1.5018203883495147, |
| "grad_norm": 14.120893478393555, |
| "learning_rate": 4.9904684746637445e-05, |
| "loss": 2.1780731201171877, |
| "memory(GiB)": 41.84, |
| "step": 2475, |
| "token_acc": 0.5900621118012422, |
| "train_speed(iter/s)": 0.580025 |
| }, |
| { |
| "epoch": 1.5048543689320388, |
| "grad_norm": 6.581485271453857, |
| "learning_rate": 4.9745826931806524e-05, |
| "loss": 2.466159439086914, |
| "memory(GiB)": 41.84, |
| "step": 2480, |
| "token_acc": 0.4410958904109589, |
| "train_speed(iter/s)": 0.580064 |
| }, |
| { |
| "epoch": 1.507888349514563, |
| "grad_norm": 6.508731365203857, |
| "learning_rate": 4.958697168271179e-05, |
| "loss": 1.8887645721435546, |
| "memory(GiB)": 41.84, |
| "step": 2485, |
| "token_acc": 0.5559440559440559, |
| "train_speed(iter/s)": 0.58009 |
| }, |
| { |
| "epoch": 1.5109223300970873, |
| "grad_norm": 5.886694431304932, |
| "learning_rate": 4.942812060290886e-05, |
| "loss": 2.1457874298095705, |
| "memory(GiB)": 41.84, |
| "step": 2490, |
| "token_acc": 0.5476190476190477, |
| "train_speed(iter/s)": 0.580188 |
| }, |
| { |
| "epoch": 1.5139563106796117, |
| "grad_norm": 5.6448655128479, |
| "learning_rate": 4.92692752959113e-05, |
| "loss": 1.9578502655029297, |
| "memory(GiB)": 41.84, |
| "step": 2495, |
| "token_acc": 0.5710227272727273, |
| "train_speed(iter/s)": 0.58031 |
| }, |
| { |
| "epoch": 1.516990291262136, |
| "grad_norm": 9.438764572143555, |
| "learning_rate": 4.91104373651744e-05, |
| "loss": 2.124725341796875, |
| "memory(GiB)": 41.84, |
| "step": 2500, |
| "token_acc": 0.5164179104477612, |
| "train_speed(iter/s)": 0.580368 |
| }, |
| { |
| "epoch": 1.516990291262136, |
| "eval_loss": 2.0256900787353516, |
| "eval_runtime": 12.7025, |
| "eval_samples_per_second": 7.872, |
| "eval_steps_per_second": 7.872, |
| "eval_token_acc": 0.5185185185185185, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.5200242718446602, |
| "grad_norm": 7.8130106925964355, |
| "learning_rate": 4.8951608414078944e-05, |
| "loss": 2.377336311340332, |
| "memory(GiB)": 41.84, |
| "step": 2505, |
| "token_acc": 0.509090909090909, |
| "train_speed(iter/s)": 0.578557 |
| }, |
| { |
| "epoch": 1.5230582524271845, |
| "grad_norm": 7.16809606552124, |
| "learning_rate": 4.8792790045915167e-05, |
| "loss": 1.6067583084106445, |
| "memory(GiB)": 41.84, |
| "step": 2510, |
| "token_acc": 0.6186770428015564, |
| "train_speed(iter/s)": 0.578509 |
| }, |
| { |
| "epoch": 1.5260922330097086, |
| "grad_norm": 6.225858688354492, |
| "learning_rate": 4.863398386386638e-05, |
| "loss": 1.8492023468017578, |
| "memory(GiB)": 41.84, |
| "step": 2515, |
| "token_acc": 0.5787965616045845, |
| "train_speed(iter/s)": 0.578454 |
| }, |
| { |
| "epoch": 1.529126213592233, |
| "grad_norm": 8.595073699951172, |
| "learning_rate": 4.847519147099294e-05, |
| "loss": 1.9532032012939453, |
| "memory(GiB)": 41.84, |
| "step": 2520, |
| "token_acc": 0.535593220338983, |
| "train_speed(iter/s)": 0.578603 |
| }, |
| { |
| "epoch": 1.5321601941747574, |
| "grad_norm": 7.294178009033203, |
| "learning_rate": 4.831641447021599e-05, |
| "loss": 1.7893003463745116, |
| "memory(GiB)": 41.84, |
| "step": 2525, |
| "token_acc": 0.6137184115523465, |
| "train_speed(iter/s)": 0.57858 |
| }, |
| { |
| "epoch": 1.5351941747572817, |
| "grad_norm": 7.821887969970703, |
| "learning_rate": 4.8157654464301275e-05, |
| "loss": 2.2367401123046875, |
| "memory(GiB)": 41.84, |
| "step": 2530, |
| "token_acc": 0.5182072829131653, |
| "train_speed(iter/s)": 0.578758 |
| }, |
| { |
| "epoch": 1.5382281553398058, |
| "grad_norm": 7.00529670715332, |
| "learning_rate": 4.7998913055843054e-05, |
| "loss": 2.1124399185180662, |
| "memory(GiB)": 41.84, |
| "step": 2535, |
| "token_acc": 0.5432835820895522, |
| "train_speed(iter/s)": 0.578828 |
| }, |
| { |
| "epoch": 1.54126213592233, |
| "grad_norm": 5.952232837677002, |
| "learning_rate": 4.7840191847247774e-05, |
| "loss": 2.0016332626342774, |
| "memory(GiB)": 41.84, |
| "step": 2540, |
| "token_acc": 0.5930232558139535, |
| "train_speed(iter/s)": 0.578959 |
| }, |
| { |
| "epoch": 1.5442961165048543, |
| "grad_norm": 7.779722213745117, |
| "learning_rate": 4.7681492440718045e-05, |
| "loss": 1.982724952697754, |
| "memory(GiB)": 41.84, |
| "step": 2545, |
| "token_acc": 0.5338345864661654, |
| "train_speed(iter/s)": 0.579046 |
| }, |
| { |
| "epoch": 1.5473300970873787, |
| "grad_norm": 7.770874977111816, |
| "learning_rate": 4.752281643823633e-05, |
| "loss": 2.032842254638672, |
| "memory(GiB)": 41.84, |
| "step": 2550, |
| "token_acc": 0.5749235474006116, |
| "train_speed(iter/s)": 0.579014 |
| }, |
| { |
| "epoch": 1.550364077669903, |
| "grad_norm": 6.972710609436035, |
| "learning_rate": 4.736416544154891e-05, |
| "loss": 1.9030048370361328, |
| "memory(GiB)": 41.84, |
| "step": 2555, |
| "token_acc": 0.5656565656565656, |
| "train_speed(iter/s)": 0.579128 |
| }, |
| { |
| "epoch": 1.5533980582524272, |
| "grad_norm": 7.349340915679932, |
| "learning_rate": 4.720554105214961e-05, |
| "loss": 1.903385543823242, |
| "memory(GiB)": 41.84, |
| "step": 2560, |
| "token_acc": 0.5469798657718121, |
| "train_speed(iter/s)": 0.579119 |
| }, |
| { |
| "epoch": 1.5564320388349513, |
| "grad_norm": 7.2185444831848145, |
| "learning_rate": 4.704694487126365e-05, |
| "loss": 1.8204626083374023, |
| "memory(GiB)": 41.84, |
| "step": 2565, |
| "token_acc": 0.5747126436781609, |
| "train_speed(iter/s)": 0.579258 |
| }, |
| { |
| "epoch": 1.5594660194174756, |
| "grad_norm": 7.047289848327637, |
| "learning_rate": 4.688837849983154e-05, |
| "loss": 2.169702339172363, |
| "memory(GiB)": 41.84, |
| "step": 2570, |
| "token_acc": 0.509493670886076, |
| "train_speed(iter/s)": 0.579358 |
| }, |
| { |
| "epoch": 1.5625, |
| "grad_norm": 10.583885192871094, |
| "learning_rate": 4.6729843538492847e-05, |
| "loss": 1.8666536331176757, |
| "memory(GiB)": 41.84, |
| "step": 2575, |
| "token_acc": 0.5373134328358209, |
| "train_speed(iter/s)": 0.579491 |
| }, |
| { |
| "epoch": 1.5655339805825244, |
| "grad_norm": 7.884814262390137, |
| "learning_rate": 4.657134158757012e-05, |
| "loss": 2.1705270767211915, |
| "memory(GiB)": 41.84, |
| "step": 2580, |
| "token_acc": 0.5142857142857142, |
| "train_speed(iter/s)": 0.579527 |
| }, |
| { |
| "epoch": 1.5685679611650487, |
| "grad_norm": 7.872768402099609, |
| "learning_rate": 4.6412874247052615e-05, |
| "loss": 2.2928442001342773, |
| "memory(GiB)": 41.84, |
| "step": 2585, |
| "token_acc": 0.5105105105105106, |
| "train_speed(iter/s)": 0.579593 |
| }, |
| { |
| "epoch": 1.5716019417475728, |
| "grad_norm": 9.023248672485352, |
| "learning_rate": 4.625444311658028e-05, |
| "loss": 1.8835826873779298, |
| "memory(GiB)": 41.84, |
| "step": 2590, |
| "token_acc": 0.5552147239263804, |
| "train_speed(iter/s)": 0.579652 |
| }, |
| { |
| "epoch": 1.574635922330097, |
| "grad_norm": 7.943882942199707, |
| "learning_rate": 4.6096049795427514e-05, |
| "loss": 2.0815145492553713, |
| "memory(GiB)": 41.84, |
| "step": 2595, |
| "token_acc": 0.5218855218855218, |
| "train_speed(iter/s)": 0.579716 |
| }, |
| { |
| "epoch": 1.5776699029126213, |
| "grad_norm": 7.587296009063721, |
| "learning_rate": 4.593769588248702e-05, |
| "loss": 1.6165863037109376, |
| "memory(GiB)": 41.84, |
| "step": 2600, |
| "token_acc": 0.6129032258064516, |
| "train_speed(iter/s)": 0.579694 |
| }, |
| { |
| "epoch": 1.5807038834951457, |
| "grad_norm": 8.291844367980957, |
| "learning_rate": 4.577938297625378e-05, |
| "loss": 2.093304443359375, |
| "memory(GiB)": 41.84, |
| "step": 2605, |
| "token_acc": 0.5827814569536424, |
| "train_speed(iter/s)": 0.579729 |
| }, |
| { |
| "epoch": 1.58373786407767, |
| "grad_norm": 6.745671272277832, |
| "learning_rate": 4.5621112674808756e-05, |
| "loss": 1.9251686096191407, |
| "memory(GiB)": 41.84, |
| "step": 2610, |
| "token_acc": 0.5833333333333334, |
| "train_speed(iter/s)": 0.579877 |
| }, |
| { |
| "epoch": 1.5867718446601942, |
| "grad_norm": 8.493294715881348, |
| "learning_rate": 4.5462886575802884e-05, |
| "loss": 1.971460723876953, |
| "memory(GiB)": 41.84, |
| "step": 2615, |
| "token_acc": 0.5821428571428572, |
| "train_speed(iter/s)": 0.579847 |
| }, |
| { |
| "epoch": 1.5898058252427183, |
| "grad_norm": 13.71259593963623, |
| "learning_rate": 4.530470627644088e-05, |
| "loss": 2.0272783279418944, |
| "memory(GiB)": 41.84, |
| "step": 2620, |
| "token_acc": 0.5578231292517006, |
| "train_speed(iter/s)": 0.579923 |
| }, |
| { |
| "epoch": 1.5928398058252426, |
| "grad_norm": 6.396689414978027, |
| "learning_rate": 4.514657337346512e-05, |
| "loss": 1.958717155456543, |
| "memory(GiB)": 41.84, |
| "step": 2625, |
| "token_acc": 0.5413333333333333, |
| "train_speed(iter/s)": 0.579933 |
| }, |
| { |
| "epoch": 1.595873786407767, |
| "grad_norm": 8.41101360321045, |
| "learning_rate": 4.4988489463139605e-05, |
| "loss": 1.8024402618408204, |
| "memory(GiB)": 41.84, |
| "step": 2630, |
| "token_acc": 0.574468085106383, |
| "train_speed(iter/s)": 0.580054 |
| }, |
| { |
| "epoch": 1.5989077669902914, |
| "grad_norm": 6.545622825622559, |
| "learning_rate": 4.483045614123371e-05, |
| "loss": 2.081429862976074, |
| "memory(GiB)": 41.84, |
| "step": 2635, |
| "token_acc": 0.5523809523809524, |
| "train_speed(iter/s)": 0.580079 |
| }, |
| { |
| "epoch": 1.6019417475728155, |
| "grad_norm": 7.194870471954346, |
| "learning_rate": 4.46724750030062e-05, |
| "loss": 1.9362052917480468, |
| "memory(GiB)": 41.84, |
| "step": 2640, |
| "token_acc": 0.5756578947368421, |
| "train_speed(iter/s)": 0.580221 |
| }, |
| { |
| "epoch": 1.6049757281553398, |
| "grad_norm": 6.871307849884033, |
| "learning_rate": 4.451454764318903e-05, |
| "loss": 2.0093603134155273, |
| "memory(GiB)": 41.84, |
| "step": 2645, |
| "token_acc": 0.5370370370370371, |
| "train_speed(iter/s)": 0.580085 |
| }, |
| { |
| "epoch": 1.608009708737864, |
| "grad_norm": 6.45038366317749, |
| "learning_rate": 4.4356675655971344e-05, |
| "loss": 1.9990568161010742, |
| "memory(GiB)": 41.84, |
| "step": 2650, |
| "token_acc": 0.5369774919614148, |
| "train_speed(iter/s)": 0.580256 |
| }, |
| { |
| "epoch": 1.6110436893203883, |
| "grad_norm": 10.047187805175781, |
| "learning_rate": 4.419886063498329e-05, |
| "loss": 2.281326103210449, |
| "memory(GiB)": 41.84, |
| "step": 2655, |
| "token_acc": 0.4965034965034965, |
| "train_speed(iter/s)": 0.580351 |
| }, |
| { |
| "epoch": 1.6140776699029127, |
| "grad_norm": 8.295970916748047, |
| "learning_rate": 4.404110417327998e-05, |
| "loss": 2.0824228286743165, |
| "memory(GiB)": 41.84, |
| "step": 2660, |
| "token_acc": 0.519434628975265, |
| "train_speed(iter/s)": 0.580351 |
| }, |
| { |
| "epoch": 1.617111650485437, |
| "grad_norm": 8.373644828796387, |
| "learning_rate": 4.388340786332541e-05, |
| "loss": 1.9413429260253907, |
| "memory(GiB)": 41.84, |
| "step": 2665, |
| "token_acc": 0.580110497237569, |
| "train_speed(iter/s)": 0.58041 |
| }, |
| { |
| "epoch": 1.6201456310679612, |
| "grad_norm": 6.771739482879639, |
| "learning_rate": 4.372577329697636e-05, |
| "loss": 2.1314056396484373, |
| "memory(GiB)": 41.84, |
| "step": 2670, |
| "token_acc": 0.5014005602240896, |
| "train_speed(iter/s)": 0.580318 |
| }, |
| { |
| "epoch": 1.6231796116504853, |
| "grad_norm": 6.547637462615967, |
| "learning_rate": 4.35682020654664e-05, |
| "loss": 1.8196992874145508, |
| "memory(GiB)": 41.84, |
| "step": 2675, |
| "token_acc": 0.5973154362416108, |
| "train_speed(iter/s)": 0.580398 |
| }, |
| { |
| "epoch": 1.6262135922330097, |
| "grad_norm": 7.0243449211120605, |
| "learning_rate": 4.341069575938968e-05, |
| "loss": 2.0443634033203124, |
| "memory(GiB)": 41.84, |
| "step": 2680, |
| "token_acc": 0.5777027027027027, |
| "train_speed(iter/s)": 0.580433 |
| }, |
| { |
| "epoch": 1.629247572815534, |
| "grad_norm": 7.968044281005859, |
| "learning_rate": 4.3253255968685044e-05, |
| "loss": 2.372605323791504, |
| "memory(GiB)": 41.84, |
| "step": 2685, |
| "token_acc": 0.5537459283387622, |
| "train_speed(iter/s)": 0.580421 |
| }, |
| { |
| "epoch": 1.6322815533980584, |
| "grad_norm": 7.074746608734131, |
| "learning_rate": 4.3095884282619866e-05, |
| "loss": 1.9867733001708985, |
| "memory(GiB)": 41.84, |
| "step": 2690, |
| "token_acc": 0.5676691729323309, |
| "train_speed(iter/s)": 0.580481 |
| }, |
| { |
| "epoch": 1.6353155339805825, |
| "grad_norm": 6.959107398986816, |
| "learning_rate": 4.2938582289774e-05, |
| "loss": 1.9854732513427735, |
| "memory(GiB)": 41.84, |
| "step": 2695, |
| "token_acc": 0.5686813186813187, |
| "train_speed(iter/s)": 0.58059 |
| }, |
| { |
| "epoch": 1.6383495145631068, |
| "grad_norm": 6.535874843597412, |
| "learning_rate": 4.278135157802389e-05, |
| "loss": 2.186625289916992, |
| "memory(GiB)": 41.84, |
| "step": 2700, |
| "token_acc": 0.5300859598853869, |
| "train_speed(iter/s)": 0.5806 |
| }, |
| { |
| "epoch": 1.641383495145631, |
| "grad_norm": 6.670753002166748, |
| "learning_rate": 4.262419373452634e-05, |
| "loss": 2.415786361694336, |
| "memory(GiB)": 41.84, |
| "step": 2705, |
| "token_acc": 0.4827586206896552, |
| "train_speed(iter/s)": 0.580602 |
| }, |
| { |
| "epoch": 1.6444174757281553, |
| "grad_norm": 11.83166790008545, |
| "learning_rate": 4.246711034570264e-05, |
| "loss": 2.008403015136719, |
| "memory(GiB)": 41.84, |
| "step": 2710, |
| "token_acc": 0.5294117647058824, |
| "train_speed(iter/s)": 0.580751 |
| }, |
| { |
| "epoch": 1.6474514563106797, |
| "grad_norm": 7.605556964874268, |
| "learning_rate": 4.231010299722248e-05, |
| "loss": 2.3934700012207033, |
| "memory(GiB)": 41.84, |
| "step": 2715, |
| "token_acc": 0.4915254237288136, |
| "train_speed(iter/s)": 0.580846 |
| }, |
| { |
| "epoch": 1.650485436893204, |
| "grad_norm": 6.8486504554748535, |
| "learning_rate": 4.2153173273987946e-05, |
| "loss": 1.9181827545166015, |
| "memory(GiB)": 41.84, |
| "step": 2720, |
| "token_acc": 0.5562913907284768, |
| "train_speed(iter/s)": 0.580869 |
| }, |
| { |
| "epoch": 1.6535194174757282, |
| "grad_norm": 8.30029296875, |
| "learning_rate": 4.199632276011761e-05, |
| "loss": 2.099735641479492, |
| "memory(GiB)": 41.84, |
| "step": 2725, |
| "token_acc": 0.5529100529100529, |
| "train_speed(iter/s)": 0.580925 |
| }, |
| { |
| "epoch": 1.6565533980582523, |
| "grad_norm": 6.734464168548584, |
| "learning_rate": 4.1839553038930396e-05, |
| "loss": 1.9709980010986328, |
| "memory(GiB)": 41.84, |
| "step": 2730, |
| "token_acc": 0.5331230283911672, |
| "train_speed(iter/s)": 0.580952 |
| }, |
| { |
| "epoch": 1.6595873786407767, |
| "grad_norm": 6.3508710861206055, |
| "learning_rate": 4.168286569292972e-05, |
| "loss": 2.039066123962402, |
| "memory(GiB)": 41.84, |
| "step": 2735, |
| "token_acc": 0.5649717514124294, |
| "train_speed(iter/s)": 0.581109 |
| }, |
| { |
| "epoch": 1.662621359223301, |
| "grad_norm": 6.782240867614746, |
| "learning_rate": 4.152626230378741e-05, |
| "loss": 1.832118606567383, |
| "memory(GiB)": 41.84, |
| "step": 2740, |
| "token_acc": 0.6, |
| "train_speed(iter/s)": 0.581153 |
| }, |
| { |
| "epoch": 1.6656553398058254, |
| "grad_norm": 8.437490463256836, |
| "learning_rate": 4.136974445232788e-05, |
| "loss": 1.9984106063842773, |
| "memory(GiB)": 41.84, |
| "step": 2745, |
| "token_acc": 0.5113636363636364, |
| "train_speed(iter/s)": 0.581248 |
| }, |
| { |
| "epoch": 1.6686893203883495, |
| "grad_norm": 8.64138126373291, |
| "learning_rate": 4.121331371851201e-05, |
| "loss": 1.9429035186767578, |
| "memory(GiB)": 41.84, |
| "step": 2750, |
| "token_acc": 0.574468085106383, |
| "train_speed(iter/s)": 0.581216 |
| }, |
| { |
| "epoch": 1.6717233009708736, |
| "grad_norm": 7.808033466339111, |
| "learning_rate": 4.10569716814213e-05, |
| "loss": 2.069664192199707, |
| "memory(GiB)": 41.84, |
| "step": 2755, |
| "token_acc": 0.546583850931677, |
| "train_speed(iter/s)": 0.581204 |
| }, |
| { |
| "epoch": 1.674757281553398, |
| "grad_norm": 7.158506393432617, |
| "learning_rate": 4.0900719919241935e-05, |
| "loss": 2.2129743576049803, |
| "memory(GiB)": 41.84, |
| "step": 2760, |
| "token_acc": 0.5330882352941176, |
| "train_speed(iter/s)": 0.581324 |
| }, |
| { |
| "epoch": 1.6777912621359223, |
| "grad_norm": 6.141445636749268, |
| "learning_rate": 4.0744560009248766e-05, |
| "loss": 2.1222957611083983, |
| "memory(GiB)": 41.84, |
| "step": 2765, |
| "token_acc": 0.5301204819277109, |
| "train_speed(iter/s)": 0.581344 |
| }, |
| { |
| "epoch": 1.6808252427184467, |
| "grad_norm": 9.04359245300293, |
| "learning_rate": 4.0588493527789537e-05, |
| "loss": 2.0622652053833006, |
| "memory(GiB)": 41.84, |
| "step": 2770, |
| "token_acc": 0.5793103448275863, |
| "train_speed(iter/s)": 0.581484 |
| }, |
| { |
| "epoch": 1.6838592233009708, |
| "grad_norm": 7.4207892417907715, |
| "learning_rate": 4.043252205026879e-05, |
| "loss": 1.9703941345214844, |
| "memory(GiB)": 41.84, |
| "step": 2775, |
| "token_acc": 0.5451807228915663, |
| "train_speed(iter/s)": 0.581551 |
| }, |
| { |
| "epoch": 1.6868932038834952, |
| "grad_norm": 6.962371826171875, |
| "learning_rate": 4.027664715113209e-05, |
| "loss": 2.0751678466796877, |
| "memory(GiB)": 41.84, |
| "step": 2780, |
| "token_acc": 0.533724340175953, |
| "train_speed(iter/s)": 0.58165 |
| }, |
| { |
| "epoch": 1.6899271844660193, |
| "grad_norm": 6.551590919494629, |
| "learning_rate": 4.012087040385012e-05, |
| "loss": 1.9780982971191405, |
| "memory(GiB)": 41.84, |
| "step": 2785, |
| "token_acc": 0.564625850340136, |
| "train_speed(iter/s)": 0.581595 |
| }, |
| { |
| "epoch": 1.6929611650485437, |
| "grad_norm": 8.19705867767334, |
| "learning_rate": 3.996519338090273e-05, |
| "loss": 1.9075267791748047, |
| "memory(GiB)": 41.84, |
| "step": 2790, |
| "token_acc": 0.5729537366548043, |
| "train_speed(iter/s)": 0.58155 |
| }, |
| { |
| "epoch": 1.695995145631068, |
| "grad_norm": 6.0668206214904785, |
| "learning_rate": 3.980961765376316e-05, |
| "loss": 2.269983100891113, |
| "memory(GiB)": 41.84, |
| "step": 2795, |
| "token_acc": 0.5031446540880503, |
| "train_speed(iter/s)": 0.581377 |
| }, |
| { |
| "epoch": 1.6990291262135924, |
| "grad_norm": 7.507983684539795, |
| "learning_rate": 3.965414479288209e-05, |
| "loss": 2.1596681594848635, |
| "memory(GiB)": 41.84, |
| "step": 2800, |
| "token_acc": 0.5704225352112676, |
| "train_speed(iter/s)": 0.581409 |
| }, |
| { |
| "epoch": 1.7020631067961165, |
| "grad_norm": 9.827066421508789, |
| "learning_rate": 3.9498776367671825e-05, |
| "loss": 2.028460884094238, |
| "memory(GiB)": 41.84, |
| "step": 2805, |
| "token_acc": 0.5544871794871795, |
| "train_speed(iter/s)": 0.581541 |
| }, |
| { |
| "epoch": 1.7050970873786406, |
| "grad_norm": 7.970204830169678, |
| "learning_rate": 3.9343513946490454e-05, |
| "loss": 2.2608503341674804, |
| "memory(GiB)": 41.84, |
| "step": 2810, |
| "token_acc": 0.532871972318339, |
| "train_speed(iter/s)": 0.5816 |
| }, |
| { |
| "epoch": 1.708131067961165, |
| "grad_norm": 8.01364517211914, |
| "learning_rate": 3.9188359096626e-05, |
| "loss": 1.965842056274414, |
| "memory(GiB)": 41.84, |
| "step": 2815, |
| "token_acc": 0.5447154471544715, |
| "train_speed(iter/s)": 0.581736 |
| }, |
| { |
| "epoch": 1.7111650485436893, |
| "grad_norm": 7.19758939743042, |
| "learning_rate": 3.903331338428067e-05, |
| "loss": 2.0728851318359376, |
| "memory(GiB)": 41.84, |
| "step": 2820, |
| "token_acc": 0.5568862275449101, |
| "train_speed(iter/s)": 0.581956 |
| }, |
| { |
| "epoch": 1.7141990291262137, |
| "grad_norm": 6.977797508239746, |
| "learning_rate": 3.88783783745549e-05, |
| "loss": 1.7800270080566407, |
| "memory(GiB)": 41.84, |
| "step": 2825, |
| "token_acc": 0.565359477124183, |
| "train_speed(iter/s)": 0.581974 |
| }, |
| { |
| "epoch": 1.7172330097087378, |
| "grad_norm": 8.389069557189941, |
| "learning_rate": 3.872355563143173e-05, |
| "loss": 1.479856300354004, |
| "memory(GiB)": 41.84, |
| "step": 2830, |
| "token_acc": 0.6463878326996197, |
| "train_speed(iter/s)": 0.582021 |
| }, |
| { |
| "epoch": 1.7202669902912622, |
| "grad_norm": 8.598016738891602, |
| "learning_rate": 3.856884671776085e-05, |
| "loss": 1.9001766204833985, |
| "memory(GiB)": 41.84, |
| "step": 2835, |
| "token_acc": 0.5427509293680297, |
| "train_speed(iter/s)": 0.582021 |
| }, |
| { |
| "epoch": 1.7233009708737863, |
| "grad_norm": 7.339463233947754, |
| "learning_rate": 3.8414253195242986e-05, |
| "loss": 2.0311508178710938, |
| "memory(GiB)": 41.84, |
| "step": 2840, |
| "token_acc": 0.5960912052117264, |
| "train_speed(iter/s)": 0.582075 |
| }, |
| { |
| "epoch": 1.7263349514563107, |
| "grad_norm": 6.700257778167725, |
| "learning_rate": 3.8259776624414e-05, |
| "loss": 1.824915313720703, |
| "memory(GiB)": 41.84, |
| "step": 2845, |
| "token_acc": 0.5838709677419355, |
| "train_speed(iter/s)": 0.582141 |
| }, |
| { |
| "epoch": 1.729368932038835, |
| "grad_norm": 7.298790454864502, |
| "learning_rate": 3.81054185646292e-05, |
| "loss": 2.0110477447509765, |
| "memory(GiB)": 41.84, |
| "step": 2850, |
| "token_acc": 0.5802047781569966, |
| "train_speed(iter/s)": 0.581997 |
| }, |
| { |
| "epoch": 1.7324029126213594, |
| "grad_norm": 7.2910332679748535, |
| "learning_rate": 3.795118057404761e-05, |
| "loss": 1.9101539611816407, |
| "memory(GiB)": 41.84, |
| "step": 2855, |
| "token_acc": 0.5787545787545788, |
| "train_speed(iter/s)": 0.582142 |
| }, |
| { |
| "epoch": 1.7354368932038835, |
| "grad_norm": 5.262487411499023, |
| "learning_rate": 3.779706420961617e-05, |
| "loss": 1.8585384368896485, |
| "memory(GiB)": 41.84, |
| "step": 2860, |
| "token_acc": 0.5941176470588235, |
| "train_speed(iter/s)": 0.5821 |
| }, |
| { |
| "epoch": 1.7384708737864076, |
| "grad_norm": 10.52902603149414, |
| "learning_rate": 3.764307102705417e-05, |
| "loss": 2.2284523010253907, |
| "memory(GiB)": 41.84, |
| "step": 2865, |
| "token_acc": 0.5323076923076923, |
| "train_speed(iter/s)": 0.582044 |
| }, |
| { |
| "epoch": 1.741504854368932, |
| "grad_norm": 7.36726188659668, |
| "learning_rate": 3.748920258083736e-05, |
| "loss": 2.3935964584350584, |
| "memory(GiB)": 41.84, |
| "step": 2870, |
| "token_acc": 0.5157593123209169, |
| "train_speed(iter/s)": 0.582023 |
| }, |
| { |
| "epoch": 1.7445388349514563, |
| "grad_norm": 9.515303611755371, |
| "learning_rate": 3.7335460424182356e-05, |
| "loss": 2.0206344604492186, |
| "memory(GiB)": 41.84, |
| "step": 2875, |
| "token_acc": 0.5436241610738255, |
| "train_speed(iter/s)": 0.582136 |
| }, |
| { |
| "epoch": 1.7475728155339807, |
| "grad_norm": 7.746051788330078, |
| "learning_rate": 3.7181846109031005e-05, |
| "loss": 1.9893791198730468, |
| "memory(GiB)": 41.84, |
| "step": 2880, |
| "token_acc": 0.5664335664335665, |
| "train_speed(iter/s)": 0.582034 |
| }, |
| { |
| "epoch": 1.7506067961165048, |
| "grad_norm": 7.868143081665039, |
| "learning_rate": 3.702836118603458e-05, |
| "loss": 2.369589614868164, |
| "memory(GiB)": 41.84, |
| "step": 2885, |
| "token_acc": 0.5084745762711864, |
| "train_speed(iter/s)": 0.581894 |
| }, |
| { |
| "epoch": 1.7536407766990292, |
| "grad_norm": 6.672244071960449, |
| "learning_rate": 3.687500720453831e-05, |
| "loss": 1.9809467315673828, |
| "memory(GiB)": 41.84, |
| "step": 2890, |
| "token_acc": 0.5498489425981873, |
| "train_speed(iter/s)": 0.58182 |
| }, |
| { |
| "epoch": 1.7566747572815533, |
| "grad_norm": 5.8379011154174805, |
| "learning_rate": 3.672178571256556e-05, |
| "loss": 2.137996864318848, |
| "memory(GiB)": 41.84, |
| "step": 2895, |
| "token_acc": 0.5470588235294118, |
| "train_speed(iter/s)": 0.581917 |
| }, |
| { |
| "epoch": 1.7597087378640777, |
| "grad_norm": 5.696329593658447, |
| "learning_rate": 3.656869825680234e-05, |
| "loss": 1.7796316146850586, |
| "memory(GiB)": 41.84, |
| "step": 2900, |
| "token_acc": 0.6054421768707483, |
| "train_speed(iter/s)": 0.581974 |
| }, |
| { |
| "epoch": 1.762742718446602, |
| "grad_norm": 7.160623550415039, |
| "learning_rate": 3.641574638258162e-05, |
| "loss": 2.0094619750976563, |
| "memory(GiB)": 41.84, |
| "step": 2905, |
| "token_acc": 0.5428571428571428, |
| "train_speed(iter/s)": 0.58194 |
| }, |
| { |
| "epoch": 1.7657766990291264, |
| "grad_norm": 5.733323097229004, |
| "learning_rate": 3.62629316338677e-05, |
| "loss": 2.0144931793212892, |
| "memory(GiB)": 41.84, |
| "step": 2910, |
| "token_acc": 0.5308988764044944, |
| "train_speed(iter/s)": 0.581796 |
| }, |
| { |
| "epoch": 1.7688106796116505, |
| "grad_norm": 6.644180774688721, |
| "learning_rate": 3.611025555324079e-05, |
| "loss": 1.9589729309082031, |
| "memory(GiB)": 41.84, |
| "step": 2915, |
| "token_acc": 0.5672727272727273, |
| "train_speed(iter/s)": 0.581878 |
| }, |
| { |
| "epoch": 1.7718446601941746, |
| "grad_norm": 13.900938034057617, |
| "learning_rate": 3.595771968188121e-05, |
| "loss": 1.9292577743530273, |
| "memory(GiB)": 41.84, |
| "step": 2920, |
| "token_acc": 0.59, |
| "train_speed(iter/s)": 0.582002 |
| }, |
| { |
| "epoch": 1.774878640776699, |
| "grad_norm": 9.342930793762207, |
| "learning_rate": 3.5805325559554006e-05, |
| "loss": 1.8789905548095702, |
| "memory(GiB)": 41.84, |
| "step": 2925, |
| "token_acc": 0.556, |
| "train_speed(iter/s)": 0.582068 |
| }, |
| { |
| "epoch": 1.7779126213592233, |
| "grad_norm": 10.121810913085938, |
| "learning_rate": 3.5653074724593306e-05, |
| "loss": 2.171294593811035, |
| "memory(GiB)": 41.84, |
| "step": 2930, |
| "token_acc": 0.5441176470588235, |
| "train_speed(iter/s)": 0.582172 |
| }, |
| { |
| "epoch": 1.7809466019417477, |
| "grad_norm": 8.192787170410156, |
| "learning_rate": 3.550096871388689e-05, |
| "loss": 1.9008895874023437, |
| "memory(GiB)": 41.84, |
| "step": 2935, |
| "token_acc": 0.5387205387205387, |
| "train_speed(iter/s)": 0.582287 |
| }, |
| { |
| "epoch": 1.7839805825242718, |
| "grad_norm": 9.528207778930664, |
| "learning_rate": 3.5349009062860586e-05, |
| "loss": 2.1617660522460938, |
| "memory(GiB)": 41.84, |
| "step": 2940, |
| "token_acc": 0.5551601423487544, |
| "train_speed(iter/s)": 0.582322 |
| }, |
| { |
| "epoch": 1.787014563106796, |
| "grad_norm": 11.588967323303223, |
| "learning_rate": 3.519719730546275e-05, |
| "loss": 1.679486083984375, |
| "memory(GiB)": 41.84, |
| "step": 2945, |
| "token_acc": 0.6188679245283019, |
| "train_speed(iter/s)": 0.582412 |
| }, |
| { |
| "epoch": 1.7900485436893203, |
| "grad_norm": 8.055891990661621, |
| "learning_rate": 3.504553497414893e-05, |
| "loss": 1.960872268676758, |
| "memory(GiB)": 41.84, |
| "step": 2950, |
| "token_acc": 0.6287878787878788, |
| "train_speed(iter/s)": 0.582489 |
| }, |
| { |
| "epoch": 1.7930825242718447, |
| "grad_norm": 6.05890417098999, |
| "learning_rate": 3.489402359986621e-05, |
| "loss": 1.9190954208374023, |
| "memory(GiB)": 41.84, |
| "step": 2955, |
| "token_acc": 0.5660377358490566, |
| "train_speed(iter/s)": 0.582598 |
| }, |
| { |
| "epoch": 1.796116504854369, |
| "grad_norm": 10.20227336883545, |
| "learning_rate": 3.474266471203794e-05, |
| "loss": 1.7310752868652344, |
| "memory(GiB)": 41.84, |
| "step": 2960, |
| "token_acc": 0.6046511627906976, |
| "train_speed(iter/s)": 0.582734 |
| }, |
| { |
| "epoch": 1.7991504854368932, |
| "grad_norm": 8.176021575927734, |
| "learning_rate": 3.459145983854813e-05, |
| "loss": 1.9539764404296875, |
| "memory(GiB)": 41.84, |
| "step": 2965, |
| "token_acc": 0.5962962962962963, |
| "train_speed(iter/s)": 0.582861 |
| }, |
| { |
| "epoch": 1.8021844660194175, |
| "grad_norm": 7.691636085510254, |
| "learning_rate": 3.444041050572611e-05, |
| "loss": 2.0364006042480467, |
| "memory(GiB)": 41.84, |
| "step": 2970, |
| "token_acc": 0.5605536332179931, |
| "train_speed(iter/s)": 0.582943 |
| }, |
| { |
| "epoch": 1.8052184466019416, |
| "grad_norm": 8.828807830810547, |
| "learning_rate": 3.4289518238331145e-05, |
| "loss": 1.7169891357421876, |
| "memory(GiB)": 41.84, |
| "step": 2975, |
| "token_acc": 0.654275092936803, |
| "train_speed(iter/s)": 0.583015 |
| }, |
| { |
| "epoch": 1.808252427184466, |
| "grad_norm": 6.20446252822876, |
| "learning_rate": 3.413878455953698e-05, |
| "loss": 2.094204902648926, |
| "memory(GiB)": 41.84, |
| "step": 2980, |
| "token_acc": 0.5351170568561873, |
| "train_speed(iter/s)": 0.583072 |
| }, |
| { |
| "epoch": 1.8112864077669903, |
| "grad_norm": 7.542689800262451, |
| "learning_rate": 3.398821099091652e-05, |
| "loss": 1.8194765090942382, |
| "memory(GiB)": 41.84, |
| "step": 2985, |
| "token_acc": 0.5900621118012422, |
| "train_speed(iter/s)": 0.583267 |
| }, |
| { |
| "epoch": 1.8143203883495147, |
| "grad_norm": 5.989041328430176, |
| "learning_rate": 3.3837799052426434e-05, |
| "loss": 2.085628128051758, |
| "memory(GiB)": 41.84, |
| "step": 2990, |
| "token_acc": 0.5573770491803278, |
| "train_speed(iter/s)": 0.58343 |
| }, |
| { |
| "epoch": 1.8173543689320388, |
| "grad_norm": 8.956052780151367, |
| "learning_rate": 3.3687550262391836e-05, |
| "loss": 2.0220142364501954, |
| "memory(GiB)": 41.84, |
| "step": 2995, |
| "token_acc": 0.563076923076923, |
| "train_speed(iter/s)": 0.583454 |
| }, |
| { |
| "epoch": 1.820388349514563, |
| "grad_norm": 9.703901290893555, |
| "learning_rate": 3.353746613749094e-05, |
| "loss": 1.7758405685424805, |
| "memory(GiB)": 41.84, |
| "step": 3000, |
| "token_acc": 0.5978260869565217, |
| "train_speed(iter/s)": 0.583443 |
| }, |
| { |
| "epoch": 1.820388349514563, |
| "eval_loss": 2.155855655670166, |
| "eval_runtime": 12.3446, |
| "eval_samples_per_second": 8.101, |
| "eval_steps_per_second": 8.101, |
| "eval_token_acc": 0.5071335927367056, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.8234223300970873, |
| "grad_norm": 10.289823532104492, |
| "learning_rate": 3.33875481927397e-05, |
| "loss": 1.9597461700439454, |
| "memory(GiB)": 41.84, |
| "step": 3005, |
| "token_acc": 0.5239887111947319, |
| "train_speed(iter/s)": 0.581978 |
| }, |
| { |
| "epoch": 1.8264563106796117, |
| "grad_norm": 8.281176567077637, |
| "learning_rate": 3.3237797941476715e-05, |
| "loss": 1.7820388793945312, |
| "memory(GiB)": 41.84, |
| "step": 3010, |
| "token_acc": 0.5830258302583026, |
| "train_speed(iter/s)": 0.582021 |
| }, |
| { |
| "epoch": 1.829490291262136, |
| "grad_norm": 6.512312889099121, |
| "learning_rate": 3.308821689534766e-05, |
| "loss": 1.9633775711059571, |
| "memory(GiB)": 41.84, |
| "step": 3015, |
| "token_acc": 0.526984126984127, |
| "train_speed(iter/s)": 0.582048 |
| }, |
| { |
| "epoch": 1.8325242718446602, |
| "grad_norm": 6.695690631866455, |
| "learning_rate": 3.293880656429028e-05, |
| "loss": 1.9059555053710937, |
| "memory(GiB)": 41.84, |
| "step": 3020, |
| "token_acc": 0.558282208588957, |
| "train_speed(iter/s)": 0.582014 |
| }, |
| { |
| "epoch": 1.8355582524271845, |
| "grad_norm": 8.261147499084473, |
| "learning_rate": 3.278956845651897e-05, |
| "loss": 1.9743257522583009, |
| "memory(GiB)": 41.84, |
| "step": 3025, |
| "token_acc": 0.5537974683544303, |
| "train_speed(iter/s)": 0.582071 |
| }, |
| { |
| "epoch": 1.8385922330097086, |
| "grad_norm": 8.489652633666992, |
| "learning_rate": 3.2640504078509706e-05, |
| "loss": 2.0056623458862304, |
| "memory(GiB)": 41.84, |
| "step": 3030, |
| "token_acc": 0.5425219941348973, |
| "train_speed(iter/s)": 0.582011 |
| }, |
| { |
| "epoch": 1.841626213592233, |
| "grad_norm": 5.517820835113525, |
| "learning_rate": 3.2491614934984706e-05, |
| "loss": 2.0196483612060545, |
| "memory(GiB)": 41.84, |
| "step": 3035, |
| "token_acc": 0.5681159420289855, |
| "train_speed(iter/s)": 0.58204 |
| }, |
| { |
| "epoch": 1.8446601941747574, |
| "grad_norm": 11.377049446105957, |
| "learning_rate": 3.2342902528897276e-05, |
| "loss": 2.4981143951416014, |
| "memory(GiB)": 41.84, |
| "step": 3040, |
| "token_acc": 0.4857142857142857, |
| "train_speed(iter/s)": 0.582056 |
| }, |
| { |
| "epoch": 1.8476941747572817, |
| "grad_norm": 9.072402954101562, |
| "learning_rate": 3.219436836141672e-05, |
| "loss": 1.7939895629882812, |
| "memory(GiB)": 41.84, |
| "step": 3045, |
| "token_acc": 0.5458015267175572, |
| "train_speed(iter/s)": 0.582115 |
| }, |
| { |
| "epoch": 1.8507281553398058, |
| "grad_norm": 8.273455619812012, |
| "learning_rate": 3.204601393191305e-05, |
| "loss": 2.0849941253662108, |
| "memory(GiB)": 41.84, |
| "step": 3050, |
| "token_acc": 0.5551948051948052, |
| "train_speed(iter/s)": 0.582202 |
| }, |
| { |
| "epoch": 1.85376213592233, |
| "grad_norm": 6.509883880615234, |
| "learning_rate": 3.1897840737941996e-05, |
| "loss": 1.894825553894043, |
| "memory(GiB)": 41.84, |
| "step": 3055, |
| "token_acc": 0.5211726384364821, |
| "train_speed(iter/s)": 0.582142 |
| }, |
| { |
| "epoch": 1.8567961165048543, |
| "grad_norm": 8.81839370727539, |
| "learning_rate": 3.174985027522978e-05, |
| "loss": 1.9194953918457032, |
| "memory(GiB)": 41.84, |
| "step": 3060, |
| "token_acc": 0.5727554179566563, |
| "train_speed(iter/s)": 0.582189 |
| }, |
| { |
| "epoch": 1.8598300970873787, |
| "grad_norm": 7.000573635101318, |
| "learning_rate": 3.1602044037657994e-05, |
| "loss": 1.977131462097168, |
| "memory(GiB)": 41.84, |
| "step": 3065, |
| "token_acc": 0.543046357615894, |
| "train_speed(iter/s)": 0.582179 |
| }, |
| { |
| "epoch": 1.862864077669903, |
| "grad_norm": 8.45114803314209, |
| "learning_rate": 3.1454423517248704e-05, |
| "loss": 2.187137985229492, |
| "memory(GiB)": 41.84, |
| "step": 3070, |
| "token_acc": 0.5319767441860465, |
| "train_speed(iter/s)": 0.582204 |
| }, |
| { |
| "epoch": 1.8658980582524272, |
| "grad_norm": 11.056445121765137, |
| "learning_rate": 3.1306990204149146e-05, |
| "loss": 1.8925033569335938, |
| "memory(GiB)": 41.84, |
| "step": 3075, |
| "token_acc": 0.568, |
| "train_speed(iter/s)": 0.582108 |
| }, |
| { |
| "epoch": 1.8689320388349513, |
| "grad_norm": 7.232324123382568, |
| "learning_rate": 3.115974558661691e-05, |
| "loss": 2.050203323364258, |
| "memory(GiB)": 41.84, |
| "step": 3080, |
| "token_acc": 0.5853658536585366, |
| "train_speed(iter/s)": 0.582179 |
| }, |
| { |
| "epoch": 1.8719660194174756, |
| "grad_norm": 6.1433024406433105, |
| "learning_rate": 3.1012691151004694e-05, |
| "loss": 1.7500345230102539, |
| "memory(GiB)": 41.84, |
| "step": 3085, |
| "token_acc": 0.5871886120996441, |
| "train_speed(iter/s)": 0.582301 |
| }, |
| { |
| "epoch": 1.875, |
| "grad_norm": 5.706048488616943, |
| "learning_rate": 3.086582838174551e-05, |
| "loss": 1.8604692459106444, |
| "memory(GiB)": 41.84, |
| "step": 3090, |
| "token_acc": 0.5847953216374269, |
| "train_speed(iter/s)": 0.582285 |
| }, |
| { |
| "epoch": 1.8780339805825244, |
| "grad_norm": 7.604012489318848, |
| "learning_rate": 3.0719158761337574e-05, |
| "loss": 1.8550039291381837, |
| "memory(GiB)": 41.84, |
| "step": 3095, |
| "token_acc": 0.558641975308642, |
| "train_speed(iter/s)": 0.582099 |
| }, |
| { |
| "epoch": 1.8810679611650487, |
| "grad_norm": 7.333124160766602, |
| "learning_rate": 3.0572683770329316e-05, |
| "loss": 2.143758010864258, |
| "memory(GiB)": 41.84, |
| "step": 3100, |
| "token_acc": 0.5300353356890459, |
| "train_speed(iter/s)": 0.581945 |
| }, |
| { |
| "epoch": 1.8841019417475728, |
| "grad_norm": 6.552914142608643, |
| "learning_rate": 3.0426404887304605e-05, |
| "loss": 1.7599102020263673, |
| "memory(GiB)": 41.84, |
| "step": 3105, |
| "token_acc": 0.5617283950617284, |
| "train_speed(iter/s)": 0.58193 |
| }, |
| { |
| "epoch": 1.887135922330097, |
| "grad_norm": 7.146379470825195, |
| "learning_rate": 3.0280323588867586e-05, |
| "loss": 1.814478302001953, |
| "memory(GiB)": 41.84, |
| "step": 3110, |
| "token_acc": 0.5836177474402731, |
| "train_speed(iter/s)": 0.582031 |
| }, |
| { |
| "epoch": 1.8901699029126213, |
| "grad_norm": 4.842132568359375, |
| "learning_rate": 3.0134441349627997e-05, |
| "loss": 2.0125823974609376, |
| "memory(GiB)": 41.84, |
| "step": 3115, |
| "token_acc": 0.575, |
| "train_speed(iter/s)": 0.581976 |
| }, |
| { |
| "epoch": 1.8932038834951457, |
| "grad_norm": 7.434795379638672, |
| "learning_rate": 2.9988759642186097e-05, |
| "loss": 2.0929500579833986, |
| "memory(GiB)": 41.84, |
| "step": 3120, |
| "token_acc": 0.5663956639566395, |
| "train_speed(iter/s)": 0.582017 |
| }, |
| { |
| "epoch": 1.89623786407767, |
| "grad_norm": 15.827396392822266, |
| "learning_rate": 2.9843279937117997e-05, |
| "loss": 2.314325141906738, |
| "memory(GiB)": 41.84, |
| "step": 3125, |
| "token_acc": 0.5579399141630901, |
| "train_speed(iter/s)": 0.582137 |
| }, |
| { |
| "epoch": 1.8992718446601942, |
| "grad_norm": 7.544915199279785, |
| "learning_rate": 2.9698003702960586e-05, |
| "loss": 2.055324745178223, |
| "memory(GiB)": 41.84, |
| "step": 3130, |
| "token_acc": 0.5213903743315508, |
| "train_speed(iter/s)": 0.5821 |
| }, |
| { |
| "epoch": 1.9023058252427183, |
| "grad_norm": 6.31001091003418, |
| "learning_rate": 2.9552932406196876e-05, |
| "loss": 1.8344003677368164, |
| "memory(GiB)": 41.84, |
| "step": 3135, |
| "token_acc": 0.5980707395498392, |
| "train_speed(iter/s)": 0.582162 |
| }, |
| { |
| "epoch": 1.9053398058252426, |
| "grad_norm": 9.230671882629395, |
| "learning_rate": 2.94080675112412e-05, |
| "loss": 1.9021150588989257, |
| "memory(GiB)": 41.84, |
| "step": 3140, |
| "token_acc": 0.5845070422535211, |
| "train_speed(iter/s)": 0.582193 |
| }, |
| { |
| "epoch": 1.908373786407767, |
| "grad_norm": 7.505317211151123, |
| "learning_rate": 2.9263410480424303e-05, |
| "loss": 2.2937973022460936, |
| "memory(GiB)": 41.84, |
| "step": 3145, |
| "token_acc": 0.5370370370370371, |
| "train_speed(iter/s)": 0.582135 |
| }, |
| { |
| "epoch": 1.9114077669902914, |
| "grad_norm": 11.365267753601074, |
| "learning_rate": 2.9118962773978693e-05, |
| "loss": 2.124867057800293, |
| "memory(GiB)": 41.84, |
| "step": 3150, |
| "token_acc": 0.5379310344827586, |
| "train_speed(iter/s)": 0.582048 |
| }, |
| { |
| "epoch": 1.9144417475728155, |
| "grad_norm": 6.946807861328125, |
| "learning_rate": 2.8974725850023886e-05, |
| "loss": 1.7865402221679687, |
| "memory(GiB)": 41.84, |
| "step": 3155, |
| "token_acc": 0.6114649681528662, |
| "train_speed(iter/s)": 0.582042 |
| }, |
| { |
| "epoch": 1.9174757281553398, |
| "grad_norm": 7.430286884307861, |
| "learning_rate": 2.8830701164551598e-05, |
| "loss": 2.096043014526367, |
| "memory(GiB)": 41.84, |
| "step": 3160, |
| "token_acc": 0.5474006116207951, |
| "train_speed(iter/s)": 0.581966 |
| }, |
| { |
| "epoch": 1.920509708737864, |
| "grad_norm": 5.716464996337891, |
| "learning_rate": 2.8686890171411175e-05, |
| "loss": 1.8883914947509766, |
| "memory(GiB)": 41.84, |
| "step": 3165, |
| "token_acc": 0.55, |
| "train_speed(iter/s)": 0.582002 |
| }, |
| { |
| "epoch": 1.9235436893203883, |
| "grad_norm": 6.345276832580566, |
| "learning_rate": 2.8543294322294846e-05, |
| "loss": 1.888068962097168, |
| "memory(GiB)": 41.84, |
| "step": 3170, |
| "token_acc": 0.5714285714285714, |
| "train_speed(iter/s)": 0.581965 |
| }, |
| { |
| "epoch": 1.9265776699029127, |
| "grad_norm": 8.231746673583984, |
| "learning_rate": 2.8399915066723072e-05, |
| "loss": 2.047636795043945, |
| "memory(GiB)": 41.84, |
| "step": 3175, |
| "token_acc": 0.5, |
| "train_speed(iter/s)": 0.581961 |
| }, |
| { |
| "epoch": 1.929611650485437, |
| "grad_norm": 7.52333927154541, |
| "learning_rate": 2.8256753852029915e-05, |
| "loss": 1.964263916015625, |
| "memory(GiB)": 41.84, |
| "step": 3180, |
| "token_acc": 0.5566666666666666, |
| "train_speed(iter/s)": 0.581957 |
| }, |
| { |
| "epoch": 1.9326456310679612, |
| "grad_norm": 8.115636825561523, |
| "learning_rate": 2.811381212334847e-05, |
| "loss": 1.974155807495117, |
| "memory(GiB)": 41.84, |
| "step": 3185, |
| "token_acc": 0.5273775216138329, |
| "train_speed(iter/s)": 0.58201 |
| }, |
| { |
| "epoch": 1.9356796116504853, |
| "grad_norm": 10.928778648376465, |
| "learning_rate": 2.7971091323596177e-05, |
| "loss": 1.7765790939331054, |
| "memory(GiB)": 41.84, |
| "step": 3190, |
| "token_acc": 0.5967078189300411, |
| "train_speed(iter/s)": 0.581997 |
| }, |
| { |
| "epoch": 1.9387135922330097, |
| "grad_norm": 10.940017700195312, |
| "learning_rate": 2.782859289346038e-05, |
| "loss": 2.00123291015625, |
| "memory(GiB)": 41.84, |
| "step": 3195, |
| "token_acc": 0.5628930817610063, |
| "train_speed(iter/s)": 0.58186 |
| }, |
| { |
| "epoch": 1.941747572815534, |
| "grad_norm": 5.479226112365723, |
| "learning_rate": 2.7686318271383714e-05, |
| "loss": 1.7830612182617187, |
| "memory(GiB)": 41.84, |
| "step": 3200, |
| "token_acc": 0.5899705014749262, |
| "train_speed(iter/s)": 0.581899 |
| }, |
| { |
| "epoch": 1.9447815533980584, |
| "grad_norm": 9.235628128051758, |
| "learning_rate": 2.7544268893549573e-05, |
| "loss": 2.1630695343017576, |
| "memory(GiB)": 41.84, |
| "step": 3205, |
| "token_acc": 0.5141843971631206, |
| "train_speed(iter/s)": 0.581774 |
| }, |
| { |
| "epoch": 1.9478155339805825, |
| "grad_norm": 9.116209030151367, |
| "learning_rate": 2.740244619386768e-05, |
| "loss": 1.9152229309082032, |
| "memory(GiB)": 41.84, |
| "step": 3210, |
| "token_acc": 0.5373134328358209, |
| "train_speed(iter/s)": 0.581685 |
| }, |
| { |
| "epoch": 1.9508495145631068, |
| "grad_norm": 8.476284980773926, |
| "learning_rate": 2.726085160395948e-05, |
| "loss": 1.9020435333251953, |
| "memory(GiB)": 41.84, |
| "step": 3215, |
| "token_acc": 0.6095238095238096, |
| "train_speed(iter/s)": 0.581688 |
| }, |
| { |
| "epoch": 1.953883495145631, |
| "grad_norm": 6.1975226402282715, |
| "learning_rate": 2.7119486553143904e-05, |
| "loss": 1.6950944900512694, |
| "memory(GiB)": 41.84, |
| "step": 3220, |
| "token_acc": 0.5774193548387097, |
| "train_speed(iter/s)": 0.58168 |
| }, |
| { |
| "epoch": 1.9569174757281553, |
| "grad_norm": 8.92437744140625, |
| "learning_rate": 2.6978352468422685e-05, |
| "loss": 1.9295099258422852, |
| "memory(GiB)": 41.84, |
| "step": 3225, |
| "token_acc": 0.5393586005830904, |
| "train_speed(iter/s)": 0.581539 |
| }, |
| { |
| "epoch": 1.9599514563106797, |
| "grad_norm": 7.443687438964844, |
| "learning_rate": 2.683745077446616e-05, |
| "loss": 1.8496671676635743, |
| "memory(GiB)": 41.84, |
| "step": 3230, |
| "token_acc": 0.6013289036544851, |
| "train_speed(iter/s)": 0.581521 |
| }, |
| { |
| "epoch": 1.962985436893204, |
| "grad_norm": 8.71033763885498, |
| "learning_rate": 2.6696782893598816e-05, |
| "loss": 1.8758098602294921, |
| "memory(GiB)": 41.84, |
| "step": 3235, |
| "token_acc": 0.5804195804195804, |
| "train_speed(iter/s)": 0.581603 |
| }, |
| { |
| "epoch": 1.9660194174757282, |
| "grad_norm": 9.311905860900879, |
| "learning_rate": 2.6556350245784833e-05, |
| "loss": 2.088191795349121, |
| "memory(GiB)": 41.84, |
| "step": 3240, |
| "token_acc": 0.5805626598465473, |
| "train_speed(iter/s)": 0.581562 |
| }, |
| { |
| "epoch": 1.9690533980582523, |
| "grad_norm": 7.559510707855225, |
| "learning_rate": 2.641615424861399e-05, |
| "loss": 2.090311050415039, |
| "memory(GiB)": 41.84, |
| "step": 3245, |
| "token_acc": 0.5533980582524272, |
| "train_speed(iter/s)": 0.58146 |
| }, |
| { |
| "epoch": 1.9720873786407767, |
| "grad_norm": 9.421564102172852, |
| "learning_rate": 2.6276196317287083e-05, |
| "loss": 2.2272558212280273, |
| "memory(GiB)": 41.84, |
| "step": 3250, |
| "token_acc": 0.5223463687150838, |
| "train_speed(iter/s)": 0.581307 |
| }, |
| { |
| "epoch": 1.975121359223301, |
| "grad_norm": 6.799111843109131, |
| "learning_rate": 2.6136477864601817e-05, |
| "loss": 2.049495887756348, |
| "memory(GiB)": 41.84, |
| "step": 3255, |
| "token_acc": 0.5488215488215489, |
| "train_speed(iter/s)": 0.581288 |
| }, |
| { |
| "epoch": 1.9781553398058254, |
| "grad_norm": 6.001493453979492, |
| "learning_rate": 2.5997000300938506e-05, |
| "loss": 1.8592962265014648, |
| "memory(GiB)": 41.84, |
| "step": 3260, |
| "token_acc": 0.5870206489675516, |
| "train_speed(iter/s)": 0.581194 |
| }, |
| { |
| "epoch": 1.9811893203883495, |
| "grad_norm": 8.738608360290527, |
| "learning_rate": 2.585776503424576e-05, |
| "loss": 2.017384719848633, |
| "memory(GiB)": 41.84, |
| "step": 3265, |
| "token_acc": 0.5529801324503312, |
| "train_speed(iter/s)": 0.581142 |
| }, |
| { |
| "epoch": 1.9842233009708736, |
| "grad_norm": 9.666224479675293, |
| "learning_rate": 2.5718773470026448e-05, |
| "loss": 1.999835205078125, |
| "memory(GiB)": 41.84, |
| "step": 3270, |
| "token_acc": 0.5418060200668896, |
| "train_speed(iter/s)": 0.581015 |
| }, |
| { |
| "epoch": 1.987257281553398, |
| "grad_norm": 10.135787963867188, |
| "learning_rate": 2.5580027011323282e-05, |
| "loss": 1.6806678771972656, |
| "memory(GiB)": 41.84, |
| "step": 3275, |
| "token_acc": 0.5893536121673004, |
| "train_speed(iter/s)": 0.580994 |
| }, |
| { |
| "epoch": 1.9902912621359223, |
| "grad_norm": 7.922843933105469, |
| "learning_rate": 2.544152705870483e-05, |
| "loss": 2.177354431152344, |
| "memory(GiB)": 41.84, |
| "step": 3280, |
| "token_acc": 0.5117056856187291, |
| "train_speed(iter/s)": 0.581002 |
| }, |
| { |
| "epoch": 1.9933252427184467, |
| "grad_norm": 6.94931697845459, |
| "learning_rate": 2.5303275010251315e-05, |
| "loss": 2.506937026977539, |
| "memory(GiB)": 41.84, |
| "step": 3285, |
| "token_acc": 0.4811594202898551, |
| "train_speed(iter/s)": 0.581043 |
| }, |
| { |
| "epoch": 1.9963592233009708, |
| "grad_norm": 10.3767728805542, |
| "learning_rate": 2.5165272261540458e-05, |
| "loss": 2.0383968353271484, |
| "memory(GiB)": 41.84, |
| "step": 3290, |
| "token_acc": 0.5487364620938628, |
| "train_speed(iter/s)": 0.581 |
| }, |
| { |
| "epoch": 1.9993932038834952, |
| "grad_norm": 9.176785469055176, |
| "learning_rate": 2.5027520205633537e-05, |
| "loss": 2.0018213272094725, |
| "memory(GiB)": 41.84, |
| "step": 3295, |
| "token_acc": 0.5522875816993464, |
| "train_speed(iter/s)": 0.580883 |
| }, |
| { |
| "epoch": 2.0024271844660193, |
| "grad_norm": 6.717225551605225, |
| "learning_rate": 2.4890020233061117e-05, |
| "loss": 1.7098587036132813, |
| "memory(GiB)": 41.84, |
| "step": 3300, |
| "token_acc": 0.5948905109489051, |
| "train_speed(iter/s)": 0.580883 |
| }, |
| { |
| "epoch": 2.0054611650485437, |
| "grad_norm": 5.7973246574401855, |
| "learning_rate": 2.4752773731809176e-05, |
| "loss": 2.0262834548950197, |
| "memory(GiB)": 41.84, |
| "step": 3305, |
| "token_acc": 0.558641975308642, |
| "train_speed(iter/s)": 0.580856 |
| }, |
| { |
| "epoch": 2.008495145631068, |
| "grad_norm": 7.4671831130981445, |
| "learning_rate": 2.461578208730504e-05, |
| "loss": 1.7233488082885742, |
| "memory(GiB)": 41.84, |
| "step": 3310, |
| "token_acc": 0.6162790697674418, |
| "train_speed(iter/s)": 0.580794 |
| }, |
| { |
| "epoch": 2.0115291262135924, |
| "grad_norm": 12.061534881591797, |
| "learning_rate": 2.447904668240338e-05, |
| "loss": 1.8241962432861327, |
| "memory(GiB)": 41.84, |
| "step": 3315, |
| "token_acc": 0.6076923076923076, |
| "train_speed(iter/s)": 0.580734 |
| }, |
| { |
| "epoch": 2.0145631067961167, |
| "grad_norm": 8.090734481811523, |
| "learning_rate": 2.4342568897372304e-05, |
| "loss": 1.7618919372558595, |
| "memory(GiB)": 41.84, |
| "step": 3320, |
| "token_acc": 0.5701219512195121, |
| "train_speed(iter/s)": 0.58063 |
| }, |
| { |
| "epoch": 2.0175970873786406, |
| "grad_norm": 9.886768341064453, |
| "learning_rate": 2.4206350109879322e-05, |
| "loss": 2.333799362182617, |
| "memory(GiB)": 41.84, |
| "step": 3325, |
| "token_acc": 0.5303514376996805, |
| "train_speed(iter/s)": 0.58057 |
| }, |
| { |
| "epoch": 2.020631067961165, |
| "grad_norm": 9.405782699584961, |
| "learning_rate": 2.4070391694977578e-05, |
| "loss": 1.9533946990966797, |
| "memory(GiB)": 41.84, |
| "step": 3330, |
| "token_acc": 0.5647840531561462, |
| "train_speed(iter/s)": 0.580546 |
| }, |
| { |
| "epoch": 2.0236650485436893, |
| "grad_norm": 8.449411392211914, |
| "learning_rate": 2.3934695025091863e-05, |
| "loss": 1.9143606185913087, |
| "memory(GiB)": 41.84, |
| "step": 3335, |
| "token_acc": 0.5501618122977346, |
| "train_speed(iter/s)": 0.580463 |
| }, |
| { |
| "epoch": 2.0266990291262137, |
| "grad_norm": 9.61319351196289, |
| "learning_rate": 2.3799261470004817e-05, |
| "loss": 1.825465202331543, |
| "memory(GiB)": 41.84, |
| "step": 3340, |
| "token_acc": 0.5772357723577236, |
| "train_speed(iter/s)": 0.580476 |
| }, |
| { |
| "epoch": 2.029733009708738, |
| "grad_norm": 10.004016876220703, |
| "learning_rate": 2.3664092396843078e-05, |
| "loss": 2.128991889953613, |
| "memory(GiB)": 41.84, |
| "step": 3345, |
| "token_acc": 0.5173501577287066, |
| "train_speed(iter/s)": 0.58049 |
| }, |
| { |
| "epoch": 2.032766990291262, |
| "grad_norm": 8.138049125671387, |
| "learning_rate": 2.3529189170063448e-05, |
| "loss": 2.3146188735961912, |
| "memory(GiB)": 41.84, |
| "step": 3350, |
| "token_acc": 0.5306122448979592, |
| "train_speed(iter/s)": 0.580442 |
| }, |
| { |
| "epoch": 2.0358009708737863, |
| "grad_norm": 8.229063987731934, |
| "learning_rate": 2.3394553151439207e-05, |
| "loss": 1.8358327865600585, |
| "memory(GiB)": 41.84, |
| "step": 3355, |
| "token_acc": 0.583941605839416, |
| "train_speed(iter/s)": 0.580463 |
| }, |
| { |
| "epoch": 2.0388349514563107, |
| "grad_norm": 7.304425239562988, |
| "learning_rate": 2.3260185700046294e-05, |
| "loss": 1.8064495086669923, |
| "memory(GiB)": 41.84, |
| "step": 3360, |
| "token_acc": 0.5791245791245792, |
| "train_speed(iter/s)": 0.58043 |
| }, |
| { |
| "epoch": 2.041868932038835, |
| "grad_norm": 9.741589546203613, |
| "learning_rate": 2.3126088172249617e-05, |
| "loss": 1.8935234069824218, |
| "memory(GiB)": 41.84, |
| "step": 3365, |
| "token_acc": 0.5535055350553506, |
| "train_speed(iter/s)": 0.580243 |
| }, |
| { |
| "epoch": 2.0449029126213594, |
| "grad_norm": 11.936101913452148, |
| "learning_rate": 2.299226192168935e-05, |
| "loss": 1.8312896728515624, |
| "memory(GiB)": 41.84, |
| "step": 3370, |
| "token_acc": 0.5755627009646302, |
| "train_speed(iter/s)": 0.580241 |
| }, |
| { |
| "epoch": 2.0479368932038833, |
| "grad_norm": 8.954520225524902, |
| "learning_rate": 2.28587082992673e-05, |
| "loss": 1.9918130874633788, |
| "memory(GiB)": 41.84, |
| "step": 3375, |
| "token_acc": 0.5692307692307692, |
| "train_speed(iter/s)": 0.580236 |
| }, |
| { |
| "epoch": 2.0509708737864076, |
| "grad_norm": 7.279824256896973, |
| "learning_rate": 2.2725428653133178e-05, |
| "loss": 2.056449317932129, |
| "memory(GiB)": 41.84, |
| "step": 3380, |
| "token_acc": 0.5582655826558266, |
| "train_speed(iter/s)": 0.580216 |
| }, |
| { |
| "epoch": 2.054004854368932, |
| "grad_norm": 8.318132400512695, |
| "learning_rate": 2.2592424328671125e-05, |
| "loss": 1.845474624633789, |
| "memory(GiB)": 41.84, |
| "step": 3385, |
| "token_acc": 0.5753846153846154, |
| "train_speed(iter/s)": 0.580119 |
| }, |
| { |
| "epoch": 2.0570388349514563, |
| "grad_norm": 8.473575592041016, |
| "learning_rate": 2.2459696668486025e-05, |
| "loss": 2.0317916870117188, |
| "memory(GiB)": 41.84, |
| "step": 3390, |
| "token_acc": 0.5693950177935944, |
| "train_speed(iter/s)": 0.580048 |
| }, |
| { |
| "epoch": 2.0600728155339807, |
| "grad_norm": 6.581578254699707, |
| "learning_rate": 2.2327247012390005e-05, |
| "loss": 1.8874988555908203, |
| "memory(GiB)": 41.84, |
| "step": 3395, |
| "token_acc": 0.5551839464882943, |
| "train_speed(iter/s)": 0.579895 |
| }, |
| { |
| "epoch": 2.063106796116505, |
| "grad_norm": 9.253079414367676, |
| "learning_rate": 2.2195076697388915e-05, |
| "loss": 1.6856924057006837, |
| "memory(GiB)": 41.84, |
| "step": 3400, |
| "token_acc": 0.6493506493506493, |
| "train_speed(iter/s)": 0.57988 |
| }, |
| { |
| "epoch": 2.066140776699029, |
| "grad_norm": 8.945847511291504, |
| "learning_rate": 2.2063187057668727e-05, |
| "loss": 1.6917535781860351, |
| "memory(GiB)": 41.84, |
| "step": 3405, |
| "token_acc": 0.5947712418300654, |
| "train_speed(iter/s)": 0.57992 |
| }, |
| { |
| "epoch": 2.0691747572815533, |
| "grad_norm": 9.185718536376953, |
| "learning_rate": 2.1931579424582283e-05, |
| "loss": 1.7603189468383789, |
| "memory(GiB)": 41.84, |
| "step": 3410, |
| "token_acc": 0.5736434108527132, |
| "train_speed(iter/s)": 0.579988 |
| }, |
| { |
| "epoch": 2.0722087378640777, |
| "grad_norm": 6.9922332763671875, |
| "learning_rate": 2.18002551266356e-05, |
| "loss": 2.1215755462646486, |
| "memory(GiB)": 41.84, |
| "step": 3415, |
| "token_acc": 0.5202312138728323, |
| "train_speed(iter/s)": 0.579988 |
| }, |
| { |
| "epoch": 2.075242718446602, |
| "grad_norm": 8.512064933776855, |
| "learning_rate": 2.166921548947466e-05, |
| "loss": 1.720651626586914, |
| "memory(GiB)": 41.84, |
| "step": 3420, |
| "token_acc": 0.5985915492957746, |
| "train_speed(iter/s)": 0.579814 |
| }, |
| { |
| "epoch": 2.0782766990291264, |
| "grad_norm": 8.933260917663574, |
| "learning_rate": 2.1538461835871937e-05, |
| "loss": 1.8302701950073241, |
| "memory(GiB)": 41.84, |
| "step": 3425, |
| "token_acc": 0.5993975903614458, |
| "train_speed(iter/s)": 0.579754 |
| }, |
| { |
| "epoch": 2.0813106796116503, |
| "grad_norm": 7.324397087097168, |
| "learning_rate": 2.1407995485713007e-05, |
| "loss": 1.9634611129760742, |
| "memory(GiB)": 41.84, |
| "step": 3430, |
| "token_acc": 0.5775075987841946, |
| "train_speed(iter/s)": 0.579633 |
| }, |
| { |
| "epoch": 2.0843446601941746, |
| "grad_norm": 6.617276191711426, |
| "learning_rate": 2.127781775598339e-05, |
| "loss": 1.535646343231201, |
| "memory(GiB)": 41.84, |
| "step": 3435, |
| "token_acc": 0.62, |
| "train_speed(iter/s)": 0.579668 |
| }, |
| { |
| "epoch": 2.087378640776699, |
| "grad_norm": 8.722604751586914, |
| "learning_rate": 2.1147929960755032e-05, |
| "loss": 1.8054920196533204, |
| "memory(GiB)": 41.84, |
| "step": 3440, |
| "token_acc": 0.5772058823529411, |
| "train_speed(iter/s)": 0.579792 |
| }, |
| { |
| "epoch": 2.0904126213592233, |
| "grad_norm": 7.9137043952941895, |
| "learning_rate": 2.101833341117319e-05, |
| "loss": 1.9117881774902343, |
| "memory(GiB)": 41.84, |
| "step": 3445, |
| "token_acc": 0.5891238670694864, |
| "train_speed(iter/s)": 0.579887 |
| }, |
| { |
| "epoch": 2.0934466019417477, |
| "grad_norm": 8.221436500549316, |
| "learning_rate": 2.08890294154432e-05, |
| "loss": 2.002272033691406, |
| "memory(GiB)": 41.84, |
| "step": 3450, |
| "token_acc": 0.5616883116883117, |
| "train_speed(iter/s)": 0.580016 |
| }, |
| { |
| "epoch": 2.096480582524272, |
| "grad_norm": 8.50936222076416, |
| "learning_rate": 2.0760019278817123e-05, |
| "loss": 1.9437885284423828, |
| "memory(GiB)": 44.28, |
| "step": 3455, |
| "token_acc": 0.6167247386759582, |
| "train_speed(iter/s)": 0.580014 |
| }, |
| { |
| "epoch": 2.099514563106796, |
| "grad_norm": 8.858839988708496, |
| "learning_rate": 2.0631304303580824e-05, |
| "loss": 1.8394168853759765, |
| "memory(GiB)": 44.28, |
| "step": 3460, |
| "token_acc": 0.5693430656934306, |
| "train_speed(iter/s)": 0.580039 |
| }, |
| { |
| "epoch": 2.1025485436893203, |
| "grad_norm": 7.461985111236572, |
| "learning_rate": 2.0502885789040537e-05, |
| "loss": 2.222452163696289, |
| "memory(GiB)": 44.28, |
| "step": 3465, |
| "token_acc": 0.5231607629427792, |
| "train_speed(iter/s)": 0.580161 |
| }, |
| { |
| "epoch": 2.1055825242718447, |
| "grad_norm": 6.139802932739258, |
| "learning_rate": 2.037476503150997e-05, |
| "loss": 1.6303333282470702, |
| "memory(GiB)": 44.28, |
| "step": 3470, |
| "token_acc": 0.6225165562913907, |
| "train_speed(iter/s)": 0.580274 |
| }, |
| { |
| "epoch": 2.108616504854369, |
| "grad_norm": 9.019342422485352, |
| "learning_rate": 2.024694332429713e-05, |
| "loss": 2.1092754364013673, |
| "memory(GiB)": 44.28, |
| "step": 3475, |
| "token_acc": 0.5068493150684932, |
| "train_speed(iter/s)": 0.580406 |
| }, |
| { |
| "epoch": 2.1116504854368934, |
| "grad_norm": 10.167961120605469, |
| "learning_rate": 2.011942195769122e-05, |
| "loss": 1.965473747253418, |
| "memory(GiB)": 44.28, |
| "step": 3480, |
| "token_acc": 0.5733788395904437, |
| "train_speed(iter/s)": 0.580421 |
| }, |
| { |
| "epoch": 2.1146844660194173, |
| "grad_norm": 11.388608932495117, |
| "learning_rate": 1.9992202218949784e-05, |
| "loss": 1.9142690658569337, |
| "memory(GiB)": 44.28, |
| "step": 3485, |
| "token_acc": 0.5517241379310345, |
| "train_speed(iter/s)": 0.58051 |
| }, |
| { |
| "epoch": 2.1177184466019416, |
| "grad_norm": 6.913421154022217, |
| "learning_rate": 1.986528539228548e-05, |
| "loss": 1.9621810913085938, |
| "memory(GiB)": 44.28, |
| "step": 3490, |
| "token_acc": 0.55, |
| "train_speed(iter/s)": 0.580592 |
| }, |
| { |
| "epoch": 2.120752427184466, |
| "grad_norm": 7.60167121887207, |
| "learning_rate": 1.9738672758853305e-05, |
| "loss": 1.8437973022460938, |
| "memory(GiB)": 44.28, |
| "step": 3495, |
| "token_acc": 0.5822368421052632, |
| "train_speed(iter/s)": 0.580641 |
| }, |
| { |
| "epoch": 2.1237864077669903, |
| "grad_norm": 7.163271427154541, |
| "learning_rate": 1.9612365596737598e-05, |
| "loss": 1.6543169021606445, |
| "memory(GiB)": 44.28, |
| "step": 3500, |
| "token_acc": 0.6292134831460674, |
| "train_speed(iter/s)": 0.58071 |
| }, |
| { |
| "epoch": 2.1237864077669903, |
| "eval_loss": 1.866715431213379, |
| "eval_runtime": 11.5427, |
| "eval_samples_per_second": 8.663, |
| "eval_steps_per_second": 8.663, |
| "eval_token_acc": 0.5221745350500715, |
| "step": 3500 |
| }, |
| { |
| "epoch": 2.1268203883495147, |
| "grad_norm": 8.629687309265137, |
| "learning_rate": 1.948636518093906e-05, |
| "loss": 2.077587127685547, |
| "memory(GiB)": 44.29, |
| "step": 3505, |
| "token_acc": 0.5247895229186156, |
| "train_speed(iter/s)": 0.579649 |
| }, |
| { |
| "epoch": 2.1298543689320386, |
| "grad_norm": 9.276599884033203, |
| "learning_rate": 1.9360672783362076e-05, |
| "loss": 1.759820556640625, |
| "memory(GiB)": 44.29, |
| "step": 3510, |
| "token_acc": 0.5964912280701754, |
| "train_speed(iter/s)": 0.579656 |
| }, |
| { |
| "epoch": 2.132888349514563, |
| "grad_norm": 7.651179790496826, |
| "learning_rate": 1.9235289672801653e-05, |
| "loss": 2.0451793670654297, |
| "memory(GiB)": 44.29, |
| "step": 3515, |
| "token_acc": 0.4962025316455696, |
| "train_speed(iter/s)": 0.579702 |
| }, |
| { |
| "epoch": 2.1359223300970873, |
| "grad_norm": 8.811480522155762, |
| "learning_rate": 1.911021711493077e-05, |
| "loss": 2.1489105224609375, |
| "memory(GiB)": 44.29, |
| "step": 3520, |
| "token_acc": 0.5483870967741935, |
| "train_speed(iter/s)": 0.579789 |
| }, |
| { |
| "epoch": 2.1389563106796117, |
| "grad_norm": 7.404130935668945, |
| "learning_rate": 1.8985456372287534e-05, |
| "loss": 1.7454706192016602, |
| "memory(GiB)": 44.29, |
| "step": 3525, |
| "token_acc": 0.6334405144694534, |
| "train_speed(iter/s)": 0.579816 |
| }, |
| { |
| "epoch": 2.141990291262136, |
| "grad_norm": 10.67794132232666, |
| "learning_rate": 1.8861008704262457e-05, |
| "loss": 1.8724552154541017, |
| "memory(GiB)": 44.29, |
| "step": 3530, |
| "token_acc": 0.5796610169491525, |
| "train_speed(iter/s)": 0.579807 |
| }, |
| { |
| "epoch": 2.1450242718446604, |
| "grad_norm": 8.964271545410156, |
| "learning_rate": 1.8736875367085755e-05, |
| "loss": 1.8260086059570313, |
| "memory(GiB)": 44.29, |
| "step": 3535, |
| "token_acc": 0.5640138408304498, |
| "train_speed(iter/s)": 0.579826 |
| }, |
| { |
| "epoch": 2.1480582524271843, |
| "grad_norm": 8.768270492553711, |
| "learning_rate": 1.8613057613814584e-05, |
| "loss": 1.9611518859863282, |
| "memory(GiB)": 44.29, |
| "step": 3540, |
| "token_acc": 0.552901023890785, |
| "train_speed(iter/s)": 0.579777 |
| }, |
| { |
| "epoch": 2.1510922330097086, |
| "grad_norm": 9.284390449523926, |
| "learning_rate": 1.8489556694320513e-05, |
| "loss": 2.0381515502929686, |
| "memory(GiB)": 44.29, |
| "step": 3545, |
| "token_acc": 0.5503875968992248, |
| "train_speed(iter/s)": 0.579814 |
| }, |
| { |
| "epoch": 2.154126213592233, |
| "grad_norm": 11.225659370422363, |
| "learning_rate": 1.836637385527684e-05, |
| "loss": 2.1475587844848634, |
| "memory(GiB)": 44.29, |
| "step": 3550, |
| "token_acc": 0.5677233429394812, |
| "train_speed(iter/s)": 0.579745 |
| }, |
| { |
| "epoch": 2.1571601941747574, |
| "grad_norm": 7.625835418701172, |
| "learning_rate": 1.8243510340146015e-05, |
| "loss": 1.9312858581542969, |
| "memory(GiB)": 44.29, |
| "step": 3555, |
| "token_acc": 0.5634328358208955, |
| "train_speed(iter/s)": 0.579783 |
| }, |
| { |
| "epoch": 2.1601941747572817, |
| "grad_norm": 9.190287590026855, |
| "learning_rate": 1.8120967389167076e-05, |
| "loss": 1.5170929908752442, |
| "memory(GiB)": 44.29, |
| "step": 3560, |
| "token_acc": 0.6521739130434783, |
| "train_speed(iter/s)": 0.579754 |
| }, |
| { |
| "epoch": 2.163228155339806, |
| "grad_norm": 11.153077125549316, |
| "learning_rate": 1.799874623934318e-05, |
| "loss": 1.931208610534668, |
| "memory(GiB)": 44.29, |
| "step": 3565, |
| "token_acc": 0.5909090909090909, |
| "train_speed(iter/s)": 0.579843 |
| }, |
| { |
| "epoch": 2.16626213592233, |
| "grad_norm": 6.920065879821777, |
| "learning_rate": 1.7876848124429014e-05, |
| "loss": 1.7487638473510743, |
| "memory(GiB)": 44.29, |
| "step": 3570, |
| "token_acc": 0.5811209439528023, |
| "train_speed(iter/s)": 0.579783 |
| }, |
| { |
| "epoch": 2.1692961165048543, |
| "grad_norm": 9.68315601348877, |
| "learning_rate": 1.775527427491847e-05, |
| "loss": 1.9304796218872071, |
| "memory(GiB)": 44.29, |
| "step": 3575, |
| "token_acc": 0.5424354243542435, |
| "train_speed(iter/s)": 0.579873 |
| }, |
| { |
| "epoch": 2.1723300970873787, |
| "grad_norm": 8.648472785949707, |
| "learning_rate": 1.7634025918032132e-05, |
| "loss": 1.822089385986328, |
| "memory(GiB)": 44.29, |
| "step": 3580, |
| "token_acc": 0.615625, |
| "train_speed(iter/s)": 0.579913 |
| }, |
| { |
| "epoch": 2.175364077669903, |
| "grad_norm": 5.941222190856934, |
| "learning_rate": 1.7513104277704926e-05, |
| "loss": 1.5487011909484862, |
| "memory(GiB)": 44.29, |
| "step": 3585, |
| "token_acc": 0.6267123287671232, |
| "train_speed(iter/s)": 0.579944 |
| }, |
| { |
| "epoch": 2.1783980582524274, |
| "grad_norm": 8.311307907104492, |
| "learning_rate": 1.739251057457377e-05, |
| "loss": 1.876582145690918, |
| "memory(GiB)": 44.29, |
| "step": 3590, |
| "token_acc": 0.5734463276836158, |
| "train_speed(iter/s)": 0.579958 |
| }, |
| { |
| "epoch": 2.1814320388349513, |
| "grad_norm": 9.144810676574707, |
| "learning_rate": 1.7272246025965178e-05, |
| "loss": 2.155200386047363, |
| "memory(GiB)": 44.29, |
| "step": 3595, |
| "token_acc": 0.5140845070422535, |
| "train_speed(iter/s)": 0.579869 |
| }, |
| { |
| "epoch": 2.1844660194174756, |
| "grad_norm": 7.681180953979492, |
| "learning_rate": 1.7152311845883095e-05, |
| "loss": 1.7877147674560547, |
| "memory(GiB)": 44.29, |
| "step": 3600, |
| "token_acc": 0.5666666666666667, |
| "train_speed(iter/s)": 0.57981 |
| }, |
| { |
| "epoch": 2.1875, |
| "grad_norm": 8.98862361907959, |
| "learning_rate": 1.703270924499656e-05, |
| "loss": 1.7724479675292968, |
| "memory(GiB)": 44.29, |
| "step": 3605, |
| "token_acc": 0.5951557093425606, |
| "train_speed(iter/s)": 0.57974 |
| }, |
| { |
| "epoch": 2.1905339805825244, |
| "grad_norm": 6.949456214904785, |
| "learning_rate": 1.691343943062749e-05, |
| "loss": 1.7420495986938476, |
| "memory(GiB)": 44.29, |
| "step": 3610, |
| "token_acc": 0.5741935483870968, |
| "train_speed(iter/s)": 0.579755 |
| }, |
| { |
| "epoch": 2.1935679611650487, |
| "grad_norm": 7.481090545654297, |
| "learning_rate": 1.6794503606738548e-05, |
| "loss": 2.0047124862670898, |
| "memory(GiB)": 44.29, |
| "step": 3615, |
| "token_acc": 0.5398230088495575, |
| "train_speed(iter/s)": 0.57981 |
| }, |
| { |
| "epoch": 2.1966019417475726, |
| "grad_norm": 7.942904472351074, |
| "learning_rate": 1.667590297392086e-05, |
| "loss": 2.1652708053588867, |
| "memory(GiB)": 44.29, |
| "step": 3620, |
| "token_acc": 0.5389048991354467, |
| "train_speed(iter/s)": 0.57981 |
| }, |
| { |
| "epoch": 2.199635922330097, |
| "grad_norm": 7.470623016357422, |
| "learning_rate": 1.6557638729382107e-05, |
| "loss": 1.7064685821533203, |
| "memory(GiB)": 44.29, |
| "step": 3625, |
| "token_acc": 0.6104651162790697, |
| "train_speed(iter/s)": 0.57981 |
| }, |
| { |
| "epoch": 2.2026699029126213, |
| "grad_norm": 6.908362865447998, |
| "learning_rate": 1.6439712066934204e-05, |
| "loss": 1.8296821594238282, |
| "memory(GiB)": 44.29, |
| "step": 3630, |
| "token_acc": 0.5864022662889519, |
| "train_speed(iter/s)": 0.579781 |
| }, |
| { |
| "epoch": 2.2057038834951457, |
| "grad_norm": 7.870819568634033, |
| "learning_rate": 1.632212417698143e-05, |
| "loss": 1.9550270080566405, |
| "memory(GiB)": 44.29, |
| "step": 3635, |
| "token_acc": 0.5835777126099707, |
| "train_speed(iter/s)": 0.579726 |
| }, |
| { |
| "epoch": 2.20873786407767, |
| "grad_norm": 8.01059627532959, |
| "learning_rate": 1.620487624650834e-05, |
| "loss": 1.8678318023681642, |
| "memory(GiB)": 44.29, |
| "step": 3640, |
| "token_acc": 0.61875, |
| "train_speed(iter/s)": 0.579706 |
| }, |
| { |
| "epoch": 2.211771844660194, |
| "grad_norm": 7.753682613372803, |
| "learning_rate": 1.6087969459067708e-05, |
| "loss": 1.5739126205444336, |
| "memory(GiB)": 44.29, |
| "step": 3645, |
| "token_acc": 0.6141479099678456, |
| "train_speed(iter/s)": 0.57974 |
| }, |
| { |
| "epoch": 2.2148058252427183, |
| "grad_norm": 8.250489234924316, |
| "learning_rate": 1.5971404994768797e-05, |
| "loss": 1.9059646606445313, |
| "memory(GiB)": 44.29, |
| "step": 3650, |
| "token_acc": 0.5551470588235294, |
| "train_speed(iter/s)": 0.579782 |
| }, |
| { |
| "epoch": 2.2178398058252426, |
| "grad_norm": 8.499149322509766, |
| "learning_rate": 1.585518403026518e-05, |
| "loss": 2.0898170471191406, |
| "memory(GiB)": 44.29, |
| "step": 3655, |
| "token_acc": 0.59, |
| "train_speed(iter/s)": 0.579868 |
| }, |
| { |
| "epoch": 2.220873786407767, |
| "grad_norm": 9.44747543334961, |
| "learning_rate": 1.5739307738743057e-05, |
| "loss": 1.9359277725219726, |
| "memory(GiB)": 44.29, |
| "step": 3660, |
| "token_acc": 0.5628930817610063, |
| "train_speed(iter/s)": 0.579972 |
| }, |
| { |
| "epoch": 2.2239077669902914, |
| "grad_norm": 6.627506256103516, |
| "learning_rate": 1.5623777289909347e-05, |
| "loss": 1.749598503112793, |
| "memory(GiB)": 44.29, |
| "step": 3665, |
| "token_acc": 0.621160409556314, |
| "train_speed(iter/s)": 0.579956 |
| }, |
| { |
| "epoch": 2.2269417475728157, |
| "grad_norm": 9.652698516845703, |
| "learning_rate": 1.5508593849979812e-05, |
| "loss": 1.946786117553711, |
| "memory(GiB)": 44.29, |
| "step": 3670, |
| "token_acc": 0.5962732919254659, |
| "train_speed(iter/s)": 0.580051 |
| }, |
| { |
| "epoch": 2.2299757281553396, |
| "grad_norm": 11.728522300720215, |
| "learning_rate": 1.5393758581667462e-05, |
| "loss": 1.8440595626831056, |
| "memory(GiB)": 44.29, |
| "step": 3675, |
| "token_acc": 0.5598455598455598, |
| "train_speed(iter/s)": 0.580023 |
| }, |
| { |
| "epoch": 2.233009708737864, |
| "grad_norm": 9.42689323425293, |
| "learning_rate": 1.52792726441706e-05, |
| "loss": 2.040317916870117, |
| "memory(GiB)": 44.29, |
| "step": 3680, |
| "token_acc": 0.5699658703071673, |
| "train_speed(iter/s)": 0.580045 |
| }, |
| { |
| "epoch": 2.2360436893203883, |
| "grad_norm": 9.370969772338867, |
| "learning_rate": 1.5165137193161289e-05, |
| "loss": 1.9046701431274413, |
| "memory(GiB)": 44.29, |
| "step": 3685, |
| "token_acc": 0.5689149560117303, |
| "train_speed(iter/s)": 0.580056 |
| }, |
| { |
| "epoch": 2.2390776699029127, |
| "grad_norm": 9.691226959228516, |
| "learning_rate": 1.505135338077363e-05, |
| "loss": 2.0255931854248046, |
| "memory(GiB)": 44.29, |
| "step": 3690, |
| "token_acc": 0.5156695156695157, |
| "train_speed(iter/s)": 0.579994 |
| }, |
| { |
| "epoch": 2.242111650485437, |
| "grad_norm": 7.089369773864746, |
| "learning_rate": 1.4937922355592054e-05, |
| "loss": 1.856874656677246, |
| "memory(GiB)": 44.29, |
| "step": 3695, |
| "token_acc": 0.5548961424332344, |
| "train_speed(iter/s)": 0.580088 |
| }, |
| { |
| "epoch": 2.2451456310679614, |
| "grad_norm": 8.272523880004883, |
| "learning_rate": 1.482484526263993e-05, |
| "loss": 1.9418399810791016, |
| "memory(GiB)": 44.29, |
| "step": 3700, |
| "token_acc": 0.5479041916167665, |
| "train_speed(iter/s)": 0.580039 |
| }, |
| { |
| "epoch": 2.2481796116504853, |
| "grad_norm": 19.949644088745117, |
| "learning_rate": 1.4712123243367742e-05, |
| "loss": 2.0299962997436523, |
| "memory(GiB)": 44.29, |
| "step": 3705, |
| "token_acc": 0.5658362989323843, |
| "train_speed(iter/s)": 0.580004 |
| }, |
| { |
| "epoch": 2.2512135922330097, |
| "grad_norm": 12.743327140808105, |
| "learning_rate": 1.459975743564178e-05, |
| "loss": 1.9635414123535155, |
| "memory(GiB)": 44.29, |
| "step": 3710, |
| "token_acc": 0.5590277777777778, |
| "train_speed(iter/s)": 0.579959 |
| }, |
| { |
| "epoch": 2.254247572815534, |
| "grad_norm": 6.324910640716553, |
| "learning_rate": 1.4487748973732567e-05, |
| "loss": 2.068693733215332, |
| "memory(GiB)": 44.29, |
| "step": 3715, |
| "token_acc": 0.5710382513661202, |
| "train_speed(iter/s)": 0.579955 |
| }, |
| { |
| "epoch": 2.2572815533980584, |
| "grad_norm": 8.611750602722168, |
| "learning_rate": 1.4376098988303405e-05, |
| "loss": 1.7477828979492187, |
| "memory(GiB)": 44.29, |
| "step": 3720, |
| "token_acc": 0.5627118644067797, |
| "train_speed(iter/s)": 0.579921 |
| }, |
| { |
| "epoch": 2.2603155339805827, |
| "grad_norm": 8.731199264526367, |
| "learning_rate": 1.4264808606398988e-05, |
| "loss": 1.9445646286010743, |
| "memory(GiB)": 44.29, |
| "step": 3725, |
| "token_acc": 0.5650969529085873, |
| "train_speed(iter/s)": 0.579908 |
| }, |
| { |
| "epoch": 2.2633495145631066, |
| "grad_norm": 8.617072105407715, |
| "learning_rate": 1.4153878951433985e-05, |
| "loss": 1.764409065246582, |
| "memory(GiB)": 44.29, |
| "step": 3730, |
| "token_acc": 0.6271186440677966, |
| "train_speed(iter/s)": 0.579952 |
| }, |
| { |
| "epoch": 2.266383495145631, |
| "grad_norm": 6.622957706451416, |
| "learning_rate": 1.4043311143181743e-05, |
| "loss": 1.8772661209106445, |
| "memory(GiB)": 44.29, |
| "step": 3735, |
| "token_acc": 0.5902578796561605, |
| "train_speed(iter/s)": 0.579925 |
| }, |
| { |
| "epoch": 2.2694174757281553, |
| "grad_norm": 7.272273063659668, |
| "learning_rate": 1.3933106297762983e-05, |
| "loss": 1.6700300216674804, |
| "memory(GiB)": 44.29, |
| "step": 3740, |
| "token_acc": 0.6431095406360424, |
| "train_speed(iter/s)": 0.579928 |
| }, |
| { |
| "epoch": 2.2724514563106797, |
| "grad_norm": 8.500160217285156, |
| "learning_rate": 1.38232655276345e-05, |
| "loss": 1.9523941040039063, |
| "memory(GiB)": 44.29, |
| "step": 3745, |
| "token_acc": 0.5574324324324325, |
| "train_speed(iter/s)": 0.579866 |
| }, |
| { |
| "epoch": 2.275485436893204, |
| "grad_norm": 10.481255531311035, |
| "learning_rate": 1.3713789941577947e-05, |
| "loss": 1.935152816772461, |
| "memory(GiB)": 44.29, |
| "step": 3750, |
| "token_acc": 0.5851851851851851, |
| "train_speed(iter/s)": 0.579728 |
| }, |
| { |
| "epoch": 2.278519417475728, |
| "grad_norm": 8.817157745361328, |
| "learning_rate": 1.3604680644688673e-05, |
| "loss": 2.029979705810547, |
| "memory(GiB)": 44.29, |
| "step": 3755, |
| "token_acc": 0.5822784810126582, |
| "train_speed(iter/s)": 0.579834 |
| }, |
| { |
| "epoch": 2.2815533980582523, |
| "grad_norm": 8.694374084472656, |
| "learning_rate": 1.3495938738364495e-05, |
| "loss": 1.8262203216552735, |
| "memory(GiB)": 44.29, |
| "step": 3760, |
| "token_acc": 0.6044776119402985, |
| "train_speed(iter/s)": 0.57994 |
| }, |
| { |
| "epoch": 2.2845873786407767, |
| "grad_norm": 8.665304183959961, |
| "learning_rate": 1.338756532029466e-05, |
| "loss": 1.8623455047607422, |
| "memory(GiB)": 44.29, |
| "step": 3765, |
| "token_acc": 0.5573122529644269, |
| "train_speed(iter/s)": 0.579929 |
| }, |
| { |
| "epoch": 2.287621359223301, |
| "grad_norm": 8.049120903015137, |
| "learning_rate": 1.3279561484448726e-05, |
| "loss": 1.8126539230346679, |
| "memory(GiB)": 44.29, |
| "step": 3770, |
| "token_acc": 0.6254416961130742, |
| "train_speed(iter/s)": 0.579935 |
| }, |
| { |
| "epoch": 2.2906553398058254, |
| "grad_norm": 5.988779544830322, |
| "learning_rate": 1.3171928321065525e-05, |
| "loss": 1.5385218620300294, |
| "memory(GiB)": 44.29, |
| "step": 3775, |
| "token_acc": 0.657243816254417, |
| "train_speed(iter/s)": 0.580025 |
| }, |
| { |
| "epoch": 2.2936893203883493, |
| "grad_norm": 5.922063827514648, |
| "learning_rate": 1.306466691664216e-05, |
| "loss": 1.7553050994873047, |
| "memory(GiB)": 44.29, |
| "step": 3780, |
| "token_acc": 0.5925925925925926, |
| "train_speed(iter/s)": 0.579998 |
| }, |
| { |
| "epoch": 2.2967233009708736, |
| "grad_norm": 6.754926681518555, |
| "learning_rate": 1.2957778353922994e-05, |
| "loss": 1.6977853775024414, |
| "memory(GiB)": 44.29, |
| "step": 3785, |
| "token_acc": 0.5875912408759124, |
| "train_speed(iter/s)": 0.580116 |
| }, |
| { |
| "epoch": 2.299757281553398, |
| "grad_norm": 6.817199230194092, |
| "learning_rate": 1.285126371188881e-05, |
| "loss": 1.9571613311767577, |
| "memory(GiB)": 44.29, |
| "step": 3790, |
| "token_acc": 0.5661971830985916, |
| "train_speed(iter/s)": 0.580194 |
| }, |
| { |
| "epoch": 2.3027912621359223, |
| "grad_norm": 10.896566390991211, |
| "learning_rate": 1.2745124065745845e-05, |
| "loss": 1.7496770858764648, |
| "memory(GiB)": 44.29, |
| "step": 3795, |
| "token_acc": 0.6095238095238096, |
| "train_speed(iter/s)": 0.580115 |
| }, |
| { |
| "epoch": 2.3058252427184467, |
| "grad_norm": 7.805569171905518, |
| "learning_rate": 1.2639360486914964e-05, |
| "loss": 2.1383758544921876, |
| "memory(GiB)": 44.29, |
| "step": 3800, |
| "token_acc": 0.5325779036827195, |
| "train_speed(iter/s)": 0.580145 |
| }, |
| { |
| "epoch": 2.308859223300971, |
| "grad_norm": 8.069032669067383, |
| "learning_rate": 1.2533974043020862e-05, |
| "loss": 1.7861778259277343, |
| "memory(GiB)": 44.29, |
| "step": 3805, |
| "token_acc": 0.594855305466238, |
| "train_speed(iter/s)": 0.580121 |
| }, |
| { |
| "epoch": 2.311893203883495, |
| "grad_norm": 10.002004623413086, |
| "learning_rate": 1.2428965797881204e-05, |
| "loss": 1.8549165725708008, |
| "memory(GiB)": 44.29, |
| "step": 3810, |
| "token_acc": 0.559375, |
| "train_speed(iter/s)": 0.580167 |
| }, |
| { |
| "epoch": 2.3149271844660193, |
| "grad_norm": 10.041362762451172, |
| "learning_rate": 1.232433681149604e-05, |
| "loss": 1.9269153594970703, |
| "memory(GiB)": 44.29, |
| "step": 3815, |
| "token_acc": 0.5762195121951219, |
| "train_speed(iter/s)": 0.580098 |
| }, |
| { |
| "epoch": 2.3179611650485437, |
| "grad_norm": 7.903229236602783, |
| "learning_rate": 1.2220088140036934e-05, |
| "loss": 1.8197761535644532, |
| "memory(GiB)": 44.29, |
| "step": 3820, |
| "token_acc": 0.6220735785953178, |
| "train_speed(iter/s)": 0.580156 |
| }, |
| { |
| "epoch": 2.320995145631068, |
| "grad_norm": 7.331014156341553, |
| "learning_rate": 1.2116220835836389e-05, |
| "loss": 2.0878772735595703, |
| "memory(GiB)": 44.29, |
| "step": 3825, |
| "token_acc": 0.5467128027681661, |
| "train_speed(iter/s)": 0.580167 |
| }, |
| { |
| "epoch": 2.3240291262135924, |
| "grad_norm": 11.29516315460205, |
| "learning_rate": 1.2012735947377297e-05, |
| "loss": 1.9641210556030273, |
| "memory(GiB)": 44.29, |
| "step": 3830, |
| "token_acc": 0.5759493670886076, |
| "train_speed(iter/s)": 0.580195 |
| }, |
| { |
| "epoch": 2.3270631067961167, |
| "grad_norm": 9.621826171875, |
| "learning_rate": 1.1909634519282154e-05, |
| "loss": 1.9087528228759765, |
| "memory(GiB)": 44.29, |
| "step": 3835, |
| "token_acc": 0.5802047781569966, |
| "train_speed(iter/s)": 0.580189 |
| }, |
| { |
| "epoch": 2.3300970873786406, |
| "grad_norm": 7.312023162841797, |
| "learning_rate": 1.1806917592302762e-05, |
| "loss": 1.5428638458251953, |
| "memory(GiB)": 44.29, |
| "step": 3840, |
| "token_acc": 0.6550522648083623, |
| "train_speed(iter/s)": 0.580258 |
| }, |
| { |
| "epoch": 2.333131067961165, |
| "grad_norm": 12.322574615478516, |
| "learning_rate": 1.1704586203309486e-05, |
| "loss": 2.2512718200683595, |
| "memory(GiB)": 44.29, |
| "step": 3845, |
| "token_acc": 0.5261627906976745, |
| "train_speed(iter/s)": 0.580196 |
| }, |
| { |
| "epoch": 2.3361650485436893, |
| "grad_norm": 7.682923316955566, |
| "learning_rate": 1.1602641385280971e-05, |
| "loss": 2.14353084564209, |
| "memory(GiB)": 44.29, |
| "step": 3850, |
| "token_acc": 0.5100502512562815, |
| "train_speed(iter/s)": 0.580288 |
| }, |
| { |
| "epoch": 2.3391990291262137, |
| "grad_norm": 8.914677619934082, |
| "learning_rate": 1.1501084167293624e-05, |
| "loss": 1.8753440856933594, |
| "memory(GiB)": 44.29, |
| "step": 3855, |
| "token_acc": 0.587248322147651, |
| "train_speed(iter/s)": 0.58029 |
| }, |
| { |
| "epoch": 2.342233009708738, |
| "grad_norm": 8.7797212600708, |
| "learning_rate": 1.1399915574511205e-05, |
| "loss": 1.93109130859375, |
| "memory(GiB)": 44.29, |
| "step": 3860, |
| "token_acc": 0.5684931506849316, |
| "train_speed(iter/s)": 0.58018 |
| }, |
| { |
| "epoch": 2.345266990291262, |
| "grad_norm": 6.757023334503174, |
| "learning_rate": 1.1299136628174606e-05, |
| "loss": 1.959303855895996, |
| "memory(GiB)": 44.29, |
| "step": 3865, |
| "token_acc": 0.573134328358209, |
| "train_speed(iter/s)": 0.580123 |
| }, |
| { |
| "epoch": 2.3483009708737863, |
| "grad_norm": 7.724388599395752, |
| "learning_rate": 1.1198748345591358e-05, |
| "loss": 1.923073959350586, |
| "memory(GiB)": 44.29, |
| "step": 3870, |
| "token_acc": 0.5460122699386503, |
| "train_speed(iter/s)": 0.580153 |
| }, |
| { |
| "epoch": 2.3513349514563107, |
| "grad_norm": 8.733378410339355, |
| "learning_rate": 1.1098751740125518e-05, |
| "loss": 1.9303054809570312, |
| "memory(GiB)": 44.29, |
| "step": 3875, |
| "token_acc": 0.5620437956204379, |
| "train_speed(iter/s)": 0.580197 |
| }, |
| { |
| "epoch": 2.354368932038835, |
| "grad_norm": 7.248959541320801, |
| "learning_rate": 1.0999147821187378e-05, |
| "loss": 1.9763971328735352, |
| "memory(GiB)": 44.29, |
| "step": 3880, |
| "token_acc": 0.528052805280528, |
| "train_speed(iter/s)": 0.580278 |
| }, |
| { |
| "epoch": 2.3574029126213594, |
| "grad_norm": 7.560742378234863, |
| "learning_rate": 1.0899937594223225e-05, |
| "loss": 2.138459014892578, |
| "memory(GiB)": 44.29, |
| "step": 3885, |
| "token_acc": 0.5240793201133145, |
| "train_speed(iter/s)": 0.580203 |
| }, |
| { |
| "epoch": 2.3604368932038833, |
| "grad_norm": 7.769505023956299, |
| "learning_rate": 1.080112206070531e-05, |
| "loss": 1.8142425537109375, |
| "memory(GiB)": 44.29, |
| "step": 3890, |
| "token_acc": 0.5935483870967742, |
| "train_speed(iter/s)": 0.580071 |
| }, |
| { |
| "epoch": 2.3634708737864076, |
| "grad_norm": 8.150938987731934, |
| "learning_rate": 1.070270221812163e-05, |
| "loss": 2.216781234741211, |
| "memory(GiB)": 44.29, |
| "step": 3895, |
| "token_acc": 0.49122807017543857, |
| "train_speed(iter/s)": 0.580092 |
| }, |
| { |
| "epoch": 2.366504854368932, |
| "grad_norm": 6.342752456665039, |
| "learning_rate": 1.0604679059965922e-05, |
| "loss": 1.5916692733764648, |
| "memory(GiB)": 44.29, |
| "step": 3900, |
| "token_acc": 0.6594982078853047, |
| "train_speed(iter/s)": 0.580113 |
| }, |
| { |
| "epoch": 2.3695388349514563, |
| "grad_norm": 8.572466850280762, |
| "learning_rate": 1.050705357572761e-05, |
| "loss": 1.6800006866455077, |
| "memory(GiB)": 44.29, |
| "step": 3905, |
| "token_acc": 0.5981873111782477, |
| "train_speed(iter/s)": 0.580211 |
| }, |
| { |
| "epoch": 2.3725728155339807, |
| "grad_norm": 10.74704360961914, |
| "learning_rate": 1.0409826750881824e-05, |
| "loss": 2.0315380096435547, |
| "memory(GiB)": 44.29, |
| "step": 3910, |
| "token_acc": 0.5259067357512953, |
| "train_speed(iter/s)": 0.580153 |
| }, |
| { |
| "epoch": 2.375606796116505, |
| "grad_norm": 10.060522079467773, |
| "learning_rate": 1.031299956687941e-05, |
| "loss": 1.925653839111328, |
| "memory(GiB)": 44.29, |
| "step": 3915, |
| "token_acc": 0.5745454545454546, |
| "train_speed(iter/s)": 0.580105 |
| }, |
| { |
| "epoch": 2.378640776699029, |
| "grad_norm": 6.9279704093933105, |
| "learning_rate": 1.0216573001137126e-05, |
| "loss": 1.7791040420532227, |
| "memory(GiB)": 44.29, |
| "step": 3920, |
| "token_acc": 0.6091954022988506, |
| "train_speed(iter/s)": 0.580094 |
| }, |
| { |
| "epoch": 2.3816747572815533, |
| "grad_norm": 8.384385108947754, |
| "learning_rate": 1.0120548027027655e-05, |
| "loss": 1.839115524291992, |
| "memory(GiB)": 44.29, |
| "step": 3925, |
| "token_acc": 0.5759493670886076, |
| "train_speed(iter/s)": 0.580033 |
| }, |
| { |
| "epoch": 2.3847087378640777, |
| "grad_norm": 6.519843578338623, |
| "learning_rate": 1.0024925613869874e-05, |
| "loss": 2.303724670410156, |
| "memory(GiB)": 44.29, |
| "step": 3930, |
| "token_acc": 0.5181818181818182, |
| "train_speed(iter/s)": 0.580098 |
| }, |
| { |
| "epoch": 2.387742718446602, |
| "grad_norm": 6.988163948059082, |
| "learning_rate": 9.929706726919019e-06, |
| "loss": 2.0136226654052733, |
| "memory(GiB)": 44.29, |
| "step": 3935, |
| "token_acc": 0.5746031746031746, |
| "train_speed(iter/s)": 0.580148 |
| }, |
| { |
| "epoch": 2.3907766990291264, |
| "grad_norm": 8.978435516357422, |
| "learning_rate": 9.834892327356909e-06, |
| "loss": 2.091661262512207, |
| "memory(GiB)": 44.29, |
| "step": 3940, |
| "token_acc": 0.5614035087719298, |
| "train_speed(iter/s)": 0.580167 |
| }, |
| { |
| "epoch": 2.3938106796116507, |
| "grad_norm": 8.229738235473633, |
| "learning_rate": 9.740483372282383e-06, |
| "loss": 1.8495658874511718, |
| "memory(GiB)": 44.29, |
| "step": 3945, |
| "token_acc": 0.6066176470588235, |
| "train_speed(iter/s)": 0.580188 |
| }, |
| { |
| "epoch": 2.3968446601941746, |
| "grad_norm": 9.419842720031738, |
| "learning_rate": 9.646480814701447e-06, |
| "loss": 1.9571540832519532, |
| "memory(GiB)": 44.29, |
| "step": 3950, |
| "token_acc": 0.5364431486880467, |
| "train_speed(iter/s)": 0.580215 |
| }, |
| { |
| "epoch": 2.399878640776699, |
| "grad_norm": 9.840128898620605, |
| "learning_rate": 9.552885603517797e-06, |
| "loss": 1.9348846435546876, |
| "memory(GiB)": 44.29, |
| "step": 3955, |
| "token_acc": 0.570446735395189, |
| "train_speed(iter/s)": 0.580228 |
| }, |
| { |
| "epoch": 2.4029126213592233, |
| "grad_norm": 11.587018013000488, |
| "learning_rate": 9.459698683523204e-06, |
| "loss": 2.1948358535766603, |
| "memory(GiB)": 44.29, |
| "step": 3960, |
| "token_acc": 0.5457413249211357, |
| "train_speed(iter/s)": 0.580218 |
| }, |
| { |
| "epoch": 2.4059466019417477, |
| "grad_norm": 7.861437797546387, |
| "learning_rate": 9.366920995387901e-06, |
| "loss": 2.0211660385131838, |
| "memory(GiB)": 44.29, |
| "step": 3965, |
| "token_acc": 0.5498489425981873, |
| "train_speed(iter/s)": 0.580257 |
| }, |
| { |
| "epoch": 2.408980582524272, |
| "grad_norm": 10.794283866882324, |
| "learning_rate": 9.274553475651254e-06, |
| "loss": 1.9600090026855468, |
| "memory(GiB)": 44.29, |
| "step": 3970, |
| "token_acc": 0.5950413223140496, |
| "train_speed(iter/s)": 0.580279 |
| }, |
| { |
| "epoch": 2.412014563106796, |
| "grad_norm": 7.574179649353027, |
| "learning_rate": 9.182597056712111e-06, |
| "loss": 1.962773895263672, |
| "memory(GiB)": 44.29, |
| "step": 3975, |
| "token_acc": 0.5454545454545454, |
| "train_speed(iter/s)": 0.580177 |
| }, |
| { |
| "epoch": 2.4150485436893203, |
| "grad_norm": 8.261923789978027, |
| "learning_rate": 9.09105266681954e-06, |
| "loss": 2.138422393798828, |
| "memory(GiB)": 44.29, |
| "step": 3980, |
| "token_acc": 0.518796992481203, |
| "train_speed(iter/s)": 0.580227 |
| }, |
| { |
| "epoch": 2.4180825242718447, |
| "grad_norm": 9.629799842834473, |
| "learning_rate": 8.99992123006339e-06, |
| "loss": 1.972011184692383, |
| "memory(GiB)": 44.29, |
| "step": 3985, |
| "token_acc": 0.5466237942122186, |
| "train_speed(iter/s)": 0.580297 |
| }, |
| { |
| "epoch": 2.421116504854369, |
| "grad_norm": 6.383166313171387, |
| "learning_rate": 8.909203666364957e-06, |
| "loss": 1.874557113647461, |
| "memory(GiB)": 44.29, |
| "step": 3990, |
| "token_acc": 0.583941605839416, |
| "train_speed(iter/s)": 0.580373 |
| }, |
| { |
| "epoch": 2.4241504854368934, |
| "grad_norm": 7.899206161499023, |
| "learning_rate": 8.818900891467773e-06, |
| "loss": 2.0880853652954103, |
| "memory(GiB)": 44.29, |
| "step": 3995, |
| "token_acc": 0.54, |
| "train_speed(iter/s)": 0.580411 |
| }, |
| { |
| "epoch": 2.4271844660194173, |
| "grad_norm": 8.335851669311523, |
| "learning_rate": 8.729013816928239e-06, |
| "loss": 1.8050338745117187, |
| "memory(GiB)": 44.29, |
| "step": 4000, |
| "token_acc": 0.6138613861386139, |
| "train_speed(iter/s)": 0.580506 |
| }, |
| { |
| "epoch": 2.4271844660194173, |
| "eval_loss": 1.988856554031372, |
| "eval_runtime": 12.22, |
| "eval_samples_per_second": 8.183, |
| "eval_steps_per_second": 8.183, |
| "eval_token_acc": 0.5174337517433751, |
| "step": 4000 |
| }, |
| { |
| "epoch": 2.4302184466019416, |
| "grad_norm": 6.326101303100586, |
| "learning_rate": 8.639543350106532e-06, |
| "loss": 1.6620052337646485, |
| "memory(GiB)": 44.29, |
| "step": 4005, |
| "token_acc": 0.5577651515151515, |
| "train_speed(iter/s)": 0.579332 |
| }, |
| { |
| "epoch": 2.433252427184466, |
| "grad_norm": 5.722497463226318, |
| "learning_rate": 8.550490394157417e-06, |
| "loss": 2.129566192626953, |
| "memory(GiB)": 44.29, |
| "step": 4010, |
| "token_acc": 0.5157593123209169, |
| "train_speed(iter/s)": 0.57938 |
| }, |
| { |
| "epoch": 2.4362864077669903, |
| "grad_norm": 7.888674736022949, |
| "learning_rate": 8.46185584802106e-06, |
| "loss": 1.7735406875610351, |
| "memory(GiB)": 44.29, |
| "step": 4015, |
| "token_acc": 0.6041666666666666, |
| "train_speed(iter/s)": 0.579409 |
| }, |
| { |
| "epoch": 2.4393203883495147, |
| "grad_norm": 9.027255058288574, |
| "learning_rate": 8.373640606414096e-06, |
| "loss": 2.2499406814575194, |
| "memory(GiB)": 44.29, |
| "step": 4020, |
| "token_acc": 0.5216049382716049, |
| "train_speed(iter/s)": 0.579437 |
| }, |
| { |
| "epoch": 2.4423543689320386, |
| "grad_norm": 6.910282611846924, |
| "learning_rate": 8.285845559820427e-06, |
| "loss": 1.820733642578125, |
| "memory(GiB)": 44.29, |
| "step": 4025, |
| "token_acc": 0.5671641791044776, |
| "train_speed(iter/s)": 0.579456 |
| }, |
| { |
| "epoch": 2.445388349514563, |
| "grad_norm": 8.852483749389648, |
| "learning_rate": 8.198471594482376e-06, |
| "loss": 2.3667272567749023, |
| "memory(GiB)": 44.29, |
| "step": 4030, |
| "token_acc": 0.5066666666666667, |
| "train_speed(iter/s)": 0.579474 |
| }, |
| { |
| "epoch": 2.4484223300970873, |
| "grad_norm": 7.324892520904541, |
| "learning_rate": 8.111519592391669e-06, |
| "loss": 1.7319637298583985, |
| "memory(GiB)": 44.29, |
| "step": 4035, |
| "token_acc": 0.6045751633986928, |
| "train_speed(iter/s)": 0.579496 |
| }, |
| { |
| "epoch": 2.4514563106796117, |
| "grad_norm": 7.501872539520264, |
| "learning_rate": 8.024990431280543e-06, |
| "loss": 2.2290987014770507, |
| "memory(GiB)": 44.29, |
| "step": 4040, |
| "token_acc": 0.5184049079754601, |
| "train_speed(iter/s)": 0.57949 |
| }, |
| { |
| "epoch": 2.454490291262136, |
| "grad_norm": 11.032537460327148, |
| "learning_rate": 7.93888498461291e-06, |
| "loss": 1.7387943267822266, |
| "memory(GiB)": 44.29, |
| "step": 4045, |
| "token_acc": 0.5924657534246576, |
| "train_speed(iter/s)": 0.579449 |
| }, |
| { |
| "epoch": 2.4575242718446604, |
| "grad_norm": 7.408664226531982, |
| "learning_rate": 7.853204121575475e-06, |
| "loss": 2.0464914321899412, |
| "memory(GiB)": 44.29, |
| "step": 4050, |
| "token_acc": 0.5613496932515337, |
| "train_speed(iter/s)": 0.579515 |
| }, |
| { |
| "epoch": 2.4605582524271843, |
| "grad_norm": 8.151251792907715, |
| "learning_rate": 7.76794870706905e-06, |
| "loss": 1.9731042861938477, |
| "memory(GiB)": 44.29, |
| "step": 4055, |
| "token_acc": 0.5264900662251656, |
| "train_speed(iter/s)": 0.579583 |
| }, |
| { |
| "epoch": 2.4635922330097086, |
| "grad_norm": 7.136772632598877, |
| "learning_rate": 7.683119601699757e-06, |
| "loss": 1.9375322341918946, |
| "memory(GiB)": 44.29, |
| "step": 4060, |
| "token_acc": 0.5538922155688623, |
| "train_speed(iter/s)": 0.579566 |
| }, |
| { |
| "epoch": 2.466626213592233, |
| "grad_norm": 8.133397102355957, |
| "learning_rate": 7.598717661770377e-06, |
| "loss": 1.9626676559448242, |
| "memory(GiB)": 44.29, |
| "step": 4065, |
| "token_acc": 0.5642633228840125, |
| "train_speed(iter/s)": 0.579562 |
| }, |
| { |
| "epoch": 2.4696601941747574, |
| "grad_norm": 7.656953811645508, |
| "learning_rate": 7.514743739271696e-06, |
| "loss": 1.7955259323120116, |
| "memory(GiB)": 44.29, |
| "step": 4070, |
| "token_acc": 0.6, |
| "train_speed(iter/s)": 0.579478 |
| }, |
| { |
| "epoch": 2.4726941747572817, |
| "grad_norm": 9.205748558044434, |
| "learning_rate": 7.4311986818738685e-06, |
| "loss": 1.7786579132080078, |
| "memory(GiB)": 44.29, |
| "step": 4075, |
| "token_acc": 0.6013071895424836, |
| "train_speed(iter/s)": 0.579436 |
| }, |
| { |
| "epoch": 2.475728155339806, |
| "grad_norm": 9.025361061096191, |
| "learning_rate": 7.348083332917926e-06, |
| "loss": 2.109883689880371, |
| "memory(GiB)": 44.29, |
| "step": 4080, |
| "token_acc": 0.536, |
| "train_speed(iter/s)": 0.579399 |
| }, |
| { |
| "epoch": 2.47876213592233, |
| "grad_norm": 7.150624752044678, |
| "learning_rate": 7.26539853140723e-06, |
| "loss": 1.8995925903320312, |
| "memory(GiB)": 44.29, |
| "step": 4085, |
| "token_acc": 0.5714285714285714, |
| "train_speed(iter/s)": 0.57952 |
| }, |
| { |
| "epoch": 2.4817961165048543, |
| "grad_norm": 7.212602138519287, |
| "learning_rate": 7.1831451119989955e-06, |
| "loss": 2.258907508850098, |
| "memory(GiB)": 44.29, |
| "step": 4090, |
| "token_acc": 0.4742547425474255, |
| "train_speed(iter/s)": 0.579551 |
| }, |
| { |
| "epoch": 2.4848300970873787, |
| "grad_norm": 9.11235523223877, |
| "learning_rate": 7.1013239049958714e-06, |
| "loss": 1.7706048965454102, |
| "memory(GiB)": 44.29, |
| "step": 4095, |
| "token_acc": 0.5925925925925926, |
| "train_speed(iter/s)": 0.579509 |
| }, |
| { |
| "epoch": 2.487864077669903, |
| "grad_norm": 10.799226760864258, |
| "learning_rate": 7.019935736337585e-06, |
| "loss": 2.1821046829223634, |
| "memory(GiB)": 44.29, |
| "step": 4100, |
| "token_acc": 0.5335463258785943, |
| "train_speed(iter/s)": 0.579558 |
| }, |
| { |
| "epoch": 2.4908980582524274, |
| "grad_norm": 11.041994094848633, |
| "learning_rate": 6.938981427592534e-06, |
| "loss": 2.088601303100586, |
| "memory(GiB)": 44.29, |
| "step": 4105, |
| "token_acc": 0.5171339563862928, |
| "train_speed(iter/s)": 0.579608 |
| }, |
| { |
| "epoch": 2.4939320388349513, |
| "grad_norm": 10.685086250305176, |
| "learning_rate": 6.858461795949583e-06, |
| "loss": 1.5177223205566406, |
| "memory(GiB)": 44.29, |
| "step": 4110, |
| "token_acc": 0.6339285714285714, |
| "train_speed(iter/s)": 0.579595 |
| }, |
| { |
| "epoch": 2.4969660194174756, |
| "grad_norm": 8.770302772521973, |
| "learning_rate": 6.778377654209761e-06, |
| "loss": 1.7158885955810548, |
| "memory(GiB)": 44.29, |
| "step": 4115, |
| "token_acc": 0.5580524344569289, |
| "train_speed(iter/s)": 0.579559 |
| }, |
| { |
| "epoch": 2.5, |
| "grad_norm": 8.05949878692627, |
| "learning_rate": 6.698729810778065e-06, |
| "loss": 2.1136884689331055, |
| "memory(GiB)": 44.29, |
| "step": 4120, |
| "token_acc": 0.5434782608695652, |
| "train_speed(iter/s)": 0.579581 |
| }, |
| { |
| "epoch": 2.5030339805825244, |
| "grad_norm": 9.553973197937012, |
| "learning_rate": 6.619519069655322e-06, |
| "loss": 1.8230070114135741, |
| "memory(GiB)": 44.29, |
| "step": 4125, |
| "token_acc": 0.5797101449275363, |
| "train_speed(iter/s)": 0.579641 |
| }, |
| { |
| "epoch": 2.5060679611650487, |
| "grad_norm": 7.956108093261719, |
| "learning_rate": 6.54074623042999e-06, |
| "loss": 2.0894168853759765, |
| "memory(GiB)": 44.29, |
| "step": 4130, |
| "token_acc": 0.5565749235474006, |
| "train_speed(iter/s)": 0.579668 |
| }, |
| { |
| "epoch": 2.5091019417475726, |
| "grad_norm": 8.481484413146973, |
| "learning_rate": 6.4624120882702535e-06, |
| "loss": 1.8939842224121093, |
| "memory(GiB)": 44.29, |
| "step": 4135, |
| "token_acc": 0.5757575757575758, |
| "train_speed(iter/s)": 0.579678 |
| }, |
| { |
| "epoch": 2.512135922330097, |
| "grad_norm": 9.907540321350098, |
| "learning_rate": 6.384517433915793e-06, |
| "loss": 1.9347640991210937, |
| "memory(GiB)": 44.29, |
| "step": 4140, |
| "token_acc": 0.5447761194029851, |
| "train_speed(iter/s)": 0.579755 |
| }, |
| { |
| "epoch": 2.5151699029126213, |
| "grad_norm": 7.414953231811523, |
| "learning_rate": 6.30706305366996e-06, |
| "loss": 1.730459213256836, |
| "memory(GiB)": 44.29, |
| "step": 4145, |
| "token_acc": 0.5985401459854015, |
| "train_speed(iter/s)": 0.579655 |
| }, |
| { |
| "epoch": 2.5182038834951457, |
| "grad_norm": 8.353326797485352, |
| "learning_rate": 6.230049729391779e-06, |
| "loss": 1.9265541076660155, |
| "memory(GiB)": 44.29, |
| "step": 4150, |
| "token_acc": 0.5632183908045977, |
| "train_speed(iter/s)": 0.57963 |
| }, |
| { |
| "epoch": 2.52123786407767, |
| "grad_norm": 6.5633673667907715, |
| "learning_rate": 6.153478238488019e-06, |
| "loss": 1.7929351806640625, |
| "memory(GiB)": 44.29, |
| "step": 4155, |
| "token_acc": 0.5882352941176471, |
| "train_speed(iter/s)": 0.579611 |
| }, |
| { |
| "epoch": 2.524271844660194, |
| "grad_norm": 7.694858551025391, |
| "learning_rate": 6.077349353905465e-06, |
| "loss": 2.095606231689453, |
| "memory(GiB)": 44.29, |
| "step": 4160, |
| "token_acc": 0.5905511811023622, |
| "train_speed(iter/s)": 0.579685 |
| }, |
| { |
| "epoch": 2.5273058252427183, |
| "grad_norm": 6.272264003753662, |
| "learning_rate": 6.00166384412294e-06, |
| "loss": 2.1394012451171873, |
| "memory(GiB)": 44.29, |
| "step": 4165, |
| "token_acc": 0.5434782608695652, |
| "train_speed(iter/s)": 0.579716 |
| }, |
| { |
| "epoch": 2.5303398058252426, |
| "grad_norm": 8.841377258300781, |
| "learning_rate": 5.926422473143717e-06, |
| "loss": 1.972856330871582, |
| "memory(GiB)": 44.29, |
| "step": 4170, |
| "token_acc": 0.5674740484429066, |
| "train_speed(iter/s)": 0.579732 |
| }, |
| { |
| "epoch": 2.533373786407767, |
| "grad_norm": 7.734652996063232, |
| "learning_rate": 5.851626000487714e-06, |
| "loss": 1.771505355834961, |
| "memory(GiB)": 44.29, |
| "step": 4175, |
| "token_acc": 0.5695364238410596, |
| "train_speed(iter/s)": 0.57976 |
| }, |
| { |
| "epoch": 2.5364077669902914, |
| "grad_norm": 8.464856147766113, |
| "learning_rate": 5.7772751811838165e-06, |
| "loss": 2.1697675704956056, |
| "memory(GiB)": 44.29, |
| "step": 4180, |
| "token_acc": 0.5490196078431373, |
| "train_speed(iter/s)": 0.579766 |
| }, |
| { |
| "epoch": 2.5394417475728153, |
| "grad_norm": 6.249225616455078, |
| "learning_rate": 5.703370765762345e-06, |
| "loss": 2.03582706451416, |
| "memory(GiB)": 44.29, |
| "step": 4185, |
| "token_acc": 0.5623188405797102, |
| "train_speed(iter/s)": 0.57982 |
| }, |
| { |
| "epoch": 2.54247572815534, |
| "grad_norm": 9.059986114501953, |
| "learning_rate": 5.629913500247364e-06, |
| "loss": 2.067348098754883, |
| "memory(GiB)": 44.29, |
| "step": 4190, |
| "token_acc": 0.5376712328767124, |
| "train_speed(iter/s)": 0.579847 |
| }, |
| { |
| "epoch": 2.545509708737864, |
| "grad_norm": 7.16273307800293, |
| "learning_rate": 5.556904126149237e-06, |
| "loss": 1.619649314880371, |
| "memory(GiB)": 44.29, |
| "step": 4195, |
| "token_acc": 0.6006600660066007, |
| "train_speed(iter/s)": 0.579885 |
| }, |
| { |
| "epoch": 2.5485436893203883, |
| "grad_norm": 8.991573333740234, |
| "learning_rate": 5.484343380457125e-06, |
| "loss": 1.7998830795288085, |
| "memory(GiB)": 44.29, |
| "step": 4200, |
| "token_acc": 0.5662650602409639, |
| "train_speed(iter/s)": 0.57986 |
| }, |
| { |
| "epoch": 2.5515776699029127, |
| "grad_norm": 8.396170616149902, |
| "learning_rate": 5.412231995631473e-06, |
| "loss": 1.9479732513427734, |
| "memory(GiB)": 44.29, |
| "step": 4205, |
| "token_acc": 0.5418060200668896, |
| "train_speed(iter/s)": 0.579772 |
| }, |
| { |
| "epoch": 2.554611650485437, |
| "grad_norm": 9.159605979919434, |
| "learning_rate": 5.340570699596769e-06, |
| "loss": 1.8561626434326173, |
| "memory(GiB)": 44.29, |
| "step": 4210, |
| "token_acc": 0.5530973451327433, |
| "train_speed(iter/s)": 0.579768 |
| }, |
| { |
| "epoch": 2.5576456310679614, |
| "grad_norm": 9.651739120483398, |
| "learning_rate": 5.269360215734026e-06, |
| "loss": 2.021830940246582, |
| "memory(GiB)": 44.29, |
| "step": 4215, |
| "token_acc": 0.5821917808219178, |
| "train_speed(iter/s)": 0.579771 |
| }, |
| { |
| "epoch": 2.5606796116504853, |
| "grad_norm": 6.730819225311279, |
| "learning_rate": 5.198601262873593e-06, |
| "loss": 1.8237226486206055, |
| "memory(GiB)": 44.29, |
| "step": 4220, |
| "token_acc": 0.5780821917808219, |
| "train_speed(iter/s)": 0.57971 |
| }, |
| { |
| "epoch": 2.5637135922330097, |
| "grad_norm": 10.186707496643066, |
| "learning_rate": 5.12829455528786e-06, |
| "loss": 1.6941600799560548, |
| "memory(GiB)": 44.29, |
| "step": 4225, |
| "token_acc": 0.6114649681528662, |
| "train_speed(iter/s)": 0.579676 |
| }, |
| { |
| "epoch": 2.566747572815534, |
| "grad_norm": 11.422538757324219, |
| "learning_rate": 5.0584408026840555e-06, |
| "loss": 1.9525514602661134, |
| "memory(GiB)": 44.29, |
| "step": 4230, |
| "token_acc": 0.5769230769230769, |
| "train_speed(iter/s)": 0.57969 |
| }, |
| { |
| "epoch": 2.5697815533980584, |
| "grad_norm": 6.254408836364746, |
| "learning_rate": 4.989040710197068e-06, |
| "loss": 1.8417320251464844, |
| "memory(GiB)": 44.29, |
| "step": 4235, |
| "token_acc": 0.5893854748603352, |
| "train_speed(iter/s)": 0.579743 |
| }, |
| { |
| "epoch": 2.5728155339805827, |
| "grad_norm": 8.989594459533691, |
| "learning_rate": 4.920094978382339e-06, |
| "loss": 2.2028553009033205, |
| "memory(GiB)": 44.29, |
| "step": 4240, |
| "token_acc": 0.52, |
| "train_speed(iter/s)": 0.579778 |
| }, |
| { |
| "epoch": 2.5758495145631066, |
| "grad_norm": 8.0951566696167, |
| "learning_rate": 4.851604303208801e-06, |
| "loss": 1.881844711303711, |
| "memory(GiB)": 44.29, |
| "step": 4245, |
| "token_acc": 0.6153846153846154, |
| "train_speed(iter/s)": 0.579786 |
| }, |
| { |
| "epoch": 2.578883495145631, |
| "grad_norm": 7.341141700744629, |
| "learning_rate": 4.783569376051833e-06, |
| "loss": 2.057468223571777, |
| "memory(GiB)": 44.29, |
| "step": 4250, |
| "token_acc": 0.5373563218390804, |
| "train_speed(iter/s)": 0.579789 |
| }, |
| { |
| "epoch": 2.5819174757281553, |
| "grad_norm": 8.066463470458984, |
| "learning_rate": 4.7159908836862994e-06, |
| "loss": 1.9251741409301757, |
| "memory(GiB)": 44.29, |
| "step": 4255, |
| "token_acc": 0.559322033898305, |
| "train_speed(iter/s)": 0.579759 |
| }, |
| { |
| "epoch": 2.5849514563106797, |
| "grad_norm": 14.030436515808105, |
| "learning_rate": 4.648869508279613e-06, |
| "loss": 1.9517692565917968, |
| "memory(GiB)": 44.29, |
| "step": 4260, |
| "token_acc": 0.5652173913043478, |
| "train_speed(iter/s)": 0.579826 |
| }, |
| { |
| "epoch": 2.587985436893204, |
| "grad_norm": 7.453925132751465, |
| "learning_rate": 4.582205927384814e-06, |
| "loss": 1.7124622344970704, |
| "memory(GiB)": 44.29, |
| "step": 4265, |
| "token_acc": 0.6095890410958904, |
| "train_speed(iter/s)": 0.579851 |
| }, |
| { |
| "epoch": 2.591019417475728, |
| "grad_norm": 9.848562240600586, |
| "learning_rate": 4.51600081393379e-06, |
| "loss": 1.534929084777832, |
| "memory(GiB)": 44.29, |
| "step": 4270, |
| "token_acc": 0.6325757575757576, |
| "train_speed(iter/s)": 0.579913 |
| }, |
| { |
| "epoch": 2.5940533980582523, |
| "grad_norm": 8.778762817382812, |
| "learning_rate": 4.450254836230449e-06, |
| "loss": 1.9810653686523438, |
| "memory(GiB)": 44.29, |
| "step": 4275, |
| "token_acc": 0.5531914893617021, |
| "train_speed(iter/s)": 0.579963 |
| }, |
| { |
| "epoch": 2.5970873786407767, |
| "grad_norm": 7.1793742179870605, |
| "learning_rate": 4.384968657943972e-06, |
| "loss": 2.044744682312012, |
| "memory(GiB)": 44.29, |
| "step": 4280, |
| "token_acc": 0.5632530120481928, |
| "train_speed(iter/s)": 0.579985 |
| }, |
| { |
| "epoch": 2.600121359223301, |
| "grad_norm": 6.974610805511475, |
| "learning_rate": 4.3201429381021285e-06, |
| "loss": 1.750173568725586, |
| "memory(GiB)": 44.29, |
| "step": 4285, |
| "token_acc": 0.6114864864864865, |
| "train_speed(iter/s)": 0.580089 |
| }, |
| { |
| "epoch": 2.6031553398058254, |
| "grad_norm": 7.6272196769714355, |
| "learning_rate": 4.255778331084609e-06, |
| "loss": 2.1643795013427733, |
| "memory(GiB)": 44.29, |
| "step": 4290, |
| "token_acc": 0.5138539042821159, |
| "train_speed(iter/s)": 0.580145 |
| }, |
| { |
| "epoch": 2.6061893203883493, |
| "grad_norm": 8.453348159790039, |
| "learning_rate": 4.1918754866164205e-06, |
| "loss": 2.236542510986328, |
| "memory(GiB)": 44.29, |
| "step": 4295, |
| "token_acc": 0.4793650793650794, |
| "train_speed(iter/s)": 0.580165 |
| }, |
| { |
| "epoch": 2.6092233009708736, |
| "grad_norm": 10.852858543395996, |
| "learning_rate": 4.1284350497613426e-06, |
| "loss": 1.9886856079101562, |
| "memory(GiB)": 44.29, |
| "step": 4300, |
| "token_acc": 0.5439739413680782, |
| "train_speed(iter/s)": 0.58019 |
| }, |
| { |
| "epoch": 2.612257281553398, |
| "grad_norm": 9.123336791992188, |
| "learning_rate": 4.065457660915401e-06, |
| "loss": 1.9303335189819335, |
| "memory(GiB)": 44.29, |
| "step": 4305, |
| "token_acc": 0.5847457627118644, |
| "train_speed(iter/s)": 0.580152 |
| }, |
| { |
| "epoch": 2.6152912621359223, |
| "grad_norm": 7.784154891967773, |
| "learning_rate": 4.002943955800409e-06, |
| "loss": 2.0141778945922852, |
| "memory(GiB)": 44.29, |
| "step": 4310, |
| "token_acc": 0.5393258426966292, |
| "train_speed(iter/s)": 0.580169 |
| }, |
| { |
| "epoch": 2.6183252427184467, |
| "grad_norm": 9.202990531921387, |
| "learning_rate": 3.94089456545757e-06, |
| "loss": 1.936072540283203, |
| "memory(GiB)": 44.29, |
| "step": 4315, |
| "token_acc": 0.5570469798657718, |
| "train_speed(iter/s)": 0.580162 |
| }, |
| { |
| "epoch": 2.6213592233009706, |
| "grad_norm": 8.248907089233398, |
| "learning_rate": 3.879310116241042e-06, |
| "loss": 1.968276596069336, |
| "memory(GiB)": 44.29, |
| "step": 4320, |
| "token_acc": 0.5680272108843537, |
| "train_speed(iter/s)": 0.580185 |
| }, |
| { |
| "epoch": 2.6243932038834954, |
| "grad_norm": 10.208954811096191, |
| "learning_rate": 3.818191229811696e-06, |
| "loss": 1.9195415496826171, |
| "memory(GiB)": 44.29, |
| "step": 4325, |
| "token_acc": 0.5785123966942148, |
| "train_speed(iter/s)": 0.580191 |
| }, |
| { |
| "epoch": 2.6274271844660193, |
| "grad_norm": 8.11597728729248, |
| "learning_rate": 3.757538523130799e-06, |
| "loss": 2.197231674194336, |
| "memory(GiB)": 44.29, |
| "step": 4330, |
| "token_acc": 0.5173501577287066, |
| "train_speed(iter/s)": 0.580246 |
| }, |
| { |
| "epoch": 2.6304611650485437, |
| "grad_norm": 10.075161933898926, |
| "learning_rate": 3.697352608453791e-06, |
| "loss": 2.041206932067871, |
| "memory(GiB)": 44.29, |
| "step": 4335, |
| "token_acc": 0.5785714285714286, |
| "train_speed(iter/s)": 0.580244 |
| }, |
| { |
| "epoch": 2.633495145631068, |
| "grad_norm": 9.632774353027344, |
| "learning_rate": 3.6376340933241104e-06, |
| "loss": 1.9504831314086915, |
| "memory(GiB)": 44.29, |
| "step": 4340, |
| "token_acc": 0.5544217687074829, |
| "train_speed(iter/s)": 0.580233 |
| }, |
| { |
| "epoch": 2.6365291262135924, |
| "grad_norm": 7.268722057342529, |
| "learning_rate": 3.5783835805670183e-06, |
| "loss": 2.2769695281982423, |
| "memory(GiB)": 44.29, |
| "step": 4345, |
| "token_acc": 0.5015197568389058, |
| "train_speed(iter/s)": 0.580233 |
| }, |
| { |
| "epoch": 2.6395631067961167, |
| "grad_norm": 13.444320678710938, |
| "learning_rate": 3.519601668283623e-06, |
| "loss": 1.9888429641723633, |
| "memory(GiB)": 44.29, |
| "step": 4350, |
| "token_acc": 0.5563636363636364, |
| "train_speed(iter/s)": 0.580169 |
| }, |
| { |
| "epoch": 2.6425970873786406, |
| "grad_norm": 8.871038436889648, |
| "learning_rate": 3.4612889498447043e-06, |
| "loss": 1.7693092346191406, |
| "memory(GiB)": 44.29, |
| "step": 4355, |
| "token_acc": 0.5708812260536399, |
| "train_speed(iter/s)": 0.580141 |
| }, |
| { |
| "epoch": 2.645631067961165, |
| "grad_norm": 9.421436309814453, |
| "learning_rate": 3.40344601388482e-06, |
| "loss": 1.8508855819702148, |
| "memory(GiB)": 44.29, |
| "step": 4360, |
| "token_acc": 0.5864197530864198, |
| "train_speed(iter/s)": 0.580145 |
| }, |
| { |
| "epoch": 2.6486650485436893, |
| "grad_norm": 14.255351066589355, |
| "learning_rate": 3.346073444296338e-06, |
| "loss": 1.8605754852294922, |
| "memory(GiB)": 44.29, |
| "step": 4365, |
| "token_acc": 0.6238244514106583, |
| "train_speed(iter/s)": 0.580184 |
| }, |
| { |
| "epoch": 2.6516990291262137, |
| "grad_norm": 9.838223457336426, |
| "learning_rate": 3.289171820223519e-06, |
| "loss": 1.8943605422973633, |
| "memory(GiB)": 44.29, |
| "step": 4370, |
| "token_acc": 0.59375, |
| "train_speed(iter/s)": 0.580196 |
| }, |
| { |
| "epoch": 2.654733009708738, |
| "grad_norm": 7.1384148597717285, |
| "learning_rate": 3.2327417160567196e-06, |
| "loss": 1.945779037475586, |
| "memory(GiB)": 44.29, |
| "step": 4375, |
| "token_acc": 0.5589225589225589, |
| "train_speed(iter/s)": 0.580208 |
| }, |
| { |
| "epoch": 2.657766990291262, |
| "grad_norm": 7.130894184112549, |
| "learning_rate": 3.176783701426528e-06, |
| "loss": 1.920769500732422, |
| "memory(GiB)": 44.29, |
| "step": 4380, |
| "token_acc": 0.5652173913043478, |
| "train_speed(iter/s)": 0.5802 |
| }, |
| { |
| "epoch": 2.6608009708737863, |
| "grad_norm": 7.5801215171813965, |
| "learning_rate": 3.121298341198081e-06, |
| "loss": 2.089648628234863, |
| "memory(GiB)": 44.29, |
| "step": 4385, |
| "token_acc": 0.5445026178010471, |
| "train_speed(iter/s)": 0.580307 |
| }, |
| { |
| "epoch": 2.6638349514563107, |
| "grad_norm": 9.623913764953613, |
| "learning_rate": 3.0662861954653232e-06, |
| "loss": 2.102077674865723, |
| "memory(GiB)": 44.29, |
| "step": 4390, |
| "token_acc": 0.5238095238095238, |
| "train_speed(iter/s)": 0.580352 |
| }, |
| { |
| "epoch": 2.666868932038835, |
| "grad_norm": 8.056645393371582, |
| "learning_rate": 3.0117478195453353e-06, |
| "loss": 2.002307319641113, |
| "memory(GiB)": 44.29, |
| "step": 4395, |
| "token_acc": 0.5625, |
| "train_speed(iter/s)": 0.580375 |
| }, |
| { |
| "epoch": 2.6699029126213594, |
| "grad_norm": 9.664189338684082, |
| "learning_rate": 2.9576837639728073e-06, |
| "loss": 1.638421630859375, |
| "memory(GiB)": 44.29, |
| "step": 4400, |
| "token_acc": 0.6138328530259366, |
| "train_speed(iter/s)": 0.580313 |
| }, |
| { |
| "epoch": 2.6729368932038833, |
| "grad_norm": 7.514981269836426, |
| "learning_rate": 2.9040945744943757e-06, |
| "loss": 1.8152626037597657, |
| "memory(GiB)": 44.29, |
| "step": 4405, |
| "token_acc": 0.5535055350553506, |
| "train_speed(iter/s)": 0.580281 |
| }, |
| { |
| "epoch": 2.6759708737864076, |
| "grad_norm": 7.019512176513672, |
| "learning_rate": 2.850980792063196e-06, |
| "loss": 1.805082130432129, |
| "memory(GiB)": 44.29, |
| "step": 4410, |
| "token_acc": 0.5683890577507599, |
| "train_speed(iter/s)": 0.580336 |
| }, |
| { |
| "epoch": 2.679004854368932, |
| "grad_norm": 8.447052955627441, |
| "learning_rate": 2.798342952833455e-06, |
| "loss": 1.9645135879516602, |
| "memory(GiB)": 44.29, |
| "step": 4415, |
| "token_acc": 0.5562700964630225, |
| "train_speed(iter/s)": 0.580375 |
| }, |
| { |
| "epoch": 2.6820388349514563, |
| "grad_norm": 15.852560997009277, |
| "learning_rate": 2.7461815881549225e-06, |
| "loss": 1.9464908599853517, |
| "memory(GiB)": 44.29, |
| "step": 4420, |
| "token_acc": 0.5913978494623656, |
| "train_speed(iter/s)": 0.580409 |
| }, |
| { |
| "epoch": 2.6850728155339807, |
| "grad_norm": 8.933894157409668, |
| "learning_rate": 2.694497224567688e-06, |
| "loss": 2.005167007446289, |
| "memory(GiB)": 44.29, |
| "step": 4425, |
| "token_acc": 0.5362903225806451, |
| "train_speed(iter/s)": 0.580346 |
| }, |
| { |
| "epoch": 2.6881067961165046, |
| "grad_norm": 5.791989326477051, |
| "learning_rate": 2.6432903837967036e-06, |
| "loss": 1.905177116394043, |
| "memory(GiB)": 44.29, |
| "step": 4430, |
| "token_acc": 0.556923076923077, |
| "train_speed(iter/s)": 0.580417 |
| }, |
| { |
| "epoch": 2.6911407766990294, |
| "grad_norm": 7.198362350463867, |
| "learning_rate": 2.5925615827466444e-06, |
| "loss": 2.0099058151245117, |
| "memory(GiB)": 44.29, |
| "step": 4435, |
| "token_acc": 0.5861111111111111, |
| "train_speed(iter/s)": 0.580452 |
| }, |
| { |
| "epoch": 2.6941747572815533, |
| "grad_norm": 10.059782981872559, |
| "learning_rate": 2.542311333496622e-06, |
| "loss": 2.030255126953125, |
| "memory(GiB)": 44.29, |
| "step": 4440, |
| "token_acc": 0.5693215339233039, |
| "train_speed(iter/s)": 0.580427 |
| }, |
| { |
| "epoch": 2.6972087378640777, |
| "grad_norm": 8.237997055053711, |
| "learning_rate": 2.492540143295036e-06, |
| "loss": 1.9501361846923828, |
| "memory(GiB)": 44.29, |
| "step": 4445, |
| "token_acc": 0.5619335347432024, |
| "train_speed(iter/s)": 0.5805 |
| }, |
| { |
| "epoch": 2.700242718446602, |
| "grad_norm": 8.189979553222656, |
| "learning_rate": 2.4432485145544527e-06, |
| "loss": 2.0908411026000975, |
| "memory(GiB)": 44.29, |
| "step": 4450, |
| "token_acc": 0.5563139931740614, |
| "train_speed(iter/s)": 0.580487 |
| }, |
| { |
| "epoch": 2.7032766990291264, |
| "grad_norm": 7.352850437164307, |
| "learning_rate": 2.394436944846523e-06, |
| "loss": 1.8278610229492187, |
| "memory(GiB)": 44.29, |
| "step": 4455, |
| "token_acc": 0.5739130434782609, |
| "train_speed(iter/s)": 0.580446 |
| }, |
| { |
| "epoch": 2.7063106796116507, |
| "grad_norm": 7.38375997543335, |
| "learning_rate": 2.3461059268969744e-06, |
| "loss": 1.9157276153564453, |
| "memory(GiB)": 44.29, |
| "step": 4460, |
| "token_acc": 0.5667655786350149, |
| "train_speed(iter/s)": 0.580538 |
| }, |
| { |
| "epoch": 2.7093446601941746, |
| "grad_norm": 11.641793251037598, |
| "learning_rate": 2.29825594858063e-06, |
| "loss": 1.723676872253418, |
| "memory(GiB)": 44.29, |
| "step": 4465, |
| "token_acc": 0.5962962962962963, |
| "train_speed(iter/s)": 0.580636 |
| }, |
| { |
| "epoch": 2.712378640776699, |
| "grad_norm": 8.030855178833008, |
| "learning_rate": 2.250887492916487e-06, |
| "loss": 1.855816650390625, |
| "memory(GiB)": 44.29, |
| "step": 4470, |
| "token_acc": 0.5791044776119403, |
| "train_speed(iter/s)": 0.580678 |
| }, |
| { |
| "epoch": 2.7154126213592233, |
| "grad_norm": 8.97354793548584, |
| "learning_rate": 2.204001038062836e-06, |
| "loss": 1.9793785095214844, |
| "memory(GiB)": 44.29, |
| "step": 4475, |
| "token_acc": 0.5257731958762887, |
| "train_speed(iter/s)": 0.58069 |
| }, |
| { |
| "epoch": 2.7184466019417477, |
| "grad_norm": 10.212775230407715, |
| "learning_rate": 2.157597057312444e-06, |
| "loss": 2.099479103088379, |
| "memory(GiB)": 44.29, |
| "step": 4480, |
| "token_acc": 0.5294117647058824, |
| "train_speed(iter/s)": 0.580635 |
| }, |
| { |
| "epoch": 2.721480582524272, |
| "grad_norm": 9.049674034118652, |
| "learning_rate": 2.1116760190877437e-06, |
| "loss": 1.7141408920288086, |
| "memory(GiB)": 44.29, |
| "step": 4485, |
| "token_acc": 0.6265822784810127, |
| "train_speed(iter/s)": 0.580726 |
| }, |
| { |
| "epoch": 2.724514563106796, |
| "grad_norm": 10.3820219039917, |
| "learning_rate": 2.0662383869361645e-06, |
| "loss": 1.9986873626708985, |
| "memory(GiB)": 44.29, |
| "step": 4490, |
| "token_acc": 0.5757575757575758, |
| "train_speed(iter/s)": 0.580832 |
| }, |
| { |
| "epoch": 2.7275485436893203, |
| "grad_norm": 7.463447093963623, |
| "learning_rate": 2.0212846195253987e-06, |
| "loss": 2.1121494293212892, |
| "memory(GiB)": 44.29, |
| "step": 4495, |
| "token_acc": 0.5421052631578948, |
| "train_speed(iter/s)": 0.580767 |
| }, |
| { |
| "epoch": 2.7305825242718447, |
| "grad_norm": 8.770597457885742, |
| "learning_rate": 1.976815170638802e-06, |
| "loss": 2.0751237869262695, |
| "memory(GiB)": 44.29, |
| "step": 4500, |
| "token_acc": 0.5382165605095541, |
| "train_speed(iter/s)": 0.580785 |
| }, |
| { |
| "epoch": 2.7305825242718447, |
| "eval_loss": 1.9814581871032715, |
| "eval_runtime": 12.0819, |
| "eval_samples_per_second": 8.277, |
| "eval_steps_per_second": 8.277, |
| "eval_token_acc": 0.5314591700133868, |
| "step": 4500 |
| }, |
| { |
| "epoch": 2.733616504854369, |
| "grad_norm": 11.466297149658203, |
| "learning_rate": 1.9328304891708003e-06, |
| "loss": 1.710250473022461, |
| "memory(GiB)": 44.29, |
| "step": 4505, |
| "token_acc": 0.5572666025024061, |
| "train_speed(iter/s)": 0.579903 |
| }, |
| { |
| "epoch": 2.7366504854368934, |
| "grad_norm": 7.724918365478516, |
| "learning_rate": 1.8893310191223535e-06, |
| "loss": 1.6978034973144531, |
| "memory(GiB)": 44.29, |
| "step": 4510, |
| "token_acc": 0.5993377483443708, |
| "train_speed(iter/s)": 0.579938 |
| }, |
| { |
| "epoch": 2.7396844660194173, |
| "grad_norm": 7.455316543579102, |
| "learning_rate": 1.8463171995964978e-06, |
| "loss": 1.7312326431274414, |
| "memory(GiB)": 44.29, |
| "step": 4515, |
| "token_acc": 0.584717607973422, |
| "train_speed(iter/s)": 0.579877 |
| }, |
| { |
| "epoch": 2.7427184466019416, |
| "grad_norm": 9.585491180419922, |
| "learning_rate": 1.8037894647938758e-06, |
| "loss": 1.9202953338623048, |
| "memory(GiB)": 44.29, |
| "step": 4520, |
| "token_acc": 0.5942492012779552, |
| "train_speed(iter/s)": 0.579941 |
| }, |
| { |
| "epoch": 2.745752427184466, |
| "grad_norm": 7.608863830566406, |
| "learning_rate": 1.7617482440083931e-06, |
| "loss": 1.9673721313476562, |
| "memory(GiB)": 44.29, |
| "step": 4525, |
| "token_acc": 0.5555555555555556, |
| "train_speed(iter/s)": 0.579965 |
| }, |
| { |
| "epoch": 2.7487864077669903, |
| "grad_norm": 7.024211883544922, |
| "learning_rate": 1.7201939616228569e-06, |
| "loss": 1.9407548904418945, |
| "memory(GiB)": 44.29, |
| "step": 4530, |
| "token_acc": 0.5653333333333334, |
| "train_speed(iter/s)": 0.579964 |
| }, |
| { |
| "epoch": 2.7518203883495147, |
| "grad_norm": 8.106232643127441, |
| "learning_rate": 1.6791270371046997e-06, |
| "loss": 1.7603139877319336, |
| "memory(GiB)": 44.29, |
| "step": 4535, |
| "token_acc": 0.5956112852664577, |
| "train_speed(iter/s)": 0.579993 |
| }, |
| { |
| "epoch": 2.7548543689320386, |
| "grad_norm": 8.373075485229492, |
| "learning_rate": 1.638547885001762e-06, |
| "loss": 2.115154838562012, |
| "memory(GiB)": 44.29, |
| "step": 4540, |
| "token_acc": 0.5537459283387622, |
| "train_speed(iter/s)": 0.58 |
| }, |
| { |
| "epoch": 2.757888349514563, |
| "grad_norm": 8.133313179016113, |
| "learning_rate": 1.5984569149380678e-06, |
| "loss": 1.959366226196289, |
| "memory(GiB)": 44.29, |
| "step": 4545, |
| "token_acc": 0.592948717948718, |
| "train_speed(iter/s)": 0.580015 |
| }, |
| { |
| "epoch": 2.7609223300970873, |
| "grad_norm": 10.857476234436035, |
| "learning_rate": 1.5588545316097269e-06, |
| "loss": 1.636090087890625, |
| "memory(GiB)": 44.29, |
| "step": 4550, |
| "token_acc": 0.5871212121212122, |
| "train_speed(iter/s)": 0.580078 |
| }, |
| { |
| "epoch": 2.7639563106796117, |
| "grad_norm": 7.435564041137695, |
| "learning_rate": 1.51974113478085e-06, |
| "loss": 1.679054069519043, |
| "memory(GiB)": 44.29, |
| "step": 4555, |
| "token_acc": 0.597864768683274, |
| "train_speed(iter/s)": 0.580029 |
| }, |
| { |
| "epoch": 2.766990291262136, |
| "grad_norm": 8.17315673828125, |
| "learning_rate": 1.4811171192794627e-06, |
| "loss": 2.029444694519043, |
| "memory(GiB)": 44.29, |
| "step": 4560, |
| "token_acc": 0.568561872909699, |
| "train_speed(iter/s)": 0.579988 |
| }, |
| { |
| "epoch": 2.77002427184466, |
| "grad_norm": 10.141511917114258, |
| "learning_rate": 1.4429828749936092e-06, |
| "loss": 1.9936655044555665, |
| "memory(GiB)": 44.29, |
| "step": 4565, |
| "token_acc": 0.5769230769230769, |
| "train_speed(iter/s)": 0.580042 |
| }, |
| { |
| "epoch": 2.7730582524271847, |
| "grad_norm": 8.14281177520752, |
| "learning_rate": 1.4053387868673217e-06, |
| "loss": 1.8854488372802733, |
| "memory(GiB)": 44.29, |
| "step": 4570, |
| "token_acc": 0.582089552238806, |
| "train_speed(iter/s)": 0.580069 |
| }, |
| { |
| "epoch": 2.7760922330097086, |
| "grad_norm": 7.9403910636901855, |
| "learning_rate": 1.368185234896796e-06, |
| "loss": 2.005961608886719, |
| "memory(GiB)": 44.29, |
| "step": 4575, |
| "token_acc": 0.5279503105590062, |
| "train_speed(iter/s)": 0.580129 |
| }, |
| { |
| "epoch": 2.779126213592233, |
| "grad_norm": 8.195262908935547, |
| "learning_rate": 1.3315225941265386e-06, |
| "loss": 1.789654541015625, |
| "memory(GiB)": 44.29, |
| "step": 4580, |
| "token_acc": 0.5811688311688312, |
| "train_speed(iter/s)": 0.580137 |
| }, |
| { |
| "epoch": 2.7821601941747574, |
| "grad_norm": 7.385119438171387, |
| "learning_rate": 1.2953512346455643e-06, |
| "loss": 1.678761100769043, |
| "memory(GiB)": 44.29, |
| "step": 4585, |
| "token_acc": 0.5840978593272171, |
| "train_speed(iter/s)": 0.580145 |
| }, |
| { |
| "epoch": 2.7851941747572817, |
| "grad_norm": 6.9788336753845215, |
| "learning_rate": 1.2596715215836996e-06, |
| "loss": 1.8593015670776367, |
| "memory(GiB)": 44.29, |
| "step": 4590, |
| "token_acc": 0.5896551724137931, |
| "train_speed(iter/s)": 0.580152 |
| }, |
| { |
| "epoch": 2.788228155339806, |
| "grad_norm": 7.658742427825928, |
| "learning_rate": 1.224483815107863e-06, |
| "loss": 1.924429702758789, |
| "memory(GiB)": 44.29, |
| "step": 4595, |
| "token_acc": 0.5796610169491525, |
| "train_speed(iter/s)": 0.580188 |
| }, |
| { |
| "epoch": 2.79126213592233, |
| "grad_norm": 7.426290035247803, |
| "learning_rate": 1.1897884704184236e-06, |
| "loss": 1.8148229598999024, |
| "memory(GiB)": 44.29, |
| "step": 4600, |
| "token_acc": 0.6019108280254777, |
| "train_speed(iter/s)": 0.580172 |
| }, |
| { |
| "epoch": 2.7942961165048543, |
| "grad_norm": 9.051165580749512, |
| "learning_rate": 1.1555858377456596e-06, |
| "loss": 1.9418960571289063, |
| "memory(GiB)": 44.29, |
| "step": 4605, |
| "token_acc": 0.6119402985074627, |
| "train_speed(iter/s)": 0.580258 |
| }, |
| { |
| "epoch": 2.7973300970873787, |
| "grad_norm": 6.436223030090332, |
| "learning_rate": 1.1218762623461666e-06, |
| "loss": 1.7338180541992188, |
| "memory(GiB)": 44.29, |
| "step": 4610, |
| "token_acc": 0.6037735849056604, |
| "train_speed(iter/s)": 0.580253 |
| }, |
| { |
| "epoch": 2.800364077669903, |
| "grad_norm": 9.345931053161621, |
| "learning_rate": 1.0886600844994266e-06, |
| "loss": 2.1333446502685547, |
| "memory(GiB)": 44.29, |
| "step": 4615, |
| "token_acc": 0.5629139072847682, |
| "train_speed(iter/s)": 0.580306 |
| }, |
| { |
| "epoch": 2.8033980582524274, |
| "grad_norm": 9.715279579162598, |
| "learning_rate": 1.0559376395043285e-06, |
| "loss": 1.706222152709961, |
| "memory(GiB)": 44.29, |
| "step": 4620, |
| "token_acc": 0.6322314049586777, |
| "train_speed(iter/s)": 0.580351 |
| }, |
| { |
| "epoch": 2.8064320388349513, |
| "grad_norm": 5.747392654418945, |
| "learning_rate": 1.0237092576758034e-06, |
| "loss": 1.9026046752929688, |
| "memory(GiB)": 44.29, |
| "step": 4625, |
| "token_acc": 0.5642458100558659, |
| "train_speed(iter/s)": 0.580353 |
| }, |
| { |
| "epoch": 2.8094660194174756, |
| "grad_norm": 9.29836654663086, |
| "learning_rate": 9.919752643414992e-07, |
| "loss": 1.9644575119018555, |
| "memory(GiB)": 44.29, |
| "step": 4630, |
| "token_acc": 0.5217391304347826, |
| "train_speed(iter/s)": 0.580395 |
| }, |
| { |
| "epoch": 2.8125, |
| "grad_norm": 6.784262657165527, |
| "learning_rate": 9.607359798384785e-07, |
| "loss": 2.0778518676757813, |
| "memory(GiB)": 44.29, |
| "step": 4635, |
| "token_acc": 0.5848375451263538, |
| "train_speed(iter/s)": 0.58043 |
| }, |
| { |
| "epoch": 2.8155339805825244, |
| "grad_norm": 10.704444885253906, |
| "learning_rate": 9.299917195099927e-07, |
| "loss": 1.6303802490234376, |
| "memory(GiB)": 44.29, |
| "step": 4640, |
| "token_acc": 0.5941176470588235, |
| "train_speed(iter/s)": 0.580467 |
| }, |
| { |
| "epoch": 2.8185679611650487, |
| "grad_norm": 9.466361045837402, |
| "learning_rate": 8.997427937023018e-07, |
| "loss": 2.072785758972168, |
| "memory(GiB)": 44.29, |
| "step": 4645, |
| "token_acc": 0.5944272445820433, |
| "train_speed(iter/s)": 0.580569 |
| }, |
| { |
| "epoch": 2.8216019417475726, |
| "grad_norm": 8.331581115722656, |
| "learning_rate": 8.699895077615316e-07, |
| "loss": 1.9922773361206054, |
| "memory(GiB)": 44.29, |
| "step": 4650, |
| "token_acc": 0.5819935691318328, |
| "train_speed(iter/s)": 0.580614 |
| }, |
| { |
| "epoch": 2.824635922330097, |
| "grad_norm": 7.199705600738525, |
| "learning_rate": 8.407321620306108e-07, |
| "loss": 2.1337678909301756, |
| "memory(GiB)": 44.29, |
| "step": 4655, |
| "token_acc": 0.583011583011583, |
| "train_speed(iter/s)": 0.580633 |
| }, |
| { |
| "epoch": 2.8276699029126213, |
| "grad_norm": 11.327582359313965, |
| "learning_rate": 8.119710518462164e-07, |
| "loss": 1.815553855895996, |
| "memory(GiB)": 44.29, |
| "step": 4660, |
| "token_acc": 0.5860058309037901, |
| "train_speed(iter/s)": 0.58062 |
| }, |
| { |
| "epoch": 2.8307038834951457, |
| "grad_norm": 9.220823287963867, |
| "learning_rate": 7.837064675357997e-07, |
| "loss": 2.0095773696899415, |
| "memory(GiB)": 44.29, |
| "step": 4665, |
| "token_acc": 0.5482866043613707, |
| "train_speed(iter/s)": 0.580668 |
| }, |
| { |
| "epoch": 2.83373786407767, |
| "grad_norm": 8.487168312072754, |
| "learning_rate": 7.559386944146762e-07, |
| "loss": 1.874141311645508, |
| "memory(GiB)": 44.29, |
| "step": 4670, |
| "token_acc": 0.5662337662337662, |
| "train_speed(iter/s)": 0.58063 |
| }, |
| { |
| "epoch": 2.836771844660194, |
| "grad_norm": 10.926680564880371, |
| "learning_rate": 7.28668012783107e-07, |
| "loss": 1.9664880752563476, |
| "memory(GiB)": 44.29, |
| "step": 4675, |
| "token_acc": 0.567398119122257, |
| "train_speed(iter/s)": 0.580617 |
| }, |
| { |
| "epoch": 2.8398058252427183, |
| "grad_norm": 8.504984855651855, |
| "learning_rate": 7.018946979234997e-07, |
| "loss": 2.202426528930664, |
| "memory(GiB)": 44.29, |
| "step": 4680, |
| "token_acc": 0.5260416666666666, |
| "train_speed(iter/s)": 0.580652 |
| }, |
| { |
| "epoch": 2.8428398058252426, |
| "grad_norm": 8.054615020751953, |
| "learning_rate": 6.756190200976287e-07, |
| "loss": 2.008488082885742, |
| "memory(GiB)": 44.29, |
| "step": 4685, |
| "token_acc": 0.5735735735735735, |
| "train_speed(iter/s)": 0.580661 |
| }, |
| { |
| "epoch": 2.845873786407767, |
| "grad_norm": 7.760517597198486, |
| "learning_rate": 6.498412445438751e-07, |
| "loss": 1.9507659912109374, |
| "memory(GiB)": 44.29, |
| "step": 4690, |
| "token_acc": 0.5644699140401146, |
| "train_speed(iter/s)": 0.58068 |
| }, |
| { |
| "epoch": 2.8489077669902914, |
| "grad_norm": 8.335232734680176, |
| "learning_rate": 6.245616314746072e-07, |
| "loss": 2.067840576171875, |
| "memory(GiB)": 44.29, |
| "step": 4695, |
| "token_acc": 0.5325779036827195, |
| "train_speed(iter/s)": 0.580562 |
| }, |
| { |
| "epoch": 2.8519417475728153, |
| "grad_norm": 10.580134391784668, |
| "learning_rate": 5.997804360734827e-07, |
| "loss": 2.042892837524414, |
| "memory(GiB)": 44.29, |
| "step": 4700, |
| "token_acc": 0.5509554140127388, |
| "train_speed(iter/s)": 0.580537 |
| }, |
| { |
| "epoch": 2.85497572815534, |
| "grad_norm": 7.85345983505249, |
| "learning_rate": 5.754979084929335e-07, |
| "loss": 1.6745044708251953, |
| "memory(GiB)": 44.29, |
| "step": 4705, |
| "token_acc": 0.6067796610169491, |
| "train_speed(iter/s)": 0.580553 |
| }, |
| { |
| "epoch": 2.858009708737864, |
| "grad_norm": 6.51752233505249, |
| "learning_rate": 5.517142938516074e-07, |
| "loss": 1.8814077377319336, |
| "memory(GiB)": 44.29, |
| "step": 4710, |
| "token_acc": 0.5815384615384616, |
| "train_speed(iter/s)": 0.58047 |
| }, |
| { |
| "epoch": 2.8610436893203883, |
| "grad_norm": 9.65807819366455, |
| "learning_rate": 5.284298322319026e-07, |
| "loss": 2.0154050827026366, |
| "memory(GiB)": 44.29, |
| "step": 4715, |
| "token_acc": 0.5488958990536278, |
| "train_speed(iter/s)": 0.580497 |
| }, |
| { |
| "epoch": 2.8640776699029127, |
| "grad_norm": 6.690892696380615, |
| "learning_rate": 5.056447586775593e-07, |
| "loss": 1.9270032882690429, |
| "memory(GiB)": 44.29, |
| "step": 4720, |
| "token_acc": 0.589041095890411, |
| "train_speed(iter/s)": 0.580493 |
| }, |
| { |
| "epoch": 2.867111650485437, |
| "grad_norm": 7.775207996368408, |
| "learning_rate": 4.833593031912387e-07, |
| "loss": 1.9307134628295899, |
| "memory(GiB)": 44.29, |
| "step": 4725, |
| "token_acc": 0.584045584045584, |
| "train_speed(iter/s)": 0.580464 |
| }, |
| { |
| "epoch": 2.8701456310679614, |
| "grad_norm": 6.894526481628418, |
| "learning_rate": 4.6157369073226984e-07, |
| "loss": 1.5071632385253906, |
| "memory(GiB)": 44.29, |
| "step": 4730, |
| "token_acc": 0.6421725239616614, |
| "train_speed(iter/s)": 0.58048 |
| }, |
| { |
| "epoch": 2.8731796116504853, |
| "grad_norm": 6.513083457946777, |
| "learning_rate": 4.402881412143234e-07, |
| "loss": 2.146462249755859, |
| "memory(GiB)": 44.29, |
| "step": 4735, |
| "token_acc": 0.5506849315068493, |
| "train_speed(iter/s)": 0.580443 |
| }, |
| { |
| "epoch": 2.8762135922330097, |
| "grad_norm": 7.810274600982666, |
| "learning_rate": 4.1950286950321327e-07, |
| "loss": 1.9746414184570313, |
| "memory(GiB)": 44.29, |
| "step": 4740, |
| "token_acc": 0.5451713395638629, |
| "train_speed(iter/s)": 0.580426 |
| }, |
| { |
| "epoch": 2.879247572815534, |
| "grad_norm": 8.50667667388916, |
| "learning_rate": 3.9921808541474316e-07, |
| "loss": 1.7838300704956054, |
| "memory(GiB)": 44.29, |
| "step": 4745, |
| "token_acc": 0.5792880258899676, |
| "train_speed(iter/s)": 0.580387 |
| }, |
| { |
| "epoch": 2.8822815533980584, |
| "grad_norm": 7.629726886749268, |
| "learning_rate": 3.7943399371254686e-07, |
| "loss": 1.6623340606689454, |
| "memory(GiB)": 44.29, |
| "step": 4750, |
| "token_acc": 0.6351791530944625, |
| "train_speed(iter/s)": 0.580241 |
| }, |
| { |
| "epoch": 2.8853155339805827, |
| "grad_norm": 7.58314323425293, |
| "learning_rate": 3.601507941060622e-07, |
| "loss": 2.0338212966918947, |
| "memory(GiB)": 44.29, |
| "step": 4755, |
| "token_acc": 0.5410764872521246, |
| "train_speed(iter/s)": 0.580202 |
| }, |
| { |
| "epoch": 2.8883495145631066, |
| "grad_norm": 11.662416458129883, |
| "learning_rate": 3.41368681248494e-07, |
| "loss": 1.8530158996582031, |
| "memory(GiB)": 44.29, |
| "step": 4760, |
| "token_acc": 0.5753424657534246, |
| "train_speed(iter/s)": 0.580151 |
| }, |
| { |
| "epoch": 2.891383495145631, |
| "grad_norm": 9.764945030212402, |
| "learning_rate": 3.2308784473485956e-07, |
| "loss": 1.810487937927246, |
| "memory(GiB)": 44.29, |
| "step": 4765, |
| "token_acc": 0.6116071428571429, |
| "train_speed(iter/s)": 0.580171 |
| }, |
| { |
| "epoch": 2.8944174757281553, |
| "grad_norm": 7.281760215759277, |
| "learning_rate": 3.053084691000685e-07, |
| "loss": 1.762740707397461, |
| "memory(GiB)": 44.29, |
| "step": 4770, |
| "token_acc": 0.5967213114754099, |
| "train_speed(iter/s)": 0.580116 |
| }, |
| { |
| "epoch": 2.8974514563106797, |
| "grad_norm": 8.245015144348145, |
| "learning_rate": 2.8803073381704626e-07, |
| "loss": 1.84234561920166, |
| "memory(GiB)": 44.29, |
| "step": 4775, |
| "token_acc": 0.5969230769230769, |
| "train_speed(iter/s)": 0.580086 |
| }, |
| { |
| "epoch": 2.900485436893204, |
| "grad_norm": 6.5408935546875, |
| "learning_rate": 2.712548132949577e-07, |
| "loss": 1.842409896850586, |
| "memory(GiB)": 44.29, |
| "step": 4780, |
| "token_acc": 0.60790273556231, |
| "train_speed(iter/s)": 0.580074 |
| }, |
| { |
| "epoch": 2.903519417475728, |
| "grad_norm": 7.102424144744873, |
| "learning_rate": 2.5498087687741424e-07, |
| "loss": 1.610619354248047, |
| "memory(GiB)": 44.64, |
| "step": 4785, |
| "token_acc": 0.6106870229007634, |
| "train_speed(iter/s)": 0.580062 |
| }, |
| { |
| "epoch": 2.9065533980582523, |
| "grad_norm": 8.561509132385254, |
| "learning_rate": 2.3920908884078053e-07, |
| "loss": 1.9039691925048827, |
| "memory(GiB)": 44.64, |
| "step": 4790, |
| "token_acc": 0.5791245791245792, |
| "train_speed(iter/s)": 0.580072 |
| }, |
| { |
| "epoch": 2.9095873786407767, |
| "grad_norm": 10.727002143859863, |
| "learning_rate": 2.239396083925094e-07, |
| "loss": 1.9637014389038085, |
| "memory(GiB)": 44.64, |
| "step": 4795, |
| "token_acc": 0.5355029585798816, |
| "train_speed(iter/s)": 0.579908 |
| }, |
| { |
| "epoch": 2.912621359223301, |
| "grad_norm": 8.442927360534668, |
| "learning_rate": 2.0917258966953733e-07, |
| "loss": 2.2038265228271485, |
| "memory(GiB)": 44.64, |
| "step": 4800, |
| "token_acc": 0.5454545454545454, |
| "train_speed(iter/s)": 0.579874 |
| }, |
| { |
| "epoch": 2.9156553398058254, |
| "grad_norm": 8.77606201171875, |
| "learning_rate": 1.9490818173672486e-07, |
| "loss": 1.8866867065429687, |
| "memory(GiB)": 44.64, |
| "step": 4805, |
| "token_acc": 0.5782747603833865, |
| "train_speed(iter/s)": 0.579853 |
| }, |
| { |
| "epoch": 2.9186893203883493, |
| "grad_norm": 8.638134956359863, |
| "learning_rate": 1.8114652858536862e-07, |
| "loss": 1.8457630157470704, |
| "memory(GiB)": 44.64, |
| "step": 4810, |
| "token_acc": 0.5689655172413793, |
| "train_speed(iter/s)": 0.579838 |
| }, |
| { |
| "epoch": 2.9217233009708736, |
| "grad_norm": 6.532174587249756, |
| "learning_rate": 1.6788776913171932e-07, |
| "loss": 1.879047966003418, |
| "memory(GiB)": 44.64, |
| "step": 4815, |
| "token_acc": 0.5876923076923077, |
| "train_speed(iter/s)": 0.579795 |
| }, |
| { |
| "epoch": 2.924757281553398, |
| "grad_norm": 10.169187545776367, |
| "learning_rate": 1.5513203721559955e-07, |
| "loss": 2.0470817565917967, |
| "memory(GiB)": 44.64, |
| "step": 4820, |
| "token_acc": 0.5559322033898305, |
| "train_speed(iter/s)": 0.57982 |
| }, |
| { |
| "epoch": 2.9277912621359223, |
| "grad_norm": 7.9186015129089355, |
| "learning_rate": 1.428794615990603e-07, |
| "loss": 1.855224609375, |
| "memory(GiB)": 44.64, |
| "step": 4825, |
| "token_acc": 0.559322033898305, |
| "train_speed(iter/s)": 0.579822 |
| }, |
| { |
| "epoch": 2.9308252427184467, |
| "grad_norm": 8.12701416015625, |
| "learning_rate": 1.3113016596503769e-07, |
| "loss": 1.8893653869628906, |
| "memory(GiB)": 44.64, |
| "step": 4830, |
| "token_acc": 0.551948051948052, |
| "train_speed(iter/s)": 0.579803 |
| }, |
| { |
| "epoch": 2.9338592233009706, |
| "grad_norm": 6.349172592163086, |
| "learning_rate": 1.1988426891617054e-07, |
| "loss": 1.6970531463623046, |
| "memory(GiB)": 44.64, |
| "step": 4835, |
| "token_acc": 0.6104294478527608, |
| "train_speed(iter/s)": 0.579803 |
| }, |
| { |
| "epoch": 2.9368932038834954, |
| "grad_norm": 7.324570655822754, |
| "learning_rate": 1.0914188397355141e-07, |
| "loss": 1.8949806213378906, |
| "memory(GiB)": 44.64, |
| "step": 4840, |
| "token_acc": 0.5326797385620915, |
| "train_speed(iter/s)": 0.579773 |
| }, |
| { |
| "epoch": 2.9399271844660193, |
| "grad_norm": 7.8848114013671875, |
| "learning_rate": 9.890311957559406e-08, |
| "loss": 2.149030303955078, |
| "memory(GiB)": 44.64, |
| "step": 4845, |
| "token_acc": 0.5292207792207793, |
| "train_speed(iter/s)": 0.579762 |
| }, |
| { |
| "epoch": 2.9429611650485437, |
| "grad_norm": 7.4910783767700195, |
| "learning_rate": 8.916807907695113e-08, |
| "loss": 2.0563175201416017, |
| "memory(GiB)": 44.64, |
| "step": 4850, |
| "token_acc": 0.5728476821192053, |
| "train_speed(iter/s)": 0.5797 |
| }, |
| { |
| "epoch": 2.945995145631068, |
| "grad_norm": 10.111432075500488, |
| "learning_rate": 7.993686074744821e-08, |
| "loss": 1.8403484344482421, |
| "memory(GiB)": 44.64, |
| "step": 4855, |
| "token_acc": 0.6107594936708861, |
| "train_speed(iter/s)": 0.579672 |
| }, |
| { |
| "epoch": 2.9490291262135924, |
| "grad_norm": 8.500150680541992, |
| "learning_rate": 7.120955777112914e-08, |
| "loss": 1.9626632690429688, |
| "memory(GiB)": 44.64, |
| "step": 4860, |
| "token_acc": 0.5274390243902439, |
| "train_speed(iter/s)": 0.579711 |
| }, |
| { |
| "epoch": 2.9520631067961167, |
| "grad_norm": 9.124574661254883, |
| "learning_rate": 6.298625824527337e-08, |
| "loss": 2.131892776489258, |
| "memory(GiB)": 44.64, |
| "step": 4865, |
| "token_acc": 0.540625, |
| "train_speed(iter/s)": 0.579721 |
| }, |
| { |
| "epoch": 2.9550970873786406, |
| "grad_norm": 8.05234432220459, |
| "learning_rate": 5.526704517951897e-08, |
| "loss": 1.5919179916381836, |
| "memory(GiB)": 44.64, |
| "step": 4870, |
| "token_acc": 0.6409495548961425, |
| "train_speed(iter/s)": 0.579706 |
| }, |
| { |
| "epoch": 2.958131067961165, |
| "grad_norm": 9.051335334777832, |
| "learning_rate": 4.8051996495052096e-08, |
| "loss": 1.8254867553710938, |
| "memory(GiB)": 44.64, |
| "step": 4875, |
| "token_acc": 0.5714285714285714, |
| "train_speed(iter/s)": 0.579633 |
| }, |
| { |
| "epoch": 2.9611650485436893, |
| "grad_norm": 12.33359432220459, |
| "learning_rate": 4.134118502378548e-08, |
| "loss": 1.7505077362060546, |
| "memory(GiB)": 44.64, |
| "step": 4880, |
| "token_acc": 0.6245954692556634, |
| "train_speed(iter/s)": 0.579568 |
| }, |
| { |
| "epoch": 2.9641990291262137, |
| "grad_norm": 8.701041221618652, |
| "learning_rate": 3.5134678507636745e-08, |
| "loss": 1.7970073699951172, |
| "memory(GiB)": 44.64, |
| "step": 4885, |
| "token_acc": 0.5992366412213741, |
| "train_speed(iter/s)": 0.579584 |
| }, |
| { |
| "epoch": 2.967233009708738, |
| "grad_norm": 10.156620979309082, |
| "learning_rate": 2.9432539597851195e-08, |
| "loss": 2.0175302505493162, |
| "memory(GiB)": 44.64, |
| "step": 4890, |
| "token_acc": 0.5270758122743683, |
| "train_speed(iter/s)": 0.579648 |
| }, |
| { |
| "epoch": 2.970266990291262, |
| "grad_norm": 9.249608039855957, |
| "learning_rate": 2.423482585435788e-08, |
| "loss": 1.8950572967529298, |
| "memory(GiB)": 44.64, |
| "step": 4895, |
| "token_acc": 0.5878594249201278, |
| "train_speed(iter/s)": 0.579643 |
| }, |
| { |
| "epoch": 2.9733009708737863, |
| "grad_norm": 8.014542579650879, |
| "learning_rate": 1.9541589745186717e-08, |
| "loss": 1.8426591873168945, |
| "memory(GiB)": 44.64, |
| "step": 4900, |
| "token_acc": 0.5846153846153846, |
| "train_speed(iter/s)": 0.579651 |
| }, |
| { |
| "epoch": 2.9763349514563107, |
| "grad_norm": 7.611429691314697, |
| "learning_rate": 1.5352878645963352e-08, |
| "loss": 2.1125755310058594, |
| "memory(GiB)": 44.64, |
| "step": 4905, |
| "token_acc": 0.5577557755775577, |
| "train_speed(iter/s)": 0.579586 |
| }, |
| { |
| "epoch": 2.979368932038835, |
| "grad_norm": 12.364704132080078, |
| "learning_rate": 1.1668734839404006e-08, |
| "loss": 1.8508235931396484, |
| "memory(GiB)": 44.64, |
| "step": 4910, |
| "token_acc": 0.6196078431372549, |
| "train_speed(iter/s)": 0.579638 |
| }, |
| { |
| "epoch": 2.9824029126213594, |
| "grad_norm": 7.556114196777344, |
| "learning_rate": 8.489195514888027e-09, |
| "loss": 2.153472137451172, |
| "memory(GiB)": 44.64, |
| "step": 4915, |
| "token_acc": 0.5749235474006116, |
| "train_speed(iter/s)": 0.579639 |
| }, |
| { |
| "epoch": 2.9854368932038833, |
| "grad_norm": 5.7980546951293945, |
| "learning_rate": 5.814292768108187e-09, |
| "loss": 1.972738265991211, |
| "memory(GiB)": 44.64, |
| "step": 4920, |
| "token_acc": 0.5417721518987342, |
| "train_speed(iter/s)": 0.579645 |
| }, |
| { |
| "epoch": 2.9884708737864076, |
| "grad_norm": 8.18667221069336, |
| "learning_rate": 3.644053600726505e-09, |
| "loss": 2.019988441467285, |
| "memory(GiB)": 44.64, |
| "step": 4925, |
| "token_acc": 0.5647058823529412, |
| "train_speed(iter/s)": 0.579688 |
| }, |
| { |
| "epoch": 2.991504854368932, |
| "grad_norm": 7.362902641296387, |
| "learning_rate": 1.978499920096688e-09, |
| "loss": 1.9861087799072266, |
| "memory(GiB)": 44.64, |
| "step": 4930, |
| "token_acc": 0.5605095541401274, |
| "train_speed(iter/s)": 0.579756 |
| }, |
| { |
| "epoch": 2.9945388349514563, |
| "grad_norm": 7.812079906463623, |
| "learning_rate": 8.176485390642974e-10, |
| "loss": 1.789814567565918, |
| "memory(GiB)": 44.64, |
| "step": 4935, |
| "token_acc": 0.5348837209302325, |
| "train_speed(iter/s)": 0.579831 |
| }, |
| { |
| "epoch": 2.9975728155339807, |
| "grad_norm": 9.960822105407715, |
| "learning_rate": 1.6151117577800633e-10, |
| "loss": 2.1190773010253907, |
| "memory(GiB)": 44.64, |
| "step": 4940, |
| "token_acc": 0.5483870967741935, |
| "train_speed(iter/s)": 0.579872 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_loss": 1.9859907627105713, |
| "eval_runtime": 12.2556, |
| "eval_samples_per_second": 8.16, |
| "eval_steps_per_second": 8.16, |
| "eval_token_acc": 0.5401554404145078, |
| "step": 4944 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 4944, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.826945200557008e+17, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|