{ "best_metric": 0.88762098828324, "best_model_checkpoint": "test-glue/checkpoint-98176", "epoch": 4.0, "global_step": 98176, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.020371577574967405, "learning_rate": 1.9898142112125166e-05, "loss": 0.659133544921875, "step": 500 }, { "epoch": 0.04074315514993481, "learning_rate": 1.9796284224250328e-05, "loss": 0.5110537109375, "step": 1000 }, { "epoch": 0.06111473272490222, "learning_rate": 1.969442633637549e-05, "loss": 0.485730224609375, "step": 1500 }, { "epoch": 0.08148631029986962, "learning_rate": 1.9592568448500654e-05, "loss": 0.48240771484375, "step": 2000 }, { "epoch": 0.10185788787483703, "learning_rate": 1.9490710560625816e-05, "loss": 0.46434228515625, "step": 2500 }, { "epoch": 0.12222946544980444, "learning_rate": 1.938885267275098e-05, "loss": 0.449291259765625, "step": 3000 }, { "epoch": 0.14260104302477183, "learning_rate": 1.9286994784876142e-05, "loss": 0.4585693359375, "step": 3500 }, { "epoch": 0.16297262059973924, "learning_rate": 1.9185136897001307e-05, "loss": 0.44105908203125, "step": 4000 }, { "epoch": 0.18334419817470665, "learning_rate": 1.9083279009126468e-05, "loss": 0.444558837890625, "step": 4500 }, { "epoch": 0.20371577574967406, "learning_rate": 1.898142112125163e-05, "loss": 0.45100146484375, "step": 5000 }, { "epoch": 0.22408735332464147, "learning_rate": 1.8879563233376795e-05, "loss": 0.448494140625, "step": 5500 }, { "epoch": 0.24445893089960888, "learning_rate": 1.8777705345501956e-05, "loss": 0.4301044921875, "step": 6000 }, { "epoch": 0.2648305084745763, "learning_rate": 1.867584745762712e-05, "loss": 0.42643359375, "step": 6500 }, { "epoch": 0.28520208604954367, "learning_rate": 1.8573989569752282e-05, "loss": 0.41756689453125, "step": 7000 }, { "epoch": 0.3055736636245111, "learning_rate": 1.8472131681877447e-05, "loss": 0.4308828125, "step": 7500 }, { "epoch": 0.3259452411994785, "learning_rate": 1.837027379400261e-05, "loss": 0.41683154296875, "step": 8000 }, { "epoch": 0.34631681877444587, "learning_rate": 1.8268415906127773e-05, "loss": 0.413720703125, "step": 8500 }, { "epoch": 0.3666883963494133, "learning_rate": 1.8166558018252935e-05, "loss": 0.425869140625, "step": 9000 }, { "epoch": 0.3870599739243807, "learning_rate": 1.80647001303781e-05, "loss": 0.42243359375, "step": 9500 }, { "epoch": 0.4074315514993481, "learning_rate": 1.796284224250326e-05, "loss": 0.415326171875, "step": 10000 }, { "epoch": 0.4278031290743155, "learning_rate": 1.7860984354628423e-05, "loss": 0.4072890625, "step": 10500 }, { "epoch": 0.44817470664928294, "learning_rate": 1.7759126466753587e-05, "loss": 0.4062841796875, "step": 11000 }, { "epoch": 0.4685462842242503, "learning_rate": 1.765726857887875e-05, "loss": 0.3890205078125, "step": 11500 }, { "epoch": 0.48891786179921776, "learning_rate": 1.7555410691003914e-05, "loss": 0.3969296875, "step": 12000 }, { "epoch": 0.5092894393741851, "learning_rate": 1.7453552803129075e-05, "loss": 0.3952880859375, "step": 12500 }, { "epoch": 0.5296610169491526, "learning_rate": 1.7351694915254237e-05, "loss": 0.41177734375, "step": 13000 }, { "epoch": 0.5500325945241199, "learning_rate": 1.72498370273794e-05, "loss": 0.3807109375, "step": 13500 }, { "epoch": 0.5704041720990873, "learning_rate": 1.7147979139504566e-05, "loss": 0.403853515625, "step": 14000 }, { "epoch": 0.5907757496740548, "learning_rate": 1.7046121251629728e-05, "loss": 0.411607421875, "step": 14500 }, { "epoch": 0.6111473272490222, "learning_rate": 1.6944263363754893e-05, "loss": 0.3996162109375, "step": 15000 }, { "epoch": 0.6315189048239895, "learning_rate": 1.6842405475880054e-05, "loss": 0.3948984375, "step": 15500 }, { "epoch": 0.651890482398957, "learning_rate": 1.6740547588005215e-05, "loss": 0.389115234375, "step": 16000 }, { "epoch": 0.6722620599739244, "learning_rate": 1.663868970013038e-05, "loss": 0.403466796875, "step": 16500 }, { "epoch": 0.6926336375488917, "learning_rate": 1.6536831812255542e-05, "loss": 0.4059140625, "step": 17000 }, { "epoch": 0.7130052151238592, "learning_rate": 1.6434973924380707e-05, "loss": 0.3873095703125, "step": 17500 }, { "epoch": 0.7333767926988266, "learning_rate": 1.6333116036505868e-05, "loss": 0.3887783203125, "step": 18000 }, { "epoch": 0.753748370273794, "learning_rate": 1.623125814863103e-05, "loss": 0.395041015625, "step": 18500 }, { "epoch": 0.7741199478487614, "learning_rate": 1.6129400260756194e-05, "loss": 0.39855078125, "step": 19000 }, { "epoch": 0.7944915254237288, "learning_rate": 1.6027542372881356e-05, "loss": 0.390134765625, "step": 19500 }, { "epoch": 0.8148631029986962, "learning_rate": 1.592568448500652e-05, "loss": 0.3717578125, "step": 20000 }, { "epoch": 0.8352346805736637, "learning_rate": 1.5823826597131685e-05, "loss": 0.387556640625, "step": 20500 }, { "epoch": 0.855606258148631, "learning_rate": 1.5721968709256844e-05, "loss": 0.389892578125, "step": 21000 }, { "epoch": 0.8759778357235984, "learning_rate": 1.562011082138201e-05, "loss": 0.38671875, "step": 21500 }, { "epoch": 0.8963494132985659, "learning_rate": 1.5518252933507173e-05, "loss": 0.36665234375, "step": 22000 }, { "epoch": 0.9167209908735332, "learning_rate": 1.5416395045632335e-05, "loss": 0.3940859375, "step": 22500 }, { "epoch": 0.9370925684485006, "learning_rate": 1.53145371577575e-05, "loss": 0.382552734375, "step": 23000 }, { "epoch": 0.9574641460234681, "learning_rate": 1.5212679269882663e-05, "loss": 0.38312109375, "step": 23500 }, { "epoch": 0.9778357235984355, "learning_rate": 1.5110821382007822e-05, "loss": 0.3804765625, "step": 24000 }, { "epoch": 0.9982073011734028, "learning_rate": 1.5008963494132987e-05, "loss": 0.383294921875, "step": 24500 }, { "epoch": 1.0, "eval_accuracy": 0.8807947019867549, "eval_loss": 0.3417690396308899, "step": 24544 }, { "epoch": 1.0185788787483703, "learning_rate": 1.490710560625815e-05, "loss": 0.293595703125, "step": 25000 }, { "epoch": 1.0389504563233376, "learning_rate": 1.4805247718383314e-05, "loss": 0.29225, "step": 25500 }, { "epoch": 1.0593220338983051, "learning_rate": 1.4703389830508477e-05, "loss": 0.294130859375, "step": 26000 }, { "epoch": 1.0796936114732725, "learning_rate": 1.4601531942633638e-05, "loss": 0.28494921875, "step": 26500 }, { "epoch": 1.1000651890482398, "learning_rate": 1.4499674054758801e-05, "loss": 0.28199609375, "step": 27000 }, { "epoch": 1.1204367666232073, "learning_rate": 1.4397816166883964e-05, "loss": 0.28830078125, "step": 27500 }, { "epoch": 1.1408083441981747, "learning_rate": 1.4295958279009128e-05, "loss": 0.283265625, "step": 28000 }, { "epoch": 1.161179921773142, "learning_rate": 1.419410039113429e-05, "loss": 0.284984375, "step": 28500 }, { "epoch": 1.1815514993481095, "learning_rate": 1.4092242503259454e-05, "loss": 0.2741875, "step": 29000 }, { "epoch": 1.2019230769230769, "learning_rate": 1.3990384615384615e-05, "loss": 0.293828125, "step": 29500 }, { "epoch": 1.2222946544980444, "learning_rate": 1.3888526727509778e-05, "loss": 0.291970703125, "step": 30000 }, { "epoch": 1.2426662320730117, "learning_rate": 1.3786668839634942e-05, "loss": 0.2823515625, "step": 30500 }, { "epoch": 1.263037809647979, "learning_rate": 1.3684810951760106e-05, "loss": 0.2898671875, "step": 31000 }, { "epoch": 1.2834093872229466, "learning_rate": 1.358295306388527e-05, "loss": 0.291859375, "step": 31500 }, { "epoch": 1.303780964797914, "learning_rate": 1.3481095176010431e-05, "loss": 0.292462890625, "step": 32000 }, { "epoch": 1.3241525423728815, "learning_rate": 1.3379237288135594e-05, "loss": 0.29555859375, "step": 32500 }, { "epoch": 1.3445241199478488, "learning_rate": 1.3277379400260757e-05, "loss": 0.288212890625, "step": 33000 }, { "epoch": 1.3648956975228161, "learning_rate": 1.317552151238592e-05, "loss": 0.294849609375, "step": 33500 }, { "epoch": 1.3852672750977835, "learning_rate": 1.3073663624511084e-05, "loss": 0.289576171875, "step": 34000 }, { "epoch": 1.405638852672751, "learning_rate": 1.2971805736636247e-05, "loss": 0.289185546875, "step": 34500 }, { "epoch": 1.4260104302477183, "learning_rate": 1.2869947848761408e-05, "loss": 0.283068359375, "step": 35000 }, { "epoch": 1.4463820078226859, "learning_rate": 1.2768089960886571e-05, "loss": 0.28625390625, "step": 35500 }, { "epoch": 1.4667535853976532, "learning_rate": 1.2666232073011735e-05, "loss": 0.282048828125, "step": 36000 }, { "epoch": 1.4871251629726205, "learning_rate": 1.2564374185136898e-05, "loss": 0.28896484375, "step": 36500 }, { "epoch": 1.5074967405475879, "learning_rate": 1.2462516297262063e-05, "loss": 0.276443359375, "step": 37000 }, { "epoch": 1.5278683181225554, "learning_rate": 1.2360658409387226e-05, "loss": 0.310044921875, "step": 37500 }, { "epoch": 1.548239895697523, "learning_rate": 1.2258800521512385e-05, "loss": 0.285341796875, "step": 38000 }, { "epoch": 1.5686114732724903, "learning_rate": 1.215694263363755e-05, "loss": 0.282486328125, "step": 38500 }, { "epoch": 1.5889830508474576, "learning_rate": 1.2055084745762713e-05, "loss": 0.295353515625, "step": 39000 }, { "epoch": 1.609354628422425, "learning_rate": 1.1953226857887877e-05, "loss": 0.284724609375, "step": 39500 }, { "epoch": 1.6297262059973925, "learning_rate": 1.185136897001304e-05, "loss": 0.286890625, "step": 40000 }, { "epoch": 1.6500977835723598, "learning_rate": 1.1749511082138201e-05, "loss": 0.278595703125, "step": 40500 }, { "epoch": 1.6704693611473274, "learning_rate": 1.1647653194263364e-05, "loss": 0.30148046875, "step": 41000 }, { "epoch": 1.6908409387222947, "learning_rate": 1.1545795306388527e-05, "loss": 0.274353515625, "step": 41500 }, { "epoch": 1.711212516297262, "learning_rate": 1.144393741851369e-05, "loss": 0.29472265625, "step": 42000 }, { "epoch": 1.7315840938722293, "learning_rate": 1.1342079530638854e-05, "loss": 0.2853359375, "step": 42500 }, { "epoch": 1.7519556714471969, "learning_rate": 1.1240221642764017e-05, "loss": 0.286759765625, "step": 43000 }, { "epoch": 1.7723272490221644, "learning_rate": 1.1138363754889178e-05, "loss": 0.28123046875, "step": 43500 }, { "epoch": 1.7926988265971318, "learning_rate": 1.1036505867014341e-05, "loss": 0.2833515625, "step": 44000 }, { "epoch": 1.813070404172099, "learning_rate": 1.0934647979139506e-05, "loss": 0.281861328125, "step": 44500 }, { "epoch": 1.8334419817470664, "learning_rate": 1.083279009126467e-05, "loss": 0.2866171875, "step": 45000 }, { "epoch": 1.8538135593220337, "learning_rate": 1.0730932203389833e-05, "loss": 0.28151953125, "step": 45500 }, { "epoch": 1.8741851368970013, "learning_rate": 1.0629074315514994e-05, "loss": 0.285787109375, "step": 46000 }, { "epoch": 1.8945567144719688, "learning_rate": 1.0527216427640157e-05, "loss": 0.28494921875, "step": 46500 }, { "epoch": 1.9149282920469362, "learning_rate": 1.042535853976532e-05, "loss": 0.281390625, "step": 47000 }, { "epoch": 1.9352998696219035, "learning_rate": 1.0323500651890483e-05, "loss": 0.2822734375, "step": 47500 }, { "epoch": 1.9556714471968708, "learning_rate": 1.0221642764015647e-05, "loss": 0.2885546875, "step": 48000 }, { "epoch": 1.9760430247718384, "learning_rate": 1.011978487614081e-05, "loss": 0.2759296875, "step": 48500 }, { "epoch": 1.996414602346806, "learning_rate": 1.0017926988265971e-05, "loss": 0.27166796875, "step": 49000 }, { "epoch": 2.0, "eval_accuracy": 0.8865002547121752, "eval_loss": 0.34813082218170166, "step": 49088 }, { "epoch": 2.0167861799217732, "learning_rate": 9.916069100391134e-06, "loss": 0.2067265625, "step": 49500 }, { "epoch": 2.0371577574967406, "learning_rate": 9.814211212516298e-06, "loss": 0.18627734375, "step": 50000 }, { "epoch": 2.057529335071708, "learning_rate": 9.71235332464146e-06, "loss": 0.20591796875, "step": 50500 }, { "epoch": 2.077900912646675, "learning_rate": 9.610495436766624e-06, "loss": 0.207765625, "step": 51000 }, { "epoch": 2.098272490221643, "learning_rate": 9.508637548891787e-06, "loss": 0.200046875, "step": 51500 }, { "epoch": 2.1186440677966103, "learning_rate": 9.40677966101695e-06, "loss": 0.20955078125, "step": 52000 }, { "epoch": 2.1390156453715776, "learning_rate": 9.304921773142113e-06, "loss": 0.20408203125, "step": 52500 }, { "epoch": 2.159387222946545, "learning_rate": 9.203063885267276e-06, "loss": 0.1967578125, "step": 53000 }, { "epoch": 2.1797588005215123, "learning_rate": 9.101205997392438e-06, "loss": 0.2086171875, "step": 53500 }, { "epoch": 2.2001303780964796, "learning_rate": 8.999348109517601e-06, "loss": 0.19387890625, "step": 54000 }, { "epoch": 2.2205019556714474, "learning_rate": 8.897490221642766e-06, "loss": 0.2149140625, "step": 54500 }, { "epoch": 2.2408735332464147, "learning_rate": 8.795632333767927e-06, "loss": 0.20816015625, "step": 55000 }, { "epoch": 2.261245110821382, "learning_rate": 8.69377444589309e-06, "loss": 0.1981796875, "step": 55500 }, { "epoch": 2.2816166883963493, "learning_rate": 8.591916558018254e-06, "loss": 0.20524609375, "step": 56000 }, { "epoch": 2.3019882659713167, "learning_rate": 8.490058670143417e-06, "loss": 0.20031640625, "step": 56500 }, { "epoch": 2.322359843546284, "learning_rate": 8.38820078226858e-06, "loss": 0.2170234375, "step": 57000 }, { "epoch": 2.3427314211212518, "learning_rate": 8.286342894393743e-06, "loss": 0.202625, "step": 57500 }, { "epoch": 2.363102998696219, "learning_rate": 8.184485006518904e-06, "loss": 0.21112890625, "step": 58000 }, { "epoch": 2.3834745762711864, "learning_rate": 8.08262711864407e-06, "loss": 0.19984375, "step": 58500 }, { "epoch": 2.4038461538461537, "learning_rate": 7.980769230769232e-06, "loss": 0.2016015625, "step": 59000 }, { "epoch": 2.424217731421121, "learning_rate": 7.878911342894394e-06, "loss": 0.20296875, "step": 59500 }, { "epoch": 2.444589308996089, "learning_rate": 7.777053455019557e-06, "loss": 0.20494140625, "step": 60000 }, { "epoch": 2.464960886571056, "learning_rate": 7.67519556714472e-06, "loss": 0.19887890625, "step": 60500 }, { "epoch": 2.4853324641460235, "learning_rate": 7.573337679269883e-06, "loss": 0.2058984375, "step": 61000 }, { "epoch": 2.505704041720991, "learning_rate": 7.4714797913950464e-06, "loss": 0.2057265625, "step": 61500 }, { "epoch": 2.526075619295958, "learning_rate": 7.369621903520209e-06, "loss": 0.1991484375, "step": 62000 }, { "epoch": 2.5464471968709255, "learning_rate": 7.267764015645372e-06, "loss": 0.2016328125, "step": 62500 }, { "epoch": 2.5668187744458932, "learning_rate": 7.165906127770536e-06, "loss": 0.2006640625, "step": 63000 }, { "epoch": 2.5871903520208606, "learning_rate": 7.064048239895698e-06, "loss": 0.20136328125, "step": 63500 }, { "epoch": 2.607561929595828, "learning_rate": 6.962190352020861e-06, "loss": 0.210953125, "step": 64000 }, { "epoch": 2.627933507170795, "learning_rate": 6.8603324641460245e-06, "loss": 0.20728515625, "step": 64500 }, { "epoch": 2.648305084745763, "learning_rate": 6.758474576271187e-06, "loss": 0.19809765625, "step": 65000 }, { "epoch": 2.6686766623207303, "learning_rate": 6.65661668839635e-06, "loss": 0.20791796875, "step": 65500 }, { "epoch": 2.6890482398956976, "learning_rate": 6.554758800521513e-06, "loss": 0.1968359375, "step": 66000 }, { "epoch": 2.709419817470665, "learning_rate": 6.452900912646675e-06, "loss": 0.19569140625, "step": 66500 }, { "epoch": 2.7297913950456323, "learning_rate": 6.351043024771839e-06, "loss": 0.21024609375, "step": 67000 }, { "epoch": 2.7501629726205996, "learning_rate": 6.249185136897002e-06, "loss": 0.20680859375, "step": 67500 }, { "epoch": 2.770534550195567, "learning_rate": 6.147327249022165e-06, "loss": 0.206203125, "step": 68000 }, { "epoch": 2.7909061277705347, "learning_rate": 6.045469361147328e-06, "loss": 0.1981015625, "step": 68500 }, { "epoch": 2.811277705345502, "learning_rate": 5.94361147327249e-06, "loss": 0.20408984375, "step": 69000 }, { "epoch": 2.8316492829204694, "learning_rate": 5.841753585397653e-06, "loss": 0.20057421875, "step": 69500 }, { "epoch": 2.8520208604954367, "learning_rate": 5.739895697522817e-06, "loss": 0.19898046875, "step": 70000 }, { "epoch": 2.872392438070404, "learning_rate": 5.63803780964798e-06, "loss": 0.19495703125, "step": 70500 }, { "epoch": 2.8927640156453718, "learning_rate": 5.536179921773143e-06, "loss": 0.1974296875, "step": 71000 }, { "epoch": 2.913135593220339, "learning_rate": 5.434322033898306e-06, "loss": 0.20246484375, "step": 71500 }, { "epoch": 2.9335071707953064, "learning_rate": 5.332464146023468e-06, "loss": 0.19712109375, "step": 72000 }, { "epoch": 2.9538787483702738, "learning_rate": 5.230606258148631e-06, "loss": 0.202109375, "step": 72500 }, { "epoch": 2.974250325945241, "learning_rate": 5.128748370273794e-06, "loss": 0.2054140625, "step": 73000 }, { "epoch": 2.9946219035202084, "learning_rate": 5.026890482398958e-06, "loss": 0.202515625, "step": 73500 }, { "epoch": 3.0, "eval_accuracy": 0.884666327050433, "eval_loss": 0.4447501301765442, "step": 73632 }, { "epoch": 3.014993481095176, "learning_rate": 4.92503259452412e-06, "loss": 0.13929296875, "step": 74000 }, { "epoch": 3.0353650586701435, "learning_rate": 4.823174706649283e-06, "loss": 0.14209765625, "step": 74500 }, { "epoch": 3.055736636245111, "learning_rate": 4.721316818774446e-06, "loss": 0.14396875, "step": 75000 }, { "epoch": 3.076108213820078, "learning_rate": 4.6194589308996094e-06, "loss": 0.13436328125, "step": 75500 }, { "epoch": 3.0964797913950455, "learning_rate": 4.5176010430247726e-06, "loss": 0.13274609375, "step": 76000 }, { "epoch": 3.1168513689700132, "learning_rate": 4.415743155149935e-06, "loss": 0.1425546875, "step": 76500 }, { "epoch": 3.1372229465449806, "learning_rate": 4.313885267275098e-06, "loss": 0.1465, "step": 77000 }, { "epoch": 3.157594524119948, "learning_rate": 4.212027379400261e-06, "loss": 0.13299609375, "step": 77500 }, { "epoch": 3.1779661016949152, "learning_rate": 4.110169491525424e-06, "loss": 0.146578125, "step": 78000 }, { "epoch": 3.1983376792698825, "learning_rate": 4.0083116036505874e-06, "loss": 0.1382421875, "step": 78500 }, { "epoch": 3.21870925684485, "learning_rate": 3.90645371577575e-06, "loss": 0.14358984375, "step": 79000 }, { "epoch": 3.2390808344198176, "learning_rate": 3.8045958279009133e-06, "loss": 0.1374140625, "step": 79500 }, { "epoch": 3.259452411994785, "learning_rate": 3.702737940026076e-06, "loss": 0.14011328125, "step": 80000 }, { "epoch": 3.2798239895697523, "learning_rate": 3.6008800521512388e-06, "loss": 0.14218359375, "step": 80500 }, { "epoch": 3.3001955671447196, "learning_rate": 3.4990221642764015e-06, "loss": 0.14026953125, "step": 81000 }, { "epoch": 3.320567144719687, "learning_rate": 3.397164276401565e-06, "loss": 0.1326171875, "step": 81500 }, { "epoch": 3.3409387222946547, "learning_rate": 3.2953063885267278e-06, "loss": 0.124484375, "step": 82000 }, { "epoch": 3.361310299869622, "learning_rate": 3.1934485006518905e-06, "loss": 0.13862890625, "step": 82500 }, { "epoch": 3.3816818774445894, "learning_rate": 3.091590612777054e-06, "loss": 0.14094140625, "step": 83000 }, { "epoch": 3.4020534550195567, "learning_rate": 2.9897327249022168e-06, "loss": 0.13262890625, "step": 83500 }, { "epoch": 3.422425032594524, "learning_rate": 2.8878748370273795e-06, "loss": 0.14966015625, "step": 84000 }, { "epoch": 3.4427966101694913, "learning_rate": 2.7860169491525422e-06, "loss": 0.136, "step": 84500 }, { "epoch": 3.463168187744459, "learning_rate": 2.684159061277706e-06, "loss": 0.15666796875, "step": 85000 }, { "epoch": 3.4835397653194264, "learning_rate": 2.5823011734028685e-06, "loss": 0.12829296875, "step": 85500 }, { "epoch": 3.5039113428943938, "learning_rate": 2.4804432855280312e-06, "loss": 0.134625, "step": 86000 }, { "epoch": 3.524282920469361, "learning_rate": 2.3785853976531944e-06, "loss": 0.1383125, "step": 86500 }, { "epoch": 3.5446544980443284, "learning_rate": 2.2767275097783575e-06, "loss": 0.145578125, "step": 87000 }, { "epoch": 3.565026075619296, "learning_rate": 2.1748696219035202e-06, "loss": 0.13809765625, "step": 87500 }, { "epoch": 3.5853976531942635, "learning_rate": 2.0730117340286834e-06, "loss": 0.131578125, "step": 88000 }, { "epoch": 3.605769230769231, "learning_rate": 1.971153846153846e-06, "loss": 0.128625, "step": 88500 }, { "epoch": 3.626140808344198, "learning_rate": 1.8692959582790093e-06, "loss": 0.1566328125, "step": 89000 }, { "epoch": 3.6465123859191655, "learning_rate": 1.7674380704041722e-06, "loss": 0.13289453125, "step": 89500 }, { "epoch": 3.666883963494133, "learning_rate": 1.6655801825293353e-06, "loss": 0.13489453125, "step": 90000 }, { "epoch": 3.6872555410691, "learning_rate": 1.563722294654498e-06, "loss": 0.1342890625, "step": 90500 }, { "epoch": 3.707627118644068, "learning_rate": 1.4618644067796612e-06, "loss": 0.13596875, "step": 91000 }, { "epoch": 3.7279986962190352, "learning_rate": 1.3600065189048241e-06, "loss": 0.12205859375, "step": 91500 }, { "epoch": 3.7483702737940026, "learning_rate": 1.258148631029987e-06, "loss": 0.136265625, "step": 92000 }, { "epoch": 3.76874185136897, "learning_rate": 1.15629074315515e-06, "loss": 0.1251875, "step": 92500 }, { "epoch": 3.7891134289439377, "learning_rate": 1.0544328552803131e-06, "loss": 0.1440859375, "step": 93000 }, { "epoch": 3.809485006518905, "learning_rate": 9.52574967405476e-07, "loss": 0.14747265625, "step": 93500 }, { "epoch": 3.8298565840938723, "learning_rate": 8.507170795306389e-07, "loss": 0.12144921875, "step": 94000 }, { "epoch": 3.8502281616688396, "learning_rate": 7.488591916558019e-07, "loss": 0.138546875, "step": 94500 }, { "epoch": 3.870599739243807, "learning_rate": 6.470013037809649e-07, "loss": 0.1290390625, "step": 95000 }, { "epoch": 3.8909713168187743, "learning_rate": 5.451434159061278e-07, "loss": 0.13651953125, "step": 95500 }, { "epoch": 3.9113428943937416, "learning_rate": 4.432855280312908e-07, "loss": 0.14065234375, "step": 96000 }, { "epoch": 3.9317144719687094, "learning_rate": 3.4142764015645373e-07, "loss": 0.129953125, "step": 96500 }, { "epoch": 3.9520860495436767, "learning_rate": 2.395697522816167e-07, "loss": 0.127765625, "step": 97000 }, { "epoch": 3.972457627118644, "learning_rate": 1.3771186440677968e-07, "loss": 0.12146875, "step": 97500 }, { "epoch": 3.9928292046936114, "learning_rate": 3.585397653194264e-08, "loss": 0.132578125, "step": 98000 }, { "epoch": 4.0, "eval_accuracy": 0.88762098828324, "eval_loss": 0.5551679134368896, "step": 98176 } ], "max_steps": 98176, "num_train_epochs": 4, "total_flos": 124136967570323400, "trial_name": null, "trial_params": null }