{ "best_metric": 0.41885045170783997, "best_model_checkpoint": "output/Baichuan-13B-Chat_lora_wqs_nlp/checkpoint-2400", "epoch": 1.9992665933259994, "global_step": 2726, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 4.999865525734509e-05, "loss": 1.5452, "step": 10 }, { "epoch": 0.01, "learning_rate": 4.9994621174046976e-05, "loss": 1.3291, "step": 20 }, { "epoch": 0.02, "learning_rate": 4.9986985231938546e-05, "loss": 1.1048, "step": 30 }, { "epoch": 0.03, "learning_rate": 4.99760306731191e-05, "loss": 0.9155, "step": 40 }, { "epoch": 0.04, "learning_rate": 4.9961758952505326e-05, "loss": 0.8856, "step": 50 }, { "epoch": 0.04, "learning_rate": 4.9944171965578836e-05, "loss": 0.8052, "step": 60 }, { "epoch": 0.05, "learning_rate": 4.992327204813435e-05, "loss": 0.7332, "step": 70 }, { "epoch": 0.06, "learning_rate": 4.989906197596955e-05, "loss": 0.6982, "step": 80 }, { "epoch": 0.07, "learning_rate": 4.987154496451635e-05, "loss": 0.6811, "step": 90 }, { "epoch": 0.07, "learning_rate": 4.984072466841389e-05, "loss": 0.6323, "step": 100 }, { "epoch": 0.08, "learning_rate": 4.981016546765289e-05, "loss": 0.6234, "step": 110 }, { "epoch": 0.09, "learning_rate": 4.977308057009269e-05, "loss": 0.6125, "step": 120 }, { "epoch": 0.1, "learning_rate": 4.97327054653146e-05, "loss": 0.5977, "step": 130 }, { "epoch": 0.1, "learning_rate": 4.968904551569013e-05, "loss": 0.6179, "step": 140 }, { "epoch": 0.11, "learning_rate": 4.9642106519863544e-05, "loss": 0.5879, "step": 150 }, { "epoch": 0.12, "learning_rate": 4.959189471198171e-05, "loss": 0.5472, "step": 160 }, { "epoch": 0.12, "learning_rate": 4.953841676086613e-05, "loss": 0.602, "step": 170 }, { "epoch": 0.13, "learning_rate": 4.9481679769127275e-05, "loss": 0.5773, "step": 180 }, { "epoch": 0.14, "learning_rate": 4.9421691272221167e-05, "loss": 0.5615, "step": 190 }, { "epoch": 0.15, "learning_rate": 4.935845923744865e-05, "loss": 0.5704, "step": 200 }, { "epoch": 0.15, "eval_loss": 0.5564362406730652, "eval_runtime": 72.9469, "eval_samples_per_second": 15.107, "eval_steps_per_second": 1.522, "step": 200 }, { "epoch": 0.15, "learning_rate": 4.9291992062897183e-05, "loss": 0.5803, "step": 210 }, { "epoch": 0.16, "learning_rate": 4.922229857632545e-05, "loss": 0.5655, "step": 220 }, { "epoch": 0.17, "learning_rate": 4.9149388033990966e-05, "loss": 0.5769, "step": 230 }, { "epoch": 0.18, "learning_rate": 4.9073270119420635e-05, "loss": 0.5862, "step": 240 }, { "epoch": 0.18, "learning_rate": 4.899395494212471e-05, "loss": 0.5506, "step": 250 }, { "epoch": 0.19, "learning_rate": 4.891145303625408e-05, "loss": 0.5183, "step": 260 }, { "epoch": 0.2, "learning_rate": 4.882577535920121e-05, "loss": 0.575, "step": 270 }, { "epoch": 0.21, "learning_rate": 4.8736933290144815e-05, "loss": 0.5359, "step": 280 }, { "epoch": 0.21, "learning_rate": 4.8644938628538606e-05, "loss": 0.5302, "step": 290 }, { "epoch": 0.22, "learning_rate": 4.8549803592544076e-05, "loss": 0.5399, "step": 300 }, { "epoch": 0.23, "learning_rate": 4.845154081740783e-05, "loss": 0.5379, "step": 310 }, { "epoch": 0.23, "learning_rate": 4.835016335378343e-05, "loss": 0.5299, "step": 320 }, { "epoch": 0.24, "learning_rate": 4.8245684665998073e-05, "loss": 0.535, "step": 330 }, { "epoch": 0.25, "learning_rate": 4.813811863026436e-05, "loss": 0.5145, "step": 340 }, { "epoch": 0.26, "learning_rate": 4.802747953283732e-05, "loss": 0.5126, "step": 350 }, { "epoch": 0.26, "learning_rate": 4.791378206811704e-05, "loss": 0.5012, "step": 360 }, { "epoch": 0.27, "learning_rate": 4.7797041336696995e-05, "loss": 0.5373, "step": 370 }, { "epoch": 0.28, "learning_rate": 4.767727284335852e-05, "loss": 0.4778, "step": 380 }, { "epoch": 0.29, "learning_rate": 4.755449249501155e-05, "loss": 0.4894, "step": 390 }, { "epoch": 0.29, "learning_rate": 4.7428716598581934e-05, "loss": 0.4986, "step": 400 }, { "epoch": 0.29, "eval_loss": 0.4967592656612396, "eval_runtime": 70.0948, "eval_samples_per_second": 15.722, "eval_steps_per_second": 1.584, "step": 400 }, { "epoch": 0.3, "learning_rate": 4.729996185884571e-05, "loss": 0.4928, "step": 410 }, { "epoch": 0.31, "learning_rate": 4.716824537621042e-05, "loss": 0.4807, "step": 420 }, { "epoch": 0.32, "learning_rate": 4.703358464444397e-05, "loss": 0.4734, "step": 430 }, { "epoch": 0.32, "learning_rate": 4.689599754835122e-05, "loss": 0.4603, "step": 440 }, { "epoch": 0.33, "learning_rate": 4.6755502361398616e-05, "loss": 0.4661, "step": 450 }, { "epoch": 0.34, "learning_rate": 4.6612117743287234e-05, "loss": 0.5072, "step": 460 }, { "epoch": 0.34, "learning_rate": 4.646586273747452e-05, "loss": 0.5056, "step": 470 }, { "epoch": 0.35, "learning_rate": 4.631675676864503e-05, "loss": 0.4391, "step": 480 }, { "epoch": 0.36, "learning_rate": 4.6164819640130595e-05, "loss": 0.483, "step": 490 }, { "epoch": 0.37, "learning_rate": 4.601007153128014e-05, "loss": 0.4774, "step": 500 }, { "epoch": 0.37, "learning_rate": 4.5852532994779606e-05, "loss": 0.4848, "step": 510 }, { "epoch": 0.38, "learning_rate": 4.5692224953922266e-05, "loss": 0.5099, "step": 520 }, { "epoch": 0.39, "learning_rate": 4.5529168699829805e-05, "loss": 0.4851, "step": 530 }, { "epoch": 0.4, "learning_rate": 4.536338588862459e-05, "loss": 0.498, "step": 540 }, { "epoch": 0.4, "learning_rate": 4.519489853855341e-05, "loss": 0.5398, "step": 550 }, { "epoch": 0.41, "learning_rate": 4.50237290270632e-05, "loss": 0.4566, "step": 560 }, { "epoch": 0.42, "learning_rate": 4.484990008782893e-05, "loss": 0.4942, "step": 570 }, { "epoch": 0.43, "learning_rate": 4.467343480773433e-05, "loss": 0.4625, "step": 580 }, { "epoch": 0.43, "learning_rate": 4.44943566238056e-05, "loss": 0.4703, "step": 590 }, { "epoch": 0.44, "learning_rate": 4.431268932009865e-05, "loss": 0.464, "step": 600 }, { "epoch": 0.44, "eval_loss": 0.46821752190589905, "eval_runtime": 69.9505, "eval_samples_per_second": 15.754, "eval_steps_per_second": 1.587, "step": 600 }, { "epoch": 0.45, "learning_rate": 4.412845702454024e-05, "loss": 0.4739, "step": 610 }, { "epoch": 0.45, "learning_rate": 4.394168420572349e-05, "loss": 0.4529, "step": 620 }, { "epoch": 0.46, "learning_rate": 4.3752395669658086e-05, "loss": 0.4433, "step": 630 }, { "epoch": 0.47, "learning_rate": 4.356061655647571e-05, "loss": 0.459, "step": 640 }, { "epoch": 0.48, "learning_rate": 4.336637233709107e-05, "loss": 0.4877, "step": 650 }, { "epoch": 0.48, "learning_rate": 4.316968880981903e-05, "loss": 0.4676, "step": 660 }, { "epoch": 0.49, "learning_rate": 4.2970592096948236e-05, "loss": 0.4782, "step": 670 }, { "epoch": 0.5, "learning_rate": 4.276910864127168e-05, "loss": 0.4778, "step": 680 }, { "epoch": 0.51, "learning_rate": 4.25652652025748e-05, "loss": 0.454, "step": 690 }, { "epoch": 0.51, "learning_rate": 4.235908885408133e-05, "loss": 0.4399, "step": 700 }, { "epoch": 0.52, "learning_rate": 4.215060697885767e-05, "loss": 0.5106, "step": 710 }, { "epoch": 0.53, "learning_rate": 4.1939847266176e-05, "loss": 0.4683, "step": 720 }, { "epoch": 0.54, "learning_rate": 4.172683770783677e-05, "loss": 0.4896, "step": 730 }, { "epoch": 0.54, "learning_rate": 4.1511606594451016e-05, "loss": 0.4633, "step": 740 }, { "epoch": 0.55, "learning_rate": 4.1294182511682946e-05, "loss": 0.4486, "step": 750 }, { "epoch": 0.56, "learning_rate": 4.1074594336453384e-05, "loss": 0.4706, "step": 760 }, { "epoch": 0.56, "learning_rate": 4.085287123310455e-05, "loss": 0.4797, "step": 770 }, { "epoch": 0.57, "learning_rate": 4.062904264952657e-05, "loss": 0.4251, "step": 780 }, { "epoch": 0.58, "learning_rate": 4.0403138313246435e-05, "loss": 0.4687, "step": 790 }, { "epoch": 0.59, "learning_rate": 4.017518822747976e-05, "loss": 0.48, "step": 800 }, { "epoch": 0.59, "eval_loss": 0.4554018974304199, "eval_runtime": 70.2392, "eval_samples_per_second": 15.689, "eval_steps_per_second": 1.58, "step": 800 }, { "epoch": 0.59, "learning_rate": 3.994522266714594e-05, "loss": 0.4662, "step": 810 }, { "epoch": 0.6, "learning_rate": 3.9713272174847246e-05, "loss": 0.4837, "step": 820 }, { "epoch": 0.61, "learning_rate": 3.947936755681229e-05, "loss": 0.5014, "step": 830 }, { "epoch": 0.62, "learning_rate": 3.92435398788046e-05, "loss": 0.4764, "step": 840 }, { "epoch": 0.62, "learning_rate": 3.9005820461996604e-05, "loss": 0.4587, "step": 850 }, { "epoch": 0.63, "learning_rate": 3.876624087880979e-05, "loss": 0.484, "step": 860 }, { "epoch": 0.64, "learning_rate": 3.85248329487214e-05, "loss": 0.4285, "step": 870 }, { "epoch": 0.65, "learning_rate": 3.828162873403843e-05, "loss": 0.4742, "step": 880 }, { "epoch": 0.65, "learning_rate": 3.803666053563926e-05, "loss": 0.4533, "step": 890 }, { "epoch": 0.66, "learning_rate": 3.778996088868365e-05, "loss": 0.4395, "step": 900 }, { "epoch": 0.67, "learning_rate": 3.754156255829167e-05, "loss": 0.4519, "step": 910 }, { "epoch": 0.67, "learning_rate": 3.7291498535191996e-05, "loss": 0.4684, "step": 920 }, { "epoch": 0.68, "learning_rate": 3.703980203134029e-05, "loss": 0.4259, "step": 930 }, { "epoch": 0.69, "learning_rate": 3.678650647550822e-05, "loss": 0.4576, "step": 940 }, { "epoch": 0.7, "learning_rate": 3.6531645508843636e-05, "loss": 0.446, "step": 950 }, { "epoch": 0.7, "learning_rate": 3.6275252980402544e-05, "loss": 0.4439, "step": 960 }, { "epoch": 0.71, "learning_rate": 3.601736294265354e-05, "loss": 0.476, "step": 970 }, { "epoch": 0.72, "learning_rate": 3.5758009646955115e-05, "loss": 0.4153, "step": 980 }, { "epoch": 0.73, "learning_rate": 3.5497227539006614e-05, "loss": 0.4444, "step": 990 }, { "epoch": 0.73, "learning_rate": 3.523505125427341e-05, "loss": 0.4395, "step": 1000 }, { "epoch": 0.73, "eval_loss": 0.44357746839523315, "eval_runtime": 70.1513, "eval_samples_per_second": 15.709, "eval_steps_per_second": 1.582, "step": 1000 }, { "epoch": 0.74, "learning_rate": 3.497151561338678e-05, "loss": 0.4387, "step": 1010 }, { "epoch": 0.75, "learning_rate": 3.470665561751928e-05, "loss": 0.4559, "step": 1020 }, { "epoch": 0.76, "learning_rate": 3.444050644373611e-05, "loss": 0.4349, "step": 1030 }, { "epoch": 0.76, "learning_rate": 3.417310344032309e-05, "loss": 0.4661, "step": 1040 }, { "epoch": 0.77, "learning_rate": 3.390448212209191e-05, "loss": 0.4945, "step": 1050 }, { "epoch": 0.78, "learning_rate": 3.3634678165663325e-05, "loss": 0.4399, "step": 1060 }, { "epoch": 0.78, "learning_rate": 3.336372740472877e-05, "loss": 0.4399, "step": 1070 }, { "epoch": 0.79, "learning_rate": 3.309166582529114e-05, "loss": 0.4251, "step": 1080 }, { "epoch": 0.8, "learning_rate": 3.281852956088537e-05, "loss": 0.4385, "step": 1090 }, { "epoch": 0.81, "learning_rate": 3.254435488777941e-05, "loss": 0.4566, "step": 1100 }, { "epoch": 0.81, "learning_rate": 3.226917822015623e-05, "loss": 0.4352, "step": 1110 }, { "epoch": 0.82, "learning_rate": 3.199303610527749e-05, "loss": 0.4617, "step": 1120 }, { "epoch": 0.83, "learning_rate": 3.1715965218629595e-05, "loss": 0.476, "step": 1130 }, { "epoch": 0.84, "learning_rate": 3.143800235905268e-05, "loss": 0.4322, "step": 1140 }, { "epoch": 0.84, "learning_rate": 3.115918444385315e-05, "loss": 0.4736, "step": 1150 }, { "epoch": 0.85, "learning_rate": 3.0879548503900665e-05, "loss": 0.4322, "step": 1160 }, { "epoch": 0.86, "learning_rate": 3.0599131678709836e-05, "loss": 0.4152, "step": 1170 }, { "epoch": 0.87, "learning_rate": 3.031797121150764e-05, "loss": 0.4187, "step": 1180 }, { "epoch": 0.87, "learning_rate": 3.0036104444286954e-05, "loss": 0.4331, "step": 1190 }, { "epoch": 0.88, "learning_rate": 2.9753568812847065e-05, "loss": 0.45, "step": 1200 }, { "epoch": 0.88, "eval_loss": 0.43782830238342285, "eval_runtime": 70.0939, "eval_samples_per_second": 15.722, "eval_steps_per_second": 1.584, "step": 1200 }, { "epoch": 0.89, "learning_rate": 2.9470401841821686e-05, "loss": 0.4457, "step": 1210 }, { "epoch": 0.89, "learning_rate": 2.9186641139695108e-05, "loss": 0.4667, "step": 1220 }, { "epoch": 0.9, "learning_rate": 2.8902324393807333e-05, "loss": 0.4516, "step": 1230 }, { "epoch": 0.91, "learning_rate": 2.861748936534867e-05, "loss": 0.4695, "step": 1240 }, { "epoch": 0.92, "learning_rate": 2.8332173884344477e-05, "loss": 0.45, "step": 1250 }, { "epoch": 0.92, "learning_rate": 2.8046415844630857e-05, "loss": 0.4246, "step": 1260 }, { "epoch": 0.93, "learning_rate": 2.7760253198821822e-05, "loss": 0.4449, "step": 1270 }, { "epoch": 0.94, "learning_rate": 2.7473723953268687e-05, "loss": 0.4217, "step": 1280 }, { "epoch": 0.95, "learning_rate": 2.7186866163012232e-05, "loss": 0.4495, "step": 1290 }, { "epoch": 0.95, "learning_rate": 2.6899717926728535e-05, "loss": 0.4523, "step": 1300 }, { "epoch": 0.96, "learning_rate": 2.6612317381668915e-05, "loss": 0.4523, "step": 1310 }, { "epoch": 0.97, "learning_rate": 2.632470269859478e-05, "loss": 0.45, "step": 1320 }, { "epoch": 0.98, "learning_rate": 2.603691207670803e-05, "loss": 0.4371, "step": 1330 }, { "epoch": 0.98, "learning_rate": 2.5748983738577653e-05, "loss": 0.4221, "step": 1340 }, { "epoch": 0.99, "learning_rate": 2.5460955925063268e-05, "loss": 0.4565, "step": 1350 }, { "epoch": 1.0, "learning_rate": 2.5172866890236203e-05, "loss": 0.3892, "step": 1360 }, { "epoch": 1.0, "learning_rate": 2.48847548962988e-05, "loss": 0.3973, "step": 1370 }, { "epoch": 1.01, "learning_rate": 2.4596658208502713e-05, "loss": 0.4151, "step": 1380 }, { "epoch": 1.02, "learning_rate": 2.4308615090066735e-05, "loss": 0.3839, "step": 1390 }, { "epoch": 1.03, "learning_rate": 2.4020663797094864e-05, "loss": 0.4062, "step": 1400 }, { "epoch": 1.03, "eval_loss": 0.43284282088279724, "eval_runtime": 70.1658, "eval_samples_per_second": 15.706, "eval_steps_per_second": 1.582, "step": 1400 }, { "epoch": 1.03, "learning_rate": 2.373284257349544e-05, "loss": 0.4003, "step": 1410 }, { "epoch": 1.04, "learning_rate": 2.3445189645901806e-05, "loss": 0.4447, "step": 1420 }, { "epoch": 1.05, "learning_rate": 2.3157743218595247e-05, "loss": 0.4565, "step": 1430 }, { "epoch": 1.06, "learning_rate": 2.287054146843097e-05, "loss": 0.4265, "step": 1440 }, { "epoch": 1.06, "learning_rate": 2.2583622539767668e-05, "loss": 0.4056, "step": 1450 }, { "epoch": 1.07, "learning_rate": 2.2297024539401463e-05, "loss": 0.4074, "step": 1460 }, { "epoch": 1.08, "learning_rate": 2.2010785531504716e-05, "loss": 0.4281, "step": 1470 }, { "epoch": 1.09, "learning_rate": 2.172494353257066e-05, "loss": 0.4663, "step": 1480 }, { "epoch": 1.09, "learning_rate": 2.1439536506364274e-05, "loss": 0.4281, "step": 1490 }, { "epoch": 1.1, "learning_rate": 2.1154602358880122e-05, "loss": 0.4111, "step": 1500 }, { "epoch": 1.11, "learning_rate": 2.0870178933307948e-05, "loss": 0.4187, "step": 1510 }, { "epoch": 1.11, "learning_rate": 2.0586304005006585e-05, "loss": 0.3964, "step": 1520 }, { "epoch": 1.12, "learning_rate": 2.030301527648684e-05, "loss": 0.4331, "step": 1530 }, { "epoch": 1.13, "learning_rate": 2.0020350372404102e-05, "loss": 0.4432, "step": 1540 }, { "epoch": 1.14, "learning_rate": 1.9738346834561254e-05, "loss": 0.4056, "step": 1550 }, { "epoch": 1.14, "learning_rate": 1.945704211692262e-05, "loss": 0.4354, "step": 1560 }, { "epoch": 1.15, "learning_rate": 1.9176473580639538e-05, "loss": 0.4309, "step": 1570 }, { "epoch": 1.16, "learning_rate": 1.8896678489088304e-05, "loss": 0.4058, "step": 1580 }, { "epoch": 1.17, "learning_rate": 1.8617694002921064e-05, "loss": 0.4319, "step": 1590 }, { "epoch": 1.17, "learning_rate": 1.8339557175130383e-05, "loss": 0.4267, "step": 1600 }, { "epoch": 1.17, "eval_loss": 0.42894992232322693, "eval_runtime": 70.251, "eval_samples_per_second": 15.687, "eval_steps_per_second": 1.58, "step": 1600 }, { "epoch": 1.18, "learning_rate": 1.8062304946128073e-05, "loss": 0.3921, "step": 1610 }, { "epoch": 1.19, "learning_rate": 1.7785974138839018e-05, "loss": 0.4206, "step": 1620 }, { "epoch": 1.2, "learning_rate": 1.7510601453810594e-05, "loss": 0.4356, "step": 1630 }, { "epoch": 1.2, "learning_rate": 1.723622346433828e-05, "loss": 0.4167, "step": 1640 }, { "epoch": 1.21, "learning_rate": 1.6962876611608262e-05, "loss": 0.4233, "step": 1650 }, { "epoch": 1.22, "learning_rate": 1.6690597199857523e-05, "loss": 0.4176, "step": 1660 }, { "epoch": 1.22, "learning_rate": 1.6419421391552142e-05, "loss": 0.4241, "step": 1670 }, { "epoch": 1.23, "learning_rate": 1.6149385202584423e-05, "loss": 0.4524, "step": 1680 }, { "epoch": 1.24, "learning_rate": 1.5880524497489474e-05, "loss": 0.4647, "step": 1690 }, { "epoch": 1.25, "learning_rate": 1.5612874984681923e-05, "loss": 0.4036, "step": 1700 }, { "epoch": 1.25, "learning_rate": 1.534647221171334e-05, "loss": 0.4148, "step": 1710 }, { "epoch": 1.26, "learning_rate": 1.5081351560551021e-05, "loss": 0.408, "step": 1720 }, { "epoch": 1.27, "learning_rate": 1.4817548242878759e-05, "loss": 0.457, "step": 1730 }, { "epoch": 1.28, "learning_rate": 1.45550972954203e-05, "loss": 0.4193, "step": 1740 }, { "epoch": 1.28, "learning_rate": 1.4294033575285914e-05, "loss": 0.4149, "step": 1750 }, { "epoch": 1.29, "learning_rate": 1.4034391755342972e-05, "loss": 0.4032, "step": 1760 }, { "epoch": 1.3, "learning_rate": 1.3776206319610823e-05, "loss": 0.4078, "step": 1770 }, { "epoch": 1.31, "learning_rate": 1.3519511558680892e-05, "loss": 0.4123, "step": 1780 }, { "epoch": 1.31, "learning_rate": 1.3264341565162422e-05, "loss": 0.4089, "step": 1790 }, { "epoch": 1.32, "learning_rate": 1.3010730229154445e-05, "loss": 0.383, "step": 1800 }, { "epoch": 1.32, "eval_loss": 0.4250344932079315, "eval_runtime": 70.4814, "eval_samples_per_second": 15.635, "eval_steps_per_second": 1.575, "step": 1800 }, { "epoch": 1.33, "learning_rate": 1.2758711233744783e-05, "loss": 0.3936, "step": 1810 }, { "epoch": 1.33, "learning_rate": 1.2508318050536421e-05, "loss": 0.4127, "step": 1820 }, { "epoch": 1.34, "learning_rate": 1.2259583935202062e-05, "loss": 0.4015, "step": 1830 }, { "epoch": 1.35, "learning_rate": 1.2012541923067244e-05, "loss": 0.4203, "step": 1840 }, { "epoch": 1.36, "learning_rate": 1.176722482472286e-05, "loss": 0.4367, "step": 1850 }, { "epoch": 1.36, "learning_rate": 1.1523665221667398e-05, "loss": 0.4233, "step": 1860 }, { "epoch": 1.37, "learning_rate": 1.1281895461979732e-05, "loss": 0.4405, "step": 1870 }, { "epoch": 1.38, "learning_rate": 1.104194765602281e-05, "loss": 0.4379, "step": 1880 }, { "epoch": 1.39, "learning_rate": 1.0803853672178946e-05, "loss": 0.4146, "step": 1890 }, { "epoch": 1.39, "learning_rate": 1.0567645132617316e-05, "loss": 0.4438, "step": 1900 }, { "epoch": 1.4, "learning_rate": 1.0333353409094015e-05, "loss": 0.3915, "step": 1910 }, { "epoch": 1.41, "learning_rate": 1.0101009618785528e-05, "loss": 0.4063, "step": 1920 }, { "epoch": 1.42, "learning_rate": 9.870644620155877e-06, "loss": 0.3974, "step": 1930 }, { "epoch": 1.42, "learning_rate": 9.642289008858244e-06, "loss": 0.4015, "step": 1940 }, { "epoch": 1.43, "learning_rate": 9.41597311367142e-06, "loss": 0.4264, "step": 1950 }, { "epoch": 1.44, "learning_rate": 9.191726992471725e-06, "loss": 0.4334, "step": 1960 }, { "epoch": 1.44, "learning_rate": 8.969580428240903e-06, "loss": 0.4197, "step": 1970 }, { "epoch": 1.45, "learning_rate": 8.74956292511056e-06, "loss": 0.422, "step": 1980 }, { "epoch": 1.46, "learning_rate": 8.531703704443575e-06, "loss": 0.4077, "step": 1990 }, { "epoch": 1.47, "learning_rate": 8.316031700953086e-06, "loss": 0.4334, "step": 2000 }, { "epoch": 1.47, "eval_loss": 0.4206145107746124, "eval_runtime": 70.0533, "eval_samples_per_second": 15.731, "eval_steps_per_second": 1.585, "step": 2000 }, { "epoch": 1.47, "learning_rate": 8.102575558859612e-06, "loss": 0.4171, "step": 2010 }, { "epoch": 1.48, "learning_rate": 7.891363628086671e-06, "loss": 0.3957, "step": 2020 }, { "epoch": 1.49, "learning_rate": 7.682423960495517e-06, "loss": 0.4174, "step": 2030 }, { "epoch": 1.5, "learning_rate": 7.475784306159478e-06, "loss": 0.4187, "step": 2040 }, { "epoch": 1.5, "learning_rate": 7.271472109678379e-06, "loss": 0.3775, "step": 2050 }, { "epoch": 1.51, "learning_rate": 7.0695145065334585e-06, "loss": 0.4019, "step": 2060 }, { "epoch": 1.52, "learning_rate": 6.869938319483471e-06, "loss": 0.4143, "step": 2070 }, { "epoch": 1.53, "learning_rate": 6.67277005500222e-06, "loss": 0.3853, "step": 2080 }, { "epoch": 1.53, "learning_rate": 6.478035899758139e-06, "loss": 0.4403, "step": 2090 }, { "epoch": 1.54, "learning_rate": 6.285761717136335e-06, "loss": 0.3888, "step": 2100 }, { "epoch": 1.55, "learning_rate": 6.095973043803577e-06, "loss": 0.4123, "step": 2110 }, { "epoch": 1.55, "learning_rate": 5.908695086316701e-06, "loss": 0.4035, "step": 2120 }, { "epoch": 1.56, "learning_rate": 5.723952717774763e-06, "loss": 0.4219, "step": 2130 }, { "epoch": 1.57, "learning_rate": 5.54177047451562e-06, "loss": 0.4209, "step": 2140 }, { "epoch": 1.58, "learning_rate": 5.362172552857128e-06, "loss": 0.4286, "step": 2150 }, { "epoch": 1.58, "learning_rate": 5.18518280588354e-06, "loss": 0.4134, "step": 2160 }, { "epoch": 1.59, "learning_rate": 5.010824740277501e-06, "loss": 0.4005, "step": 2170 }, { "epoch": 1.6, "learning_rate": 4.83912151319802e-06, "loss": 0.3973, "step": 2180 }, { "epoch": 1.61, "learning_rate": 4.6700959292048875e-06, "loss": 0.3959, "step": 2190 }, { "epoch": 1.61, "learning_rate": 4.503770437229918e-06, "loss": 0.4017, "step": 2200 }, { "epoch": 1.61, "eval_loss": 0.41970890760421753, "eval_runtime": 69.9019, "eval_samples_per_second": 15.765, "eval_steps_per_second": 1.588, "step": 2200 }, { "epoch": 1.62, "learning_rate": 4.340167127595407e-06, "loss": 0.4013, "step": 2210 }, { "epoch": 1.63, "learning_rate": 4.179307729080256e-06, "loss": 0.4069, "step": 2220 }, { "epoch": 1.64, "learning_rate": 4.021213606034064e-06, "loss": 0.3857, "step": 2230 }, { "epoch": 1.64, "learning_rate": 3.8659057555396645e-06, "loss": 0.4262, "step": 2240 }, { "epoch": 1.65, "learning_rate": 3.713404804624418e-06, "loss": 0.3953, "step": 2250 }, { "epoch": 1.66, "learning_rate": 3.5637310075206544e-06, "loss": 0.4231, "step": 2260 }, { "epoch": 1.66, "learning_rate": 3.416904242975635e-06, "loss": 0.407, "step": 2270 }, { "epoch": 1.67, "learning_rate": 3.2729440116113843e-06, "loss": 0.4254, "step": 2280 }, { "epoch": 1.68, "learning_rate": 3.131869433334725e-06, "loss": 0.4054, "step": 2290 }, { "epoch": 1.69, "learning_rate": 2.9936992447979068e-06, "loss": 0.3801, "step": 2300 }, { "epoch": 1.69, "learning_rate": 2.8584517969101054e-06, "loss": 0.4335, "step": 2310 }, { "epoch": 1.7, "learning_rate": 2.7261450524001807e-06, "loss": 0.412, "step": 2320 }, { "epoch": 1.71, "learning_rate": 2.596796583430969e-06, "loss": 0.4456, "step": 2330 }, { "epoch": 1.72, "learning_rate": 2.470423569265462e-06, "loss": 0.3972, "step": 2340 }, { "epoch": 1.72, "learning_rate": 2.34704279398516e-06, "loss": 0.3935, "step": 2350 }, { "epoch": 1.73, "learning_rate": 2.2266706442609226e-06, "loss": 0.3974, "step": 2360 }, { "epoch": 1.74, "learning_rate": 2.109323107176578e-06, "loss": 0.4157, "step": 2370 }, { "epoch": 1.75, "learning_rate": 1.9950157681056318e-06, "loss": 0.4379, "step": 2380 }, { "epoch": 1.75, "learning_rate": 1.8837638086413063e-06, "loss": 0.4035, "step": 2390 }, { "epoch": 1.76, "learning_rate": 1.7755820045802145e-06, "loss": 0.4455, "step": 2400 }, { "epoch": 1.76, "eval_loss": 0.41885045170783997, "eval_runtime": 70.0957, "eval_samples_per_second": 15.721, "eval_steps_per_second": 1.584, "step": 2400 }, { "epoch": 1.77, "learning_rate": 1.6704847239599364e-06, "loss": 0.4532, "step": 2410 }, { "epoch": 1.77, "learning_rate": 1.5684859251507394e-06, "loss": 0.4213, "step": 2420 }, { "epoch": 1.78, "learning_rate": 1.4695991550017164e-06, "loss": 0.3873, "step": 2430 }, { "epoch": 1.79, "learning_rate": 1.373837547041576e-06, "loss": 0.4163, "step": 2440 }, { "epoch": 1.8, "learning_rate": 1.2812138197343392e-06, "loss": 0.3946, "step": 2450 }, { "epoch": 1.8, "learning_rate": 1.1917402747901152e-06, "loss": 0.41, "step": 2460 }, { "epoch": 1.81, "learning_rate": 1.105428795531327e-06, "loss": 0.4126, "step": 2470 }, { "epoch": 1.82, "learning_rate": 1.0222908453143804e-06, "loss": 0.4505, "step": 2480 }, { "epoch": 1.83, "learning_rate": 9.423374660072065e-07, "loss": 0.4303, "step": 2490 }, { "epoch": 1.83, "learning_rate": 8.655792765227405e-07, "loss": 0.4179, "step": 2500 }, { "epoch": 1.84, "learning_rate": 7.920264714085828e-07, "loss": 0.4376, "step": 2510 }, { "epoch": 1.85, "learning_rate": 7.216888194930272e-07, "loss": 0.4069, "step": 2520 }, { "epoch": 1.86, "learning_rate": 6.545756625876031e-07, "loss": 0.3887, "step": 2530 }, { "epoch": 1.86, "learning_rate": 5.906959142463947e-07, "loss": 0.3972, "step": 2540 }, { "epoch": 1.87, "learning_rate": 5.300580585821696e-07, "loss": 0.4078, "step": 2550 }, { "epoch": 1.88, "learning_rate": 4.7267014913956463e-07, "loss": 0.4467, "step": 2560 }, { "epoch": 1.88, "learning_rate": 4.1853980782549097e-07, "loss": 0.4567, "step": 2570 }, { "epoch": 1.89, "learning_rate": 3.6767422389682173e-07, "loss": 0.4494, "step": 2580 }, { "epoch": 1.9, "learning_rate": 3.2008015300555306e-07, "loss": 0.4344, "step": 2590 }, { "epoch": 1.91, "learning_rate": 2.757639163015774e-07, "loss": 0.4101, "step": 2600 }, { "epoch": 1.91, "eval_loss": 0.41886425018310547, "eval_runtime": 70.2179, "eval_samples_per_second": 15.694, "eval_steps_per_second": 1.581, "step": 2600 }, { "epoch": 1.91, "learning_rate": 2.347313995931466e-07, "loss": 0.4058, "step": 2610 }, { "epoch": 1.92, "learning_rate": 1.9698805256513908e-07, "loss": 0.414, "step": 2620 }, { "epoch": 1.93, "learning_rate": 1.6253888805527474e-07, "loss": 0.4254, "step": 2630 }, { "epoch": 1.94, "learning_rate": 1.3138848138835313e-07, "loss": 0.4017, "step": 2640 }, { "epoch": 1.94, "learning_rate": 1.0354096976856186e-07, "loss": 0.4258, "step": 2650 }, { "epoch": 1.95, "learning_rate": 7.900005173002712e-08, "loss": 0.4098, "step": 2660 }, { "epoch": 1.96, "learning_rate": 5.776898664557051e-08, "loss": 0.42, "step": 2670 }, { "epoch": 1.97, "learning_rate": 3.985059429383875e-08, "loss": 0.3886, "step": 2680 }, { "epoch": 1.97, "learning_rate": 2.5247254484794813e-08, "loss": 0.4276, "step": 2690 }, { "epoch": 1.98, "learning_rate": 1.3960906743634706e-08, "loss": 0.4006, "step": 2700 }, { "epoch": 1.99, "learning_rate": 5.993050053204607e-09, "loss": 0.4243, "step": 2710 }, { "epoch": 1.99, "learning_rate": 1.3447426549129117e-09, "loss": 0.4439, "step": 2720 }, { "epoch": 2.0, "step": 2726, "total_flos": 1.1289170586615415e+19, "train_loss": 0.46561298847898175, "train_runtime": 35477.4141, "train_samples_per_second": 6.149, "train_steps_per_second": 0.077 } ], "max_steps": 2726, "num_train_epochs": 2, "total_flos": 1.1289170586615415e+19, "trial_name": null, "trial_params": null }