TinyLlama-1.1B-Chat-rust-cpp-encodings/LORAs/tinyllama-encoder_3e-4/checkpoint-3710/trainer_state.json
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"eval_steps": 500,
"global_step": 3710,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{ | |
"epoch": 0.01, | |
"learning_rate": 0.00029999626535870253, | |
"loss": 1.7113, | |
"step": 10 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 0.0002999850616207776, | |
"loss": 1.6682, | |
"step": 20 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 0.00029996638934411774, | |
"loss": 1.6829, | |
"step": 30 | |
}, | |
{ | |
"epoch": 0.05, | |
"learning_rate": 0.00029994024945851293, | |
"loss": 1.4132, | |
"step": 40 | |
}, | |
{ | |
"epoch": 0.07, | |
"learning_rate": 0.00029990664326560453, | |
"loss": 1.4066, | |
"step": 50 | |
}, | |
{ | |
"epoch": 0.08, | |
"learning_rate": 0.0002998655724388202, | |
"loss": 1.5205, | |
"step": 60 | |
}, | |
{ | |
"epoch": 0.09, | |
"learning_rate": 0.0002998170390232907, | |
"loss": 1.5003, | |
"step": 70 | |
}, | |
{ | |
"epoch": 0.11, | |
"learning_rate": 0.000299761045435748, | |
"loss": 1.4407, | |
"step": 80 | |
}, | |
{ | |
"epoch": 0.12, | |
"learning_rate": 0.0002996975944644049, | |
"loss": 1.4323, | |
"step": 90 | |
}, | |
{ | |
"epoch": 0.13, | |
"learning_rate": 0.0002996266892688164, | |
"loss": 1.4167, | |
"step": 100 | |
}, | |
{ | |
"epoch": 0.15, | |
"learning_rate": 0.00029954833337972206, | |
"loss": 1.4195, | |
"step": 110 | |
}, | |
{ | |
"epoch": 0.16, | |
"learning_rate": 0.0002994625306988704, | |
"loss": 1.4177, | |
"step": 120 | |
}, | |
{ | |
"epoch": 0.18, | |
"learning_rate": 0.0002993692854988246, | |
"loss": 1.4279, | |
"step": 130 | |
}, | |
{ | |
"epoch": 0.19, | |
"learning_rate": 0.0002992686024227496, | |
"loss": 1.3698, | |
"step": 140 | |
}, | |
{ | |
"epoch": 0.2, | |
"learning_rate": 0.0002991604864841811, | |
"loss": 1.4032, | |
"step": 150 | |
}, | |
{ | |
"epoch": 0.22, | |
"learning_rate": 0.00029904494306677576, | |
"loss": 1.2865, | |
"step": 160 | |
}, | |
{ | |
"epoch": 0.23, | |
"learning_rate": 0.00029892197792404313, | |
"loss": 1.2427, | |
"step": 170 | |
}, | |
{ | |
"epoch": 0.24, | |
"learning_rate": 0.0002987915971790592, | |
"loss": 1.2646, | |
"step": 180 | |
}, | |
{ | |
"epoch": 0.26, | |
"learning_rate": 0.00029865380732416153, | |
"loss": 1.2246, | |
"step": 190 | |
}, | |
{ | |
"epoch": 0.27, | |
"learning_rate": 0.00029850861522062586, | |
"loss": 1.2994, | |
"step": 200 | |
}, | |
{ | |
"epoch": 0.28, | |
"learning_rate": 0.00029835602809832456, | |
"loss": 1.4174, | |
"step": 210 | |
}, | |
{ | |
"epoch": 0.3, | |
"learning_rate": 0.00029819605355536655, | |
"loss": 1.1977, | |
"step": 220 | |
}, | |
{ | |
"epoch": 0.31, | |
"learning_rate": 0.0002980286995577189, | |
"loss": 1.2475, | |
"step": 230 | |
}, | |
{ | |
"epoch": 0.32, | |
"learning_rate": 0.0002978539744388104, | |
"loss": 1.3046, | |
"step": 240 | |
}, | |
{ | |
"epoch": 0.34, | |
"learning_rate": 0.00029767188689911616, | |
"loss": 1.231, | |
"step": 250 | |
}, | |
{ | |
"epoch": 0.35, | |
"learning_rate": 0.00029748244600572493, | |
"loss": 1.1623, | |
"step": 260 | |
}, | |
{ | |
"epoch": 0.36, | |
"learning_rate": 0.0002972856611918871, | |
"loss": 1.22, | |
"step": 270 | |
}, | |
{ | |
"epoch": 0.38, | |
"learning_rate": 0.00029708154225654526, | |
"loss": 1.3153, | |
"step": 280 | |
}, | |
{ | |
"epoch": 0.39, | |
"learning_rate": 0.00029687009936384606, | |
"loss": 1.0245, | |
"step": 290 | |
}, | |
{ | |
"epoch": 0.4, | |
"learning_rate": 0.0002966513430426344, | |
"loss": 1.1617, | |
"step": 300 | |
}, | |
{ | |
"epoch": 0.42, | |
"learning_rate": 0.0002964252841859287, | |
"loss": 1.3038, | |
"step": 310 | |
}, | |
{ | |
"epoch": 0.43, | |
"learning_rate": 0.00029619193405037905, | |
"loss": 1.2176, | |
"step": 320 | |
}, | |
{ | |
"epoch": 0.44, | |
"learning_rate": 0.000295951304255706, | |
"loss": 1.0473, | |
"step": 330 | |
}, | |
{ | |
"epoch": 0.46, | |
"learning_rate": 0.0002957034067841225, | |
"loss": 1.0024, | |
"step": 340 | |
}, | |
{ | |
"epoch": 0.47, | |
"learning_rate": 0.00029544825397973706, | |
"loss": 1.0392, | |
"step": 350 | |
}, | |
{ | |
"epoch": 0.49, | |
"learning_rate": 0.00029518585854793896, | |
"loss": 1.1253, | |
"step": 360 | |
}, | |
{ | |
"epoch": 0.5, | |
"learning_rate": 0.00029491623355476574, | |
"loss": 1.2854, | |
"step": 370 | |
}, | |
{ | |
"epoch": 0.51, | |
"learning_rate": 0.0002946393924262526, | |
"loss": 1.2807, | |
"step": 380 | |
}, | |
{ | |
"epoch": 0.53, | |
"learning_rate": 0.0002943553489477636, | |
"loss": 1.1513, | |
"step": 390 | |
}, | |
{ | |
"epoch": 0.54, | |
"learning_rate": 0.00029406411726330553, | |
"loss": 0.971, | |
"step": 400 | |
}, | |
{ | |
"epoch": 0.55, | |
"learning_rate": 0.0002937657118748234, | |
"loss": 0.97, | |
"step": 410 | |
}, | |
{ | |
"epoch": 0.57, | |
"learning_rate": 0.00029346014764147836, | |
"loss": 1.0773, | |
"step": 420 | |
}, | |
{ | |
"epoch": 0.58, | |
"learning_rate": 0.0002931474397789078, | |
"loss": 1.1714, | |
"step": 430 | |
}, | |
{ | |
"epoch": 0.59, | |
"learning_rate": 0.0002928276038584677, | |
"loss": 1.1828, | |
"step": 440 | |
}, | |
{ | |
"epoch": 0.61, | |
"learning_rate": 0.0002925006558064572, | |
"loss": 1.0298, | |
"step": 450 | |
}, | |
{ | |
"epoch": 0.62, | |
"learning_rate": 0.0002921666119033256, | |
"loss": 1.0366, | |
"step": 460 | |
}, | |
{ | |
"epoch": 0.63, | |
"learning_rate": 0.0002918254887828617, | |
"loss": 0.9101, | |
"step": 470 | |
}, | |
{ | |
"epoch": 0.65, | |
"learning_rate": 0.0002914773034313653, | |
"loss": 0.9801, | |
"step": 480 | |
}, | |
{ | |
"epoch": 0.66, | |
"learning_rate": 0.0002911220731868018, | |
"loss": 0.9764, | |
"step": 490 | |
}, | |
{ | |
"epoch": 0.67, | |
"learning_rate": 0.00029075981573793827, | |
"loss": 0.8117, | |
"step": 500 | |
}, | |
{ | |
"epoch": 0.69, | |
"learning_rate": 0.00029039054912346313, | |
"loss": 0.9689, | |
"step": 510 | |
}, | |
{ | |
"epoch": 0.7, | |
"learning_rate": 0.0002900142917310877, | |
"loss": 1.068, | |
"step": 520 | |
}, | |
{ | |
"epoch": 0.71, | |
"learning_rate": 0.00028963106229663063, | |
"loss": 0.9002, | |
"step": 530 | |
}, | |
{ | |
"epoch": 0.73, | |
"learning_rate": 0.0002892408799030848, | |
"loss": 0.7903, | |
"step": 540 | |
}, | |
{ | |
"epoch": 0.74, | |
"learning_rate": 0.00028884376397966734, | |
"loss": 0.8156, | |
"step": 550 | |
}, | |
{ | |
"epoch": 0.75, | |
"learning_rate": 0.00028843973430085204, | |
"loss": 1.0247, | |
"step": 560 | |
}, | |
{ | |
"epoch": 0.77, | |
"learning_rate": 0.00028802881098538433, | |
"loss": 0.8413, | |
"step": 570 | |
}, | |
{ | |
"epoch": 0.78, | |
"learning_rate": 0.0002876110144952802, | |
"loss": 0.9629, | |
"step": 580 | |
}, | |
{ | |
"epoch": 0.8, | |
"learning_rate": 0.00028718636563480654, | |
"loss": 0.9488, | |
"step": 590 | |
}, | |
{ | |
"epoch": 0.81, | |
"learning_rate": 0.0002867548855494457, | |
"loss": 0.74, | |
"step": 600 | |
}, | |
{ | |
"epoch": 0.82, | |
"learning_rate": 0.00028631659572484223, | |
"loss": 0.8782, | |
"step": 610 | |
}, | |
{ | |
"epoch": 0.84, | |
"learning_rate": 0.0002858715179857333, | |
"loss": 0.7538, | |
"step": 620 | |
}, | |
{ | |
"epoch": 0.85, | |
"learning_rate": 0.0002854196744948615, | |
"loss": 1.0124, | |
"step": 630 | |
}, | |
{ | |
"epoch": 0.86, | |
"learning_rate": 0.00028496108775187177, | |
"loss": 1.0094, | |
"step": 640 | |
}, | |
{ | |
"epoch": 0.88, | |
"learning_rate": 0.00028449578059219045, | |
"loss": 0.9834, | |
"step": 650 | |
}, | |
{ | |
"epoch": 0.89, | |
"learning_rate": 0.0002840237761858889, | |
"loss": 0.8183, | |
"step": 660 | |
}, | |
{ | |
"epoch": 0.9, | |
"learning_rate": 0.00028354509803652894, | |
"loss": 0.7833, | |
"step": 670 | |
}, | |
{ | |
"epoch": 0.92, | |
"learning_rate": 0.00028305976997999307, | |
"loss": 1.0735, | |
"step": 680 | |
}, | |
{ | |
"epoch": 0.93, | |
"learning_rate": 0.00028256781618329733, | |
"loss": 0.6456, | |
"step": 690 | |
}, | |
{ | |
"epoch": 0.94, | |
"learning_rate": 0.0002820692611433879, | |
"loss": 0.8017, | |
"step": 700 | |
}, | |
{ | |
"epoch": 0.96, | |
"learning_rate": 0.00028156412968592144, | |
"loss": 0.639, | |
"step": 710 | |
}, | |
{ | |
"epoch": 0.97, | |
"learning_rate": 0.0002810524469640285, | |
"loss": 0.9926, | |
"step": 720 | |
}, | |
{ | |
"epoch": 0.98, | |
"learning_rate": 0.0002805342384570614, | |
"loss": 0.6367, | |
"step": 730 | |
}, | |
{ | |
"epoch": 1.0, | |
"learning_rate": 0.00028000952996932554, | |
"loss": 0.7394, | |
"step": 740 | |
}, | |
{ | |
"epoch": 1.01, | |
"learning_rate": 0.0002794783476287939, | |
"loss": 0.5386, | |
"step": 750 | |
}, | |
{ | |
"epoch": 1.02, | |
"learning_rate": 0.0002789407178858066, | |
"loss": 0.7581, | |
"step": 760 | |
}, | |
{ | |
"epoch": 1.04, | |
"learning_rate": 0.00027839666751175354, | |
"loss": 0.7513, | |
"step": 770 | |
}, | |
{ | |
"epoch": 1.05, | |
"learning_rate": 0.0002778462235977413, | |
"loss": 0.9431, | |
"step": 780 | |
}, | |
{ | |
"epoch": 1.06, | |
"learning_rate": 0.0002772894135532442, | |
"loss": 0.9494, | |
"step": 790 | |
}, | |
{ | |
"epoch": 1.08, | |
"learning_rate": 0.00027672626510473936, | |
"loss": 0.6138, | |
"step": 800 | |
}, | |
{ | |
"epoch": 1.09, | |
"learning_rate": 0.0002761568062943261, | |
"loss": 0.8516, | |
"step": 810 | |
}, | |
{ | |
"epoch": 1.11, | |
"learning_rate": 0.00027558106547832985, | |
"loss": 0.7844, | |
"step": 820 | |
}, | |
{ | |
"epoch": 1.12, | |
"learning_rate": 0.0002749990713258895, | |
"loss": 0.6772, | |
"step": 830 | |
}, | |
{ | |
"epoch": 1.13, | |
"learning_rate": 0.00027441085281753024, | |
"loss": 0.6081, | |
"step": 840 | |
}, | |
{ | |
"epoch": 1.15, | |
"learning_rate": 0.0002738164392437207, | |
"loss": 0.7722, | |
"step": 850 | |
}, | |
{ | |
"epoch": 1.16, | |
"learning_rate": 0.0002732158602034138, | |
"loss": 0.6299, | |
"step": 860 | |
}, | |
{ | |
"epoch": 1.17, | |
"learning_rate": 0.00027260914560257345, | |
"loss": 0.6504, | |
"step": 870 | |
}, | |
{ | |
"epoch": 1.19, | |
"learning_rate": 0.00027199632565268504, | |
"loss": 0.637, | |
"step": 880 | |
}, | |
{ | |
"epoch": 1.2, | |
"learning_rate": 0.0002713774308692511, | |
"loss": 0.7156, | |
"step": 890 | |
}, | |
{ | |
"epoch": 1.21, | |
"learning_rate": 0.00027075249207027187, | |
"loss": 0.797, | |
"step": 900 | |
}, | |
{ | |
"epoch": 1.23, | |
"learning_rate": 0.00027012154037471065, | |
"loss": 0.8322, | |
"step": 910 | |
}, | |
{ | |
"epoch": 1.24, | |
"learning_rate": 0.00026948460720094416, | |
"loss": 0.7509, | |
"step": 920 | |
}, | |
{ | |
"epoch": 1.25, | |
"learning_rate": 0.0002688417242651983, | |
"loss": 0.716, | |
"step": 930 | |
}, | |
{ | |
"epoch": 1.27, | |
"learning_rate": 0.00026819292357996847, | |
"loss": 0.6985, | |
"step": 940 | |
}, | |
{ | |
"epoch": 1.28, | |
"learning_rate": 0.00026753823745242583, | |
"loss": 0.7565, | |
"step": 950 | |
}, | |
{ | |
"epoch": 1.29, | |
"learning_rate": 0.0002668776984828083, | |
"loss": 0.9529, | |
"step": 960 | |
}, | |
{ | |
"epoch": 1.31, | |
"learning_rate": 0.0002662113395627975, | |
"loss": 0.7075, | |
"step": 970 | |
}, | |
{ | |
"epoch": 1.32, | |
"learning_rate": 0.0002655391938738806, | |
"loss": 0.7943, | |
"step": 980 | |
}, | |
{ | |
"epoch": 1.33, | |
"learning_rate": 0.00026486129488569824, | |
"loss": 0.8068, | |
"step": 990 | |
}, | |
{ | |
"epoch": 1.35, | |
"learning_rate": 0.0002641776763543778, | |
"loss": 0.7974, | |
"step": 1000 | |
}, | |
{ | |
"epoch": 1.36, | |
"learning_rate": 0.0002634883723208527, | |
"loss": 0.667, | |
"step": 1010 | |
}, | |
{ | |
"epoch": 1.37, | |
"learning_rate": 0.0002627934171091669, | |
"loss": 0.8704, | |
"step": 1020 | |
}, | |
{ | |
"epoch": 1.39, | |
"learning_rate": 0.00026209284532476636, | |
"loss": 0.5226, | |
"step": 1030 | |
}, | |
{ | |
"epoch": 1.4, | |
"learning_rate": 0.0002613866918527752, | |
"loss": 0.6797, | |
"step": 1040 | |
}, | |
{ | |
"epoch": 1.42, | |
"learning_rate": 0.0002606749918562591, | |
"loss": 0.6037, | |
"step": 1050 | |
}, | |
{ | |
"epoch": 1.43, | |
"learning_rate": 0.0002599577807744739, | |
"loss": 0.539, | |
"step": 1060 | |
}, | |
{ | |
"epoch": 1.44, | |
"learning_rate": 0.0002592350943211014, | |
"loss": 0.628, | |
"step": 1070 | |
}, | |
{ | |
"epoch": 1.46, | |
"learning_rate": 0.0002585069684824706, | |
"loss": 0.7272, | |
"step": 1080 | |
}, | |
{ | |
"epoch": 1.47, | |
"learning_rate": 0.0002577734395157657, | |
"loss": 0.6978, | |
"step": 1090 | |
}, | |
{ | |
"epoch": 1.48, | |
"learning_rate": 0.00025703454394722115, | |
"loss": 0.5347, | |
"step": 1100 | |
}, | |
{ | |
"epoch": 1.5, | |
"learning_rate": 0.00025629031857030225, | |
"loss": 0.6832, | |
"step": 1110 | |
}, | |
{ | |
"epoch": 1.51, | |
"learning_rate": 0.0002555408004438734, | |
"loss": 0.4517, | |
"step": 1120 | |
}, | |
{ | |
"epoch": 1.52, | |
"learning_rate": 0.00025478602689035253, | |
"loss": 0.4694, | |
"step": 1130 | |
}, | |
{ | |
"epoch": 1.54, | |
"learning_rate": 0.00025402603549385284, | |
"loss": 0.5531, | |
"step": 1140 | |
}, | |
{ | |
"epoch": 1.55, | |
"learning_rate": 0.000253260864098311, | |
"loss": 0.965, | |
"step": 1150 | |
}, | |
{ | |
"epoch": 1.56, | |
"learning_rate": 0.00025249055080560297, | |
"loss": 0.5112, | |
"step": 1160 | |
}, | |
{ | |
"epoch": 1.58, | |
"learning_rate": 0.0002517151339736464, | |
"loss": 0.6728, | |
"step": 1170 | |
}, | |
{ | |
"epoch": 1.59, | |
"learning_rate": 0.00025093465221449115, | |
"loss": 0.7728, | |
"step": 1180 | |
}, | |
{ | |
"epoch": 1.6, | |
"learning_rate": 0.0002501491443923959, | |
"loss": 0.5934, | |
"step": 1190 | |
}, | |
{ | |
"epoch": 1.62, | |
"learning_rate": 0.0002493586496218933, | |
"loss": 0.6981, | |
"step": 1200 | |
}, | |
{ | |
"epoch": 1.63, | |
"learning_rate": 0.0002485632072658423, | |
"loss": 0.5475, | |
"step": 1210 | |
}, | |
{ | |
"epoch": 1.64, | |
"learning_rate": 0.0002477628569334679, | |
"loss": 0.691, | |
"step": 1220 | |
}, | |
{ | |
"epoch": 1.66, | |
"learning_rate": 0.00024695763847838866, | |
"loss": 0.7188, | |
"step": 1230 | |
}, | |
{ | |
"epoch": 1.67, | |
"learning_rate": 0.00024614759199663265, | |
"loss": 0.6256, | |
"step": 1240 | |
}, | |
{ | |
"epoch": 1.68, | |
"learning_rate": 0.0002453327578246404, | |
"loss": 0.4491, | |
"step": 1250 | |
}, | |
{ | |
"epoch": 1.7, | |
"learning_rate": 0.0002445131765372567, | |
"loss": 0.7007, | |
"step": 1260 | |
}, | |
{ | |
"epoch": 1.71, | |
"learning_rate": 0.00024368888894570962, | |
"loss": 0.6256, | |
"step": 1270 | |
}, | |
{ | |
"epoch": 1.73, | |
"learning_rate": 0.000242859936095579, | |
"loss": 0.7769, | |
"step": 1280 | |
}, | |
{ | |
"epoch": 1.74, | |
"learning_rate": 0.00024202635926475223, | |
"loss": 0.5488, | |
"step": 1290 | |
}, | |
{ | |
"epoch": 1.75, | |
"learning_rate": 0.00024118819996136865, | |
"loss": 0.4278, | |
"step": 1300 | |
}, | |
{ | |
"epoch": 1.77, | |
"learning_rate": 0.00024034549992175288, | |
"loss": 0.4966, | |
"step": 1310 | |
}, | |
{ | |
"epoch": 1.78, | |
"learning_rate": 0.0002394983011083366, | |
"loss": 0.4736, | |
"step": 1320 | |
}, | |
{ | |
"epoch": 1.79, | |
"learning_rate": 0.00023864664570756873, | |
"loss": 0.6513, | |
"step": 1330 | |
}, | |
{ | |
"epoch": 1.81, | |
"learning_rate": 0.00023779057612781506, | |
"loss": 0.5389, | |
"step": 1340 | |
}, | |
{ | |
"epoch": 1.82, | |
"learning_rate": 0.00023693013499724632, | |
"loss": 0.3802, | |
"step": 1350 | |
}, | |
{ | |
"epoch": 1.83, | |
"learning_rate": 0.0002360653651617156, | |
"loss": 0.754, | |
"step": 1360 | |
}, | |
{ | |
"epoch": 1.85, | |
"learning_rate": 0.00023519630968262477, | |
"loss": 0.6096, | |
"step": 1370 | |
}, | |
{ | |
"epoch": 1.86, | |
"learning_rate": 0.00023432301183478018, | |
"loss": 0.5114, | |
"step": 1380 | |
}, | |
{ | |
"epoch": 1.87, | |
"learning_rate": 0.00023344551510423808, | |
"loss": 0.6215, | |
"step": 1390 | |
}, | |
{ | |
"epoch": 1.89, | |
"learning_rate": 0.00023256386318613873, | |
"loss": 0.5015, | |
"step": 1400 | |
}, | |
{ | |
"epoch": 1.9, | |
"learning_rate": 0.00023167809998253102, | |
"loss": 0.6841, | |
"step": 1410 | |
}, | |
{ | |
"epoch": 1.91, | |
"learning_rate": 0.00023078826960018612, | |
"loss": 0.7431, | |
"step": 1420 | |
}, | |
{ | |
"epoch": 1.93, | |
"learning_rate": 0.00022989441634840128, | |
"loss": 0.6028, | |
"step": 1430 | |
}, | |
{ | |
"epoch": 1.94, | |
"learning_rate": 0.00022899658473679344, | |
"loss": 0.6164, | |
"step": 1440 | |
}, | |
{ | |
"epoch": 1.95, | |
"learning_rate": 0.00022809481947308276, | |
"loss": 0.7823, | |
"step": 1450 | |
}, | |
{ | |
"epoch": 1.97, | |
"learning_rate": 0.0002271891654608665, | |
"loss": 0.5562, | |
"step": 1460 | |
}, | |
{ | |
"epoch": 1.98, | |
"learning_rate": 0.00022627966779738306, | |
"loss": 0.6174, | |
"step": 1470 | |
}, | |
{ | |
"epoch": 1.99, | |
"learning_rate": 0.00022536637177126615, | |
"loss": 0.671, | |
"step": 1480 | |
}, | |
{ | |
"epoch": 2.01, | |
"learning_rate": 0.00022444932286028987, | |
"loss": 0.6333, | |
"step": 1490 | |
}, | |
{ | |
"epoch": 2.02, | |
"learning_rate": 0.00022352856672910404, | |
"loss": 0.5254, | |
"step": 1500 | |
}, | |
{ | |
"epoch": 2.04, | |
"learning_rate": 0.00022260414922696027, | |
"loss": 0.4731, | |
"step": 1510 | |
}, | |
{ | |
"epoch": 2.05, | |
"learning_rate": 0.00022167611638542896, | |
"loss": 0.7305, | |
"step": 1520 | |
}, | |
{ | |
"epoch": 2.06, | |
"learning_rate": 0.00022074451441610708, | |
"loss": 0.5911, | |
"step": 1530 | |
}, | |
{ | |
"epoch": 2.08, | |
"learning_rate": 0.00021980938970831717, | |
"loss": 0.4527, | |
"step": 1540 | |
}, | |
{ | |
"epoch": 2.09, | |
"learning_rate": 0.00021887078882679723, | |
"loss": 0.4437, | |
"step": 1550 | |
}, | |
{ | |
"epoch": 2.1, | |
"learning_rate": 0.0002179287585093822, | |
"loss": 0.5298, | |
"step": 1560 | |
}, | |
{ | |
"epoch": 2.12, | |
"learning_rate": 0.00021698334566467626, | |
"loss": 0.7712, | |
"step": 1570 | |
}, | |
{ | |
"epoch": 2.13, | |
"learning_rate": 0.0002160345973697176, | |
"loss": 0.5333, | |
"step": 1580 | |
}, | |
{ | |
"epoch": 2.14, | |
"learning_rate": 0.00021508256086763368, | |
"loss": 0.6186, | |
"step": 1590 | |
}, | |
{ | |
"epoch": 2.16, | |
"learning_rate": 0.00021412728356528905, | |
"loss": 0.5444, | |
"step": 1600 | |
}, | |
{ | |
"epoch": 2.17, | |
"learning_rate": 0.00021316881303092445, | |
"loss": 0.385, | |
"step": 1610 | |
}, | |
{ | |
"epoch": 2.18, | |
"learning_rate": 0.00021220719699178848, | |
"loss": 0.5459, | |
"step": 1620 | |
}, | |
{ | |
"epoch": 2.2, | |
"learning_rate": 0.00021124248333176079, | |
"loss": 0.4447, | |
"step": 1630 | |
}, | |
{ | |
"epoch": 2.21, | |
"learning_rate": 0.0002102747200889677, | |
"loss": 0.5434, | |
"step": 1640 | |
}, | |
{ | |
"epoch": 2.22, | |
"learning_rate": 0.00020930395545339008, | |
"loss": 0.5391, | |
"step": 1650 | |
}, | |
{ | |
"epoch": 2.24, | |
"learning_rate": 0.00020833023776446407, | |
"loss": 0.5926, | |
"step": 1660 | |
}, | |
{ | |
"epoch": 2.25, | |
"learning_rate": 0.00020735361550867345, | |
"loss": 0.6304, | |
"step": 1670 | |
}, | |
{ | |
"epoch": 2.26, | |
"learning_rate": 0.0002063741373171357, | |
"loss": 0.4942, | |
"step": 1680 | |
}, | |
{ | |
"epoch": 2.28, | |
"learning_rate": 0.00020539185196318023, | |
"loss": 0.507, | |
"step": 1690 | |
}, | |
{ | |
"epoch": 2.29, | |
"learning_rate": 0.00020440680835991969, | |
"loss": 0.4658, | |
"step": 1700 | |
}, | |
{ | |
"epoch": 2.3, | |
"learning_rate": 0.00020341905555781433, | |
"loss": 0.4042, | |
"step": 1710 | |
}, | |
{ | |
"epoch": 2.32, | |
"learning_rate": 0.00020242864274222955, | |
"loss": 0.5539, | |
"step": 1720 | |
}, | |
{ | |
"epoch": 2.33, | |
"learning_rate": 0.0002014356192309868, | |
"loss": 0.4031, | |
"step": 1730 | |
}, | |
{ | |
"epoch": 2.35, | |
"learning_rate": 0.00020044003447190756, | |
"loss": 0.4963, | |
"step": 1740 | |
}, | |
{ | |
"epoch": 2.36, | |
"learning_rate": 0.00019944193804035117, | |
"loss": 0.3302, | |
"step": 1750 | |
}, | |
{ | |
"epoch": 2.37, | |
"learning_rate": 0.00019844137963674643, | |
"loss": 0.5527, | |
"step": 1760 | |
}, | |
{ | |
"epoch": 2.39, | |
"learning_rate": 0.0001974384090841164, | |
"loss": 0.485, | |
"step": 1770 | |
}, | |
{ | |
"epoch": 2.4, | |
"learning_rate": 0.00019643307632559776, | |
"loss": 0.6018, | |
"step": 1780 | |
}, | |
{ | |
"epoch": 2.41, | |
"learning_rate": 0.0001954254314219536, | |
"loss": 0.4818, | |
"step": 1790 | |
}, | |
{ | |
"epoch": 2.43, | |
"learning_rate": 0.00019441552454908096, | |
"loss": 0.5312, | |
"step": 1800 | |
}, | |
{ | |
"epoch": 2.44, | |
"learning_rate": 0.00019340340599551193, | |
"loss": 0.5679, | |
"step": 1810 | |
}, | |
{ | |
"epoch": 2.45, | |
"learning_rate": 0.00019238912615990983, | |
"loss": 0.3859, | |
"step": 1820 | |
}, | |
{ | |
"epoch": 2.47, | |
"learning_rate": 0.0001913727355485595, | |
"loss": 0.4745, | |
"step": 1830 | |
}, | |
{ | |
"epoch": 2.48, | |
"learning_rate": 0.0001903542847728523, | |
"loss": 0.5523, | |
"step": 1840 | |
}, | |
{ | |
"epoch": 2.49, | |
"learning_rate": 0.00018933382454676588, | |
"loss": 0.3833, | |
"step": 1850 | |
}, | |
{ | |
"epoch": 2.51, | |
"learning_rate": 0.00018831140568433897, | |
"loss": 0.4132, | |
"step": 1860 | |
}, | |
{ | |
"epoch": 2.52, | |
"learning_rate": 0.000187287079097141, | |
"loss": 0.3685, | |
"step": 1870 | |
}, | |
{ | |
"epoch": 2.53, | |
"learning_rate": 0.000186260895791737, | |
"loss": 0.6318, | |
"step": 1880 | |
}, | |
{ | |
"epoch": 2.55, | |
"learning_rate": 0.00018523290686714756, | |
"loss": 0.4088, | |
"step": 1890 | |
}, | |
{ | |
"epoch": 2.56, | |
"learning_rate": 0.0001842031635123046, | |
"loss": 0.5499, | |
"step": 1900 | |
}, | |
{ | |
"epoch": 2.57, | |
"learning_rate": 0.00018317171700350224, | |
"loss": 0.4856, | |
"step": 1910 | |
}, | |
{ | |
"epoch": 2.59, | |
"learning_rate": 0.0001821386187018435, | |
"loss": 0.6596, | |
"step": 1920 | |
}, | |
{ | |
"epoch": 2.6, | |
"learning_rate": 0.00018110392005068286, | |
"loss": 0.3197, | |
"step": 1930 | |
}, | |
{ | |
"epoch": 2.61, | |
"learning_rate": 0.00018006767257306447, | |
"loss": 0.3975, | |
"step": 1940 | |
}, | |
{ | |
"epoch": 2.63, | |
"learning_rate": 0.00017902992786915663, | |
"loss": 0.3733, | |
"step": 1950 | |
}, | |
{ | |
"epoch": 2.64, | |
"learning_rate": 0.00017799073761368234, | |
"loss": 0.4203, | |
"step": 1960 | |
}, | |
{ | |
"epoch": 2.65, | |
"learning_rate": 0.00017695015355334624, | |
"loss": 0.4533, | |
"step": 1970 | |
}, | |
{ | |
"epoch": 2.67, | |
"learning_rate": 0.00017590822750425774, | |
"loss": 0.3846, | |
"step": 1980 | |
}, | |
{ | |
"epoch": 2.68, | |
"learning_rate": 0.0001748650113493508, | |
"loss": 0.4219, | |
"step": 1990 | |
}, | |
{ | |
"epoch": 2.7, | |
"learning_rate": 0.0001738205570358006, | |
"loss": 0.2895, | |
"step": 2000 | |
}, | |
{ | |
"epoch": 2.71, | |
"learning_rate": 0.00017277491657243668, | |
"loss": 0.3751, | |
"step": 2010 | |
}, | |
{ | |
"epoch": 2.72, | |
"learning_rate": 0.000171728142027153, | |
"loss": 0.508, | |
"step": 2020 | |
}, | |
{ | |
"epoch": 2.74, | |
"learning_rate": 0.00017068028552431566, | |
"loss": 0.5577, | |
"step": 2030 | |
}, | |
{ | |
"epoch": 2.75, | |
"learning_rate": 0.00016963139924216675, | |
"loss": 0.4342, | |
"step": 2040 | |
}, | |
{ | |
"epoch": 2.76, | |
"learning_rate": 0.00016858153541022676, | |
"loss": 0.3891, | |
"step": 2050 | |
}, | |
{ | |
"epoch": 2.78, | |
"learning_rate": 0.00016753074630669327, | |
"loss": 0.4064, | |
"step": 2060 | |
}, | |
{ | |
"epoch": 2.79, | |
"learning_rate": 0.00016647908425583804, | |
"loss": 0.3571, | |
"step": 2070 | |
}, | |
{ | |
"epoch": 2.8, | |
"learning_rate": 0.00016542660162540136, | |
"loss": 0.53, | |
"step": 2080 | |
}, | |
{ | |
"epoch": 2.82, | |
"learning_rate": 0.00016437335082398455, | |
"loss": 0.4457, | |
"step": 2090 | |
}, | |
{ | |
"epoch": 2.83, | |
"learning_rate": 0.00016331938429844022, | |
"loss": 0.4608, | |
"step": 2100 | |
}, | |
{ | |
"epoch": 2.84, | |
"learning_rate": 0.0001622647545312604, | |
"loss": 0.4084, | |
"step": 2110 | |
}, | |
{ | |
"epoch": 2.86, | |
"learning_rate": 0.00016120951403796364, | |
"loss": 0.6194, | |
"step": 2120 | |
}, | |
{ | |
"epoch": 2.87, | |
"learning_rate": 0.0001601537153644795, | |
"loss": 0.5458, | |
"step": 2130 | |
}, | |
{ | |
"epoch": 2.88, | |
"learning_rate": 0.00015909741108453243, | |
"loss": 0.3484, | |
"step": 2140 | |
}, | |
{ | |
"epoch": 2.9, | |
"learning_rate": 0.00015804065379702352, | |
"loss": 0.2758, | |
"step": 2150 | |
}, | |
{ | |
"epoch": 2.91, | |
"learning_rate": 0.00015698349612341156, | |
"loss": 0.2401, | |
"step": 2160 | |
}, | |
{ | |
"epoch": 2.92, | |
"learning_rate": 0.00015592599070509265, | |
"loss": 0.4804, | |
"step": 2170 | |
}, | |
{ | |
"epoch": 2.94, | |
"learning_rate": 0.00015486819020077886, | |
"loss": 0.4722, | |
"step": 2180 | |
}, | |
{ | |
"epoch": 2.95, | |
"learning_rate": 0.0001538101472838762, | |
"loss": 0.4195, | |
"step": 2190 | |
}, | |
{ | |
"epoch": 2.96, | |
"learning_rate": 0.00015275191463986159, | |
"loss": 0.4658, | |
"step": 2200 | |
}, | |
{ | |
"epoch": 2.98, | |
"learning_rate": 0.00015169354496365948, | |
"loss": 0.5397, | |
"step": 2210 | |
}, | |
{ | |
"epoch": 2.99, | |
"learning_rate": 0.0001506350909570179, | |
"loss": 0.4333, | |
"step": 2220 | |
}, | |
{ | |
"epoch": 3.01, | |
"learning_rate": 0.0001495766053258841, | |
"loss": 0.4128, | |
"step": 2230 | |
}, | |
{ | |
"epoch": 3.02, | |
"learning_rate": 0.00014851814077778016, | |
"loss": 0.4373, | |
"step": 2240 | |
}, | |
{ | |
"epoch": 3.03, | |
"learning_rate": 0.00014745975001917812, | |
"loss": 0.3217, | |
"step": 2250 | |
}, | |
{ | |
"epoch": 3.05, | |
"learning_rate": 0.00014640148575287593, | |
"loss": 0.3968, | |
"step": 2260 | |
}, | |
{ | |
"epoch": 3.06, | |
"learning_rate": 0.0001453434006753726, | |
"loss": 0.398, | |
"step": 2270 | |
}, | |
{ | |
"epoch": 3.07, | |
"learning_rate": 0.00014428554747424448, | |
"loss": 0.3392, | |
"step": 2280 | |
}, | |
{ | |
"epoch": 3.09, | |
"learning_rate": 0.0001432279788255217, | |
"loss": 0.3295, | |
"step": 2290 | |
}, | |
{ | |
"epoch": 3.1, | |
"learning_rate": 0.00014217074739106478, | |
"loss": 0.3666, | |
"step": 2300 | |
}, | |
{ | |
"epoch": 3.11, | |
"learning_rate": 0.00014111390581594284, | |
"loss": 0.4743, | |
"step": 2310 | |
}, | |
{ | |
"epoch": 3.13, | |
"learning_rate": 0.00014005750672581177, | |
"loss": 0.3309, | |
"step": 2320 | |
}, | |
{ | |
"epoch": 3.14, | |
"learning_rate": 0.00013900160272429374, | |
"loss": 0.514, | |
"step": 2330 | |
}, | |
{ | |
"epoch": 3.15, | |
"learning_rate": 0.000137946246390358, | |
"loss": 0.3525, | |
"step": 2340 | |
}, | |
{ | |
"epoch": 3.17, | |
"learning_rate": 0.00013689149027570246, | |
"loss": 0.3963, | |
"step": 2350 | |
}, | |
{ | |
"epoch": 3.18, | |
"learning_rate": 0.00013583738690213718, | |
"loss": 0.268, | |
"step": 2360 | |
}, | |
{ | |
"epoch": 3.19, | |
"learning_rate": 0.00013478398875896858, | |
"loss": 0.5475, | |
"step": 2370 | |
}, | |
{ | |
"epoch": 3.21, | |
"learning_rate": 0.0001337313483003862, | |
"loss": 0.4491, | |
"step": 2380 | |
}, | |
{ | |
"epoch": 3.22, | |
"learning_rate": 0.0001326795179428503, | |
"loss": 0.3173, | |
"step": 2390 | |
}, | |
{ | |
"epoch": 3.23, | |
"learning_rate": 0.00013162855006248217, | |
"loss": 0.4052, | |
"step": 2400 | |
}, | |
{ | |
"epoch": 3.25, | |
"learning_rate": 0.00013057849699245574, | |
"loss": 0.4724, | |
"step": 2410 | |
}, | |
{ | |
"epoch": 3.26, | |
"learning_rate": 0.0001295294110203919, | |
"loss": 0.345, | |
"step": 2420 | |
}, | |
{ | |
"epoch": 3.27, | |
"learning_rate": 0.00012848134438575454, | |
"loss": 0.2382, | |
"step": 2430 | |
}, | |
{ | |
"epoch": 3.29, | |
"learning_rate": 0.0001274343492772494, | |
"loss": 0.3374, | |
"step": 2440 | |
}, | |
{ | |
"epoch": 3.3, | |
"learning_rate": 0.00012638847783022554, | |
"loss": 0.3927, | |
"step": 2450 | |
}, | |
{ | |
"epoch": 3.32, | |
"learning_rate": 0.0001253437821240789, | |
"loss": 0.5055, | |
"step": 2460 | |
}, | |
{ | |
"epoch": 3.33, | |
"learning_rate": 0.00012430031417965908, | |
"loss": 0.3408, | |
"step": 2470 | |
}, | |
{ | |
"epoch": 3.34, | |
"learning_rate": 0.0001232581259566792, | |
"loss": 0.2692, | |
"step": 2480 | |
}, | |
{ | |
"epoch": 3.36, | |
"learning_rate": 0.00012221726935112833, | |
"loss": 0.4541, | |
"step": 2490 | |
}, | |
{ | |
"epoch": 3.37, | |
"learning_rate": 0.00012117779619268726, | |
"loss": 0.3564, | |
"step": 2500 | |
}, | |
{ | |
"epoch": 3.38, | |
"learning_rate": 0.00012013975824214778, | |
"loss": 0.307, | |
"step": 2510 | |
}, | |
{ | |
"epoch": 3.4, | |
"learning_rate": 0.00011910320718883525, | |
"loss": 0.3651, | |
"step": 2520 | |
}, | |
{ | |
"epoch": 3.41, | |
"learning_rate": 0.00011806819464803458, | |
"loss": 0.4532, | |
"step": 2530 | |
}, | |
{ | |
"epoch": 3.42, | |
"learning_rate": 0.00011703477215842013, | |
"loss": 0.324, | |
"step": 2540 | |
}, | |
{ | |
"epoch": 3.44, | |
"learning_rate": 0.00011600299117948933, | |
"loss": 0.4484, | |
"step": 2550 | |
}, | |
{ | |
"epoch": 3.45, | |
"learning_rate": 0.0001149729030890003, | |
"loss": 0.3729, | |
"step": 2560 | |
}, | |
{ | |
"epoch": 3.46, | |
"learning_rate": 0.0001139445591804133, | |
"loss": 0.2915, | |
"step": 2570 | |
}, | |
{ | |
"epoch": 3.48, | |
"learning_rate": 0.00011291801066033667, | |
"loss": 0.4071, | |
"step": 2580 | |
}, | |
{ | |
"epoch": 3.49, | |
"learning_rate": 0.00011189330864597714, | |
"loss": 0.3065, | |
"step": 2590 | |
}, | |
{ | |
"epoch": 3.5, | |
"learning_rate": 0.00011087050416259409, | |
"loss": 0.34, | |
"step": 2600 | |
}, | |
{ | |
"epoch": 3.52, | |
"learning_rate": 0.00010984964814095903, | |
"loss": 0.3662, | |
"step": 2610 | |
}, | |
{ | |
"epoch": 3.53, | |
"learning_rate": 0.00010883079141481938, | |
"loss": 0.2805, | |
"step": 2620 | |
}, | |
{ | |
"epoch": 3.54, | |
"learning_rate": 0.0001078139847183673, | |
"loss": 0.3726, | |
"step": 2630 | |
}, | |
{ | |
"epoch": 3.56, | |
"learning_rate": 0.00010679927868371316, | |
"loss": 0.3502, | |
"step": 2640 | |
}, | |
{ | |
"epoch": 3.57, | |
"learning_rate": 0.00010578672383836435, | |
"loss": 0.3235, | |
"step": 2650 | |
}, | |
{ | |
"epoch": 3.58, | |
"learning_rate": 0.00010477637060270957, | |
"loss": 0.3663, | |
"step": 2660 | |
}, | |
{ | |
"epoch": 3.6, | |
"learning_rate": 0.00010376826928750763, | |
"loss": 0.3284, | |
"step": 2670 | |
}, | |
{ | |
"epoch": 3.61, | |
"learning_rate": 0.0001027624700913826, | |
"loss": 0.322, | |
"step": 2680 | |
}, | |
{ | |
"epoch": 3.63, | |
"learning_rate": 0.000101759023098324, | |
"loss": 0.236, | |
"step": 2690 | |
}, | |
{ | |
"epoch": 3.64, | |
"learning_rate": 0.00010075797827519295, | |
"loss": 0.2304, | |
"step": 2700 | |
}, | |
{ | |
"epoch": 3.65, | |
"learning_rate": 9.975938546923396e-05, | |
"loss": 0.4279, | |
"step": 2710 | |
}, | |
{ | |
"epoch": 3.67, | |
"learning_rate": 9.876329440559268e-05, | |
"loss": 0.3311, | |
"step": 2720 | |
}, | |
{ | |
"epoch": 3.68, | |
"learning_rate": 9.776975468484019e-05, | |
"loss": 0.3676, | |
"step": 2730 | |
}, | |
{ | |
"epoch": 3.69, | |
"learning_rate": 9.677881578050272e-05, | |
"loss": 0.2316, | |
"step": 2740 | |
}, | |
{ | |
"epoch": 3.71, | |
"learning_rate": 9.579052703659831e-05, | |
"loss": 0.345, | |
"step": 2750 | |
}, | |
{ | |
"epoch": 3.72, | |
"learning_rate": 9.480493766517982e-05, | |
"loss": 0.4248, | |
"step": 2760 | |
}, | |
{ | |
"epoch": 3.73, | |
"learning_rate": 9.382209674388407e-05, | |
"loss": 0.3895, | |
"step": 2770 | |
}, | |
{ | |
"epoch": 3.75, | |
"learning_rate": 9.284205321348839e-05, | |
"loss": 0.2675, | |
"step": 2780 | |
}, | |
{ | |
"epoch": 3.76, | |
"learning_rate": 9.186485587547324e-05, | |
"loss": 0.4064, | |
"step": 2790 | |
}, | |
{ | |
"epoch": 3.77, | |
"learning_rate": 9.08905533895925e-05, | |
"loss": 0.3509, | |
"step": 2800 | |
}, | |
{ | |
"epoch": 3.79, | |
"learning_rate": 8.991919427145014e-05, | |
"loss": 0.3982, | |
"step": 2810 | |
}, | |
{ | |
"epoch": 3.8, | |
"learning_rate": 8.895082689008442e-05, | |
"loss": 0.3352, | |
"step": 2820 | |
}, | |
{ | |
"epoch": 3.81, | |
"learning_rate": 8.798549946555971e-05, | |
"loss": 0.3933, | |
"step": 2830 | |
}, | |
{ | |
"epoch": 3.83, | |
"learning_rate": 8.702326006656477e-05, | |
"loss": 0.4379, | |
"step": 2840 | |
}, | |
{ | |
"epoch": 3.84, | |
"learning_rate": 8.606415660801956e-05, | |
"loss": 0.2152, | |
"step": 2850 | |
}, | |
{ | |
"epoch": 3.85, | |
"learning_rate": 8.510823684868922e-05, | |
"loss": 0.2329, | |
"step": 2860 | |
}, | |
{ | |
"epoch": 3.87, | |
"learning_rate": 8.415554838880595e-05, | |
"loss": 0.3286, | |
"step": 2870 | |
}, | |
{ | |
"epoch": 3.88, | |
"learning_rate": 8.320613866769852e-05, | |
"loss": 0.3072, | |
"step": 2880 | |
}, | |
{ | |
"epoch": 3.89, | |
"learning_rate": 8.22600549614303e-05, | |
"loss": 0.3184, | |
"step": 2890 | |
}, | |
{ | |
"epoch": 3.91, | |
"learning_rate": 8.131734438044519e-05, | |
"loss": 0.4206, | |
"step": 2900 | |
}, | |
{ | |
"epoch": 3.92, | |
"learning_rate": 8.037805386722124e-05, | |
"loss": 0.4384, | |
"step": 2910 | |
}, | |
{ | |
"epoch": 3.94, | |
"learning_rate": 7.944223019393373e-05, | |
"loss": 0.2849, | |
"step": 2920 | |
}, | |
{ | |
"epoch": 3.95, | |
"learning_rate": 7.850991996012589e-05, | |
"loss": 0.1819, | |
"step": 2930 | |
}, | |
{ | |
"epoch": 3.96, | |
"learning_rate": 7.758116959038828e-05, | |
"loss": 0.3399, | |
"step": 2940 | |
}, | |
{ | |
"epoch": 3.98, | |
"learning_rate": 7.665602533204745e-05, | |
"loss": 0.3788, | |
"step": 2950 | |
}, | |
{ | |
"epoch": 3.99, | |
"learning_rate": 7.573453325286273e-05, | |
"loss": 0.442, | |
"step": 2960 | |
}, | |
{ | |
"epoch": 4.0, | |
"learning_rate": 7.481673923873248e-05, | |
"loss": 0.37, | |
"step": 2970 | |
}, | |
{ | |
"epoch": 4.02, | |
"learning_rate": 7.390268899140912e-05, | |
"loss": 0.3434, | |
"step": 2980 | |
}, | |
{ | |
"epoch": 4.03, | |
"learning_rate": 7.299242802622322e-05, | |
"loss": 0.26, | |
"step": 2990 | |
}, | |
{ | |
"epoch": 4.04, | |
"learning_rate": 7.208600166981743e-05, | |
"loss": 0.3701, | |
"step": 3000 | |
}, | |
{ | |
"epoch": 4.06, | |
"learning_rate": 7.118345505788912e-05, | |
"loss": 0.3749, | |
"step": 3010 | |
}, | |
{ | |
"epoch": 4.07, | |
"learning_rate": 7.028483313294289e-05, | |
"loss": 0.2764, | |
"step": 3020 | |
}, | |
{ | |
"epoch": 4.08, | |
"learning_rate": 6.939018064205281e-05, | |
"loss": 0.3949, | |
"step": 3030 | |
}, | |
{ | |
"epoch": 4.1, | |
"learning_rate": 6.849954213463407e-05, | |
"loss": 0.3791, | |
"step": 3040 | |
}, | |
{ | |
"epoch": 4.11, | |
"learning_rate": 6.761296196022468e-05, | |
"loss": 0.3701, | |
"step": 3050 | |
}, | |
{ | |
"epoch": 4.12, | |
"learning_rate": 6.673048426627714e-05, | |
"loss": 0.3115, | |
"step": 3060 | |
}, | |
{ | |
"epoch": 4.14, | |
"learning_rate": 6.585215299595985e-05, | |
"loss": 0.4056, | |
"step": 3070 | |
}, | |
{ | |
"epoch": 4.15, | |
"learning_rate": 6.497801188596934e-05, | |
"loss": 0.2566, | |
"step": 3080 | |
}, | |
{ | |
"epoch": 4.16, | |
"learning_rate": 6.410810446435216e-05, | |
"loss": 0.3565, | |
"step": 3090 | |
}, | |
{ | |
"epoch": 4.18, | |
"learning_rate": 6.324247404833736e-05, | |
"loss": 0.222, | |
"step": 3100 | |
}, | |
{ | |
"epoch": 4.19, | |
"learning_rate": 6.23811637421796e-05, | |
"loss": 0.1795, | |
"step": 3110 | |
}, | |
{ | |
"epoch": 4.2, | |
"learning_rate": 6.152421643501283e-05, | |
"loss": 0.316, | |
"step": 3120 | |
}, | |
{ | |
"epoch": 4.22, | |
"learning_rate": 6.0671674798714305e-05, | |
"loss": 0.3918, | |
"step": 3130 | |
}, | |
{ | |
"epoch": 4.23, | |
"learning_rate": 5.9823581285780096e-05, | |
"loss": 0.2051, | |
"step": 3140 | |
}, | |
{ | |
"epoch": 4.25, | |
"learning_rate": 5.897997812721103e-05, | |
"loss": 0.2124, | |
"step": 3150 | |
}, | |
{ | |
"epoch": 4.26, | |
"learning_rate": 5.814090733040956e-05, | |
"loss": 0.2791, | |
"step": 3160 | |
}, | |
{ | |
"epoch": 4.27, | |
"learning_rate": 5.7306410677088524e-05, | |
"loss": 0.2585, | |
"step": 3170 | |
}, | |
{ | |
"epoch": 4.29, | |
"learning_rate": 5.6476529721189974e-05, | |
"loss": 0.3343, | |
"step": 3180 | |
}, | |
{ | |
"epoch": 4.3, | |
"learning_rate": 5.565130578681649e-05, | |
"loss": 0.3006, | |
"step": 3190 | |
}, | |
{ | |
"epoch": 4.31, | |
"learning_rate": 5.483077996617325e-05, | |
"loss": 0.309, | |
"step": 3200 | |
}, | |
{ | |
"epoch": 4.33, | |
"learning_rate": 5.4014993117521686e-05, | |
"loss": 0.2552, | |
"step": 3210 | |
}, | |
{ | |
"epoch": 4.34, | |
"learning_rate": 5.3203985863145255e-05, | |
"loss": 0.2918, | |
"step": 3220 | |
}, | |
{ | |
"epoch": 4.35, | |
"learning_rate": 5.23977985873264e-05, | |
"loss": 0.2198, | |
"step": 3230 | |
}, | |
{ | |
"epoch": 4.37, | |
"learning_rate": 5.159647143433575e-05, | |
"loss": 0.2432, | |
"step": 3240 | |
}, | |
{ | |
"epoch": 4.38, | |
"learning_rate": 5.080004430643297e-05, | |
"loss": 0.2466, | |
"step": 3250 | |
}, | |
{ | |
"epoch": 4.39, | |
"learning_rate": 5.000855686188001e-05, | |
"loss": 0.2845, | |
"step": 3260 | |
}, | |
{ | |
"epoch": 4.41, | |
"learning_rate": 4.9222048512966096e-05, | |
"loss": 0.2725, | |
"step": 3270 | |
}, | |
{ | |
"epoch": 4.42, | |
"learning_rate": 4.844055842404539e-05, | |
"loss": 0.2334, | |
"step": 3280 | |
}, | |
{ | |
"epoch": 4.43, | |
"learning_rate": 4.766412550958674e-05, | |
"loss": 0.2995, | |
"step": 3290 | |
}, | |
{ | |
"epoch": 4.45, | |
"learning_rate": 4.689278843223571e-05, | |
"loss": 0.2107, | |
"step": 3300 | |
}, | |
{ | |
"epoch": 4.46, | |
"learning_rate": 4.6126585600889834e-05, | |
"loss": 0.2346, | |
"step": 3310 | |
}, | |
{ | |
"epoch": 4.47, | |
"learning_rate": 4.536555516878547e-05, | |
"loss": 0.1997, | |
"step": 3320 | |
}, | |
{ | |
"epoch": 4.49, | |
"learning_rate": 4.4609735031598425e-05, | |
"loss": 0.3414, | |
"step": 3330 | |
}, | |
{ | |
"epoch": 4.5, | |
"learning_rate": 4.3859162825556675e-05, | |
"loss": 0.2281, | |
"step": 3340 | |
}, | |
{ | |
"epoch": 4.51, | |
"learning_rate": 4.311387592556626e-05, | |
"loss": 0.269, | |
"step": 3350 | |
}, | |
{ | |
"epoch": 4.53, | |
"learning_rate": 4.237391144335031e-05, | |
"loss": 0.3307, | |
"step": 3360 | |
}, | |
{ | |
"epoch": 4.54, | |
"learning_rate": 4.163930622560111e-05, | |
"loss": 0.3806, | |
"step": 3370 | |
}, | |
{ | |
"epoch": 4.56, | |
"learning_rate": 4.0910096852145024e-05, | |
"loss": 0.3077, | |
"step": 3380 | |
}, | |
{ | |
"epoch": 4.57, | |
"learning_rate": 4.018631963412126e-05, | |
"loss": 0.2362, | |
"step": 3390 | |
}, | |
{ | |
"epoch": 4.58, | |
"learning_rate": 3.946801061217374e-05, | |
"loss": 0.2585, | |
"step": 3400 | |
}, | |
{ | |
"epoch": 4.6, | |
"learning_rate": 3.8755205554656207e-05, | |
"loss": 0.2587, | |
"step": 3410 | |
}, | |
{ | |
"epoch": 4.61, | |
"learning_rate": 3.804793995585142e-05, | |
"loss": 0.3041, | |
"step": 3420 | |
}, | |
{ | |
"epoch": 4.62, | |
"learning_rate": 3.734624903420356e-05, | |
"loss": 0.2408, | |
"step": 3430 | |
}, | |
{ | |
"epoch": 4.64, | |
"learning_rate": 3.6650167730564575e-05, | |
"loss": 0.4386, | |
"step": 3440 | |
}, | |
{ | |
"epoch": 4.65, | |
"learning_rate": 3.595973070645425e-05, | |
"loss": 0.253, | |
"step": 3450 | |
}, | |
{ | |
"epoch": 4.66, | |
"learning_rate": 3.5274972342334166e-05, | |
"loss": 0.2513, | |
"step": 3460 | |
}, | |
{ | |
"epoch": 4.68, | |
"learning_rate": 3.459592673589587e-05, | |
"loss": 0.2299, | |
"step": 3470 | |
}, | |
{ | |
"epoch": 4.69, | |
"learning_rate": 3.392262770036299e-05, | |
"loss": 0.3081, | |
"step": 3480 | |
}, | |
{ | |
"epoch": 4.7, | |
"learning_rate": 3.325510876280718e-05, | |
"loss": 0.2529, | |
"step": 3490 | |
}, | |
{ | |
"epoch": 4.72, | |
"learning_rate": 3.2593403162479026e-05, | |
"loss": 0.2634, | |
"step": 3500 | |
}, | |
{ | |
"epoch": 4.73, | |
"learning_rate": 3.19375438491527e-05, | |
"loss": 0.2449, | |
"step": 3510 | |
}, | |
{ | |
"epoch": 4.74, | |
"learning_rate": 3.128756348148522e-05, | |
"loss": 0.2089, | |
"step": 3520 | |
}, | |
{ | |
"epoch": 4.76, | |
"learning_rate": 3.0643494425390255e-05, | |
"loss": 0.2856, | |
"step": 3530 | |
}, | |
{ | |
"epoch": 4.77, | |
"learning_rate": 3.0005368752426416e-05, | |
"loss": 0.3153, | |
"step": 3540 | |
}, | |
{ | |
"epoch": 4.78, | |
"learning_rate": 2.937321823820019e-05, | |
"loss": 0.4003, | |
"step": 3550 | |
}, | |
{ | |
"epoch": 4.8, | |
"learning_rate": 2.8747074360783838e-05, | |
"loss": 0.2499, | |
"step": 3560 | |
}, | |
{ | |
"epoch": 4.81, | |
"learning_rate": 2.81269682991478e-05, | |
"loss": 0.3115, | |
"step": 3570 | |
}, | |
{ | |
"epoch": 4.82, | |
"learning_rate": 2.7512930931608144e-05, | |
"loss": 0.239, | |
"step": 3580 | |
}, | |
{ | |
"epoch": 4.84, | |
"learning_rate": 2.690499283428909e-05, | |
"loss": 0.2655, | |
"step": 3590 | |
}, | |
{ | |
"epoch": 4.85, | |
"learning_rate": 2.630318427960018e-05, | |
"loss": 0.2439, | |
"step": 3600 | |
}, | |
{ | |
"epoch": 4.87, | |
"learning_rate": 2.570753523472923e-05, | |
"loss": 0.2491, | |
"step": 3610 | |
}, | |
{ | |
"epoch": 4.88, | |
"learning_rate": 2.5118075360149886e-05, | |
"loss": 0.2485, | |
"step": 3620 | |
}, | |
{ | |
"epoch": 4.89, | |
"learning_rate": 2.4534834008144632e-05, | |
"loss": 0.1787, | |
"step": 3630 | |
}, | |
{ | |
"epoch": 4.91, | |
"learning_rate": 2.3957840221343372e-05, | |
"loss": 0.379, | |
"step": 3640 | |
}, | |
{ | |
"epoch": 4.92, | |
"learning_rate": 2.3387122731277074e-05, | |
"loss": 0.274, | |
"step": 3650 | |
}, | |
{ | |
"epoch": 4.93, | |
"learning_rate": 2.2822709956947194e-05, | |
"loss": 0.287, | |
"step": 3660 | |
}, | |
{ | |
"epoch": 4.95, | |
"learning_rate": 2.2264630003410492e-05, | |
"loss": 0.2741, | |
"step": 3670 | |
}, | |
{ | |
"epoch": 4.96, | |
"learning_rate": 2.1712910660379474e-05, | |
"loss": 0.1208, | |
"step": 3680 | |
}, | |
{ | |
"epoch": 4.97, | |
"learning_rate": 2.1167579400838735e-05, | |
"loss": 0.2456, | |
"step": 3690 | |
}, | |
{ | |
"epoch": 4.99, | |
"learning_rate": 2.062866337967685e-05, | |
"loss": 0.2456, | |
"step": 3700 | |
}, | |
{ | |
"epoch": 5.0, | |
"learning_rate": 2.009618943233419e-05, | |
"loss": 0.2643, | |
"step": 3710 | |
} | |
],
"logging_steps": 10,
"max_steps": 4452,
"num_train_epochs": 6,
"save_steps": 500,
"total_flos": 1.93884008030208e+16,
"trial_name": null,
"trial_params": null
}
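
A minimal sketch of how this file can be consumed, assuming the checkpoint path shown above and that matplotlib is installed; the log_history entries hold "step", "loss", and "learning_rate", written every logging_steps (10) optimizer steps, as in the JSON above.

# Sketch: plot the training-loss curve recorded in this trainer_state.json.
# The path below matches the checkpoint directory above; adjust it for your setup.
import json
from pathlib import Path

import matplotlib.pyplot as plt

state_path = Path("LORAs/tinyllama-encoder_3e-4/checkpoint-3710/trainer_state.json")
state = json.loads(state_path.read_text())

# Keep only entries that carry a training loss.
logs = [e for e in state["log_history"] if "loss" in e]
steps = [e["step"] for e in logs]
losses = [e["loss"] for e in logs]

plt.plot(steps, losses)
plt.xlabel("step")
plt.ylabel("training loss")
plt.title(f"epoch {state['epoch']} / global_step {state['global_step']}")
plt.show()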