{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 5.997246443322625,
  "global_step": 4356,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.01, "learning_rate": 0.00019999739928881725, "loss": 2.1906, "step": 10 },
    { "epoch": 0.03, "learning_rate": 0.00019998959729054295, "loss": 1.8919, "step": 20 },
    { "epoch": 0.04, "learning_rate": 0.00019997659441099206, "loss": 1.7855, "step": 30 },
    { "epoch": 0.06, "learning_rate": 0.00019995839132649917, "loss": 1.7235, "step": 40 },
    { "epoch": 0.07, "learning_rate": 0.0001999349889838836, "loss": 1.7027, "step": 50 },
    { "epoch": 0.08, "learning_rate": 0.00019990638860040006, "loss": 1.7118, "step": 60 },
    { "epoch": 0.1, "learning_rate": 0.00019987259166367533, "loss": 1.689, "step": 70 },
    { "epoch": 0.11, "learning_rate": 0.00019983359993163078, "loss": 1.6845, "step": 80 },
    { "epoch": 0.12, "learning_rate": 0.0001997894154323911, "loss": 1.6655, "step": 90 },
    { "epoch": 0.14, "learning_rate": 0.0001997400404641787, "loss": 1.6518, "step": 100 },
    { "epoch": 0.15, "learning_rate": 0.00019968547759519425, "loss": 1.6571, "step": 110 },
    { "epoch": 0.17, "learning_rate": 0.000199625729663483, "loss": 1.6559, "step": 120 },
    { "epoch": 0.18, "learning_rate": 0.00019956079977678722, "loss": 1.6417, "step": 130 },
    { "epoch": 0.19, "learning_rate": 0.0001994906913123846, "loss": 1.6313, "step": 140 },
    { "epoch": 0.21, "learning_rate": 0.00019941540791691245, "loss": 1.6243, "step": 150 },
    { "epoch": 0.22, "learning_rate": 0.00019933495350617813, "loss": 1.6189, "step": 160 },
    { "epoch": 0.23, "learning_rate": 0.0001992493322649554, "loss": 1.628, "step": 170 },
    { "epoch": 0.25, "learning_rate": 0.00019915854864676664, "loss": 1.6149, "step": 180 },
    { "epoch": 0.26, "learning_rate": 0.00019906260737365122, "loss": 1.6032, "step": 190 },
    { "epoch": 0.28, "learning_rate": 0.00019896151343592008, "loss": 1.626, "step": 200 },
    { "epoch": 0.29, "learning_rate": 0.0001988552720918958, "loss": 1.6369, "step": 210 },
    { "epoch": 0.3, "learning_rate": 0.00019874388886763944, "loss": 1.5757, "step": 220 },
    { "epoch": 0.32, "learning_rate": 0.00019862736955666296, "loss": 1.5873, "step": 230 },
    { "epoch": 0.33, "learning_rate": 0.00019850572021962788, "loss": 1.6041, "step": 240 },
    { "epoch": 0.34, "learning_rate": 0.00019837894718402997, "loss": 1.5905, "step": 250 },
    { "epoch": 0.36, "learning_rate": 0.00019824705704387028, "loss": 1.6131, "step": 260 },
    { "epoch": 0.37, "learning_rate": 0.00019811005665931205, "loss": 1.6045, "step": 270 },
    { "epoch": 0.39, "learning_rate": 0.00019796795315632395, "loss": 1.5942, "step": 280 },
    { "epoch": 0.4, "learning_rate": 0.00019782075392630935, "loss": 1.5768, "step": 290 },
    { "epoch": 0.41, "learning_rate": 0.00019766846662572191, "loss": 1.5967, "step": 300 },
    { "epoch": 0.43, "learning_rate": 0.00019751109917566737, "loss": 1.5606, "step": 310 },
    { "epoch": 0.44, "learning_rate": 0.00019734865976149145, "loss": 1.58, "step": 320 },
    { "epoch": 0.45, "learning_rate": 0.00019718115683235417, "loss": 1.5809, "step": 330 },
    { "epoch": 0.47, "learning_rate": 0.00019700859910079036, "loss": 1.5758, "step": 340 },
    { "epoch": 0.48, "learning_rate": 0.00019683099554225649, "loss": 1.57, "step": 350 },
    { "epoch": 0.5, "learning_rate": 0.0001966483553946637, "loss": 1.5783, "step": 360 },
    { "epoch": 0.51, "learning_rate": 0.00019646068815789755, "loss": 1.5699, "step": 370 },
    { "epoch": 0.52, "learning_rate": 0.00019626800359332362, "loss": 1.5767, "step": 380 },
    { "epoch": 0.54, "learning_rate": 0.00019607031172327996, "loss": 1.5635, "step": 390 },
    { "epoch": 0.55, "learning_rate": 0.00019586762283055573, "loss": 1.5763, "step": 400 },
    { "epoch": 0.56, "learning_rate": 0.0001956599474578563, "loss": 1.5719, "step": 410 },
    { "epoch": 0.58, "learning_rate": 0.00019544729640725498, "loss": 1.5712, "step": 420 },
    { "epoch": 0.59, "learning_rate": 0.00019522968073963106, "loss": 1.5699, "step": 430 },
    { "epoch": 0.61, "learning_rate": 0.00019500711177409454, "loss": 1.539, "step": 440 },
    { "epoch": 0.62, "learning_rate": 0.0001947796010873974, "loss": 1.5515, "step": 450 },
    { "epoch": 0.63, "learning_rate": 0.00019454716051333135, "loss": 1.5691, "step": 460 },
    { "epoch": 0.65, "learning_rate": 0.0001943098021421124, "loss": 1.573, "step": 470 },
    { "epoch": 0.66, "learning_rate": 0.00019406753831975203, "loss": 1.5532, "step": 480 },
    { "epoch": 0.67, "learning_rate": 0.00019382038164741477, "loss": 1.5778, "step": 490 },
    { "epoch": 0.69, "learning_rate": 0.0001935683449807631, "loss": 1.5514, "step": 500 },
    { "epoch": 0.7, "learning_rate": 0.00019331144142928854, "loss": 1.5493, "step": 510 },
    { "epoch": 0.72, "learning_rate": 0.00019304968435562993, "loss": 1.5698, "step": 520 },
    { "epoch": 0.73, "learning_rate": 0.00019278308737487822, "loss": 1.5699, "step": 530 },
    { "epoch": 0.74, "learning_rate": 0.0001925116643538684, "loss": 1.554, "step": 540 },
    { "epoch": 0.76, "learning_rate": 0.00019223542941045817, "loss": 1.5527, "step": 550 },
    { "epoch": 0.77, "learning_rate": 0.00019195439691279363, "loss": 1.5517, "step": 560 },
    { "epoch": 0.78, "learning_rate": 0.00019166858147856203, "loss": 1.5673, "step": 570 },
    { "epoch": 0.8, "learning_rate": 0.00019137799797423126, "loss": 1.5469, "step": 580 },
    { "epoch": 0.81, "learning_rate": 0.00019108266151427673, "loss": 1.5499, "step": 590 },
    { "epoch": 0.83, "learning_rate": 0.00019078258746039507, "loss": 1.5502, "step": 600 },
    { "epoch": 0.84, "learning_rate": 0.00019047779142070527, "loss": 1.5402, "step": 610 },
    { "epoch": 0.85, "learning_rate": 0.0001901682892489367, "loss": 1.5444, "step": 620 },
    { "epoch": 0.87, "learning_rate": 0.00018985409704360456, "loss": 1.5524, "step": 630 },
    { "epoch": 0.88, "learning_rate": 0.00018953523114717245, "loss": 1.5338, "step": 640 },
    { "epoch": 0.89, "learning_rate": 0.00018921170814520247, "loss": 1.5416, "step": 650 },
    { "epoch": 0.91, "learning_rate": 0.00018888354486549237, "loss": 1.5524, "step": 660 },
    { "epoch": 0.92, "learning_rate": 0.00018855075837720034, "loss": 1.544, "step": 670 },
    { "epoch": 0.94, "learning_rate": 0.0001882133659899573, "loss": 1.5317, "step": 680 },
    { "epoch": 0.95, "learning_rate": 0.0001878713852529663, "loss": 1.5516, "step": 690 },
    { "epoch": 0.96, "learning_rate": 0.00018752483395408987, "loss": 1.5479, "step": 700 },
    { "epoch": 0.98, "learning_rate": 0.00018717373011892474, "loss": 1.5328, "step": 710 },
    { "epoch": 0.99, "learning_rate": 0.0001868180920098644, "loss": 1.524, "step": 720 },
    { "epoch": 1.01, "learning_rate": 0.00018645793812514894, "loss": 1.4712, "step": 730 },
    { "epoch": 1.02, "learning_rate": 0.0001860932871979031, "loss": 1.3509, "step": 740 },
    { "epoch": 1.03, "learning_rate": 0.00018572415819516174, "loss": 1.3227, "step": 750 },
    { "epoch": 1.05, "learning_rate": 0.00018535057031688335, "loss": 1.3216, "step": 760 },
    { "epoch": 1.06, "learning_rate": 0.00018497254299495146, "loss": 1.3022, "step": 770 },
    { "epoch": 1.07, "learning_rate": 0.00018459009589216364, "loss": 1.308, "step": 780 },
    { "epoch": 1.09, "learning_rate": 0.00018420324890120916, "loss": 1.3295, "step": 790 },
    { "epoch": 1.1, "learning_rate": 0.0001838120221436338, "loss": 1.3249, "step": 800 },
    { "epoch": 1.12, "learning_rate": 0.00018341643596879367, "loss": 1.3109, "step": 810 },
    { "epoch": 1.13, "learning_rate": 0.00018301651095279655, "loss": 1.328, "step": 820 },
    { "epoch": 1.14, "learning_rate": 0.00018261226789743172, "loss": 1.3215, "step": 830 },
    { "epoch": 1.16, "learning_rate": 0.00018220372782908777, "loss": 1.3378, "step": 840 },
    { "epoch": 1.17, "learning_rate": 0.00018179091199765926, "loss": 1.3289, "step": 850 },
    { "epoch": 1.18, "learning_rate": 0.00018137384187544116, "loss": 1.3362, "step": 860 },
    { "epoch": 1.2, "learning_rate": 0.00018095253915601206, "loss": 1.3263, "step": 870 },
    { "epoch": 1.21, "learning_rate": 0.00018052702575310588, "loss": 1.3238, "step": 880 },
    { "epoch": 1.23, "learning_rate": 0.00018009732379947188, "loss": 1.3278, "step": 890 },
    { "epoch": 1.24, "learning_rate": 0.0001796634556457236, "loss": 1.3277, "step": 900 },
    { "epoch": 1.25, "learning_rate": 0.00017922544385917628, "loss": 1.3332, "step": 910 },
    { "epoch": 1.27, "learning_rate": 0.00017878331122267284, "loss": 1.3268, "step": 920 },
    { "epoch": 1.28, "learning_rate": 0.00017833708073339922, "loss": 1.3316, "step": 930 },
    { "epoch": 1.29, "learning_rate": 0.00017788677560168784, "loss": 1.337, "step": 940 },
    { "epoch": 1.31, "learning_rate": 0.0001774324192498105, "loss": 1.3307, "step": 950 },
    { "epoch": 1.32, "learning_rate": 0.0001769740353107602, "loss": 1.3295, "step": 960 },
    { "epoch": 1.34, "learning_rate": 0.0001765116476270216, "loss": 1.3148, "step": 970 },
    { "epoch": 1.35, "learning_rate": 0.00017604528024933115, "loss": 1.3269, "step": 980 },
    { "epoch": 1.36, "learning_rate": 0.00017557495743542585, "loss": 1.3488, "step": 990 },
    { "epoch": 1.38, "learning_rate": 0.00017510070364878177, "loss": 1.3377, "step": 1000 },
    { "epoch": 1.39, "learning_rate": 0.0001746225435573415, "loss": 1.3396, "step": 1010 },
    { "epoch": 1.4, "learning_rate": 0.0001741405020322309, "loss": 1.3375, "step": 1020 },
    { "epoch": 1.42, "learning_rate": 0.00017365460414646574, "loss": 1.326, "step": 1030 },
    { "epoch": 1.43, "learning_rate": 0.00017316487517364721, "loss": 1.3297, "step": 1040 },
    { "epoch": 1.45, "learning_rate": 0.00017267134058664775, "loss": 1.3429, "step": 1050 },
    { "epoch": 1.46, "learning_rate": 0.00017217402605628572, "loss": 1.3339, "step": 1060 },
    { "epoch": 1.47, "learning_rate": 0.00017167295744999027, "loss": 1.3375, "step": 1070 },
    { "epoch": 1.49, "learning_rate": 0.00017116816083045602, "loss": 1.3161, "step": 1080 },
    { "epoch": 1.5, "learning_rate": 0.00017065966245428723, "loss": 1.3281, "step": 1090 },
    { "epoch": 1.51, "learning_rate": 0.00017014748877063214, "loss": 1.322, "step": 1100 },
    { "epoch": 1.53, "learning_rate": 0.0001696316664198073, "loss": 1.3344, "step": 1110 },
    { "epoch": 1.54, "learning_rate": 0.00016911222223191182, "loss": 1.3607, "step": 1120 },
    { "epoch": 1.56, "learning_rate": 0.00016858918322543186, "loss": 1.34, "step": 1130 },
    { "epoch": 1.57, "learning_rate": 0.00016806257660583534, "loss": 1.3324, "step": 1140 },
    { "epoch": 1.58, "learning_rate": 0.00016753242976415666, "loss": 1.3249, "step": 1150 },
    { "epoch": 1.6, "learning_rate": 0.00016699877027557226, "loss": 1.3363, "step": 1160 },
    { "epoch": 1.61, "learning_rate": 0.00016646162589796615, "loss": 1.3403, "step": 1170 },
    { "epoch": 1.62, "learning_rate": 0.0001659210245704861, "loss": 1.3283, "step": 1180 },
    { "epoch": 1.64, "learning_rate": 0.00016537699441209047, "loss": 1.3258, "step": 1190 },
    { "epoch": 1.65, "learning_rate": 0.0001648295637200856, "loss": 1.3258, "step": 1200 },
    { "epoch": 1.67, "learning_rate": 0.00016427876096865394, "loss": 1.3581, "step": 1210 },
    { "epoch": 1.68, "learning_rate": 0.00016372461480737297, "loss": 1.3337, "step": 1220 },
    { "epoch": 1.69, "learning_rate": 0.00016316715405972508, "loss": 1.3432, "step": 1230 },
    { "epoch": 1.71, "learning_rate": 0.0001626064077215983, "loss": 1.3385, "step": 1240 },
    { "epoch": 1.72, "learning_rate": 0.00016204240495977805, "loss": 1.3474, "step": 1250 },
    { "epoch": 1.73, "learning_rate": 0.0001614751751104301, "loss": 1.3198, "step": 1260 },
    { "epoch": 1.75, "learning_rate": 0.00016090474767757474, "loss": 1.3407, "step": 1270 },
    { "epoch": 1.76, "learning_rate": 0.00016033115233155202, "loss": 1.3203, "step": 1280 },
    { "epoch": 1.78, "learning_rate": 0.00015975441890747855, "loss": 1.3311, "step": 1290 },
    { "epoch": 1.79, "learning_rate": 0.00015917457740369565, "loss": 1.3271, "step": 1300 },
    { "epoch": 1.8, "learning_rate": 0.000158591657980209, "loss": 1.3432, "step": 1310 },
    { "epoch": 1.82, "learning_rate": 0.00015800569095711982, "loss": 1.3302, "step": 1320 },
    { "epoch": 1.83, "learning_rate": 0.00015741670681304796, "loss": 1.3326, "step": 1330 },
    { "epoch": 1.84, "learning_rate": 0.00015682473618354635, "loss": 1.3358, "step": 1340 },
    { "epoch": 1.86, "learning_rate": 0.0001562298098595078, "loss": 1.3343, "step": 1350 },
    { "epoch": 1.87, "learning_rate": 0.0001556319587855631, "loss": 1.3214, "step": 1360 },
    { "epoch": 1.89, "learning_rate": 0.0001550312140584718, "loss": 1.3342, "step": 1370 },
    { "epoch": 1.9, "learning_rate": 0.00015442760692550443, "loss": 1.3253, "step": 1380 },
    { "epoch": 1.91, "learning_rate": 0.0001538211687828174, "loss": 1.3494, "step": 1390 },
    { "epoch": 1.93, "learning_rate": 0.00015321193117381996, "loss": 1.324, "step": 1400 },
    { "epoch": 1.94, "learning_rate": 0.00015259992578753334, "loss": 1.3267, "step": 1410 },
    { "epoch": 1.96, "learning_rate": 0.00015198518445694255, "loss": 1.343, "step": 1420 },
    { "epoch": 1.97, "learning_rate": 0.00015136773915734066, "loss": 1.3209, "step": 1430 },
    { "epoch": 1.98, "learning_rate": 0.00015074762200466556, "loss": 1.3224, "step": 1440 },
    { "epoch": 2.0, "learning_rate": 0.0001501248652538296, "loss": 1.3481, "step": 1450 },
    { "epoch": 2.01, "learning_rate": 0.00014949950129704162, "loss": 1.1061, "step": 1460 },
    { "epoch": 2.02, "learning_rate": 0.00014887156266212237, "loss": 0.9927, "step": 1470 },
    { "epoch": 2.04, "learning_rate": 0.00014824108201081247, "loss": 0.9946, "step": 1480 },
    { "epoch": 2.05, "learning_rate": 0.00014760809213707344, "loss": 0.9764, "step": 1490 },
    { "epoch": 2.07, "learning_rate": 0.00014697262596538227, "loss": 0.9851, "step": 1500 },
    { "epoch": 2.08, "learning_rate": 0.00014633471654901842, "loss": 0.9941, "step": 1510 },
    { "epoch": 2.09, "learning_rate": 0.000145694397068345, "loss": 0.9875, "step": 1520 },
    { "epoch": 2.11, "learning_rate": 0.0001450517008290827, "loss": 0.9828, "step": 1530 },
    { "epoch": 2.12, "learning_rate": 0.00014440666126057744, "loss": 0.9913, "step": 1540 },
    { "epoch": 2.13, "learning_rate": 0.00014375931191406159, "loss": 0.9881, "step": 1550 },
    { "epoch": 2.15, "learning_rate": 0.00014310968646090883, "loss": 0.9866, "step": 1560 },
    { "epoch": 2.16, "learning_rate": 0.0001424578186908828, "loss": 0.9934, "step": 1570 },
    { "epoch": 2.18, "learning_rate": 0.0001418037425103795, "loss": 0.9963, "step": 1580 },
    { "epoch": 2.19, "learning_rate": 0.00014114749194066363, "loss": 0.9791, "step": 1590 },
    { "epoch": 2.2, "learning_rate": 0.00014048910111609915, "loss": 0.9851, "step": 1600 },
    { "epoch": 2.22, "learning_rate": 0.0001398286042823736, "loss": 0.9829, "step": 1610 },
    { "epoch": 2.23, "learning_rate": 0.00013916603579471705, "loss": 0.9995, "step": 1620 },
    { "epoch": 2.24, "learning_rate": 0.00013850143011611497, "loss": 0.9843, "step": 1630 },
    { "epoch": 2.26, "learning_rate": 0.0001378348218155158, "loss": 1.0025, "step": 1640 },
    { "epoch": 2.27, "learning_rate": 0.00013716624556603274, "loss": 1.0135, "step": 1650 },
    { "epoch": 2.29, "learning_rate": 0.00013649573614314044, "loss": 1.0007, "step": 1660 },
    { "epoch": 2.3, "learning_rate": 0.00013582332842286592, "loss": 1.0117, "step": 1670 },
    { "epoch": 2.31, "learning_rate": 0.00013514905737997473, "loss": 1.0068, "step": 1680 },
    { "epoch": 2.33, "learning_rate": 0.0001344729580861517, "loss": 0.998, "step": 1690 },
    { "epoch": 2.34, "learning_rate": 0.0001337950657081768, "loss": 0.9995, "step": 1700 },
    { "epoch": 2.35, "learning_rate": 0.00013311541550609565, "loss": 0.9975, "step": 1710 },
    { "epoch": 2.37, "learning_rate": 0.00013243404283138597, "loss": 1.014, "step": 1720 },
    { "epoch": 2.38, "learning_rate": 0.0001317509831251184, "loss": 0.9954, "step": 1730 },
    { "epoch": 2.4, "learning_rate": 0.00013106627191611332, "loss": 1.0051, "step": 1740 },
    { "epoch": 2.41, "learning_rate": 0.00013037994481909264, "loss": 1.0156, "step": 1750 },
    { "epoch": 2.42, "learning_rate": 0.0001296920375328275, "loss": 1.0155, "step": 1760 },
    { "epoch": 2.44, "learning_rate": 0.00012900258583828137, "loss": 1.0223, "step": 1770 },
    { "epoch": 2.45, "learning_rate": 0.00012831162559674887, "loss": 1.0181, "step": 1780 },
    { "epoch": 2.46, "learning_rate": 0.00012761919274799054, "loss": 1.0023, "step": 1790 },
    { "epoch": 2.48, "learning_rate": 0.00012692532330836346, "loss": 0.9917, "step": 1800 },
    { "epoch": 2.49, "learning_rate": 0.0001262300533689478, "loss": 1.0132, "step": 1810 },
    { "epoch": 2.51, "learning_rate": 0.00012553341909366978, "loss": 1.0136, "step": 1820 },
    { "epoch": 2.52, "learning_rate": 0.0001248354567174203, "loss": 0.9961, "step": 1830 },
    { "epoch": 2.53, "learning_rate": 0.00012413620254417057, "loss": 1.0244, "step": 1840 },
    { "epoch": 2.55, "learning_rate": 0.0001234356929450835, "loss": 1.0126, "step": 1850 },
    { "epoch": 2.56, "learning_rate": 0.00012273396435662212, "loss": 1.0121, "step": 1860 },
    { "epoch": 2.57, "learning_rate": 0.00012203105327865407, "loss": 1.0293, "step": 1870 },
    { "epoch": 2.59, "learning_rate": 0.00012132699627255347, "loss": 1.0104, "step": 1880 },
    { "epoch": 2.6, "learning_rate": 0.00012062182995929882, "loss": 1.0119, "step": 1890 },
    { "epoch": 2.62, "learning_rate": 0.00011991559101756852, "loss": 1.0047, "step": 1900 },
    { "epoch": 2.63, "learning_rate": 0.00011920831618183282, "loss": 1.0095, "step": 1910 },
    { "epoch": 2.64, "learning_rate": 0.00011850004224044315, "loss": 1.0112, "step": 1920 },
    { "epoch": 2.66, "learning_rate": 0.0001177908060337188, "loss": 1.0131, "step": 1930 },
    { "epoch": 2.67, "learning_rate": 0.00011708064445203042, "loss": 1.0082, "step": 1940 },
    { "epoch": 2.68, "learning_rate": 0.00011636959443388132, "loss": 1.0246, "step": 1950 },
    { "epoch": 2.7, "learning_rate": 0.00011565769296398618, "loss": 1.0102, "step": 1960 },
    { "epoch": 2.71, "learning_rate": 0.00011494497707134731, "loss": 1.0103, "step": 1970 },
    { "epoch": 2.73, "learning_rate": 0.00011423148382732853, "loss": 1.0081, "step": 1980 },
    { "epoch": 2.74, "learning_rate": 0.000113517250343727, "loss": 1.0092, "step": 1990 },
    { "epoch": 2.75, "learning_rate": 0.0001128023137708429, "loss": 0.9983, "step": 2000 },
    { "epoch": 2.77, "learning_rate": 0.00011208671129554702, "loss": 1.0136, "step": 2010 },
    { "epoch": 2.78, "learning_rate": 0.00011137048013934656, "loss": 1.0348, "step": 2020 },
    { "epoch": 2.79, "learning_rate": 0.00011065365755644906, "loss": 0.9996, "step": 2030 },
    { "epoch": 2.81, "learning_rate": 0.00010993628083182467, "loss": 1.0142, "step": 2040 },
    { "epoch": 2.82, "learning_rate": 0.00010921838727926681, "loss": 1.0101, "step": 2050 },
    { "epoch": 2.84, "learning_rate": 0.00010850001423945126, "loss": 1.0126, "step": 2060 },
    { "epoch": 2.85, "learning_rate": 0.00010778119907799398, "loss": 1.0188, "step": 2070 },
    { "epoch": 2.86, "learning_rate": 0.00010706197918350758, "loss": 1.0232, "step": 2080 },
    { "epoch": 2.88, "learning_rate": 0.00010634239196565646, "loss": 1.0161, "step": 2090 },
    { "epoch": 2.89, "learning_rate": 0.00010562247485321115, "loss": 1.0213, "step": 2100 },
    { "epoch": 2.91, "learning_rate": 0.0001049022652921013, "loss": 0.9976, "step": 2110 },
    { "epoch": 2.92, "learning_rate": 0.00010418180074346815, "loss": 1.0324, "step": 2120 },
    { "epoch": 2.93, "learning_rate": 0.00010346111868171584, "loss": 1.0132, "step": 2130 },
    { "epoch": 2.95, "learning_rate": 0.00010274025659256232, "loss": 1.0003, "step": 2140 },
    { "epoch": 2.96, "learning_rate": 0.00010201925197108953, "loss": 1.0046, "step": 2150 },
    { "epoch": 2.97, "learning_rate": 0.0001012981423197931, "loss": 1.0093, "step": 2160 },
    { "epoch": 2.99, "learning_rate": 0.00010057696514663169, "loss": 1.0065, "step": 2170 },
    { "epoch": 3.0, "learning_rate": 9.985575796307615e-05, "loss": 0.9698, "step": 2180 },
    { "epoch": 3.02, "learning_rate": 9.913455828215814e-05, "loss": 0.6643, "step": 2190 },
    { "epoch": 3.03, "learning_rate": 9.84134036165192e-05, "loss": 0.6469, "step": 2200 },
    { "epoch": 3.04, "learning_rate": 9.769233147645943e-05, "loss": 0.6258, "step": 2210 },
    { "epoch": 3.06, "learning_rate": 9.697137936798634e-05, "loss": 0.6403, "step": 2220 },
    { "epoch": 3.07, "learning_rate": 9.625058479086418e-05, "loss": 0.6438, "step": 2230 },
    { "epoch": 3.08, "learning_rate": 9.552998523666326e-05, "loss": 0.6249, "step": 2240 },
    { "epoch": 3.1, "learning_rate": 9.480961818681004e-05, "loss": 0.6426, "step": 2250 },
    { "epoch": 3.11, "learning_rate": 9.408952111063727e-05, "loss": 0.652, "step": 2260 },
    { "epoch": 3.13, "learning_rate": 9.336973146343537e-05, "loss": 0.644, "step": 2270 },
    { "epoch": 3.14, "learning_rate": 9.265028668450402e-05, "loss": 0.6364, "step": 2280 },
    { "epoch": 3.15, "learning_rate": 9.193122419520485e-05, "loss": 0.6513, "step": 2290 },
    { "epoch": 3.17, "learning_rate": 9.121258139701502e-05, "loss": 0.6501, "step": 2300 },
    { "epoch": 3.18, "learning_rate": 9.049439566958175e-05, "loss": 0.6431, "step": 2310 },
    { "epoch": 3.19, "learning_rate": 8.977670436877811e-05, "loss": 0.652, "step": 2320 },
    { "epoch": 3.21, "learning_rate": 8.905954482475991e-05, "loss": 0.6494, "step": 2330 },
    { "epoch": 3.22, "learning_rate": 8.83429543400241e-05, "loss": 0.6486, "step": 2340 },
    { "epoch": 3.24, "learning_rate": 8.76269701874684e-05, "loss": 0.6399, "step": 2350 },
    { "epoch": 3.25, "learning_rate": 8.691162960845264e-05, "loss": 0.6452, "step": 2360 },
    { "epoch": 3.26, "learning_rate": 8.619696981086172e-05, "loss": 0.6448, "step": 2370 },
    { "epoch": 3.28, "learning_rate": 8.548302796717019e-05, "loss": 0.6541, "step": 2380 },
    { "epoch": 3.29, "learning_rate": 8.476984121250875e-05, "loss": 0.6405, "step": 2390 },
    { "epoch": 3.3, "learning_rate": 8.405744664273278e-05, "loss": 0.6419, "step": 2400 },
    { "epoch": 3.32, "learning_rate": 8.334588131249277e-05, "loss": 0.6369, "step": 2410 },
    { "epoch": 3.33, "learning_rate": 8.263518223330697e-05, "loss": 0.6585, "step": 2420 },
    { "epoch": 3.35, "learning_rate": 8.192538637163621e-05, "loss": 0.6461, "step": 2430 },
    { "epoch": 3.36, "learning_rate": 8.121653064696118e-05, "loss": 0.6614, "step": 2440 },
    { "epoch": 3.37, "learning_rate": 8.050865192986211e-05, "loss": 0.6536, "step": 2450 },
    { "epoch": 3.39, "learning_rate": 7.980178704010089e-05, "loss": 0.6559, "step": 2460 },
    { "epoch": 3.4, "learning_rate": 7.9095972744706e-05, "loss": 0.6605, "step": 2470 },
    { "epoch": 3.41, "learning_rate": 7.839124575606004e-05, "loss": 0.6631, "step": 2480 },
    { "epoch": 3.43, "learning_rate": 7.76876427299903e-05, "loss": 0.6666, "step": 2490 },
    { "epoch": 3.44, "learning_rate": 7.69852002638618e-05, "loss": 0.6646, "step": 2500 },
    { "epoch": 3.46, "learning_rate": 7.62839548946742e-05, "loss": 0.6526, "step": 2510 },
    { "epoch": 3.47, "learning_rate": 7.558394309716088e-05, "loss": 0.657, "step": 2520 },
    { "epoch": 3.48, "learning_rate": 7.488520128189209e-05, "loss": 0.6546, "step": 2530 },
    { "epoch": 3.5, "learning_rate": 7.41877657933809e-05, "loss": 0.657, "step": 2540 },
    { "epoch": 3.51, "learning_rate": 7.349167290819274e-05, "loss": 0.6594, "step": 2550 },
    { "epoch": 3.52, "learning_rate": 7.279695883305866e-05, "loss": 0.6676, "step": 2560 },
    { "epoch": 3.54, "learning_rate": 7.210365970299194e-05, "loss": 0.6527, "step": 2570 },
    { "epoch": 3.55, "learning_rate": 7.141181157940859e-05, "loss": 0.6494, "step": 2580 },
    { "epoch": 3.57, "learning_rate": 7.072145044825162e-05, "loss": 0.6525, "step": 2590 },
    { "epoch": 3.58, "learning_rate": 7.003261221811934e-05, "loss": 0.6521, "step": 2600 },
    { "epoch": 3.59, "learning_rate": 6.934533271839752e-05, "loss": 0.6669, "step": 2610 },
    { "epoch": 3.61, "learning_rate": 6.865964769739575e-05, "loss": 0.6548, "step": 2620 },
    { "epoch": 3.62, "learning_rate": 6.797559282048806e-05, "loss": 0.6668, "step": 2630 },
    { "epoch": 3.63, "learning_rate": 6.729320366825784e-05, "loss": 0.6516, "step": 2640 },
    { "epoch": 3.65, "learning_rate": 6.661251573464706e-05, "loss": 0.655, "step": 2650 },
    { "epoch": 3.66, "learning_rate": 6.593356442511015e-05, "loss": 0.6641, "step": 2660 },
    { "epoch": 3.68, "learning_rate": 6.525638505477231e-05, "loss": 0.6592, "step": 2670 },
    { "epoch": 3.69, "learning_rate": 6.458101284659286e-05, "loss": 0.6582, "step": 2680 },
    { "epoch": 3.7, "learning_rate": 6.390748292953284e-05, "loss": 0.6814, "step": 2690 },
    { "epoch": 3.72, "learning_rate": 6.323583033672799e-05, "loss": 0.658, "step": 2700 },
    { "epoch": 3.73, "learning_rate": 6.256609000366649e-05, "loss": 0.6653, "step": 2710 },
    { "epoch": 3.74, "learning_rate": 6.189829676637182e-05, "loss": 0.654, "step": 2720 },
    { "epoch": 3.76, "learning_rate": 6.123248535959083e-05, "loss": 0.6626, "step": 2730 },
    { "epoch": 3.77, "learning_rate": 6.056869041498687e-05, "loss": 0.6696, "step": 2740 },
    { "epoch": 3.79, "learning_rate": 5.9906946459338656e-05, "loss": 0.6635, "step": 2750 },
    { "epoch": 3.8, "learning_rate": 5.924728791274432e-05, "loss": 0.6593, "step": 2760 },
    { "epoch": 3.81, "learning_rate": 5.858974908683105e-05, "loss": 0.6622, "step": 2770 },
    { "epoch": 3.83, "learning_rate": 5.79343641829704e-05, "loss": 0.6605, "step": 2780 },
    { "epoch": 3.84, "learning_rate": 5.728116729049928e-05, "loss": 0.6664, "step": 2790 },
    { "epoch": 3.85, "learning_rate": 5.663019238494704e-05, "loss": 0.6575, "step": 2800 },
    { "epoch": 3.87, "learning_rate": 5.5981473326267976e-05, "loss": 0.6792, "step": 2810 },
    { "epoch": 3.88, "learning_rate": 5.533504385708024e-05, "loss": 0.6841, "step": 2820 },
    { "epoch": 3.9, "learning_rate": 5.4690937600910905e-05, "loss": 0.6625, "step": 2830 },
    { "epoch": 3.91, "learning_rate": 5.404918806044679e-05, "loss": 0.6637, "step": 2840 },
    { "epoch": 3.92, "learning_rate": 5.340982861579199e-05, "loss": 0.6587, "step": 2850 },
    { "epoch": 3.94, "learning_rate": 5.277289252273174e-05, "loss": 0.6641, "step": 2860 },
    { "epoch": 3.95, "learning_rate": 5.213841291100239e-05, "loss": 0.6515, "step": 2870 },
    { "epoch": 3.97, "learning_rate": 5.1506422782568345e-05, "loss": 0.6596, "step": 2880 },
    { "epoch": 3.98, "learning_rate": 5.087695500990555e-05, "loss": 0.6508, "step": 2890 },
    { "epoch": 3.99, "learning_rate": 5.025004233429145e-05, "loss": 0.6552, "step": 2900 },
    { "epoch": 4.01, "learning_rate": 4.962571736410223e-05, "loss": 0.5383, "step": 2910 },
    { "epoch": 4.02, "learning_rate": 4.90040125731165e-05, "loss": 0.4324, "step": 2920 },
    { "epoch": 4.03, "learning_rate": 4.8384960298826274e-05, "loss": 0.4194, "step": 2930 },
    { "epoch": 4.05, "learning_rate": 4.776859274075506e-05, "loss": 0.4238, "step": 2940 },
    { "epoch": 4.06, "learning_rate": 4.715494195878285e-05, "loss": 0.419, "step": 2950 },
    { "epoch": 4.08, "learning_rate": 4.654403987147865e-05, "loss": 0.4239, "step": 2960 },
    { "epoch": 4.09, "learning_rate": 4.593591825444028e-05, "loss": 0.4261, "step": 2970 },
    { "epoch": 4.1, "learning_rate": 4.5330608738641486e-05, "loss": 0.4415, "step": 2980 },
    { "epoch": 4.12, "learning_rate": 4.472814280878689e-05, "loss": 0.4236, "step": 2990 },
    { "epoch": 4.13, "learning_rate": 4.412855180167406e-05, "loss": 0.4334, "step": 3000 },
    { "epoch": 4.14, "learning_rate": 4.353186690456371e-05, "loss": 0.422, "step": 3010 },
    { "epoch": 4.16, "learning_rate": 4.293811915355761e-05, "loss": 0.4361, "step": 3020 },
    { "epoch": 4.17, "learning_rate": 4.234733943198399e-05, "loss": 0.4339, "step": 3030 },
    { "epoch": 4.19, "learning_rate": 4.175955846879151e-05, "loss": 0.44, "step": 3040 },
    { "epoch": 4.2, "learning_rate": 4.11748068369506e-05, "loss": 0.4328, "step": 3050 },
    { "epoch": 4.21, "learning_rate": 4.059311495186338e-05, "loss": 0.4297, "step": 3060 },
    { "epoch": 4.23, "learning_rate": 4.001451306978174e-05, "loss": 0.4225, "step": 3070 },
    { "epoch": 4.24, "learning_rate": 3.943903128623335e-05, "loss": 0.4288, "step": 3080 },
    { "epoch": 4.25, "learning_rate": 3.886669953445637e-05, "loss": 0.4198, "step": 3090 },
    { "epoch": 4.27, "learning_rate": 3.829754758384262e-05, "loss": 0.4199, "step": 3100 },
    { "epoch": 4.28, "learning_rate": 3.77316050383889e-05, "loss": 0.4308, "step": 3110 },
    { "epoch": 4.3, "learning_rate": 3.7168901335157315e-05, "loss": 0.4335, "step": 3120 },
    { "epoch": 4.31, "learning_rate": 3.660946574274421e-05, "loss": 0.4267, "step": 3130 },
    { "epoch": 4.32, "learning_rate": 3.6053327359757535e-05, "loss": 0.4348, "step": 3140 },
    { "epoch": 4.34, "learning_rate": 3.550051511330361e-05, "loss": 0.4403, "step": 3150 },
    { "epoch": 4.35, "learning_rate": 3.4951057757482205e-05, "loss": 0.4252, "step": 3160 },
    { "epoch": 4.36, "learning_rate": 3.440498387189111e-05, "loss": 0.4212, "step": 3170 },
    { "epoch": 4.38, "learning_rate": 3.3862321860139576e-05, "loss": 0.4336, "step": 3180 },
    { "epoch": 4.39, "learning_rate": 3.332309994837085e-05, "loss": 0.4304, "step": 3190 },
    { "epoch": 4.41, "learning_rate": 3.278734618379402e-05, "loss": 0.4245, "step": 3200 },
    { "epoch": 4.42, "learning_rate": 3.225508843322524e-05, "loss": 0.4113, "step": 3210 },
    { "epoch": 4.43, "learning_rate": 3.172635438163816e-05, "loss": 0.4389, "step": 3220 },
    { "epoch": 4.45, "learning_rate": 3.120117153072404e-05, "loss": 0.427, "step": 3230 },
    { "epoch": 4.46, "learning_rate": 3.0679567197461134e-05, "loss": 0.4162, "step": 3240 },
    { "epoch": 4.47, "learning_rate": 3.016156851269384e-05, "loss": 0.4347, "step": 3250 },
    { "epoch": 4.49, "learning_rate": 2.9647202419721687e-05, "loss": 0.4259, "step": 3260 },
    { "epoch": 4.5, "learning_rate": 2.913649567289759e-05, "loss": 0.4399, "step": 3270 },
    { "epoch": 4.52, "learning_rate": 2.862947483623659e-05, "loss": 0.4278, "step": 3280 },
    { "epoch": 4.53, "learning_rate": 2.812616628203383e-05, "loss": 0.4344, "step": 3290 },
    { "epoch": 4.54, "learning_rate": 2.7626596189492983e-05, "loss": 0.4339, "step": 3300 },
    { "epoch": 4.56, "learning_rate": 2.7130790543364646e-05, "loss": 0.4353, "step": 3310 },
    { "epoch": 4.57, "learning_rate": 2.6638775132594553e-05, "loss": 0.4333, "step": 3320 },
    { "epoch": 4.58, "learning_rate": 2.6150575548982292e-05, "loss": 0.4336, "step": 3330 },
    { "epoch": 4.6, "learning_rate": 2.5666217185850262e-05, "loss": 0.4388, "step": 3340 },
    { "epoch": 4.61, "learning_rate": 2.5185725236722636e-05, "loss": 0.4322, "step": 3350 },
    { "epoch": 4.63, "learning_rate": 2.4709124694015116e-05, "loss": 0.4435, "step": 3360 },
    { "epoch": 4.64, "learning_rate": 2.423644034773498e-05, "loss": 0.4358, "step": 3370 },
    { "epoch": 4.65, "learning_rate": 2.3767696784191463e-05, "loss": 0.4471, "step": 3380 },
    { "epoch": 4.67, "learning_rate": 2.3302918384717177e-05, "loss": 0.4227, "step": 3390 },
    { "epoch": 4.68, "learning_rate": 2.284212932439972e-05, "loss": 0.4269, "step": 3400 },
    { "epoch": 4.69, "learning_rate": 2.2385353570824308e-05, "loss": 0.4393, "step": 3410 },
    { "epoch": 4.71, "learning_rate": 2.1932614882827197e-05, "loss": 0.4331, "step": 3420 },
    { "epoch": 4.72, "learning_rate": 2.148393680925973e-05, "loss": 0.4407, "step": 3430 },
    { "epoch": 4.74, "learning_rate": 2.1039342687763586e-05, "loss": 0.4335, "step": 3440 },
    { "epoch": 4.75, "learning_rate": 2.0598855643556826e-05, "loss": 0.4196, "step": 3450 },
    { "epoch": 4.76, "learning_rate": 2.016249858823106e-05, "loss": 0.4298, "step": 3460 },
    { "epoch": 4.78, "learning_rate": 1.973029421855981e-05, "loss": 0.4392, "step": 3470 },
    { "epoch": 4.79, "learning_rate": 1.93022650153178e-05, "loss": 0.4368, "step": 3480 },
    { "epoch": 4.8, "learning_rate": 1.8878433242111716e-05, "loss": 0.4373, "step": 3490 },
    { "epoch": 4.82, "learning_rate": 1.8458820944222255e-05, "loss": 0.4409, "step": 3500 },
    { "epoch": 4.83, "learning_rate": 1.804344994745727e-05, "loss": 0.4263, "step": 3510 },
    { "epoch": 4.85, "learning_rate": 1.763234185701673e-05, "loss": 0.4119, "step": 3520 },
    { "epoch": 4.86, "learning_rate": 1.7225518056368785e-05, "loss": 0.4381, "step": 3530 },
    { "epoch": 4.87, "learning_rate": 1.6822999706137567e-05, "loss": 0.4296, "step": 3540 },
    { "epoch": 4.89, "learning_rate": 1.6424807743002612e-05, "loss": 0.4212, "step": 3550 },
    { "epoch": 4.9, "learning_rate": 1.6030962878609725e-05, "loss": 0.4228, "step": 3560 },
    { "epoch": 4.92, "learning_rate": 1.5641485598493743e-05, "loss": 0.4132, "step": 3570 },
    { "epoch": 4.93, "learning_rate": 1.5256396161013075e-05, "loss": 0.4433, "step": 3580 },
    { "epoch": 4.94, "learning_rate": 1.487571459629582e-05, "loss": 0.4401, "step": 3590 },
    { "epoch": 4.96, "learning_rate": 1.4499460705197998e-05, "loss": 0.4329, "step": 3600 },
    { "epoch": 4.97, "learning_rate": 1.412765405827372e-05, "loss": 0.43, "step": 3610 },
    { "epoch": 4.98, "learning_rate": 1.3760313994757001e-05, "loss": 0.4292, "step": 3620 },
    { "epoch": 5.0, "learning_rate": 1.339745962155613e-05, "loss": 0.4316, "step": 3630 },
    { "epoch": 5.01, "learning_rate": 1.3039109812259598e-05, "loss": 0.353, "step": 3640 },
    { "epoch": 5.03, "learning_rate": 1.268528320615452e-05, "loss": 0.3517, "step": 3650 },
    { "epoch": 5.04, "learning_rate": 1.2335998207257137e-05, "loss": 0.3451, "step": 3660 },
    { "epoch": 5.05, "learning_rate": 1.1991272983355505e-05, "loss": 0.3536, "step": 3670 },
    { "epoch": 5.07, "learning_rate": 1.1651125465064516e-05, "loss": 0.3435, "step": 3680 },
    { "epoch": 5.08, "learning_rate": 1.131557334489326e-05, "loss": 0.3361, "step": 3690 },
    { "epoch": 5.09, "learning_rate": 1.098463407632474e-05, "loss": 0.3415, "step": 3700 },
    { "epoch": 5.11, "learning_rate": 1.0658324872908121e-05, "loss": 0.3426, "step": 3710 },
    { "epoch": 5.12, "learning_rate": 1.0336662707363287e-05, "loss": 0.3476, "step": 3720 },
    { "epoch": 5.14, "learning_rate": 1.0019664310698029e-05, "loss": 0.3474, "step": 3730 },
    { "epoch": 5.15, "learning_rate": 9.707346171337894e-06, "loss": 0.3476, "step": 3740 },
    { "epoch": 5.16, "learning_rate": 9.399724534268384e-06, "loss": 0.3354, "step": 3750 },
    { "epoch": 5.18, "learning_rate": 9.096815400190172e-06, "loss": 0.3514, "step": 3760 },
    { "epoch": 5.19, "learning_rate": 8.798634524686699e-06, "loss": 0.3519, "step": 3770 },
    { "epoch": 5.2, "learning_rate": 8.505197417404687e-06, "loss": 0.3387, "step": 3780 },
    { "epoch": 5.22, "learning_rate": 8.216519341247486e-06, "loss": 0.3458, "step": 3790 },
    { "epoch": 5.23, "learning_rate": 7.932615311581126e-06, "loss": 0.3403, "step": 3800 },
    { "epoch": 5.25, "learning_rate": 7.653500095453248e-06, "loss": 0.3461, "step": 3810 },
    { "epoch": 5.26, "learning_rate": 7.3791882108251945e-06, "loss": 0.3445, "step": 3820 },
    { "epoch": 5.27, "learning_rate": 7.109693925816651e-06, "loss": 0.3439, "step": 3830 },
    { "epoch": 5.29, "learning_rate": 6.845031257963619e-06, "loss": 0.3446, "step": 3840 },
    { "epoch": 5.3, "learning_rate": 6.585213973489335e-06, "loss": 0.3577, "step": 3850 },
    { "epoch": 5.31, "learning_rate": 6.3302555865880965e-06, "loss": 0.3368, "step": 3860 },
    { "epoch": 5.33, "learning_rate": 6.08016935872251e-06, "loss": 0.338, "step": 3870 },
    { "epoch": 5.34, "learning_rate": 5.834968297933541e-06, "loss": 0.3552, "step": 3880 },
    { "epoch": 5.36, "learning_rate": 5.594665158163992e-06, "loss": 0.3489, "step": 3890 },
    { "epoch": 5.37, "learning_rate": 5.359272438595153e-06, "loss": 0.354, "step": 3900 },
    { "epoch": 5.38, "learning_rate": 5.128802382996567e-06, "loss": 0.3512, "step": 3910 },
    { "epoch": 5.4, "learning_rate": 4.903266979089249e-06, "loss": 0.3333, "step": 3920 },
    { "epoch": 5.41, "learning_rate": 4.682677957922155e-06, "loss": 0.3523, "step": 3930 },
    { "epoch": 5.42, "learning_rate": 4.467046793261931e-06, "loss": 0.3462, "step": 3940 },
    { "epoch": 5.44, "learning_rate": 4.256384700996252e-06, "loss": 0.34, "step": 3950 },
    { "epoch": 5.45, "learning_rate": 4.050702638550275e-06, "loss": 0.3555, "step": 3960 },
    { "epoch": 5.47, "learning_rate": 3.850011304316781e-06, "loss": 0.357, "step": 3970 },
    { "epoch": 5.48, "learning_rate": 3.6543211370997587e-06, "loss": 0.3549, "step": 3980 },
    { "epoch": 5.49, "learning_rate": 3.4636423155712916e-06, "loss": 0.3455, "step": 3990 },
    { "epoch": 5.51, "learning_rate": 3.2779847577422697e-06, "loss": 0.3513, "step": 4000 },
    { "epoch": 5.52, "learning_rate": 3.0973581204464362e-06, "loss": 0.3554, "step": 4010 },
    { "epoch": 5.53, "learning_rate": 2.921771798838069e-06, "loss": 0.3433, "step": 4020 },
    { "epoch": 5.55, "learning_rate": 2.751234925903412e-06, "loss": 0.3544, "step": 4030 },
    { "epoch": 5.56, "learning_rate": 2.585756371985493e-06, "loss": 0.3466, "step": 4040 },
    { "epoch": 5.58, "learning_rate": 2.4253447443228106e-06, "loss": 0.3459, "step": 4050 },
    { "epoch": 5.59, "learning_rate": 2.270008386601685e-06, "loss": 0.3462, "step": 4060 },
    { "epoch": 5.6, "learning_rate": 2.119755378522137e-06, "loss": 0.3398, "step": 4070 },
    { "epoch": 5.62, "learning_rate": 1.974593535377722e-06, "loss": 0.3488, "step": 4080 },
    { "epoch": 5.63, "learning_rate": 1.83453040764906e-06, "loss": 0.349, "step": 4090 },
    { "epoch": 5.64, "learning_rate": 1.6995732806109554e-06, "loss": 0.3416, "step": 4100 },
    { "epoch": 5.66, "learning_rate": 1.569729173953638e-06, "loss": 0.3556, "step": 4110 },
    { "epoch": 5.67, "learning_rate": 1.4450048414174854e-06, "loss": 0.3341, "step": 4120 },
    { "epoch": 5.69, "learning_rate": 1.3254067704418283e-06, "loss": 0.3511, "step": 4130 },
    { "epoch": 5.7, "learning_rate": 1.2109411818274852e-06, "loss": 0.3536, "step": 4140 },
    { "epoch": 5.71, "learning_rate": 1.1016140294131894e-06, "loss": 0.3318, "step": 4150 },
    { "epoch": 5.73, "learning_rate": 9.974309997658915e-07, "loss": 0.3383, "step": 4160 },
    { "epoch": 5.74, "learning_rate": 8.983975118849852e-07, "loss": 0.3465, "step": 4170 },
    { "epoch": 5.75, "learning_rate": 8.04518716920466e-07, "loss": 0.3512, "step": 4180 },
    { "epoch": 5.77, "learning_rate": 7.157994979049898e-07, "loss": 0.3377, "step": 4190 },
    { "epoch": 5.78, "learning_rate": 6.322444694998319e-07, "loss": 0.3473, "step": 4200 },
    { "epoch": 5.8, "learning_rate": 5.538579777549347e-07, "loss": 0.3423, "step": 4210 },
    { "epoch": 5.81, "learning_rate": 4.80644099882821e-07, "loss": 0.3424, "step": 4220 },
    { "epoch": 5.82, "learning_rate": 4.126066440464982e-07, "loss": 0.3448, "step": 4230 },
    { "epoch": 5.84, "learning_rate": 3.497491491614158e-07, "loss": 0.3438, "step": 4240 },
    { "epoch": 5.85, "learning_rate": 2.920748847113686e-07, "loss": 0.3514, "step": 4250 },
    { "epoch": 5.87, "learning_rate": 2.395868505784438e-07, "loss": 0.3395, "step": 4260 },
    { "epoch": 5.88, "learning_rate": 1.9228777688700127e-07, "loss": 0.353, "step": 4270 },
    { "epoch": 5.89, "learning_rate": 1.5018012386162072e-07, "loss": 0.3474, "step": 4280 },
    { "epoch": 5.91, "learning_rate": 1.1326608169920372e-07, "loss": 0.345, "step": 4290 },
    { "epoch": 5.92, "learning_rate": 8.154757045497619e-08, "loss": 0.3494, "step": 4300 },
    { "epoch": 5.93, "learning_rate": 5.50262399426904e-08, "loss": 0.3439, "step": 4310 },
    { "epoch": 5.95, "learning_rate": 3.370346964876036e-08, "loss": 0.3474, "step": 4320 },
    { "epoch": 5.96, "learning_rate": 1.7580368660519152e-08, "loss": 0.335, "step": 4330 },
    { "epoch": 5.98, "learning_rate": 6.657775608553962e-09, "loss": 0.3516, "step": 4340 },
    { "epoch": 5.99, "learning_rate": 9.362586230632354e-10, "loss": 0.3405, "step": 4350 },
    {
      "epoch": 6.0,
      "step": 4356,
      "total_flos": 5.268602572020646e+18,
      "train_loss": 0.8953271216487972,
      "train_runtime": 33942.058,
      "train_samples_per_second": 12.326,
      "train_steps_per_second": 0.128
    }
  ],
  "max_steps": 4356,
  "num_train_epochs": 6,
  "total_flos": 5.268602572020646e+18,
  "trial_name": null,
  "trial_params": null
}