legal_llama_ckpt / trainer_state.json
BaixingAI's picture
Upload 7 files
f1ee1ea
raw
history blame
53.6 kB
{
"best_metric": 1.110647439956665,
"best_model_checkpoint": "outs/instuct_chat_50k/checkpoint-2000",
"epoch": 21.228571428571428,
"global_step": 38400,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.5568240788790864e-07,
"loss": 1.1388,
"step": 1
},
{
"epoch": 0.01,
"learning_rate": 1.5412558380902958e-05,
"loss": 1.5551,
"step": 100
},
{
"epoch": 0.01,
"eval_loss": 1.2936956882476807,
"eval_runtime": 31.4406,
"eval_samples_per_second": 6.361,
"eval_steps_per_second": 0.795,
"step": 100
},
{
"epoch": 0.02,
"learning_rate": 3.098079916969382e-05,
"loss": 1.4423,
"step": 200
},
{
"epoch": 0.02,
"eval_loss": 1.227220892906189,
"eval_runtime": 31.4526,
"eval_samples_per_second": 6.359,
"eval_steps_per_second": 0.795,
"step": 200
},
{
"epoch": 0.02,
"learning_rate": 4.6393357550596784e-05,
"loss": 1.3335,
"step": 300
},
{
"epoch": 0.02,
"eval_loss": 1.2049953937530518,
"eval_runtime": 31.9522,
"eval_samples_per_second": 6.259,
"eval_steps_per_second": 0.782,
"step": 300
},
{
"epoch": 0.03,
"learning_rate": 6.165023352361183e-05,
"loss": 1.4225,
"step": 400
},
{
"epoch": 0.03,
"eval_loss": 1.1863670349121094,
"eval_runtime": 31.5148,
"eval_samples_per_second": 6.346,
"eval_steps_per_second": 0.793,
"step": 400
},
{
"epoch": 0.04,
"learning_rate": 7.721847431240269e-05,
"loss": 1.3618,
"step": 500
},
{
"epoch": 0.04,
"eval_loss": 1.1773875951766968,
"eval_runtime": 31.4521,
"eval_samples_per_second": 6.359,
"eval_steps_per_second": 0.795,
"step": 500
},
{
"epoch": 0.05,
"learning_rate": 9.278671510119357e-05,
"loss": 1.3262,
"step": 600
},
{
"epoch": 0.05,
"eval_loss": 1.168226718902588,
"eval_runtime": 31.5121,
"eval_samples_per_second": 6.347,
"eval_steps_per_second": 0.793,
"step": 600
},
{
"epoch": 0.05,
"learning_rate": 0.00010835495588998442,
"loss": 1.2133,
"step": 700
},
{
"epoch": 0.05,
"eval_loss": 1.1481703519821167,
"eval_runtime": 31.504,
"eval_samples_per_second": 6.348,
"eval_steps_per_second": 0.794,
"step": 700
},
{
"epoch": 0.06,
"learning_rate": 0.00012392319667877528,
"loss": 1.2034,
"step": 800
},
{
"epoch": 0.06,
"eval_loss": 1.13801908493042,
"eval_runtime": 31.5183,
"eval_samples_per_second": 6.346,
"eval_steps_per_second": 0.793,
"step": 800
},
{
"epoch": 0.07,
"learning_rate": 0.00013949143746756616,
"loss": 1.2159,
"step": 900
},
{
"epoch": 0.07,
"eval_loss": 1.1303695440292358,
"eval_runtime": 31.4531,
"eval_samples_per_second": 6.359,
"eval_steps_per_second": 0.795,
"step": 900
},
{
"epoch": 0.08,
"learning_rate": 0.00015505967825635704,
"loss": 1.1612,
"step": 1000
},
{
"epoch": 0.08,
"eval_loss": 1.1280632019042969,
"eval_runtime": 31.434,
"eval_samples_per_second": 6.363,
"eval_steps_per_second": 0.795,
"step": 1000
},
{
"epoch": 0.09,
"learning_rate": 0.00017062791904514786,
"loss": 1.1549,
"step": 1100
},
{
"epoch": 0.09,
"eval_loss": 1.1211422681808472,
"eval_runtime": 31.5065,
"eval_samples_per_second": 6.348,
"eval_steps_per_second": 0.793,
"step": 1100
},
{
"epoch": 0.09,
"learning_rate": 0.00018619615983393875,
"loss": 1.2038,
"step": 1200
},
{
"epoch": 0.09,
"eval_loss": 1.126134991645813,
"eval_runtime": 31.4919,
"eval_samples_per_second": 6.351,
"eval_steps_per_second": 0.794,
"step": 1200
},
{
"epoch": 0.1,
"learning_rate": 0.00020176440062272963,
"loss": 1.1784,
"step": 1300
},
{
"epoch": 0.1,
"eval_loss": 1.1223646402359009,
"eval_runtime": 31.4108,
"eval_samples_per_second": 6.367,
"eval_steps_per_second": 0.796,
"step": 1300
},
{
"epoch": 0.11,
"learning_rate": 0.00021733264141152048,
"loss": 1.1834,
"step": 1400
},
{
"epoch": 0.11,
"eval_loss": 1.113663911819458,
"eval_runtime": 31.463,
"eval_samples_per_second": 6.357,
"eval_steps_per_second": 0.795,
"step": 1400
},
{
"epoch": 0.12,
"learning_rate": 0.00023290088220031133,
"loss": 1.1974,
"step": 1500
},
{
"epoch": 0.12,
"eval_loss": 1.1265443563461304,
"eval_runtime": 31.4893,
"eval_samples_per_second": 6.351,
"eval_steps_per_second": 0.794,
"step": 1500
},
{
"epoch": 0.12,
"learning_rate": 0.0002484691229891022,
"loss": 1.2174,
"step": 1600
},
{
"epoch": 0.12,
"eval_loss": 1.1237465143203735,
"eval_runtime": 31.5226,
"eval_samples_per_second": 6.345,
"eval_steps_per_second": 0.793,
"step": 1600
},
{
"epoch": 0.13,
"learning_rate": 0.00026403736377789307,
"loss": 1.2019,
"step": 1700
},
{
"epoch": 0.13,
"eval_loss": 1.1198475360870361,
"eval_runtime": 31.4658,
"eval_samples_per_second": 6.356,
"eval_steps_per_second": 0.795,
"step": 1700
},
{
"epoch": 0.14,
"learning_rate": 0.0002796056045666839,
"loss": 1.2114,
"step": 1800
},
{
"epoch": 0.14,
"eval_loss": 1.1201978921890259,
"eval_runtime": 31.5138,
"eval_samples_per_second": 6.346,
"eval_steps_per_second": 0.793,
"step": 1800
},
{
"epoch": 0.15,
"learning_rate": 0.00029517384535547483,
"loss": 1.1848,
"step": 1900
},
{
"epoch": 0.15,
"eval_loss": 1.1198968887329102,
"eval_runtime": 31.1932,
"eval_samples_per_second": 6.412,
"eval_steps_per_second": 0.801,
"step": 1900
},
{
"epoch": 0.16,
"learning_rate": 0.0002994345961596241,
"loss": 1.129,
"step": 2000
},
{
"epoch": 0.16,
"eval_loss": 1.110647439956665,
"eval_runtime": 31.1909,
"eval_samples_per_second": 6.412,
"eval_steps_per_second": 0.802,
"step": 2000
},
{
"epoch": 0.16,
"learning_rate": 0.00029861517030400697,
"loss": 1.1864,
"step": 2100
},
{
"epoch": 0.16,
"eval_loss": 1.1134731769561768,
"eval_runtime": 31.169,
"eval_samples_per_second": 6.417,
"eval_steps_per_second": 0.802,
"step": 2100
},
{
"epoch": 0.17,
"learning_rate": 0.0002977957444483898,
"loss": 1.1725,
"step": 2200
},
{
"epoch": 0.17,
"eval_loss": 1.1226890087127686,
"eval_runtime": 31.1975,
"eval_samples_per_second": 6.411,
"eval_steps_per_second": 0.801,
"step": 2200
},
{
"epoch": 0.18,
"learning_rate": 0.0002969763185927726,
"loss": 1.1363,
"step": 2300
},
{
"epoch": 0.18,
"eval_loss": 1.1255100965499878,
"eval_runtime": 31.237,
"eval_samples_per_second": 6.403,
"eval_steps_per_second": 0.8,
"step": 2300
},
{
"epoch": 0.19,
"learning_rate": 0.00029615689273715547,
"loss": 1.204,
"step": 2400
},
{
"epoch": 0.19,
"eval_loss": 1.1148998737335205,
"eval_runtime": 31.2369,
"eval_samples_per_second": 6.403,
"eval_steps_per_second": 0.8,
"step": 2400
},
{
"epoch": 0.19,
"learning_rate": 0.0002953374668815383,
"loss": 1.1465,
"step": 2500
},
{
"epoch": 0.19,
"eval_loss": 1.1103030443191528,
"eval_runtime": 31.2141,
"eval_samples_per_second": 6.407,
"eval_steps_per_second": 0.801,
"step": 2500
},
{
"epoch": 0.2,
"learning_rate": 0.0002945180410259212,
"loss": 1.2058,
"step": 2600
},
{
"epoch": 0.2,
"eval_loss": 1.1112116575241089,
"eval_runtime": 31.6225,
"eval_samples_per_second": 6.325,
"eval_steps_per_second": 0.791,
"step": 2600
},
{
"epoch": 0.21,
"learning_rate": 0.00029369861517030397,
"loss": 1.1662,
"step": 2700
},
{
"epoch": 0.21,
"eval_loss": 1.1122115850448608,
"eval_runtime": 31.1829,
"eval_samples_per_second": 6.414,
"eval_steps_per_second": 0.802,
"step": 2700
},
{
"epoch": 0.22,
"learning_rate": 0.0002928791893146868,
"loss": 1.1372,
"step": 2800
},
{
"epoch": 0.22,
"eval_loss": 1.1113629341125488,
"eval_runtime": 31.6588,
"eval_samples_per_second": 6.317,
"eval_steps_per_second": 0.79,
"step": 2800
},
{
"epoch": 0.23,
"learning_rate": 0.0002920597634590696,
"loss": 1.1693,
"step": 2900
},
{
"epoch": 0.23,
"eval_loss": 1.1073808670043945,
"eval_runtime": 31.5886,
"eval_samples_per_second": 6.331,
"eval_steps_per_second": 0.791,
"step": 2900
},
{
"epoch": 0.23,
"learning_rate": 0.0002912403376034525,
"loss": 1.1776,
"step": 3000
},
{
"epoch": 0.23,
"eval_loss": 1.1112266778945923,
"eval_runtime": 31.3957,
"eval_samples_per_second": 6.37,
"eval_steps_per_second": 0.796,
"step": 3000
},
{
"epoch": 1.06,
"learning_rate": 0.0002786905011532816,
"loss": 1.0338,
"step": 3100
},
{
"epoch": 1.11,
"learning_rate": 0.000277904172782554,
"loss": 0.9576,
"step": 3200
},
{
"epoch": 1.17,
"learning_rate": 0.00027711784441182633,
"loss": 0.9065,
"step": 3300
},
{
"epoch": 1.23,
"learning_rate": 0.00027633151604109874,
"loss": 0.8641,
"step": 3400
},
{
"epoch": 1.29,
"learning_rate": 0.00027554518767037114,
"loss": 0.8564,
"step": 3500
},
{
"epoch": 1.34,
"learning_rate": 0.00027475885929964354,
"loss": 0.8547,
"step": 3600
},
{
"epoch": 1.4,
"learning_rate": 0.0002739725309289159,
"loss": 0.8469,
"step": 3700
},
{
"epoch": 1.46,
"learning_rate": 0.00027318620255818824,
"loss": 0.8303,
"step": 3800
},
{
"epoch": 1.51,
"learning_rate": 0.00027239987418746064,
"loss": 0.8234,
"step": 3900
},
{
"epoch": 1.57,
"learning_rate": 0.00027161354581673304,
"loss": 0.7905,
"step": 4000
},
{
"epoch": 1.63,
"learning_rate": 0.00027082721744600544,
"loss": 0.7864,
"step": 4100
},
{
"epoch": 1.69,
"learning_rate": 0.0002700408890752778,
"loss": 0.7816,
"step": 4200
},
{
"epoch": 1.74,
"learning_rate": 0.0002692545607045502,
"loss": 0.7759,
"step": 4300
},
{
"epoch": 1.8,
"learning_rate": 0.0002684682323338226,
"loss": 0.7948,
"step": 4400
},
{
"epoch": 1.86,
"learning_rate": 0.00026768190396309494,
"loss": 0.7525,
"step": 4500
},
{
"epoch": 1.91,
"learning_rate": 0.00026689557559236735,
"loss": 0.7646,
"step": 4600
},
{
"epoch": 1.97,
"learning_rate": 0.00026610924722163975,
"loss": 0.7578,
"step": 4700
},
{
"epoch": 2.03,
"learning_rate": 0.0002653229188509121,
"loss": 0.6764,
"step": 4800
},
{
"epoch": 2.09,
"learning_rate": 0.0002645365904801845,
"loss": 0.6138,
"step": 4900
},
{
"epoch": 2.14,
"learning_rate": 0.0002637502621094569,
"loss": 0.6206,
"step": 5000
},
{
"epoch": 2.2,
"learning_rate": 0.0002629639337387293,
"loss": 0.6195,
"step": 5100
},
{
"epoch": 2.26,
"learning_rate": 0.00026217760536800165,
"loss": 0.6353,
"step": 5200
},
{
"epoch": 2.31,
"learning_rate": 0.000261391276997274,
"loss": 0.6084,
"step": 5300
},
{
"epoch": 2.37,
"learning_rate": 0.0002606049486265464,
"loss": 0.625,
"step": 5400
},
{
"epoch": 2.43,
"learning_rate": 0.0002598186202558188,
"loss": 0.6085,
"step": 5500
},
{
"epoch": 2.49,
"learning_rate": 0.0002590322918850912,
"loss": 0.6372,
"step": 5600
},
{
"epoch": 2.54,
"learning_rate": 0.00025824596351436356,
"loss": 0.6267,
"step": 5700
},
{
"epoch": 2.6,
"learning_rate": 0.00025745963514363596,
"loss": 0.6322,
"step": 5800
},
{
"epoch": 2.66,
"learning_rate": 0.00025667330677290836,
"loss": 0.6142,
"step": 5900
},
{
"epoch": 2.71,
"learning_rate": 0.0002558869784021807,
"loss": 0.6346,
"step": 6000
},
{
"epoch": 2.77,
"learning_rate": 0.0002551006500314531,
"loss": 0.6368,
"step": 6100
},
{
"epoch": 2.83,
"learning_rate": 0.0002543143216607255,
"loss": 0.638,
"step": 6200
},
{
"epoch": 2.89,
"learning_rate": 0.00025352799328999786,
"loss": 0.6373,
"step": 6300
},
{
"epoch": 2.94,
"learning_rate": 0.00025274166491927026,
"loss": 0.6196,
"step": 6400
},
{
"epoch": 3.0,
"learning_rate": 0.00025195533654854267,
"loss": 0.6328,
"step": 6500
},
{
"epoch": 3.06,
"learning_rate": 0.00025116900817781507,
"loss": 0.4419,
"step": 6600
},
{
"epoch": 3.11,
"learning_rate": 0.0002503826798070874,
"loss": 0.4553,
"step": 6700
},
{
"epoch": 3.17,
"learning_rate": 0.00024959635143635976,
"loss": 0.47,
"step": 6800
},
{
"epoch": 3.23,
"learning_rate": 0.00024881002306563217,
"loss": 0.4713,
"step": 6900
},
{
"epoch": 3.29,
"learning_rate": 0.00024802369469490457,
"loss": 0.4837,
"step": 7000
},
{
"epoch": 3.34,
"learning_rate": 0.00024723736632417697,
"loss": 0.4761,
"step": 7100
},
{
"epoch": 3.4,
"learning_rate": 0.0002464510379534493,
"loss": 0.4743,
"step": 7200
},
{
"epoch": 3.46,
"learning_rate": 0.0002456647095827217,
"loss": 0.4721,
"step": 7300
},
{
"epoch": 3.51,
"learning_rate": 0.0002448783812119941,
"loss": 0.4761,
"step": 7400
},
{
"epoch": 3.57,
"learning_rate": 0.00024409205284126647,
"loss": 0.4833,
"step": 7500
},
{
"epoch": 3.63,
"learning_rate": 0.0002433057244705389,
"loss": 0.4715,
"step": 7600
},
{
"epoch": 3.69,
"learning_rate": 0.00024251939609981125,
"loss": 0.4881,
"step": 7700
},
{
"epoch": 3.74,
"learning_rate": 0.00024173306772908362,
"loss": 0.4932,
"step": 7800
},
{
"epoch": 3.8,
"learning_rate": 0.00024094673935835603,
"loss": 0.49,
"step": 7900
},
{
"epoch": 3.86,
"learning_rate": 0.0002401604109876284,
"loss": 0.4877,
"step": 8000
},
{
"epoch": 3.91,
"learning_rate": 0.0002393740826169008,
"loss": 0.4959,
"step": 8100
},
{
"epoch": 3.97,
"learning_rate": 0.00023858775424617318,
"loss": 0.5031,
"step": 8200
},
{
"epoch": 4.03,
"learning_rate": 0.00023780142587544555,
"loss": 0.4162,
"step": 8300
},
{
"epoch": 4.09,
"learning_rate": 0.00023701509750471796,
"loss": 0.33,
"step": 8400
},
{
"epoch": 4.14,
"learning_rate": 0.00023622876913399033,
"loss": 0.3303,
"step": 8500
},
{
"epoch": 4.2,
"learning_rate": 0.00023544244076326273,
"loss": 0.3346,
"step": 8600
},
{
"epoch": 4.26,
"learning_rate": 0.0002346561123925351,
"loss": 0.3454,
"step": 8700
},
{
"epoch": 4.31,
"learning_rate": 0.00023386978402180748,
"loss": 0.3507,
"step": 8800
},
{
"epoch": 4.37,
"learning_rate": 0.0002330834556510799,
"loss": 0.3481,
"step": 8900
},
{
"epoch": 4.43,
"learning_rate": 0.00023229712728035223,
"loss": 0.3461,
"step": 9000
},
{
"epoch": 4.49,
"learning_rate": 0.00023151079890962464,
"loss": 0.3489,
"step": 9100
},
{
"epoch": 4.54,
"learning_rate": 0.000230724470538897,
"loss": 0.3534,
"step": 9200
},
{
"epoch": 4.6,
"learning_rate": 0.0002299381421681694,
"loss": 0.3629,
"step": 9300
},
{
"epoch": 4.66,
"learning_rate": 0.0002291518137974418,
"loss": 0.3788,
"step": 9400
},
{
"epoch": 4.71,
"learning_rate": 0.00022836548542671417,
"loss": 0.3588,
"step": 9500
},
{
"epoch": 4.77,
"learning_rate": 0.00022757915705598657,
"loss": 0.3655,
"step": 9600
},
{
"epoch": 4.83,
"learning_rate": 0.00022679282868525894,
"loss": 0.3728,
"step": 9700
},
{
"epoch": 4.89,
"learning_rate": 0.00022600650031453132,
"loss": 0.3782,
"step": 9800
},
{
"epoch": 4.94,
"learning_rate": 0.00022522017194380372,
"loss": 0.3761,
"step": 9900
},
{
"epoch": 5.0,
"learning_rate": 0.0002244338435730761,
"loss": 0.3783,
"step": 10000
},
{
"epoch": 5.06,
"learning_rate": 0.0002236475152023485,
"loss": 0.2337,
"step": 10100
},
{
"epoch": 5.11,
"learning_rate": 0.00022286118683162087,
"loss": 0.2369,
"step": 10200
},
{
"epoch": 5.17,
"learning_rate": 0.00022207485846089325,
"loss": 0.235,
"step": 10300
},
{
"epoch": 5.23,
"learning_rate": 0.00022128853009016565,
"loss": 0.2353,
"step": 10400
},
{
"epoch": 5.29,
"learning_rate": 0.000220502201719438,
"loss": 0.2448,
"step": 10500
},
{
"epoch": 5.34,
"learning_rate": 0.0002197158733487104,
"loss": 0.2423,
"step": 10600
},
{
"epoch": 5.4,
"learning_rate": 0.00021892954497798278,
"loss": 0.2471,
"step": 10700
},
{
"epoch": 5.46,
"learning_rate": 0.00021814321660725515,
"loss": 0.2479,
"step": 10800
},
{
"epoch": 5.51,
"learning_rate": 0.00021735688823652755,
"loss": 0.2591,
"step": 10900
},
{
"epoch": 5.57,
"learning_rate": 0.00021657055986579993,
"loss": 0.2672,
"step": 11000
},
{
"epoch": 5.63,
"learning_rate": 0.00021578423149507233,
"loss": 0.2676,
"step": 11100
},
{
"epoch": 5.69,
"learning_rate": 0.0002149979031243447,
"loss": 0.2683,
"step": 11200
},
{
"epoch": 5.74,
"learning_rate": 0.00021421157475361708,
"loss": 0.2646,
"step": 11300
},
{
"epoch": 5.8,
"learning_rate": 0.00021342524638288948,
"loss": 0.2759,
"step": 11400
},
{
"epoch": 5.86,
"learning_rate": 0.00021263891801216186,
"loss": 0.2805,
"step": 11500
},
{
"epoch": 5.91,
"learning_rate": 0.00021185258964143426,
"loss": 0.2721,
"step": 11600
},
{
"epoch": 5.97,
"learning_rate": 0.00021106626127070664,
"loss": 0.2785,
"step": 11700
},
{
"epoch": 6.03,
"learning_rate": 0.00021027993289997898,
"loss": 0.2186,
"step": 11800
},
{
"epoch": 6.09,
"learning_rate": 0.00020949360452925141,
"loss": 0.1612,
"step": 11900
},
{
"epoch": 6.14,
"learning_rate": 0.00020870727615852376,
"loss": 0.1628,
"step": 12000
},
{
"epoch": 6.2,
"learning_rate": 0.00020792094778779616,
"loss": 0.1661,
"step": 12100
},
{
"epoch": 6.26,
"learning_rate": 0.00020713461941706854,
"loss": 0.1759,
"step": 12200
},
{
"epoch": 6.31,
"learning_rate": 0.00020634829104634091,
"loss": 0.1803,
"step": 12300
},
{
"epoch": 6.37,
"learning_rate": 0.00020556196267561332,
"loss": 0.1788,
"step": 12400
},
{
"epoch": 6.43,
"learning_rate": 0.000204783497588593,
"loss": 0.1812,
"step": 12500
},
{
"epoch": 6.49,
"learning_rate": 0.00020399716921786537,
"loss": 0.1813,
"step": 12600
},
{
"epoch": 6.54,
"learning_rate": 0.00020321084084713774,
"loss": 0.1876,
"step": 12700
},
{
"epoch": 6.6,
"learning_rate": 0.00020242451247641014,
"loss": 0.184,
"step": 12800
},
{
"epoch": 6.66,
"learning_rate": 0.00020163818410568252,
"loss": 0.1964,
"step": 12900
},
{
"epoch": 6.71,
"learning_rate": 0.00020085185573495492,
"loss": 0.194,
"step": 13000
},
{
"epoch": 6.77,
"learning_rate": 0.0002000655273642273,
"loss": 0.1956,
"step": 13100
},
{
"epoch": 6.83,
"learning_rate": 0.00019927919899349964,
"loss": 0.2015,
"step": 13200
},
{
"epoch": 6.89,
"learning_rate": 0.00019849287062277207,
"loss": 0.2008,
"step": 13300
},
{
"epoch": 6.94,
"learning_rate": 0.00019770654225204442,
"loss": 0.2035,
"step": 13400
},
{
"epoch": 7.0,
"learning_rate": 0.00019692021388131682,
"loss": 0.2102,
"step": 13500
},
{
"epoch": 7.06,
"learning_rate": 0.0001961338855105892,
"loss": 0.1232,
"step": 13600
},
{
"epoch": 7.11,
"learning_rate": 0.00019534755713986157,
"loss": 0.1186,
"step": 13700
},
{
"epoch": 7.17,
"learning_rate": 0.00019456122876913398,
"loss": 0.1213,
"step": 13800
},
{
"epoch": 7.23,
"learning_rate": 0.00019377490039840635,
"loss": 0.1234,
"step": 13900
},
{
"epoch": 7.29,
"learning_rate": 0.00019298857202767875,
"loss": 0.1284,
"step": 14000
},
{
"epoch": 7.34,
"learning_rate": 0.00019220224365695113,
"loss": 0.136,
"step": 14100
},
{
"epoch": 7.4,
"learning_rate": 0.0001914159152862235,
"loss": 0.133,
"step": 14200
},
{
"epoch": 7.46,
"learning_rate": 0.0001906295869154959,
"loss": 0.1387,
"step": 14300
},
{
"epoch": 7.51,
"learning_rate": 0.00018984325854476828,
"loss": 0.1373,
"step": 14400
},
{
"epoch": 7.57,
"learning_rate": 0.00018906479345774796,
"loss": 0.1399,
"step": 14500
},
{
"epoch": 7.63,
"learning_rate": 0.0001882784650870203,
"loss": 0.1407,
"step": 14600
},
{
"epoch": 7.69,
"learning_rate": 0.0001874921367162927,
"loss": 0.1506,
"step": 14700
},
{
"epoch": 7.74,
"learning_rate": 0.00018670580834556508,
"loss": 0.1471,
"step": 14800
},
{
"epoch": 7.8,
"learning_rate": 0.00018591947997483746,
"loss": 0.1492,
"step": 14900
},
{
"epoch": 7.86,
"learning_rate": 0.00018513315160410986,
"loss": 0.1495,
"step": 15000
},
{
"epoch": 7.91,
"learning_rate": 0.00018434682323338223,
"loss": 0.1564,
"step": 15100
},
{
"epoch": 7.97,
"learning_rate": 0.00018356049486265464,
"loss": 0.1541,
"step": 15200
},
{
"epoch": 8.03,
"learning_rate": 0.000182774166491927,
"loss": 0.1303,
"step": 15300
},
{
"epoch": 8.09,
"learning_rate": 0.00018198783812119939,
"loss": 0.0966,
"step": 15400
},
{
"epoch": 8.14,
"learning_rate": 0.0001812015097504718,
"loss": 0.0969,
"step": 15500
},
{
"epoch": 8.2,
"learning_rate": 0.00018041518137974416,
"loss": 0.1002,
"step": 15600
},
{
"epoch": 8.26,
"learning_rate": 0.00017962885300901657,
"loss": 0.0999,
"step": 15700
},
{
"epoch": 8.31,
"learning_rate": 0.00017884252463828894,
"loss": 0.1012,
"step": 15800
},
{
"epoch": 8.37,
"learning_rate": 0.0001780561962675613,
"loss": 0.1026,
"step": 15900
},
{
"epoch": 8.43,
"learning_rate": 0.00017726986789683372,
"loss": 0.1083,
"step": 16000
},
{
"epoch": 8.49,
"learning_rate": 0.00017648353952610607,
"loss": 0.1086,
"step": 16100
},
{
"epoch": 8.54,
"learning_rate": 0.00017569721115537847,
"loss": 0.1127,
"step": 16200
},
{
"epoch": 8.6,
"learning_rate": 0.00017491088278465084,
"loss": 0.1104,
"step": 16300
},
{
"epoch": 8.66,
"learning_rate": 0.00017412455441392322,
"loss": 0.114,
"step": 16400
},
{
"epoch": 8.71,
"learning_rate": 0.00017333822604319562,
"loss": 0.1156,
"step": 16500
},
{
"epoch": 8.77,
"learning_rate": 0.000172551897672468,
"loss": 0.1145,
"step": 16600
},
{
"epoch": 8.83,
"learning_rate": 0.00017177343258544767,
"loss": 0.1181,
"step": 16700
},
{
"epoch": 8.89,
"learning_rate": 0.00017098710421472005,
"loss": 0.1239,
"step": 16800
},
{
"epoch": 8.94,
"learning_rate": 0.00017020077584399245,
"loss": 0.1203,
"step": 16900
},
{
"epoch": 9.0,
"learning_rate": 0.00016941444747326482,
"loss": 0.1226,
"step": 17000
},
{
"epoch": 9.06,
"learning_rate": 0.00016862811910253723,
"loss": 0.0842,
"step": 17100
},
{
"epoch": 9.11,
"learning_rate": 0.0001678417907318096,
"loss": 0.0782,
"step": 17200
},
{
"epoch": 9.17,
"learning_rate": 0.00016705546236108195,
"loss": 0.0796,
"step": 17300
},
{
"epoch": 9.23,
"learning_rate": 0.00016626913399035435,
"loss": 0.0814,
"step": 17400
},
{
"epoch": 9.29,
"learning_rate": 0.00016548280561962673,
"loss": 0.0836,
"step": 17500
},
{
"epoch": 9.34,
"learning_rate": 0.00016469647724889913,
"loss": 0.0844,
"step": 17600
},
{
"epoch": 9.4,
"learning_rate": 0.0001639101488781715,
"loss": 0.0876,
"step": 17700
},
{
"epoch": 9.46,
"learning_rate": 0.00016312382050744388,
"loss": 0.0853,
"step": 17800
},
{
"epoch": 9.51,
"learning_rate": 0.00016233749213671628,
"loss": 0.0922,
"step": 17900
},
{
"epoch": 9.57,
"learning_rate": 0.00016155116376598866,
"loss": 0.0919,
"step": 18000
},
{
"epoch": 9.63,
"learning_rate": 0.00016076483539526106,
"loss": 0.0932,
"step": 18100
},
{
"epoch": 9.69,
"learning_rate": 0.00015997850702453343,
"loss": 0.0919,
"step": 18200
},
{
"epoch": 9.74,
"learning_rate": 0.0001591921786538058,
"loss": 0.0957,
"step": 18300
},
{
"epoch": 9.8,
"learning_rate": 0.0001584058502830782,
"loss": 0.0982,
"step": 18400
},
{
"epoch": 9.86,
"learning_rate": 0.0001576195219123506,
"loss": 0.0988,
"step": 18500
},
{
"epoch": 9.91,
"learning_rate": 0.000156833193541623,
"loss": 0.1016,
"step": 18600
},
{
"epoch": 9.97,
"learning_rate": 0.00015604686517089536,
"loss": 0.0967,
"step": 18700
},
{
"epoch": 10.03,
"learning_rate": 0.0001552605368001677,
"loss": 0.0862,
"step": 18800
},
{
"epoch": 10.09,
"learning_rate": 0.00015447420842944011,
"loss": 0.0693,
"step": 18900
},
{
"epoch": 10.14,
"learning_rate": 0.0001536957433424198,
"loss": 0.0722,
"step": 19000
},
{
"epoch": 10.2,
"learning_rate": 0.00015290941497169216,
"loss": 0.0725,
"step": 19100
},
{
"epoch": 10.26,
"learning_rate": 0.00015212308660096454,
"loss": 0.0716,
"step": 19200
},
{
"epoch": 10.31,
"learning_rate": 0.00015133675823023694,
"loss": 0.0709,
"step": 19300
},
{
"epoch": 10.37,
"learning_rate": 0.00015055042985950932,
"loss": 0.0734,
"step": 19400
},
{
"epoch": 10.43,
"learning_rate": 0.0001497641014887817,
"loss": 0.0761,
"step": 19500
},
{
"epoch": 10.49,
"learning_rate": 0.0001489777731180541,
"loss": 0.0778,
"step": 19600
},
{
"epoch": 10.54,
"learning_rate": 0.00014819144474732647,
"loss": 0.0794,
"step": 19700
},
{
"epoch": 10.6,
"learning_rate": 0.00014740511637659884,
"loss": 0.0811,
"step": 19800
},
{
"epoch": 10.66,
"learning_rate": 0.00014661878800587125,
"loss": 0.0806,
"step": 19900
},
{
"epoch": 10.71,
"learning_rate": 0.00014583245963514362,
"loss": 0.0812,
"step": 20000
},
{
"epoch": 10.77,
"learning_rate": 0.000145046131264416,
"loss": 0.0822,
"step": 20100
},
{
"epoch": 10.83,
"learning_rate": 0.0001442598028936884,
"loss": 0.0828,
"step": 20200
},
{
"epoch": 10.89,
"learning_rate": 0.00014347347452296077,
"loss": 0.0827,
"step": 20300
},
{
"epoch": 10.94,
"learning_rate": 0.00014268714615223315,
"loss": 0.0846,
"step": 20400
},
{
"epoch": 11.0,
"learning_rate": 0.00014190081778150555,
"loss": 0.0881,
"step": 20500
},
{
"epoch": 11.06,
"learning_rate": 0.00014111448941077793,
"loss": 0.0636,
"step": 20600
},
{
"epoch": 11.11,
"learning_rate": 0.00014032816104005033,
"loss": 0.0628,
"step": 20700
},
{
"epoch": 11.17,
"learning_rate": 0.00013954183266932268,
"loss": 0.063,
"step": 20800
},
{
"epoch": 11.23,
"learning_rate": 0.00013875550429859508,
"loss": 0.0621,
"step": 20900
},
{
"epoch": 11.29,
"learning_rate": 0.00013796917592786745,
"loss": 0.0644,
"step": 21000
},
{
"epoch": 11.34,
"learning_rate": 0.00013718284755713986,
"loss": 0.0636,
"step": 21100
},
{
"epoch": 11.4,
"learning_rate": 0.00013639651918641223,
"loss": 0.0667,
"step": 21200
},
{
"epoch": 11.46,
"learning_rate": 0.0001356180540993919,
"loss": 0.0686,
"step": 21300
},
{
"epoch": 11.51,
"learning_rate": 0.00013483172572866428,
"loss": 0.0677,
"step": 21400
},
{
"epoch": 11.57,
"learning_rate": 0.00013404539735793666,
"loss": 0.0692,
"step": 21500
},
{
"epoch": 11.63,
"learning_rate": 0.00013325906898720903,
"loss": 0.072,
"step": 21600
},
{
"epoch": 11.69,
"learning_rate": 0.00013247274061648143,
"loss": 0.0708,
"step": 21700
},
{
"epoch": 11.74,
"learning_rate": 0.0001316864122457538,
"loss": 0.0738,
"step": 21800
},
{
"epoch": 11.8,
"learning_rate": 0.0001309000838750262,
"loss": 0.0744,
"step": 21900
},
{
"epoch": 11.86,
"learning_rate": 0.00013011375550429859,
"loss": 0.0743,
"step": 22000
},
{
"epoch": 11.91,
"learning_rate": 0.00012932742713357096,
"loss": 0.0742,
"step": 22100
},
{
"epoch": 11.97,
"learning_rate": 0.00012854109876284334,
"loss": 0.0743,
"step": 22200
},
{
"epoch": 12.03,
"learning_rate": 0.00012775477039211574,
"loss": 0.0665,
"step": 22300
},
{
"epoch": 12.09,
"learning_rate": 0.00012696844202138811,
"loss": 0.0583,
"step": 22400
},
{
"epoch": 12.14,
"learning_rate": 0.00012618211365066052,
"loss": 0.0555,
"step": 22500
},
{
"epoch": 12.2,
"learning_rate": 0.0001253957852799329,
"loss": 0.0556,
"step": 22600
},
{
"epoch": 12.26,
"learning_rate": 0.00012460945690920527,
"loss": 0.0596,
"step": 22700
},
{
"epoch": 12.31,
"learning_rate": 0.00012382312853847767,
"loss": 0.058,
"step": 22800
},
{
"epoch": 12.37,
"learning_rate": 0.00012303680016775004,
"loss": 0.0599,
"step": 22900
},
{
"epoch": 12.43,
"learning_rate": 0.00012225047179702242,
"loss": 0.0574,
"step": 23000
},
{
"epoch": 12.49,
"learning_rate": 0.00012146414342629481,
"loss": 0.0594,
"step": 23100
},
{
"epoch": 12.54,
"learning_rate": 0.00012067781505556718,
"loss": 0.062,
"step": 23200
},
{
"epoch": 12.6,
"learning_rate": 0.00011989148668483957,
"loss": 0.0624,
"step": 23300
},
{
"epoch": 12.66,
"learning_rate": 0.00011910515831411196,
"loss": 0.0618,
"step": 23400
},
{
"epoch": 12.71,
"learning_rate": 0.00011831882994338435,
"loss": 0.0645,
"step": 23500
},
{
"epoch": 12.77,
"learning_rate": 0.00011753250157265673,
"loss": 0.0642,
"step": 23600
},
{
"epoch": 12.83,
"learning_rate": 0.0001167540364856364,
"loss": 0.0651,
"step": 23700
},
{
"epoch": 12.89,
"learning_rate": 0.00011596770811490879,
"loss": 0.0643,
"step": 23800
},
{
"epoch": 12.94,
"learning_rate": 0.00011518137974418116,
"loss": 0.0649,
"step": 23900
},
{
"epoch": 13.0,
"learning_rate": 0.00011439505137345354,
"loss": 0.0654,
"step": 24000
},
{
"epoch": 13.06,
"learning_rate": 0.00011360872300272593,
"loss": 0.0519,
"step": 24100
},
{
"epoch": 13.11,
"learning_rate": 0.00011282239463199832,
"loss": 0.0518,
"step": 24200
},
{
"epoch": 13.17,
"learning_rate": 0.0001120360662612707,
"loss": 0.0517,
"step": 24300
},
{
"epoch": 13.23,
"learning_rate": 0.00011124973789054309,
"loss": 0.0511,
"step": 24400
},
{
"epoch": 13.29,
"learning_rate": 0.00011046340951981545,
"loss": 0.0524,
"step": 24500
},
{
"epoch": 13.34,
"learning_rate": 0.00010967708114908784,
"loss": 0.0533,
"step": 24600
},
{
"epoch": 13.4,
"learning_rate": 0.00010889075277836023,
"loss": 0.0541,
"step": 24700
},
{
"epoch": 13.46,
"learning_rate": 0.00010810442440763262,
"loss": 0.0544,
"step": 24800
},
{
"epoch": 13.51,
"learning_rate": 0.00010731809603690501,
"loss": 0.0549,
"step": 24900
},
{
"epoch": 13.57,
"learning_rate": 0.00010653176766617738,
"loss": 0.0555,
"step": 25000
},
{
"epoch": 13.63,
"learning_rate": 0.00010574543929544977,
"loss": 0.0553,
"step": 25100
},
{
"epoch": 13.69,
"learning_rate": 0.00010495911092472216,
"loss": 0.0551,
"step": 25200
},
{
"epoch": 13.74,
"learning_rate": 0.00010417278255399454,
"loss": 0.0565,
"step": 25300
},
{
"epoch": 13.8,
"learning_rate": 0.00010338645418326693,
"loss": 0.0575,
"step": 25400
},
{
"epoch": 13.86,
"learning_rate": 0.0001026001258125393,
"loss": 0.0563,
"step": 25500
},
{
"epoch": 13.91,
"learning_rate": 0.00010181379744181169,
"loss": 0.057,
"step": 25600
},
{
"epoch": 13.97,
"learning_rate": 0.00010102746907108408,
"loss": 0.0574,
"step": 25700
},
{
"epoch": 14.03,
"learning_rate": 0.00010024114070035647,
"loss": 0.0556,
"step": 25800
},
{
"epoch": 14.09,
"learning_rate": 9.945481232962886e-05,
"loss": 0.049,
"step": 25900
},
{
"epoch": 14.14,
"learning_rate": 9.866848395890122e-05,
"loss": 0.0477,
"step": 26000
},
{
"epoch": 14.2,
"learning_rate": 9.78821555881736e-05,
"loss": 0.0477,
"step": 26100
},
{
"epoch": 14.26,
"learning_rate": 9.7095827217446e-05,
"loss": 0.0481,
"step": 26200
},
{
"epoch": 14.31,
"learning_rate": 9.630949884671838e-05,
"loss": 0.0466,
"step": 26300
},
{
"epoch": 14.37,
"learning_rate": 9.552317047599077e-05,
"loss": 0.049,
"step": 26400
},
{
"epoch": 14.43,
"learning_rate": 9.473684210526315e-05,
"loss": 0.0509,
"step": 26500
},
{
"epoch": 14.49,
"learning_rate": 9.395051373453554e-05,
"loss": 0.0488,
"step": 26600
},
{
"epoch": 14.54,
"learning_rate": 9.316418536380793e-05,
"loss": 0.0509,
"step": 26700
},
{
"epoch": 14.6,
"learning_rate": 9.23778569930803e-05,
"loss": 0.0515,
"step": 26800
},
{
"epoch": 14.66,
"learning_rate": 9.159152862235269e-05,
"loss": 0.0523,
"step": 26900
},
{
"epoch": 14.71,
"learning_rate": 9.080520025162506e-05,
"loss": 0.0499,
"step": 27000
},
{
"epoch": 14.77,
"learning_rate": 9.001887188089745e-05,
"loss": 0.0507,
"step": 27100
},
{
"epoch": 14.83,
"learning_rate": 8.923254351016984e-05,
"loss": 0.0524,
"step": 27200
},
{
"epoch": 14.89,
"learning_rate": 8.844621513944223e-05,
"loss": 0.0525,
"step": 27300
},
{
"epoch": 14.94,
"learning_rate": 8.765988676871462e-05,
"loss": 0.0522,
"step": 27400
},
{
"epoch": 15.0,
"learning_rate": 8.687355839798698e-05,
"loss": 0.0547,
"step": 27500
},
{
"epoch": 15.06,
"learning_rate": 8.608723002725937e-05,
"loss": 0.0449,
"step": 27600
},
{
"epoch": 15.11,
"learning_rate": 8.530090165653176e-05,
"loss": 0.0455,
"step": 27700
},
{
"epoch": 15.17,
"learning_rate": 8.452243656951142e-05,
"loss": 0.0429,
"step": 27800
},
{
"epoch": 15.23,
"learning_rate": 8.373610819878381e-05,
"loss": 0.0447,
"step": 27900
},
{
"epoch": 15.29,
"learning_rate": 8.29497798280562e-05,
"loss": 0.0458,
"step": 28000
},
{
"epoch": 15.34,
"learning_rate": 8.216345145732857e-05,
"loss": 0.0451,
"step": 28100
},
{
"epoch": 15.4,
"learning_rate": 8.137712308660096e-05,
"loss": 0.047,
"step": 28200
},
{
"epoch": 15.46,
"learning_rate": 8.059079471587334e-05,
"loss": 0.0465,
"step": 28300
},
{
"epoch": 15.51,
"learning_rate": 7.980446634514572e-05,
"loss": 0.0461,
"step": 28400
},
{
"epoch": 15.57,
"learning_rate": 7.901813797441811e-05,
"loss": 0.0456,
"step": 28500
},
{
"epoch": 15.63,
"learning_rate": 7.82318096036905e-05,
"loss": 0.0459,
"step": 28600
},
{
"epoch": 15.69,
"learning_rate": 7.744548123296289e-05,
"loss": 0.0461,
"step": 28700
},
{
"epoch": 15.74,
"learning_rate": 7.665915286223525e-05,
"loss": 0.0469,
"step": 28800
},
{
"epoch": 15.8,
"learning_rate": 7.587282449150764e-05,
"loss": 0.0478,
"step": 28900
},
{
"epoch": 15.86,
"learning_rate": 7.508649612078003e-05,
"loss": 0.0474,
"step": 29000
},
{
"epoch": 15.91,
"learning_rate": 7.430016775005242e-05,
"loss": 0.0469,
"step": 29100
},
{
"epoch": 15.97,
"learning_rate": 7.35138393793248e-05,
"loss": 0.0471,
"step": 29200
},
{
"epoch": 16.03,
"learning_rate": 7.272751100859718e-05,
"loss": 0.0461,
"step": 29300
},
{
"epoch": 16.09,
"learning_rate": 7.194118263786957e-05,
"loss": 0.0404,
"step": 29400
},
{
"epoch": 16.14,
"learning_rate": 7.115485426714195e-05,
"loss": 0.0415,
"step": 29500
},
{
"epoch": 16.2,
"learning_rate": 7.036852589641434e-05,
"loss": 0.0417,
"step": 29600
},
{
"epoch": 16.26,
"learning_rate": 6.958219752568672e-05,
"loss": 0.0416,
"step": 29700
},
{
"epoch": 16.31,
"learning_rate": 6.879586915495911e-05,
"loss": 0.0419,
"step": 29800
},
{
"epoch": 16.37,
"learning_rate": 6.800954078423149e-05,
"loss": 0.0435,
"step": 29900
},
{
"epoch": 16.43,
"learning_rate": 6.723107569721115e-05,
"loss": 0.0418,
"step": 30000
},
{
"epoch": 16.49,
"learning_rate": 6.644474732648354e-05,
"loss": 0.0429,
"step": 30100
},
{
"epoch": 16.54,
"learning_rate": 6.565841895575591e-05,
"loss": 0.0419,
"step": 30200
},
{
"epoch": 16.6,
"learning_rate": 6.48720905850283e-05,
"loss": 0.0416,
"step": 30300
},
{
"epoch": 16.66,
"learning_rate": 6.408576221430069e-05,
"loss": 0.0423,
"step": 30400
},
{
"epoch": 16.71,
"learning_rate": 6.329943384357306e-05,
"loss": 0.0433,
"step": 30500
},
{
"epoch": 16.77,
"learning_rate": 6.251310547284545e-05,
"loss": 0.044,
"step": 30600
},
{
"epoch": 16.83,
"learning_rate": 6.172677710211784e-05,
"loss": 0.0434,
"step": 30700
},
{
"epoch": 16.89,
"learning_rate": 6.0940448731390224e-05,
"loss": 0.0436,
"step": 30800
},
{
"epoch": 16.94,
"learning_rate": 6.015412036066261e-05,
"loss": 0.0441,
"step": 30900
},
{
"epoch": 17.0,
"learning_rate": 5.936779198993499e-05,
"loss": 0.044,
"step": 31000
},
{
"epoch": 17.06,
"learning_rate": 5.8581463619207377e-05,
"loss": 0.0369,
"step": 31100
},
{
"epoch": 17.11,
"learning_rate": 5.779513524847976e-05,
"loss": 0.0385,
"step": 31200
},
{
"epoch": 17.17,
"learning_rate": 5.700880687775215e-05,
"loss": 0.0398,
"step": 31300
},
{
"epoch": 17.23,
"learning_rate": 5.622247850702453e-05,
"loss": 0.0389,
"step": 31400
},
{
"epoch": 17.29,
"learning_rate": 5.543615013629691e-05,
"loss": 0.0387,
"step": 31500
},
{
"epoch": 17.34,
"learning_rate": 5.46498217655693e-05,
"loss": 0.0398,
"step": 31600
},
{
"epoch": 17.4,
"learning_rate": 5.3863493394841675e-05,
"loss": 0.039,
"step": 31700
},
{
"epoch": 17.46,
"learning_rate": 5.3077165024114064e-05,
"loss": 0.041,
"step": 31800
},
{
"epoch": 17.51,
"learning_rate": 5.229083665338645e-05,
"loss": 0.0395,
"step": 31900
},
{
"epoch": 17.57,
"learning_rate": 5.1504508282658835e-05,
"loss": 0.0405,
"step": 32000
},
{
"epoch": 17.63,
"learning_rate": 5.071817991193122e-05,
"loss": 0.0401,
"step": 32100
},
{
"epoch": 17.69,
"learning_rate": 4.993971482491088e-05,
"loss": 0.0404,
"step": 32200
},
{
"epoch": 17.74,
"learning_rate": 4.915338645418326e-05,
"loss": 0.0399,
"step": 32300
},
{
"epoch": 17.8,
"learning_rate": 4.836705808345565e-05,
"loss": 0.0384,
"step": 32400
},
{
"epoch": 17.86,
"learning_rate": 4.758072971272803e-05,
"loss": 0.0421,
"step": 32500
},
{
"epoch": 17.91,
"learning_rate": 4.679440134200042e-05,
"loss": 0.0408,
"step": 32600
},
{
"epoch": 17.97,
"learning_rate": 4.60080729712728e-05,
"loss": 0.0406,
"step": 32700
},
{
"epoch": 18.03,
"learning_rate": 4.522174460054518e-05,
"loss": 0.0388,
"step": 32800
},
{
"epoch": 18.09,
"learning_rate": 4.443541622981757e-05,
"loss": 0.0374,
"step": 32900
},
{
"epoch": 18.14,
"learning_rate": 4.3649087859089946e-05,
"loss": 0.0367,
"step": 33000
},
{
"epoch": 18.2,
"learning_rate": 4.2862759488362335e-05,
"loss": 0.0367,
"step": 33100
},
{
"epoch": 18.26,
"learning_rate": 4.2076431117634723e-05,
"loss": 0.0367,
"step": 33200
},
{
"epoch": 18.31,
"learning_rate": 4.1290102746907105e-05,
"loss": 0.0367,
"step": 33300
},
{
"epoch": 18.37,
"learning_rate": 4.050377437617949e-05,
"loss": 0.0376,
"step": 33400
},
{
"epoch": 18.43,
"learning_rate": 3.971744600545187e-05,
"loss": 0.0381,
"step": 33500
},
{
"epoch": 18.49,
"learning_rate": 3.893111763472426e-05,
"loss": 0.0378,
"step": 33600
},
{
"epoch": 18.54,
"learning_rate": 3.814478926399665e-05,
"loss": 0.0383,
"step": 33700
},
{
"epoch": 18.6,
"learning_rate": 3.735846089326902e-05,
"loss": 0.0377,
"step": 33800
},
{
"epoch": 18.66,
"learning_rate": 3.657213252254141e-05,
"loss": 0.0381,
"step": 33900
},
{
"epoch": 18.71,
"learning_rate": 3.578580415181379e-05,
"loss": 0.0382,
"step": 34000
},
{
"epoch": 18.77,
"learning_rate": 3.4999475781086175e-05,
"loss": 0.0382,
"step": 34100
},
{
"epoch": 18.83,
"learning_rate": 3.4213147410358564e-05,
"loss": 0.0392,
"step": 34200
},
{
"epoch": 18.89,
"learning_rate": 3.3434682323338223e-05,
"loss": 0.0388,
"step": 34300
},
{
"epoch": 18.94,
"learning_rate": 3.2648353952610605e-05,
"loss": 0.0386,
"step": 34400
},
{
"epoch": 19.0,
"learning_rate": 3.186202558188299e-05,
"loss": 0.0383,
"step": 34500
},
{
"epoch": 19.06,
"learning_rate": 3.1075697211155376e-05,
"loss": 0.0343,
"step": 34600
},
{
"epoch": 19.11,
"learning_rate": 3.028936884042776e-05,
"loss": 0.0349,
"step": 34700
},
{
"epoch": 19.17,
"learning_rate": 2.9503040469700143e-05,
"loss": 0.0355,
"step": 34800
},
{
"epoch": 19.23,
"learning_rate": 2.871671209897253e-05,
"loss": 0.035,
"step": 34900
},
{
"epoch": 19.29,
"learning_rate": 2.793038372824491e-05,
"loss": 0.0353,
"step": 35000
},
{
"epoch": 19.34,
"learning_rate": 2.71440553575173e-05,
"loss": 0.035,
"step": 35100
},
{
"epoch": 19.4,
"learning_rate": 2.635772698678968e-05,
"loss": 0.0355,
"step": 35200
},
{
"epoch": 19.46,
"learning_rate": 2.5571398616062064e-05,
"loss": 0.0351,
"step": 35300
},
{
"epoch": 19.51,
"learning_rate": 2.478507024533445e-05,
"loss": 0.0358,
"step": 35400
},
{
"epoch": 19.57,
"learning_rate": 2.399874187460683e-05,
"loss": 0.0386,
"step": 35500
},
{
"epoch": 19.63,
"learning_rate": 2.321241350387922e-05,
"loss": 0.0362,
"step": 35600
},
{
"epoch": 19.69,
"learning_rate": 2.24260851331516e-05,
"loss": 0.0364,
"step": 35700
},
{
"epoch": 19.74,
"learning_rate": 2.1639756762423987e-05,
"loss": 0.0364,
"step": 35800
},
{
"epoch": 19.8,
"learning_rate": 2.085342839169637e-05,
"loss": 0.0344,
"step": 35900
},
{
"epoch": 19.86,
"learning_rate": 2.0067100020968754e-05,
"loss": 0.0372,
"step": 36000
},
{
"epoch": 19.91,
"learning_rate": 1.928077165024114e-05,
"loss": 0.0366,
"step": 36100
},
{
"epoch": 19.97,
"learning_rate": 1.8494443279513525e-05,
"loss": 0.0357,
"step": 36200
},
{
"epoch": 20.03,
"learning_rate": 1.7708114908785907e-05,
"loss": 0.0349,
"step": 36300
},
{
"epoch": 20.09,
"learning_rate": 1.692178653805829e-05,
"loss": 0.0329,
"step": 36400
},
{
"epoch": 20.14,
"learning_rate": 1.6143321451037952e-05,
"loss": 0.0339,
"step": 36500
},
{
"epoch": 20.2,
"learning_rate": 1.5356993080310334e-05,
"loss": 0.0329,
"step": 36600
},
{
"epoch": 20.26,
"learning_rate": 1.4570664709582721e-05,
"loss": 0.0329,
"step": 36700
},
{
"epoch": 20.31,
"learning_rate": 1.3784336338855105e-05,
"loss": 0.0329,
"step": 36800
},
{
"epoch": 20.37,
"learning_rate": 1.299800796812749e-05,
"loss": 0.0342,
"step": 36900
},
{
"epoch": 20.43,
"learning_rate": 1.2211679597399874e-05,
"loss": 0.0344,
"step": 37000
},
{
"epoch": 20.49,
"learning_rate": 1.1425351226672256e-05,
"loss": 0.0336,
"step": 37100
},
{
"epoch": 20.54,
"learning_rate": 1.0639022855944641e-05,
"loss": 0.0339,
"step": 37200
},
{
"epoch": 20.6,
"learning_rate": 9.852694485217025e-06,
"loss": 0.0345,
"step": 37300
},
{
"epoch": 20.66,
"learning_rate": 9.06636611448941e-06,
"loss": 0.034,
"step": 37400
},
{
"epoch": 20.71,
"learning_rate": 8.280037743761794e-06,
"loss": 0.0343,
"step": 37500
},
{
"epoch": 20.77,
"learning_rate": 7.4937093730341786e-06,
"loss": 0.0339,
"step": 37600
},
{
"epoch": 20.83,
"learning_rate": 6.707381002306563e-06,
"loss": 0.0342,
"step": 37700
},
{
"epoch": 20.89,
"learning_rate": 5.921052631578947e-06,
"loss": 0.036,
"step": 37800
},
{
"epoch": 20.94,
"learning_rate": 5.134724260851331e-06,
"loss": 0.0336,
"step": 37900
},
{
"epoch": 21.0,
"learning_rate": 4.348395890123715e-06,
"loss": 0.0342,
"step": 38000
},
{
"epoch": 21.06,
"learning_rate": 3.5620675193960995e-06,
"loss": 0.0313,
"step": 38100
},
{
"epoch": 21.11,
"learning_rate": 2.7757391486684836e-06,
"loss": 0.0326,
"step": 38200
},
{
"epoch": 21.17,
"learning_rate": 1.9894107779408677e-06,
"loss": 0.0325,
"step": 38300
},
{
"epoch": 21.23,
"learning_rate": 1.2030824072132522e-06,
"loss": 0.0314,
"step": 38400
}
],
"max_steps": 38538,
"num_train_epochs": 23,
"total_flos": 3.3292216688770744e+18,
"trial_name": null,
"trial_params": null
}