lombardata's picture
Evaluation on the test set completed on 2024_11_15.
2e3f7df verified
{
"best_metric": 0.4637599587440491,
"best_model_checkpoint": "/home/datawork-iot-nos/Seatizen/models/multilabel/drone/drone-DinoVdeau-from-probs-large-2024_11_15-batch-size64_freeze_probs/checkpoint-7590",
"epoch": 79.0,
"eval_steps": 500,
"global_step": 8690,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_explained_variance": 0.32841917872428894,
"eval_kl_divergence": 0.10252656042575836,
"eval_loss": 0.5005590319633484,
"eval_mae": 0.15520869195461273,
"eval_rmse": 0.19042611122131348,
"eval_runtime": 60.5528,
"eval_samples_per_second": 38.875,
"eval_steps_per_second": 0.611,
"learning_rate": 0.001,
"step": 110
},
{
"epoch": 2.0,
"eval_explained_variance": 0.3932196795940399,
"eval_kl_divergence": 0.5180067420005798,
"eval_loss": 0.47547808289527893,
"eval_mae": 0.12452296167612076,
"eval_rmse": 0.16812847554683685,
"eval_runtime": 57.4976,
"eval_samples_per_second": 40.941,
"eval_steps_per_second": 0.644,
"learning_rate": 0.001,
"step": 220
},
{
"epoch": 3.0,
"eval_explained_variance": 0.3974684476852417,
"eval_kl_divergence": 0.6862403154373169,
"eval_loss": 0.47452571988105774,
"eval_mae": 0.1226513460278511,
"eval_rmse": 0.16751675307750702,
"eval_runtime": 57.6506,
"eval_samples_per_second": 40.832,
"eval_steps_per_second": 0.642,
"learning_rate": 0.001,
"step": 330
},
{
"epoch": 4.0,
"eval_explained_variance": 0.40236756205558777,
"eval_kl_divergence": 0.3211989104747772,
"eval_loss": 0.47420722246170044,
"eval_mae": 0.1255439817905426,
"eval_rmse": 0.16721709072589874,
"eval_runtime": 58.0216,
"eval_samples_per_second": 40.571,
"eval_steps_per_second": 0.638,
"learning_rate": 0.001,
"step": 440
},
{
"epoch": 4.545454545454545,
"grad_norm": 0.20816726982593536,
"learning_rate": 0.001,
"loss": 0.5081,
"step": 500
},
{
"epoch": 5.0,
"eval_explained_variance": 0.4118477404117584,
"eval_kl_divergence": 0.5071600079536438,
"eval_loss": 0.47245556116104126,
"eval_mae": 0.12236347794532776,
"eval_rmse": 0.16526558995246887,
"eval_runtime": 60.9082,
"eval_samples_per_second": 38.648,
"eval_steps_per_second": 0.607,
"learning_rate": 0.001,
"step": 550
},
{
"epoch": 6.0,
"eval_explained_variance": 0.4100535213947296,
"eval_kl_divergence": 0.6710320115089417,
"eval_loss": 0.4725925624370575,
"eval_mae": 0.12164705991744995,
"eval_rmse": 0.16568797826766968,
"eval_runtime": 60.266,
"eval_samples_per_second": 39.06,
"eval_steps_per_second": 0.614,
"learning_rate": 0.001,
"step": 660
},
{
"epoch": 7.0,
"eval_explained_variance": 0.4183339774608612,
"eval_kl_divergence": 0.3161657452583313,
"eval_loss": 0.4731809198856354,
"eval_mae": 0.12548527121543884,
"eval_rmse": 0.16550247371196747,
"eval_runtime": 59.012,
"eval_samples_per_second": 39.89,
"eval_steps_per_second": 0.627,
"learning_rate": 0.001,
"step": 770
},
{
"epoch": 8.0,
"eval_explained_variance": 0.4233661890029907,
"eval_kl_divergence": 0.27189013361930847,
"eval_loss": 0.47284314036369324,
"eval_mae": 0.12600405514240265,
"eval_rmse": 0.16514724493026733,
"eval_runtime": 60.8246,
"eval_samples_per_second": 38.701,
"eval_steps_per_second": 0.608,
"learning_rate": 0.001,
"step": 880
},
{
"epoch": 9.0,
"eval_explained_variance": 0.42370346188545227,
"eval_kl_divergence": 0.6392844319343567,
"eval_loss": 0.4707973003387451,
"eval_mae": 0.12056442350149155,
"eval_rmse": 0.16385647654533386,
"eval_runtime": 57.5192,
"eval_samples_per_second": 40.925,
"eval_steps_per_second": 0.643,
"learning_rate": 0.001,
"step": 990
},
{
"epoch": 9.090909090909092,
"grad_norm": 0.15108729898929596,
"learning_rate": 0.001,
"loss": 0.4668,
"step": 1000
},
{
"epoch": 10.0,
"eval_explained_variance": 0.41512250900268555,
"eval_kl_divergence": 0.5359246134757996,
"eval_loss": 0.4732784628868103,
"eval_mae": 0.12296172976493835,
"eval_rmse": 0.16544467210769653,
"eval_runtime": 60.8049,
"eval_samples_per_second": 38.714,
"eval_steps_per_second": 0.609,
"learning_rate": 0.001,
"step": 1100
},
{
"epoch": 11.0,
"eval_explained_variance": 0.43050625920295715,
"eval_kl_divergence": 0.24788798391819,
"eval_loss": 0.47162503004074097,
"eval_mae": 0.12532271444797516,
"eval_rmse": 0.1646868884563446,
"eval_runtime": 59.6353,
"eval_samples_per_second": 39.473,
"eval_steps_per_second": 0.62,
"learning_rate": 0.001,
"step": 1210
},
{
"epoch": 12.0,
"eval_explained_variance": 0.43575945496559143,
"eval_kl_divergence": 0.3118789792060852,
"eval_loss": 0.47083696722984314,
"eval_mae": 0.12438095360994339,
"eval_rmse": 0.16306261718273163,
"eval_runtime": 59.6011,
"eval_samples_per_second": 39.496,
"eval_steps_per_second": 0.621,
"learning_rate": 0.001,
"step": 1320
},
{
"epoch": 13.0,
"eval_explained_variance": 0.42740270495414734,
"eval_kl_divergence": 0.36944085359573364,
"eval_loss": 0.47152063250541687,
"eval_mae": 0.1230199933052063,
"eval_rmse": 0.16350014507770538,
"eval_runtime": 60.519,
"eval_samples_per_second": 38.897,
"eval_steps_per_second": 0.611,
"learning_rate": 0.001,
"step": 1430
},
{
"epoch": 13.636363636363637,
"grad_norm": 0.1494702696800232,
"learning_rate": 0.001,
"loss": 0.4641,
"step": 1500
},
{
"epoch": 14.0,
"eval_explained_variance": 0.41340962052345276,
"eval_kl_divergence": 0.5592221617698669,
"eval_loss": 0.47212228178977966,
"eval_mae": 0.12158066779375076,
"eval_rmse": 0.16525773704051971,
"eval_runtime": 59.2288,
"eval_samples_per_second": 39.744,
"eval_steps_per_second": 0.625,
"learning_rate": 0.001,
"step": 1540
},
{
"epoch": 15.0,
"eval_explained_variance": 0.43138065934181213,
"eval_kl_divergence": 0.49361512064933777,
"eval_loss": 0.47012239694595337,
"eval_mae": 0.12126541882753372,
"eval_rmse": 0.16284985840320587,
"eval_runtime": 61.7909,
"eval_samples_per_second": 38.096,
"eval_steps_per_second": 0.599,
"learning_rate": 0.001,
"step": 1650
},
{
"epoch": 16.0,
"eval_explained_variance": 0.43279382586479187,
"eval_kl_divergence": 0.2819983661174774,
"eval_loss": 0.4718552827835083,
"eval_mae": 0.12293669581413269,
"eval_rmse": 0.16459016501903534,
"eval_runtime": 59.5152,
"eval_samples_per_second": 39.553,
"eval_steps_per_second": 0.622,
"learning_rate": 0.001,
"step": 1760
},
{
"epoch": 17.0,
"eval_explained_variance": 0.43319597840309143,
"eval_kl_divergence": 0.5294199585914612,
"eval_loss": 0.46933484077453613,
"eval_mae": 0.12004240602254868,
"eval_rmse": 0.16205951571464539,
"eval_runtime": 59.464,
"eval_samples_per_second": 39.587,
"eval_steps_per_second": 0.622,
"learning_rate": 0.001,
"step": 1870
},
{
"epoch": 18.0,
"eval_explained_variance": 0.42939844727516174,
"eval_kl_divergence": 0.4093473255634308,
"eval_loss": 0.4710436165332794,
"eval_mae": 0.12161851674318314,
"eval_rmse": 0.16353638470172882,
"eval_runtime": 60.82,
"eval_samples_per_second": 38.704,
"eval_steps_per_second": 0.608,
"learning_rate": 0.001,
"step": 1980
},
{
"epoch": 18.181818181818183,
"grad_norm": 0.11152761429548264,
"learning_rate": 0.001,
"loss": 0.4618,
"step": 2000
},
{
"epoch": 19.0,
"eval_explained_variance": 0.4387861490249634,
"eval_kl_divergence": 0.29183313250541687,
"eval_loss": 0.4698491394519806,
"eval_mae": 0.12186750769615173,
"eval_rmse": 0.16223199665546417,
"eval_runtime": 62.7122,
"eval_samples_per_second": 37.537,
"eval_steps_per_second": 0.59,
"learning_rate": 0.001,
"step": 2090
},
{
"epoch": 20.0,
"eval_explained_variance": 0.4355180561542511,
"eval_kl_divergence": 0.47719886898994446,
"eval_loss": 0.4691685736179352,
"eval_mae": 0.11899092048406601,
"eval_rmse": 0.16173695027828217,
"eval_runtime": 60.1867,
"eval_samples_per_second": 39.112,
"eval_steps_per_second": 0.615,
"learning_rate": 0.001,
"step": 2200
},
{
"epoch": 21.0,
"eval_explained_variance": 0.44244399666786194,
"eval_kl_divergence": 0.4335584044456482,
"eval_loss": 0.46830564737319946,
"eval_mae": 0.12040043622255325,
"eval_rmse": 0.16058459877967834,
"eval_runtime": 59.7866,
"eval_samples_per_second": 39.373,
"eval_steps_per_second": 0.619,
"learning_rate": 0.001,
"step": 2310
},
{
"epoch": 22.0,
"eval_explained_variance": 0.4233216345310211,
"eval_kl_divergence": 0.7962150573730469,
"eval_loss": 0.47239789366722107,
"eval_mae": 0.11830935627222061,
"eval_rmse": 0.16501490771770477,
"eval_runtime": 62.1424,
"eval_samples_per_second": 37.881,
"eval_steps_per_second": 0.595,
"learning_rate": 0.001,
"step": 2420
},
{
"epoch": 22.727272727272727,
"grad_norm": 0.10114327073097229,
"learning_rate": 0.001,
"loss": 0.4613,
"step": 2500
},
{
"epoch": 23.0,
"eval_explained_variance": 0.43542811274528503,
"eval_kl_divergence": 0.2854216396808624,
"eval_loss": 0.47136834263801575,
"eval_mae": 0.12230511754751205,
"eval_rmse": 0.16408009827136993,
"eval_runtime": 61.631,
"eval_samples_per_second": 38.195,
"eval_steps_per_second": 0.6,
"learning_rate": 0.001,
"step": 2530
},
{
"epoch": 24.0,
"eval_explained_variance": 0.42795756459236145,
"eval_kl_divergence": 0.42056405544281006,
"eval_loss": 0.4706868529319763,
"eval_mae": 0.12066368013620377,
"eval_rmse": 0.16326285898685455,
"eval_runtime": 61.2844,
"eval_samples_per_second": 38.411,
"eval_steps_per_second": 0.604,
"learning_rate": 0.001,
"step": 2640
},
{
"epoch": 25.0,
"eval_explained_variance": 0.44159284234046936,
"eval_kl_divergence": 0.5435640811920166,
"eval_loss": 0.46786901354789734,
"eval_mae": 0.11850475519895554,
"eval_rmse": 0.16058622300624847,
"eval_runtime": 61.9284,
"eval_samples_per_second": 38.012,
"eval_steps_per_second": 0.597,
"learning_rate": 0.001,
"step": 2750
},
{
"epoch": 26.0,
"eval_explained_variance": 0.4267805814743042,
"eval_kl_divergence": 0.4964081943035126,
"eval_loss": 0.47084224224090576,
"eval_mae": 0.11923620104789734,
"eval_rmse": 0.16337566077709198,
"eval_runtime": 66.0163,
"eval_samples_per_second": 35.658,
"eval_steps_per_second": 0.56,
"learning_rate": 0.001,
"step": 2860
},
{
"epoch": 27.0,
"eval_explained_variance": 0.43011048436164856,
"eval_kl_divergence": 0.6398861408233643,
"eval_loss": 0.4695045053958893,
"eval_mae": 0.11852020025253296,
"eval_rmse": 0.16250041127204895,
"eval_runtime": 60.9743,
"eval_samples_per_second": 38.606,
"eval_steps_per_second": 0.607,
"learning_rate": 0.001,
"step": 2970
},
{
"epoch": 27.272727272727273,
"grad_norm": 0.12341216951608658,
"learning_rate": 0.001,
"loss": 0.4607,
"step": 3000
},
{
"epoch": 28.0,
"eval_explained_variance": 0.43241068720817566,
"eval_kl_divergence": 0.5736985206604004,
"eval_loss": 0.4700873792171478,
"eval_mae": 0.11835578829050064,
"eval_rmse": 0.16237075626850128,
"eval_runtime": 60.2395,
"eval_samples_per_second": 39.077,
"eval_steps_per_second": 0.614,
"learning_rate": 0.001,
"step": 3080
},
{
"epoch": 29.0,
"eval_explained_variance": 0.43240413069725037,
"eval_kl_divergence": 0.4459187090396881,
"eval_loss": 0.4698559045791626,
"eval_mae": 0.1200462281703949,
"eval_rmse": 0.16241396963596344,
"eval_runtime": 59.6241,
"eval_samples_per_second": 39.481,
"eval_steps_per_second": 0.621,
"learning_rate": 0.001,
"step": 3190
},
{
"epoch": 30.0,
"eval_explained_variance": 0.4308302402496338,
"eval_kl_divergence": 0.27262812852859497,
"eval_loss": 0.4722815454006195,
"eval_mae": 0.12538868188858032,
"eval_rmse": 0.1643446981906891,
"eval_runtime": 60.4817,
"eval_samples_per_second": 38.921,
"eval_steps_per_second": 0.612,
"learning_rate": 0.001,
"step": 3300
},
{
"epoch": 31.0,
"eval_explained_variance": 0.431255966424942,
"eval_kl_divergence": 0.5307573080062866,
"eval_loss": 0.46958214044570923,
"eval_mae": 0.11837340146303177,
"eval_rmse": 0.16221857070922852,
"eval_runtime": 59.6158,
"eval_samples_per_second": 39.486,
"eval_steps_per_second": 0.621,
"learning_rate": 0.001,
"step": 3410
},
{
"epoch": 31.818181818181817,
"grad_norm": 0.09215673804283142,
"learning_rate": 0.0001,
"loss": 0.4604,
"step": 3500
},
{
"epoch": 32.0,
"eval_explained_variance": 0.4507780075073242,
"eval_kl_divergence": 0.4200185239315033,
"eval_loss": 0.46677276492118835,
"eval_mae": 0.11745267361402512,
"eval_rmse": 0.1592676192522049,
"eval_runtime": 59.8038,
"eval_samples_per_second": 39.362,
"eval_steps_per_second": 0.619,
"learning_rate": 0.0001,
"step": 3520
},
{
"epoch": 33.0,
"eval_explained_variance": 0.4565463066101074,
"eval_kl_divergence": 0.35289108753204346,
"eval_loss": 0.46626824140548706,
"eval_mae": 0.11769836395978928,
"eval_rmse": 0.1586667150259018,
"eval_runtime": 63.0473,
"eval_samples_per_second": 37.337,
"eval_steps_per_second": 0.587,
"learning_rate": 0.0001,
"step": 3630
},
{
"epoch": 34.0,
"eval_explained_variance": 0.4541673958301544,
"eval_kl_divergence": 0.3587631583213806,
"eval_loss": 0.46665358543395996,
"eval_mae": 0.1181267499923706,
"eval_rmse": 0.15922589600086212,
"eval_runtime": 58.0806,
"eval_samples_per_second": 40.53,
"eval_steps_per_second": 0.637,
"learning_rate": 0.0001,
"step": 3740
},
{
"epoch": 35.0,
"eval_explained_variance": 0.4545403718948364,
"eval_kl_divergence": 0.4813242256641388,
"eval_loss": 0.46587392687797546,
"eval_mae": 0.11597732454538345,
"eval_rmse": 0.15844957530498505,
"eval_runtime": 59.5027,
"eval_samples_per_second": 39.561,
"eval_steps_per_second": 0.622,
"learning_rate": 0.0001,
"step": 3850
},
{
"epoch": 36.0,
"eval_explained_variance": 0.45941615104675293,
"eval_kl_divergence": 0.3503873348236084,
"eval_loss": 0.46578526496887207,
"eval_mae": 0.11725542694330215,
"eval_rmse": 0.15814347565174103,
"eval_runtime": 60.095,
"eval_samples_per_second": 39.171,
"eval_steps_per_second": 0.616,
"learning_rate": 0.0001,
"step": 3960
},
{
"epoch": 36.36363636363637,
"grad_norm": 0.08345460891723633,
"learning_rate": 0.0001,
"loss": 0.4565,
"step": 4000
},
{
"epoch": 37.0,
"eval_explained_variance": 0.4607694149017334,
"eval_kl_divergence": 0.39189669489860535,
"eval_loss": 0.4654408395290375,
"eval_mae": 0.11584330350160599,
"eval_rmse": 0.1577824205160141,
"eval_runtime": 58.734,
"eval_samples_per_second": 40.079,
"eval_steps_per_second": 0.63,
"learning_rate": 0.0001,
"step": 4070
},
{
"epoch": 38.0,
"eval_explained_variance": 0.45832768082618713,
"eval_kl_divergence": 0.40583303570747375,
"eval_loss": 0.46546319127082825,
"eval_mae": 0.1166045293211937,
"eval_rmse": 0.15796954929828644,
"eval_runtime": 58.3156,
"eval_samples_per_second": 40.367,
"eval_steps_per_second": 0.634,
"learning_rate": 0.0001,
"step": 4180
},
{
"epoch": 39.0,
"eval_explained_variance": 0.45672306418418884,
"eval_kl_divergence": 0.4117860198020935,
"eval_loss": 0.465843141078949,
"eval_mae": 0.11737682670354843,
"eval_rmse": 0.15845851600170135,
"eval_runtime": 59.8584,
"eval_samples_per_second": 39.326,
"eval_steps_per_second": 0.618,
"learning_rate": 0.0001,
"step": 4290
},
{
"epoch": 40.0,
"eval_explained_variance": 0.4607222080230713,
"eval_kl_divergence": 0.3563988506793976,
"eval_loss": 0.46561121940612793,
"eval_mae": 0.11697889119386673,
"eval_rmse": 0.15787295997142792,
"eval_runtime": 61.3479,
"eval_samples_per_second": 38.371,
"eval_steps_per_second": 0.603,
"learning_rate": 0.0001,
"step": 4400
},
{
"epoch": 40.90909090909091,
"grad_norm": 0.08773978799581528,
"learning_rate": 0.0001,
"loss": 0.4552,
"step": 4500
},
{
"epoch": 41.0,
"eval_explained_variance": 0.45979323983192444,
"eval_kl_divergence": 0.3572520911693573,
"eval_loss": 0.4657152593135834,
"eval_mae": 0.11711093783378601,
"eval_rmse": 0.15820421278476715,
"eval_runtime": 57.6839,
"eval_samples_per_second": 40.809,
"eval_steps_per_second": 0.641,
"learning_rate": 0.0001,
"step": 4510
},
{
"epoch": 42.0,
"eval_explained_variance": 0.45867350697517395,
"eval_kl_divergence": 0.5041557550430298,
"eval_loss": 0.4651602804660797,
"eval_mae": 0.11550069600343704,
"eval_rmse": 0.15786336362361908,
"eval_runtime": 56.8293,
"eval_samples_per_second": 41.422,
"eval_steps_per_second": 0.651,
"learning_rate": 0.0001,
"step": 4620
},
{
"epoch": 43.0,
"eval_explained_variance": 0.4612714946269989,
"eval_kl_divergence": 0.44621211290359497,
"eval_loss": 0.4651065468788147,
"eval_mae": 0.11574172228574753,
"eval_rmse": 0.15747833251953125,
"eval_runtime": 57.1474,
"eval_samples_per_second": 41.192,
"eval_steps_per_second": 0.647,
"learning_rate": 0.0001,
"step": 4730
},
{
"epoch": 44.0,
"eval_explained_variance": 0.4603614807128906,
"eval_kl_divergence": 0.4236082434654236,
"eval_loss": 0.46537330746650696,
"eval_mae": 0.11658215522766113,
"eval_rmse": 0.15792043507099152,
"eval_runtime": 55.8584,
"eval_samples_per_second": 42.142,
"eval_steps_per_second": 0.662,
"learning_rate": 0.0001,
"step": 4840
},
{
"epoch": 45.0,
"eval_explained_variance": 0.46250852942466736,
"eval_kl_divergence": 0.45096999406814575,
"eval_loss": 0.46489208936691284,
"eval_mae": 0.11505404114723206,
"eval_rmse": 0.15738531947135925,
"eval_runtime": 55.5313,
"eval_samples_per_second": 42.391,
"eval_steps_per_second": 0.666,
"learning_rate": 0.0001,
"step": 4950
},
{
"epoch": 45.45454545454545,
"grad_norm": 0.08461819589138031,
"learning_rate": 0.0001,
"loss": 0.4538,
"step": 5000
},
{
"epoch": 46.0,
"eval_explained_variance": 0.46191954612731934,
"eval_kl_divergence": 0.44900697469711304,
"eval_loss": 0.46484702825546265,
"eval_mae": 0.11566606909036636,
"eval_rmse": 0.15745492279529572,
"eval_runtime": 56.8805,
"eval_samples_per_second": 41.385,
"eval_steps_per_second": 0.65,
"learning_rate": 0.0001,
"step": 5060
},
{
"epoch": 47.0,
"eval_explained_variance": 0.46148741245269775,
"eval_kl_divergence": 0.47508490085601807,
"eval_loss": 0.4648602306842804,
"eval_mae": 0.11517279595136642,
"eval_rmse": 0.1574285626411438,
"eval_runtime": 56.4955,
"eval_samples_per_second": 41.667,
"eval_steps_per_second": 0.655,
"learning_rate": 0.0001,
"step": 5170
},
{
"epoch": 48.0,
"eval_explained_variance": 0.4631068706512451,
"eval_kl_divergence": 0.5305130481719971,
"eval_loss": 0.4647873342037201,
"eval_mae": 0.11513545364141464,
"eval_rmse": 0.15746952593326569,
"eval_runtime": 59.054,
"eval_samples_per_second": 39.862,
"eval_steps_per_second": 0.627,
"learning_rate": 0.0001,
"step": 5280
},
{
"epoch": 49.0,
"eval_explained_variance": 0.46304425597190857,
"eval_kl_divergence": 0.4798574149608612,
"eval_loss": 0.4647849500179291,
"eval_mae": 0.11539488285779953,
"eval_rmse": 0.1573745161294937,
"eval_runtime": 54.2646,
"eval_samples_per_second": 43.38,
"eval_steps_per_second": 0.682,
"learning_rate": 0.0001,
"step": 5390
},
{
"epoch": 50.0,
"grad_norm": 0.16299596428871155,
"learning_rate": 0.0001,
"loss": 0.4532,
"step": 5500
},
{
"epoch": 50.0,
"eval_explained_variance": 0.4693569839000702,
"eval_kl_divergence": 0.2825404107570648,
"eval_loss": 0.46499085426330566,
"eval_mae": 0.1172276958823204,
"eval_rmse": 0.15717318654060364,
"eval_runtime": 56.0282,
"eval_samples_per_second": 42.015,
"eval_steps_per_second": 0.66,
"learning_rate": 0.0001,
"step": 5500
},
{
"epoch": 51.0,
"eval_explained_variance": 0.4573368728160858,
"eval_kl_divergence": 0.48794299364089966,
"eval_loss": 0.465638667345047,
"eval_mae": 0.11509021371603012,
"eval_rmse": 0.15819959342479706,
"eval_runtime": 52.7895,
"eval_samples_per_second": 44.592,
"eval_steps_per_second": 0.701,
"learning_rate": 0.0001,
"step": 5610
},
{
"epoch": 52.0,
"eval_explained_variance": 0.4673852026462555,
"eval_kl_divergence": 0.41987907886505127,
"eval_loss": 0.46429532766342163,
"eval_mae": 0.11551753431558609,
"eval_rmse": 0.15662376582622528,
"eval_runtime": 54.5816,
"eval_samples_per_second": 43.128,
"eval_steps_per_second": 0.678,
"learning_rate": 0.0001,
"step": 5720
},
{
"epoch": 53.0,
"eval_explained_variance": 0.4672771692276001,
"eval_kl_divergence": 0.3879646956920624,
"eval_loss": 0.46441230177879333,
"eval_mae": 0.1155916228890419,
"eval_rmse": 0.1568875014781952,
"eval_runtime": 53.5146,
"eval_samples_per_second": 43.988,
"eval_steps_per_second": 0.691,
"learning_rate": 0.0001,
"step": 5830
},
{
"epoch": 54.0,
"eval_explained_variance": 0.4654136002063751,
"eval_kl_divergence": 0.42290592193603516,
"eval_loss": 0.4646008610725403,
"eval_mae": 0.11479470133781433,
"eval_rmse": 0.1569375991821289,
"eval_runtime": 53.8924,
"eval_samples_per_second": 43.68,
"eval_steps_per_second": 0.687,
"learning_rate": 0.0001,
"step": 5940
},
{
"epoch": 54.54545454545455,
"grad_norm": 0.08747697621583939,
"learning_rate": 0.0001,
"loss": 0.4526,
"step": 6000
},
{
"epoch": 55.0,
"eval_explained_variance": 0.4658801555633545,
"eval_kl_divergence": 0.40089842677116394,
"eval_loss": 0.4644174873828888,
"eval_mae": 0.11586496233940125,
"eval_rmse": 0.156887486577034,
"eval_runtime": 54.8967,
"eval_samples_per_second": 42.881,
"eval_steps_per_second": 0.674,
"learning_rate": 0.0001,
"step": 6050
},
{
"epoch": 56.0,
"eval_explained_variance": 0.46597158908843994,
"eval_kl_divergence": 0.34050217270851135,
"eval_loss": 0.464743047952652,
"eval_mae": 0.11636239290237427,
"eval_rmse": 0.15719135105609894,
"eval_runtime": 53.8695,
"eval_samples_per_second": 43.698,
"eval_steps_per_second": 0.687,
"learning_rate": 0.0001,
"step": 6160
},
{
"epoch": 57.0,
"eval_explained_variance": 0.4660731554031372,
"eval_kl_divergence": 0.4187561571598053,
"eval_loss": 0.4645179808139801,
"eval_mae": 0.11523237824440002,
"eval_rmse": 0.1568503975868225,
"eval_runtime": 52.6832,
"eval_samples_per_second": 44.682,
"eval_steps_per_second": 0.702,
"learning_rate": 0.0001,
"step": 6270
},
{
"epoch": 58.0,
"eval_explained_variance": 0.4659406840801239,
"eval_kl_divergence": 0.3079023063182831,
"eval_loss": 0.465102881193161,
"eval_mae": 0.11637380719184875,
"eval_rmse": 0.15757356584072113,
"eval_runtime": 53.7708,
"eval_samples_per_second": 43.778,
"eval_steps_per_second": 0.688,
"learning_rate": 0.0001,
"step": 6380
},
{
"epoch": 59.0,
"eval_explained_variance": 0.46542713046073914,
"eval_kl_divergence": 0.43387478590011597,
"eval_loss": 0.4644688367843628,
"eval_mae": 0.11504218727350235,
"eval_rmse": 0.15699030458927155,
"eval_runtime": 54.251,
"eval_samples_per_second": 43.391,
"eval_steps_per_second": 0.682,
"learning_rate": 1e-05,
"step": 6490
},
{
"epoch": 59.09090909090909,
"grad_norm": 0.09869211912155151,
"learning_rate": 1e-05,
"loss": 0.4514,
"step": 6500
},
{
"epoch": 60.0,
"eval_explained_variance": 0.4679425060749054,
"eval_kl_divergence": 0.38936442136764526,
"eval_loss": 0.46417686343193054,
"eval_mae": 0.11504556983709335,
"eval_rmse": 0.1565857082605362,
"eval_runtime": 53.3994,
"eval_samples_per_second": 44.083,
"eval_steps_per_second": 0.693,
"learning_rate": 1e-05,
"step": 6600
},
{
"epoch": 61.0,
"eval_explained_variance": 0.4692780673503876,
"eval_kl_divergence": 0.4144607186317444,
"eval_loss": 0.4639436900615692,
"eval_mae": 0.11456633359193802,
"eval_rmse": 0.15632741153240204,
"eval_runtime": 53.948,
"eval_samples_per_second": 43.635,
"eval_steps_per_second": 0.686,
"learning_rate": 1e-05,
"step": 6710
},
{
"epoch": 62.0,
"eval_explained_variance": 0.46859118342399597,
"eval_kl_divergence": 0.4063835144042969,
"eval_loss": 0.4641311764717102,
"eval_mae": 0.11482342332601547,
"eval_rmse": 0.15648160874843597,
"eval_runtime": 53.1646,
"eval_samples_per_second": 44.278,
"eval_steps_per_second": 0.696,
"learning_rate": 1e-05,
"step": 6820
},
{
"epoch": 63.0,
"eval_explained_variance": 0.4698045253753662,
"eval_kl_divergence": 0.35424694418907166,
"eval_loss": 0.4643491506576538,
"eval_mae": 0.11492928117513657,
"eval_rmse": 0.15652996301651,
"eval_runtime": 61.9895,
"eval_samples_per_second": 37.974,
"eval_steps_per_second": 0.597,
"learning_rate": 1e-05,
"step": 6930
},
{
"epoch": 63.63636363636363,
"grad_norm": 0.12132851779460907,
"learning_rate": 1e-05,
"loss": 0.4511,
"step": 7000
},
{
"epoch": 64.0,
"eval_explained_variance": 0.4702436923980713,
"eval_kl_divergence": 0.37175947427749634,
"eval_loss": 0.46402981877326965,
"eval_mae": 0.11502394080162048,
"eval_rmse": 0.1563546359539032,
"eval_runtime": 55.6273,
"eval_samples_per_second": 42.317,
"eval_steps_per_second": 0.665,
"learning_rate": 1e-05,
"step": 7040
},
{
"epoch": 65.0,
"eval_explained_variance": 0.46799585223197937,
"eval_kl_divergence": 0.41278746724128723,
"eval_loss": 0.4640822410583496,
"eval_mae": 0.11517596989870071,
"eval_rmse": 0.1565382480621338,
"eval_runtime": 60.037,
"eval_samples_per_second": 39.209,
"eval_steps_per_second": 0.616,
"learning_rate": 1e-05,
"step": 7150
},
{
"epoch": 66.0,
"eval_explained_variance": 0.46580052375793457,
"eval_kl_divergence": 0.4987623989582062,
"eval_loss": 0.46441909670829773,
"eval_mae": 0.11446693539619446,
"eval_rmse": 0.15703582763671875,
"eval_runtime": 58.422,
"eval_samples_per_second": 40.293,
"eval_steps_per_second": 0.633,
"learning_rate": 1e-05,
"step": 7260
},
{
"epoch": 67.0,
"eval_explained_variance": 0.4696963131427765,
"eval_kl_divergence": 0.41221925616264343,
"eval_loss": 0.46383005380630493,
"eval_mae": 0.11511614173650742,
"eval_rmse": 0.15620578825473785,
"eval_runtime": 57.3857,
"eval_samples_per_second": 41.021,
"eval_steps_per_second": 0.645,
"learning_rate": 1e-05,
"step": 7370
},
{
"epoch": 68.0,
"eval_explained_variance": 0.4673812687397003,
"eval_kl_divergence": 0.4579189419746399,
"eval_loss": 0.4639807641506195,
"eval_mae": 0.11436697095632553,
"eval_rmse": 0.15645776689052582,
"eval_runtime": 58.7335,
"eval_samples_per_second": 40.079,
"eval_steps_per_second": 0.63,
"learning_rate": 1e-05,
"step": 7480
},
{
"epoch": 68.18181818181819,
"grad_norm": 0.15623362362384796,
"learning_rate": 1e-05,
"loss": 0.4508,
"step": 7500
},
{
"epoch": 69.0,
"eval_explained_variance": 0.4701990783214569,
"eval_kl_divergence": 0.4197009801864624,
"eval_loss": 0.4637599587440491,
"eval_mae": 0.11433341354131699,
"eval_rmse": 0.15607893466949463,
"eval_runtime": 56.4381,
"eval_samples_per_second": 41.709,
"eval_steps_per_second": 0.656,
"learning_rate": 1e-05,
"step": 7590
},
{
"epoch": 70.0,
"eval_explained_variance": 0.46952661871910095,
"eval_kl_divergence": 0.4285525679588318,
"eval_loss": 0.46392253041267395,
"eval_mae": 0.11449825018644333,
"eval_rmse": 0.15625734627246857,
"eval_runtime": 59.9257,
"eval_samples_per_second": 39.282,
"eval_steps_per_second": 0.617,
"learning_rate": 1e-05,
"step": 7700
},
{
"epoch": 71.0,
"eval_explained_variance": 0.4707754850387573,
"eval_kl_divergence": 0.3542197048664093,
"eval_loss": 0.46406444907188416,
"eval_mae": 0.11525753885507584,
"eval_rmse": 0.1563321352005005,
"eval_runtime": 56.6326,
"eval_samples_per_second": 41.566,
"eval_steps_per_second": 0.653,
"learning_rate": 1e-05,
"step": 7810
},
{
"epoch": 72.0,
"eval_explained_variance": 0.4681284427642822,
"eval_kl_divergence": 0.42497748136520386,
"eval_loss": 0.46417826414108276,
"eval_mae": 0.11474020034074783,
"eval_rmse": 0.15662290155887604,
"eval_runtime": 56.0497,
"eval_samples_per_second": 41.998,
"eval_steps_per_second": 0.66,
"learning_rate": 1e-05,
"step": 7920
},
{
"epoch": 72.72727272727273,
"grad_norm": 0.12685681879520416,
"learning_rate": 1e-05,
"loss": 0.4505,
"step": 8000
},
{
"epoch": 73.0,
"eval_explained_variance": 0.47002461552619934,
"eval_kl_divergence": 0.43972158432006836,
"eval_loss": 0.4637835919857025,
"eval_mae": 0.11403892189264297,
"eval_rmse": 0.15611138939857483,
"eval_runtime": 55.8354,
"eval_samples_per_second": 42.16,
"eval_steps_per_second": 0.663,
"learning_rate": 1e-05,
"step": 8030
},
{
"epoch": 74.0,
"eval_explained_variance": 0.4689449369907379,
"eval_kl_divergence": 0.443666011095047,
"eval_loss": 0.463798850774765,
"eval_mae": 0.1145407184958458,
"eval_rmse": 0.15625973045825958,
"eval_runtime": 56.7357,
"eval_samples_per_second": 41.491,
"eval_steps_per_second": 0.652,
"learning_rate": 1e-05,
"step": 8140
},
{
"epoch": 75.0,
"eval_explained_variance": 0.4704826772212982,
"eval_kl_divergence": 0.4049000144004822,
"eval_loss": 0.46379053592681885,
"eval_mae": 0.11447467654943466,
"eval_rmse": 0.15613143146038055,
"eval_runtime": 56.7932,
"eval_samples_per_second": 41.449,
"eval_steps_per_second": 0.651,
"learning_rate": 1e-05,
"step": 8250
},
{
"epoch": 76.0,
"eval_explained_variance": 0.4674541652202606,
"eval_kl_divergence": 0.49260592460632324,
"eval_loss": 0.4639701247215271,
"eval_mae": 0.11414843797683716,
"eval_rmse": 0.15647520124912262,
"eval_runtime": 57.4638,
"eval_samples_per_second": 40.965,
"eval_steps_per_second": 0.644,
"learning_rate": 1.0000000000000002e-06,
"step": 8360
},
{
"epoch": 77.0,
"eval_explained_variance": 0.469455748796463,
"eval_kl_divergence": 0.44272491335868835,
"eval_loss": 0.463869571685791,
"eval_mae": 0.11419638991355896,
"eval_rmse": 0.15622590482234955,
"eval_runtime": 57.5968,
"eval_samples_per_second": 40.87,
"eval_steps_per_second": 0.642,
"learning_rate": 1.0000000000000002e-06,
"step": 8470
},
{
"epoch": 77.27272727272727,
"grad_norm": 0.11736844480037689,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.4505,
"step": 8500
},
{
"epoch": 78.0,
"eval_explained_variance": 0.4691663682460785,
"eval_kl_divergence": 0.42925453186035156,
"eval_loss": 0.46388140320777893,
"eval_mae": 0.1144518032670021,
"eval_rmse": 0.1562517285346985,
"eval_runtime": 55.8876,
"eval_samples_per_second": 42.12,
"eval_steps_per_second": 0.662,
"learning_rate": 1.0000000000000002e-06,
"step": 8580
},
{
"epoch": 79.0,
"eval_explained_variance": 0.4699589014053345,
"eval_kl_divergence": 0.376490980386734,
"eval_loss": 0.46412238478660583,
"eval_mae": 0.11472050100564957,
"eval_rmse": 0.15639875829219818,
"eval_runtime": 55.4743,
"eval_samples_per_second": 42.434,
"eval_steps_per_second": 0.667,
"learning_rate": 1.0000000000000002e-06,
"step": 8690
},
{
"epoch": 79.0,
"learning_rate": 1.0000000000000002e-06,
"step": 8690,
"total_flos": 8.188406191467658e+19,
"train_loss": 0.4591466036709872,
"train_runtime": 19731.8487,
"train_samples_per_second": 53.236,
"train_steps_per_second": 0.836
}
],
"logging_steps": 500,
"max_steps": 16500,
"num_input_tokens_seen": 0,
"num_train_epochs": 150,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 10,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 8.188406191467658e+19,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}