xk-huang
[add] model
5902845
raw
history blame contribute delete
No virus
39.7 kB
{
"best_metric": 2.003035545349121,
"best_model_checkpoint": "/mnt/default/projects/sca-xiaoke-v3/amlt-results/7301932201.25563-cd1e6021-6ea9-4835-8578-ba26f723a708/checkpoint-100000",
"epoch": 3.673229503379371,
"global_step": 100000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"_do_backward_in_ms": 13833.612655987963,
"_prepare_inputs_in_ms": 9.499168023467064,
"compute_loss_in_ms": 1344.5582571439445,
"epoch": 0.0,
"learning_rate/full": 0.0,
"loss": 9.9783,
"step": 1,
"training_step_in_ms": 15198.054818203673
},
{
"epoch": 0.0,
"eval_objects365-local-v2-validation_loss": 9.392258644104004,
"eval_objects365-local-v2-validation_runtime": 16.1869,
"eval_objects365-local-v2-validation_samples_per_second": 49.423,
"eval_objects365-local-v2-validation_steps_per_second": 0.803,
"step": 1
},
{
"_do_backward_in_ms": 2550.187955102608,
"_prepare_inputs_in_ms": 12.46627290174549,
"compute_loss_in_ms": 289.9478549773164,
"epoch": 0.04,
"learning_rate/full": 9.998519814211118e-05,
"loss": 3.8861,
"step": 1000,
"training_step_in_ms": 2849.577255269468
},
{
"_do_backward_in_ms": 2556.9965168242343,
"_prepare_inputs_in_ms": 3.3681516400538385,
"compute_loss_in_ms": 289.68224829342216,
"epoch": 0.07,
"learning_rate/full": 9.992214555651498e-05,
"loss": 2.6909,
"step": 2000,
"training_step_in_ms": 2856.0172235199716
},
{
"_do_backward_in_ms": 2544.9560291268863,
"_prepare_inputs_in_ms": 3.326028081588447,
"compute_loss_in_ms": 289.6684127182234,
"epoch": 0.11,
"learning_rate/full": 9.980962819544264e-05,
"loss": 2.4899,
"step": 3000,
"training_step_in_ms": 2843.9263667755295
},
{
"_do_backward_in_ms": 2553.390086729545,
"_prepare_inputs_in_ms": 3.3251649560406804,
"compute_loss_in_ms": 289.7164152129553,
"epoch": 0.15,
"learning_rate/full": 9.964775754540861e-05,
"loss": 2.364,
"step": 4000,
"training_step_in_ms": 2852.3637848414946
},
{
"_do_backward_in_ms": 2533.654973218916,
"_prepare_inputs_in_ms": 3.3234398325439543,
"compute_loss_in_ms": 289.49879492027685,
"epoch": 0.18,
"learning_rate/full": 9.94366939940579e-05,
"loss": 2.2671,
"step": 5000,
"training_step_in_ms": 2832.4309401281644
},
{
"epoch": 0.18,
"eval_objects365-local-v2-validation_loss": 3.2006750106811523,
"eval_objects365-local-v2-validation_runtime": 15.8807,
"eval_objects365-local-v2-validation_samples_per_second": 50.376,
"eval_objects365-local-v2-validation_steps_per_second": 0.819,
"step": 5000
},
{
"_do_backward_in_ms": 2536.1245670726057,
"_prepare_inputs_in_ms": 13.602690789379688,
"compute_loss_in_ms": 290.3555030166838,
"epoch": 0.22,
"learning_rate/full": 9.917664667124788e-05,
"loss": 2.2035,
"step": 6000,
"training_step_in_ms": 2835.8918179108296
},
{
"_do_backward_in_ms": 2521.147600293625,
"_prepare_inputs_in_ms": 3.3214590828865767,
"compute_loss_in_ms": 290.27019435027614,
"epoch": 0.26,
"learning_rate/full": 9.886787324183434e-05,
"loss": 2.1482,
"step": 7000,
"training_step_in_ms": 2820.6920346897095
},
{
"_do_backward_in_ms": 2548.451570129022,
"_prepare_inputs_in_ms": 3.319471804657951,
"compute_loss_in_ms": 289.1897811254021,
"epoch": 0.29,
"learning_rate/full": 9.851067965036731e-05,
"loss": 2.0894,
"step": 8000,
"training_step_in_ms": 2846.943020476494
},
{
"_do_backward_in_ms": 2545.683652488282,
"_prepare_inputs_in_ms": 3.316459863912314,
"compute_loss_in_ms": 289.3646022947505,
"epoch": 0.33,
"learning_rate/full": 9.81058489571687e-05,
"loss": 2.0449,
"step": 9000,
"training_step_in_ms": 2844.350300100632
},
{
"_do_backward_in_ms": 2587.985424251994,
"_prepare_inputs_in_ms": 3.3290419806726277,
"compute_loss_in_ms": 289.5182610661723,
"epoch": 0.37,
"learning_rate/full": 9.765344841950912e-05,
"loss": 2.0159,
"step": 10000,
"training_step_in_ms": 2886.8384181782603
},
{
"epoch": 0.37,
"eval_objects365-local-v2-validation_loss": 2.7690136432647705,
"eval_objects365-local-v2-validation_runtime": 9.6182,
"eval_objects365-local-v2-validation_samples_per_second": 83.175,
"eval_objects365-local-v2-validation_steps_per_second": 1.352,
"step": 10000
},
{
"_do_backward_in_ms": 2580.9500538480934,
"_prepare_inputs_in_ms": 7.9110309222817925,
"compute_loss_in_ms": 289.0381096436588,
"epoch": 0.4,
"learning_rate/full": 9.715392539719363e-05,
"loss": 1.9885,
"step": 11000,
"training_step_in_ms": 2879.298744171858
},
{
"_do_backward_in_ms": 2542.390609878581,
"_prepare_inputs_in_ms": 3.3009646027348936,
"compute_loss_in_ms": 289.6751387268305,
"epoch": 0.44,
"learning_rate/full": 9.660720396490479e-05,
"loss": 1.9603,
"step": 12000,
"training_step_in_ms": 2841.272174295038
},
{
"_do_backward_in_ms": 2512.176790253725,
"_prepare_inputs_in_ms": 3.362945111002773,
"compute_loss_in_ms": 289.53350385534577,
"epoch": 0.48,
"learning_rate/full": 9.601491810524181e-05,
"loss": 1.9403,
"step": 13000,
"training_step_in_ms": 2811.0723278834485
},
{
"_do_backward_in_ms": 2531.5591839374974,
"_prepare_inputs_in_ms": 3.3155137847643346,
"compute_loss_in_ms": 290.233249894809,
"epoch": 0.51,
"learning_rate/full": 9.537646901827713e-05,
"loss": 1.9229,
"step": 14000,
"training_step_in_ms": 2831.124616945861
},
{
"_do_backward_in_ms": 2558.8225115824025,
"_prepare_inputs_in_ms": 3.351889422861859,
"compute_loss_in_ms": 289.55780304642394,
"epoch": 0.55,
"learning_rate/full": 9.469376483242311e-05,
"loss": 1.9078,
"step": 15000,
"training_step_in_ms": 2857.7440589836333
},
{
"epoch": 0.55,
"eval_objects365-local-v2-validation_loss": 2.566831588745117,
"eval_objects365-local-v2-validation_runtime": 12.0469,
"eval_objects365-local-v2-validation_samples_per_second": 66.407,
"eval_objects365-local-v2-validation_steps_per_second": 1.079,
"step": 15000
},
{
"_do_backward_in_ms": 2569.1758075398393,
"_prepare_inputs_in_ms": 9.903900120278632,
"compute_loss_in_ms": 289.8735368660185,
"epoch": 0.59,
"learning_rate/full": 9.396611533498878e-05,
"loss": 1.8953,
"step": 16000,
"training_step_in_ms": 2868.428954576142
},
{
"_do_backward_in_ms": 2524.4527391090523,
"_prepare_inputs_in_ms": 3.3152836377266794,
"compute_loss_in_ms": 289.7500243561808,
"epoch": 0.62,
"learning_rate/full": 9.319569524704217e-05,
"loss": 1.8846,
"step": 17000,
"training_step_in_ms": 2823.4884984181263
},
{
"_do_backward_in_ms": 2517.142957175616,
"_prepare_inputs_in_ms": 3.341707782819867,
"compute_loss_in_ms": 289.59233529726043,
"epoch": 0.66,
"learning_rate/full": 9.238172567527468e-05,
"loss": 1.8726,
"step": 18000,
"training_step_in_ms": 2816.0720933733974
},
{
"_do_backward_in_ms": 2532.7779561954085,
"_prepare_inputs_in_ms": 3.3180122550111264,
"compute_loss_in_ms": 289.8389355558902,
"epoch": 0.7,
"learning_rate/full": 9.152751594369358e-05,
"loss": 1.864,
"step": 19000,
"training_step_in_ms": 2831.926313831471
},
{
"_do_backward_in_ms": 2530.6621135415044,
"_prepare_inputs_in_ms": 3.3357447627931833,
"compute_loss_in_ms": 289.85987694049254,
"epoch": 0.73,
"learning_rate/full": 9.063048903303299e-05,
"loss": 1.8557,
"step": 20000,
"training_step_in_ms": 2829.814629596658
},
{
"epoch": 0.73,
"eval_objects365-local-v2-validation_loss": 2.4183120727539062,
"eval_objects365-local-v2-validation_runtime": 19.7661,
"eval_objects365-local-v2-validation_samples_per_second": 40.473,
"eval_objects365-local-v2-validation_steps_per_second": 0.658,
"step": 20000
},
{
"_do_backward_in_ms": 2476.3556728472468,
"_prepare_inputs_in_ms": 17.271267849380134,
"compute_loss_in_ms": 289.4574518314115,
"epoch": 0.77,
"learning_rate/full": 8.969416096468137e-05,
"loss": 1.8426,
"step": 21000,
"training_step_in_ms": 2775.230575547088
},
{
"_do_backward_in_ms": 2468.883111936506,
"_prepare_inputs_in_ms": 3.332945456728339,
"compute_loss_in_ms": 289.88871885929257,
"epoch": 0.81,
"learning_rate/full": 8.871858101523774e-05,
"loss": 1.8415,
"step": 22000,
"training_step_in_ms": 2768.1600090207066
},
{
"_do_backward_in_ms": 2506.8567285528407,
"_prepare_inputs_in_ms": 3.3408744835760444,
"compute_loss_in_ms": 289.73892450495623,
"epoch": 0.84,
"learning_rate/full": 8.770471389455464e-05,
"loss": 1.8293,
"step": 23000,
"training_step_in_ms": 2805.954835511511
},
{
"_do_backward_in_ms": 2541.85831053718,
"_prepare_inputs_in_ms": 3.357209531823173,
"compute_loss_in_ms": 290.00427257712,
"epoch": 0.88,
"learning_rate/full": 8.665356217305291e-05,
"loss": 1.8266,
"step": 24000,
"training_step_in_ms": 2841.2785381632857
},
{
"_do_backward_in_ms": 2533.210183262825,
"_prepare_inputs_in_ms": 3.364060287596658,
"compute_loss_in_ms": 289.9935355405323,
"epoch": 0.92,
"learning_rate/full": 8.556616529032215e-05,
"loss": 1.8162,
"step": 25000,
"training_step_in_ms": 2832.5927242138423
},
{
"epoch": 0.92,
"eval_objects365-local-v2-validation_loss": 2.3316762447357178,
"eval_objects365-local-v2-validation_runtime": 10.2829,
"eval_objects365-local-v2-validation_samples_per_second": 77.799,
"eval_objects365-local-v2-validation_steps_per_second": 1.264,
"step": 25000
},
{
"_do_backward_in_ms": 2552.305114510702,
"_prepare_inputs_in_ms": 8.565855091612377,
"compute_loss_in_ms": 289.73671219767664,
"epoch": 0.96,
"learning_rate/full": 8.444359852726274e-05,
"loss": 1.8117,
"step": 26000,
"training_step_in_ms": 2851.485752185108
},
{
"_do_backward_in_ms": 2533.5027522125747,
"_prepare_inputs_in_ms": 3.3632643420714885,
"compute_loss_in_ms": 289.90648507210426,
"epoch": 0.99,
"learning_rate/full": 8.328579747384175e-05,
"loss": 1.806,
"step": 27000,
"training_step_in_ms": 2832.807456281269
},
{
"_do_backward_in_ms": 2546.2225131660234,
"_prepare_inputs_in_ms": 3.3682647191453725,
"compute_loss_in_ms": 291.1998807019554,
"epoch": 1.03,
"learning_rate/full": 8.209501557634378e-05,
"loss": 1.7955,
"step": 28000,
"training_step_in_ms": 2846.875886055641
},
{
"_do_backward_in_ms": 2583.1195299711544,
"_prepare_inputs_in_ms": 3.3649198710918427,
"compute_loss_in_ms": 290.97770567121916,
"epoch": 1.07,
"learning_rate/full": 8.087367077395005e-05,
"loss": 1.7877,
"step": 29000,
"training_step_in_ms": 2883.5424125664867
},
{
"_do_backward_in_ms": 2570.092649807455,
"_prepare_inputs_in_ms": 3.4090082803741097,
"compute_loss_in_ms": 291.62430305662565,
"epoch": 1.1,
"learning_rate/full": 7.962179630107982e-05,
"loss": 1.786,
"step": 30000,
"training_step_in_ms": 2871.2795688530896
},
{
"epoch": 1.1,
"eval_objects365-local-v2-validation_loss": 2.2865772247314453,
"eval_objects365-local-v2-validation_runtime": 8.6589,
"eval_objects365-local-v2-validation_samples_per_second": 92.391,
"eval_objects365-local-v2-validation_steps_per_second": 1.501,
"step": 30000
},
{
"_do_backward_in_ms": 721.1331692694221,
"_prepare_inputs_in_ms": 3.2962444906588644,
"compute_loss_in_ms": 299.71373338252306,
"epoch": 1.14,
"learning_rate/full": 9.998418572322853e-05,
"loss": 1.7871,
"step": 31000,
"training_step_in_ms": 1029.60611718148
},
{
"_do_backward_in_ms": 729.436703273328,
"_prepare_inputs_in_ms": 3.258270466234535,
"compute_loss_in_ms": 298.0247277948074,
"epoch": 1.18,
"learning_rate/full": 9.991993511089866e-05,
"loss": 1.7845,
"step": 32000,
"training_step_in_ms": 1036.1627226730343
},
{
"_do_backward_in_ms": 703.6630942693446,
"_prepare_inputs_in_ms": 3.2456943639554083,
"compute_loss_in_ms": 297.64328660978936,
"epoch": 1.21,
"learning_rate/full": 9.98061823549655e-05,
"loss": 1.7817,
"step": 33000,
"training_step_in_ms": 1009.9646131193731
},
{
"_do_backward_in_ms": 676.1822207763325,
"_prepare_inputs_in_ms": 3.2518609322141856,
"compute_loss_in_ms": 297.877519285772,
"epoch": 1.25,
"learning_rate/full": 9.964326742751142e-05,
"loss": 1.7795,
"step": 34000,
"training_step_in_ms": 982.7506944458
},
{
"_do_backward_in_ms": 624.6346801738255,
"_prepare_inputs_in_ms": 3.2795886190142483,
"compute_loss_in_ms": 298.1166247774381,
"epoch": 1.29,
"learning_rate/full": 9.943126236733435e-05,
"loss": 1.7769,
"step": 35000,
"training_step_in_ms": 931.5135621828958
},
{
"epoch": 1.29,
"eval_objects365-local-v2-validation_loss": 2.2724013328552246,
"eval_objects365-local-v2-validation_runtime": 12.4784,
"eval_objects365-local-v2-validation_samples_per_second": 64.111,
"eval_objects365-local-v2-validation_steps_per_second": 1.042,
"step": 35000
},
{
"_do_backward_in_ms": 655.5177808874287,
"_prepare_inputs_in_ms": 7.8633947225300425,
"compute_loss_in_ms": 297.8193518089228,
"epoch": 1.32,
"learning_rate/full": 9.917037681729384e-05,
"loss": 1.7745,
"step": 36000,
"training_step_in_ms": 962.1334383783396
},
{
"_do_backward_in_ms": 699.8656730484217,
"_prepare_inputs_in_ms": 3.240072426153347,
"compute_loss_in_ms": 298.73781585809775,
"epoch": 1.36,
"learning_rate/full": 9.886053467655043e-05,
"loss": 1.7686,
"step": 37000,
"training_step_in_ms": 1007.3340494644362
},
{
"_do_backward_in_ms": 698.6605496255215,
"_prepare_inputs_in_ms": 3.2106620045378804,
"compute_loss_in_ms": 299.0653761769645,
"epoch": 1.4,
"learning_rate/full": 9.850266196818751e-05,
"loss": 1.7686,
"step": 38000,
"training_step_in_ms": 1006.4037391303573
},
{
"_do_backward_in_ms": 724.4765353850089,
"_prepare_inputs_in_ms": 3.3089412197005004,
"compute_loss_in_ms": 298.82350925635546,
"epoch": 1.43,
"learning_rate/full": 9.80968270237185e-05,
"loss": 1.763,
"step": 39000,
"training_step_in_ms": 1032.1099421884865
},
{
"_do_backward_in_ms": 681.9796452447772,
"_prepare_inputs_in_ms": 3.263611613307148,
"compute_loss_in_ms": 298.8235752664041,
"epoch": 1.47,
"learning_rate/full": 9.764343115619788e-05,
"loss": 1.7614,
"step": 40000,
"training_step_in_ms": 989.5278342715465
},
{
"epoch": 1.47,
"eval_objects365-local-v2-validation_loss": 2.228322982788086,
"eval_objects365-local-v2-validation_runtime": 16.6211,
"eval_objects365-local-v2-validation_samples_per_second": 48.132,
"eval_objects365-local-v2-validation_steps_per_second": 0.782,
"step": 40000
},
{
"_do_backward_in_ms": 720.4739852924831,
"_prepare_inputs_in_ms": 12.448386072667512,
"compute_loss_in_ms": 298.166242549251,
"epoch": 1.51,
"learning_rate/full": 9.714292270967042e-05,
"loss": 1.7603,
"step": 41000,
"training_step_in_ms": 1027.4099870913196
},
{
"_do_backward_in_ms": 658.6267548131291,
"_prepare_inputs_in_ms": 3.201066299341619,
"compute_loss_in_ms": 298.838454146171,
"epoch": 1.54,
"learning_rate/full": 9.659579661582255e-05,
"loss": 1.7573,
"step": 42000,
"training_step_in_ms": 966.1794079060201
},
{
"_do_backward_in_ms": 676.473127261037,
"_prepare_inputs_in_ms": 3.228976390324533,
"compute_loss_in_ms": 298.2980008148588,
"epoch": 1.58,
"learning_rate/full": 9.600197721584953e-05,
"loss": 1.7545,
"step": 43000,
"training_step_in_ms": 983.4983903854154
},
{
"_do_backward_in_ms": 697.463578726165,
"_prepare_inputs_in_ms": 3.195912489667535,
"compute_loss_in_ms": 299.1437525388319,
"epoch": 1.62,
"learning_rate/full": 9.536323925372398e-05,
"loss": 1.7526,
"step": 44000,
"training_step_in_ms": 1005.2843026786577
},
{
"_do_backward_in_ms": 813.7108269445598,
"_prepare_inputs_in_ms": 3.206419989466667,
"compute_loss_in_ms": 298.0525588088203,
"epoch": 1.65,
"learning_rate/full": 9.467964349816328e-05,
"loss": 1.7477,
"step": 45000,
"training_step_in_ms": 1120.4696311727166
},
{
"epoch": 1.65,
"eval_objects365-local-v2-validation_loss": 2.1873245239257812,
"eval_objects365-local-v2-validation_runtime": 12.2979,
"eval_objects365-local-v2-validation_samples_per_second": 65.052,
"eval_objects365-local-v2-validation_steps_per_second": 1.057,
"step": 45000
},
{
"_do_backward_in_ms": 708.7151438989677,
"_prepare_inputs_in_ms": 8.210845838180447,
"compute_loss_in_ms": 297.9545111299988,
"epoch": 1.69,
"learning_rate/full": 9.395186592816932e-05,
"loss": 1.7454,
"step": 46000,
"training_step_in_ms": 1015.4890209392179
},
{
"_do_backward_in_ms": 677.8432183256373,
"_prepare_inputs_in_ms": 3.263900319347158,
"compute_loss_in_ms": 297.8792646545917,
"epoch": 1.73,
"learning_rate/full": 9.31806262122764e-05,
"loss": 1.7443,
"step": 47000,
"training_step_in_ms": 984.4030563381966
},
{
"_do_backward_in_ms": 720.129483740544,
"_prepare_inputs_in_ms": 3.2109336624853313,
"compute_loss_in_ms": 297.79731379216537,
"epoch": 1.76,
"learning_rate/full": 9.23666869969011e-05,
"loss": 1.7429,
"step": 48000,
"training_step_in_ms": 1026.6044916820247
},
{
"_do_backward_in_ms": 655.5070220401976,
"_prepare_inputs_in_ms": 3.2323538628406823,
"compute_loss_in_ms": 297.94803192745894,
"epoch": 1.8,
"learning_rate/full": 9.15108531521937e-05,
"loss": 1.7414,
"step": 49000,
"training_step_in_ms": 962.1004992513917
},
{
"_do_backward_in_ms": 672.1013942162972,
"_prepare_inputs_in_ms": 3.2201343055348843,
"compute_loss_in_ms": 298.3921029092744,
"epoch": 1.84,
"learning_rate/full": 9.061305292392976e-05,
"loss": 1.7364,
"step": 50000,
"training_step_in_ms": 979.1467305382248
},
{
"epoch": 1.84,
"eval_objects365-local-v2-validation_loss": 2.164118766784668,
"eval_objects365-local-v2-validation_runtime": 10.1507,
"eval_objects365-local-v2-validation_samples_per_second": 78.812,
"eval_objects365-local-v2-validation_steps_per_second": 1.281,
"step": 50000
},
{
"_do_backward_in_ms": 674.3136331241112,
"_prepare_inputs_in_ms": 6.509397196586208,
"compute_loss_in_ms": 298.9732424640313,
"epoch": 1.87,
"learning_rate/full": 8.967692735767203e-05,
"loss": 1.7366,
"step": 51000,
"training_step_in_ms": 982.0552003385965
},
{
"_do_backward_in_ms": 697.6172431716695,
"_prepare_inputs_in_ms": 3.218969340668991,
"compute_loss_in_ms": 298.4035959227476,
"epoch": 1.91,
"learning_rate/full": 8.86996523066913e-05,
"loss": 1.735,
"step": 52000,
"training_step_in_ms": 1004.6795647891704
},
{
"_do_backward_in_ms": 704.6787912775762,
"_prepare_inputs_in_ms": 3.23072279850021,
"compute_loss_in_ms": 298.14702042611316,
"epoch": 1.95,
"learning_rate/full": 8.768506659844343e-05,
"loss": 1.7307,
"step": 53000,
"training_step_in_ms": 1011.5234109486919
},
{
"_do_backward_in_ms": 769.2823166255839,
"_prepare_inputs_in_ms": 3.204921918688342,
"compute_loss_in_ms": 298.67282255436294,
"epoch": 1.98,
"learning_rate/full": 8.663321571775915e-05,
"loss": 1.7305,
"step": 54000,
"training_step_in_ms": 1076.5878808272537
},
{
"_do_backward_in_ms": 696.9331407416612,
"_prepare_inputs_in_ms": 3.244183993898332,
"compute_loss_in_ms": 298.70368046709336,
"epoch": 2.02,
"learning_rate/full": 8.554513979559709e-05,
"loss": 1.7214,
"step": 55000,
"training_step_in_ms": 1004.3571789248381
},
{
"epoch": 2.02,
"eval_objects365-local-v2-validation_loss": 2.1291964054107666,
"eval_objects365-local-v2-validation_runtime": 9.885,
"eval_objects365-local-v2-validation_samples_per_second": 80.931,
"eval_objects365-local-v2-validation_steps_per_second": 1.315,
"step": 55000
},
{
"_do_backward_in_ms": 653.2630679495633,
"_prepare_inputs_in_ms": 6.026035463639699,
"compute_loss_in_ms": 299.06775847170223,
"epoch": 2.06,
"learning_rate/full": 8.442077319354145e-05,
"loss": 1.7136,
"step": 56000,
"training_step_in_ms": 961.0975561500527
},
{
"_do_backward_in_ms": 745.036297386745,
"_prepare_inputs_in_ms": 3.249741542385891,
"compute_loss_in_ms": 297.8976241340861,
"epoch": 2.09,
"learning_rate/full": 8.326347629835318e-05,
"loss": 1.7138,
"step": 57000,
"training_step_in_ms": 1051.66180648515
},
{
"_do_backward_in_ms": 626.0586955258623,
"_prepare_inputs_in_ms": 3.230042038485408,
"compute_loss_in_ms": 298.4531378012616,
"epoch": 2.13,
"learning_rate/full": 8.207328655483055e-05,
"loss": 1.7121,
"step": 58000,
"training_step_in_ms": 933.1826650444418
},
{
"_do_backward_in_ms": 704.3030783196446,
"_prepare_inputs_in_ms": 3.246616828488186,
"compute_loss_in_ms": 297.92309659463353,
"epoch": 2.17,
"learning_rate/full": 8.085138089139716e-05,
"loss": 1.712,
"step": 59000,
"training_step_in_ms": 1010.959731190931
},
{
"_do_backward_in_ms": 695.9465066853445,
"_prepare_inputs_in_ms": 3.237966085318476,
"compute_loss_in_ms": 298.90500363637693,
"epoch": 2.2,
"learning_rate/full": 7.95989675990117e-05,
"loss": 1.7112,
"step": 60000,
"training_step_in_ms": 1003.5448978319764
},
{
"epoch": 2.2,
"eval_objects365-local-v2-validation_loss": 2.1127543449401855,
"eval_objects365-local-v2-validation_runtime": 8.4515,
"eval_objects365-local-v2-validation_samples_per_second": 94.657,
"eval_objects365-local-v2-validation_steps_per_second": 1.538,
"step": 60000
},
{
"_do_backward_in_ms": 716.5119342987891,
"_prepare_inputs_in_ms": 5.995148892154933,
"compute_loss_in_ms": 299.30640732260883,
"epoch": 2.24,
"learning_rate/full": 7.831598792818578e-05,
"loss": 1.7064,
"step": 61000,
"training_step_in_ms": 1024.6346847999375
},
{
"_do_backward_in_ms": 679.5819691745564,
"_prepare_inputs_in_ms": 3.2997133519966155,
"compute_loss_in_ms": 298.4492077725008,
"epoch": 2.28,
"learning_rate/full": 7.700627631147224e-05,
"loss": 1.7071,
"step": 62000,
"training_step_in_ms": 986.8024183101952
},
{
"_do_backward_in_ms": 696.5671245567501,
"_prepare_inputs_in_ms": 3.2379625719040632,
"compute_loss_in_ms": 298.1275162412785,
"epoch": 2.31,
"learning_rate/full": 7.566850863280712e-05,
"loss": 1.7073,
"step": 63000,
"training_step_in_ms": 1003.3855868254323
},
{
"_do_backward_in_ms": 788.7977120715659,
"_prepare_inputs_in_ms": 3.261386409169063,
"compute_loss_in_ms": 297.86948832380585,
"epoch": 2.35,
"learning_rate/full": 7.430805850264685e-05,
"loss": 1.7044,
"step": 64000,
"training_step_in_ms": 1095.416541912593
},
{
"_do_backward_in_ms": 679.518492219504,
"_prepare_inputs_in_ms": 3.206374272936955,
"compute_loss_in_ms": 298.958141958341,
"epoch": 2.39,
"learning_rate/full": 7.292361925349194e-05,
"loss": 1.7045,
"step": 65000,
"training_step_in_ms": 987.1717654119711
},
{
"epoch": 2.39,
"eval_objects365-local-v2-validation_loss": 2.0909905433654785,
"eval_objects365-local-v2-validation_runtime": 9.9304,
"eval_objects365-local-v2-validation_samples_per_second": 80.561,
"eval_objects365-local-v2-validation_steps_per_second": 1.309,
"step": 65000
},
{
"_do_backward_in_ms": 673.5720948528033,
"_prepare_inputs_in_ms": 6.18732621827092,
"compute_loss_in_ms": 297.60415547758134,
"epoch": 2.42,
"learning_rate/full": 7.151371560919644e-05,
"loss": 1.702,
"step": 66000,
"training_step_in_ms": 980.0054668276571
},
{
"_do_backward_in_ms": 654.3511845318135,
"_prepare_inputs_in_ms": 3.2777874602470547,
"compute_loss_in_ms": 298.2891470948234,
"epoch": 2.46,
"learning_rate/full": 7.008393674839574e-05,
"loss": 1.7017,
"step": 67000,
"training_step_in_ms": 961.3662255166564
},
{
"_do_backward_in_ms": 674.1237894105725,
"_prepare_inputs_in_ms": 3.2963655965868384,
"compute_loss_in_ms": 298.1315166691784,
"epoch": 2.5,
"learning_rate/full": 6.863429772988044e-05,
"loss": 1.7001,
"step": 68000,
"training_step_in_ms": 980.9758842557203
},
{
"_do_backward_in_ms": 651.5303289373405,
"_prepare_inputs_in_ms": 3.2912338944151998,
"compute_loss_in_ms": 297.70729713048786,
"epoch": 2.53,
"learning_rate/full": 6.716475375716726e-05,
"loss": 1.6992,
"step": 69000,
"training_step_in_ms": 957.9476293225307
},
{
"_do_backward_in_ms": 701.2599963427056,
"_prepare_inputs_in_ms": 3.2801425319630653,
"compute_loss_in_ms": 297.9513049093075,
"epoch": 2.57,
"learning_rate/full": 6.568119138852548e-05,
"loss": 1.6991,
"step": 70000,
"training_step_in_ms": 1007.9343056466896
},
{
"epoch": 2.57,
"eval_objects365-local-v2-validation_loss": 2.0831971168518066,
"eval_objects365-local-v2-validation_runtime": 13.482,
"eval_objects365-local-v2-validation_samples_per_second": 59.338,
"eval_objects365-local-v2-validation_steps_per_second": 0.964,
"step": 70000
},
{
"_do_backward_in_ms": 684.7173160158563,
"_prepare_inputs_in_ms": 9.682496656596602,
"compute_loss_in_ms": 298.04157538694375,
"epoch": 2.61,
"learning_rate/full": 6.41791349446638e-05,
"loss": 1.6959,
"step": 71000,
"training_step_in_ms": 991.6520508083049
},
{
"_do_backward_in_ms": 719.4974615401588,
"_prepare_inputs_in_ms": 3.2725699762813747,
"compute_loss_in_ms": 298.8009250371251,
"epoch": 2.64,
"learning_rate/full": 6.266455189473463e-05,
"loss": 1.6968,
"step": 72000,
"training_step_in_ms": 1027.078579184832
},
{
"_do_backward_in_ms": 630.4531340918038,
"_prepare_inputs_in_ms": 3.2720213141292334,
"compute_loss_in_ms": 298.3485946042929,
"epoch": 2.68,
"learning_rate/full": 6.11374454037508e-05,
"loss": 1.6951,
"step": 73000,
"training_step_in_ms": 937.6128895445727
},
{
"_do_backward_in_ms": 673.5870531778783,
"_prepare_inputs_in_ms": 3.220968232722953,
"compute_loss_in_ms": 298.4312469626311,
"epoch": 2.72,
"learning_rate/full": 5.9599325562893006e-05,
"loss": 1.6948,
"step": 74000,
"training_step_in_ms": 980.7399763246067
},
{
"_do_backward_in_ms": 670.7334653646685,
"_prepare_inputs_in_ms": 3.225922678830102,
"compute_loss_in_ms": 298.11172814434394,
"epoch": 2.75,
"learning_rate/full": 5.8050159947610774e-05,
"loss": 1.6937,
"step": 75000,
"training_step_in_ms": 977.5889939961489
},
{
"epoch": 2.75,
"eval_objects365-local-v2-validation_loss": 2.060175895690918,
"eval_objects365-local-v2-validation_runtime": 10.0939,
"eval_objects365-local-v2-validation_samples_per_second": 79.255,
"eval_objects365-local-v2-validation_steps_per_second": 1.288,
"step": 75000
},
{
"_do_backward_in_ms": 707.8241047970951,
"_prepare_inputs_in_ms": 7.805172383402789,
"compute_loss_in_ms": 298.5676535603343,
"epoch": 2.79,
"learning_rate/full": 5.649457853856564e-05,
"loss": 1.692,
"step": 76000,
"training_step_in_ms": 1015.2529031389859
},
{
"_do_backward_in_ms": 679.5182852572761,
"_prepare_inputs_in_ms": 3.2479509462136775,
"compute_loss_in_ms": 298.08279935712926,
"epoch": 2.83,
"learning_rate/full": 5.4932574914808247e-05,
"loss": 1.6897,
"step": 77000,
"training_step_in_ms": 986.3572093644179
},
{
"_do_backward_in_ms": 624.3718270168174,
"_prepare_inputs_in_ms": 3.2478769938461483,
"compute_loss_in_ms": 298.39406253327616,
"epoch": 2.87,
"learning_rate/full": 5.336569367582159e-05,
"loss": 1.6903,
"step": 78000,
"training_step_in_ms": 931.5286971002351
},
{
"_do_backward_in_ms": 699.6423877081834,
"_prepare_inputs_in_ms": 3.2958002001978457,
"compute_loss_in_ms": 298.47152298805304,
"epoch": 2.9,
"learning_rate/full": 5.179548424435664e-05,
"loss": 1.6903,
"step": 79000,
"training_step_in_ms": 1006.8662925218232
},
{
"_do_backward_in_ms": 721.8450685073622,
"_prepare_inputs_in_ms": 3.2630610479973257,
"compute_loss_in_ms": 298.69656310253777,
"epoch": 2.94,
"learning_rate/full": 5.0223499334273436e-05,
"loss": 1.6861,
"step": 80000,
"training_step_in_ms": 1029.3176082074642
},
{
"epoch": 2.94,
"eval_objects365-local-v2-validation_loss": 2.0467376708984375,
"eval_objects365-local-v2-validation_runtime": 12.3724,
"eval_objects365-local-v2-validation_samples_per_second": 64.66,
"eval_objects365-local-v2-validation_steps_per_second": 1.051,
"step": 80000
},
{
"_do_backward_in_ms": 745.3300231467001,
"_prepare_inputs_in_ms": 8.417346246764874,
"compute_loss_in_ms": 297.6633286998154,
"epoch": 2.98,
"learning_rate/full": 4.8651293415127954e-05,
"loss": 1.6859,
"step": 81000,
"training_step_in_ms": 1051.9083714645822
},
{
"_do_backward_in_ms": 713.2887438628823,
"_prepare_inputs_in_ms": 3.309611749369651,
"compute_loss_in_ms": 299.24693407770246,
"epoch": 3.01,
"learning_rate/full": 4.7080421175022657e-05,
"loss": 1.6808,
"step": 82000,
"training_step_in_ms": 1021.3982793183532
},
{
"_do_backward_in_ms": 683.561601414578,
"_prepare_inputs_in_ms": 3.340075889835134,
"compute_loss_in_ms": 299.84076196700335,
"epoch": 3.05,
"learning_rate/full": 4.551243598324116e-05,
"loss": 1.6701,
"step": 83000,
"training_step_in_ms": 992.2671003735159
},
{
"_do_backward_in_ms": 691.1516734741163,
"_prepare_inputs_in_ms": 3.3659417459275573,
"compute_loss_in_ms": 298.7288506310433,
"epoch": 3.09,
"learning_rate/full": 4.394888835418673e-05,
"loss": 1.6682,
"step": 84000,
"training_step_in_ms": 998.8259057179093
},
{
"_do_backward_in_ms": 671.9453688920476,
"_prepare_inputs_in_ms": 3.3872235738672316,
"compute_loss_in_ms": 299.7595350977499,
"epoch": 3.12,
"learning_rate/full": 4.238976880417727e-05,
"loss": 1.6682,
"step": 85000,
"training_step_in_ms": 980.6325940783136
},
{
"epoch": 3.12,
"eval_objects365-local-v2-validation_loss": 2.037609815597534,
"eval_objects365-local-v2-validation_runtime": 8.3572,
"eval_objects365-local-v2-validation_samples_per_second": 95.726,
"eval_objects365-local-v2-validation_steps_per_second": 1.556,
"step": 85000
},
{
"_do_backward_in_ms": 682.5501747601666,
"_prepare_inputs_in_ms": 6.052993975997133,
"compute_loss_in_ms": 298.15280754882235,
"epoch": 3.16,
"learning_rate/full": 4.083818976295859e-05,
"loss": 1.6684,
"step": 86000,
"training_step_in_ms": 989.5919032730162
},
{
"_do_backward_in_ms": 702.3105279654264,
"_prepare_inputs_in_ms": 3.27472277241759,
"compute_loss_in_ms": 298.05750323599204,
"epoch": 3.2,
"learning_rate/full": 3.929722605177466e-05,
"loss": 1.6664,
"step": 87000,
"training_step_in_ms": 1009.1526904681232
},
{
"_do_backward_in_ms": 647.5646377876401,
"_prepare_inputs_in_ms": 3.2817639869172126,
"compute_loss_in_ms": 299.0744258032646,
"epoch": 3.23,
"learning_rate/full": 3.776684586220099e-05,
"loss": 1.6671,
"step": 88000,
"training_step_in_ms": 955.4352292607073
},
{
"_do_backward_in_ms": 1546.6698298668489,
"_prepare_inputs_in_ms": 3.2690847222693264,
"compute_loss_in_ms": 298.1887877949048,
"epoch": 3.27,
"learning_rate/full": 3.6248562522640714e-05,
"loss": 1.6665,
"step": 89000,
"training_step_in_ms": 1853.662267722888
},
{
"_do_backward_in_ms": 1196.733351110248,
"_prepare_inputs_in_ms": 3.2732989322394133,
"compute_loss_in_ms": 298.05953590921126,
"epoch": 3.31,
"learning_rate/full": 3.4743877399432354e-05,
"loss": 1.6673,
"step": 90000,
"training_step_in_ms": 1503.5614393500146
},
{
"epoch": 3.31,
"eval_objects365-local-v2-validation_loss": 2.015079975128174,
"eval_objects365-local-v2-validation_runtime": 21.4395,
"eval_objects365-local-v2-validation_samples_per_second": 37.314,
"eval_objects365-local-v2-validation_steps_per_second": 0.606,
"step": 90000
},
{
"_do_backward_in_ms": 844.9975665507372,
"_prepare_inputs_in_ms": 19.031029790805363,
"compute_loss_in_ms": 298.18785645280065,
"epoch": 3.34,
"learning_rate/full": 3.325427841221202e-05,
"loss": 1.6653,
"step": 91000,
"training_step_in_ms": 1152.0402662665583
},
{
"_do_backward_in_ms": 1347.8918932015076,
"_prepare_inputs_in_ms": 3.3262957674451172,
"compute_loss_in_ms": 298.0921304386575,
"epoch": 3.38,
"learning_rate/full": 3.178123856257234e-05,
"loss": 1.6655,
"step": 92000,
"training_step_in_ms": 1654.7804647334851
},
{
"_do_backward_in_ms": 1063.0321979811415,
"_prepare_inputs_in_ms": 3.2982348231598735,
"compute_loss_in_ms": 298.2737292505335,
"epoch": 3.42,
"learning_rate/full": 3.0326214477473512e-05,
"loss": 1.6623,
"step": 93000,
"training_step_in_ms": 1370.0481948212255
},
{
"_do_backward_in_ms": 675.3600625551771,
"_prepare_inputs_in_ms": 3.2861924229655415,
"compute_loss_in_ms": 297.8690059813671,
"epoch": 3.45,
"learning_rate/full": 2.8890644968846193e-05,
"loss": 1.6633,
"step": 94000,
"training_step_in_ms": 981.9639976194594
},
{
"_do_backward_in_ms": 685.7196875785012,
"_prepare_inputs_in_ms": 3.2590834801085293,
"compute_loss_in_ms": 298.93962507206015,
"epoch": 3.49,
"learning_rate/full": 2.74745444265936e-05,
"loss": 1.6637,
"step": 95000,
"training_step_in_ms": 993.3962683330756
},
{
"epoch": 3.49,
"eval_objects365-local-v2-validation_loss": 2.0223588943481445,
"eval_objects365-local-v2-validation_runtime": 13.6125,
"eval_objects365-local-v2-validation_samples_per_second": 58.769,
"eval_objects365-local-v2-validation_steps_per_second": 0.955,
"step": 95000
},
{
"_do_backward_in_ms": 663.6111688169185,
"_prepare_inputs_in_ms": 11.124133992699312,
"compute_loss_in_ms": 298.8600466307988,
"epoch": 3.53,
"learning_rate/full": 2.608214514070504e-05,
"loss": 1.6625,
"step": 96000,
"training_step_in_ms": 971.3277539745905
},
{
"_do_backward_in_ms": 862.6617220155895,
"_prepare_inputs_in_ms": 3.263724277028814,
"compute_loss_in_ms": 298.15222160494886,
"epoch": 3.56,
"learning_rate/full": 2.471339721239901e-05,
"loss": 1.6617,
"step": 97000,
"training_step_in_ms": 1169.5883399837185
},
{
"_do_backward_in_ms": 792.2627358706668,
"_prepare_inputs_in_ms": 3.239450325258076,
"compute_loss_in_ms": 298.1890718040522,
"epoch": 3.6,
"learning_rate/full": 2.3370986279672524e-05,
"loss": 1.6611,
"step": 98000,
"training_step_in_ms": 1099.1772187727038
},
{
"_do_backward_in_ms": 821.3568878679071,
"_prepare_inputs_in_ms": 3.2639728772919625,
"compute_loss_in_ms": 298.9591815781314,
"epoch": 3.64,
"learning_rate/full": 2.2053549815720452e-05,
"loss": 1.6597,
"step": 99000,
"training_step_in_ms": 1129.0956585616805
},
{
"_do_backward_in_ms": 841.5605684022885,
"_prepare_inputs_in_ms": 3.277285093674436,
"compute_loss_in_ms": 298.11495484854095,
"epoch": 3.67,
"learning_rate/full": 2.0762471598573356e-05,
"loss": 1.6591,
"step": 100000,
"training_step_in_ms": 1148.4819840774871
},
{
"epoch": 3.67,
"eval_objects365-local-v2-validation_loss": 2.003035545349121,
"eval_objects365-local-v2-validation_runtime": 15.3967,
"eval_objects365-local-v2-validation_samples_per_second": 51.959,
"eval_objects365-local-v2-validation_steps_per_second": 0.844,
"step": 100000
}
],
"max_steps": 100000,
"num_train_epochs": 4,
"total_flos": 1.6535680982674692e+23,
"trial_name": null,
"trial_params": null
}