empty-michael's picture
End of training
8d4fef0 verified
{
"best_metric": 2.0353407859802246,
"best_model_checkpoint": "output_main/wandb/run-20240211_075351-8o9ldy4a/files/train_output/checkpoint-10000",
"epoch": 2.042133333333333,
"eval_steps": 500,
"global_step": 10000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"MSE": 892.0916341145833,
"MSE/layer0": 892.0916341145833,
"dead_code_fraction": 0.15045,
"dead_code_fraction/layer0": 0.15045,
"epoch": 0.0,
"input_norm": 31.997259775797524,
"input_norm/layer0": 31.997259775797524,
"learning_rate": 1e-05,
"loss": 8.134,
"max_norm": 35.01011657714844,
"max_norm/layer0": 35.01011657714844,
"mean_norm": 31.990370750427246,
"mean_norm/layer0": 31.990370750427246,
"multicode_k": 1,
"output_norm": 8.571834087371826,
"output_norm/layer0": 8.571834087371826,
"step": 1
},
{
"MSE": 889.7418754733337,
"MSE/layer0": 889.7418754733337,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.01,
"input_norm": 31.99632342656454,
"input_norm/layer0": 31.99632342656454,
"learning_rate": 0.0005,
"loss": 7.1762,
"max_norm": 35.03640365600586,
"max_norm/layer0": 35.03640365600586,
"mean_norm": 32.01236152648926,
"mean_norm/layer0": 32.01236152648926,
"multicode_k": 1,
"output_norm": 8.591146861614817,
"output_norm/layer0": 8.591146861614817,
"step": 50
},
{
"MSE": 869.5438468424481,
"MSE/layer0": 869.5438468424481,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.01,
"input_norm": 31.996115023295076,
"input_norm/layer0": 31.996115023295076,
"learning_rate": 0.001,
"loss": 5.0118,
"max_norm": 35.15137481689453,
"max_norm/layer0": 35.15137481689453,
"mean_norm": 32.11746788024902,
"mean_norm/layer0": 32.11746788024902,
"multicode_k": 1,
"output_norm": 8.768607576688133,
"output_norm/layer0": 8.768607576688133,
"step": 100
},
{
"MSE": 841.8395769246417,
"MSE/layer0": 841.8395769246417,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.01,
"input_norm": 31.996588408152256,
"input_norm/layer0": 31.996588408152256,
"learning_rate": 0.0015,
"loss": 4.2338,
"max_norm": 35.45364761352539,
"max_norm/layer0": 35.45364761352539,
"mean_norm": 32.34040641784668,
"mean_norm/layer0": 32.34040641784668,
"multicode_k": 1,
"output_norm": 9.237536044120787,
"output_norm/layer0": 9.237536044120787,
"step": 150
},
{
"MSE": 817.2703357950843,
"MSE/layer0": 817.2703357950843,
"dead_code_fraction": 5e-05,
"dead_code_fraction/layer0": 5e-05,
"epoch": 0.02,
"input_norm": 31.99718633969625,
"input_norm/layer0": 31.99718633969625,
"learning_rate": 0.002,
"loss": 3.837,
"max_norm": 36.11206817626953,
"max_norm/layer0": 36.11206817626953,
"mean_norm": 32.652212142944336,
"mean_norm/layer0": 32.652212142944336,
"multicode_k": 1,
"output_norm": 9.962018431027724,
"output_norm/layer0": 9.962018431027724,
"step": 200
},
{
"MSE": 787.4571261596678,
"MSE/layer0": 787.4571261596678,
"dead_code_fraction": 0.0028,
"dead_code_fraction/layer0": 0.0028,
"epoch": 0.03,
"input_norm": 31.99750740687052,
"input_norm/layer0": 31.99750740687052,
"learning_rate": 0.0025,
"loss": 3.5507,
"max_norm": 37.03396987915039,
"max_norm/layer0": 37.03396987915039,
"mean_norm": 33.030792236328125,
"mean_norm/layer0": 33.030792236328125,
"multicode_k": 1,
"output_norm": 11.101801137924198,
"output_norm/layer0": 11.101801137924198,
"step": 250
},
{
"MSE": 759.7653246053058,
"MSE/layer0": 759.7653246053058,
"dead_code_fraction": 0.02905,
"dead_code_fraction/layer0": 0.02905,
"epoch": 0.03,
"input_norm": 31.99749964078267,
"input_norm/layer0": 31.99749964078267,
"learning_rate": 0.003,
"loss": 3.3015,
"max_norm": 37.927757263183594,
"max_norm/layer0": 37.927757263183594,
"mean_norm": 33.33859634399414,
"mean_norm/layer0": 33.33859634399414,
"multicode_k": 1,
"output_norm": 12.222484871546431,
"output_norm/layer0": 12.222484871546431,
"step": 300
},
{
"MSE": 734.5841912841795,
"MSE/layer0": 734.5841912841795,
"dead_code_fraction": 0.06455,
"dead_code_fraction/layer0": 0.06455,
"epoch": 0.04,
"input_norm": 31.99746166547139,
"input_norm/layer0": 31.99746166547139,
"learning_rate": 0.0034999999999999996,
"loss": 3.1483,
"max_norm": 40.570350646972656,
"max_norm/layer0": 40.570350646972656,
"mean_norm": 33.79829216003418,
"mean_norm/layer0": 33.79829216003418,
"multicode_k": 1,
"output_norm": 13.233797086079917,
"output_norm/layer0": 13.233797086079917,
"step": 350
},
{
"MSE": 705.9179516601566,
"MSE/layer0": 705.9179516601566,
"dead_code_fraction": 0.13495,
"dead_code_fraction/layer0": 0.13495,
"epoch": 0.04,
"input_norm": 31.997578941980994,
"input_norm/layer0": 31.997578941980994,
"learning_rate": 0.004,
"loss": 3.0479,
"max_norm": 45.86402130126953,
"max_norm/layer0": 45.86402130126953,
"mean_norm": 34.60604667663574,
"mean_norm/layer0": 34.60604667663574,
"multicode_k": 1,
"output_norm": 14.794977650642394,
"output_norm/layer0": 14.794977650642394,
"step": 400
},
{
"MSE": 673.0142825317382,
"MSE/layer0": 673.0142825317382,
"dead_code_fraction": 0.236,
"dead_code_fraction/layer0": 0.236,
"epoch": 0.04,
"input_norm": 31.99772956212363,
"input_norm/layer0": 31.99772956212363,
"learning_rate": 0.0045000000000000005,
"loss": 2.9234,
"max_norm": 50.35022735595703,
"max_norm/layer0": 50.35022735595703,
"mean_norm": 35.50743293762207,
"mean_norm/layer0": 35.50743293762207,
"multicode_k": 1,
"output_norm": 16.412540513674415,
"output_norm/layer0": 16.412540513674415,
"step": 450
},
{
"MSE": 646.1952704874673,
"MSE/layer0": 646.1952704874673,
"dead_code_fraction": 0.31565,
"dead_code_fraction/layer0": 0.31565,
"epoch": 0.05,
"input_norm": 31.997816743850702,
"input_norm/layer0": 31.997816743850702,
"learning_rate": 0.005,
"loss": 2.8364,
"max_norm": 55.06960678100586,
"max_norm/layer0": 55.06960678100586,
"mean_norm": 36.40013122558594,
"mean_norm/layer0": 36.40013122558594,
"multicode_k": 1,
"output_norm": 17.61372879664104,
"output_norm/layer0": 17.61372879664104,
"step": 500
},
{
"epoch": 0.05,
"eval_MSE/layer0": 634.8931657946682,
"eval_accuracy": 0.42267877747562077,
"eval_dead_code_fraction/layer0": 0.3619,
"eval_input_norm/layer0": 31.9978586178746,
"eval_loss": 2.7649216651916504,
"eval_multicode_k": 1,
"eval_output_norm/layer0": 18.081893277070293,
"eval_runtime": 157.3558,
"eval_samples_per_second": 29.379,
"eval_steps_per_second": 1.837,
"step": 500
},
{
"MSE": 627.919213663737,
"MSE/layer0": 627.919213663737,
"dead_code_fraction": 0.35455,
"dead_code_fraction/layer0": 0.35455,
"epoch": 0.06,
"input_norm": 31.997863556543983,
"input_norm/layer0": 31.997863556543983,
"learning_rate": 0.005,
"loss": 2.6999,
"max_norm": 59.44381332397461,
"max_norm/layer0": 59.44381332397461,
"mean_norm": 37.23677062988281,
"mean_norm/layer0": 37.23677062988281,
"multicode_k": 1,
"output_norm": 18.411861616770416,
"output_norm/layer0": 18.411861616770416,
"step": 550
},
{
"MSE": 613.3249330647789,
"MSE/layer0": 613.3249330647789,
"dead_code_fraction": 0.38215,
"dead_code_fraction/layer0": 0.38215,
"epoch": 0.06,
"input_norm": 31.99789684613545,
"input_norm/layer0": 31.99789684613545,
"learning_rate": 0.005,
"loss": 2.6511,
"max_norm": 66.23004150390625,
"max_norm/layer0": 66.23004150390625,
"mean_norm": 38.00171661376953,
"mean_norm/layer0": 38.00171661376953,
"multicode_k": 1,
"output_norm": 18.973640613555915,
"output_norm/layer0": 18.973640613555915,
"step": 600
},
{
"MSE": 601.0688813273114,
"MSE/layer0": 601.0688813273114,
"dead_code_fraction": 0.3855,
"dead_code_fraction/layer0": 0.3855,
"epoch": 0.07,
"input_norm": 31.997907568613698,
"input_norm/layer0": 31.997907568613698,
"learning_rate": 0.005,
"loss": 2.5727,
"max_norm": 72.61077117919922,
"max_norm/layer0": 72.61077117919922,
"mean_norm": 38.68782615661621,
"mean_norm/layer0": 38.68782615661621,
"multicode_k": 1,
"output_norm": 19.389015719095863,
"output_norm/layer0": 19.389015719095863,
"step": 650
},
{
"MSE": 595.7544806925458,
"MSE/layer0": 595.7544806925458,
"dead_code_fraction": 0.3847,
"dead_code_fraction/layer0": 0.3847,
"epoch": 0.07,
"input_norm": 31.99792699813842,
"input_norm/layer0": 31.99792699813842,
"learning_rate": 0.005,
"loss": 2.5303,
"max_norm": 77.1572036743164,
"max_norm/layer0": 77.1572036743164,
"mean_norm": 39.301788330078125,
"mean_norm/layer0": 39.301788330078125,
"multicode_k": 1,
"output_norm": 19.654865121841446,
"output_norm/layer0": 19.654865121841446,
"step": 700
},
{
"MSE": 587.4001970418295,
"MSE/layer0": 587.4001970418295,
"dead_code_fraction": 0.38495,
"dead_code_fraction/layer0": 0.38495,
"epoch": 0.07,
"input_norm": 31.997964229583737,
"input_norm/layer0": 31.997964229583737,
"learning_rate": 0.005,
"loss": 2.5181,
"max_norm": 81.00206756591797,
"max_norm/layer0": 81.00206756591797,
"mean_norm": 39.8663330078125,
"mean_norm/layer0": 39.8663330078125,
"multicode_k": 1,
"output_norm": 19.91484704653422,
"output_norm/layer0": 19.91484704653422,
"step": 750
},
{
"MSE": 582.8578649902345,
"MSE/layer0": 582.8578649902345,
"dead_code_fraction": 0.37595,
"dead_code_fraction/layer0": 0.37595,
"epoch": 0.08,
"input_norm": 31.997961613337196,
"input_norm/layer0": 31.997961613337196,
"learning_rate": 0.005,
"loss": 2.488,
"max_norm": 84.8564682006836,
"max_norm/layer0": 84.8564682006836,
"mean_norm": 40.41610145568848,
"mean_norm/layer0": 40.41610145568848,
"multicode_k": 1,
"output_norm": 20.113984060287464,
"output_norm/layer0": 20.113984060287464,
"step": 800
},
{
"MSE": 578.7394322713219,
"MSE/layer0": 578.7394322713219,
"dead_code_fraction": 0.36775,
"dead_code_fraction/layer0": 0.36775,
"epoch": 0.09,
"input_norm": 31.99793098767598,
"input_norm/layer0": 31.99793098767598,
"learning_rate": 0.005,
"loss": 2.3972,
"max_norm": 88.52584838867188,
"max_norm/layer0": 88.52584838867188,
"mean_norm": 40.93037033081055,
"mean_norm/layer0": 40.93037033081055,
"multicode_k": 1,
"output_norm": 20.255761035283413,
"output_norm/layer0": 20.255761035283413,
"step": 850
},
{
"MSE": 574.7943645222981,
"MSE/layer0": 574.7943645222981,
"dead_code_fraction": 0.3752,
"dead_code_fraction/layer0": 0.3752,
"epoch": 0.09,
"input_norm": 31.99794203122458,
"input_norm/layer0": 31.99794203122458,
"learning_rate": 0.005,
"loss": 2.4475,
"max_norm": 91.37139129638672,
"max_norm/layer0": 91.37139129638672,
"mean_norm": 41.42861366271973,
"mean_norm/layer0": 41.42861366271973,
"multicode_k": 1,
"output_norm": 20.38246509869893,
"output_norm/layer0": 20.38246509869893,
"step": 900
},
{
"MSE": 572.0475691731768,
"MSE/layer0": 572.0475691731768,
"dead_code_fraction": 0.369,
"dead_code_fraction/layer0": 0.369,
"epoch": 0.1,
"input_norm": 31.997947810490906,
"input_norm/layer0": 31.997947810490906,
"learning_rate": 0.005,
"loss": 2.3928,
"max_norm": 93.76451873779297,
"max_norm/layer0": 93.76451873779297,
"mean_norm": 41.89710807800293,
"mean_norm/layer0": 41.89710807800293,
"multicode_k": 1,
"output_norm": 20.522438500722256,
"output_norm/layer0": 20.522438500722256,
"step": 950
},
{
"MSE": 571.223816274007,
"MSE/layer0": 571.223816274007,
"dead_code_fraction": 0.35845,
"dead_code_fraction/layer0": 0.35845,
"epoch": 0.1,
"input_norm": 31.997930752436314,
"input_norm/layer0": 31.997930752436314,
"learning_rate": 0.005,
"loss": 2.3611,
"max_norm": 95.86876678466797,
"max_norm/layer0": 95.86876678466797,
"mean_norm": 42.36003875732422,
"mean_norm/layer0": 42.36003875732422,
"multicode_k": 1,
"output_norm": 20.59194125175477,
"output_norm/layer0": 20.59194125175477,
"step": 1000
},
{
"epoch": 0.1,
"eval_MSE/layer0": 568.7263942209383,
"eval_accuracy": 0.47120194006380184,
"eval_dead_code_fraction/layer0": 0.36065,
"eval_input_norm/layer0": 31.997911268824648,
"eval_loss": 2.370492935180664,
"eval_multicode_k": 1,
"eval_output_norm/layer0": 20.66302644662364,
"eval_runtime": 157.3974,
"eval_samples_per_second": 29.372,
"eval_steps_per_second": 1.836,
"step": 1000
},
{
"MSE": 568.216246948242,
"MSE/layer0": 568.216246948242,
"dead_code_fraction": 0.35655,
"dead_code_fraction/layer0": 0.35655,
"epoch": 0.1,
"input_norm": 31.997933057149254,
"input_norm/layer0": 31.997933057149254,
"learning_rate": 0.005,
"loss": 2.3877,
"max_norm": 97.93981170654297,
"max_norm/layer0": 97.93981170654297,
"mean_norm": 42.796369552612305,
"mean_norm/layer0": 42.796369552612305,
"multicode_k": 1,
"output_norm": 20.69294343630473,
"output_norm/layer0": 20.69294343630473,
"step": 1050
},
{
"MSE": 566.0765097045902,
"MSE/layer0": 566.0765097045902,
"dead_code_fraction": 0.3515,
"dead_code_fraction/layer0": 0.3515,
"epoch": 0.11,
"input_norm": 31.997944701512658,
"input_norm/layer0": 31.997944701512658,
"learning_rate": 0.005,
"loss": 2.32,
"max_norm": 99.40829467773438,
"max_norm/layer0": 99.40829467773438,
"mean_norm": 43.20481872558594,
"mean_norm/layer0": 43.20481872558594,
"multicode_k": 1,
"output_norm": 20.780460087458298,
"output_norm/layer0": 20.780460087458298,
"step": 1100
},
{
"MSE": 563.1435256449383,
"MSE/layer0": 563.1435256449383,
"dead_code_fraction": 0.3425,
"dead_code_fraction/layer0": 0.3425,
"epoch": 0.12,
"input_norm": 31.99793601353964,
"input_norm/layer0": 31.99793601353964,
"learning_rate": 0.005,
"loss": 2.3309,
"max_norm": 100.84235382080078,
"max_norm/layer0": 100.84235382080078,
"mean_norm": 43.63128852844238,
"mean_norm/layer0": 43.63128852844238,
"multicode_k": 1,
"output_norm": 20.85479287147521,
"output_norm/layer0": 20.85479287147521,
"step": 1150
},
{
"MSE": 561.2093427530926,
"MSE/layer0": 561.2093427530926,
"dead_code_fraction": 0.3403,
"dead_code_fraction/layer0": 0.3403,
"epoch": 0.12,
"input_norm": 31.99792865435282,
"input_norm/layer0": 31.99792865435282,
"learning_rate": 0.005,
"loss": 2.3308,
"max_norm": 102.74110412597656,
"max_norm/layer0": 102.74110412597656,
"mean_norm": 44.03978157043457,
"mean_norm/layer0": 44.03978157043457,
"multicode_k": 1,
"output_norm": 20.931864147186282,
"output_norm/layer0": 20.931864147186282,
"step": 1200
},
{
"MSE": 559.3785518391925,
"MSE/layer0": 559.3785518391925,
"dead_code_fraction": 0.3412,
"dead_code_fraction/layer0": 0.3412,
"epoch": 0.12,
"input_norm": 31.99792771339417,
"input_norm/layer0": 31.99792771339417,
"learning_rate": 0.005,
"loss": 2.3437,
"max_norm": 104.6494369506836,
"max_norm/layer0": 104.6494369506836,
"mean_norm": 44.438026428222656,
"mean_norm/layer0": 44.438026428222656,
"multicode_k": 1,
"output_norm": 21.008427244822187,
"output_norm/layer0": 21.008427244822187,
"step": 1250
},
{
"MSE": 557.9434753417968,
"MSE/layer0": 557.9434753417968,
"dead_code_fraction": 0.33015,
"dead_code_fraction/layer0": 0.33015,
"epoch": 0.13,
"input_norm": 31.997915770212824,
"input_norm/layer0": 31.997915770212824,
"learning_rate": 0.005,
"loss": 2.2785,
"max_norm": 106.27558135986328,
"max_norm/layer0": 106.27558135986328,
"mean_norm": 44.82562255859375,
"mean_norm/layer0": 44.82562255859375,
"multicode_k": 1,
"output_norm": 21.07806761741638,
"output_norm/layer0": 21.07806761741638,
"step": 1300
},
{
"MSE": 556.1554424031574,
"MSE/layer0": 556.1554424031574,
"dead_code_fraction": 0.3277,
"dead_code_fraction/layer0": 0.3277,
"epoch": 0.14,
"input_norm": 31.9979051399231,
"input_norm/layer0": 31.9979051399231,
"learning_rate": 0.005,
"loss": 2.2823,
"max_norm": 107.8658676147461,
"max_norm/layer0": 107.8658676147461,
"mean_norm": 45.21988105773926,
"mean_norm/layer0": 45.21988105773926,
"multicode_k": 1,
"output_norm": 21.124666048685715,
"output_norm/layer0": 21.124666048685715,
"step": 1350
},
{
"MSE": 554.1472004191082,
"MSE/layer0": 554.1472004191082,
"dead_code_fraction": 0.32535,
"dead_code_fraction/layer0": 0.32535,
"epoch": 0.14,
"input_norm": 31.99791674613953,
"input_norm/layer0": 31.99791674613953,
"learning_rate": 0.005,
"loss": 2.3034,
"max_norm": 109.18831634521484,
"max_norm/layer0": 109.18831634521484,
"mean_norm": 45.60391616821289,
"mean_norm/layer0": 45.60391616821289,
"multicode_k": 1,
"output_norm": 21.184103918075557,
"output_norm/layer0": 21.184103918075557,
"step": 1400
},
{
"MSE": 553.0813423156735,
"MSE/layer0": 553.0813423156735,
"dead_code_fraction": 0.3218,
"dead_code_fraction/layer0": 0.3218,
"epoch": 0.14,
"input_norm": 31.997899109522507,
"input_norm/layer0": 31.997899109522507,
"learning_rate": 0.005,
"loss": 2.2583,
"max_norm": 110.68695831298828,
"max_norm/layer0": 110.68695831298828,
"mean_norm": 45.98097801208496,
"mean_norm/layer0": 45.98097801208496,
"multicode_k": 1,
"output_norm": 21.234303328196226,
"output_norm/layer0": 21.234303328196226,
"step": 1450
},
{
"MSE": 551.1942003377276,
"MSE/layer0": 551.1942003377276,
"dead_code_fraction": 0.32175,
"dead_code_fraction/layer0": 0.32175,
"epoch": 0.15,
"input_norm": 31.997910699844365,
"input_norm/layer0": 31.997910699844365,
"learning_rate": 0.005,
"loss": 2.2395,
"max_norm": 112.16923522949219,
"max_norm/layer0": 112.16923522949219,
"mean_norm": 46.355411529541016,
"mean_norm/layer0": 46.355411529541016,
"multicode_k": 1,
"output_norm": 21.303704795837398,
"output_norm/layer0": 21.303704795837398,
"step": 1500
},
{
"epoch": 0.15,
"eval_MSE/layer0": 550.3311246673497,
"eval_accuracy": 0.486590169556823,
"eval_dead_code_fraction/layer0": 0.32665,
"eval_input_norm/layer0": 31.99789719372221,
"eval_loss": 2.253082513809204,
"eval_multicode_k": 1,
"eval_output_norm/layer0": 21.329729693291277,
"eval_runtime": 160.4576,
"eval_samples_per_second": 28.811,
"eval_steps_per_second": 1.801,
"step": 1500
},
{
"MSE": 551.4163179524738,
"MSE/layer0": 551.4163179524738,
"dead_code_fraction": 0.3174,
"dead_code_fraction/layer0": 0.3174,
"epoch": 0.15,
"input_norm": 31.997892700831095,
"input_norm/layer0": 31.997892700831095,
"learning_rate": 0.005,
"loss": 2.1968,
"max_norm": 113.21269989013672,
"max_norm/layer0": 113.21269989013672,
"mean_norm": 46.7271785736084,
"mean_norm/layer0": 46.7271785736084,
"multicode_k": 1,
"output_norm": 21.321544698079432,
"output_norm/layer0": 21.321544698079432,
"step": 1550
},
{
"MSE": 549.0553175354001,
"MSE/layer0": 549.0553175354001,
"dead_code_fraction": 0.31715,
"dead_code_fraction/layer0": 0.31715,
"epoch": 0.16,
"input_norm": 31.99791664441427,
"input_norm/layer0": 31.99791664441427,
"learning_rate": 0.005,
"loss": 2.2863,
"max_norm": 114.45362854003906,
"max_norm/layer0": 114.45362854003906,
"mean_norm": 47.08230972290039,
"mean_norm/layer0": 47.08230972290039,
"multicode_k": 1,
"output_norm": 21.38125430742899,
"output_norm/layer0": 21.38125430742899,
"step": 1600
},
{
"MSE": 547.4109810384114,
"MSE/layer0": 547.4109810384114,
"dead_code_fraction": 0.3131,
"dead_code_fraction/layer0": 0.3131,
"epoch": 0.17,
"input_norm": 31.997924680709843,
"input_norm/layer0": 31.997924680709843,
"learning_rate": 0.005,
"loss": 2.2147,
"max_norm": 115.29362487792969,
"max_norm/layer0": 115.29362487792969,
"mean_norm": 47.438798904418945,
"mean_norm/layer0": 47.438798904418945,
"multicode_k": 1,
"output_norm": 21.454637037913013,
"output_norm/layer0": 21.454637037913013,
"step": 1650
},
{
"MSE": 546.0445864868163,
"MSE/layer0": 546.0445864868163,
"dead_code_fraction": 0.31475,
"dead_code_fraction/layer0": 0.31475,
"epoch": 0.17,
"input_norm": 31.997929503122954,
"input_norm/layer0": 31.997929503122954,
"learning_rate": 0.005,
"loss": 2.2501,
"max_norm": 116.09871673583984,
"max_norm/layer0": 116.09871673583984,
"mean_norm": 47.79398536682129,
"mean_norm/layer0": 47.79398536682129,
"multicode_k": 1,
"output_norm": 21.4808695602417,
"output_norm/layer0": 21.4808695602417,
"step": 1700
},
{
"MSE": 545.4600128173831,
"MSE/layer0": 545.4600128173831,
"dead_code_fraction": 0.30905,
"dead_code_fraction/layer0": 0.30905,
"epoch": 0.17,
"input_norm": 31.997937501271572,
"input_norm/layer0": 31.997937501271572,
"learning_rate": 0.005,
"loss": 2.2296,
"max_norm": 117.0920181274414,
"max_norm/layer0": 117.0920181274414,
"mean_norm": 48.138267517089844,
"mean_norm/layer0": 48.138267517089844,
"multicode_k": 1,
"output_norm": 21.52623297691346,
"output_norm/layer0": 21.52623297691346,
"step": 1750
},
{
"MSE": 543.9589634704591,
"MSE/layer0": 543.9589634704591,
"dead_code_fraction": 0.3074,
"dead_code_fraction/layer0": 0.3074,
"epoch": 0.18,
"input_norm": 31.997916940053315,
"input_norm/layer0": 31.997916940053315,
"learning_rate": 0.005,
"loss": 2.1632,
"max_norm": 118.44883728027344,
"max_norm/layer0": 118.44883728027344,
"mean_norm": 48.48598670959473,
"mean_norm/layer0": 48.48598670959473,
"multicode_k": 1,
"output_norm": 21.572722558975222,
"output_norm/layer0": 21.572722558975222,
"step": 1800
},
{
"MSE": 543.3154680887858,
"MSE/layer0": 543.3154680887858,
"dead_code_fraction": 0.30485,
"dead_code_fraction/layer0": 0.30485,
"epoch": 0.18,
"input_norm": 31.997930173873904,
"input_norm/layer0": 31.997930173873904,
"learning_rate": 0.005,
"loss": 2.1874,
"max_norm": 119.3927001953125,
"max_norm/layer0": 119.3927001953125,
"mean_norm": 48.82695388793945,
"mean_norm/layer0": 48.82695388793945,
"multicode_k": 1,
"output_norm": 21.595847959518437,
"output_norm/layer0": 21.595847959518437,
"step": 1850
},
{
"MSE": 542.2137928263345,
"MSE/layer0": 542.2137928263345,
"dead_code_fraction": 0.30715,
"dead_code_fraction/layer0": 0.30715,
"epoch": 0.19,
"input_norm": 31.997955802281705,
"input_norm/layer0": 31.997955802281705,
"learning_rate": 0.005,
"loss": 2.2323,
"max_norm": 121.5817642211914,
"max_norm/layer0": 121.5817642211914,
"mean_norm": 49.15649604797363,
"mean_norm/layer0": 49.15649604797363,
"multicode_k": 1,
"output_norm": 21.63884919484457,
"output_norm/layer0": 21.63884919484457,
"step": 1900
},
{
"MSE": 539.4505286661786,
"MSE/layer0": 539.4505286661786,
"dead_code_fraction": 0.3033,
"dead_code_fraction/layer0": 0.3033,
"epoch": 0.2,
"input_norm": 31.997942549387595,
"input_norm/layer0": 31.997942549387595,
"learning_rate": 0.005,
"loss": 2.1894,
"max_norm": 123.63184356689453,
"max_norm/layer0": 123.63184356689453,
"mean_norm": 49.49074363708496,
"mean_norm/layer0": 49.49074363708496,
"multicode_k": 1,
"output_norm": 21.689245723088575,
"output_norm/layer0": 21.689245723088575,
"step": 1950
},
{
"MSE": 539.8872321573892,
"MSE/layer0": 539.8872321573892,
"dead_code_fraction": 0.29975,
"dead_code_fraction/layer0": 0.29975,
"epoch": 0.2,
"input_norm": 31.997952289581303,
"input_norm/layer0": 31.997952289581303,
"learning_rate": 0.005,
"loss": 2.1999,
"max_norm": 125.97776794433594,
"max_norm/layer0": 125.97776794433594,
"mean_norm": 49.814876556396484,
"mean_norm/layer0": 49.814876556396484,
"multicode_k": 1,
"output_norm": 21.72016517957053,
"output_norm/layer0": 21.72016517957053,
"step": 2000
},
{
"epoch": 0.2,
"eval_MSE/layer0": 539.0149815035619,
"eval_accuracy": 0.4955417565578542,
"eval_dead_code_fraction/layer0": 0.30475,
"eval_input_norm/layer0": 31.997959356660743,
"eval_loss": 2.1908392906188965,
"eval_multicode_k": 1,
"eval_output_norm/layer0": 21.766283075917258,
"eval_runtime": 158.005,
"eval_samples_per_second": 29.259,
"eval_steps_per_second": 1.829,
"step": 2000
},
{
"MSE": 538.042401936849,
"MSE/layer0": 538.042401936849,
"dead_code_fraction": 0.30175,
"dead_code_fraction/layer0": 0.30175,
"epoch": 0.2,
"input_norm": 31.99795293172201,
"input_norm/layer0": 31.99795293172201,
"learning_rate": 0.005,
"loss": 2.1768,
"max_norm": 127.91316986083984,
"max_norm/layer0": 127.91316986083984,
"mean_norm": 50.13774490356445,
"mean_norm/layer0": 50.13774490356445,
"multicode_k": 1,
"output_norm": 21.768677377700797,
"output_norm/layer0": 21.768677377700797,
"step": 2050
},
{
"MSE": 537.4139138285318,
"MSE/layer0": 537.4139138285318,
"dead_code_fraction": 0.29605,
"dead_code_fraction/layer0": 0.29605,
"epoch": 0.21,
"input_norm": 31.997954098383584,
"input_norm/layer0": 31.997954098383584,
"learning_rate": 0.005,
"loss": 2.1417,
"max_norm": 129.62669372558594,
"max_norm/layer0": 129.62669372558594,
"mean_norm": 50.44980430603027,
"mean_norm/layer0": 50.44980430603027,
"multicode_k": 1,
"output_norm": 21.780523262023927,
"output_norm/layer0": 21.780523262023927,
"step": 2100
},
{
"MSE": 537.116479644775,
"MSE/layer0": 537.116479644775,
"dead_code_fraction": 0.298,
"dead_code_fraction/layer0": 0.298,
"epoch": 0.21,
"input_norm": 31.99796335220337,
"input_norm/layer0": 31.99796335220337,
"learning_rate": 0.005,
"loss": 2.1753,
"max_norm": 131.71987915039062,
"max_norm/layer0": 131.71987915039062,
"mean_norm": 50.758169174194336,
"mean_norm/layer0": 50.758169174194336,
"multicode_k": 1,
"output_norm": 21.819064016342168,
"output_norm/layer0": 21.819064016342168,
"step": 2150
},
{
"MSE": 536.1095620218915,
"MSE/layer0": 536.1095620218915,
"dead_code_fraction": 0.29655,
"dead_code_fraction/layer0": 0.29655,
"epoch": 0.22,
"input_norm": 31.997976016998287,
"input_norm/layer0": 31.997976016998287,
"learning_rate": 0.005,
"loss": 2.1676,
"max_norm": 133.67694091796875,
"max_norm/layer0": 133.67694091796875,
"mean_norm": 51.058135986328125,
"mean_norm/layer0": 51.058135986328125,
"multicode_k": 1,
"output_norm": 21.83967799504598,
"output_norm/layer0": 21.83967799504598,
"step": 2200
},
{
"MSE": 535.5964339701336,
"MSE/layer0": 535.5964339701336,
"dead_code_fraction": 0.2945,
"dead_code_fraction/layer0": 0.2945,
"epoch": 0.23,
"input_norm": 31.997973279952987,
"input_norm/layer0": 31.997973279952987,
"learning_rate": 0.005,
"loss": 2.1347,
"max_norm": 135.40386962890625,
"max_norm/layer0": 135.40386962890625,
"mean_norm": 51.35656929016113,
"mean_norm/layer0": 51.35656929016113,
"multicode_k": 1,
"output_norm": 21.857111148834242,
"output_norm/layer0": 21.857111148834242,
"step": 2250
},
{
"MSE": 534.8214352925618,
"MSE/layer0": 534.8214352925618,
"dead_code_fraction": 0.2943,
"dead_code_fraction/layer0": 0.2943,
"epoch": 0.23,
"input_norm": 31.99798300425212,
"input_norm/layer0": 31.99798300425212,
"learning_rate": 0.005,
"loss": 2.1712,
"max_norm": 137.13648986816406,
"max_norm/layer0": 137.13648986816406,
"mean_norm": 51.64659118652344,
"mean_norm/layer0": 51.64659118652344,
"multicode_k": 1,
"output_norm": 21.901708291371662,
"output_norm/layer0": 21.901708291371662,
"step": 2300
},
{
"MSE": 533.4100613403319,
"MSE/layer0": 533.4100613403319,
"dead_code_fraction": 0.29105,
"dead_code_fraction/layer0": 0.29105,
"epoch": 0.23,
"input_norm": 31.99798559824626,
"input_norm/layer0": 31.99798559824626,
"learning_rate": 0.005,
"loss": 2.1726,
"max_norm": 138.62417602539062,
"max_norm/layer0": 138.62417602539062,
"mean_norm": 51.931190490722656,
"mean_norm/layer0": 51.931190490722656,
"multicode_k": 1,
"output_norm": 21.91944276809694,
"output_norm/layer0": 21.91944276809694,
"step": 2350
},
{
"MSE": 533.0944277445471,
"MSE/layer0": 533.0944277445471,
"dead_code_fraction": 0.29235,
"dead_code_fraction/layer0": 0.29235,
"epoch": 0.24,
"input_norm": 31.99797873497009,
"input_norm/layer0": 31.99797873497009,
"learning_rate": 0.005,
"loss": 2.1496,
"max_norm": 140.219970703125,
"max_norm/layer0": 140.219970703125,
"mean_norm": 52.213850021362305,
"mean_norm/layer0": 52.213850021362305,
"multicode_k": 1,
"output_norm": 21.941968046824137,
"output_norm/layer0": 21.941968046824137,
"step": 2400
},
{
"MSE": 531.4289741007487,
"MSE/layer0": 531.4289741007487,
"dead_code_fraction": 0.29335,
"dead_code_fraction/layer0": 0.29335,
"epoch": 0.24,
"input_norm": 31.998000961939493,
"input_norm/layer0": 31.998000961939493,
"learning_rate": 0.005,
"loss": 2.153,
"max_norm": 141.84396362304688,
"max_norm/layer0": 141.84396362304688,
"mean_norm": 52.47932052612305,
"mean_norm/layer0": 52.47932052612305,
"multicode_k": 1,
"output_norm": 21.982840156555177,
"output_norm/layer0": 21.982840156555177,
"step": 2450
},
{
"MSE": 531.2627974446617,
"MSE/layer0": 531.2627974446617,
"dead_code_fraction": 0.28885,
"dead_code_fraction/layer0": 0.28885,
"epoch": 0.25,
"input_norm": 31.99799962997436,
"input_norm/layer0": 31.99799962997436,
"learning_rate": 0.005,
"loss": 2.1688,
"max_norm": 143.0140838623047,
"max_norm/layer0": 143.0140838623047,
"mean_norm": 52.74382019042969,
"mean_norm/layer0": 52.74382019042969,
"multicode_k": 1,
"output_norm": 22.00004559199015,
"output_norm/layer0": 22.00004559199015,
"step": 2500
},
{
"epoch": 0.25,
"eval_MSE/layer0": 530.4651256365718,
"eval_accuracy": 0.5006363482007701,
"eval_dead_code_fraction/layer0": 0.29495,
"eval_input_norm/layer0": 31.99800563596064,
"eval_loss": 2.155103921890259,
"eval_multicode_k": 1,
"eval_output_norm/layer0": 22.022818533393835,
"eval_runtime": 157.5009,
"eval_samples_per_second": 29.352,
"eval_steps_per_second": 1.835,
"step": 2500
},
{
"MSE": 530.4989952596026,
"MSE/layer0": 530.4989952596026,
"dead_code_fraction": 0.29025,
"dead_code_fraction/layer0": 0.29025,
"epoch": 0.26,
"input_norm": 31.99801852544149,
"input_norm/layer0": 31.99801852544149,
"learning_rate": 0.005,
"loss": 2.1541,
"max_norm": 144.50558471679688,
"max_norm/layer0": 144.50558471679688,
"mean_norm": 52.992868423461914,
"mean_norm/layer0": 52.992868423461914,
"multicode_k": 1,
"output_norm": 22.044915301005062,
"output_norm/layer0": 22.044915301005062,
"step": 2550
},
{
"MSE": 529.2955647786457,
"MSE/layer0": 529.2955647786457,
"dead_code_fraction": 0.288,
"dead_code_fraction/layer0": 0.288,
"epoch": 0.26,
"input_norm": 31.998021106719975,
"input_norm/layer0": 31.998021106719975,
"learning_rate": 0.005,
"loss": 2.1563,
"max_norm": 146.2478485107422,
"max_norm/layer0": 146.2478485107422,
"mean_norm": 53.24601364135742,
"mean_norm/layer0": 53.24601364135742,
"multicode_k": 1,
"output_norm": 22.048689235051476,
"output_norm/layer0": 22.048689235051476,
"step": 2600
},
{
"MSE": 529.877343190511,
"MSE/layer0": 529.877343190511,
"dead_code_fraction": 0.288,
"dead_code_fraction/layer0": 0.288,
"epoch": 0.27,
"input_norm": 31.998024587631217,
"input_norm/layer0": 31.998024587631217,
"learning_rate": 0.005,
"loss": 2.1382,
"max_norm": 147.41587829589844,
"max_norm/layer0": 147.41587829589844,
"mean_norm": 53.48561096191406,
"mean_norm/layer0": 53.48561096191406,
"multicode_k": 1,
"output_norm": 22.0797532526652,
"output_norm/layer0": 22.0797532526652,
"step": 2650
},
{
"MSE": 528.3514750671387,
"MSE/layer0": 528.3514750671387,
"dead_code_fraction": 0.28825,
"dead_code_fraction/layer0": 0.28825,
"epoch": 0.27,
"input_norm": 31.99804752349852,
"input_norm/layer0": 31.99804752349852,
"learning_rate": 0.005,
"loss": 2.1742,
"max_norm": 148.7862091064453,
"max_norm/layer0": 148.7862091064453,
"mean_norm": 53.71611213684082,
"mean_norm/layer0": 53.71611213684082,
"multicode_k": 1,
"output_norm": 22.09869578997295,
"output_norm/layer0": 22.09869578997295,
"step": 2700
},
{
"MSE": 528.2884072875979,
"MSE/layer0": 528.2884072875979,
"dead_code_fraction": 0.28335,
"dead_code_fraction/layer0": 0.28335,
"epoch": 0.28,
"input_norm": 31.998042856852216,
"input_norm/layer0": 31.998042856852216,
"learning_rate": 0.005,
"loss": 2.1277,
"max_norm": 150.35140991210938,
"max_norm/layer0": 150.35140991210938,
"mean_norm": 53.946285247802734,
"mean_norm/layer0": 53.946285247802734,
"multicode_k": 1,
"output_norm": 22.106029316584255,
"output_norm/layer0": 22.106029316584255,
"step": 2750
},
{
"MSE": 527.2996965026854,
"MSE/layer0": 527.2996965026854,
"dead_code_fraction": 0.2844,
"dead_code_fraction/layer0": 0.2844,
"epoch": 0.28,
"input_norm": 31.9980613454183,
"input_norm/layer0": 31.9980613454183,
"learning_rate": 0.005,
"loss": 2.1676,
"max_norm": 152.27590942382812,
"max_norm/layer0": 152.27590942382812,
"mean_norm": 54.16430473327637,
"mean_norm/layer0": 54.16430473327637,
"multicode_k": 1,
"output_norm": 22.141783040364587,
"output_norm/layer0": 22.141783040364587,
"step": 2800
},
{
"MSE": 527.5191156514486,
"MSE/layer0": 527.5191156514486,
"dead_code_fraction": 0.28045,
"dead_code_fraction/layer0": 0.28045,
"epoch": 0.28,
"input_norm": 31.998067801793418,
"input_norm/layer0": 31.998067801793418,
"learning_rate": 0.005,
"loss": 2.1076,
"max_norm": 153.54779052734375,
"max_norm/layer0": 153.54779052734375,
"mean_norm": 54.38737678527832,
"mean_norm/layer0": 54.38737678527832,
"multicode_k": 1,
"output_norm": 22.13956375757853,
"output_norm/layer0": 22.13956375757853,
"step": 2850
},
{
"MSE": 527.3752633666991,
"MSE/layer0": 527.3752633666991,
"dead_code_fraction": 0.28165,
"dead_code_fraction/layer0": 0.28165,
"epoch": 0.29,
"input_norm": 31.998070557912186,
"input_norm/layer0": 31.998070557912186,
"learning_rate": 0.005,
"loss": 2.1379,
"max_norm": 155.25857543945312,
"max_norm/layer0": 155.25857543945312,
"mean_norm": 54.598867416381836,
"mean_norm/layer0": 54.598867416381836,
"multicode_k": 1,
"output_norm": 22.1554997475942,
"output_norm/layer0": 22.1554997475942,
"step": 2900
},
{
"MSE": 525.2142114257812,
"MSE/layer0": 525.2142114257812,
"dead_code_fraction": 0.2841,
"dead_code_fraction/layer0": 0.2841,
"epoch": 0.29,
"input_norm": 31.998104591369632,
"input_norm/layer0": 31.998104591369632,
"learning_rate": 0.005,
"loss": 2.1887,
"max_norm": 157.656494140625,
"max_norm/layer0": 157.656494140625,
"mean_norm": 54.80296516418457,
"mean_norm/layer0": 54.80296516418457,
"multicode_k": 1,
"output_norm": 22.194608500798537,
"output_norm/layer0": 22.194608500798537,
"step": 2950
},
{
"MSE": 525.7639581807456,
"MSE/layer0": 525.7639581807456,
"dead_code_fraction": 0.28035,
"dead_code_fraction/layer0": 0.28035,
"epoch": 0.3,
"input_norm": 31.998085311253874,
"input_norm/layer0": 31.998085311253874,
"learning_rate": 0.005,
"loss": 2.1108,
"max_norm": 159.0706787109375,
"max_norm/layer0": 159.0706787109375,
"mean_norm": 55.01374816894531,
"mean_norm/layer0": 55.01374816894531,
"multicode_k": 1,
"output_norm": 22.19143549601236,
"output_norm/layer0": 22.19143549601236,
"step": 3000
},
{
"epoch": 0.3,
"eval_MSE/layer0": 524.9529532255765,
"eval_accuracy": 0.5051228197488481,
"eval_dead_code_fraction/layer0": 0.2809,
"eval_input_norm/layer0": 31.998092802783354,
"eval_loss": 2.126948595046997,
"eval_multicode_k": 1,
"eval_output_norm/layer0": 22.207113418645122,
"eval_runtime": 157.5523,
"eval_samples_per_second": 29.343,
"eval_steps_per_second": 1.834,
"step": 3000
},
{
"MSE": 525.49979405721,
"MSE/layer0": 525.49979405721,
"dead_code_fraction": 0.28015,
"dead_code_fraction/layer0": 0.28015,
"epoch": 0.3,
"input_norm": 31.998098812103272,
"input_norm/layer0": 31.998098812103272,
"learning_rate": 0.005,
"loss": 2.1814,
"max_norm": 160.52183532714844,
"max_norm/layer0": 160.52183532714844,
"mean_norm": 55.21175575256348,
"mean_norm/layer0": 55.21175575256348,
"multicode_k": 1,
"output_norm": 22.205291633605963,
"output_norm/layer0": 22.205291633605963,
"step": 3050
},
{
"MSE": 525.1535092671712,
"MSE/layer0": 525.1535092671712,
"dead_code_fraction": 0.27915,
"dead_code_fraction/layer0": 0.27915,
"epoch": 0.31,
"input_norm": 31.998094654083246,
"input_norm/layer0": 31.998094654083246,
"learning_rate": 0.005,
"loss": 2.1228,
"max_norm": 161.857666015625,
"max_norm/layer0": 161.857666015625,
"mean_norm": 55.42117881774902,
"mean_norm/layer0": 55.42117881774902,
"multicode_k": 1,
"output_norm": 22.20783314704896,
"output_norm/layer0": 22.20783314704896,
"step": 3100
},
{
"MSE": 524.7367662556965,
"MSE/layer0": 524.7367662556965,
"dead_code_fraction": 0.27865,
"dead_code_fraction/layer0": 0.27865,
"epoch": 0.32,
"input_norm": 31.99810951550802,
"input_norm/layer0": 31.99810951550802,
"learning_rate": 0.005,
"loss": 2.1582,
"max_norm": 163.2421417236328,
"max_norm/layer0": 163.2421417236328,
"mean_norm": 55.61536979675293,
"mean_norm/layer0": 55.61536979675293,
"multicode_k": 1,
"output_norm": 22.229626963933313,
"output_norm/layer0": 22.229626963933313,
"step": 3150
},
{
"MSE": 523.2996738688151,
"MSE/layer0": 523.2996738688151,
"dead_code_fraction": 0.27935,
"dead_code_fraction/layer0": 0.27935,
"epoch": 0.32,
"input_norm": 31.998107938766474,
"input_norm/layer0": 31.998107938766474,
"learning_rate": 0.005,
"loss": 2.0913,
"max_norm": 164.34832763671875,
"max_norm/layer0": 164.34832763671875,
"mean_norm": 55.82136154174805,
"mean_norm/layer0": 55.82136154174805,
"multicode_k": 1,
"output_norm": 22.247861604690552,
"output_norm/layer0": 22.247861604690552,
"step": 3200
},
{
"MSE": 524.0594484965007,
"MSE/layer0": 524.0594484965007,
"dead_code_fraction": 0.2756,
"dead_code_fraction/layer0": 0.2756,
"epoch": 0.33,
"input_norm": 31.998120482762648,
"input_norm/layer0": 31.998120482762648,
"learning_rate": 0.005,
"loss": 2.1073,
"max_norm": 165.75938415527344,
"max_norm/layer0": 165.75938415527344,
"mean_norm": 56.010887145996094,
"mean_norm/layer0": 56.010887145996094,
"multicode_k": 1,
"output_norm": 22.245831327438353,
"output_norm/layer0": 22.245831327438353,
"step": 3250
},
{
"MSE": 525.364818725586,
"MSE/layer0": 525.364818725586,
"dead_code_fraction": 0.2756,
"dead_code_fraction/layer0": 0.2756,
"epoch": 0.33,
"input_norm": 31.998132244745904,
"input_norm/layer0": 31.998132244745904,
"learning_rate": 0.005,
"loss": 2.0807,
"max_norm": 166.85643005371094,
"max_norm/layer0": 166.85643005371094,
"mean_norm": 56.187782287597656,
"mean_norm/layer0": 56.187782287597656,
"multicode_k": 1,
"output_norm": 22.242043924331664,
"output_norm/layer0": 22.242043924331664,
"step": 3300
},
{
"MSE": 523.8938673400878,
"MSE/layer0": 523.8938673400878,
"dead_code_fraction": 0.2733,
"dead_code_fraction/layer0": 0.2733,
"epoch": 0.34,
"input_norm": 31.998154455820725,
"input_norm/layer0": 31.998154455820725,
"learning_rate": 0.005,
"loss": 2.1234,
"max_norm": 167.70089721679688,
"max_norm/layer0": 167.70089721679688,
"mean_norm": 56.36995506286621,
"mean_norm/layer0": 56.36995506286621,
"multicode_k": 1,
"output_norm": 22.246343409220387,
"output_norm/layer0": 22.246343409220387,
"step": 3350
},
{
"MSE": 522.7465829976402,
"MSE/layer0": 522.7465829976402,
"dead_code_fraction": 0.2741,
"dead_code_fraction/layer0": 0.2741,
"epoch": 0.34,
"input_norm": 31.998157631556197,
"input_norm/layer0": 31.998157631556197,
"learning_rate": 0.005,
"loss": 2.1138,
"max_norm": 168.70301818847656,
"max_norm/layer0": 168.70301818847656,
"mean_norm": 56.55203437805176,
"mean_norm/layer0": 56.55203437805176,
"multicode_k": 1,
"output_norm": 22.282327626546234,
"output_norm/layer0": 22.282327626546234,
"step": 3400
},
{
"MSE": 522.0263201395671,
"MSE/layer0": 522.0263201395671,
"dead_code_fraction": 0.27335,
"dead_code_fraction/layer0": 0.27335,
"epoch": 0.34,
"input_norm": 31.99815892855326,
"input_norm/layer0": 31.99815892855326,
"learning_rate": 0.005,
"loss": 2.103,
"max_norm": 169.3920135498047,
"max_norm/layer0": 169.3920135498047,
"mean_norm": 56.73575782775879,
"mean_norm/layer0": 56.73575782775879,
"multicode_k": 1,
"output_norm": 22.29100898424786,
"output_norm/layer0": 22.29100898424786,
"step": 3450
},
{
"MSE": 521.5609470621745,
"MSE/layer0": 521.5609470621745,
"dead_code_fraction": 0.27265,
"dead_code_fraction/layer0": 0.27265,
"epoch": 0.35,
"input_norm": 31.99817145665487,
"input_norm/layer0": 31.99817145665487,
"learning_rate": 0.005,
"loss": 2.1045,
"max_norm": 170.13829040527344,
"max_norm/layer0": 170.13829040527344,
"mean_norm": 56.91371726989746,
"mean_norm/layer0": 56.91371726989746,
"multicode_k": 1,
"output_norm": 22.309985055923462,
"output_norm/layer0": 22.309985055923462,
"step": 3500
},
{
"epoch": 0.35,
"eval_MSE/layer0": 523.0844207110149,
"eval_accuracy": 0.5078879054512807,
"eval_dead_code_fraction/layer0": 0.27345,
"eval_input_norm/layer0": 31.998171135689724,
"eval_loss": 2.1130311489105225,
"eval_multicode_k": 1,
"eval_output_norm/layer0": 22.351890057112634,
"eval_runtime": 158.0171,
"eval_samples_per_second": 29.256,
"eval_steps_per_second": 1.829,
"step": 3500
},
{
"MSE": 522.4261043294274,
"MSE/layer0": 522.4261043294274,
"dead_code_fraction": 0.27245,
"dead_code_fraction/layer0": 0.27245,
"epoch": 0.35,
"input_norm": 31.998184868494675,
"input_norm/layer0": 31.998184868494675,
"learning_rate": 0.005,
"loss": 2.1296,
"max_norm": 171.21067810058594,
"max_norm/layer0": 171.21067810058594,
"mean_norm": 57.08243370056152,
"mean_norm/layer0": 57.08243370056152,
"multicode_k": 1,
"output_norm": 22.307131767272942,
"output_norm/layer0": 22.307131767272942,
"step": 3550
},
{
"MSE": 520.5630591837569,
"MSE/layer0": 520.5630591837569,
"dead_code_fraction": 0.2715,
"dead_code_fraction/layer0": 0.2715,
"epoch": 0.36,
"input_norm": 31.998175201416018,
"input_norm/layer0": 31.998175201416018,
"learning_rate": 0.005,
"loss": 2.0606,
"max_norm": 172.018798828125,
"max_norm/layer0": 172.018798828125,
"mean_norm": 57.259552001953125,
"mean_norm/layer0": 57.259552001953125,
"multicode_k": 1,
"output_norm": 22.33381741523742,
"output_norm/layer0": 22.33381741523742,
"step": 3600
},
{
"MSE": 521.8190139770511,
"MSE/layer0": 521.8190139770511,
"dead_code_fraction": 0.26915,
"dead_code_fraction/layer0": 0.26915,
"epoch": 0.36,
"input_norm": 31.998206920623783,
"input_norm/layer0": 31.998206920623783,
"learning_rate": 0.005,
"loss": 2.1264,
"max_norm": 173.08360290527344,
"max_norm/layer0": 173.08360290527344,
"mean_norm": 57.425479888916016,
"mean_norm/layer0": 57.425479888916016,
"multicode_k": 1,
"output_norm": 22.321163501739488,
"output_norm/layer0": 22.321163501739488,
"step": 3650
},
{
"MSE": 520.2701113382976,
"MSE/layer0": 520.2701113382976,
"dead_code_fraction": 0.26935,
"dead_code_fraction/layer0": 0.26935,
"epoch": 0.37,
"input_norm": 31.99821238517761,
"input_norm/layer0": 31.99821238517761,
"learning_rate": 0.005,
"loss": 2.1028,
"max_norm": 174.31561279296875,
"max_norm/layer0": 174.31561279296875,
"mean_norm": 57.58916091918945,
"mean_norm/layer0": 57.58916091918945,
"multicode_k": 1,
"output_norm": 22.34191367149354,
"output_norm/layer0": 22.34191367149354,
"step": 3700
},
{
"MSE": 520.4189120992024,
"MSE/layer0": 520.4189120992024,
"dead_code_fraction": 0.26865,
"dead_code_fraction/layer0": 0.26865,
"epoch": 0.38,
"input_norm": 31.99821661313375,
"input_norm/layer0": 31.99821661313375,
"learning_rate": 0.005,
"loss": 2.106,
"max_norm": 175.09739685058594,
"max_norm/layer0": 175.09739685058594,
"mean_norm": 57.75008010864258,
"mean_norm/layer0": 57.75008010864258,
"multicode_k": 1,
"output_norm": 22.352550570170077,
"output_norm/layer0": 22.352550570170077,
"step": 3750
},
{
"MSE": 520.3332616170245,
"MSE/layer0": 520.3332616170245,
"dead_code_fraction": 0.2705,
"dead_code_fraction/layer0": 0.2705,
"epoch": 0.38,
"input_norm": 31.998228356043505,
"input_norm/layer0": 31.998228356043505,
"learning_rate": 0.005,
"loss": 2.1318,
"max_norm": 175.85955810546875,
"max_norm/layer0": 175.85955810546875,
"mean_norm": 57.9084529876709,
"mean_norm/layer0": 57.9084529876709,
"multicode_k": 1,
"output_norm": 22.355525690714526,
"output_norm/layer0": 22.355525690714526,
"step": 3800
},
{
"MSE": 519.1107161458334,
"MSE/layer0": 519.1107161458334,
"dead_code_fraction": 0.26585,
"dead_code_fraction/layer0": 0.26585,
"epoch": 0.39,
"input_norm": 31.998228273391724,
"input_norm/layer0": 31.998228273391724,
"learning_rate": 0.005,
"loss": 2.1063,
"max_norm": 176.55845642089844,
"max_norm/layer0": 176.55845642089844,
"mean_norm": 58.0648193359375,
"mean_norm/layer0": 58.0648193359375,
"multicode_k": 1,
"output_norm": 22.375479180018097,
"output_norm/layer0": 22.375479180018097,
"step": 3850
},
{
"MSE": 520.279450937907,
"MSE/layer0": 520.279450937907,
"dead_code_fraction": 0.26475,
"dead_code_fraction/layer0": 0.26475,
"epoch": 0.39,
"input_norm": 31.998248408635455,
"input_norm/layer0": 31.998248408635455,
"learning_rate": 0.005,
"loss": 2.1158,
"max_norm": 177.40316772460938,
"max_norm/layer0": 177.40316772460938,
"mean_norm": 58.21473693847656,
"mean_norm/layer0": 58.21473693847656,
"multicode_k": 1,
"output_norm": 22.37501454989114,
"output_norm/layer0": 22.37501454989114,
"step": 3900
},
{
"MSE": 520.3905441284179,
"MSE/layer0": 520.3905441284179,
"dead_code_fraction": 0.26645,
"dead_code_fraction/layer0": 0.26645,
"epoch": 0.4,
"input_norm": 31.998255596160874,
"input_norm/layer0": 31.998255596160874,
"learning_rate": 0.005,
"loss": 2.0919,
"max_norm": 178.25682067871094,
"max_norm/layer0": 178.25682067871094,
"mean_norm": 58.36372947692871,
"mean_norm/layer0": 58.36372947692871,
"multicode_k": 1,
"output_norm": 22.360030002593987,
"output_norm/layer0": 22.360030002593987,
"step": 3950
},
{
"MSE": 520.0447977193196,
"MSE/layer0": 520.0447977193196,
"dead_code_fraction": 0.2638,
"dead_code_fraction/layer0": 0.2638,
"epoch": 0.4,
"input_norm": 31.998260914484668,
"input_norm/layer0": 31.998260914484668,
"learning_rate": 0.005,
"loss": 2.0944,
"max_norm": 178.8519287109375,
"max_norm/layer0": 178.8519287109375,
"mean_norm": 58.51635932922363,
"mean_norm/layer0": 58.51635932922363,
"multicode_k": 1,
"output_norm": 22.37334650675455,
"output_norm/layer0": 22.37334650675455,
"step": 4000
},
{
"epoch": 0.4,
"eval_MSE/layer0": 519.885230389297,
"eval_accuracy": 0.5089345655588774,
"eval_dead_code_fraction/layer0": 0.2655,
"eval_input_norm/layer0": 31.998263675723535,
"eval_loss": 2.0995683670043945,
"eval_multicode_k": 1,
"eval_output_norm/layer0": 22.39302826134025,
"eval_runtime": 158.5492,
"eval_samples_per_second": 29.158,
"eval_steps_per_second": 1.823,
"step": 4000
},
{
"MSE": 519.3041878255204,
"MSE/layer0": 519.3041878255204,
"dead_code_fraction": 0.2634,
"dead_code_fraction/layer0": 0.2634,
"epoch": 0.41,
"input_norm": 31.998263047536216,
"input_norm/layer0": 31.998263047536216,
"learning_rate": 0.005,
"loss": 2.0844,
"max_norm": 179.35386657714844,
"max_norm/layer0": 179.35386657714844,
"mean_norm": 58.670223236083984,
"mean_norm/layer0": 58.670223236083984,
"multicode_k": 1,
"output_norm": 22.375990848541264,
"output_norm/layer0": 22.375990848541264,
"step": 4050
},
{
"MSE": 520.2196419270836,
"MSE/layer0": 520.2196419270836,
"dead_code_fraction": 0.2615,
"dead_code_fraction/layer0": 0.2615,
"epoch": 0.41,
"input_norm": 31.99826691627503,
"input_norm/layer0": 31.99826691627503,
"learning_rate": 0.005,
"loss": 2.0983,
"max_norm": 179.91224670410156,
"max_norm/layer0": 179.91224670410156,
"mean_norm": 58.826820373535156,
"mean_norm/layer0": 58.826820373535156,
"multicode_k": 1,
"output_norm": 22.372630256017054,
"output_norm/layer0": 22.372630256017054,
"step": 4100
},
{
"MSE": 519.6039750162761,
"MSE/layer0": 519.6039750162761,
"dead_code_fraction": 0.26085,
"dead_code_fraction/layer0": 0.26085,
"epoch": 0.41,
"input_norm": 31.998284708658858,
"input_norm/layer0": 31.998284708658858,
"learning_rate": 0.005,
"loss": 2.0974,
"max_norm": 180.4697265625,
"max_norm/layer0": 180.4697265625,
"mean_norm": 58.97820472717285,
"mean_norm/layer0": 58.97820472717285,
"multicode_k": 1,
"output_norm": 22.377655258178706,
"output_norm/layer0": 22.377655258178706,
"step": 4150
},
{
"MSE": 518.396603800456,
"MSE/layer0": 518.396603800456,
"dead_code_fraction": 0.26145,
"dead_code_fraction/layer0": 0.26145,
"epoch": 0.42,
"input_norm": 31.998291470209757,
"input_norm/layer0": 31.998291470209757,
"learning_rate": 0.005,
"loss": 2.1155,
"max_norm": 180.8781280517578,
"max_norm/layer0": 180.8781280517578,
"mean_norm": 59.12506866455078,
"mean_norm/layer0": 59.12506866455078,
"multicode_k": 1,
"output_norm": 22.407741336822514,
"output_norm/layer0": 22.407741336822514,
"step": 4200
},
{
"MSE": 518.4843705240887,
"MSE/layer0": 518.4843705240887,
"dead_code_fraction": 0.2605,
"dead_code_fraction/layer0": 0.2605,
"epoch": 0.42,
"input_norm": 31.99829554239909,
"input_norm/layer0": 31.99829554239909,
"learning_rate": 0.005,
"loss": 2.1004,
"max_norm": 181.51483154296875,
"max_norm/layer0": 181.51483154296875,
"mean_norm": 59.271942138671875,
"mean_norm/layer0": 59.271942138671875,
"multicode_k": 1,
"output_norm": 22.40968936284383,
"output_norm/layer0": 22.40968936284383,
"step": 4250
},
{
"MSE": 518.0018126932782,
"MSE/layer0": 518.0018126932782,
"dead_code_fraction": 0.2586,
"dead_code_fraction/layer0": 0.2586,
"epoch": 0.43,
"input_norm": 31.998309599558517,
"input_norm/layer0": 31.998309599558517,
"learning_rate": 0.005,
"loss": 2.0848,
"max_norm": 181.8904266357422,
"max_norm/layer0": 181.8904266357422,
"mean_norm": 59.40836715698242,
"mean_norm/layer0": 59.40836715698242,
"multicode_k": 1,
"output_norm": 22.42666608492533,
"output_norm/layer0": 22.42666608492533,
"step": 4300
},
{
"MSE": 518.2576261901858,
"MSE/layer0": 518.2576261901858,
"dead_code_fraction": 0.25825,
"dead_code_fraction/layer0": 0.25825,
"epoch": 0.43,
"input_norm": 31.99831358591716,
"input_norm/layer0": 31.99831358591716,
"learning_rate": 0.005,
"loss": 2.0778,
"max_norm": 182.52023315429688,
"max_norm/layer0": 182.52023315429688,
"mean_norm": 59.546592712402344,
"mean_norm/layer0": 59.546592712402344,
"multicode_k": 1,
"output_norm": 22.415684442520128,
"output_norm/layer0": 22.415684442520128,
"step": 4350
},
{
"MSE": 517.176724141439,
"MSE/layer0": 517.176724141439,
"dead_code_fraction": 0.25845,
"dead_code_fraction/layer0": 0.25845,
"epoch": 0.44,
"input_norm": 31.998328673044824,
"input_norm/layer0": 31.998328673044824,
"learning_rate": 0.005,
"loss": 2.0989,
"max_norm": 183.30308532714844,
"max_norm/layer0": 183.30308532714844,
"mean_norm": 59.680843353271484,
"mean_norm/layer0": 59.680843353271484,
"multicode_k": 1,
"output_norm": 22.435629587173473,
"output_norm/layer0": 22.435629587173473,
"step": 4400
},
{
"MSE": 516.945845082601,
"MSE/layer0": 516.945845082601,
"dead_code_fraction": 0.2589,
"dead_code_fraction/layer0": 0.2589,
"epoch": 0.45,
"input_norm": 31.998329006830847,
"input_norm/layer0": 31.998329006830847,
"learning_rate": 0.005,
"loss": 2.087,
"max_norm": 184.17068481445312,
"max_norm/layer0": 184.17068481445312,
"mean_norm": 59.81003379821777,
"mean_norm/layer0": 59.81003379821777,
"multicode_k": 1,
"output_norm": 22.447185754775994,
"output_norm/layer0": 22.447185754775994,
"step": 4450
},
{
"MSE": 517.1110377502445,
"MSE/layer0": 517.1110377502445,
"dead_code_fraction": 0.25715,
"dead_code_fraction/layer0": 0.25715,
"epoch": 0.45,
"input_norm": 31.998346713384,
"input_norm/layer0": 31.998346713384,
"learning_rate": 0.005,
"loss": 2.1314,
"max_norm": 185.53944396972656,
"max_norm/layer0": 185.53944396972656,
"mean_norm": 59.940223693847656,
"mean_norm/layer0": 59.940223693847656,
"multicode_k": 1,
"output_norm": 22.444066270192472,
"output_norm/layer0": 22.444066270192472,
"step": 4500
},
{
"epoch": 0.45,
"eval_MSE/layer0": 517.038530914551,
"eval_accuracy": 0.5114514130862962,
"eval_dead_code_fraction/layer0": 0.25675,
"eval_input_norm/layer0": 31.998349543131468,
"eval_loss": 2.0859904289245605,
"eval_multicode_k": 1,
"eval_output_norm/layer0": 22.471955899059722,
"eval_runtime": 158.2677,
"eval_samples_per_second": 29.21,
"eval_steps_per_second": 1.826,
"step": 4500
},
{
"MSE": 516.5108834838866,
"MSE/layer0": 516.5108834838866,
"dead_code_fraction": 0.2556,
"dead_code_fraction/layer0": 0.2556,
"epoch": 0.46,
"input_norm": 31.99834162076315,
"input_norm/layer0": 31.99834162076315,
"learning_rate": 0.005,
"loss": 2.0594,
"max_norm": 186.7916259765625,
"max_norm/layer0": 186.7916259765625,
"mean_norm": 60.06948661804199,
"mean_norm/layer0": 60.06948661804199,
"multicode_k": 1,
"output_norm": 22.45672461509705,
"output_norm/layer0": 22.45672461509705,
"step": 4550
},
{
"MSE": 517.5840914408367,
"MSE/layer0": 517.5840914408367,
"dead_code_fraction": 0.25275,
"dead_code_fraction/layer0": 0.25275,
"epoch": 0.46,
"input_norm": 31.998361612955726,
"input_norm/layer0": 31.998361612955726,
"learning_rate": 0.005,
"loss": 2.116,
"max_norm": 187.44607543945312,
"max_norm/layer0": 187.44607543945312,
"mean_norm": 60.1932258605957,
"mean_norm/layer0": 60.1932258605957,
"multicode_k": 1,
"output_norm": 22.445463349024458,
"output_norm/layer0": 22.445463349024458,
"step": 4600
},
{
"MSE": 515.9212077331542,
"MSE/layer0": 515.9212077331542,
"dead_code_fraction": 0.2549,
"dead_code_fraction/layer0": 0.2549,
"epoch": 0.47,
"input_norm": 31.998358809153245,
"input_norm/layer0": 31.998358809153245,
"learning_rate": 0.005,
"loss": 2.0851,
"max_norm": 187.99063110351562,
"max_norm/layer0": 187.99063110351562,
"mean_norm": 60.3179931640625,
"mean_norm/layer0": 60.3179931640625,
"multicode_k": 1,
"output_norm": 22.468881686528533,
"output_norm/layer0": 22.468881686528533,
"step": 4650
},
{
"MSE": 516.2712020365398,
"MSE/layer0": 516.2712020365398,
"dead_code_fraction": 0.2539,
"dead_code_fraction/layer0": 0.2539,
"epoch": 0.47,
"input_norm": 31.99836943308513,
"input_norm/layer0": 31.99836943308513,
"learning_rate": 0.005,
"loss": 2.0646,
"max_norm": 188.7075653076172,
"max_norm/layer0": 188.7075653076172,
"mean_norm": 60.442317962646484,
"mean_norm/layer0": 60.442317962646484,
"multicode_k": 1,
"output_norm": 22.458747002283737,
"output_norm/layer0": 22.458747002283737,
"step": 4700
},
{
"MSE": 515.2177518717448,
"MSE/layer0": 515.2177518717448,
"dead_code_fraction": 0.25225,
"dead_code_fraction/layer0": 0.25225,
"epoch": 0.47,
"input_norm": 31.99837938944498,
"input_norm/layer0": 31.99837938944498,
"learning_rate": 0.005,
"loss": 2.0508,
"max_norm": 189.4132080078125,
"max_norm/layer0": 189.4132080078125,
"mean_norm": 60.56760787963867,
"mean_norm/layer0": 60.56760787963867,
"multicode_k": 1,
"output_norm": 22.48432564417522,
"output_norm/layer0": 22.48432564417522,
"step": 4750
},
{
"MSE": 517.7876967760659,
"MSE/layer0": 517.7876967760659,
"dead_code_fraction": 0.2504,
"dead_code_fraction/layer0": 0.2504,
"epoch": 1.0,
"input_norm": 31.998372135461928,
"input_norm/layer0": 31.998372135461928,
"learning_rate": 0.005,
"loss": 2.0347,
"max_norm": 189.93084716796875,
"max_norm/layer0": 189.93084716796875,
"mean_norm": 60.689674377441406,
"mean_norm/layer0": 60.689674377441406,
"multicode_k": 1,
"output_norm": 22.4315491425679,
"output_norm/layer0": 22.4315491425679,
"step": 4800
},
{
"MSE": 515.4498620096845,
"MSE/layer0": 515.4498620096845,
"dead_code_fraction": 0.2505,
"dead_code_fraction/layer0": 0.2505,
"epoch": 1.01,
"input_norm": 31.998399356206253,
"input_norm/layer0": 31.998399356206253,
"learning_rate": 0.005,
"loss": 2.1351,
"max_norm": 190.8528289794922,
"max_norm/layer0": 190.8528289794922,
"mean_norm": 60.80255126953125,
"mean_norm/layer0": 60.80255126953125,
"multicode_k": 1,
"output_norm": 22.488870484034226,
"output_norm/layer0": 22.488870484034226,
"step": 4850
},
{
"MSE": 515.1998943074543,
"MSE/layer0": 515.1998943074543,
"dead_code_fraction": 0.24975,
"dead_code_fraction/layer0": 0.24975,
"epoch": 1.01,
"input_norm": 31.998391094207765,
"input_norm/layer0": 31.998391094207765,
"learning_rate": 0.005,
"loss": 2.0344,
"max_norm": 191.88272094726562,
"max_norm/layer0": 191.88272094726562,
"mean_norm": 60.923635482788086,
"mean_norm/layer0": 60.923635482788086,
"multicode_k": 1,
"output_norm": 22.493143533070885,
"output_norm/layer0": 22.493143533070885,
"step": 4900
},
{
"MSE": 516.1670984395346,
"MSE/layer0": 516.1670984395346,
"dead_code_fraction": 0.2478,
"dead_code_fraction/layer0": 0.2478,
"epoch": 1.02,
"input_norm": 31.99841277122497,
"input_norm/layer0": 31.99841277122497,
"learning_rate": 0.005,
"loss": 2.0591,
"max_norm": 192.84405517578125,
"max_norm/layer0": 192.84405517578125,
"mean_norm": 61.04226303100586,
"mean_norm/layer0": 61.04226303100586,
"multicode_k": 1,
"output_norm": 22.47217222531637,
"output_norm/layer0": 22.47217222531637,
"step": 4950
},
{
"MSE": 515.1936482747396,
"MSE/layer0": 515.1936482747396,
"dead_code_fraction": 0.2468,
"dead_code_fraction/layer0": 0.2468,
"epoch": 1.02,
"input_norm": 31.998419497807816,
"input_norm/layer0": 31.998419497807816,
"learning_rate": 0.005,
"loss": 2.0685,
"max_norm": 193.819580078125,
"max_norm/layer0": 193.819580078125,
"mean_norm": 61.15685844421387,
"mean_norm/layer0": 61.15685844421387,
"multicode_k": 1,
"output_norm": 22.499980732599887,
"output_norm/layer0": 22.499980732599887,
"step": 5000
},
{
"epoch": 1.02,
"eval_MSE/layer0": 514.3711726943474,
"eval_accuracy": 0.5131406590660113,
"eval_dead_code_fraction/layer0": 0.24975,
"eval_input_norm/layer0": 31.998424857410036,
"eval_loss": 2.076988458633423,
"eval_multicode_k": 1,
"eval_output_norm/layer0": 22.494330299537523,
"eval_runtime": 157.9418,
"eval_samples_per_second": 29.27,
"eval_steps_per_second": 1.83,
"step": 5000
},
{
"MSE": 515.1962452189127,
"MSE/layer0": 515.1962452189127,
"dead_code_fraction": 0.24585,
"dead_code_fraction/layer0": 0.24585,
"epoch": 1.03,
"input_norm": 31.998427387873342,
"input_norm/layer0": 31.998427387873342,
"learning_rate": 0.005,
"loss": 2.07,
"max_norm": 194.8584442138672,
"max_norm/layer0": 194.8584442138672,
"mean_norm": 61.2799015045166,
"mean_norm/layer0": 61.2799015045166,
"multicode_k": 1,
"output_norm": 22.492868417104084,
"output_norm/layer0": 22.492868417104084,
"step": 5050
},
{
"MSE": 514.6688102722171,
"MSE/layer0": 514.6688102722171,
"dead_code_fraction": 0.24495,
"dead_code_fraction/layer0": 0.24495,
"epoch": 1.03,
"input_norm": 31.99842824935913,
"input_norm/layer0": 31.99842824935913,
"learning_rate": 0.005,
"loss": 2.0308,
"max_norm": 195.33718872070312,
"max_norm/layer0": 195.33718872070312,
"mean_norm": 61.397443771362305,
"mean_norm/layer0": 61.397443771362305,
"multicode_k": 1,
"output_norm": 22.502648471196487,
"output_norm/layer0": 22.502648471196487,
"step": 5100
},
{
"MSE": 515.46877843221,
"MSE/layer0": 515.46877843221,
"dead_code_fraction": 0.2426,
"dead_code_fraction/layer0": 0.2426,
"epoch": 1.04,
"input_norm": 31.998430423736572,
"input_norm/layer0": 31.998430423736572,
"learning_rate": 0.005,
"loss": 2.0427,
"max_norm": 195.8143310546875,
"max_norm/layer0": 195.8143310546875,
"mean_norm": 61.51255416870117,
"mean_norm/layer0": 61.51255416870117,
"multicode_k": 1,
"output_norm": 22.500031328201295,
"output_norm/layer0": 22.500031328201295,
"step": 5150
},
{
"MSE": 515.1060639953612,
"MSE/layer0": 515.1060639953612,
"dead_code_fraction": 0.2439,
"dead_code_fraction/layer0": 0.2439,
"epoch": 1.04,
"input_norm": 31.998455877304075,
"input_norm/layer0": 31.998455877304075,
"learning_rate": 0.005,
"loss": 2.1036,
"max_norm": 196.40415954589844,
"max_norm/layer0": 196.40415954589844,
"mean_norm": 61.620216369628906,
"mean_norm/layer0": 61.620216369628906,
"multicode_k": 1,
"output_norm": 22.504082736968975,
"output_norm/layer0": 22.504082736968975,
"step": 5200
},
{
"MSE": 514.64603418986,
"MSE/layer0": 514.64603418986,
"dead_code_fraction": 0.24415,
"dead_code_fraction/layer0": 0.24415,
"epoch": 1.05,
"input_norm": 31.99846438090008,
"input_norm/layer0": 31.99846438090008,
"learning_rate": 0.005,
"loss": 2.1032,
"max_norm": 197.31690979003906,
"max_norm/layer0": 197.31690979003906,
"mean_norm": 61.73128890991211,
"mean_norm/layer0": 61.73128890991211,
"multicode_k": 1,
"output_norm": 22.51759773572286,
"output_norm/layer0": 22.51759773572286,
"step": 5250
},
{
"MSE": 514.5095549011231,
"MSE/layer0": 514.5095549011231,
"dead_code_fraction": 0.24245,
"dead_code_fraction/layer0": 0.24245,
"epoch": 1.05,
"input_norm": 31.998469727834063,
"input_norm/layer0": 31.998469727834063,
"learning_rate": 0.005,
"loss": 2.0884,
"max_norm": 198.30520629882812,
"max_norm/layer0": 198.30520629882812,
"mean_norm": 61.84503173828125,
"mean_norm/layer0": 61.84503173828125,
"multicode_k": 1,
"output_norm": 22.52236960728964,
"output_norm/layer0": 22.52236960728964,
"step": 5300
},
{
"MSE": 514.2185153198242,
"MSE/layer0": 514.2185153198242,
"dead_code_fraction": 0.2423,
"dead_code_fraction/layer0": 0.2423,
"epoch": 1.06,
"input_norm": 31.99846864700317,
"input_norm/layer0": 31.99846864700317,
"learning_rate": 0.005,
"loss": 2.0541,
"max_norm": 198.76315307617188,
"max_norm/layer0": 198.76315307617188,
"mean_norm": 61.954532623291016,
"mean_norm/layer0": 61.954532623291016,
"multicode_k": 1,
"output_norm": 22.523964621225986,
"output_norm/layer0": 22.523964621225986,
"step": 5350
},
{
"MSE": 514.2201423136396,
"MSE/layer0": 514.2201423136396,
"dead_code_fraction": 0.24065,
"dead_code_fraction/layer0": 0.24065,
"epoch": 1.06,
"input_norm": 31.99848121643067,
"input_norm/layer0": 31.99848121643067,
"learning_rate": 0.005,
"loss": 2.0722,
"max_norm": 199.5216522216797,
"max_norm/layer0": 199.5216522216797,
"mean_norm": 62.062015533447266,
"mean_norm/layer0": 62.062015533447266,
"multicode_k": 1,
"output_norm": 22.529434289932254,
"output_norm/layer0": 22.529434289932254,
"step": 5400
},
{
"MSE": 513.0346335347496,
"MSE/layer0": 513.0346335347496,
"dead_code_fraction": 0.2396,
"dead_code_fraction/layer0": 0.2396,
"epoch": 1.07,
"input_norm": 31.998482402165727,
"input_norm/layer0": 31.998482402165727,
"learning_rate": 0.005,
"loss": 2.0839,
"max_norm": 199.89144897460938,
"max_norm/layer0": 199.89144897460938,
"mean_norm": 62.16894721984863,
"mean_norm/layer0": 62.16894721984863,
"multicode_k": 1,
"output_norm": 22.549472332000725,
"output_norm/layer0": 22.549472332000725,
"step": 5450
},
{
"MSE": 512.9845250447588,
"MSE/layer0": 512.9845250447588,
"dead_code_fraction": 0.23995,
"dead_code_fraction/layer0": 0.23995,
"epoch": 1.07,
"input_norm": 31.99848415692648,
"input_norm/layer0": 31.99848415692648,
"learning_rate": 0.005,
"loss": 2.0496,
"max_norm": 200.10585021972656,
"max_norm/layer0": 200.10585021972656,
"mean_norm": 62.28166961669922,
"mean_norm/layer0": 62.28166961669922,
"multicode_k": 1,
"output_norm": 22.535003283818554,
"output_norm/layer0": 22.535003283818554,
"step": 5500
},
{
"epoch": 1.07,
"eval_MSE/layer0": 513.7822700020247,
"eval_accuracy": 0.5137449731240944,
"eval_dead_code_fraction/layer0": 0.23805,
"eval_input_norm/layer0": 31.998499035448475,
"eval_loss": 2.0730204582214355,
"eval_multicode_k": 1,
"eval_output_norm/layer0": 22.562518398921107,
"eval_runtime": 158.2513,
"eval_samples_per_second": 29.213,
"eval_steps_per_second": 1.826,
"step": 5500
},
{
"MSE": 513.4142807515464,
"MSE/layer0": 513.4142807515464,
"dead_code_fraction": 0.2388,
"dead_code_fraction/layer0": 0.2388,
"epoch": 1.08,
"input_norm": 31.99849408785501,
"input_norm/layer0": 31.99849408785501,
"learning_rate": 0.005,
"loss": 2.0918,
"max_norm": 200.3399200439453,
"max_norm/layer0": 200.3399200439453,
"mean_norm": 62.38692855834961,
"mean_norm/layer0": 62.38692855834961,
"multicode_k": 1,
"output_norm": 22.54020097732544,
"output_norm/layer0": 22.54020097732544,
"step": 5550
},
{
"MSE": 512.2161093648273,
"MSE/layer0": 512.2161093648273,
"dead_code_fraction": 0.23455,
"dead_code_fraction/layer0": 0.23455,
"epoch": 1.08,
"input_norm": 31.998499333063755,
"input_norm/layer0": 31.998499333063755,
"learning_rate": 0.005,
"loss": 2.0812,
"max_norm": 200.90451049804688,
"max_norm/layer0": 200.90451049804688,
"mean_norm": 62.49030685424805,
"mean_norm/layer0": 62.49030685424805,
"multicode_k": 1,
"output_norm": 22.562892615000422,
"output_norm/layer0": 22.562892615000422,
"step": 5600
},
{
"MSE": 513.2079597473146,
"MSE/layer0": 513.2079597473146,
"dead_code_fraction": 0.2364,
"dead_code_fraction/layer0": 0.2364,
"epoch": 1.09,
"input_norm": 31.998499097824094,
"input_norm/layer0": 31.998499097824094,
"learning_rate": 0.005,
"loss": 2.0405,
"max_norm": 201.2469940185547,
"max_norm/layer0": 201.2469940185547,
"mean_norm": 62.587249755859375,
"mean_norm/layer0": 62.587249755859375,
"multicode_k": 1,
"output_norm": 22.551958309809354,
"output_norm/layer0": 22.551958309809354,
"step": 5650
},
{
"MSE": 512.3663133748375,
"MSE/layer0": 512.3663133748375,
"dead_code_fraction": 0.2359,
"dead_code_fraction/layer0": 0.2359,
"epoch": 1.09,
"input_norm": 31.998522087732937,
"input_norm/layer0": 31.998522087732937,
"learning_rate": 0.005,
"loss": 2.0976,
"max_norm": 202.06686401367188,
"max_norm/layer0": 202.06686401367188,
"mean_norm": 62.68406677246094,
"mean_norm/layer0": 62.68406677246094,
"multicode_k": 1,
"output_norm": 22.56861629168192,
"output_norm/layer0": 22.56861629168192,
"step": 5700
},
{
"MSE": 513.556918741862,
"MSE/layer0": 513.556918741862,
"dead_code_fraction": 0.2325,
"dead_code_fraction/layer0": 0.2325,
"epoch": 1.1,
"input_norm": 31.99852702458699,
"input_norm/layer0": 31.99852702458699,
"learning_rate": 0.005,
"loss": 2.0531,
"max_norm": 202.5853729248047,
"max_norm/layer0": 202.5853729248047,
"mean_norm": 62.78022766113281,
"mean_norm/layer0": 62.78022766113281,
"multicode_k": 1,
"output_norm": 22.55354828198752,
"output_norm/layer0": 22.55354828198752,
"step": 5750
},
{
"MSE": 514.1225356547038,
"MSE/layer0": 514.1225356547038,
"dead_code_fraction": 0.23125,
"dead_code_fraction/layer0": 0.23125,
"epoch": 1.1,
"input_norm": 31.998530540466305,
"input_norm/layer0": 31.998530540466305,
"learning_rate": 0.005,
"loss": 2.0333,
"max_norm": 202.8258514404297,
"max_norm/layer0": 202.8258514404297,
"mean_norm": 62.881099700927734,
"mean_norm/layer0": 62.881099700927734,
"multicode_k": 1,
"output_norm": 22.538857170740776,
"output_norm/layer0": 22.538857170740776,
"step": 5800
},
{
"MSE": 512.891567026774,
"MSE/layer0": 512.891567026774,
"dead_code_fraction": 0.23305,
"dead_code_fraction/layer0": 0.23305,
"epoch": 1.11,
"input_norm": 31.998542674382527,
"input_norm/layer0": 31.998542674382527,
"learning_rate": 0.005,
"loss": 2.0894,
"max_norm": 203.2826385498047,
"max_norm/layer0": 203.2826385498047,
"mean_norm": 62.98002815246582,
"mean_norm/layer0": 62.98002815246582,
"multicode_k": 1,
"output_norm": 22.556459398269645,
"output_norm/layer0": 22.556459398269645,
"step": 5850
},
{
"MSE": 512.6300255839031,
"MSE/layer0": 512.6300255839031,
"dead_code_fraction": 0.23175,
"dead_code_fraction/layer0": 0.23175,
"epoch": 1.11,
"input_norm": 31.998538637161257,
"input_norm/layer0": 31.998538637161257,
"learning_rate": 0.005,
"loss": 2.0371,
"max_norm": 203.56114196777344,
"max_norm/layer0": 203.56114196777344,
"mean_norm": 63.085018157958984,
"mean_norm/layer0": 63.085018157958984,
"multicode_k": 1,
"output_norm": 22.55499767621359,
"output_norm/layer0": 22.55499767621359,
"step": 5900
},
{
"MSE": 512.6470455423993,
"MSE/layer0": 512.6470455423993,
"dead_code_fraction": 0.22945,
"dead_code_fraction/layer0": 0.22945,
"epoch": 1.12,
"input_norm": 31.998542264302582,
"input_norm/layer0": 31.998542264302582,
"learning_rate": 0.005,
"loss": 2.0693,
"max_norm": 204.18482971191406,
"max_norm/layer0": 204.18482971191406,
"mean_norm": 63.186561584472656,
"mean_norm/layer0": 63.186561584472656,
"multicode_k": 1,
"output_norm": 22.56271686236063,
"output_norm/layer0": 22.56271686236063,
"step": 5950
},
{
"MSE": 512.2647941589354,
"MSE/layer0": 512.2647941589354,
"dead_code_fraction": 0.23005,
"dead_code_fraction/layer0": 0.23005,
"epoch": 1.12,
"input_norm": 31.99855575561523,
"input_norm/layer0": 31.99855575561523,
"learning_rate": 0.005,
"loss": 2.1002,
"max_norm": 204.59375,
"max_norm/layer0": 204.59375,
"mean_norm": 63.287431716918945,
"mean_norm/layer0": 63.287431716918945,
"multicode_k": 1,
"output_norm": 22.56941809654236,
"output_norm/layer0": 22.56941809654236,
"step": 6000
},
{
"epoch": 1.12,
"eval_MSE/layer0": 510.787595085063,
"eval_accuracy": 0.5144414778502405,
"eval_dead_code_fraction/layer0": 0.2305,
"eval_input_norm/layer0": 31.998558920130655,
"eval_loss": 2.0667405128479004,
"eval_multicode_k": 1,
"eval_output_norm/layer0": 22.588203073708286,
"eval_runtime": 158.1457,
"eval_samples_per_second": 29.233,
"eval_steps_per_second": 1.827,
"step": 6000
},
{
"MSE": 512.2951668802899,
"MSE/layer0": 512.2951668802899,
"dead_code_fraction": 0.2284,
"dead_code_fraction/layer0": 0.2284,
"epoch": 1.13,
"input_norm": 31.998558203379314,
"input_norm/layer0": 31.998558203379314,
"learning_rate": 0.005,
"loss": 2.0645,
"max_norm": 205.16860961914062,
"max_norm/layer0": 205.16860961914062,
"mean_norm": 63.38333702087402,
"mean_norm/layer0": 63.38333702087402,
"multicode_k": 1,
"output_norm": 22.570796213150032,
"output_norm/layer0": 22.570796213150032,
"step": 6050
},
{
"MSE": 512.1381341044107,
"MSE/layer0": 512.1381341044107,
"dead_code_fraction": 0.22855,
"dead_code_fraction/layer0": 0.22855,
"epoch": 1.13,
"input_norm": 31.99856172879537,
"input_norm/layer0": 31.99856172879537,
"learning_rate": 0.005,
"loss": 2.0856,
"max_norm": 205.37376403808594,
"max_norm/layer0": 205.37376403808594,
"mean_norm": 63.47422790527344,
"mean_norm/layer0": 63.47422790527344,
"multicode_k": 1,
"output_norm": 22.58201634724934,
"output_norm/layer0": 22.58201634724934,
"step": 6100
},
{
"MSE": 512.7539996846516,
"MSE/layer0": 512.7539996846516,
"dead_code_fraction": 0.22755,
"dead_code_fraction/layer0": 0.22755,
"epoch": 1.14,
"input_norm": 31.99856161753337,
"input_norm/layer0": 31.99856161753337,
"learning_rate": 0.005,
"loss": 2.0468,
"max_norm": 205.76866149902344,
"max_norm/layer0": 205.76866149902344,
"mean_norm": 63.56420707702637,
"mean_norm/layer0": 63.56420707702637,
"multicode_k": 1,
"output_norm": 22.575629208882646,
"output_norm/layer0": 22.575629208882646,
"step": 6150
},
{
"MSE": 512.1215149434411,
"MSE/layer0": 512.1215149434411,
"dead_code_fraction": 0.2268,
"dead_code_fraction/layer0": 0.2268,
"epoch": 1.14,
"input_norm": 31.998577674229928,
"input_norm/layer0": 31.998577674229928,
"learning_rate": 0.005,
"loss": 2.0663,
"max_norm": 206.54251098632812,
"max_norm/layer0": 206.54251098632812,
"mean_norm": 63.64880561828613,
"mean_norm/layer0": 63.64880561828613,
"multicode_k": 1,
"output_norm": 22.58266611417133,
"output_norm/layer0": 22.58266611417133,
"step": 6200
},
{
"MSE": 513.3685421752932,
"MSE/layer0": 513.3685421752932,
"dead_code_fraction": 0.22515,
"dead_code_fraction/layer0": 0.22515,
"epoch": 1.15,
"input_norm": 31.998585720062266,
"input_norm/layer0": 31.998585720062266,
"learning_rate": 0.005,
"loss": 2.0807,
"max_norm": 207.23460388183594,
"max_norm/layer0": 207.23460388183594,
"mean_norm": 63.73150444030762,
"mean_norm/layer0": 63.73150444030762,
"multicode_k": 1,
"output_norm": 22.574931882222508,
"output_norm/layer0": 22.574931882222508,
"step": 6250
},
{
"MSE": 512.1649493916829,
"MSE/layer0": 512.1649493916829,
"dead_code_fraction": 0.2243,
"dead_code_fraction/layer0": 0.2243,
"epoch": 1.15,
"input_norm": 31.99859083811442,
"input_norm/layer0": 31.99859083811442,
"learning_rate": 0.005,
"loss": 1.9994,
"max_norm": 207.4078826904297,
"max_norm/layer0": 207.4078826904297,
"mean_norm": 63.8239631652832,
"mean_norm/layer0": 63.8239631652832,
"multicode_k": 1,
"output_norm": 22.573653513590493,
"output_norm/layer0": 22.573653513590493,
"step": 6300
},
{
"MSE": 512.4084614054359,
"MSE/layer0": 512.4084614054359,
"dead_code_fraction": 0.22405,
"dead_code_fraction/layer0": 0.22405,
"epoch": 1.16,
"input_norm": 31.998589369455978,
"input_norm/layer0": 31.998589369455978,
"learning_rate": 0.005,
"loss": 2.0383,
"max_norm": 207.421875,
"max_norm/layer0": 207.421875,
"mean_norm": 63.91918754577637,
"mean_norm/layer0": 63.91918754577637,
"multicode_k": 1,
"output_norm": 22.570101757049564,
"output_norm/layer0": 22.570101757049564,
"step": 6350
},
{
"MSE": 511.3037980651857,
"MSE/layer0": 511.3037980651857,
"dead_code_fraction": 0.22325,
"dead_code_fraction/layer0": 0.22325,
"epoch": 1.16,
"input_norm": 31.9986056105296,
"input_norm/layer0": 31.9986056105296,
"learning_rate": 0.005,
"loss": 2.0836,
"max_norm": 207.90211486816406,
"max_norm/layer0": 207.90211486816406,
"mean_norm": 64.0091323852539,
"mean_norm/layer0": 64.0091323852539,
"multicode_k": 1,
"output_norm": 22.591040735244757,
"output_norm/layer0": 22.591040735244757,
"step": 6400
},
{
"MSE": 511.63349212646506,
"MSE/layer0": 511.63349212646506,
"dead_code_fraction": 0.2231,
"dead_code_fraction/layer0": 0.2231,
"epoch": 1.17,
"input_norm": 31.998600152333573,
"input_norm/layer0": 31.998600152333573,
"learning_rate": 0.005,
"loss": 2.049,
"max_norm": 208.1908416748047,
"max_norm/layer0": 208.1908416748047,
"mean_norm": 64.09888458251953,
"mean_norm/layer0": 64.09888458251953,
"multicode_k": 1,
"output_norm": 22.598680645624796,
"output_norm/layer0": 22.598680645624796,
"step": 6450
},
{
"MSE": 510.1135516866045,
"MSE/layer0": 510.1135516866045,
"dead_code_fraction": 0.2198,
"dead_code_fraction/layer0": 0.2198,
"epoch": 1.17,
"input_norm": 31.99861148198446,
"input_norm/layer0": 31.99861148198446,
"learning_rate": 0.005,
"loss": 2.0723,
"max_norm": 208.76829528808594,
"max_norm/layer0": 208.76829528808594,
"mean_norm": 64.1937198638916,
"mean_norm/layer0": 64.1937198638916,
"multicode_k": 1,
"output_norm": 22.610935223897293,
"output_norm/layer0": 22.610935223897293,
"step": 6500
},
{
"epoch": 1.17,
"eval_MSE/layer0": 510.5624312578848,
"eval_accuracy": 0.5148121435408701,
"eval_dead_code_fraction/layer0": 0.2206,
"eval_input_norm/layer0": 31.99861497196212,
"eval_loss": 2.0631778240203857,
"eval_multicode_k": 1,
"eval_output_norm/layer0": 22.613337997850753,
"eval_runtime": 157.9752,
"eval_samples_per_second": 29.264,
"eval_steps_per_second": 1.829,
"step": 6500
},
{
"MSE": 511.1518494669597,
"MSE/layer0": 511.1518494669597,
"dead_code_fraction": 0.2202,
"dead_code_fraction/layer0": 0.2202,
"epoch": 1.18,
"input_norm": 31.998620487848918,
"input_norm/layer0": 31.998620487848918,
"learning_rate": 0.005,
"loss": 2.0713,
"max_norm": 209.1894989013672,
"max_norm/layer0": 209.1894989013672,
"mean_norm": 64.28516006469727,
"mean_norm/layer0": 64.28516006469727,
"multicode_k": 1,
"output_norm": 22.59137951215108,
"output_norm/layer0": 22.59137951215108,
"step": 6550
},
{
"MSE": 511.4045928446453,
"MSE/layer0": 511.4045928446453,
"dead_code_fraction": 0.2177,
"dead_code_fraction/layer0": 0.2177,
"epoch": 1.18,
"input_norm": 31.99861013412476,
"input_norm/layer0": 31.99861013412476,
"learning_rate": 0.005,
"loss": 2.0067,
"max_norm": 209.52085876464844,
"max_norm/layer0": 209.52085876464844,
"mean_norm": 64.37364196777344,
"mean_norm/layer0": 64.37364196777344,
"multicode_k": 1,
"output_norm": 22.59921900431315,
"output_norm/layer0": 22.59921900431315,
"step": 6600
},
{
"MSE": 510.8443921915694,
"MSE/layer0": 510.8443921915694,
"dead_code_fraction": 0.21885,
"dead_code_fraction/layer0": 0.21885,
"epoch": 1.19,
"input_norm": 31.998613767623894,
"input_norm/layer0": 31.998613767623894,
"learning_rate": 0.005,
"loss": 2.013,
"max_norm": 209.98431396484375,
"max_norm/layer0": 209.98431396484375,
"mean_norm": 64.46432113647461,
"mean_norm/layer0": 64.46432113647461,
"multicode_k": 1,
"output_norm": 22.60319686889649,
"output_norm/layer0": 22.60319686889649,
"step": 6650
},
{
"MSE": 510.0460713704424,
"MSE/layer0": 510.0460713704424,
"dead_code_fraction": 0.2183,
"dead_code_fraction/layer0": 0.2183,
"epoch": 1.19,
"input_norm": 31.998641831080132,
"input_norm/layer0": 31.998641831080132,
"learning_rate": 0.005,
"loss": 2.1151,
"max_norm": 210.53810119628906,
"max_norm/layer0": 210.53810119628906,
"mean_norm": 64.55224609375,
"mean_norm/layer0": 64.55224609375,
"multicode_k": 1,
"output_norm": 22.621459808349613,
"output_norm/layer0": 22.621459808349613,
"step": 6700
},
{
"MSE": 509.41305414835614,
"MSE/layer0": 509.41305414835614,
"dead_code_fraction": 0.2172,
"dead_code_fraction/layer0": 0.2172,
"epoch": 1.2,
"input_norm": 31.998632535934448,
"input_norm/layer0": 31.998632535934448,
"learning_rate": 0.005,
"loss": 2.0318,
"max_norm": 210.88394165039062,
"max_norm/layer0": 210.88394165039062,
"mean_norm": 64.64096069335938,
"mean_norm/layer0": 64.64096069335938,
"multicode_k": 1,
"output_norm": 22.626508464813227,
"output_norm/layer0": 22.626508464813227,
"step": 6750
},
{
"MSE": 510.3878801981608,
"MSE/layer0": 510.3878801981608,
"dead_code_fraction": 0.21645,
"dead_code_fraction/layer0": 0.21645,
"epoch": 1.2,
"input_norm": 31.998641300201413,
"input_norm/layer0": 31.998641300201413,
"learning_rate": 0.005,
"loss": 2.0492,
"max_norm": 211.13937377929688,
"max_norm/layer0": 211.13937377929688,
"mean_norm": 64.7227783203125,
"mean_norm/layer0": 64.7227783203125,
"multicode_k": 1,
"output_norm": 22.63286488850911,
"output_norm/layer0": 22.63286488850911,
"step": 6800
},
{
"MSE": 509.17419825236027,
"MSE/layer0": 509.17419825236027,
"dead_code_fraction": 0.2138,
"dead_code_fraction/layer0": 0.2138,
"epoch": 1.21,
"input_norm": 31.998648115793856,
"input_norm/layer0": 31.998648115793856,
"learning_rate": 0.005,
"loss": 2.0467,
"max_norm": 211.3644256591797,
"max_norm/layer0": 211.3644256591797,
"mean_norm": 64.80514907836914,
"mean_norm/layer0": 64.80514907836914,
"multicode_k": 1,
"output_norm": 22.63950007438659,
"output_norm/layer0": 22.63950007438659,
"step": 6850
},
{
"MSE": 509.3450110371906,
"MSE/layer0": 509.3450110371906,
"dead_code_fraction": 0.2144,
"dead_code_fraction/layer0": 0.2144,
"epoch": 1.21,
"input_norm": 31.998654368718455,
"input_norm/layer0": 31.998654368718455,
"learning_rate": 0.005,
"loss": 2.0327,
"max_norm": 211.51609802246094,
"max_norm/layer0": 211.51609802246094,
"mean_norm": 64.885498046875,
"mean_norm/layer0": 64.885498046875,
"multicode_k": 1,
"output_norm": 22.636532586415615,
"output_norm/layer0": 22.636532586415615,
"step": 6900
},
{
"MSE": 509.711417948405,
"MSE/layer0": 509.711417948405,
"dead_code_fraction": 0.2121,
"dead_code_fraction/layer0": 0.2121,
"epoch": 1.22,
"input_norm": 31.998653659820555,
"input_norm/layer0": 31.998653659820555,
"learning_rate": 0.005,
"loss": 2.0344,
"max_norm": 211.93910217285156,
"max_norm/layer0": 211.93910217285156,
"mean_norm": 64.96215629577637,
"mean_norm/layer0": 64.96215629577637,
"multicode_k": 1,
"output_norm": 22.642279275258375,
"output_norm/layer0": 22.642279275258375,
"step": 6950
},
{
"MSE": 509.53209904988614,
"MSE/layer0": 509.53209904988614,
"dead_code_fraction": 0.2112,
"dead_code_fraction/layer0": 0.2112,
"epoch": 1.22,
"input_norm": 31.99865920702616,
"input_norm/layer0": 31.99865920702616,
"learning_rate": 0.005,
"loss": 2.023,
"max_norm": 212.15188598632812,
"max_norm/layer0": 212.15188598632812,
"mean_norm": 65.03938484191895,
"mean_norm/layer0": 65.03938484191895,
"multicode_k": 1,
"output_norm": 22.641168931325275,
"output_norm/layer0": 22.641168931325275,
"step": 7000
},
{
"epoch": 1.22,
"eval_MSE/layer0": 509.9877618207523,
"eval_accuracy": 0.5156894350128739,
"eval_dead_code_fraction/layer0": 0.21105,
"eval_input_norm/layer0": 31.998664335077162,
"eval_loss": 2.0573580265045166,
"eval_multicode_k": 1,
"eval_output_norm/layer0": 22.65440880063548,
"eval_runtime": 158.8251,
"eval_samples_per_second": 29.107,
"eval_steps_per_second": 1.82,
"step": 7000
},
{
"MSE": 509.9403240458172,
"MSE/layer0": 509.9403240458172,
"dead_code_fraction": 0.2101,
"dead_code_fraction/layer0": 0.2101,
"epoch": 1.23,
"input_norm": 31.998663558959954,
"input_norm/layer0": 31.998663558959954,
"learning_rate": 0.005,
"loss": 2.0391,
"max_norm": 212.45599365234375,
"max_norm/layer0": 212.45599365234375,
"mean_norm": 65.11711883544922,
"mean_norm/layer0": 65.11711883544922,
"multicode_k": 1,
"output_norm": 22.636152718861904,
"output_norm/layer0": 22.636152718861904,
"step": 7050
},
{
"MSE": 509.17088083903013,
"MSE/layer0": 509.17088083903013,
"dead_code_fraction": 0.2099,
"dead_code_fraction/layer0": 0.2099,
"epoch": 1.23,
"input_norm": 31.998674535751356,
"input_norm/layer0": 31.998674535751356,
"learning_rate": 0.005,
"loss": 2.0413,
"max_norm": 212.8926544189453,
"max_norm/layer0": 212.8926544189453,
"mean_norm": 65.19314002990723,
"mean_norm/layer0": 65.19314002990723,
"multicode_k": 1,
"output_norm": 22.652867739995315,
"output_norm/layer0": 22.652867739995315,
"step": 7100
},
{
"MSE": 509.09580220540397,
"MSE/layer0": 509.09580220540397,
"dead_code_fraction": 0.209,
"dead_code_fraction/layer0": 0.209,
"epoch": 1.24,
"input_norm": 31.99867464383444,
"input_norm/layer0": 31.99867464383444,
"learning_rate": 0.005,
"loss": 2.0495,
"max_norm": 213.29238891601562,
"max_norm/layer0": 213.29238891601562,
"mean_norm": 65.27325248718262,
"mean_norm/layer0": 65.27325248718262,
"multicode_k": 1,
"output_norm": 22.646062428156533,
"output_norm/layer0": 22.646062428156533,
"step": 7150
},
{
"MSE": 509.9214274597167,
"MSE/layer0": 509.9214274597167,
"dead_code_fraction": 0.20905,
"dead_code_fraction/layer0": 0.20905,
"epoch": 1.24,
"input_norm": 31.998673133850097,
"input_norm/layer0": 31.998673133850097,
"learning_rate": 0.005,
"loss": 2.0462,
"max_norm": 213.58729553222656,
"max_norm/layer0": 213.58729553222656,
"mean_norm": 65.35407447814941,
"mean_norm/layer0": 65.35407447814941,
"multicode_k": 1,
"output_norm": 22.63937306404113,
"output_norm/layer0": 22.63937306404113,
"step": 7200
},
{
"MSE": 508.71533091227207,
"MSE/layer0": 508.71533091227207,
"dead_code_fraction": 0.2082,
"dead_code_fraction/layer0": 0.2082,
"epoch": 1.25,
"input_norm": 31.99868763287862,
"input_norm/layer0": 31.99868763287862,
"learning_rate": 0.005,
"loss": 2.0582,
"max_norm": 213.80873107910156,
"max_norm/layer0": 213.80873107910156,
"mean_norm": 65.43496131896973,
"mean_norm/layer0": 65.43496131896973,
"multicode_k": 1,
"output_norm": 22.648734455108645,
"output_norm/layer0": 22.648734455108645,
"step": 7250
},
{
"MSE": 507.686293182373,
"MSE/layer0": 507.686293182373,
"dead_code_fraction": 0.2066,
"dead_code_fraction/layer0": 0.2066,
"epoch": 1.25,
"input_norm": 31.998690617879234,
"input_norm/layer0": 31.998690617879234,
"learning_rate": 0.005,
"loss": 2.0485,
"max_norm": 214.17088317871094,
"max_norm/layer0": 214.17088317871094,
"mean_norm": 65.51487731933594,
"mean_norm/layer0": 65.51487731933594,
"multicode_k": 1,
"output_norm": 22.669575303395582,
"output_norm/layer0": 22.669575303395582,
"step": 7300
},
{
"MSE": 507.97169540405275,
"MSE/layer0": 507.97169540405275,
"dead_code_fraction": 0.20445,
"dead_code_fraction/layer0": 0.20445,
"epoch": 1.26,
"input_norm": 31.99869660695392,
"input_norm/layer0": 31.99869660695392,
"learning_rate": 0.005,
"loss": 2.0534,
"max_norm": 214.52955627441406,
"max_norm/layer0": 214.52955627441406,
"mean_norm": 65.59026718139648,
"mean_norm/layer0": 65.59026718139648,
"multicode_k": 1,
"output_norm": 22.678728303909296,
"output_norm/layer0": 22.678728303909296,
"step": 7350
},
{
"MSE": 507.6675502522787,
"MSE/layer0": 507.6675502522787,
"dead_code_fraction": 0.20485,
"dead_code_fraction/layer0": 0.20485,
"epoch": 1.26,
"input_norm": 31.998699353535965,
"input_norm/layer0": 31.998699353535965,
"learning_rate": 0.005,
"loss": 2.0638,
"max_norm": 214.7173614501953,
"max_norm/layer0": 214.7173614501953,
"mean_norm": 65.67013740539551,
"mean_norm/layer0": 65.67013740539551,
"multicode_k": 1,
"output_norm": 22.67898440043131,
"output_norm/layer0": 22.67898440043131,
"step": 7400
},
{
"MSE": 507.85135843912786,
"MSE/layer0": 507.85135843912786,
"dead_code_fraction": 0.2049,
"dead_code_fraction/layer0": 0.2049,
"epoch": 1.27,
"input_norm": 31.998699776331584,
"input_norm/layer0": 31.998699776331584,
"learning_rate": 0.005,
"loss": 2.0353,
"max_norm": 215.19158935546875,
"max_norm/layer0": 215.19158935546875,
"mean_norm": 65.75178337097168,
"mean_norm/layer0": 65.75178337097168,
"multicode_k": 1,
"output_norm": 22.680205952326446,
"output_norm/layer0": 22.680205952326446,
"step": 7450
},
{
"MSE": 507.253986562093,
"MSE/layer0": 507.253986562093,
"dead_code_fraction": 0.20435,
"dead_code_fraction/layer0": 0.20435,
"epoch": 1.27,
"input_norm": 31.99870971679686,
"input_norm/layer0": 31.99870971679686,
"learning_rate": 0.005,
"loss": 2.0791,
"max_norm": 215.7554931640625,
"max_norm/layer0": 215.7554931640625,
"mean_norm": 65.82438659667969,
"mean_norm/layer0": 65.82438659667969,
"multicode_k": 1,
"output_norm": 22.691158383687345,
"output_norm/layer0": 22.691158383687345,
"step": 7500
},
{
"epoch": 1.27,
"eval_MSE/layer0": 507.1513778155122,
"eval_accuracy": 0.5167855735982843,
"eval_dead_code_fraction/layer0": 0.2033,
"eval_input_norm/layer0": 31.998707461867696,
"eval_loss": 2.0513455867767334,
"eval_multicode_k": 1,
"eval_output_norm/layer0": 22.70183411032355,
"eval_runtime": 158.8151,
"eval_samples_per_second": 29.109,
"eval_steps_per_second": 1.82,
"step": 7500
},
{
"MSE": 508.0465566507977,
"MSE/layer0": 508.0465566507977,
"dead_code_fraction": 0.20265,
"dead_code_fraction/layer0": 0.20265,
"epoch": 1.28,
"input_norm": 31.9987080860138,
"input_norm/layer0": 31.9987080860138,
"learning_rate": 0.005,
"loss": 2.0357,
"max_norm": 216.1879119873047,
"max_norm/layer0": 216.1879119873047,
"mean_norm": 65.89747428894043,
"mean_norm/layer0": 65.89747428894043,
"multicode_k": 1,
"output_norm": 22.691229712168372,
"output_norm/layer0": 22.691229712168372,
"step": 7550
},
{
"MSE": 506.86150853474936,
"MSE/layer0": 506.86150853474936,
"dead_code_fraction": 0.20235,
"dead_code_fraction/layer0": 0.20235,
"epoch": 1.28,
"input_norm": 31.998724161783855,
"input_norm/layer0": 31.998724161783855,
"learning_rate": 0.005,
"loss": 2.0643,
"max_norm": 216.84507751464844,
"max_norm/layer0": 216.84507751464844,
"mean_norm": 65.96598243713379,
"mean_norm/layer0": 65.96598243713379,
"multicode_k": 1,
"output_norm": 22.70548650105794,
"output_norm/layer0": 22.70548650105794,
"step": 7600
},
{
"MSE": 508.52483596801756,
"MSE/layer0": 508.52483596801756,
"dead_code_fraction": 0.20115,
"dead_code_fraction/layer0": 0.20115,
"epoch": 1.29,
"input_norm": 31.998720836639407,
"input_norm/layer0": 31.998720836639407,
"learning_rate": 0.005,
"loss": 2.0331,
"max_norm": 217.07077026367188,
"max_norm/layer0": 217.07077026367188,
"mean_norm": 66.04256629943848,
"mean_norm/layer0": 66.04256629943848,
"multicode_k": 1,
"output_norm": 22.671403992970788,
"output_norm/layer0": 22.671403992970788,
"step": 7650
},
{
"MSE": 506.7901182556151,
"MSE/layer0": 506.7901182556151,
"dead_code_fraction": 0.20025,
"dead_code_fraction/layer0": 0.20025,
"epoch": 1.29,
"input_norm": 31.998723080952953,
"input_norm/layer0": 31.998723080952953,
"learning_rate": 0.005,
"loss": 2.0643,
"max_norm": 217.60621643066406,
"max_norm/layer0": 217.60621643066406,
"mean_norm": 66.1141586303711,
"mean_norm/layer0": 66.1141586303711,
"multicode_k": 1,
"output_norm": 22.711970895131433,
"output_norm/layer0": 22.711970895131433,
"step": 7700
},
{
"MSE": 506.4805715942383,
"MSE/layer0": 506.4805715942383,
"dead_code_fraction": 0.19955,
"dead_code_fraction/layer0": 0.19955,
"epoch": 1.3,
"input_norm": 31.998739531834943,
"input_norm/layer0": 31.998739531834943,
"learning_rate": 0.005,
"loss": 2.0999,
"max_norm": 218.18724060058594,
"max_norm/layer0": 218.18724060058594,
"mean_norm": 66.18310356140137,
"mean_norm/layer0": 66.18310356140137,
"multicode_k": 1,
"output_norm": 22.715899858474735,
"output_norm/layer0": 22.715899858474735,
"step": 7750
},
{
"MSE": 507.79560877482083,
"MSE/layer0": 507.79560877482083,
"dead_code_fraction": 0.1983,
"dead_code_fraction/layer0": 0.1983,
"epoch": 1.3,
"input_norm": 31.9987256272634,
"input_norm/layer0": 31.9987256272634,
"learning_rate": 0.005,
"loss": 2.0143,
"max_norm": 218.3722686767578,
"max_norm/layer0": 218.3722686767578,
"mean_norm": 66.25444984436035,
"mean_norm/layer0": 66.25444984436035,
"multicode_k": 1,
"output_norm": 22.692439622879014,
"output_norm/layer0": 22.692439622879014,
"step": 7800
},
{
"MSE": 507.2388439432779,
"MSE/layer0": 507.2388439432779,
"dead_code_fraction": 0.198,
"dead_code_fraction/layer0": 0.198,
"epoch": 1.31,
"input_norm": 31.998735243479416,
"input_norm/layer0": 31.998735243479416,
"learning_rate": 0.005,
"loss": 2.069,
"max_norm": 218.93580627441406,
"max_norm/layer0": 218.93580627441406,
"mean_norm": 66.32441329956055,
"mean_norm/layer0": 66.32441329956055,
"multicode_k": 1,
"output_norm": 22.703038584391276,
"output_norm/layer0": 22.703038584391276,
"step": 7850
},
{
"MSE": 508.13961395263664,
"MSE/layer0": 508.13961395263664,
"dead_code_fraction": 0.19705,
"dead_code_fraction/layer0": 0.19705,
"epoch": 1.31,
"input_norm": 31.99873922983806,
"input_norm/layer0": 31.99873922983806,
"learning_rate": 0.005,
"loss": 2.0712,
"max_norm": 219.51759338378906,
"max_norm/layer0": 219.51759338378906,
"mean_norm": 66.39589881896973,
"mean_norm/layer0": 66.39589881896973,
"multicode_k": 1,
"output_norm": 22.68491499900817,
"output_norm/layer0": 22.68491499900817,
"step": 7900
},
{
"MSE": 506.5046355692546,
"MSE/layer0": 506.5046355692546,
"dead_code_fraction": 0.1958,
"dead_code_fraction/layer0": 0.1958,
"epoch": 1.32,
"input_norm": 31.998745075861606,
"input_norm/layer0": 31.998745075861606,
"learning_rate": 0.005,
"loss": 2.0623,
"max_norm": 220.1356658935547,
"max_norm/layer0": 220.1356658935547,
"mean_norm": 66.46616172790527,
"mean_norm/layer0": 66.46616172790527,
"multicode_k": 1,
"output_norm": 22.709094810485844,
"output_norm/layer0": 22.709094810485844,
"step": 7950
},
{
"MSE": 506.24584472656227,
"MSE/layer0": 506.24584472656227,
"dead_code_fraction": 0.1962,
"dead_code_fraction/layer0": 0.1962,
"epoch": 1.32,
"input_norm": 31.998744071324662,
"input_norm/layer0": 31.998744071324662,
"learning_rate": 0.005,
"loss": 2.0252,
"max_norm": 220.52029418945312,
"max_norm/layer0": 220.52029418945312,
"mean_norm": 66.54170417785645,
"mean_norm/layer0": 66.54170417785645,
"multicode_k": 1,
"output_norm": 22.71004734039306,
"output_norm/layer0": 22.71004734039306,
"step": 8000
},
{
"epoch": 1.32,
"eval_MSE/layer0": 505.2722684186489,
"eval_accuracy": 0.5173414664109856,
"eval_dead_code_fraction/layer0": 0.19525,
"eval_input_norm/layer0": 31.998757950702117,
"eval_loss": 2.046276569366455,
"eval_multicode_k": 1,
"eval_output_norm/layer0": 22.71078164304668,
"eval_runtime": 158.1298,
"eval_samples_per_second": 29.235,
"eval_steps_per_second": 1.828,
"step": 8000
},
{
"MSE": 507.15304565429676,
"MSE/layer0": 507.15304565429676,
"dead_code_fraction": 0.1941,
"dead_code_fraction/layer0": 0.1941,
"epoch": 1.33,
"input_norm": 31.998751821517956,
"input_norm/layer0": 31.998751821517956,
"learning_rate": 0.005,
"loss": 2.0231,
"max_norm": 221.12425231933594,
"max_norm/layer0": 221.12425231933594,
"mean_norm": 66.61260223388672,
"mean_norm/layer0": 66.61260223388672,
"multicode_k": 1,
"output_norm": 22.70729770024618,
"output_norm/layer0": 22.70729770024618,
"step": 8050
},
{
"MSE": 508.0300794474282,
"MSE/layer0": 508.0300794474282,
"dead_code_fraction": 0.19355,
"dead_code_fraction/layer0": 0.19355,
"epoch": 1.33,
"input_norm": 31.998755750656134,
"input_norm/layer0": 31.998755750656134,
"learning_rate": 0.005,
"loss": 2.0065,
"max_norm": 221.41090393066406,
"max_norm/layer0": 221.41090393066406,
"mean_norm": 66.68024253845215,
"mean_norm/layer0": 66.68024253845215,
"multicode_k": 1,
"output_norm": 22.685567102432238,
"output_norm/layer0": 22.685567102432238,
"step": 8100
},
{
"MSE": 506.83792968750004,
"MSE/layer0": 506.83792968750004,
"dead_code_fraction": 0.194,
"dead_code_fraction/layer0": 0.194,
"epoch": 1.34,
"input_norm": 31.998766530354814,
"input_norm/layer0": 31.998766530354814,
"learning_rate": 0.005,
"loss": 2.0545,
"max_norm": 221.77352905273438,
"max_norm/layer0": 221.77352905273438,
"mean_norm": 66.74850654602051,
"mean_norm/layer0": 66.74850654602051,
"multicode_k": 1,
"output_norm": 22.711014649073284,
"output_norm/layer0": 22.711014649073284,
"step": 8150
},
{
"MSE": 506.1638347880046,
"MSE/layer0": 506.1638347880046,
"dead_code_fraction": 0.1922,
"dead_code_fraction/layer0": 0.1922,
"epoch": 1.34,
"input_norm": 31.998765303293865,
"input_norm/layer0": 31.998765303293865,
"learning_rate": 0.005,
"loss": 2.0291,
"max_norm": 222.23851013183594,
"max_norm/layer0": 222.23851013183594,
"mean_norm": 66.81972694396973,
"mean_norm/layer0": 66.81972694396973,
"multicode_k": 1,
"output_norm": 22.712359495162957,
"output_norm/layer0": 22.712359495162957,
"step": 8200
},
{
"MSE": 505.4201058959959,
"MSE/layer0": 505.4201058959959,
"dead_code_fraction": 0.19165,
"dead_code_fraction/layer0": 0.19165,
"epoch": 1.35,
"input_norm": 31.998765595753984,
"input_norm/layer0": 31.998765595753984,
"learning_rate": 0.005,
"loss": 2.0255,
"max_norm": 222.60708618164062,
"max_norm/layer0": 222.60708618164062,
"mean_norm": 66.89296340942383,
"mean_norm/layer0": 66.89296340942383,
"multicode_k": 1,
"output_norm": 22.733057559331257,
"output_norm/layer0": 22.733057559331257,
"step": 8250
},
{
"MSE": 506.6631129964193,
"MSE/layer0": 506.6631129964193,
"dead_code_fraction": 0.18985,
"dead_code_fraction/layer0": 0.18985,
"epoch": 1.35,
"input_norm": 31.998774194717406,
"input_norm/layer0": 31.998774194717406,
"learning_rate": 0.005,
"loss": 2.0543,
"max_norm": 222.95948791503906,
"max_norm/layer0": 222.95948791503906,
"mean_norm": 66.95783233642578,
"mean_norm/layer0": 66.95783233642578,
"multicode_k": 1,
"output_norm": 22.715471951166787,
"output_norm/layer0": 22.715471951166787,
"step": 8300
},
{
"MSE": 505.8098661804198,
"MSE/layer0": 505.8098661804198,
"dead_code_fraction": 0.1901,
"dead_code_fraction/layer0": 0.1901,
"epoch": 1.36,
"input_norm": 31.998776054382326,
"input_norm/layer0": 31.998776054382326,
"learning_rate": 0.005,
"loss": 2.0361,
"max_norm": 222.99290466308594,
"max_norm/layer0": 222.99290466308594,
"mean_norm": 67.03095436096191,
"mean_norm/layer0": 67.03095436096191,
"multicode_k": 1,
"output_norm": 22.720023854573576,
"output_norm/layer0": 22.720023854573576,
"step": 8350
},
{
"MSE": 504.6476872253421,
"MSE/layer0": 504.6476872253421,
"dead_code_fraction": 0.18865,
"dead_code_fraction/layer0": 0.18865,
"epoch": 1.36,
"input_norm": 31.99877415021262,
"input_norm/layer0": 31.99877415021262,
"learning_rate": 0.005,
"loss": 2.018,
"max_norm": 222.99652099609375,
"max_norm/layer0": 222.99652099609375,
"mean_norm": 67.10310173034668,
"mean_norm/layer0": 67.10310173034668,
"multicode_k": 1,
"output_norm": 22.743260552088422,
"output_norm/layer0": 22.743260552088422,
"step": 8400
},
{
"MSE": 505.1742755126953,
"MSE/layer0": 505.1742755126953,
"dead_code_fraction": 0.18805,
"dead_code_fraction/layer0": 0.18805,
"epoch": 1.37,
"input_norm": 31.998781833648685,
"input_norm/layer0": 31.998781833648685,
"learning_rate": 0.005,
"loss": 2.0373,
"max_norm": 223.39710998535156,
"max_norm/layer0": 223.39710998535156,
"mean_norm": 67.17368698120117,
"mean_norm/layer0": 67.17368698120117,
"multicode_k": 1,
"output_norm": 22.74353121121724,
"output_norm/layer0": 22.74353121121724,
"step": 8450
},
{
"MSE": 505.00153442382805,
"MSE/layer0": 505.00153442382805,
"dead_code_fraction": 0.1875,
"dead_code_fraction/layer0": 0.1875,
"epoch": 1.37,
"input_norm": 31.998789456685383,
"input_norm/layer0": 31.998789456685383,
"learning_rate": 0.005,
"loss": 2.0432,
"max_norm": 223.86239624023438,
"max_norm/layer0": 223.86239624023438,
"mean_norm": 67.2455825805664,
"mean_norm/layer0": 67.2455825805664,
"multicode_k": 1,
"output_norm": 22.747594401041667,
"output_norm/layer0": 22.747594401041667,
"step": 8500
},
{
"epoch": 1.37,
"eval_MSE/layer0": 502.9394664067146,
"eval_accuracy": 0.5183496432580605,
"eval_dead_code_fraction/layer0": 0.18745,
"eval_input_norm/layer0": 31.998788164622738,
"eval_loss": 2.042330265045166,
"eval_multicode_k": 1,
"eval_output_norm/layer0": 22.756197618711013,
"eval_runtime": 159.3738,
"eval_samples_per_second": 29.007,
"eval_steps_per_second": 1.813,
"step": 8500
},
{
"MSE": 504.47000788370775,
"MSE/layer0": 504.47000788370775,
"dead_code_fraction": 0.1867,
"dead_code_fraction/layer0": 0.1867,
"epoch": 1.38,
"input_norm": 31.998792708714795,
"input_norm/layer0": 31.998792708714795,
"learning_rate": 0.005,
"loss": 2.0483,
"max_norm": 224.50177001953125,
"max_norm/layer0": 224.50177001953125,
"mean_norm": 67.31682586669922,
"mean_norm/layer0": 67.31682586669922,
"multicode_k": 1,
"output_norm": 22.753840500513725,
"output_norm/layer0": 22.753840500513725,
"step": 8550
},
{
"MSE": 504.7471296691896,
"MSE/layer0": 504.7471296691896,
"dead_code_fraction": 0.1861,
"dead_code_fraction/layer0": 0.1861,
"epoch": 1.38,
"input_norm": 31.99879879315695,
"input_norm/layer0": 31.99879879315695,
"learning_rate": 0.005,
"loss": 2.0626,
"max_norm": 224.77008056640625,
"max_norm/layer0": 224.77008056640625,
"mean_norm": 67.38501358032227,
"mean_norm/layer0": 67.38501358032227,
"multicode_k": 1,
"output_norm": 22.75965905507406,
"output_norm/layer0": 22.75965905507406,
"step": 8600
},
{
"MSE": 504.43309575398786,
"MSE/layer0": 504.43309575398786,
"dead_code_fraction": 0.18575,
"dead_code_fraction/layer0": 0.18575,
"epoch": 1.39,
"input_norm": 31.9987975247701,
"input_norm/layer0": 31.9987975247701,
"learning_rate": 0.005,
"loss": 2.0528,
"max_norm": 224.8895263671875,
"max_norm/layer0": 224.8895263671875,
"mean_norm": 67.45294189453125,
"mean_norm/layer0": 67.45294189453125,
"multicode_k": 1,
"output_norm": 22.765578152338662,
"output_norm/layer0": 22.765578152338662,
"step": 8650
},
{
"MSE": 504.8997240193688,
"MSE/layer0": 504.8997240193688,
"dead_code_fraction": 0.1849,
"dead_code_fraction/layer0": 0.1849,
"epoch": 1.39,
"input_norm": 31.998805205027267,
"input_norm/layer0": 31.998805205027267,
"learning_rate": 0.005,
"loss": 2.0355,
"max_norm": 225.1109619140625,
"max_norm/layer0": 225.1109619140625,
"mean_norm": 67.51644897460938,
"mean_norm/layer0": 67.51644897460938,
"multicode_k": 1,
"output_norm": 22.76556049982706,
"output_norm/layer0": 22.76556049982706,
"step": 8700
},
{
"MSE": 504.98007812499975,
"MSE/layer0": 504.98007812499975,
"dead_code_fraction": 0.1841,
"dead_code_fraction/layer0": 0.1841,
"epoch": 1.4,
"input_norm": 31.998811095555627,
"input_norm/layer0": 31.998811095555627,
"learning_rate": 0.005,
"loss": 2.048,
"max_norm": 225.3004608154297,
"max_norm/layer0": 225.3004608154297,
"mean_norm": 67.58170700073242,
"mean_norm/layer0": 67.58170700073242,
"multicode_k": 1,
"output_norm": 22.734453417460124,
"output_norm/layer0": 22.734453417460124,
"step": 8750
},
{
"MSE": 505.8172926839193,
"MSE/layer0": 505.8172926839193,
"dead_code_fraction": 0.1825,
"dead_code_fraction/layer0": 0.1825,
"epoch": 1.4,
"input_norm": 31.998811902999876,
"input_norm/layer0": 31.998811902999876,
"learning_rate": 0.005,
"loss": 2.0314,
"max_norm": 225.43496704101562,
"max_norm/layer0": 225.43496704101562,
"mean_norm": 67.64213943481445,
"mean_norm/layer0": 67.64213943481445,
"multicode_k": 1,
"output_norm": 22.746523040135706,
"output_norm/layer0": 22.746523040135706,
"step": 8800
},
{
"MSE": 505.15463668823276,
"MSE/layer0": 505.15463668823276,
"dead_code_fraction": 0.1834,
"dead_code_fraction/layer0": 0.1834,
"epoch": 1.41,
"input_norm": 31.99881089528401,
"input_norm/layer0": 31.99881089528401,
"learning_rate": 0.005,
"loss": 2.0019,
"max_norm": 225.2454376220703,
"max_norm/layer0": 225.2454376220703,
"mean_norm": 67.70701217651367,
"mean_norm/layer0": 67.70701217651367,
"multicode_k": 1,
"output_norm": 22.74102473258972,
"output_norm/layer0": 22.74102473258972,
"step": 8850
},
{
"MSE": 505.15305394490576,
"MSE/layer0": 505.15305394490576,
"dead_code_fraction": 0.18105,
"dead_code_fraction/layer0": 0.18105,
"epoch": 1.41,
"input_norm": 31.99882117907206,
"input_norm/layer0": 31.99882117907206,
"learning_rate": 0.005,
"loss": 2.0614,
"max_norm": 224.98548889160156,
"max_norm/layer0": 224.98548889160156,
"mean_norm": 67.77053833007812,
"mean_norm/layer0": 67.77053833007812,
"multicode_k": 1,
"output_norm": 22.750008074442544,
"output_norm/layer0": 22.750008074442544,
"step": 8900
},
{
"MSE": 505.46065561930345,
"MSE/layer0": 505.46065561930345,
"dead_code_fraction": 0.1809,
"dead_code_fraction/layer0": 0.1809,
"epoch": 1.42,
"input_norm": 31.99882030487061,
"input_norm/layer0": 31.99882030487061,
"learning_rate": 0.005,
"loss": 2.0259,
"max_norm": 224.90966796875,
"max_norm/layer0": 224.90966796875,
"mean_norm": 67.83388900756836,
"mean_norm/layer0": 67.83388900756836,
"multicode_k": 1,
"output_norm": 22.744747044245393,
"output_norm/layer0": 22.744747044245393,
"step": 8950
},
{
"MSE": 503.93126592000317,
"MSE/layer0": 503.93126592000317,
"dead_code_fraction": 0.1795,
"dead_code_fraction/layer0": 0.1795,
"epoch": 1.42,
"input_norm": 31.99882487614949,
"input_norm/layer0": 31.99882487614949,
"learning_rate": 0.005,
"loss": 2.0549,
"max_norm": 224.75604248046875,
"max_norm/layer0": 224.75604248046875,
"mean_norm": 67.89757537841797,
"mean_norm/layer0": 67.89757537841797,
"multicode_k": 1,
"output_norm": 22.767707106272383,
"output_norm/layer0": 22.767707106272383,
"step": 9000
},
{
"epoch": 1.42,
"eval_MSE/layer0": 502.90162357014304,
"eval_accuracy": 0.518752237368134,
"eval_dead_code_fraction/layer0": 0.1797,
"eval_input_norm/layer0": 31.998819289515865,
"eval_loss": 2.0394132137298584,
"eval_multicode_k": 1,
"eval_output_norm/layer0": 22.772194602647794,
"eval_runtime": 158.4408,
"eval_samples_per_second": 29.178,
"eval_steps_per_second": 1.824,
"step": 9000
},
{
"MSE": 504.23655522664376,
"MSE/layer0": 504.23655522664376,
"dead_code_fraction": 0.17875,
"dead_code_fraction/layer0": 0.17875,
"epoch": 1.43,
"input_norm": 31.998824621836345,
"input_norm/layer0": 31.998824621836345,
"learning_rate": 0.005,
"loss": 2.0392,
"max_norm": 224.60926818847656,
"max_norm/layer0": 224.60926818847656,
"mean_norm": 67.96440505981445,
"mean_norm/layer0": 67.96440505981445,
"multicode_k": 1,
"output_norm": 22.772467877070113,
"output_norm/layer0": 22.772467877070113,
"step": 9050
},
{
"MSE": 503.93936604817725,
"MSE/layer0": 503.93936604817725,
"dead_code_fraction": 0.17785,
"dead_code_fraction/layer0": 0.17785,
"epoch": 1.43,
"input_norm": 31.99882525444032,
"input_norm/layer0": 31.99882525444032,
"learning_rate": 0.005,
"loss": 2.0329,
"max_norm": 224.35545349121094,
"max_norm/layer0": 224.35545349121094,
"mean_norm": 68.02788925170898,
"mean_norm/layer0": 68.02788925170898,
"multicode_k": 1,
"output_norm": 22.77433245976766,
"output_norm/layer0": 22.77433245976766,
"step": 9100
},
{
"MSE": 504.38566899617547,
"MSE/layer0": 504.38566899617547,
"dead_code_fraction": 0.1771,
"dead_code_fraction/layer0": 0.1771,
"epoch": 1.44,
"input_norm": 31.998831052780154,
"input_norm/layer0": 31.998831052780154,
"learning_rate": 0.005,
"loss": 2.0175,
"max_norm": 224.176025390625,
"max_norm/layer0": 224.176025390625,
"mean_norm": 68.09515762329102,
"mean_norm/layer0": 68.09515762329102,
"multicode_k": 1,
"output_norm": 22.758301575978592,
"output_norm/layer0": 22.758301575978592,
"step": 9150
},
{
"MSE": 503.9738773091634,
"MSE/layer0": 503.9738773091634,
"dead_code_fraction": 0.1763,
"dead_code_fraction/layer0": 0.1763,
"epoch": 1.44,
"input_norm": 31.998839066823308,
"input_norm/layer0": 31.998839066823308,
"learning_rate": 0.005,
"loss": 2.0462,
"max_norm": 224.0868377685547,
"max_norm/layer0": 224.0868377685547,
"mean_norm": 68.16043853759766,
"mean_norm/layer0": 68.16043853759766,
"multicode_k": 1,
"output_norm": 22.7744267431895,
"output_norm/layer0": 22.7744267431895,
"step": 9200
},
{
"MSE": 503.29069310506196,
"MSE/layer0": 503.29069310506196,
"dead_code_fraction": 0.17485,
"dead_code_fraction/layer0": 0.17485,
"epoch": 1.45,
"input_norm": 31.99883868853251,
"input_norm/layer0": 31.99883868853251,
"learning_rate": 0.005,
"loss": 2.0416,
"max_norm": 223.89230346679688,
"max_norm/layer0": 223.89230346679688,
"mean_norm": 68.22885513305664,
"mean_norm/layer0": 68.22885513305664,
"multicode_k": 1,
"output_norm": 22.78441795984904,
"output_norm/layer0": 22.78441795984904,
"step": 9250
},
{
"MSE": 504.4634376017252,
"MSE/layer0": 504.4634376017252,
"dead_code_fraction": 0.17465,
"dead_code_fraction/layer0": 0.17465,
"epoch": 1.45,
"input_norm": 31.998847064971933,
"input_norm/layer0": 31.998847064971933,
"learning_rate": 0.005,
"loss": 2.0822,
"max_norm": 223.5952911376953,
"max_norm/layer0": 223.5952911376953,
"mean_norm": 68.2917366027832,
"mean_norm/layer0": 68.2917366027832,
"multicode_k": 1,
"output_norm": 22.78019981384277,
"output_norm/layer0": 22.78019981384277,
"step": 9300
},
{
"MSE": 504.5819336954755,
"MSE/layer0": 504.5819336954755,
"dead_code_fraction": 0.1737,
"dead_code_fraction/layer0": 0.1737,
"epoch": 1.46,
"input_norm": 31.998844486872358,
"input_norm/layer0": 31.998844486872358,
"learning_rate": 0.005,
"loss": 2.0181,
"max_norm": 223.33349609375,
"max_norm/layer0": 223.33349609375,
"mean_norm": 68.3541030883789,
"mean_norm/layer0": 68.3541030883789,
"multicode_k": 1,
"output_norm": 22.770421886444097,
"output_norm/layer0": 22.770421886444097,
"step": 9350
},
{
"MSE": 504.08388671875014,
"MSE/layer0": 504.08388671875014,
"dead_code_fraction": 0.17315,
"dead_code_fraction/layer0": 0.17315,
"epoch": 1.46,
"input_norm": 31.998852834701534,
"input_norm/layer0": 31.998852834701534,
"learning_rate": 0.005,
"loss": 2.0332,
"max_norm": 223.0471954345703,
"max_norm/layer0": 223.0471954345703,
"mean_norm": 68.41642379760742,
"mean_norm/layer0": 68.41642379760742,
"multicode_k": 1,
"output_norm": 22.783455673853553,
"output_norm/layer0": 22.783455673853553,
"step": 9400
},
{
"MSE": 504.4143726603196,
"MSE/layer0": 504.4143726603196,
"dead_code_fraction": 0.17145,
"dead_code_fraction/layer0": 0.17145,
"epoch": 1.47,
"input_norm": 31.998856865564967,
"input_norm/layer0": 31.998856865564967,
"learning_rate": 0.005,
"loss": 2.0241,
"max_norm": 222.83218383789062,
"max_norm/layer0": 222.83218383789062,
"mean_norm": 68.48007202148438,
"mean_norm/layer0": 68.48007202148438,
"multicode_k": 1,
"output_norm": 22.767489954630527,
"output_norm/layer0": 22.767489954630527,
"step": 9450
},
{
"MSE": 503.2655168151856,
"MSE/layer0": 503.2655168151856,
"dead_code_fraction": 0.17245,
"dead_code_fraction/layer0": 0.17245,
"epoch": 1.47,
"input_norm": 31.998857196172086,
"input_norm/layer0": 31.998857196172086,
"learning_rate": 0.005,
"loss": 2.0087,
"max_norm": 222.5254669189453,
"max_norm/layer0": 222.5254669189453,
"mean_norm": 68.54964065551758,
"mean_norm/layer0": 68.54964065551758,
"multicode_k": 1,
"output_norm": 22.78383262634278,
"output_norm/layer0": 22.78383262634278,
"step": 9500
},
{
"epoch": 1.47,
"eval_MSE/layer0": 504.0087830256569,
"eval_accuracy": 0.5192516739689711,
"eval_dead_code_fraction/layer0": 0.1704,
"eval_input_norm/layer0": 31.99886018302103,
"eval_loss": 2.0364596843719482,
"eval_multicode_k": 1,
"eval_output_norm/layer0": 22.799023320451585,
"eval_runtime": 158.3046,
"eval_samples_per_second": 29.203,
"eval_steps_per_second": 1.826,
"step": 9500
},
{
"MSE": 502.63093187018274,
"MSE/layer0": 502.63093187018274,
"dead_code_fraction": 0.1713,
"dead_code_fraction/layer0": 0.1713,
"epoch": 1.48,
"input_norm": 31.998863240132852,
"input_norm/layer0": 31.998863240132852,
"learning_rate": 0.005,
"loss": 2.0083,
"max_norm": 222.2374725341797,
"max_norm/layer0": 222.2374725341797,
"mean_norm": 68.61249160766602,
"mean_norm/layer0": 68.61249160766602,
"multicode_k": 1,
"output_norm": 22.79880291995348,
"output_norm/layer0": 22.79880291995348,
"step": 9550
},
{
"MSE": 505.36792836568793,
"MSE/layer0": 505.36792836568793,
"dead_code_fraction": 0.17135,
"dead_code_fraction/layer0": 0.17135,
"epoch": 2.0,
"input_norm": 31.99885930271917,
"input_norm/layer0": 31.99885930271917,
"learning_rate": 0.005,
"loss": 2.0034,
"max_norm": 222.0310516357422,
"max_norm/layer0": 222.0310516357422,
"mean_norm": 68.67721176147461,
"mean_norm/layer0": 68.67721176147461,
"multicode_k": 1,
"output_norm": 22.74409036474983,
"output_norm/layer0": 22.74409036474983,
"step": 9600
},
{
"MSE": 502.98986485799134,
"MSE/layer0": 502.98986485799134,
"dead_code_fraction": 0.1697,
"dead_code_fraction/layer0": 0.1697,
"epoch": 2.01,
"input_norm": 31.998873513539642,
"input_norm/layer0": 31.998873513539642,
"learning_rate": 0.005,
"loss": 2.072,
"max_norm": 222.00772094726562,
"max_norm/layer0": 222.00772094726562,
"mean_norm": 68.73538589477539,
"mean_norm/layer0": 68.73538589477539,
"multicode_k": 1,
"output_norm": 22.796976168950394,
"output_norm/layer0": 22.796976168950394,
"step": 9650
},
{
"MSE": 503.04733729044574,
"MSE/layer0": 503.04733729044574,
"dead_code_fraction": 0.16915,
"dead_code_fraction/layer0": 0.16915,
"epoch": 2.01,
"input_norm": 31.998862508138025,
"input_norm/layer0": 31.998862508138025,
"learning_rate": 0.005,
"loss": 1.9691,
"max_norm": 221.80978393554688,
"max_norm/layer0": 221.80978393554688,
"mean_norm": 68.80109405517578,
"mean_norm/layer0": 68.80109405517578,
"multicode_k": 1,
"output_norm": 22.794911410013835,
"output_norm/layer0": 22.794911410013835,
"step": 9700
},
{
"MSE": 503.3161979675292,
"MSE/layer0": 503.3161979675292,
"dead_code_fraction": 0.16895,
"dead_code_fraction/layer0": 0.16895,
"epoch": 2.02,
"input_norm": 31.998878345489487,
"input_norm/layer0": 31.998878345489487,
"learning_rate": 0.005,
"loss": 2.0368,
"max_norm": 221.684814453125,
"max_norm/layer0": 221.684814453125,
"mean_norm": 68.86429214477539,
"mean_norm/layer0": 68.86429214477539,
"multicode_k": 1,
"output_norm": 22.785858039855956,
"output_norm/layer0": 22.785858039855956,
"step": 9750
},
{
"MSE": 502.7885366821291,
"MSE/layer0": 502.7885366821291,
"dead_code_fraction": 0.16775,
"dead_code_fraction/layer0": 0.16775,
"epoch": 2.02,
"input_norm": 31.998876323699957,
"input_norm/layer0": 31.998876323699957,
"learning_rate": 0.005,
"loss": 2.0029,
"max_norm": 221.55738830566406,
"max_norm/layer0": 221.55738830566406,
"mean_norm": 68.92353439331055,
"mean_norm/layer0": 68.92353439331055,
"multicode_k": 1,
"output_norm": 22.80311137835186,
"output_norm/layer0": 22.80311137835186,
"step": 9800
},
{
"MSE": 503.08141484578465,
"MSE/layer0": 503.08141484578465,
"dead_code_fraction": 0.16675,
"dead_code_fraction/layer0": 0.16675,
"epoch": 2.03,
"input_norm": 31.998882681528727,
"input_norm/layer0": 31.998882681528727,
"learning_rate": 0.005,
"loss": 2.0058,
"max_norm": 221.4176025390625,
"max_norm/layer0": 221.4176025390625,
"mean_norm": 68.97920608520508,
"mean_norm/layer0": 68.97920608520508,
"multicode_k": 1,
"output_norm": 22.79436633110047,
"output_norm/layer0": 22.79436633110047,
"step": 9850
},
{
"MSE": 503.44391169230175,
"MSE/layer0": 503.44391169230175,
"dead_code_fraction": 0.16635,
"dead_code_fraction/layer0": 0.16635,
"epoch": 2.03,
"input_norm": 31.998889300028488,
"input_norm/layer0": 31.998889300028488,
"learning_rate": 0.005,
"loss": 2.0128,
"max_norm": 220.8733673095703,
"max_norm/layer0": 220.8733673095703,
"mean_norm": 69.03522872924805,
"mean_norm/layer0": 69.03522872924805,
"multicode_k": 1,
"output_norm": 22.793825833002728,
"output_norm/layer0": 22.793825833002728,
"step": 9900
},
{
"MSE": 503.14160481770807,
"MSE/layer0": 503.14160481770807,
"dead_code_fraction": 0.1655,
"dead_code_fraction/layer0": 0.1655,
"epoch": 2.04,
"input_norm": 31.99888905207317,
"input_norm/layer0": 31.99888905207317,
"learning_rate": 0.005,
"loss": 2.0053,
"max_norm": 220.66598510742188,
"max_norm/layer0": 220.66598510742188,
"mean_norm": 69.08990859985352,
"mean_norm/layer0": 69.08990859985352,
"multicode_k": 1,
"output_norm": 22.802439581553138,
"output_norm/layer0": 22.802439581553138,
"step": 9950
},
{
"MSE": 502.7584656778976,
"MSE/layer0": 502.7584656778976,
"dead_code_fraction": 0.16445,
"dead_code_fraction/layer0": 0.16445,
"epoch": 2.04,
"input_norm": 31.998899453481037,
"input_norm/layer0": 31.998899453481037,
"learning_rate": 0.005,
"loss": 2.0569,
"max_norm": 220.5869903564453,
"max_norm/layer0": 220.5869903564453,
"mean_norm": 69.14492416381836,
"mean_norm/layer0": 69.14492416381836,
"multicode_k": 1,
"output_norm": 22.808293444315584,
"output_norm/layer0": 22.808293444315584,
"step": 10000
},
{
"epoch": 2.04,
"eval_MSE/layer0": 501.8128262733759,
"eval_accuracy": 0.5193506309245984,
"eval_dead_code_fraction/layer0": 0.16395,
"eval_input_norm/layer0": 31.998895487949337,
"eval_loss": 2.0353407859802246,
"eval_multicode_k": 1,
"eval_output_norm/layer0": 22.80092038433711,
"eval_runtime": 158.6027,
"eval_samples_per_second": 29.148,
"eval_steps_per_second": 1.822,
"step": 10000
},
{
"MSE": 0.0,
"MSE/layer0": 0.0,
"dead_code_fraction": 1.0,
"dead_code_fraction/layer0": 1.0,
"epoch": 2.04,
"input_norm": 0.0,
"input_norm/layer0": 0.0,
"max_norm": 220.5869903564453,
"max_norm/layer0": 220.5869903564453,
"mean_norm": 69.14492416381836,
"mean_norm/layer0": 69.14492416381836,
"multicode_k": 1,
"output_norm": 0.0,
"output_norm/layer0": 0.0,
"step": 10000,
"total_flos": 7.43098011353088e+16,
"train_loss": 2.205516522693634,
"train_runtime": 15654.0479,
"train_samples_per_second": 61.326,
"train_steps_per_second": 0.639
}
],
"logging_steps": 50,
"max_steps": 10000,
"num_train_epochs": 9223372036854775807,
"save_steps": 500,
"total_flos": 7.43098011353088e+16,
"trial_name": null,
"trial_params": null
}