{ "best_metric": 2.0353407859802246, "best_model_checkpoint": "output_main/wandb/run-20240211_075351-8o9ldy4a/files/train_output/checkpoint-10000", "epoch": 2.042133333333333, "eval_steps": 500, "global_step": 10000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "MSE": 892.0916341145833, "MSE/layer0": 892.0916341145833, "dead_code_fraction": 0.15045, "dead_code_fraction/layer0": 0.15045, "epoch": 0.0, "input_norm": 31.997259775797524, "input_norm/layer0": 31.997259775797524, "learning_rate": 1e-05, "loss": 8.134, "max_norm": 35.01011657714844, "max_norm/layer0": 35.01011657714844, "mean_norm": 31.990370750427246, "mean_norm/layer0": 31.990370750427246, "multicode_k": 1, "output_norm": 8.571834087371826, "output_norm/layer0": 8.571834087371826, "step": 1 }, { "MSE": 889.7418754733337, "MSE/layer0": 889.7418754733337, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.01, "input_norm": 31.99632342656454, "input_norm/layer0": 31.99632342656454, "learning_rate": 0.0005, "loss": 7.1762, "max_norm": 35.03640365600586, "max_norm/layer0": 35.03640365600586, "mean_norm": 32.01236152648926, "mean_norm/layer0": 32.01236152648926, "multicode_k": 1, "output_norm": 8.591146861614817, "output_norm/layer0": 8.591146861614817, "step": 50 }, { "MSE": 869.5438468424481, "MSE/layer0": 869.5438468424481, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.01, "input_norm": 31.996115023295076, "input_norm/layer0": 31.996115023295076, "learning_rate": 0.001, "loss": 5.0118, "max_norm": 35.15137481689453, "max_norm/layer0": 35.15137481689453, "mean_norm": 32.11746788024902, "mean_norm/layer0": 32.11746788024902, "multicode_k": 1, "output_norm": 8.768607576688133, "output_norm/layer0": 8.768607576688133, "step": 100 }, { "MSE": 841.8395769246417, "MSE/layer0": 841.8395769246417, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.01, "input_norm": 31.996588408152256, "input_norm/layer0": 31.996588408152256, "learning_rate": 0.0015, "loss": 4.2338, "max_norm": 35.45364761352539, "max_norm/layer0": 35.45364761352539, "mean_norm": 32.34040641784668, "mean_norm/layer0": 32.34040641784668, "multicode_k": 1, "output_norm": 9.237536044120787, "output_norm/layer0": 9.237536044120787, "step": 150 }, { "MSE": 817.2703357950843, "MSE/layer0": 817.2703357950843, "dead_code_fraction": 5e-05, "dead_code_fraction/layer0": 5e-05, "epoch": 0.02, "input_norm": 31.99718633969625, "input_norm/layer0": 31.99718633969625, "learning_rate": 0.002, "loss": 3.837, "max_norm": 36.11206817626953, "max_norm/layer0": 36.11206817626953, "mean_norm": 32.652212142944336, "mean_norm/layer0": 32.652212142944336, "multicode_k": 1, "output_norm": 9.962018431027724, "output_norm/layer0": 9.962018431027724, "step": 200 }, { "MSE": 787.4571261596678, "MSE/layer0": 787.4571261596678, "dead_code_fraction": 0.0028, "dead_code_fraction/layer0": 0.0028, "epoch": 0.03, "input_norm": 31.99750740687052, "input_norm/layer0": 31.99750740687052, "learning_rate": 0.0025, "loss": 3.5507, "max_norm": 37.03396987915039, "max_norm/layer0": 37.03396987915039, "mean_norm": 33.030792236328125, "mean_norm/layer0": 33.030792236328125, "multicode_k": 1, "output_norm": 11.101801137924198, "output_norm/layer0": 11.101801137924198, "step": 250 }, { "MSE": 759.7653246053058, "MSE/layer0": 759.7653246053058, "dead_code_fraction": 0.02905, "dead_code_fraction/layer0": 0.02905, "epoch": 0.03, "input_norm": 31.99749964078267, "input_norm/layer0": 31.99749964078267, "learning_rate": 0.003, "loss": 3.3015, "max_norm": 37.927757263183594, "max_norm/layer0": 37.927757263183594, "mean_norm": 33.33859634399414, "mean_norm/layer0": 33.33859634399414, "multicode_k": 1, "output_norm": 12.222484871546431, "output_norm/layer0": 12.222484871546431, "step": 300 }, { "MSE": 734.5841912841795, "MSE/layer0": 734.5841912841795, "dead_code_fraction": 0.06455, "dead_code_fraction/layer0": 0.06455, "epoch": 0.04, "input_norm": 31.99746166547139, "input_norm/layer0": 31.99746166547139, "learning_rate": 0.0034999999999999996, "loss": 3.1483, "max_norm": 40.570350646972656, "max_norm/layer0": 40.570350646972656, "mean_norm": 33.79829216003418, "mean_norm/layer0": 33.79829216003418, "multicode_k": 1, "output_norm": 13.233797086079917, "output_norm/layer0": 13.233797086079917, "step": 350 }, { "MSE": 705.9179516601566, "MSE/layer0": 705.9179516601566, "dead_code_fraction": 0.13495, "dead_code_fraction/layer0": 0.13495, "epoch": 0.04, "input_norm": 31.997578941980994, "input_norm/layer0": 31.997578941980994, "learning_rate": 0.004, "loss": 3.0479, "max_norm": 45.86402130126953, "max_norm/layer0": 45.86402130126953, "mean_norm": 34.60604667663574, "mean_norm/layer0": 34.60604667663574, "multicode_k": 1, "output_norm": 14.794977650642394, "output_norm/layer0": 14.794977650642394, "step": 400 }, { "MSE": 673.0142825317382, "MSE/layer0": 673.0142825317382, "dead_code_fraction": 0.236, "dead_code_fraction/layer0": 0.236, "epoch": 0.04, "input_norm": 31.99772956212363, "input_norm/layer0": 31.99772956212363, "learning_rate": 0.0045000000000000005, "loss": 2.9234, "max_norm": 50.35022735595703, "max_norm/layer0": 50.35022735595703, "mean_norm": 35.50743293762207, "mean_norm/layer0": 35.50743293762207, "multicode_k": 1, "output_norm": 16.412540513674415, "output_norm/layer0": 16.412540513674415, "step": 450 }, { "MSE": 646.1952704874673, "MSE/layer0": 646.1952704874673, "dead_code_fraction": 0.31565, "dead_code_fraction/layer0": 0.31565, "epoch": 0.05, "input_norm": 31.997816743850702, "input_norm/layer0": 31.997816743850702, "learning_rate": 0.005, "loss": 2.8364, "max_norm": 55.06960678100586, "max_norm/layer0": 55.06960678100586, "mean_norm": 36.40013122558594, "mean_norm/layer0": 36.40013122558594, "multicode_k": 1, "output_norm": 17.61372879664104, "output_norm/layer0": 17.61372879664104, "step": 500 }, { "epoch": 0.05, "eval_MSE/layer0": 634.8931657946682, "eval_accuracy": 0.42267877747562077, "eval_dead_code_fraction/layer0": 0.3619, "eval_input_norm/layer0": 31.9978586178746, "eval_loss": 2.7649216651916504, "eval_multicode_k": 1, "eval_output_norm/layer0": 18.081893277070293, "eval_runtime": 157.3558, "eval_samples_per_second": 29.379, "eval_steps_per_second": 1.837, "step": 500 }, { "MSE": 627.919213663737, "MSE/layer0": 627.919213663737, "dead_code_fraction": 0.35455, "dead_code_fraction/layer0": 0.35455, "epoch": 0.06, "input_norm": 31.997863556543983, "input_norm/layer0": 31.997863556543983, "learning_rate": 0.005, "loss": 2.6999, "max_norm": 59.44381332397461, "max_norm/layer0": 59.44381332397461, "mean_norm": 37.23677062988281, "mean_norm/layer0": 37.23677062988281, "multicode_k": 1, "output_norm": 18.411861616770416, "output_norm/layer0": 18.411861616770416, "step": 550 }, { "MSE": 613.3249330647789, "MSE/layer0": 613.3249330647789, "dead_code_fraction": 0.38215, "dead_code_fraction/layer0": 0.38215, "epoch": 0.06, "input_norm": 31.99789684613545, "input_norm/layer0": 31.99789684613545, "learning_rate": 0.005, "loss": 2.6511, "max_norm": 66.23004150390625, "max_norm/layer0": 66.23004150390625, "mean_norm": 38.00171661376953, "mean_norm/layer0": 38.00171661376953, "multicode_k": 1, "output_norm": 18.973640613555915, "output_norm/layer0": 18.973640613555915, "step": 600 }, { "MSE": 601.0688813273114, "MSE/layer0": 601.0688813273114, "dead_code_fraction": 0.3855, "dead_code_fraction/layer0": 0.3855, "epoch": 0.07, "input_norm": 31.997907568613698, "input_norm/layer0": 31.997907568613698, "learning_rate": 0.005, "loss": 2.5727, "max_norm": 72.61077117919922, "max_norm/layer0": 72.61077117919922, "mean_norm": 38.68782615661621, "mean_norm/layer0": 38.68782615661621, "multicode_k": 1, "output_norm": 19.389015719095863, "output_norm/layer0": 19.389015719095863, "step": 650 }, { "MSE": 595.7544806925458, "MSE/layer0": 595.7544806925458, "dead_code_fraction": 0.3847, "dead_code_fraction/layer0": 0.3847, "epoch": 0.07, "input_norm": 31.99792699813842, "input_norm/layer0": 31.99792699813842, "learning_rate": 0.005, "loss": 2.5303, "max_norm": 77.1572036743164, "max_norm/layer0": 77.1572036743164, "mean_norm": 39.301788330078125, "mean_norm/layer0": 39.301788330078125, "multicode_k": 1, "output_norm": 19.654865121841446, "output_norm/layer0": 19.654865121841446, "step": 700 }, { "MSE": 587.4001970418295, "MSE/layer0": 587.4001970418295, "dead_code_fraction": 0.38495, "dead_code_fraction/layer0": 0.38495, "epoch": 0.07, "input_norm": 31.997964229583737, "input_norm/layer0": 31.997964229583737, "learning_rate": 0.005, "loss": 2.5181, "max_norm": 81.00206756591797, "max_norm/layer0": 81.00206756591797, "mean_norm": 39.8663330078125, "mean_norm/layer0": 39.8663330078125, "multicode_k": 1, "output_norm": 19.91484704653422, "output_norm/layer0": 19.91484704653422, "step": 750 }, { "MSE": 582.8578649902345, "MSE/layer0": 582.8578649902345, "dead_code_fraction": 0.37595, "dead_code_fraction/layer0": 0.37595, "epoch": 0.08, "input_norm": 31.997961613337196, "input_norm/layer0": 31.997961613337196, "learning_rate": 0.005, "loss": 2.488, "max_norm": 84.8564682006836, "max_norm/layer0": 84.8564682006836, "mean_norm": 40.41610145568848, "mean_norm/layer0": 40.41610145568848, "multicode_k": 1, "output_norm": 20.113984060287464, "output_norm/layer0": 20.113984060287464, "step": 800 }, { "MSE": 578.7394322713219, "MSE/layer0": 578.7394322713219, "dead_code_fraction": 0.36775, "dead_code_fraction/layer0": 0.36775, "epoch": 0.09, "input_norm": 31.99793098767598, "input_norm/layer0": 31.99793098767598, "learning_rate": 0.005, "loss": 2.3972, "max_norm": 88.52584838867188, "max_norm/layer0": 88.52584838867188, "mean_norm": 40.93037033081055, "mean_norm/layer0": 40.93037033081055, "multicode_k": 1, "output_norm": 20.255761035283413, "output_norm/layer0": 20.255761035283413, "step": 850 }, { "MSE": 574.7943645222981, "MSE/layer0": 574.7943645222981, "dead_code_fraction": 0.3752, "dead_code_fraction/layer0": 0.3752, "epoch": 0.09, "input_norm": 31.99794203122458, "input_norm/layer0": 31.99794203122458, "learning_rate": 0.005, "loss": 2.4475, "max_norm": 91.37139129638672, "max_norm/layer0": 91.37139129638672, "mean_norm": 41.42861366271973, "mean_norm/layer0": 41.42861366271973, "multicode_k": 1, "output_norm": 20.38246509869893, "output_norm/layer0": 20.38246509869893, "step": 900 }, { "MSE": 572.0475691731768, "MSE/layer0": 572.0475691731768, "dead_code_fraction": 0.369, "dead_code_fraction/layer0": 0.369, "epoch": 0.1, "input_norm": 31.997947810490906, "input_norm/layer0": 31.997947810490906, "learning_rate": 0.005, "loss": 2.3928, "max_norm": 93.76451873779297, "max_norm/layer0": 93.76451873779297, "mean_norm": 41.89710807800293, "mean_norm/layer0": 41.89710807800293, "multicode_k": 1, "output_norm": 20.522438500722256, "output_norm/layer0": 20.522438500722256, "step": 950 }, { "MSE": 571.223816274007, "MSE/layer0": 571.223816274007, "dead_code_fraction": 0.35845, "dead_code_fraction/layer0": 0.35845, "epoch": 0.1, "input_norm": 31.997930752436314, "input_norm/layer0": 31.997930752436314, "learning_rate": 0.005, "loss": 2.3611, "max_norm": 95.86876678466797, "max_norm/layer0": 95.86876678466797, "mean_norm": 42.36003875732422, "mean_norm/layer0": 42.36003875732422, "multicode_k": 1, "output_norm": 20.59194125175477, "output_norm/layer0": 20.59194125175477, "step": 1000 }, { "epoch": 0.1, "eval_MSE/layer0": 568.7263942209383, "eval_accuracy": 0.47120194006380184, "eval_dead_code_fraction/layer0": 0.36065, "eval_input_norm/layer0": 31.997911268824648, "eval_loss": 2.370492935180664, "eval_multicode_k": 1, "eval_output_norm/layer0": 20.66302644662364, "eval_runtime": 157.3974, "eval_samples_per_second": 29.372, "eval_steps_per_second": 1.836, "step": 1000 }, { "MSE": 568.216246948242, "MSE/layer0": 568.216246948242, "dead_code_fraction": 0.35655, "dead_code_fraction/layer0": 0.35655, "epoch": 0.1, "input_norm": 31.997933057149254, "input_norm/layer0": 31.997933057149254, "learning_rate": 0.005, "loss": 2.3877, "max_norm": 97.93981170654297, "max_norm/layer0": 97.93981170654297, "mean_norm": 42.796369552612305, "mean_norm/layer0": 42.796369552612305, "multicode_k": 1, "output_norm": 20.69294343630473, "output_norm/layer0": 20.69294343630473, "step": 1050 }, { "MSE": 566.0765097045902, "MSE/layer0": 566.0765097045902, "dead_code_fraction": 0.3515, "dead_code_fraction/layer0": 0.3515, "epoch": 0.11, "input_norm": 31.997944701512658, "input_norm/layer0": 31.997944701512658, "learning_rate": 0.005, "loss": 2.32, "max_norm": 99.40829467773438, "max_norm/layer0": 99.40829467773438, "mean_norm": 43.20481872558594, "mean_norm/layer0": 43.20481872558594, "multicode_k": 1, "output_norm": 20.780460087458298, "output_norm/layer0": 20.780460087458298, "step": 1100 }, { "MSE": 563.1435256449383, "MSE/layer0": 563.1435256449383, "dead_code_fraction": 0.3425, "dead_code_fraction/layer0": 0.3425, "epoch": 0.12, "input_norm": 31.99793601353964, "input_norm/layer0": 31.99793601353964, "learning_rate": 0.005, "loss": 2.3309, "max_norm": 100.84235382080078, "max_norm/layer0": 100.84235382080078, "mean_norm": 43.63128852844238, "mean_norm/layer0": 43.63128852844238, "multicode_k": 1, "output_norm": 20.85479287147521, "output_norm/layer0": 20.85479287147521, "step": 1150 }, { "MSE": 561.2093427530926, "MSE/layer0": 561.2093427530926, "dead_code_fraction": 0.3403, "dead_code_fraction/layer0": 0.3403, "epoch": 0.12, "input_norm": 31.99792865435282, "input_norm/layer0": 31.99792865435282, "learning_rate": 0.005, "loss": 2.3308, "max_norm": 102.74110412597656, "max_norm/layer0": 102.74110412597656, "mean_norm": 44.03978157043457, "mean_norm/layer0": 44.03978157043457, "multicode_k": 1, "output_norm": 20.931864147186282, "output_norm/layer0": 20.931864147186282, "step": 1200 }, { "MSE": 559.3785518391925, "MSE/layer0": 559.3785518391925, "dead_code_fraction": 0.3412, "dead_code_fraction/layer0": 0.3412, "epoch": 0.12, "input_norm": 31.99792771339417, "input_norm/layer0": 31.99792771339417, "learning_rate": 0.005, "loss": 2.3437, "max_norm": 104.6494369506836, "max_norm/layer0": 104.6494369506836, "mean_norm": 44.438026428222656, "mean_norm/layer0": 44.438026428222656, "multicode_k": 1, "output_norm": 21.008427244822187, "output_norm/layer0": 21.008427244822187, "step": 1250 }, { "MSE": 557.9434753417968, "MSE/layer0": 557.9434753417968, "dead_code_fraction": 0.33015, "dead_code_fraction/layer0": 0.33015, "epoch": 0.13, "input_norm": 31.997915770212824, "input_norm/layer0": 31.997915770212824, "learning_rate": 0.005, "loss": 2.2785, "max_norm": 106.27558135986328, "max_norm/layer0": 106.27558135986328, "mean_norm": 44.82562255859375, "mean_norm/layer0": 44.82562255859375, "multicode_k": 1, "output_norm": 21.07806761741638, "output_norm/layer0": 21.07806761741638, "step": 1300 }, { "MSE": 556.1554424031574, "MSE/layer0": 556.1554424031574, "dead_code_fraction": 0.3277, "dead_code_fraction/layer0": 0.3277, "epoch": 0.14, "input_norm": 31.9979051399231, "input_norm/layer0": 31.9979051399231, "learning_rate": 0.005, "loss": 2.2823, "max_norm": 107.8658676147461, "max_norm/layer0": 107.8658676147461, "mean_norm": 45.21988105773926, "mean_norm/layer0": 45.21988105773926, "multicode_k": 1, "output_norm": 21.124666048685715, "output_norm/layer0": 21.124666048685715, "step": 1350 }, { "MSE": 554.1472004191082, "MSE/layer0": 554.1472004191082, "dead_code_fraction": 0.32535, "dead_code_fraction/layer0": 0.32535, "epoch": 0.14, "input_norm": 31.99791674613953, "input_norm/layer0": 31.99791674613953, "learning_rate": 0.005, "loss": 2.3034, "max_norm": 109.18831634521484, "max_norm/layer0": 109.18831634521484, "mean_norm": 45.60391616821289, "mean_norm/layer0": 45.60391616821289, "multicode_k": 1, "output_norm": 21.184103918075557, "output_norm/layer0": 21.184103918075557, "step": 1400 }, { "MSE": 553.0813423156735, "MSE/layer0": 553.0813423156735, "dead_code_fraction": 0.3218, "dead_code_fraction/layer0": 0.3218, "epoch": 0.14, "input_norm": 31.997899109522507, "input_norm/layer0": 31.997899109522507, "learning_rate": 0.005, "loss": 2.2583, "max_norm": 110.68695831298828, "max_norm/layer0": 110.68695831298828, "mean_norm": 45.98097801208496, "mean_norm/layer0": 45.98097801208496, "multicode_k": 1, "output_norm": 21.234303328196226, "output_norm/layer0": 21.234303328196226, "step": 1450 }, { "MSE": 551.1942003377276, "MSE/layer0": 551.1942003377276, "dead_code_fraction": 0.32175, "dead_code_fraction/layer0": 0.32175, "epoch": 0.15, "input_norm": 31.997910699844365, "input_norm/layer0": 31.997910699844365, "learning_rate": 0.005, "loss": 2.2395, "max_norm": 112.16923522949219, "max_norm/layer0": 112.16923522949219, "mean_norm": 46.355411529541016, "mean_norm/layer0": 46.355411529541016, "multicode_k": 1, "output_norm": 21.303704795837398, "output_norm/layer0": 21.303704795837398, "step": 1500 }, { "epoch": 0.15, "eval_MSE/layer0": 550.3311246673497, "eval_accuracy": 0.486590169556823, "eval_dead_code_fraction/layer0": 0.32665, "eval_input_norm/layer0": 31.99789719372221, "eval_loss": 2.253082513809204, "eval_multicode_k": 1, "eval_output_norm/layer0": 21.329729693291277, "eval_runtime": 160.4576, "eval_samples_per_second": 28.811, "eval_steps_per_second": 1.801, "step": 1500 }, { "MSE": 551.4163179524738, "MSE/layer0": 551.4163179524738, "dead_code_fraction": 0.3174, "dead_code_fraction/layer0": 0.3174, "epoch": 0.15, "input_norm": 31.997892700831095, "input_norm/layer0": 31.997892700831095, "learning_rate": 0.005, "loss": 2.1968, "max_norm": 113.21269989013672, "max_norm/layer0": 113.21269989013672, "mean_norm": 46.7271785736084, "mean_norm/layer0": 46.7271785736084, "multicode_k": 1, "output_norm": 21.321544698079432, "output_norm/layer0": 21.321544698079432, "step": 1550 }, { "MSE": 549.0553175354001, "MSE/layer0": 549.0553175354001, "dead_code_fraction": 0.31715, "dead_code_fraction/layer0": 0.31715, "epoch": 0.16, "input_norm": 31.99791664441427, "input_norm/layer0": 31.99791664441427, "learning_rate": 0.005, "loss": 2.2863, "max_norm": 114.45362854003906, "max_norm/layer0": 114.45362854003906, "mean_norm": 47.08230972290039, "mean_norm/layer0": 47.08230972290039, "multicode_k": 1, "output_norm": 21.38125430742899, "output_norm/layer0": 21.38125430742899, "step": 1600 }, { "MSE": 547.4109810384114, "MSE/layer0": 547.4109810384114, "dead_code_fraction": 0.3131, "dead_code_fraction/layer0": 0.3131, "epoch": 0.17, "input_norm": 31.997924680709843, "input_norm/layer0": 31.997924680709843, "learning_rate": 0.005, "loss": 2.2147, "max_norm": 115.29362487792969, "max_norm/layer0": 115.29362487792969, "mean_norm": 47.438798904418945, "mean_norm/layer0": 47.438798904418945, "multicode_k": 1, "output_norm": 21.454637037913013, "output_norm/layer0": 21.454637037913013, "step": 1650 }, { "MSE": 546.0445864868163, "MSE/layer0": 546.0445864868163, "dead_code_fraction": 0.31475, "dead_code_fraction/layer0": 0.31475, "epoch": 0.17, "input_norm": 31.997929503122954, "input_norm/layer0": 31.997929503122954, "learning_rate": 0.005, "loss": 2.2501, "max_norm": 116.09871673583984, "max_norm/layer0": 116.09871673583984, "mean_norm": 47.79398536682129, "mean_norm/layer0": 47.79398536682129, "multicode_k": 1, "output_norm": 21.4808695602417, "output_norm/layer0": 21.4808695602417, "step": 1700 }, { "MSE": 545.4600128173831, "MSE/layer0": 545.4600128173831, "dead_code_fraction": 0.30905, "dead_code_fraction/layer0": 0.30905, "epoch": 0.17, "input_norm": 31.997937501271572, "input_norm/layer0": 31.997937501271572, "learning_rate": 0.005, "loss": 2.2296, "max_norm": 117.0920181274414, "max_norm/layer0": 117.0920181274414, "mean_norm": 48.138267517089844, "mean_norm/layer0": 48.138267517089844, "multicode_k": 1, "output_norm": 21.52623297691346, "output_norm/layer0": 21.52623297691346, "step": 1750 }, { "MSE": 543.9589634704591, "MSE/layer0": 543.9589634704591, "dead_code_fraction": 0.3074, "dead_code_fraction/layer0": 0.3074, "epoch": 0.18, "input_norm": 31.997916940053315, "input_norm/layer0": 31.997916940053315, "learning_rate": 0.005, "loss": 2.1632, "max_norm": 118.44883728027344, "max_norm/layer0": 118.44883728027344, "mean_norm": 48.48598670959473, "mean_norm/layer0": 48.48598670959473, "multicode_k": 1, "output_norm": 21.572722558975222, "output_norm/layer0": 21.572722558975222, "step": 1800 }, { "MSE": 543.3154680887858, "MSE/layer0": 543.3154680887858, "dead_code_fraction": 0.30485, "dead_code_fraction/layer0": 0.30485, "epoch": 0.18, "input_norm": 31.997930173873904, "input_norm/layer0": 31.997930173873904, "learning_rate": 0.005, "loss": 2.1874, "max_norm": 119.3927001953125, "max_norm/layer0": 119.3927001953125, "mean_norm": 48.82695388793945, "mean_norm/layer0": 48.82695388793945, "multicode_k": 1, "output_norm": 21.595847959518437, "output_norm/layer0": 21.595847959518437, "step": 1850 }, { "MSE": 542.2137928263345, "MSE/layer0": 542.2137928263345, "dead_code_fraction": 0.30715, "dead_code_fraction/layer0": 0.30715, "epoch": 0.19, "input_norm": 31.997955802281705, "input_norm/layer0": 31.997955802281705, "learning_rate": 0.005, "loss": 2.2323, "max_norm": 121.5817642211914, "max_norm/layer0": 121.5817642211914, "mean_norm": 49.15649604797363, "mean_norm/layer0": 49.15649604797363, "multicode_k": 1, "output_norm": 21.63884919484457, "output_norm/layer0": 21.63884919484457, "step": 1900 }, { "MSE": 539.4505286661786, "MSE/layer0": 539.4505286661786, "dead_code_fraction": 0.3033, "dead_code_fraction/layer0": 0.3033, "epoch": 0.2, "input_norm": 31.997942549387595, "input_norm/layer0": 31.997942549387595, "learning_rate": 0.005, "loss": 2.1894, "max_norm": 123.63184356689453, "max_norm/layer0": 123.63184356689453, "mean_norm": 49.49074363708496, "mean_norm/layer0": 49.49074363708496, "multicode_k": 1, "output_norm": 21.689245723088575, "output_norm/layer0": 21.689245723088575, "step": 1950 }, { "MSE": 539.8872321573892, "MSE/layer0": 539.8872321573892, "dead_code_fraction": 0.29975, "dead_code_fraction/layer0": 0.29975, "epoch": 0.2, "input_norm": 31.997952289581303, "input_norm/layer0": 31.997952289581303, "learning_rate": 0.005, "loss": 2.1999, "max_norm": 125.97776794433594, "max_norm/layer0": 125.97776794433594, "mean_norm": 49.814876556396484, "mean_norm/layer0": 49.814876556396484, "multicode_k": 1, "output_norm": 21.72016517957053, "output_norm/layer0": 21.72016517957053, "step": 2000 }, { "epoch": 0.2, "eval_MSE/layer0": 539.0149815035619, "eval_accuracy": 0.4955417565578542, "eval_dead_code_fraction/layer0": 0.30475, "eval_input_norm/layer0": 31.997959356660743, "eval_loss": 2.1908392906188965, "eval_multicode_k": 1, "eval_output_norm/layer0": 21.766283075917258, "eval_runtime": 158.005, "eval_samples_per_second": 29.259, "eval_steps_per_second": 1.829, "step": 2000 }, { "MSE": 538.042401936849, "MSE/layer0": 538.042401936849, "dead_code_fraction": 0.30175, "dead_code_fraction/layer0": 0.30175, "epoch": 0.2, "input_norm": 31.99795293172201, "input_norm/layer0": 31.99795293172201, "learning_rate": 0.005, "loss": 2.1768, "max_norm": 127.91316986083984, "max_norm/layer0": 127.91316986083984, "mean_norm": 50.13774490356445, "mean_norm/layer0": 50.13774490356445, "multicode_k": 1, "output_norm": 21.768677377700797, "output_norm/layer0": 21.768677377700797, "step": 2050 }, { "MSE": 537.4139138285318, "MSE/layer0": 537.4139138285318, "dead_code_fraction": 0.29605, "dead_code_fraction/layer0": 0.29605, "epoch": 0.21, "input_norm": 31.997954098383584, "input_norm/layer0": 31.997954098383584, "learning_rate": 0.005, "loss": 2.1417, "max_norm": 129.62669372558594, "max_norm/layer0": 129.62669372558594, "mean_norm": 50.44980430603027, "mean_norm/layer0": 50.44980430603027, "multicode_k": 1, "output_norm": 21.780523262023927, "output_norm/layer0": 21.780523262023927, "step": 2100 }, { "MSE": 537.116479644775, "MSE/layer0": 537.116479644775, "dead_code_fraction": 0.298, "dead_code_fraction/layer0": 0.298, "epoch": 0.21, "input_norm": 31.99796335220337, "input_norm/layer0": 31.99796335220337, "learning_rate": 0.005, "loss": 2.1753, "max_norm": 131.71987915039062, "max_norm/layer0": 131.71987915039062, "mean_norm": 50.758169174194336, "mean_norm/layer0": 50.758169174194336, "multicode_k": 1, "output_norm": 21.819064016342168, "output_norm/layer0": 21.819064016342168, "step": 2150 }, { "MSE": 536.1095620218915, "MSE/layer0": 536.1095620218915, "dead_code_fraction": 0.29655, "dead_code_fraction/layer0": 0.29655, "epoch": 0.22, "input_norm": 31.997976016998287, "input_norm/layer0": 31.997976016998287, "learning_rate": 0.005, "loss": 2.1676, "max_norm": 133.67694091796875, "max_norm/layer0": 133.67694091796875, "mean_norm": 51.058135986328125, "mean_norm/layer0": 51.058135986328125, "multicode_k": 1, "output_norm": 21.83967799504598, "output_norm/layer0": 21.83967799504598, "step": 2200 }, { "MSE": 535.5964339701336, "MSE/layer0": 535.5964339701336, "dead_code_fraction": 0.2945, "dead_code_fraction/layer0": 0.2945, "epoch": 0.23, "input_norm": 31.997973279952987, "input_norm/layer0": 31.997973279952987, "learning_rate": 0.005, "loss": 2.1347, "max_norm": 135.40386962890625, "max_norm/layer0": 135.40386962890625, "mean_norm": 51.35656929016113, "mean_norm/layer0": 51.35656929016113, "multicode_k": 1, "output_norm": 21.857111148834242, "output_norm/layer0": 21.857111148834242, "step": 2250 }, { "MSE": 534.8214352925618, "MSE/layer0": 534.8214352925618, "dead_code_fraction": 0.2943, "dead_code_fraction/layer0": 0.2943, "epoch": 0.23, "input_norm": 31.99798300425212, "input_norm/layer0": 31.99798300425212, "learning_rate": 0.005, "loss": 2.1712, "max_norm": 137.13648986816406, "max_norm/layer0": 137.13648986816406, "mean_norm": 51.64659118652344, "mean_norm/layer0": 51.64659118652344, "multicode_k": 1, "output_norm": 21.901708291371662, "output_norm/layer0": 21.901708291371662, "step": 2300 }, { "MSE": 533.4100613403319, "MSE/layer0": 533.4100613403319, "dead_code_fraction": 0.29105, "dead_code_fraction/layer0": 0.29105, "epoch": 0.23, "input_norm": 31.99798559824626, "input_norm/layer0": 31.99798559824626, "learning_rate": 0.005, "loss": 2.1726, "max_norm": 138.62417602539062, "max_norm/layer0": 138.62417602539062, "mean_norm": 51.931190490722656, "mean_norm/layer0": 51.931190490722656, "multicode_k": 1, "output_norm": 21.91944276809694, "output_norm/layer0": 21.91944276809694, "step": 2350 }, { "MSE": 533.0944277445471, "MSE/layer0": 533.0944277445471, "dead_code_fraction": 0.29235, "dead_code_fraction/layer0": 0.29235, "epoch": 0.24, "input_norm": 31.99797873497009, "input_norm/layer0": 31.99797873497009, "learning_rate": 0.005, "loss": 2.1496, "max_norm": 140.219970703125, "max_norm/layer0": 140.219970703125, "mean_norm": 52.213850021362305, "mean_norm/layer0": 52.213850021362305, "multicode_k": 1, "output_norm": 21.941968046824137, "output_norm/layer0": 21.941968046824137, "step": 2400 }, { "MSE": 531.4289741007487, "MSE/layer0": 531.4289741007487, "dead_code_fraction": 0.29335, "dead_code_fraction/layer0": 0.29335, "epoch": 0.24, "input_norm": 31.998000961939493, "input_norm/layer0": 31.998000961939493, "learning_rate": 0.005, "loss": 2.153, "max_norm": 141.84396362304688, "max_norm/layer0": 141.84396362304688, "mean_norm": 52.47932052612305, "mean_norm/layer0": 52.47932052612305, "multicode_k": 1, "output_norm": 21.982840156555177, "output_norm/layer0": 21.982840156555177, "step": 2450 }, { "MSE": 531.2627974446617, "MSE/layer0": 531.2627974446617, "dead_code_fraction": 0.28885, "dead_code_fraction/layer0": 0.28885, "epoch": 0.25, "input_norm": 31.99799962997436, "input_norm/layer0": 31.99799962997436, "learning_rate": 0.005, "loss": 2.1688, "max_norm": 143.0140838623047, "max_norm/layer0": 143.0140838623047, "mean_norm": 52.74382019042969, "mean_norm/layer0": 52.74382019042969, "multicode_k": 1, "output_norm": 22.00004559199015, "output_norm/layer0": 22.00004559199015, "step": 2500 }, { "epoch": 0.25, "eval_MSE/layer0": 530.4651256365718, "eval_accuracy": 0.5006363482007701, "eval_dead_code_fraction/layer0": 0.29495, "eval_input_norm/layer0": 31.99800563596064, "eval_loss": 2.155103921890259, "eval_multicode_k": 1, "eval_output_norm/layer0": 22.022818533393835, "eval_runtime": 157.5009, "eval_samples_per_second": 29.352, "eval_steps_per_second": 1.835, "step": 2500 }, { "MSE": 530.4989952596026, "MSE/layer0": 530.4989952596026, "dead_code_fraction": 0.29025, "dead_code_fraction/layer0": 0.29025, "epoch": 0.26, "input_norm": 31.99801852544149, "input_norm/layer0": 31.99801852544149, "learning_rate": 0.005, "loss": 2.1541, "max_norm": 144.50558471679688, "max_norm/layer0": 144.50558471679688, "mean_norm": 52.992868423461914, "mean_norm/layer0": 52.992868423461914, "multicode_k": 1, "output_norm": 22.044915301005062, "output_norm/layer0": 22.044915301005062, "step": 2550 }, { "MSE": 529.2955647786457, "MSE/layer0": 529.2955647786457, "dead_code_fraction": 0.288, "dead_code_fraction/layer0": 0.288, "epoch": 0.26, "input_norm": 31.998021106719975, "input_norm/layer0": 31.998021106719975, "learning_rate": 0.005, "loss": 2.1563, "max_norm": 146.2478485107422, "max_norm/layer0": 146.2478485107422, "mean_norm": 53.24601364135742, "mean_norm/layer0": 53.24601364135742, "multicode_k": 1, "output_norm": 22.048689235051476, "output_norm/layer0": 22.048689235051476, "step": 2600 }, { "MSE": 529.877343190511, "MSE/layer0": 529.877343190511, "dead_code_fraction": 0.288, "dead_code_fraction/layer0": 0.288, "epoch": 0.27, "input_norm": 31.998024587631217, "input_norm/layer0": 31.998024587631217, "learning_rate": 0.005, "loss": 2.1382, "max_norm": 147.41587829589844, "max_norm/layer0": 147.41587829589844, "mean_norm": 53.48561096191406, "mean_norm/layer0": 53.48561096191406, "multicode_k": 1, "output_norm": 22.0797532526652, "output_norm/layer0": 22.0797532526652, "step": 2650 }, { "MSE": 528.3514750671387, "MSE/layer0": 528.3514750671387, "dead_code_fraction": 0.28825, "dead_code_fraction/layer0": 0.28825, "epoch": 0.27, "input_norm": 31.99804752349852, "input_norm/layer0": 31.99804752349852, "learning_rate": 0.005, "loss": 2.1742, "max_norm": 148.7862091064453, "max_norm/layer0": 148.7862091064453, "mean_norm": 53.71611213684082, "mean_norm/layer0": 53.71611213684082, "multicode_k": 1, "output_norm": 22.09869578997295, "output_norm/layer0": 22.09869578997295, "step": 2700 }, { "MSE": 528.2884072875979, "MSE/layer0": 528.2884072875979, "dead_code_fraction": 0.28335, "dead_code_fraction/layer0": 0.28335, "epoch": 0.28, "input_norm": 31.998042856852216, "input_norm/layer0": 31.998042856852216, "learning_rate": 0.005, "loss": 2.1277, "max_norm": 150.35140991210938, "max_norm/layer0": 150.35140991210938, "mean_norm": 53.946285247802734, "mean_norm/layer0": 53.946285247802734, "multicode_k": 1, "output_norm": 22.106029316584255, "output_norm/layer0": 22.106029316584255, "step": 2750 }, { "MSE": 527.2996965026854, "MSE/layer0": 527.2996965026854, "dead_code_fraction": 0.2844, "dead_code_fraction/layer0": 0.2844, "epoch": 0.28, "input_norm": 31.9980613454183, "input_norm/layer0": 31.9980613454183, "learning_rate": 0.005, "loss": 2.1676, "max_norm": 152.27590942382812, "max_norm/layer0": 152.27590942382812, "mean_norm": 54.16430473327637, "mean_norm/layer0": 54.16430473327637, "multicode_k": 1, "output_norm": 22.141783040364587, "output_norm/layer0": 22.141783040364587, "step": 2800 }, { "MSE": 527.5191156514486, "MSE/layer0": 527.5191156514486, "dead_code_fraction": 0.28045, "dead_code_fraction/layer0": 0.28045, "epoch": 0.28, "input_norm": 31.998067801793418, "input_norm/layer0": 31.998067801793418, "learning_rate": 0.005, "loss": 2.1076, "max_norm": 153.54779052734375, "max_norm/layer0": 153.54779052734375, "mean_norm": 54.38737678527832, "mean_norm/layer0": 54.38737678527832, "multicode_k": 1, "output_norm": 22.13956375757853, "output_norm/layer0": 22.13956375757853, "step": 2850 }, { "MSE": 527.3752633666991, "MSE/layer0": 527.3752633666991, "dead_code_fraction": 0.28165, "dead_code_fraction/layer0": 0.28165, "epoch": 0.29, "input_norm": 31.998070557912186, "input_norm/layer0": 31.998070557912186, "learning_rate": 0.005, "loss": 2.1379, "max_norm": 155.25857543945312, "max_norm/layer0": 155.25857543945312, "mean_norm": 54.598867416381836, "mean_norm/layer0": 54.598867416381836, "multicode_k": 1, "output_norm": 22.1554997475942, "output_norm/layer0": 22.1554997475942, "step": 2900 }, { "MSE": 525.2142114257812, "MSE/layer0": 525.2142114257812, "dead_code_fraction": 0.2841, "dead_code_fraction/layer0": 0.2841, "epoch": 0.29, "input_norm": 31.998104591369632, "input_norm/layer0": 31.998104591369632, "learning_rate": 0.005, "loss": 2.1887, "max_norm": 157.656494140625, "max_norm/layer0": 157.656494140625, "mean_norm": 54.80296516418457, "mean_norm/layer0": 54.80296516418457, "multicode_k": 1, "output_norm": 22.194608500798537, "output_norm/layer0": 22.194608500798537, "step": 2950 }, { "MSE": 525.7639581807456, "MSE/layer0": 525.7639581807456, "dead_code_fraction": 0.28035, "dead_code_fraction/layer0": 0.28035, "epoch": 0.3, "input_norm": 31.998085311253874, "input_norm/layer0": 31.998085311253874, "learning_rate": 0.005, "loss": 2.1108, "max_norm": 159.0706787109375, "max_norm/layer0": 159.0706787109375, "mean_norm": 55.01374816894531, "mean_norm/layer0": 55.01374816894531, "multicode_k": 1, "output_norm": 22.19143549601236, "output_norm/layer0": 22.19143549601236, "step": 3000 }, { "epoch": 0.3, "eval_MSE/layer0": 524.9529532255765, "eval_accuracy": 0.5051228197488481, "eval_dead_code_fraction/layer0": 0.2809, "eval_input_norm/layer0": 31.998092802783354, "eval_loss": 2.126948595046997, "eval_multicode_k": 1, "eval_output_norm/layer0": 22.207113418645122, "eval_runtime": 157.5523, "eval_samples_per_second": 29.343, "eval_steps_per_second": 1.834, "step": 3000 }, { "MSE": 525.49979405721, "MSE/layer0": 525.49979405721, "dead_code_fraction": 0.28015, "dead_code_fraction/layer0": 0.28015, "epoch": 0.3, "input_norm": 31.998098812103272, "input_norm/layer0": 31.998098812103272, "learning_rate": 0.005, "loss": 2.1814, "max_norm": 160.52183532714844, "max_norm/layer0": 160.52183532714844, "mean_norm": 55.21175575256348, "mean_norm/layer0": 55.21175575256348, "multicode_k": 1, "output_norm": 22.205291633605963, "output_norm/layer0": 22.205291633605963, "step": 3050 }, { "MSE": 525.1535092671712, "MSE/layer0": 525.1535092671712, "dead_code_fraction": 0.27915, "dead_code_fraction/layer0": 0.27915, "epoch": 0.31, "input_norm": 31.998094654083246, "input_norm/layer0": 31.998094654083246, "learning_rate": 0.005, "loss": 2.1228, "max_norm": 161.857666015625, "max_norm/layer0": 161.857666015625, "mean_norm": 55.42117881774902, "mean_norm/layer0": 55.42117881774902, "multicode_k": 1, "output_norm": 22.20783314704896, "output_norm/layer0": 22.20783314704896, "step": 3100 }, { "MSE": 524.7367662556965, "MSE/layer0": 524.7367662556965, "dead_code_fraction": 0.27865, "dead_code_fraction/layer0": 0.27865, "epoch": 0.32, "input_norm": 31.99810951550802, "input_norm/layer0": 31.99810951550802, "learning_rate": 0.005, "loss": 2.1582, "max_norm": 163.2421417236328, "max_norm/layer0": 163.2421417236328, "mean_norm": 55.61536979675293, "mean_norm/layer0": 55.61536979675293, "multicode_k": 1, "output_norm": 22.229626963933313, "output_norm/layer0": 22.229626963933313, "step": 3150 }, { "MSE": 523.2996738688151, "MSE/layer0": 523.2996738688151, "dead_code_fraction": 0.27935, "dead_code_fraction/layer0": 0.27935, "epoch": 0.32, "input_norm": 31.998107938766474, "input_norm/layer0": 31.998107938766474, "learning_rate": 0.005, "loss": 2.0913, "max_norm": 164.34832763671875, "max_norm/layer0": 164.34832763671875, "mean_norm": 55.82136154174805, "mean_norm/layer0": 55.82136154174805, "multicode_k": 1, "output_norm": 22.247861604690552, "output_norm/layer0": 22.247861604690552, "step": 3200 }, { "MSE": 524.0594484965007, "MSE/layer0": 524.0594484965007, "dead_code_fraction": 0.2756, "dead_code_fraction/layer0": 0.2756, "epoch": 0.33, "input_norm": 31.998120482762648, "input_norm/layer0": 31.998120482762648, "learning_rate": 0.005, "loss": 2.1073, "max_norm": 165.75938415527344, "max_norm/layer0": 165.75938415527344, "mean_norm": 56.010887145996094, "mean_norm/layer0": 56.010887145996094, "multicode_k": 1, "output_norm": 22.245831327438353, "output_norm/layer0": 22.245831327438353, "step": 3250 }, { "MSE": 525.364818725586, "MSE/layer0": 525.364818725586, "dead_code_fraction": 0.2756, "dead_code_fraction/layer0": 0.2756, "epoch": 0.33, "input_norm": 31.998132244745904, "input_norm/layer0": 31.998132244745904, "learning_rate": 0.005, "loss": 2.0807, "max_norm": 166.85643005371094, "max_norm/layer0": 166.85643005371094, "mean_norm": 56.187782287597656, "mean_norm/layer0": 56.187782287597656, "multicode_k": 1, "output_norm": 22.242043924331664, "output_norm/layer0": 22.242043924331664, "step": 3300 }, { "MSE": 523.8938673400878, "MSE/layer0": 523.8938673400878, "dead_code_fraction": 0.2733, "dead_code_fraction/layer0": 0.2733, "epoch": 0.34, "input_norm": 31.998154455820725, "input_norm/layer0": 31.998154455820725, "learning_rate": 0.005, "loss": 2.1234, "max_norm": 167.70089721679688, "max_norm/layer0": 167.70089721679688, "mean_norm": 56.36995506286621, "mean_norm/layer0": 56.36995506286621, "multicode_k": 1, "output_norm": 22.246343409220387, "output_norm/layer0": 22.246343409220387, "step": 3350 }, { "MSE": 522.7465829976402, "MSE/layer0": 522.7465829976402, "dead_code_fraction": 0.2741, "dead_code_fraction/layer0": 0.2741, "epoch": 0.34, "input_norm": 31.998157631556197, "input_norm/layer0": 31.998157631556197, "learning_rate": 0.005, "loss": 2.1138, "max_norm": 168.70301818847656, "max_norm/layer0": 168.70301818847656, "mean_norm": 56.55203437805176, "mean_norm/layer0": 56.55203437805176, "multicode_k": 1, "output_norm": 22.282327626546234, "output_norm/layer0": 22.282327626546234, "step": 3400 }, { "MSE": 522.0263201395671, "MSE/layer0": 522.0263201395671, "dead_code_fraction": 0.27335, "dead_code_fraction/layer0": 0.27335, "epoch": 0.34, "input_norm": 31.99815892855326, "input_norm/layer0": 31.99815892855326, "learning_rate": 0.005, "loss": 2.103, "max_norm": 169.3920135498047, "max_norm/layer0": 169.3920135498047, "mean_norm": 56.73575782775879, "mean_norm/layer0": 56.73575782775879, "multicode_k": 1, "output_norm": 22.29100898424786, "output_norm/layer0": 22.29100898424786, "step": 3450 }, { "MSE": 521.5609470621745, "MSE/layer0": 521.5609470621745, "dead_code_fraction": 0.27265, "dead_code_fraction/layer0": 0.27265, "epoch": 0.35, "input_norm": 31.99817145665487, "input_norm/layer0": 31.99817145665487, "learning_rate": 0.005, "loss": 2.1045, "max_norm": 170.13829040527344, "max_norm/layer0": 170.13829040527344, "mean_norm": 56.91371726989746, "mean_norm/layer0": 56.91371726989746, "multicode_k": 1, "output_norm": 22.309985055923462, "output_norm/layer0": 22.309985055923462, "step": 3500 }, { "epoch": 0.35, "eval_MSE/layer0": 523.0844207110149, "eval_accuracy": 0.5078879054512807, "eval_dead_code_fraction/layer0": 0.27345, "eval_input_norm/layer0": 31.998171135689724, "eval_loss": 2.1130311489105225, "eval_multicode_k": 1, "eval_output_norm/layer0": 22.351890057112634, "eval_runtime": 158.0171, "eval_samples_per_second": 29.256, "eval_steps_per_second": 1.829, "step": 3500 }, { "MSE": 522.4261043294274, "MSE/layer0": 522.4261043294274, "dead_code_fraction": 0.27245, "dead_code_fraction/layer0": 0.27245, "epoch": 0.35, "input_norm": 31.998184868494675, "input_norm/layer0": 31.998184868494675, "learning_rate": 0.005, "loss": 2.1296, "max_norm": 171.21067810058594, "max_norm/layer0": 171.21067810058594, "mean_norm": 57.08243370056152, "mean_norm/layer0": 57.08243370056152, "multicode_k": 1, "output_norm": 22.307131767272942, "output_norm/layer0": 22.307131767272942, "step": 3550 }, { "MSE": 520.5630591837569, "MSE/layer0": 520.5630591837569, "dead_code_fraction": 0.2715, "dead_code_fraction/layer0": 0.2715, "epoch": 0.36, "input_norm": 31.998175201416018, "input_norm/layer0": 31.998175201416018, "learning_rate": 0.005, "loss": 2.0606, "max_norm": 172.018798828125, "max_norm/layer0": 172.018798828125, "mean_norm": 57.259552001953125, "mean_norm/layer0": 57.259552001953125, "multicode_k": 1, "output_norm": 22.33381741523742, "output_norm/layer0": 22.33381741523742, "step": 3600 }, { "MSE": 521.8190139770511, "MSE/layer0": 521.8190139770511, "dead_code_fraction": 0.26915, "dead_code_fraction/layer0": 0.26915, "epoch": 0.36, "input_norm": 31.998206920623783, "input_norm/layer0": 31.998206920623783, "learning_rate": 0.005, "loss": 2.1264, "max_norm": 173.08360290527344, "max_norm/layer0": 173.08360290527344, "mean_norm": 57.425479888916016, "mean_norm/layer0": 57.425479888916016, "multicode_k": 1, "output_norm": 22.321163501739488, "output_norm/layer0": 22.321163501739488, "step": 3650 }, { "MSE": 520.2701113382976, "MSE/layer0": 520.2701113382976, "dead_code_fraction": 0.26935, "dead_code_fraction/layer0": 0.26935, "epoch": 0.37, "input_norm": 31.99821238517761, "input_norm/layer0": 31.99821238517761, "learning_rate": 0.005, "loss": 2.1028, "max_norm": 174.31561279296875, "max_norm/layer0": 174.31561279296875, "mean_norm": 57.58916091918945, "mean_norm/layer0": 57.58916091918945, "multicode_k": 1, "output_norm": 22.34191367149354, "output_norm/layer0": 22.34191367149354, "step": 3700 }, { "MSE": 520.4189120992024, "MSE/layer0": 520.4189120992024, "dead_code_fraction": 0.26865, "dead_code_fraction/layer0": 0.26865, "epoch": 0.38, "input_norm": 31.99821661313375, "input_norm/layer0": 31.99821661313375, "learning_rate": 0.005, "loss": 2.106, "max_norm": 175.09739685058594, "max_norm/layer0": 175.09739685058594, "mean_norm": 57.75008010864258, "mean_norm/layer0": 57.75008010864258, "multicode_k": 1, "output_norm": 22.352550570170077, "output_norm/layer0": 22.352550570170077, "step": 3750 }, { "MSE": 520.3332616170245, "MSE/layer0": 520.3332616170245, "dead_code_fraction": 0.2705, "dead_code_fraction/layer0": 0.2705, "epoch": 0.38, "input_norm": 31.998228356043505, "input_norm/layer0": 31.998228356043505, "learning_rate": 0.005, "loss": 2.1318, "max_norm": 175.85955810546875, "max_norm/layer0": 175.85955810546875, "mean_norm": 57.9084529876709, "mean_norm/layer0": 57.9084529876709, "multicode_k": 1, "output_norm": 22.355525690714526, "output_norm/layer0": 22.355525690714526, "step": 3800 }, { "MSE": 519.1107161458334, "MSE/layer0": 519.1107161458334, "dead_code_fraction": 0.26585, "dead_code_fraction/layer0": 0.26585, "epoch": 0.39, "input_norm": 31.998228273391724, "input_norm/layer0": 31.998228273391724, "learning_rate": 0.005, "loss": 2.1063, "max_norm": 176.55845642089844, "max_norm/layer0": 176.55845642089844, "mean_norm": 58.0648193359375, "mean_norm/layer0": 58.0648193359375, "multicode_k": 1, "output_norm": 22.375479180018097, "output_norm/layer0": 22.375479180018097, "step": 3850 }, { "MSE": 520.279450937907, "MSE/layer0": 520.279450937907, "dead_code_fraction": 0.26475, "dead_code_fraction/layer0": 0.26475, "epoch": 0.39, "input_norm": 31.998248408635455, "input_norm/layer0": 31.998248408635455, "learning_rate": 0.005, "loss": 2.1158, "max_norm": 177.40316772460938, "max_norm/layer0": 177.40316772460938, "mean_norm": 58.21473693847656, "mean_norm/layer0": 58.21473693847656, "multicode_k": 1, "output_norm": 22.37501454989114, "output_norm/layer0": 22.37501454989114, "step": 3900 }, { "MSE": 520.3905441284179, "MSE/layer0": 520.3905441284179, "dead_code_fraction": 0.26645, "dead_code_fraction/layer0": 0.26645, "epoch": 0.4, "input_norm": 31.998255596160874, "input_norm/layer0": 31.998255596160874, "learning_rate": 0.005, "loss": 2.0919, "max_norm": 178.25682067871094, "max_norm/layer0": 178.25682067871094, "mean_norm": 58.36372947692871, "mean_norm/layer0": 58.36372947692871, "multicode_k": 1, "output_norm": 22.360030002593987, "output_norm/layer0": 22.360030002593987, "step": 3950 }, { "MSE": 520.0447977193196, "MSE/layer0": 520.0447977193196, "dead_code_fraction": 0.2638, "dead_code_fraction/layer0": 0.2638, "epoch": 0.4, "input_norm": 31.998260914484668, "input_norm/layer0": 31.998260914484668, "learning_rate": 0.005, "loss": 2.0944, "max_norm": 178.8519287109375, "max_norm/layer0": 178.8519287109375, "mean_norm": 58.51635932922363, "mean_norm/layer0": 58.51635932922363, "multicode_k": 1, "output_norm": 22.37334650675455, "output_norm/layer0": 22.37334650675455, "step": 4000 }, { "epoch": 0.4, "eval_MSE/layer0": 519.885230389297, "eval_accuracy": 0.5089345655588774, "eval_dead_code_fraction/layer0": 0.2655, "eval_input_norm/layer0": 31.998263675723535, "eval_loss": 2.0995683670043945, "eval_multicode_k": 1, "eval_output_norm/layer0": 22.39302826134025, "eval_runtime": 158.5492, "eval_samples_per_second": 29.158, "eval_steps_per_second": 1.823, "step": 4000 }, { "MSE": 519.3041878255204, "MSE/layer0": 519.3041878255204, "dead_code_fraction": 0.2634, "dead_code_fraction/layer0": 0.2634, "epoch": 0.41, "input_norm": 31.998263047536216, "input_norm/layer0": 31.998263047536216, "learning_rate": 0.005, "loss": 2.0844, "max_norm": 179.35386657714844, "max_norm/layer0": 179.35386657714844, "mean_norm": 58.670223236083984, "mean_norm/layer0": 58.670223236083984, "multicode_k": 1, "output_norm": 22.375990848541264, "output_norm/layer0": 22.375990848541264, "step": 4050 }, { "MSE": 520.2196419270836, "MSE/layer0": 520.2196419270836, "dead_code_fraction": 0.2615, "dead_code_fraction/layer0": 0.2615, "epoch": 0.41, "input_norm": 31.99826691627503, "input_norm/layer0": 31.99826691627503, "learning_rate": 0.005, "loss": 2.0983, "max_norm": 179.91224670410156, "max_norm/layer0": 179.91224670410156, "mean_norm": 58.826820373535156, "mean_norm/layer0": 58.826820373535156, "multicode_k": 1, "output_norm": 22.372630256017054, "output_norm/layer0": 22.372630256017054, "step": 4100 }, { "MSE": 519.6039750162761, "MSE/layer0": 519.6039750162761, "dead_code_fraction": 0.26085, "dead_code_fraction/layer0": 0.26085, "epoch": 0.41, "input_norm": 31.998284708658858, "input_norm/layer0": 31.998284708658858, "learning_rate": 0.005, "loss": 2.0974, "max_norm": 180.4697265625, "max_norm/layer0": 180.4697265625, "mean_norm": 58.97820472717285, "mean_norm/layer0": 58.97820472717285, "multicode_k": 1, "output_norm": 22.377655258178706, "output_norm/layer0": 22.377655258178706, "step": 4150 }, { "MSE": 518.396603800456, "MSE/layer0": 518.396603800456, "dead_code_fraction": 0.26145, "dead_code_fraction/layer0": 0.26145, "epoch": 0.42, "input_norm": 31.998291470209757, "input_norm/layer0": 31.998291470209757, "learning_rate": 0.005, "loss": 2.1155, "max_norm": 180.8781280517578, "max_norm/layer0": 180.8781280517578, "mean_norm": 59.12506866455078, "mean_norm/layer0": 59.12506866455078, "multicode_k": 1, "output_norm": 22.407741336822514, "output_norm/layer0": 22.407741336822514, "step": 4200 }, { "MSE": 518.4843705240887, "MSE/layer0": 518.4843705240887, "dead_code_fraction": 0.2605, "dead_code_fraction/layer0": 0.2605, "epoch": 0.42, "input_norm": 31.99829554239909, "input_norm/layer0": 31.99829554239909, "learning_rate": 0.005, "loss": 2.1004, "max_norm": 181.51483154296875, "max_norm/layer0": 181.51483154296875, "mean_norm": 59.271942138671875, "mean_norm/layer0": 59.271942138671875, "multicode_k": 1, "output_norm": 22.40968936284383, "output_norm/layer0": 22.40968936284383, "step": 4250 }, { "MSE": 518.0018126932782, "MSE/layer0": 518.0018126932782, "dead_code_fraction": 0.2586, "dead_code_fraction/layer0": 0.2586, "epoch": 0.43, "input_norm": 31.998309599558517, "input_norm/layer0": 31.998309599558517, "learning_rate": 0.005, "loss": 2.0848, "max_norm": 181.8904266357422, "max_norm/layer0": 181.8904266357422, "mean_norm": 59.40836715698242, "mean_norm/layer0": 59.40836715698242, "multicode_k": 1, "output_norm": 22.42666608492533, "output_norm/layer0": 22.42666608492533, "step": 4300 }, { "MSE": 518.2576261901858, "MSE/layer0": 518.2576261901858, "dead_code_fraction": 0.25825, "dead_code_fraction/layer0": 0.25825, "epoch": 0.43, "input_norm": 31.99831358591716, "input_norm/layer0": 31.99831358591716, "learning_rate": 0.005, "loss": 2.0778, "max_norm": 182.52023315429688, "max_norm/layer0": 182.52023315429688, "mean_norm": 59.546592712402344, "mean_norm/layer0": 59.546592712402344, "multicode_k": 1, "output_norm": 22.415684442520128, "output_norm/layer0": 22.415684442520128, "step": 4350 }, { "MSE": 517.176724141439, "MSE/layer0": 517.176724141439, "dead_code_fraction": 0.25845, "dead_code_fraction/layer0": 0.25845, "epoch": 0.44, "input_norm": 31.998328673044824, "input_norm/layer0": 31.998328673044824, "learning_rate": 0.005, "loss": 2.0989, "max_norm": 183.30308532714844, "max_norm/layer0": 183.30308532714844, "mean_norm": 59.680843353271484, "mean_norm/layer0": 59.680843353271484, "multicode_k": 1, "output_norm": 22.435629587173473, "output_norm/layer0": 22.435629587173473, "step": 4400 }, { "MSE": 516.945845082601, "MSE/layer0": 516.945845082601, "dead_code_fraction": 0.2589, "dead_code_fraction/layer0": 0.2589, "epoch": 0.45, "input_norm": 31.998329006830847, "input_norm/layer0": 31.998329006830847, "learning_rate": 0.005, "loss": 2.087, "max_norm": 184.17068481445312, "max_norm/layer0": 184.17068481445312, "mean_norm": 59.81003379821777, "mean_norm/layer0": 59.81003379821777, "multicode_k": 1, "output_norm": 22.447185754775994, "output_norm/layer0": 22.447185754775994, "step": 4450 }, { "MSE": 517.1110377502445, "MSE/layer0": 517.1110377502445, "dead_code_fraction": 0.25715, "dead_code_fraction/layer0": 0.25715, "epoch": 0.45, "input_norm": 31.998346713384, "input_norm/layer0": 31.998346713384, "learning_rate": 0.005, "loss": 2.1314, "max_norm": 185.53944396972656, "max_norm/layer0": 185.53944396972656, "mean_norm": 59.940223693847656, "mean_norm/layer0": 59.940223693847656, "multicode_k": 1, "output_norm": 22.444066270192472, "output_norm/layer0": 22.444066270192472, "step": 4500 }, { "epoch": 0.45, "eval_MSE/layer0": 517.038530914551, "eval_accuracy": 0.5114514130862962, "eval_dead_code_fraction/layer0": 0.25675, "eval_input_norm/layer0": 31.998349543131468, "eval_loss": 2.0859904289245605, "eval_multicode_k": 1, "eval_output_norm/layer0": 22.471955899059722, "eval_runtime": 158.2677, "eval_samples_per_second": 29.21, "eval_steps_per_second": 1.826, "step": 4500 }, { "MSE": 516.5108834838866, "MSE/layer0": 516.5108834838866, "dead_code_fraction": 0.2556, "dead_code_fraction/layer0": 0.2556, "epoch": 0.46, "input_norm": 31.99834162076315, "input_norm/layer0": 31.99834162076315, "learning_rate": 0.005, "loss": 2.0594, "max_norm": 186.7916259765625, "max_norm/layer0": 186.7916259765625, "mean_norm": 60.06948661804199, "mean_norm/layer0": 60.06948661804199, "multicode_k": 1, "output_norm": 22.45672461509705, "output_norm/layer0": 22.45672461509705, "step": 4550 }, { "MSE": 517.5840914408367, "MSE/layer0": 517.5840914408367, "dead_code_fraction": 0.25275, "dead_code_fraction/layer0": 0.25275, "epoch": 0.46, "input_norm": 31.998361612955726, "input_norm/layer0": 31.998361612955726, "learning_rate": 0.005, "loss": 2.116, "max_norm": 187.44607543945312, "max_norm/layer0": 187.44607543945312, "mean_norm": 60.1932258605957, "mean_norm/layer0": 60.1932258605957, "multicode_k": 1, "output_norm": 22.445463349024458, "output_norm/layer0": 22.445463349024458, "step": 4600 }, { "MSE": 515.9212077331542, "MSE/layer0": 515.9212077331542, "dead_code_fraction": 0.2549, "dead_code_fraction/layer0": 0.2549, "epoch": 0.47, "input_norm": 31.998358809153245, "input_norm/layer0": 31.998358809153245, "learning_rate": 0.005, "loss": 2.0851, "max_norm": 187.99063110351562, "max_norm/layer0": 187.99063110351562, "mean_norm": 60.3179931640625, "mean_norm/layer0": 60.3179931640625, "multicode_k": 1, "output_norm": 22.468881686528533, "output_norm/layer0": 22.468881686528533, "step": 4650 }, { "MSE": 516.2712020365398, "MSE/layer0": 516.2712020365398, "dead_code_fraction": 0.2539, "dead_code_fraction/layer0": 0.2539, "epoch": 0.47, "input_norm": 31.99836943308513, "input_norm/layer0": 31.99836943308513, "learning_rate": 0.005, "loss": 2.0646, "max_norm": 188.7075653076172, "max_norm/layer0": 188.7075653076172, "mean_norm": 60.442317962646484, "mean_norm/layer0": 60.442317962646484, "multicode_k": 1, "output_norm": 22.458747002283737, "output_norm/layer0": 22.458747002283737, "step": 4700 }, { "MSE": 515.2177518717448, "MSE/layer0": 515.2177518717448, "dead_code_fraction": 0.25225, "dead_code_fraction/layer0": 0.25225, "epoch": 0.47, "input_norm": 31.99837938944498, "input_norm/layer0": 31.99837938944498, "learning_rate": 0.005, "loss": 2.0508, "max_norm": 189.4132080078125, "max_norm/layer0": 189.4132080078125, "mean_norm": 60.56760787963867, "mean_norm/layer0": 60.56760787963867, "multicode_k": 1, "output_norm": 22.48432564417522, "output_norm/layer0": 22.48432564417522, "step": 4750 }, { "MSE": 517.7876967760659, "MSE/layer0": 517.7876967760659, "dead_code_fraction": 0.2504, "dead_code_fraction/layer0": 0.2504, "epoch": 1.0, "input_norm": 31.998372135461928, "input_norm/layer0": 31.998372135461928, "learning_rate": 0.005, "loss": 2.0347, "max_norm": 189.93084716796875, "max_norm/layer0": 189.93084716796875, "mean_norm": 60.689674377441406, "mean_norm/layer0": 60.689674377441406, "multicode_k": 1, "output_norm": 22.4315491425679, "output_norm/layer0": 22.4315491425679, "step": 4800 }, { "MSE": 515.4498620096845, "MSE/layer0": 515.4498620096845, "dead_code_fraction": 0.2505, "dead_code_fraction/layer0": 0.2505, "epoch": 1.01, "input_norm": 31.998399356206253, "input_norm/layer0": 31.998399356206253, "learning_rate": 0.005, "loss": 2.1351, "max_norm": 190.8528289794922, "max_norm/layer0": 190.8528289794922, "mean_norm": 60.80255126953125, "mean_norm/layer0": 60.80255126953125, "multicode_k": 1, "output_norm": 22.488870484034226, "output_norm/layer0": 22.488870484034226, "step": 4850 }, { "MSE": 515.1998943074543, "MSE/layer0": 515.1998943074543, "dead_code_fraction": 0.24975, "dead_code_fraction/layer0": 0.24975, "epoch": 1.01, "input_norm": 31.998391094207765, "input_norm/layer0": 31.998391094207765, "learning_rate": 0.005, "loss": 2.0344, "max_norm": 191.88272094726562, "max_norm/layer0": 191.88272094726562, "mean_norm": 60.923635482788086, "mean_norm/layer0": 60.923635482788086, "multicode_k": 1, "output_norm": 22.493143533070885, "output_norm/layer0": 22.493143533070885, "step": 4900 }, { "MSE": 516.1670984395346, "MSE/layer0": 516.1670984395346, "dead_code_fraction": 0.2478, "dead_code_fraction/layer0": 0.2478, "epoch": 1.02, "input_norm": 31.99841277122497, "input_norm/layer0": 31.99841277122497, "learning_rate": 0.005, "loss": 2.0591, "max_norm": 192.84405517578125, "max_norm/layer0": 192.84405517578125, "mean_norm": 61.04226303100586, "mean_norm/layer0": 61.04226303100586, "multicode_k": 1, "output_norm": 22.47217222531637, "output_norm/layer0": 22.47217222531637, "step": 4950 }, { "MSE": 515.1936482747396, "MSE/layer0": 515.1936482747396, "dead_code_fraction": 0.2468, "dead_code_fraction/layer0": 0.2468, "epoch": 1.02, "input_norm": 31.998419497807816, "input_norm/layer0": 31.998419497807816, "learning_rate": 0.005, "loss": 2.0685, "max_norm": 193.819580078125, "max_norm/layer0": 193.819580078125, "mean_norm": 61.15685844421387, "mean_norm/layer0": 61.15685844421387, "multicode_k": 1, "output_norm": 22.499980732599887, "output_norm/layer0": 22.499980732599887, "step": 5000 }, { "epoch": 1.02, "eval_MSE/layer0": 514.3711726943474, "eval_accuracy": 0.5131406590660113, "eval_dead_code_fraction/layer0": 0.24975, "eval_input_norm/layer0": 31.998424857410036, "eval_loss": 2.076988458633423, "eval_multicode_k": 1, "eval_output_norm/layer0": 22.494330299537523, "eval_runtime": 157.9418, "eval_samples_per_second": 29.27, "eval_steps_per_second": 1.83, "step": 5000 }, { "MSE": 515.1962452189127, "MSE/layer0": 515.1962452189127, "dead_code_fraction": 0.24585, "dead_code_fraction/layer0": 0.24585, "epoch": 1.03, "input_norm": 31.998427387873342, "input_norm/layer0": 31.998427387873342, "learning_rate": 0.005, "loss": 2.07, "max_norm": 194.8584442138672, "max_norm/layer0": 194.8584442138672, "mean_norm": 61.2799015045166, "mean_norm/layer0": 61.2799015045166, "multicode_k": 1, "output_norm": 22.492868417104084, "output_norm/layer0": 22.492868417104084, "step": 5050 }, { "MSE": 514.6688102722171, "MSE/layer0": 514.6688102722171, "dead_code_fraction": 0.24495, "dead_code_fraction/layer0": 0.24495, "epoch": 1.03, "input_norm": 31.99842824935913, "input_norm/layer0": 31.99842824935913, "learning_rate": 0.005, "loss": 2.0308, "max_norm": 195.33718872070312, "max_norm/layer0": 195.33718872070312, "mean_norm": 61.397443771362305, "mean_norm/layer0": 61.397443771362305, "multicode_k": 1, "output_norm": 22.502648471196487, "output_norm/layer0": 22.502648471196487, "step": 5100 }, { "MSE": 515.46877843221, "MSE/layer0": 515.46877843221, "dead_code_fraction": 0.2426, "dead_code_fraction/layer0": 0.2426, "epoch": 1.04, "input_norm": 31.998430423736572, "input_norm/layer0": 31.998430423736572, "learning_rate": 0.005, "loss": 2.0427, "max_norm": 195.8143310546875, "max_norm/layer0": 195.8143310546875, "mean_norm": 61.51255416870117, "mean_norm/layer0": 61.51255416870117, "multicode_k": 1, "output_norm": 22.500031328201295, "output_norm/layer0": 22.500031328201295, "step": 5150 }, { "MSE": 515.1060639953612, "MSE/layer0": 515.1060639953612, "dead_code_fraction": 0.2439, "dead_code_fraction/layer0": 0.2439, "epoch": 1.04, "input_norm": 31.998455877304075, "input_norm/layer0": 31.998455877304075, "learning_rate": 0.005, "loss": 2.1036, "max_norm": 196.40415954589844, "max_norm/layer0": 196.40415954589844, "mean_norm": 61.620216369628906, "mean_norm/layer0": 61.620216369628906, "multicode_k": 1, "output_norm": 22.504082736968975, "output_norm/layer0": 22.504082736968975, "step": 5200 }, { "MSE": 514.64603418986, "MSE/layer0": 514.64603418986, "dead_code_fraction": 0.24415, "dead_code_fraction/layer0": 0.24415, "epoch": 1.05, "input_norm": 31.99846438090008, "input_norm/layer0": 31.99846438090008, "learning_rate": 0.005, "loss": 2.1032, "max_norm": 197.31690979003906, "max_norm/layer0": 197.31690979003906, "mean_norm": 61.73128890991211, "mean_norm/layer0": 61.73128890991211, "multicode_k": 1, "output_norm": 22.51759773572286, "output_norm/layer0": 22.51759773572286, "step": 5250 }, { "MSE": 514.5095549011231, "MSE/layer0": 514.5095549011231, "dead_code_fraction": 0.24245, "dead_code_fraction/layer0": 0.24245, "epoch": 1.05, "input_norm": 31.998469727834063, "input_norm/layer0": 31.998469727834063, "learning_rate": 0.005, "loss": 2.0884, "max_norm": 198.30520629882812, "max_norm/layer0": 198.30520629882812, "mean_norm": 61.84503173828125, "mean_norm/layer0": 61.84503173828125, "multicode_k": 1, "output_norm": 22.52236960728964, "output_norm/layer0": 22.52236960728964, "step": 5300 }, { "MSE": 514.2185153198242, "MSE/layer0": 514.2185153198242, "dead_code_fraction": 0.2423, "dead_code_fraction/layer0": 0.2423, "epoch": 1.06, "input_norm": 31.99846864700317, "input_norm/layer0": 31.99846864700317, "learning_rate": 0.005, "loss": 2.0541, "max_norm": 198.76315307617188, "max_norm/layer0": 198.76315307617188, "mean_norm": 61.954532623291016, "mean_norm/layer0": 61.954532623291016, "multicode_k": 1, "output_norm": 22.523964621225986, "output_norm/layer0": 22.523964621225986, "step": 5350 }, { "MSE": 514.2201423136396, "MSE/layer0": 514.2201423136396, "dead_code_fraction": 0.24065, "dead_code_fraction/layer0": 0.24065, "epoch": 1.06, "input_norm": 31.99848121643067, "input_norm/layer0": 31.99848121643067, "learning_rate": 0.005, "loss": 2.0722, "max_norm": 199.5216522216797, "max_norm/layer0": 199.5216522216797, "mean_norm": 62.062015533447266, "mean_norm/layer0": 62.062015533447266, "multicode_k": 1, "output_norm": 22.529434289932254, "output_norm/layer0": 22.529434289932254, "step": 5400 }, { "MSE": 513.0346335347496, "MSE/layer0": 513.0346335347496, "dead_code_fraction": 0.2396, "dead_code_fraction/layer0": 0.2396, "epoch": 1.07, "input_norm": 31.998482402165727, "input_norm/layer0": 31.998482402165727, "learning_rate": 0.005, "loss": 2.0839, "max_norm": 199.89144897460938, "max_norm/layer0": 199.89144897460938, "mean_norm": 62.16894721984863, "mean_norm/layer0": 62.16894721984863, "multicode_k": 1, "output_norm": 22.549472332000725, "output_norm/layer0": 22.549472332000725, "step": 5450 }, { "MSE": 512.9845250447588, "MSE/layer0": 512.9845250447588, "dead_code_fraction": 0.23995, "dead_code_fraction/layer0": 0.23995, "epoch": 1.07, "input_norm": 31.99848415692648, "input_norm/layer0": 31.99848415692648, "learning_rate": 0.005, "loss": 2.0496, "max_norm": 200.10585021972656, "max_norm/layer0": 200.10585021972656, "mean_norm": 62.28166961669922, "mean_norm/layer0": 62.28166961669922, "multicode_k": 1, "output_norm": 22.535003283818554, "output_norm/layer0": 22.535003283818554, "step": 5500 }, { "epoch": 1.07, "eval_MSE/layer0": 513.7822700020247, "eval_accuracy": 0.5137449731240944, "eval_dead_code_fraction/layer0": 0.23805, "eval_input_norm/layer0": 31.998499035448475, "eval_loss": 2.0730204582214355, "eval_multicode_k": 1, "eval_output_norm/layer0": 22.562518398921107, "eval_runtime": 158.2513, "eval_samples_per_second": 29.213, "eval_steps_per_second": 1.826, "step": 5500 }, { "MSE": 513.4142807515464, "MSE/layer0": 513.4142807515464, "dead_code_fraction": 0.2388, "dead_code_fraction/layer0": 0.2388, "epoch": 1.08, "input_norm": 31.99849408785501, "input_norm/layer0": 31.99849408785501, "learning_rate": 0.005, "loss": 2.0918, "max_norm": 200.3399200439453, "max_norm/layer0": 200.3399200439453, "mean_norm": 62.38692855834961, "mean_norm/layer0": 62.38692855834961, "multicode_k": 1, "output_norm": 22.54020097732544, "output_norm/layer0": 22.54020097732544, "step": 5550 }, { "MSE": 512.2161093648273, "MSE/layer0": 512.2161093648273, "dead_code_fraction": 0.23455, "dead_code_fraction/layer0": 0.23455, "epoch": 1.08, "input_norm": 31.998499333063755, "input_norm/layer0": 31.998499333063755, "learning_rate": 0.005, "loss": 2.0812, "max_norm": 200.90451049804688, "max_norm/layer0": 200.90451049804688, "mean_norm": 62.49030685424805, "mean_norm/layer0": 62.49030685424805, "multicode_k": 1, "output_norm": 22.562892615000422, "output_norm/layer0": 22.562892615000422, "step": 5600 }, { "MSE": 513.2079597473146, "MSE/layer0": 513.2079597473146, "dead_code_fraction": 0.2364, "dead_code_fraction/layer0": 0.2364, "epoch": 1.09, "input_norm": 31.998499097824094, "input_norm/layer0": 31.998499097824094, "learning_rate": 0.005, "loss": 2.0405, "max_norm": 201.2469940185547, "max_norm/layer0": 201.2469940185547, "mean_norm": 62.587249755859375, "mean_norm/layer0": 62.587249755859375, "multicode_k": 1, "output_norm": 22.551958309809354, "output_norm/layer0": 22.551958309809354, "step": 5650 }, { "MSE": 512.3663133748375, "MSE/layer0": 512.3663133748375, "dead_code_fraction": 0.2359, "dead_code_fraction/layer0": 0.2359, "epoch": 1.09, "input_norm": 31.998522087732937, "input_norm/layer0": 31.998522087732937, "learning_rate": 0.005, "loss": 2.0976, "max_norm": 202.06686401367188, "max_norm/layer0": 202.06686401367188, "mean_norm": 62.68406677246094, "mean_norm/layer0": 62.68406677246094, "multicode_k": 1, "output_norm": 22.56861629168192, "output_norm/layer0": 22.56861629168192, "step": 5700 }, { "MSE": 513.556918741862, "MSE/layer0": 513.556918741862, "dead_code_fraction": 0.2325, "dead_code_fraction/layer0": 0.2325, "epoch": 1.1, "input_norm": 31.99852702458699, "input_norm/layer0": 31.99852702458699, "learning_rate": 0.005, "loss": 2.0531, "max_norm": 202.5853729248047, "max_norm/layer0": 202.5853729248047, "mean_norm": 62.78022766113281, "mean_norm/layer0": 62.78022766113281, "multicode_k": 1, "output_norm": 22.55354828198752, "output_norm/layer0": 22.55354828198752, "step": 5750 }, { "MSE": 514.1225356547038, "MSE/layer0": 514.1225356547038, "dead_code_fraction": 0.23125, "dead_code_fraction/layer0": 0.23125, "epoch": 1.1, "input_norm": 31.998530540466305, "input_norm/layer0": 31.998530540466305, "learning_rate": 0.005, "loss": 2.0333, "max_norm": 202.8258514404297, "max_norm/layer0": 202.8258514404297, "mean_norm": 62.881099700927734, "mean_norm/layer0": 62.881099700927734, "multicode_k": 1, "output_norm": 22.538857170740776, "output_norm/layer0": 22.538857170740776, "step": 5800 }, { "MSE": 512.891567026774, "MSE/layer0": 512.891567026774, "dead_code_fraction": 0.23305, "dead_code_fraction/layer0": 0.23305, "epoch": 1.11, "input_norm": 31.998542674382527, "input_norm/layer0": 31.998542674382527, "learning_rate": 0.005, "loss": 2.0894, "max_norm": 203.2826385498047, "max_norm/layer0": 203.2826385498047, "mean_norm": 62.98002815246582, "mean_norm/layer0": 62.98002815246582, "multicode_k": 1, "output_norm": 22.556459398269645, "output_norm/layer0": 22.556459398269645, "step": 5850 }, { "MSE": 512.6300255839031, "MSE/layer0": 512.6300255839031, "dead_code_fraction": 0.23175, "dead_code_fraction/layer0": 0.23175, "epoch": 1.11, "input_norm": 31.998538637161257, "input_norm/layer0": 31.998538637161257, "learning_rate": 0.005, "loss": 2.0371, "max_norm": 203.56114196777344, "max_norm/layer0": 203.56114196777344, "mean_norm": 63.085018157958984, "mean_norm/layer0": 63.085018157958984, "multicode_k": 1, "output_norm": 22.55499767621359, "output_norm/layer0": 22.55499767621359, "step": 5900 }, { "MSE": 512.6470455423993, "MSE/layer0": 512.6470455423993, "dead_code_fraction": 0.22945, "dead_code_fraction/layer0": 0.22945, "epoch": 1.12, "input_norm": 31.998542264302582, "input_norm/layer0": 31.998542264302582, "learning_rate": 0.005, "loss": 2.0693, "max_norm": 204.18482971191406, "max_norm/layer0": 204.18482971191406, "mean_norm": 63.186561584472656, "mean_norm/layer0": 63.186561584472656, "multicode_k": 1, "output_norm": 22.56271686236063, "output_norm/layer0": 22.56271686236063, "step": 5950 }, { "MSE": 512.2647941589354, "MSE/layer0": 512.2647941589354, "dead_code_fraction": 0.23005, "dead_code_fraction/layer0": 0.23005, "epoch": 1.12, "input_norm": 31.99855575561523, "input_norm/layer0": 31.99855575561523, "learning_rate": 0.005, "loss": 2.1002, "max_norm": 204.59375, "max_norm/layer0": 204.59375, "mean_norm": 63.287431716918945, "mean_norm/layer0": 63.287431716918945, "multicode_k": 1, "output_norm": 22.56941809654236, "output_norm/layer0": 22.56941809654236, "step": 6000 }, { "epoch": 1.12, "eval_MSE/layer0": 510.787595085063, "eval_accuracy": 0.5144414778502405, "eval_dead_code_fraction/layer0": 0.2305, "eval_input_norm/layer0": 31.998558920130655, "eval_loss": 2.0667405128479004, "eval_multicode_k": 1, "eval_output_norm/layer0": 22.588203073708286, "eval_runtime": 158.1457, "eval_samples_per_second": 29.233, "eval_steps_per_second": 1.827, "step": 6000 }, { "MSE": 512.2951668802899, "MSE/layer0": 512.2951668802899, "dead_code_fraction": 0.2284, "dead_code_fraction/layer0": 0.2284, "epoch": 1.13, "input_norm": 31.998558203379314, "input_norm/layer0": 31.998558203379314, "learning_rate": 0.005, "loss": 2.0645, "max_norm": 205.16860961914062, "max_norm/layer0": 205.16860961914062, "mean_norm": 63.38333702087402, "mean_norm/layer0": 63.38333702087402, "multicode_k": 1, "output_norm": 22.570796213150032, "output_norm/layer0": 22.570796213150032, "step": 6050 }, { "MSE": 512.1381341044107, "MSE/layer0": 512.1381341044107, "dead_code_fraction": 0.22855, "dead_code_fraction/layer0": 0.22855, "epoch": 1.13, "input_norm": 31.99856172879537, "input_norm/layer0": 31.99856172879537, "learning_rate": 0.005, "loss": 2.0856, "max_norm": 205.37376403808594, "max_norm/layer0": 205.37376403808594, "mean_norm": 63.47422790527344, "mean_norm/layer0": 63.47422790527344, "multicode_k": 1, "output_norm": 22.58201634724934, "output_norm/layer0": 22.58201634724934, "step": 6100 }, { "MSE": 512.7539996846516, "MSE/layer0": 512.7539996846516, "dead_code_fraction": 0.22755, "dead_code_fraction/layer0": 0.22755, "epoch": 1.14, "input_norm": 31.99856161753337, "input_norm/layer0": 31.99856161753337, "learning_rate": 0.005, "loss": 2.0468, "max_norm": 205.76866149902344, "max_norm/layer0": 205.76866149902344, "mean_norm": 63.56420707702637, "mean_norm/layer0": 63.56420707702637, "multicode_k": 1, "output_norm": 22.575629208882646, "output_norm/layer0": 22.575629208882646, "step": 6150 }, { "MSE": 512.1215149434411, "MSE/layer0": 512.1215149434411, "dead_code_fraction": 0.2268, "dead_code_fraction/layer0": 0.2268, "epoch": 1.14, "input_norm": 31.998577674229928, "input_norm/layer0": 31.998577674229928, "learning_rate": 0.005, "loss": 2.0663, "max_norm": 206.54251098632812, "max_norm/layer0": 206.54251098632812, "mean_norm": 63.64880561828613, "mean_norm/layer0": 63.64880561828613, "multicode_k": 1, "output_norm": 22.58266611417133, "output_norm/layer0": 22.58266611417133, "step": 6200 }, { "MSE": 513.3685421752932, "MSE/layer0": 513.3685421752932, "dead_code_fraction": 0.22515, "dead_code_fraction/layer0": 0.22515, "epoch": 1.15, "input_norm": 31.998585720062266, "input_norm/layer0": 31.998585720062266, "learning_rate": 0.005, "loss": 2.0807, "max_norm": 207.23460388183594, "max_norm/layer0": 207.23460388183594, "mean_norm": 63.73150444030762, "mean_norm/layer0": 63.73150444030762, "multicode_k": 1, "output_norm": 22.574931882222508, "output_norm/layer0": 22.574931882222508, "step": 6250 }, { "MSE": 512.1649493916829, "MSE/layer0": 512.1649493916829, "dead_code_fraction": 0.2243, "dead_code_fraction/layer0": 0.2243, "epoch": 1.15, "input_norm": 31.99859083811442, "input_norm/layer0": 31.99859083811442, "learning_rate": 0.005, "loss": 1.9994, "max_norm": 207.4078826904297, "max_norm/layer0": 207.4078826904297, "mean_norm": 63.8239631652832, "mean_norm/layer0": 63.8239631652832, "multicode_k": 1, "output_norm": 22.573653513590493, "output_norm/layer0": 22.573653513590493, "step": 6300 }, { "MSE": 512.4084614054359, "MSE/layer0": 512.4084614054359, "dead_code_fraction": 0.22405, "dead_code_fraction/layer0": 0.22405, "epoch": 1.16, "input_norm": 31.998589369455978, "input_norm/layer0": 31.998589369455978, "learning_rate": 0.005, "loss": 2.0383, "max_norm": 207.421875, "max_norm/layer0": 207.421875, "mean_norm": 63.91918754577637, "mean_norm/layer0": 63.91918754577637, "multicode_k": 1, "output_norm": 22.570101757049564, "output_norm/layer0": 22.570101757049564, "step": 6350 }, { "MSE": 511.3037980651857, "MSE/layer0": 511.3037980651857, "dead_code_fraction": 0.22325, "dead_code_fraction/layer0": 0.22325, "epoch": 1.16, "input_norm": 31.9986056105296, "input_norm/layer0": 31.9986056105296, "learning_rate": 0.005, "loss": 2.0836, "max_norm": 207.90211486816406, "max_norm/layer0": 207.90211486816406, "mean_norm": 64.0091323852539, "mean_norm/layer0": 64.0091323852539, "multicode_k": 1, "output_norm": 22.591040735244757, "output_norm/layer0": 22.591040735244757, "step": 6400 }, { "MSE": 511.63349212646506, "MSE/layer0": 511.63349212646506, "dead_code_fraction": 0.2231, "dead_code_fraction/layer0": 0.2231, "epoch": 1.17, "input_norm": 31.998600152333573, "input_norm/layer0": 31.998600152333573, "learning_rate": 0.005, "loss": 2.049, "max_norm": 208.1908416748047, "max_norm/layer0": 208.1908416748047, "mean_norm": 64.09888458251953, "mean_norm/layer0": 64.09888458251953, "multicode_k": 1, "output_norm": 22.598680645624796, "output_norm/layer0": 22.598680645624796, "step": 6450 }, { "MSE": 510.1135516866045, "MSE/layer0": 510.1135516866045, "dead_code_fraction": 0.2198, "dead_code_fraction/layer0": 0.2198, "epoch": 1.17, "input_norm": 31.99861148198446, "input_norm/layer0": 31.99861148198446, "learning_rate": 0.005, "loss": 2.0723, "max_norm": 208.76829528808594, "max_norm/layer0": 208.76829528808594, "mean_norm": 64.1937198638916, "mean_norm/layer0": 64.1937198638916, "multicode_k": 1, "output_norm": 22.610935223897293, "output_norm/layer0": 22.610935223897293, "step": 6500 }, { "epoch": 1.17, "eval_MSE/layer0": 510.5624312578848, "eval_accuracy": 0.5148121435408701, "eval_dead_code_fraction/layer0": 0.2206, "eval_input_norm/layer0": 31.99861497196212, "eval_loss": 2.0631778240203857, "eval_multicode_k": 1, "eval_output_norm/layer0": 22.613337997850753, "eval_runtime": 157.9752, "eval_samples_per_second": 29.264, "eval_steps_per_second": 1.829, "step": 6500 }, { "MSE": 511.1518494669597, "MSE/layer0": 511.1518494669597, "dead_code_fraction": 0.2202, "dead_code_fraction/layer0": 0.2202, "epoch": 1.18, "input_norm": 31.998620487848918, "input_norm/layer0": 31.998620487848918, "learning_rate": 0.005, "loss": 2.0713, "max_norm": 209.1894989013672, "max_norm/layer0": 209.1894989013672, "mean_norm": 64.28516006469727, "mean_norm/layer0": 64.28516006469727, "multicode_k": 1, "output_norm": 22.59137951215108, "output_norm/layer0": 22.59137951215108, "step": 6550 }, { "MSE": 511.4045928446453, "MSE/layer0": 511.4045928446453, "dead_code_fraction": 0.2177, "dead_code_fraction/layer0": 0.2177, "epoch": 1.18, "input_norm": 31.99861013412476, "input_norm/layer0": 31.99861013412476, "learning_rate": 0.005, "loss": 2.0067, "max_norm": 209.52085876464844, "max_norm/layer0": 209.52085876464844, "mean_norm": 64.37364196777344, "mean_norm/layer0": 64.37364196777344, "multicode_k": 1, "output_norm": 22.59921900431315, "output_norm/layer0": 22.59921900431315, "step": 6600 }, { "MSE": 510.8443921915694, "MSE/layer0": 510.8443921915694, "dead_code_fraction": 0.21885, "dead_code_fraction/layer0": 0.21885, "epoch": 1.19, "input_norm": 31.998613767623894, "input_norm/layer0": 31.998613767623894, "learning_rate": 0.005, "loss": 2.013, "max_norm": 209.98431396484375, "max_norm/layer0": 209.98431396484375, "mean_norm": 64.46432113647461, "mean_norm/layer0": 64.46432113647461, "multicode_k": 1, "output_norm": 22.60319686889649, "output_norm/layer0": 22.60319686889649, "step": 6650 }, { "MSE": 510.0460713704424, "MSE/layer0": 510.0460713704424, "dead_code_fraction": 0.2183, "dead_code_fraction/layer0": 0.2183, "epoch": 1.19, "input_norm": 31.998641831080132, "input_norm/layer0": 31.998641831080132, "learning_rate": 0.005, "loss": 2.1151, "max_norm": 210.53810119628906, "max_norm/layer0": 210.53810119628906, "mean_norm": 64.55224609375, "mean_norm/layer0": 64.55224609375, "multicode_k": 1, "output_norm": 22.621459808349613, "output_norm/layer0": 22.621459808349613, "step": 6700 }, { "MSE": 509.41305414835614, "MSE/layer0": 509.41305414835614, "dead_code_fraction": 0.2172, "dead_code_fraction/layer0": 0.2172, "epoch": 1.2, "input_norm": 31.998632535934448, "input_norm/layer0": 31.998632535934448, "learning_rate": 0.005, "loss": 2.0318, "max_norm": 210.88394165039062, "max_norm/layer0": 210.88394165039062, "mean_norm": 64.64096069335938, "mean_norm/layer0": 64.64096069335938, "multicode_k": 1, "output_norm": 22.626508464813227, "output_norm/layer0": 22.626508464813227, "step": 6750 }, { "MSE": 510.3878801981608, "MSE/layer0": 510.3878801981608, "dead_code_fraction": 0.21645, "dead_code_fraction/layer0": 0.21645, "epoch": 1.2, "input_norm": 31.998641300201413, "input_norm/layer0": 31.998641300201413, "learning_rate": 0.005, "loss": 2.0492, "max_norm": 211.13937377929688, "max_norm/layer0": 211.13937377929688, "mean_norm": 64.7227783203125, "mean_norm/layer0": 64.7227783203125, "multicode_k": 1, "output_norm": 22.63286488850911, "output_norm/layer0": 22.63286488850911, "step": 6800 }, { "MSE": 509.17419825236027, "MSE/layer0": 509.17419825236027, "dead_code_fraction": 0.2138, "dead_code_fraction/layer0": 0.2138, "epoch": 1.21, "input_norm": 31.998648115793856, "input_norm/layer0": 31.998648115793856, "learning_rate": 0.005, "loss": 2.0467, "max_norm": 211.3644256591797, "max_norm/layer0": 211.3644256591797, "mean_norm": 64.80514907836914, "mean_norm/layer0": 64.80514907836914, "multicode_k": 1, "output_norm": 22.63950007438659, "output_norm/layer0": 22.63950007438659, "step": 6850 }, { "MSE": 509.3450110371906, "MSE/layer0": 509.3450110371906, "dead_code_fraction": 0.2144, "dead_code_fraction/layer0": 0.2144, "epoch": 1.21, "input_norm": 31.998654368718455, "input_norm/layer0": 31.998654368718455, "learning_rate": 0.005, "loss": 2.0327, "max_norm": 211.51609802246094, "max_norm/layer0": 211.51609802246094, "mean_norm": 64.885498046875, "mean_norm/layer0": 64.885498046875, "multicode_k": 1, "output_norm": 22.636532586415615, "output_norm/layer0": 22.636532586415615, "step": 6900 }, { "MSE": 509.711417948405, "MSE/layer0": 509.711417948405, "dead_code_fraction": 0.2121, "dead_code_fraction/layer0": 0.2121, "epoch": 1.22, "input_norm": 31.998653659820555, "input_norm/layer0": 31.998653659820555, "learning_rate": 0.005, "loss": 2.0344, "max_norm": 211.93910217285156, "max_norm/layer0": 211.93910217285156, "mean_norm": 64.96215629577637, "mean_norm/layer0": 64.96215629577637, "multicode_k": 1, "output_norm": 22.642279275258375, "output_norm/layer0": 22.642279275258375, "step": 6950 }, { "MSE": 509.53209904988614, "MSE/layer0": 509.53209904988614, "dead_code_fraction": 0.2112, "dead_code_fraction/layer0": 0.2112, "epoch": 1.22, "input_norm": 31.99865920702616, "input_norm/layer0": 31.99865920702616, "learning_rate": 0.005, "loss": 2.023, "max_norm": 212.15188598632812, "max_norm/layer0": 212.15188598632812, "mean_norm": 65.03938484191895, "mean_norm/layer0": 65.03938484191895, "multicode_k": 1, "output_norm": 22.641168931325275, "output_norm/layer0": 22.641168931325275, "step": 7000 }, { "epoch": 1.22, "eval_MSE/layer0": 509.9877618207523, "eval_accuracy": 0.5156894350128739, "eval_dead_code_fraction/layer0": 0.21105, "eval_input_norm/layer0": 31.998664335077162, "eval_loss": 2.0573580265045166, "eval_multicode_k": 1, "eval_output_norm/layer0": 22.65440880063548, "eval_runtime": 158.8251, "eval_samples_per_second": 29.107, "eval_steps_per_second": 1.82, "step": 7000 }, { "MSE": 509.9403240458172, "MSE/layer0": 509.9403240458172, "dead_code_fraction": 0.2101, "dead_code_fraction/layer0": 0.2101, "epoch": 1.23, "input_norm": 31.998663558959954, "input_norm/layer0": 31.998663558959954, "learning_rate": 0.005, "loss": 2.0391, "max_norm": 212.45599365234375, "max_norm/layer0": 212.45599365234375, "mean_norm": 65.11711883544922, "mean_norm/layer0": 65.11711883544922, "multicode_k": 1, "output_norm": 22.636152718861904, "output_norm/layer0": 22.636152718861904, "step": 7050 }, { "MSE": 509.17088083903013, "MSE/layer0": 509.17088083903013, "dead_code_fraction": 0.2099, "dead_code_fraction/layer0": 0.2099, "epoch": 1.23, "input_norm": 31.998674535751356, "input_norm/layer0": 31.998674535751356, "learning_rate": 0.005, "loss": 2.0413, "max_norm": 212.8926544189453, "max_norm/layer0": 212.8926544189453, "mean_norm": 65.19314002990723, "mean_norm/layer0": 65.19314002990723, "multicode_k": 1, "output_norm": 22.652867739995315, "output_norm/layer0": 22.652867739995315, "step": 7100 }, { "MSE": 509.09580220540397, "MSE/layer0": 509.09580220540397, "dead_code_fraction": 0.209, "dead_code_fraction/layer0": 0.209, "epoch": 1.24, "input_norm": 31.99867464383444, "input_norm/layer0": 31.99867464383444, "learning_rate": 0.005, "loss": 2.0495, "max_norm": 213.29238891601562, "max_norm/layer0": 213.29238891601562, "mean_norm": 65.27325248718262, "mean_norm/layer0": 65.27325248718262, "multicode_k": 1, "output_norm": 22.646062428156533, "output_norm/layer0": 22.646062428156533, "step": 7150 }, { "MSE": 509.9214274597167, "MSE/layer0": 509.9214274597167, "dead_code_fraction": 0.20905, "dead_code_fraction/layer0": 0.20905, "epoch": 1.24, "input_norm": 31.998673133850097, "input_norm/layer0": 31.998673133850097, "learning_rate": 0.005, "loss": 2.0462, "max_norm": 213.58729553222656, "max_norm/layer0": 213.58729553222656, "mean_norm": 65.35407447814941, "mean_norm/layer0": 65.35407447814941, "multicode_k": 1, "output_norm": 22.63937306404113, "output_norm/layer0": 22.63937306404113, "step": 7200 }, { "MSE": 508.71533091227207, "MSE/layer0": 508.71533091227207, "dead_code_fraction": 0.2082, "dead_code_fraction/layer0": 0.2082, "epoch": 1.25, "input_norm": 31.99868763287862, "input_norm/layer0": 31.99868763287862, "learning_rate": 0.005, "loss": 2.0582, "max_norm": 213.80873107910156, "max_norm/layer0": 213.80873107910156, "mean_norm": 65.43496131896973, "mean_norm/layer0": 65.43496131896973, "multicode_k": 1, "output_norm": 22.648734455108645, "output_norm/layer0": 22.648734455108645, "step": 7250 }, { "MSE": 507.686293182373, "MSE/layer0": 507.686293182373, "dead_code_fraction": 0.2066, "dead_code_fraction/layer0": 0.2066, "epoch": 1.25, "input_norm": 31.998690617879234, "input_norm/layer0": 31.998690617879234, "learning_rate": 0.005, "loss": 2.0485, "max_norm": 214.17088317871094, "max_norm/layer0": 214.17088317871094, "mean_norm": 65.51487731933594, "mean_norm/layer0": 65.51487731933594, "multicode_k": 1, "output_norm": 22.669575303395582, "output_norm/layer0": 22.669575303395582, "step": 7300 }, { "MSE": 507.97169540405275, "MSE/layer0": 507.97169540405275, "dead_code_fraction": 0.20445, "dead_code_fraction/layer0": 0.20445, "epoch": 1.26, "input_norm": 31.99869660695392, "input_norm/layer0": 31.99869660695392, "learning_rate": 0.005, "loss": 2.0534, "max_norm": 214.52955627441406, "max_norm/layer0": 214.52955627441406, "mean_norm": 65.59026718139648, "mean_norm/layer0": 65.59026718139648, "multicode_k": 1, "output_norm": 22.678728303909296, "output_norm/layer0": 22.678728303909296, "step": 7350 }, { "MSE": 507.6675502522787, "MSE/layer0": 507.6675502522787, "dead_code_fraction": 0.20485, "dead_code_fraction/layer0": 0.20485, "epoch": 1.26, "input_norm": 31.998699353535965, "input_norm/layer0": 31.998699353535965, "learning_rate": 0.005, "loss": 2.0638, "max_norm": 214.7173614501953, "max_norm/layer0": 214.7173614501953, "mean_norm": 65.67013740539551, "mean_norm/layer0": 65.67013740539551, "multicode_k": 1, "output_norm": 22.67898440043131, "output_norm/layer0": 22.67898440043131, "step": 7400 }, { "MSE": 507.85135843912786, "MSE/layer0": 507.85135843912786, "dead_code_fraction": 0.2049, "dead_code_fraction/layer0": 0.2049, "epoch": 1.27, "input_norm": 31.998699776331584, "input_norm/layer0": 31.998699776331584, "learning_rate": 0.005, "loss": 2.0353, "max_norm": 215.19158935546875, "max_norm/layer0": 215.19158935546875, "mean_norm": 65.75178337097168, "mean_norm/layer0": 65.75178337097168, "multicode_k": 1, "output_norm": 22.680205952326446, "output_norm/layer0": 22.680205952326446, "step": 7450 }, { "MSE": 507.253986562093, "MSE/layer0": 507.253986562093, "dead_code_fraction": 0.20435, "dead_code_fraction/layer0": 0.20435, "epoch": 1.27, "input_norm": 31.99870971679686, "input_norm/layer0": 31.99870971679686, "learning_rate": 0.005, "loss": 2.0791, "max_norm": 215.7554931640625, "max_norm/layer0": 215.7554931640625, "mean_norm": 65.82438659667969, "mean_norm/layer0": 65.82438659667969, "multicode_k": 1, "output_norm": 22.691158383687345, "output_norm/layer0": 22.691158383687345, "step": 7500 }, { "epoch": 1.27, "eval_MSE/layer0": 507.1513778155122, "eval_accuracy": 0.5167855735982843, "eval_dead_code_fraction/layer0": 0.2033, "eval_input_norm/layer0": 31.998707461867696, "eval_loss": 2.0513455867767334, "eval_multicode_k": 1, "eval_output_norm/layer0": 22.70183411032355, "eval_runtime": 158.8151, "eval_samples_per_second": 29.109, "eval_steps_per_second": 1.82, "step": 7500 }, { "MSE": 508.0465566507977, "MSE/layer0": 508.0465566507977, "dead_code_fraction": 0.20265, "dead_code_fraction/layer0": 0.20265, "epoch": 1.28, "input_norm": 31.9987080860138, "input_norm/layer0": 31.9987080860138, "learning_rate": 0.005, "loss": 2.0357, "max_norm": 216.1879119873047, "max_norm/layer0": 216.1879119873047, "mean_norm": 65.89747428894043, "mean_norm/layer0": 65.89747428894043, "multicode_k": 1, "output_norm": 22.691229712168372, "output_norm/layer0": 22.691229712168372, "step": 7550 }, { "MSE": 506.86150853474936, "MSE/layer0": 506.86150853474936, "dead_code_fraction": 0.20235, "dead_code_fraction/layer0": 0.20235, "epoch": 1.28, "input_norm": 31.998724161783855, "input_norm/layer0": 31.998724161783855, "learning_rate": 0.005, "loss": 2.0643, "max_norm": 216.84507751464844, "max_norm/layer0": 216.84507751464844, "mean_norm": 65.96598243713379, "mean_norm/layer0": 65.96598243713379, "multicode_k": 1, "output_norm": 22.70548650105794, "output_norm/layer0": 22.70548650105794, "step": 7600 }, { "MSE": 508.52483596801756, "MSE/layer0": 508.52483596801756, "dead_code_fraction": 0.20115, "dead_code_fraction/layer0": 0.20115, "epoch": 1.29, "input_norm": 31.998720836639407, "input_norm/layer0": 31.998720836639407, "learning_rate": 0.005, "loss": 2.0331, "max_norm": 217.07077026367188, "max_norm/layer0": 217.07077026367188, "mean_norm": 66.04256629943848, "mean_norm/layer0": 66.04256629943848, "multicode_k": 1, "output_norm": 22.671403992970788, "output_norm/layer0": 22.671403992970788, "step": 7650 }, { "MSE": 506.7901182556151, "MSE/layer0": 506.7901182556151, "dead_code_fraction": 0.20025, "dead_code_fraction/layer0": 0.20025, "epoch": 1.29, "input_norm": 31.998723080952953, "input_norm/layer0": 31.998723080952953, "learning_rate": 0.005, "loss": 2.0643, "max_norm": 217.60621643066406, "max_norm/layer0": 217.60621643066406, "mean_norm": 66.1141586303711, "mean_norm/layer0": 66.1141586303711, "multicode_k": 1, "output_norm": 22.711970895131433, "output_norm/layer0": 22.711970895131433, "step": 7700 }, { "MSE": 506.4805715942383, "MSE/layer0": 506.4805715942383, "dead_code_fraction": 0.19955, "dead_code_fraction/layer0": 0.19955, "epoch": 1.3, "input_norm": 31.998739531834943, "input_norm/layer0": 31.998739531834943, "learning_rate": 0.005, "loss": 2.0999, "max_norm": 218.18724060058594, "max_norm/layer0": 218.18724060058594, "mean_norm": 66.18310356140137, "mean_norm/layer0": 66.18310356140137, "multicode_k": 1, "output_norm": 22.715899858474735, "output_norm/layer0": 22.715899858474735, "step": 7750 }, { "MSE": 507.79560877482083, "MSE/layer0": 507.79560877482083, "dead_code_fraction": 0.1983, "dead_code_fraction/layer0": 0.1983, "epoch": 1.3, "input_norm": 31.9987256272634, "input_norm/layer0": 31.9987256272634, "learning_rate": 0.005, "loss": 2.0143, "max_norm": 218.3722686767578, "max_norm/layer0": 218.3722686767578, "mean_norm": 66.25444984436035, "mean_norm/layer0": 66.25444984436035, "multicode_k": 1, "output_norm": 22.692439622879014, "output_norm/layer0": 22.692439622879014, "step": 7800 }, { "MSE": 507.2388439432779, "MSE/layer0": 507.2388439432779, "dead_code_fraction": 0.198, "dead_code_fraction/layer0": 0.198, "epoch": 1.31, "input_norm": 31.998735243479416, "input_norm/layer0": 31.998735243479416, "learning_rate": 0.005, "loss": 2.069, "max_norm": 218.93580627441406, "max_norm/layer0": 218.93580627441406, "mean_norm": 66.32441329956055, "mean_norm/layer0": 66.32441329956055, "multicode_k": 1, "output_norm": 22.703038584391276, "output_norm/layer0": 22.703038584391276, "step": 7850 }, { "MSE": 508.13961395263664, "MSE/layer0": 508.13961395263664, "dead_code_fraction": 0.19705, "dead_code_fraction/layer0": 0.19705, "epoch": 1.31, "input_norm": 31.99873922983806, "input_norm/layer0": 31.99873922983806, "learning_rate": 0.005, "loss": 2.0712, "max_norm": 219.51759338378906, "max_norm/layer0": 219.51759338378906, "mean_norm": 66.39589881896973, "mean_norm/layer0": 66.39589881896973, "multicode_k": 1, "output_norm": 22.68491499900817, "output_norm/layer0": 22.68491499900817, "step": 7900 }, { "MSE": 506.5046355692546, "MSE/layer0": 506.5046355692546, "dead_code_fraction": 0.1958, "dead_code_fraction/layer0": 0.1958, "epoch": 1.32, "input_norm": 31.998745075861606, "input_norm/layer0": 31.998745075861606, "learning_rate": 0.005, "loss": 2.0623, "max_norm": 220.1356658935547, "max_norm/layer0": 220.1356658935547, "mean_norm": 66.46616172790527, "mean_norm/layer0": 66.46616172790527, "multicode_k": 1, "output_norm": 22.709094810485844, "output_norm/layer0": 22.709094810485844, "step": 7950 }, { "MSE": 506.24584472656227, "MSE/layer0": 506.24584472656227, "dead_code_fraction": 0.1962, "dead_code_fraction/layer0": 0.1962, "epoch": 1.32, "input_norm": 31.998744071324662, "input_norm/layer0": 31.998744071324662, "learning_rate": 0.005, "loss": 2.0252, "max_norm": 220.52029418945312, "max_norm/layer0": 220.52029418945312, "mean_norm": 66.54170417785645, "mean_norm/layer0": 66.54170417785645, "multicode_k": 1, "output_norm": 22.71004734039306, "output_norm/layer0": 22.71004734039306, "step": 8000 }, { "epoch": 1.32, "eval_MSE/layer0": 505.2722684186489, "eval_accuracy": 0.5173414664109856, "eval_dead_code_fraction/layer0": 0.19525, "eval_input_norm/layer0": 31.998757950702117, "eval_loss": 2.046276569366455, "eval_multicode_k": 1, "eval_output_norm/layer0": 22.71078164304668, "eval_runtime": 158.1298, "eval_samples_per_second": 29.235, "eval_steps_per_second": 1.828, "step": 8000 }, { "MSE": 507.15304565429676, "MSE/layer0": 507.15304565429676, "dead_code_fraction": 0.1941, "dead_code_fraction/layer0": 0.1941, "epoch": 1.33, "input_norm": 31.998751821517956, "input_norm/layer0": 31.998751821517956, "learning_rate": 0.005, "loss": 2.0231, "max_norm": 221.12425231933594, "max_norm/layer0": 221.12425231933594, "mean_norm": 66.61260223388672, "mean_norm/layer0": 66.61260223388672, "multicode_k": 1, "output_norm": 22.70729770024618, "output_norm/layer0": 22.70729770024618, "step": 8050 }, { "MSE": 508.0300794474282, "MSE/layer0": 508.0300794474282, "dead_code_fraction": 0.19355, "dead_code_fraction/layer0": 0.19355, "epoch": 1.33, "input_norm": 31.998755750656134, "input_norm/layer0": 31.998755750656134, "learning_rate": 0.005, "loss": 2.0065, "max_norm": 221.41090393066406, "max_norm/layer0": 221.41090393066406, "mean_norm": 66.68024253845215, "mean_norm/layer0": 66.68024253845215, "multicode_k": 1, "output_norm": 22.685567102432238, "output_norm/layer0": 22.685567102432238, "step": 8100 }, { "MSE": 506.83792968750004, "MSE/layer0": 506.83792968750004, "dead_code_fraction": 0.194, "dead_code_fraction/layer0": 0.194, "epoch": 1.34, "input_norm": 31.998766530354814, "input_norm/layer0": 31.998766530354814, "learning_rate": 0.005, "loss": 2.0545, "max_norm": 221.77352905273438, "max_norm/layer0": 221.77352905273438, "mean_norm": 66.74850654602051, "mean_norm/layer0": 66.74850654602051, "multicode_k": 1, "output_norm": 22.711014649073284, "output_norm/layer0": 22.711014649073284, "step": 8150 }, { "MSE": 506.1638347880046, "MSE/layer0": 506.1638347880046, "dead_code_fraction": 0.1922, "dead_code_fraction/layer0": 0.1922, "epoch": 1.34, "input_norm": 31.998765303293865, "input_norm/layer0": 31.998765303293865, "learning_rate": 0.005, "loss": 2.0291, "max_norm": 222.23851013183594, "max_norm/layer0": 222.23851013183594, "mean_norm": 66.81972694396973, "mean_norm/layer0": 66.81972694396973, "multicode_k": 1, "output_norm": 22.712359495162957, "output_norm/layer0": 22.712359495162957, "step": 8200 }, { "MSE": 505.4201058959959, "MSE/layer0": 505.4201058959959, "dead_code_fraction": 0.19165, "dead_code_fraction/layer0": 0.19165, "epoch": 1.35, "input_norm": 31.998765595753984, "input_norm/layer0": 31.998765595753984, "learning_rate": 0.005, "loss": 2.0255, "max_norm": 222.60708618164062, "max_norm/layer0": 222.60708618164062, "mean_norm": 66.89296340942383, "mean_norm/layer0": 66.89296340942383, "multicode_k": 1, "output_norm": 22.733057559331257, "output_norm/layer0": 22.733057559331257, "step": 8250 }, { "MSE": 506.6631129964193, "MSE/layer0": 506.6631129964193, "dead_code_fraction": 0.18985, "dead_code_fraction/layer0": 0.18985, "epoch": 1.35, "input_norm": 31.998774194717406, "input_norm/layer0": 31.998774194717406, "learning_rate": 0.005, "loss": 2.0543, "max_norm": 222.95948791503906, "max_norm/layer0": 222.95948791503906, "mean_norm": 66.95783233642578, "mean_norm/layer0": 66.95783233642578, "multicode_k": 1, "output_norm": 22.715471951166787, "output_norm/layer0": 22.715471951166787, "step": 8300 }, { "MSE": 505.8098661804198, "MSE/layer0": 505.8098661804198, "dead_code_fraction": 0.1901, "dead_code_fraction/layer0": 0.1901, "epoch": 1.36, "input_norm": 31.998776054382326, "input_norm/layer0": 31.998776054382326, "learning_rate": 0.005, "loss": 2.0361, "max_norm": 222.99290466308594, "max_norm/layer0": 222.99290466308594, "mean_norm": 67.03095436096191, "mean_norm/layer0": 67.03095436096191, "multicode_k": 1, "output_norm": 22.720023854573576, "output_norm/layer0": 22.720023854573576, "step": 8350 }, { "MSE": 504.6476872253421, "MSE/layer0": 504.6476872253421, "dead_code_fraction": 0.18865, "dead_code_fraction/layer0": 0.18865, "epoch": 1.36, "input_norm": 31.99877415021262, "input_norm/layer0": 31.99877415021262, "learning_rate": 0.005, "loss": 2.018, "max_norm": 222.99652099609375, "max_norm/layer0": 222.99652099609375, "mean_norm": 67.10310173034668, "mean_norm/layer0": 67.10310173034668, "multicode_k": 1, "output_norm": 22.743260552088422, "output_norm/layer0": 22.743260552088422, "step": 8400 }, { "MSE": 505.1742755126953, "MSE/layer0": 505.1742755126953, "dead_code_fraction": 0.18805, "dead_code_fraction/layer0": 0.18805, "epoch": 1.37, "input_norm": 31.998781833648685, "input_norm/layer0": 31.998781833648685, "learning_rate": 0.005, "loss": 2.0373, "max_norm": 223.39710998535156, "max_norm/layer0": 223.39710998535156, "mean_norm": 67.17368698120117, "mean_norm/layer0": 67.17368698120117, "multicode_k": 1, "output_norm": 22.74353121121724, "output_norm/layer0": 22.74353121121724, "step": 8450 }, { "MSE": 505.00153442382805, "MSE/layer0": 505.00153442382805, "dead_code_fraction": 0.1875, "dead_code_fraction/layer0": 0.1875, "epoch": 1.37, "input_norm": 31.998789456685383, "input_norm/layer0": 31.998789456685383, "learning_rate": 0.005, "loss": 2.0432, "max_norm": 223.86239624023438, "max_norm/layer0": 223.86239624023438, "mean_norm": 67.2455825805664, "mean_norm/layer0": 67.2455825805664, "multicode_k": 1, "output_norm": 22.747594401041667, "output_norm/layer0": 22.747594401041667, "step": 8500 }, { "epoch": 1.37, "eval_MSE/layer0": 502.9394664067146, "eval_accuracy": 0.5183496432580605, "eval_dead_code_fraction/layer0": 0.18745, "eval_input_norm/layer0": 31.998788164622738, "eval_loss": 2.042330265045166, "eval_multicode_k": 1, "eval_output_norm/layer0": 22.756197618711013, "eval_runtime": 159.3738, "eval_samples_per_second": 29.007, "eval_steps_per_second": 1.813, "step": 8500 }, { "MSE": 504.47000788370775, "MSE/layer0": 504.47000788370775, "dead_code_fraction": 0.1867, "dead_code_fraction/layer0": 0.1867, "epoch": 1.38, "input_norm": 31.998792708714795, "input_norm/layer0": 31.998792708714795, "learning_rate": 0.005, "loss": 2.0483, "max_norm": 224.50177001953125, "max_norm/layer0": 224.50177001953125, "mean_norm": 67.31682586669922, "mean_norm/layer0": 67.31682586669922, "multicode_k": 1, "output_norm": 22.753840500513725, "output_norm/layer0": 22.753840500513725, "step": 8550 }, { "MSE": 504.7471296691896, "MSE/layer0": 504.7471296691896, "dead_code_fraction": 0.1861, "dead_code_fraction/layer0": 0.1861, "epoch": 1.38, "input_norm": 31.99879879315695, "input_norm/layer0": 31.99879879315695, "learning_rate": 0.005, "loss": 2.0626, "max_norm": 224.77008056640625, "max_norm/layer0": 224.77008056640625, "mean_norm": 67.38501358032227, "mean_norm/layer0": 67.38501358032227, "multicode_k": 1, "output_norm": 22.75965905507406, "output_norm/layer0": 22.75965905507406, "step": 8600 }, { "MSE": 504.43309575398786, "MSE/layer0": 504.43309575398786, "dead_code_fraction": 0.18575, "dead_code_fraction/layer0": 0.18575, "epoch": 1.39, "input_norm": 31.9987975247701, "input_norm/layer0": 31.9987975247701, "learning_rate": 0.005, "loss": 2.0528, "max_norm": 224.8895263671875, "max_norm/layer0": 224.8895263671875, "mean_norm": 67.45294189453125, "mean_norm/layer0": 67.45294189453125, "multicode_k": 1, "output_norm": 22.765578152338662, "output_norm/layer0": 22.765578152338662, "step": 8650 }, { "MSE": 504.8997240193688, "MSE/layer0": 504.8997240193688, "dead_code_fraction": 0.1849, "dead_code_fraction/layer0": 0.1849, "epoch": 1.39, "input_norm": 31.998805205027267, "input_norm/layer0": 31.998805205027267, "learning_rate": 0.005, "loss": 2.0355, "max_norm": 225.1109619140625, "max_norm/layer0": 225.1109619140625, "mean_norm": 67.51644897460938, "mean_norm/layer0": 67.51644897460938, "multicode_k": 1, "output_norm": 22.76556049982706, "output_norm/layer0": 22.76556049982706, "step": 8700 }, { "MSE": 504.98007812499975, "MSE/layer0": 504.98007812499975, "dead_code_fraction": 0.1841, "dead_code_fraction/layer0": 0.1841, "epoch": 1.4, "input_norm": 31.998811095555627, "input_norm/layer0": 31.998811095555627, "learning_rate": 0.005, "loss": 2.048, "max_norm": 225.3004608154297, "max_norm/layer0": 225.3004608154297, "mean_norm": 67.58170700073242, "mean_norm/layer0": 67.58170700073242, "multicode_k": 1, "output_norm": 22.734453417460124, "output_norm/layer0": 22.734453417460124, "step": 8750 }, { "MSE": 505.8172926839193, "MSE/layer0": 505.8172926839193, "dead_code_fraction": 0.1825, "dead_code_fraction/layer0": 0.1825, "epoch": 1.4, "input_norm": 31.998811902999876, "input_norm/layer0": 31.998811902999876, "learning_rate": 0.005, "loss": 2.0314, "max_norm": 225.43496704101562, "max_norm/layer0": 225.43496704101562, "mean_norm": 67.64213943481445, "mean_norm/layer0": 67.64213943481445, "multicode_k": 1, "output_norm": 22.746523040135706, "output_norm/layer0": 22.746523040135706, "step": 8800 }, { "MSE": 505.15463668823276, "MSE/layer0": 505.15463668823276, "dead_code_fraction": 0.1834, "dead_code_fraction/layer0": 0.1834, "epoch": 1.41, "input_norm": 31.99881089528401, "input_norm/layer0": 31.99881089528401, "learning_rate": 0.005, "loss": 2.0019, "max_norm": 225.2454376220703, "max_norm/layer0": 225.2454376220703, "mean_norm": 67.70701217651367, "mean_norm/layer0": 67.70701217651367, "multicode_k": 1, "output_norm": 22.74102473258972, "output_norm/layer0": 22.74102473258972, "step": 8850 }, { "MSE": 505.15305394490576, "MSE/layer0": 505.15305394490576, "dead_code_fraction": 0.18105, "dead_code_fraction/layer0": 0.18105, "epoch": 1.41, "input_norm": 31.99882117907206, "input_norm/layer0": 31.99882117907206, "learning_rate": 0.005, "loss": 2.0614, "max_norm": 224.98548889160156, "max_norm/layer0": 224.98548889160156, "mean_norm": 67.77053833007812, "mean_norm/layer0": 67.77053833007812, "multicode_k": 1, "output_norm": 22.750008074442544, "output_norm/layer0": 22.750008074442544, "step": 8900 }, { "MSE": 505.46065561930345, "MSE/layer0": 505.46065561930345, "dead_code_fraction": 0.1809, "dead_code_fraction/layer0": 0.1809, "epoch": 1.42, "input_norm": 31.99882030487061, "input_norm/layer0": 31.99882030487061, "learning_rate": 0.005, "loss": 2.0259, "max_norm": 224.90966796875, "max_norm/layer0": 224.90966796875, "mean_norm": 67.83388900756836, "mean_norm/layer0": 67.83388900756836, "multicode_k": 1, "output_norm": 22.744747044245393, "output_norm/layer0": 22.744747044245393, "step": 8950 }, { "MSE": 503.93126592000317, "MSE/layer0": 503.93126592000317, "dead_code_fraction": 0.1795, "dead_code_fraction/layer0": 0.1795, "epoch": 1.42, "input_norm": 31.99882487614949, "input_norm/layer0": 31.99882487614949, "learning_rate": 0.005, "loss": 2.0549, "max_norm": 224.75604248046875, "max_norm/layer0": 224.75604248046875, "mean_norm": 67.89757537841797, "mean_norm/layer0": 67.89757537841797, "multicode_k": 1, "output_norm": 22.767707106272383, "output_norm/layer0": 22.767707106272383, "step": 9000 }, { "epoch": 1.42, "eval_MSE/layer0": 502.90162357014304, "eval_accuracy": 0.518752237368134, "eval_dead_code_fraction/layer0": 0.1797, "eval_input_norm/layer0": 31.998819289515865, "eval_loss": 2.0394132137298584, "eval_multicode_k": 1, "eval_output_norm/layer0": 22.772194602647794, "eval_runtime": 158.4408, "eval_samples_per_second": 29.178, "eval_steps_per_second": 1.824, "step": 9000 }, { "MSE": 504.23655522664376, "MSE/layer0": 504.23655522664376, "dead_code_fraction": 0.17875, "dead_code_fraction/layer0": 0.17875, "epoch": 1.43, "input_norm": 31.998824621836345, "input_norm/layer0": 31.998824621836345, "learning_rate": 0.005, "loss": 2.0392, "max_norm": 224.60926818847656, "max_norm/layer0": 224.60926818847656, "mean_norm": 67.96440505981445, "mean_norm/layer0": 67.96440505981445, "multicode_k": 1, "output_norm": 22.772467877070113, "output_norm/layer0": 22.772467877070113, "step": 9050 }, { "MSE": 503.93936604817725, "MSE/layer0": 503.93936604817725, "dead_code_fraction": 0.17785, "dead_code_fraction/layer0": 0.17785, "epoch": 1.43, "input_norm": 31.99882525444032, "input_norm/layer0": 31.99882525444032, "learning_rate": 0.005, "loss": 2.0329, "max_norm": 224.35545349121094, "max_norm/layer0": 224.35545349121094, "mean_norm": 68.02788925170898, "mean_norm/layer0": 68.02788925170898, "multicode_k": 1, "output_norm": 22.77433245976766, "output_norm/layer0": 22.77433245976766, "step": 9100 }, { "MSE": 504.38566899617547, "MSE/layer0": 504.38566899617547, "dead_code_fraction": 0.1771, "dead_code_fraction/layer0": 0.1771, "epoch": 1.44, "input_norm": 31.998831052780154, "input_norm/layer0": 31.998831052780154, "learning_rate": 0.005, "loss": 2.0175, "max_norm": 224.176025390625, "max_norm/layer0": 224.176025390625, "mean_norm": 68.09515762329102, "mean_norm/layer0": 68.09515762329102, "multicode_k": 1, "output_norm": 22.758301575978592, "output_norm/layer0": 22.758301575978592, "step": 9150 }, { "MSE": 503.9738773091634, "MSE/layer0": 503.9738773091634, "dead_code_fraction": 0.1763, "dead_code_fraction/layer0": 0.1763, "epoch": 1.44, "input_norm": 31.998839066823308, "input_norm/layer0": 31.998839066823308, "learning_rate": 0.005, "loss": 2.0462, "max_norm": 224.0868377685547, "max_norm/layer0": 224.0868377685547, "mean_norm": 68.16043853759766, "mean_norm/layer0": 68.16043853759766, "multicode_k": 1, "output_norm": 22.7744267431895, "output_norm/layer0": 22.7744267431895, "step": 9200 }, { "MSE": 503.29069310506196, "MSE/layer0": 503.29069310506196, "dead_code_fraction": 0.17485, "dead_code_fraction/layer0": 0.17485, "epoch": 1.45, "input_norm": 31.99883868853251, "input_norm/layer0": 31.99883868853251, "learning_rate": 0.005, "loss": 2.0416, "max_norm": 223.89230346679688, "max_norm/layer0": 223.89230346679688, "mean_norm": 68.22885513305664, "mean_norm/layer0": 68.22885513305664, "multicode_k": 1, "output_norm": 22.78441795984904, "output_norm/layer0": 22.78441795984904, "step": 9250 }, { "MSE": 504.4634376017252, "MSE/layer0": 504.4634376017252, "dead_code_fraction": 0.17465, "dead_code_fraction/layer0": 0.17465, "epoch": 1.45, "input_norm": 31.998847064971933, "input_norm/layer0": 31.998847064971933, "learning_rate": 0.005, "loss": 2.0822, "max_norm": 223.5952911376953, "max_norm/layer0": 223.5952911376953, "mean_norm": 68.2917366027832, "mean_norm/layer0": 68.2917366027832, "multicode_k": 1, "output_norm": 22.78019981384277, "output_norm/layer0": 22.78019981384277, "step": 9300 }, { "MSE": 504.5819336954755, "MSE/layer0": 504.5819336954755, "dead_code_fraction": 0.1737, "dead_code_fraction/layer0": 0.1737, "epoch": 1.46, "input_norm": 31.998844486872358, "input_norm/layer0": 31.998844486872358, "learning_rate": 0.005, "loss": 2.0181, "max_norm": 223.33349609375, "max_norm/layer0": 223.33349609375, "mean_norm": 68.3541030883789, "mean_norm/layer0": 68.3541030883789, "multicode_k": 1, "output_norm": 22.770421886444097, "output_norm/layer0": 22.770421886444097, "step": 9350 }, { "MSE": 504.08388671875014, "MSE/layer0": 504.08388671875014, "dead_code_fraction": 0.17315, "dead_code_fraction/layer0": 0.17315, "epoch": 1.46, "input_norm": 31.998852834701534, "input_norm/layer0": 31.998852834701534, "learning_rate": 0.005, "loss": 2.0332, "max_norm": 223.0471954345703, "max_norm/layer0": 223.0471954345703, "mean_norm": 68.41642379760742, "mean_norm/layer0": 68.41642379760742, "multicode_k": 1, "output_norm": 22.783455673853553, "output_norm/layer0": 22.783455673853553, "step": 9400 }, { "MSE": 504.4143726603196, "MSE/layer0": 504.4143726603196, "dead_code_fraction": 0.17145, "dead_code_fraction/layer0": 0.17145, "epoch": 1.47, "input_norm": 31.998856865564967, "input_norm/layer0": 31.998856865564967, "learning_rate": 0.005, "loss": 2.0241, "max_norm": 222.83218383789062, "max_norm/layer0": 222.83218383789062, "mean_norm": 68.48007202148438, "mean_norm/layer0": 68.48007202148438, "multicode_k": 1, "output_norm": 22.767489954630527, "output_norm/layer0": 22.767489954630527, "step": 9450 }, { "MSE": 503.2655168151856, "MSE/layer0": 503.2655168151856, "dead_code_fraction": 0.17245, "dead_code_fraction/layer0": 0.17245, "epoch": 1.47, "input_norm": 31.998857196172086, "input_norm/layer0": 31.998857196172086, "learning_rate": 0.005, "loss": 2.0087, "max_norm": 222.5254669189453, "max_norm/layer0": 222.5254669189453, "mean_norm": 68.54964065551758, "mean_norm/layer0": 68.54964065551758, "multicode_k": 1, "output_norm": 22.78383262634278, "output_norm/layer0": 22.78383262634278, "step": 9500 }, { "epoch": 1.47, "eval_MSE/layer0": 504.0087830256569, "eval_accuracy": 0.5192516739689711, "eval_dead_code_fraction/layer0": 0.1704, "eval_input_norm/layer0": 31.99886018302103, "eval_loss": 2.0364596843719482, "eval_multicode_k": 1, "eval_output_norm/layer0": 22.799023320451585, "eval_runtime": 158.3046, "eval_samples_per_second": 29.203, "eval_steps_per_second": 1.826, "step": 9500 }, { "MSE": 502.63093187018274, "MSE/layer0": 502.63093187018274, "dead_code_fraction": 0.1713, "dead_code_fraction/layer0": 0.1713, "epoch": 1.48, "input_norm": 31.998863240132852, "input_norm/layer0": 31.998863240132852, "learning_rate": 0.005, "loss": 2.0083, "max_norm": 222.2374725341797, "max_norm/layer0": 222.2374725341797, "mean_norm": 68.61249160766602, "mean_norm/layer0": 68.61249160766602, "multicode_k": 1, "output_norm": 22.79880291995348, "output_norm/layer0": 22.79880291995348, "step": 9550 }, { "MSE": 505.36792836568793, "MSE/layer0": 505.36792836568793, "dead_code_fraction": 0.17135, "dead_code_fraction/layer0": 0.17135, "epoch": 2.0, "input_norm": 31.99885930271917, "input_norm/layer0": 31.99885930271917, "learning_rate": 0.005, "loss": 2.0034, "max_norm": 222.0310516357422, "max_norm/layer0": 222.0310516357422, "mean_norm": 68.67721176147461, "mean_norm/layer0": 68.67721176147461, "multicode_k": 1, "output_norm": 22.74409036474983, "output_norm/layer0": 22.74409036474983, "step": 9600 }, { "MSE": 502.98986485799134, "MSE/layer0": 502.98986485799134, "dead_code_fraction": 0.1697, "dead_code_fraction/layer0": 0.1697, "epoch": 2.01, "input_norm": 31.998873513539642, "input_norm/layer0": 31.998873513539642, "learning_rate": 0.005, "loss": 2.072, "max_norm": 222.00772094726562, "max_norm/layer0": 222.00772094726562, "mean_norm": 68.73538589477539, "mean_norm/layer0": 68.73538589477539, "multicode_k": 1, "output_norm": 22.796976168950394, "output_norm/layer0": 22.796976168950394, "step": 9650 }, { "MSE": 503.04733729044574, "MSE/layer0": 503.04733729044574, "dead_code_fraction": 0.16915, "dead_code_fraction/layer0": 0.16915, "epoch": 2.01, "input_norm": 31.998862508138025, "input_norm/layer0": 31.998862508138025, "learning_rate": 0.005, "loss": 1.9691, "max_norm": 221.80978393554688, "max_norm/layer0": 221.80978393554688, "mean_norm": 68.80109405517578, "mean_norm/layer0": 68.80109405517578, "multicode_k": 1, "output_norm": 22.794911410013835, "output_norm/layer0": 22.794911410013835, "step": 9700 }, { "MSE": 503.3161979675292, "MSE/layer0": 503.3161979675292, "dead_code_fraction": 0.16895, "dead_code_fraction/layer0": 0.16895, "epoch": 2.02, "input_norm": 31.998878345489487, "input_norm/layer0": 31.998878345489487, "learning_rate": 0.005, "loss": 2.0368, "max_norm": 221.684814453125, "max_norm/layer0": 221.684814453125, "mean_norm": 68.86429214477539, "mean_norm/layer0": 68.86429214477539, "multicode_k": 1, "output_norm": 22.785858039855956, "output_norm/layer0": 22.785858039855956, "step": 9750 }, { "MSE": 502.7885366821291, "MSE/layer0": 502.7885366821291, "dead_code_fraction": 0.16775, "dead_code_fraction/layer0": 0.16775, "epoch": 2.02, "input_norm": 31.998876323699957, "input_norm/layer0": 31.998876323699957, "learning_rate": 0.005, "loss": 2.0029, "max_norm": 221.55738830566406, "max_norm/layer0": 221.55738830566406, "mean_norm": 68.92353439331055, "mean_norm/layer0": 68.92353439331055, "multicode_k": 1, "output_norm": 22.80311137835186, "output_norm/layer0": 22.80311137835186, "step": 9800 }, { "MSE": 503.08141484578465, "MSE/layer0": 503.08141484578465, "dead_code_fraction": 0.16675, "dead_code_fraction/layer0": 0.16675, "epoch": 2.03, "input_norm": 31.998882681528727, "input_norm/layer0": 31.998882681528727, "learning_rate": 0.005, "loss": 2.0058, "max_norm": 221.4176025390625, "max_norm/layer0": 221.4176025390625, "mean_norm": 68.97920608520508, "mean_norm/layer0": 68.97920608520508, "multicode_k": 1, "output_norm": 22.79436633110047, "output_norm/layer0": 22.79436633110047, "step": 9850 }, { "MSE": 503.44391169230175, "MSE/layer0": 503.44391169230175, "dead_code_fraction": 0.16635, "dead_code_fraction/layer0": 0.16635, "epoch": 2.03, "input_norm": 31.998889300028488, "input_norm/layer0": 31.998889300028488, "learning_rate": 0.005, "loss": 2.0128, "max_norm": 220.8733673095703, "max_norm/layer0": 220.8733673095703, "mean_norm": 69.03522872924805, "mean_norm/layer0": 69.03522872924805, "multicode_k": 1, "output_norm": 22.793825833002728, "output_norm/layer0": 22.793825833002728, "step": 9900 }, { "MSE": 503.14160481770807, "MSE/layer0": 503.14160481770807, "dead_code_fraction": 0.1655, "dead_code_fraction/layer0": 0.1655, "epoch": 2.04, "input_norm": 31.99888905207317, "input_norm/layer0": 31.99888905207317, "learning_rate": 0.005, "loss": 2.0053, "max_norm": 220.66598510742188, "max_norm/layer0": 220.66598510742188, "mean_norm": 69.08990859985352, "mean_norm/layer0": 69.08990859985352, "multicode_k": 1, "output_norm": 22.802439581553138, "output_norm/layer0": 22.802439581553138, "step": 9950 }, { "MSE": 502.7584656778976, "MSE/layer0": 502.7584656778976, "dead_code_fraction": 0.16445, "dead_code_fraction/layer0": 0.16445, "epoch": 2.04, "input_norm": 31.998899453481037, "input_norm/layer0": 31.998899453481037, "learning_rate": 0.005, "loss": 2.0569, "max_norm": 220.5869903564453, "max_norm/layer0": 220.5869903564453, "mean_norm": 69.14492416381836, "mean_norm/layer0": 69.14492416381836, "multicode_k": 1, "output_norm": 22.808293444315584, "output_norm/layer0": 22.808293444315584, "step": 10000 }, { "epoch": 2.04, "eval_MSE/layer0": 501.8128262733759, "eval_accuracy": 0.5193506309245984, "eval_dead_code_fraction/layer0": 0.16395, "eval_input_norm/layer0": 31.998895487949337, "eval_loss": 2.0353407859802246, "eval_multicode_k": 1, "eval_output_norm/layer0": 22.80092038433711, "eval_runtime": 158.6027, "eval_samples_per_second": 29.148, "eval_steps_per_second": 1.822, "step": 10000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 2.04, "input_norm": 0.0, "input_norm/layer0": 0.0, "max_norm": 220.5869903564453, "max_norm/layer0": 220.5869903564453, "mean_norm": 69.14492416381836, "mean_norm/layer0": 69.14492416381836, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 10000, "total_flos": 7.43098011353088e+16, "train_loss": 2.205516522693634, "train_runtime": 15654.0479, "train_samples_per_second": 61.326, "train_steps_per_second": 0.639 } ], "logging_steps": 50, "max_steps": 10000, "num_train_epochs": 9223372036854775807, "save_steps": 500, "total_flos": 7.43098011353088e+16, "trial_name": null, "trial_params": null }