{ "best_metric": 1.89570152759552, "best_model_checkpoint": "/tmp/wandb/run-20240207_044253-56k3p8kp/files/train_output/checkpoint-10000", "epoch": 1.044022968505307, "eval_steps": 500, "global_step": 10000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "MSE": 872.5187733968098, "MSE/layer0": 872.5187733968098, "dead_code_fraction": 0.0276, "dead_code_fraction/layer0": 0.0276, "epoch": 0.0, "input_norm": 31.997111479441326, "input_norm/layer0": 31.997111479441326, "learning_rate": 1e-06, "loss": 9.0051, "max_norm": 34.71393966674805, "max_norm/layer0": 34.71393966674805, "mean_norm": 31.98521327972412, "mean_norm/layer0": 31.98521327972412, "multicode_k": 1, "output_norm": 4.134780248006185, "output_norm/layer0": 4.134780248006185, "step": 1 }, { "MSE": 871.4381560241286, "MSE/layer0": 871.4381560241286, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.01, "input_norm": 31.99644809839677, "input_norm/layer0": 31.99644809839677, "learning_rate": 5e-05, "loss": 7.0703, "max_norm": 34.72187423706055, "max_norm/layer0": 34.72187423706055, "mean_norm": 31.991936683654785, "mean_norm/layer0": 31.991936683654785, "multicode_k": 1, "output_norm": 4.145846879401173, "output_norm/layer0": 4.145846879401173, "step": 50 }, { "MSE": 868.4475470987957, "MSE/layer0": 868.4475470987957, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.01, "input_norm": 31.995786774953213, "input_norm/layer0": 31.995786774953213, "learning_rate": 0.0001, "loss": 4.1515, "max_norm": 34.748802185058594, "max_norm/layer0": 34.748802185058594, "mean_norm": 32.0172176361084, "mean_norm/layer0": 32.0172176361084, "multicode_k": 1, "output_norm": 4.178660261631009, "output_norm/layer0": 4.178660261631009, "step": 100 }, { "MSE": 864.7878089396156, "MSE/layer0": 864.7878089396156, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.02, "input_norm": 31.995868380864444, "input_norm/layer0": 31.995868380864444, "learning_rate": 0.00015, "loss": 3.596, "max_norm": 34.7879753112793, "max_norm/layer0": 34.7879753112793, "mean_norm": 32.057809829711914, "mean_norm/layer0": 32.057809829711914, "multicode_k": 1, "output_norm": 4.227458424568177, "output_norm/layer0": 4.227458424568177, "step": 150 }, { "MSE": 862.2720657348631, "MSE/layer0": 862.2720657348631, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.02, "input_norm": 31.996261011759444, "input_norm/layer0": 31.996261011759444, "learning_rate": 0.0002, "loss": 3.3864, "max_norm": 34.834476470947266, "max_norm/layer0": 34.834476470947266, "mean_norm": 32.09993934631348, "mean_norm/layer0": 32.09993934631348, "multicode_k": 1, "output_norm": 4.271083230972291, "output_norm/layer0": 4.271083230972291, "step": 200 }, { "MSE": 860.8860168457031, "MSE/layer0": 860.8860168457031, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.03, "input_norm": 31.99663330396016, "input_norm/layer0": 31.99663330396016, "learning_rate": 0.00025, "loss": 3.1841, "max_norm": 34.880577087402344, "max_norm/layer0": 34.880577087402344, "mean_norm": 32.15042304992676, "mean_norm/layer0": 32.15042304992676, "multicode_k": 1, "output_norm": 4.302526236375174, "output_norm/layer0": 4.302526236375174, "step": 250 }, { "MSE": 859.4145241292313, "MSE/layer0": 859.4145241292313, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.03, "input_norm": 31.99707999547323, "input_norm/layer0": 31.99707999547323, "learning_rate": 0.0003, "loss": 2.9941, "max_norm": 34.94011688232422, "max_norm/layer0": 34.94011688232422, "mean_norm": 32.21405220031738, "mean_norm/layer0": 32.21405220031738, "multicode_k": 1, "output_norm": 4.340623443921407, "output_norm/layer0": 4.340623443921407, "step": 300 }, { "MSE": 857.4514228312173, "MSE/layer0": 857.4514228312173, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.04, "input_norm": 31.997263495127353, "input_norm/layer0": 31.997263495127353, "learning_rate": 0.00035, "loss": 2.8154, "max_norm": 35.02033996582031, "max_norm/layer0": 35.02033996582031, "mean_norm": 32.2895393371582, "mean_norm/layer0": 32.2895393371582, "multicode_k": 1, "output_norm": 4.388785634040833, "output_norm/layer0": 4.388785634040833, "step": 350 }, { "MSE": 855.6023776245115, "MSE/layer0": 855.6023776245115, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.04, "input_norm": 31.997391548156735, "input_norm/layer0": 31.997391548156735, "learning_rate": 0.0004, "loss": 2.6472, "max_norm": 35.093902587890625, "max_norm/layer0": 35.093902587890625, "mean_norm": 32.36477088928223, "mean_norm/layer0": 32.36477088928223, "multicode_k": 1, "output_norm": 4.438224600950877, "output_norm/layer0": 4.438224600950877, "step": 400 }, { "MSE": 852.2393107096357, "MSE/layer0": 852.2393107096357, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.05, "input_norm": 31.997483587265002, "input_norm/layer0": 31.997483587265002, "learning_rate": 0.00045000000000000004, "loss": 2.5584, "max_norm": 35.304176330566406, "max_norm/layer0": 35.304176330566406, "mean_norm": 32.54551696777344, "mean_norm/layer0": 32.54551696777344, "multicode_k": 1, "output_norm": 4.531697844664256, "output_norm/layer0": 4.531697844664256, "step": 450 }, { "MSE": 845.160081481933, "MSE/layer0": 845.160081481933, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.05, "input_norm": 31.997576513290404, "input_norm/layer0": 31.997576513290404, "learning_rate": 0.0005, "loss": 2.5072, "max_norm": 35.78097915649414, "max_norm/layer0": 35.78097915649414, "mean_norm": 32.836992263793945, "mean_norm/layer0": 32.836992263793945, "multicode_k": 1, "output_norm": 4.75731077671051, "output_norm/layer0": 4.75731077671051, "step": 500 }, { "epoch": 0.05, "eval_MSE/layer0": 841.1602262364518, "eval_accuracy": 0.4578774282778804, "eval_dead_code_fraction/layer0": 0.0, "eval_input_norm/layer0": 31.99765928777141, "eval_loss": 2.476405382156372, "eval_multicode_k": 1, "eval_output_norm/layer0": 4.911408371361153, "eval_runtime": 73.5499, "eval_samples_per_second": 62.855, "eval_steps_per_second": 7.859, "step": 500 }, { "MSE": 837.1320628865564, "MSE/layer0": 837.1320628865564, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.06, "input_norm": 31.997703491846714, "input_norm/layer0": 31.997703491846714, "learning_rate": 0.0004973684210526315, "loss": 2.446, "max_norm": 36.301849365234375, "max_norm/layer0": 36.301849365234375, "mean_norm": 33.16576957702637, "mean_norm/layer0": 33.16576957702637, "multicode_k": 1, "output_norm": 5.083427506287892, "output_norm/layer0": 5.083427506287892, "step": 550 }, { "MSE": 829.8174697875975, "MSE/layer0": 829.8174697875975, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.06, "input_norm": 31.997781289418548, "input_norm/layer0": 31.997781289418548, "learning_rate": 0.0004947368421052632, "loss": 2.4026, "max_norm": 36.790077209472656, "max_norm/layer0": 36.790077209472656, "mean_norm": 33.519426345825195, "mean_norm/layer0": 33.519426345825195, "multicode_k": 1, "output_norm": 5.438902084827422, "output_norm/layer0": 5.438902084827422, "step": 600 }, { "MSE": 823.647299601237, "MSE/layer0": 823.647299601237, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.07, "input_norm": 31.997854344050104, "input_norm/layer0": 31.997854344050104, "learning_rate": 0.0004921052631578947, "loss": 2.3506, "max_norm": 37.23988723754883, "max_norm/layer0": 37.23988723754883, "mean_norm": 33.882219314575195, "mean_norm/layer0": 33.882219314575195, "multicode_k": 1, "output_norm": 5.780141766071318, "output_norm/layer0": 5.780141766071318, "step": 650 }, { "MSE": 818.3900874837236, "MSE/layer0": 818.3900874837236, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.07, "input_norm": 31.99789853731792, "input_norm/layer0": 31.99789853731792, "learning_rate": 0.0004894736842105264, "loss": 2.3252, "max_norm": 37.74921417236328, "max_norm/layer0": 37.74921417236328, "mean_norm": 34.241193771362305, "mean_norm/layer0": 34.241193771362305, "multicode_k": 1, "output_norm": 6.09345253547033, "output_norm/layer0": 6.09345253547033, "step": 700 }, { "MSE": 813.5141651407878, "MSE/layer0": 813.5141651407878, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.08, "input_norm": 31.99791718482971, "input_norm/layer0": 31.99791718482971, "learning_rate": 0.0004868421052631579, "loss": 2.2972, "max_norm": 38.29411315917969, "max_norm/layer0": 38.29411315917969, "mean_norm": 34.602651596069336, "mean_norm/layer0": 34.602651596069336, "multicode_k": 1, "output_norm": 6.373116828600564, "output_norm/layer0": 6.373116828600564, "step": 750 }, { "MSE": 808.9583784993486, "MSE/layer0": 808.9583784993486, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.08, "input_norm": 31.997929916381842, "input_norm/layer0": 31.997929916381842, "learning_rate": 0.0004842105263157895, "loss": 2.2848, "max_norm": 38.83885955810547, "max_norm/layer0": 38.83885955810547, "mean_norm": 34.96581268310547, "mean_norm/layer0": 34.96581268310547, "multicode_k": 1, "output_norm": 6.6348445963859515, "output_norm/layer0": 6.6348445963859515, "step": 800 }, { "MSE": 805.0894353230792, "MSE/layer0": 805.0894353230792, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.09, "input_norm": 31.99793632825216, "input_norm/layer0": 31.99793632825216, "learning_rate": 0.00048157894736842105, "loss": 2.2718, "max_norm": 39.34720993041992, "max_norm/layer0": 39.34720993041992, "mean_norm": 35.32806396484375, "mean_norm/layer0": 35.32806396484375, "multicode_k": 1, "output_norm": 6.866891795794173, "output_norm/layer0": 6.866891795794173, "step": 850 }, { "MSE": 801.1131992594401, "MSE/layer0": 801.1131992594401, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.09, "input_norm": 31.997941767374677, "input_norm/layer0": 31.997941767374677, "learning_rate": 0.00047894736842105264, "loss": 2.2552, "max_norm": 39.885169982910156, "max_norm/layer0": 39.885169982910156, "mean_norm": 35.689327239990234, "mean_norm/layer0": 35.689327239990234, "multicode_k": 1, "output_norm": 7.08060004631678, "output_norm/layer0": 7.08060004631678, "step": 900 }, { "MSE": 797.5655348714191, "MSE/layer0": 797.5655348714191, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.1, "input_norm": 31.997945496241247, "input_norm/layer0": 31.997945496241247, "learning_rate": 0.0004763157894736842, "loss": 2.2364, "max_norm": 40.398529052734375, "max_norm/layer0": 40.398529052734375, "mean_norm": 36.051015853881836, "mean_norm/layer0": 36.051015853881836, "multicode_k": 1, "output_norm": 7.280441036224362, "output_norm/layer0": 7.280441036224362, "step": 950 }, { "MSE": 794.0057167561844, "MSE/layer0": 794.0057167561844, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.1, "input_norm": 31.997958205540975, "input_norm/layer0": 31.997958205540975, "learning_rate": 0.00047368421052631577, "loss": 2.2285, "max_norm": 40.882999420166016, "max_norm/layer0": 40.882999420166016, "mean_norm": 36.412479400634766, "mean_norm/layer0": 36.412479400634766, "multicode_k": 1, "output_norm": 7.463625483512881, "output_norm/layer0": 7.463625483512881, "step": 1000 }, { "epoch": 0.1, "eval_MSE/layer0": 792.3022871601257, "eval_accuracy": 0.49262569806414397, "eval_dead_code_fraction/layer0": 0.0, "eval_input_norm/layer0": 31.997970815399036, "eval_loss": 2.2265193462371826, "eval_multicode_k": 1, "eval_output_norm/layer0": 7.55243569582849, "eval_runtime": 73.9102, "eval_samples_per_second": 62.549, "eval_steps_per_second": 7.82, "step": 1000 }, { "MSE": 790.4031213378905, "MSE/layer0": 790.4031213378905, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.11, "input_norm": 31.997961893081662, "input_norm/layer0": 31.997961893081662, "learning_rate": 0.0004710526315789474, "loss": 2.2276, "max_norm": 41.373714447021484, "max_norm/layer0": 41.373714447021484, "mean_norm": 36.77394676208496, "mean_norm/layer0": 36.77394676208496, "multicode_k": 1, "output_norm": 7.636834317048386, "output_norm/layer0": 7.636834317048386, "step": 1050 }, { "MSE": 786.9933625284832, "MSE/layer0": 786.9933625284832, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.11, "input_norm": 31.99796496391297, "input_norm/layer0": 31.99796496391297, "learning_rate": 0.00046842105263157895, "loss": 2.2167, "max_norm": 41.845481872558594, "max_norm/layer0": 41.845481872558594, "mean_norm": 37.13482093811035, "mean_norm/layer0": 37.13482093811035, "multicode_k": 1, "output_norm": 7.803330462773646, "output_norm/layer0": 7.803330462773646, "step": 1100 }, { "MSE": 783.8570914713541, "MSE/layer0": 783.8570914713541, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.12, "input_norm": 31.997962007522577, "input_norm/layer0": 31.997962007522577, "learning_rate": 0.00046578947368421054, "loss": 2.2049, "max_norm": 42.328094482421875, "max_norm/layer0": 42.328094482421875, "mean_norm": 37.49737358093262, "mean_norm/layer0": 37.49737358093262, "multicode_k": 1, "output_norm": 7.957673575878145, "output_norm/layer0": 7.957673575878145, "step": 1150 }, { "MSE": 780.325506286621, "MSE/layer0": 780.325506286621, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.13, "input_norm": 31.997955818176273, "input_norm/layer0": 31.997955818176273, "learning_rate": 0.00046315789473684214, "loss": 2.2048, "max_norm": 42.827125549316406, "max_norm/layer0": 42.827125549316406, "mean_norm": 37.85981369018555, "mean_norm/layer0": 37.85981369018555, "multicode_k": 1, "output_norm": 8.110501464207967, "output_norm/layer0": 8.110501464207967, "step": 1200 }, { "MSE": 777.4963677978517, "MSE/layer0": 777.4963677978517, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.13, "input_norm": 31.997957773208608, "input_norm/layer0": 31.997957773208608, "learning_rate": 0.0004605263157894737, "loss": 2.1813, "max_norm": 43.32162094116211, "max_norm/layer0": 43.32162094116211, "mean_norm": 38.223052978515625, "mean_norm/layer0": 38.223052978515625, "multicode_k": 1, "output_norm": 8.244436805248263, "output_norm/layer0": 8.244436805248263, "step": 1250 }, { "MSE": 774.260437520345, "MSE/layer0": 774.260437520345, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.14, "input_norm": 31.99796381632487, "input_norm/layer0": 31.99796381632487, "learning_rate": 0.00045789473684210527, "loss": 2.1836, "max_norm": 43.81217575073242, "max_norm/layer0": 43.81217575073242, "mean_norm": 38.58406066894531, "mean_norm/layer0": 38.58406066894531, "multicode_k": 1, "output_norm": 8.38570425987244, "output_norm/layer0": 8.38570425987244, "step": 1300 }, { "MSE": 771.4710861206056, "MSE/layer0": 771.4710861206056, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.14, "input_norm": 31.997958866755184, "input_norm/layer0": 31.997958866755184, "learning_rate": 0.00045526315789473686, "loss": 2.1749, "max_norm": 44.29291915893555, "max_norm/layer0": 44.29291915893555, "mean_norm": 38.94841957092285, "mean_norm/layer0": 38.94841957092285, "multicode_k": 1, "output_norm": 8.50825534900029, "output_norm/layer0": 8.50825534900029, "step": 1350 }, { "MSE": 768.6556185913084, "MSE/layer0": 768.6556185913084, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.15, "input_norm": 31.99795674959818, "input_norm/layer0": 31.99795674959818, "learning_rate": 0.00045263157894736845, "loss": 2.1767, "max_norm": 44.80799865722656, "max_norm/layer0": 44.80799865722656, "mean_norm": 39.31004524230957, "mean_norm/layer0": 39.31004524230957, "multicode_k": 1, "output_norm": 8.633222222328184, "output_norm/layer0": 8.633222222328184, "step": 1400 }, { "MSE": 765.9088921101885, "MSE/layer0": 765.9088921101885, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.15, "input_norm": 31.99795736630759, "input_norm/layer0": 31.99795736630759, "learning_rate": 0.00045000000000000004, "loss": 2.1614, "max_norm": 45.24712371826172, "max_norm/layer0": 45.24712371826172, "mean_norm": 39.66674041748047, "mean_norm/layer0": 39.66674041748047, "multicode_k": 1, "output_norm": 8.743508942921961, "output_norm/layer0": 8.743508942921961, "step": 1450 }, { "MSE": 763.2800780232742, "MSE/layer0": 763.2800780232742, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.16, "input_norm": 31.997952944437664, "input_norm/layer0": 31.997952944437664, "learning_rate": 0.0004473684210526316, "loss": 2.1472, "max_norm": 45.6886100769043, "max_norm/layer0": 45.6886100769043, "mean_norm": 40.02728462219238, "mean_norm/layer0": 40.02728462219238, "multicode_k": 1, "output_norm": 8.859908480644224, "output_norm/layer0": 8.859908480644224, "step": 1500 }, { "epoch": 0.16, "eval_MSE/layer0": 761.8682555426203, "eval_accuracy": 0.502513147213907, "eval_dead_code_fraction/layer0": 0.0, "eval_input_norm/layer0": 31.99796608230291, "eval_loss": 2.1583588123321533, "eval_multicode_k": 1, "eval_output_norm/layer0": 8.92388377993132, "eval_runtime": 73.3386, "eval_samples_per_second": 63.036, "eval_steps_per_second": 7.881, "step": 1500 }, { "MSE": 760.1600253295896, "MSE/layer0": 760.1600253295896, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.16, "input_norm": 31.997961203257255, "input_norm/layer0": 31.997961203257255, "learning_rate": 0.00044473684210526317, "loss": 2.1601, "max_norm": 46.172386169433594, "max_norm/layer0": 46.172386169433594, "mean_norm": 40.38890838623047, "mean_norm/layer0": 40.38890838623047, "multicode_k": 1, "output_norm": 8.976485926310215, "output_norm/layer0": 8.976485926310215, "step": 1550 }, { "MSE": 757.7968755086266, "MSE/layer0": 757.7968755086266, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.17, "input_norm": 31.99795768419901, "input_norm/layer0": 31.99795768419901, "learning_rate": 0.0004421052631578947, "loss": 2.1503, "max_norm": 46.59892272949219, "max_norm/layer0": 46.59892272949219, "mean_norm": 40.74970626831055, "mean_norm/layer0": 40.74970626831055, "multicode_k": 1, "output_norm": 9.079196619192757, "output_norm/layer0": 9.079196619192757, "step": 1600 }, { "MSE": 755.1489293416341, "MSE/layer0": 755.1489293416341, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.17, "input_norm": 31.997956597010287, "input_norm/layer0": 31.997956597010287, "learning_rate": 0.0004394736842105263, "loss": 2.1474, "max_norm": 47.01366424560547, "max_norm/layer0": 47.01366424560547, "mean_norm": 41.107492446899414, "mean_norm/layer0": 41.107492446899414, "multicode_k": 1, "output_norm": 9.18502354939779, "output_norm/layer0": 9.18502354939779, "step": 1650 }, { "MSE": 752.7132907104492, "MSE/layer0": 752.7132907104492, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.18, "input_norm": 31.997961333592727, "input_norm/layer0": 31.997961333592727, "learning_rate": 0.00043684210526315795, "loss": 2.1451, "max_norm": 47.46398162841797, "max_norm/layer0": 47.46398162841797, "mean_norm": 41.466739654541016, "mean_norm/layer0": 41.466739654541016, "multicode_k": 1, "output_norm": 9.288365476131446, "output_norm/layer0": 9.288365476131446, "step": 1700 }, { "MSE": 750.1894300333656, "MSE/layer0": 750.1894300333656, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.18, "input_norm": 31.99795596122742, "input_norm/layer0": 31.99795596122742, "learning_rate": 0.0004342105263157895, "loss": 2.1298, "max_norm": 47.89784240722656, "max_norm/layer0": 47.89784240722656, "mean_norm": 41.825233459472656, "mean_norm/layer0": 41.825233459472656, "multicode_k": 1, "output_norm": 9.383608838717148, "output_norm/layer0": 9.383608838717148, "step": 1750 }, { "MSE": 747.6542997233073, "MSE/layer0": 747.6542997233073, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.19, "input_norm": 31.997955916722606, "input_norm/layer0": 31.997955916722606, "learning_rate": 0.0004315789473684211, "loss": 2.1238, "max_norm": 48.32524871826172, "max_norm/layer0": 48.32524871826172, "mean_norm": 42.18182373046875, "mean_norm/layer0": 42.18182373046875, "multicode_k": 1, "output_norm": 9.481378455162048, "output_norm/layer0": 9.481378455162048, "step": 1800 }, { "MSE": 745.4623332722983, "MSE/layer0": 745.4623332722983, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.19, "input_norm": 31.99795308430989, "input_norm/layer0": 31.99795308430989, "learning_rate": 0.0004289473684210526, "loss": 2.1193, "max_norm": 48.75049591064453, "max_norm/layer0": 48.75049591064453, "mean_norm": 42.53817176818848, "mean_norm/layer0": 42.53817176818848, "multicode_k": 1, "output_norm": 9.570223178863522, "output_norm/layer0": 9.570223178863522, "step": 1850 }, { "MSE": 743.2356170654296, "MSE/layer0": 743.2356170654296, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.2, "input_norm": 31.997956037521366, "input_norm/layer0": 31.997956037521366, "learning_rate": 0.0004263157894736842, "loss": 2.114, "max_norm": 49.169532775878906, "max_norm/layer0": 49.169532775878906, "mean_norm": 42.89301300048828, "mean_norm/layer0": 42.89301300048828, "multicode_k": 1, "output_norm": 9.656177865664167, "output_norm/layer0": 9.656177865664167, "step": 1900 }, { "MSE": 740.6696187337238, "MSE/layer0": 740.6696187337238, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.2, "input_norm": 31.997947629292796, "input_norm/layer0": 31.997947629292796, "learning_rate": 0.0004236842105263158, "loss": 2.1208, "max_norm": 49.5915641784668, "max_norm/layer0": 49.5915641784668, "mean_norm": 43.247257232666016, "mean_norm/layer0": 43.247257232666016, "multicode_k": 1, "output_norm": 9.750187404950456, "output_norm/layer0": 9.750187404950456, "step": 1950 }, { "MSE": 738.2711766560866, "MSE/layer0": 738.2711766560866, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.21, "input_norm": 31.99795049031576, "input_norm/layer0": 31.99795049031576, "learning_rate": 0.00042105263157894734, "loss": 2.1144, "max_norm": 50.01121520996094, "max_norm/layer0": 50.01121520996094, "mean_norm": 43.60071563720703, "mean_norm/layer0": 43.60071563720703, "multicode_k": 1, "output_norm": 9.839046444892887, "output_norm/layer0": 9.839046444892887, "step": 2000 }, { "epoch": 0.21, "eval_MSE/layer0": 737.1842960305685, "eval_accuracy": 0.5089533842961654, "eval_dead_code_fraction/layer0": 0.0, "eval_input_norm/layer0": 31.997949216728358, "eval_loss": 2.112781524658203, "eval_multicode_k": 1, "eval_output_norm/layer0": 9.899169789850005, "eval_runtime": 73.2721, "eval_samples_per_second": 63.094, "eval_steps_per_second": 7.888, "step": 2000 }, { "MSE": 736.3252647908528, "MSE/layer0": 736.3252647908528, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.21, "input_norm": 31.997952928543082, "input_norm/layer0": 31.997952928543082, "learning_rate": 0.000418421052631579, "loss": 2.1054, "max_norm": 50.480525970458984, "max_norm/layer0": 50.480525970458984, "mean_norm": 43.9530086517334, "mean_norm/layer0": 43.9530086517334, "multicode_k": 1, "output_norm": 9.923008087476088, "output_norm/layer0": 9.923008087476088, "step": 2050 }, { "MSE": 734.2413449096682, "MSE/layer0": 734.2413449096682, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.22, "input_norm": 31.99795233090719, "input_norm/layer0": 31.99795233090719, "learning_rate": 0.0004157894736842106, "loss": 2.114, "max_norm": 50.909828186035156, "max_norm/layer0": 50.909828186035156, "mean_norm": 44.302608489990234, "mean_norm/layer0": 44.302608489990234, "multicode_k": 1, "output_norm": 9.99465080579122, "output_norm/layer0": 9.99465080579122, "step": 2100 }, { "MSE": 732.1211085001627, "MSE/layer0": 732.1211085001627, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.22, "input_norm": 31.997947177886957, "input_norm/layer0": 31.997947177886957, "learning_rate": 0.0004131578947368421, "loss": 2.1053, "max_norm": 51.30076217651367, "max_norm/layer0": 51.30076217651367, "mean_norm": 44.650190353393555, "mean_norm/layer0": 44.650190353393555, "multicode_k": 1, "output_norm": 10.083865798314415, "output_norm/layer0": 10.083865798314415, "step": 2150 }, { "MSE": 729.7699541219072, "MSE/layer0": 729.7699541219072, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.23, "input_norm": 31.997944199244184, "input_norm/layer0": 31.997944199244184, "learning_rate": 0.0004105263157894737, "loss": 2.092, "max_norm": 51.70292282104492, "max_norm/layer0": 51.70292282104492, "mean_norm": 44.99736022949219, "mean_norm/layer0": 44.99736022949219, "multicode_k": 1, "output_norm": 10.171215546925865, "output_norm/layer0": 10.171215546925865, "step": 2200 }, { "MSE": 727.7426215616864, "MSE/layer0": 727.7426215616864, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.23, "input_norm": 31.997949040730795, "input_norm/layer0": 31.997949040730795, "learning_rate": 0.00040789473684210524, "loss": 2.0989, "max_norm": 52.09043502807617, "max_norm/layer0": 52.09043502807617, "mean_norm": 45.34288787841797, "mean_norm/layer0": 45.34288787841797, "multicode_k": 1, "output_norm": 10.245072917938227, "output_norm/layer0": 10.245072917938227, "step": 2250 }, { "MSE": 725.7510225423177, "MSE/layer0": 725.7510225423177, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.24, "input_norm": 31.997945086161295, "input_norm/layer0": 31.997945086161295, "learning_rate": 0.00040526315789473684, "loss": 2.0921, "max_norm": 52.48381423950195, "max_norm/layer0": 52.48381423950195, "mean_norm": 45.685386657714844, "mean_norm/layer0": 45.685386657714844, "multicode_k": 1, "output_norm": 10.316563812891642, "output_norm/layer0": 10.316563812891642, "step": 2300 }, { "MSE": 723.730980834961, "MSE/layer0": 723.730980834961, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.25, "input_norm": 31.997938013076794, "input_norm/layer0": 31.997938013076794, "learning_rate": 0.00040263157894736843, "loss": 2.0863, "max_norm": 52.871910095214844, "max_norm/layer0": 52.871910095214844, "mean_norm": 46.027950286865234, "mean_norm/layer0": 46.027950286865234, "multicode_k": 1, "output_norm": 10.396288099288938, "output_norm/layer0": 10.396288099288938, "step": 2350 }, { "MSE": 721.850106608073, "MSE/layer0": 721.850106608073, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.25, "input_norm": 31.99794317245484, "input_norm/layer0": 31.99794317245484, "learning_rate": 0.0004, "loss": 2.0883, "max_norm": 53.25300598144531, "max_norm/layer0": 53.25300598144531, "mean_norm": 46.366220474243164, "mean_norm/layer0": 46.366220474243164, "multicode_k": 1, "output_norm": 10.462737544377642, "output_norm/layer0": 10.462737544377642, "step": 2400 }, { "MSE": 720.002911987305, "MSE/layer0": 720.002911987305, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.26, "input_norm": 31.997945442199722, "input_norm/layer0": 31.997945442199722, "learning_rate": 0.0003973684210526316, "loss": 2.0813, "max_norm": 53.6557502746582, "max_norm/layer0": 53.6557502746582, "mean_norm": 46.70218849182129, "mean_norm/layer0": 46.70218849182129, "multicode_k": 1, "output_norm": 10.54251501719157, "output_norm/layer0": 10.54251501719157, "step": 2450 }, { "MSE": 717.8726328531905, "MSE/layer0": 717.8726328531905, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.26, "input_norm": 31.997946141560867, "input_norm/layer0": 31.997946141560867, "learning_rate": 0.00039473684210526315, "loss": 2.0847, "max_norm": 54.013648986816406, "max_norm/layer0": 54.013648986816406, "mean_norm": 47.03492546081543, "mean_norm/layer0": 47.03492546081543, "multicode_k": 1, "output_norm": 10.61746094703674, "output_norm/layer0": 10.61746094703674, "step": 2500 }, { "epoch": 0.26, "eval_MSE/layer0": 716.9390104187793, "eval_accuracy": 0.5142129041984603, "eval_dead_code_fraction/layer0": 0.0, "eval_input_norm/layer0": 31.997947305666536, "eval_loss": 2.0790653228759766, "eval_multicode_k": 1, "eval_output_norm/layer0": 10.657726783760687, "eval_runtime": 73.6422, "eval_samples_per_second": 62.776, "eval_steps_per_second": 7.849, "step": 2500 }, { "MSE": 715.8716929117836, "MSE/layer0": 715.8716929117836, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.27, "input_norm": 31.99793814023335, "input_norm/layer0": 31.99793814023335, "learning_rate": 0.00039210526315789474, "loss": 2.0789, "max_norm": 54.395057678222656, "max_norm/layer0": 54.395057678222656, "mean_norm": 47.36547088623047, "mean_norm/layer0": 47.36547088623047, "multicode_k": 1, "output_norm": 10.687965892155965, "output_norm/layer0": 10.687965892155965, "step": 2550 }, { "MSE": 713.9484742228188, "MSE/layer0": 713.9484742228188, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.27, "input_norm": 31.997940645217888, "input_norm/layer0": 31.997940645217888, "learning_rate": 0.00038947368421052633, "loss": 2.0747, "max_norm": 54.81391525268555, "max_norm/layer0": 54.81391525268555, "mean_norm": 47.6934928894043, "mean_norm/layer0": 47.6934928894043, "multicode_k": 1, "output_norm": 10.762619382540386, "output_norm/layer0": 10.762619382540386, "step": 2600 }, { "MSE": 711.9854763793942, "MSE/layer0": 711.9854763793942, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.28, "input_norm": 31.997925097147615, "input_norm/layer0": 31.997925097147615, "learning_rate": 0.00038684210526315787, "loss": 2.0679, "max_norm": 55.21110916137695, "max_norm/layer0": 55.21110916137695, "mean_norm": 48.01936340332031, "mean_norm/layer0": 48.01936340332031, "multicode_k": 1, "output_norm": 10.838534935315447, "output_norm/layer0": 10.838534935315447, "step": 2650 }, { "MSE": 710.4415082804362, "MSE/layer0": 710.4415082804362, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.28, "input_norm": 31.997930930455517, "input_norm/layer0": 31.997930930455517, "learning_rate": 0.00038421052631578946, "loss": 2.0619, "max_norm": 55.63144302368164, "max_norm/layer0": 55.63144302368164, "mean_norm": 48.34212875366211, "mean_norm/layer0": 48.34212875366211, "multicode_k": 1, "output_norm": 10.893479135831196, "output_norm/layer0": 10.893479135831196, "step": 2700 }, { "MSE": 708.5378164672845, "MSE/layer0": 708.5378164672845, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.29, "input_norm": 31.99792820294698, "input_norm/layer0": 31.99792820294698, "learning_rate": 0.00038157894736842105, "loss": 2.0461, "max_norm": 56.01336669921875, "max_norm/layer0": 56.01336669921875, "mean_norm": 48.66323280334473, "mean_norm/layer0": 48.66323280334473, "multicode_k": 1, "output_norm": 10.971131575902309, "output_norm/layer0": 10.971131575902309, "step": 2750 }, { "MSE": 706.6155220540361, "MSE/layer0": 706.6155220540361, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.29, "input_norm": 31.997930272420245, "input_norm/layer0": 31.997930272420245, "learning_rate": 0.00037894736842105265, "loss": 2.0594, "max_norm": 56.40309143066406, "max_norm/layer0": 56.40309143066406, "mean_norm": 48.980411529541016, "mean_norm/layer0": 48.980411529541016, "multicode_k": 1, "output_norm": 11.042961815198257, "output_norm/layer0": 11.042961815198257, "step": 2800 }, { "MSE": 704.6534555053711, "MSE/layer0": 704.6534555053711, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.3, "input_norm": 31.99792085011799, "input_norm/layer0": 31.99792085011799, "learning_rate": 0.00037631578947368424, "loss": 2.0499, "max_norm": 56.79050064086914, "max_norm/layer0": 56.79050064086914, "mean_norm": 49.293588638305664, "mean_norm/layer0": 49.293588638305664, "multicode_k": 1, "output_norm": 11.11463791847229, "output_norm/layer0": 11.11463791847229, "step": 2850 }, { "MSE": 702.691480916341, "MSE/layer0": 702.691480916341, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.3, "input_norm": 31.997921177546203, "input_norm/layer0": 31.997921177546203, "learning_rate": 0.0003736842105263158, "loss": 2.0472, "max_norm": 57.16228103637695, "max_norm/layer0": 57.16228103637695, "mean_norm": 49.60378646850586, "mean_norm/layer0": 49.60378646850586, "multicode_k": 1, "output_norm": 11.188902417818706, "output_norm/layer0": 11.188902417818706, "step": 2900 }, { "MSE": 700.9804660034181, "MSE/layer0": 700.9804660034181, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.31, "input_norm": 31.997924566268914, "input_norm/layer0": 31.997924566268914, "learning_rate": 0.00037105263157894737, "loss": 2.0557, "max_norm": 57.52459716796875, "max_norm/layer0": 57.52459716796875, "mean_norm": 49.91103553771973, "mean_norm/layer0": 49.91103553771973, "multicode_k": 1, "output_norm": 11.253552745183304, "output_norm/layer0": 11.253552745183304, "step": 2950 }, { "MSE": 699.5130490112299, "MSE/layer0": 699.5130490112299, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.31, "input_norm": 31.997922519048053, "input_norm/layer0": 31.997922519048053, "learning_rate": 0.00036842105263157896, "loss": 2.0439, "max_norm": 57.87739562988281, "max_norm/layer0": 57.87739562988281, "mean_norm": 50.21486854553223, "mean_norm/layer0": 50.21486854553223, "multicode_k": 1, "output_norm": 11.316310184796652, "output_norm/layer0": 11.316310184796652, "step": 3000 }, { "epoch": 0.31, "eval_MSE/layer0": 698.7265792011616, "eval_accuracy": 0.5184875063671823, "eval_dead_code_fraction/layer0": 0.0, "eval_input_norm/layer0": 31.997919895214224, "eval_loss": 2.0482470989227295, "eval_multicode_k": 1, "eval_output_norm/layer0": 11.359921689315088, "eval_runtime": 74.2109, "eval_samples_per_second": 62.295, "eval_steps_per_second": 7.789, "step": 3000 }, { "MSE": 697.8663801066077, "MSE/layer0": 697.8663801066077, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.32, "input_norm": 31.997911771138504, "input_norm/layer0": 31.997911771138504, "learning_rate": 0.00036578947368421055, "loss": 2.0511, "max_norm": 58.24200439453125, "max_norm/layer0": 58.24200439453125, "mean_norm": 50.51446723937988, "mean_norm/layer0": 50.51446723937988, "multicode_k": 1, "output_norm": 11.388139980634046, "output_norm/layer0": 11.388139980634046, "step": 3050 }, { "MSE": 696.0450835164395, "MSE/layer0": 696.0450835164395, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.32, "input_norm": 31.9979209582011, "input_norm/layer0": 31.9979209582011, "learning_rate": 0.00036315789473684214, "loss": 2.0466, "max_norm": 58.58406066894531, "max_norm/layer0": 58.58406066894531, "mean_norm": 50.81120681762695, "mean_norm/layer0": 50.81120681762695, "multicode_k": 1, "output_norm": 11.455551563898727, "output_norm/layer0": 11.455551563898727, "step": 3100 }, { "MSE": 694.5301999918622, "MSE/layer0": 694.5301999918622, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.33, "input_norm": 31.99790574709574, "input_norm/layer0": 31.99790574709574, "learning_rate": 0.0003605263157894737, "loss": 2.0294, "max_norm": 58.931087493896484, "max_norm/layer0": 58.931087493896484, "mean_norm": 51.104164123535156, "mean_norm/layer0": 51.104164123535156, "multicode_k": 1, "output_norm": 11.512675134340917, "output_norm/layer0": 11.512675134340917, "step": 3150 }, { "MSE": 692.5095411173497, "MSE/layer0": 692.5095411173497, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.33, "input_norm": 31.997909634908044, "input_norm/layer0": 31.997909634908044, "learning_rate": 0.0003578947368421053, "loss": 2.0455, "max_norm": 59.2867546081543, "max_norm/layer0": 59.2867546081543, "mean_norm": 51.39415168762207, "mean_norm/layer0": 51.39415168762207, "multicode_k": 1, "output_norm": 11.587491785685224, "output_norm/layer0": 11.587491785685224, "step": 3200 }, { "MSE": 691.1425885009767, "MSE/layer0": 691.1425885009767, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.34, "input_norm": 31.99791768709818, "input_norm/layer0": 31.99791768709818, "learning_rate": 0.00035526315789473687, "loss": 2.0348, "max_norm": 59.64825439453125, "max_norm/layer0": 59.64825439453125, "mean_norm": 51.68109130859375, "mean_norm/layer0": 51.68109130859375, "multicode_k": 1, "output_norm": 11.643148959477745, "output_norm/layer0": 11.643148959477745, "step": 3250 }, { "MSE": 689.2906094360355, "MSE/layer0": 689.2906094360355, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.34, "input_norm": 31.997913980483997, "input_norm/layer0": 31.997913980483997, "learning_rate": 0.0003526315789473684, "loss": 2.0293, "max_norm": 59.97624206542969, "max_norm/layer0": 59.97624206542969, "mean_norm": 51.965484619140625, "mean_norm/layer0": 51.965484619140625, "multicode_k": 1, "output_norm": 11.714975148836775, "output_norm/layer0": 11.714975148836775, "step": 3300 }, { "MSE": 688.0525922648112, "MSE/layer0": 688.0525922648112, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.35, "input_norm": 31.997908350626624, "input_norm/layer0": 31.997908350626624, "learning_rate": 0.00035, "loss": 2.0389, "max_norm": 60.30556869506836, "max_norm/layer0": 60.30556869506836, "mean_norm": 52.2458438873291, "mean_norm/layer0": 52.2458438873291, "multicode_k": 1, "output_norm": 11.772027517954506, "output_norm/layer0": 11.772027517954506, "step": 3350 }, { "MSE": 686.4814953613279, "MSE/layer0": 686.4814953613279, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.35, "input_norm": 31.997902415593472, "input_norm/layer0": 31.997902415593472, "learning_rate": 0.0003473684210526316, "loss": 2.0266, "max_norm": 60.628334045410156, "max_norm/layer0": 60.628334045410156, "mean_norm": 52.522024154663086, "mean_norm/layer0": 52.522024154663086, "multicode_k": 1, "output_norm": 11.842156640688584, "output_norm/layer0": 11.842156640688584, "step": 3400 }, { "MSE": 684.6515231323242, "MSE/layer0": 684.6515231323242, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.36, "input_norm": 31.99791290283203, "input_norm/layer0": 31.99791290283203, "learning_rate": 0.0003447368421052632, "loss": 2.0248, "max_norm": 60.95072555541992, "max_norm/layer0": 60.95072555541992, "mean_norm": 52.79400825500488, "mean_norm/layer0": 52.79400825500488, "multicode_k": 1, "output_norm": 11.907371897697445, "output_norm/layer0": 11.907371897697445, "step": 3450 }, { "MSE": 683.5430062866212, "MSE/layer0": 683.5430062866212, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.37, "input_norm": 31.9979167175293, "input_norm/layer0": 31.9979167175293, "learning_rate": 0.00034210526315789477, "loss": 2.0263, "max_norm": 61.270816802978516, "max_norm/layer0": 61.270816802978516, "mean_norm": 53.06429481506348, "mean_norm/layer0": 53.06429481506348, "multicode_k": 1, "output_norm": 11.956860675811765, "output_norm/layer0": 11.956860675811765, "step": 3500 }, { "epoch": 0.37, "eval_MSE/layer0": 682.2680427869782, "eval_accuracy": 0.5224062440993215, "eval_dead_code_fraction/layer0": 0.0, "eval_input_norm/layer0": 31.997916449774355, "eval_loss": 2.0253396034240723, "eval_multicode_k": 1, "eval_output_norm/layer0": 12.010468493388789, "eval_runtime": 73.2716, "eval_samples_per_second": 63.094, "eval_steps_per_second": 7.888, "step": 3500 }, { "MSE": 682.0599540201822, "MSE/layer0": 682.0599540201822, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.37, "input_norm": 31.9979091612498, "input_norm/layer0": 31.9979091612498, "learning_rate": 0.0003394736842105263, "loss": 2.035, "max_norm": 61.60363006591797, "max_norm/layer0": 61.60363006591797, "mean_norm": 53.33056831359863, "mean_norm/layer0": 53.33056831359863, "multicode_k": 1, "output_norm": 12.018342121442167, "output_norm/layer0": 12.018342121442167, "step": 3550 }, { "MSE": 680.5750654093424, "MSE/layer0": 680.5750654093424, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.38, "input_norm": 31.997909587224328, "input_norm/layer0": 31.997909587224328, "learning_rate": 0.0003368421052631579, "loss": 2.0232, "max_norm": 61.922420501708984, "max_norm/layer0": 61.922420501708984, "mean_norm": 53.59366035461426, "mean_norm/layer0": 53.59366035461426, "multicode_k": 1, "output_norm": 12.078021968205773, "output_norm/layer0": 12.078021968205773, "step": 3600 }, { "MSE": 678.8478289794925, "MSE/layer0": 678.8478289794925, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.38, "input_norm": 31.99789888381958, "input_norm/layer0": 31.99789888381958, "learning_rate": 0.00033421052631578944, "loss": 2.0171, "max_norm": 62.24449157714844, "max_norm/layer0": 62.24449157714844, "mean_norm": 53.85357475280762, "mean_norm/layer0": 53.85357475280762, "multicode_k": 1, "output_norm": 12.149001522064214, "output_norm/layer0": 12.149001522064214, "step": 3650 }, { "MSE": 677.7631386311848, "MSE/layer0": 677.7631386311848, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.39, "input_norm": 31.997902571360274, "input_norm/layer0": 31.997902571360274, "learning_rate": 0.00033157894736842103, "loss": 2.0212, "max_norm": 62.564937591552734, "max_norm/layer0": 62.564937591552734, "mean_norm": 54.10923385620117, "mean_norm/layer0": 54.10923385620117, "multicode_k": 1, "output_norm": 12.200160818099977, "output_norm/layer0": 12.200160818099977, "step": 3700 }, { "MSE": 676.4079176839191, "MSE/layer0": 676.4079176839191, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.39, "input_norm": 31.99789404869079, "input_norm/layer0": 31.99789404869079, "learning_rate": 0.0003289473684210527, "loss": 2.0184, "max_norm": 62.88063430786133, "max_norm/layer0": 62.88063430786133, "mean_norm": 54.362863540649414, "mean_norm/layer0": 54.362863540649414, "multicode_k": 1, "output_norm": 12.259288868904115, "output_norm/layer0": 12.259288868904115, "step": 3750 }, { "MSE": 675.2395422363282, "MSE/layer0": 675.2395422363282, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.4, "input_norm": 31.99789286295573, "input_norm/layer0": 31.99789286295573, "learning_rate": 0.0003263157894736842, "loss": 2.0058, "max_norm": 63.18323516845703, "max_norm/layer0": 63.18323516845703, "mean_norm": 54.61160659790039, "mean_norm/layer0": 54.61160659790039, "multicode_k": 1, "output_norm": 12.305311093330385, "output_norm/layer0": 12.305311093330385, "step": 3800 }, { "MSE": 673.5289611816404, "MSE/layer0": 673.5289611816404, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.4, "input_norm": 31.997895905176787, "input_norm/layer0": 31.997895905176787, "learning_rate": 0.0003236842105263158, "loss": 2.0147, "max_norm": 63.47829055786133, "max_norm/layer0": 63.47829055786133, "mean_norm": 54.85733413696289, "mean_norm/layer0": 54.85733413696289, "multicode_k": 1, "output_norm": 12.368880640665692, "output_norm/layer0": 12.368880640665692, "step": 3850 }, { "MSE": 672.7262348429363, "MSE/layer0": 672.7262348429363, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.41, "input_norm": 31.997892808914187, "input_norm/layer0": 31.997892808914187, "learning_rate": 0.0003210526315789474, "loss": 2.0011, "max_norm": 63.7920036315918, "max_norm/layer0": 63.7920036315918, "mean_norm": 55.099992752075195, "mean_norm/layer0": 55.099992752075195, "multicode_k": 1, "output_norm": 12.413625540733335, "output_norm/layer0": 12.413625540733335, "step": 3900 }, { "MSE": 671.2364042154949, "MSE/layer0": 671.2364042154949, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.41, "input_norm": 31.997892484664916, "input_norm/layer0": 31.997892484664916, "learning_rate": 0.00031842105263157894, "loss": 2.0068, "max_norm": 64.07488250732422, "max_norm/layer0": 64.07488250732422, "mean_norm": 55.33942985534668, "mean_norm/layer0": 55.33942985534668, "multicode_k": 1, "output_norm": 12.478335504531861, "output_norm/layer0": 12.478335504531861, "step": 3950 }, { "MSE": 669.9738427734378, "MSE/layer0": 669.9738427734378, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.42, "input_norm": 31.997889916102086, "input_norm/layer0": 31.997889916102086, "learning_rate": 0.00031578947368421053, "loss": 1.9906, "max_norm": 64.34879302978516, "max_norm/layer0": 64.34879302978516, "mean_norm": 55.576541900634766, "mean_norm/layer0": 55.576541900634766, "multicode_k": 1, "output_norm": 12.524646544456482, "output_norm/layer0": 12.524646544456482, "step": 4000 }, { "epoch": 0.42, "eval_MSE/layer0": 669.1965223770751, "eval_accuracy": 0.5253332978103237, "eval_dead_code_fraction/layer0": 0.0, "eval_input_norm/layer0": 31.997898890449704, "eval_loss": 2.006638526916504, "eval_multicode_k": 1, "eval_output_norm/layer0": 12.556819209953474, "eval_runtime": 73.3751, "eval_samples_per_second": 63.005, "eval_steps_per_second": 7.877, "step": 4000 }, { "MSE": 668.3390091959637, "MSE/layer0": 668.3390091959637, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.42, "input_norm": 31.99788640658062, "input_norm/layer0": 31.99788640658062, "learning_rate": 0.00031315789473684207, "loss": 1.9962, "max_norm": 64.65262603759766, "max_norm/layer0": 64.65262603759766, "mean_norm": 55.811140060424805, "mean_norm/layer0": 55.811140060424805, "multicode_k": 1, "output_norm": 12.584023051261894, "output_norm/layer0": 12.584023051261894, "step": 4050 }, { "MSE": 667.4144735717773, "MSE/layer0": 667.4144735717773, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.43, "input_norm": 31.99788414637247, "input_norm/layer0": 31.99788414637247, "learning_rate": 0.0003105263157894737, "loss": 2.0038, "max_norm": 64.9332275390625, "max_norm/layer0": 64.9332275390625, "mean_norm": 56.04119682312012, "mean_norm/layer0": 56.04119682312012, "multicode_k": 1, "output_norm": 12.633416105906175, "output_norm/layer0": 12.633416105906175, "step": 4100 }, { "MSE": 666.502211812337, "MSE/layer0": 666.502211812337, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.43, "input_norm": 31.997885338465373, "input_norm/layer0": 31.997885338465373, "learning_rate": 0.0003078947368421053, "loss": 2.0046, "max_norm": 65.20265197753906, "max_norm/layer0": 65.20265197753906, "mean_norm": 56.26777458190918, "mean_norm/layer0": 56.26777458190918, "multicode_k": 1, "output_norm": 12.67455391089122, "output_norm/layer0": 12.67455391089122, "step": 4150 }, { "MSE": 665.0832258097332, "MSE/layer0": 665.0832258097332, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.44, "input_norm": 31.997875661849967, "input_norm/layer0": 31.997875661849967, "learning_rate": 0.00030526315789473684, "loss": 2.0066, "max_norm": 65.46887969970703, "max_norm/layer0": 65.46887969970703, "mean_norm": 56.49208450317383, "mean_norm/layer0": 56.49208450317383, "multicode_k": 1, "output_norm": 12.73067569255829, "output_norm/layer0": 12.73067569255829, "step": 4200 }, { "MSE": 663.9124774169925, "MSE/layer0": 663.9124774169925, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.44, "input_norm": 31.997874129613244, "input_norm/layer0": 31.997874129613244, "learning_rate": 0.00030263157894736844, "loss": 2.0006, "max_norm": 65.73078918457031, "max_norm/layer0": 65.73078918457031, "mean_norm": 56.712989807128906, "mean_norm/layer0": 56.712989807128906, "multicode_k": 1, "output_norm": 12.783326719601945, "output_norm/layer0": 12.783326719601945, "step": 4250 }, { "MSE": 663.0191631062823, "MSE/layer0": 663.0191631062823, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.45, "input_norm": 31.99787082672119, "input_norm/layer0": 31.99787082672119, "learning_rate": 0.0003, "loss": 1.9862, "max_norm": 65.99481964111328, "max_norm/layer0": 65.99481964111328, "mean_norm": 56.93141746520996, "mean_norm/layer0": 56.93141746520996, "multicode_k": 1, "output_norm": 12.824343484242757, "output_norm/layer0": 12.824343484242757, "step": 4300 }, { "MSE": 661.9175501505531, "MSE/layer0": 661.9175501505531, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.45, "input_norm": 31.997863101959226, "input_norm/layer0": 31.997863101959226, "learning_rate": 0.00029736842105263157, "loss": 1.9891, "max_norm": 66.25289916992188, "max_norm/layer0": 66.25289916992188, "mean_norm": 57.14705848693848, "mean_norm/layer0": 57.14705848693848, "multicode_k": 1, "output_norm": 12.873331023852028, "output_norm/layer0": 12.873331023852028, "step": 4350 }, { "MSE": 660.8278486124677, "MSE/layer0": 660.8278486124677, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.46, "input_norm": 31.99786113739014, "input_norm/layer0": 31.99786113739014, "learning_rate": 0.00029473684210526316, "loss": 1.9874, "max_norm": 66.49950408935547, "max_norm/layer0": 66.49950408935547, "mean_norm": 57.3592414855957, "mean_norm/layer0": 57.3592414855957, "multicode_k": 1, "output_norm": 12.925755645434062, "output_norm/layer0": 12.925755645434062, "step": 4400 }, { "MSE": 659.7812182617188, "MSE/layer0": 659.7812182617188, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.46, "input_norm": 31.997859818140668, "input_norm/layer0": 31.997859818140668, "learning_rate": 0.00029210526315789475, "loss": 1.9894, "max_norm": 66.74647521972656, "max_norm/layer0": 66.74647521972656, "mean_norm": 57.56860542297363, "mean_norm/layer0": 57.56860542297363, "multicode_k": 1, "output_norm": 12.969949612617494, "output_norm/layer0": 12.969949612617494, "step": 4450 }, { "MSE": 658.2862462361654, "MSE/layer0": 658.2862462361654, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.47, "input_norm": 31.997855517069482, "input_norm/layer0": 31.997855517069482, "learning_rate": 0.00028947368421052634, "loss": 1.9852, "max_norm": 67.0057373046875, "max_norm/layer0": 67.0057373046875, "mean_norm": 57.77582931518555, "mean_norm/layer0": 57.77582931518555, "multicode_k": 1, "output_norm": 13.019407332738238, "output_norm/layer0": 13.019407332738238, "step": 4500 }, { "epoch": 0.47, "eval_MSE/layer0": 657.5871718611108, "eval_accuracy": 0.5279040641917702, "eval_dead_code_fraction/layer0": 0.0, "eval_input_norm/layer0": 31.997854675071842, "eval_loss": 1.9898165464401245, "eval_multicode_k": 1, "eval_output_norm/layer0": 13.052642994207561, "eval_runtime": 74.0479, "eval_samples_per_second": 62.433, "eval_steps_per_second": 7.806, "step": 4500 }, { "MSE": 657.2974259440105, "MSE/layer0": 657.2974259440105, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.48, "input_norm": 31.99785115559897, "input_norm/layer0": 31.99785115559897, "learning_rate": 0.0002868421052631579, "loss": 1.9727, "max_norm": 67.25566101074219, "max_norm/layer0": 67.25566101074219, "mean_norm": 57.98077964782715, "mean_norm/layer0": 57.98077964782715, "multicode_k": 1, "output_norm": 13.063522001902262, "output_norm/layer0": 13.063522001902262, "step": 4550 }, { "MSE": 656.5759895833334, "MSE/layer0": 656.5759895833334, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.48, "input_norm": 31.997858088811242, "input_norm/layer0": 31.997858088811242, "learning_rate": 0.00028421052631578947, "loss": 1.9897, "max_norm": 67.49605560302734, "max_norm/layer0": 67.49605560302734, "mean_norm": 58.182559967041016, "mean_norm/layer0": 58.182559967041016, "multicode_k": 1, "output_norm": 13.099744346936546, "output_norm/layer0": 13.099744346936546, "step": 4600 }, { "MSE": 655.8373800659178, "MSE/layer0": 655.8373800659178, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.49, "input_norm": 31.997857850392663, "input_norm/layer0": 31.997857850392663, "learning_rate": 0.00028157894736842106, "loss": 1.9918, "max_norm": 67.72962188720703, "max_norm/layer0": 67.72962188720703, "mean_norm": 58.38115119934082, "mean_norm/layer0": 58.38115119934082, "multicode_k": 1, "output_norm": 13.13247790972392, "output_norm/layer0": 13.13247790972392, "step": 4650 }, { "MSE": 654.6057424926755, "MSE/layer0": 654.6057424926755, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.49, "input_norm": 31.997855739593504, "input_norm/layer0": 31.997855739593504, "learning_rate": 0.0002789473684210526, "loss": 1.9908, "max_norm": 67.96855163574219, "max_norm/layer0": 67.96855163574219, "mean_norm": 58.57722091674805, "mean_norm/layer0": 58.57722091674805, "multicode_k": 1, "output_norm": 13.187800091107682, "output_norm/layer0": 13.187800091107682, "step": 4700 }, { "MSE": 653.7336292521161, "MSE/layer0": 653.7336292521161, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.5, "input_norm": 31.997861677805588, "input_norm/layer0": 31.997861677805588, "learning_rate": 0.00027631578947368425, "loss": 1.9919, "max_norm": 68.20356750488281, "max_norm/layer0": 68.20356750488281, "mean_norm": 58.77041053771973, "mean_norm/layer0": 58.77041053771973, "multicode_k": 1, "output_norm": 13.224705770810434, "output_norm/layer0": 13.224705770810434, "step": 4750 }, { "MSE": 652.4711893717447, "MSE/layer0": 652.4711893717447, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.5, "input_norm": 31.997852430343634, "input_norm/layer0": 31.997852430343634, "learning_rate": 0.00027368421052631584, "loss": 1.9777, "max_norm": 68.42557525634766, "max_norm/layer0": 68.42557525634766, "mean_norm": 58.96235466003418, "mean_norm/layer0": 58.96235466003418, "multicode_k": 1, "output_norm": 13.275700616836549, "output_norm/layer0": 13.275700616836549, "step": 4800 }, { "MSE": 651.660216674805, "MSE/layer0": 651.660216674805, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.51, "input_norm": 31.997857831319166, "input_norm/layer0": 31.997857831319166, "learning_rate": 0.0002710526315789474, "loss": 1.9728, "max_norm": 68.6562271118164, "max_norm/layer0": 68.6562271118164, "mean_norm": 59.151214599609375, "mean_norm/layer0": 59.151214599609375, "multicode_k": 1, "output_norm": 13.316913062731425, "output_norm/layer0": 13.316913062731425, "step": 4850 }, { "MSE": 651.1180463663741, "MSE/layer0": 651.1180463663741, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.51, "input_norm": 31.997851276397704, "input_norm/layer0": 31.997851276397704, "learning_rate": 0.00026842105263157897, "loss": 1.9806, "max_norm": 68.8842544555664, "max_norm/layer0": 68.8842544555664, "mean_norm": 59.336891174316406, "mean_norm/layer0": 59.336891174316406, "multicode_k": 1, "output_norm": 13.348248120943708, "output_norm/layer0": 13.348248120943708, "step": 4900 }, { "MSE": 650.0774853515621, "MSE/layer0": 650.0774853515621, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.52, "input_norm": 31.997846142450957, "input_norm/layer0": 31.997846142450957, "learning_rate": 0.0002657894736842105, "loss": 1.9718, "max_norm": 69.09481811523438, "max_norm/layer0": 69.09481811523438, "mean_norm": 59.52014923095703, "mean_norm/layer0": 59.52014923095703, "multicode_k": 1, "output_norm": 13.38570951779683, "output_norm/layer0": 13.38570951779683, "step": 4950 }, { "MSE": 649.2541728719073, "MSE/layer0": 649.2541728719073, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.52, "input_norm": 31.997852964401247, "input_norm/layer0": 31.997852964401247, "learning_rate": 0.0002631578947368421, "loss": 1.9687, "max_norm": 69.3100357055664, "max_norm/layer0": 69.3100357055664, "mean_norm": 59.70068359375, "mean_norm/layer0": 59.70068359375, "multicode_k": 1, "output_norm": 13.423000381787617, "output_norm/layer0": 13.423000381787617, "step": 5000 }, { "epoch": 0.52, "eval_MSE/layer0": 648.246248562512, "eval_accuracy": 0.5299863891896716, "eval_dead_code_fraction/layer0": 0.0, "eval_input_norm/layer0": 31.997853133679993, "eval_loss": 1.975706934928894, "eval_multicode_k": 1, "eval_output_norm/layer0": 13.449585199510798, "eval_runtime": 73.7352, "eval_samples_per_second": 62.697, "eval_steps_per_second": 7.839, "step": 5000 }, { "MSE": 648.4500269571938, "MSE/layer0": 648.4500269571938, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.53, "input_norm": 31.99784724235535, "input_norm/layer0": 31.99784724235535, "learning_rate": 0.0002605263157894737, "loss": 1.9816, "max_norm": 69.5140151977539, "max_norm/layer0": 69.5140151977539, "mean_norm": 59.87860107421875, "mean_norm/layer0": 59.87860107421875, "multicode_k": 1, "output_norm": 13.459953915278113, "output_norm/layer0": 13.459953915278113, "step": 5050 }, { "MSE": 647.5120207722985, "MSE/layer0": 647.5120207722985, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.53, "input_norm": 31.997845083872484, "input_norm/layer0": 31.997845083872484, "learning_rate": 0.0002578947368421053, "loss": 1.9778, "max_norm": 69.72222137451172, "max_norm/layer0": 69.72222137451172, "mean_norm": 60.054636001586914, "mean_norm/layer0": 60.054636001586914, "multicode_k": 1, "output_norm": 13.495457221666976, "output_norm/layer0": 13.495457221666976, "step": 5100 }, { "MSE": 646.8909526570638, "MSE/layer0": 646.8909526570638, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.54, "input_norm": 31.99783927281696, "input_norm/layer0": 31.99783927281696, "learning_rate": 0.0002552631578947369, "loss": 1.9608, "max_norm": 69.93621826171875, "max_norm/layer0": 69.93621826171875, "mean_norm": 60.228532791137695, "mean_norm/layer0": 60.228532791137695, "multicode_k": 1, "output_norm": 13.523821023305253, "output_norm/layer0": 13.523821023305253, "step": 5150 }, { "MSE": 645.6001059977214, "MSE/layer0": 645.6001059977214, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.54, "input_norm": 31.997829329172767, "input_norm/layer0": 31.997829329172767, "learning_rate": 0.0002526315789473684, "loss": 1.9514, "max_norm": 70.1629867553711, "max_norm/layer0": 70.1629867553711, "mean_norm": 60.39993667602539, "mean_norm/layer0": 60.39993667602539, "multicode_k": 1, "output_norm": 13.575601536432904, "output_norm/layer0": 13.575601536432904, "step": 5200 }, { "MSE": 645.0477313232423, "MSE/layer0": 645.0477313232423, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.55, "input_norm": 31.997829844156904, "input_norm/layer0": 31.997829844156904, "learning_rate": 0.00025, "loss": 1.953, "max_norm": 70.36659240722656, "max_norm/layer0": 70.36659240722656, "mean_norm": 60.568695068359375, "mean_norm/layer0": 60.568695068359375, "multicode_k": 1, "output_norm": 13.606370126406352, "output_norm/layer0": 13.606370126406352, "step": 5250 }, { "MSE": 644.0795441691082, "MSE/layer0": 644.0795441691082, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.55, "input_norm": 31.997827720642086, "input_norm/layer0": 31.997827720642086, "learning_rate": 0.0002473684210526316, "loss": 1.9664, "max_norm": 70.58203125, "max_norm/layer0": 70.58203125, "mean_norm": 60.73503303527832, "mean_norm/layer0": 60.73503303527832, "multicode_k": 1, "output_norm": 13.644356350898736, "output_norm/layer0": 13.644356350898736, "step": 5300 }, { "MSE": 643.4398297119142, "MSE/layer0": 643.4398297119142, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.56, "input_norm": 31.99783255259196, "input_norm/layer0": 31.99783255259196, "learning_rate": 0.0002447368421052632, "loss": 1.9612, "max_norm": 70.80116271972656, "max_norm/layer0": 70.80116271972656, "mean_norm": 60.89903450012207, "mean_norm/layer0": 60.89903450012207, "multicode_k": 1, "output_norm": 13.676611545880633, "output_norm/layer0": 13.676611545880633, "step": 5350 }, { "MSE": 642.6565199788413, "MSE/layer0": 642.6565199788413, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.56, "input_norm": 31.997826932271334, "input_norm/layer0": 31.997826932271334, "learning_rate": 0.00024210526315789475, "loss": 1.9695, "max_norm": 71.0198745727539, "max_norm/layer0": 71.0198745727539, "mean_norm": 61.06051063537598, "mean_norm/layer0": 61.06051063537598, "multicode_k": 1, "output_norm": 13.705395914713542, "output_norm/layer0": 13.705395914713542, "step": 5400 }, { "MSE": 641.5518863932293, "MSE/layer0": 641.5518863932293, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.57, "input_norm": 31.99782320658366, "input_norm/layer0": 31.99782320658366, "learning_rate": 0.00023947368421052632, "loss": 1.9708, "max_norm": 71.22209930419922, "max_norm/layer0": 71.22209930419922, "mean_norm": 61.22001647949219, "mean_norm/layer0": 61.22001647949219, "multicode_k": 1, "output_norm": 13.747722525596622, "output_norm/layer0": 13.747722525596622, "step": 5450 }, { "MSE": 641.0277577718095, "MSE/layer0": 641.0277577718095, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.57, "input_norm": 31.997817249298095, "input_norm/layer0": 31.997817249298095, "learning_rate": 0.00023684210526315788, "loss": 1.9672, "max_norm": 71.42549896240234, "max_norm/layer0": 71.42549896240234, "mean_norm": 61.377342224121094, "mean_norm/layer0": 61.377342224121094, "multicode_k": 1, "output_norm": 13.775313488642375, "output_norm/layer0": 13.775313488642375, "step": 5500 }, { "epoch": 0.57, "eval_MSE/layer0": 640.0821653411886, "eval_accuracy": 0.5321348969378108, "eval_dead_code_fraction/layer0": 0.0, "eval_input_norm/layer0": 31.997811444105338, "eval_loss": 1.9619895219802856, "eval_multicode_k": 1, "eval_output_norm/layer0": 13.80778875099279, "eval_runtime": 73.8101, "eval_samples_per_second": 62.634, "eval_steps_per_second": 7.831, "step": 5500 }, { "MSE": 640.2260070800783, "MSE/layer0": 640.2260070800783, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.58, "input_norm": 31.997807060877484, "input_norm/layer0": 31.997807060877484, "learning_rate": 0.00023421052631578948, "loss": 1.9526, "max_norm": 71.6324691772461, "max_norm/layer0": 71.6324691772461, "mean_norm": 61.532691955566406, "mean_norm/layer0": 61.532691955566406, "multicode_k": 1, "output_norm": 13.81434581597646, "output_norm/layer0": 13.81434581597646, "step": 5550 }, { "MSE": 639.6603690592448, "MSE/layer0": 639.6603690592448, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.58, "input_norm": 31.997815796534205, "input_norm/layer0": 31.997815796534205, "learning_rate": 0.00023157894736842107, "loss": 1.9592, "max_norm": 71.83050537109375, "max_norm/layer0": 71.83050537109375, "mean_norm": 61.68556213378906, "mean_norm/layer0": 61.68556213378906, "multicode_k": 1, "output_norm": 13.843803273836771, "output_norm/layer0": 13.843803273836771, "step": 5600 }, { "MSE": 638.8630006917316, "MSE/layer0": 638.8630006917316, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.59, "input_norm": 31.997804651260374, "input_norm/layer0": 31.997804651260374, "learning_rate": 0.00022894736842105263, "loss": 1.9582, "max_norm": 72.0186767578125, "max_norm/layer0": 72.0186767578125, "mean_norm": 61.836381912231445, "mean_norm/layer0": 61.836381912231445, "multicode_k": 1, "output_norm": 13.87206829547882, "output_norm/layer0": 13.87206829547882, "step": 5650 }, { "MSE": 638.6114538574218, "MSE/layer0": 638.6114538574218, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.6, "input_norm": 31.997799615859993, "input_norm/layer0": 31.997799615859993, "learning_rate": 0.00022631578947368422, "loss": 1.9581, "max_norm": 72.212158203125, "max_norm/layer0": 72.212158203125, "mean_norm": 61.984375, "mean_norm/layer0": 61.984375, "multicode_k": 1, "output_norm": 13.890618721644087, "output_norm/layer0": 13.890618721644087, "step": 5700 }, { "MSE": 637.4200433349613, "MSE/layer0": 637.4200433349613, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.6, "input_norm": 31.9977961031596, "input_norm/layer0": 31.9977961031596, "learning_rate": 0.0002236842105263158, "loss": 1.9563, "max_norm": 72.40010833740234, "max_norm/layer0": 72.40010833740234, "mean_norm": 62.13043212890625, "mean_norm/layer0": 62.13043212890625, "multicode_k": 1, "output_norm": 13.935336654980983, "output_norm/layer0": 13.935336654980983, "step": 5750 }, { "MSE": 636.9881141153974, "MSE/layer0": 636.9881141153974, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.61, "input_norm": 31.997795972824097, "input_norm/layer0": 31.997795972824097, "learning_rate": 0.00022105263157894735, "loss": 1.9652, "max_norm": 72.58822631835938, "max_norm/layer0": 72.58822631835938, "mean_norm": 62.274553298950195, "mean_norm/layer0": 62.274553298950195, "multicode_k": 1, "output_norm": 13.960987841288247, "output_norm/layer0": 13.960987841288247, "step": 5800 }, { "MSE": 636.22215037028, "MSE/layer0": 636.22215037028, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.61, "input_norm": 31.997794774373368, "input_norm/layer0": 31.997794774373368, "learning_rate": 0.00021842105263157897, "loss": 1.9509, "max_norm": 72.77027130126953, "max_norm/layer0": 72.77027130126953, "mean_norm": 62.417043685913086, "mean_norm/layer0": 62.417043685913086, "multicode_k": 1, "output_norm": 13.98557560602824, "output_norm/layer0": 13.98557560602824, "step": 5850 }, { "MSE": 635.6220120239254, "MSE/layer0": 635.6220120239254, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.62, "input_norm": 31.997796500523897, "input_norm/layer0": 31.997796500523897, "learning_rate": 0.00021578947368421054, "loss": 1.9637, "max_norm": 72.93942260742188, "max_norm/layer0": 72.93942260742188, "mean_norm": 62.5573787689209, "mean_norm/layer0": 62.5573787689209, "multicode_k": 1, "output_norm": 14.011822309494022, "output_norm/layer0": 14.011822309494022, "step": 5900 }, { "MSE": 635.1990796915693, "MSE/layer0": 635.1990796915693, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.62, "input_norm": 31.99778350830077, "input_norm/layer0": 31.99778350830077, "learning_rate": 0.0002131578947368421, "loss": 1.9417, "max_norm": 73.11217498779297, "max_norm/layer0": 73.11217498779297, "mean_norm": 62.69554328918457, "mean_norm/layer0": 62.69554328918457, "multicode_k": 1, "output_norm": 14.040199557940166, "output_norm/layer0": 14.040199557940166, "step": 5950 }, { "MSE": 634.617561645508, "MSE/layer0": 634.617561645508, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.63, "input_norm": 31.9977766195933, "input_norm/layer0": 31.9977766195933, "learning_rate": 0.00021052631578947367, "loss": 1.9441, "max_norm": 73.27617645263672, "max_norm/layer0": 73.27617645263672, "mean_norm": 62.831491470336914, "mean_norm/layer0": 62.831491470336914, "multicode_k": 1, "output_norm": 14.065582130750016, "output_norm/layer0": 14.065582130750016, "step": 6000 }, { "epoch": 0.63, "eval_MSE/layer0": 633.8831350106634, "eval_accuracy": 0.5338761587531762, "eval_dead_code_fraction/layer0": 0.0, "eval_input_norm/layer0": 31.997772803689244, "eval_loss": 1.951315999031067, "eval_multicode_k": 1, "eval_output_norm/layer0": 14.101806794000915, "eval_runtime": 73.5977, "eval_samples_per_second": 62.814, "eval_steps_per_second": 7.854, "step": 6000 }, { "MSE": 633.8391249593099, "MSE/layer0": 633.8391249593099, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.63, "input_norm": 31.99777683258057, "input_norm/layer0": 31.99777683258057, "learning_rate": 0.0002078947368421053, "loss": 1.9507, "max_norm": 73.43240356445312, "max_norm/layer0": 73.43240356445312, "mean_norm": 62.96537971496582, "mean_norm/layer0": 62.96537971496582, "multicode_k": 1, "output_norm": 14.0993266805013, "output_norm/layer0": 14.0993266805013, "step": 6050 }, { "MSE": 633.1878758748373, "MSE/layer0": 633.1878758748373, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.64, "input_norm": 31.997768185933438, "input_norm/layer0": 31.997768185933438, "learning_rate": 0.00020526315789473685, "loss": 1.9535, "max_norm": 73.59780883789062, "max_norm/layer0": 73.59780883789062, "mean_norm": 63.09744453430176, "mean_norm/layer0": 63.09744453430176, "multicode_k": 1, "output_norm": 14.12703340212504, "output_norm/layer0": 14.12703340212504, "step": 6100 }, { "MSE": 632.4774736531577, "MSE/layer0": 632.4774736531577, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.64, "input_norm": 31.997762225468954, "input_norm/layer0": 31.997762225468954, "learning_rate": 0.00020263157894736842, "loss": 1.9502, "max_norm": 73.7634506225586, "max_norm/layer0": 73.7634506225586, "mean_norm": 63.227373123168945, "mean_norm/layer0": 63.227373123168945, "multicode_k": 1, "output_norm": 14.155767776171366, "output_norm/layer0": 14.155767776171366, "step": 6150 }, { "MSE": 632.0819724527997, "MSE/layer0": 632.0819724527997, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.65, "input_norm": 31.997758595148735, "input_norm/layer0": 31.997758595148735, "learning_rate": 0.0002, "loss": 1.948, "max_norm": 73.93152618408203, "max_norm/layer0": 73.93152618408203, "mean_norm": 63.35538673400879, "mean_norm/layer0": 63.35538673400879, "multicode_k": 1, "output_norm": 14.17972202301026, "output_norm/layer0": 14.17972202301026, "step": 6200 }, { "MSE": 631.3937511189779, "MSE/layer0": 631.3937511189779, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.65, "input_norm": 31.997760909398387, "input_norm/layer0": 31.997760909398387, "learning_rate": 0.00019736842105263157, "loss": 1.9449, "max_norm": 74.07744598388672, "max_norm/layer0": 74.07744598388672, "mean_norm": 63.481435775756836, "mean_norm/layer0": 63.481435775756836, "multicode_k": 1, "output_norm": 14.207703741391498, "output_norm/layer0": 14.207703741391498, "step": 6250 }, { "MSE": 631.1097898356121, "MSE/layer0": 631.1097898356121, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.66, "input_norm": 31.997752253214514, "input_norm/layer0": 31.997752253214514, "learning_rate": 0.00019473684210526317, "loss": 1.9256, "max_norm": 74.23124694824219, "max_norm/layer0": 74.23124694824219, "mean_norm": 63.605464935302734, "mean_norm/layer0": 63.605464935302734, "multicode_k": 1, "output_norm": 14.22562705675761, "output_norm/layer0": 14.22562705675761, "step": 6300 }, { "MSE": 630.4715811157231, "MSE/layer0": 630.4715811157231, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.66, "input_norm": 31.99775326093037, "input_norm/layer0": 31.99775326093037, "learning_rate": 0.00019210526315789473, "loss": 1.9394, "max_norm": 74.37789154052734, "max_norm/layer0": 74.37789154052734, "mean_norm": 63.72765922546387, "mean_norm/layer0": 63.72765922546387, "multicode_k": 1, "output_norm": 14.252348532676702, "output_norm/layer0": 14.252348532676702, "step": 6350 }, { "MSE": 629.5616383870444, "MSE/layer0": 629.5616383870444, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.67, "input_norm": 31.9977592086792, "input_norm/layer0": 31.9977592086792, "learning_rate": 0.00018947368421052632, "loss": 1.948, "max_norm": 74.52799987792969, "max_norm/layer0": 74.52799987792969, "mean_norm": 63.84817886352539, "mean_norm/layer0": 63.84817886352539, "multicode_k": 1, "output_norm": 14.278619543711342, "output_norm/layer0": 14.278619543711342, "step": 6400 }, { "MSE": 628.9405068969726, "MSE/layer0": 628.9405068969726, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.67, "input_norm": 31.9977388159434, "input_norm/layer0": 31.9977388159434, "learning_rate": 0.0001868421052631579, "loss": 1.9365, "max_norm": 74.66854095458984, "max_norm/layer0": 74.66854095458984, "mean_norm": 63.96674346923828, "mean_norm/layer0": 63.96674346923828, "multicode_k": 1, "output_norm": 14.308290360768634, "output_norm/layer0": 14.308290360768634, "step": 6450 }, { "MSE": 628.8358187866208, "MSE/layer0": 628.8358187866208, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.68, "input_norm": 31.997742996215806, "input_norm/layer0": 31.997742996215806, "learning_rate": 0.00018421052631578948, "loss": 1.9408, "max_norm": 74.80778503417969, "max_norm/layer0": 74.80778503417969, "mean_norm": 64.08341407775879, "mean_norm/layer0": 64.08341407775879, "multicode_k": 1, "output_norm": 14.319794411659238, "output_norm/layer0": 14.319794411659238, "step": 6500 }, { "epoch": 0.68, "eval_MSE/layer0": 628.092910030562, "eval_accuracy": 0.5357603583933366, "eval_dead_code_fraction/layer0": 0.0, "eval_input_norm/layer0": 31.997742160687373, "eval_loss": 1.9396723508834839, "eval_multicode_k": 1, "eval_output_norm/layer0": 14.354976222496019, "eval_runtime": 73.9338, "eval_samples_per_second": 62.529, "eval_steps_per_second": 7.818, "step": 6500 }, { "MSE": 628.4872816975908, "MSE/layer0": 628.4872816975908, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.68, "input_norm": 31.997741378148394, "input_norm/layer0": 31.997741378148394, "learning_rate": 0.00018157894736842107, "loss": 1.9392, "max_norm": 74.95079040527344, "max_norm/layer0": 74.95079040527344, "mean_norm": 64.19818496704102, "mean_norm/layer0": 64.19818496704102, "multicode_k": 1, "output_norm": 14.340212704340617, "output_norm/layer0": 14.340212704340617, "step": 6550 }, { "MSE": 627.595106302897, "MSE/layer0": 627.595106302897, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.69, "input_norm": 31.99773236592611, "input_norm/layer0": 31.99773236592611, "learning_rate": 0.00017894736842105264, "loss": 1.9312, "max_norm": 75.08959197998047, "max_norm/layer0": 75.08959197998047, "mean_norm": 64.3110408782959, "mean_norm/layer0": 64.3110408782959, "multicode_k": 1, "output_norm": 14.375651826858522, "output_norm/layer0": 14.375651826858522, "step": 6600 }, { "MSE": 627.2688003540036, "MSE/layer0": 627.2688003540036, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.69, "input_norm": 31.997724459966015, "input_norm/layer0": 31.997724459966015, "learning_rate": 0.0001763157894736842, "loss": 1.9454, "max_norm": 75.23365783691406, "max_norm/layer0": 75.23365783691406, "mean_norm": 64.42234230041504, "mean_norm/layer0": 64.42234230041504, "multicode_k": 1, "output_norm": 14.385090745290121, "output_norm/layer0": 14.385090745290121, "step": 6650 }, { "MSE": 626.5893623860678, "MSE/layer0": 626.5893623860678, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.7, "input_norm": 31.997720209757503, "input_norm/layer0": 31.997720209757503, "learning_rate": 0.0001736842105263158, "loss": 1.9428, "max_norm": 75.36791229248047, "max_norm/layer0": 75.36791229248047, "mean_norm": 64.5310287475586, "mean_norm/layer0": 64.5310287475586, "multicode_k": 1, "output_norm": 14.414791498184208, "output_norm/layer0": 14.414791498184208, "step": 6700 }, { "MSE": 626.1687516276043, "MSE/layer0": 626.1687516276043, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.7, "input_norm": 31.997717237472536, "input_norm/layer0": 31.997717237472536, "learning_rate": 0.00017105263157894739, "loss": 1.9341, "max_norm": 75.49561309814453, "max_norm/layer0": 75.49561309814453, "mean_norm": 64.63836669921875, "mean_norm/layer0": 64.63836669921875, "multicode_k": 1, "output_norm": 14.436859647432962, "output_norm/layer0": 14.436859647432962, "step": 6750 }, { "MSE": 625.7842074584966, "MSE/layer0": 625.7842074584966, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.71, "input_norm": 31.997723042170207, "input_norm/layer0": 31.997723042170207, "learning_rate": 0.00016842105263157895, "loss": 1.9391, "max_norm": 75.62852478027344, "max_norm/layer0": 75.62852478027344, "mean_norm": 64.74386024475098, "mean_norm/layer0": 64.74386024475098, "multicode_k": 1, "output_norm": 14.45211536884308, "output_norm/layer0": 14.45211536884308, "step": 6800 }, { "MSE": 625.3583324178057, "MSE/layer0": 625.3583324178057, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.72, "input_norm": 31.997710723876956, "input_norm/layer0": 31.997710723876956, "learning_rate": 0.00016578947368421052, "loss": 1.9214, "max_norm": 75.7518081665039, "max_norm/layer0": 75.7518081665039, "mean_norm": 64.84785079956055, "mean_norm/layer0": 64.84785079956055, "multicode_k": 1, "output_norm": 14.472083713213603, "output_norm/layer0": 14.472083713213603, "step": 6850 }, { "MSE": 625.0808269246418, "MSE/layer0": 625.0808269246418, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.72, "input_norm": 31.997701005935667, "input_norm/layer0": 31.997701005935667, "learning_rate": 0.0001631578947368421, "loss": 1.9248, "max_norm": 75.8736343383789, "max_norm/layer0": 75.8736343383789, "mean_norm": 64.94989013671875, "mean_norm/layer0": 64.94989013671875, "multicode_k": 1, "output_norm": 14.49320138454437, "output_norm/layer0": 14.49320138454437, "step": 6900 }, { "MSE": 624.4893544514975, "MSE/layer0": 624.4893544514975, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.73, "input_norm": 31.997702992757166, "input_norm/layer0": 31.997702992757166, "learning_rate": 0.0001605263157894737, "loss": 1.9357, "max_norm": 75.99244689941406, "max_norm/layer0": 75.99244689941406, "mean_norm": 65.05006790161133, "mean_norm/layer0": 65.05006790161133, "multicode_k": 1, "output_norm": 14.515017460187277, "output_norm/layer0": 14.515017460187277, "step": 6950 }, { "MSE": 623.983821309408, "MSE/layer0": 623.983821309408, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.73, "input_norm": 31.997692581812547, "input_norm/layer0": 31.997692581812547, "learning_rate": 0.00015789473684210527, "loss": 1.9256, "max_norm": 76.1169204711914, "max_norm/layer0": 76.1169204711914, "mean_norm": 65.14841270446777, "mean_norm/layer0": 65.14841270446777, "multicode_k": 1, "output_norm": 14.531605450312297, "output_norm/layer0": 14.531605450312297, "step": 7000 }, { "epoch": 0.73, "eval_MSE/layer0": 623.2726008245854, "eval_accuracy": 0.5373965313049694, "eval_dead_code_fraction/layer0": 0.0, "eval_input_norm/layer0": 31.99768957905041, "eval_loss": 1.9302037954330444, "eval_multicode_k": 1, "eval_output_norm/layer0": 14.553397603295936, "eval_runtime": 73.3018, "eval_samples_per_second": 63.068, "eval_steps_per_second": 7.885, "step": 7000 }, { "MSE": 623.8173256429034, "MSE/layer0": 623.8173256429034, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.74, "input_norm": 31.997689800262457, "input_norm/layer0": 31.997689800262457, "learning_rate": 0.00015526315789473686, "loss": 1.9215, "max_norm": 76.22943115234375, "max_norm/layer0": 76.22943115234375, "mean_norm": 65.2452278137207, "mean_norm/layer0": 65.2452278137207, "multicode_k": 1, "output_norm": 14.544135572115584, "output_norm/layer0": 14.544135572115584, "step": 7050 }, { "MSE": 623.4564833577472, "MSE/layer0": 623.4564833577472, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.74, "input_norm": 31.997697146733607, "input_norm/layer0": 31.997697146733607, "learning_rate": 0.00015263157894736842, "loss": 1.9291, "max_norm": 76.35796356201172, "max_norm/layer0": 76.35796356201172, "mean_norm": 65.33997344970703, "mean_norm/layer0": 65.33997344970703, "multicode_k": 1, "output_norm": 14.557166822751359, "output_norm/layer0": 14.557166822751359, "step": 7100 }, { "MSE": 622.3157424926754, "MSE/layer0": 622.3157424926754, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.75, "input_norm": 31.997690575917574, "input_norm/layer0": 31.997690575917574, "learning_rate": 0.00015, "loss": 1.9272, "max_norm": 76.47930145263672, "max_norm/layer0": 76.47930145263672, "mean_norm": 65.4333724975586, "mean_norm/layer0": 65.4333724975586, "multicode_k": 1, "output_norm": 14.59491890271505, "output_norm/layer0": 14.59491890271505, "step": 7150 }, { "MSE": 622.1008169555663, "MSE/layer0": 622.1008169555663, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.75, "input_norm": 31.997691469192503, "input_norm/layer0": 31.997691469192503, "learning_rate": 0.00014736842105263158, "loss": 1.9421, "max_norm": 76.5845947265625, "max_norm/layer0": 76.5845947265625, "mean_norm": 65.52462577819824, "mean_norm/layer0": 65.52462577819824, "multicode_k": 1, "output_norm": 14.608456416130064, "output_norm/layer0": 14.608456416130064, "step": 7200 }, { "MSE": 621.7943653361006, "MSE/layer0": 621.7943653361006, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.76, "input_norm": 31.997678140004478, "input_norm/layer0": 31.997678140004478, "learning_rate": 0.00014473684210526317, "loss": 1.9221, "max_norm": 76.68899536132812, "max_norm/layer0": 76.68899536132812, "mean_norm": 65.61434745788574, "mean_norm/layer0": 65.61434745788574, "multicode_k": 1, "output_norm": 14.622403078079222, "output_norm/layer0": 14.622403078079222, "step": 7250 }, { "MSE": 621.7445918782552, "MSE/layer0": 621.7445918782552, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.76, "input_norm": 31.997679424285884, "input_norm/layer0": 31.997679424285884, "learning_rate": 0.00014210526315789474, "loss": 1.9172, "max_norm": 76.79942321777344, "max_norm/layer0": 76.79942321777344, "mean_norm": 65.70241737365723, "mean_norm/layer0": 65.70241737365723, "multicode_k": 1, "output_norm": 14.632240413029983, "output_norm/layer0": 14.632240413029983, "step": 7300 }, { "MSE": 621.0073055013017, "MSE/layer0": 621.0073055013017, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.77, "input_norm": 31.997667986551914, "input_norm/layer0": 31.997667986551914, "learning_rate": 0.0001394736842105263, "loss": 1.9187, "max_norm": 76.90473937988281, "max_norm/layer0": 76.90473937988281, "mean_norm": 65.78865623474121, "mean_norm/layer0": 65.78865623474121, "multicode_k": 1, "output_norm": 14.659644064903254, "output_norm/layer0": 14.659644064903254, "step": 7350 }, { "MSE": 620.5166587320964, "MSE/layer0": 620.5166587320964, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.77, "input_norm": 31.99766827583312, "input_norm/layer0": 31.99766827583312, "learning_rate": 0.00013684210526315792, "loss": 1.9236, "max_norm": 77.00653839111328, "max_norm/layer0": 77.00653839111328, "mean_norm": 65.87344741821289, "mean_norm/layer0": 65.87344741821289, "multicode_k": 1, "output_norm": 14.683248674074807, "output_norm/layer0": 14.683248674074807, "step": 7400 }, { "MSE": 620.4730934651691, "MSE/layer0": 620.4730934651691, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.78, "input_norm": 31.99766536712645, "input_norm/layer0": 31.99766536712645, "learning_rate": 0.00013421052631578948, "loss": 1.9181, "max_norm": 77.11151123046875, "max_norm/layer0": 77.11151123046875, "mean_norm": 65.95642852783203, "mean_norm/layer0": 65.95642852783203, "multicode_k": 1, "output_norm": 14.682427426973977, "output_norm/layer0": 14.682427426973977, "step": 7450 }, { "MSE": 619.8806704711913, "MSE/layer0": 619.8806704711913, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.78, "input_norm": 31.997652931213374, "input_norm/layer0": 31.997652931213374, "learning_rate": 0.00013157894736842105, "loss": 1.9204, "max_norm": 77.21614837646484, "max_norm/layer0": 77.21614837646484, "mean_norm": 66.03750610351562, "mean_norm/layer0": 66.03750610351562, "multicode_k": 1, "output_norm": 14.709125100771587, "output_norm/layer0": 14.709125100771587, "step": 7500 }, { "epoch": 0.78, "eval_MSE/layer0": 619.4572802491444, "eval_accuracy": 0.538146743438657, "eval_dead_code_fraction/layer0": 0.0, "eval_input_norm/layer0": 31.997657016941467, "eval_loss": 1.9224542379379272, "eval_multicode_k": 1, "eval_output_norm/layer0": 14.72584700899003, "eval_runtime": 73.2809, "eval_samples_per_second": 63.086, "eval_steps_per_second": 7.887, "step": 7500 }, { "MSE": 619.6498880004883, "MSE/layer0": 619.6498880004883, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.79, "input_norm": 31.997653865814208, "input_norm/layer0": 31.997653865814208, "learning_rate": 0.00012894736842105264, "loss": 1.9109, "max_norm": 77.3195571899414, "max_norm/layer0": 77.3195571899414, "mean_norm": 66.11709403991699, "mean_norm/layer0": 66.11709403991699, "multicode_k": 1, "output_norm": 14.724224853515622, "output_norm/layer0": 14.724224853515622, "step": 7550 }, { "MSE": 619.544646809896, "MSE/layer0": 619.544646809896, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.79, "input_norm": 31.997655792236333, "input_norm/layer0": 31.997655792236333, "learning_rate": 0.0001263157894736842, "loss": 1.9247, "max_norm": 77.41654205322266, "max_norm/layer0": 77.41654205322266, "mean_norm": 66.19502639770508, "mean_norm/layer0": 66.19502639770508, "multicode_k": 1, "output_norm": 14.729852019945778, "output_norm/layer0": 14.729852019945778, "step": 7600 }, { "MSE": 619.1442233276366, "MSE/layer0": 619.1442233276366, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.8, "input_norm": 31.99764471054077, "input_norm/layer0": 31.99764471054077, "learning_rate": 0.0001236842105263158, "loss": 1.9237, "max_norm": 77.5074234008789, "max_norm/layer0": 77.5074234008789, "mean_norm": 66.27114677429199, "mean_norm/layer0": 66.27114677429199, "multicode_k": 1, "output_norm": 14.745990212758379, "output_norm/layer0": 14.745990212758379, "step": 7650 }, { "MSE": 618.6404962158206, "MSE/layer0": 618.6404962158206, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.8, "input_norm": 31.997635892232267, "input_norm/layer0": 31.997635892232267, "learning_rate": 0.00012105263157894738, "loss": 1.913, "max_norm": 77.602294921875, "max_norm/layer0": 77.602294921875, "mean_norm": 66.34577751159668, "mean_norm/layer0": 66.34577751159668, "multicode_k": 1, "output_norm": 14.766639779408772, "output_norm/layer0": 14.766639779408772, "step": 7700 }, { "MSE": 618.2833578491213, "MSE/layer0": 618.2833578491213, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.81, "input_norm": 31.997631740570075, "input_norm/layer0": 31.997631740570075, "learning_rate": 0.00011842105263157894, "loss": 1.9214, "max_norm": 77.6917724609375, "max_norm/layer0": 77.6917724609375, "mean_norm": 66.41888046264648, "mean_norm/layer0": 66.41888046264648, "multicode_k": 1, "output_norm": 14.779039435386654, "output_norm/layer0": 14.779039435386654, "step": 7750 }, { "MSE": 618.2477112833653, "MSE/layer0": 618.2477112833653, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.81, "input_norm": 31.997634382247924, "input_norm/layer0": 31.997634382247924, "learning_rate": 0.00011578947368421053, "loss": 1.9127, "max_norm": 77.77839660644531, "max_norm/layer0": 77.77839660644531, "mean_norm": 66.49017333984375, "mean_norm/layer0": 66.49017333984375, "multicode_k": 1, "output_norm": 14.782011265754704, "output_norm/layer0": 14.782011265754704, "step": 7800 }, { "MSE": 617.7417582194005, "MSE/layer0": 617.7417582194005, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.82, "input_norm": 31.997628266016648, "input_norm/layer0": 31.997628266016648, "learning_rate": 0.00011315789473684211, "loss": 1.9084, "max_norm": 77.86212158203125, "max_norm/layer0": 77.86212158203125, "mean_norm": 66.55990791320801, "mean_norm/layer0": 66.55990791320801, "multicode_k": 1, "output_norm": 14.801776518821718, "output_norm/layer0": 14.801776518821718, "step": 7850 }, { "MSE": 617.339886271159, "MSE/layer0": 617.339886271159, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.82, "input_norm": 31.99762384732564, "input_norm/layer0": 31.99762384732564, "learning_rate": 0.00011052631578947368, "loss": 1.9115, "max_norm": 77.94374084472656, "max_norm/layer0": 77.94374084472656, "mean_norm": 66.62779235839844, "mean_norm/layer0": 66.62779235839844, "multicode_k": 1, "output_norm": 14.823196705182394, "output_norm/layer0": 14.823196705182394, "step": 7900 }, { "MSE": 617.3184334309897, "MSE/layer0": 617.3184334309897, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.83, "input_norm": 31.9976179567973, "input_norm/layer0": 31.9976179567973, "learning_rate": 0.00010789473684210527, "loss": 1.9136, "max_norm": 78.02580261230469, "max_norm/layer0": 78.02580261230469, "mean_norm": 66.69412803649902, "mean_norm/layer0": 66.69412803649902, "multicode_k": 1, "output_norm": 14.828250519434608, "output_norm/layer0": 14.828250519434608, "step": 7950 }, { "MSE": 616.9322255452475, "MSE/layer0": 616.9322255452475, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.84, "input_norm": 31.997613105773937, "input_norm/layer0": 31.997613105773937, "learning_rate": 0.00010526315789473683, "loss": 1.907, "max_norm": 78.10686492919922, "max_norm/layer0": 78.10686492919922, "mean_norm": 66.7584114074707, "mean_norm/layer0": 66.7584114074707, "multicode_k": 1, "output_norm": 14.839720834096273, "output_norm/layer0": 14.839720834096273, "step": 8000 }, { "epoch": 0.84, "eval_MSE/layer0": 616.4379357749087, "eval_accuracy": 0.5393073732024142, "eval_dead_code_fraction/layer0": 0.0, "eval_input_norm/layer0": 31.99761064584294, "eval_loss": 1.9150168895721436, "eval_multicode_k": 1, "eval_output_norm/layer0": 14.862492301828695, "eval_runtime": 73.6278, "eval_samples_per_second": 62.789, "eval_steps_per_second": 7.85, "step": 8000 }, { "MSE": 616.6813212076825, "MSE/layer0": 616.6813212076825, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.84, "input_norm": 31.997603750228897, "input_norm/layer0": 31.997603750228897, "learning_rate": 0.00010263157894736843, "loss": 1.8975, "max_norm": 78.18397521972656, "max_norm/layer0": 78.18397521972656, "mean_norm": 66.82158279418945, "mean_norm/layer0": 66.82158279418945, "multicode_k": 1, "output_norm": 14.848202861150106, "output_norm/layer0": 14.848202861150106, "step": 8050 }, { "MSE": 616.5551970418294, "MSE/layer0": 616.5551970418294, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.85, "input_norm": 31.99760689099629, "input_norm/layer0": 31.99760689099629, "learning_rate": 0.0001, "loss": 1.916, "max_norm": 78.26499938964844, "max_norm/layer0": 78.26499938964844, "mean_norm": 66.88335037231445, "mean_norm/layer0": 66.88335037231445, "multicode_k": 1, "output_norm": 14.8604402812322, "output_norm/layer0": 14.8604402812322, "step": 8100 }, { "MSE": 616.288039347331, "MSE/layer0": 616.288039347331, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.85, "input_norm": 31.997600466410333, "input_norm/layer0": 31.997600466410333, "learning_rate": 9.736842105263158e-05, "loss": 1.902, "max_norm": 78.33844757080078, "max_norm/layer0": 78.33844757080078, "mean_norm": 66.94340133666992, "mean_norm/layer0": 66.94340133666992, "multicode_k": 1, "output_norm": 14.872187639872232, "output_norm/layer0": 14.872187639872232, "step": 8150 }, { "MSE": 615.8982196044924, "MSE/layer0": 615.8982196044924, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.86, "input_norm": 31.997594718933108, "input_norm/layer0": 31.997594718933108, "learning_rate": 9.473684210526316e-05, "loss": 1.9142, "max_norm": 78.40998077392578, "max_norm/layer0": 78.40998077392578, "mean_norm": 67.00171661376953, "mean_norm/layer0": 67.00171661376953, "multicode_k": 1, "output_norm": 14.884622203509018, "output_norm/layer0": 14.884622203509018, "step": 8200 }, { "MSE": 615.649053141276, "MSE/layer0": 615.649053141276, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.86, "input_norm": 31.997591203053794, "input_norm/layer0": 31.997591203053794, "learning_rate": 9.210526315789474e-05, "loss": 1.9103, "max_norm": 78.47700500488281, "max_norm/layer0": 78.47700500488281, "mean_norm": 67.05831527709961, "mean_norm/layer0": 67.05831527709961, "multicode_k": 1, "output_norm": 14.896942078272502, "output_norm/layer0": 14.896942078272502, "step": 8250 }, { "MSE": 615.4050069173176, "MSE/layer0": 615.4050069173176, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.87, "input_norm": 31.99757507324218, "input_norm/layer0": 31.99757507324218, "learning_rate": 8.947368421052632e-05, "loss": 1.8999, "max_norm": 78.54086303710938, "max_norm/layer0": 78.54086303710938, "mean_norm": 67.11351013183594, "mean_norm/layer0": 67.11351013183594, "multicode_k": 1, "output_norm": 14.907591681480406, "output_norm/layer0": 14.907591681480406, "step": 8300 }, { "MSE": 615.0221789550782, "MSE/layer0": 615.0221789550782, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.87, "input_norm": 31.997587076822917, "input_norm/layer0": 31.997587076822917, "learning_rate": 8.68421052631579e-05, "loss": 1.9122, "max_norm": 78.60425567626953, "max_norm/layer0": 78.60425567626953, "mean_norm": 67.1669692993164, "mean_norm/layer0": 67.1669692993164, "multicode_k": 1, "output_norm": 14.918850135803218, "output_norm/layer0": 14.918850135803218, "step": 8350 }, { "MSE": 614.7660255940759, "MSE/layer0": 614.7660255940759, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.88, "input_norm": 31.99758012771608, "input_norm/layer0": 31.99758012771608, "learning_rate": 8.421052631578948e-05, "loss": 1.9074, "max_norm": 78.66250610351562, "max_norm/layer0": 78.66250610351562, "mean_norm": 67.21884536743164, "mean_norm/layer0": 67.21884536743164, "multicode_k": 1, "output_norm": 14.930259111722311, "output_norm/layer0": 14.930259111722311, "step": 8400 }, { "MSE": 614.4904387410484, "MSE/layer0": 614.4904387410484, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.88, "input_norm": 31.99757884025574, "input_norm/layer0": 31.99757884025574, "learning_rate": 8.157894736842105e-05, "loss": 1.9151, "max_norm": 78.7247314453125, "max_norm/layer0": 78.7247314453125, "mean_norm": 67.26914596557617, "mean_norm/layer0": 67.26914596557617, "multicode_k": 1, "output_norm": 14.941800510088594, "output_norm/layer0": 14.941800510088594, "step": 8450 }, { "MSE": 614.3984759521479, "MSE/layer0": 614.3984759521479, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.89, "input_norm": 31.997565978368122, "input_norm/layer0": 31.997565978368122, "learning_rate": 7.894736842105263e-05, "loss": 1.8931, "max_norm": 78.78428649902344, "max_norm/layer0": 78.78428649902344, "mean_norm": 67.31785583496094, "mean_norm/layer0": 67.31785583496094, "multicode_k": 1, "output_norm": 14.948297271728517, "output_norm/layer0": 14.948297271728517, "step": 8500 }, { "epoch": 0.89, "eval_MSE/layer0": 613.78736410403, "eval_accuracy": 0.5408171011151899, "eval_dead_code_fraction/layer0": 0.0, "eval_input_norm/layer0": 31.997572115378908, "eval_loss": 1.9076036214828491, "eval_multicode_k": 1, "eval_output_norm/layer0": 14.968526063531659, "eval_runtime": 73.9448, "eval_samples_per_second": 62.52, "eval_steps_per_second": 7.817, "step": 8500 }, { "MSE": 614.2003710937502, "MSE/layer0": 614.2003710937502, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.89, "input_norm": 31.997571328481037, "input_norm/layer0": 31.997571328481037, "learning_rate": 7.631578947368421e-05, "loss": 1.9006, "max_norm": 78.83836364746094, "max_norm/layer0": 78.83836364746094, "mean_norm": 67.36493301391602, "mean_norm/layer0": 67.36493301391602, "multicode_k": 1, "output_norm": 14.955024781227117, "output_norm/layer0": 14.955024781227117, "step": 8550 }, { "MSE": 613.8168900553383, "MSE/layer0": 613.8168900553383, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.9, "input_norm": 31.997557487487796, "input_norm/layer0": 31.997557487487796, "learning_rate": 7.368421052631579e-05, "loss": 1.9045, "max_norm": 78.8912582397461, "max_norm/layer0": 78.8912582397461, "mean_norm": 67.41046524047852, "mean_norm/layer0": 67.41046524047852, "multicode_k": 1, "output_norm": 14.968488362630207, "output_norm/layer0": 14.968488362630207, "step": 8600 }, { "MSE": 613.6968625895179, "MSE/layer0": 613.6968625895179, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.9, "input_norm": 31.99754828453064, "input_norm/layer0": 31.99754828453064, "learning_rate": 7.105263157894737e-05, "loss": 1.9009, "max_norm": 78.942626953125, "max_norm/layer0": 78.942626953125, "mean_norm": 67.45438766479492, "mean_norm/layer0": 67.45438766479492, "multicode_k": 1, "output_norm": 14.979475774765014, "output_norm/layer0": 14.979475774765014, "step": 8650 }, { "MSE": 613.3956824747725, "MSE/layer0": 613.3956824747725, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.91, "input_norm": 31.997546965281174, "input_norm/layer0": 31.997546965281174, "learning_rate": 6.842105263157896e-05, "loss": 1.9, "max_norm": 78.99479675292969, "max_norm/layer0": 78.99479675292969, "mean_norm": 67.49666595458984, "mean_norm/layer0": 67.49666595458984, "multicode_k": 1, "output_norm": 14.988234910964966, "output_norm/layer0": 14.988234910964966, "step": 8700 }, { "MSE": 613.2128627522789, "MSE/layer0": 613.2128627522789, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.91, "input_norm": 31.997544927597048, "input_norm/layer0": 31.997544927597048, "learning_rate": 6.578947368421052e-05, "loss": 1.9059, "max_norm": 79.04541015625, "max_norm/layer0": 79.04541015625, "mean_norm": 67.53742218017578, "mean_norm/layer0": 67.53742218017578, "multicode_k": 1, "output_norm": 14.991036421457924, "output_norm/layer0": 14.991036421457924, "step": 8750 }, { "MSE": 612.9370720418297, "MSE/layer0": 612.9370720418297, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.92, "input_norm": 31.99753908475239, "input_norm/layer0": 31.99753908475239, "learning_rate": 6.31578947368421e-05, "loss": 1.9023, "max_norm": 79.09040069580078, "max_norm/layer0": 79.09040069580078, "mean_norm": 67.57658767700195, "mean_norm/layer0": 67.57658767700195, "multicode_k": 1, "output_norm": 15.003661061922706, "output_norm/layer0": 15.003661061922706, "step": 8800 }, { "MSE": 613.0978963216148, "MSE/layer0": 613.0978963216148, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.92, "input_norm": 31.997535756429023, "input_norm/layer0": 31.997535756429023, "learning_rate": 6.052631578947369e-05, "loss": 1.9004, "max_norm": 79.13478088378906, "max_norm/layer0": 79.13478088378906, "mean_norm": 67.61412811279297, "mean_norm/layer0": 67.61412811279297, "multicode_k": 1, "output_norm": 14.999437109629307, "output_norm/layer0": 14.999437109629307, "step": 8850 }, { "MSE": 612.746408691406, "MSE/layer0": 612.746408691406, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.93, "input_norm": 31.997531512578334, "input_norm/layer0": 31.997531512578334, "learning_rate": 5.789473684210527e-05, "loss": 1.8947, "max_norm": 79.17863464355469, "max_norm/layer0": 79.17863464355469, "mean_norm": 67.65010452270508, "mean_norm/layer0": 67.65010452270508, "multicode_k": 1, "output_norm": 15.013854147593182, "output_norm/layer0": 15.013854147593182, "step": 8900 }, { "MSE": 612.5075473022462, "MSE/layer0": 612.5075473022462, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.93, "input_norm": 31.997522573471066, "input_norm/layer0": 31.997522573471066, "learning_rate": 5.526315789473684e-05, "loss": 1.888, "max_norm": 79.2198257446289, "max_norm/layer0": 79.2198257446289, "mean_norm": 67.6845588684082, "mean_norm/layer0": 67.6845588684082, "multicode_k": 1, "output_norm": 15.024005990028382, "output_norm/layer0": 15.024005990028382, "step": 8950 }, { "MSE": 612.4464337158204, "MSE/layer0": 612.4464337158204, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.94, "input_norm": 31.99751985549927, "input_norm/layer0": 31.99751985549927, "learning_rate": 5.263157894736842e-05, "loss": 1.9021, "max_norm": 79.25985717773438, "max_norm/layer0": 79.25985717773438, "mean_norm": 67.71733856201172, "mean_norm/layer0": 67.71733856201172, "multicode_k": 1, "output_norm": 15.025202210744226, "output_norm/layer0": 15.025202210744226, "step": 9000 }, { "epoch": 0.94, "eval_MSE/layer0": 612.012579843437, "eval_accuracy": 0.5416772654217966, "eval_dead_code_fraction/layer0": 0.0, "eval_input_norm/layer0": 31.99751990196794, "eval_loss": 1.90205979347229, "eval_multicode_k": 1, "eval_output_norm/layer0": 15.037853428586699, "eval_runtime": 73.2981, "eval_samples_per_second": 63.071, "eval_steps_per_second": 7.886, "step": 9000 }, { "MSE": 612.1616466267901, "MSE/layer0": 612.1616466267901, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.94, "input_norm": 31.997515144348153, "input_norm/layer0": 31.997515144348153, "learning_rate": 5e-05, "loss": 1.8979, "max_norm": 79.2950668334961, "max_norm/layer0": 79.2950668334961, "mean_norm": 67.74863052368164, "mean_norm/layer0": 67.74863052368164, "multicode_k": 1, "output_norm": 15.036479252179465, "output_norm/layer0": 15.036479252179465, "step": 9050 }, { "MSE": 611.8442991129552, "MSE/layer0": 611.8442991129552, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.95, "input_norm": 31.99751302719116, "input_norm/layer0": 31.99751302719116, "learning_rate": 4.736842105263158e-05, "loss": 1.8978, "max_norm": 79.32892608642578, "max_norm/layer0": 79.32892608642578, "mean_norm": 67.77827835083008, "mean_norm/layer0": 67.77827835083008, "multicode_k": 1, "output_norm": 15.046743833223978, "output_norm/layer0": 15.046743833223978, "step": 9100 }, { "MSE": 611.9183032226562, "MSE/layer0": 611.9183032226562, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.96, "input_norm": 31.99751057942708, "input_norm/layer0": 31.99751057942708, "learning_rate": 4.473684210526316e-05, "loss": 1.8971, "max_norm": 79.36182403564453, "max_norm/layer0": 79.36182403564453, "mean_norm": 67.80632781982422, "mean_norm/layer0": 67.80632781982422, "multicode_k": 1, "output_norm": 15.041637244224557, "output_norm/layer0": 15.041637244224557, "step": 9150 }, { "MSE": 611.5441438802083, "MSE/layer0": 611.5441438802083, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.96, "input_norm": 31.99750095685323, "input_norm/layer0": 31.99750095685323, "learning_rate": 4.210526315789474e-05, "loss": 1.8874, "max_norm": 79.39281463623047, "max_norm/layer0": 79.39281463623047, "mean_norm": 67.83284759521484, "mean_norm/layer0": 67.83284759521484, "multicode_k": 1, "output_norm": 15.055660729408274, "output_norm/layer0": 15.055660729408274, "step": 9200 }, { "MSE": 611.5922235107425, "MSE/layer0": 611.5922235107425, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.97, "input_norm": 31.99750220934551, "input_norm/layer0": 31.99750220934551, "learning_rate": 3.9473684210526316e-05, "loss": 1.8958, "max_norm": 79.42273712158203, "max_norm/layer0": 79.42273712158203, "mean_norm": 67.85774230957031, "mean_norm/layer0": 67.85774230957031, "multicode_k": 1, "output_norm": 15.055747102101643, "output_norm/layer0": 15.055747102101643, "step": 9250 }, { "MSE": 611.6544079589839, "MSE/layer0": 611.6544079589839, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.97, "input_norm": 31.997499033610026, "input_norm/layer0": 31.997499033610026, "learning_rate": 3.6842105263157895e-05, "loss": 1.8915, "max_norm": 79.44976806640625, "max_norm/layer0": 79.44976806640625, "mean_norm": 67.88099670410156, "mean_norm/layer0": 67.88099670410156, "multicode_k": 1, "output_norm": 15.057963668505355, "output_norm/layer0": 15.057963668505355, "step": 9300 }, { "MSE": 611.321997172038, "MSE/layer0": 611.321997172038, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.98, "input_norm": 31.997498153050746, "input_norm/layer0": 31.997498153050746, "learning_rate": 3.421052631578948e-05, "loss": 1.8893, "max_norm": 79.47447967529297, "max_norm/layer0": 79.47447967529297, "mean_norm": 67.90266799926758, "mean_norm/layer0": 67.90266799926758, "multicode_k": 1, "output_norm": 15.067080327669775, "output_norm/layer0": 15.067080327669775, "step": 9350 }, { "MSE": 611.4500786336266, "MSE/layer0": 611.4500786336266, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.98, "input_norm": 31.997495447794595, "input_norm/layer0": 31.997495447794595, "learning_rate": 3.157894736842105e-05, "loss": 1.894, "max_norm": 79.49812316894531, "max_norm/layer0": 79.49812316894531, "mean_norm": 67.92279815673828, "mean_norm/layer0": 67.92279815673828, "multicode_k": 1, "output_norm": 15.062444001833596, "output_norm/layer0": 15.062444001833596, "step": 9400 }, { "MSE": 611.1107730102539, "MSE/layer0": 611.1107730102539, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.99, "input_norm": 31.997485151290896, "input_norm/layer0": 31.997485151290896, "learning_rate": 2.8947368421052634e-05, "loss": 1.8849, "max_norm": 79.52143096923828, "max_norm/layer0": 79.52143096923828, "mean_norm": 67.94132995605469, "mean_norm/layer0": 67.94132995605469, "multicode_k": 1, "output_norm": 15.076274760564168, "output_norm/layer0": 15.076274760564168, "step": 9450 }, { "MSE": 611.3009430948896, "MSE/layer0": 611.3009430948896, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.99, "input_norm": 31.99749323209126, "input_norm/layer0": 31.99749323209126, "learning_rate": 2.631578947368421e-05, "loss": 1.8967, "max_norm": 79.54227447509766, "max_norm/layer0": 79.54227447509766, "mean_norm": 67.958251953125, "mean_norm/layer0": 67.958251953125, "multicode_k": 1, "output_norm": 15.06888332684835, "output_norm/layer0": 15.06888332684835, "step": 9500 }, { "epoch": 0.99, "eval_MSE/layer0": 610.6120883183328, "eval_accuracy": 0.5425511737500183, "eval_dead_code_fraction/layer0": 0.0, "eval_input_norm/layer0": 31.99749166347134, "eval_loss": 1.8969556093215942, "eval_multicode_k": 1, "eval_output_norm/layer0": 15.09320597480496, "eval_runtime": 73.3984, "eval_samples_per_second": 62.985, "eval_steps_per_second": 7.875, "step": 9500 }, { "MSE": 610.9202908325196, "MSE/layer0": 610.9202908325196, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 1.0, "input_norm": 31.99748815218606, "input_norm/layer0": 31.99748815218606, "learning_rate": 2.368421052631579e-05, "loss": 1.8917, "max_norm": 79.56092834472656, "max_norm/layer0": 79.56092834472656, "mean_norm": 67.97361755371094, "mean_norm/layer0": 67.97361755371094, "multicode_k": 1, "output_norm": 15.084220841725665, "output_norm/layer0": 15.084220841725665, "step": 9550 }, { "MSE": 610.9847631835939, "MSE/layer0": 610.9847631835939, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 1.0, "input_norm": 31.997486731211332, "input_norm/layer0": 31.997486731211332, "learning_rate": 2.105263157894737e-05, "loss": 1.8839, "max_norm": 79.57735443115234, "max_norm/layer0": 79.57735443115234, "mean_norm": 67.98743438720703, "mean_norm/layer0": 67.98743438720703, "multicode_k": 1, "output_norm": 15.082832886377968, "output_norm/layer0": 15.082832886377968, "step": 9600 }, { "MSE": 611.2879392496747, "MSE/layer0": 611.2879392496747, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 1.01, "input_norm": 31.997482639948544, "input_norm/layer0": 31.997482639948544, "learning_rate": 1.8421052631578947e-05, "loss": 1.8851, "max_norm": 79.59221649169922, "max_norm/layer0": 79.59221649169922, "mean_norm": 67.99962997436523, "mean_norm/layer0": 67.99962997436523, "multicode_k": 1, "output_norm": 15.075549699465444, "output_norm/layer0": 15.075549699465444, "step": 9650 }, { "MSE": 611.3861442057291, "MSE/layer0": 611.3861442057291, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 1.01, "input_norm": 31.997482592264817, "input_norm/layer0": 31.997482592264817, "learning_rate": 1.5789473684210526e-05, "loss": 1.8774, "max_norm": 79.60480499267578, "max_norm/layer0": 79.60480499267578, "mean_norm": 68.01019668579102, "mean_norm/layer0": 68.01019668579102, "multicode_k": 1, "output_norm": 15.07396024545034, "output_norm/layer0": 15.07396024545034, "step": 9700 }, { "MSE": 611.4255168660482, "MSE/layer0": 611.4255168660482, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 1.02, "input_norm": 31.997479289372762, "input_norm/layer0": 31.997479289372762, "learning_rate": 1.3157894736842104e-05, "loss": 1.8768, "max_norm": 79.6154556274414, "max_norm/layer0": 79.6154556274414, "mean_norm": 68.01911926269531, "mean_norm/layer0": 68.01911926269531, "multicode_k": 1, "output_norm": 15.07339178085327, "output_norm/layer0": 15.07339178085327, "step": 9750 }, { "MSE": 611.6131436157225, "MSE/layer0": 611.6131436157225, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 1.02, "input_norm": 31.99748600323995, "input_norm/layer0": 31.99748600323995, "learning_rate": 1.0526315789473684e-05, "loss": 1.8905, "max_norm": 79.62410736083984, "max_norm/layer0": 79.62410736083984, "mean_norm": 68.02641677856445, "mean_norm/layer0": 68.02641677856445, "multicode_k": 1, "output_norm": 15.068124500910447, "output_norm/layer0": 15.068124500910447, "step": 9800 }, { "MSE": 611.5507637532555, "MSE/layer0": 611.5507637532555, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 1.03, "input_norm": 31.997482582728068, "input_norm/layer0": 31.997482582728068, "learning_rate": 7.894736842105263e-06, "loss": 1.8798, "max_norm": 79.63082122802734, "max_norm/layer0": 79.63082122802734, "mean_norm": 68.03211212158203, "mean_norm/layer0": 68.03211212158203, "multicode_k": 1, "output_norm": 15.072520554860436, "output_norm/layer0": 15.072520554860436, "step": 9850 }, { "MSE": 611.7908610026044, "MSE/layer0": 611.7908610026044, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 1.03, "input_norm": 31.99747860272725, "input_norm/layer0": 31.99747860272725, "learning_rate": 5.263157894736842e-06, "loss": 1.8807, "max_norm": 79.63563537597656, "max_norm/layer0": 79.63563537597656, "mean_norm": 68.03619003295898, "mean_norm/layer0": 68.03619003295898, "multicode_k": 1, "output_norm": 15.06489105542501, "output_norm/layer0": 15.06489105542501, "step": 9900 }, { "MSE": 611.5220219930011, "MSE/layer0": 611.5220219930011, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 1.04, "input_norm": 31.997478488286326, "input_norm/layer0": 31.997478488286326, "learning_rate": 2.631578947368421e-06, "loss": 1.8795, "max_norm": 79.63849639892578, "max_norm/layer0": 79.63849639892578, "mean_norm": 68.03863906860352, "mean_norm/layer0": 68.03863906860352, "multicode_k": 1, "output_norm": 15.07397619565328, "output_norm/layer0": 15.07397619565328, "step": 9950 }, { "MSE": 611.5742947387696, "MSE/layer0": 611.5742947387696, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 1.04, "input_norm": 31.997486855189003, "input_norm/layer0": 31.997486855189003, "learning_rate": 0.0, "loss": 1.8942, "max_norm": 79.63946533203125, "max_norm/layer0": 79.63946533203125, "mean_norm": 68.03947448730469, "mean_norm/layer0": 68.03947448730469, "multicode_k": 1, "output_norm": 15.069696005185442, "output_norm/layer0": 15.069696005185442, "step": 10000 }, { "epoch": 1.04, "eval_MSE/layer0": 611.1571513346564, "eval_accuracy": 0.5429091526514649, "eval_dead_code_fraction/layer0": 0.0, "eval_input_norm/layer0": 31.997479090978388, "eval_loss": 1.89570152759552, "eval_multicode_k": 1, "eval_output_norm/layer0": 15.087154228553715, "eval_runtime": 73.2125, "eval_samples_per_second": 63.145, "eval_steps_per_second": 7.895, "step": 10000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 1.04, "input_norm": 0.0, "input_norm/layer0": 0.0, "max_norm": 79.63946533203125, "max_norm/layer0": 79.63946533203125, "mean_norm": 68.03947448730469, "mean_norm/layer0": 68.03947448730469, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 10000, "total_flos": 3.715683581952e+16, "train_loss": 2.0762174885749816, "train_runtime": 12054.7701, "train_samples_per_second": 39.818, "train_steps_per_second": 0.83 } ], "logging_steps": 50, "max_steps": 10000, "num_train_epochs": 2, "save_steps": 500, "total_flos": 3.715683581952e+16, "trial_name": null, "trial_params": null }