empty-michael's picture
End of training
b497d54 verified
raw
history blame
136 kB
{
"best_metric": 1.89570152759552,
"best_model_checkpoint": "/tmp/wandb/run-20240207_044253-56k3p8kp/files/train_output/checkpoint-10000",
"epoch": 1.044022968505307,
"eval_steps": 500,
"global_step": 10000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"MSE": 872.5187733968098,
"MSE/layer0": 872.5187733968098,
"dead_code_fraction": 0.0276,
"dead_code_fraction/layer0": 0.0276,
"epoch": 0.0,
"input_norm": 31.997111479441326,
"input_norm/layer0": 31.997111479441326,
"learning_rate": 1e-06,
"loss": 9.0051,
"max_norm": 34.71393966674805,
"max_norm/layer0": 34.71393966674805,
"mean_norm": 31.98521327972412,
"mean_norm/layer0": 31.98521327972412,
"multicode_k": 1,
"output_norm": 4.134780248006185,
"output_norm/layer0": 4.134780248006185,
"step": 1
},
{
"MSE": 871.4381560241286,
"MSE/layer0": 871.4381560241286,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.01,
"input_norm": 31.99644809839677,
"input_norm/layer0": 31.99644809839677,
"learning_rate": 5e-05,
"loss": 7.0703,
"max_norm": 34.72187423706055,
"max_norm/layer0": 34.72187423706055,
"mean_norm": 31.991936683654785,
"mean_norm/layer0": 31.991936683654785,
"multicode_k": 1,
"output_norm": 4.145846879401173,
"output_norm/layer0": 4.145846879401173,
"step": 50
},
{
"MSE": 868.4475470987957,
"MSE/layer0": 868.4475470987957,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.01,
"input_norm": 31.995786774953213,
"input_norm/layer0": 31.995786774953213,
"learning_rate": 0.0001,
"loss": 4.1515,
"max_norm": 34.748802185058594,
"max_norm/layer0": 34.748802185058594,
"mean_norm": 32.0172176361084,
"mean_norm/layer0": 32.0172176361084,
"multicode_k": 1,
"output_norm": 4.178660261631009,
"output_norm/layer0": 4.178660261631009,
"step": 100
},
{
"MSE": 864.7878089396156,
"MSE/layer0": 864.7878089396156,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.02,
"input_norm": 31.995868380864444,
"input_norm/layer0": 31.995868380864444,
"learning_rate": 0.00015,
"loss": 3.596,
"max_norm": 34.7879753112793,
"max_norm/layer0": 34.7879753112793,
"mean_norm": 32.057809829711914,
"mean_norm/layer0": 32.057809829711914,
"multicode_k": 1,
"output_norm": 4.227458424568177,
"output_norm/layer0": 4.227458424568177,
"step": 150
},
{
"MSE": 862.2720657348631,
"MSE/layer0": 862.2720657348631,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.02,
"input_norm": 31.996261011759444,
"input_norm/layer0": 31.996261011759444,
"learning_rate": 0.0002,
"loss": 3.3864,
"max_norm": 34.834476470947266,
"max_norm/layer0": 34.834476470947266,
"mean_norm": 32.09993934631348,
"mean_norm/layer0": 32.09993934631348,
"multicode_k": 1,
"output_norm": 4.271083230972291,
"output_norm/layer0": 4.271083230972291,
"step": 200
},
{
"MSE": 860.8860168457031,
"MSE/layer0": 860.8860168457031,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.03,
"input_norm": 31.99663330396016,
"input_norm/layer0": 31.99663330396016,
"learning_rate": 0.00025,
"loss": 3.1841,
"max_norm": 34.880577087402344,
"max_norm/layer0": 34.880577087402344,
"mean_norm": 32.15042304992676,
"mean_norm/layer0": 32.15042304992676,
"multicode_k": 1,
"output_norm": 4.302526236375174,
"output_norm/layer0": 4.302526236375174,
"step": 250
},
{
"MSE": 859.4145241292313,
"MSE/layer0": 859.4145241292313,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.03,
"input_norm": 31.99707999547323,
"input_norm/layer0": 31.99707999547323,
"learning_rate": 0.0003,
"loss": 2.9941,
"max_norm": 34.94011688232422,
"max_norm/layer0": 34.94011688232422,
"mean_norm": 32.21405220031738,
"mean_norm/layer0": 32.21405220031738,
"multicode_k": 1,
"output_norm": 4.340623443921407,
"output_norm/layer0": 4.340623443921407,
"step": 300
},
{
"MSE": 857.4514228312173,
"MSE/layer0": 857.4514228312173,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.04,
"input_norm": 31.997263495127353,
"input_norm/layer0": 31.997263495127353,
"learning_rate": 0.00035,
"loss": 2.8154,
"max_norm": 35.02033996582031,
"max_norm/layer0": 35.02033996582031,
"mean_norm": 32.2895393371582,
"mean_norm/layer0": 32.2895393371582,
"multicode_k": 1,
"output_norm": 4.388785634040833,
"output_norm/layer0": 4.388785634040833,
"step": 350
},
{
"MSE": 855.6023776245115,
"MSE/layer0": 855.6023776245115,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.04,
"input_norm": 31.997391548156735,
"input_norm/layer0": 31.997391548156735,
"learning_rate": 0.0004,
"loss": 2.6472,
"max_norm": 35.093902587890625,
"max_norm/layer0": 35.093902587890625,
"mean_norm": 32.36477088928223,
"mean_norm/layer0": 32.36477088928223,
"multicode_k": 1,
"output_norm": 4.438224600950877,
"output_norm/layer0": 4.438224600950877,
"step": 400
},
{
"MSE": 852.2393107096357,
"MSE/layer0": 852.2393107096357,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.05,
"input_norm": 31.997483587265002,
"input_norm/layer0": 31.997483587265002,
"learning_rate": 0.00045000000000000004,
"loss": 2.5584,
"max_norm": 35.304176330566406,
"max_norm/layer0": 35.304176330566406,
"mean_norm": 32.54551696777344,
"mean_norm/layer0": 32.54551696777344,
"multicode_k": 1,
"output_norm": 4.531697844664256,
"output_norm/layer0": 4.531697844664256,
"step": 450
},
{
"MSE": 845.160081481933,
"MSE/layer0": 845.160081481933,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.05,
"input_norm": 31.997576513290404,
"input_norm/layer0": 31.997576513290404,
"learning_rate": 0.0005,
"loss": 2.5072,
"max_norm": 35.78097915649414,
"max_norm/layer0": 35.78097915649414,
"mean_norm": 32.836992263793945,
"mean_norm/layer0": 32.836992263793945,
"multicode_k": 1,
"output_norm": 4.75731077671051,
"output_norm/layer0": 4.75731077671051,
"step": 500
},
{
"epoch": 0.05,
"eval_MSE/layer0": 841.1602262364518,
"eval_accuracy": 0.4578774282778804,
"eval_dead_code_fraction/layer0": 0.0,
"eval_input_norm/layer0": 31.99765928777141,
"eval_loss": 2.476405382156372,
"eval_multicode_k": 1,
"eval_output_norm/layer0": 4.911408371361153,
"eval_runtime": 73.5499,
"eval_samples_per_second": 62.855,
"eval_steps_per_second": 7.859,
"step": 500
},
{
"MSE": 837.1320628865564,
"MSE/layer0": 837.1320628865564,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.06,
"input_norm": 31.997703491846714,
"input_norm/layer0": 31.997703491846714,
"learning_rate": 0.0004973684210526315,
"loss": 2.446,
"max_norm": 36.301849365234375,
"max_norm/layer0": 36.301849365234375,
"mean_norm": 33.16576957702637,
"mean_norm/layer0": 33.16576957702637,
"multicode_k": 1,
"output_norm": 5.083427506287892,
"output_norm/layer0": 5.083427506287892,
"step": 550
},
{
"MSE": 829.8174697875975,
"MSE/layer0": 829.8174697875975,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.06,
"input_norm": 31.997781289418548,
"input_norm/layer0": 31.997781289418548,
"learning_rate": 0.0004947368421052632,
"loss": 2.4026,
"max_norm": 36.790077209472656,
"max_norm/layer0": 36.790077209472656,
"mean_norm": 33.519426345825195,
"mean_norm/layer0": 33.519426345825195,
"multicode_k": 1,
"output_norm": 5.438902084827422,
"output_norm/layer0": 5.438902084827422,
"step": 600
},
{
"MSE": 823.647299601237,
"MSE/layer0": 823.647299601237,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.07,
"input_norm": 31.997854344050104,
"input_norm/layer0": 31.997854344050104,
"learning_rate": 0.0004921052631578947,
"loss": 2.3506,
"max_norm": 37.23988723754883,
"max_norm/layer0": 37.23988723754883,
"mean_norm": 33.882219314575195,
"mean_norm/layer0": 33.882219314575195,
"multicode_k": 1,
"output_norm": 5.780141766071318,
"output_norm/layer0": 5.780141766071318,
"step": 650
},
{
"MSE": 818.3900874837236,
"MSE/layer0": 818.3900874837236,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.07,
"input_norm": 31.99789853731792,
"input_norm/layer0": 31.99789853731792,
"learning_rate": 0.0004894736842105264,
"loss": 2.3252,
"max_norm": 37.74921417236328,
"max_norm/layer0": 37.74921417236328,
"mean_norm": 34.241193771362305,
"mean_norm/layer0": 34.241193771362305,
"multicode_k": 1,
"output_norm": 6.09345253547033,
"output_norm/layer0": 6.09345253547033,
"step": 700
},
{
"MSE": 813.5141651407878,
"MSE/layer0": 813.5141651407878,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.08,
"input_norm": 31.99791718482971,
"input_norm/layer0": 31.99791718482971,
"learning_rate": 0.0004868421052631579,
"loss": 2.2972,
"max_norm": 38.29411315917969,
"max_norm/layer0": 38.29411315917969,
"mean_norm": 34.602651596069336,
"mean_norm/layer0": 34.602651596069336,
"multicode_k": 1,
"output_norm": 6.373116828600564,
"output_norm/layer0": 6.373116828600564,
"step": 750
},
{
"MSE": 808.9583784993486,
"MSE/layer0": 808.9583784993486,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.08,
"input_norm": 31.997929916381842,
"input_norm/layer0": 31.997929916381842,
"learning_rate": 0.0004842105263157895,
"loss": 2.2848,
"max_norm": 38.83885955810547,
"max_norm/layer0": 38.83885955810547,
"mean_norm": 34.96581268310547,
"mean_norm/layer0": 34.96581268310547,
"multicode_k": 1,
"output_norm": 6.6348445963859515,
"output_norm/layer0": 6.6348445963859515,
"step": 800
},
{
"MSE": 805.0894353230792,
"MSE/layer0": 805.0894353230792,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.09,
"input_norm": 31.99793632825216,
"input_norm/layer0": 31.99793632825216,
"learning_rate": 0.00048157894736842105,
"loss": 2.2718,
"max_norm": 39.34720993041992,
"max_norm/layer0": 39.34720993041992,
"mean_norm": 35.32806396484375,
"mean_norm/layer0": 35.32806396484375,
"multicode_k": 1,
"output_norm": 6.866891795794173,
"output_norm/layer0": 6.866891795794173,
"step": 850
},
{
"MSE": 801.1131992594401,
"MSE/layer0": 801.1131992594401,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.09,
"input_norm": 31.997941767374677,
"input_norm/layer0": 31.997941767374677,
"learning_rate": 0.00047894736842105264,
"loss": 2.2552,
"max_norm": 39.885169982910156,
"max_norm/layer0": 39.885169982910156,
"mean_norm": 35.689327239990234,
"mean_norm/layer0": 35.689327239990234,
"multicode_k": 1,
"output_norm": 7.08060004631678,
"output_norm/layer0": 7.08060004631678,
"step": 900
},
{
"MSE": 797.5655348714191,
"MSE/layer0": 797.5655348714191,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.1,
"input_norm": 31.997945496241247,
"input_norm/layer0": 31.997945496241247,
"learning_rate": 0.0004763157894736842,
"loss": 2.2364,
"max_norm": 40.398529052734375,
"max_norm/layer0": 40.398529052734375,
"mean_norm": 36.051015853881836,
"mean_norm/layer0": 36.051015853881836,
"multicode_k": 1,
"output_norm": 7.280441036224362,
"output_norm/layer0": 7.280441036224362,
"step": 950
},
{
"MSE": 794.0057167561844,
"MSE/layer0": 794.0057167561844,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.1,
"input_norm": 31.997958205540975,
"input_norm/layer0": 31.997958205540975,
"learning_rate": 0.00047368421052631577,
"loss": 2.2285,
"max_norm": 40.882999420166016,
"max_norm/layer0": 40.882999420166016,
"mean_norm": 36.412479400634766,
"mean_norm/layer0": 36.412479400634766,
"multicode_k": 1,
"output_norm": 7.463625483512881,
"output_norm/layer0": 7.463625483512881,
"step": 1000
},
{
"epoch": 0.1,
"eval_MSE/layer0": 792.3022871601257,
"eval_accuracy": 0.49262569806414397,
"eval_dead_code_fraction/layer0": 0.0,
"eval_input_norm/layer0": 31.997970815399036,
"eval_loss": 2.2265193462371826,
"eval_multicode_k": 1,
"eval_output_norm/layer0": 7.55243569582849,
"eval_runtime": 73.9102,
"eval_samples_per_second": 62.549,
"eval_steps_per_second": 7.82,
"step": 1000
},
{
"MSE": 790.4031213378905,
"MSE/layer0": 790.4031213378905,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.11,
"input_norm": 31.997961893081662,
"input_norm/layer0": 31.997961893081662,
"learning_rate": 0.0004710526315789474,
"loss": 2.2276,
"max_norm": 41.373714447021484,
"max_norm/layer0": 41.373714447021484,
"mean_norm": 36.77394676208496,
"mean_norm/layer0": 36.77394676208496,
"multicode_k": 1,
"output_norm": 7.636834317048386,
"output_norm/layer0": 7.636834317048386,
"step": 1050
},
{
"MSE": 786.9933625284832,
"MSE/layer0": 786.9933625284832,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.11,
"input_norm": 31.99796496391297,
"input_norm/layer0": 31.99796496391297,
"learning_rate": 0.00046842105263157895,
"loss": 2.2167,
"max_norm": 41.845481872558594,
"max_norm/layer0": 41.845481872558594,
"mean_norm": 37.13482093811035,
"mean_norm/layer0": 37.13482093811035,
"multicode_k": 1,
"output_norm": 7.803330462773646,
"output_norm/layer0": 7.803330462773646,
"step": 1100
},
{
"MSE": 783.8570914713541,
"MSE/layer0": 783.8570914713541,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.12,
"input_norm": 31.997962007522577,
"input_norm/layer0": 31.997962007522577,
"learning_rate": 0.00046578947368421054,
"loss": 2.2049,
"max_norm": 42.328094482421875,
"max_norm/layer0": 42.328094482421875,
"mean_norm": 37.49737358093262,
"mean_norm/layer0": 37.49737358093262,
"multicode_k": 1,
"output_norm": 7.957673575878145,
"output_norm/layer0": 7.957673575878145,
"step": 1150
},
{
"MSE": 780.325506286621,
"MSE/layer0": 780.325506286621,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.13,
"input_norm": 31.997955818176273,
"input_norm/layer0": 31.997955818176273,
"learning_rate": 0.00046315789473684214,
"loss": 2.2048,
"max_norm": 42.827125549316406,
"max_norm/layer0": 42.827125549316406,
"mean_norm": 37.85981369018555,
"mean_norm/layer0": 37.85981369018555,
"multicode_k": 1,
"output_norm": 8.110501464207967,
"output_norm/layer0": 8.110501464207967,
"step": 1200
},
{
"MSE": 777.4963677978517,
"MSE/layer0": 777.4963677978517,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.13,
"input_norm": 31.997957773208608,
"input_norm/layer0": 31.997957773208608,
"learning_rate": 0.0004605263157894737,
"loss": 2.1813,
"max_norm": 43.32162094116211,
"max_norm/layer0": 43.32162094116211,
"mean_norm": 38.223052978515625,
"mean_norm/layer0": 38.223052978515625,
"multicode_k": 1,
"output_norm": 8.244436805248263,
"output_norm/layer0": 8.244436805248263,
"step": 1250
},
{
"MSE": 774.260437520345,
"MSE/layer0": 774.260437520345,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.14,
"input_norm": 31.99796381632487,
"input_norm/layer0": 31.99796381632487,
"learning_rate": 0.00045789473684210527,
"loss": 2.1836,
"max_norm": 43.81217575073242,
"max_norm/layer0": 43.81217575073242,
"mean_norm": 38.58406066894531,
"mean_norm/layer0": 38.58406066894531,
"multicode_k": 1,
"output_norm": 8.38570425987244,
"output_norm/layer0": 8.38570425987244,
"step": 1300
},
{
"MSE": 771.4710861206056,
"MSE/layer0": 771.4710861206056,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.14,
"input_norm": 31.997958866755184,
"input_norm/layer0": 31.997958866755184,
"learning_rate": 0.00045526315789473686,
"loss": 2.1749,
"max_norm": 44.29291915893555,
"max_norm/layer0": 44.29291915893555,
"mean_norm": 38.94841957092285,
"mean_norm/layer0": 38.94841957092285,
"multicode_k": 1,
"output_norm": 8.50825534900029,
"output_norm/layer0": 8.50825534900029,
"step": 1350
},
{
"MSE": 768.6556185913084,
"MSE/layer0": 768.6556185913084,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.15,
"input_norm": 31.99795674959818,
"input_norm/layer0": 31.99795674959818,
"learning_rate": 0.00045263157894736845,
"loss": 2.1767,
"max_norm": 44.80799865722656,
"max_norm/layer0": 44.80799865722656,
"mean_norm": 39.31004524230957,
"mean_norm/layer0": 39.31004524230957,
"multicode_k": 1,
"output_norm": 8.633222222328184,
"output_norm/layer0": 8.633222222328184,
"step": 1400
},
{
"MSE": 765.9088921101885,
"MSE/layer0": 765.9088921101885,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.15,
"input_norm": 31.99795736630759,
"input_norm/layer0": 31.99795736630759,
"learning_rate": 0.00045000000000000004,
"loss": 2.1614,
"max_norm": 45.24712371826172,
"max_norm/layer0": 45.24712371826172,
"mean_norm": 39.66674041748047,
"mean_norm/layer0": 39.66674041748047,
"multicode_k": 1,
"output_norm": 8.743508942921961,
"output_norm/layer0": 8.743508942921961,
"step": 1450
},
{
"MSE": 763.2800780232742,
"MSE/layer0": 763.2800780232742,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.16,
"input_norm": 31.997952944437664,
"input_norm/layer0": 31.997952944437664,
"learning_rate": 0.0004473684210526316,
"loss": 2.1472,
"max_norm": 45.6886100769043,
"max_norm/layer0": 45.6886100769043,
"mean_norm": 40.02728462219238,
"mean_norm/layer0": 40.02728462219238,
"multicode_k": 1,
"output_norm": 8.859908480644224,
"output_norm/layer0": 8.859908480644224,
"step": 1500
},
{
"epoch": 0.16,
"eval_MSE/layer0": 761.8682555426203,
"eval_accuracy": 0.502513147213907,
"eval_dead_code_fraction/layer0": 0.0,
"eval_input_norm/layer0": 31.99796608230291,
"eval_loss": 2.1583588123321533,
"eval_multicode_k": 1,
"eval_output_norm/layer0": 8.92388377993132,
"eval_runtime": 73.3386,
"eval_samples_per_second": 63.036,
"eval_steps_per_second": 7.881,
"step": 1500
},
{
"MSE": 760.1600253295896,
"MSE/layer0": 760.1600253295896,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.16,
"input_norm": 31.997961203257255,
"input_norm/layer0": 31.997961203257255,
"learning_rate": 0.00044473684210526317,
"loss": 2.1601,
"max_norm": 46.172386169433594,
"max_norm/layer0": 46.172386169433594,
"mean_norm": 40.38890838623047,
"mean_norm/layer0": 40.38890838623047,
"multicode_k": 1,
"output_norm": 8.976485926310215,
"output_norm/layer0": 8.976485926310215,
"step": 1550
},
{
"MSE": 757.7968755086266,
"MSE/layer0": 757.7968755086266,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.17,
"input_norm": 31.99795768419901,
"input_norm/layer0": 31.99795768419901,
"learning_rate": 0.0004421052631578947,
"loss": 2.1503,
"max_norm": 46.59892272949219,
"max_norm/layer0": 46.59892272949219,
"mean_norm": 40.74970626831055,
"mean_norm/layer0": 40.74970626831055,
"multicode_k": 1,
"output_norm": 9.079196619192757,
"output_norm/layer0": 9.079196619192757,
"step": 1600
},
{
"MSE": 755.1489293416341,
"MSE/layer0": 755.1489293416341,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.17,
"input_norm": 31.997956597010287,
"input_norm/layer0": 31.997956597010287,
"learning_rate": 0.0004394736842105263,
"loss": 2.1474,
"max_norm": 47.01366424560547,
"max_norm/layer0": 47.01366424560547,
"mean_norm": 41.107492446899414,
"mean_norm/layer0": 41.107492446899414,
"multicode_k": 1,
"output_norm": 9.18502354939779,
"output_norm/layer0": 9.18502354939779,
"step": 1650
},
{
"MSE": 752.7132907104492,
"MSE/layer0": 752.7132907104492,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.18,
"input_norm": 31.997961333592727,
"input_norm/layer0": 31.997961333592727,
"learning_rate": 0.00043684210526315795,
"loss": 2.1451,
"max_norm": 47.46398162841797,
"max_norm/layer0": 47.46398162841797,
"mean_norm": 41.466739654541016,
"mean_norm/layer0": 41.466739654541016,
"multicode_k": 1,
"output_norm": 9.288365476131446,
"output_norm/layer0": 9.288365476131446,
"step": 1700
},
{
"MSE": 750.1894300333656,
"MSE/layer0": 750.1894300333656,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.18,
"input_norm": 31.99795596122742,
"input_norm/layer0": 31.99795596122742,
"learning_rate": 0.0004342105263157895,
"loss": 2.1298,
"max_norm": 47.89784240722656,
"max_norm/layer0": 47.89784240722656,
"mean_norm": 41.825233459472656,
"mean_norm/layer0": 41.825233459472656,
"multicode_k": 1,
"output_norm": 9.383608838717148,
"output_norm/layer0": 9.383608838717148,
"step": 1750
},
{
"MSE": 747.6542997233073,
"MSE/layer0": 747.6542997233073,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.19,
"input_norm": 31.997955916722606,
"input_norm/layer0": 31.997955916722606,
"learning_rate": 0.0004315789473684211,
"loss": 2.1238,
"max_norm": 48.32524871826172,
"max_norm/layer0": 48.32524871826172,
"mean_norm": 42.18182373046875,
"mean_norm/layer0": 42.18182373046875,
"multicode_k": 1,
"output_norm": 9.481378455162048,
"output_norm/layer0": 9.481378455162048,
"step": 1800
},
{
"MSE": 745.4623332722983,
"MSE/layer0": 745.4623332722983,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.19,
"input_norm": 31.99795308430989,
"input_norm/layer0": 31.99795308430989,
"learning_rate": 0.0004289473684210526,
"loss": 2.1193,
"max_norm": 48.75049591064453,
"max_norm/layer0": 48.75049591064453,
"mean_norm": 42.53817176818848,
"mean_norm/layer0": 42.53817176818848,
"multicode_k": 1,
"output_norm": 9.570223178863522,
"output_norm/layer0": 9.570223178863522,
"step": 1850
},
{
"MSE": 743.2356170654296,
"MSE/layer0": 743.2356170654296,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.2,
"input_norm": 31.997956037521366,
"input_norm/layer0": 31.997956037521366,
"learning_rate": 0.0004263157894736842,
"loss": 2.114,
"max_norm": 49.169532775878906,
"max_norm/layer0": 49.169532775878906,
"mean_norm": 42.89301300048828,
"mean_norm/layer0": 42.89301300048828,
"multicode_k": 1,
"output_norm": 9.656177865664167,
"output_norm/layer0": 9.656177865664167,
"step": 1900
},
{
"MSE": 740.6696187337238,
"MSE/layer0": 740.6696187337238,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.2,
"input_norm": 31.997947629292796,
"input_norm/layer0": 31.997947629292796,
"learning_rate": 0.0004236842105263158,
"loss": 2.1208,
"max_norm": 49.5915641784668,
"max_norm/layer0": 49.5915641784668,
"mean_norm": 43.247257232666016,
"mean_norm/layer0": 43.247257232666016,
"multicode_k": 1,
"output_norm": 9.750187404950456,
"output_norm/layer0": 9.750187404950456,
"step": 1950
},
{
"MSE": 738.2711766560866,
"MSE/layer0": 738.2711766560866,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.21,
"input_norm": 31.99795049031576,
"input_norm/layer0": 31.99795049031576,
"learning_rate": 0.00042105263157894734,
"loss": 2.1144,
"max_norm": 50.01121520996094,
"max_norm/layer0": 50.01121520996094,
"mean_norm": 43.60071563720703,
"mean_norm/layer0": 43.60071563720703,
"multicode_k": 1,
"output_norm": 9.839046444892887,
"output_norm/layer0": 9.839046444892887,
"step": 2000
},
{
"epoch": 0.21,
"eval_MSE/layer0": 737.1842960305685,
"eval_accuracy": 0.5089533842961654,
"eval_dead_code_fraction/layer0": 0.0,
"eval_input_norm/layer0": 31.997949216728358,
"eval_loss": 2.112781524658203,
"eval_multicode_k": 1,
"eval_output_norm/layer0": 9.899169789850005,
"eval_runtime": 73.2721,
"eval_samples_per_second": 63.094,
"eval_steps_per_second": 7.888,
"step": 2000
},
{
"MSE": 736.3252647908528,
"MSE/layer0": 736.3252647908528,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.21,
"input_norm": 31.997952928543082,
"input_norm/layer0": 31.997952928543082,
"learning_rate": 0.000418421052631579,
"loss": 2.1054,
"max_norm": 50.480525970458984,
"max_norm/layer0": 50.480525970458984,
"mean_norm": 43.9530086517334,
"mean_norm/layer0": 43.9530086517334,
"multicode_k": 1,
"output_norm": 9.923008087476088,
"output_norm/layer0": 9.923008087476088,
"step": 2050
},
{
"MSE": 734.2413449096682,
"MSE/layer0": 734.2413449096682,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.22,
"input_norm": 31.99795233090719,
"input_norm/layer0": 31.99795233090719,
"learning_rate": 0.0004157894736842106,
"loss": 2.114,
"max_norm": 50.909828186035156,
"max_norm/layer0": 50.909828186035156,
"mean_norm": 44.302608489990234,
"mean_norm/layer0": 44.302608489990234,
"multicode_k": 1,
"output_norm": 9.99465080579122,
"output_norm/layer0": 9.99465080579122,
"step": 2100
},
{
"MSE": 732.1211085001627,
"MSE/layer0": 732.1211085001627,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.22,
"input_norm": 31.997947177886957,
"input_norm/layer0": 31.997947177886957,
"learning_rate": 0.0004131578947368421,
"loss": 2.1053,
"max_norm": 51.30076217651367,
"max_norm/layer0": 51.30076217651367,
"mean_norm": 44.650190353393555,
"mean_norm/layer0": 44.650190353393555,
"multicode_k": 1,
"output_norm": 10.083865798314415,
"output_norm/layer0": 10.083865798314415,
"step": 2150
},
{
"MSE": 729.7699541219072,
"MSE/layer0": 729.7699541219072,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.23,
"input_norm": 31.997944199244184,
"input_norm/layer0": 31.997944199244184,
"learning_rate": 0.0004105263157894737,
"loss": 2.092,
"max_norm": 51.70292282104492,
"max_norm/layer0": 51.70292282104492,
"mean_norm": 44.99736022949219,
"mean_norm/layer0": 44.99736022949219,
"multicode_k": 1,
"output_norm": 10.171215546925865,
"output_norm/layer0": 10.171215546925865,
"step": 2200
},
{
"MSE": 727.7426215616864,
"MSE/layer0": 727.7426215616864,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.23,
"input_norm": 31.997949040730795,
"input_norm/layer0": 31.997949040730795,
"learning_rate": 0.00040789473684210524,
"loss": 2.0989,
"max_norm": 52.09043502807617,
"max_norm/layer0": 52.09043502807617,
"mean_norm": 45.34288787841797,
"mean_norm/layer0": 45.34288787841797,
"multicode_k": 1,
"output_norm": 10.245072917938227,
"output_norm/layer0": 10.245072917938227,
"step": 2250
},
{
"MSE": 725.7510225423177,
"MSE/layer0": 725.7510225423177,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.24,
"input_norm": 31.997945086161295,
"input_norm/layer0": 31.997945086161295,
"learning_rate": 0.00040526315789473684,
"loss": 2.0921,
"max_norm": 52.48381423950195,
"max_norm/layer0": 52.48381423950195,
"mean_norm": 45.685386657714844,
"mean_norm/layer0": 45.685386657714844,
"multicode_k": 1,
"output_norm": 10.316563812891642,
"output_norm/layer0": 10.316563812891642,
"step": 2300
},
{
"MSE": 723.730980834961,
"MSE/layer0": 723.730980834961,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.25,
"input_norm": 31.997938013076794,
"input_norm/layer0": 31.997938013076794,
"learning_rate": 0.00040263157894736843,
"loss": 2.0863,
"max_norm": 52.871910095214844,
"max_norm/layer0": 52.871910095214844,
"mean_norm": 46.027950286865234,
"mean_norm/layer0": 46.027950286865234,
"multicode_k": 1,
"output_norm": 10.396288099288938,
"output_norm/layer0": 10.396288099288938,
"step": 2350
},
{
"MSE": 721.850106608073,
"MSE/layer0": 721.850106608073,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.25,
"input_norm": 31.99794317245484,
"input_norm/layer0": 31.99794317245484,
"learning_rate": 0.0004,
"loss": 2.0883,
"max_norm": 53.25300598144531,
"max_norm/layer0": 53.25300598144531,
"mean_norm": 46.366220474243164,
"mean_norm/layer0": 46.366220474243164,
"multicode_k": 1,
"output_norm": 10.462737544377642,
"output_norm/layer0": 10.462737544377642,
"step": 2400
},
{
"MSE": 720.002911987305,
"MSE/layer0": 720.002911987305,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.26,
"input_norm": 31.997945442199722,
"input_norm/layer0": 31.997945442199722,
"learning_rate": 0.0003973684210526316,
"loss": 2.0813,
"max_norm": 53.6557502746582,
"max_norm/layer0": 53.6557502746582,
"mean_norm": 46.70218849182129,
"mean_norm/layer0": 46.70218849182129,
"multicode_k": 1,
"output_norm": 10.54251501719157,
"output_norm/layer0": 10.54251501719157,
"step": 2450
},
{
"MSE": 717.8726328531905,
"MSE/layer0": 717.8726328531905,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.26,
"input_norm": 31.997946141560867,
"input_norm/layer0": 31.997946141560867,
"learning_rate": 0.00039473684210526315,
"loss": 2.0847,
"max_norm": 54.013648986816406,
"max_norm/layer0": 54.013648986816406,
"mean_norm": 47.03492546081543,
"mean_norm/layer0": 47.03492546081543,
"multicode_k": 1,
"output_norm": 10.61746094703674,
"output_norm/layer0": 10.61746094703674,
"step": 2500
},
{
"epoch": 0.26,
"eval_MSE/layer0": 716.9390104187793,
"eval_accuracy": 0.5142129041984603,
"eval_dead_code_fraction/layer0": 0.0,
"eval_input_norm/layer0": 31.997947305666536,
"eval_loss": 2.0790653228759766,
"eval_multicode_k": 1,
"eval_output_norm/layer0": 10.657726783760687,
"eval_runtime": 73.6422,
"eval_samples_per_second": 62.776,
"eval_steps_per_second": 7.849,
"step": 2500
},
{
"MSE": 715.8716929117836,
"MSE/layer0": 715.8716929117836,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.27,
"input_norm": 31.99793814023335,
"input_norm/layer0": 31.99793814023335,
"learning_rate": 0.00039210526315789474,
"loss": 2.0789,
"max_norm": 54.395057678222656,
"max_norm/layer0": 54.395057678222656,
"mean_norm": 47.36547088623047,
"mean_norm/layer0": 47.36547088623047,
"multicode_k": 1,
"output_norm": 10.687965892155965,
"output_norm/layer0": 10.687965892155965,
"step": 2550
},
{
"MSE": 713.9484742228188,
"MSE/layer0": 713.9484742228188,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.27,
"input_norm": 31.997940645217888,
"input_norm/layer0": 31.997940645217888,
"learning_rate": 0.00038947368421052633,
"loss": 2.0747,
"max_norm": 54.81391525268555,
"max_norm/layer0": 54.81391525268555,
"mean_norm": 47.6934928894043,
"mean_norm/layer0": 47.6934928894043,
"multicode_k": 1,
"output_norm": 10.762619382540386,
"output_norm/layer0": 10.762619382540386,
"step": 2600
},
{
"MSE": 711.9854763793942,
"MSE/layer0": 711.9854763793942,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.28,
"input_norm": 31.997925097147615,
"input_norm/layer0": 31.997925097147615,
"learning_rate": 0.00038684210526315787,
"loss": 2.0679,
"max_norm": 55.21110916137695,
"max_norm/layer0": 55.21110916137695,
"mean_norm": 48.01936340332031,
"mean_norm/layer0": 48.01936340332031,
"multicode_k": 1,
"output_norm": 10.838534935315447,
"output_norm/layer0": 10.838534935315447,
"step": 2650
},
{
"MSE": 710.4415082804362,
"MSE/layer0": 710.4415082804362,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.28,
"input_norm": 31.997930930455517,
"input_norm/layer0": 31.997930930455517,
"learning_rate": 0.00038421052631578946,
"loss": 2.0619,
"max_norm": 55.63144302368164,
"max_norm/layer0": 55.63144302368164,
"mean_norm": 48.34212875366211,
"mean_norm/layer0": 48.34212875366211,
"multicode_k": 1,
"output_norm": 10.893479135831196,
"output_norm/layer0": 10.893479135831196,
"step": 2700
},
{
"MSE": 708.5378164672845,
"MSE/layer0": 708.5378164672845,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.29,
"input_norm": 31.99792820294698,
"input_norm/layer0": 31.99792820294698,
"learning_rate": 0.00038157894736842105,
"loss": 2.0461,
"max_norm": 56.01336669921875,
"max_norm/layer0": 56.01336669921875,
"mean_norm": 48.66323280334473,
"mean_norm/layer0": 48.66323280334473,
"multicode_k": 1,
"output_norm": 10.971131575902309,
"output_norm/layer0": 10.971131575902309,
"step": 2750
},
{
"MSE": 706.6155220540361,
"MSE/layer0": 706.6155220540361,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.29,
"input_norm": 31.997930272420245,
"input_norm/layer0": 31.997930272420245,
"learning_rate": 0.00037894736842105265,
"loss": 2.0594,
"max_norm": 56.40309143066406,
"max_norm/layer0": 56.40309143066406,
"mean_norm": 48.980411529541016,
"mean_norm/layer0": 48.980411529541016,
"multicode_k": 1,
"output_norm": 11.042961815198257,
"output_norm/layer0": 11.042961815198257,
"step": 2800
},
{
"MSE": 704.6534555053711,
"MSE/layer0": 704.6534555053711,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.3,
"input_norm": 31.99792085011799,
"input_norm/layer0": 31.99792085011799,
"learning_rate": 0.00037631578947368424,
"loss": 2.0499,
"max_norm": 56.79050064086914,
"max_norm/layer0": 56.79050064086914,
"mean_norm": 49.293588638305664,
"mean_norm/layer0": 49.293588638305664,
"multicode_k": 1,
"output_norm": 11.11463791847229,
"output_norm/layer0": 11.11463791847229,
"step": 2850
},
{
"MSE": 702.691480916341,
"MSE/layer0": 702.691480916341,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.3,
"input_norm": 31.997921177546203,
"input_norm/layer0": 31.997921177546203,
"learning_rate": 0.0003736842105263158,
"loss": 2.0472,
"max_norm": 57.16228103637695,
"max_norm/layer0": 57.16228103637695,
"mean_norm": 49.60378646850586,
"mean_norm/layer0": 49.60378646850586,
"multicode_k": 1,
"output_norm": 11.188902417818706,
"output_norm/layer0": 11.188902417818706,
"step": 2900
},
{
"MSE": 700.9804660034181,
"MSE/layer0": 700.9804660034181,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.31,
"input_norm": 31.997924566268914,
"input_norm/layer0": 31.997924566268914,
"learning_rate": 0.00037105263157894737,
"loss": 2.0557,
"max_norm": 57.52459716796875,
"max_norm/layer0": 57.52459716796875,
"mean_norm": 49.91103553771973,
"mean_norm/layer0": 49.91103553771973,
"multicode_k": 1,
"output_norm": 11.253552745183304,
"output_norm/layer0": 11.253552745183304,
"step": 2950
},
{
"MSE": 699.5130490112299,
"MSE/layer0": 699.5130490112299,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.31,
"input_norm": 31.997922519048053,
"input_norm/layer0": 31.997922519048053,
"learning_rate": 0.00036842105263157896,
"loss": 2.0439,
"max_norm": 57.87739562988281,
"max_norm/layer0": 57.87739562988281,
"mean_norm": 50.21486854553223,
"mean_norm/layer0": 50.21486854553223,
"multicode_k": 1,
"output_norm": 11.316310184796652,
"output_norm/layer0": 11.316310184796652,
"step": 3000
},
{
"epoch": 0.31,
"eval_MSE/layer0": 698.7265792011616,
"eval_accuracy": 0.5184875063671823,
"eval_dead_code_fraction/layer0": 0.0,
"eval_input_norm/layer0": 31.997919895214224,
"eval_loss": 2.0482470989227295,
"eval_multicode_k": 1,
"eval_output_norm/layer0": 11.359921689315088,
"eval_runtime": 74.2109,
"eval_samples_per_second": 62.295,
"eval_steps_per_second": 7.789,
"step": 3000
},
{
"MSE": 697.8663801066077,
"MSE/layer0": 697.8663801066077,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.32,
"input_norm": 31.997911771138504,
"input_norm/layer0": 31.997911771138504,
"learning_rate": 0.00036578947368421055,
"loss": 2.0511,
"max_norm": 58.24200439453125,
"max_norm/layer0": 58.24200439453125,
"mean_norm": 50.51446723937988,
"mean_norm/layer0": 50.51446723937988,
"multicode_k": 1,
"output_norm": 11.388139980634046,
"output_norm/layer0": 11.388139980634046,
"step": 3050
},
{
"MSE": 696.0450835164395,
"MSE/layer0": 696.0450835164395,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.32,
"input_norm": 31.9979209582011,
"input_norm/layer0": 31.9979209582011,
"learning_rate": 0.00036315789473684214,
"loss": 2.0466,
"max_norm": 58.58406066894531,
"max_norm/layer0": 58.58406066894531,
"mean_norm": 50.81120681762695,
"mean_norm/layer0": 50.81120681762695,
"multicode_k": 1,
"output_norm": 11.455551563898727,
"output_norm/layer0": 11.455551563898727,
"step": 3100
},
{
"MSE": 694.5301999918622,
"MSE/layer0": 694.5301999918622,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.33,
"input_norm": 31.99790574709574,
"input_norm/layer0": 31.99790574709574,
"learning_rate": 0.0003605263157894737,
"loss": 2.0294,
"max_norm": 58.931087493896484,
"max_norm/layer0": 58.931087493896484,
"mean_norm": 51.104164123535156,
"mean_norm/layer0": 51.104164123535156,
"multicode_k": 1,
"output_norm": 11.512675134340917,
"output_norm/layer0": 11.512675134340917,
"step": 3150
},
{
"MSE": 692.5095411173497,
"MSE/layer0": 692.5095411173497,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.33,
"input_norm": 31.997909634908044,
"input_norm/layer0": 31.997909634908044,
"learning_rate": 0.0003578947368421053,
"loss": 2.0455,
"max_norm": 59.2867546081543,
"max_norm/layer0": 59.2867546081543,
"mean_norm": 51.39415168762207,
"mean_norm/layer0": 51.39415168762207,
"multicode_k": 1,
"output_norm": 11.587491785685224,
"output_norm/layer0": 11.587491785685224,
"step": 3200
},
{
"MSE": 691.1425885009767,
"MSE/layer0": 691.1425885009767,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.34,
"input_norm": 31.99791768709818,
"input_norm/layer0": 31.99791768709818,
"learning_rate": 0.00035526315789473687,
"loss": 2.0348,
"max_norm": 59.64825439453125,
"max_norm/layer0": 59.64825439453125,
"mean_norm": 51.68109130859375,
"mean_norm/layer0": 51.68109130859375,
"multicode_k": 1,
"output_norm": 11.643148959477745,
"output_norm/layer0": 11.643148959477745,
"step": 3250
},
{
"MSE": 689.2906094360355,
"MSE/layer0": 689.2906094360355,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.34,
"input_norm": 31.997913980483997,
"input_norm/layer0": 31.997913980483997,
"learning_rate": 0.0003526315789473684,
"loss": 2.0293,
"max_norm": 59.97624206542969,
"max_norm/layer0": 59.97624206542969,
"mean_norm": 51.965484619140625,
"mean_norm/layer0": 51.965484619140625,
"multicode_k": 1,
"output_norm": 11.714975148836775,
"output_norm/layer0": 11.714975148836775,
"step": 3300
},
{
"MSE": 688.0525922648112,
"MSE/layer0": 688.0525922648112,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.35,
"input_norm": 31.997908350626624,
"input_norm/layer0": 31.997908350626624,
"learning_rate": 0.00035,
"loss": 2.0389,
"max_norm": 60.30556869506836,
"max_norm/layer0": 60.30556869506836,
"mean_norm": 52.2458438873291,
"mean_norm/layer0": 52.2458438873291,
"multicode_k": 1,
"output_norm": 11.772027517954506,
"output_norm/layer0": 11.772027517954506,
"step": 3350
},
{
"MSE": 686.4814953613279,
"MSE/layer0": 686.4814953613279,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.35,
"input_norm": 31.997902415593472,
"input_norm/layer0": 31.997902415593472,
"learning_rate": 0.0003473684210526316,
"loss": 2.0266,
"max_norm": 60.628334045410156,
"max_norm/layer0": 60.628334045410156,
"mean_norm": 52.522024154663086,
"mean_norm/layer0": 52.522024154663086,
"multicode_k": 1,
"output_norm": 11.842156640688584,
"output_norm/layer0": 11.842156640688584,
"step": 3400
},
{
"MSE": 684.6515231323242,
"MSE/layer0": 684.6515231323242,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.36,
"input_norm": 31.99791290283203,
"input_norm/layer0": 31.99791290283203,
"learning_rate": 0.0003447368421052632,
"loss": 2.0248,
"max_norm": 60.95072555541992,
"max_norm/layer0": 60.95072555541992,
"mean_norm": 52.79400825500488,
"mean_norm/layer0": 52.79400825500488,
"multicode_k": 1,
"output_norm": 11.907371897697445,
"output_norm/layer0": 11.907371897697445,
"step": 3450
},
{
"MSE": 683.5430062866212,
"MSE/layer0": 683.5430062866212,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.37,
"input_norm": 31.9979167175293,
"input_norm/layer0": 31.9979167175293,
"learning_rate": 0.00034210526315789477,
"loss": 2.0263,
"max_norm": 61.270816802978516,
"max_norm/layer0": 61.270816802978516,
"mean_norm": 53.06429481506348,
"mean_norm/layer0": 53.06429481506348,
"multicode_k": 1,
"output_norm": 11.956860675811765,
"output_norm/layer0": 11.956860675811765,
"step": 3500
},
{
"epoch": 0.37,
"eval_MSE/layer0": 682.2680427869782,
"eval_accuracy": 0.5224062440993215,
"eval_dead_code_fraction/layer0": 0.0,
"eval_input_norm/layer0": 31.997916449774355,
"eval_loss": 2.0253396034240723,
"eval_multicode_k": 1,
"eval_output_norm/layer0": 12.010468493388789,
"eval_runtime": 73.2716,
"eval_samples_per_second": 63.094,
"eval_steps_per_second": 7.888,
"step": 3500
},
{
"MSE": 682.0599540201822,
"MSE/layer0": 682.0599540201822,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.37,
"input_norm": 31.9979091612498,
"input_norm/layer0": 31.9979091612498,
"learning_rate": 0.0003394736842105263,
"loss": 2.035,
"max_norm": 61.60363006591797,
"max_norm/layer0": 61.60363006591797,
"mean_norm": 53.33056831359863,
"mean_norm/layer0": 53.33056831359863,
"multicode_k": 1,
"output_norm": 12.018342121442167,
"output_norm/layer0": 12.018342121442167,
"step": 3550
},
{
"MSE": 680.5750654093424,
"MSE/layer0": 680.5750654093424,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.38,
"input_norm": 31.997909587224328,
"input_norm/layer0": 31.997909587224328,
"learning_rate": 0.0003368421052631579,
"loss": 2.0232,
"max_norm": 61.922420501708984,
"max_norm/layer0": 61.922420501708984,
"mean_norm": 53.59366035461426,
"mean_norm/layer0": 53.59366035461426,
"multicode_k": 1,
"output_norm": 12.078021968205773,
"output_norm/layer0": 12.078021968205773,
"step": 3600
},
{
"MSE": 678.8478289794925,
"MSE/layer0": 678.8478289794925,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.38,
"input_norm": 31.99789888381958,
"input_norm/layer0": 31.99789888381958,
"learning_rate": 0.00033421052631578944,
"loss": 2.0171,
"max_norm": 62.24449157714844,
"max_norm/layer0": 62.24449157714844,
"mean_norm": 53.85357475280762,
"mean_norm/layer0": 53.85357475280762,
"multicode_k": 1,
"output_norm": 12.149001522064214,
"output_norm/layer0": 12.149001522064214,
"step": 3650
},
{
"MSE": 677.7631386311848,
"MSE/layer0": 677.7631386311848,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.39,
"input_norm": 31.997902571360274,
"input_norm/layer0": 31.997902571360274,
"learning_rate": 0.00033157894736842103,
"loss": 2.0212,
"max_norm": 62.564937591552734,
"max_norm/layer0": 62.564937591552734,
"mean_norm": 54.10923385620117,
"mean_norm/layer0": 54.10923385620117,
"multicode_k": 1,
"output_norm": 12.200160818099977,
"output_norm/layer0": 12.200160818099977,
"step": 3700
},
{
"MSE": 676.4079176839191,
"MSE/layer0": 676.4079176839191,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.39,
"input_norm": 31.99789404869079,
"input_norm/layer0": 31.99789404869079,
"learning_rate": 0.0003289473684210527,
"loss": 2.0184,
"max_norm": 62.88063430786133,
"max_norm/layer0": 62.88063430786133,
"mean_norm": 54.362863540649414,
"mean_norm/layer0": 54.362863540649414,
"multicode_k": 1,
"output_norm": 12.259288868904115,
"output_norm/layer0": 12.259288868904115,
"step": 3750
},
{
"MSE": 675.2395422363282,
"MSE/layer0": 675.2395422363282,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.4,
"input_norm": 31.99789286295573,
"input_norm/layer0": 31.99789286295573,
"learning_rate": 0.0003263157894736842,
"loss": 2.0058,
"max_norm": 63.18323516845703,
"max_norm/layer0": 63.18323516845703,
"mean_norm": 54.61160659790039,
"mean_norm/layer0": 54.61160659790039,
"multicode_k": 1,
"output_norm": 12.305311093330385,
"output_norm/layer0": 12.305311093330385,
"step": 3800
},
{
"MSE": 673.5289611816404,
"MSE/layer0": 673.5289611816404,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.4,
"input_norm": 31.997895905176787,
"input_norm/layer0": 31.997895905176787,
"learning_rate": 0.0003236842105263158,
"loss": 2.0147,
"max_norm": 63.47829055786133,
"max_norm/layer0": 63.47829055786133,
"mean_norm": 54.85733413696289,
"mean_norm/layer0": 54.85733413696289,
"multicode_k": 1,
"output_norm": 12.368880640665692,
"output_norm/layer0": 12.368880640665692,
"step": 3850
},
{
"MSE": 672.7262348429363,
"MSE/layer0": 672.7262348429363,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.41,
"input_norm": 31.997892808914187,
"input_norm/layer0": 31.997892808914187,
"learning_rate": 0.0003210526315789474,
"loss": 2.0011,
"max_norm": 63.7920036315918,
"max_norm/layer0": 63.7920036315918,
"mean_norm": 55.099992752075195,
"mean_norm/layer0": 55.099992752075195,
"multicode_k": 1,
"output_norm": 12.413625540733335,
"output_norm/layer0": 12.413625540733335,
"step": 3900
},
{
"MSE": 671.2364042154949,
"MSE/layer0": 671.2364042154949,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.41,
"input_norm": 31.997892484664916,
"input_norm/layer0": 31.997892484664916,
"learning_rate": 0.00031842105263157894,
"loss": 2.0068,
"max_norm": 64.07488250732422,
"max_norm/layer0": 64.07488250732422,
"mean_norm": 55.33942985534668,
"mean_norm/layer0": 55.33942985534668,
"multicode_k": 1,
"output_norm": 12.478335504531861,
"output_norm/layer0": 12.478335504531861,
"step": 3950
},
{
"MSE": 669.9738427734378,
"MSE/layer0": 669.9738427734378,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.42,
"input_norm": 31.997889916102086,
"input_norm/layer0": 31.997889916102086,
"learning_rate": 0.00031578947368421053,
"loss": 1.9906,
"max_norm": 64.34879302978516,
"max_norm/layer0": 64.34879302978516,
"mean_norm": 55.576541900634766,
"mean_norm/layer0": 55.576541900634766,
"multicode_k": 1,
"output_norm": 12.524646544456482,
"output_norm/layer0": 12.524646544456482,
"step": 4000
},
{
"epoch": 0.42,
"eval_MSE/layer0": 669.1965223770751,
"eval_accuracy": 0.5253332978103237,
"eval_dead_code_fraction/layer0": 0.0,
"eval_input_norm/layer0": 31.997898890449704,
"eval_loss": 2.006638526916504,
"eval_multicode_k": 1,
"eval_output_norm/layer0": 12.556819209953474,
"eval_runtime": 73.3751,
"eval_samples_per_second": 63.005,
"eval_steps_per_second": 7.877,
"step": 4000
},
{
"MSE": 668.3390091959637,
"MSE/layer0": 668.3390091959637,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.42,
"input_norm": 31.99788640658062,
"input_norm/layer0": 31.99788640658062,
"learning_rate": 0.00031315789473684207,
"loss": 1.9962,
"max_norm": 64.65262603759766,
"max_norm/layer0": 64.65262603759766,
"mean_norm": 55.811140060424805,
"mean_norm/layer0": 55.811140060424805,
"multicode_k": 1,
"output_norm": 12.584023051261894,
"output_norm/layer0": 12.584023051261894,
"step": 4050
},
{
"MSE": 667.4144735717773,
"MSE/layer0": 667.4144735717773,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.43,
"input_norm": 31.99788414637247,
"input_norm/layer0": 31.99788414637247,
"learning_rate": 0.0003105263157894737,
"loss": 2.0038,
"max_norm": 64.9332275390625,
"max_norm/layer0": 64.9332275390625,
"mean_norm": 56.04119682312012,
"mean_norm/layer0": 56.04119682312012,
"multicode_k": 1,
"output_norm": 12.633416105906175,
"output_norm/layer0": 12.633416105906175,
"step": 4100
},
{
"MSE": 666.502211812337,
"MSE/layer0": 666.502211812337,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.43,
"input_norm": 31.997885338465373,
"input_norm/layer0": 31.997885338465373,
"learning_rate": 0.0003078947368421053,
"loss": 2.0046,
"max_norm": 65.20265197753906,
"max_norm/layer0": 65.20265197753906,
"mean_norm": 56.26777458190918,
"mean_norm/layer0": 56.26777458190918,
"multicode_k": 1,
"output_norm": 12.67455391089122,
"output_norm/layer0": 12.67455391089122,
"step": 4150
},
{
"MSE": 665.0832258097332,
"MSE/layer0": 665.0832258097332,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.44,
"input_norm": 31.997875661849967,
"input_norm/layer0": 31.997875661849967,
"learning_rate": 0.00030526315789473684,
"loss": 2.0066,
"max_norm": 65.46887969970703,
"max_norm/layer0": 65.46887969970703,
"mean_norm": 56.49208450317383,
"mean_norm/layer0": 56.49208450317383,
"multicode_k": 1,
"output_norm": 12.73067569255829,
"output_norm/layer0": 12.73067569255829,
"step": 4200
},
{
"MSE": 663.9124774169925,
"MSE/layer0": 663.9124774169925,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.44,
"input_norm": 31.997874129613244,
"input_norm/layer0": 31.997874129613244,
"learning_rate": 0.00030263157894736844,
"loss": 2.0006,
"max_norm": 65.73078918457031,
"max_norm/layer0": 65.73078918457031,
"mean_norm": 56.712989807128906,
"mean_norm/layer0": 56.712989807128906,
"multicode_k": 1,
"output_norm": 12.783326719601945,
"output_norm/layer0": 12.783326719601945,
"step": 4250
},
{
"MSE": 663.0191631062823,
"MSE/layer0": 663.0191631062823,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.45,
"input_norm": 31.99787082672119,
"input_norm/layer0": 31.99787082672119,
"learning_rate": 0.0003,
"loss": 1.9862,
"max_norm": 65.99481964111328,
"max_norm/layer0": 65.99481964111328,
"mean_norm": 56.93141746520996,
"mean_norm/layer0": 56.93141746520996,
"multicode_k": 1,
"output_norm": 12.824343484242757,
"output_norm/layer0": 12.824343484242757,
"step": 4300
},
{
"MSE": 661.9175501505531,
"MSE/layer0": 661.9175501505531,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.45,
"input_norm": 31.997863101959226,
"input_norm/layer0": 31.997863101959226,
"learning_rate": 0.00029736842105263157,
"loss": 1.9891,
"max_norm": 66.25289916992188,
"max_norm/layer0": 66.25289916992188,
"mean_norm": 57.14705848693848,
"mean_norm/layer0": 57.14705848693848,
"multicode_k": 1,
"output_norm": 12.873331023852028,
"output_norm/layer0": 12.873331023852028,
"step": 4350
},
{
"MSE": 660.8278486124677,
"MSE/layer0": 660.8278486124677,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.46,
"input_norm": 31.99786113739014,
"input_norm/layer0": 31.99786113739014,
"learning_rate": 0.00029473684210526316,
"loss": 1.9874,
"max_norm": 66.49950408935547,
"max_norm/layer0": 66.49950408935547,
"mean_norm": 57.3592414855957,
"mean_norm/layer0": 57.3592414855957,
"multicode_k": 1,
"output_norm": 12.925755645434062,
"output_norm/layer0": 12.925755645434062,
"step": 4400
},
{
"MSE": 659.7812182617188,
"MSE/layer0": 659.7812182617188,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.46,
"input_norm": 31.997859818140668,
"input_norm/layer0": 31.997859818140668,
"learning_rate": 0.00029210526315789475,
"loss": 1.9894,
"max_norm": 66.74647521972656,
"max_norm/layer0": 66.74647521972656,
"mean_norm": 57.56860542297363,
"mean_norm/layer0": 57.56860542297363,
"multicode_k": 1,
"output_norm": 12.969949612617494,
"output_norm/layer0": 12.969949612617494,
"step": 4450
},
{
"MSE": 658.2862462361654,
"MSE/layer0": 658.2862462361654,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.47,
"input_norm": 31.997855517069482,
"input_norm/layer0": 31.997855517069482,
"learning_rate": 0.00028947368421052634,
"loss": 1.9852,
"max_norm": 67.0057373046875,
"max_norm/layer0": 67.0057373046875,
"mean_norm": 57.77582931518555,
"mean_norm/layer0": 57.77582931518555,
"multicode_k": 1,
"output_norm": 13.019407332738238,
"output_norm/layer0": 13.019407332738238,
"step": 4500
},
{
"epoch": 0.47,
"eval_MSE/layer0": 657.5871718611108,
"eval_accuracy": 0.5279040641917702,
"eval_dead_code_fraction/layer0": 0.0,
"eval_input_norm/layer0": 31.997854675071842,
"eval_loss": 1.9898165464401245,
"eval_multicode_k": 1,
"eval_output_norm/layer0": 13.052642994207561,
"eval_runtime": 74.0479,
"eval_samples_per_second": 62.433,
"eval_steps_per_second": 7.806,
"step": 4500
},
{
"MSE": 657.2974259440105,
"MSE/layer0": 657.2974259440105,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.48,
"input_norm": 31.99785115559897,
"input_norm/layer0": 31.99785115559897,
"learning_rate": 0.0002868421052631579,
"loss": 1.9727,
"max_norm": 67.25566101074219,
"max_norm/layer0": 67.25566101074219,
"mean_norm": 57.98077964782715,
"mean_norm/layer0": 57.98077964782715,
"multicode_k": 1,
"output_norm": 13.063522001902262,
"output_norm/layer0": 13.063522001902262,
"step": 4550
},
{
"MSE": 656.5759895833334,
"MSE/layer0": 656.5759895833334,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.48,
"input_norm": 31.997858088811242,
"input_norm/layer0": 31.997858088811242,
"learning_rate": 0.00028421052631578947,
"loss": 1.9897,
"max_norm": 67.49605560302734,
"max_norm/layer0": 67.49605560302734,
"mean_norm": 58.182559967041016,
"mean_norm/layer0": 58.182559967041016,
"multicode_k": 1,
"output_norm": 13.099744346936546,
"output_norm/layer0": 13.099744346936546,
"step": 4600
},
{
"MSE": 655.8373800659178,
"MSE/layer0": 655.8373800659178,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.49,
"input_norm": 31.997857850392663,
"input_norm/layer0": 31.997857850392663,
"learning_rate": 0.00028157894736842106,
"loss": 1.9918,
"max_norm": 67.72962188720703,
"max_norm/layer0": 67.72962188720703,
"mean_norm": 58.38115119934082,
"mean_norm/layer0": 58.38115119934082,
"multicode_k": 1,
"output_norm": 13.13247790972392,
"output_norm/layer0": 13.13247790972392,
"step": 4650
},
{
"MSE": 654.6057424926755,
"MSE/layer0": 654.6057424926755,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.49,
"input_norm": 31.997855739593504,
"input_norm/layer0": 31.997855739593504,
"learning_rate": 0.0002789473684210526,
"loss": 1.9908,
"max_norm": 67.96855163574219,
"max_norm/layer0": 67.96855163574219,
"mean_norm": 58.57722091674805,
"mean_norm/layer0": 58.57722091674805,
"multicode_k": 1,
"output_norm": 13.187800091107682,
"output_norm/layer0": 13.187800091107682,
"step": 4700
},
{
"MSE": 653.7336292521161,
"MSE/layer0": 653.7336292521161,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.5,
"input_norm": 31.997861677805588,
"input_norm/layer0": 31.997861677805588,
"learning_rate": 0.00027631578947368425,
"loss": 1.9919,
"max_norm": 68.20356750488281,
"max_norm/layer0": 68.20356750488281,
"mean_norm": 58.77041053771973,
"mean_norm/layer0": 58.77041053771973,
"multicode_k": 1,
"output_norm": 13.224705770810434,
"output_norm/layer0": 13.224705770810434,
"step": 4750
},
{
"MSE": 652.4711893717447,
"MSE/layer0": 652.4711893717447,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.5,
"input_norm": 31.997852430343634,
"input_norm/layer0": 31.997852430343634,
"learning_rate": 0.00027368421052631584,
"loss": 1.9777,
"max_norm": 68.42557525634766,
"max_norm/layer0": 68.42557525634766,
"mean_norm": 58.96235466003418,
"mean_norm/layer0": 58.96235466003418,
"multicode_k": 1,
"output_norm": 13.275700616836549,
"output_norm/layer0": 13.275700616836549,
"step": 4800
},
{
"MSE": 651.660216674805,
"MSE/layer0": 651.660216674805,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.51,
"input_norm": 31.997857831319166,
"input_norm/layer0": 31.997857831319166,
"learning_rate": 0.0002710526315789474,
"loss": 1.9728,
"max_norm": 68.6562271118164,
"max_norm/layer0": 68.6562271118164,
"mean_norm": 59.151214599609375,
"mean_norm/layer0": 59.151214599609375,
"multicode_k": 1,
"output_norm": 13.316913062731425,
"output_norm/layer0": 13.316913062731425,
"step": 4850
},
{
"MSE": 651.1180463663741,
"MSE/layer0": 651.1180463663741,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.51,
"input_norm": 31.997851276397704,
"input_norm/layer0": 31.997851276397704,
"learning_rate": 0.00026842105263157897,
"loss": 1.9806,
"max_norm": 68.8842544555664,
"max_norm/layer0": 68.8842544555664,
"mean_norm": 59.336891174316406,
"mean_norm/layer0": 59.336891174316406,
"multicode_k": 1,
"output_norm": 13.348248120943708,
"output_norm/layer0": 13.348248120943708,
"step": 4900
},
{
"MSE": 650.0774853515621,
"MSE/layer0": 650.0774853515621,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.52,
"input_norm": 31.997846142450957,
"input_norm/layer0": 31.997846142450957,
"learning_rate": 0.0002657894736842105,
"loss": 1.9718,
"max_norm": 69.09481811523438,
"max_norm/layer0": 69.09481811523438,
"mean_norm": 59.52014923095703,
"mean_norm/layer0": 59.52014923095703,
"multicode_k": 1,
"output_norm": 13.38570951779683,
"output_norm/layer0": 13.38570951779683,
"step": 4950
},
{
"MSE": 649.2541728719073,
"MSE/layer0": 649.2541728719073,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.52,
"input_norm": 31.997852964401247,
"input_norm/layer0": 31.997852964401247,
"learning_rate": 0.0002631578947368421,
"loss": 1.9687,
"max_norm": 69.3100357055664,
"max_norm/layer0": 69.3100357055664,
"mean_norm": 59.70068359375,
"mean_norm/layer0": 59.70068359375,
"multicode_k": 1,
"output_norm": 13.423000381787617,
"output_norm/layer0": 13.423000381787617,
"step": 5000
},
{
"epoch": 0.52,
"eval_MSE/layer0": 648.246248562512,
"eval_accuracy": 0.5299863891896716,
"eval_dead_code_fraction/layer0": 0.0,
"eval_input_norm/layer0": 31.997853133679993,
"eval_loss": 1.975706934928894,
"eval_multicode_k": 1,
"eval_output_norm/layer0": 13.449585199510798,
"eval_runtime": 73.7352,
"eval_samples_per_second": 62.697,
"eval_steps_per_second": 7.839,
"step": 5000
},
{
"MSE": 648.4500269571938,
"MSE/layer0": 648.4500269571938,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.53,
"input_norm": 31.99784724235535,
"input_norm/layer0": 31.99784724235535,
"learning_rate": 0.0002605263157894737,
"loss": 1.9816,
"max_norm": 69.5140151977539,
"max_norm/layer0": 69.5140151977539,
"mean_norm": 59.87860107421875,
"mean_norm/layer0": 59.87860107421875,
"multicode_k": 1,
"output_norm": 13.459953915278113,
"output_norm/layer0": 13.459953915278113,
"step": 5050
},
{
"MSE": 647.5120207722985,
"MSE/layer0": 647.5120207722985,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.53,
"input_norm": 31.997845083872484,
"input_norm/layer0": 31.997845083872484,
"learning_rate": 0.0002578947368421053,
"loss": 1.9778,
"max_norm": 69.72222137451172,
"max_norm/layer0": 69.72222137451172,
"mean_norm": 60.054636001586914,
"mean_norm/layer0": 60.054636001586914,
"multicode_k": 1,
"output_norm": 13.495457221666976,
"output_norm/layer0": 13.495457221666976,
"step": 5100
},
{
"MSE": 646.8909526570638,
"MSE/layer0": 646.8909526570638,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.54,
"input_norm": 31.99783927281696,
"input_norm/layer0": 31.99783927281696,
"learning_rate": 0.0002552631578947369,
"loss": 1.9608,
"max_norm": 69.93621826171875,
"max_norm/layer0": 69.93621826171875,
"mean_norm": 60.228532791137695,
"mean_norm/layer0": 60.228532791137695,
"multicode_k": 1,
"output_norm": 13.523821023305253,
"output_norm/layer0": 13.523821023305253,
"step": 5150
},
{
"MSE": 645.6001059977214,
"MSE/layer0": 645.6001059977214,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.54,
"input_norm": 31.997829329172767,
"input_norm/layer0": 31.997829329172767,
"learning_rate": 0.0002526315789473684,
"loss": 1.9514,
"max_norm": 70.1629867553711,
"max_norm/layer0": 70.1629867553711,
"mean_norm": 60.39993667602539,
"mean_norm/layer0": 60.39993667602539,
"multicode_k": 1,
"output_norm": 13.575601536432904,
"output_norm/layer0": 13.575601536432904,
"step": 5200
},
{
"MSE": 645.0477313232423,
"MSE/layer0": 645.0477313232423,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.55,
"input_norm": 31.997829844156904,
"input_norm/layer0": 31.997829844156904,
"learning_rate": 0.00025,
"loss": 1.953,
"max_norm": 70.36659240722656,
"max_norm/layer0": 70.36659240722656,
"mean_norm": 60.568695068359375,
"mean_norm/layer0": 60.568695068359375,
"multicode_k": 1,
"output_norm": 13.606370126406352,
"output_norm/layer0": 13.606370126406352,
"step": 5250
},
{
"MSE": 644.0795441691082,
"MSE/layer0": 644.0795441691082,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.55,
"input_norm": 31.997827720642086,
"input_norm/layer0": 31.997827720642086,
"learning_rate": 0.0002473684210526316,
"loss": 1.9664,
"max_norm": 70.58203125,
"max_norm/layer0": 70.58203125,
"mean_norm": 60.73503303527832,
"mean_norm/layer0": 60.73503303527832,
"multicode_k": 1,
"output_norm": 13.644356350898736,
"output_norm/layer0": 13.644356350898736,
"step": 5300
},
{
"MSE": 643.4398297119142,
"MSE/layer0": 643.4398297119142,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.56,
"input_norm": 31.99783255259196,
"input_norm/layer0": 31.99783255259196,
"learning_rate": 0.0002447368421052632,
"loss": 1.9612,
"max_norm": 70.80116271972656,
"max_norm/layer0": 70.80116271972656,
"mean_norm": 60.89903450012207,
"mean_norm/layer0": 60.89903450012207,
"multicode_k": 1,
"output_norm": 13.676611545880633,
"output_norm/layer0": 13.676611545880633,
"step": 5350
},
{
"MSE": 642.6565199788413,
"MSE/layer0": 642.6565199788413,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.56,
"input_norm": 31.997826932271334,
"input_norm/layer0": 31.997826932271334,
"learning_rate": 0.00024210526315789475,
"loss": 1.9695,
"max_norm": 71.0198745727539,
"max_norm/layer0": 71.0198745727539,
"mean_norm": 61.06051063537598,
"mean_norm/layer0": 61.06051063537598,
"multicode_k": 1,
"output_norm": 13.705395914713542,
"output_norm/layer0": 13.705395914713542,
"step": 5400
},
{
"MSE": 641.5518863932293,
"MSE/layer0": 641.5518863932293,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.57,
"input_norm": 31.99782320658366,
"input_norm/layer0": 31.99782320658366,
"learning_rate": 0.00023947368421052632,
"loss": 1.9708,
"max_norm": 71.22209930419922,
"max_norm/layer0": 71.22209930419922,
"mean_norm": 61.22001647949219,
"mean_norm/layer0": 61.22001647949219,
"multicode_k": 1,
"output_norm": 13.747722525596622,
"output_norm/layer0": 13.747722525596622,
"step": 5450
},
{
"MSE": 641.0277577718095,
"MSE/layer0": 641.0277577718095,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.57,
"input_norm": 31.997817249298095,
"input_norm/layer0": 31.997817249298095,
"learning_rate": 0.00023684210526315788,
"loss": 1.9672,
"max_norm": 71.42549896240234,
"max_norm/layer0": 71.42549896240234,
"mean_norm": 61.377342224121094,
"mean_norm/layer0": 61.377342224121094,
"multicode_k": 1,
"output_norm": 13.775313488642375,
"output_norm/layer0": 13.775313488642375,
"step": 5500
},
{
"epoch": 0.57,
"eval_MSE/layer0": 640.0821653411886,
"eval_accuracy": 0.5321348969378108,
"eval_dead_code_fraction/layer0": 0.0,
"eval_input_norm/layer0": 31.997811444105338,
"eval_loss": 1.9619895219802856,
"eval_multicode_k": 1,
"eval_output_norm/layer0": 13.80778875099279,
"eval_runtime": 73.8101,
"eval_samples_per_second": 62.634,
"eval_steps_per_second": 7.831,
"step": 5500
},
{
"MSE": 640.2260070800783,
"MSE/layer0": 640.2260070800783,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.58,
"input_norm": 31.997807060877484,
"input_norm/layer0": 31.997807060877484,
"learning_rate": 0.00023421052631578948,
"loss": 1.9526,
"max_norm": 71.6324691772461,
"max_norm/layer0": 71.6324691772461,
"mean_norm": 61.532691955566406,
"mean_norm/layer0": 61.532691955566406,
"multicode_k": 1,
"output_norm": 13.81434581597646,
"output_norm/layer0": 13.81434581597646,
"step": 5550
},
{
"MSE": 639.6603690592448,
"MSE/layer0": 639.6603690592448,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.58,
"input_norm": 31.997815796534205,
"input_norm/layer0": 31.997815796534205,
"learning_rate": 0.00023157894736842107,
"loss": 1.9592,
"max_norm": 71.83050537109375,
"max_norm/layer0": 71.83050537109375,
"mean_norm": 61.68556213378906,
"mean_norm/layer0": 61.68556213378906,
"multicode_k": 1,
"output_norm": 13.843803273836771,
"output_norm/layer0": 13.843803273836771,
"step": 5600
},
{
"MSE": 638.8630006917316,
"MSE/layer0": 638.8630006917316,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.59,
"input_norm": 31.997804651260374,
"input_norm/layer0": 31.997804651260374,
"learning_rate": 0.00022894736842105263,
"loss": 1.9582,
"max_norm": 72.0186767578125,
"max_norm/layer0": 72.0186767578125,
"mean_norm": 61.836381912231445,
"mean_norm/layer0": 61.836381912231445,
"multicode_k": 1,
"output_norm": 13.87206829547882,
"output_norm/layer0": 13.87206829547882,
"step": 5650
},
{
"MSE": 638.6114538574218,
"MSE/layer0": 638.6114538574218,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.6,
"input_norm": 31.997799615859993,
"input_norm/layer0": 31.997799615859993,
"learning_rate": 0.00022631578947368422,
"loss": 1.9581,
"max_norm": 72.212158203125,
"max_norm/layer0": 72.212158203125,
"mean_norm": 61.984375,
"mean_norm/layer0": 61.984375,
"multicode_k": 1,
"output_norm": 13.890618721644087,
"output_norm/layer0": 13.890618721644087,
"step": 5700
},
{
"MSE": 637.4200433349613,
"MSE/layer0": 637.4200433349613,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.6,
"input_norm": 31.9977961031596,
"input_norm/layer0": 31.9977961031596,
"learning_rate": 0.0002236842105263158,
"loss": 1.9563,
"max_norm": 72.40010833740234,
"max_norm/layer0": 72.40010833740234,
"mean_norm": 62.13043212890625,
"mean_norm/layer0": 62.13043212890625,
"multicode_k": 1,
"output_norm": 13.935336654980983,
"output_norm/layer0": 13.935336654980983,
"step": 5750
},
{
"MSE": 636.9881141153974,
"MSE/layer0": 636.9881141153974,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.61,
"input_norm": 31.997795972824097,
"input_norm/layer0": 31.997795972824097,
"learning_rate": 0.00022105263157894735,
"loss": 1.9652,
"max_norm": 72.58822631835938,
"max_norm/layer0": 72.58822631835938,
"mean_norm": 62.274553298950195,
"mean_norm/layer0": 62.274553298950195,
"multicode_k": 1,
"output_norm": 13.960987841288247,
"output_norm/layer0": 13.960987841288247,
"step": 5800
},
{
"MSE": 636.22215037028,
"MSE/layer0": 636.22215037028,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.61,
"input_norm": 31.997794774373368,
"input_norm/layer0": 31.997794774373368,
"learning_rate": 0.00021842105263157897,
"loss": 1.9509,
"max_norm": 72.77027130126953,
"max_norm/layer0": 72.77027130126953,
"mean_norm": 62.417043685913086,
"mean_norm/layer0": 62.417043685913086,
"multicode_k": 1,
"output_norm": 13.98557560602824,
"output_norm/layer0": 13.98557560602824,
"step": 5850
},
{
"MSE": 635.6220120239254,
"MSE/layer0": 635.6220120239254,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.62,
"input_norm": 31.997796500523897,
"input_norm/layer0": 31.997796500523897,
"learning_rate": 0.00021578947368421054,
"loss": 1.9637,
"max_norm": 72.93942260742188,
"max_norm/layer0": 72.93942260742188,
"mean_norm": 62.5573787689209,
"mean_norm/layer0": 62.5573787689209,
"multicode_k": 1,
"output_norm": 14.011822309494022,
"output_norm/layer0": 14.011822309494022,
"step": 5900
},
{
"MSE": 635.1990796915693,
"MSE/layer0": 635.1990796915693,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.62,
"input_norm": 31.99778350830077,
"input_norm/layer0": 31.99778350830077,
"learning_rate": 0.0002131578947368421,
"loss": 1.9417,
"max_norm": 73.11217498779297,
"max_norm/layer0": 73.11217498779297,
"mean_norm": 62.69554328918457,
"mean_norm/layer0": 62.69554328918457,
"multicode_k": 1,
"output_norm": 14.040199557940166,
"output_norm/layer0": 14.040199557940166,
"step": 5950
},
{
"MSE": 634.617561645508,
"MSE/layer0": 634.617561645508,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.63,
"input_norm": 31.9977766195933,
"input_norm/layer0": 31.9977766195933,
"learning_rate": 0.00021052631578947367,
"loss": 1.9441,
"max_norm": 73.27617645263672,
"max_norm/layer0": 73.27617645263672,
"mean_norm": 62.831491470336914,
"mean_norm/layer0": 62.831491470336914,
"multicode_k": 1,
"output_norm": 14.065582130750016,
"output_norm/layer0": 14.065582130750016,
"step": 6000
},
{
"epoch": 0.63,
"eval_MSE/layer0": 633.8831350106634,
"eval_accuracy": 0.5338761587531762,
"eval_dead_code_fraction/layer0": 0.0,
"eval_input_norm/layer0": 31.997772803689244,
"eval_loss": 1.951315999031067,
"eval_multicode_k": 1,
"eval_output_norm/layer0": 14.101806794000915,
"eval_runtime": 73.5977,
"eval_samples_per_second": 62.814,
"eval_steps_per_second": 7.854,
"step": 6000
},
{
"MSE": 633.8391249593099,
"MSE/layer0": 633.8391249593099,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.63,
"input_norm": 31.99777683258057,
"input_norm/layer0": 31.99777683258057,
"learning_rate": 0.0002078947368421053,
"loss": 1.9507,
"max_norm": 73.43240356445312,
"max_norm/layer0": 73.43240356445312,
"mean_norm": 62.96537971496582,
"mean_norm/layer0": 62.96537971496582,
"multicode_k": 1,
"output_norm": 14.0993266805013,
"output_norm/layer0": 14.0993266805013,
"step": 6050
},
{
"MSE": 633.1878758748373,
"MSE/layer0": 633.1878758748373,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.64,
"input_norm": 31.997768185933438,
"input_norm/layer0": 31.997768185933438,
"learning_rate": 0.00020526315789473685,
"loss": 1.9535,
"max_norm": 73.59780883789062,
"max_norm/layer0": 73.59780883789062,
"mean_norm": 63.09744453430176,
"mean_norm/layer0": 63.09744453430176,
"multicode_k": 1,
"output_norm": 14.12703340212504,
"output_norm/layer0": 14.12703340212504,
"step": 6100
},
{
"MSE": 632.4774736531577,
"MSE/layer0": 632.4774736531577,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.64,
"input_norm": 31.997762225468954,
"input_norm/layer0": 31.997762225468954,
"learning_rate": 0.00020263157894736842,
"loss": 1.9502,
"max_norm": 73.7634506225586,
"max_norm/layer0": 73.7634506225586,
"mean_norm": 63.227373123168945,
"mean_norm/layer0": 63.227373123168945,
"multicode_k": 1,
"output_norm": 14.155767776171366,
"output_norm/layer0": 14.155767776171366,
"step": 6150
},
{
"MSE": 632.0819724527997,
"MSE/layer0": 632.0819724527997,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.65,
"input_norm": 31.997758595148735,
"input_norm/layer0": 31.997758595148735,
"learning_rate": 0.0002,
"loss": 1.948,
"max_norm": 73.93152618408203,
"max_norm/layer0": 73.93152618408203,
"mean_norm": 63.35538673400879,
"mean_norm/layer0": 63.35538673400879,
"multicode_k": 1,
"output_norm": 14.17972202301026,
"output_norm/layer0": 14.17972202301026,
"step": 6200
},
{
"MSE": 631.3937511189779,
"MSE/layer0": 631.3937511189779,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.65,
"input_norm": 31.997760909398387,
"input_norm/layer0": 31.997760909398387,
"learning_rate": 0.00019736842105263157,
"loss": 1.9449,
"max_norm": 74.07744598388672,
"max_norm/layer0": 74.07744598388672,
"mean_norm": 63.481435775756836,
"mean_norm/layer0": 63.481435775756836,
"multicode_k": 1,
"output_norm": 14.207703741391498,
"output_norm/layer0": 14.207703741391498,
"step": 6250
},
{
"MSE": 631.1097898356121,
"MSE/layer0": 631.1097898356121,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.66,
"input_norm": 31.997752253214514,
"input_norm/layer0": 31.997752253214514,
"learning_rate": 0.00019473684210526317,
"loss": 1.9256,
"max_norm": 74.23124694824219,
"max_norm/layer0": 74.23124694824219,
"mean_norm": 63.605464935302734,
"mean_norm/layer0": 63.605464935302734,
"multicode_k": 1,
"output_norm": 14.22562705675761,
"output_norm/layer0": 14.22562705675761,
"step": 6300
},
{
"MSE": 630.4715811157231,
"MSE/layer0": 630.4715811157231,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.66,
"input_norm": 31.99775326093037,
"input_norm/layer0": 31.99775326093037,
"learning_rate": 0.00019210526315789473,
"loss": 1.9394,
"max_norm": 74.37789154052734,
"max_norm/layer0": 74.37789154052734,
"mean_norm": 63.72765922546387,
"mean_norm/layer0": 63.72765922546387,
"multicode_k": 1,
"output_norm": 14.252348532676702,
"output_norm/layer0": 14.252348532676702,
"step": 6350
},
{
"MSE": 629.5616383870444,
"MSE/layer0": 629.5616383870444,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.67,
"input_norm": 31.9977592086792,
"input_norm/layer0": 31.9977592086792,
"learning_rate": 0.00018947368421052632,
"loss": 1.948,
"max_norm": 74.52799987792969,
"max_norm/layer0": 74.52799987792969,
"mean_norm": 63.84817886352539,
"mean_norm/layer0": 63.84817886352539,
"multicode_k": 1,
"output_norm": 14.278619543711342,
"output_norm/layer0": 14.278619543711342,
"step": 6400
},
{
"MSE": 628.9405068969726,
"MSE/layer0": 628.9405068969726,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.67,
"input_norm": 31.9977388159434,
"input_norm/layer0": 31.9977388159434,
"learning_rate": 0.0001868421052631579,
"loss": 1.9365,
"max_norm": 74.66854095458984,
"max_norm/layer0": 74.66854095458984,
"mean_norm": 63.96674346923828,
"mean_norm/layer0": 63.96674346923828,
"multicode_k": 1,
"output_norm": 14.308290360768634,
"output_norm/layer0": 14.308290360768634,
"step": 6450
},
{
"MSE": 628.8358187866208,
"MSE/layer0": 628.8358187866208,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.68,
"input_norm": 31.997742996215806,
"input_norm/layer0": 31.997742996215806,
"learning_rate": 0.00018421052631578948,
"loss": 1.9408,
"max_norm": 74.80778503417969,
"max_norm/layer0": 74.80778503417969,
"mean_norm": 64.08341407775879,
"mean_norm/layer0": 64.08341407775879,
"multicode_k": 1,
"output_norm": 14.319794411659238,
"output_norm/layer0": 14.319794411659238,
"step": 6500
},
{
"epoch": 0.68,
"eval_MSE/layer0": 628.092910030562,
"eval_accuracy": 0.5357603583933366,
"eval_dead_code_fraction/layer0": 0.0,
"eval_input_norm/layer0": 31.997742160687373,
"eval_loss": 1.9396723508834839,
"eval_multicode_k": 1,
"eval_output_norm/layer0": 14.354976222496019,
"eval_runtime": 73.9338,
"eval_samples_per_second": 62.529,
"eval_steps_per_second": 7.818,
"step": 6500
},
{
"MSE": 628.4872816975908,
"MSE/layer0": 628.4872816975908,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.68,
"input_norm": 31.997741378148394,
"input_norm/layer0": 31.997741378148394,
"learning_rate": 0.00018157894736842107,
"loss": 1.9392,
"max_norm": 74.95079040527344,
"max_norm/layer0": 74.95079040527344,
"mean_norm": 64.19818496704102,
"mean_norm/layer0": 64.19818496704102,
"multicode_k": 1,
"output_norm": 14.340212704340617,
"output_norm/layer0": 14.340212704340617,
"step": 6550
},
{
"MSE": 627.595106302897,
"MSE/layer0": 627.595106302897,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.69,
"input_norm": 31.99773236592611,
"input_norm/layer0": 31.99773236592611,
"learning_rate": 0.00017894736842105264,
"loss": 1.9312,
"max_norm": 75.08959197998047,
"max_norm/layer0": 75.08959197998047,
"mean_norm": 64.3110408782959,
"mean_norm/layer0": 64.3110408782959,
"multicode_k": 1,
"output_norm": 14.375651826858522,
"output_norm/layer0": 14.375651826858522,
"step": 6600
},
{
"MSE": 627.2688003540036,
"MSE/layer0": 627.2688003540036,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.69,
"input_norm": 31.997724459966015,
"input_norm/layer0": 31.997724459966015,
"learning_rate": 0.0001763157894736842,
"loss": 1.9454,
"max_norm": 75.23365783691406,
"max_norm/layer0": 75.23365783691406,
"mean_norm": 64.42234230041504,
"mean_norm/layer0": 64.42234230041504,
"multicode_k": 1,
"output_norm": 14.385090745290121,
"output_norm/layer0": 14.385090745290121,
"step": 6650
},
{
"MSE": 626.5893623860678,
"MSE/layer0": 626.5893623860678,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.7,
"input_norm": 31.997720209757503,
"input_norm/layer0": 31.997720209757503,
"learning_rate": 0.0001736842105263158,
"loss": 1.9428,
"max_norm": 75.36791229248047,
"max_norm/layer0": 75.36791229248047,
"mean_norm": 64.5310287475586,
"mean_norm/layer0": 64.5310287475586,
"multicode_k": 1,
"output_norm": 14.414791498184208,
"output_norm/layer0": 14.414791498184208,
"step": 6700
},
{
"MSE": 626.1687516276043,
"MSE/layer0": 626.1687516276043,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.7,
"input_norm": 31.997717237472536,
"input_norm/layer0": 31.997717237472536,
"learning_rate": 0.00017105263157894739,
"loss": 1.9341,
"max_norm": 75.49561309814453,
"max_norm/layer0": 75.49561309814453,
"mean_norm": 64.63836669921875,
"mean_norm/layer0": 64.63836669921875,
"multicode_k": 1,
"output_norm": 14.436859647432962,
"output_norm/layer0": 14.436859647432962,
"step": 6750
},
{
"MSE": 625.7842074584966,
"MSE/layer0": 625.7842074584966,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.71,
"input_norm": 31.997723042170207,
"input_norm/layer0": 31.997723042170207,
"learning_rate": 0.00016842105263157895,
"loss": 1.9391,
"max_norm": 75.62852478027344,
"max_norm/layer0": 75.62852478027344,
"mean_norm": 64.74386024475098,
"mean_norm/layer0": 64.74386024475098,
"multicode_k": 1,
"output_norm": 14.45211536884308,
"output_norm/layer0": 14.45211536884308,
"step": 6800
},
{
"MSE": 625.3583324178057,
"MSE/layer0": 625.3583324178057,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.72,
"input_norm": 31.997710723876956,
"input_norm/layer0": 31.997710723876956,
"learning_rate": 0.00016578947368421052,
"loss": 1.9214,
"max_norm": 75.7518081665039,
"max_norm/layer0": 75.7518081665039,
"mean_norm": 64.84785079956055,
"mean_norm/layer0": 64.84785079956055,
"multicode_k": 1,
"output_norm": 14.472083713213603,
"output_norm/layer0": 14.472083713213603,
"step": 6850
},
{
"MSE": 625.0808269246418,
"MSE/layer0": 625.0808269246418,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.72,
"input_norm": 31.997701005935667,
"input_norm/layer0": 31.997701005935667,
"learning_rate": 0.0001631578947368421,
"loss": 1.9248,
"max_norm": 75.8736343383789,
"max_norm/layer0": 75.8736343383789,
"mean_norm": 64.94989013671875,
"mean_norm/layer0": 64.94989013671875,
"multicode_k": 1,
"output_norm": 14.49320138454437,
"output_norm/layer0": 14.49320138454437,
"step": 6900
},
{
"MSE": 624.4893544514975,
"MSE/layer0": 624.4893544514975,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.73,
"input_norm": 31.997702992757166,
"input_norm/layer0": 31.997702992757166,
"learning_rate": 0.0001605263157894737,
"loss": 1.9357,
"max_norm": 75.99244689941406,
"max_norm/layer0": 75.99244689941406,
"mean_norm": 65.05006790161133,
"mean_norm/layer0": 65.05006790161133,
"multicode_k": 1,
"output_norm": 14.515017460187277,
"output_norm/layer0": 14.515017460187277,
"step": 6950
},
{
"MSE": 623.983821309408,
"MSE/layer0": 623.983821309408,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.73,
"input_norm": 31.997692581812547,
"input_norm/layer0": 31.997692581812547,
"learning_rate": 0.00015789473684210527,
"loss": 1.9256,
"max_norm": 76.1169204711914,
"max_norm/layer0": 76.1169204711914,
"mean_norm": 65.14841270446777,
"mean_norm/layer0": 65.14841270446777,
"multicode_k": 1,
"output_norm": 14.531605450312297,
"output_norm/layer0": 14.531605450312297,
"step": 7000
},
{
"epoch": 0.73,
"eval_MSE/layer0": 623.2726008245854,
"eval_accuracy": 0.5373965313049694,
"eval_dead_code_fraction/layer0": 0.0,
"eval_input_norm/layer0": 31.99768957905041,
"eval_loss": 1.9302037954330444,
"eval_multicode_k": 1,
"eval_output_norm/layer0": 14.553397603295936,
"eval_runtime": 73.3018,
"eval_samples_per_second": 63.068,
"eval_steps_per_second": 7.885,
"step": 7000
},
{
"MSE": 623.8173256429034,
"MSE/layer0": 623.8173256429034,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.74,
"input_norm": 31.997689800262457,
"input_norm/layer0": 31.997689800262457,
"learning_rate": 0.00015526315789473686,
"loss": 1.9215,
"max_norm": 76.22943115234375,
"max_norm/layer0": 76.22943115234375,
"mean_norm": 65.2452278137207,
"mean_norm/layer0": 65.2452278137207,
"multicode_k": 1,
"output_norm": 14.544135572115584,
"output_norm/layer0": 14.544135572115584,
"step": 7050
},
{
"MSE": 623.4564833577472,
"MSE/layer0": 623.4564833577472,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.74,
"input_norm": 31.997697146733607,
"input_norm/layer0": 31.997697146733607,
"learning_rate": 0.00015263157894736842,
"loss": 1.9291,
"max_norm": 76.35796356201172,
"max_norm/layer0": 76.35796356201172,
"mean_norm": 65.33997344970703,
"mean_norm/layer0": 65.33997344970703,
"multicode_k": 1,
"output_norm": 14.557166822751359,
"output_norm/layer0": 14.557166822751359,
"step": 7100
},
{
"MSE": 622.3157424926754,
"MSE/layer0": 622.3157424926754,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.75,
"input_norm": 31.997690575917574,
"input_norm/layer0": 31.997690575917574,
"learning_rate": 0.00015,
"loss": 1.9272,
"max_norm": 76.47930145263672,
"max_norm/layer0": 76.47930145263672,
"mean_norm": 65.4333724975586,
"mean_norm/layer0": 65.4333724975586,
"multicode_k": 1,
"output_norm": 14.59491890271505,
"output_norm/layer0": 14.59491890271505,
"step": 7150
},
{
"MSE": 622.1008169555663,
"MSE/layer0": 622.1008169555663,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.75,
"input_norm": 31.997691469192503,
"input_norm/layer0": 31.997691469192503,
"learning_rate": 0.00014736842105263158,
"loss": 1.9421,
"max_norm": 76.5845947265625,
"max_norm/layer0": 76.5845947265625,
"mean_norm": 65.52462577819824,
"mean_norm/layer0": 65.52462577819824,
"multicode_k": 1,
"output_norm": 14.608456416130064,
"output_norm/layer0": 14.608456416130064,
"step": 7200
},
{
"MSE": 621.7943653361006,
"MSE/layer0": 621.7943653361006,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.76,
"input_norm": 31.997678140004478,
"input_norm/layer0": 31.997678140004478,
"learning_rate": 0.00014473684210526317,
"loss": 1.9221,
"max_norm": 76.68899536132812,
"max_norm/layer0": 76.68899536132812,
"mean_norm": 65.61434745788574,
"mean_norm/layer0": 65.61434745788574,
"multicode_k": 1,
"output_norm": 14.622403078079222,
"output_norm/layer0": 14.622403078079222,
"step": 7250
},
{
"MSE": 621.7445918782552,
"MSE/layer0": 621.7445918782552,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.76,
"input_norm": 31.997679424285884,
"input_norm/layer0": 31.997679424285884,
"learning_rate": 0.00014210526315789474,
"loss": 1.9172,
"max_norm": 76.79942321777344,
"max_norm/layer0": 76.79942321777344,
"mean_norm": 65.70241737365723,
"mean_norm/layer0": 65.70241737365723,
"multicode_k": 1,
"output_norm": 14.632240413029983,
"output_norm/layer0": 14.632240413029983,
"step": 7300
},
{
"MSE": 621.0073055013017,
"MSE/layer0": 621.0073055013017,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.77,
"input_norm": 31.997667986551914,
"input_norm/layer0": 31.997667986551914,
"learning_rate": 0.0001394736842105263,
"loss": 1.9187,
"max_norm": 76.90473937988281,
"max_norm/layer0": 76.90473937988281,
"mean_norm": 65.78865623474121,
"mean_norm/layer0": 65.78865623474121,
"multicode_k": 1,
"output_norm": 14.659644064903254,
"output_norm/layer0": 14.659644064903254,
"step": 7350
},
{
"MSE": 620.5166587320964,
"MSE/layer0": 620.5166587320964,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.77,
"input_norm": 31.99766827583312,
"input_norm/layer0": 31.99766827583312,
"learning_rate": 0.00013684210526315792,
"loss": 1.9236,
"max_norm": 77.00653839111328,
"max_norm/layer0": 77.00653839111328,
"mean_norm": 65.87344741821289,
"mean_norm/layer0": 65.87344741821289,
"multicode_k": 1,
"output_norm": 14.683248674074807,
"output_norm/layer0": 14.683248674074807,
"step": 7400
},
{
"MSE": 620.4730934651691,
"MSE/layer0": 620.4730934651691,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.78,
"input_norm": 31.99766536712645,
"input_norm/layer0": 31.99766536712645,
"learning_rate": 0.00013421052631578948,
"loss": 1.9181,
"max_norm": 77.11151123046875,
"max_norm/layer0": 77.11151123046875,
"mean_norm": 65.95642852783203,
"mean_norm/layer0": 65.95642852783203,
"multicode_k": 1,
"output_norm": 14.682427426973977,
"output_norm/layer0": 14.682427426973977,
"step": 7450
},
{
"MSE": 619.8806704711913,
"MSE/layer0": 619.8806704711913,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.78,
"input_norm": 31.997652931213374,
"input_norm/layer0": 31.997652931213374,
"learning_rate": 0.00013157894736842105,
"loss": 1.9204,
"max_norm": 77.21614837646484,
"max_norm/layer0": 77.21614837646484,
"mean_norm": 66.03750610351562,
"mean_norm/layer0": 66.03750610351562,
"multicode_k": 1,
"output_norm": 14.709125100771587,
"output_norm/layer0": 14.709125100771587,
"step": 7500
},
{
"epoch": 0.78,
"eval_MSE/layer0": 619.4572802491444,
"eval_accuracy": 0.538146743438657,
"eval_dead_code_fraction/layer0": 0.0,
"eval_input_norm/layer0": 31.997657016941467,
"eval_loss": 1.9224542379379272,
"eval_multicode_k": 1,
"eval_output_norm/layer0": 14.72584700899003,
"eval_runtime": 73.2809,
"eval_samples_per_second": 63.086,
"eval_steps_per_second": 7.887,
"step": 7500
},
{
"MSE": 619.6498880004883,
"MSE/layer0": 619.6498880004883,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.79,
"input_norm": 31.997653865814208,
"input_norm/layer0": 31.997653865814208,
"learning_rate": 0.00012894736842105264,
"loss": 1.9109,
"max_norm": 77.3195571899414,
"max_norm/layer0": 77.3195571899414,
"mean_norm": 66.11709403991699,
"mean_norm/layer0": 66.11709403991699,
"multicode_k": 1,
"output_norm": 14.724224853515622,
"output_norm/layer0": 14.724224853515622,
"step": 7550
},
{
"MSE": 619.544646809896,
"MSE/layer0": 619.544646809896,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.79,
"input_norm": 31.997655792236333,
"input_norm/layer0": 31.997655792236333,
"learning_rate": 0.0001263157894736842,
"loss": 1.9247,
"max_norm": 77.41654205322266,
"max_norm/layer0": 77.41654205322266,
"mean_norm": 66.19502639770508,
"mean_norm/layer0": 66.19502639770508,
"multicode_k": 1,
"output_norm": 14.729852019945778,
"output_norm/layer0": 14.729852019945778,
"step": 7600
},
{
"MSE": 619.1442233276366,
"MSE/layer0": 619.1442233276366,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.8,
"input_norm": 31.99764471054077,
"input_norm/layer0": 31.99764471054077,
"learning_rate": 0.0001236842105263158,
"loss": 1.9237,
"max_norm": 77.5074234008789,
"max_norm/layer0": 77.5074234008789,
"mean_norm": 66.27114677429199,
"mean_norm/layer0": 66.27114677429199,
"multicode_k": 1,
"output_norm": 14.745990212758379,
"output_norm/layer0": 14.745990212758379,
"step": 7650
},
{
"MSE": 618.6404962158206,
"MSE/layer0": 618.6404962158206,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.8,
"input_norm": 31.997635892232267,
"input_norm/layer0": 31.997635892232267,
"learning_rate": 0.00012105263157894738,
"loss": 1.913,
"max_norm": 77.602294921875,
"max_norm/layer0": 77.602294921875,
"mean_norm": 66.34577751159668,
"mean_norm/layer0": 66.34577751159668,
"multicode_k": 1,
"output_norm": 14.766639779408772,
"output_norm/layer0": 14.766639779408772,
"step": 7700
},
{
"MSE": 618.2833578491213,
"MSE/layer0": 618.2833578491213,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.81,
"input_norm": 31.997631740570075,
"input_norm/layer0": 31.997631740570075,
"learning_rate": 0.00011842105263157894,
"loss": 1.9214,
"max_norm": 77.6917724609375,
"max_norm/layer0": 77.6917724609375,
"mean_norm": 66.41888046264648,
"mean_norm/layer0": 66.41888046264648,
"multicode_k": 1,
"output_norm": 14.779039435386654,
"output_norm/layer0": 14.779039435386654,
"step": 7750
},
{
"MSE": 618.2477112833653,
"MSE/layer0": 618.2477112833653,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.81,
"input_norm": 31.997634382247924,
"input_norm/layer0": 31.997634382247924,
"learning_rate": 0.00011578947368421053,
"loss": 1.9127,
"max_norm": 77.77839660644531,
"max_norm/layer0": 77.77839660644531,
"mean_norm": 66.49017333984375,
"mean_norm/layer0": 66.49017333984375,
"multicode_k": 1,
"output_norm": 14.782011265754704,
"output_norm/layer0": 14.782011265754704,
"step": 7800
},
{
"MSE": 617.7417582194005,
"MSE/layer0": 617.7417582194005,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.82,
"input_norm": 31.997628266016648,
"input_norm/layer0": 31.997628266016648,
"learning_rate": 0.00011315789473684211,
"loss": 1.9084,
"max_norm": 77.86212158203125,
"max_norm/layer0": 77.86212158203125,
"mean_norm": 66.55990791320801,
"mean_norm/layer0": 66.55990791320801,
"multicode_k": 1,
"output_norm": 14.801776518821718,
"output_norm/layer0": 14.801776518821718,
"step": 7850
},
{
"MSE": 617.339886271159,
"MSE/layer0": 617.339886271159,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.82,
"input_norm": 31.99762384732564,
"input_norm/layer0": 31.99762384732564,
"learning_rate": 0.00011052631578947368,
"loss": 1.9115,
"max_norm": 77.94374084472656,
"max_norm/layer0": 77.94374084472656,
"mean_norm": 66.62779235839844,
"mean_norm/layer0": 66.62779235839844,
"multicode_k": 1,
"output_norm": 14.823196705182394,
"output_norm/layer0": 14.823196705182394,
"step": 7900
},
{
"MSE": 617.3184334309897,
"MSE/layer0": 617.3184334309897,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.83,
"input_norm": 31.9976179567973,
"input_norm/layer0": 31.9976179567973,
"learning_rate": 0.00010789473684210527,
"loss": 1.9136,
"max_norm": 78.02580261230469,
"max_norm/layer0": 78.02580261230469,
"mean_norm": 66.69412803649902,
"mean_norm/layer0": 66.69412803649902,
"multicode_k": 1,
"output_norm": 14.828250519434608,
"output_norm/layer0": 14.828250519434608,
"step": 7950
},
{
"MSE": 616.9322255452475,
"MSE/layer0": 616.9322255452475,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.84,
"input_norm": 31.997613105773937,
"input_norm/layer0": 31.997613105773937,
"learning_rate": 0.00010526315789473683,
"loss": 1.907,
"max_norm": 78.10686492919922,
"max_norm/layer0": 78.10686492919922,
"mean_norm": 66.7584114074707,
"mean_norm/layer0": 66.7584114074707,
"multicode_k": 1,
"output_norm": 14.839720834096273,
"output_norm/layer0": 14.839720834096273,
"step": 8000
},
{
"epoch": 0.84,
"eval_MSE/layer0": 616.4379357749087,
"eval_accuracy": 0.5393073732024142,
"eval_dead_code_fraction/layer0": 0.0,
"eval_input_norm/layer0": 31.99761064584294,
"eval_loss": 1.9150168895721436,
"eval_multicode_k": 1,
"eval_output_norm/layer0": 14.862492301828695,
"eval_runtime": 73.6278,
"eval_samples_per_second": 62.789,
"eval_steps_per_second": 7.85,
"step": 8000
},
{
"MSE": 616.6813212076825,
"MSE/layer0": 616.6813212076825,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.84,
"input_norm": 31.997603750228897,
"input_norm/layer0": 31.997603750228897,
"learning_rate": 0.00010263157894736843,
"loss": 1.8975,
"max_norm": 78.18397521972656,
"max_norm/layer0": 78.18397521972656,
"mean_norm": 66.82158279418945,
"mean_norm/layer0": 66.82158279418945,
"multicode_k": 1,
"output_norm": 14.848202861150106,
"output_norm/layer0": 14.848202861150106,
"step": 8050
},
{
"MSE": 616.5551970418294,
"MSE/layer0": 616.5551970418294,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.85,
"input_norm": 31.99760689099629,
"input_norm/layer0": 31.99760689099629,
"learning_rate": 0.0001,
"loss": 1.916,
"max_norm": 78.26499938964844,
"max_norm/layer0": 78.26499938964844,
"mean_norm": 66.88335037231445,
"mean_norm/layer0": 66.88335037231445,
"multicode_k": 1,
"output_norm": 14.8604402812322,
"output_norm/layer0": 14.8604402812322,
"step": 8100
},
{
"MSE": 616.288039347331,
"MSE/layer0": 616.288039347331,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.85,
"input_norm": 31.997600466410333,
"input_norm/layer0": 31.997600466410333,
"learning_rate": 9.736842105263158e-05,
"loss": 1.902,
"max_norm": 78.33844757080078,
"max_norm/layer0": 78.33844757080078,
"mean_norm": 66.94340133666992,
"mean_norm/layer0": 66.94340133666992,
"multicode_k": 1,
"output_norm": 14.872187639872232,
"output_norm/layer0": 14.872187639872232,
"step": 8150
},
{
"MSE": 615.8982196044924,
"MSE/layer0": 615.8982196044924,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.86,
"input_norm": 31.997594718933108,
"input_norm/layer0": 31.997594718933108,
"learning_rate": 9.473684210526316e-05,
"loss": 1.9142,
"max_norm": 78.40998077392578,
"max_norm/layer0": 78.40998077392578,
"mean_norm": 67.00171661376953,
"mean_norm/layer0": 67.00171661376953,
"multicode_k": 1,
"output_norm": 14.884622203509018,
"output_norm/layer0": 14.884622203509018,
"step": 8200
},
{
"MSE": 615.649053141276,
"MSE/layer0": 615.649053141276,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.86,
"input_norm": 31.997591203053794,
"input_norm/layer0": 31.997591203053794,
"learning_rate": 9.210526315789474e-05,
"loss": 1.9103,
"max_norm": 78.47700500488281,
"max_norm/layer0": 78.47700500488281,
"mean_norm": 67.05831527709961,
"mean_norm/layer0": 67.05831527709961,
"multicode_k": 1,
"output_norm": 14.896942078272502,
"output_norm/layer0": 14.896942078272502,
"step": 8250
},
{
"MSE": 615.4050069173176,
"MSE/layer0": 615.4050069173176,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.87,
"input_norm": 31.99757507324218,
"input_norm/layer0": 31.99757507324218,
"learning_rate": 8.947368421052632e-05,
"loss": 1.8999,
"max_norm": 78.54086303710938,
"max_norm/layer0": 78.54086303710938,
"mean_norm": 67.11351013183594,
"mean_norm/layer0": 67.11351013183594,
"multicode_k": 1,
"output_norm": 14.907591681480406,
"output_norm/layer0": 14.907591681480406,
"step": 8300
},
{
"MSE": 615.0221789550782,
"MSE/layer0": 615.0221789550782,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.87,
"input_norm": 31.997587076822917,
"input_norm/layer0": 31.997587076822917,
"learning_rate": 8.68421052631579e-05,
"loss": 1.9122,
"max_norm": 78.60425567626953,
"max_norm/layer0": 78.60425567626953,
"mean_norm": 67.1669692993164,
"mean_norm/layer0": 67.1669692993164,
"multicode_k": 1,
"output_norm": 14.918850135803218,
"output_norm/layer0": 14.918850135803218,
"step": 8350
},
{
"MSE": 614.7660255940759,
"MSE/layer0": 614.7660255940759,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.88,
"input_norm": 31.99758012771608,
"input_norm/layer0": 31.99758012771608,
"learning_rate": 8.421052631578948e-05,
"loss": 1.9074,
"max_norm": 78.66250610351562,
"max_norm/layer0": 78.66250610351562,
"mean_norm": 67.21884536743164,
"mean_norm/layer0": 67.21884536743164,
"multicode_k": 1,
"output_norm": 14.930259111722311,
"output_norm/layer0": 14.930259111722311,
"step": 8400
},
{
"MSE": 614.4904387410484,
"MSE/layer0": 614.4904387410484,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.88,
"input_norm": 31.99757884025574,
"input_norm/layer0": 31.99757884025574,
"learning_rate": 8.157894736842105e-05,
"loss": 1.9151,
"max_norm": 78.7247314453125,
"max_norm/layer0": 78.7247314453125,
"mean_norm": 67.26914596557617,
"mean_norm/layer0": 67.26914596557617,
"multicode_k": 1,
"output_norm": 14.941800510088594,
"output_norm/layer0": 14.941800510088594,
"step": 8450
},
{
"MSE": 614.3984759521479,
"MSE/layer0": 614.3984759521479,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.89,
"input_norm": 31.997565978368122,
"input_norm/layer0": 31.997565978368122,
"learning_rate": 7.894736842105263e-05,
"loss": 1.8931,
"max_norm": 78.78428649902344,
"max_norm/layer0": 78.78428649902344,
"mean_norm": 67.31785583496094,
"mean_norm/layer0": 67.31785583496094,
"multicode_k": 1,
"output_norm": 14.948297271728517,
"output_norm/layer0": 14.948297271728517,
"step": 8500
},
{
"epoch": 0.89,
"eval_MSE/layer0": 613.78736410403,
"eval_accuracy": 0.5408171011151899,
"eval_dead_code_fraction/layer0": 0.0,
"eval_input_norm/layer0": 31.997572115378908,
"eval_loss": 1.9076036214828491,
"eval_multicode_k": 1,
"eval_output_norm/layer0": 14.968526063531659,
"eval_runtime": 73.9448,
"eval_samples_per_second": 62.52,
"eval_steps_per_second": 7.817,
"step": 8500
},
{
"MSE": 614.2003710937502,
"MSE/layer0": 614.2003710937502,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.89,
"input_norm": 31.997571328481037,
"input_norm/layer0": 31.997571328481037,
"learning_rate": 7.631578947368421e-05,
"loss": 1.9006,
"max_norm": 78.83836364746094,
"max_norm/layer0": 78.83836364746094,
"mean_norm": 67.36493301391602,
"mean_norm/layer0": 67.36493301391602,
"multicode_k": 1,
"output_norm": 14.955024781227117,
"output_norm/layer0": 14.955024781227117,
"step": 8550
},
{
"MSE": 613.8168900553383,
"MSE/layer0": 613.8168900553383,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.9,
"input_norm": 31.997557487487796,
"input_norm/layer0": 31.997557487487796,
"learning_rate": 7.368421052631579e-05,
"loss": 1.9045,
"max_norm": 78.8912582397461,
"max_norm/layer0": 78.8912582397461,
"mean_norm": 67.41046524047852,
"mean_norm/layer0": 67.41046524047852,
"multicode_k": 1,
"output_norm": 14.968488362630207,
"output_norm/layer0": 14.968488362630207,
"step": 8600
},
{
"MSE": 613.6968625895179,
"MSE/layer0": 613.6968625895179,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.9,
"input_norm": 31.99754828453064,
"input_norm/layer0": 31.99754828453064,
"learning_rate": 7.105263157894737e-05,
"loss": 1.9009,
"max_norm": 78.942626953125,
"max_norm/layer0": 78.942626953125,
"mean_norm": 67.45438766479492,
"mean_norm/layer0": 67.45438766479492,
"multicode_k": 1,
"output_norm": 14.979475774765014,
"output_norm/layer0": 14.979475774765014,
"step": 8650
},
{
"MSE": 613.3956824747725,
"MSE/layer0": 613.3956824747725,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.91,
"input_norm": 31.997546965281174,
"input_norm/layer0": 31.997546965281174,
"learning_rate": 6.842105263157896e-05,
"loss": 1.9,
"max_norm": 78.99479675292969,
"max_norm/layer0": 78.99479675292969,
"mean_norm": 67.49666595458984,
"mean_norm/layer0": 67.49666595458984,
"multicode_k": 1,
"output_norm": 14.988234910964966,
"output_norm/layer0": 14.988234910964966,
"step": 8700
},
{
"MSE": 613.2128627522789,
"MSE/layer0": 613.2128627522789,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.91,
"input_norm": 31.997544927597048,
"input_norm/layer0": 31.997544927597048,
"learning_rate": 6.578947368421052e-05,
"loss": 1.9059,
"max_norm": 79.04541015625,
"max_norm/layer0": 79.04541015625,
"mean_norm": 67.53742218017578,
"mean_norm/layer0": 67.53742218017578,
"multicode_k": 1,
"output_norm": 14.991036421457924,
"output_norm/layer0": 14.991036421457924,
"step": 8750
},
{
"MSE": 612.9370720418297,
"MSE/layer0": 612.9370720418297,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.92,
"input_norm": 31.99753908475239,
"input_norm/layer0": 31.99753908475239,
"learning_rate": 6.31578947368421e-05,
"loss": 1.9023,
"max_norm": 79.09040069580078,
"max_norm/layer0": 79.09040069580078,
"mean_norm": 67.57658767700195,
"mean_norm/layer0": 67.57658767700195,
"multicode_k": 1,
"output_norm": 15.003661061922706,
"output_norm/layer0": 15.003661061922706,
"step": 8800
},
{
"MSE": 613.0978963216148,
"MSE/layer0": 613.0978963216148,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.92,
"input_norm": 31.997535756429023,
"input_norm/layer0": 31.997535756429023,
"learning_rate": 6.052631578947369e-05,
"loss": 1.9004,
"max_norm": 79.13478088378906,
"max_norm/layer0": 79.13478088378906,
"mean_norm": 67.61412811279297,
"mean_norm/layer0": 67.61412811279297,
"multicode_k": 1,
"output_norm": 14.999437109629307,
"output_norm/layer0": 14.999437109629307,
"step": 8850
},
{
"MSE": 612.746408691406,
"MSE/layer0": 612.746408691406,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.93,
"input_norm": 31.997531512578334,
"input_norm/layer0": 31.997531512578334,
"learning_rate": 5.789473684210527e-05,
"loss": 1.8947,
"max_norm": 79.17863464355469,
"max_norm/layer0": 79.17863464355469,
"mean_norm": 67.65010452270508,
"mean_norm/layer0": 67.65010452270508,
"multicode_k": 1,
"output_norm": 15.013854147593182,
"output_norm/layer0": 15.013854147593182,
"step": 8900
},
{
"MSE": 612.5075473022462,
"MSE/layer0": 612.5075473022462,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.93,
"input_norm": 31.997522573471066,
"input_norm/layer0": 31.997522573471066,
"learning_rate": 5.526315789473684e-05,
"loss": 1.888,
"max_norm": 79.2198257446289,
"max_norm/layer0": 79.2198257446289,
"mean_norm": 67.6845588684082,
"mean_norm/layer0": 67.6845588684082,
"multicode_k": 1,
"output_norm": 15.024005990028382,
"output_norm/layer0": 15.024005990028382,
"step": 8950
},
{
"MSE": 612.4464337158204,
"MSE/layer0": 612.4464337158204,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.94,
"input_norm": 31.99751985549927,
"input_norm/layer0": 31.99751985549927,
"learning_rate": 5.263157894736842e-05,
"loss": 1.9021,
"max_norm": 79.25985717773438,
"max_norm/layer0": 79.25985717773438,
"mean_norm": 67.71733856201172,
"mean_norm/layer0": 67.71733856201172,
"multicode_k": 1,
"output_norm": 15.025202210744226,
"output_norm/layer0": 15.025202210744226,
"step": 9000
},
{
"epoch": 0.94,
"eval_MSE/layer0": 612.012579843437,
"eval_accuracy": 0.5416772654217966,
"eval_dead_code_fraction/layer0": 0.0,
"eval_input_norm/layer0": 31.99751990196794,
"eval_loss": 1.90205979347229,
"eval_multicode_k": 1,
"eval_output_norm/layer0": 15.037853428586699,
"eval_runtime": 73.2981,
"eval_samples_per_second": 63.071,
"eval_steps_per_second": 7.886,
"step": 9000
},
{
"MSE": 612.1616466267901,
"MSE/layer0": 612.1616466267901,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.94,
"input_norm": 31.997515144348153,
"input_norm/layer0": 31.997515144348153,
"learning_rate": 5e-05,
"loss": 1.8979,
"max_norm": 79.2950668334961,
"max_norm/layer0": 79.2950668334961,
"mean_norm": 67.74863052368164,
"mean_norm/layer0": 67.74863052368164,
"multicode_k": 1,
"output_norm": 15.036479252179465,
"output_norm/layer0": 15.036479252179465,
"step": 9050
},
{
"MSE": 611.8442991129552,
"MSE/layer0": 611.8442991129552,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.95,
"input_norm": 31.99751302719116,
"input_norm/layer0": 31.99751302719116,
"learning_rate": 4.736842105263158e-05,
"loss": 1.8978,
"max_norm": 79.32892608642578,
"max_norm/layer0": 79.32892608642578,
"mean_norm": 67.77827835083008,
"mean_norm/layer0": 67.77827835083008,
"multicode_k": 1,
"output_norm": 15.046743833223978,
"output_norm/layer0": 15.046743833223978,
"step": 9100
},
{
"MSE": 611.9183032226562,
"MSE/layer0": 611.9183032226562,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.96,
"input_norm": 31.99751057942708,
"input_norm/layer0": 31.99751057942708,
"learning_rate": 4.473684210526316e-05,
"loss": 1.8971,
"max_norm": 79.36182403564453,
"max_norm/layer0": 79.36182403564453,
"mean_norm": 67.80632781982422,
"mean_norm/layer0": 67.80632781982422,
"multicode_k": 1,
"output_norm": 15.041637244224557,
"output_norm/layer0": 15.041637244224557,
"step": 9150
},
{
"MSE": 611.5441438802083,
"MSE/layer0": 611.5441438802083,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.96,
"input_norm": 31.99750095685323,
"input_norm/layer0": 31.99750095685323,
"learning_rate": 4.210526315789474e-05,
"loss": 1.8874,
"max_norm": 79.39281463623047,
"max_norm/layer0": 79.39281463623047,
"mean_norm": 67.83284759521484,
"mean_norm/layer0": 67.83284759521484,
"multicode_k": 1,
"output_norm": 15.055660729408274,
"output_norm/layer0": 15.055660729408274,
"step": 9200
},
{
"MSE": 611.5922235107425,
"MSE/layer0": 611.5922235107425,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.97,
"input_norm": 31.99750220934551,
"input_norm/layer0": 31.99750220934551,
"learning_rate": 3.9473684210526316e-05,
"loss": 1.8958,
"max_norm": 79.42273712158203,
"max_norm/layer0": 79.42273712158203,
"mean_norm": 67.85774230957031,
"mean_norm/layer0": 67.85774230957031,
"multicode_k": 1,
"output_norm": 15.055747102101643,
"output_norm/layer0": 15.055747102101643,
"step": 9250
},
{
"MSE": 611.6544079589839,
"MSE/layer0": 611.6544079589839,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.97,
"input_norm": 31.997499033610026,
"input_norm/layer0": 31.997499033610026,
"learning_rate": 3.6842105263157895e-05,
"loss": 1.8915,
"max_norm": 79.44976806640625,
"max_norm/layer0": 79.44976806640625,
"mean_norm": 67.88099670410156,
"mean_norm/layer0": 67.88099670410156,
"multicode_k": 1,
"output_norm": 15.057963668505355,
"output_norm/layer0": 15.057963668505355,
"step": 9300
},
{
"MSE": 611.321997172038,
"MSE/layer0": 611.321997172038,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.98,
"input_norm": 31.997498153050746,
"input_norm/layer0": 31.997498153050746,
"learning_rate": 3.421052631578948e-05,
"loss": 1.8893,
"max_norm": 79.47447967529297,
"max_norm/layer0": 79.47447967529297,
"mean_norm": 67.90266799926758,
"mean_norm/layer0": 67.90266799926758,
"multicode_k": 1,
"output_norm": 15.067080327669775,
"output_norm/layer0": 15.067080327669775,
"step": 9350
},
{
"MSE": 611.4500786336266,
"MSE/layer0": 611.4500786336266,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.98,
"input_norm": 31.997495447794595,
"input_norm/layer0": 31.997495447794595,
"learning_rate": 3.157894736842105e-05,
"loss": 1.894,
"max_norm": 79.49812316894531,
"max_norm/layer0": 79.49812316894531,
"mean_norm": 67.92279815673828,
"mean_norm/layer0": 67.92279815673828,
"multicode_k": 1,
"output_norm": 15.062444001833596,
"output_norm/layer0": 15.062444001833596,
"step": 9400
},
{
"MSE": 611.1107730102539,
"MSE/layer0": 611.1107730102539,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.99,
"input_norm": 31.997485151290896,
"input_norm/layer0": 31.997485151290896,
"learning_rate": 2.8947368421052634e-05,
"loss": 1.8849,
"max_norm": 79.52143096923828,
"max_norm/layer0": 79.52143096923828,
"mean_norm": 67.94132995605469,
"mean_norm/layer0": 67.94132995605469,
"multicode_k": 1,
"output_norm": 15.076274760564168,
"output_norm/layer0": 15.076274760564168,
"step": 9450
},
{
"MSE": 611.3009430948896,
"MSE/layer0": 611.3009430948896,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.99,
"input_norm": 31.99749323209126,
"input_norm/layer0": 31.99749323209126,
"learning_rate": 2.631578947368421e-05,
"loss": 1.8967,
"max_norm": 79.54227447509766,
"max_norm/layer0": 79.54227447509766,
"mean_norm": 67.958251953125,
"mean_norm/layer0": 67.958251953125,
"multicode_k": 1,
"output_norm": 15.06888332684835,
"output_norm/layer0": 15.06888332684835,
"step": 9500
},
{
"epoch": 0.99,
"eval_MSE/layer0": 610.6120883183328,
"eval_accuracy": 0.5425511737500183,
"eval_dead_code_fraction/layer0": 0.0,
"eval_input_norm/layer0": 31.99749166347134,
"eval_loss": 1.8969556093215942,
"eval_multicode_k": 1,
"eval_output_norm/layer0": 15.09320597480496,
"eval_runtime": 73.3984,
"eval_samples_per_second": 62.985,
"eval_steps_per_second": 7.875,
"step": 9500
},
{
"MSE": 610.9202908325196,
"MSE/layer0": 610.9202908325196,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 1.0,
"input_norm": 31.99748815218606,
"input_norm/layer0": 31.99748815218606,
"learning_rate": 2.368421052631579e-05,
"loss": 1.8917,
"max_norm": 79.56092834472656,
"max_norm/layer0": 79.56092834472656,
"mean_norm": 67.97361755371094,
"mean_norm/layer0": 67.97361755371094,
"multicode_k": 1,
"output_norm": 15.084220841725665,
"output_norm/layer0": 15.084220841725665,
"step": 9550
},
{
"MSE": 610.9847631835939,
"MSE/layer0": 610.9847631835939,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 1.0,
"input_norm": 31.997486731211332,
"input_norm/layer0": 31.997486731211332,
"learning_rate": 2.105263157894737e-05,
"loss": 1.8839,
"max_norm": 79.57735443115234,
"max_norm/layer0": 79.57735443115234,
"mean_norm": 67.98743438720703,
"mean_norm/layer0": 67.98743438720703,
"multicode_k": 1,
"output_norm": 15.082832886377968,
"output_norm/layer0": 15.082832886377968,
"step": 9600
},
{
"MSE": 611.2879392496747,
"MSE/layer0": 611.2879392496747,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 1.01,
"input_norm": 31.997482639948544,
"input_norm/layer0": 31.997482639948544,
"learning_rate": 1.8421052631578947e-05,
"loss": 1.8851,
"max_norm": 79.59221649169922,
"max_norm/layer0": 79.59221649169922,
"mean_norm": 67.99962997436523,
"mean_norm/layer0": 67.99962997436523,
"multicode_k": 1,
"output_norm": 15.075549699465444,
"output_norm/layer0": 15.075549699465444,
"step": 9650
},
{
"MSE": 611.3861442057291,
"MSE/layer0": 611.3861442057291,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 1.01,
"input_norm": 31.997482592264817,
"input_norm/layer0": 31.997482592264817,
"learning_rate": 1.5789473684210526e-05,
"loss": 1.8774,
"max_norm": 79.60480499267578,
"max_norm/layer0": 79.60480499267578,
"mean_norm": 68.01019668579102,
"mean_norm/layer0": 68.01019668579102,
"multicode_k": 1,
"output_norm": 15.07396024545034,
"output_norm/layer0": 15.07396024545034,
"step": 9700
},
{
"MSE": 611.4255168660482,
"MSE/layer0": 611.4255168660482,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 1.02,
"input_norm": 31.997479289372762,
"input_norm/layer0": 31.997479289372762,
"learning_rate": 1.3157894736842104e-05,
"loss": 1.8768,
"max_norm": 79.6154556274414,
"max_norm/layer0": 79.6154556274414,
"mean_norm": 68.01911926269531,
"mean_norm/layer0": 68.01911926269531,
"multicode_k": 1,
"output_norm": 15.07339178085327,
"output_norm/layer0": 15.07339178085327,
"step": 9750
},
{
"MSE": 611.6131436157225,
"MSE/layer0": 611.6131436157225,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 1.02,
"input_norm": 31.99748600323995,
"input_norm/layer0": 31.99748600323995,
"learning_rate": 1.0526315789473684e-05,
"loss": 1.8905,
"max_norm": 79.62410736083984,
"max_norm/layer0": 79.62410736083984,
"mean_norm": 68.02641677856445,
"mean_norm/layer0": 68.02641677856445,
"multicode_k": 1,
"output_norm": 15.068124500910447,
"output_norm/layer0": 15.068124500910447,
"step": 9800
},
{
"MSE": 611.5507637532555,
"MSE/layer0": 611.5507637532555,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 1.03,
"input_norm": 31.997482582728068,
"input_norm/layer0": 31.997482582728068,
"learning_rate": 7.894736842105263e-06,
"loss": 1.8798,
"max_norm": 79.63082122802734,
"max_norm/layer0": 79.63082122802734,
"mean_norm": 68.03211212158203,
"mean_norm/layer0": 68.03211212158203,
"multicode_k": 1,
"output_norm": 15.072520554860436,
"output_norm/layer0": 15.072520554860436,
"step": 9850
},
{
"MSE": 611.7908610026044,
"MSE/layer0": 611.7908610026044,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 1.03,
"input_norm": 31.99747860272725,
"input_norm/layer0": 31.99747860272725,
"learning_rate": 5.263157894736842e-06,
"loss": 1.8807,
"max_norm": 79.63563537597656,
"max_norm/layer0": 79.63563537597656,
"mean_norm": 68.03619003295898,
"mean_norm/layer0": 68.03619003295898,
"multicode_k": 1,
"output_norm": 15.06489105542501,
"output_norm/layer0": 15.06489105542501,
"step": 9900
},
{
"MSE": 611.5220219930011,
"MSE/layer0": 611.5220219930011,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 1.04,
"input_norm": 31.997478488286326,
"input_norm/layer0": 31.997478488286326,
"learning_rate": 2.631578947368421e-06,
"loss": 1.8795,
"max_norm": 79.63849639892578,
"max_norm/layer0": 79.63849639892578,
"mean_norm": 68.03863906860352,
"mean_norm/layer0": 68.03863906860352,
"multicode_k": 1,
"output_norm": 15.07397619565328,
"output_norm/layer0": 15.07397619565328,
"step": 9950
},
{
"MSE": 611.5742947387696,
"MSE/layer0": 611.5742947387696,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 1.04,
"input_norm": 31.997486855189003,
"input_norm/layer0": 31.997486855189003,
"learning_rate": 0.0,
"loss": 1.8942,
"max_norm": 79.63946533203125,
"max_norm/layer0": 79.63946533203125,
"mean_norm": 68.03947448730469,
"mean_norm/layer0": 68.03947448730469,
"multicode_k": 1,
"output_norm": 15.069696005185442,
"output_norm/layer0": 15.069696005185442,
"step": 10000
},
{
"epoch": 1.04,
"eval_MSE/layer0": 611.1571513346564,
"eval_accuracy": 0.5429091526514649,
"eval_dead_code_fraction/layer0": 0.0,
"eval_input_norm/layer0": 31.997479090978388,
"eval_loss": 1.89570152759552,
"eval_multicode_k": 1,
"eval_output_norm/layer0": 15.087154228553715,
"eval_runtime": 73.2125,
"eval_samples_per_second": 63.145,
"eval_steps_per_second": 7.895,
"step": 10000
},
{
"MSE": 0.0,
"MSE/layer0": 0.0,
"dead_code_fraction": 1.0,
"dead_code_fraction/layer0": 1.0,
"epoch": 1.04,
"input_norm": 0.0,
"input_norm/layer0": 0.0,
"max_norm": 79.63946533203125,
"max_norm/layer0": 79.63946533203125,
"mean_norm": 68.03947448730469,
"mean_norm/layer0": 68.03947448730469,
"multicode_k": 1,
"output_norm": 0.0,
"output_norm/layer0": 0.0,
"step": 10000,
"total_flos": 3.715683581952e+16,
"train_loss": 2.0762174885749816,
"train_runtime": 12054.7701,
"train_samples_per_second": 39.818,
"train_steps_per_second": 0.83
}
],
"logging_steps": 50,
"max_steps": 10000,
"num_train_epochs": 2,
"save_steps": 500,
"total_flos": 3.715683581952e+16,
"trial_name": null,
"trial_params": null
}