|
{ |
|
"best_metric": 2.132894992828369, |
|
"best_model_checkpoint": "/tmp/wandb/run-20240211_061007-slcnkgcr/files/train_output/checkpoint-10000", |
|
"epoch": 2.042133333333333, |
|
"eval_steps": 500, |
|
"global_step": 10000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"MSE": 891.9713033040365, |
|
"MSE/layer0": 891.9713033040365, |
|
"dead_code_fraction": 0.1506, |
|
"dead_code_fraction/layer0": 0.1506, |
|
"epoch": 0.0, |
|
"input_norm": 31.997233708699547, |
|
"input_norm/layer0": 31.997233708699547, |
|
"learning_rate": 0.0005, |
|
"loss": 8.0845, |
|
"max_norm": 34.580135345458984, |
|
"max_norm/layer0": 34.580135345458984, |
|
"mean_norm": 31.989344596862793, |
|
"mean_norm/layer0": 31.989344596862793, |
|
"multicode_k": 1, |
|
"output_norm": 8.584638833999634, |
|
"output_norm/layer0": 8.584638833999634, |
|
"step": 1 |
|
}, |
|
{ |
|
"MSE": 883.0105907414232, |
|
"MSE/layer0": 883.0105907414232, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.01, |
|
"input_norm": 31.99778711876902, |
|
"input_norm/layer0": 31.99778711876902, |
|
"learning_rate": 0.0005, |
|
"loss": 4.8444, |
|
"max_norm": 34.610191345214844, |
|
"max_norm/layer0": 34.610191345214844, |
|
"mean_norm": 32.02294731140137, |
|
"mean_norm/layer0": 32.02294731140137, |
|
"multicode_k": 1, |
|
"output_norm": 8.645599765842462, |
|
"output_norm/layer0": 8.645599765842462, |
|
"step": 50 |
|
}, |
|
{ |
|
"MSE": 872.9267329915364, |
|
"MSE/layer0": 872.9267329915364, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.01, |
|
"input_norm": 31.998572165171304, |
|
"input_norm/layer0": 31.998572165171304, |
|
"learning_rate": 0.0005, |
|
"loss": 3.9294, |
|
"max_norm": 34.62763595581055, |
|
"max_norm/layer0": 34.62763595581055, |
|
"mean_norm": 32.06278419494629, |
|
"mean_norm/layer0": 32.06278419494629, |
|
"multicode_k": 1, |
|
"output_norm": 8.74148860613505, |
|
"output_norm/layer0": 8.74148860613505, |
|
"step": 100 |
|
}, |
|
{ |
|
"MSE": 866.7590488688152, |
|
"MSE/layer0": 866.7590488688152, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.01, |
|
"input_norm": 31.99865425427754, |
|
"input_norm/layer0": 31.99865425427754, |
|
"learning_rate": 0.0005, |
|
"loss": 3.5413, |
|
"max_norm": 34.65019607543945, |
|
"max_norm/layer0": 34.65019607543945, |
|
"mean_norm": 32.1027717590332, |
|
"mean_norm/layer0": 32.1027717590332, |
|
"multicode_k": 1, |
|
"output_norm": 8.811674615542097, |
|
"output_norm/layer0": 8.811674615542097, |
|
"step": 150 |
|
}, |
|
{ |
|
"MSE": 858.8314244588221, |
|
"MSE/layer0": 858.8314244588221, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.02, |
|
"input_norm": 31.998634125391646, |
|
"input_norm/layer0": 31.998634125391646, |
|
"learning_rate": 0.0005, |
|
"loss": 3.3381, |
|
"max_norm": 34.73014831542969, |
|
"max_norm/layer0": 34.73014831542969, |
|
"mean_norm": 32.17362403869629, |
|
"mean_norm/layer0": 32.17362403869629, |
|
"multicode_k": 1, |
|
"output_norm": 8.925555121103923, |
|
"output_norm/layer0": 8.925555121103923, |
|
"step": 200 |
|
}, |
|
{ |
|
"MSE": 849.6408699544276, |
|
"MSE/layer0": 849.6408699544276, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.03, |
|
"input_norm": 31.9986141427358, |
|
"input_norm/layer0": 31.9986141427358, |
|
"learning_rate": 0.0005, |
|
"loss": 3.2486, |
|
"max_norm": 34.8281364440918, |
|
"max_norm/layer0": 34.8281364440918, |
|
"mean_norm": 32.26718330383301, |
|
"mean_norm/layer0": 32.26718330383301, |
|
"multicode_k": 1, |
|
"output_norm": 9.101092262268068, |
|
"output_norm/layer0": 9.101092262268068, |
|
"step": 250 |
|
}, |
|
{ |
|
"MSE": 841.0051658121741, |
|
"MSE/layer0": 841.0051658121741, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.03, |
|
"input_norm": 31.99862952232361, |
|
"input_norm/layer0": 31.99862952232361, |
|
"learning_rate": 0.0005, |
|
"loss": 3.1503, |
|
"max_norm": 34.946006774902344, |
|
"max_norm/layer0": 34.946006774902344, |
|
"mean_norm": 32.361915588378906, |
|
"mean_norm/layer0": 32.361915588378906, |
|
"multicode_k": 1, |
|
"output_norm": 9.305952178637185, |
|
"output_norm/layer0": 9.305952178637185, |
|
"step": 300 |
|
}, |
|
{ |
|
"MSE": 833.1103855387371, |
|
"MSE/layer0": 833.1103855387371, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.04, |
|
"input_norm": 31.998617506027223, |
|
"input_norm/layer0": 31.998617506027223, |
|
"learning_rate": 0.0005, |
|
"loss": 3.0966, |
|
"max_norm": 35.09696578979492, |
|
"max_norm/layer0": 35.09696578979492, |
|
"mean_norm": 32.463951110839844, |
|
"mean_norm/layer0": 32.463951110839844, |
|
"multicode_k": 1, |
|
"output_norm": 9.513547644615176, |
|
"output_norm/layer0": 9.513547644615176, |
|
"step": 350 |
|
}, |
|
{ |
|
"MSE": 824.8635622151694, |
|
"MSE/layer0": 824.8635622151694, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.04, |
|
"input_norm": 31.998617092768363, |
|
"input_norm/layer0": 31.998617092768363, |
|
"learning_rate": 0.0005, |
|
"loss": 3.0998, |
|
"max_norm": 35.28767013549805, |
|
"max_norm/layer0": 35.28767013549805, |
|
"mean_norm": 32.571420669555664, |
|
"mean_norm/layer0": 32.571420669555664, |
|
"multicode_k": 1, |
|
"output_norm": 9.74717748324076, |
|
"output_norm/layer0": 9.74717748324076, |
|
"step": 400 |
|
}, |
|
{ |
|
"MSE": 817.218793334961, |
|
"MSE/layer0": 817.218793334961, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.04, |
|
"input_norm": 31.99862334251403, |
|
"input_norm/layer0": 31.99862334251403, |
|
"learning_rate": 0.0005, |
|
"loss": 3.0603, |
|
"max_norm": 35.4771842956543, |
|
"max_norm/layer0": 35.4771842956543, |
|
"mean_norm": 32.68177795410156, |
|
"mean_norm/layer0": 32.68177795410156, |
|
"multicode_k": 1, |
|
"output_norm": 9.985308513641357, |
|
"output_norm/layer0": 9.985308513641357, |
|
"step": 450 |
|
}, |
|
{ |
|
"MSE": 809.1558084106446, |
|
"MSE/layer0": 809.1558084106446, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.05, |
|
"input_norm": 31.998615137736, |
|
"input_norm/layer0": 31.998615137736, |
|
"learning_rate": 0.0005, |
|
"loss": 3.0494, |
|
"max_norm": 35.6486701965332, |
|
"max_norm/layer0": 35.6486701965332, |
|
"mean_norm": 32.793779373168945, |
|
"mean_norm/layer0": 32.793779373168945, |
|
"multicode_k": 1, |
|
"output_norm": 10.232081251144415, |
|
"output_norm/layer0": 10.232081251144415, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_MSE/layer0": 805.1675846628777, |
|
"eval_accuracy": 0.41770872781318447, |
|
"eval_dead_code_fraction/layer0": 0.0, |
|
"eval_input_norm/layer0": 31.998606410347342, |
|
"eval_loss": 2.992654323577881, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 10.360000263063938, |
|
"eval_runtime": 159.8847, |
|
"eval_samples_per_second": 28.915, |
|
"eval_steps_per_second": 1.808, |
|
"step": 500 |
|
}, |
|
{ |
|
"MSE": 801.7215725708003, |
|
"MSE/layer0": 801.7215725708003, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.06, |
|
"input_norm": 31.998598546981817, |
|
"input_norm/layer0": 31.998598546981817, |
|
"learning_rate": 0.0005, |
|
"loss": 2.9547, |
|
"max_norm": 35.86976623535156, |
|
"max_norm/layer0": 35.86976623535156, |
|
"mean_norm": 32.91193962097168, |
|
"mean_norm/layer0": 32.91193962097168, |
|
"multicode_k": 1, |
|
"output_norm": 10.47719025929769, |
|
"output_norm/layer0": 10.47719025929769, |
|
"step": 550 |
|
}, |
|
{ |
|
"MSE": 794.043483174642, |
|
"MSE/layer0": 794.043483174642, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.06, |
|
"input_norm": 31.99859639167787, |
|
"input_norm/layer0": 31.99859639167787, |
|
"learning_rate": 0.0005, |
|
"loss": 2.9506, |
|
"max_norm": 36.08134078979492, |
|
"max_norm/layer0": 36.08134078979492, |
|
"mean_norm": 33.03110313415527, |
|
"mean_norm/layer0": 33.03110313415527, |
|
"multicode_k": 1, |
|
"output_norm": 10.729146582285566, |
|
"output_norm/layer0": 10.729146582285566, |
|
"step": 600 |
|
}, |
|
{ |
|
"MSE": 786.3193520100913, |
|
"MSE/layer0": 786.3193520100913, |
|
"dead_code_fraction": 5e-05, |
|
"dead_code_fraction/layer0": 5e-05, |
|
"epoch": 0.07, |
|
"input_norm": 31.99857716878254, |
|
"input_norm/layer0": 31.99857716878254, |
|
"learning_rate": 0.0005, |
|
"loss": 2.8944, |
|
"max_norm": 36.33954620361328, |
|
"max_norm/layer0": 36.33954620361328, |
|
"mean_norm": 33.15106773376465, |
|
"mean_norm/layer0": 33.15106773376465, |
|
"multicode_k": 1, |
|
"output_norm": 10.987898168563845, |
|
"output_norm/layer0": 10.987898168563845, |
|
"step": 650 |
|
}, |
|
{ |
|
"MSE": 780.0598099772137, |
|
"MSE/layer0": 780.0598099772137, |
|
"dead_code_fraction": 0.0001, |
|
"dead_code_fraction/layer0": 0.0001, |
|
"epoch": 0.07, |
|
"input_norm": 31.998565645217887, |
|
"input_norm/layer0": 31.998565645217887, |
|
"learning_rate": 0.0005, |
|
"loss": 2.8643, |
|
"max_norm": 36.55862808227539, |
|
"max_norm/layer0": 36.55862808227539, |
|
"mean_norm": 33.269744873046875, |
|
"mean_norm/layer0": 33.269744873046875, |
|
"multicode_k": 1, |
|
"output_norm": 11.218051005999246, |
|
"output_norm/layer0": 11.218051005999246, |
|
"step": 700 |
|
}, |
|
{ |
|
"MSE": 772.4797055053714, |
|
"MSE/layer0": 772.4797055053714, |
|
"dead_code_fraction": 0.00045, |
|
"dead_code_fraction/layer0": 0.00045, |
|
"epoch": 0.07, |
|
"input_norm": 31.998559678395594, |
|
"input_norm/layer0": 31.998559678395594, |
|
"learning_rate": 0.0005, |
|
"loss": 2.8618, |
|
"max_norm": 36.793521881103516, |
|
"max_norm/layer0": 36.793521881103516, |
|
"mean_norm": 33.39421844482422, |
|
"mean_norm/layer0": 33.39421844482422, |
|
"multicode_k": 1, |
|
"output_norm": 11.470201053619387, |
|
"output_norm/layer0": 11.470201053619387, |
|
"step": 750 |
|
}, |
|
{ |
|
"MSE": 766.037492879232, |
|
"MSE/layer0": 766.037492879232, |
|
"dead_code_fraction": 0.00055, |
|
"dead_code_fraction/layer0": 0.00055, |
|
"epoch": 0.08, |
|
"input_norm": 31.99854364713033, |
|
"input_norm/layer0": 31.99854364713033, |
|
"learning_rate": 0.0005, |
|
"loss": 2.8403, |
|
"max_norm": 37.0079231262207, |
|
"max_norm/layer0": 37.0079231262207, |
|
"mean_norm": 33.52132034301758, |
|
"mean_norm/layer0": 33.52132034301758, |
|
"multicode_k": 1, |
|
"output_norm": 11.711471532185875, |
|
"output_norm/layer0": 11.711471532185875, |
|
"step": 800 |
|
}, |
|
{ |
|
"MSE": 759.9610600789387, |
|
"MSE/layer0": 759.9610600789387, |
|
"dead_code_fraction": 0.00135, |
|
"dead_code_fraction/layer0": 0.00135, |
|
"epoch": 0.09, |
|
"input_norm": 31.998529828389472, |
|
"input_norm/layer0": 31.998529828389472, |
|
"learning_rate": 0.0005, |
|
"loss": 2.7453, |
|
"max_norm": 37.20747375488281, |
|
"max_norm/layer0": 37.20747375488281, |
|
"mean_norm": 33.64577674865723, |
|
"mean_norm/layer0": 33.64577674865723, |
|
"multicode_k": 1, |
|
"output_norm": 11.93199801921844, |
|
"output_norm/layer0": 11.93199801921844, |
|
"step": 850 |
|
}, |
|
{ |
|
"MSE": 753.5576912434896, |
|
"MSE/layer0": 753.5576912434896, |
|
"dead_code_fraction": 0.00205, |
|
"dead_code_fraction/layer0": 0.00205, |
|
"epoch": 0.09, |
|
"input_norm": 31.99852911949157, |
|
"input_norm/layer0": 31.99852911949157, |
|
"learning_rate": 0.0005, |
|
"loss": 2.7975, |
|
"max_norm": 37.432743072509766, |
|
"max_norm/layer0": 37.432743072509766, |
|
"mean_norm": 33.778066635131836, |
|
"mean_norm/layer0": 33.778066635131836, |
|
"multicode_k": 1, |
|
"output_norm": 12.165767738024394, |
|
"output_norm/layer0": 12.165767738024394, |
|
"step": 900 |
|
}, |
|
{ |
|
"MSE": 747.6473927815753, |
|
"MSE/layer0": 747.6473927815753, |
|
"dead_code_fraction": 0.00335, |
|
"dead_code_fraction/layer0": 0.00335, |
|
"epoch": 0.1, |
|
"input_norm": 31.998517106374106, |
|
"input_norm/layer0": 31.998517106374106, |
|
"learning_rate": 0.0005, |
|
"loss": 2.7378, |
|
"max_norm": 37.62055969238281, |
|
"max_norm/layer0": 37.62055969238281, |
|
"mean_norm": 33.90963554382324, |
|
"mean_norm/layer0": 33.90963554382324, |
|
"multicode_k": 1, |
|
"output_norm": 12.390189347267153, |
|
"output_norm/layer0": 12.390189347267153, |
|
"step": 950 |
|
}, |
|
{ |
|
"MSE": 742.6674826049805, |
|
"MSE/layer0": 742.6674826049805, |
|
"dead_code_fraction": 0.0048, |
|
"dead_code_fraction/layer0": 0.0048, |
|
"epoch": 0.1, |
|
"input_norm": 31.998499689102182, |
|
"input_norm/layer0": 31.998499689102182, |
|
"learning_rate": 0.0005, |
|
"loss": 2.6986, |
|
"max_norm": 37.880615234375, |
|
"max_norm/layer0": 37.880615234375, |
|
"mean_norm": 34.04428672790527, |
|
"mean_norm/layer0": 34.04428672790527, |
|
"multicode_k": 1, |
|
"output_norm": 12.59642965157827, |
|
"output_norm/layer0": 12.59642965157827, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_MSE/layer0": 739.3243520424373, |
|
"eval_accuracy": 0.44721058737930897, |
|
"eval_dead_code_fraction/layer0": 0.00845, |
|
"eval_input_norm/layer0": 31.998487053973697, |
|
"eval_loss": 2.707960367202759, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 12.71647696584792, |
|
"eval_runtime": 157.5908, |
|
"eval_samples_per_second": 29.335, |
|
"eval_steps_per_second": 1.834, |
|
"step": 1000 |
|
}, |
|
{ |
|
"MSE": 736.2877898152667, |
|
"MSE/layer0": 736.2877898152667, |
|
"dead_code_fraction": 0.00735, |
|
"dead_code_fraction/layer0": 0.00735, |
|
"epoch": 0.1, |
|
"input_norm": 31.998484554290766, |
|
"input_norm/layer0": 31.998484554290766, |
|
"learning_rate": 0.0005, |
|
"loss": 2.7222, |
|
"max_norm": 38.21133804321289, |
|
"max_norm/layer0": 38.21133804321289, |
|
"mean_norm": 34.17984199523926, |
|
"mean_norm/layer0": 34.17984199523926, |
|
"multicode_k": 1, |
|
"output_norm": 12.82279133001963, |
|
"output_norm/layer0": 12.82279133001963, |
|
"step": 1050 |
|
}, |
|
{ |
|
"MSE": 731.6754523722336, |
|
"MSE/layer0": 731.6754523722336, |
|
"dead_code_fraction": 0.01015, |
|
"dead_code_fraction/layer0": 0.01015, |
|
"epoch": 0.11, |
|
"input_norm": 31.998473711013787, |
|
"input_norm/layer0": 31.998473711013787, |
|
"learning_rate": 0.0005, |
|
"loss": 2.652, |
|
"max_norm": 38.533973693847656, |
|
"max_norm/layer0": 38.533973693847656, |
|
"mean_norm": 34.31424903869629, |
|
"mean_norm/layer0": 34.31424903869629, |
|
"multicode_k": 1, |
|
"output_norm": 13.017293116251633, |
|
"output_norm/layer0": 13.017293116251633, |
|
"step": 1100 |
|
}, |
|
{ |
|
"MSE": 726.8081079101562, |
|
"MSE/layer0": 726.8081079101562, |
|
"dead_code_fraction": 0.013, |
|
"dead_code_fraction/layer0": 0.013, |
|
"epoch": 0.12, |
|
"input_norm": 31.99846080144247, |
|
"input_norm/layer0": 31.99846080144247, |
|
"learning_rate": 0.0005, |
|
"loss": 2.6519, |
|
"max_norm": 38.87154769897461, |
|
"max_norm/layer0": 38.87154769897461, |
|
"mean_norm": 34.454498291015625, |
|
"mean_norm/layer0": 34.454498291015625, |
|
"multicode_k": 1, |
|
"output_norm": 13.209378539721174, |
|
"output_norm/layer0": 13.209378539721174, |
|
"step": 1150 |
|
}, |
|
{ |
|
"MSE": 722.3268162027996, |
|
"MSE/layer0": 722.3268162027996, |
|
"dead_code_fraction": 0.01565, |
|
"dead_code_fraction/layer0": 0.01565, |
|
"epoch": 0.12, |
|
"input_norm": 31.998446766535434, |
|
"input_norm/layer0": 31.998446766535434, |
|
"learning_rate": 0.0005, |
|
"loss": 2.6464, |
|
"max_norm": 39.23857879638672, |
|
"max_norm/layer0": 39.23857879638672, |
|
"mean_norm": 34.597312927246094, |
|
"mean_norm/layer0": 34.597312927246094, |
|
"multicode_k": 1, |
|
"output_norm": 13.40400979042053, |
|
"output_norm/layer0": 13.40400979042053, |
|
"step": 1200 |
|
}, |
|
{ |
|
"MSE": 717.3231912231446, |
|
"MSE/layer0": 717.3231912231446, |
|
"dead_code_fraction": 0.0241, |
|
"dead_code_fraction/layer0": 0.0241, |
|
"epoch": 0.12, |
|
"input_norm": 31.998441489537555, |
|
"input_norm/layer0": 31.998441489537555, |
|
"learning_rate": 0.0005, |
|
"loss": 2.6563, |
|
"max_norm": 39.60569381713867, |
|
"max_norm/layer0": 39.60569381713867, |
|
"mean_norm": 34.73863220214844, |
|
"mean_norm/layer0": 34.73863220214844, |
|
"multicode_k": 1, |
|
"output_norm": 13.590513488451638, |
|
"output_norm/layer0": 13.590513488451638, |
|
"step": 1250 |
|
}, |
|
{ |
|
"MSE": 713.6523872884117, |
|
"MSE/layer0": 713.6523872884117, |
|
"dead_code_fraction": 0.02485, |
|
"dead_code_fraction/layer0": 0.02485, |
|
"epoch": 0.13, |
|
"input_norm": 31.998419742584225, |
|
"input_norm/layer0": 31.998419742584225, |
|
"learning_rate": 0.0005, |
|
"loss": 2.5806, |
|
"max_norm": 39.939239501953125, |
|
"max_norm/layer0": 39.939239501953125, |
|
"mean_norm": 34.87986946105957, |
|
"mean_norm/layer0": 34.87986946105957, |
|
"multicode_k": 1, |
|
"output_norm": 13.766959317525227, |
|
"output_norm/layer0": 13.766959317525227, |
|
"step": 1300 |
|
}, |
|
{ |
|
"MSE": 709.5852165730794, |
|
"MSE/layer0": 709.5852165730794, |
|
"dead_code_fraction": 0.02925, |
|
"dead_code_fraction/layer0": 0.02925, |
|
"epoch": 0.14, |
|
"input_norm": 31.998412898381545, |
|
"input_norm/layer0": 31.998412898381545, |
|
"learning_rate": 0.0005, |
|
"loss": 2.5789, |
|
"max_norm": 40.28993225097656, |
|
"max_norm/layer0": 40.28993225097656, |
|
"mean_norm": 35.022348403930664, |
|
"mean_norm/layer0": 35.022348403930664, |
|
"multicode_k": 1, |
|
"output_norm": 13.93345036347707, |
|
"output_norm/layer0": 13.93345036347707, |
|
"step": 1350 |
|
}, |
|
{ |
|
"MSE": 705.2143248494463, |
|
"MSE/layer0": 705.2143248494463, |
|
"dead_code_fraction": 0.03375, |
|
"dead_code_fraction/layer0": 0.03375, |
|
"epoch": 0.14, |
|
"input_norm": 31.9984123802185, |
|
"input_norm/layer0": 31.9984123802185, |
|
"learning_rate": 0.0005, |
|
"loss": 2.5943, |
|
"max_norm": 40.63530349731445, |
|
"max_norm/layer0": 40.63530349731445, |
|
"mean_norm": 35.164276123046875, |
|
"mean_norm/layer0": 35.164276123046875, |
|
"multicode_k": 1, |
|
"output_norm": 14.105911358197524, |
|
"output_norm/layer0": 14.105911358197524, |
|
"step": 1400 |
|
}, |
|
{ |
|
"MSE": 702.3593349202476, |
|
"MSE/layer0": 702.3593349202476, |
|
"dead_code_fraction": 0.0404, |
|
"dead_code_fraction/layer0": 0.0404, |
|
"epoch": 0.14, |
|
"input_norm": 31.99839937845865, |
|
"input_norm/layer0": 31.99839937845865, |
|
"learning_rate": 0.0005, |
|
"loss": 2.5407, |
|
"max_norm": 40.98182678222656, |
|
"max_norm/layer0": 40.98182678222656, |
|
"mean_norm": 35.30343246459961, |
|
"mean_norm/layer0": 35.30343246459961, |
|
"multicode_k": 1, |
|
"output_norm": 14.2450444761912, |
|
"output_norm/layer0": 14.2450444761912, |
|
"step": 1450 |
|
}, |
|
{ |
|
"MSE": 699.0307844034837, |
|
"MSE/layer0": 699.0307844034837, |
|
"dead_code_fraction": 0.04535, |
|
"dead_code_fraction/layer0": 0.04535, |
|
"epoch": 0.15, |
|
"input_norm": 31.998390986124676, |
|
"input_norm/layer0": 31.998390986124676, |
|
"learning_rate": 0.0005, |
|
"loss": 2.5145, |
|
"max_norm": 41.328433990478516, |
|
"max_norm/layer0": 41.328433990478516, |
|
"mean_norm": 35.445411682128906, |
|
"mean_norm/layer0": 35.445411682128906, |
|
"multicode_k": 1, |
|
"output_norm": 14.399013953208918, |
|
"output_norm/layer0": 14.399013953208918, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_MSE/layer0": 697.1178701616536, |
|
"eval_accuracy": 0.4637486628652817, |
|
"eval_dead_code_fraction/layer0": 0.05465, |
|
"eval_input_norm/layer0": 31.99837304089923, |
|
"eval_loss": 2.525156259536743, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 14.48893911880305, |
|
"eval_runtime": 156.9005, |
|
"eval_samples_per_second": 29.465, |
|
"eval_steps_per_second": 1.842, |
|
"step": 1500 |
|
}, |
|
{ |
|
"MSE": 696.0442759195965, |
|
"MSE/layer0": 696.0442759195965, |
|
"dead_code_fraction": 0.05145, |
|
"dead_code_fraction/layer0": 0.05145, |
|
"epoch": 0.15, |
|
"input_norm": 31.99836520512899, |
|
"input_norm/layer0": 31.99836520512899, |
|
"learning_rate": 0.0005, |
|
"loss": 2.4631, |
|
"max_norm": 41.6606559753418, |
|
"max_norm/layer0": 41.6606559753418, |
|
"mean_norm": 35.58424758911133, |
|
"mean_norm/layer0": 35.58424758911133, |
|
"multicode_k": 1, |
|
"output_norm": 14.54295777956645, |
|
"output_norm/layer0": 14.54295777956645, |
|
"step": 1550 |
|
}, |
|
{ |
|
"MSE": 691.8516132609051, |
|
"MSE/layer0": 691.8516132609051, |
|
"dead_code_fraction": 0.0558, |
|
"dead_code_fraction/layer0": 0.0558, |
|
"epoch": 0.16, |
|
"input_norm": 31.998375968933097, |
|
"input_norm/layer0": 31.998375968933097, |
|
"learning_rate": 0.0005, |
|
"loss": 2.5501, |
|
"max_norm": 42.08574676513672, |
|
"max_norm/layer0": 42.08574676513672, |
|
"mean_norm": 35.72518730163574, |
|
"mean_norm/layer0": 35.72518730163574, |
|
"multicode_k": 1, |
|
"output_norm": 14.692513732910157, |
|
"output_norm/layer0": 14.692513732910157, |
|
"step": 1600 |
|
}, |
|
{ |
|
"MSE": 688.7181396484375, |
|
"MSE/layer0": 688.7181396484375, |
|
"dead_code_fraction": 0.0595, |
|
"dead_code_fraction/layer0": 0.0595, |
|
"epoch": 0.17, |
|
"input_norm": 31.99835859616598, |
|
"input_norm/layer0": 31.99835859616598, |
|
"learning_rate": 0.0005, |
|
"loss": 2.4699, |
|
"max_norm": 42.610233306884766, |
|
"max_norm/layer0": 42.610233306884766, |
|
"mean_norm": 35.86595916748047, |
|
"mean_norm/layer0": 35.86595916748047, |
|
"multicode_k": 1, |
|
"output_norm": 14.833582207361854, |
|
"output_norm/layer0": 14.833582207361854, |
|
"step": 1650 |
|
}, |
|
{ |
|
"MSE": 685.5445822143549, |
|
"MSE/layer0": 685.5445822143549, |
|
"dead_code_fraction": 0.06595, |
|
"dead_code_fraction/layer0": 0.06595, |
|
"epoch": 0.17, |
|
"input_norm": 31.99835782368978, |
|
"input_norm/layer0": 31.99835782368978, |
|
"learning_rate": 0.0005, |
|
"loss": 2.5014, |
|
"max_norm": 43.15216064453125, |
|
"max_norm/layer0": 43.15216064453125, |
|
"mean_norm": 36.00602149963379, |
|
"mean_norm/layer0": 36.00602149963379, |
|
"multicode_k": 1, |
|
"output_norm": 14.96381513118744, |
|
"output_norm/layer0": 14.96381513118744, |
|
"step": 1700 |
|
}, |
|
{ |
|
"MSE": 683.2388099161783, |
|
"MSE/layer0": 683.2388099161783, |
|
"dead_code_fraction": 0.0708, |
|
"dead_code_fraction/layer0": 0.0708, |
|
"epoch": 0.17, |
|
"input_norm": 31.998353064854925, |
|
"input_norm/layer0": 31.998353064854925, |
|
"learning_rate": 0.0005, |
|
"loss": 2.4762, |
|
"max_norm": 43.683807373046875, |
|
"max_norm/layer0": 43.683807373046875, |
|
"mean_norm": 36.14344596862793, |
|
"mean_norm/layer0": 36.14344596862793, |
|
"multicode_k": 1, |
|
"output_norm": 15.08479848066965, |
|
"output_norm/layer0": 15.08479848066965, |
|
"step": 1750 |
|
}, |
|
{ |
|
"MSE": 680.5147140502929, |
|
"MSE/layer0": 680.5147140502929, |
|
"dead_code_fraction": 0.0711, |
|
"dead_code_fraction/layer0": 0.0711, |
|
"epoch": 0.18, |
|
"input_norm": 31.998323942820228, |
|
"input_norm/layer0": 31.998323942820228, |
|
"learning_rate": 0.0005, |
|
"loss": 2.4017, |
|
"max_norm": 44.204158782958984, |
|
"max_norm/layer0": 44.204158782958984, |
|
"mean_norm": 36.281328201293945, |
|
"mean_norm/layer0": 36.281328201293945, |
|
"multicode_k": 1, |
|
"output_norm": 15.21150853157043, |
|
"output_norm/layer0": 15.21150853157043, |
|
"step": 1800 |
|
}, |
|
{ |
|
"MSE": 677.8235699462891, |
|
"MSE/layer0": 677.8235699462891, |
|
"dead_code_fraction": 0.0789, |
|
"dead_code_fraction/layer0": 0.0789, |
|
"epoch": 0.18, |
|
"input_norm": 31.99832211176553, |
|
"input_norm/layer0": 31.99832211176553, |
|
"learning_rate": 0.0005, |
|
"loss": 2.4204, |
|
"max_norm": 44.73421096801758, |
|
"max_norm/layer0": 44.73421096801758, |
|
"mean_norm": 36.41860580444336, |
|
"mean_norm/layer0": 36.41860580444336, |
|
"multicode_k": 1, |
|
"output_norm": 15.32913914521535, |
|
"output_norm/layer0": 15.32913914521535, |
|
"step": 1850 |
|
}, |
|
{ |
|
"MSE": 674.8260657755535, |
|
"MSE/layer0": 674.8260657755535, |
|
"dead_code_fraction": 0.0859, |
|
"dead_code_fraction/layer0": 0.0859, |
|
"epoch": 0.19, |
|
"input_norm": 31.998327109018952, |
|
"input_norm/layer0": 31.998327109018952, |
|
"learning_rate": 0.0005, |
|
"loss": 2.4612, |
|
"max_norm": 45.264217376708984, |
|
"max_norm/layer0": 45.264217376708984, |
|
"mean_norm": 36.55377197265625, |
|
"mean_norm/layer0": 36.55377197265625, |
|
"multicode_k": 1, |
|
"output_norm": 15.449233846664427, |
|
"output_norm/layer0": 15.449233846664427, |
|
"step": 1900 |
|
}, |
|
{ |
|
"MSE": 672.4308366902667, |
|
"MSE/layer0": 672.4308366902667, |
|
"dead_code_fraction": 0.08975, |
|
"dead_code_fraction/layer0": 0.08975, |
|
"epoch": 0.2, |
|
"input_norm": 31.998313461939492, |
|
"input_norm/layer0": 31.998313461939492, |
|
"learning_rate": 0.0005, |
|
"loss": 2.413, |
|
"max_norm": 45.7476692199707, |
|
"max_norm/layer0": 45.7476692199707, |
|
"mean_norm": 36.687320709228516, |
|
"mean_norm/layer0": 36.687320709228516, |
|
"multicode_k": 1, |
|
"output_norm": 15.564360074996952, |
|
"output_norm/layer0": 15.564360074996952, |
|
"step": 1950 |
|
}, |
|
{ |
|
"MSE": 669.9350853474932, |
|
"MSE/layer0": 669.9350853474932, |
|
"dead_code_fraction": 0.09495, |
|
"dead_code_fraction/layer0": 0.09495, |
|
"epoch": 0.2, |
|
"input_norm": 31.998307892481467, |
|
"input_norm/layer0": 31.998307892481467, |
|
"learning_rate": 0.0005, |
|
"loss": 2.4197, |
|
"max_norm": 46.2595100402832, |
|
"max_norm/layer0": 46.2595100402832, |
|
"mean_norm": 36.82127571105957, |
|
"mean_norm/layer0": 36.82127571105957, |
|
"multicode_k": 1, |
|
"output_norm": 15.671763955752056, |
|
"output_norm/layer0": 15.671763955752056, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_MSE/layer0": 670.0254334077002, |
|
"eval_accuracy": 0.47584128742153486, |
|
"eval_dead_code_fraction/layer0": 0.0988, |
|
"eval_input_norm/layer0": 31.99830309178647, |
|
"eval_loss": 2.409283399581909, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 15.728763990528059, |
|
"eval_runtime": 158.0617, |
|
"eval_samples_per_second": 29.248, |
|
"eval_steps_per_second": 1.828, |
|
"step": 2000 |
|
}, |
|
{ |
|
"MSE": 667.9600658162435, |
|
"MSE/layer0": 667.9600658162435, |
|
"dead_code_fraction": 0.09825, |
|
"dead_code_fraction/layer0": 0.09825, |
|
"epoch": 0.2, |
|
"input_norm": 31.99829890569051, |
|
"input_norm/layer0": 31.99829890569051, |
|
"learning_rate": 0.0005, |
|
"loss": 2.3908, |
|
"max_norm": 46.76186752319336, |
|
"max_norm/layer0": 46.76186752319336, |
|
"mean_norm": 36.954044342041016, |
|
"mean_norm/layer0": 36.954044342041016, |
|
"multicode_k": 1, |
|
"output_norm": 15.786985732714339, |
|
"output_norm/layer0": 15.786985732714339, |
|
"step": 2050 |
|
}, |
|
{ |
|
"MSE": 665.8677533976238, |
|
"MSE/layer0": 665.8677533976238, |
|
"dead_code_fraction": 0.10105, |
|
"dead_code_fraction/layer0": 0.10105, |
|
"epoch": 0.21, |
|
"input_norm": 31.998287776311233, |
|
"input_norm/layer0": 31.998287776311233, |
|
"learning_rate": 0.0005, |
|
"loss": 2.3532, |
|
"max_norm": 47.23879623413086, |
|
"max_norm/layer0": 47.23879623413086, |
|
"mean_norm": 37.08414268493652, |
|
"mean_norm/layer0": 37.08414268493652, |
|
"multicode_k": 1, |
|
"output_norm": 15.887771523793544, |
|
"output_norm/layer0": 15.887771523793544, |
|
"step": 2100 |
|
}, |
|
{ |
|
"MSE": 664.0484969075521, |
|
"MSE/layer0": 664.0484969075521, |
|
"dead_code_fraction": 0.10515, |
|
"dead_code_fraction/layer0": 0.10515, |
|
"epoch": 0.21, |
|
"input_norm": 31.998289143244435, |
|
"input_norm/layer0": 31.998289143244435, |
|
"learning_rate": 0.0005, |
|
"loss": 2.3835, |
|
"max_norm": 47.72446823120117, |
|
"max_norm/layer0": 47.72446823120117, |
|
"mean_norm": 37.21368408203125, |
|
"mean_norm/layer0": 37.21368408203125, |
|
"multicode_k": 1, |
|
"output_norm": 15.987558364868171, |
|
"output_norm/layer0": 15.987558364868171, |
|
"step": 2150 |
|
}, |
|
{ |
|
"MSE": 662.043323059082, |
|
"MSE/layer0": 662.043323059082, |
|
"dead_code_fraction": 0.11065, |
|
"dead_code_fraction/layer0": 0.11065, |
|
"epoch": 0.22, |
|
"input_norm": 31.998284489313747, |
|
"input_norm/layer0": 31.998284489313747, |
|
"learning_rate": 0.0005, |
|
"loss": 2.3711, |
|
"max_norm": 48.21998596191406, |
|
"max_norm/layer0": 48.21998596191406, |
|
"mean_norm": 37.34214973449707, |
|
"mean_norm/layer0": 37.34214973449707, |
|
"multicode_k": 1, |
|
"output_norm": 16.084624527295432, |
|
"output_norm/layer0": 16.084624527295432, |
|
"step": 2200 |
|
}, |
|
{ |
|
"MSE": 660.071201883952, |
|
"MSE/layer0": 660.071201883952, |
|
"dead_code_fraction": 0.1138, |
|
"dead_code_fraction/layer0": 0.1138, |
|
"epoch": 0.23, |
|
"input_norm": 31.998274552027382, |
|
"input_norm/layer0": 31.998274552027382, |
|
"learning_rate": 0.0005, |
|
"loss": 2.3361, |
|
"max_norm": 48.656124114990234, |
|
"max_norm/layer0": 48.656124114990234, |
|
"mean_norm": 37.46707344055176, |
|
"mean_norm/layer0": 37.46707344055176, |
|
"multicode_k": 1, |
|
"output_norm": 16.1770029671987, |
|
"output_norm/layer0": 16.1770029671987, |
|
"step": 2250 |
|
}, |
|
{ |
|
"MSE": 658.2848066202794, |
|
"MSE/layer0": 658.2848066202794, |
|
"dead_code_fraction": 0.11715, |
|
"dead_code_fraction/layer0": 0.11715, |
|
"epoch": 0.23, |
|
"input_norm": 31.998281342188513, |
|
"input_norm/layer0": 31.998281342188513, |
|
"learning_rate": 0.0005, |
|
"loss": 2.3697, |
|
"max_norm": 49.14850616455078, |
|
"max_norm/layer0": 49.14850616455078, |
|
"mean_norm": 37.592119216918945, |
|
"mean_norm/layer0": 37.592119216918945, |
|
"multicode_k": 1, |
|
"output_norm": 16.273267321586616, |
|
"output_norm/layer0": 16.273267321586616, |
|
"step": 2300 |
|
}, |
|
{ |
|
"MSE": 656.6614913940434, |
|
"MSE/layer0": 656.6614913940434, |
|
"dead_code_fraction": 0.1208, |
|
"dead_code_fraction/layer0": 0.1208, |
|
"epoch": 0.23, |
|
"input_norm": 31.99827545166017, |
|
"input_norm/layer0": 31.99827545166017, |
|
"learning_rate": 0.0005, |
|
"loss": 2.3691, |
|
"max_norm": 49.611228942871094, |
|
"max_norm/layer0": 49.611228942871094, |
|
"mean_norm": 37.71496772766113, |
|
"mean_norm/layer0": 37.71496772766113, |
|
"multicode_k": 1, |
|
"output_norm": 16.361617434819536, |
|
"output_norm/layer0": 16.361617434819536, |
|
"step": 2350 |
|
}, |
|
{ |
|
"MSE": 654.7551118977863, |
|
"MSE/layer0": 654.7551118977863, |
|
"dead_code_fraction": 0.12205, |
|
"dead_code_fraction/layer0": 0.12205, |
|
"epoch": 0.24, |
|
"input_norm": 31.998258228302007, |
|
"input_norm/layer0": 31.998258228302007, |
|
"learning_rate": 0.0005, |
|
"loss": 2.3413, |
|
"max_norm": 50.082008361816406, |
|
"max_norm/layer0": 50.082008361816406, |
|
"mean_norm": 37.836740493774414, |
|
"mean_norm/layer0": 37.836740493774414, |
|
"multicode_k": 1, |
|
"output_norm": 16.442067163785307, |
|
"output_norm/layer0": 16.442067163785307, |
|
"step": 2400 |
|
}, |
|
{ |
|
"MSE": 653.2320398966472, |
|
"MSE/layer0": 653.2320398966472, |
|
"dead_code_fraction": 0.1261, |
|
"dead_code_fraction/layer0": 0.1261, |
|
"epoch": 0.24, |
|
"input_norm": 31.99826599121093, |
|
"input_norm/layer0": 31.99826599121093, |
|
"learning_rate": 0.0005, |
|
"loss": 2.3415, |
|
"max_norm": 50.542850494384766, |
|
"max_norm/layer0": 50.542850494384766, |
|
"mean_norm": 37.956573486328125, |
|
"mean_norm/layer0": 37.956573486328125, |
|
"multicode_k": 1, |
|
"output_norm": 16.545647277832018, |
|
"output_norm/layer0": 16.545647277832018, |
|
"step": 2450 |
|
}, |
|
{ |
|
"MSE": 652.0689453124999, |
|
"MSE/layer0": 652.0689453124999, |
|
"dead_code_fraction": 0.1305, |
|
"dead_code_fraction/layer0": 0.1305, |
|
"epoch": 0.25, |
|
"input_norm": 31.998266054789227, |
|
"input_norm/layer0": 31.998266054789227, |
|
"learning_rate": 0.0005, |
|
"loss": 2.3541, |
|
"max_norm": 50.972904205322266, |
|
"max_norm/layer0": 50.972904205322266, |
|
"mean_norm": 38.07469177246094, |
|
"mean_norm/layer0": 38.07469177246094, |
|
"multicode_k": 1, |
|
"output_norm": 16.614015088081356, |
|
"output_norm/layer0": 16.614015088081356, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_MSE/layer0": 651.1296869864225, |
|
"eval_accuracy": 0.48371217143066175, |
|
"eval_dead_code_fraction/layer0": 0.1337, |
|
"eval_input_norm/layer0": 31.998264631048162, |
|
"eval_loss": 2.340399742126465, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 16.66022368217996, |
|
"eval_runtime": 157.8946, |
|
"eval_samples_per_second": 29.279, |
|
"eval_steps_per_second": 1.83, |
|
"step": 2500 |
|
}, |
|
{ |
|
"MSE": 650.5154676310221, |
|
"MSE/layer0": 650.5154676310221, |
|
"dead_code_fraction": 0.1312, |
|
"dead_code_fraction/layer0": 0.1312, |
|
"epoch": 0.26, |
|
"input_norm": 31.99826429367065, |
|
"input_norm/layer0": 31.99826429367065, |
|
"learning_rate": 0.0005, |
|
"loss": 2.3374, |
|
"max_norm": 51.42794418334961, |
|
"max_norm/layer0": 51.42794418334961, |
|
"mean_norm": 38.19082260131836, |
|
"mean_norm/layer0": 38.19082260131836, |
|
"multicode_k": 1, |
|
"output_norm": 16.705677251815793, |
|
"output_norm/layer0": 16.705677251815793, |
|
"step": 2550 |
|
}, |
|
{ |
|
"MSE": 649.4798397827149, |
|
"MSE/layer0": 649.4798397827149, |
|
"dead_code_fraction": 0.13625, |
|
"dead_code_fraction/layer0": 0.13625, |
|
"epoch": 0.26, |
|
"input_norm": 31.99826188405354, |
|
"input_norm/layer0": 31.99826188405354, |
|
"learning_rate": 0.0005, |
|
"loss": 2.3364, |
|
"max_norm": 51.84079360961914, |
|
"max_norm/layer0": 51.84079360961914, |
|
"mean_norm": 38.306650161743164, |
|
"mean_norm/layer0": 38.306650161743164, |
|
"multicode_k": 1, |
|
"output_norm": 16.774758176803587, |
|
"output_norm/layer0": 16.774758176803587, |
|
"step": 2600 |
|
}, |
|
{ |
|
"MSE": 648.4373052978513, |
|
"MSE/layer0": 648.4373052978513, |
|
"dead_code_fraction": 0.13795, |
|
"dead_code_fraction/layer0": 0.13795, |
|
"epoch": 0.27, |
|
"input_norm": 31.998252007166542, |
|
"input_norm/layer0": 31.998252007166542, |
|
"learning_rate": 0.0005, |
|
"loss": 2.3162, |
|
"max_norm": 52.24661636352539, |
|
"max_norm/layer0": 52.24661636352539, |
|
"mean_norm": 38.41937828063965, |
|
"mean_norm/layer0": 38.41937828063965, |
|
"multicode_k": 1, |
|
"output_norm": 16.851604979832963, |
|
"output_norm/layer0": 16.851604979832963, |
|
"step": 2650 |
|
}, |
|
{ |
|
"MSE": 647.0678014119467, |
|
"MSE/layer0": 647.0678014119467, |
|
"dead_code_fraction": 0.1397, |
|
"dead_code_fraction/layer0": 0.1397, |
|
"epoch": 0.27, |
|
"input_norm": 31.998265930811563, |
|
"input_norm/layer0": 31.998265930811563, |
|
"learning_rate": 0.0005, |
|
"loss": 2.3497, |
|
"max_norm": 52.66170120239258, |
|
"max_norm/layer0": 52.66170120239258, |
|
"mean_norm": 38.53024482727051, |
|
"mean_norm/layer0": 38.53024482727051, |
|
"multicode_k": 1, |
|
"output_norm": 16.925416787465398, |
|
"output_norm/layer0": 16.925416787465398, |
|
"step": 2700 |
|
}, |
|
{ |
|
"MSE": 646.4085242716471, |
|
"MSE/layer0": 646.4085242716471, |
|
"dead_code_fraction": 0.14125, |
|
"dead_code_fraction/layer0": 0.14125, |
|
"epoch": 0.28, |
|
"input_norm": 31.99825245221455, |
|
"input_norm/layer0": 31.99825245221455, |
|
"learning_rate": 0.0005, |
|
"loss": 2.301, |
|
"max_norm": 53.03037643432617, |
|
"max_norm/layer0": 53.03037643432617, |
|
"mean_norm": 38.63713836669922, |
|
"mean_norm/layer0": 38.63713836669922, |
|
"multicode_k": 1, |
|
"output_norm": 16.985576423009235, |
|
"output_norm/layer0": 16.985576423009235, |
|
"step": 2750 |
|
}, |
|
{ |
|
"MSE": 644.7344170125325, |
|
"MSE/layer0": 644.7344170125325, |
|
"dead_code_fraction": 0.14415, |
|
"dead_code_fraction/layer0": 0.14415, |
|
"epoch": 0.28, |
|
"input_norm": 31.998260081609082, |
|
"input_norm/layer0": 31.998260081609082, |
|
"learning_rate": 0.0005, |
|
"loss": 2.3395, |
|
"max_norm": 53.41487503051758, |
|
"max_norm/layer0": 53.41487503051758, |
|
"mean_norm": 38.74285697937012, |
|
"mean_norm/layer0": 38.74285697937012, |
|
"multicode_k": 1, |
|
"output_norm": 17.068980147043867, |
|
"output_norm/layer0": 17.068980147043867, |
|
"step": 2800 |
|
}, |
|
{ |
|
"MSE": 644.636144104004, |
|
"MSE/layer0": 644.636144104004, |
|
"dead_code_fraction": 0.14565, |
|
"dead_code_fraction/layer0": 0.14565, |
|
"epoch": 0.28, |
|
"input_norm": 31.998243366877247, |
|
"input_norm/layer0": 31.998243366877247, |
|
"learning_rate": 0.0005, |
|
"loss": 2.2757, |
|
"max_norm": 53.792579650878906, |
|
"max_norm/layer0": 53.792579650878906, |
|
"mean_norm": 38.84635543823242, |
|
"mean_norm/layer0": 38.84635543823242, |
|
"multicode_k": 1, |
|
"output_norm": 17.124992834726967, |
|
"output_norm/layer0": 17.124992834726967, |
|
"step": 2850 |
|
}, |
|
{ |
|
"MSE": 643.8843309529623, |
|
"MSE/layer0": 643.8843309529623, |
|
"dead_code_fraction": 0.14495, |
|
"dead_code_fraction/layer0": 0.14495, |
|
"epoch": 0.29, |
|
"input_norm": 31.998242295583093, |
|
"input_norm/layer0": 31.998242295583093, |
|
"learning_rate": 0.0005, |
|
"loss": 2.3057, |
|
"max_norm": 54.146453857421875, |
|
"max_norm/layer0": 54.146453857421875, |
|
"mean_norm": 38.947309494018555, |
|
"mean_norm/layer0": 38.947309494018555, |
|
"multicode_k": 1, |
|
"output_norm": 17.17694611549377, |
|
"output_norm/layer0": 17.17694611549377, |
|
"step": 2900 |
|
}, |
|
{ |
|
"MSE": 642.6776557413741, |
|
"MSE/layer0": 642.6776557413741, |
|
"dead_code_fraction": 0.1504, |
|
"dead_code_fraction/layer0": 0.1504, |
|
"epoch": 0.29, |
|
"input_norm": 31.998272593816125, |
|
"input_norm/layer0": 31.998272593816125, |
|
"learning_rate": 0.0005, |
|
"loss": 2.3545, |
|
"max_norm": 54.51527404785156, |
|
"max_norm/layer0": 54.51527404785156, |
|
"mean_norm": 39.047607421875, |
|
"mean_norm/layer0": 39.047607421875, |
|
"multicode_k": 1, |
|
"output_norm": 17.240235595703133, |
|
"output_norm/layer0": 17.240235595703133, |
|
"step": 2950 |
|
}, |
|
{ |
|
"MSE": 643.1047460937498, |
|
"MSE/layer0": 643.1047460937498, |
|
"dead_code_fraction": 0.1483, |
|
"dead_code_fraction/layer0": 0.1483, |
|
"epoch": 0.3, |
|
"input_norm": 31.998249003092454, |
|
"input_norm/layer0": 31.998249003092454, |
|
"learning_rate": 0.0005, |
|
"loss": 2.2742, |
|
"max_norm": 54.86568832397461, |
|
"max_norm/layer0": 54.86568832397461, |
|
"mean_norm": 39.14469337463379, |
|
"mean_norm/layer0": 39.14469337463379, |
|
"multicode_k": 1, |
|
"output_norm": 17.28876600265503, |
|
"output_norm/layer0": 17.28876600265503, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_MSE/layer0": 642.6360311704152, |
|
"eval_accuracy": 0.49030507287608877, |
|
"eval_dead_code_fraction/layer0": 0.14995, |
|
"eval_input_norm/layer0": 31.998255163205542, |
|
"eval_loss": 2.2907073497772217, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 17.324301861386118, |
|
"eval_runtime": 157.9262, |
|
"eval_samples_per_second": 29.273, |
|
"eval_steps_per_second": 1.83, |
|
"step": 3000 |
|
}, |
|
{ |
|
"MSE": 641.9817254638668, |
|
"MSE/layer0": 641.9817254638668, |
|
"dead_code_fraction": 0.1511, |
|
"dead_code_fraction/layer0": 0.1511, |
|
"epoch": 0.3, |
|
"input_norm": 31.99826343536376, |
|
"input_norm/layer0": 31.99826343536376, |
|
"learning_rate": 0.0005, |
|
"loss": 2.3422, |
|
"max_norm": 55.2226676940918, |
|
"max_norm/layer0": 55.2226676940918, |
|
"mean_norm": 39.23999786376953, |
|
"mean_norm/layer0": 39.23999786376953, |
|
"multicode_k": 1, |
|
"output_norm": 17.350644410451252, |
|
"output_norm/layer0": 17.350644410451252, |
|
"step": 3050 |
|
}, |
|
{ |
|
"MSE": 641.9993333943689, |
|
"MSE/layer0": 641.9993333943689, |
|
"dead_code_fraction": 0.1504, |
|
"dead_code_fraction/layer0": 0.1504, |
|
"epoch": 0.31, |
|
"input_norm": 31.998250141143807, |
|
"input_norm/layer0": 31.998250141143807, |
|
"learning_rate": 0.0005, |
|
"loss": 2.2814, |
|
"max_norm": 55.56163787841797, |
|
"max_norm/layer0": 55.56163787841797, |
|
"mean_norm": 39.33370780944824, |
|
"mean_norm/layer0": 39.33370780944824, |
|
"multicode_k": 1, |
|
"output_norm": 17.39312816301982, |
|
"output_norm/layer0": 17.39312816301982, |
|
"step": 3100 |
|
}, |
|
{ |
|
"MSE": 641.5148900349936, |
|
"MSE/layer0": 641.5148900349936, |
|
"dead_code_fraction": 0.15185, |
|
"dead_code_fraction/layer0": 0.15185, |
|
"epoch": 0.32, |
|
"input_norm": 31.998260364532467, |
|
"input_norm/layer0": 31.998260364532467, |
|
"learning_rate": 0.0005, |
|
"loss": 2.3152, |
|
"max_norm": 55.8856315612793, |
|
"max_norm/layer0": 55.8856315612793, |
|
"mean_norm": 39.42481803894043, |
|
"mean_norm/layer0": 39.42481803894043, |
|
"multicode_k": 1, |
|
"output_norm": 17.44178107897441, |
|
"output_norm/layer0": 17.44178107897441, |
|
"step": 3150 |
|
}, |
|
{ |
|
"MSE": 640.499552408854, |
|
"MSE/layer0": 640.499552408854, |
|
"dead_code_fraction": 0.1516, |
|
"dead_code_fraction/layer0": 0.1516, |
|
"epoch": 0.32, |
|
"input_norm": 31.99825292587281, |
|
"input_norm/layer0": 31.99825292587281, |
|
"learning_rate": 0.0005, |
|
"loss": 2.2462, |
|
"max_norm": 56.21445846557617, |
|
"max_norm/layer0": 56.21445846557617, |
|
"mean_norm": 39.51395606994629, |
|
"mean_norm/layer0": 39.51395606994629, |
|
"multicode_k": 1, |
|
"output_norm": 17.50789775530497, |
|
"output_norm/layer0": 17.50789775530497, |
|
"step": 3200 |
|
}, |
|
{ |
|
"MSE": 640.565166829427, |
|
"MSE/layer0": 640.565166829427, |
|
"dead_code_fraction": 0.15285, |
|
"dead_code_fraction/layer0": 0.15285, |
|
"epoch": 0.33, |
|
"input_norm": 31.998250306447353, |
|
"input_norm/layer0": 31.998250306447353, |
|
"learning_rate": 0.0005, |
|
"loss": 2.2595, |
|
"max_norm": 56.526973724365234, |
|
"max_norm/layer0": 56.526973724365234, |
|
"mean_norm": 39.601173400878906, |
|
"mean_norm/layer0": 39.601173400878906, |
|
"multicode_k": 1, |
|
"output_norm": 17.54366443951924, |
|
"output_norm/layer0": 17.54366443951924, |
|
"step": 3250 |
|
}, |
|
{ |
|
"MSE": 640.8991118367509, |
|
"MSE/layer0": 640.8991118367509, |
|
"dead_code_fraction": 0.1531, |
|
"dead_code_fraction/layer0": 0.1531, |
|
"epoch": 0.33, |
|
"input_norm": 31.998245798746755, |
|
"input_norm/layer0": 31.998245798746755, |
|
"learning_rate": 0.0005, |
|
"loss": 2.2326, |
|
"max_norm": 56.82651138305664, |
|
"max_norm/layer0": 56.82651138305664, |
|
"mean_norm": 39.684635162353516, |
|
"mean_norm/layer0": 39.684635162353516, |
|
"multicode_k": 1, |
|
"output_norm": 17.578553660710664, |
|
"output_norm/layer0": 17.578553660710664, |
|
"step": 3300 |
|
}, |
|
{ |
|
"MSE": 640.486218770345, |
|
"MSE/layer0": 640.486218770345, |
|
"dead_code_fraction": 0.15345, |
|
"dead_code_fraction/layer0": 0.15345, |
|
"epoch": 0.34, |
|
"input_norm": 31.998255780537924, |
|
"input_norm/layer0": 31.998255780537924, |
|
"learning_rate": 0.0005, |
|
"loss": 2.2733, |
|
"max_norm": 57.12877655029297, |
|
"max_norm/layer0": 57.12877655029297, |
|
"mean_norm": 39.76711463928223, |
|
"mean_norm/layer0": 39.76711463928223, |
|
"multicode_k": 1, |
|
"output_norm": 17.619242086410516, |
|
"output_norm/layer0": 17.619242086410516, |
|
"step": 3350 |
|
}, |
|
{ |
|
"MSE": 639.5240251668292, |
|
"MSE/layer0": 639.5240251668292, |
|
"dead_code_fraction": 0.15565, |
|
"dead_code_fraction/layer0": 0.15565, |
|
"epoch": 0.34, |
|
"input_norm": 31.998264500300095, |
|
"input_norm/layer0": 31.998264500300095, |
|
"learning_rate": 0.0005, |
|
"loss": 2.2633, |
|
"max_norm": 57.42041778564453, |
|
"max_norm/layer0": 57.42041778564453, |
|
"mean_norm": 39.84800338745117, |
|
"mean_norm/layer0": 39.84800338745117, |
|
"multicode_k": 1, |
|
"output_norm": 17.667484652201342, |
|
"output_norm/layer0": 17.667484652201342, |
|
"step": 3400 |
|
}, |
|
{ |
|
"MSE": 639.2691174316408, |
|
"MSE/layer0": 639.2691174316408, |
|
"dead_code_fraction": 0.15605, |
|
"dead_code_fraction/layer0": 0.15605, |
|
"epoch": 0.34, |
|
"input_norm": 31.99825723965962, |
|
"input_norm/layer0": 31.99825723965962, |
|
"learning_rate": 0.0005, |
|
"loss": 2.2495, |
|
"max_norm": 57.706260681152344, |
|
"max_norm/layer0": 57.706260681152344, |
|
"mean_norm": 39.92698097229004, |
|
"mean_norm/layer0": 39.92698097229004, |
|
"multicode_k": 1, |
|
"output_norm": 17.705148900349947, |
|
"output_norm/layer0": 17.705148900349947, |
|
"step": 3450 |
|
}, |
|
{ |
|
"MSE": 639.3908192952478, |
|
"MSE/layer0": 639.3908192952478, |
|
"dead_code_fraction": 0.15655, |
|
"dead_code_fraction/layer0": 0.15655, |
|
"epoch": 0.35, |
|
"input_norm": 31.9982618745168, |
|
"input_norm/layer0": 31.9982618745168, |
|
"learning_rate": 0.0005, |
|
"loss": 2.2488, |
|
"max_norm": 57.98209762573242, |
|
"max_norm/layer0": 57.98209762573242, |
|
"mean_norm": 40.005022048950195, |
|
"mean_norm/layer0": 40.005022048950195, |
|
"multicode_k": 1, |
|
"output_norm": 17.73683495521545, |
|
"output_norm/layer0": 17.73683495521545, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_MSE/layer0": 640.3158307464355, |
|
"eval_accuracy": 0.49451074349024987, |
|
"eval_dead_code_fraction/layer0": 0.1575, |
|
"eval_input_norm/layer0": 31.99825158244007, |
|
"eval_loss": 2.2564537525177, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 17.756634140179678, |
|
"eval_runtime": 157.599, |
|
"eval_samples_per_second": 29.334, |
|
"eval_steps_per_second": 1.834, |
|
"step": 3500 |
|
}, |
|
{ |
|
"MSE": 639.6838141886391, |
|
"MSE/layer0": 639.6838141886391, |
|
"dead_code_fraction": 0.157, |
|
"dead_code_fraction/layer0": 0.157, |
|
"epoch": 0.35, |
|
"input_norm": 31.99826737085978, |
|
"input_norm/layer0": 31.99826737085978, |
|
"learning_rate": 0.0005, |
|
"loss": 2.2738, |
|
"max_norm": 58.24713897705078, |
|
"max_norm/layer0": 58.24713897705078, |
|
"mean_norm": 40.08023262023926, |
|
"mean_norm/layer0": 40.08023262023926, |
|
"multicode_k": 1, |
|
"output_norm": 17.755876312255864, |
|
"output_norm/layer0": 17.755876312255864, |
|
"step": 3550 |
|
}, |
|
{ |
|
"MSE": 639.2954257202149, |
|
"MSE/layer0": 639.2954257202149, |
|
"dead_code_fraction": 0.1559, |
|
"dead_code_fraction/layer0": 0.1559, |
|
"epoch": 0.36, |
|
"input_norm": 31.998245531717938, |
|
"input_norm/layer0": 31.998245531717938, |
|
"learning_rate": 0.0005, |
|
"loss": 2.2036, |
|
"max_norm": 58.50635528564453, |
|
"max_norm/layer0": 58.50635528564453, |
|
"mean_norm": 40.15370178222656, |
|
"mean_norm/layer0": 40.15370178222656, |
|
"multicode_k": 1, |
|
"output_norm": 17.812968953450515, |
|
"output_norm/layer0": 17.812968953450515, |
|
"step": 3600 |
|
}, |
|
{ |
|
"MSE": 639.3338773600263, |
|
"MSE/layer0": 639.3338773600263, |
|
"dead_code_fraction": 0.15905, |
|
"dead_code_fraction/layer0": 0.15905, |
|
"epoch": 0.36, |
|
"input_norm": 31.99827084223429, |
|
"input_norm/layer0": 31.99827084223429, |
|
"learning_rate": 0.0005, |
|
"loss": 2.2672, |
|
"max_norm": 58.76622009277344, |
|
"max_norm/layer0": 58.76622009277344, |
|
"mean_norm": 40.22719192504883, |
|
"mean_norm/layer0": 40.22719192504883, |
|
"multicode_k": 1, |
|
"output_norm": 17.821751413345332, |
|
"output_norm/layer0": 17.821751413345332, |
|
"step": 3650 |
|
}, |
|
{ |
|
"MSE": 639.0531684366863, |
|
"MSE/layer0": 639.0531684366863, |
|
"dead_code_fraction": 0.15975, |
|
"dead_code_fraction/layer0": 0.15975, |
|
"epoch": 0.37, |
|
"input_norm": 31.99827636400858, |
|
"input_norm/layer0": 31.99827636400858, |
|
"learning_rate": 0.0005, |
|
"loss": 2.2444, |
|
"max_norm": 59.02393341064453, |
|
"max_norm/layer0": 59.02393341064453, |
|
"mean_norm": 40.298166275024414, |
|
"mean_norm/layer0": 40.298166275024414, |
|
"multicode_k": 1, |
|
"output_norm": 17.85403926849365, |
|
"output_norm/layer0": 17.85403926849365, |
|
"step": 3700 |
|
}, |
|
{ |
|
"MSE": 638.9355230712894, |
|
"MSE/layer0": 638.9355230712894, |
|
"dead_code_fraction": 0.1605, |
|
"dead_code_fraction/layer0": 0.1605, |
|
"epoch": 0.38, |
|
"input_norm": 31.99827863057454, |
|
"input_norm/layer0": 31.99827863057454, |
|
"learning_rate": 0.0005, |
|
"loss": 2.2454, |
|
"max_norm": 59.28853225708008, |
|
"max_norm/layer0": 59.28853225708008, |
|
"mean_norm": 40.36880111694336, |
|
"mean_norm/layer0": 40.36880111694336, |
|
"multicode_k": 1, |
|
"output_norm": 17.88599282582601, |
|
"output_norm/layer0": 17.88599282582601, |
|
"step": 3750 |
|
}, |
|
{ |
|
"MSE": 639.0086972045899, |
|
"MSE/layer0": 639.0086972045899, |
|
"dead_code_fraction": 0.16125, |
|
"dead_code_fraction/layer0": 0.16125, |
|
"epoch": 0.38, |
|
"input_norm": 31.9982850710551, |
|
"input_norm/layer0": 31.9982850710551, |
|
"learning_rate": 0.0005, |
|
"loss": 2.27, |
|
"max_norm": 59.546451568603516, |
|
"max_norm/layer0": 59.546451568603516, |
|
"mean_norm": 40.43776512145996, |
|
"mean_norm/layer0": 40.43776512145996, |
|
"multicode_k": 1, |
|
"output_norm": 17.90943570454915, |
|
"output_norm/layer0": 17.90943570454915, |
|
"step": 3800 |
|
}, |
|
{ |
|
"MSE": 638.9462019856769, |
|
"MSE/layer0": 638.9462019856769, |
|
"dead_code_fraction": 0.1583, |
|
"dead_code_fraction/layer0": 0.1583, |
|
"epoch": 0.39, |
|
"input_norm": 31.998278980255122, |
|
"input_norm/layer0": 31.998278980255122, |
|
"learning_rate": 0.0005, |
|
"loss": 2.2438, |
|
"max_norm": 59.80894470214844, |
|
"max_norm/layer0": 59.80894470214844, |
|
"mean_norm": 40.50556945800781, |
|
"mean_norm/layer0": 40.50556945800781, |
|
"multicode_k": 1, |
|
"output_norm": 17.947645209630338, |
|
"output_norm/layer0": 17.947645209630338, |
|
"step": 3850 |
|
}, |
|
{ |
|
"MSE": 639.4130173746743, |
|
"MSE/layer0": 639.4130173746743, |
|
"dead_code_fraction": 0.16135, |
|
"dead_code_fraction/layer0": 0.16135, |
|
"epoch": 0.39, |
|
"input_norm": 31.998284943898526, |
|
"input_norm/layer0": 31.998284943898526, |
|
"learning_rate": 0.0005, |
|
"loss": 2.2526, |
|
"max_norm": 60.04655075073242, |
|
"max_norm/layer0": 60.04655075073242, |
|
"mean_norm": 40.57136535644531, |
|
"mean_norm/layer0": 40.57136535644531, |
|
"multicode_k": 1, |
|
"output_norm": 17.960218969980872, |
|
"output_norm/layer0": 17.960218969980872, |
|
"step": 3900 |
|
}, |
|
{ |
|
"MSE": 639.8756245930986, |
|
"MSE/layer0": 639.8756245930986, |
|
"dead_code_fraction": 0.15755, |
|
"dead_code_fraction/layer0": 0.15755, |
|
"epoch": 0.4, |
|
"input_norm": 31.998285398483272, |
|
"input_norm/layer0": 31.998285398483272, |
|
"learning_rate": 0.0005, |
|
"loss": 2.2266, |
|
"max_norm": 60.29011154174805, |
|
"max_norm/layer0": 60.29011154174805, |
|
"mean_norm": 40.63625144958496, |
|
"mean_norm/layer0": 40.63625144958496, |
|
"multicode_k": 1, |
|
"output_norm": 17.97526204744974, |
|
"output_norm/layer0": 17.97526204744974, |
|
"step": 3950 |
|
}, |
|
{ |
|
"MSE": 640.046054585775, |
|
"MSE/layer0": 640.046054585775, |
|
"dead_code_fraction": 0.1605, |
|
"dead_code_fraction/layer0": 0.1605, |
|
"epoch": 0.4, |
|
"input_norm": 31.998285433451336, |
|
"input_norm/layer0": 31.998285433451336, |
|
"learning_rate": 0.0005, |
|
"loss": 2.2287, |
|
"max_norm": 60.52168655395508, |
|
"max_norm/layer0": 60.52168655395508, |
|
"mean_norm": 40.698753356933594, |
|
"mean_norm/layer0": 40.698753356933594, |
|
"multicode_k": 1, |
|
"output_norm": 17.997498016357426, |
|
"output_norm/layer0": 17.997498016357426, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_MSE/layer0": 638.8422855589264, |
|
"eval_accuracy": 0.49670513512593434, |
|
"eval_dead_code_fraction/layer0": 0.16135, |
|
"eval_input_norm/layer0": 31.99827300782795, |
|
"eval_loss": 2.2332887649536133, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 18.022313365115252, |
|
"eval_runtime": 158.1975, |
|
"eval_samples_per_second": 29.223, |
|
"eval_steps_per_second": 1.827, |
|
"step": 4000 |
|
}, |
|
{ |
|
"MSE": 639.952128804525, |
|
"MSE/layer0": 639.952128804525, |
|
"dead_code_fraction": 0.16035, |
|
"dead_code_fraction/layer0": 0.16035, |
|
"epoch": 0.41, |
|
"input_norm": 31.998286927541105, |
|
"input_norm/layer0": 31.998286927541105, |
|
"learning_rate": 0.0005, |
|
"loss": 2.2193, |
|
"max_norm": 60.76009750366211, |
|
"max_norm/layer0": 60.76009750366211, |
|
"mean_norm": 40.75992393493652, |
|
"mean_norm/layer0": 40.75992393493652, |
|
"multicode_k": 1, |
|
"output_norm": 18.024092137018826, |
|
"output_norm/layer0": 18.024092137018826, |
|
"step": 4050 |
|
}, |
|
{ |
|
"MSE": 640.5730131022133, |
|
"MSE/layer0": 640.5730131022133, |
|
"dead_code_fraction": 0.1634, |
|
"dead_code_fraction/layer0": 0.1634, |
|
"epoch": 0.41, |
|
"input_norm": 31.99828769365946, |
|
"input_norm/layer0": 31.99828769365946, |
|
"learning_rate": 0.0005, |
|
"loss": 2.2301, |
|
"max_norm": 60.98118591308594, |
|
"max_norm/layer0": 60.98118591308594, |
|
"mean_norm": 40.8208122253418, |
|
"mean_norm/layer0": 40.8208122253418, |
|
"multicode_k": 1, |
|
"output_norm": 18.02807092984518, |
|
"output_norm/layer0": 18.02807092984518, |
|
"step": 4100 |
|
}, |
|
{ |
|
"MSE": 640.4258350626628, |
|
"MSE/layer0": 640.4258350626628, |
|
"dead_code_fraction": 0.1612, |
|
"dead_code_fraction/layer0": 0.1612, |
|
"epoch": 0.41, |
|
"input_norm": 31.998297268549607, |
|
"input_norm/layer0": 31.998297268549607, |
|
"learning_rate": 0.0005, |
|
"loss": 2.2307, |
|
"max_norm": 61.19542694091797, |
|
"max_norm/layer0": 61.19542694091797, |
|
"mean_norm": 40.88128852844238, |
|
"mean_norm/layer0": 40.88128852844238, |
|
"multicode_k": 1, |
|
"output_norm": 18.04158842404684, |
|
"output_norm/layer0": 18.04158842404684, |
|
"step": 4150 |
|
}, |
|
{ |
|
"MSE": 639.5022987874349, |
|
"MSE/layer0": 639.5022987874349, |
|
"dead_code_fraction": 0.16015, |
|
"dead_code_fraction/layer0": 0.16015, |
|
"epoch": 0.42, |
|
"input_norm": 31.99830362319948, |
|
"input_norm/layer0": 31.99830362319948, |
|
"learning_rate": 0.0005, |
|
"loss": 2.247, |
|
"max_norm": 61.4282341003418, |
|
"max_norm/layer0": 61.4282341003418, |
|
"mean_norm": 40.941017150878906, |
|
"mean_norm/layer0": 40.941017150878906, |
|
"multicode_k": 1, |
|
"output_norm": 18.079462760289516, |
|
"output_norm/layer0": 18.079462760289516, |
|
"step": 4200 |
|
}, |
|
{ |
|
"MSE": 640.0252755737306, |
|
"MSE/layer0": 640.0252755737306, |
|
"dead_code_fraction": 0.1604, |
|
"dead_code_fraction/layer0": 0.1604, |
|
"epoch": 0.42, |
|
"input_norm": 31.99830138524374, |
|
"input_norm/layer0": 31.99830138524374, |
|
"learning_rate": 0.0005, |
|
"loss": 2.2314, |
|
"max_norm": 61.648414611816406, |
|
"max_norm/layer0": 61.648414611816406, |
|
"mean_norm": 40.99977684020996, |
|
"mean_norm/layer0": 40.99977684020996, |
|
"multicode_k": 1, |
|
"output_norm": 18.09024664878845, |
|
"output_norm/layer0": 18.09024664878845, |
|
"step": 4250 |
|
}, |
|
{ |
|
"MSE": 639.7621870930992, |
|
"MSE/layer0": 639.7621870930992, |
|
"dead_code_fraction": 0.16365, |
|
"dead_code_fraction/layer0": 0.16365, |
|
"epoch": 0.43, |
|
"input_norm": 31.99830169359842, |
|
"input_norm/layer0": 31.99830169359842, |
|
"learning_rate": 0.0005, |
|
"loss": 2.2144, |
|
"max_norm": 61.86562728881836, |
|
"max_norm/layer0": 61.86562728881836, |
|
"mean_norm": 41.05688667297363, |
|
"mean_norm/layer0": 41.05688667297363, |
|
"multicode_k": 1, |
|
"output_norm": 18.11899041493734, |
|
"output_norm/layer0": 18.11899041493734, |
|
"step": 4300 |
|
}, |
|
{ |
|
"MSE": 640.3955947875975, |
|
"MSE/layer0": 640.3955947875975, |
|
"dead_code_fraction": 0.1592, |
|
"dead_code_fraction/layer0": 0.1592, |
|
"epoch": 0.43, |
|
"input_norm": 31.998302787144976, |
|
"input_norm/layer0": 31.998302787144976, |
|
"learning_rate": 0.0005, |
|
"loss": 2.2077, |
|
"max_norm": 62.060550689697266, |
|
"max_norm/layer0": 62.060550689697266, |
|
"mean_norm": 41.11246681213379, |
|
"mean_norm/layer0": 41.11246681213379, |
|
"multicode_k": 1, |
|
"output_norm": 18.121066271464024, |
|
"output_norm/layer0": 18.121066271464024, |
|
"step": 4350 |
|
}, |
|
{ |
|
"MSE": 639.8066222127281, |
|
"MSE/layer0": 639.8066222127281, |
|
"dead_code_fraction": 0.1635, |
|
"dead_code_fraction/layer0": 0.1635, |
|
"epoch": 0.44, |
|
"input_norm": 31.998314228057872, |
|
"input_norm/layer0": 31.998314228057872, |
|
"learning_rate": 0.0005, |
|
"loss": 2.2287, |
|
"max_norm": 62.275943756103516, |
|
"max_norm/layer0": 62.275943756103516, |
|
"mean_norm": 41.167396545410156, |
|
"mean_norm/layer0": 41.167396545410156, |
|
"multicode_k": 1, |
|
"output_norm": 18.142933632532753, |
|
"output_norm/layer0": 18.142933632532753, |
|
"step": 4400 |
|
}, |
|
{ |
|
"MSE": 639.8160334269206, |
|
"MSE/layer0": 639.8160334269206, |
|
"dead_code_fraction": 0.16385, |
|
"dead_code_fraction/layer0": 0.16385, |
|
"epoch": 0.45, |
|
"input_norm": 31.99831516901653, |
|
"input_norm/layer0": 31.99831516901653, |
|
"learning_rate": 0.0005, |
|
"loss": 2.215, |
|
"max_norm": 62.486793518066406, |
|
"max_norm/layer0": 62.486793518066406, |
|
"mean_norm": 41.221702575683594, |
|
"mean_norm/layer0": 41.221702575683594, |
|
"multicode_k": 1, |
|
"output_norm": 18.167670075098677, |
|
"output_norm/layer0": 18.167670075098677, |
|
"step": 4450 |
|
}, |
|
{ |
|
"MSE": 640.1416244506836, |
|
"MSE/layer0": 640.1416244506836, |
|
"dead_code_fraction": 0.16675, |
|
"dead_code_fraction/layer0": 0.16675, |
|
"epoch": 0.45, |
|
"input_norm": 31.998327512741074, |
|
"input_norm/layer0": 31.998327512741074, |
|
"learning_rate": 0.0005, |
|
"loss": 2.2576, |
|
"max_norm": 62.67790222167969, |
|
"max_norm/layer0": 62.67790222167969, |
|
"mean_norm": 41.275705337524414, |
|
"mean_norm/layer0": 41.275705337524414, |
|
"multicode_k": 1, |
|
"output_norm": 18.162402251561495, |
|
"output_norm/layer0": 18.162402251561495, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_MSE/layer0": 639.7464034476376, |
|
"eval_accuracy": 0.49916912103175737, |
|
"eval_dead_code_fraction/layer0": 0.16755, |
|
"eval_input_norm/layer0": 31.998309449821527, |
|
"eval_loss": 2.215489387512207, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 18.191884751910905, |
|
"eval_runtime": 157.9108, |
|
"eval_samples_per_second": 29.276, |
|
"eval_steps_per_second": 1.83, |
|
"step": 4500 |
|
}, |
|
{ |
|
"MSE": 640.4858755493162, |
|
"MSE/layer0": 640.4858755493162, |
|
"dead_code_fraction": 0.1633, |
|
"dead_code_fraction/layer0": 0.1633, |
|
"epoch": 0.46, |
|
"input_norm": 31.99831475257874, |
|
"input_norm/layer0": 31.99831475257874, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1869, |
|
"max_norm": 62.88029861450195, |
|
"max_norm/layer0": 62.88029861450195, |
|
"mean_norm": 41.32845115661621, |
|
"mean_norm/layer0": 41.32845115661621, |
|
"multicode_k": 1, |
|
"output_norm": 18.18791744550069, |
|
"output_norm/layer0": 18.18791744550069, |
|
"step": 4550 |
|
}, |
|
{ |
|
"MSE": 640.7411174519859, |
|
"MSE/layer0": 640.7411174519859, |
|
"dead_code_fraction": 0.16375, |
|
"dead_code_fraction/layer0": 0.16375, |
|
"epoch": 0.46, |
|
"input_norm": 31.998337395985924, |
|
"input_norm/layer0": 31.998337395985924, |
|
"learning_rate": 0.0005, |
|
"loss": 2.2426, |
|
"max_norm": 63.06687545776367, |
|
"max_norm/layer0": 63.06687545776367, |
|
"mean_norm": 41.38063049316406, |
|
"mean_norm/layer0": 41.38063049316406, |
|
"multicode_k": 1, |
|
"output_norm": 18.185693721771244, |
|
"output_norm/layer0": 18.185693721771244, |
|
"step": 4600 |
|
}, |
|
{ |
|
"MSE": 640.3254055786131, |
|
"MSE/layer0": 640.3254055786131, |
|
"dead_code_fraction": 0.1637, |
|
"dead_code_fraction/layer0": 0.1637, |
|
"epoch": 0.47, |
|
"input_norm": 31.998331034978236, |
|
"input_norm/layer0": 31.998331034978236, |
|
"learning_rate": 0.0005, |
|
"loss": 2.2103, |
|
"max_norm": 63.24494171142578, |
|
"max_norm/layer0": 63.24494171142578, |
|
"mean_norm": 41.4316463470459, |
|
"mean_norm/layer0": 41.4316463470459, |
|
"multicode_k": 1, |
|
"output_norm": 18.215761318206788, |
|
"output_norm/layer0": 18.215761318206788, |
|
"step": 4650 |
|
}, |
|
{ |
|
"MSE": 640.0117889404299, |
|
"MSE/layer0": 640.0117889404299, |
|
"dead_code_fraction": 0.1653, |
|
"dead_code_fraction/layer0": 0.1653, |
|
"epoch": 0.47, |
|
"input_norm": 31.998331683476753, |
|
"input_norm/layer0": 31.998331683476753, |
|
"learning_rate": 0.0005, |
|
"loss": 2.189, |
|
"max_norm": 63.429969787597656, |
|
"max_norm/layer0": 63.429969787597656, |
|
"mean_norm": 41.481590270996094, |
|
"mean_norm/layer0": 41.481590270996094, |
|
"multicode_k": 1, |
|
"output_norm": 18.22781534512837, |
|
"output_norm/layer0": 18.22781534512837, |
|
"step": 4700 |
|
}, |
|
{ |
|
"MSE": 640.034366455078, |
|
"MSE/layer0": 640.034366455078, |
|
"dead_code_fraction": 0.16355, |
|
"dead_code_fraction/layer0": 0.16355, |
|
"epoch": 0.47, |
|
"input_norm": 31.998335037231442, |
|
"input_norm/layer0": 31.998335037231442, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1746, |
|
"max_norm": 63.604644775390625, |
|
"max_norm/layer0": 63.604644775390625, |
|
"mean_norm": 41.530447006225586, |
|
"mean_norm/layer0": 41.530447006225586, |
|
"multicode_k": 1, |
|
"output_norm": 18.247568238576257, |
|
"output_norm/layer0": 18.247568238576257, |
|
"step": 4750 |
|
}, |
|
{ |
|
"MSE": 641.3402144411094, |
|
"MSE/layer0": 641.3402144411094, |
|
"dead_code_fraction": 0.16465, |
|
"dead_code_fraction/layer0": 0.16465, |
|
"epoch": 1.0, |
|
"input_norm": 31.998328861016873, |
|
"input_norm/layer0": 31.998328861016873, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1589, |
|
"max_norm": 63.7794303894043, |
|
"max_norm/layer0": 63.7794303894043, |
|
"mean_norm": 41.577613830566406, |
|
"mean_norm/layer0": 41.577613830566406, |
|
"multicode_k": 1, |
|
"output_norm": 18.227145007068557, |
|
"output_norm/layer0": 18.227145007068557, |
|
"step": 4800 |
|
}, |
|
{ |
|
"MSE": 640.0454110717772, |
|
"MSE/layer0": 640.0454110717772, |
|
"dead_code_fraction": 0.16635, |
|
"dead_code_fraction/layer0": 0.16635, |
|
"epoch": 1.01, |
|
"input_norm": 31.998361120224008, |
|
"input_norm/layer0": 31.998361120224008, |
|
"learning_rate": 0.0005, |
|
"loss": 2.2585, |
|
"max_norm": 63.96126937866211, |
|
"max_norm/layer0": 63.96126937866211, |
|
"mean_norm": 41.62501525878906, |
|
"mean_norm/layer0": 41.62501525878906, |
|
"multicode_k": 1, |
|
"output_norm": 18.258941303888953, |
|
"output_norm/layer0": 18.258941303888953, |
|
"step": 4850 |
|
}, |
|
{ |
|
"MSE": 640.0055624389651, |
|
"MSE/layer0": 640.0055624389651, |
|
"dead_code_fraction": 0.16515, |
|
"dead_code_fraction/layer0": 0.16515, |
|
"epoch": 1.01, |
|
"input_norm": 31.998340495427446, |
|
"input_norm/layer0": 31.998340495427446, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1578, |
|
"max_norm": 64.13137817382812, |
|
"max_norm/layer0": 64.13137817382812, |
|
"mean_norm": 41.672542572021484, |
|
"mean_norm/layer0": 41.672542572021484, |
|
"multicode_k": 1, |
|
"output_norm": 18.272732003529867, |
|
"output_norm/layer0": 18.272732003529867, |
|
"step": 4900 |
|
}, |
|
{ |
|
"MSE": 640.108183898926, |
|
"MSE/layer0": 640.108183898926, |
|
"dead_code_fraction": 0.1668, |
|
"dead_code_fraction/layer0": 0.1668, |
|
"epoch": 1.02, |
|
"input_norm": 31.998351519902535, |
|
"input_norm/layer0": 31.998351519902535, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1809, |
|
"max_norm": 64.30120086669922, |
|
"max_norm/layer0": 64.30120086669922, |
|
"mean_norm": 41.71914291381836, |
|
"mean_norm/layer0": 41.71914291381836, |
|
"multicode_k": 1, |
|
"output_norm": 18.278290322621658, |
|
"output_norm/layer0": 18.278290322621658, |
|
"step": 4950 |
|
}, |
|
{ |
|
"MSE": 639.8438139851887, |
|
"MSE/layer0": 639.8438139851887, |
|
"dead_code_fraction": 0.1671, |
|
"dead_code_fraction/layer0": 0.1671, |
|
"epoch": 1.02, |
|
"input_norm": 31.998358796437586, |
|
"input_norm/layer0": 31.998358796437586, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1901, |
|
"max_norm": 64.4720230102539, |
|
"max_norm/layer0": 64.4720230102539, |
|
"mean_norm": 41.76571464538574, |
|
"mean_norm/layer0": 41.76571464538574, |
|
"multicode_k": 1, |
|
"output_norm": 18.29636260350546, |
|
"output_norm/layer0": 18.29636260350546, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_MSE/layer0": 638.1766108092672, |
|
"eval_accuracy": 0.5013711247409516, |
|
"eval_dead_code_fraction/layer0": 0.16955, |
|
"eval_input_norm/layer0": 31.99836045128427, |
|
"eval_loss": 2.202561616897583, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 18.311866774487846, |
|
"eval_runtime": 158.3836, |
|
"eval_samples_per_second": 29.189, |
|
"eval_steps_per_second": 1.825, |
|
"step": 5000 |
|
}, |
|
{ |
|
"MSE": 639.5863418579103, |
|
"MSE/layer0": 639.5863418579103, |
|
"dead_code_fraction": 0.1675, |
|
"dead_code_fraction/layer0": 0.1675, |
|
"epoch": 1.03, |
|
"input_norm": 31.99836014429728, |
|
"input_norm/layer0": 31.99836014429728, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1914, |
|
"max_norm": 64.65907287597656, |
|
"max_norm/layer0": 64.65907287597656, |
|
"mean_norm": 41.8120174407959, |
|
"mean_norm/layer0": 41.8120174407959, |
|
"multicode_k": 1, |
|
"output_norm": 18.301887426376346, |
|
"output_norm/layer0": 18.301887426376346, |
|
"step": 5050 |
|
}, |
|
{ |
|
"MSE": 639.5830181884764, |
|
"MSE/layer0": 639.5830181884764, |
|
"dead_code_fraction": 0.16545, |
|
"dead_code_fraction/layer0": 0.16545, |
|
"epoch": 1.03, |
|
"input_norm": 31.998363596598292, |
|
"input_norm/layer0": 31.998363596598292, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1503, |
|
"max_norm": 64.83207702636719, |
|
"max_norm/layer0": 64.83207702636719, |
|
"mean_norm": 41.85700988769531, |
|
"mean_norm/layer0": 41.85700988769531, |
|
"multicode_k": 1, |
|
"output_norm": 18.3204355875651, |
|
"output_norm/layer0": 18.3204355875651, |
|
"step": 5100 |
|
}, |
|
{ |
|
"MSE": 640.3749603271485, |
|
"MSE/layer0": 640.3749603271485, |
|
"dead_code_fraction": 0.16725, |
|
"dead_code_fraction/layer0": 0.16725, |
|
"epoch": 1.04, |
|
"input_norm": 31.9983703358968, |
|
"input_norm/layer0": 31.9983703358968, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1634, |
|
"max_norm": 65.003662109375, |
|
"max_norm/layer0": 65.003662109375, |
|
"mean_norm": 41.90180778503418, |
|
"mean_norm/layer0": 41.90180778503418, |
|
"multicode_k": 1, |
|
"output_norm": 18.316434319814057, |
|
"output_norm/layer0": 18.316434319814057, |
|
"step": 5150 |
|
}, |
|
{ |
|
"MSE": 639.0211893717446, |
|
"MSE/layer0": 639.0211893717446, |
|
"dead_code_fraction": 0.16875, |
|
"dead_code_fraction/layer0": 0.16875, |
|
"epoch": 1.04, |
|
"input_norm": 31.998389561971024, |
|
"input_norm/layer0": 31.998389561971024, |
|
"learning_rate": 0.0005, |
|
"loss": 2.224, |
|
"max_norm": 65.19213104248047, |
|
"max_norm/layer0": 65.19213104248047, |
|
"mean_norm": 41.94645309448242, |
|
"mean_norm/layer0": 41.94645309448242, |
|
"multicode_k": 1, |
|
"output_norm": 18.33804360071819, |
|
"output_norm/layer0": 18.33804360071819, |
|
"step": 5200 |
|
}, |
|
{ |
|
"MSE": 638.6207899983721, |
|
"MSE/layer0": 638.6207899983721, |
|
"dead_code_fraction": 0.17055, |
|
"dead_code_fraction/layer0": 0.17055, |
|
"epoch": 1.05, |
|
"input_norm": 31.998394203186038, |
|
"input_norm/layer0": 31.998394203186038, |
|
"learning_rate": 0.0005, |
|
"loss": 2.2235, |
|
"max_norm": 65.36846160888672, |
|
"max_norm/layer0": 65.36846160888672, |
|
"mean_norm": 41.991315841674805, |
|
"mean_norm/layer0": 41.991315841674805, |
|
"multicode_k": 1, |
|
"output_norm": 18.346421286265045, |
|
"output_norm/layer0": 18.346421286265045, |
|
"step": 5250 |
|
}, |
|
{ |
|
"MSE": 638.3484961954751, |
|
"MSE/layer0": 638.3484961954751, |
|
"dead_code_fraction": 0.1704, |
|
"dead_code_fraction/layer0": 0.1704, |
|
"epoch": 1.05, |
|
"input_norm": 31.998402004241942, |
|
"input_norm/layer0": 31.998402004241942, |
|
"learning_rate": 0.0005, |
|
"loss": 2.209, |
|
"max_norm": 65.53041076660156, |
|
"max_norm/layer0": 65.53041076660156, |
|
"mean_norm": 42.0357780456543, |
|
"mean_norm/layer0": 42.0357780456543, |
|
"multicode_k": 1, |
|
"output_norm": 18.351918992996215, |
|
"output_norm/layer0": 18.351918992996215, |
|
"step": 5300 |
|
}, |
|
{ |
|
"MSE": 638.9349023437496, |
|
"MSE/layer0": 638.9349023437496, |
|
"dead_code_fraction": 0.1671, |
|
"dead_code_fraction/layer0": 0.1671, |
|
"epoch": 1.06, |
|
"input_norm": 31.998392171859756, |
|
"input_norm/layer0": 31.998392171859756, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1737, |
|
"max_norm": 65.69444274902344, |
|
"max_norm/layer0": 65.69444274902344, |
|
"mean_norm": 42.078935623168945, |
|
"mean_norm/layer0": 42.078935623168945, |
|
"multicode_k": 1, |
|
"output_norm": 18.365610707600908, |
|
"output_norm/layer0": 18.365610707600908, |
|
"step": 5350 |
|
}, |
|
{ |
|
"MSE": 638.1850768025716, |
|
"MSE/layer0": 638.1850768025716, |
|
"dead_code_fraction": 0.17125, |
|
"dead_code_fraction/layer0": 0.17125, |
|
"epoch": 1.06, |
|
"input_norm": 31.99840373039246, |
|
"input_norm/layer0": 31.99840373039246, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1904, |
|
"max_norm": 65.84613037109375, |
|
"max_norm/layer0": 65.84613037109375, |
|
"mean_norm": 42.122589111328125, |
|
"mean_norm/layer0": 42.122589111328125, |
|
"multicode_k": 1, |
|
"output_norm": 18.371175734202062, |
|
"output_norm/layer0": 18.371175734202062, |
|
"step": 5400 |
|
}, |
|
{ |
|
"MSE": 637.5771400960282, |
|
"MSE/layer0": 637.5771400960282, |
|
"dead_code_fraction": 0.17005, |
|
"dead_code_fraction/layer0": 0.17005, |
|
"epoch": 1.07, |
|
"input_norm": 31.998408838907892, |
|
"input_norm/layer0": 31.998408838907892, |
|
"learning_rate": 0.0005, |
|
"loss": 2.2013, |
|
"max_norm": 66.00259399414062, |
|
"max_norm/layer0": 66.00259399414062, |
|
"mean_norm": 42.16551399230957, |
|
"mean_norm/layer0": 42.16551399230957, |
|
"multicode_k": 1, |
|
"output_norm": 18.396056934992465, |
|
"output_norm/layer0": 18.396056934992465, |
|
"step": 5450 |
|
}, |
|
{ |
|
"MSE": 637.4973764038084, |
|
"MSE/layer0": 637.4973764038084, |
|
"dead_code_fraction": 0.17135, |
|
"dead_code_fraction/layer0": 0.17135, |
|
"epoch": 1.07, |
|
"input_norm": 31.998402996063238, |
|
"input_norm/layer0": 31.998402996063238, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1686, |
|
"max_norm": 66.15951538085938, |
|
"max_norm/layer0": 66.15951538085938, |
|
"mean_norm": 42.207963943481445, |
|
"mean_norm/layer0": 42.207963943481445, |
|
"multicode_k": 1, |
|
"output_norm": 18.402882191340133, |
|
"output_norm/layer0": 18.402882191340133, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"eval_MSE/layer0": 638.6084431543663, |
|
"eval_accuracy": 0.5026125270625071, |
|
"eval_dead_code_fraction/layer0": 0.17165, |
|
"eval_input_norm/layer0": 31.99841410479916, |
|
"eval_loss": 2.1934523582458496, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 18.401259186926417, |
|
"eval_runtime": 158.4926, |
|
"eval_samples_per_second": 29.169, |
|
"eval_steps_per_second": 1.823, |
|
"step": 5500 |
|
}, |
|
{ |
|
"MSE": 637.3816906738282, |
|
"MSE/layer0": 637.3816906738282, |
|
"dead_code_fraction": 0.17125, |
|
"dead_code_fraction/layer0": 0.17125, |
|
"epoch": 1.08, |
|
"input_norm": 31.998415158589676, |
|
"input_norm/layer0": 31.998415158589676, |
|
"learning_rate": 0.0005, |
|
"loss": 2.2097, |
|
"max_norm": 66.32366180419922, |
|
"max_norm/layer0": 66.32366180419922, |
|
"mean_norm": 42.25027084350586, |
|
"mean_norm/layer0": 42.25027084350586, |
|
"multicode_k": 1, |
|
"output_norm": 18.40568763732911, |
|
"output_norm/layer0": 18.40568763732911, |
|
"step": 5550 |
|
}, |
|
{ |
|
"MSE": 636.5928268432615, |
|
"MSE/layer0": 636.5928268432615, |
|
"dead_code_fraction": 0.1711, |
|
"dead_code_fraction/layer0": 0.1711, |
|
"epoch": 1.08, |
|
"input_norm": 31.99841807047526, |
|
"input_norm/layer0": 31.99841807047526, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1987, |
|
"max_norm": 66.49840545654297, |
|
"max_norm/layer0": 66.49840545654297, |
|
"mean_norm": 42.29284858703613, |
|
"mean_norm/layer0": 42.29284858703613, |
|
"multicode_k": 1, |
|
"output_norm": 18.424939454396565, |
|
"output_norm/layer0": 18.424939454396565, |
|
"step": 5600 |
|
}, |
|
{ |
|
"MSE": 637.195534973145, |
|
"MSE/layer0": 637.195534973145, |
|
"dead_code_fraction": 0.17175, |
|
"dead_code_fraction/layer0": 0.17175, |
|
"epoch": 1.09, |
|
"input_norm": 31.99841377894082, |
|
"input_norm/layer0": 31.99841377894082, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1571, |
|
"max_norm": 66.6655502319336, |
|
"max_norm/layer0": 66.6655502319336, |
|
"mean_norm": 42.33401679992676, |
|
"mean_norm/layer0": 42.33401679992676, |
|
"multicode_k": 1, |
|
"output_norm": 18.427337226867675, |
|
"output_norm/layer0": 18.427337226867675, |
|
"step": 5650 |
|
}, |
|
{ |
|
"MSE": 635.8865025838217, |
|
"MSE/layer0": 635.8865025838217, |
|
"dead_code_fraction": 0.1736, |
|
"dead_code_fraction/layer0": 0.1736, |
|
"epoch": 1.09, |
|
"input_norm": 31.998435058593753, |
|
"input_norm/layer0": 31.998435058593753, |
|
"learning_rate": 0.0005, |
|
"loss": 2.2146, |
|
"max_norm": 66.82868957519531, |
|
"max_norm/layer0": 66.82868957519531, |
|
"mean_norm": 42.37582206726074, |
|
"mean_norm/layer0": 42.37582206726074, |
|
"multicode_k": 1, |
|
"output_norm": 18.443573204676298, |
|
"output_norm/layer0": 18.443573204676298, |
|
"step": 5700 |
|
}, |
|
{ |
|
"MSE": 636.1581252034503, |
|
"MSE/layer0": 636.1581252034503, |
|
"dead_code_fraction": 0.17225, |
|
"dead_code_fraction/layer0": 0.17225, |
|
"epoch": 1.1, |
|
"input_norm": 31.998433354695635, |
|
"input_norm/layer0": 31.998433354695635, |
|
"learning_rate": 0.0005, |
|
"loss": 2.171, |
|
"max_norm": 66.9796371459961, |
|
"max_norm/layer0": 66.9796371459961, |
|
"mean_norm": 42.41728591918945, |
|
"mean_norm/layer0": 42.41728591918945, |
|
"multicode_k": 1, |
|
"output_norm": 18.440257479349775, |
|
"output_norm/layer0": 18.440257479349775, |
|
"step": 5750 |
|
}, |
|
{ |
|
"MSE": 636.7286339314779, |
|
"MSE/layer0": 636.7286339314779, |
|
"dead_code_fraction": 0.1738, |
|
"dead_code_fraction/layer0": 0.1738, |
|
"epoch": 1.1, |
|
"input_norm": 31.998429416020713, |
|
"input_norm/layer0": 31.998429416020713, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1502, |
|
"max_norm": 67.13478088378906, |
|
"max_norm/layer0": 67.13478088378906, |
|
"mean_norm": 42.45817756652832, |
|
"mean_norm/layer0": 42.45817756652832, |
|
"multicode_k": 1, |
|
"output_norm": 18.442232058842986, |
|
"output_norm/layer0": 18.442232058842986, |
|
"step": 5800 |
|
}, |
|
{ |
|
"MSE": 635.2576449584958, |
|
"MSE/layer0": 635.2576449584958, |
|
"dead_code_fraction": 0.17405, |
|
"dead_code_fraction/layer0": 0.17405, |
|
"epoch": 1.11, |
|
"input_norm": 31.99844219843547, |
|
"input_norm/layer0": 31.99844219843547, |
|
"learning_rate": 0.0005, |
|
"loss": 2.2067, |
|
"max_norm": 67.28919982910156, |
|
"max_norm/layer0": 67.28919982910156, |
|
"mean_norm": 42.49948501586914, |
|
"mean_norm/layer0": 42.49948501586914, |
|
"multicode_k": 1, |
|
"output_norm": 18.46717386881511, |
|
"output_norm/layer0": 18.46717386881511, |
|
"step": 5850 |
|
}, |
|
{ |
|
"MSE": 636.0759664916989, |
|
"MSE/layer0": 636.0759664916989, |
|
"dead_code_fraction": 0.17355, |
|
"dead_code_fraction/layer0": 0.17355, |
|
"epoch": 1.11, |
|
"input_norm": 31.998439470926915, |
|
"input_norm/layer0": 31.998439470926915, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1543, |
|
"max_norm": 67.44383239746094, |
|
"max_norm/layer0": 67.44383239746094, |
|
"mean_norm": 42.53946495056152, |
|
"mean_norm/layer0": 42.53946495056152, |
|
"multicode_k": 1, |
|
"output_norm": 18.469777971903483, |
|
"output_norm/layer0": 18.469777971903483, |
|
"step": 5900 |
|
}, |
|
{ |
|
"MSE": 635.3813305664057, |
|
"MSE/layer0": 635.3813305664057, |
|
"dead_code_fraction": 0.17405, |
|
"dead_code_fraction/layer0": 0.17405, |
|
"epoch": 1.12, |
|
"input_norm": 31.99844372113545, |
|
"input_norm/layer0": 31.99844372113545, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1846, |
|
"max_norm": 67.59025573730469, |
|
"max_norm/layer0": 67.59025573730469, |
|
"mean_norm": 42.58071327209473, |
|
"mean_norm/layer0": 42.58071327209473, |
|
"multicode_k": 1, |
|
"output_norm": 18.477715517679847, |
|
"output_norm/layer0": 18.477715517679847, |
|
"step": 5950 |
|
}, |
|
{ |
|
"MSE": 634.5524212646484, |
|
"MSE/layer0": 634.5524212646484, |
|
"dead_code_fraction": 0.17535, |
|
"dead_code_fraction/layer0": 0.17535, |
|
"epoch": 1.12, |
|
"input_norm": 31.998457225163776, |
|
"input_norm/layer0": 31.998457225163776, |
|
"learning_rate": 0.0005, |
|
"loss": 2.2158, |
|
"max_norm": 67.7379379272461, |
|
"max_norm/layer0": 67.7379379272461, |
|
"mean_norm": 42.62178421020508, |
|
"mean_norm/layer0": 42.62178421020508, |
|
"multicode_k": 1, |
|
"output_norm": 18.489366165796913, |
|
"output_norm/layer0": 18.489366165796913, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"eval_MSE/layer0": 632.9325560995336, |
|
"eval_accuracy": 0.5036799089257694, |
|
"eval_dead_code_fraction/layer0": 0.17795, |
|
"eval_input_norm/layer0": 31.998461353451354, |
|
"eval_loss": 2.1832942962646484, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 18.51493810096293, |
|
"eval_runtime": 158.8489, |
|
"eval_samples_per_second": 29.103, |
|
"eval_steps_per_second": 1.819, |
|
"step": 6000 |
|
}, |
|
{ |
|
"MSE": 634.7784757486979, |
|
"MSE/layer0": 634.7784757486979, |
|
"dead_code_fraction": 0.1755, |
|
"dead_code_fraction/layer0": 0.1755, |
|
"epoch": 1.13, |
|
"input_norm": 31.99845712025961, |
|
"input_norm/layer0": 31.99845712025961, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1789, |
|
"max_norm": 67.8902816772461, |
|
"max_norm/layer0": 67.8902816772461, |
|
"mean_norm": 42.66269874572754, |
|
"mean_norm/layer0": 42.66269874572754, |
|
"multicode_k": 1, |
|
"output_norm": 18.49625307718913, |
|
"output_norm/layer0": 18.49625307718913, |
|
"step": 6050 |
|
}, |
|
{ |
|
"MSE": 634.5078458658851, |
|
"MSE/layer0": 634.5078458658851, |
|
"dead_code_fraction": 0.17445, |
|
"dead_code_fraction/layer0": 0.17445, |
|
"epoch": 1.13, |
|
"input_norm": 31.99845917383831, |
|
"input_norm/layer0": 31.99845917383831, |
|
"learning_rate": 0.0005, |
|
"loss": 2.2009, |
|
"max_norm": 68.04124450683594, |
|
"max_norm/layer0": 68.04124450683594, |
|
"mean_norm": 42.70250701904297, |
|
"mean_norm/layer0": 42.70250701904297, |
|
"multicode_k": 1, |
|
"output_norm": 18.514623686472582, |
|
"output_norm/layer0": 18.514623686472582, |
|
"step": 6100 |
|
}, |
|
{ |
|
"MSE": 634.443066914876, |
|
"MSE/layer0": 634.443066914876, |
|
"dead_code_fraction": 0.17575, |
|
"dead_code_fraction/layer0": 0.17575, |
|
"epoch": 1.14, |
|
"input_norm": 31.99845913887024, |
|
"input_norm/layer0": 31.99845913887024, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1623, |
|
"max_norm": 68.17865753173828, |
|
"max_norm/layer0": 68.17865753173828, |
|
"mean_norm": 42.742488861083984, |
|
"mean_norm/layer0": 42.742488861083984, |
|
"multicode_k": 1, |
|
"output_norm": 18.513023862838743, |
|
"output_norm/layer0": 18.513023862838743, |
|
"step": 6150 |
|
}, |
|
{ |
|
"MSE": 633.6522382609048, |
|
"MSE/layer0": 633.6522382609048, |
|
"dead_code_fraction": 0.17475, |
|
"dead_code_fraction/layer0": 0.17475, |
|
"epoch": 1.14, |
|
"input_norm": 31.998471844991045, |
|
"input_norm/layer0": 31.998471844991045, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1824, |
|
"max_norm": 68.31253051757812, |
|
"max_norm/layer0": 68.31253051757812, |
|
"mean_norm": 42.782148361206055, |
|
"mean_norm/layer0": 42.782148361206055, |
|
"multicode_k": 1, |
|
"output_norm": 18.529316590627033, |
|
"output_norm/layer0": 18.529316590627033, |
|
"step": 6200 |
|
}, |
|
{ |
|
"MSE": 634.0474910481774, |
|
"MSE/layer0": 634.0474910481774, |
|
"dead_code_fraction": 0.1771, |
|
"dead_code_fraction/layer0": 0.1771, |
|
"epoch": 1.15, |
|
"input_norm": 31.998480736414585, |
|
"input_norm/layer0": 31.998480736414585, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1948, |
|
"max_norm": 68.44271850585938, |
|
"max_norm/layer0": 68.44271850585938, |
|
"mean_norm": 42.82079887390137, |
|
"mean_norm/layer0": 42.82079887390137, |
|
"multicode_k": 1, |
|
"output_norm": 18.524528849919633, |
|
"output_norm/layer0": 18.524528849919633, |
|
"step": 6250 |
|
}, |
|
{ |
|
"MSE": 633.648407084147, |
|
"MSE/layer0": 633.648407084147, |
|
"dead_code_fraction": 0.1745, |
|
"dead_code_fraction/layer0": 0.1745, |
|
"epoch": 1.15, |
|
"input_norm": 31.998468182881673, |
|
"input_norm/layer0": 31.998468182881673, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1145, |
|
"max_norm": 68.57721710205078, |
|
"max_norm/layer0": 68.57721710205078, |
|
"mean_norm": 42.859825134277344, |
|
"mean_norm/layer0": 42.859825134277344, |
|
"multicode_k": 1, |
|
"output_norm": 18.540853935877482, |
|
"output_norm/layer0": 18.540853935877482, |
|
"step": 6300 |
|
}, |
|
{ |
|
"MSE": 633.5945191446937, |
|
"MSE/layer0": 633.5945191446937, |
|
"dead_code_fraction": 0.17705, |
|
"dead_code_fraction/layer0": 0.17705, |
|
"epoch": 1.16, |
|
"input_norm": 31.99847273508707, |
|
"input_norm/layer0": 31.99847273508707, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1507, |
|
"max_norm": 68.7186050415039, |
|
"max_norm/layer0": 68.7186050415039, |
|
"mean_norm": 42.897830963134766, |
|
"mean_norm/layer0": 42.897830963134766, |
|
"multicode_k": 1, |
|
"output_norm": 18.55124579429626, |
|
"output_norm/layer0": 18.55124579429626, |
|
"step": 6350 |
|
}, |
|
{ |
|
"MSE": 632.1478841145836, |
|
"MSE/layer0": 632.1478841145836, |
|
"dead_code_fraction": 0.1775, |
|
"dead_code_fraction/layer0": 0.1775, |
|
"epoch": 1.16, |
|
"input_norm": 31.9984964243571, |
|
"input_norm/layer0": 31.9984964243571, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1962, |
|
"max_norm": 68.85418701171875, |
|
"max_norm/layer0": 68.85418701171875, |
|
"mean_norm": 42.937448501586914, |
|
"mean_norm/layer0": 42.937448501586914, |
|
"multicode_k": 1, |
|
"output_norm": 18.5615934785207, |
|
"output_norm/layer0": 18.5615934785207, |
|
"step": 6400 |
|
}, |
|
{ |
|
"MSE": 632.57952931722, |
|
"MSE/layer0": 632.57952931722, |
|
"dead_code_fraction": 0.1777, |
|
"dead_code_fraction/layer0": 0.1777, |
|
"epoch": 1.17, |
|
"input_norm": 31.998487294514977, |
|
"input_norm/layer0": 31.998487294514977, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1627, |
|
"max_norm": 69.0008316040039, |
|
"max_norm/layer0": 69.0008316040039, |
|
"mean_norm": 42.97622108459473, |
|
"mean_norm/layer0": 42.97622108459473, |
|
"multicode_k": 1, |
|
"output_norm": 18.57248200734457, |
|
"output_norm/layer0": 18.57248200734457, |
|
"step": 6450 |
|
}, |
|
{ |
|
"MSE": 631.0360174560547, |
|
"MSE/layer0": 631.0360174560547, |
|
"dead_code_fraction": 0.1784, |
|
"dead_code_fraction/layer0": 0.1784, |
|
"epoch": 1.17, |
|
"input_norm": 31.998495709101356, |
|
"input_norm/layer0": 31.998495709101356, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1843, |
|
"max_norm": 69.13652038574219, |
|
"max_norm/layer0": 69.13652038574219, |
|
"mean_norm": 43.01558876037598, |
|
"mean_norm/layer0": 43.01558876037598, |
|
"multicode_k": 1, |
|
"output_norm": 18.591586551666268, |
|
"output_norm/layer0": 18.591586551666268, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"eval_MSE/layer0": 631.2925020152297, |
|
"eval_accuracy": 0.5039093283634951, |
|
"eval_dead_code_fraction/layer0": 0.1797, |
|
"eval_input_norm/layer0": 31.99848882414009, |
|
"eval_loss": 2.175981044769287, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 18.598594732777567, |
|
"eval_runtime": 158.1453, |
|
"eval_samples_per_second": 29.233, |
|
"eval_steps_per_second": 1.827, |
|
"step": 6500 |
|
}, |
|
{ |
|
"MSE": 631.294188741048, |
|
"MSE/layer0": 631.294188741048, |
|
"dead_code_fraction": 0.1796, |
|
"dead_code_fraction/layer0": 0.1796, |
|
"epoch": 1.18, |
|
"input_norm": 31.998505541483564, |
|
"input_norm/layer0": 31.998505541483564, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1855, |
|
"max_norm": 69.26646423339844, |
|
"max_norm/layer0": 69.26646423339844, |
|
"mean_norm": 43.0548152923584, |
|
"mean_norm/layer0": 43.0548152923584, |
|
"multicode_k": 1, |
|
"output_norm": 18.585241152445477, |
|
"output_norm/layer0": 18.585241152445477, |
|
"step": 6550 |
|
}, |
|
{ |
|
"MSE": 631.297376505534, |
|
"MSE/layer0": 631.297376505534, |
|
"dead_code_fraction": 0.1779, |
|
"dead_code_fraction/layer0": 0.1779, |
|
"epoch": 1.18, |
|
"input_norm": 31.998487745920816, |
|
"input_norm/layer0": 31.998487745920816, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1197, |
|
"max_norm": 69.3987808227539, |
|
"max_norm/layer0": 69.3987808227539, |
|
"mean_norm": 43.093589782714844, |
|
"mean_norm/layer0": 43.093589782714844, |
|
"multicode_k": 1, |
|
"output_norm": 18.605287278493257, |
|
"output_norm/layer0": 18.605287278493257, |
|
"step": 6600 |
|
}, |
|
{ |
|
"MSE": 630.8991915893555, |
|
"MSE/layer0": 630.8991915893555, |
|
"dead_code_fraction": 0.17815, |
|
"dead_code_fraction/layer0": 0.17815, |
|
"epoch": 1.19, |
|
"input_norm": 31.99848988215129, |
|
"input_norm/layer0": 31.99848988215129, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1248, |
|
"max_norm": 69.52507019042969, |
|
"max_norm/layer0": 69.52507019042969, |
|
"mean_norm": 43.132524490356445, |
|
"mean_norm/layer0": 43.132524490356445, |
|
"multicode_k": 1, |
|
"output_norm": 18.61235850652059, |
|
"output_norm/layer0": 18.61235850652059, |
|
"step": 6650 |
|
}, |
|
{ |
|
"MSE": 629.604686584473, |
|
"MSE/layer0": 629.604686584473, |
|
"dead_code_fraction": 0.17965, |
|
"dead_code_fraction/layer0": 0.17965, |
|
"epoch": 1.19, |
|
"input_norm": 31.99852681477865, |
|
"input_norm/layer0": 31.99852681477865, |
|
"learning_rate": 0.0005, |
|
"loss": 2.2265, |
|
"max_norm": 69.66030883789062, |
|
"max_norm/layer0": 69.66030883789062, |
|
"mean_norm": 43.17206573486328, |
|
"mean_norm/layer0": 43.17206573486328, |
|
"multicode_k": 1, |
|
"output_norm": 18.626948499679564, |
|
"output_norm/layer0": 18.626948499679564, |
|
"step": 6700 |
|
}, |
|
{ |
|
"MSE": 629.7875715128578, |
|
"MSE/layer0": 629.7875715128578, |
|
"dead_code_fraction": 0.1802, |
|
"dead_code_fraction/layer0": 0.1802, |
|
"epoch": 1.2, |
|
"input_norm": 31.998509550094596, |
|
"input_norm/layer0": 31.998509550094596, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1432, |
|
"max_norm": 69.78119659423828, |
|
"max_norm/layer0": 69.78119659423828, |
|
"mean_norm": 43.21029472351074, |
|
"mean_norm/layer0": 43.21029472351074, |
|
"multicode_k": 1, |
|
"output_norm": 18.639319947560622, |
|
"output_norm/layer0": 18.639319947560622, |
|
"step": 6750 |
|
}, |
|
{ |
|
"MSE": 629.3708419799802, |
|
"MSE/layer0": 629.3708419799802, |
|
"dead_code_fraction": 0.18015, |
|
"dead_code_fraction/layer0": 0.18015, |
|
"epoch": 1.2, |
|
"input_norm": 31.99851152102152, |
|
"input_norm/layer0": 31.99851152102152, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1606, |
|
"max_norm": 69.91252899169922, |
|
"max_norm/layer0": 69.91252899169922, |
|
"mean_norm": 43.24948692321777, |
|
"mean_norm/layer0": 43.24948692321777, |
|
"multicode_k": 1, |
|
"output_norm": 18.64606482187906, |
|
"output_norm/layer0": 18.64606482187906, |
|
"step": 6800 |
|
}, |
|
{ |
|
"MSE": 628.4038922119142, |
|
"MSE/layer0": 628.4038922119142, |
|
"dead_code_fraction": 0.1806, |
|
"dead_code_fraction/layer0": 0.1806, |
|
"epoch": 1.21, |
|
"input_norm": 31.998516721725462, |
|
"input_norm/layer0": 31.998516721725462, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1582, |
|
"max_norm": 70.04332733154297, |
|
"max_norm/layer0": 70.04332733154297, |
|
"mean_norm": 43.28862762451172, |
|
"mean_norm/layer0": 43.28862762451172, |
|
"multicode_k": 1, |
|
"output_norm": 18.669758415222162, |
|
"output_norm/layer0": 18.669758415222162, |
|
"step": 6850 |
|
}, |
|
{ |
|
"MSE": 628.1812467447919, |
|
"MSE/layer0": 628.1812467447919, |
|
"dead_code_fraction": 0.18055, |
|
"dead_code_fraction/layer0": 0.18055, |
|
"epoch": 1.21, |
|
"input_norm": 31.998515844345086, |
|
"input_norm/layer0": 31.998515844345086, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1433, |
|
"max_norm": 70.16979217529297, |
|
"max_norm/layer0": 70.16979217529297, |
|
"mean_norm": 43.327192306518555, |
|
"mean_norm/layer0": 43.327192306518555, |
|
"multicode_k": 1, |
|
"output_norm": 18.674684073130294, |
|
"output_norm/layer0": 18.674684073130294, |
|
"step": 6900 |
|
}, |
|
{ |
|
"MSE": 628.1862957763672, |
|
"MSE/layer0": 628.1862957763672, |
|
"dead_code_fraction": 0.18045, |
|
"dead_code_fraction/layer0": 0.18045, |
|
"epoch": 1.22, |
|
"input_norm": 31.99852259953816, |
|
"input_norm/layer0": 31.99852259953816, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1458, |
|
"max_norm": 70.29747772216797, |
|
"max_norm/layer0": 70.29747772216797, |
|
"mean_norm": 43.36609077453613, |
|
"mean_norm/layer0": 43.36609077453613, |
|
"multicode_k": 1, |
|
"output_norm": 18.682749029795335, |
|
"output_norm/layer0": 18.682749029795335, |
|
"step": 6950 |
|
}, |
|
{ |
|
"MSE": 627.7981392415361, |
|
"MSE/layer0": 627.7981392415361, |
|
"dead_code_fraction": 0.18045, |
|
"dead_code_fraction/layer0": 0.18045, |
|
"epoch": 1.22, |
|
"input_norm": 31.998523871103927, |
|
"input_norm/layer0": 31.998523871103927, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1339, |
|
"max_norm": 70.425537109375, |
|
"max_norm/layer0": 70.425537109375, |
|
"mean_norm": 43.40445899963379, |
|
"mean_norm/layer0": 43.40445899963379, |
|
"multicode_k": 1, |
|
"output_norm": 18.696380834579458, |
|
"output_norm/layer0": 18.696380834579458, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"eval_MSE/layer0": 627.9790743019787, |
|
"eval_accuracy": 0.5048263717749389, |
|
"eval_dead_code_fraction/layer0": 0.1819, |
|
"eval_input_norm/layer0": 31.998524618592334, |
|
"eval_loss": 2.1696202754974365, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 18.705300997223095, |
|
"eval_runtime": 159.1692, |
|
"eval_samples_per_second": 29.045, |
|
"eval_steps_per_second": 1.816, |
|
"step": 7000 |
|
}, |
|
{ |
|
"MSE": 627.3165437825519, |
|
"MSE/layer0": 627.3165437825519, |
|
"dead_code_fraction": 0.1822, |
|
"dead_code_fraction/layer0": 0.1822, |
|
"epoch": 1.23, |
|
"input_norm": 31.99852600097656, |
|
"input_norm/layer0": 31.99852600097656, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1483, |
|
"max_norm": 70.54450988769531, |
|
"max_norm/layer0": 70.54450988769531, |
|
"mean_norm": 43.442848205566406, |
|
"mean_norm/layer0": 43.442848205566406, |
|
"multicode_k": 1, |
|
"output_norm": 18.700957148869843, |
|
"output_norm/layer0": 18.700957148869843, |
|
"step": 7050 |
|
}, |
|
{ |
|
"MSE": 626.7479965209961, |
|
"MSE/layer0": 626.7479965209961, |
|
"dead_code_fraction": 0.1804, |
|
"dead_code_fraction/layer0": 0.1804, |
|
"epoch": 1.23, |
|
"input_norm": 31.998541386922206, |
|
"input_norm/layer0": 31.998541386922206, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1512, |
|
"max_norm": 70.66608428955078, |
|
"max_norm/layer0": 70.66608428955078, |
|
"mean_norm": 43.48159599304199, |
|
"mean_norm/layer0": 43.48159599304199, |
|
"multicode_k": 1, |
|
"output_norm": 18.714396947224948, |
|
"output_norm/layer0": 18.714396947224948, |
|
"step": 7100 |
|
}, |
|
{ |
|
"MSE": 626.4450497436519, |
|
"MSE/layer0": 626.4450497436519, |
|
"dead_code_fraction": 0.1823, |
|
"dead_code_fraction/layer0": 0.1823, |
|
"epoch": 1.24, |
|
"input_norm": 31.998545411427806, |
|
"input_norm/layer0": 31.998545411427806, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1586, |
|
"max_norm": 70.7937240600586, |
|
"max_norm/layer0": 70.7937240600586, |
|
"mean_norm": 43.5198860168457, |
|
"mean_norm/layer0": 43.5198860168457, |
|
"multicode_k": 1, |
|
"output_norm": 18.726943721771242, |
|
"output_norm/layer0": 18.726943721771242, |
|
"step": 7150 |
|
}, |
|
{ |
|
"MSE": 626.1652618408202, |
|
"MSE/layer0": 626.1652618408202, |
|
"dead_code_fraction": 0.1814, |
|
"dead_code_fraction/layer0": 0.1814, |
|
"epoch": 1.24, |
|
"input_norm": 31.998541978200272, |
|
"input_norm/layer0": 31.998541978200272, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1552, |
|
"max_norm": 70.90862274169922, |
|
"max_norm/layer0": 70.90862274169922, |
|
"mean_norm": 43.55833053588867, |
|
"mean_norm/layer0": 43.55833053588867, |
|
"multicode_k": 1, |
|
"output_norm": 18.731371542612706, |
|
"output_norm/layer0": 18.731371542612706, |
|
"step": 7200 |
|
}, |
|
{ |
|
"MSE": 625.2572497558597, |
|
"MSE/layer0": 625.2572497558597, |
|
"dead_code_fraction": 0.1839, |
|
"dead_code_fraction/layer0": 0.1839, |
|
"epoch": 1.25, |
|
"input_norm": 31.998552770614626, |
|
"input_norm/layer0": 31.998552770614626, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1673, |
|
"max_norm": 71.0332260131836, |
|
"max_norm/layer0": 71.0332260131836, |
|
"mean_norm": 43.5967960357666, |
|
"mean_norm/layer0": 43.5967960357666, |
|
"multicode_k": 1, |
|
"output_norm": 18.756609748204536, |
|
"output_norm/layer0": 18.756609748204536, |
|
"step": 7250 |
|
}, |
|
{ |
|
"MSE": 624.7860372924804, |
|
"MSE/layer0": 624.7860372924804, |
|
"dead_code_fraction": 0.1831, |
|
"dead_code_fraction/layer0": 0.1831, |
|
"epoch": 1.25, |
|
"input_norm": 31.998555002212534, |
|
"input_norm/layer0": 31.998555002212534, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1575, |
|
"max_norm": 71.15364837646484, |
|
"max_norm/layer0": 71.15364837646484, |
|
"mean_norm": 43.63525199890137, |
|
"mean_norm/layer0": 43.63525199890137, |
|
"multicode_k": 1, |
|
"output_norm": 18.767410192489628, |
|
"output_norm/layer0": 18.767410192489628, |
|
"step": 7300 |
|
}, |
|
{ |
|
"MSE": 624.7060753377278, |
|
"MSE/layer0": 624.7060753377278, |
|
"dead_code_fraction": 0.18335, |
|
"dead_code_fraction/layer0": 0.18335, |
|
"epoch": 1.26, |
|
"input_norm": 31.99856230099995, |
|
"input_norm/layer0": 31.99856230099995, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1622, |
|
"max_norm": 71.2812271118164, |
|
"max_norm/layer0": 71.2812271118164, |
|
"mean_norm": 43.67383575439453, |
|
"mean_norm/layer0": 43.67383575439453, |
|
"multicode_k": 1, |
|
"output_norm": 18.77556623776755, |
|
"output_norm/layer0": 18.77556623776755, |
|
"step": 7350 |
|
}, |
|
{ |
|
"MSE": 623.9612900797528, |
|
"MSE/layer0": 623.9612900797528, |
|
"dead_code_fraction": 0.1834, |
|
"dead_code_fraction/layer0": 0.1834, |
|
"epoch": 1.26, |
|
"input_norm": 31.998564265569062, |
|
"input_norm/layer0": 31.998564265569062, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1721, |
|
"max_norm": 71.4082260131836, |
|
"max_norm/layer0": 71.4082260131836, |
|
"mean_norm": 43.71280097961426, |
|
"mean_norm/layer0": 43.71280097961426, |
|
"multicode_k": 1, |
|
"output_norm": 18.78839166323344, |
|
"output_norm/layer0": 18.78839166323344, |
|
"step": 7400 |
|
}, |
|
{ |
|
"MSE": 623.9870674641929, |
|
"MSE/layer0": 623.9870674641929, |
|
"dead_code_fraction": 0.18355, |
|
"dead_code_fraction/layer0": 0.18355, |
|
"epoch": 1.27, |
|
"input_norm": 31.998560991287228, |
|
"input_norm/layer0": 31.998560991287228, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1424, |
|
"max_norm": 71.52973937988281, |
|
"max_norm/layer0": 71.52973937988281, |
|
"mean_norm": 43.75117111206055, |
|
"mean_norm/layer0": 43.75117111206055, |
|
"multicode_k": 1, |
|
"output_norm": 18.79942525227863, |
|
"output_norm/layer0": 18.79942525227863, |
|
"step": 7450 |
|
}, |
|
{ |
|
"MSE": 622.7629538981118, |
|
"MSE/layer0": 622.7629538981118, |
|
"dead_code_fraction": 0.1844, |
|
"dead_code_fraction/layer0": 0.1844, |
|
"epoch": 1.27, |
|
"input_norm": 31.998580735524506, |
|
"input_norm/layer0": 31.998580735524506, |
|
"learning_rate": 0.0005, |
|
"loss": 2.187, |
|
"max_norm": 71.64968872070312, |
|
"max_norm/layer0": 71.64968872070312, |
|
"mean_norm": 43.790061950683594, |
|
"mean_norm/layer0": 43.790061950683594, |
|
"multicode_k": 1, |
|
"output_norm": 18.81509483655294, |
|
"output_norm/layer0": 18.81509483655294, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"eval_MSE/layer0": 622.122652727573, |
|
"eval_accuracy": 0.5062701283839631, |
|
"eval_dead_code_fraction/layer0": 0.18665, |
|
"eval_input_norm/layer0": 31.998566619663464, |
|
"eval_loss": 2.1583967208862305, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 18.83381110374323, |
|
"eval_runtime": 158.6442, |
|
"eval_samples_per_second": 29.141, |
|
"eval_steps_per_second": 1.822, |
|
"step": 7500 |
|
}, |
|
{ |
|
"MSE": 622.9042826334635, |
|
"MSE/layer0": 622.9042826334635, |
|
"dead_code_fraction": 0.1841, |
|
"dead_code_fraction/layer0": 0.1841, |
|
"epoch": 1.28, |
|
"input_norm": 31.998572101593023, |
|
"input_norm/layer0": 31.998572101593023, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1434, |
|
"max_norm": 71.76019287109375, |
|
"max_norm/layer0": 71.76019287109375, |
|
"mean_norm": 43.828460693359375, |
|
"mean_norm/layer0": 43.828460693359375, |
|
"multicode_k": 1, |
|
"output_norm": 18.82229045232136, |
|
"output_norm/layer0": 18.82229045232136, |
|
"step": 7550 |
|
}, |
|
{ |
|
"MSE": 621.695281575521, |
|
"MSE/layer0": 621.695281575521, |
|
"dead_code_fraction": 0.1854, |
|
"dead_code_fraction/layer0": 0.1854, |
|
"epoch": 1.28, |
|
"input_norm": 31.998584995269773, |
|
"input_norm/layer0": 31.998584995269773, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1712, |
|
"max_norm": 71.87606048583984, |
|
"max_norm/layer0": 71.87606048583984, |
|
"mean_norm": 43.867136001586914, |
|
"mean_norm/layer0": 43.867136001586914, |
|
"multicode_k": 1, |
|
"output_norm": 18.84749958992006, |
|
"output_norm/layer0": 18.84749958992006, |
|
"step": 7600 |
|
}, |
|
{ |
|
"MSE": 622.6274766031902, |
|
"MSE/layer0": 622.6274766031902, |
|
"dead_code_fraction": 0.18355, |
|
"dead_code_fraction/layer0": 0.18355, |
|
"epoch": 1.29, |
|
"input_norm": 31.998571812311802, |
|
"input_norm/layer0": 31.998571812311802, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1412, |
|
"max_norm": 71.98139953613281, |
|
"max_norm/layer0": 71.98139953613281, |
|
"mean_norm": 43.90544891357422, |
|
"mean_norm/layer0": 43.90544891357422, |
|
"multicode_k": 1, |
|
"output_norm": 18.83851943016053, |
|
"output_norm/layer0": 18.83851943016053, |
|
"step": 7650 |
|
}, |
|
{ |
|
"MSE": 621.3046355183919, |
|
"MSE/layer0": 621.3046355183919, |
|
"dead_code_fraction": 0.18495, |
|
"dead_code_fraction/layer0": 0.18495, |
|
"epoch": 1.29, |
|
"input_norm": 31.998585087458295, |
|
"input_norm/layer0": 31.998585087458295, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1711, |
|
"max_norm": 72.08447265625, |
|
"max_norm/layer0": 72.08447265625, |
|
"mean_norm": 43.94407653808594, |
|
"mean_norm/layer0": 43.94407653808594, |
|
"multicode_k": 1, |
|
"output_norm": 18.86037411053976, |
|
"output_norm/layer0": 18.86037411053976, |
|
"step": 7700 |
|
}, |
|
{ |
|
"MSE": 620.5873645019533, |
|
"MSE/layer0": 620.5873645019533, |
|
"dead_code_fraction": 0.18485, |
|
"dead_code_fraction/layer0": 0.18485, |
|
"epoch": 1.3, |
|
"input_norm": 31.998606751759848, |
|
"input_norm/layer0": 31.998606751759848, |
|
"learning_rate": 0.0005, |
|
"loss": 2.2069, |
|
"max_norm": 72.18034362792969, |
|
"max_norm/layer0": 72.18034362792969, |
|
"mean_norm": 43.9833927154541, |
|
"mean_norm/layer0": 43.9833927154541, |
|
"multicode_k": 1, |
|
"output_norm": 18.87507179578146, |
|
"output_norm/layer0": 18.87507179578146, |
|
"step": 7750 |
|
}, |
|
{ |
|
"MSE": 621.2272378540041, |
|
"MSE/layer0": 621.2272378540041, |
|
"dead_code_fraction": 0.18385, |
|
"dead_code_fraction/layer0": 0.18385, |
|
"epoch": 1.3, |
|
"input_norm": 31.998583949406935, |
|
"input_norm/layer0": 31.998583949406935, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1217, |
|
"max_norm": 72.27928924560547, |
|
"max_norm/layer0": 72.27928924560547, |
|
"mean_norm": 44.021806716918945, |
|
"mean_norm/layer0": 44.021806716918945, |
|
"multicode_k": 1, |
|
"output_norm": 18.877027104695642, |
|
"output_norm/layer0": 18.877027104695642, |
|
"step": 7800 |
|
}, |
|
{ |
|
"MSE": 620.067134602865, |
|
"MSE/layer0": 620.067134602865, |
|
"dead_code_fraction": 0.18535, |
|
"dead_code_fraction/layer0": 0.18535, |
|
"epoch": 1.31, |
|
"input_norm": 31.998594888051343, |
|
"input_norm/layer0": 31.998594888051343, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1753, |
|
"max_norm": 72.39033508300781, |
|
"max_norm/layer0": 72.39033508300781, |
|
"mean_norm": 44.060611724853516, |
|
"mean_norm/layer0": 44.060611724853516, |
|
"multicode_k": 1, |
|
"output_norm": 18.89820697466533, |
|
"output_norm/layer0": 18.89820697466533, |
|
"step": 7850 |
|
}, |
|
{ |
|
"MSE": 620.6704218546549, |
|
"MSE/layer0": 620.6704218546549, |
|
"dead_code_fraction": 0.18735, |
|
"dead_code_fraction/layer0": 0.18735, |
|
"epoch": 1.31, |
|
"input_norm": 31.998597246805822, |
|
"input_norm/layer0": 31.998597246805822, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1778, |
|
"max_norm": 72.4916000366211, |
|
"max_norm/layer0": 72.4916000366211, |
|
"mean_norm": 44.09913635253906, |
|
"mean_norm/layer0": 44.09913635253906, |
|
"multicode_k": 1, |
|
"output_norm": 18.890051161448145, |
|
"output_norm/layer0": 18.890051161448145, |
|
"step": 7900 |
|
}, |
|
{ |
|
"MSE": 619.2155123901367, |
|
"MSE/layer0": 619.2155123901367, |
|
"dead_code_fraction": 0.1863, |
|
"dead_code_fraction/layer0": 0.1863, |
|
"epoch": 1.32, |
|
"input_norm": 31.99860541343688, |
|
"input_norm/layer0": 31.99860541343688, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1684, |
|
"max_norm": 72.59037017822266, |
|
"max_norm/layer0": 72.59037017822266, |
|
"mean_norm": 44.13744926452637, |
|
"mean_norm/layer0": 44.13744926452637, |
|
"multicode_k": 1, |
|
"output_norm": 18.920912733078, |
|
"output_norm/layer0": 18.920912733078, |
|
"step": 7950 |
|
}, |
|
{ |
|
"MSE": 618.8985408528646, |
|
"MSE/layer0": 618.8985408528646, |
|
"dead_code_fraction": 0.1867, |
|
"dead_code_fraction/layer0": 0.1867, |
|
"epoch": 1.32, |
|
"input_norm": 31.998596220016488, |
|
"input_norm/layer0": 31.998596220016488, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1302, |
|
"max_norm": 72.69281768798828, |
|
"max_norm/layer0": 72.69281768798828, |
|
"mean_norm": 44.176042556762695, |
|
"mean_norm/layer0": 44.176042556762695, |
|
"multicode_k": 1, |
|
"output_norm": 18.93559975624085, |
|
"output_norm/layer0": 18.93559975624085, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"eval_MSE/layer0": 617.7161538934592, |
|
"eval_accuracy": 0.5071360017457022, |
|
"eval_dead_code_fraction/layer0": 0.18755, |
|
"eval_input_norm/layer0": 31.99860155017712, |
|
"eval_loss": 2.150786876678467, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 18.949325338731256, |
|
"eval_runtime": 158.4669, |
|
"eval_samples_per_second": 29.173, |
|
"eval_steps_per_second": 1.824, |
|
"step": 8000 |
|
}, |
|
{ |
|
"MSE": 619.1937561035155, |
|
"MSE/layer0": 619.1937561035155, |
|
"dead_code_fraction": 0.18685, |
|
"dead_code_fraction/layer0": 0.18685, |
|
"epoch": 1.33, |
|
"input_norm": 31.998596970240285, |
|
"input_norm/layer0": 31.998596970240285, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1279, |
|
"max_norm": 72.79032135009766, |
|
"max_norm/layer0": 72.79032135009766, |
|
"mean_norm": 44.21445846557617, |
|
"mean_norm/layer0": 44.21445846557617, |
|
"multicode_k": 1, |
|
"output_norm": 18.93686810175578, |
|
"output_norm/layer0": 18.93686810175578, |
|
"step": 8050 |
|
}, |
|
{ |
|
"MSE": 619.539402567546, |
|
"MSE/layer0": 619.539402567546, |
|
"dead_code_fraction": 0.18665, |
|
"dead_code_fraction/layer0": 0.18665, |
|
"epoch": 1.33, |
|
"input_norm": 31.998598492940268, |
|
"input_norm/layer0": 31.998598492940268, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1113, |
|
"max_norm": 72.88322448730469, |
|
"max_norm/layer0": 72.88322448730469, |
|
"mean_norm": 44.251609802246094, |
|
"mean_norm/layer0": 44.251609802246094, |
|
"multicode_k": 1, |
|
"output_norm": 18.939144274393726, |
|
"output_norm/layer0": 18.939144274393726, |
|
"step": 8100 |
|
}, |
|
{ |
|
"MSE": 617.7248203531905, |
|
"MSE/layer0": 617.7248203531905, |
|
"dead_code_fraction": 0.18555, |
|
"dead_code_fraction/layer0": 0.18555, |
|
"epoch": 1.34, |
|
"input_norm": 31.99861437161764, |
|
"input_norm/layer0": 31.99861437161764, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1592, |
|
"max_norm": 72.97504425048828, |
|
"max_norm/layer0": 72.97504425048828, |
|
"mean_norm": 44.289913177490234, |
|
"mean_norm/layer0": 44.289913177490234, |
|
"multicode_k": 1, |
|
"output_norm": 18.963457323710102, |
|
"output_norm/layer0": 18.963457323710102, |
|
"step": 8150 |
|
}, |
|
{ |
|
"MSE": 617.1626446533202, |
|
"MSE/layer0": 617.1626446533202, |
|
"dead_code_fraction": 0.1856, |
|
"dead_code_fraction/layer0": 0.1856, |
|
"epoch": 1.34, |
|
"input_norm": 31.998610553741443, |
|
"input_norm/layer0": 31.998610553741443, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1339, |
|
"max_norm": 73.06546020507812, |
|
"max_norm/layer0": 73.06546020507812, |
|
"mean_norm": 44.32819747924805, |
|
"mean_norm/layer0": 44.32819747924805, |
|
"multicode_k": 1, |
|
"output_norm": 18.980771627426144, |
|
"output_norm/layer0": 18.980771627426144, |
|
"step": 8200 |
|
}, |
|
{ |
|
"MSE": 616.5359758504233, |
|
"MSE/layer0": 616.5359758504233, |
|
"dead_code_fraction": 0.18785, |
|
"dead_code_fraction/layer0": 0.18785, |
|
"epoch": 1.35, |
|
"input_norm": 31.99861484845479, |
|
"input_norm/layer0": 31.99861484845479, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1287, |
|
"max_norm": 73.1684341430664, |
|
"max_norm/layer0": 73.1684341430664, |
|
"mean_norm": 44.36627197265625, |
|
"mean_norm/layer0": 44.36627197265625, |
|
"multicode_k": 1, |
|
"output_norm": 19.002285525004055, |
|
"output_norm/layer0": 19.002285525004055, |
|
"step": 8250 |
|
}, |
|
{ |
|
"MSE": 616.9324924723311, |
|
"MSE/layer0": 616.9324924723311, |
|
"dead_code_fraction": 0.18715, |
|
"dead_code_fraction/layer0": 0.18715, |
|
"epoch": 1.35, |
|
"input_norm": 31.998625895182286, |
|
"input_norm/layer0": 31.998625895182286, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1575, |
|
"max_norm": 73.259521484375, |
|
"max_norm/layer0": 73.259521484375, |
|
"mean_norm": 44.40446090698242, |
|
"mean_norm/layer0": 44.40446090698242, |
|
"multicode_k": 1, |
|
"output_norm": 18.992992315292362, |
|
"output_norm/layer0": 18.992992315292362, |
|
"step": 8300 |
|
}, |
|
{ |
|
"MSE": 616.2650039672851, |
|
"MSE/layer0": 616.2650039672851, |
|
"dead_code_fraction": 0.18655, |
|
"dead_code_fraction/layer0": 0.18655, |
|
"epoch": 1.36, |
|
"input_norm": 31.99862662315369, |
|
"input_norm/layer0": 31.99862662315369, |
|
"learning_rate": 0.0005, |
|
"loss": 2.139, |
|
"max_norm": 73.36270141601562, |
|
"max_norm/layer0": 73.36270141601562, |
|
"mean_norm": 44.44254493713379, |
|
"mean_norm/layer0": 44.44254493713379, |
|
"multicode_k": 1, |
|
"output_norm": 19.00672375679015, |
|
"output_norm/layer0": 19.00672375679015, |
|
"step": 8350 |
|
}, |
|
{ |
|
"MSE": 615.5159185791019, |
|
"MSE/layer0": 615.5159185791019, |
|
"dead_code_fraction": 0.18685, |
|
"dead_code_fraction/layer0": 0.18685, |
|
"epoch": 1.36, |
|
"input_norm": 31.998618663152055, |
|
"input_norm/layer0": 31.998618663152055, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1207, |
|
"max_norm": 73.45561981201172, |
|
"max_norm/layer0": 73.45561981201172, |
|
"mean_norm": 44.48077201843262, |
|
"mean_norm/layer0": 44.48077201843262, |
|
"multicode_k": 1, |
|
"output_norm": 19.030768597920748, |
|
"output_norm/layer0": 19.030768597920748, |
|
"step": 8400 |
|
}, |
|
{ |
|
"MSE": 615.7112675984704, |
|
"MSE/layer0": 615.7112675984704, |
|
"dead_code_fraction": 0.18675, |
|
"dead_code_fraction/layer0": 0.18675, |
|
"epoch": 1.37, |
|
"input_norm": 31.99863114674885, |
|
"input_norm/layer0": 31.99863114674885, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1394, |
|
"max_norm": 73.54468536376953, |
|
"max_norm/layer0": 73.54468536376953, |
|
"mean_norm": 44.5194206237793, |
|
"mean_norm/layer0": 44.5194206237793, |
|
"multicode_k": 1, |
|
"output_norm": 19.03362373669942, |
|
"output_norm/layer0": 19.03362373669942, |
|
"step": 8450 |
|
}, |
|
{ |
|
"MSE": 615.0864140828453, |
|
"MSE/layer0": 615.0864140828453, |
|
"dead_code_fraction": 0.1866, |
|
"dead_code_fraction/layer0": 0.1866, |
|
"epoch": 1.37, |
|
"input_norm": 31.9986399269104, |
|
"input_norm/layer0": 31.9986399269104, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1471, |
|
"max_norm": 73.64068603515625, |
|
"max_norm/layer0": 73.64068603515625, |
|
"mean_norm": 44.55780220031738, |
|
"mean_norm/layer0": 44.55780220031738, |
|
"multicode_k": 1, |
|
"output_norm": 19.04360143979391, |
|
"output_norm/layer0": 19.04360143979391, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"eval_MSE/layer0": 613.7248421548741, |
|
"eval_accuracy": 0.5081896395873495, |
|
"eval_dead_code_fraction/layer0": 0.1885, |
|
"eval_input_norm/layer0": 31.998632826486393, |
|
"eval_loss": 2.1443779468536377, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 19.066619998676906, |
|
"eval_runtime": 158.5923, |
|
"eval_samples_per_second": 29.15, |
|
"eval_steps_per_second": 1.822, |
|
"step": 8500 |
|
}, |
|
{ |
|
"MSE": 614.1585445149744, |
|
"MSE/layer0": 614.1585445149744, |
|
"dead_code_fraction": 0.18715, |
|
"dead_code_fraction/layer0": 0.18715, |
|
"epoch": 1.38, |
|
"input_norm": 31.99863867441813, |
|
"input_norm/layer0": 31.99863867441813, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1506, |
|
"max_norm": 73.73002624511719, |
|
"max_norm/layer0": 73.73002624511719, |
|
"mean_norm": 44.597002029418945, |
|
"mean_norm/layer0": 44.597002029418945, |
|
"multicode_k": 1, |
|
"output_norm": 19.06499721844991, |
|
"output_norm/layer0": 19.06499721844991, |
|
"step": 8550 |
|
}, |
|
{ |
|
"MSE": 614.256539204915, |
|
"MSE/layer0": 614.256539204915, |
|
"dead_code_fraction": 0.1879, |
|
"dead_code_fraction/layer0": 0.1879, |
|
"epoch": 1.38, |
|
"input_norm": 31.998648173014317, |
|
"input_norm/layer0": 31.998648173014317, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1643, |
|
"max_norm": 73.80333709716797, |
|
"max_norm/layer0": 73.80333709716797, |
|
"mean_norm": 44.63543891906738, |
|
"mean_norm/layer0": 44.63543891906738, |
|
"multicode_k": 1, |
|
"output_norm": 19.078293412526467, |
|
"output_norm/layer0": 19.078293412526467, |
|
"step": 8600 |
|
}, |
|
{ |
|
"MSE": 613.3546946207681, |
|
"MSE/layer0": 613.3546946207681, |
|
"dead_code_fraction": 0.1879, |
|
"dead_code_fraction/layer0": 0.1879, |
|
"epoch": 1.39, |
|
"input_norm": 31.99864864667257, |
|
"input_norm/layer0": 31.99864864667257, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1535, |
|
"max_norm": 73.89517974853516, |
|
"max_norm/layer0": 73.89517974853516, |
|
"mean_norm": 44.674211502075195, |
|
"mean_norm/layer0": 44.674211502075195, |
|
"multicode_k": 1, |
|
"output_norm": 19.09559381167095, |
|
"output_norm/layer0": 19.09559381167095, |
|
"step": 8650 |
|
}, |
|
{ |
|
"MSE": 613.6053087361654, |
|
"MSE/layer0": 613.6053087361654, |
|
"dead_code_fraction": 0.18645, |
|
"dead_code_fraction/layer0": 0.18645, |
|
"epoch": 1.39, |
|
"input_norm": 31.998652140299477, |
|
"input_norm/layer0": 31.998652140299477, |
|
"learning_rate": 0.0005, |
|
"loss": 2.137, |
|
"max_norm": 73.9770736694336, |
|
"max_norm/layer0": 73.9770736694336, |
|
"mean_norm": 44.71265983581543, |
|
"mean_norm/layer0": 44.71265983581543, |
|
"multicode_k": 1, |
|
"output_norm": 19.098618446985878, |
|
"output_norm/layer0": 19.098618446985878, |
|
"step": 8700 |
|
}, |
|
{ |
|
"MSE": 613.292506408691, |
|
"MSE/layer0": 613.292506408691, |
|
"dead_code_fraction": 0.1876, |
|
"dead_code_fraction/layer0": 0.1876, |
|
"epoch": 1.4, |
|
"input_norm": 31.998654588063562, |
|
"input_norm/layer0": 31.998654588063562, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1482, |
|
"max_norm": 74.05269622802734, |
|
"max_norm/layer0": 74.05269622802734, |
|
"mean_norm": 44.750946044921875, |
|
"mean_norm/layer0": 44.750946044921875, |
|
"multicode_k": 1, |
|
"output_norm": 19.104494848251342, |
|
"output_norm/layer0": 19.104494848251342, |
|
"step": 8750 |
|
}, |
|
{ |
|
"MSE": 613.8824895222986, |
|
"MSE/layer0": 613.8824895222986, |
|
"dead_code_fraction": 0.1868, |
|
"dead_code_fraction/layer0": 0.1868, |
|
"epoch": 1.4, |
|
"input_norm": 31.998655049006146, |
|
"input_norm/layer0": 31.998655049006146, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1331, |
|
"max_norm": 74.12651824951172, |
|
"max_norm/layer0": 74.12651824951172, |
|
"mean_norm": 44.7886848449707, |
|
"mean_norm/layer0": 44.7886848449707, |
|
"multicode_k": 1, |
|
"output_norm": 19.110120385487882, |
|
"output_norm/layer0": 19.110120385487882, |
|
"step": 8800 |
|
}, |
|
{ |
|
"MSE": 613.8568901570636, |
|
"MSE/layer0": 613.8568901570636, |
|
"dead_code_fraction": 0.18675, |
|
"dead_code_fraction/layer0": 0.18675, |
|
"epoch": 1.41, |
|
"input_norm": 31.99864878336588, |
|
"input_norm/layer0": 31.99864878336588, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1038, |
|
"max_norm": 74.20288848876953, |
|
"max_norm/layer0": 74.20288848876953, |
|
"mean_norm": 44.82563400268555, |
|
"mean_norm/layer0": 44.82563400268555, |
|
"multicode_k": 1, |
|
"output_norm": 19.120709832509363, |
|
"output_norm/layer0": 19.120709832509363, |
|
"step": 8850 |
|
}, |
|
{ |
|
"MSE": 612.8203454589843, |
|
"MSE/layer0": 612.8203454589843, |
|
"dead_code_fraction": 0.18635, |
|
"dead_code_fraction/layer0": 0.18635, |
|
"epoch": 1.41, |
|
"input_norm": 31.99866209030152, |
|
"input_norm/layer0": 31.99866209030152, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1619, |
|
"max_norm": 74.27029418945312, |
|
"max_norm/layer0": 74.27029418945312, |
|
"mean_norm": 44.863847732543945, |
|
"mean_norm/layer0": 44.863847732543945, |
|
"multicode_k": 1, |
|
"output_norm": 19.13362557093303, |
|
"output_norm/layer0": 19.13362557093303, |
|
"step": 8900 |
|
}, |
|
{ |
|
"MSE": 612.7508836873369, |
|
"MSE/layer0": 612.7508836873369, |
|
"dead_code_fraction": 0.1865, |
|
"dead_code_fraction/layer0": 0.1865, |
|
"epoch": 1.42, |
|
"input_norm": 31.998662964502977, |
|
"input_norm/layer0": 31.998662964502977, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1274, |
|
"max_norm": 74.35165405273438, |
|
"max_norm/layer0": 74.35165405273438, |
|
"mean_norm": 44.90276908874512, |
|
"mean_norm/layer0": 44.90276908874512, |
|
"multicode_k": 1, |
|
"output_norm": 19.13368027687074, |
|
"output_norm/layer0": 19.13368027687074, |
|
"step": 8950 |
|
}, |
|
{ |
|
"MSE": 611.3088948567707, |
|
"MSE/layer0": 611.3088948567707, |
|
"dead_code_fraction": 0.18625, |
|
"dead_code_fraction/layer0": 0.18625, |
|
"epoch": 1.42, |
|
"input_norm": 31.998670199712116, |
|
"input_norm/layer0": 31.998670199712116, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1556, |
|
"max_norm": 74.43575286865234, |
|
"max_norm/layer0": 74.43575286865234, |
|
"mean_norm": 44.94179916381836, |
|
"mean_norm/layer0": 44.94179916381836, |
|
"multicode_k": 1, |
|
"output_norm": 19.165478760401413, |
|
"output_norm/layer0": 19.165478760401413, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"eval_MSE/layer0": 610.3757424029645, |
|
"eval_accuracy": 0.5087341142897861, |
|
"eval_dead_code_fraction/layer0": 0.18805, |
|
"eval_input_norm/layer0": 31.998659288421646, |
|
"eval_loss": 2.139230489730835, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 19.181722183648382, |
|
"eval_runtime": 158.0526, |
|
"eval_samples_per_second": 29.25, |
|
"eval_steps_per_second": 1.829, |
|
"step": 9000 |
|
}, |
|
{ |
|
"MSE": 611.2356985473632, |
|
"MSE/layer0": 611.2356985473632, |
|
"dead_code_fraction": 0.1879, |
|
"dead_code_fraction/layer0": 0.1879, |
|
"epoch": 1.43, |
|
"input_norm": 31.998666836420703, |
|
"input_norm/layer0": 31.998666836420703, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1388, |
|
"max_norm": 74.51050567626953, |
|
"max_norm/layer0": 74.51050567626953, |
|
"mean_norm": 44.98063850402832, |
|
"mean_norm/layer0": 44.98063850402832, |
|
"multicode_k": 1, |
|
"output_norm": 19.177389281590777, |
|
"output_norm/layer0": 19.177389281590777, |
|
"step": 9050 |
|
}, |
|
{ |
|
"MSE": 610.8344569905598, |
|
"MSE/layer0": 610.8344569905598, |
|
"dead_code_fraction": 0.18865, |
|
"dead_code_fraction/layer0": 0.18865, |
|
"epoch": 1.43, |
|
"input_norm": 31.99867141723631, |
|
"input_norm/layer0": 31.99867141723631, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1328, |
|
"max_norm": 74.59440612792969, |
|
"max_norm/layer0": 74.59440612792969, |
|
"mean_norm": 45.01910400390625, |
|
"mean_norm/layer0": 45.01910400390625, |
|
"multicode_k": 1, |
|
"output_norm": 19.185275354385375, |
|
"output_norm/layer0": 19.185275354385375, |
|
"step": 9100 |
|
}, |
|
{ |
|
"MSE": 610.7402758789062, |
|
"MSE/layer0": 610.7402758789062, |
|
"dead_code_fraction": 0.1871, |
|
"dead_code_fraction/layer0": 0.1871, |
|
"epoch": 1.44, |
|
"input_norm": 31.99866997400921, |
|
"input_norm/layer0": 31.99866997400921, |
|
"learning_rate": 0.0005, |
|
"loss": 2.117, |
|
"max_norm": 74.67122650146484, |
|
"max_norm/layer0": 74.67122650146484, |
|
"mean_norm": 45.05727577209473, |
|
"mean_norm/layer0": 45.05727577209473, |
|
"multicode_k": 1, |
|
"output_norm": 19.190109596252437, |
|
"output_norm/layer0": 19.190109596252437, |
|
"step": 9150 |
|
}, |
|
{ |
|
"MSE": 610.1339531453451, |
|
"MSE/layer0": 610.1339531453451, |
|
"dead_code_fraction": 0.18745, |
|
"dead_code_fraction/layer0": 0.18745, |
|
"epoch": 1.44, |
|
"input_norm": 31.998679358164473, |
|
"input_norm/layer0": 31.998679358164473, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1459, |
|
"max_norm": 74.7430419921875, |
|
"max_norm/layer0": 74.7430419921875, |
|
"mean_norm": 45.095571517944336, |
|
"mean_norm/layer0": 45.095571517944336, |
|
"multicode_k": 1, |
|
"output_norm": 19.203376553853335, |
|
"output_norm/layer0": 19.203376553853335, |
|
"step": 9200 |
|
}, |
|
{ |
|
"MSE": 609.6957601928709, |
|
"MSE/layer0": 609.6957601928709, |
|
"dead_code_fraction": 0.1878, |
|
"dead_code_fraction/layer0": 0.1878, |
|
"epoch": 1.45, |
|
"input_norm": 31.99868172009785, |
|
"input_norm/layer0": 31.99868172009785, |
|
"learning_rate": 0.0005, |
|
"loss": 2.142, |
|
"max_norm": 74.8177490234375, |
|
"max_norm/layer0": 74.8177490234375, |
|
"mean_norm": 45.133853912353516, |
|
"mean_norm/layer0": 45.133853912353516, |
|
"multicode_k": 1, |
|
"output_norm": 19.22210531552632, |
|
"output_norm/layer0": 19.22210531552632, |
|
"step": 9250 |
|
}, |
|
{ |
|
"MSE": 609.5997785441082, |
|
"MSE/layer0": 609.5997785441082, |
|
"dead_code_fraction": 0.18805, |
|
"dead_code_fraction/layer0": 0.18805, |
|
"epoch": 1.45, |
|
"input_norm": 31.998693205515544, |
|
"input_norm/layer0": 31.998693205515544, |
|
"learning_rate": 0.0005, |
|
"loss": 2.18, |
|
"max_norm": 74.87744140625, |
|
"max_norm/layer0": 74.87744140625, |
|
"mean_norm": 45.172555923461914, |
|
"mean_norm/layer0": 45.172555923461914, |
|
"multicode_k": 1, |
|
"output_norm": 19.226630802154542, |
|
"output_norm/layer0": 19.226630802154542, |
|
"step": 9300 |
|
}, |
|
{ |
|
"MSE": 609.8342389933271, |
|
"MSE/layer0": 609.8342389933271, |
|
"dead_code_fraction": 0.18735, |
|
"dead_code_fraction/layer0": 0.18735, |
|
"epoch": 1.46, |
|
"input_norm": 31.998687505722053, |
|
"input_norm/layer0": 31.998687505722053, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1164, |
|
"max_norm": 74.94609069824219, |
|
"max_norm/layer0": 74.94609069824219, |
|
"mean_norm": 45.21059799194336, |
|
"mean_norm/layer0": 45.21059799194336, |
|
"multicode_k": 1, |
|
"output_norm": 19.234882882436114, |
|
"output_norm/layer0": 19.234882882436114, |
|
"step": 9350 |
|
}, |
|
{ |
|
"MSE": 609.2034523518882, |
|
"MSE/layer0": 609.2034523518882, |
|
"dead_code_fraction": 0.1869, |
|
"dead_code_fraction/layer0": 0.1869, |
|
"epoch": 1.46, |
|
"input_norm": 31.99869050979616, |
|
"input_norm/layer0": 31.99869050979616, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1316, |
|
"max_norm": 75.01142883300781, |
|
"max_norm/layer0": 75.01142883300781, |
|
"mean_norm": 45.248979568481445, |
|
"mean_norm/layer0": 45.248979568481445, |
|
"multicode_k": 1, |
|
"output_norm": 19.247848326365144, |
|
"output_norm/layer0": 19.247848326365144, |
|
"step": 9400 |
|
}, |
|
{ |
|
"MSE": 609.0324313354497, |
|
"MSE/layer0": 609.0324313354497, |
|
"dead_code_fraction": 0.18745, |
|
"dead_code_fraction/layer0": 0.18745, |
|
"epoch": 1.47, |
|
"input_norm": 31.99869132041931, |
|
"input_norm/layer0": 31.99869132041931, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1214, |
|
"max_norm": 75.07112121582031, |
|
"max_norm/layer0": 75.07112121582031, |
|
"mean_norm": 45.287214279174805, |
|
"mean_norm/layer0": 45.287214279174805, |
|
"multicode_k": 1, |
|
"output_norm": 19.25519768079122, |
|
"output_norm/layer0": 19.25519768079122, |
|
"step": 9450 |
|
}, |
|
{ |
|
"MSE": 607.8594933064783, |
|
"MSE/layer0": 607.8594933064783, |
|
"dead_code_fraction": 0.18835, |
|
"dead_code_fraction/layer0": 0.18835, |
|
"epoch": 1.47, |
|
"input_norm": 31.998687744140625, |
|
"input_norm/layer0": 31.998687744140625, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1067, |
|
"max_norm": 75.15766143798828, |
|
"max_norm/layer0": 75.15766143798828, |
|
"mean_norm": 45.32560920715332, |
|
"mean_norm/layer0": 45.32560920715332, |
|
"multicode_k": 1, |
|
"output_norm": 19.27704188664754, |
|
"output_norm/layer0": 19.27704188664754, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"eval_MSE/layer0": 608.6866096036146, |
|
"eval_accuracy": 0.5090880757079915, |
|
"eval_dead_code_fraction/layer0": 0.18755, |
|
"eval_input_norm/layer0": 31.998685899710146, |
|
"eval_loss": 2.1350600719451904, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 19.283631281241068, |
|
"eval_runtime": 158.1797, |
|
"eval_samples_per_second": 29.226, |
|
"eval_steps_per_second": 1.827, |
|
"step": 9500 |
|
}, |
|
{ |
|
"MSE": 607.5302533983886, |
|
"MSE/layer0": 607.5302533983886, |
|
"dead_code_fraction": 0.1872, |
|
"dead_code_fraction/layer0": 0.1872, |
|
"epoch": 1.48, |
|
"input_norm": 31.99869025141972, |
|
"input_norm/layer0": 31.99869025141972, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1075, |
|
"max_norm": 75.2263412475586, |
|
"max_norm/layer0": 75.2263412475586, |
|
"mean_norm": 45.363752365112305, |
|
"mean_norm/layer0": 45.363752365112305, |
|
"multicode_k": 1, |
|
"output_norm": 19.2927733112995, |
|
"output_norm/layer0": 19.2927733112995, |
|
"step": 9550 |
|
}, |
|
{ |
|
"MSE": 608.902215973978, |
|
"MSE/layer0": 608.902215973978, |
|
"dead_code_fraction": 0.187, |
|
"dead_code_fraction/layer0": 0.187, |
|
"epoch": 2.0, |
|
"input_norm": 31.998686492629858, |
|
"input_norm/layer0": 31.998686492629858, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1013, |
|
"max_norm": 75.294677734375, |
|
"max_norm/layer0": 75.294677734375, |
|
"mean_norm": 45.40024948120117, |
|
"mean_norm/layer0": 45.40024948120117, |
|
"multicode_k": 1, |
|
"output_norm": 19.268582361188244, |
|
"output_norm/layer0": 19.268582361188244, |
|
"step": 9600 |
|
}, |
|
{ |
|
"MSE": 606.3796120198567, |
|
"MSE/layer0": 606.3796120198567, |
|
"dead_code_fraction": 0.18715, |
|
"dead_code_fraction/layer0": 0.18715, |
|
"epoch": 2.01, |
|
"input_norm": 31.998710851669312, |
|
"input_norm/layer0": 31.998710851669312, |
|
"learning_rate": 0.0005, |
|
"loss": 2.17, |
|
"max_norm": 75.35186004638672, |
|
"max_norm/layer0": 75.35186004638672, |
|
"mean_norm": 45.4382266998291, |
|
"mean_norm/layer0": 45.4382266998291, |
|
"multicode_k": 1, |
|
"output_norm": 19.314183537165327, |
|
"output_norm/layer0": 19.314183537165327, |
|
"step": 9650 |
|
}, |
|
{ |
|
"MSE": 606.9239878336591, |
|
"MSE/layer0": 606.9239878336591, |
|
"dead_code_fraction": 0.1877, |
|
"dead_code_fraction/layer0": 0.1877, |
|
"epoch": 2.01, |
|
"input_norm": 31.99869126637776, |
|
"input_norm/layer0": 31.99869126637776, |
|
"learning_rate": 0.0005, |
|
"loss": 2.0661, |
|
"max_norm": 75.44601440429688, |
|
"max_norm/layer0": 75.44601440429688, |
|
"mean_norm": 45.47653579711914, |
|
"mean_norm/layer0": 45.47653579711914, |
|
"multicode_k": 1, |
|
"output_norm": 19.313949975967407, |
|
"output_norm/layer0": 19.313949975967407, |
|
"step": 9700 |
|
}, |
|
{ |
|
"MSE": 606.1468785603844, |
|
"MSE/layer0": 606.1468785603844, |
|
"dead_code_fraction": 0.18755, |
|
"dead_code_fraction/layer0": 0.18755, |
|
"epoch": 2.02, |
|
"input_norm": 31.998706903457652, |
|
"input_norm/layer0": 31.998706903457652, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1325, |
|
"max_norm": 75.6237564086914, |
|
"max_norm/layer0": 75.6237564086914, |
|
"mean_norm": 45.51473808288574, |
|
"mean_norm/layer0": 45.51473808288574, |
|
"multicode_k": 1, |
|
"output_norm": 19.331538470586143, |
|
"output_norm/layer0": 19.331538470586143, |
|
"step": 9750 |
|
}, |
|
{ |
|
"MSE": 606.2908910115561, |
|
"MSE/layer0": 606.2908910115561, |
|
"dead_code_fraction": 0.18715, |
|
"dead_code_fraction/layer0": 0.18715, |
|
"epoch": 2.02, |
|
"input_norm": 31.998702777226768, |
|
"input_norm/layer0": 31.998702777226768, |
|
"learning_rate": 0.0005, |
|
"loss": 2.0999, |
|
"max_norm": 75.77623748779297, |
|
"max_norm/layer0": 75.77623748779297, |
|
"mean_norm": 45.55307388305664, |
|
"mean_norm/layer0": 45.55307388305664, |
|
"multicode_k": 1, |
|
"output_norm": 19.340178826649982, |
|
"output_norm/layer0": 19.340178826649982, |
|
"step": 9800 |
|
}, |
|
{ |
|
"MSE": 605.7215723673501, |
|
"MSE/layer0": 605.7215723673501, |
|
"dead_code_fraction": 0.18635, |
|
"dead_code_fraction/layer0": 0.18635, |
|
"epoch": 2.03, |
|
"input_norm": 31.998708073298122, |
|
"input_norm/layer0": 31.998708073298122, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1015, |
|
"max_norm": 75.92095184326172, |
|
"max_norm/layer0": 75.92095184326172, |
|
"mean_norm": 45.591548919677734, |
|
"mean_norm/layer0": 45.591548919677734, |
|
"multicode_k": 1, |
|
"output_norm": 19.351260058085124, |
|
"output_norm/layer0": 19.351260058085124, |
|
"step": 9850 |
|
}, |
|
{ |
|
"MSE": 605.7307819620769, |
|
"MSE/layer0": 605.7307819620769, |
|
"dead_code_fraction": 0.1879, |
|
"dead_code_fraction/layer0": 0.1879, |
|
"epoch": 2.03, |
|
"input_norm": 31.99871432304383, |
|
"input_norm/layer0": 31.99871432304383, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1079, |
|
"max_norm": 76.06104278564453, |
|
"max_norm/layer0": 76.06104278564453, |
|
"mean_norm": 45.62945747375488, |
|
"mean_norm/layer0": 45.62945747375488, |
|
"multicode_k": 1, |
|
"output_norm": 19.36078415234882, |
|
"output_norm/layer0": 19.36078415234882, |
|
"step": 9900 |
|
}, |
|
{ |
|
"MSE": 605.7736006673174, |
|
"MSE/layer0": 605.7736006673174, |
|
"dead_code_fraction": 0.1873, |
|
"dead_code_fraction/layer0": 0.1873, |
|
"epoch": 2.04, |
|
"input_norm": 31.99871180534363, |
|
"input_norm/layer0": 31.99871180534363, |
|
"learning_rate": 0.0005, |
|
"loss": 2.102, |
|
"max_norm": 76.22486877441406, |
|
"max_norm/layer0": 76.22486877441406, |
|
"mean_norm": 45.66733360290527, |
|
"mean_norm/layer0": 45.66733360290527, |
|
"multicode_k": 1, |
|
"output_norm": 19.36815209388733, |
|
"output_norm/layer0": 19.36815209388733, |
|
"step": 9950 |
|
}, |
|
{ |
|
"MSE": 604.9809751383466, |
|
"MSE/layer0": 604.9809751383466, |
|
"dead_code_fraction": 0.1872, |
|
"dead_code_fraction/layer0": 0.1872, |
|
"epoch": 2.04, |
|
"input_norm": 31.998728539148978, |
|
"input_norm/layer0": 31.998728539148978, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1536, |
|
"max_norm": 76.40007019042969, |
|
"max_norm/layer0": 76.40007019042969, |
|
"mean_norm": 45.70543670654297, |
|
"mean_norm/layer0": 45.70543670654297, |
|
"multicode_k": 1, |
|
"output_norm": 19.38911464373271, |
|
"output_norm/layer0": 19.38911464373271, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"eval_MSE/layer0": 604.5096733395267, |
|
"eval_accuracy": 0.5091345939349958, |
|
"eval_dead_code_fraction/layer0": 0.18795, |
|
"eval_input_norm/layer0": 31.99872850438308, |
|
"eval_loss": 2.132894992828369, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 19.389702240368152, |
|
"eval_runtime": 158.9177, |
|
"eval_samples_per_second": 29.091, |
|
"eval_steps_per_second": 1.819, |
|
"step": 10000 |
|
}, |
|
{ |
|
"MSE": 0.0, |
|
"MSE/layer0": 0.0, |
|
"dead_code_fraction": 1.0, |
|
"dead_code_fraction/layer0": 1.0, |
|
"epoch": 2.04, |
|
"input_norm": 0.0, |
|
"input_norm/layer0": 0.0, |
|
"max_norm": 76.40007019042969, |
|
"max_norm/layer0": 76.40007019042969, |
|
"mean_norm": 45.70543670654297, |
|
"mean_norm/layer0": 45.70543670654297, |
|
"multicode_k": 1, |
|
"output_norm": 0.0, |
|
"output_norm/layer0": 0.0, |
|
"step": 10000, |
|
"total_flos": 7.43098011353088e+16, |
|
"train_loss": 2.325971780395508, |
|
"train_runtime": 15639.0026, |
|
"train_samples_per_second": 61.385, |
|
"train_steps_per_second": 0.639 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 10000, |
|
"num_train_epochs": 9223372036854775807, |
|
"save_steps": 500, |
|
"total_flos": 7.43098011353088e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|