|
{ |
|
"best_metric": 1.89570152759552, |
|
"best_model_checkpoint": "/tmp/wandb/run-20240207_044253-56k3p8kp/files/train_output/checkpoint-10000", |
|
"epoch": 1.044022968505307, |
|
"eval_steps": 500, |
|
"global_step": 10000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"MSE": 872.5187733968098, |
|
"MSE/layer0": 872.5187733968098, |
|
"dead_code_fraction": 0.0276, |
|
"dead_code_fraction/layer0": 0.0276, |
|
"epoch": 0.0, |
|
"input_norm": 31.997111479441326, |
|
"input_norm/layer0": 31.997111479441326, |
|
"learning_rate": 1e-06, |
|
"loss": 9.0051, |
|
"max_norm": 34.71393966674805, |
|
"max_norm/layer0": 34.71393966674805, |
|
"mean_norm": 31.98521327972412, |
|
"mean_norm/layer0": 31.98521327972412, |
|
"multicode_k": 1, |
|
"output_norm": 4.134780248006185, |
|
"output_norm/layer0": 4.134780248006185, |
|
"step": 1 |
|
}, |
|
{ |
|
"MSE": 871.4381560241286, |
|
"MSE/layer0": 871.4381560241286, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.01, |
|
"input_norm": 31.99644809839677, |
|
"input_norm/layer0": 31.99644809839677, |
|
"learning_rate": 5e-05, |
|
"loss": 7.0703, |
|
"max_norm": 34.72187423706055, |
|
"max_norm/layer0": 34.72187423706055, |
|
"mean_norm": 31.991936683654785, |
|
"mean_norm/layer0": 31.991936683654785, |
|
"multicode_k": 1, |
|
"output_norm": 4.145846879401173, |
|
"output_norm/layer0": 4.145846879401173, |
|
"step": 50 |
|
}, |
|
{ |
|
"MSE": 868.4475470987957, |
|
"MSE/layer0": 868.4475470987957, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.01, |
|
"input_norm": 31.995786774953213, |
|
"input_norm/layer0": 31.995786774953213, |
|
"learning_rate": 0.0001, |
|
"loss": 4.1515, |
|
"max_norm": 34.748802185058594, |
|
"max_norm/layer0": 34.748802185058594, |
|
"mean_norm": 32.0172176361084, |
|
"mean_norm/layer0": 32.0172176361084, |
|
"multicode_k": 1, |
|
"output_norm": 4.178660261631009, |
|
"output_norm/layer0": 4.178660261631009, |
|
"step": 100 |
|
}, |
|
{ |
|
"MSE": 864.7878089396156, |
|
"MSE/layer0": 864.7878089396156, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.02, |
|
"input_norm": 31.995868380864444, |
|
"input_norm/layer0": 31.995868380864444, |
|
"learning_rate": 0.00015, |
|
"loss": 3.596, |
|
"max_norm": 34.7879753112793, |
|
"max_norm/layer0": 34.7879753112793, |
|
"mean_norm": 32.057809829711914, |
|
"mean_norm/layer0": 32.057809829711914, |
|
"multicode_k": 1, |
|
"output_norm": 4.227458424568177, |
|
"output_norm/layer0": 4.227458424568177, |
|
"step": 150 |
|
}, |
|
{ |
|
"MSE": 862.2720657348631, |
|
"MSE/layer0": 862.2720657348631, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.02, |
|
"input_norm": 31.996261011759444, |
|
"input_norm/layer0": 31.996261011759444, |
|
"learning_rate": 0.0002, |
|
"loss": 3.3864, |
|
"max_norm": 34.834476470947266, |
|
"max_norm/layer0": 34.834476470947266, |
|
"mean_norm": 32.09993934631348, |
|
"mean_norm/layer0": 32.09993934631348, |
|
"multicode_k": 1, |
|
"output_norm": 4.271083230972291, |
|
"output_norm/layer0": 4.271083230972291, |
|
"step": 200 |
|
}, |
|
{ |
|
"MSE": 860.8860168457031, |
|
"MSE/layer0": 860.8860168457031, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.03, |
|
"input_norm": 31.99663330396016, |
|
"input_norm/layer0": 31.99663330396016, |
|
"learning_rate": 0.00025, |
|
"loss": 3.1841, |
|
"max_norm": 34.880577087402344, |
|
"max_norm/layer0": 34.880577087402344, |
|
"mean_norm": 32.15042304992676, |
|
"mean_norm/layer0": 32.15042304992676, |
|
"multicode_k": 1, |
|
"output_norm": 4.302526236375174, |
|
"output_norm/layer0": 4.302526236375174, |
|
"step": 250 |
|
}, |
|
{ |
|
"MSE": 859.4145241292313, |
|
"MSE/layer0": 859.4145241292313, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.03, |
|
"input_norm": 31.99707999547323, |
|
"input_norm/layer0": 31.99707999547323, |
|
"learning_rate": 0.0003, |
|
"loss": 2.9941, |
|
"max_norm": 34.94011688232422, |
|
"max_norm/layer0": 34.94011688232422, |
|
"mean_norm": 32.21405220031738, |
|
"mean_norm/layer0": 32.21405220031738, |
|
"multicode_k": 1, |
|
"output_norm": 4.340623443921407, |
|
"output_norm/layer0": 4.340623443921407, |
|
"step": 300 |
|
}, |
|
{ |
|
"MSE": 857.4514228312173, |
|
"MSE/layer0": 857.4514228312173, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.04, |
|
"input_norm": 31.997263495127353, |
|
"input_norm/layer0": 31.997263495127353, |
|
"learning_rate": 0.00035, |
|
"loss": 2.8154, |
|
"max_norm": 35.02033996582031, |
|
"max_norm/layer0": 35.02033996582031, |
|
"mean_norm": 32.2895393371582, |
|
"mean_norm/layer0": 32.2895393371582, |
|
"multicode_k": 1, |
|
"output_norm": 4.388785634040833, |
|
"output_norm/layer0": 4.388785634040833, |
|
"step": 350 |
|
}, |
|
{ |
|
"MSE": 855.6023776245115, |
|
"MSE/layer0": 855.6023776245115, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.04, |
|
"input_norm": 31.997391548156735, |
|
"input_norm/layer0": 31.997391548156735, |
|
"learning_rate": 0.0004, |
|
"loss": 2.6472, |
|
"max_norm": 35.093902587890625, |
|
"max_norm/layer0": 35.093902587890625, |
|
"mean_norm": 32.36477088928223, |
|
"mean_norm/layer0": 32.36477088928223, |
|
"multicode_k": 1, |
|
"output_norm": 4.438224600950877, |
|
"output_norm/layer0": 4.438224600950877, |
|
"step": 400 |
|
}, |
|
{ |
|
"MSE": 852.2393107096357, |
|
"MSE/layer0": 852.2393107096357, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.05, |
|
"input_norm": 31.997483587265002, |
|
"input_norm/layer0": 31.997483587265002, |
|
"learning_rate": 0.00045000000000000004, |
|
"loss": 2.5584, |
|
"max_norm": 35.304176330566406, |
|
"max_norm/layer0": 35.304176330566406, |
|
"mean_norm": 32.54551696777344, |
|
"mean_norm/layer0": 32.54551696777344, |
|
"multicode_k": 1, |
|
"output_norm": 4.531697844664256, |
|
"output_norm/layer0": 4.531697844664256, |
|
"step": 450 |
|
}, |
|
{ |
|
"MSE": 845.160081481933, |
|
"MSE/layer0": 845.160081481933, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.05, |
|
"input_norm": 31.997576513290404, |
|
"input_norm/layer0": 31.997576513290404, |
|
"learning_rate": 0.0005, |
|
"loss": 2.5072, |
|
"max_norm": 35.78097915649414, |
|
"max_norm/layer0": 35.78097915649414, |
|
"mean_norm": 32.836992263793945, |
|
"mean_norm/layer0": 32.836992263793945, |
|
"multicode_k": 1, |
|
"output_norm": 4.75731077671051, |
|
"output_norm/layer0": 4.75731077671051, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_MSE/layer0": 841.1602262364518, |
|
"eval_accuracy": 0.4578774282778804, |
|
"eval_dead_code_fraction/layer0": 0.0, |
|
"eval_input_norm/layer0": 31.99765928777141, |
|
"eval_loss": 2.476405382156372, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 4.911408371361153, |
|
"eval_runtime": 73.5499, |
|
"eval_samples_per_second": 62.855, |
|
"eval_steps_per_second": 7.859, |
|
"step": 500 |
|
}, |
|
{ |
|
"MSE": 837.1320628865564, |
|
"MSE/layer0": 837.1320628865564, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.06, |
|
"input_norm": 31.997703491846714, |
|
"input_norm/layer0": 31.997703491846714, |
|
"learning_rate": 0.0004973684210526315, |
|
"loss": 2.446, |
|
"max_norm": 36.301849365234375, |
|
"max_norm/layer0": 36.301849365234375, |
|
"mean_norm": 33.16576957702637, |
|
"mean_norm/layer0": 33.16576957702637, |
|
"multicode_k": 1, |
|
"output_norm": 5.083427506287892, |
|
"output_norm/layer0": 5.083427506287892, |
|
"step": 550 |
|
}, |
|
{ |
|
"MSE": 829.8174697875975, |
|
"MSE/layer0": 829.8174697875975, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.06, |
|
"input_norm": 31.997781289418548, |
|
"input_norm/layer0": 31.997781289418548, |
|
"learning_rate": 0.0004947368421052632, |
|
"loss": 2.4026, |
|
"max_norm": 36.790077209472656, |
|
"max_norm/layer0": 36.790077209472656, |
|
"mean_norm": 33.519426345825195, |
|
"mean_norm/layer0": 33.519426345825195, |
|
"multicode_k": 1, |
|
"output_norm": 5.438902084827422, |
|
"output_norm/layer0": 5.438902084827422, |
|
"step": 600 |
|
}, |
|
{ |
|
"MSE": 823.647299601237, |
|
"MSE/layer0": 823.647299601237, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.07, |
|
"input_norm": 31.997854344050104, |
|
"input_norm/layer0": 31.997854344050104, |
|
"learning_rate": 0.0004921052631578947, |
|
"loss": 2.3506, |
|
"max_norm": 37.23988723754883, |
|
"max_norm/layer0": 37.23988723754883, |
|
"mean_norm": 33.882219314575195, |
|
"mean_norm/layer0": 33.882219314575195, |
|
"multicode_k": 1, |
|
"output_norm": 5.780141766071318, |
|
"output_norm/layer0": 5.780141766071318, |
|
"step": 650 |
|
}, |
|
{ |
|
"MSE": 818.3900874837236, |
|
"MSE/layer0": 818.3900874837236, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.07, |
|
"input_norm": 31.99789853731792, |
|
"input_norm/layer0": 31.99789853731792, |
|
"learning_rate": 0.0004894736842105264, |
|
"loss": 2.3252, |
|
"max_norm": 37.74921417236328, |
|
"max_norm/layer0": 37.74921417236328, |
|
"mean_norm": 34.241193771362305, |
|
"mean_norm/layer0": 34.241193771362305, |
|
"multicode_k": 1, |
|
"output_norm": 6.09345253547033, |
|
"output_norm/layer0": 6.09345253547033, |
|
"step": 700 |
|
}, |
|
{ |
|
"MSE": 813.5141651407878, |
|
"MSE/layer0": 813.5141651407878, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.08, |
|
"input_norm": 31.99791718482971, |
|
"input_norm/layer0": 31.99791718482971, |
|
"learning_rate": 0.0004868421052631579, |
|
"loss": 2.2972, |
|
"max_norm": 38.29411315917969, |
|
"max_norm/layer0": 38.29411315917969, |
|
"mean_norm": 34.602651596069336, |
|
"mean_norm/layer0": 34.602651596069336, |
|
"multicode_k": 1, |
|
"output_norm": 6.373116828600564, |
|
"output_norm/layer0": 6.373116828600564, |
|
"step": 750 |
|
}, |
|
{ |
|
"MSE": 808.9583784993486, |
|
"MSE/layer0": 808.9583784993486, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.08, |
|
"input_norm": 31.997929916381842, |
|
"input_norm/layer0": 31.997929916381842, |
|
"learning_rate": 0.0004842105263157895, |
|
"loss": 2.2848, |
|
"max_norm": 38.83885955810547, |
|
"max_norm/layer0": 38.83885955810547, |
|
"mean_norm": 34.96581268310547, |
|
"mean_norm/layer0": 34.96581268310547, |
|
"multicode_k": 1, |
|
"output_norm": 6.6348445963859515, |
|
"output_norm/layer0": 6.6348445963859515, |
|
"step": 800 |
|
}, |
|
{ |
|
"MSE": 805.0894353230792, |
|
"MSE/layer0": 805.0894353230792, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.09, |
|
"input_norm": 31.99793632825216, |
|
"input_norm/layer0": 31.99793632825216, |
|
"learning_rate": 0.00048157894736842105, |
|
"loss": 2.2718, |
|
"max_norm": 39.34720993041992, |
|
"max_norm/layer0": 39.34720993041992, |
|
"mean_norm": 35.32806396484375, |
|
"mean_norm/layer0": 35.32806396484375, |
|
"multicode_k": 1, |
|
"output_norm": 6.866891795794173, |
|
"output_norm/layer0": 6.866891795794173, |
|
"step": 850 |
|
}, |
|
{ |
|
"MSE": 801.1131992594401, |
|
"MSE/layer0": 801.1131992594401, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.09, |
|
"input_norm": 31.997941767374677, |
|
"input_norm/layer0": 31.997941767374677, |
|
"learning_rate": 0.00047894736842105264, |
|
"loss": 2.2552, |
|
"max_norm": 39.885169982910156, |
|
"max_norm/layer0": 39.885169982910156, |
|
"mean_norm": 35.689327239990234, |
|
"mean_norm/layer0": 35.689327239990234, |
|
"multicode_k": 1, |
|
"output_norm": 7.08060004631678, |
|
"output_norm/layer0": 7.08060004631678, |
|
"step": 900 |
|
}, |
|
{ |
|
"MSE": 797.5655348714191, |
|
"MSE/layer0": 797.5655348714191, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.1, |
|
"input_norm": 31.997945496241247, |
|
"input_norm/layer0": 31.997945496241247, |
|
"learning_rate": 0.0004763157894736842, |
|
"loss": 2.2364, |
|
"max_norm": 40.398529052734375, |
|
"max_norm/layer0": 40.398529052734375, |
|
"mean_norm": 36.051015853881836, |
|
"mean_norm/layer0": 36.051015853881836, |
|
"multicode_k": 1, |
|
"output_norm": 7.280441036224362, |
|
"output_norm/layer0": 7.280441036224362, |
|
"step": 950 |
|
}, |
|
{ |
|
"MSE": 794.0057167561844, |
|
"MSE/layer0": 794.0057167561844, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.1, |
|
"input_norm": 31.997958205540975, |
|
"input_norm/layer0": 31.997958205540975, |
|
"learning_rate": 0.00047368421052631577, |
|
"loss": 2.2285, |
|
"max_norm": 40.882999420166016, |
|
"max_norm/layer0": 40.882999420166016, |
|
"mean_norm": 36.412479400634766, |
|
"mean_norm/layer0": 36.412479400634766, |
|
"multicode_k": 1, |
|
"output_norm": 7.463625483512881, |
|
"output_norm/layer0": 7.463625483512881, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_MSE/layer0": 792.3022871601257, |
|
"eval_accuracy": 0.49262569806414397, |
|
"eval_dead_code_fraction/layer0": 0.0, |
|
"eval_input_norm/layer0": 31.997970815399036, |
|
"eval_loss": 2.2265193462371826, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 7.55243569582849, |
|
"eval_runtime": 73.9102, |
|
"eval_samples_per_second": 62.549, |
|
"eval_steps_per_second": 7.82, |
|
"step": 1000 |
|
}, |
|
{ |
|
"MSE": 790.4031213378905, |
|
"MSE/layer0": 790.4031213378905, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.11, |
|
"input_norm": 31.997961893081662, |
|
"input_norm/layer0": 31.997961893081662, |
|
"learning_rate": 0.0004710526315789474, |
|
"loss": 2.2276, |
|
"max_norm": 41.373714447021484, |
|
"max_norm/layer0": 41.373714447021484, |
|
"mean_norm": 36.77394676208496, |
|
"mean_norm/layer0": 36.77394676208496, |
|
"multicode_k": 1, |
|
"output_norm": 7.636834317048386, |
|
"output_norm/layer0": 7.636834317048386, |
|
"step": 1050 |
|
}, |
|
{ |
|
"MSE": 786.9933625284832, |
|
"MSE/layer0": 786.9933625284832, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.11, |
|
"input_norm": 31.99796496391297, |
|
"input_norm/layer0": 31.99796496391297, |
|
"learning_rate": 0.00046842105263157895, |
|
"loss": 2.2167, |
|
"max_norm": 41.845481872558594, |
|
"max_norm/layer0": 41.845481872558594, |
|
"mean_norm": 37.13482093811035, |
|
"mean_norm/layer0": 37.13482093811035, |
|
"multicode_k": 1, |
|
"output_norm": 7.803330462773646, |
|
"output_norm/layer0": 7.803330462773646, |
|
"step": 1100 |
|
}, |
|
{ |
|
"MSE": 783.8570914713541, |
|
"MSE/layer0": 783.8570914713541, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.12, |
|
"input_norm": 31.997962007522577, |
|
"input_norm/layer0": 31.997962007522577, |
|
"learning_rate": 0.00046578947368421054, |
|
"loss": 2.2049, |
|
"max_norm": 42.328094482421875, |
|
"max_norm/layer0": 42.328094482421875, |
|
"mean_norm": 37.49737358093262, |
|
"mean_norm/layer0": 37.49737358093262, |
|
"multicode_k": 1, |
|
"output_norm": 7.957673575878145, |
|
"output_norm/layer0": 7.957673575878145, |
|
"step": 1150 |
|
}, |
|
{ |
|
"MSE": 780.325506286621, |
|
"MSE/layer0": 780.325506286621, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.13, |
|
"input_norm": 31.997955818176273, |
|
"input_norm/layer0": 31.997955818176273, |
|
"learning_rate": 0.00046315789473684214, |
|
"loss": 2.2048, |
|
"max_norm": 42.827125549316406, |
|
"max_norm/layer0": 42.827125549316406, |
|
"mean_norm": 37.85981369018555, |
|
"mean_norm/layer0": 37.85981369018555, |
|
"multicode_k": 1, |
|
"output_norm": 8.110501464207967, |
|
"output_norm/layer0": 8.110501464207967, |
|
"step": 1200 |
|
}, |
|
{ |
|
"MSE": 777.4963677978517, |
|
"MSE/layer0": 777.4963677978517, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.13, |
|
"input_norm": 31.997957773208608, |
|
"input_norm/layer0": 31.997957773208608, |
|
"learning_rate": 0.0004605263157894737, |
|
"loss": 2.1813, |
|
"max_norm": 43.32162094116211, |
|
"max_norm/layer0": 43.32162094116211, |
|
"mean_norm": 38.223052978515625, |
|
"mean_norm/layer0": 38.223052978515625, |
|
"multicode_k": 1, |
|
"output_norm": 8.244436805248263, |
|
"output_norm/layer0": 8.244436805248263, |
|
"step": 1250 |
|
}, |
|
{ |
|
"MSE": 774.260437520345, |
|
"MSE/layer0": 774.260437520345, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.14, |
|
"input_norm": 31.99796381632487, |
|
"input_norm/layer0": 31.99796381632487, |
|
"learning_rate": 0.00045789473684210527, |
|
"loss": 2.1836, |
|
"max_norm": 43.81217575073242, |
|
"max_norm/layer0": 43.81217575073242, |
|
"mean_norm": 38.58406066894531, |
|
"mean_norm/layer0": 38.58406066894531, |
|
"multicode_k": 1, |
|
"output_norm": 8.38570425987244, |
|
"output_norm/layer0": 8.38570425987244, |
|
"step": 1300 |
|
}, |
|
{ |
|
"MSE": 771.4710861206056, |
|
"MSE/layer0": 771.4710861206056, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.14, |
|
"input_norm": 31.997958866755184, |
|
"input_norm/layer0": 31.997958866755184, |
|
"learning_rate": 0.00045526315789473686, |
|
"loss": 2.1749, |
|
"max_norm": 44.29291915893555, |
|
"max_norm/layer0": 44.29291915893555, |
|
"mean_norm": 38.94841957092285, |
|
"mean_norm/layer0": 38.94841957092285, |
|
"multicode_k": 1, |
|
"output_norm": 8.50825534900029, |
|
"output_norm/layer0": 8.50825534900029, |
|
"step": 1350 |
|
}, |
|
{ |
|
"MSE": 768.6556185913084, |
|
"MSE/layer0": 768.6556185913084, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.15, |
|
"input_norm": 31.99795674959818, |
|
"input_norm/layer0": 31.99795674959818, |
|
"learning_rate": 0.00045263157894736845, |
|
"loss": 2.1767, |
|
"max_norm": 44.80799865722656, |
|
"max_norm/layer0": 44.80799865722656, |
|
"mean_norm": 39.31004524230957, |
|
"mean_norm/layer0": 39.31004524230957, |
|
"multicode_k": 1, |
|
"output_norm": 8.633222222328184, |
|
"output_norm/layer0": 8.633222222328184, |
|
"step": 1400 |
|
}, |
|
{ |
|
"MSE": 765.9088921101885, |
|
"MSE/layer0": 765.9088921101885, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.15, |
|
"input_norm": 31.99795736630759, |
|
"input_norm/layer0": 31.99795736630759, |
|
"learning_rate": 0.00045000000000000004, |
|
"loss": 2.1614, |
|
"max_norm": 45.24712371826172, |
|
"max_norm/layer0": 45.24712371826172, |
|
"mean_norm": 39.66674041748047, |
|
"mean_norm/layer0": 39.66674041748047, |
|
"multicode_k": 1, |
|
"output_norm": 8.743508942921961, |
|
"output_norm/layer0": 8.743508942921961, |
|
"step": 1450 |
|
}, |
|
{ |
|
"MSE": 763.2800780232742, |
|
"MSE/layer0": 763.2800780232742, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.16, |
|
"input_norm": 31.997952944437664, |
|
"input_norm/layer0": 31.997952944437664, |
|
"learning_rate": 0.0004473684210526316, |
|
"loss": 2.1472, |
|
"max_norm": 45.6886100769043, |
|
"max_norm/layer0": 45.6886100769043, |
|
"mean_norm": 40.02728462219238, |
|
"mean_norm/layer0": 40.02728462219238, |
|
"multicode_k": 1, |
|
"output_norm": 8.859908480644224, |
|
"output_norm/layer0": 8.859908480644224, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_MSE/layer0": 761.8682555426203, |
|
"eval_accuracy": 0.502513147213907, |
|
"eval_dead_code_fraction/layer0": 0.0, |
|
"eval_input_norm/layer0": 31.99796608230291, |
|
"eval_loss": 2.1583588123321533, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 8.92388377993132, |
|
"eval_runtime": 73.3386, |
|
"eval_samples_per_second": 63.036, |
|
"eval_steps_per_second": 7.881, |
|
"step": 1500 |
|
}, |
|
{ |
|
"MSE": 760.1600253295896, |
|
"MSE/layer0": 760.1600253295896, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.16, |
|
"input_norm": 31.997961203257255, |
|
"input_norm/layer0": 31.997961203257255, |
|
"learning_rate": 0.00044473684210526317, |
|
"loss": 2.1601, |
|
"max_norm": 46.172386169433594, |
|
"max_norm/layer0": 46.172386169433594, |
|
"mean_norm": 40.38890838623047, |
|
"mean_norm/layer0": 40.38890838623047, |
|
"multicode_k": 1, |
|
"output_norm": 8.976485926310215, |
|
"output_norm/layer0": 8.976485926310215, |
|
"step": 1550 |
|
}, |
|
{ |
|
"MSE": 757.7968755086266, |
|
"MSE/layer0": 757.7968755086266, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.17, |
|
"input_norm": 31.99795768419901, |
|
"input_norm/layer0": 31.99795768419901, |
|
"learning_rate": 0.0004421052631578947, |
|
"loss": 2.1503, |
|
"max_norm": 46.59892272949219, |
|
"max_norm/layer0": 46.59892272949219, |
|
"mean_norm": 40.74970626831055, |
|
"mean_norm/layer0": 40.74970626831055, |
|
"multicode_k": 1, |
|
"output_norm": 9.079196619192757, |
|
"output_norm/layer0": 9.079196619192757, |
|
"step": 1600 |
|
}, |
|
{ |
|
"MSE": 755.1489293416341, |
|
"MSE/layer0": 755.1489293416341, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.17, |
|
"input_norm": 31.997956597010287, |
|
"input_norm/layer0": 31.997956597010287, |
|
"learning_rate": 0.0004394736842105263, |
|
"loss": 2.1474, |
|
"max_norm": 47.01366424560547, |
|
"max_norm/layer0": 47.01366424560547, |
|
"mean_norm": 41.107492446899414, |
|
"mean_norm/layer0": 41.107492446899414, |
|
"multicode_k": 1, |
|
"output_norm": 9.18502354939779, |
|
"output_norm/layer0": 9.18502354939779, |
|
"step": 1650 |
|
}, |
|
{ |
|
"MSE": 752.7132907104492, |
|
"MSE/layer0": 752.7132907104492, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.18, |
|
"input_norm": 31.997961333592727, |
|
"input_norm/layer0": 31.997961333592727, |
|
"learning_rate": 0.00043684210526315795, |
|
"loss": 2.1451, |
|
"max_norm": 47.46398162841797, |
|
"max_norm/layer0": 47.46398162841797, |
|
"mean_norm": 41.466739654541016, |
|
"mean_norm/layer0": 41.466739654541016, |
|
"multicode_k": 1, |
|
"output_norm": 9.288365476131446, |
|
"output_norm/layer0": 9.288365476131446, |
|
"step": 1700 |
|
}, |
|
{ |
|
"MSE": 750.1894300333656, |
|
"MSE/layer0": 750.1894300333656, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.18, |
|
"input_norm": 31.99795596122742, |
|
"input_norm/layer0": 31.99795596122742, |
|
"learning_rate": 0.0004342105263157895, |
|
"loss": 2.1298, |
|
"max_norm": 47.89784240722656, |
|
"max_norm/layer0": 47.89784240722656, |
|
"mean_norm": 41.825233459472656, |
|
"mean_norm/layer0": 41.825233459472656, |
|
"multicode_k": 1, |
|
"output_norm": 9.383608838717148, |
|
"output_norm/layer0": 9.383608838717148, |
|
"step": 1750 |
|
}, |
|
{ |
|
"MSE": 747.6542997233073, |
|
"MSE/layer0": 747.6542997233073, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.19, |
|
"input_norm": 31.997955916722606, |
|
"input_norm/layer0": 31.997955916722606, |
|
"learning_rate": 0.0004315789473684211, |
|
"loss": 2.1238, |
|
"max_norm": 48.32524871826172, |
|
"max_norm/layer0": 48.32524871826172, |
|
"mean_norm": 42.18182373046875, |
|
"mean_norm/layer0": 42.18182373046875, |
|
"multicode_k": 1, |
|
"output_norm": 9.481378455162048, |
|
"output_norm/layer0": 9.481378455162048, |
|
"step": 1800 |
|
}, |
|
{ |
|
"MSE": 745.4623332722983, |
|
"MSE/layer0": 745.4623332722983, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.19, |
|
"input_norm": 31.99795308430989, |
|
"input_norm/layer0": 31.99795308430989, |
|
"learning_rate": 0.0004289473684210526, |
|
"loss": 2.1193, |
|
"max_norm": 48.75049591064453, |
|
"max_norm/layer0": 48.75049591064453, |
|
"mean_norm": 42.53817176818848, |
|
"mean_norm/layer0": 42.53817176818848, |
|
"multicode_k": 1, |
|
"output_norm": 9.570223178863522, |
|
"output_norm/layer0": 9.570223178863522, |
|
"step": 1850 |
|
}, |
|
{ |
|
"MSE": 743.2356170654296, |
|
"MSE/layer0": 743.2356170654296, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.2, |
|
"input_norm": 31.997956037521366, |
|
"input_norm/layer0": 31.997956037521366, |
|
"learning_rate": 0.0004263157894736842, |
|
"loss": 2.114, |
|
"max_norm": 49.169532775878906, |
|
"max_norm/layer0": 49.169532775878906, |
|
"mean_norm": 42.89301300048828, |
|
"mean_norm/layer0": 42.89301300048828, |
|
"multicode_k": 1, |
|
"output_norm": 9.656177865664167, |
|
"output_norm/layer0": 9.656177865664167, |
|
"step": 1900 |
|
}, |
|
{ |
|
"MSE": 740.6696187337238, |
|
"MSE/layer0": 740.6696187337238, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.2, |
|
"input_norm": 31.997947629292796, |
|
"input_norm/layer0": 31.997947629292796, |
|
"learning_rate": 0.0004236842105263158, |
|
"loss": 2.1208, |
|
"max_norm": 49.5915641784668, |
|
"max_norm/layer0": 49.5915641784668, |
|
"mean_norm": 43.247257232666016, |
|
"mean_norm/layer0": 43.247257232666016, |
|
"multicode_k": 1, |
|
"output_norm": 9.750187404950456, |
|
"output_norm/layer0": 9.750187404950456, |
|
"step": 1950 |
|
}, |
|
{ |
|
"MSE": 738.2711766560866, |
|
"MSE/layer0": 738.2711766560866, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.21, |
|
"input_norm": 31.99795049031576, |
|
"input_norm/layer0": 31.99795049031576, |
|
"learning_rate": 0.00042105263157894734, |
|
"loss": 2.1144, |
|
"max_norm": 50.01121520996094, |
|
"max_norm/layer0": 50.01121520996094, |
|
"mean_norm": 43.60071563720703, |
|
"mean_norm/layer0": 43.60071563720703, |
|
"multicode_k": 1, |
|
"output_norm": 9.839046444892887, |
|
"output_norm/layer0": 9.839046444892887, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_MSE/layer0": 737.1842960305685, |
|
"eval_accuracy": 0.5089533842961654, |
|
"eval_dead_code_fraction/layer0": 0.0, |
|
"eval_input_norm/layer0": 31.997949216728358, |
|
"eval_loss": 2.112781524658203, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 9.899169789850005, |
|
"eval_runtime": 73.2721, |
|
"eval_samples_per_second": 63.094, |
|
"eval_steps_per_second": 7.888, |
|
"step": 2000 |
|
}, |
|
{ |
|
"MSE": 736.3252647908528, |
|
"MSE/layer0": 736.3252647908528, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.21, |
|
"input_norm": 31.997952928543082, |
|
"input_norm/layer0": 31.997952928543082, |
|
"learning_rate": 0.000418421052631579, |
|
"loss": 2.1054, |
|
"max_norm": 50.480525970458984, |
|
"max_norm/layer0": 50.480525970458984, |
|
"mean_norm": 43.9530086517334, |
|
"mean_norm/layer0": 43.9530086517334, |
|
"multicode_k": 1, |
|
"output_norm": 9.923008087476088, |
|
"output_norm/layer0": 9.923008087476088, |
|
"step": 2050 |
|
}, |
|
{ |
|
"MSE": 734.2413449096682, |
|
"MSE/layer0": 734.2413449096682, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.22, |
|
"input_norm": 31.99795233090719, |
|
"input_norm/layer0": 31.99795233090719, |
|
"learning_rate": 0.0004157894736842106, |
|
"loss": 2.114, |
|
"max_norm": 50.909828186035156, |
|
"max_norm/layer0": 50.909828186035156, |
|
"mean_norm": 44.302608489990234, |
|
"mean_norm/layer0": 44.302608489990234, |
|
"multicode_k": 1, |
|
"output_norm": 9.99465080579122, |
|
"output_norm/layer0": 9.99465080579122, |
|
"step": 2100 |
|
}, |
|
{ |
|
"MSE": 732.1211085001627, |
|
"MSE/layer0": 732.1211085001627, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.22, |
|
"input_norm": 31.997947177886957, |
|
"input_norm/layer0": 31.997947177886957, |
|
"learning_rate": 0.0004131578947368421, |
|
"loss": 2.1053, |
|
"max_norm": 51.30076217651367, |
|
"max_norm/layer0": 51.30076217651367, |
|
"mean_norm": 44.650190353393555, |
|
"mean_norm/layer0": 44.650190353393555, |
|
"multicode_k": 1, |
|
"output_norm": 10.083865798314415, |
|
"output_norm/layer0": 10.083865798314415, |
|
"step": 2150 |
|
}, |
|
{ |
|
"MSE": 729.7699541219072, |
|
"MSE/layer0": 729.7699541219072, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.23, |
|
"input_norm": 31.997944199244184, |
|
"input_norm/layer0": 31.997944199244184, |
|
"learning_rate": 0.0004105263157894737, |
|
"loss": 2.092, |
|
"max_norm": 51.70292282104492, |
|
"max_norm/layer0": 51.70292282104492, |
|
"mean_norm": 44.99736022949219, |
|
"mean_norm/layer0": 44.99736022949219, |
|
"multicode_k": 1, |
|
"output_norm": 10.171215546925865, |
|
"output_norm/layer0": 10.171215546925865, |
|
"step": 2200 |
|
}, |
|
{ |
|
"MSE": 727.7426215616864, |
|
"MSE/layer0": 727.7426215616864, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.23, |
|
"input_norm": 31.997949040730795, |
|
"input_norm/layer0": 31.997949040730795, |
|
"learning_rate": 0.00040789473684210524, |
|
"loss": 2.0989, |
|
"max_norm": 52.09043502807617, |
|
"max_norm/layer0": 52.09043502807617, |
|
"mean_norm": 45.34288787841797, |
|
"mean_norm/layer0": 45.34288787841797, |
|
"multicode_k": 1, |
|
"output_norm": 10.245072917938227, |
|
"output_norm/layer0": 10.245072917938227, |
|
"step": 2250 |
|
}, |
|
{ |
|
"MSE": 725.7510225423177, |
|
"MSE/layer0": 725.7510225423177, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.24, |
|
"input_norm": 31.997945086161295, |
|
"input_norm/layer0": 31.997945086161295, |
|
"learning_rate": 0.00040526315789473684, |
|
"loss": 2.0921, |
|
"max_norm": 52.48381423950195, |
|
"max_norm/layer0": 52.48381423950195, |
|
"mean_norm": 45.685386657714844, |
|
"mean_norm/layer0": 45.685386657714844, |
|
"multicode_k": 1, |
|
"output_norm": 10.316563812891642, |
|
"output_norm/layer0": 10.316563812891642, |
|
"step": 2300 |
|
}, |
|
{ |
|
"MSE": 723.730980834961, |
|
"MSE/layer0": 723.730980834961, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.25, |
|
"input_norm": 31.997938013076794, |
|
"input_norm/layer0": 31.997938013076794, |
|
"learning_rate": 0.00040263157894736843, |
|
"loss": 2.0863, |
|
"max_norm": 52.871910095214844, |
|
"max_norm/layer0": 52.871910095214844, |
|
"mean_norm": 46.027950286865234, |
|
"mean_norm/layer0": 46.027950286865234, |
|
"multicode_k": 1, |
|
"output_norm": 10.396288099288938, |
|
"output_norm/layer0": 10.396288099288938, |
|
"step": 2350 |
|
}, |
|
{ |
|
"MSE": 721.850106608073, |
|
"MSE/layer0": 721.850106608073, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.25, |
|
"input_norm": 31.99794317245484, |
|
"input_norm/layer0": 31.99794317245484, |
|
"learning_rate": 0.0004, |
|
"loss": 2.0883, |
|
"max_norm": 53.25300598144531, |
|
"max_norm/layer0": 53.25300598144531, |
|
"mean_norm": 46.366220474243164, |
|
"mean_norm/layer0": 46.366220474243164, |
|
"multicode_k": 1, |
|
"output_norm": 10.462737544377642, |
|
"output_norm/layer0": 10.462737544377642, |
|
"step": 2400 |
|
}, |
|
{ |
|
"MSE": 720.002911987305, |
|
"MSE/layer0": 720.002911987305, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.26, |
|
"input_norm": 31.997945442199722, |
|
"input_norm/layer0": 31.997945442199722, |
|
"learning_rate": 0.0003973684210526316, |
|
"loss": 2.0813, |
|
"max_norm": 53.6557502746582, |
|
"max_norm/layer0": 53.6557502746582, |
|
"mean_norm": 46.70218849182129, |
|
"mean_norm/layer0": 46.70218849182129, |
|
"multicode_k": 1, |
|
"output_norm": 10.54251501719157, |
|
"output_norm/layer0": 10.54251501719157, |
|
"step": 2450 |
|
}, |
|
{ |
|
"MSE": 717.8726328531905, |
|
"MSE/layer0": 717.8726328531905, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.26, |
|
"input_norm": 31.997946141560867, |
|
"input_norm/layer0": 31.997946141560867, |
|
"learning_rate": 0.00039473684210526315, |
|
"loss": 2.0847, |
|
"max_norm": 54.013648986816406, |
|
"max_norm/layer0": 54.013648986816406, |
|
"mean_norm": 47.03492546081543, |
|
"mean_norm/layer0": 47.03492546081543, |
|
"multicode_k": 1, |
|
"output_norm": 10.61746094703674, |
|
"output_norm/layer0": 10.61746094703674, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_MSE/layer0": 716.9390104187793, |
|
"eval_accuracy": 0.5142129041984603, |
|
"eval_dead_code_fraction/layer0": 0.0, |
|
"eval_input_norm/layer0": 31.997947305666536, |
|
"eval_loss": 2.0790653228759766, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 10.657726783760687, |
|
"eval_runtime": 73.6422, |
|
"eval_samples_per_second": 62.776, |
|
"eval_steps_per_second": 7.849, |
|
"step": 2500 |
|
}, |
|
{ |
|
"MSE": 715.8716929117836, |
|
"MSE/layer0": 715.8716929117836, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.27, |
|
"input_norm": 31.99793814023335, |
|
"input_norm/layer0": 31.99793814023335, |
|
"learning_rate": 0.00039210526315789474, |
|
"loss": 2.0789, |
|
"max_norm": 54.395057678222656, |
|
"max_norm/layer0": 54.395057678222656, |
|
"mean_norm": 47.36547088623047, |
|
"mean_norm/layer0": 47.36547088623047, |
|
"multicode_k": 1, |
|
"output_norm": 10.687965892155965, |
|
"output_norm/layer0": 10.687965892155965, |
|
"step": 2550 |
|
}, |
|
{ |
|
"MSE": 713.9484742228188, |
|
"MSE/layer0": 713.9484742228188, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.27, |
|
"input_norm": 31.997940645217888, |
|
"input_norm/layer0": 31.997940645217888, |
|
"learning_rate": 0.00038947368421052633, |
|
"loss": 2.0747, |
|
"max_norm": 54.81391525268555, |
|
"max_norm/layer0": 54.81391525268555, |
|
"mean_norm": 47.6934928894043, |
|
"mean_norm/layer0": 47.6934928894043, |
|
"multicode_k": 1, |
|
"output_norm": 10.762619382540386, |
|
"output_norm/layer0": 10.762619382540386, |
|
"step": 2600 |
|
}, |
|
{ |
|
"MSE": 711.9854763793942, |
|
"MSE/layer0": 711.9854763793942, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.28, |
|
"input_norm": 31.997925097147615, |
|
"input_norm/layer0": 31.997925097147615, |
|
"learning_rate": 0.00038684210526315787, |
|
"loss": 2.0679, |
|
"max_norm": 55.21110916137695, |
|
"max_norm/layer0": 55.21110916137695, |
|
"mean_norm": 48.01936340332031, |
|
"mean_norm/layer0": 48.01936340332031, |
|
"multicode_k": 1, |
|
"output_norm": 10.838534935315447, |
|
"output_norm/layer0": 10.838534935315447, |
|
"step": 2650 |
|
}, |
|
{ |
|
"MSE": 710.4415082804362, |
|
"MSE/layer0": 710.4415082804362, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.28, |
|
"input_norm": 31.997930930455517, |
|
"input_norm/layer0": 31.997930930455517, |
|
"learning_rate": 0.00038421052631578946, |
|
"loss": 2.0619, |
|
"max_norm": 55.63144302368164, |
|
"max_norm/layer0": 55.63144302368164, |
|
"mean_norm": 48.34212875366211, |
|
"mean_norm/layer0": 48.34212875366211, |
|
"multicode_k": 1, |
|
"output_norm": 10.893479135831196, |
|
"output_norm/layer0": 10.893479135831196, |
|
"step": 2700 |
|
}, |
|
{ |
|
"MSE": 708.5378164672845, |
|
"MSE/layer0": 708.5378164672845, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.29, |
|
"input_norm": 31.99792820294698, |
|
"input_norm/layer0": 31.99792820294698, |
|
"learning_rate": 0.00038157894736842105, |
|
"loss": 2.0461, |
|
"max_norm": 56.01336669921875, |
|
"max_norm/layer0": 56.01336669921875, |
|
"mean_norm": 48.66323280334473, |
|
"mean_norm/layer0": 48.66323280334473, |
|
"multicode_k": 1, |
|
"output_norm": 10.971131575902309, |
|
"output_norm/layer0": 10.971131575902309, |
|
"step": 2750 |
|
}, |
|
{ |
|
"MSE": 706.6155220540361, |
|
"MSE/layer0": 706.6155220540361, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.29, |
|
"input_norm": 31.997930272420245, |
|
"input_norm/layer0": 31.997930272420245, |
|
"learning_rate": 0.00037894736842105265, |
|
"loss": 2.0594, |
|
"max_norm": 56.40309143066406, |
|
"max_norm/layer0": 56.40309143066406, |
|
"mean_norm": 48.980411529541016, |
|
"mean_norm/layer0": 48.980411529541016, |
|
"multicode_k": 1, |
|
"output_norm": 11.042961815198257, |
|
"output_norm/layer0": 11.042961815198257, |
|
"step": 2800 |
|
}, |
|
{ |
|
"MSE": 704.6534555053711, |
|
"MSE/layer0": 704.6534555053711, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.3, |
|
"input_norm": 31.99792085011799, |
|
"input_norm/layer0": 31.99792085011799, |
|
"learning_rate": 0.00037631578947368424, |
|
"loss": 2.0499, |
|
"max_norm": 56.79050064086914, |
|
"max_norm/layer0": 56.79050064086914, |
|
"mean_norm": 49.293588638305664, |
|
"mean_norm/layer0": 49.293588638305664, |
|
"multicode_k": 1, |
|
"output_norm": 11.11463791847229, |
|
"output_norm/layer0": 11.11463791847229, |
|
"step": 2850 |
|
}, |
|
{ |
|
"MSE": 702.691480916341, |
|
"MSE/layer0": 702.691480916341, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.3, |
|
"input_norm": 31.997921177546203, |
|
"input_norm/layer0": 31.997921177546203, |
|
"learning_rate": 0.0003736842105263158, |
|
"loss": 2.0472, |
|
"max_norm": 57.16228103637695, |
|
"max_norm/layer0": 57.16228103637695, |
|
"mean_norm": 49.60378646850586, |
|
"mean_norm/layer0": 49.60378646850586, |
|
"multicode_k": 1, |
|
"output_norm": 11.188902417818706, |
|
"output_norm/layer0": 11.188902417818706, |
|
"step": 2900 |
|
}, |
|
{ |
|
"MSE": 700.9804660034181, |
|
"MSE/layer0": 700.9804660034181, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.31, |
|
"input_norm": 31.997924566268914, |
|
"input_norm/layer0": 31.997924566268914, |
|
"learning_rate": 0.00037105263157894737, |
|
"loss": 2.0557, |
|
"max_norm": 57.52459716796875, |
|
"max_norm/layer0": 57.52459716796875, |
|
"mean_norm": 49.91103553771973, |
|
"mean_norm/layer0": 49.91103553771973, |
|
"multicode_k": 1, |
|
"output_norm": 11.253552745183304, |
|
"output_norm/layer0": 11.253552745183304, |
|
"step": 2950 |
|
}, |
|
{ |
|
"MSE": 699.5130490112299, |
|
"MSE/layer0": 699.5130490112299, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.31, |
|
"input_norm": 31.997922519048053, |
|
"input_norm/layer0": 31.997922519048053, |
|
"learning_rate": 0.00036842105263157896, |
|
"loss": 2.0439, |
|
"max_norm": 57.87739562988281, |
|
"max_norm/layer0": 57.87739562988281, |
|
"mean_norm": 50.21486854553223, |
|
"mean_norm/layer0": 50.21486854553223, |
|
"multicode_k": 1, |
|
"output_norm": 11.316310184796652, |
|
"output_norm/layer0": 11.316310184796652, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_MSE/layer0": 698.7265792011616, |
|
"eval_accuracy": 0.5184875063671823, |
|
"eval_dead_code_fraction/layer0": 0.0, |
|
"eval_input_norm/layer0": 31.997919895214224, |
|
"eval_loss": 2.0482470989227295, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 11.359921689315088, |
|
"eval_runtime": 74.2109, |
|
"eval_samples_per_second": 62.295, |
|
"eval_steps_per_second": 7.789, |
|
"step": 3000 |
|
}, |
|
{ |
|
"MSE": 697.8663801066077, |
|
"MSE/layer0": 697.8663801066077, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.32, |
|
"input_norm": 31.997911771138504, |
|
"input_norm/layer0": 31.997911771138504, |
|
"learning_rate": 0.00036578947368421055, |
|
"loss": 2.0511, |
|
"max_norm": 58.24200439453125, |
|
"max_norm/layer0": 58.24200439453125, |
|
"mean_norm": 50.51446723937988, |
|
"mean_norm/layer0": 50.51446723937988, |
|
"multicode_k": 1, |
|
"output_norm": 11.388139980634046, |
|
"output_norm/layer0": 11.388139980634046, |
|
"step": 3050 |
|
}, |
|
{ |
|
"MSE": 696.0450835164395, |
|
"MSE/layer0": 696.0450835164395, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.32, |
|
"input_norm": 31.9979209582011, |
|
"input_norm/layer0": 31.9979209582011, |
|
"learning_rate": 0.00036315789473684214, |
|
"loss": 2.0466, |
|
"max_norm": 58.58406066894531, |
|
"max_norm/layer0": 58.58406066894531, |
|
"mean_norm": 50.81120681762695, |
|
"mean_norm/layer0": 50.81120681762695, |
|
"multicode_k": 1, |
|
"output_norm": 11.455551563898727, |
|
"output_norm/layer0": 11.455551563898727, |
|
"step": 3100 |
|
}, |
|
{ |
|
"MSE": 694.5301999918622, |
|
"MSE/layer0": 694.5301999918622, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.33, |
|
"input_norm": 31.99790574709574, |
|
"input_norm/layer0": 31.99790574709574, |
|
"learning_rate": 0.0003605263157894737, |
|
"loss": 2.0294, |
|
"max_norm": 58.931087493896484, |
|
"max_norm/layer0": 58.931087493896484, |
|
"mean_norm": 51.104164123535156, |
|
"mean_norm/layer0": 51.104164123535156, |
|
"multicode_k": 1, |
|
"output_norm": 11.512675134340917, |
|
"output_norm/layer0": 11.512675134340917, |
|
"step": 3150 |
|
}, |
|
{ |
|
"MSE": 692.5095411173497, |
|
"MSE/layer0": 692.5095411173497, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.33, |
|
"input_norm": 31.997909634908044, |
|
"input_norm/layer0": 31.997909634908044, |
|
"learning_rate": 0.0003578947368421053, |
|
"loss": 2.0455, |
|
"max_norm": 59.2867546081543, |
|
"max_norm/layer0": 59.2867546081543, |
|
"mean_norm": 51.39415168762207, |
|
"mean_norm/layer0": 51.39415168762207, |
|
"multicode_k": 1, |
|
"output_norm": 11.587491785685224, |
|
"output_norm/layer0": 11.587491785685224, |
|
"step": 3200 |
|
}, |
|
{ |
|
"MSE": 691.1425885009767, |
|
"MSE/layer0": 691.1425885009767, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.34, |
|
"input_norm": 31.99791768709818, |
|
"input_norm/layer0": 31.99791768709818, |
|
"learning_rate": 0.00035526315789473687, |
|
"loss": 2.0348, |
|
"max_norm": 59.64825439453125, |
|
"max_norm/layer0": 59.64825439453125, |
|
"mean_norm": 51.68109130859375, |
|
"mean_norm/layer0": 51.68109130859375, |
|
"multicode_k": 1, |
|
"output_norm": 11.643148959477745, |
|
"output_norm/layer0": 11.643148959477745, |
|
"step": 3250 |
|
}, |
|
{ |
|
"MSE": 689.2906094360355, |
|
"MSE/layer0": 689.2906094360355, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.34, |
|
"input_norm": 31.997913980483997, |
|
"input_norm/layer0": 31.997913980483997, |
|
"learning_rate": 0.0003526315789473684, |
|
"loss": 2.0293, |
|
"max_norm": 59.97624206542969, |
|
"max_norm/layer0": 59.97624206542969, |
|
"mean_norm": 51.965484619140625, |
|
"mean_norm/layer0": 51.965484619140625, |
|
"multicode_k": 1, |
|
"output_norm": 11.714975148836775, |
|
"output_norm/layer0": 11.714975148836775, |
|
"step": 3300 |
|
}, |
|
{ |
|
"MSE": 688.0525922648112, |
|
"MSE/layer0": 688.0525922648112, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.35, |
|
"input_norm": 31.997908350626624, |
|
"input_norm/layer0": 31.997908350626624, |
|
"learning_rate": 0.00035, |
|
"loss": 2.0389, |
|
"max_norm": 60.30556869506836, |
|
"max_norm/layer0": 60.30556869506836, |
|
"mean_norm": 52.2458438873291, |
|
"mean_norm/layer0": 52.2458438873291, |
|
"multicode_k": 1, |
|
"output_norm": 11.772027517954506, |
|
"output_norm/layer0": 11.772027517954506, |
|
"step": 3350 |
|
}, |
|
{ |
|
"MSE": 686.4814953613279, |
|
"MSE/layer0": 686.4814953613279, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.35, |
|
"input_norm": 31.997902415593472, |
|
"input_norm/layer0": 31.997902415593472, |
|
"learning_rate": 0.0003473684210526316, |
|
"loss": 2.0266, |
|
"max_norm": 60.628334045410156, |
|
"max_norm/layer0": 60.628334045410156, |
|
"mean_norm": 52.522024154663086, |
|
"mean_norm/layer0": 52.522024154663086, |
|
"multicode_k": 1, |
|
"output_norm": 11.842156640688584, |
|
"output_norm/layer0": 11.842156640688584, |
|
"step": 3400 |
|
}, |
|
{ |
|
"MSE": 684.6515231323242, |
|
"MSE/layer0": 684.6515231323242, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.36, |
|
"input_norm": 31.99791290283203, |
|
"input_norm/layer0": 31.99791290283203, |
|
"learning_rate": 0.0003447368421052632, |
|
"loss": 2.0248, |
|
"max_norm": 60.95072555541992, |
|
"max_norm/layer0": 60.95072555541992, |
|
"mean_norm": 52.79400825500488, |
|
"mean_norm/layer0": 52.79400825500488, |
|
"multicode_k": 1, |
|
"output_norm": 11.907371897697445, |
|
"output_norm/layer0": 11.907371897697445, |
|
"step": 3450 |
|
}, |
|
{ |
|
"MSE": 683.5430062866212, |
|
"MSE/layer0": 683.5430062866212, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.37, |
|
"input_norm": 31.9979167175293, |
|
"input_norm/layer0": 31.9979167175293, |
|
"learning_rate": 0.00034210526315789477, |
|
"loss": 2.0263, |
|
"max_norm": 61.270816802978516, |
|
"max_norm/layer0": 61.270816802978516, |
|
"mean_norm": 53.06429481506348, |
|
"mean_norm/layer0": 53.06429481506348, |
|
"multicode_k": 1, |
|
"output_norm": 11.956860675811765, |
|
"output_norm/layer0": 11.956860675811765, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_MSE/layer0": 682.2680427869782, |
|
"eval_accuracy": 0.5224062440993215, |
|
"eval_dead_code_fraction/layer0": 0.0, |
|
"eval_input_norm/layer0": 31.997916449774355, |
|
"eval_loss": 2.0253396034240723, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 12.010468493388789, |
|
"eval_runtime": 73.2716, |
|
"eval_samples_per_second": 63.094, |
|
"eval_steps_per_second": 7.888, |
|
"step": 3500 |
|
}, |
|
{ |
|
"MSE": 682.0599540201822, |
|
"MSE/layer0": 682.0599540201822, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.37, |
|
"input_norm": 31.9979091612498, |
|
"input_norm/layer0": 31.9979091612498, |
|
"learning_rate": 0.0003394736842105263, |
|
"loss": 2.035, |
|
"max_norm": 61.60363006591797, |
|
"max_norm/layer0": 61.60363006591797, |
|
"mean_norm": 53.33056831359863, |
|
"mean_norm/layer0": 53.33056831359863, |
|
"multicode_k": 1, |
|
"output_norm": 12.018342121442167, |
|
"output_norm/layer0": 12.018342121442167, |
|
"step": 3550 |
|
}, |
|
{ |
|
"MSE": 680.5750654093424, |
|
"MSE/layer0": 680.5750654093424, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.38, |
|
"input_norm": 31.997909587224328, |
|
"input_norm/layer0": 31.997909587224328, |
|
"learning_rate": 0.0003368421052631579, |
|
"loss": 2.0232, |
|
"max_norm": 61.922420501708984, |
|
"max_norm/layer0": 61.922420501708984, |
|
"mean_norm": 53.59366035461426, |
|
"mean_norm/layer0": 53.59366035461426, |
|
"multicode_k": 1, |
|
"output_norm": 12.078021968205773, |
|
"output_norm/layer0": 12.078021968205773, |
|
"step": 3600 |
|
}, |
|
{ |
|
"MSE": 678.8478289794925, |
|
"MSE/layer0": 678.8478289794925, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.38, |
|
"input_norm": 31.99789888381958, |
|
"input_norm/layer0": 31.99789888381958, |
|
"learning_rate": 0.00033421052631578944, |
|
"loss": 2.0171, |
|
"max_norm": 62.24449157714844, |
|
"max_norm/layer0": 62.24449157714844, |
|
"mean_norm": 53.85357475280762, |
|
"mean_norm/layer0": 53.85357475280762, |
|
"multicode_k": 1, |
|
"output_norm": 12.149001522064214, |
|
"output_norm/layer0": 12.149001522064214, |
|
"step": 3650 |
|
}, |
|
{ |
|
"MSE": 677.7631386311848, |
|
"MSE/layer0": 677.7631386311848, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.39, |
|
"input_norm": 31.997902571360274, |
|
"input_norm/layer0": 31.997902571360274, |
|
"learning_rate": 0.00033157894736842103, |
|
"loss": 2.0212, |
|
"max_norm": 62.564937591552734, |
|
"max_norm/layer0": 62.564937591552734, |
|
"mean_norm": 54.10923385620117, |
|
"mean_norm/layer0": 54.10923385620117, |
|
"multicode_k": 1, |
|
"output_norm": 12.200160818099977, |
|
"output_norm/layer0": 12.200160818099977, |
|
"step": 3700 |
|
}, |
|
{ |
|
"MSE": 676.4079176839191, |
|
"MSE/layer0": 676.4079176839191, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.39, |
|
"input_norm": 31.99789404869079, |
|
"input_norm/layer0": 31.99789404869079, |
|
"learning_rate": 0.0003289473684210527, |
|
"loss": 2.0184, |
|
"max_norm": 62.88063430786133, |
|
"max_norm/layer0": 62.88063430786133, |
|
"mean_norm": 54.362863540649414, |
|
"mean_norm/layer0": 54.362863540649414, |
|
"multicode_k": 1, |
|
"output_norm": 12.259288868904115, |
|
"output_norm/layer0": 12.259288868904115, |
|
"step": 3750 |
|
}, |
|
{ |
|
"MSE": 675.2395422363282, |
|
"MSE/layer0": 675.2395422363282, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.4, |
|
"input_norm": 31.99789286295573, |
|
"input_norm/layer0": 31.99789286295573, |
|
"learning_rate": 0.0003263157894736842, |
|
"loss": 2.0058, |
|
"max_norm": 63.18323516845703, |
|
"max_norm/layer0": 63.18323516845703, |
|
"mean_norm": 54.61160659790039, |
|
"mean_norm/layer0": 54.61160659790039, |
|
"multicode_k": 1, |
|
"output_norm": 12.305311093330385, |
|
"output_norm/layer0": 12.305311093330385, |
|
"step": 3800 |
|
}, |
|
{ |
|
"MSE": 673.5289611816404, |
|
"MSE/layer0": 673.5289611816404, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.4, |
|
"input_norm": 31.997895905176787, |
|
"input_norm/layer0": 31.997895905176787, |
|
"learning_rate": 0.0003236842105263158, |
|
"loss": 2.0147, |
|
"max_norm": 63.47829055786133, |
|
"max_norm/layer0": 63.47829055786133, |
|
"mean_norm": 54.85733413696289, |
|
"mean_norm/layer0": 54.85733413696289, |
|
"multicode_k": 1, |
|
"output_norm": 12.368880640665692, |
|
"output_norm/layer0": 12.368880640665692, |
|
"step": 3850 |
|
}, |
|
{ |
|
"MSE": 672.7262348429363, |
|
"MSE/layer0": 672.7262348429363, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.41, |
|
"input_norm": 31.997892808914187, |
|
"input_norm/layer0": 31.997892808914187, |
|
"learning_rate": 0.0003210526315789474, |
|
"loss": 2.0011, |
|
"max_norm": 63.7920036315918, |
|
"max_norm/layer0": 63.7920036315918, |
|
"mean_norm": 55.099992752075195, |
|
"mean_norm/layer0": 55.099992752075195, |
|
"multicode_k": 1, |
|
"output_norm": 12.413625540733335, |
|
"output_norm/layer0": 12.413625540733335, |
|
"step": 3900 |
|
}, |
|
{ |
|
"MSE": 671.2364042154949, |
|
"MSE/layer0": 671.2364042154949, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.41, |
|
"input_norm": 31.997892484664916, |
|
"input_norm/layer0": 31.997892484664916, |
|
"learning_rate": 0.00031842105263157894, |
|
"loss": 2.0068, |
|
"max_norm": 64.07488250732422, |
|
"max_norm/layer0": 64.07488250732422, |
|
"mean_norm": 55.33942985534668, |
|
"mean_norm/layer0": 55.33942985534668, |
|
"multicode_k": 1, |
|
"output_norm": 12.478335504531861, |
|
"output_norm/layer0": 12.478335504531861, |
|
"step": 3950 |
|
}, |
|
{ |
|
"MSE": 669.9738427734378, |
|
"MSE/layer0": 669.9738427734378, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.42, |
|
"input_norm": 31.997889916102086, |
|
"input_norm/layer0": 31.997889916102086, |
|
"learning_rate": 0.00031578947368421053, |
|
"loss": 1.9906, |
|
"max_norm": 64.34879302978516, |
|
"max_norm/layer0": 64.34879302978516, |
|
"mean_norm": 55.576541900634766, |
|
"mean_norm/layer0": 55.576541900634766, |
|
"multicode_k": 1, |
|
"output_norm": 12.524646544456482, |
|
"output_norm/layer0": 12.524646544456482, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_MSE/layer0": 669.1965223770751, |
|
"eval_accuracy": 0.5253332978103237, |
|
"eval_dead_code_fraction/layer0": 0.0, |
|
"eval_input_norm/layer0": 31.997898890449704, |
|
"eval_loss": 2.006638526916504, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 12.556819209953474, |
|
"eval_runtime": 73.3751, |
|
"eval_samples_per_second": 63.005, |
|
"eval_steps_per_second": 7.877, |
|
"step": 4000 |
|
}, |
|
{ |
|
"MSE": 668.3390091959637, |
|
"MSE/layer0": 668.3390091959637, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.42, |
|
"input_norm": 31.99788640658062, |
|
"input_norm/layer0": 31.99788640658062, |
|
"learning_rate": 0.00031315789473684207, |
|
"loss": 1.9962, |
|
"max_norm": 64.65262603759766, |
|
"max_norm/layer0": 64.65262603759766, |
|
"mean_norm": 55.811140060424805, |
|
"mean_norm/layer0": 55.811140060424805, |
|
"multicode_k": 1, |
|
"output_norm": 12.584023051261894, |
|
"output_norm/layer0": 12.584023051261894, |
|
"step": 4050 |
|
}, |
|
{ |
|
"MSE": 667.4144735717773, |
|
"MSE/layer0": 667.4144735717773, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.43, |
|
"input_norm": 31.99788414637247, |
|
"input_norm/layer0": 31.99788414637247, |
|
"learning_rate": 0.0003105263157894737, |
|
"loss": 2.0038, |
|
"max_norm": 64.9332275390625, |
|
"max_norm/layer0": 64.9332275390625, |
|
"mean_norm": 56.04119682312012, |
|
"mean_norm/layer0": 56.04119682312012, |
|
"multicode_k": 1, |
|
"output_norm": 12.633416105906175, |
|
"output_norm/layer0": 12.633416105906175, |
|
"step": 4100 |
|
}, |
|
{ |
|
"MSE": 666.502211812337, |
|
"MSE/layer0": 666.502211812337, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.43, |
|
"input_norm": 31.997885338465373, |
|
"input_norm/layer0": 31.997885338465373, |
|
"learning_rate": 0.0003078947368421053, |
|
"loss": 2.0046, |
|
"max_norm": 65.20265197753906, |
|
"max_norm/layer0": 65.20265197753906, |
|
"mean_norm": 56.26777458190918, |
|
"mean_norm/layer0": 56.26777458190918, |
|
"multicode_k": 1, |
|
"output_norm": 12.67455391089122, |
|
"output_norm/layer0": 12.67455391089122, |
|
"step": 4150 |
|
}, |
|
{ |
|
"MSE": 665.0832258097332, |
|
"MSE/layer0": 665.0832258097332, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.44, |
|
"input_norm": 31.997875661849967, |
|
"input_norm/layer0": 31.997875661849967, |
|
"learning_rate": 0.00030526315789473684, |
|
"loss": 2.0066, |
|
"max_norm": 65.46887969970703, |
|
"max_norm/layer0": 65.46887969970703, |
|
"mean_norm": 56.49208450317383, |
|
"mean_norm/layer0": 56.49208450317383, |
|
"multicode_k": 1, |
|
"output_norm": 12.73067569255829, |
|
"output_norm/layer0": 12.73067569255829, |
|
"step": 4200 |
|
}, |
|
{ |
|
"MSE": 663.9124774169925, |
|
"MSE/layer0": 663.9124774169925, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.44, |
|
"input_norm": 31.997874129613244, |
|
"input_norm/layer0": 31.997874129613244, |
|
"learning_rate": 0.00030263157894736844, |
|
"loss": 2.0006, |
|
"max_norm": 65.73078918457031, |
|
"max_norm/layer0": 65.73078918457031, |
|
"mean_norm": 56.712989807128906, |
|
"mean_norm/layer0": 56.712989807128906, |
|
"multicode_k": 1, |
|
"output_norm": 12.783326719601945, |
|
"output_norm/layer0": 12.783326719601945, |
|
"step": 4250 |
|
}, |
|
{ |
|
"MSE": 663.0191631062823, |
|
"MSE/layer0": 663.0191631062823, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.45, |
|
"input_norm": 31.99787082672119, |
|
"input_norm/layer0": 31.99787082672119, |
|
"learning_rate": 0.0003, |
|
"loss": 1.9862, |
|
"max_norm": 65.99481964111328, |
|
"max_norm/layer0": 65.99481964111328, |
|
"mean_norm": 56.93141746520996, |
|
"mean_norm/layer0": 56.93141746520996, |
|
"multicode_k": 1, |
|
"output_norm": 12.824343484242757, |
|
"output_norm/layer0": 12.824343484242757, |
|
"step": 4300 |
|
}, |
|
{ |
|
"MSE": 661.9175501505531, |
|
"MSE/layer0": 661.9175501505531, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.45, |
|
"input_norm": 31.997863101959226, |
|
"input_norm/layer0": 31.997863101959226, |
|
"learning_rate": 0.00029736842105263157, |
|
"loss": 1.9891, |
|
"max_norm": 66.25289916992188, |
|
"max_norm/layer0": 66.25289916992188, |
|
"mean_norm": 57.14705848693848, |
|
"mean_norm/layer0": 57.14705848693848, |
|
"multicode_k": 1, |
|
"output_norm": 12.873331023852028, |
|
"output_norm/layer0": 12.873331023852028, |
|
"step": 4350 |
|
}, |
|
{ |
|
"MSE": 660.8278486124677, |
|
"MSE/layer0": 660.8278486124677, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.46, |
|
"input_norm": 31.99786113739014, |
|
"input_norm/layer0": 31.99786113739014, |
|
"learning_rate": 0.00029473684210526316, |
|
"loss": 1.9874, |
|
"max_norm": 66.49950408935547, |
|
"max_norm/layer0": 66.49950408935547, |
|
"mean_norm": 57.3592414855957, |
|
"mean_norm/layer0": 57.3592414855957, |
|
"multicode_k": 1, |
|
"output_norm": 12.925755645434062, |
|
"output_norm/layer0": 12.925755645434062, |
|
"step": 4400 |
|
}, |
|
{ |
|
"MSE": 659.7812182617188, |
|
"MSE/layer0": 659.7812182617188, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.46, |
|
"input_norm": 31.997859818140668, |
|
"input_norm/layer0": 31.997859818140668, |
|
"learning_rate": 0.00029210526315789475, |
|
"loss": 1.9894, |
|
"max_norm": 66.74647521972656, |
|
"max_norm/layer0": 66.74647521972656, |
|
"mean_norm": 57.56860542297363, |
|
"mean_norm/layer0": 57.56860542297363, |
|
"multicode_k": 1, |
|
"output_norm": 12.969949612617494, |
|
"output_norm/layer0": 12.969949612617494, |
|
"step": 4450 |
|
}, |
|
{ |
|
"MSE": 658.2862462361654, |
|
"MSE/layer0": 658.2862462361654, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.47, |
|
"input_norm": 31.997855517069482, |
|
"input_norm/layer0": 31.997855517069482, |
|
"learning_rate": 0.00028947368421052634, |
|
"loss": 1.9852, |
|
"max_norm": 67.0057373046875, |
|
"max_norm/layer0": 67.0057373046875, |
|
"mean_norm": 57.77582931518555, |
|
"mean_norm/layer0": 57.77582931518555, |
|
"multicode_k": 1, |
|
"output_norm": 13.019407332738238, |
|
"output_norm/layer0": 13.019407332738238, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_MSE/layer0": 657.5871718611108, |
|
"eval_accuracy": 0.5279040641917702, |
|
"eval_dead_code_fraction/layer0": 0.0, |
|
"eval_input_norm/layer0": 31.997854675071842, |
|
"eval_loss": 1.9898165464401245, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 13.052642994207561, |
|
"eval_runtime": 74.0479, |
|
"eval_samples_per_second": 62.433, |
|
"eval_steps_per_second": 7.806, |
|
"step": 4500 |
|
}, |
|
{ |
|
"MSE": 657.2974259440105, |
|
"MSE/layer0": 657.2974259440105, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.48, |
|
"input_norm": 31.99785115559897, |
|
"input_norm/layer0": 31.99785115559897, |
|
"learning_rate": 0.0002868421052631579, |
|
"loss": 1.9727, |
|
"max_norm": 67.25566101074219, |
|
"max_norm/layer0": 67.25566101074219, |
|
"mean_norm": 57.98077964782715, |
|
"mean_norm/layer0": 57.98077964782715, |
|
"multicode_k": 1, |
|
"output_norm": 13.063522001902262, |
|
"output_norm/layer0": 13.063522001902262, |
|
"step": 4550 |
|
}, |
|
{ |
|
"MSE": 656.5759895833334, |
|
"MSE/layer0": 656.5759895833334, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.48, |
|
"input_norm": 31.997858088811242, |
|
"input_norm/layer0": 31.997858088811242, |
|
"learning_rate": 0.00028421052631578947, |
|
"loss": 1.9897, |
|
"max_norm": 67.49605560302734, |
|
"max_norm/layer0": 67.49605560302734, |
|
"mean_norm": 58.182559967041016, |
|
"mean_norm/layer0": 58.182559967041016, |
|
"multicode_k": 1, |
|
"output_norm": 13.099744346936546, |
|
"output_norm/layer0": 13.099744346936546, |
|
"step": 4600 |
|
}, |
|
{ |
|
"MSE": 655.8373800659178, |
|
"MSE/layer0": 655.8373800659178, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.49, |
|
"input_norm": 31.997857850392663, |
|
"input_norm/layer0": 31.997857850392663, |
|
"learning_rate": 0.00028157894736842106, |
|
"loss": 1.9918, |
|
"max_norm": 67.72962188720703, |
|
"max_norm/layer0": 67.72962188720703, |
|
"mean_norm": 58.38115119934082, |
|
"mean_norm/layer0": 58.38115119934082, |
|
"multicode_k": 1, |
|
"output_norm": 13.13247790972392, |
|
"output_norm/layer0": 13.13247790972392, |
|
"step": 4650 |
|
}, |
|
{ |
|
"MSE": 654.6057424926755, |
|
"MSE/layer0": 654.6057424926755, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.49, |
|
"input_norm": 31.997855739593504, |
|
"input_norm/layer0": 31.997855739593504, |
|
"learning_rate": 0.0002789473684210526, |
|
"loss": 1.9908, |
|
"max_norm": 67.96855163574219, |
|
"max_norm/layer0": 67.96855163574219, |
|
"mean_norm": 58.57722091674805, |
|
"mean_norm/layer0": 58.57722091674805, |
|
"multicode_k": 1, |
|
"output_norm": 13.187800091107682, |
|
"output_norm/layer0": 13.187800091107682, |
|
"step": 4700 |
|
}, |
|
{ |
|
"MSE": 653.7336292521161, |
|
"MSE/layer0": 653.7336292521161, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.5, |
|
"input_norm": 31.997861677805588, |
|
"input_norm/layer0": 31.997861677805588, |
|
"learning_rate": 0.00027631578947368425, |
|
"loss": 1.9919, |
|
"max_norm": 68.20356750488281, |
|
"max_norm/layer0": 68.20356750488281, |
|
"mean_norm": 58.77041053771973, |
|
"mean_norm/layer0": 58.77041053771973, |
|
"multicode_k": 1, |
|
"output_norm": 13.224705770810434, |
|
"output_norm/layer0": 13.224705770810434, |
|
"step": 4750 |
|
}, |
|
{ |
|
"MSE": 652.4711893717447, |
|
"MSE/layer0": 652.4711893717447, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.5, |
|
"input_norm": 31.997852430343634, |
|
"input_norm/layer0": 31.997852430343634, |
|
"learning_rate": 0.00027368421052631584, |
|
"loss": 1.9777, |
|
"max_norm": 68.42557525634766, |
|
"max_norm/layer0": 68.42557525634766, |
|
"mean_norm": 58.96235466003418, |
|
"mean_norm/layer0": 58.96235466003418, |
|
"multicode_k": 1, |
|
"output_norm": 13.275700616836549, |
|
"output_norm/layer0": 13.275700616836549, |
|
"step": 4800 |
|
}, |
|
{ |
|
"MSE": 651.660216674805, |
|
"MSE/layer0": 651.660216674805, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.51, |
|
"input_norm": 31.997857831319166, |
|
"input_norm/layer0": 31.997857831319166, |
|
"learning_rate": 0.0002710526315789474, |
|
"loss": 1.9728, |
|
"max_norm": 68.6562271118164, |
|
"max_norm/layer0": 68.6562271118164, |
|
"mean_norm": 59.151214599609375, |
|
"mean_norm/layer0": 59.151214599609375, |
|
"multicode_k": 1, |
|
"output_norm": 13.316913062731425, |
|
"output_norm/layer0": 13.316913062731425, |
|
"step": 4850 |
|
}, |
|
{ |
|
"MSE": 651.1180463663741, |
|
"MSE/layer0": 651.1180463663741, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.51, |
|
"input_norm": 31.997851276397704, |
|
"input_norm/layer0": 31.997851276397704, |
|
"learning_rate": 0.00026842105263157897, |
|
"loss": 1.9806, |
|
"max_norm": 68.8842544555664, |
|
"max_norm/layer0": 68.8842544555664, |
|
"mean_norm": 59.336891174316406, |
|
"mean_norm/layer0": 59.336891174316406, |
|
"multicode_k": 1, |
|
"output_norm": 13.348248120943708, |
|
"output_norm/layer0": 13.348248120943708, |
|
"step": 4900 |
|
}, |
|
{ |
|
"MSE": 650.0774853515621, |
|
"MSE/layer0": 650.0774853515621, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.52, |
|
"input_norm": 31.997846142450957, |
|
"input_norm/layer0": 31.997846142450957, |
|
"learning_rate": 0.0002657894736842105, |
|
"loss": 1.9718, |
|
"max_norm": 69.09481811523438, |
|
"max_norm/layer0": 69.09481811523438, |
|
"mean_norm": 59.52014923095703, |
|
"mean_norm/layer0": 59.52014923095703, |
|
"multicode_k": 1, |
|
"output_norm": 13.38570951779683, |
|
"output_norm/layer0": 13.38570951779683, |
|
"step": 4950 |
|
}, |
|
{ |
|
"MSE": 649.2541728719073, |
|
"MSE/layer0": 649.2541728719073, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.52, |
|
"input_norm": 31.997852964401247, |
|
"input_norm/layer0": 31.997852964401247, |
|
"learning_rate": 0.0002631578947368421, |
|
"loss": 1.9687, |
|
"max_norm": 69.3100357055664, |
|
"max_norm/layer0": 69.3100357055664, |
|
"mean_norm": 59.70068359375, |
|
"mean_norm/layer0": 59.70068359375, |
|
"multicode_k": 1, |
|
"output_norm": 13.423000381787617, |
|
"output_norm/layer0": 13.423000381787617, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_MSE/layer0": 648.246248562512, |
|
"eval_accuracy": 0.5299863891896716, |
|
"eval_dead_code_fraction/layer0": 0.0, |
|
"eval_input_norm/layer0": 31.997853133679993, |
|
"eval_loss": 1.975706934928894, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 13.449585199510798, |
|
"eval_runtime": 73.7352, |
|
"eval_samples_per_second": 62.697, |
|
"eval_steps_per_second": 7.839, |
|
"step": 5000 |
|
}, |
|
{ |
|
"MSE": 648.4500269571938, |
|
"MSE/layer0": 648.4500269571938, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.53, |
|
"input_norm": 31.99784724235535, |
|
"input_norm/layer0": 31.99784724235535, |
|
"learning_rate": 0.0002605263157894737, |
|
"loss": 1.9816, |
|
"max_norm": 69.5140151977539, |
|
"max_norm/layer0": 69.5140151977539, |
|
"mean_norm": 59.87860107421875, |
|
"mean_norm/layer0": 59.87860107421875, |
|
"multicode_k": 1, |
|
"output_norm": 13.459953915278113, |
|
"output_norm/layer0": 13.459953915278113, |
|
"step": 5050 |
|
}, |
|
{ |
|
"MSE": 647.5120207722985, |
|
"MSE/layer0": 647.5120207722985, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.53, |
|
"input_norm": 31.997845083872484, |
|
"input_norm/layer0": 31.997845083872484, |
|
"learning_rate": 0.0002578947368421053, |
|
"loss": 1.9778, |
|
"max_norm": 69.72222137451172, |
|
"max_norm/layer0": 69.72222137451172, |
|
"mean_norm": 60.054636001586914, |
|
"mean_norm/layer0": 60.054636001586914, |
|
"multicode_k": 1, |
|
"output_norm": 13.495457221666976, |
|
"output_norm/layer0": 13.495457221666976, |
|
"step": 5100 |
|
}, |
|
{ |
|
"MSE": 646.8909526570638, |
|
"MSE/layer0": 646.8909526570638, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.54, |
|
"input_norm": 31.99783927281696, |
|
"input_norm/layer0": 31.99783927281696, |
|
"learning_rate": 0.0002552631578947369, |
|
"loss": 1.9608, |
|
"max_norm": 69.93621826171875, |
|
"max_norm/layer0": 69.93621826171875, |
|
"mean_norm": 60.228532791137695, |
|
"mean_norm/layer0": 60.228532791137695, |
|
"multicode_k": 1, |
|
"output_norm": 13.523821023305253, |
|
"output_norm/layer0": 13.523821023305253, |
|
"step": 5150 |
|
}, |
|
{ |
|
"MSE": 645.6001059977214, |
|
"MSE/layer0": 645.6001059977214, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.54, |
|
"input_norm": 31.997829329172767, |
|
"input_norm/layer0": 31.997829329172767, |
|
"learning_rate": 0.0002526315789473684, |
|
"loss": 1.9514, |
|
"max_norm": 70.1629867553711, |
|
"max_norm/layer0": 70.1629867553711, |
|
"mean_norm": 60.39993667602539, |
|
"mean_norm/layer0": 60.39993667602539, |
|
"multicode_k": 1, |
|
"output_norm": 13.575601536432904, |
|
"output_norm/layer0": 13.575601536432904, |
|
"step": 5200 |
|
}, |
|
{ |
|
"MSE": 645.0477313232423, |
|
"MSE/layer0": 645.0477313232423, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.55, |
|
"input_norm": 31.997829844156904, |
|
"input_norm/layer0": 31.997829844156904, |
|
"learning_rate": 0.00025, |
|
"loss": 1.953, |
|
"max_norm": 70.36659240722656, |
|
"max_norm/layer0": 70.36659240722656, |
|
"mean_norm": 60.568695068359375, |
|
"mean_norm/layer0": 60.568695068359375, |
|
"multicode_k": 1, |
|
"output_norm": 13.606370126406352, |
|
"output_norm/layer0": 13.606370126406352, |
|
"step": 5250 |
|
}, |
|
{ |
|
"MSE": 644.0795441691082, |
|
"MSE/layer0": 644.0795441691082, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.55, |
|
"input_norm": 31.997827720642086, |
|
"input_norm/layer0": 31.997827720642086, |
|
"learning_rate": 0.0002473684210526316, |
|
"loss": 1.9664, |
|
"max_norm": 70.58203125, |
|
"max_norm/layer0": 70.58203125, |
|
"mean_norm": 60.73503303527832, |
|
"mean_norm/layer0": 60.73503303527832, |
|
"multicode_k": 1, |
|
"output_norm": 13.644356350898736, |
|
"output_norm/layer0": 13.644356350898736, |
|
"step": 5300 |
|
}, |
|
{ |
|
"MSE": 643.4398297119142, |
|
"MSE/layer0": 643.4398297119142, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.56, |
|
"input_norm": 31.99783255259196, |
|
"input_norm/layer0": 31.99783255259196, |
|
"learning_rate": 0.0002447368421052632, |
|
"loss": 1.9612, |
|
"max_norm": 70.80116271972656, |
|
"max_norm/layer0": 70.80116271972656, |
|
"mean_norm": 60.89903450012207, |
|
"mean_norm/layer0": 60.89903450012207, |
|
"multicode_k": 1, |
|
"output_norm": 13.676611545880633, |
|
"output_norm/layer0": 13.676611545880633, |
|
"step": 5350 |
|
}, |
|
{ |
|
"MSE": 642.6565199788413, |
|
"MSE/layer0": 642.6565199788413, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.56, |
|
"input_norm": 31.997826932271334, |
|
"input_norm/layer0": 31.997826932271334, |
|
"learning_rate": 0.00024210526315789475, |
|
"loss": 1.9695, |
|
"max_norm": 71.0198745727539, |
|
"max_norm/layer0": 71.0198745727539, |
|
"mean_norm": 61.06051063537598, |
|
"mean_norm/layer0": 61.06051063537598, |
|
"multicode_k": 1, |
|
"output_norm": 13.705395914713542, |
|
"output_norm/layer0": 13.705395914713542, |
|
"step": 5400 |
|
}, |
|
{ |
|
"MSE": 641.5518863932293, |
|
"MSE/layer0": 641.5518863932293, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.57, |
|
"input_norm": 31.99782320658366, |
|
"input_norm/layer0": 31.99782320658366, |
|
"learning_rate": 0.00023947368421052632, |
|
"loss": 1.9708, |
|
"max_norm": 71.22209930419922, |
|
"max_norm/layer0": 71.22209930419922, |
|
"mean_norm": 61.22001647949219, |
|
"mean_norm/layer0": 61.22001647949219, |
|
"multicode_k": 1, |
|
"output_norm": 13.747722525596622, |
|
"output_norm/layer0": 13.747722525596622, |
|
"step": 5450 |
|
}, |
|
{ |
|
"MSE": 641.0277577718095, |
|
"MSE/layer0": 641.0277577718095, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.57, |
|
"input_norm": 31.997817249298095, |
|
"input_norm/layer0": 31.997817249298095, |
|
"learning_rate": 0.00023684210526315788, |
|
"loss": 1.9672, |
|
"max_norm": 71.42549896240234, |
|
"max_norm/layer0": 71.42549896240234, |
|
"mean_norm": 61.377342224121094, |
|
"mean_norm/layer0": 61.377342224121094, |
|
"multicode_k": 1, |
|
"output_norm": 13.775313488642375, |
|
"output_norm/layer0": 13.775313488642375, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_MSE/layer0": 640.0821653411886, |
|
"eval_accuracy": 0.5321348969378108, |
|
"eval_dead_code_fraction/layer0": 0.0, |
|
"eval_input_norm/layer0": 31.997811444105338, |
|
"eval_loss": 1.9619895219802856, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 13.80778875099279, |
|
"eval_runtime": 73.8101, |
|
"eval_samples_per_second": 62.634, |
|
"eval_steps_per_second": 7.831, |
|
"step": 5500 |
|
}, |
|
{ |
|
"MSE": 640.2260070800783, |
|
"MSE/layer0": 640.2260070800783, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.58, |
|
"input_norm": 31.997807060877484, |
|
"input_norm/layer0": 31.997807060877484, |
|
"learning_rate": 0.00023421052631578948, |
|
"loss": 1.9526, |
|
"max_norm": 71.6324691772461, |
|
"max_norm/layer0": 71.6324691772461, |
|
"mean_norm": 61.532691955566406, |
|
"mean_norm/layer0": 61.532691955566406, |
|
"multicode_k": 1, |
|
"output_norm": 13.81434581597646, |
|
"output_norm/layer0": 13.81434581597646, |
|
"step": 5550 |
|
}, |
|
{ |
|
"MSE": 639.6603690592448, |
|
"MSE/layer0": 639.6603690592448, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.58, |
|
"input_norm": 31.997815796534205, |
|
"input_norm/layer0": 31.997815796534205, |
|
"learning_rate": 0.00023157894736842107, |
|
"loss": 1.9592, |
|
"max_norm": 71.83050537109375, |
|
"max_norm/layer0": 71.83050537109375, |
|
"mean_norm": 61.68556213378906, |
|
"mean_norm/layer0": 61.68556213378906, |
|
"multicode_k": 1, |
|
"output_norm": 13.843803273836771, |
|
"output_norm/layer0": 13.843803273836771, |
|
"step": 5600 |
|
}, |
|
{ |
|
"MSE": 638.8630006917316, |
|
"MSE/layer0": 638.8630006917316, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.59, |
|
"input_norm": 31.997804651260374, |
|
"input_norm/layer0": 31.997804651260374, |
|
"learning_rate": 0.00022894736842105263, |
|
"loss": 1.9582, |
|
"max_norm": 72.0186767578125, |
|
"max_norm/layer0": 72.0186767578125, |
|
"mean_norm": 61.836381912231445, |
|
"mean_norm/layer0": 61.836381912231445, |
|
"multicode_k": 1, |
|
"output_norm": 13.87206829547882, |
|
"output_norm/layer0": 13.87206829547882, |
|
"step": 5650 |
|
}, |
|
{ |
|
"MSE": 638.6114538574218, |
|
"MSE/layer0": 638.6114538574218, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.6, |
|
"input_norm": 31.997799615859993, |
|
"input_norm/layer0": 31.997799615859993, |
|
"learning_rate": 0.00022631578947368422, |
|
"loss": 1.9581, |
|
"max_norm": 72.212158203125, |
|
"max_norm/layer0": 72.212158203125, |
|
"mean_norm": 61.984375, |
|
"mean_norm/layer0": 61.984375, |
|
"multicode_k": 1, |
|
"output_norm": 13.890618721644087, |
|
"output_norm/layer0": 13.890618721644087, |
|
"step": 5700 |
|
}, |
|
{ |
|
"MSE": 637.4200433349613, |
|
"MSE/layer0": 637.4200433349613, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.6, |
|
"input_norm": 31.9977961031596, |
|
"input_norm/layer0": 31.9977961031596, |
|
"learning_rate": 0.0002236842105263158, |
|
"loss": 1.9563, |
|
"max_norm": 72.40010833740234, |
|
"max_norm/layer0": 72.40010833740234, |
|
"mean_norm": 62.13043212890625, |
|
"mean_norm/layer0": 62.13043212890625, |
|
"multicode_k": 1, |
|
"output_norm": 13.935336654980983, |
|
"output_norm/layer0": 13.935336654980983, |
|
"step": 5750 |
|
}, |
|
{ |
|
"MSE": 636.9881141153974, |
|
"MSE/layer0": 636.9881141153974, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.61, |
|
"input_norm": 31.997795972824097, |
|
"input_norm/layer0": 31.997795972824097, |
|
"learning_rate": 0.00022105263157894735, |
|
"loss": 1.9652, |
|
"max_norm": 72.58822631835938, |
|
"max_norm/layer0": 72.58822631835938, |
|
"mean_norm": 62.274553298950195, |
|
"mean_norm/layer0": 62.274553298950195, |
|
"multicode_k": 1, |
|
"output_norm": 13.960987841288247, |
|
"output_norm/layer0": 13.960987841288247, |
|
"step": 5800 |
|
}, |
|
{ |
|
"MSE": 636.22215037028, |
|
"MSE/layer0": 636.22215037028, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.61, |
|
"input_norm": 31.997794774373368, |
|
"input_norm/layer0": 31.997794774373368, |
|
"learning_rate": 0.00021842105263157897, |
|
"loss": 1.9509, |
|
"max_norm": 72.77027130126953, |
|
"max_norm/layer0": 72.77027130126953, |
|
"mean_norm": 62.417043685913086, |
|
"mean_norm/layer0": 62.417043685913086, |
|
"multicode_k": 1, |
|
"output_norm": 13.98557560602824, |
|
"output_norm/layer0": 13.98557560602824, |
|
"step": 5850 |
|
}, |
|
{ |
|
"MSE": 635.6220120239254, |
|
"MSE/layer0": 635.6220120239254, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.62, |
|
"input_norm": 31.997796500523897, |
|
"input_norm/layer0": 31.997796500523897, |
|
"learning_rate": 0.00021578947368421054, |
|
"loss": 1.9637, |
|
"max_norm": 72.93942260742188, |
|
"max_norm/layer0": 72.93942260742188, |
|
"mean_norm": 62.5573787689209, |
|
"mean_norm/layer0": 62.5573787689209, |
|
"multicode_k": 1, |
|
"output_norm": 14.011822309494022, |
|
"output_norm/layer0": 14.011822309494022, |
|
"step": 5900 |
|
}, |
|
{ |
|
"MSE": 635.1990796915693, |
|
"MSE/layer0": 635.1990796915693, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.62, |
|
"input_norm": 31.99778350830077, |
|
"input_norm/layer0": 31.99778350830077, |
|
"learning_rate": 0.0002131578947368421, |
|
"loss": 1.9417, |
|
"max_norm": 73.11217498779297, |
|
"max_norm/layer0": 73.11217498779297, |
|
"mean_norm": 62.69554328918457, |
|
"mean_norm/layer0": 62.69554328918457, |
|
"multicode_k": 1, |
|
"output_norm": 14.040199557940166, |
|
"output_norm/layer0": 14.040199557940166, |
|
"step": 5950 |
|
}, |
|
{ |
|
"MSE": 634.617561645508, |
|
"MSE/layer0": 634.617561645508, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.63, |
|
"input_norm": 31.9977766195933, |
|
"input_norm/layer0": 31.9977766195933, |
|
"learning_rate": 0.00021052631578947367, |
|
"loss": 1.9441, |
|
"max_norm": 73.27617645263672, |
|
"max_norm/layer0": 73.27617645263672, |
|
"mean_norm": 62.831491470336914, |
|
"mean_norm/layer0": 62.831491470336914, |
|
"multicode_k": 1, |
|
"output_norm": 14.065582130750016, |
|
"output_norm/layer0": 14.065582130750016, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_MSE/layer0": 633.8831350106634, |
|
"eval_accuracy": 0.5338761587531762, |
|
"eval_dead_code_fraction/layer0": 0.0, |
|
"eval_input_norm/layer0": 31.997772803689244, |
|
"eval_loss": 1.951315999031067, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 14.101806794000915, |
|
"eval_runtime": 73.5977, |
|
"eval_samples_per_second": 62.814, |
|
"eval_steps_per_second": 7.854, |
|
"step": 6000 |
|
}, |
|
{ |
|
"MSE": 633.8391249593099, |
|
"MSE/layer0": 633.8391249593099, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.63, |
|
"input_norm": 31.99777683258057, |
|
"input_norm/layer0": 31.99777683258057, |
|
"learning_rate": 0.0002078947368421053, |
|
"loss": 1.9507, |
|
"max_norm": 73.43240356445312, |
|
"max_norm/layer0": 73.43240356445312, |
|
"mean_norm": 62.96537971496582, |
|
"mean_norm/layer0": 62.96537971496582, |
|
"multicode_k": 1, |
|
"output_norm": 14.0993266805013, |
|
"output_norm/layer0": 14.0993266805013, |
|
"step": 6050 |
|
}, |
|
{ |
|
"MSE": 633.1878758748373, |
|
"MSE/layer0": 633.1878758748373, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.64, |
|
"input_norm": 31.997768185933438, |
|
"input_norm/layer0": 31.997768185933438, |
|
"learning_rate": 0.00020526315789473685, |
|
"loss": 1.9535, |
|
"max_norm": 73.59780883789062, |
|
"max_norm/layer0": 73.59780883789062, |
|
"mean_norm": 63.09744453430176, |
|
"mean_norm/layer0": 63.09744453430176, |
|
"multicode_k": 1, |
|
"output_norm": 14.12703340212504, |
|
"output_norm/layer0": 14.12703340212504, |
|
"step": 6100 |
|
}, |
|
{ |
|
"MSE": 632.4774736531577, |
|
"MSE/layer0": 632.4774736531577, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.64, |
|
"input_norm": 31.997762225468954, |
|
"input_norm/layer0": 31.997762225468954, |
|
"learning_rate": 0.00020263157894736842, |
|
"loss": 1.9502, |
|
"max_norm": 73.7634506225586, |
|
"max_norm/layer0": 73.7634506225586, |
|
"mean_norm": 63.227373123168945, |
|
"mean_norm/layer0": 63.227373123168945, |
|
"multicode_k": 1, |
|
"output_norm": 14.155767776171366, |
|
"output_norm/layer0": 14.155767776171366, |
|
"step": 6150 |
|
}, |
|
{ |
|
"MSE": 632.0819724527997, |
|
"MSE/layer0": 632.0819724527997, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.65, |
|
"input_norm": 31.997758595148735, |
|
"input_norm/layer0": 31.997758595148735, |
|
"learning_rate": 0.0002, |
|
"loss": 1.948, |
|
"max_norm": 73.93152618408203, |
|
"max_norm/layer0": 73.93152618408203, |
|
"mean_norm": 63.35538673400879, |
|
"mean_norm/layer0": 63.35538673400879, |
|
"multicode_k": 1, |
|
"output_norm": 14.17972202301026, |
|
"output_norm/layer0": 14.17972202301026, |
|
"step": 6200 |
|
}, |
|
{ |
|
"MSE": 631.3937511189779, |
|
"MSE/layer0": 631.3937511189779, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.65, |
|
"input_norm": 31.997760909398387, |
|
"input_norm/layer0": 31.997760909398387, |
|
"learning_rate": 0.00019736842105263157, |
|
"loss": 1.9449, |
|
"max_norm": 74.07744598388672, |
|
"max_norm/layer0": 74.07744598388672, |
|
"mean_norm": 63.481435775756836, |
|
"mean_norm/layer0": 63.481435775756836, |
|
"multicode_k": 1, |
|
"output_norm": 14.207703741391498, |
|
"output_norm/layer0": 14.207703741391498, |
|
"step": 6250 |
|
}, |
|
{ |
|
"MSE": 631.1097898356121, |
|
"MSE/layer0": 631.1097898356121, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.66, |
|
"input_norm": 31.997752253214514, |
|
"input_norm/layer0": 31.997752253214514, |
|
"learning_rate": 0.00019473684210526317, |
|
"loss": 1.9256, |
|
"max_norm": 74.23124694824219, |
|
"max_norm/layer0": 74.23124694824219, |
|
"mean_norm": 63.605464935302734, |
|
"mean_norm/layer0": 63.605464935302734, |
|
"multicode_k": 1, |
|
"output_norm": 14.22562705675761, |
|
"output_norm/layer0": 14.22562705675761, |
|
"step": 6300 |
|
}, |
|
{ |
|
"MSE": 630.4715811157231, |
|
"MSE/layer0": 630.4715811157231, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.66, |
|
"input_norm": 31.99775326093037, |
|
"input_norm/layer0": 31.99775326093037, |
|
"learning_rate": 0.00019210526315789473, |
|
"loss": 1.9394, |
|
"max_norm": 74.37789154052734, |
|
"max_norm/layer0": 74.37789154052734, |
|
"mean_norm": 63.72765922546387, |
|
"mean_norm/layer0": 63.72765922546387, |
|
"multicode_k": 1, |
|
"output_norm": 14.252348532676702, |
|
"output_norm/layer0": 14.252348532676702, |
|
"step": 6350 |
|
}, |
|
{ |
|
"MSE": 629.5616383870444, |
|
"MSE/layer0": 629.5616383870444, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.67, |
|
"input_norm": 31.9977592086792, |
|
"input_norm/layer0": 31.9977592086792, |
|
"learning_rate": 0.00018947368421052632, |
|
"loss": 1.948, |
|
"max_norm": 74.52799987792969, |
|
"max_norm/layer0": 74.52799987792969, |
|
"mean_norm": 63.84817886352539, |
|
"mean_norm/layer0": 63.84817886352539, |
|
"multicode_k": 1, |
|
"output_norm": 14.278619543711342, |
|
"output_norm/layer0": 14.278619543711342, |
|
"step": 6400 |
|
}, |
|
{ |
|
"MSE": 628.9405068969726, |
|
"MSE/layer0": 628.9405068969726, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.67, |
|
"input_norm": 31.9977388159434, |
|
"input_norm/layer0": 31.9977388159434, |
|
"learning_rate": 0.0001868421052631579, |
|
"loss": 1.9365, |
|
"max_norm": 74.66854095458984, |
|
"max_norm/layer0": 74.66854095458984, |
|
"mean_norm": 63.96674346923828, |
|
"mean_norm/layer0": 63.96674346923828, |
|
"multicode_k": 1, |
|
"output_norm": 14.308290360768634, |
|
"output_norm/layer0": 14.308290360768634, |
|
"step": 6450 |
|
}, |
|
{ |
|
"MSE": 628.8358187866208, |
|
"MSE/layer0": 628.8358187866208, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.68, |
|
"input_norm": 31.997742996215806, |
|
"input_norm/layer0": 31.997742996215806, |
|
"learning_rate": 0.00018421052631578948, |
|
"loss": 1.9408, |
|
"max_norm": 74.80778503417969, |
|
"max_norm/layer0": 74.80778503417969, |
|
"mean_norm": 64.08341407775879, |
|
"mean_norm/layer0": 64.08341407775879, |
|
"multicode_k": 1, |
|
"output_norm": 14.319794411659238, |
|
"output_norm/layer0": 14.319794411659238, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_MSE/layer0": 628.092910030562, |
|
"eval_accuracy": 0.5357603583933366, |
|
"eval_dead_code_fraction/layer0": 0.0, |
|
"eval_input_norm/layer0": 31.997742160687373, |
|
"eval_loss": 1.9396723508834839, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 14.354976222496019, |
|
"eval_runtime": 73.9338, |
|
"eval_samples_per_second": 62.529, |
|
"eval_steps_per_second": 7.818, |
|
"step": 6500 |
|
}, |
|
{ |
|
"MSE": 628.4872816975908, |
|
"MSE/layer0": 628.4872816975908, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.68, |
|
"input_norm": 31.997741378148394, |
|
"input_norm/layer0": 31.997741378148394, |
|
"learning_rate": 0.00018157894736842107, |
|
"loss": 1.9392, |
|
"max_norm": 74.95079040527344, |
|
"max_norm/layer0": 74.95079040527344, |
|
"mean_norm": 64.19818496704102, |
|
"mean_norm/layer0": 64.19818496704102, |
|
"multicode_k": 1, |
|
"output_norm": 14.340212704340617, |
|
"output_norm/layer0": 14.340212704340617, |
|
"step": 6550 |
|
}, |
|
{ |
|
"MSE": 627.595106302897, |
|
"MSE/layer0": 627.595106302897, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.69, |
|
"input_norm": 31.99773236592611, |
|
"input_norm/layer0": 31.99773236592611, |
|
"learning_rate": 0.00017894736842105264, |
|
"loss": 1.9312, |
|
"max_norm": 75.08959197998047, |
|
"max_norm/layer0": 75.08959197998047, |
|
"mean_norm": 64.3110408782959, |
|
"mean_norm/layer0": 64.3110408782959, |
|
"multicode_k": 1, |
|
"output_norm": 14.375651826858522, |
|
"output_norm/layer0": 14.375651826858522, |
|
"step": 6600 |
|
}, |
|
{ |
|
"MSE": 627.2688003540036, |
|
"MSE/layer0": 627.2688003540036, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.69, |
|
"input_norm": 31.997724459966015, |
|
"input_norm/layer0": 31.997724459966015, |
|
"learning_rate": 0.0001763157894736842, |
|
"loss": 1.9454, |
|
"max_norm": 75.23365783691406, |
|
"max_norm/layer0": 75.23365783691406, |
|
"mean_norm": 64.42234230041504, |
|
"mean_norm/layer0": 64.42234230041504, |
|
"multicode_k": 1, |
|
"output_norm": 14.385090745290121, |
|
"output_norm/layer0": 14.385090745290121, |
|
"step": 6650 |
|
}, |
|
{ |
|
"MSE": 626.5893623860678, |
|
"MSE/layer0": 626.5893623860678, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.7, |
|
"input_norm": 31.997720209757503, |
|
"input_norm/layer0": 31.997720209757503, |
|
"learning_rate": 0.0001736842105263158, |
|
"loss": 1.9428, |
|
"max_norm": 75.36791229248047, |
|
"max_norm/layer0": 75.36791229248047, |
|
"mean_norm": 64.5310287475586, |
|
"mean_norm/layer0": 64.5310287475586, |
|
"multicode_k": 1, |
|
"output_norm": 14.414791498184208, |
|
"output_norm/layer0": 14.414791498184208, |
|
"step": 6700 |
|
}, |
|
{ |
|
"MSE": 626.1687516276043, |
|
"MSE/layer0": 626.1687516276043, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.7, |
|
"input_norm": 31.997717237472536, |
|
"input_norm/layer0": 31.997717237472536, |
|
"learning_rate": 0.00017105263157894739, |
|
"loss": 1.9341, |
|
"max_norm": 75.49561309814453, |
|
"max_norm/layer0": 75.49561309814453, |
|
"mean_norm": 64.63836669921875, |
|
"mean_norm/layer0": 64.63836669921875, |
|
"multicode_k": 1, |
|
"output_norm": 14.436859647432962, |
|
"output_norm/layer0": 14.436859647432962, |
|
"step": 6750 |
|
}, |
|
{ |
|
"MSE": 625.7842074584966, |
|
"MSE/layer0": 625.7842074584966, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.71, |
|
"input_norm": 31.997723042170207, |
|
"input_norm/layer0": 31.997723042170207, |
|
"learning_rate": 0.00016842105263157895, |
|
"loss": 1.9391, |
|
"max_norm": 75.62852478027344, |
|
"max_norm/layer0": 75.62852478027344, |
|
"mean_norm": 64.74386024475098, |
|
"mean_norm/layer0": 64.74386024475098, |
|
"multicode_k": 1, |
|
"output_norm": 14.45211536884308, |
|
"output_norm/layer0": 14.45211536884308, |
|
"step": 6800 |
|
}, |
|
{ |
|
"MSE": 625.3583324178057, |
|
"MSE/layer0": 625.3583324178057, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.72, |
|
"input_norm": 31.997710723876956, |
|
"input_norm/layer0": 31.997710723876956, |
|
"learning_rate": 0.00016578947368421052, |
|
"loss": 1.9214, |
|
"max_norm": 75.7518081665039, |
|
"max_norm/layer0": 75.7518081665039, |
|
"mean_norm": 64.84785079956055, |
|
"mean_norm/layer0": 64.84785079956055, |
|
"multicode_k": 1, |
|
"output_norm": 14.472083713213603, |
|
"output_norm/layer0": 14.472083713213603, |
|
"step": 6850 |
|
}, |
|
{ |
|
"MSE": 625.0808269246418, |
|
"MSE/layer0": 625.0808269246418, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.72, |
|
"input_norm": 31.997701005935667, |
|
"input_norm/layer0": 31.997701005935667, |
|
"learning_rate": 0.0001631578947368421, |
|
"loss": 1.9248, |
|
"max_norm": 75.8736343383789, |
|
"max_norm/layer0": 75.8736343383789, |
|
"mean_norm": 64.94989013671875, |
|
"mean_norm/layer0": 64.94989013671875, |
|
"multicode_k": 1, |
|
"output_norm": 14.49320138454437, |
|
"output_norm/layer0": 14.49320138454437, |
|
"step": 6900 |
|
}, |
|
{ |
|
"MSE": 624.4893544514975, |
|
"MSE/layer0": 624.4893544514975, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.73, |
|
"input_norm": 31.997702992757166, |
|
"input_norm/layer0": 31.997702992757166, |
|
"learning_rate": 0.0001605263157894737, |
|
"loss": 1.9357, |
|
"max_norm": 75.99244689941406, |
|
"max_norm/layer0": 75.99244689941406, |
|
"mean_norm": 65.05006790161133, |
|
"mean_norm/layer0": 65.05006790161133, |
|
"multicode_k": 1, |
|
"output_norm": 14.515017460187277, |
|
"output_norm/layer0": 14.515017460187277, |
|
"step": 6950 |
|
}, |
|
{ |
|
"MSE": 623.983821309408, |
|
"MSE/layer0": 623.983821309408, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.73, |
|
"input_norm": 31.997692581812547, |
|
"input_norm/layer0": 31.997692581812547, |
|
"learning_rate": 0.00015789473684210527, |
|
"loss": 1.9256, |
|
"max_norm": 76.1169204711914, |
|
"max_norm/layer0": 76.1169204711914, |
|
"mean_norm": 65.14841270446777, |
|
"mean_norm/layer0": 65.14841270446777, |
|
"multicode_k": 1, |
|
"output_norm": 14.531605450312297, |
|
"output_norm/layer0": 14.531605450312297, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_MSE/layer0": 623.2726008245854, |
|
"eval_accuracy": 0.5373965313049694, |
|
"eval_dead_code_fraction/layer0": 0.0, |
|
"eval_input_norm/layer0": 31.99768957905041, |
|
"eval_loss": 1.9302037954330444, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 14.553397603295936, |
|
"eval_runtime": 73.3018, |
|
"eval_samples_per_second": 63.068, |
|
"eval_steps_per_second": 7.885, |
|
"step": 7000 |
|
}, |
|
{ |
|
"MSE": 623.8173256429034, |
|
"MSE/layer0": 623.8173256429034, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.74, |
|
"input_norm": 31.997689800262457, |
|
"input_norm/layer0": 31.997689800262457, |
|
"learning_rate": 0.00015526315789473686, |
|
"loss": 1.9215, |
|
"max_norm": 76.22943115234375, |
|
"max_norm/layer0": 76.22943115234375, |
|
"mean_norm": 65.2452278137207, |
|
"mean_norm/layer0": 65.2452278137207, |
|
"multicode_k": 1, |
|
"output_norm": 14.544135572115584, |
|
"output_norm/layer0": 14.544135572115584, |
|
"step": 7050 |
|
}, |
|
{ |
|
"MSE": 623.4564833577472, |
|
"MSE/layer0": 623.4564833577472, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.74, |
|
"input_norm": 31.997697146733607, |
|
"input_norm/layer0": 31.997697146733607, |
|
"learning_rate": 0.00015263157894736842, |
|
"loss": 1.9291, |
|
"max_norm": 76.35796356201172, |
|
"max_norm/layer0": 76.35796356201172, |
|
"mean_norm": 65.33997344970703, |
|
"mean_norm/layer0": 65.33997344970703, |
|
"multicode_k": 1, |
|
"output_norm": 14.557166822751359, |
|
"output_norm/layer0": 14.557166822751359, |
|
"step": 7100 |
|
}, |
|
{ |
|
"MSE": 622.3157424926754, |
|
"MSE/layer0": 622.3157424926754, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.75, |
|
"input_norm": 31.997690575917574, |
|
"input_norm/layer0": 31.997690575917574, |
|
"learning_rate": 0.00015, |
|
"loss": 1.9272, |
|
"max_norm": 76.47930145263672, |
|
"max_norm/layer0": 76.47930145263672, |
|
"mean_norm": 65.4333724975586, |
|
"mean_norm/layer0": 65.4333724975586, |
|
"multicode_k": 1, |
|
"output_norm": 14.59491890271505, |
|
"output_norm/layer0": 14.59491890271505, |
|
"step": 7150 |
|
}, |
|
{ |
|
"MSE": 622.1008169555663, |
|
"MSE/layer0": 622.1008169555663, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.75, |
|
"input_norm": 31.997691469192503, |
|
"input_norm/layer0": 31.997691469192503, |
|
"learning_rate": 0.00014736842105263158, |
|
"loss": 1.9421, |
|
"max_norm": 76.5845947265625, |
|
"max_norm/layer0": 76.5845947265625, |
|
"mean_norm": 65.52462577819824, |
|
"mean_norm/layer0": 65.52462577819824, |
|
"multicode_k": 1, |
|
"output_norm": 14.608456416130064, |
|
"output_norm/layer0": 14.608456416130064, |
|
"step": 7200 |
|
}, |
|
{ |
|
"MSE": 621.7943653361006, |
|
"MSE/layer0": 621.7943653361006, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.76, |
|
"input_norm": 31.997678140004478, |
|
"input_norm/layer0": 31.997678140004478, |
|
"learning_rate": 0.00014473684210526317, |
|
"loss": 1.9221, |
|
"max_norm": 76.68899536132812, |
|
"max_norm/layer0": 76.68899536132812, |
|
"mean_norm": 65.61434745788574, |
|
"mean_norm/layer0": 65.61434745788574, |
|
"multicode_k": 1, |
|
"output_norm": 14.622403078079222, |
|
"output_norm/layer0": 14.622403078079222, |
|
"step": 7250 |
|
}, |
|
{ |
|
"MSE": 621.7445918782552, |
|
"MSE/layer0": 621.7445918782552, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.76, |
|
"input_norm": 31.997679424285884, |
|
"input_norm/layer0": 31.997679424285884, |
|
"learning_rate": 0.00014210526315789474, |
|
"loss": 1.9172, |
|
"max_norm": 76.79942321777344, |
|
"max_norm/layer0": 76.79942321777344, |
|
"mean_norm": 65.70241737365723, |
|
"mean_norm/layer0": 65.70241737365723, |
|
"multicode_k": 1, |
|
"output_norm": 14.632240413029983, |
|
"output_norm/layer0": 14.632240413029983, |
|
"step": 7300 |
|
}, |
|
{ |
|
"MSE": 621.0073055013017, |
|
"MSE/layer0": 621.0073055013017, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.77, |
|
"input_norm": 31.997667986551914, |
|
"input_norm/layer0": 31.997667986551914, |
|
"learning_rate": 0.0001394736842105263, |
|
"loss": 1.9187, |
|
"max_norm": 76.90473937988281, |
|
"max_norm/layer0": 76.90473937988281, |
|
"mean_norm": 65.78865623474121, |
|
"mean_norm/layer0": 65.78865623474121, |
|
"multicode_k": 1, |
|
"output_norm": 14.659644064903254, |
|
"output_norm/layer0": 14.659644064903254, |
|
"step": 7350 |
|
}, |
|
{ |
|
"MSE": 620.5166587320964, |
|
"MSE/layer0": 620.5166587320964, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.77, |
|
"input_norm": 31.99766827583312, |
|
"input_norm/layer0": 31.99766827583312, |
|
"learning_rate": 0.00013684210526315792, |
|
"loss": 1.9236, |
|
"max_norm": 77.00653839111328, |
|
"max_norm/layer0": 77.00653839111328, |
|
"mean_norm": 65.87344741821289, |
|
"mean_norm/layer0": 65.87344741821289, |
|
"multicode_k": 1, |
|
"output_norm": 14.683248674074807, |
|
"output_norm/layer0": 14.683248674074807, |
|
"step": 7400 |
|
}, |
|
{ |
|
"MSE": 620.4730934651691, |
|
"MSE/layer0": 620.4730934651691, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.78, |
|
"input_norm": 31.99766536712645, |
|
"input_norm/layer0": 31.99766536712645, |
|
"learning_rate": 0.00013421052631578948, |
|
"loss": 1.9181, |
|
"max_norm": 77.11151123046875, |
|
"max_norm/layer0": 77.11151123046875, |
|
"mean_norm": 65.95642852783203, |
|
"mean_norm/layer0": 65.95642852783203, |
|
"multicode_k": 1, |
|
"output_norm": 14.682427426973977, |
|
"output_norm/layer0": 14.682427426973977, |
|
"step": 7450 |
|
}, |
|
{ |
|
"MSE": 619.8806704711913, |
|
"MSE/layer0": 619.8806704711913, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.78, |
|
"input_norm": 31.997652931213374, |
|
"input_norm/layer0": 31.997652931213374, |
|
"learning_rate": 0.00013157894736842105, |
|
"loss": 1.9204, |
|
"max_norm": 77.21614837646484, |
|
"max_norm/layer0": 77.21614837646484, |
|
"mean_norm": 66.03750610351562, |
|
"mean_norm/layer0": 66.03750610351562, |
|
"multicode_k": 1, |
|
"output_norm": 14.709125100771587, |
|
"output_norm/layer0": 14.709125100771587, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_MSE/layer0": 619.4572802491444, |
|
"eval_accuracy": 0.538146743438657, |
|
"eval_dead_code_fraction/layer0": 0.0, |
|
"eval_input_norm/layer0": 31.997657016941467, |
|
"eval_loss": 1.9224542379379272, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 14.72584700899003, |
|
"eval_runtime": 73.2809, |
|
"eval_samples_per_second": 63.086, |
|
"eval_steps_per_second": 7.887, |
|
"step": 7500 |
|
}, |
|
{ |
|
"MSE": 619.6498880004883, |
|
"MSE/layer0": 619.6498880004883, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.79, |
|
"input_norm": 31.997653865814208, |
|
"input_norm/layer0": 31.997653865814208, |
|
"learning_rate": 0.00012894736842105264, |
|
"loss": 1.9109, |
|
"max_norm": 77.3195571899414, |
|
"max_norm/layer0": 77.3195571899414, |
|
"mean_norm": 66.11709403991699, |
|
"mean_norm/layer0": 66.11709403991699, |
|
"multicode_k": 1, |
|
"output_norm": 14.724224853515622, |
|
"output_norm/layer0": 14.724224853515622, |
|
"step": 7550 |
|
}, |
|
{ |
|
"MSE": 619.544646809896, |
|
"MSE/layer0": 619.544646809896, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.79, |
|
"input_norm": 31.997655792236333, |
|
"input_norm/layer0": 31.997655792236333, |
|
"learning_rate": 0.0001263157894736842, |
|
"loss": 1.9247, |
|
"max_norm": 77.41654205322266, |
|
"max_norm/layer0": 77.41654205322266, |
|
"mean_norm": 66.19502639770508, |
|
"mean_norm/layer0": 66.19502639770508, |
|
"multicode_k": 1, |
|
"output_norm": 14.729852019945778, |
|
"output_norm/layer0": 14.729852019945778, |
|
"step": 7600 |
|
}, |
|
{ |
|
"MSE": 619.1442233276366, |
|
"MSE/layer0": 619.1442233276366, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.8, |
|
"input_norm": 31.99764471054077, |
|
"input_norm/layer0": 31.99764471054077, |
|
"learning_rate": 0.0001236842105263158, |
|
"loss": 1.9237, |
|
"max_norm": 77.5074234008789, |
|
"max_norm/layer0": 77.5074234008789, |
|
"mean_norm": 66.27114677429199, |
|
"mean_norm/layer0": 66.27114677429199, |
|
"multicode_k": 1, |
|
"output_norm": 14.745990212758379, |
|
"output_norm/layer0": 14.745990212758379, |
|
"step": 7650 |
|
}, |
|
{ |
|
"MSE": 618.6404962158206, |
|
"MSE/layer0": 618.6404962158206, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.8, |
|
"input_norm": 31.997635892232267, |
|
"input_norm/layer0": 31.997635892232267, |
|
"learning_rate": 0.00012105263157894738, |
|
"loss": 1.913, |
|
"max_norm": 77.602294921875, |
|
"max_norm/layer0": 77.602294921875, |
|
"mean_norm": 66.34577751159668, |
|
"mean_norm/layer0": 66.34577751159668, |
|
"multicode_k": 1, |
|
"output_norm": 14.766639779408772, |
|
"output_norm/layer0": 14.766639779408772, |
|
"step": 7700 |
|
}, |
|
{ |
|
"MSE": 618.2833578491213, |
|
"MSE/layer0": 618.2833578491213, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.81, |
|
"input_norm": 31.997631740570075, |
|
"input_norm/layer0": 31.997631740570075, |
|
"learning_rate": 0.00011842105263157894, |
|
"loss": 1.9214, |
|
"max_norm": 77.6917724609375, |
|
"max_norm/layer0": 77.6917724609375, |
|
"mean_norm": 66.41888046264648, |
|
"mean_norm/layer0": 66.41888046264648, |
|
"multicode_k": 1, |
|
"output_norm": 14.779039435386654, |
|
"output_norm/layer0": 14.779039435386654, |
|
"step": 7750 |
|
}, |
|
{ |
|
"MSE": 618.2477112833653, |
|
"MSE/layer0": 618.2477112833653, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.81, |
|
"input_norm": 31.997634382247924, |
|
"input_norm/layer0": 31.997634382247924, |
|
"learning_rate": 0.00011578947368421053, |
|
"loss": 1.9127, |
|
"max_norm": 77.77839660644531, |
|
"max_norm/layer0": 77.77839660644531, |
|
"mean_norm": 66.49017333984375, |
|
"mean_norm/layer0": 66.49017333984375, |
|
"multicode_k": 1, |
|
"output_norm": 14.782011265754704, |
|
"output_norm/layer0": 14.782011265754704, |
|
"step": 7800 |
|
}, |
|
{ |
|
"MSE": 617.7417582194005, |
|
"MSE/layer0": 617.7417582194005, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.82, |
|
"input_norm": 31.997628266016648, |
|
"input_norm/layer0": 31.997628266016648, |
|
"learning_rate": 0.00011315789473684211, |
|
"loss": 1.9084, |
|
"max_norm": 77.86212158203125, |
|
"max_norm/layer0": 77.86212158203125, |
|
"mean_norm": 66.55990791320801, |
|
"mean_norm/layer0": 66.55990791320801, |
|
"multicode_k": 1, |
|
"output_norm": 14.801776518821718, |
|
"output_norm/layer0": 14.801776518821718, |
|
"step": 7850 |
|
}, |
|
{ |
|
"MSE": 617.339886271159, |
|
"MSE/layer0": 617.339886271159, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.82, |
|
"input_norm": 31.99762384732564, |
|
"input_norm/layer0": 31.99762384732564, |
|
"learning_rate": 0.00011052631578947368, |
|
"loss": 1.9115, |
|
"max_norm": 77.94374084472656, |
|
"max_norm/layer0": 77.94374084472656, |
|
"mean_norm": 66.62779235839844, |
|
"mean_norm/layer0": 66.62779235839844, |
|
"multicode_k": 1, |
|
"output_norm": 14.823196705182394, |
|
"output_norm/layer0": 14.823196705182394, |
|
"step": 7900 |
|
}, |
|
{ |
|
"MSE": 617.3184334309897, |
|
"MSE/layer0": 617.3184334309897, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.83, |
|
"input_norm": 31.9976179567973, |
|
"input_norm/layer0": 31.9976179567973, |
|
"learning_rate": 0.00010789473684210527, |
|
"loss": 1.9136, |
|
"max_norm": 78.02580261230469, |
|
"max_norm/layer0": 78.02580261230469, |
|
"mean_norm": 66.69412803649902, |
|
"mean_norm/layer0": 66.69412803649902, |
|
"multicode_k": 1, |
|
"output_norm": 14.828250519434608, |
|
"output_norm/layer0": 14.828250519434608, |
|
"step": 7950 |
|
}, |
|
{ |
|
"MSE": 616.9322255452475, |
|
"MSE/layer0": 616.9322255452475, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.84, |
|
"input_norm": 31.997613105773937, |
|
"input_norm/layer0": 31.997613105773937, |
|
"learning_rate": 0.00010526315789473683, |
|
"loss": 1.907, |
|
"max_norm": 78.10686492919922, |
|
"max_norm/layer0": 78.10686492919922, |
|
"mean_norm": 66.7584114074707, |
|
"mean_norm/layer0": 66.7584114074707, |
|
"multicode_k": 1, |
|
"output_norm": 14.839720834096273, |
|
"output_norm/layer0": 14.839720834096273, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_MSE/layer0": 616.4379357749087, |
|
"eval_accuracy": 0.5393073732024142, |
|
"eval_dead_code_fraction/layer0": 0.0, |
|
"eval_input_norm/layer0": 31.99761064584294, |
|
"eval_loss": 1.9150168895721436, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 14.862492301828695, |
|
"eval_runtime": 73.6278, |
|
"eval_samples_per_second": 62.789, |
|
"eval_steps_per_second": 7.85, |
|
"step": 8000 |
|
}, |
|
{ |
|
"MSE": 616.6813212076825, |
|
"MSE/layer0": 616.6813212076825, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.84, |
|
"input_norm": 31.997603750228897, |
|
"input_norm/layer0": 31.997603750228897, |
|
"learning_rate": 0.00010263157894736843, |
|
"loss": 1.8975, |
|
"max_norm": 78.18397521972656, |
|
"max_norm/layer0": 78.18397521972656, |
|
"mean_norm": 66.82158279418945, |
|
"mean_norm/layer0": 66.82158279418945, |
|
"multicode_k": 1, |
|
"output_norm": 14.848202861150106, |
|
"output_norm/layer0": 14.848202861150106, |
|
"step": 8050 |
|
}, |
|
{ |
|
"MSE": 616.5551970418294, |
|
"MSE/layer0": 616.5551970418294, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.85, |
|
"input_norm": 31.99760689099629, |
|
"input_norm/layer0": 31.99760689099629, |
|
"learning_rate": 0.0001, |
|
"loss": 1.916, |
|
"max_norm": 78.26499938964844, |
|
"max_norm/layer0": 78.26499938964844, |
|
"mean_norm": 66.88335037231445, |
|
"mean_norm/layer0": 66.88335037231445, |
|
"multicode_k": 1, |
|
"output_norm": 14.8604402812322, |
|
"output_norm/layer0": 14.8604402812322, |
|
"step": 8100 |
|
}, |
|
{ |
|
"MSE": 616.288039347331, |
|
"MSE/layer0": 616.288039347331, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.85, |
|
"input_norm": 31.997600466410333, |
|
"input_norm/layer0": 31.997600466410333, |
|
"learning_rate": 9.736842105263158e-05, |
|
"loss": 1.902, |
|
"max_norm": 78.33844757080078, |
|
"max_norm/layer0": 78.33844757080078, |
|
"mean_norm": 66.94340133666992, |
|
"mean_norm/layer0": 66.94340133666992, |
|
"multicode_k": 1, |
|
"output_norm": 14.872187639872232, |
|
"output_norm/layer0": 14.872187639872232, |
|
"step": 8150 |
|
}, |
|
{ |
|
"MSE": 615.8982196044924, |
|
"MSE/layer0": 615.8982196044924, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.86, |
|
"input_norm": 31.997594718933108, |
|
"input_norm/layer0": 31.997594718933108, |
|
"learning_rate": 9.473684210526316e-05, |
|
"loss": 1.9142, |
|
"max_norm": 78.40998077392578, |
|
"max_norm/layer0": 78.40998077392578, |
|
"mean_norm": 67.00171661376953, |
|
"mean_norm/layer0": 67.00171661376953, |
|
"multicode_k": 1, |
|
"output_norm": 14.884622203509018, |
|
"output_norm/layer0": 14.884622203509018, |
|
"step": 8200 |
|
}, |
|
{ |
|
"MSE": 615.649053141276, |
|
"MSE/layer0": 615.649053141276, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.86, |
|
"input_norm": 31.997591203053794, |
|
"input_norm/layer0": 31.997591203053794, |
|
"learning_rate": 9.210526315789474e-05, |
|
"loss": 1.9103, |
|
"max_norm": 78.47700500488281, |
|
"max_norm/layer0": 78.47700500488281, |
|
"mean_norm": 67.05831527709961, |
|
"mean_norm/layer0": 67.05831527709961, |
|
"multicode_k": 1, |
|
"output_norm": 14.896942078272502, |
|
"output_norm/layer0": 14.896942078272502, |
|
"step": 8250 |
|
}, |
|
{ |
|
"MSE": 615.4050069173176, |
|
"MSE/layer0": 615.4050069173176, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.87, |
|
"input_norm": 31.99757507324218, |
|
"input_norm/layer0": 31.99757507324218, |
|
"learning_rate": 8.947368421052632e-05, |
|
"loss": 1.8999, |
|
"max_norm": 78.54086303710938, |
|
"max_norm/layer0": 78.54086303710938, |
|
"mean_norm": 67.11351013183594, |
|
"mean_norm/layer0": 67.11351013183594, |
|
"multicode_k": 1, |
|
"output_norm": 14.907591681480406, |
|
"output_norm/layer0": 14.907591681480406, |
|
"step": 8300 |
|
}, |
|
{ |
|
"MSE": 615.0221789550782, |
|
"MSE/layer0": 615.0221789550782, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.87, |
|
"input_norm": 31.997587076822917, |
|
"input_norm/layer0": 31.997587076822917, |
|
"learning_rate": 8.68421052631579e-05, |
|
"loss": 1.9122, |
|
"max_norm": 78.60425567626953, |
|
"max_norm/layer0": 78.60425567626953, |
|
"mean_norm": 67.1669692993164, |
|
"mean_norm/layer0": 67.1669692993164, |
|
"multicode_k": 1, |
|
"output_norm": 14.918850135803218, |
|
"output_norm/layer0": 14.918850135803218, |
|
"step": 8350 |
|
}, |
|
{ |
|
"MSE": 614.7660255940759, |
|
"MSE/layer0": 614.7660255940759, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.88, |
|
"input_norm": 31.99758012771608, |
|
"input_norm/layer0": 31.99758012771608, |
|
"learning_rate": 8.421052631578948e-05, |
|
"loss": 1.9074, |
|
"max_norm": 78.66250610351562, |
|
"max_norm/layer0": 78.66250610351562, |
|
"mean_norm": 67.21884536743164, |
|
"mean_norm/layer0": 67.21884536743164, |
|
"multicode_k": 1, |
|
"output_norm": 14.930259111722311, |
|
"output_norm/layer0": 14.930259111722311, |
|
"step": 8400 |
|
}, |
|
{ |
|
"MSE": 614.4904387410484, |
|
"MSE/layer0": 614.4904387410484, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.88, |
|
"input_norm": 31.99757884025574, |
|
"input_norm/layer0": 31.99757884025574, |
|
"learning_rate": 8.157894736842105e-05, |
|
"loss": 1.9151, |
|
"max_norm": 78.7247314453125, |
|
"max_norm/layer0": 78.7247314453125, |
|
"mean_norm": 67.26914596557617, |
|
"mean_norm/layer0": 67.26914596557617, |
|
"multicode_k": 1, |
|
"output_norm": 14.941800510088594, |
|
"output_norm/layer0": 14.941800510088594, |
|
"step": 8450 |
|
}, |
|
{ |
|
"MSE": 614.3984759521479, |
|
"MSE/layer0": 614.3984759521479, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.89, |
|
"input_norm": 31.997565978368122, |
|
"input_norm/layer0": 31.997565978368122, |
|
"learning_rate": 7.894736842105263e-05, |
|
"loss": 1.8931, |
|
"max_norm": 78.78428649902344, |
|
"max_norm/layer0": 78.78428649902344, |
|
"mean_norm": 67.31785583496094, |
|
"mean_norm/layer0": 67.31785583496094, |
|
"multicode_k": 1, |
|
"output_norm": 14.948297271728517, |
|
"output_norm/layer0": 14.948297271728517, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_MSE/layer0": 613.78736410403, |
|
"eval_accuracy": 0.5408171011151899, |
|
"eval_dead_code_fraction/layer0": 0.0, |
|
"eval_input_norm/layer0": 31.997572115378908, |
|
"eval_loss": 1.9076036214828491, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 14.968526063531659, |
|
"eval_runtime": 73.9448, |
|
"eval_samples_per_second": 62.52, |
|
"eval_steps_per_second": 7.817, |
|
"step": 8500 |
|
}, |
|
{ |
|
"MSE": 614.2003710937502, |
|
"MSE/layer0": 614.2003710937502, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.89, |
|
"input_norm": 31.997571328481037, |
|
"input_norm/layer0": 31.997571328481037, |
|
"learning_rate": 7.631578947368421e-05, |
|
"loss": 1.9006, |
|
"max_norm": 78.83836364746094, |
|
"max_norm/layer0": 78.83836364746094, |
|
"mean_norm": 67.36493301391602, |
|
"mean_norm/layer0": 67.36493301391602, |
|
"multicode_k": 1, |
|
"output_norm": 14.955024781227117, |
|
"output_norm/layer0": 14.955024781227117, |
|
"step": 8550 |
|
}, |
|
{ |
|
"MSE": 613.8168900553383, |
|
"MSE/layer0": 613.8168900553383, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.9, |
|
"input_norm": 31.997557487487796, |
|
"input_norm/layer0": 31.997557487487796, |
|
"learning_rate": 7.368421052631579e-05, |
|
"loss": 1.9045, |
|
"max_norm": 78.8912582397461, |
|
"max_norm/layer0": 78.8912582397461, |
|
"mean_norm": 67.41046524047852, |
|
"mean_norm/layer0": 67.41046524047852, |
|
"multicode_k": 1, |
|
"output_norm": 14.968488362630207, |
|
"output_norm/layer0": 14.968488362630207, |
|
"step": 8600 |
|
}, |
|
{ |
|
"MSE": 613.6968625895179, |
|
"MSE/layer0": 613.6968625895179, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.9, |
|
"input_norm": 31.99754828453064, |
|
"input_norm/layer0": 31.99754828453064, |
|
"learning_rate": 7.105263157894737e-05, |
|
"loss": 1.9009, |
|
"max_norm": 78.942626953125, |
|
"max_norm/layer0": 78.942626953125, |
|
"mean_norm": 67.45438766479492, |
|
"mean_norm/layer0": 67.45438766479492, |
|
"multicode_k": 1, |
|
"output_norm": 14.979475774765014, |
|
"output_norm/layer0": 14.979475774765014, |
|
"step": 8650 |
|
}, |
|
{ |
|
"MSE": 613.3956824747725, |
|
"MSE/layer0": 613.3956824747725, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.91, |
|
"input_norm": 31.997546965281174, |
|
"input_norm/layer0": 31.997546965281174, |
|
"learning_rate": 6.842105263157896e-05, |
|
"loss": 1.9, |
|
"max_norm": 78.99479675292969, |
|
"max_norm/layer0": 78.99479675292969, |
|
"mean_norm": 67.49666595458984, |
|
"mean_norm/layer0": 67.49666595458984, |
|
"multicode_k": 1, |
|
"output_norm": 14.988234910964966, |
|
"output_norm/layer0": 14.988234910964966, |
|
"step": 8700 |
|
}, |
|
{ |
|
"MSE": 613.2128627522789, |
|
"MSE/layer0": 613.2128627522789, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.91, |
|
"input_norm": 31.997544927597048, |
|
"input_norm/layer0": 31.997544927597048, |
|
"learning_rate": 6.578947368421052e-05, |
|
"loss": 1.9059, |
|
"max_norm": 79.04541015625, |
|
"max_norm/layer0": 79.04541015625, |
|
"mean_norm": 67.53742218017578, |
|
"mean_norm/layer0": 67.53742218017578, |
|
"multicode_k": 1, |
|
"output_norm": 14.991036421457924, |
|
"output_norm/layer0": 14.991036421457924, |
|
"step": 8750 |
|
}, |
|
{ |
|
"MSE": 612.9370720418297, |
|
"MSE/layer0": 612.9370720418297, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.92, |
|
"input_norm": 31.99753908475239, |
|
"input_norm/layer0": 31.99753908475239, |
|
"learning_rate": 6.31578947368421e-05, |
|
"loss": 1.9023, |
|
"max_norm": 79.09040069580078, |
|
"max_norm/layer0": 79.09040069580078, |
|
"mean_norm": 67.57658767700195, |
|
"mean_norm/layer0": 67.57658767700195, |
|
"multicode_k": 1, |
|
"output_norm": 15.003661061922706, |
|
"output_norm/layer0": 15.003661061922706, |
|
"step": 8800 |
|
}, |
|
{ |
|
"MSE": 613.0978963216148, |
|
"MSE/layer0": 613.0978963216148, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.92, |
|
"input_norm": 31.997535756429023, |
|
"input_norm/layer0": 31.997535756429023, |
|
"learning_rate": 6.052631578947369e-05, |
|
"loss": 1.9004, |
|
"max_norm": 79.13478088378906, |
|
"max_norm/layer0": 79.13478088378906, |
|
"mean_norm": 67.61412811279297, |
|
"mean_norm/layer0": 67.61412811279297, |
|
"multicode_k": 1, |
|
"output_norm": 14.999437109629307, |
|
"output_norm/layer0": 14.999437109629307, |
|
"step": 8850 |
|
}, |
|
{ |
|
"MSE": 612.746408691406, |
|
"MSE/layer0": 612.746408691406, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.93, |
|
"input_norm": 31.997531512578334, |
|
"input_norm/layer0": 31.997531512578334, |
|
"learning_rate": 5.789473684210527e-05, |
|
"loss": 1.8947, |
|
"max_norm": 79.17863464355469, |
|
"max_norm/layer0": 79.17863464355469, |
|
"mean_norm": 67.65010452270508, |
|
"mean_norm/layer0": 67.65010452270508, |
|
"multicode_k": 1, |
|
"output_norm": 15.013854147593182, |
|
"output_norm/layer0": 15.013854147593182, |
|
"step": 8900 |
|
}, |
|
{ |
|
"MSE": 612.5075473022462, |
|
"MSE/layer0": 612.5075473022462, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.93, |
|
"input_norm": 31.997522573471066, |
|
"input_norm/layer0": 31.997522573471066, |
|
"learning_rate": 5.526315789473684e-05, |
|
"loss": 1.888, |
|
"max_norm": 79.2198257446289, |
|
"max_norm/layer0": 79.2198257446289, |
|
"mean_norm": 67.6845588684082, |
|
"mean_norm/layer0": 67.6845588684082, |
|
"multicode_k": 1, |
|
"output_norm": 15.024005990028382, |
|
"output_norm/layer0": 15.024005990028382, |
|
"step": 8950 |
|
}, |
|
{ |
|
"MSE": 612.4464337158204, |
|
"MSE/layer0": 612.4464337158204, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.94, |
|
"input_norm": 31.99751985549927, |
|
"input_norm/layer0": 31.99751985549927, |
|
"learning_rate": 5.263157894736842e-05, |
|
"loss": 1.9021, |
|
"max_norm": 79.25985717773438, |
|
"max_norm/layer0": 79.25985717773438, |
|
"mean_norm": 67.71733856201172, |
|
"mean_norm/layer0": 67.71733856201172, |
|
"multicode_k": 1, |
|
"output_norm": 15.025202210744226, |
|
"output_norm/layer0": 15.025202210744226, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_MSE/layer0": 612.012579843437, |
|
"eval_accuracy": 0.5416772654217966, |
|
"eval_dead_code_fraction/layer0": 0.0, |
|
"eval_input_norm/layer0": 31.99751990196794, |
|
"eval_loss": 1.90205979347229, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 15.037853428586699, |
|
"eval_runtime": 73.2981, |
|
"eval_samples_per_second": 63.071, |
|
"eval_steps_per_second": 7.886, |
|
"step": 9000 |
|
}, |
|
{ |
|
"MSE": 612.1616466267901, |
|
"MSE/layer0": 612.1616466267901, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.94, |
|
"input_norm": 31.997515144348153, |
|
"input_norm/layer0": 31.997515144348153, |
|
"learning_rate": 5e-05, |
|
"loss": 1.8979, |
|
"max_norm": 79.2950668334961, |
|
"max_norm/layer0": 79.2950668334961, |
|
"mean_norm": 67.74863052368164, |
|
"mean_norm/layer0": 67.74863052368164, |
|
"multicode_k": 1, |
|
"output_norm": 15.036479252179465, |
|
"output_norm/layer0": 15.036479252179465, |
|
"step": 9050 |
|
}, |
|
{ |
|
"MSE": 611.8442991129552, |
|
"MSE/layer0": 611.8442991129552, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.95, |
|
"input_norm": 31.99751302719116, |
|
"input_norm/layer0": 31.99751302719116, |
|
"learning_rate": 4.736842105263158e-05, |
|
"loss": 1.8978, |
|
"max_norm": 79.32892608642578, |
|
"max_norm/layer0": 79.32892608642578, |
|
"mean_norm": 67.77827835083008, |
|
"mean_norm/layer0": 67.77827835083008, |
|
"multicode_k": 1, |
|
"output_norm": 15.046743833223978, |
|
"output_norm/layer0": 15.046743833223978, |
|
"step": 9100 |
|
}, |
|
{ |
|
"MSE": 611.9183032226562, |
|
"MSE/layer0": 611.9183032226562, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.96, |
|
"input_norm": 31.99751057942708, |
|
"input_norm/layer0": 31.99751057942708, |
|
"learning_rate": 4.473684210526316e-05, |
|
"loss": 1.8971, |
|
"max_norm": 79.36182403564453, |
|
"max_norm/layer0": 79.36182403564453, |
|
"mean_norm": 67.80632781982422, |
|
"mean_norm/layer0": 67.80632781982422, |
|
"multicode_k": 1, |
|
"output_norm": 15.041637244224557, |
|
"output_norm/layer0": 15.041637244224557, |
|
"step": 9150 |
|
}, |
|
{ |
|
"MSE": 611.5441438802083, |
|
"MSE/layer0": 611.5441438802083, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.96, |
|
"input_norm": 31.99750095685323, |
|
"input_norm/layer0": 31.99750095685323, |
|
"learning_rate": 4.210526315789474e-05, |
|
"loss": 1.8874, |
|
"max_norm": 79.39281463623047, |
|
"max_norm/layer0": 79.39281463623047, |
|
"mean_norm": 67.83284759521484, |
|
"mean_norm/layer0": 67.83284759521484, |
|
"multicode_k": 1, |
|
"output_norm": 15.055660729408274, |
|
"output_norm/layer0": 15.055660729408274, |
|
"step": 9200 |
|
}, |
|
{ |
|
"MSE": 611.5922235107425, |
|
"MSE/layer0": 611.5922235107425, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.97, |
|
"input_norm": 31.99750220934551, |
|
"input_norm/layer0": 31.99750220934551, |
|
"learning_rate": 3.9473684210526316e-05, |
|
"loss": 1.8958, |
|
"max_norm": 79.42273712158203, |
|
"max_norm/layer0": 79.42273712158203, |
|
"mean_norm": 67.85774230957031, |
|
"mean_norm/layer0": 67.85774230957031, |
|
"multicode_k": 1, |
|
"output_norm": 15.055747102101643, |
|
"output_norm/layer0": 15.055747102101643, |
|
"step": 9250 |
|
}, |
|
{ |
|
"MSE": 611.6544079589839, |
|
"MSE/layer0": 611.6544079589839, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.97, |
|
"input_norm": 31.997499033610026, |
|
"input_norm/layer0": 31.997499033610026, |
|
"learning_rate": 3.6842105263157895e-05, |
|
"loss": 1.8915, |
|
"max_norm": 79.44976806640625, |
|
"max_norm/layer0": 79.44976806640625, |
|
"mean_norm": 67.88099670410156, |
|
"mean_norm/layer0": 67.88099670410156, |
|
"multicode_k": 1, |
|
"output_norm": 15.057963668505355, |
|
"output_norm/layer0": 15.057963668505355, |
|
"step": 9300 |
|
}, |
|
{ |
|
"MSE": 611.321997172038, |
|
"MSE/layer0": 611.321997172038, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.98, |
|
"input_norm": 31.997498153050746, |
|
"input_norm/layer0": 31.997498153050746, |
|
"learning_rate": 3.421052631578948e-05, |
|
"loss": 1.8893, |
|
"max_norm": 79.47447967529297, |
|
"max_norm/layer0": 79.47447967529297, |
|
"mean_norm": 67.90266799926758, |
|
"mean_norm/layer0": 67.90266799926758, |
|
"multicode_k": 1, |
|
"output_norm": 15.067080327669775, |
|
"output_norm/layer0": 15.067080327669775, |
|
"step": 9350 |
|
}, |
|
{ |
|
"MSE": 611.4500786336266, |
|
"MSE/layer0": 611.4500786336266, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.98, |
|
"input_norm": 31.997495447794595, |
|
"input_norm/layer0": 31.997495447794595, |
|
"learning_rate": 3.157894736842105e-05, |
|
"loss": 1.894, |
|
"max_norm": 79.49812316894531, |
|
"max_norm/layer0": 79.49812316894531, |
|
"mean_norm": 67.92279815673828, |
|
"mean_norm/layer0": 67.92279815673828, |
|
"multicode_k": 1, |
|
"output_norm": 15.062444001833596, |
|
"output_norm/layer0": 15.062444001833596, |
|
"step": 9400 |
|
}, |
|
{ |
|
"MSE": 611.1107730102539, |
|
"MSE/layer0": 611.1107730102539, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.99, |
|
"input_norm": 31.997485151290896, |
|
"input_norm/layer0": 31.997485151290896, |
|
"learning_rate": 2.8947368421052634e-05, |
|
"loss": 1.8849, |
|
"max_norm": 79.52143096923828, |
|
"max_norm/layer0": 79.52143096923828, |
|
"mean_norm": 67.94132995605469, |
|
"mean_norm/layer0": 67.94132995605469, |
|
"multicode_k": 1, |
|
"output_norm": 15.076274760564168, |
|
"output_norm/layer0": 15.076274760564168, |
|
"step": 9450 |
|
}, |
|
{ |
|
"MSE": 611.3009430948896, |
|
"MSE/layer0": 611.3009430948896, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.99, |
|
"input_norm": 31.99749323209126, |
|
"input_norm/layer0": 31.99749323209126, |
|
"learning_rate": 2.631578947368421e-05, |
|
"loss": 1.8967, |
|
"max_norm": 79.54227447509766, |
|
"max_norm/layer0": 79.54227447509766, |
|
"mean_norm": 67.958251953125, |
|
"mean_norm/layer0": 67.958251953125, |
|
"multicode_k": 1, |
|
"output_norm": 15.06888332684835, |
|
"output_norm/layer0": 15.06888332684835, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_MSE/layer0": 610.6120883183328, |
|
"eval_accuracy": 0.5425511737500183, |
|
"eval_dead_code_fraction/layer0": 0.0, |
|
"eval_input_norm/layer0": 31.99749166347134, |
|
"eval_loss": 1.8969556093215942, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 15.09320597480496, |
|
"eval_runtime": 73.3984, |
|
"eval_samples_per_second": 62.985, |
|
"eval_steps_per_second": 7.875, |
|
"step": 9500 |
|
}, |
|
{ |
|
"MSE": 610.9202908325196, |
|
"MSE/layer0": 610.9202908325196, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 1.0, |
|
"input_norm": 31.99748815218606, |
|
"input_norm/layer0": 31.99748815218606, |
|
"learning_rate": 2.368421052631579e-05, |
|
"loss": 1.8917, |
|
"max_norm": 79.56092834472656, |
|
"max_norm/layer0": 79.56092834472656, |
|
"mean_norm": 67.97361755371094, |
|
"mean_norm/layer0": 67.97361755371094, |
|
"multicode_k": 1, |
|
"output_norm": 15.084220841725665, |
|
"output_norm/layer0": 15.084220841725665, |
|
"step": 9550 |
|
}, |
|
{ |
|
"MSE": 610.9847631835939, |
|
"MSE/layer0": 610.9847631835939, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 1.0, |
|
"input_norm": 31.997486731211332, |
|
"input_norm/layer0": 31.997486731211332, |
|
"learning_rate": 2.105263157894737e-05, |
|
"loss": 1.8839, |
|
"max_norm": 79.57735443115234, |
|
"max_norm/layer0": 79.57735443115234, |
|
"mean_norm": 67.98743438720703, |
|
"mean_norm/layer0": 67.98743438720703, |
|
"multicode_k": 1, |
|
"output_norm": 15.082832886377968, |
|
"output_norm/layer0": 15.082832886377968, |
|
"step": 9600 |
|
}, |
|
{ |
|
"MSE": 611.2879392496747, |
|
"MSE/layer0": 611.2879392496747, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 1.01, |
|
"input_norm": 31.997482639948544, |
|
"input_norm/layer0": 31.997482639948544, |
|
"learning_rate": 1.8421052631578947e-05, |
|
"loss": 1.8851, |
|
"max_norm": 79.59221649169922, |
|
"max_norm/layer0": 79.59221649169922, |
|
"mean_norm": 67.99962997436523, |
|
"mean_norm/layer0": 67.99962997436523, |
|
"multicode_k": 1, |
|
"output_norm": 15.075549699465444, |
|
"output_norm/layer0": 15.075549699465444, |
|
"step": 9650 |
|
}, |
|
{ |
|
"MSE": 611.3861442057291, |
|
"MSE/layer0": 611.3861442057291, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 1.01, |
|
"input_norm": 31.997482592264817, |
|
"input_norm/layer0": 31.997482592264817, |
|
"learning_rate": 1.5789473684210526e-05, |
|
"loss": 1.8774, |
|
"max_norm": 79.60480499267578, |
|
"max_norm/layer0": 79.60480499267578, |
|
"mean_norm": 68.01019668579102, |
|
"mean_norm/layer0": 68.01019668579102, |
|
"multicode_k": 1, |
|
"output_norm": 15.07396024545034, |
|
"output_norm/layer0": 15.07396024545034, |
|
"step": 9700 |
|
}, |
|
{ |
|
"MSE": 611.4255168660482, |
|
"MSE/layer0": 611.4255168660482, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 1.02, |
|
"input_norm": 31.997479289372762, |
|
"input_norm/layer0": 31.997479289372762, |
|
"learning_rate": 1.3157894736842104e-05, |
|
"loss": 1.8768, |
|
"max_norm": 79.6154556274414, |
|
"max_norm/layer0": 79.6154556274414, |
|
"mean_norm": 68.01911926269531, |
|
"mean_norm/layer0": 68.01911926269531, |
|
"multicode_k": 1, |
|
"output_norm": 15.07339178085327, |
|
"output_norm/layer0": 15.07339178085327, |
|
"step": 9750 |
|
}, |
|
{ |
|
"MSE": 611.6131436157225, |
|
"MSE/layer0": 611.6131436157225, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 1.02, |
|
"input_norm": 31.99748600323995, |
|
"input_norm/layer0": 31.99748600323995, |
|
"learning_rate": 1.0526315789473684e-05, |
|
"loss": 1.8905, |
|
"max_norm": 79.62410736083984, |
|
"max_norm/layer0": 79.62410736083984, |
|
"mean_norm": 68.02641677856445, |
|
"mean_norm/layer0": 68.02641677856445, |
|
"multicode_k": 1, |
|
"output_norm": 15.068124500910447, |
|
"output_norm/layer0": 15.068124500910447, |
|
"step": 9800 |
|
}, |
|
{ |
|
"MSE": 611.5507637532555, |
|
"MSE/layer0": 611.5507637532555, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 1.03, |
|
"input_norm": 31.997482582728068, |
|
"input_norm/layer0": 31.997482582728068, |
|
"learning_rate": 7.894736842105263e-06, |
|
"loss": 1.8798, |
|
"max_norm": 79.63082122802734, |
|
"max_norm/layer0": 79.63082122802734, |
|
"mean_norm": 68.03211212158203, |
|
"mean_norm/layer0": 68.03211212158203, |
|
"multicode_k": 1, |
|
"output_norm": 15.072520554860436, |
|
"output_norm/layer0": 15.072520554860436, |
|
"step": 9850 |
|
}, |
|
{ |
|
"MSE": 611.7908610026044, |
|
"MSE/layer0": 611.7908610026044, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 1.03, |
|
"input_norm": 31.99747860272725, |
|
"input_norm/layer0": 31.99747860272725, |
|
"learning_rate": 5.263157894736842e-06, |
|
"loss": 1.8807, |
|
"max_norm": 79.63563537597656, |
|
"max_norm/layer0": 79.63563537597656, |
|
"mean_norm": 68.03619003295898, |
|
"mean_norm/layer0": 68.03619003295898, |
|
"multicode_k": 1, |
|
"output_norm": 15.06489105542501, |
|
"output_norm/layer0": 15.06489105542501, |
|
"step": 9900 |
|
}, |
|
{ |
|
"MSE": 611.5220219930011, |
|
"MSE/layer0": 611.5220219930011, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 1.04, |
|
"input_norm": 31.997478488286326, |
|
"input_norm/layer0": 31.997478488286326, |
|
"learning_rate": 2.631578947368421e-06, |
|
"loss": 1.8795, |
|
"max_norm": 79.63849639892578, |
|
"max_norm/layer0": 79.63849639892578, |
|
"mean_norm": 68.03863906860352, |
|
"mean_norm/layer0": 68.03863906860352, |
|
"multicode_k": 1, |
|
"output_norm": 15.07397619565328, |
|
"output_norm/layer0": 15.07397619565328, |
|
"step": 9950 |
|
}, |
|
{ |
|
"MSE": 611.5742947387696, |
|
"MSE/layer0": 611.5742947387696, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 1.04, |
|
"input_norm": 31.997486855189003, |
|
"input_norm/layer0": 31.997486855189003, |
|
"learning_rate": 0.0, |
|
"loss": 1.8942, |
|
"max_norm": 79.63946533203125, |
|
"max_norm/layer0": 79.63946533203125, |
|
"mean_norm": 68.03947448730469, |
|
"mean_norm/layer0": 68.03947448730469, |
|
"multicode_k": 1, |
|
"output_norm": 15.069696005185442, |
|
"output_norm/layer0": 15.069696005185442, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"eval_MSE/layer0": 611.1571513346564, |
|
"eval_accuracy": 0.5429091526514649, |
|
"eval_dead_code_fraction/layer0": 0.0, |
|
"eval_input_norm/layer0": 31.997479090978388, |
|
"eval_loss": 1.89570152759552, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 15.087154228553715, |
|
"eval_runtime": 73.2125, |
|
"eval_samples_per_second": 63.145, |
|
"eval_steps_per_second": 7.895, |
|
"step": 10000 |
|
}, |
|
{ |
|
"MSE": 0.0, |
|
"MSE/layer0": 0.0, |
|
"dead_code_fraction": 1.0, |
|
"dead_code_fraction/layer0": 1.0, |
|
"epoch": 1.04, |
|
"input_norm": 0.0, |
|
"input_norm/layer0": 0.0, |
|
"max_norm": 79.63946533203125, |
|
"max_norm/layer0": 79.63946533203125, |
|
"mean_norm": 68.03947448730469, |
|
"mean_norm/layer0": 68.03947448730469, |
|
"multicode_k": 1, |
|
"output_norm": 0.0, |
|
"output_norm/layer0": 0.0, |
|
"step": 10000, |
|
"total_flos": 3.715683581952e+16, |
|
"train_loss": 2.0762174885749816, |
|
"train_runtime": 12054.7701, |
|
"train_samples_per_second": 39.818, |
|
"train_steps_per_second": 0.83 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 10000, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"total_flos": 3.715683581952e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|