{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.427528675703858, "global_step": 100000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 0.0, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 1.0000000000000001e-07, "loss": 3.5777, "max_norm": 11.326861381530762, "max_norm/layer0": 11.326861381530762, "mean_norm": 7.967035204172134, "mean_norm/layer0": 7.967035204172134, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 1 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 0.1, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.0001, "loss": 2.2319, "max_norm": 11.350375175476074, "max_norm/layer0": 11.350375175476074, "mean_norm": 8.056939780712128, "mean_norm/layer0": 8.056939780712128, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 1000 }, { "epoch": 0.1, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5317416829745597, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.9134443998336792, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.034, "eval_samples_per_second": 96.716, "eval_steps_per_second": 1.934, "step": 1000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 0.21, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.0002, "loss": 1.8521, "max_norm": 11.451685905456543, "max_norm/layer0": 11.451685905456543, "mean_norm": 8.345297634601593, "mean_norm/layer0": 8.345297634601593, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 2000 }, { "epoch": 0.21, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5495499021526419, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.798967719078064, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0243, "eval_samples_per_second": 97.629, "eval_steps_per_second": 1.953, "step": 2000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 0.31, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.0003, "loss": 1.7879, "max_norm": 12.298419952392578, "max_norm/layer0": 12.298419952392578, "mean_norm": 8.877436935901642, "mean_norm/layer0": 8.877436935901642, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 3000 }, { "epoch": 0.31, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5556751467710371, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.7738969326019287, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0304, "eval_samples_per_second": 97.052, "eval_steps_per_second": 1.941, "step": 3000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 0.42, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.0004, "loss": 1.7728, "max_norm": 13.492940902709961, "max_norm/layer0": 13.492940902709961, "mean_norm": 9.665014863014221, "mean_norm/layer0": 9.665014863014221, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 4000 }, { "epoch": 0.42, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5564187866927593, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.7666442394256592, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0276, "eval_samples_per_second": 97.317, "eval_steps_per_second": 1.946, "step": 4000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 0.52, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.0005, "loss": 1.7686, "max_norm": 15.771540641784668, "max_norm/layer0": 15.771540641784668, "mean_norm": 10.713956594467163, "mean_norm/layer0": 10.713956594467163, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 5000 }, { "epoch": 0.52, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5594520547945205, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.7608648538589478, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0276, "eval_samples_per_second": 97.311, "eval_steps_per_second": 1.946, "step": 5000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 0.63, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.0004947368421052632, "loss": 1.7635, "max_norm": 18.436635971069336, "max_norm/layer0": 18.436635971069336, "mean_norm": 11.895205318927765, "mean_norm/layer0": 11.895205318927765, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 6000 }, { "epoch": 0.63, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5597651663405088, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.7554824352264404, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0292, "eval_samples_per_second": 97.159, "eval_steps_per_second": 1.943, "step": 6000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 0.73, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.0004894736842105264, "loss": 1.7523, "max_norm": 21.048765182495117, "max_norm/layer0": 21.048765182495117, "mean_norm": 13.066829144954681, "mean_norm/layer0": 13.066829144954681, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 7000 }, { "epoch": 0.73, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5631506849315069, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.738256812095642, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0284, "eval_samples_per_second": 97.237, "eval_steps_per_second": 1.945, "step": 7000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 0.83, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.0004842105263157895, "loss": 1.7471, "max_norm": 23.563966751098633, "max_norm/layer0": 23.563966751098633, "mean_norm": 14.23173063993454, "mean_norm/layer0": 14.23173063993454, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 8000 }, { "epoch": 0.83, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5643248532289629, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.7368353605270386, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0291, "eval_samples_per_second": 97.172, "eval_steps_per_second": 1.943, "step": 8000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 0.94, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.00047894736842105264, "loss": 1.7404, "max_norm": 26.197235107421875, "max_norm/layer0": 26.197235107421875, "mean_norm": 15.391284584999084, "mean_norm/layer0": 15.391284584999084, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 9000 }, { "epoch": 0.94, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5658904109589041, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.7276737689971924, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.029, "eval_samples_per_second": 97.186, "eval_steps_per_second": 1.944, "step": 9000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 1.04, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.00047368421052631577, "loss": 1.728, "max_norm": 28.83698272705078, "max_norm/layer0": 28.83698272705078, "mean_norm": 16.548602163791656, "mean_norm/layer0": 16.548602163791656, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 10000 }, { "epoch": 1.04, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5647358121330724, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.7289787530899048, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0306, "eval_samples_per_second": 97.031, "eval_steps_per_second": 1.941, "step": 10000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 1.15, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.00046842105263157895, "loss": 1.7195, "max_norm": 31.372026443481445, "max_norm/layer0": 31.372026443481445, "mean_norm": 17.70964866876602, "mean_norm/layer0": 17.70964866876602, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 11000 }, { "epoch": 1.15, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5667318982387476, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.7244290113449097, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0293, "eval_samples_per_second": 97.153, "eval_steps_per_second": 1.943, "step": 11000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 1.25, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.00046315789473684214, "loss": 1.7198, "max_norm": 33.9889030456543, "max_norm/layer0": 33.9889030456543, "mean_norm": 18.86298167705536, "mean_norm/layer0": 18.86298167705536, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 12000 }, { "epoch": 1.25, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5671037181996086, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.7230280637741089, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0314, "eval_samples_per_second": 96.953, "eval_steps_per_second": 1.939, "step": 12000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 1.36, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.00045789473684210527, "loss": 1.7171, "max_norm": 36.61670684814453, "max_norm/layer0": 36.61670684814453, "mean_norm": 20.012963116168976, "mean_norm/layer0": 20.012963116168976, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 13000 }, { "epoch": 1.36, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5689432485322896, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.7177398204803467, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0289, "eval_samples_per_second": 97.193, "eval_steps_per_second": 1.944, "step": 13000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 1.46, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.00045263157894736845, "loss": 1.7185, "max_norm": 39.28107452392578, "max_norm/layer0": 39.28107452392578, "mean_norm": 21.156790494918823, "mean_norm/layer0": 21.156790494918823, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 14000 }, { "epoch": 1.46, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5688258317025441, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.715006709098816, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0296, "eval_samples_per_second": 97.126, "eval_steps_per_second": 1.943, "step": 14000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 1.56, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.0004473684210526316, "loss": 1.7149, "max_norm": 41.809288024902344, "max_norm/layer0": 41.809288024902344, "mean_norm": 22.29547154903412, "mean_norm/layer0": 22.29547154903412, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 15000 }, { "epoch": 1.56, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5694520547945205, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.7125060558319092, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0282, "eval_samples_per_second": 97.259, "eval_steps_per_second": 1.945, "step": 15000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 1.67, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.0004421052631578947, "loss": 1.7105, "max_norm": 44.21694564819336, "max_norm/layer0": 44.21694564819336, "mean_norm": 23.42733907699585, "mean_norm/layer0": 23.42733907699585, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 16000 }, { "epoch": 1.67, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5695303326810176, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.709671139717102, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0286, "eval_samples_per_second": 97.217, "eval_steps_per_second": 1.944, "step": 16000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 1.77, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.00043684210526315795, "loss": 1.7107, "max_norm": 46.647300720214844, "max_norm/layer0": 46.647300720214844, "mean_norm": 24.55408787727356, "mean_norm/layer0": 24.55408787727356, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 17000 }, { "epoch": 1.77, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5689236790606653, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.7072749137878418, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0377, "eval_samples_per_second": 96.366, "eval_steps_per_second": 1.927, "step": 17000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 1.88, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.0004315789473684211, "loss": 1.7113, "max_norm": 49.0349235534668, "max_norm/layer0": 49.0349235534668, "mean_norm": 25.673280954360962, "mean_norm/layer0": 25.673280954360962, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 18000 }, { "epoch": 1.88, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5711937377690802, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.7024654150009155, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.031, "eval_samples_per_second": 96.991, "eval_steps_per_second": 1.94, "step": 18000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 1.98, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.0004263157894736842, "loss": 1.7078, "max_norm": 51.330352783203125, "max_norm/layer0": 51.330352783203125, "mean_norm": 26.78341281414032, "mean_norm/layer0": 26.78341281414032, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 19000 }, { "epoch": 1.98, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5701956947162427, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.7047913074493408, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0324, "eval_samples_per_second": 96.861, "eval_steps_per_second": 1.937, "step": 19000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 2.09, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.00042105263157894734, "loss": 1.693, "max_norm": 53.77473449707031, "max_norm/layer0": 53.77473449707031, "mean_norm": 27.891030192375183, "mean_norm/layer0": 27.891030192375183, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 20000 }, { "epoch": 2.09, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5695694716242662, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.7044708728790283, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0296, "eval_samples_per_second": 97.121, "eval_steps_per_second": 1.942, "step": 20000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 2.19, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.0004157894736842106, "loss": 1.6935, "max_norm": 56.13137435913086, "max_norm/layer0": 56.13137435913086, "mean_norm": 28.993399620056152, "mean_norm/layer0": 28.993399620056152, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 21000 }, { "epoch": 2.19, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5694716242661448, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.706821322441101, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.03, "eval_samples_per_second": 97.083, "eval_steps_per_second": 1.942, "step": 21000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 2.29, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.0004105263157894737, "loss": 1.6962, "max_norm": 58.38813018798828, "max_norm/layer0": 58.38813018798828, "mean_norm": 30.087660908699036, "mean_norm/layer0": 30.087660908699036, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 22000 }, { "epoch": 2.29, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5687475538160469, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.7046499252319336, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0302, "eval_samples_per_second": 97.072, "eval_steps_per_second": 1.941, "step": 22000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 2.4, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.00040526315789473684, "loss": 1.6954, "max_norm": 60.607887268066406, "max_norm/layer0": 60.607887268066406, "mean_norm": 31.172435641288757, "mean_norm/layer0": 31.172435641288757, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 23000 }, { "epoch": 2.4, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5706457925636008, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.7018758058547974, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0307, "eval_samples_per_second": 97.019, "eval_steps_per_second": 1.94, "step": 23000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 2.5, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.0004, "loss": 1.6933, "max_norm": 62.92869186401367, "max_norm/layer0": 62.92869186401367, "mean_norm": 32.24555063247681, "mean_norm/layer0": 32.24555063247681, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 24000 }, { "epoch": 2.5, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5725440313111546, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.700171947479248, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0287, "eval_samples_per_second": 97.209, "eval_steps_per_second": 1.944, "step": 24000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 2.61, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.00039473684210526315, "loss": 1.6942, "max_norm": 65.25504302978516, "max_norm/layer0": 65.25504302978516, "mean_norm": 33.31111395359039, "mean_norm/layer0": 33.31111395359039, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 25000 }, { "epoch": 2.61, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5717221135029354, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.6983325481414795, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0305, "eval_samples_per_second": 97.039, "eval_steps_per_second": 1.941, "step": 25000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 2.71, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.00038947368421052633, "loss": 1.6935, "max_norm": 67.45101928710938, "max_norm/layer0": 67.45101928710938, "mean_norm": 34.36543405056, "mean_norm/layer0": 34.36543405056, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 26000 }, { "epoch": 2.71, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.573013698630137, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.6938215494155884, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.031, "eval_samples_per_second": 96.993, "eval_steps_per_second": 1.94, "step": 26000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 2.82, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.00038421052631578946, "loss": 1.6928, "max_norm": 69.7523193359375, "max_norm/layer0": 69.7523193359375, "mean_norm": 35.40895915031433, "mean_norm/layer0": 35.40895915031433, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 27000 }, { "epoch": 2.82, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5718786692759296, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.6977686882019043, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0294, "eval_samples_per_second": 97.146, "eval_steps_per_second": 1.943, "step": 27000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 2.92, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.00037894736842105265, "loss": 1.6927, "max_norm": 71.84566497802734, "max_norm/layer0": 71.84566497802734, "mean_norm": 36.44334518909454, "mean_norm/layer0": 36.44334518909454, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 28000 }, { "epoch": 2.92, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.571545988258317, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.6934936046600342, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0309, "eval_samples_per_second": 97.006, "eval_steps_per_second": 1.94, "step": 28000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 3.02, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.0003736842105263158, "loss": 1.6855, "max_norm": 73.88700866699219, "max_norm/layer0": 73.88700866699219, "mean_norm": 37.46485388278961, "mean_norm/layer0": 37.46485388278961, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 29000 }, { "epoch": 3.02, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5726027397260274, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.69780695438385, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0312, "eval_samples_per_second": 96.977, "eval_steps_per_second": 1.94, "step": 29000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 3.13, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.00036842105263157896, "loss": 1.6773, "max_norm": 76.05863189697266, "max_norm/layer0": 76.05863189697266, "mean_norm": 38.48086929321289, "mean_norm/layer0": 38.48086929321289, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 30000 }, { "epoch": 3.13, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5731898238747554, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.6950737237930298, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0321, "eval_samples_per_second": 96.894, "eval_steps_per_second": 1.938, "step": 30000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 3.23, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.00036315789473684214, "loss": 1.6788, "max_norm": 78.17182922363281, "max_norm/layer0": 78.17182922363281, "mean_norm": 39.482818245887756, "mean_norm/layer0": 39.482818245887756, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 31000 }, { "epoch": 3.23, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5728375733855186, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.6925665140151978, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0261, "eval_samples_per_second": 97.46, "eval_steps_per_second": 1.949, "step": 31000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 3.34, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.0003578947368421053, "loss": 1.6813, "max_norm": 80.1740493774414, "max_norm/layer0": 80.1740493774414, "mean_norm": 40.47369468212128, "mean_norm/layer0": 40.47369468212128, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 32000 }, { "epoch": 3.34, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.572641878669276, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.6919567584991455, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0325, "eval_samples_per_second": 96.851, "eval_steps_per_second": 1.937, "step": 32000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 3.44, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.0003526315789473684, "loss": 1.6782, "max_norm": 82.25338745117188, "max_norm/layer0": 82.25338745117188, "mean_norm": 41.45106363296509, "mean_norm/layer0": 41.45106363296509, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 33000 }, { "epoch": 3.44, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5732681017612524, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.6926295757293701, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0268, "eval_samples_per_second": 97.393, "eval_steps_per_second": 1.948, "step": 33000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 3.55, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.0003473684210526316, "loss": 1.6801, "max_norm": 84.2289047241211, "max_norm/layer0": 84.2289047241211, "mean_norm": 42.41705143451691, "mean_norm/layer0": 42.41705143451691, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 34000 }, { "epoch": 3.55, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5718590998043053, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.6893627643585205, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0265, "eval_samples_per_second": 97.419, "eval_steps_per_second": 1.948, "step": 34000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 3.65, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.00034210526315789477, "loss": 1.6796, "max_norm": 86.21290588378906, "max_norm/layer0": 86.21290588378906, "mean_norm": 43.37204849720001, "mean_norm/layer0": 43.37204849720001, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 35000 }, { "epoch": 3.65, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5727788649706458, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.688981294631958, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0277, "eval_samples_per_second": 97.302, "eval_steps_per_second": 1.946, "step": 35000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 3.75, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.0003368421052631579, "loss": 1.6768, "max_norm": 88.06742095947266, "max_norm/layer0": 88.06742095947266, "mean_norm": 44.314213514328, "mean_norm/layer0": 44.314213514328, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 36000 }, { "epoch": 3.75, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5721722113502935, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.6881800889968872, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0296, "eval_samples_per_second": 97.122, "eval_steps_per_second": 1.942, "step": 36000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 3.86, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.00033157894736842103, "loss": 1.6802, "max_norm": 89.83356475830078, "max_norm/layer0": 89.83356475830078, "mean_norm": 45.24382555484772, "mean_norm/layer0": 45.24382555484772, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 37000 }, { "epoch": 3.86, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.573228962818004, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.6871685981750488, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0305, "eval_samples_per_second": 97.04, "eval_steps_per_second": 1.941, "step": 37000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 3.96, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.0003263157894736842, "loss": 1.6809, "max_norm": 91.73099517822266, "max_norm/layer0": 91.73099517822266, "mean_norm": 46.16047787666321, "mean_norm/layer0": 46.16047787666321, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 38000 }, { "epoch": 3.96, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5750097847358121, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.685491681098938, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0263, "eval_samples_per_second": 97.433, "eval_steps_per_second": 1.949, "step": 38000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 4.07, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.0003210526315789474, "loss": 1.6701, "max_norm": 93.56365966796875, "max_norm/layer0": 93.56365966796875, "mean_norm": 47.06712102890015, "mean_norm/layer0": 47.06712102890015, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 39000 }, { "epoch": 4.07, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5742074363992172, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.6885604858398438, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0277, "eval_samples_per_second": 97.308, "eval_steps_per_second": 1.946, "step": 39000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 4.17, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.00031578947368421053, "loss": 1.6646, "max_norm": 95.40019226074219, "max_norm/layer0": 95.40019226074219, "mean_norm": 47.9597909450531, "mean_norm/layer0": 47.9597909450531, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 40000 }, { "epoch": 4.17, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5734442270058708, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.6889522075653076, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0311, "eval_samples_per_second": 96.985, "eval_steps_per_second": 1.94, "step": 40000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 4.28, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.0003105263157894737, "loss": 1.669, "max_norm": 97.21604919433594, "max_norm/layer0": 97.21604919433594, "mean_norm": 48.839876651763916, "mean_norm/layer0": 48.839876651763916, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 41000 }, { "epoch": 4.28, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5746771037181996, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.6858941316604614, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0321, "eval_samples_per_second": 96.889, "eval_steps_per_second": 1.938, "step": 41000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 4.38, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.00030526315789473684, "loss": 1.6713, "max_norm": 99.00255584716797, "max_norm/layer0": 99.00255584716797, "mean_norm": 49.7055082321167, "mean_norm/layer0": 49.7055082321167, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 42000 }, { "epoch": 4.38, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5739726027397261, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.686662197113037, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0316, "eval_samples_per_second": 96.933, "eval_steps_per_second": 1.939, "step": 42000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 4.48, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.0003, "loss": 1.6693, "max_norm": 100.78987884521484, "max_norm/layer0": 100.78987884521484, "mean_norm": 50.558117628097534, "mean_norm/layer0": 50.558117628097534, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 43000 }, { "epoch": 4.48, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5749510763209393, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.682096004486084, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0324, "eval_samples_per_second": 96.862, "eval_steps_per_second": 1.937, "step": 43000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 4.59, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.00029473684210526316, "loss": 1.6693, "max_norm": 102.48609161376953, "max_norm/layer0": 102.48609161376953, "mean_norm": 51.397204637527466, "mean_norm/layer0": 51.397204637527466, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 44000 }, { "epoch": 4.59, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5747162426614482, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.6822019815444946, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0343, "eval_samples_per_second": 96.681, "eval_steps_per_second": 1.934, "step": 44000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 4.69, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.00028947368421052634, "loss": 1.6692, "max_norm": 104.09925842285156, "max_norm/layer0": 104.09925842285156, "mean_norm": 52.224265336990356, "mean_norm/layer0": 52.224265336990356, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 45000 }, { "epoch": 4.69, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5745009784735812, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.680064082145691, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0301, "eval_samples_per_second": 97.078, "eval_steps_per_second": 1.942, "step": 45000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 4.8, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.00028421052631578947, "loss": 1.6703, "max_norm": 105.74360656738281, "max_norm/layer0": 105.74360656738281, "mean_norm": 53.035457372665405, "mean_norm/layer0": 53.035457372665405, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 46000 }, { "epoch": 4.8, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5761448140900196, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.6834497451782227, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.028, "eval_samples_per_second": 97.278, "eval_steps_per_second": 1.946, "step": 46000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 4.9, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.0002789473684210526, "loss": 1.6677, "max_norm": 107.3390884399414, "max_norm/layer0": 107.3390884399414, "mean_norm": 53.83394503593445, "mean_norm/layer0": 53.83394503593445, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 47000 }, { "epoch": 4.9, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5756360078277887, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.681907296180725, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0321, "eval_samples_per_second": 96.892, "eval_steps_per_second": 1.938, "step": 47000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 5.01, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.00027368421052631584, "loss": 1.6682, "max_norm": 108.92868041992188, "max_norm/layer0": 108.92868041992188, "mean_norm": 54.61986470222473, "mean_norm/layer0": 54.61986470222473, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 48000 }, { "epoch": 5.01, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5752250489236791, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.6778249740600586, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0308, "eval_samples_per_second": 97.012, "eval_steps_per_second": 1.94, "step": 48000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 5.11, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.00026842105263157897, "loss": 1.6547, "max_norm": 110.44733428955078, "max_norm/layer0": 110.44733428955078, "mean_norm": 55.39363622665405, "mean_norm/layer0": 55.39363622665405, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 49000 }, { "epoch": 5.11, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.575146771037182, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.6824584007263184, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0301, "eval_samples_per_second": 97.074, "eval_steps_per_second": 1.941, "step": 49000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 5.21, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.0002631578947368421, "loss": 1.6566, "max_norm": 112.06961059570312, "max_norm/layer0": 112.06961059570312, "mean_norm": 56.14954137802124, "mean_norm/layer0": 56.14954137802124, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 50000 }, { "epoch": 5.21, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5757729941291585, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.6824774742126465, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0319, "eval_samples_per_second": 96.905, "eval_steps_per_second": 1.938, "step": 50000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 5.32, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.0002578947368421053, "loss": 1.6605, "max_norm": 113.63825988769531, "max_norm/layer0": 113.63825988769531, "mean_norm": 56.89331555366516, "mean_norm/layer0": 56.89331555366516, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 51000 }, { "epoch": 5.32, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5745988258317025, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.6813552379608154, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0311, "eval_samples_per_second": 96.986, "eval_steps_per_second": 1.94, "step": 51000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 5.42, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.0002526315789473684, "loss": 1.6603, "max_norm": 115.20358276367188, "max_norm/layer0": 115.20358276367188, "mean_norm": 57.622037410736084, "mean_norm/layer0": 57.622037410736084, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 52000 }, { "epoch": 5.42, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5754598825831703, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.6768248081207275, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0402, "eval_samples_per_second": 96.135, "eval_steps_per_second": 1.923, "step": 52000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 5.53, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.0002473684210526316, "loss": 1.6595, "max_norm": 116.71245574951172, "max_norm/layer0": 116.71245574951172, "mean_norm": 58.335200548172, "mean_norm/layer0": 58.335200548172, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 53000 }, { "epoch": 5.53, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5753424657534246, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.6757440567016602, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0326, "eval_samples_per_second": 96.84, "eval_steps_per_second": 1.937, "step": 53000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 5.63, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.00024210526315789475, "loss": 1.6603, "max_norm": 118.23548889160156, "max_norm/layer0": 118.23548889160156, "mean_norm": 59.03403639793396, "mean_norm/layer0": 59.03403639793396, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 54000 }, { "epoch": 5.63, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5738160469667319, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.6769322156906128, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0316, "eval_samples_per_second": 96.941, "eval_steps_per_second": 1.939, "step": 54000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 5.74, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.00023684210526315788, "loss": 1.662, "max_norm": 119.70069122314453, "max_norm/layer0": 119.70069122314453, "mean_norm": 59.720083475112915, "mean_norm/layer0": 59.720083475112915, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 55000 }, { "epoch": 5.74, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5758708414872798, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.6757923364639282, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0457, "eval_samples_per_second": 95.626, "eval_steps_per_second": 1.913, "step": 55000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 5.84, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.00023157894736842107, "loss": 1.6602, "max_norm": 121.170654296875, "max_norm/layer0": 121.170654296875, "mean_norm": 60.39123606681824, "mean_norm/layer0": 60.39123606681824, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 56000 }, { "epoch": 5.84, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.57573385518591, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.6770671606063843, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0283, "eval_samples_per_second": 97.247, "eval_steps_per_second": 1.945, "step": 56000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 5.94, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.00022631578947368422, "loss": 1.6624, "max_norm": 122.60064697265625, "max_norm/layer0": 122.60064697265625, "mean_norm": 61.04953479766846, "mean_norm/layer0": 61.04953479766846, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 57000 }, { "epoch": 5.94, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5770254403131115, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.6749203205108643, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0377, "eval_samples_per_second": 96.365, "eval_steps_per_second": 1.927, "step": 57000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 6.05, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.00022105263157894735, "loss": 1.6527, "max_norm": 123.97573852539062, "max_norm/layer0": 123.97573852539062, "mean_norm": 61.69300150871277, "mean_norm/layer0": 61.69300150871277, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 58000 }, { "epoch": 6.05, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5757925636007828, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.6790989637374878, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0297, "eval_samples_per_second": 97.113, "eval_steps_per_second": 1.942, "step": 58000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 6.15, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.00021578947368421054, "loss": 1.6474, "max_norm": 125.31166076660156, "max_norm/layer0": 125.31166076660156, "mean_norm": 62.322699308395386, "mean_norm/layer0": 62.322699308395386, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 59000 }, { "epoch": 6.15, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5772602739726027, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.676284909248352, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0311, "eval_samples_per_second": 96.983, "eval_steps_per_second": 1.94, "step": 59000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 6.26, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.00021052631578947367, "loss": 1.6494, "max_norm": 126.64249420166016, "max_norm/layer0": 126.64249420166016, "mean_norm": 62.93570160865784, "mean_norm/layer0": 62.93570160865784, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 60000 }, { "epoch": 6.26, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5760861056751467, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.6764713525772095, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0321, "eval_samples_per_second": 96.893, "eval_steps_per_second": 1.938, "step": 60000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 6.36, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.00020526315789473685, "loss": 1.6539, "max_norm": 127.96533966064453, "max_norm/layer0": 127.96533966064453, "mean_norm": 63.53509712219238, "mean_norm/layer0": 63.53509712219238, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 61000 }, { "epoch": 6.36, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5763600782778865, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.6741266250610352, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0379, "eval_samples_per_second": 96.35, "eval_steps_per_second": 1.927, "step": 61000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 6.47, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.0002, "loss": 1.6539, "max_norm": 129.2185516357422, "max_norm/layer0": 129.2185516357422, "mean_norm": 64.11901497840881, "mean_norm/layer0": 64.11901497840881, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 62000 }, { "epoch": 6.47, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5767710371819961, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.6752326488494873, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0315, "eval_samples_per_second": 96.949, "eval_steps_per_second": 1.939, "step": 62000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 6.57, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.00019473684210526317, "loss": 1.6529, "max_norm": 130.4375, "max_norm/layer0": 130.4375, "mean_norm": 64.6885313987732, "mean_norm/layer0": 64.6885313987732, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 63000 }, { "epoch": 6.57, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5774755381604697, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.6737432479858398, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0332, "eval_samples_per_second": 96.787, "eval_steps_per_second": 1.936, "step": 63000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 6.67, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.00018947368421052632, "loss": 1.6533, "max_norm": 131.647705078125, "max_norm/layer0": 131.647705078125, "mean_norm": 65.24243497848511, "mean_norm/layer0": 65.24243497848511, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 64000 }, { "epoch": 6.67, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5758317025440313, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.6725146770477295, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0301, "eval_samples_per_second": 97.079, "eval_steps_per_second": 1.942, "step": 64000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 6.78, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.00018421052631578948, "loss": 1.653, "max_norm": 132.81036376953125, "max_norm/layer0": 132.81036376953125, "mean_norm": 65.78296113014221, "mean_norm/layer0": 65.78296113014221, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 65000 }, { "epoch": 6.78, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5773581213307241, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.6722198724746704, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0294, "eval_samples_per_second": 97.147, "eval_steps_per_second": 1.943, "step": 65000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 6.88, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.00017894736842105264, "loss": 1.6522, "max_norm": 133.95559692382812, "max_norm/layer0": 133.95559692382812, "mean_norm": 66.3084762096405, "mean_norm/layer0": 66.3084762096405, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 66000 }, { "epoch": 6.88, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5761643835616438, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.6726341247558594, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0342, "eval_samples_per_second": 96.689, "eval_steps_per_second": 1.934, "step": 66000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 6.99, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.0001736842105263158, "loss": 1.6528, "max_norm": 135.03582763671875, "max_norm/layer0": 135.03582763671875, "mean_norm": 66.81927680969238, "mean_norm/layer0": 66.81927680969238, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 67000 }, { "epoch": 6.99, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5767710371819961, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.672642707824707, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0319, "eval_samples_per_second": 96.909, "eval_steps_per_second": 1.938, "step": 67000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 7.09, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.00016842105263157895, "loss": 1.6439, "max_norm": 136.08602905273438, "max_norm/layer0": 136.08602905273438, "mean_norm": 67.31627178192139, "mean_norm/layer0": 67.31627178192139, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 68000 }, { "epoch": 7.09, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5770841487279843, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.6727759838104248, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0316, "eval_samples_per_second": 96.933, "eval_steps_per_second": 1.939, "step": 68000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 7.19, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.0001631578947368421, "loss": 1.6403, "max_norm": 137.1239013671875, "max_norm/layer0": 137.1239013671875, "mean_norm": 67.79701733589172, "mean_norm/layer0": 67.79701733589172, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 69000 }, { "epoch": 7.19, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5757534246575342, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.670316219329834, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0329, "eval_samples_per_second": 96.812, "eval_steps_per_second": 1.936, "step": 69000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 7.3, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.00015789473684210527, "loss": 1.6447, "max_norm": 138.13624572753906, "max_norm/layer0": 138.13624572753906, "mean_norm": 68.2625687122345, "mean_norm/layer0": 68.2625687122345, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 70000 }, { "epoch": 7.3, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.57720156555773, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.6696677207946777, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0301, "eval_samples_per_second": 97.082, "eval_steps_per_second": 1.942, "step": 70000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 7.4, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.00015263157894736842, "loss": 1.6458, "max_norm": 139.11770629882812, "max_norm/layer0": 139.11770629882812, "mean_norm": 68.71309423446655, "mean_norm/layer0": 68.71309423446655, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 71000 }, { "epoch": 7.4, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5776908023483366, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.6694140434265137, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0301, "eval_samples_per_second": 97.075, "eval_steps_per_second": 1.941, "step": 71000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 7.51, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.00014736842105263158, "loss": 1.6447, "max_norm": 140.086669921875, "max_norm/layer0": 140.086669921875, "mean_norm": 69.14835000038147, "mean_norm/layer0": 69.14835000038147, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 72000 }, { "epoch": 7.51, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5771037181996086, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.6716102361679077, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0344, "eval_samples_per_second": 96.67, "eval_steps_per_second": 1.933, "step": 72000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 7.61, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.00014210526315789474, "loss": 1.6449, "max_norm": 141.01820373535156, "max_norm/layer0": 141.01820373535156, "mean_norm": 69.5690529346466, "mean_norm/layer0": 69.5690529346466, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 73000 }, { "epoch": 7.61, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.577945205479452, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.6680197715759277, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.013, "eval_samples_per_second": 98.719, "eval_steps_per_second": 1.974, "step": 73000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 7.72, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.00013684210526315792, "loss": 1.6458, "max_norm": 141.88795471191406, "max_norm/layer0": 141.88795471191406, "mean_norm": 69.97455215454102, "mean_norm/layer0": 69.97455215454102, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 74000 }, { "epoch": 7.72, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.577866927592955, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.6682908535003662, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0282, "eval_samples_per_second": 97.262, "eval_steps_per_second": 1.945, "step": 74000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 7.82, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.00013157894736842105, "loss": 1.6447, "max_norm": 142.72747802734375, "max_norm/layer0": 142.72747802734375, "mean_norm": 70.36574029922485, "mean_norm/layer0": 70.36574029922485, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 75000 }, { "epoch": 7.82, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5778277886497064, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.6680580377578735, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0275, "eval_samples_per_second": 97.327, "eval_steps_per_second": 1.947, "step": 75000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 7.92, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.0001263157894736842, "loss": 1.6451, "max_norm": 143.5332489013672, "max_norm/layer0": 143.5332489013672, "mean_norm": 70.7415566444397, "mean_norm/layer0": 70.7415566444397, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 76000 }, { "epoch": 7.92, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5780821917808219, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.6676955223083496, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.038, "eval_samples_per_second": 96.341, "eval_steps_per_second": 1.927, "step": 76000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 8.03, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.00012105263157894738, "loss": 1.6418, "max_norm": 144.31161499023438, "max_norm/layer0": 144.31161499023438, "mean_norm": 71.10220861434937, "mean_norm/layer0": 71.10220861434937, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 77000 }, { "epoch": 8.03, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5789041095890411, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.666453242301941, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.032, "eval_samples_per_second": 96.901, "eval_steps_per_second": 1.938, "step": 77000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 8.13, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.00011578947368421053, "loss": 1.6361, "max_norm": 145.06106567382812, "max_norm/layer0": 145.06106567382812, "mean_norm": 71.44788241386414, "mean_norm/layer0": 71.44788241386414, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 78000 }, { "epoch": 8.13, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.577866927592955, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.6684386730194092, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0237, "eval_samples_per_second": 97.688, "eval_steps_per_second": 1.954, "step": 78000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 8.24, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.00011052631578947368, "loss": 1.636, "max_norm": 145.7824249267578, "max_norm/layer0": 145.7824249267578, "mean_norm": 71.77816247940063, "mean_norm/layer0": 71.77816247940063, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 79000 }, { "epoch": 8.24, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5785909980430528, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.668695092201233, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0276, "eval_samples_per_second": 97.318, "eval_steps_per_second": 1.946, "step": 79000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 8.34, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.00010526315789473683, "loss": 1.6357, "max_norm": 146.4754180908203, "max_norm/layer0": 146.4754180908203, "mean_norm": 72.09290337562561, "mean_norm/layer0": 72.09290337562561, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 80000 }, { "epoch": 8.34, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.579041095890411, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.6670129299163818, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0297, "eval_samples_per_second": 97.117, "eval_steps_per_second": 1.942, "step": 80000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 8.45, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.0001, "loss": 1.6379, "max_norm": 147.1273651123047, "max_norm/layer0": 147.1273651123047, "mean_norm": 72.39218544960022, "mean_norm/layer0": 72.39218544960022, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 81000 }, { "epoch": 8.45, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5787866927592955, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.6658258438110352, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0297, "eval_samples_per_second": 97.119, "eval_steps_per_second": 1.942, "step": 81000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 8.55, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 9.473684210526316e-05, "loss": 1.6405, "max_norm": 147.75466918945312, "max_norm/layer0": 147.75466918945312, "mean_norm": 72.67654967308044, "mean_norm/layer0": 72.67654967308044, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 82000 }, { "epoch": 8.55, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5788454011741683, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.6660892963409424, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0266, "eval_samples_per_second": 97.408, "eval_steps_per_second": 1.948, "step": 82000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 8.65, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 8.947368421052632e-05, "loss": 1.6378, "max_norm": 148.3541259765625, "max_norm/layer0": 148.3541259765625, "mean_norm": 72.94574618339539, "mean_norm/layer0": 72.94574618339539, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 83000 }, { "epoch": 8.65, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5789236790606653, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.6649667024612427, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0319, "eval_samples_per_second": 96.911, "eval_steps_per_second": 1.938, "step": 83000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 8.76, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 8.421052631578948e-05, "loss": 1.6386, "max_norm": 148.9251251220703, "max_norm/layer0": 148.9251251220703, "mean_norm": 73.1996808052063, "mean_norm/layer0": 73.1996808052063, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 84000 }, { "epoch": 8.76, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5784344422700587, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.664962887763977, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0344, "eval_samples_per_second": 96.67, "eval_steps_per_second": 1.933, "step": 84000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 8.86, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 7.894736842105263e-05, "loss": 1.638, "max_norm": 149.44464111328125, "max_norm/layer0": 149.44464111328125, "mean_norm": 73.43817734718323, "mean_norm/layer0": 73.43817734718323, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 85000 }, { "epoch": 8.86, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5784931506849315, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.6643970012664795, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0336, "eval_samples_per_second": 96.753, "eval_steps_per_second": 1.935, "step": 85000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 8.97, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 7.368421052631579e-05, "loss": 1.6374, "max_norm": 149.93634033203125, "max_norm/layer0": 149.93634033203125, "mean_norm": 73.66135931015015, "mean_norm/layer0": 73.66135931015015, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 86000 }, { "epoch": 8.97, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5777103718199609, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.6635217666625977, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0303, "eval_samples_per_second": 97.056, "eval_steps_per_second": 1.941, "step": 86000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 9.07, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 6.842105263157896e-05, "loss": 1.6298, "max_norm": 150.40525817871094, "max_norm/layer0": 150.40525817871094, "mean_norm": 73.86943292617798, "mean_norm/layer0": 73.86943292617798, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 87000 }, { "epoch": 9.07, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5785127201565557, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.6646850109100342, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0309, "eval_samples_per_second": 97.0, "eval_steps_per_second": 1.94, "step": 87000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 9.18, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 6.31578947368421e-05, "loss": 1.6302, "max_norm": 150.8313751220703, "max_norm/layer0": 150.8313751220703, "mean_norm": 74.06253480911255, "mean_norm/layer0": 74.06253480911255, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 88000 }, { "epoch": 9.18, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5787475538160469, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.664866328239441, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0347, "eval_samples_per_second": 96.643, "eval_steps_per_second": 1.933, "step": 88000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 9.28, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 5.789473684210527e-05, "loss": 1.6315, "max_norm": 151.22195434570312, "max_norm/layer0": 151.22195434570312, "mean_norm": 74.23983907699585, "mean_norm/layer0": 74.23983907699585, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 89000 }, { "epoch": 9.28, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5782191780821918, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.6650762557983398, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0285, "eval_samples_per_second": 97.227, "eval_steps_per_second": 1.945, "step": 89000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 9.38, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 5.263157894736842e-05, "loss": 1.631, "max_norm": 151.57760620117188, "max_norm/layer0": 151.57760620117188, "mean_norm": 74.40166926383972, "mean_norm/layer0": 74.40166926383972, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 90000 }, { "epoch": 9.38, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.578825831702544, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.6636165380477905, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0322, "eval_samples_per_second": 96.882, "eval_steps_per_second": 1.938, "step": 90000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 9.49, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 4.736842105263158e-05, "loss": 1.6316, "max_norm": 151.9007110595703, "max_norm/layer0": 151.9007110595703, "mean_norm": 74.54819416999817, "mean_norm/layer0": 74.54819416999817, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 91000 }, { "epoch": 9.49, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5781604696673189, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.6626789569854736, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0209, "eval_samples_per_second": 97.952, "eval_steps_per_second": 1.959, "step": 91000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 9.59, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 4.210526315789474e-05, "loss": 1.6286, "max_norm": 152.19346618652344, "max_norm/layer0": 152.19346618652344, "mean_norm": 74.67919540405273, "mean_norm/layer0": 74.67919540405273, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 92000 }, { "epoch": 9.59, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5783365949119373, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.664610743522644, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0316, "eval_samples_per_second": 96.938, "eval_steps_per_second": 1.939, "step": 92000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 9.7, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 3.6842105263157895e-05, "loss": 1.6304, "max_norm": 152.45083618164062, "max_norm/layer0": 152.45083618164062, "mean_norm": 74.79478573799133, "mean_norm/layer0": 74.79478573799133, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 93000 }, { "epoch": 9.7, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5800587084148728, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.6632179021835327, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0258, "eval_samples_per_second": 97.488, "eval_steps_per_second": 1.95, "step": 93000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 9.8, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 3.157894736842105e-05, "loss": 1.6298, "max_norm": 152.67724609375, "max_norm/layer0": 152.67724609375, "mean_norm": 74.8951530456543, "mean_norm/layer0": 74.8951530456543, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 94000 }, { "epoch": 9.8, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5799804305283758, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.6622798442840576, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0295, "eval_samples_per_second": 97.137, "eval_steps_per_second": 1.943, "step": 94000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 9.91, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 2.631578947368421e-05, "loss": 1.6309, "max_norm": 152.86326599121094, "max_norm/layer0": 152.86326599121094, "mean_norm": 74.9801697731018, "mean_norm/layer0": 74.9801697731018, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 95000 }, { "epoch": 9.91, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5800391389432485, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.6619502305984497, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0188, "eval_samples_per_second": 98.158, "eval_steps_per_second": 1.963, "step": 95000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 10.01, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 2.105263157894737e-05, "loss": 1.6302, "max_norm": 153.0155029296875, "max_norm/layer0": 153.0155029296875, "mean_norm": 75.04964685440063, "mean_norm/layer0": 75.04964685440063, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 96000 }, { "epoch": 10.01, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5801369863013699, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.6601940393447876, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0341, "eval_samples_per_second": 96.703, "eval_steps_per_second": 1.934, "step": 96000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 10.11, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 1.5789473684210526e-05, "loss": 1.6242, "max_norm": 153.1354217529297, "max_norm/layer0": 153.1354217529297, "mean_norm": 75.10380005836487, "mean_norm/layer0": 75.10380005836487, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 97000 }, { "epoch": 10.11, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5785518590998043, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.6609833240509033, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0284, "eval_samples_per_second": 97.237, "eval_steps_per_second": 1.945, "step": 97000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 10.22, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 1.0526315789473684e-05, "loss": 1.6258, "max_norm": 153.22213745117188, "max_norm/layer0": 153.22213745117188, "mean_norm": 75.14245867729187, "mean_norm/layer0": 75.14245867729187, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 98000 }, { "epoch": 10.22, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5794716242661448, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.660490870475769, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0267, "eval_samples_per_second": 97.402, "eval_steps_per_second": 1.948, "step": 98000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 10.32, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 5.263157894736842e-06, "loss": 1.6234, "max_norm": 153.27365112304688, "max_norm/layer0": 153.27365112304688, "mean_norm": 75.16563892364502, "mean_norm/layer0": 75.16563892364502, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 99000 }, { "epoch": 10.32, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5790802348336594, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.6604704856872559, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0384, "eval_samples_per_second": 96.298, "eval_steps_per_second": 1.926, "step": 99000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 10.43, "input_norm": 0.0, "input_norm/layer0": 0.0, "learning_rate": 0.0, "loss": 1.6245, "max_norm": 153.29054260253906, "max_norm/layer0": 153.29054260253906, "mean_norm": 75.17323780059814, "mean_norm/layer0": 75.17323780059814, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 100000 }, { "epoch": 10.43, "eval_MSE/layer0": 0.0, "eval_accuracy": 0.5791389432485323, "eval_dead_code_fraction/layer0": 1.0, "eval_input_norm/layer0": 0.0, "eval_loss": 1.6604058742523193, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_runtime": 1.0339, "eval_samples_per_second": 96.717, "eval_steps_per_second": 1.934, "step": 100000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 10.43, "input_norm": 0.0, "input_norm/layer0": 0.0, "max_norm": 153.29054260253906, "max_norm/layer0": 153.29054260253906, "mean_norm": 75.17323780059814, "mean_norm/layer0": 75.17323780059814, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 100000, "total_flos": 3.714781621832909e+17, "train_loss": 1.6774777018260956, "train_runtime": 132212.7109, "train_samples_per_second": 72.61, "train_steps_per_second": 0.756 } ], "max_steps": 100000, "num_train_epochs": 11, "total_flos": 3.714781621832909e+17, "trial_name": null, "trial_params": null }