{ "best_metric": 0.6364516129032258, "best_model_checkpoint": "output_toy/checkpoint-15500", "epoch": 0.775, "global_step": 15500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "MSE": 0.0, "MSE/layer0": 0.0, "MSE/layer1": 0.0, "MSE/layer2": 0.0, "MSE/layer3": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "dead_code_fraction/layer1": 1.0, "dead_code_fraction/layer2": 1.0, "dead_code_fraction/layer3": 1.0, "epoch": 0.0, "input_norm": 0.0, "input_norm/layer0": 0.0, "input_norm/layer1": 0.0, "input_norm/layer2": 0.0, "input_norm/layer3": 0.0, "learning_rate": 0.001, "loss": 3.5788, "max_norm": 14.111713409423828, "max_norm/layer0": 14.111713409423828, "max_norm/layer1": 13.754842758178711, "max_norm/layer2": 14.107428550720215, "max_norm/layer3": 14.075852394104004, "mean_norm": 8.452343925833702, "mean_norm/layer0": 8.452010810375214, "mean_norm/layer1": 8.451448321342468, "mean_norm/layer2": 8.451122224330902, "mean_norm/layer3": 8.454794347286224, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "output_norm/layer1": 0.0, "output_norm/layer2": 0.0, "output_norm/layer3": 0.0, "step": 1 }, { "MSE": 0.0, "MSE/layer0": 0.0, "MSE/layer1": 0.0, "MSE/layer2": 0.0, "MSE/layer3": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "dead_code_fraction/layer1": 1.0, "dead_code_fraction/layer2": 1.0, "dead_code_fraction/layer3": 1.0, "epoch": 0.03, "input_norm": 0.0, "input_norm/layer0": 0.0, "input_norm/layer1": 0.0, "input_norm/layer2": 0.0, "input_norm/layer3": 0.0, "learning_rate": 0.001, "loss": 2.2465, "max_norm": 14.111713409423828, "max_norm/layer0": 14.111713409423828, "max_norm/layer1": 13.313672065734863, "max_norm/layer2": 13.402200698852539, "max_norm/layer3": 13.972790718078613, "mean_norm": 8.24767641723156, "mean_norm/layer0": 8.446629762649536, "mean_norm/layer1": 8.26882529258728, "mean_norm/layer2": 8.018843710422516, "mean_norm/layer3": 8.256406903266907, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "output_norm/layer1": 0.0, "output_norm/layer2": 0.0, "output_norm/layer3": 0.0, "step": 500 }, { "epoch": 0.03, "eval_MSE/layer0": 0.0, "eval_MSE/layer1": 0.0, "eval_MSE/layer2": 0.0, "eval_MSE/layer3": 0.0, "eval_accuracy": 0.3564852016178642, "eval_dead_code_fraction/layer0": 1.0, "eval_dead_code_fraction/layer1": 1.0, "eval_dead_code_fraction/layer2": 1.0, "eval_dead_code_fraction/layer3": 1.0, "eval_first_transition_accuracy": 0.31, "eval_input_norm/layer0": 0.0, "eval_input_norm/layer1": 0.0, "eval_input_norm/layer2": 0.0, "eval_input_norm/layer3": 0.0, "eval_loss": 1.8386362791061401, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_output_norm/layer1": 0.0, "eval_output_norm/layer2": 0.0, "eval_output_norm/layer3": 0.0, "eval_runtime": 40.7699, "eval_samples_per_second": 803.731, "eval_steps_per_second": 1.57, "eval_transition_accuracy": 0.3554838709677419, "step": 500 }, { "MSE": 0.0, "MSE/layer0": 0.0, "MSE/layer1": 0.0, "MSE/layer2": 0.0, "MSE/layer3": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "dead_code_fraction/layer1": 1.0, "dead_code_fraction/layer2": 1.0, "dead_code_fraction/layer3": 1.0, "epoch": 0.05, "input_norm": 0.0, "input_norm/layer0": 0.0, "input_norm/layer1": 0.0, "input_norm/layer2": 0.0, "input_norm/layer3": 0.0, "learning_rate": 0.001, "loss": 1.5981, "max_norm": 14.111713409423828, "max_norm/layer0": 14.111713409423828, "max_norm/layer1": 13.26521110534668, "max_norm/layer2": 13.196029663085938, "max_norm/layer3": 13.539477348327637, "mean_norm": 8.097165614366531, "mean_norm/layer0": 8.447738409042358, "mean_norm/layer1": 8.100942492485046, "mean_norm/layer2": 7.6819451451301575, "mean_norm/layer3": 8.158036410808563, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "output_norm/layer1": 0.0, "output_norm/layer2": 0.0, "output_norm/layer3": 0.0, "step": 1000 }, { "epoch": 0.05, "eval_MSE/layer0": 0.0, "eval_MSE/layer1": 0.0, "eval_MSE/layer2": 0.0, "eval_MSE/layer3": 0.0, "eval_accuracy": 0.4204176054226132, "eval_dead_code_fraction/layer0": 1.0, "eval_dead_code_fraction/layer1": 1.0, "eval_dead_code_fraction/layer2": 1.0, "eval_dead_code_fraction/layer3": 1.0, "eval_first_transition_accuracy": 0.58, "eval_input_norm/layer0": 0.0, "eval_input_norm/layer1": 0.0, "eval_input_norm/layer2": 0.0, "eval_input_norm/layer3": 0.0, "eval_loss": 1.4651859998703003, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_output_norm/layer1": 0.0, "eval_output_norm/layer2": 0.0, "eval_output_norm/layer3": 0.0, "eval_runtime": 40.3952, "eval_samples_per_second": 811.185, "eval_steps_per_second": 1.584, "eval_transition_accuracy": 0.5014516129032258, "step": 1000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "MSE/layer1": 0.0, "MSE/layer2": 0.0, "MSE/layer3": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "dead_code_fraction/layer1": 1.0, "dead_code_fraction/layer2": 1.0, "dead_code_fraction/layer3": 1.0, "epoch": 0.07, "input_norm": 0.0, "input_norm/layer0": 0.0, "input_norm/layer1": 0.0, "input_norm/layer2": 0.0, "input_norm/layer3": 0.0, "learning_rate": 0.001, "loss": 1.3928, "max_norm": 14.111713409423828, "max_norm/layer0": 14.111713409423828, "max_norm/layer1": 13.196317672729492, "max_norm/layer2": 12.821307182312012, "max_norm/layer3": 13.539477348327637, "mean_norm": 8.05762867629528, "mean_norm/layer0": 8.449561476707458, "mean_norm/layer1": 8.045644223690033, "mean_norm/layer2": 7.585634410381317, "mean_norm/layer3": 8.149674594402313, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "output_norm/layer1": 0.0, "output_norm/layer2": 0.0, "output_norm/layer3": 0.0, "step": 1500 }, { "epoch": 0.07, "eval_MSE/layer0": 0.0, "eval_MSE/layer1": 0.0, "eval_MSE/layer2": 0.0, "eval_MSE/layer3": 0.0, "eval_accuracy": 0.4378030131182333, "eval_dead_code_fraction/layer0": 1.0, "eval_dead_code_fraction/layer1": 1.0, "eval_dead_code_fraction/layer2": 1.0, "eval_dead_code_fraction/layer3": 1.0, "eval_first_transition_accuracy": 0.79, "eval_input_norm/layer0": 0.0, "eval_input_norm/layer1": 0.0, "eval_input_norm/layer2": 0.0, "eval_input_norm/layer3": 0.0, "eval_loss": 1.354053258895874, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_output_norm/layer1": 0.0, "eval_output_norm/layer2": 0.0, "eval_output_norm/layer3": 0.0, "eval_runtime": 40.3967, "eval_samples_per_second": 811.156, "eval_steps_per_second": 1.584, "eval_transition_accuracy": 0.555, "step": 1500 }, { "MSE": 0.0, "MSE/layer0": 0.0, "MSE/layer1": 0.0, "MSE/layer2": 0.0, "MSE/layer3": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "dead_code_fraction/layer1": 1.0, "dead_code_fraction/layer2": 1.0, "dead_code_fraction/layer3": 1.0, "epoch": 0.1, "input_norm": 0.0, "input_norm/layer0": 0.0, "input_norm/layer1": 0.0, "input_norm/layer2": 0.0, "input_norm/layer3": 0.0, "learning_rate": 0.001, "loss": 1.3405, "max_norm": 14.111713409423828, "max_norm/layer0": 14.111713409423828, "max_norm/layer1": 12.928963661193848, "max_norm/layer2": 12.629460334777832, "max_norm/layer3": 13.539477348327637, "mean_norm": 8.056178480386734, "mean_norm/layer0": 8.451180398464203, "mean_norm/layer1": 8.046804785728455, "mean_norm/layer2": 7.5705525279045105, "mean_norm/layer3": 8.156176209449768, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "output_norm/layer1": 0.0, "output_norm/layer2": 0.0, "output_norm/layer3": 0.0, "step": 2000 }, { "epoch": 0.1, "eval_MSE/layer0": 0.0, "eval_MSE/layer1": 0.0, "eval_MSE/layer2": 0.0, "eval_MSE/layer3": 0.0, "eval_accuracy": 0.4426980807086614, "eval_dead_code_fraction/layer0": 1.0, "eval_dead_code_fraction/layer1": 1.0, "eval_dead_code_fraction/layer2": 1.0, "eval_dead_code_fraction/layer3": 1.0, "eval_first_transition_accuracy": 0.82, "eval_input_norm/layer0": 0.0, "eval_input_norm/layer1": 0.0, "eval_input_norm/layer2": 0.0, "eval_input_norm/layer3": 0.0, "eval_loss": 1.3263764381408691, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_output_norm/layer1": 0.0, "eval_output_norm/layer2": 0.0, "eval_output_norm/layer3": 0.0, "eval_runtime": 40.4538, "eval_samples_per_second": 810.011, "eval_steps_per_second": 1.582, "eval_transition_accuracy": 0.5756451612903226, "step": 2000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "MSE/layer1": 0.0, "MSE/layer2": 0.0, "MSE/layer3": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "dead_code_fraction/layer1": 1.0, "dead_code_fraction/layer2": 1.0, "dead_code_fraction/layer3": 1.0, "epoch": 0.12, "input_norm": 0.0, "input_norm/layer0": 0.0, "input_norm/layer1": 0.0, "input_norm/layer2": 0.0, "input_norm/layer3": 0.0, "learning_rate": 0.001, "loss": 1.3189, "max_norm": 14.111713409423828, "max_norm/layer0": 14.111713409423828, "max_norm/layer1": 12.928963661193848, "max_norm/layer2": 12.610198974609375, "max_norm/layer3": 13.539477348327637, "mean_norm": 8.065410792827606, "mean_norm/layer0": 8.452252388000488, "mean_norm/layer1": 8.05913120508194, "mean_norm/layer2": 7.583977818489075, "mean_norm/layer3": 8.166281759738922, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "output_norm/layer1": 0.0, "output_norm/layer2": 0.0, "output_norm/layer3": 0.0, "step": 2500 }, { "epoch": 0.12, "eval_MSE/layer0": 0.0, "eval_MSE/layer1": 0.0, "eval_MSE/layer2": 0.0, "eval_MSE/layer3": 0.0, "eval_accuracy": 0.4445690245140256, "eval_dead_code_fraction/layer0": 1.0, "eval_dead_code_fraction/layer1": 1.0, "eval_dead_code_fraction/layer2": 1.0, "eval_dead_code_fraction/layer3": 1.0, "eval_first_transition_accuracy": 0.86, "eval_input_norm/layer0": 0.0, "eval_input_norm/layer1": 0.0, "eval_input_norm/layer2": 0.0, "eval_input_norm/layer3": 0.0, "eval_loss": 1.3187371492385864, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_output_norm/layer1": 0.0, "eval_output_norm/layer2": 0.0, "eval_output_norm/layer3": 0.0, "eval_runtime": 40.0204, "eval_samples_per_second": 818.782, "eval_steps_per_second": 1.599, "eval_transition_accuracy": 0.5575806451612904, "step": 2500 }, { "MSE": 0.0, "MSE/layer0": 0.0, "MSE/layer1": 0.0, "MSE/layer2": 0.0, "MSE/layer3": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "dead_code_fraction/layer1": 1.0, "dead_code_fraction/layer2": 1.0, "dead_code_fraction/layer3": 1.0, "epoch": 0.15, "input_norm": 0.0, "input_norm/layer0": 0.0, "input_norm/layer1": 0.0, "input_norm/layer2": 0.0, "input_norm/layer3": 0.0, "learning_rate": 0.001, "loss": 1.308, "max_norm": 14.111713409423828, "max_norm/layer0": 14.111713409423828, "max_norm/layer1": 12.928963661193848, "max_norm/layer2": 12.610198974609375, "max_norm/layer3": 13.539477348327637, "mean_norm": 8.076771080493927, "mean_norm/layer0": 8.453002035617828, "mean_norm/layer1": 8.070474624633789, "mean_norm/layer2": 7.606890320777893, "mean_norm/layer3": 8.176717340946198, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "output_norm/layer1": 0.0, "output_norm/layer2": 0.0, "output_norm/layer3": 0.0, "step": 3000 }, { "epoch": 0.15, "eval_MSE/layer0": 0.0, "eval_MSE/layer1": 0.0, "eval_MSE/layer2": 0.0, "eval_MSE/layer3": 0.0, "eval_accuracy": 0.44684558778297245, "eval_dead_code_fraction/layer0": 1.0, "eval_dead_code_fraction/layer1": 1.0, "eval_dead_code_fraction/layer2": 1.0, "eval_dead_code_fraction/layer3": 1.0, "eval_first_transition_accuracy": 0.82, "eval_input_norm/layer0": 0.0, "eval_input_norm/layer1": 0.0, "eval_input_norm/layer2": 0.0, "eval_input_norm/layer3": 0.0, "eval_loss": 1.3064292669296265, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_output_norm/layer1": 0.0, "eval_output_norm/layer2": 0.0, "eval_output_norm/layer3": 0.0, "eval_runtime": 40.4873, "eval_samples_per_second": 809.341, "eval_steps_per_second": 1.581, "eval_transition_accuracy": 0.557258064516129, "step": 3000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "MSE/layer1": 0.0, "MSE/layer2": 0.0, "MSE/layer3": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "dead_code_fraction/layer1": 1.0, "dead_code_fraction/layer2": 1.0, "dead_code_fraction/layer3": 1.0, "epoch": 0.17, "input_norm": 0.0, "input_norm/layer0": 0.0, "input_norm/layer1": 0.0, "input_norm/layer2": 0.0, "input_norm/layer3": 0.0, "learning_rate": 0.001, "loss": 1.3009, "max_norm": 14.111713409423828, "max_norm/layer0": 14.111713409423828, "max_norm/layer1": 12.928963661193848, "max_norm/layer2": 12.480420112609863, "max_norm/layer3": 13.539477348327637, "mean_norm": 8.087428167462349, "mean_norm/layer0": 8.45363199710846, "mean_norm/layer1": 8.081151723861694, "mean_norm/layer2": 7.628681242465973, "mean_norm/layer3": 8.186247706413269, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "output_norm/layer1": 0.0, "output_norm/layer2": 0.0, "output_norm/layer3": 0.0, "step": 3500 }, { "epoch": 0.17, "eval_MSE/layer0": 0.0, "eval_MSE/layer1": 0.0, "eval_MSE/layer2": 0.0, "eval_MSE/layer3": 0.0, "eval_accuracy": 0.44931342658095474, "eval_dead_code_fraction/layer0": 1.0, "eval_dead_code_fraction/layer1": 1.0, "eval_dead_code_fraction/layer2": 1.0, "eval_dead_code_fraction/layer3": 1.0, "eval_first_transition_accuracy": 0.87, "eval_input_norm/layer0": 0.0, "eval_input_norm/layer1": 0.0, "eval_input_norm/layer2": 0.0, "eval_input_norm/layer3": 0.0, "eval_loss": 1.2963054180145264, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_output_norm/layer1": 0.0, "eval_output_norm/layer2": 0.0, "eval_output_norm/layer3": 0.0, "eval_runtime": 40.5536, "eval_samples_per_second": 808.018, "eval_steps_per_second": 1.578, "eval_transition_accuracy": 0.5762903225806452, "step": 3500 }, { "MSE": 0.0, "MSE/layer0": 0.0, "MSE/layer1": 0.0, "MSE/layer2": 0.0, "MSE/layer3": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "dead_code_fraction/layer1": 1.0, "dead_code_fraction/layer2": 1.0, "dead_code_fraction/layer3": 1.0, "epoch": 0.2, "input_norm": 0.0, "input_norm/layer0": 0.0, "input_norm/layer1": 0.0, "input_norm/layer2": 0.0, "input_norm/layer3": 0.0, "learning_rate": 0.001, "loss": 1.2965, "max_norm": 14.111713409423828, "max_norm/layer0": 14.111713409423828, "max_norm/layer1": 12.945528030395508, "max_norm/layer2": 13.130059242248535, "max_norm/layer3": 13.539477348327637, "mean_norm": 8.097855687141418, "mean_norm/layer0": 8.454240918159485, "mean_norm/layer1": 8.09092777967453, "mean_norm/layer2": 7.649804890155792, "mean_norm/layer3": 8.196449160575867, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "output_norm/layer1": 0.0, "output_norm/layer2": 0.0, "output_norm/layer3": 0.0, "step": 4000 }, { "epoch": 0.2, "eval_MSE/layer0": 0.0, "eval_MSE/layer1": 0.0, "eval_MSE/layer2": 0.0, "eval_MSE/layer3": 0.0, "eval_accuracy": 0.4493698961152805, "eval_dead_code_fraction/layer0": 1.0, "eval_dead_code_fraction/layer1": 1.0, "eval_dead_code_fraction/layer2": 1.0, "eval_dead_code_fraction/layer3": 1.0, "eval_first_transition_accuracy": 0.9, "eval_input_norm/layer0": 0.0, "eval_input_norm/layer1": 0.0, "eval_input_norm/layer2": 0.0, "eval_input_norm/layer3": 0.0, "eval_loss": 1.2922283411026, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_output_norm/layer1": 0.0, "eval_output_norm/layer2": 0.0, "eval_output_norm/layer3": 0.0, "eval_runtime": 40.3081, "eval_samples_per_second": 812.939, "eval_steps_per_second": 1.588, "eval_transition_accuracy": 0.567741935483871, "step": 4000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "MSE/layer1": 0.0, "MSE/layer2": 0.0, "MSE/layer3": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "dead_code_fraction/layer1": 1.0, "dead_code_fraction/layer2": 1.0, "dead_code_fraction/layer3": 1.0, "epoch": 0.23, "input_norm": 0.0, "input_norm/layer0": 0.0, "input_norm/layer1": 0.0, "input_norm/layer2": 0.0, "input_norm/layer3": 0.0, "learning_rate": 0.001, "loss": 1.2919, "max_norm": 14.111713409423828, "max_norm/layer0": 14.111713409423828, "max_norm/layer1": 13.611448287963867, "max_norm/layer2": 13.752634048461914, "max_norm/layer3": 13.539477348327637, "mean_norm": 8.10820010304451, "mean_norm/layer0": 8.454896569252014, "mean_norm/layer1": 8.099043369293213, "mean_norm/layer2": 7.671496093273163, "mean_norm/layer3": 8.20736438035965, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "output_norm/layer1": 0.0, "output_norm/layer2": 0.0, "output_norm/layer3": 0.0, "step": 4500 }, { "epoch": 0.23, "eval_MSE/layer0": 0.0, "eval_MSE/layer1": 0.0, "eval_MSE/layer2": 0.0, "eval_MSE/layer3": 0.0, "eval_accuracy": 0.449889175535187, "eval_dead_code_fraction/layer0": 1.0, "eval_dead_code_fraction/layer1": 1.0, "eval_dead_code_fraction/layer2": 1.0, "eval_dead_code_fraction/layer3": 1.0, "eval_first_transition_accuracy": 0.91, "eval_input_norm/layer0": 0.0, "eval_input_norm/layer1": 0.0, "eval_input_norm/layer2": 0.0, "eval_input_norm/layer3": 0.0, "eval_loss": 1.2880299091339111, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_output_norm/layer1": 0.0, "eval_output_norm/layer2": 0.0, "eval_output_norm/layer3": 0.0, "eval_runtime": 40.5303, "eval_samples_per_second": 808.482, "eval_steps_per_second": 1.579, "eval_transition_accuracy": 0.5820967741935484, "step": 4500 }, { "MSE": 0.0, "MSE/layer0": 0.0, "MSE/layer1": 0.0, "MSE/layer2": 0.0, "MSE/layer3": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "dead_code_fraction/layer1": 1.0, "dead_code_fraction/layer2": 1.0, "dead_code_fraction/layer3": 1.0, "epoch": 0.25, "input_norm": 0.0, "input_norm/layer0": 0.0, "input_norm/layer1": 0.0, "input_norm/layer2": 0.0, "input_norm/layer3": 0.0, "learning_rate": 0.001, "loss": 1.2889, "max_norm": 14.629088401794434, "max_norm/layer0": 14.111713409423828, "max_norm/layer1": 14.212156295776367, "max_norm/layer2": 14.629088401794434, "max_norm/layer3": 13.539477348327637, "mean_norm": 8.117734983563423, "mean_norm/layer0": 8.455368101596832, "mean_norm/layer1": 8.104798018932343, "mean_norm/layer2": 7.693721830844879, "mean_norm/layer3": 8.217051982879639, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "output_norm/layer1": 0.0, "output_norm/layer2": 0.0, "output_norm/layer3": 0.0, "step": 5000 }, { "epoch": 0.25, "eval_MSE/layer0": 0.0, "eval_MSE/layer1": 0.0, "eval_MSE/layer2": 0.0, "eval_MSE/layer3": 0.0, "eval_accuracy": 0.45008597786970966, "eval_dead_code_fraction/layer0": 1.0, "eval_dead_code_fraction/layer1": 1.0, "eval_dead_code_fraction/layer2": 1.0, "eval_dead_code_fraction/layer3": 1.0, "eval_first_transition_accuracy": 0.9, "eval_input_norm/layer0": 0.0, "eval_input_norm/layer1": 0.0, "eval_input_norm/layer2": 0.0, "eval_input_norm/layer3": 0.0, "eval_loss": 1.2855565547943115, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_output_norm/layer1": 0.0, "eval_output_norm/layer2": 0.0, "eval_output_norm/layer3": 0.0, "eval_runtime": 40.1444, "eval_samples_per_second": 816.252, "eval_steps_per_second": 1.594, "eval_transition_accuracy": 0.56, "step": 5000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "MSE/layer1": 0.0, "MSE/layer2": 0.0, "MSE/layer3": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "dead_code_fraction/layer1": 1.0, "dead_code_fraction/layer2": 1.0, "dead_code_fraction/layer3": 1.0, "epoch": 0.28, "input_norm": 0.0, "input_norm/layer0": 0.0, "input_norm/layer1": 0.0, "input_norm/layer2": 0.0, "input_norm/layer3": 0.0, "learning_rate": 0.001, "loss": 1.2855, "max_norm": 15.594667434692383, "max_norm/layer0": 14.111713409423828, "max_norm/layer1": 14.633105278015137, "max_norm/layer2": 15.594667434692383, "max_norm/layer3": 13.539477348327637, "mean_norm": 8.12691231071949, "mean_norm/layer0": 8.45590054988861, "mean_norm/layer1": 8.110510230064392, "mean_norm/layer2": 7.71501624584198, "mean_norm/layer3": 8.226222217082977, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "output_norm/layer1": 0.0, "output_norm/layer2": 0.0, "output_norm/layer3": 0.0, "step": 5500 }, { "epoch": 0.28, "eval_MSE/layer0": 0.0, "eval_MSE/layer1": 0.0, "eval_MSE/layer2": 0.0, "eval_MSE/layer3": 0.0, "eval_accuracy": 0.4503207469549705, "eval_dead_code_fraction/layer0": 1.0, "eval_dead_code_fraction/layer1": 1.0, "eval_dead_code_fraction/layer2": 1.0, "eval_dead_code_fraction/layer3": 1.0, "eval_first_transition_accuracy": 0.9, "eval_input_norm/layer0": 0.0, "eval_input_norm/layer1": 0.0, "eval_input_norm/layer2": 0.0, "eval_input_norm/layer3": 0.0, "eval_loss": 1.2815940380096436, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_output_norm/layer1": 0.0, "eval_output_norm/layer2": 0.0, "eval_output_norm/layer3": 0.0, "eval_runtime": 40.4723, "eval_samples_per_second": 809.64, "eval_steps_per_second": 1.581, "eval_transition_accuracy": 0.6016129032258064, "step": 5500 }, { "MSE": 0.0, "MSE/layer0": 0.0, "MSE/layer1": 0.0, "MSE/layer2": 0.0, "MSE/layer3": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "dead_code_fraction/layer1": 1.0, "dead_code_fraction/layer2": 1.0, "dead_code_fraction/layer3": 1.0, "epoch": 0.3, "input_norm": 0.0, "input_norm/layer0": 0.0, "input_norm/layer1": 0.0, "input_norm/layer2": 0.0, "input_norm/layer3": 0.0, "learning_rate": 0.001, "loss": 1.2828, "max_norm": 16.523221969604492, "max_norm/layer0": 14.111713409423828, "max_norm/layer1": 15.140296936035156, "max_norm/layer2": 16.523221969604492, "max_norm/layer3": 15.213438987731934, "mean_norm": 8.136623591184616, "mean_norm/layer0": 8.456406712532043, "mean_norm/layer1": 8.11625623703003, "mean_norm/layer2": 7.738001704216003, "mean_norm/layer3": 8.235829710960388, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "output_norm/layer1": 0.0, "output_norm/layer2": 0.0, "output_norm/layer3": 0.0, "step": 6000 }, { "epoch": 0.3, "eval_MSE/layer0": 0.0, "eval_MSE/layer1": 0.0, "eval_MSE/layer2": 0.0, "eval_MSE/layer3": 0.0, "eval_accuracy": 0.45019699457123524, "eval_dead_code_fraction/layer0": 1.0, "eval_dead_code_fraction/layer1": 1.0, "eval_dead_code_fraction/layer2": 1.0, "eval_dead_code_fraction/layer3": 1.0, "eval_first_transition_accuracy": 0.87, "eval_input_norm/layer0": 0.0, "eval_input_norm/layer1": 0.0, "eval_input_norm/layer2": 0.0, "eval_input_norm/layer3": 0.0, "eval_loss": 1.2844185829162598, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_output_norm/layer1": 0.0, "eval_output_norm/layer2": 0.0, "eval_output_norm/layer3": 0.0, "eval_runtime": 40.4554, "eval_samples_per_second": 809.979, "eval_steps_per_second": 1.582, "eval_transition_accuracy": 0.5733870967741935, "step": 6000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "MSE/layer1": 0.0, "MSE/layer2": 0.0, "MSE/layer3": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "dead_code_fraction/layer1": 1.0, "dead_code_fraction/layer2": 1.0, "dead_code_fraction/layer3": 1.0, "epoch": 0.33, "input_norm": 0.0, "input_norm/layer0": 0.0, "input_norm/layer1": 0.0, "input_norm/layer2": 0.0, "input_norm/layer3": 0.0, "learning_rate": 0.001, "loss": 1.2805, "max_norm": 17.3682861328125, "max_norm/layer0": 14.111713409423828, "max_norm/layer1": 15.587692260742188, "max_norm/layer2": 17.3682861328125, "max_norm/layer3": 16.75887107849121, "mean_norm": 8.145677655935287, "mean_norm/layer0": 8.456890940666199, "mean_norm/layer1": 8.120486974716187, "mean_norm/layer2": 7.760386824607849, "mean_norm/layer3": 8.244945883750916, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "output_norm/layer1": 0.0, "output_norm/layer2": 0.0, "output_norm/layer3": 0.0, "step": 6500 }, { "epoch": 0.33, "eval_MSE/layer0": 0.0, "eval_MSE/layer1": 0.0, "eval_MSE/layer2": 0.0, "eval_MSE/layer3": 0.0, "eval_accuracy": 0.45155730960875984, "eval_dead_code_fraction/layer0": 1.0, "eval_dead_code_fraction/layer1": 1.0, "eval_dead_code_fraction/layer2": 1.0, "eval_dead_code_fraction/layer3": 1.0, "eval_first_transition_accuracy": 0.95, "eval_input_norm/layer0": 0.0, "eval_input_norm/layer1": 0.0, "eval_input_norm/layer2": 0.0, "eval_input_norm/layer3": 0.0, "eval_loss": 1.277733325958252, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_output_norm/layer1": 0.0, "eval_output_norm/layer2": 0.0, "eval_output_norm/layer3": 0.0, "eval_runtime": 40.4778, "eval_samples_per_second": 809.529, "eval_steps_per_second": 1.581, "eval_transition_accuracy": 0.6083870967741936, "step": 6500 }, { "MSE": 0.0, "MSE/layer0": 0.0, "MSE/layer1": 0.0, "MSE/layer2": 0.0, "MSE/layer3": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "dead_code_fraction/layer1": 1.0, "dead_code_fraction/layer2": 1.0, "dead_code_fraction/layer3": 1.0, "epoch": 0.35, "input_norm": 0.0, "input_norm/layer0": 0.0, "input_norm/layer1": 0.0, "input_norm/layer2": 0.0, "input_norm/layer3": 0.0, "learning_rate": 0.001, "loss": 1.2793, "max_norm": 18.367448806762695, "max_norm/layer0": 14.111713409423828, "max_norm/layer1": 16.05299949645996, "max_norm/layer2": 18.142847061157227, "max_norm/layer3": 18.367448806762695, "mean_norm": 8.15514886379242, "mean_norm/layer0": 8.45737361907959, "mean_norm/layer1": 8.127346932888031, "mean_norm/layer2": 7.781527459621429, "mean_norm/layer3": 8.254347443580627, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "output_norm/layer1": 0.0, "output_norm/layer2": 0.0, "output_norm/layer3": 0.0, "step": 7000 }, { "epoch": 0.35, "eval_MSE/layer0": 0.0, "eval_MSE/layer1": 0.0, "eval_MSE/layer2": 0.0, "eval_MSE/layer3": 0.0, "eval_accuracy": 0.45114255890132876, "eval_dead_code_fraction/layer0": 1.0, "eval_dead_code_fraction/layer1": 1.0, "eval_dead_code_fraction/layer2": 1.0, "eval_dead_code_fraction/layer3": 1.0, "eval_first_transition_accuracy": 0.93, "eval_input_norm/layer0": 0.0, "eval_input_norm/layer1": 0.0, "eval_input_norm/layer2": 0.0, "eval_input_norm/layer3": 0.0, "eval_loss": 1.2795634269714355, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_output_norm/layer1": 0.0, "eval_output_norm/layer2": 0.0, "eval_output_norm/layer3": 0.0, "eval_runtime": 40.6598, "eval_samples_per_second": 805.906, "eval_steps_per_second": 1.574, "eval_transition_accuracy": 0.5680645161290323, "step": 7000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "MSE/layer1": 0.0, "MSE/layer2": 0.0, "MSE/layer3": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "dead_code_fraction/layer1": 1.0, "dead_code_fraction/layer2": 1.0, "dead_code_fraction/layer3": 1.0, "epoch": 0.38, "input_norm": 0.0, "input_norm/layer0": 0.0, "input_norm/layer1": 0.0, "input_norm/layer2": 0.0, "input_norm/layer3": 0.0, "learning_rate": 0.001, "loss": 1.2785, "max_norm": 20.019760131835938, "max_norm/layer0": 14.111713409423828, "max_norm/layer1": 16.42165756225586, "max_norm/layer2": 19.017709732055664, "max_norm/layer3": 20.019760131835938, "mean_norm": 8.164311796426773, "mean_norm/layer0": 8.457763373851776, "mean_norm/layer1": 8.13219028711319, "mean_norm/layer2": 7.803518235683441, "mean_norm/layer3": 8.263775289058685, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "output_norm/layer1": 0.0, "output_norm/layer2": 0.0, "output_norm/layer3": 0.0, "step": 7500 }, { "epoch": 0.38, "eval_MSE/layer0": 0.0, "eval_MSE/layer1": 0.0, "eval_MSE/layer2": 0.0, "eval_MSE/layer3": 0.0, "eval_accuracy": 0.45188651497908466, "eval_dead_code_fraction/layer0": 1.0, "eval_dead_code_fraction/layer1": 1.0, "eval_dead_code_fraction/layer2": 1.0, "eval_dead_code_fraction/layer3": 1.0, "eval_first_transition_accuracy": 0.95, "eval_input_norm/layer0": 0.0, "eval_input_norm/layer1": 0.0, "eval_input_norm/layer2": 0.0, "eval_input_norm/layer3": 0.0, "eval_loss": 1.2748253345489502, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_output_norm/layer1": 0.0, "eval_output_norm/layer2": 0.0, "eval_output_norm/layer3": 0.0, "eval_runtime": 40.4299, "eval_samples_per_second": 810.489, "eval_steps_per_second": 1.583, "eval_transition_accuracy": 0.5919354838709677, "step": 7500 }, { "MSE": 0.0, "MSE/layer0": 0.0, "MSE/layer1": 0.0, "MSE/layer2": 0.0, "MSE/layer3": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "dead_code_fraction/layer1": 1.0, "dead_code_fraction/layer2": 1.0, "dead_code_fraction/layer3": 1.0, "epoch": 0.4, "input_norm": 0.0, "input_norm/layer0": 0.0, "input_norm/layer1": 0.0, "input_norm/layer2": 0.0, "input_norm/layer3": 0.0, "learning_rate": 0.001, "loss": 1.2764, "max_norm": 21.821395874023438, "max_norm/layer0": 14.158592224121094, "max_norm/layer1": 16.7973690032959, "max_norm/layer2": 19.965808868408203, "max_norm/layer3": 21.821395874023438, "mean_norm": 8.173083677887917, "mean_norm/layer0": 8.458155512809753, "mean_norm/layer1": 8.137401163578033, "mean_norm/layer2": 7.8241875767707825, "mean_norm/layer3": 8.272590458393097, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "output_norm/layer1": 0.0, "output_norm/layer2": 0.0, "output_norm/layer3": 0.0, "step": 8000 }, { "epoch": 0.4, "eval_MSE/layer0": 0.0, "eval_MSE/layer1": 0.0, "eval_MSE/layer2": 0.0, "eval_MSE/layer3": 0.0, "eval_accuracy": 0.451844222902313, "eval_dead_code_fraction/layer0": 1.0, "eval_dead_code_fraction/layer1": 1.0, "eval_dead_code_fraction/layer2": 1.0, "eval_dead_code_fraction/layer3": 1.0, "eval_first_transition_accuracy": 0.9, "eval_input_norm/layer0": 0.0, "eval_input_norm/layer1": 0.0, "eval_input_norm/layer2": 0.0, "eval_input_norm/layer3": 0.0, "eval_loss": 1.276716709136963, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_output_norm/layer1": 0.0, "eval_output_norm/layer2": 0.0, "eval_output_norm/layer3": 0.0, "eval_runtime": 40.3344, "eval_samples_per_second": 812.407, "eval_steps_per_second": 1.587, "eval_transition_accuracy": 0.5759677419354838, "step": 8000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "MSE/layer1": 0.0, "MSE/layer2": 0.0, "MSE/layer3": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "dead_code_fraction/layer1": 1.0, "dead_code_fraction/layer2": 1.0, "dead_code_fraction/layer3": 1.0, "epoch": 0.42, "input_norm": 0.0, "input_norm/layer0": 0.0, "input_norm/layer1": 0.0, "input_norm/layer2": 0.0, "input_norm/layer3": 0.0, "learning_rate": 0.001, "loss": 1.2763, "max_norm": 23.506425857543945, "max_norm/layer0": 14.451132774353027, "max_norm/layer1": 17.121503829956055, "max_norm/layer2": 20.906761169433594, "max_norm/layer3": 23.506425857543945, "mean_norm": 8.181086376309395, "mean_norm/layer0": 8.458472549915314, "mean_norm/layer1": 8.140588343143463, "mean_norm/layer2": 7.84406965970993, "mean_norm/layer3": 8.281214952468872, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "output_norm/layer1": 0.0, "output_norm/layer2": 0.0, "output_norm/layer3": 0.0, "step": 8500 }, { "epoch": 0.42, "eval_MSE/layer0": 0.0, "eval_MSE/layer1": 0.0, "eval_MSE/layer2": 0.0, "eval_MSE/layer3": 0.0, "eval_accuracy": 0.45069200410617616, "eval_dead_code_fraction/layer0": 1.0, "eval_dead_code_fraction/layer1": 1.0, "eval_dead_code_fraction/layer2": 1.0, "eval_dead_code_fraction/layer3": 1.0, "eval_first_transition_accuracy": 0.94, "eval_input_norm/layer0": 0.0, "eval_input_norm/layer1": 0.0, "eval_input_norm/layer2": 0.0, "eval_input_norm/layer3": 0.0, "eval_loss": 1.2800538539886475, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_output_norm/layer1": 0.0, "eval_output_norm/layer2": 0.0, "eval_output_norm/layer3": 0.0, "eval_runtime": 40.8942, "eval_samples_per_second": 801.287, "eval_steps_per_second": 1.565, "eval_transition_accuracy": 0.582741935483871, "step": 8500 }, { "MSE": 0.0, "MSE/layer0": 0.0, "MSE/layer1": 0.0, "MSE/layer2": 0.0, "MSE/layer3": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "dead_code_fraction/layer1": 1.0, "dead_code_fraction/layer2": 1.0, "dead_code_fraction/layer3": 1.0, "epoch": 0.45, "input_norm": 0.0, "input_norm/layer0": 0.0, "input_norm/layer1": 0.0, "input_norm/layer2": 0.0, "input_norm/layer3": 0.0, "learning_rate": 0.001, "loss": 1.2755, "max_norm": 25.07321548461914, "max_norm/layer0": 14.874269485473633, "max_norm/layer1": 17.39398956298828, "max_norm/layer2": 21.67055892944336, "max_norm/layer3": 25.07321548461914, "mean_norm": 8.188907638192177, "mean_norm/layer0": 8.458899140357971, "mean_norm/layer1": 8.143711388111115, "mean_norm/layer2": 7.863752484321594, "mean_norm/layer3": 8.289267539978027, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "output_norm/layer1": 0.0, "output_norm/layer2": 0.0, "output_norm/layer3": 0.0, "step": 9000 }, { "epoch": 0.45, "eval_MSE/layer0": 0.0, "eval_MSE/layer1": 0.0, "eval_MSE/layer2": 0.0, "eval_MSE/layer3": 0.0, "eval_accuracy": 0.4516255536417323, "eval_dead_code_fraction/layer0": 1.0, "eval_dead_code_fraction/layer1": 1.0, "eval_dead_code_fraction/layer2": 1.0, "eval_dead_code_fraction/layer3": 1.0, "eval_first_transition_accuracy": 0.9, "eval_input_norm/layer0": 0.0, "eval_input_norm/layer1": 0.0, "eval_input_norm/layer2": 0.0, "eval_input_norm/layer3": 0.0, "eval_loss": 1.2754778861999512, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_output_norm/layer1": 0.0, "eval_output_norm/layer2": 0.0, "eval_output_norm/layer3": 0.0, "eval_runtime": 40.3576, "eval_samples_per_second": 811.942, "eval_steps_per_second": 1.586, "eval_transition_accuracy": 0.5764516129032258, "step": 9000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "MSE/layer1": 0.0, "MSE/layer2": 0.0, "MSE/layer3": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "dead_code_fraction/layer1": 1.0, "dead_code_fraction/layer2": 1.0, "dead_code_fraction/layer3": 1.0, "epoch": 0.47, "input_norm": 0.0, "input_norm/layer0": 0.0, "input_norm/layer1": 0.0, "input_norm/layer2": 0.0, "input_norm/layer3": 0.0, "learning_rate": 0.001, "loss": 1.2746, "max_norm": 26.98938751220703, "max_norm/layer0": 15.152251243591309, "max_norm/layer1": 17.66726303100586, "max_norm/layer2": 22.432802200317383, "max_norm/layer3": 26.98938751220703, "mean_norm": 8.197539746761322, "mean_norm/layer0": 8.4593066573143, "mean_norm/layer1": 8.148526132106781, "mean_norm/layer2": 7.884801626205444, "mean_norm/layer3": 8.297524571418762, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "output_norm/layer1": 0.0, "output_norm/layer2": 0.0, "output_norm/layer3": 0.0, "step": 9500 }, { "epoch": 0.47, "eval_MSE/layer0": 0.0, "eval_MSE/layer1": 0.0, "eval_MSE/layer2": 0.0, "eval_MSE/layer3": 0.0, "eval_accuracy": 0.45229261503444884, "eval_dead_code_fraction/layer0": 1.0, "eval_dead_code_fraction/layer1": 1.0, "eval_dead_code_fraction/layer2": 1.0, "eval_dead_code_fraction/layer3": 1.0, "eval_first_transition_accuracy": 0.9, "eval_input_norm/layer0": 0.0, "eval_input_norm/layer1": 0.0, "eval_input_norm/layer2": 0.0, "eval_input_norm/layer3": 0.0, "eval_loss": 1.2736179828643799, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_output_norm/layer1": 0.0, "eval_output_norm/layer2": 0.0, "eval_output_norm/layer3": 0.0, "eval_runtime": 40.4635, "eval_samples_per_second": 809.817, "eval_steps_per_second": 1.582, "eval_transition_accuracy": 0.5864516129032258, "step": 9500 }, { "MSE": 0.0, "MSE/layer0": 0.0, "MSE/layer1": 0.0, "MSE/layer2": 0.0, "MSE/layer3": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "dead_code_fraction/layer1": 1.0, "dead_code_fraction/layer2": 1.0, "dead_code_fraction/layer3": 1.0, "epoch": 0.5, "input_norm": 0.0, "input_norm/layer0": 0.0, "input_norm/layer1": 0.0, "input_norm/layer2": 0.0, "input_norm/layer3": 0.0, "learning_rate": 0.001, "loss": 1.2734, "max_norm": 28.549026489257812, "max_norm/layer0": 15.429606437683105, "max_norm/layer1": 17.9191837310791, "max_norm/layer2": 23.421247482299805, "max_norm/layer3": 28.549026489257812, "mean_norm": 8.206039026379585, "mean_norm/layer0": 8.459603905677795, "mean_norm/layer1": 8.153472065925598, "mean_norm/layer2": 7.905435502529144, "mean_norm/layer3": 8.305644631385803, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "output_norm/layer1": 0.0, "output_norm/layer2": 0.0, "output_norm/layer3": 0.0, "step": 10000 }, { "epoch": 0.5, "eval_MSE/layer0": 0.0, "eval_MSE/layer1": 0.0, "eval_MSE/layer2": 0.0, "eval_MSE/layer3": 0.0, "eval_accuracy": 0.4518793061023622, "eval_dead_code_fraction/layer0": 1.0, "eval_dead_code_fraction/layer1": 1.0, "eval_dead_code_fraction/layer2": 1.0, "eval_dead_code_fraction/layer3": 1.0, "eval_first_transition_accuracy": 0.91, "eval_input_norm/layer0": 0.0, "eval_input_norm/layer1": 0.0, "eval_input_norm/layer2": 0.0, "eval_input_norm/layer3": 0.0, "eval_loss": 1.2739558219909668, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_output_norm/layer1": 0.0, "eval_output_norm/layer2": 0.0, "eval_output_norm/layer3": 0.0, "eval_runtime": 40.3957, "eval_samples_per_second": 811.175, "eval_steps_per_second": 1.584, "eval_transition_accuracy": 0.5779032258064516, "step": 10000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "MSE/layer1": 0.0, "MSE/layer2": 0.0, "MSE/layer3": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "dead_code_fraction/layer1": 1.0, "dead_code_fraction/layer2": 1.0, "dead_code_fraction/layer3": 1.0, "epoch": 0.53, "input_norm": 0.0, "input_norm/layer0": 0.0, "input_norm/layer1": 0.0, "input_norm/layer2": 0.0, "input_norm/layer3": 0.0, "learning_rate": 0.001, "loss": 1.2732, "max_norm": 30.3062801361084, "max_norm/layer0": 15.69118595123291, "max_norm/layer1": 18.259578704833984, "max_norm/layer2": 24.377281188964844, "max_norm/layer3": 30.3062801361084, "mean_norm": 8.214424923062325, "mean_norm/layer0": 8.459942817687988, "mean_norm/layer1": 8.156991243362427, "mean_norm/layer2": 7.927173614501953, "mean_norm/layer3": 8.31359201669693, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "output_norm/layer1": 0.0, "output_norm/layer2": 0.0, "output_norm/layer3": 0.0, "step": 10500 }, { "epoch": 0.53, "eval_MSE/layer0": 0.0, "eval_MSE/layer1": 0.0, "eval_MSE/layer2": 0.0, "eval_MSE/layer3": 0.0, "eval_accuracy": 0.45159791961429624, "eval_dead_code_fraction/layer0": 1.0, "eval_dead_code_fraction/layer1": 1.0, "eval_dead_code_fraction/layer2": 1.0, "eval_dead_code_fraction/layer3": 1.0, "eval_first_transition_accuracy": 0.89, "eval_input_norm/layer0": 0.0, "eval_input_norm/layer1": 0.0, "eval_input_norm/layer2": 0.0, "eval_input_norm/layer3": 0.0, "eval_loss": 1.2743829488754272, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_output_norm/layer1": 0.0, "eval_output_norm/layer2": 0.0, "eval_output_norm/layer3": 0.0, "eval_runtime": 40.5803, "eval_samples_per_second": 807.485, "eval_steps_per_second": 1.577, "eval_transition_accuracy": 0.5879032258064516, "step": 10500 }, { "MSE": 0.0, "MSE/layer0": 0.0, "MSE/layer1": 0.0, "MSE/layer2": 0.0, "MSE/layer3": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "dead_code_fraction/layer1": 1.0, "dead_code_fraction/layer2": 1.0, "dead_code_fraction/layer3": 1.0, "epoch": 0.55, "input_norm": 0.0, "input_norm/layer0": 0.0, "input_norm/layer1": 0.0, "input_norm/layer2": 0.0, "input_norm/layer3": 0.0, "learning_rate": 0.001, "loss": 1.2723, "max_norm": 31.752639770507812, "max_norm/layer0": 16.03121566772461, "max_norm/layer1": 18.51296043395996, "max_norm/layer2": 25.478057861328125, "max_norm/layer3": 31.752639770507812, "mean_norm": 8.222758993506432, "mean_norm/layer0": 8.460269570350647, "mean_norm/layer1": 8.1603884100914, "mean_norm/layer2": 7.949049711227417, "mean_norm/layer3": 8.321328282356262, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "output_norm/layer1": 0.0, "output_norm/layer2": 0.0, "output_norm/layer3": 0.0, "step": 11000 }, { "epoch": 0.55, "eval_MSE/layer0": 0.0, "eval_MSE/layer1": 0.0, "eval_MSE/layer2": 0.0, "eval_MSE/layer3": 0.0, "eval_accuracy": 0.45252594234436516, "eval_dead_code_fraction/layer0": 1.0, "eval_dead_code_fraction/layer1": 1.0, "eval_dead_code_fraction/layer2": 1.0, "eval_dead_code_fraction/layer3": 1.0, "eval_first_transition_accuracy": 0.89, "eval_input_norm/layer0": 0.0, "eval_input_norm/layer1": 0.0, "eval_input_norm/layer2": 0.0, "eval_input_norm/layer3": 0.0, "eval_loss": 1.269043207168579, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_output_norm/layer1": 0.0, "eval_output_norm/layer2": 0.0, "eval_output_norm/layer3": 0.0, "eval_runtime": 40.1138, "eval_samples_per_second": 816.875, "eval_steps_per_second": 1.595, "eval_transition_accuracy": 0.5811290322580646, "step": 11000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "MSE/layer1": 0.0, "MSE/layer2": 0.0, "MSE/layer3": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "dead_code_fraction/layer1": 1.0, "dead_code_fraction/layer2": 1.0, "dead_code_fraction/layer3": 1.0, "epoch": 0.57, "input_norm": 0.0, "input_norm/layer0": 0.0, "input_norm/layer1": 0.0, "input_norm/layer2": 0.0, "input_norm/layer3": 0.0, "learning_rate": 0.001, "loss": 1.2712, "max_norm": 33.44036102294922, "max_norm/layer0": 16.178028106689453, "max_norm/layer1": 18.79450798034668, "max_norm/layer2": 26.372129440307617, "max_norm/layer3": 33.44036102294922, "mean_norm": 8.230609133839607, "mean_norm/layer0": 8.460545778274536, "mean_norm/layer1": 8.163512825965881, "mean_norm/layer2": 7.969985008239746, "mean_norm/layer3": 8.328392922878265, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "output_norm/layer1": 0.0, "output_norm/layer2": 0.0, "output_norm/layer3": 0.0, "step": 11500 }, { "epoch": 0.57, "eval_MSE/layer0": 0.0, "eval_MSE/layer1": 0.0, "eval_MSE/layer2": 0.0, "eval_MSE/layer3": 0.0, "eval_accuracy": 0.45260740265132876, "eval_dead_code_fraction/layer0": 1.0, "eval_dead_code_fraction/layer1": 1.0, "eval_dead_code_fraction/layer2": 1.0, "eval_dead_code_fraction/layer3": 1.0, "eval_first_transition_accuracy": 0.93, "eval_input_norm/layer0": 0.0, "eval_input_norm/layer1": 0.0, "eval_input_norm/layer2": 0.0, "eval_input_norm/layer3": 0.0, "eval_loss": 1.2705051898956299, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_output_norm/layer1": 0.0, "eval_output_norm/layer2": 0.0, "eval_output_norm/layer3": 0.0, "eval_runtime": 40.2988, "eval_samples_per_second": 813.126, "eval_steps_per_second": 1.588, "eval_transition_accuracy": 0.5779032258064516, "step": 11500 }, { "MSE": 0.0, "MSE/layer0": 0.0, "MSE/layer1": 0.0, "MSE/layer2": 0.0, "MSE/layer3": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "dead_code_fraction/layer1": 1.0, "dead_code_fraction/layer2": 1.0, "dead_code_fraction/layer3": 1.0, "epoch": 0.6, "input_norm": 0.0, "input_norm/layer0": 0.0, "input_norm/layer1": 0.0, "input_norm/layer2": 0.0, "input_norm/layer3": 0.0, "learning_rate": 0.001, "loss": 1.2716, "max_norm": 35.007076263427734, "max_norm/layer0": 16.497554779052734, "max_norm/layer1": 18.986597061157227, "max_norm/layer2": 27.318687438964844, "max_norm/layer3": 35.007076263427734, "mean_norm": 8.238363325595856, "mean_norm/layer0": 8.460874915122986, "mean_norm/layer1": 8.166603803634644, "mean_norm/layer2": 7.990897297859192, "mean_norm/layer3": 8.335077285766602, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "output_norm/layer1": 0.0, "output_norm/layer2": 0.0, "output_norm/layer3": 0.0, "step": 12000 }, { "epoch": 0.6, "eval_MSE/layer0": 0.0, "eval_MSE/layer1": 0.0, "eval_MSE/layer2": 0.0, "eval_MSE/layer3": 0.0, "eval_accuracy": 0.4526636318897638, "eval_dead_code_fraction/layer0": 1.0, "eval_dead_code_fraction/layer1": 1.0, "eval_dead_code_fraction/layer2": 1.0, "eval_dead_code_fraction/layer3": 1.0, "eval_first_transition_accuracy": 0.89, "eval_input_norm/layer0": 0.0, "eval_input_norm/layer1": 0.0, "eval_input_norm/layer2": 0.0, "eval_input_norm/layer3": 0.0, "eval_loss": 1.2700704336166382, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_output_norm/layer1": 0.0, "eval_output_norm/layer2": 0.0, "eval_output_norm/layer3": 0.0, "eval_runtime": 40.2954, "eval_samples_per_second": 813.195, "eval_steps_per_second": 1.588, "eval_transition_accuracy": 0.5759677419354838, "step": 12000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "MSE/layer1": 0.0, "MSE/layer2": 0.0, "MSE/layer3": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "dead_code_fraction/layer1": 1.0, "dead_code_fraction/layer2": 1.0, "dead_code_fraction/layer3": 1.0, "epoch": 0.62, "input_norm": 0.0, "input_norm/layer0": 0.0, "input_norm/layer1": 0.0, "input_norm/layer2": 0.0, "input_norm/layer3": 0.0, "learning_rate": 0.001, "loss": 1.2708, "max_norm": 36.530174255371094, "max_norm/layer0": 16.774707794189453, "max_norm/layer1": 19.286115646362305, "max_norm/layer2": 28.3411808013916, "max_norm/layer3": 36.530174255371094, "mean_norm": 8.246006086468697, "mean_norm/layer0": 8.461170196533203, "mean_norm/layer1": 8.169004082679749, "mean_norm/layer2": 8.012158274650574, "mean_norm/layer3": 8.341691792011261, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "output_norm/layer1": 0.0, "output_norm/layer2": 0.0, "output_norm/layer3": 0.0, "step": 12500 }, { "epoch": 0.62, "eval_MSE/layer0": 0.0, "eval_MSE/layer1": 0.0, "eval_MSE/layer2": 0.0, "eval_MSE/layer3": 0.0, "eval_accuracy": 0.45218808632197344, "eval_dead_code_fraction/layer0": 1.0, "eval_dead_code_fraction/layer1": 1.0, "eval_dead_code_fraction/layer2": 1.0, "eval_dead_code_fraction/layer3": 1.0, "eval_first_transition_accuracy": 0.95, "eval_input_norm/layer0": 0.0, "eval_input_norm/layer1": 0.0, "eval_input_norm/layer2": 0.0, "eval_input_norm/layer3": 0.0, "eval_loss": 1.2715603113174438, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_output_norm/layer1": 0.0, "eval_output_norm/layer2": 0.0, "eval_output_norm/layer3": 0.0, "eval_runtime": 40.6419, "eval_samples_per_second": 806.261, "eval_steps_per_second": 1.575, "eval_transition_accuracy": 0.5485483870967742, "step": 12500 }, { "MSE": 0.0, "MSE/layer0": 0.0, "MSE/layer1": 0.0, "MSE/layer2": 0.0, "MSE/layer3": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "dead_code_fraction/layer1": 1.0, "dead_code_fraction/layer2": 1.0, "dead_code_fraction/layer3": 1.0, "epoch": 0.65, "input_norm": 0.0, "input_norm/layer0": 0.0, "input_norm/layer1": 0.0, "input_norm/layer2": 0.0, "input_norm/layer3": 0.0, "learning_rate": 0.001, "loss": 1.2705, "max_norm": 38.14279556274414, "max_norm/layer0": 16.91411590576172, "max_norm/layer1": 19.68568229675293, "max_norm/layer2": 29.167861938476562, "max_norm/layer3": 38.14279556274414, "mean_norm": 8.253673061728477, "mean_norm/layer0": 8.461421430110931, "mean_norm/layer1": 8.171639680862427, "mean_norm/layer2": 8.033495247364044, "mean_norm/layer3": 8.348135888576508, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "output_norm/layer1": 0.0, "output_norm/layer2": 0.0, "output_norm/layer3": 0.0, "step": 13000 }, { "epoch": 0.65, "eval_MSE/layer0": 0.0, "eval_MSE/layer1": 0.0, "eval_MSE/layer2": 0.0, "eval_MSE/layer3": 0.0, "eval_accuracy": 0.4528599536325049, "eval_dead_code_fraction/layer0": 1.0, "eval_dead_code_fraction/layer1": 1.0, "eval_dead_code_fraction/layer2": 1.0, "eval_dead_code_fraction/layer3": 1.0, "eval_first_transition_accuracy": 0.93, "eval_input_norm/layer0": 0.0, "eval_input_norm/layer1": 0.0, "eval_input_norm/layer2": 0.0, "eval_input_norm/layer3": 0.0, "eval_loss": 1.2675950527191162, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_output_norm/layer1": 0.0, "eval_output_norm/layer2": 0.0, "eval_output_norm/layer3": 0.0, "eval_runtime": 40.2422, "eval_samples_per_second": 814.27, "eval_steps_per_second": 1.59, "eval_transition_accuracy": 0.5733870967741935, "step": 13000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "MSE/layer1": 0.0, "MSE/layer2": 0.0, "MSE/layer3": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "dead_code_fraction/layer1": 1.0, "dead_code_fraction/layer2": 1.0, "dead_code_fraction/layer3": 1.0, "epoch": 0.68, "input_norm": 0.0, "input_norm/layer0": 0.0, "input_norm/layer1": 0.0, "input_norm/layer2": 0.0, "input_norm/layer3": 0.0, "learning_rate": 0.001, "loss": 1.2696, "max_norm": 39.6273307800293, "max_norm/layer0": 17.14823341369629, "max_norm/layer1": 19.711994171142578, "max_norm/layer2": 30.05324363708496, "max_norm/layer3": 39.6273307800293, "mean_norm": 8.261498123407364, "mean_norm/layer0": 8.46165120601654, "mean_norm/layer1": 8.17492812871933, "mean_norm/layer2": 8.055188477039337, "mean_norm/layer3": 8.354224681854248, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "output_norm/layer1": 0.0, "output_norm/layer2": 0.0, "output_norm/layer3": 0.0, "step": 13500 }, { "epoch": 0.68, "eval_MSE/layer0": 0.0, "eval_MSE/layer1": 0.0, "eval_MSE/layer2": 0.0, "eval_MSE/layer3": 0.0, "eval_accuracy": 0.4518833911325049, "eval_dead_code_fraction/layer0": 1.0, "eval_dead_code_fraction/layer1": 1.0, "eval_dead_code_fraction/layer2": 1.0, "eval_dead_code_fraction/layer3": 1.0, "eval_first_transition_accuracy": 0.91, "eval_input_norm/layer0": 0.0, "eval_input_norm/layer1": 0.0, "eval_input_norm/layer2": 0.0, "eval_input_norm/layer3": 0.0, "eval_loss": 1.2716896533966064, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_output_norm/layer1": 0.0, "eval_output_norm/layer2": 0.0, "eval_output_norm/layer3": 0.0, "eval_runtime": 40.327, "eval_samples_per_second": 812.557, "eval_steps_per_second": 1.587, "eval_transition_accuracy": 0.5993548387096774, "step": 13500 }, { "MSE": 0.0, "MSE/layer0": 0.0, "MSE/layer1": 0.0, "MSE/layer2": 0.0, "MSE/layer3": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "dead_code_fraction/layer1": 1.0, "dead_code_fraction/layer2": 1.0, "dead_code_fraction/layer3": 1.0, "epoch": 0.7, "input_norm": 0.0, "input_norm/layer0": 0.0, "input_norm/layer1": 0.0, "input_norm/layer2": 0.0, "input_norm/layer3": 0.0, "learning_rate": 0.001, "loss": 1.2687, "max_norm": 41.1092643737793, "max_norm/layer0": 17.324541091918945, "max_norm/layer1": 19.974578857421875, "max_norm/layer2": 30.87788200378418, "max_norm/layer3": 41.1092643737793, "mean_norm": 8.26871033012867, "mean_norm/layer0": 8.46183955669403, "mean_norm/layer1": 8.177218735218048, "mean_norm/layer2": 8.076003432273865, "mean_norm/layer3": 8.359779596328735, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "output_norm/layer1": 0.0, "output_norm/layer2": 0.0, "output_norm/layer3": 0.0, "step": 14000 }, { "epoch": 0.7, "eval_MSE/layer0": 0.0, "eval_MSE/layer1": 0.0, "eval_MSE/layer2": 0.0, "eval_MSE/layer3": 0.0, "eval_accuracy": 0.45239882581815943, "eval_dead_code_fraction/layer0": 1.0, "eval_dead_code_fraction/layer1": 1.0, "eval_dead_code_fraction/layer2": 1.0, "eval_dead_code_fraction/layer3": 1.0, "eval_first_transition_accuracy": 0.9, "eval_input_norm/layer0": 0.0, "eval_input_norm/layer1": 0.0, "eval_input_norm/layer2": 0.0, "eval_input_norm/layer3": 0.0, "eval_loss": 1.2687005996704102, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_output_norm/layer1": 0.0, "eval_output_norm/layer2": 0.0, "eval_output_norm/layer3": 0.0, "eval_runtime": 40.5627, "eval_samples_per_second": 807.837, "eval_steps_per_second": 1.578, "eval_transition_accuracy": 0.5756451612903226, "step": 14000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "MSE/layer1": 0.0, "MSE/layer2": 0.0, "MSE/layer3": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "dead_code_fraction/layer1": 1.0, "dead_code_fraction/layer2": 1.0, "dead_code_fraction/layer3": 1.0, "epoch": 0.72, "input_norm": 0.0, "input_norm/layer0": 0.0, "input_norm/layer1": 0.0, "input_norm/layer2": 0.0, "input_norm/layer3": 0.0, "learning_rate": 0.001, "loss": 1.2685, "max_norm": 42.766014099121094, "max_norm/layer0": 17.42141342163086, "max_norm/layer1": 20.071725845336914, "max_norm/layer2": 31.913164138793945, "max_norm/layer3": 42.766014099121094, "mean_norm": 8.276251748204231, "mean_norm/layer0": 8.46206510066986, "mean_norm/layer1": 8.179498374462128, "mean_norm/layer2": 8.097876787185669, "mean_norm/layer3": 8.365566730499268, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "output_norm/layer1": 0.0, "output_norm/layer2": 0.0, "output_norm/layer3": 0.0, "step": 14500 }, { "epoch": 0.72, "eval_MSE/layer0": 0.0, "eval_MSE/layer1": 0.0, "eval_MSE/layer2": 0.0, "eval_MSE/layer3": 0.0, "eval_accuracy": 0.45213714359313484, "eval_dead_code_fraction/layer0": 1.0, "eval_dead_code_fraction/layer1": 1.0, "eval_dead_code_fraction/layer2": 1.0, "eval_dead_code_fraction/layer3": 1.0, "eval_first_transition_accuracy": 0.89, "eval_input_norm/layer0": 0.0, "eval_input_norm/layer1": 0.0, "eval_input_norm/layer2": 0.0, "eval_input_norm/layer3": 0.0, "eval_loss": 1.2709327936172485, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_output_norm/layer1": 0.0, "eval_output_norm/layer2": 0.0, "eval_output_norm/layer3": 0.0, "eval_runtime": 40.2398, "eval_samples_per_second": 814.319, "eval_steps_per_second": 1.59, "eval_transition_accuracy": 0.612741935483871, "step": 14500 }, { "MSE": 0.0, "MSE/layer0": 0.0, "MSE/layer1": 0.0, "MSE/layer2": 0.0, "MSE/layer3": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "dead_code_fraction/layer1": 1.0, "dead_code_fraction/layer2": 1.0, "dead_code_fraction/layer3": 1.0, "epoch": 0.75, "input_norm": 0.0, "input_norm/layer0": 0.0, "input_norm/layer1": 0.0, "input_norm/layer2": 0.0, "input_norm/layer3": 0.0, "learning_rate": 0.001, "loss": 1.2685, "max_norm": 44.61077117919922, "max_norm/layer0": 17.665727615356445, "max_norm/layer1": 20.15743637084961, "max_norm/layer2": 32.966209411621094, "max_norm/layer3": 44.61077117919922, "mean_norm": 8.283790707588196, "mean_norm/layer0": 8.462257981300354, "mean_norm/layer1": 8.181303024291992, "mean_norm/layer2": 8.120182931423187, "mean_norm/layer3": 8.37141889333725, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "output_norm/layer1": 0.0, "output_norm/layer2": 0.0, "output_norm/layer3": 0.0, "step": 15000 }, { "epoch": 0.75, "eval_MSE/layer0": 0.0, "eval_MSE/layer1": 0.0, "eval_MSE/layer2": 0.0, "eval_MSE/layer3": 0.0, "eval_accuracy": 0.4519187146284449, "eval_dead_code_fraction/layer0": 1.0, "eval_dead_code_fraction/layer1": 1.0, "eval_dead_code_fraction/layer2": 1.0, "eval_dead_code_fraction/layer3": 1.0, "eval_first_transition_accuracy": 0.91, "eval_input_norm/layer0": 0.0, "eval_input_norm/layer1": 0.0, "eval_input_norm/layer2": 0.0, "eval_input_norm/layer3": 0.0, "eval_loss": 1.2706036567687988, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_output_norm/layer1": 0.0, "eval_output_norm/layer2": 0.0, "eval_output_norm/layer3": 0.0, "eval_runtime": 40.0952, "eval_samples_per_second": 817.256, "eval_steps_per_second": 1.596, "eval_transition_accuracy": 0.587258064516129, "step": 15000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "MSE/layer1": 0.0, "MSE/layer2": 0.0, "MSE/layer3": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "dead_code_fraction/layer1": 1.0, "dead_code_fraction/layer2": 1.0, "dead_code_fraction/layer3": 1.0, "epoch": 0.78, "input_norm": 0.0, "input_norm/layer0": 0.0, "input_norm/layer1": 0.0, "input_norm/layer2": 0.0, "input_norm/layer3": 0.0, "learning_rate": 0.001, "loss": 1.2675, "max_norm": 46.33829879760742, "max_norm/layer0": 17.856664657592773, "max_norm/layer1": 20.084186553955078, "max_norm/layer2": 33.940242767333984, "max_norm/layer3": 46.33829879760742, "mean_norm": 8.291451185941696, "mean_norm/layer0": 8.462452054023743, "mean_norm/layer1": 8.18280303478241, "mean_norm/layer2": 8.143204748630524, "mean_norm/layer3": 8.377344906330109, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "output_norm/layer1": 0.0, "output_norm/layer2": 0.0, "output_norm/layer3": 0.0, "step": 15500 }, { "epoch": 0.78, "eval_MSE/layer0": 0.0, "eval_MSE/layer1": 0.0, "eval_MSE/layer2": 0.0, "eval_MSE/layer3": 0.0, "eval_accuracy": 0.45268790177472934, "eval_dead_code_fraction/layer0": 1.0, "eval_dead_code_fraction/layer1": 1.0, "eval_dead_code_fraction/layer2": 1.0, "eval_dead_code_fraction/layer3": 1.0, "eval_first_transition_accuracy": 0.96, "eval_input_norm/layer0": 0.0, "eval_input_norm/layer1": 0.0, "eval_input_norm/layer2": 0.0, "eval_input_norm/layer3": 0.0, "eval_loss": 1.2690919637680054, "eval_multicode_k": 1, "eval_output_norm/layer0": 0.0, "eval_output_norm/layer1": 0.0, "eval_output_norm/layer2": 0.0, "eval_output_norm/layer3": 0.0, "eval_runtime": 39.8501, "eval_samples_per_second": 822.281, "eval_steps_per_second": 1.606, "eval_transition_accuracy": 0.6364516129032258, "step": 15500 } ], "max_steps": 20000, "num_train_epochs": 9223372036854775807, "total_flos": 9712749772800000.0, "trial_name": null, "trial_params": null }