|
{ |
|
"MSE": 0.0, |
|
"MSE/layer0": 0.0, |
|
"dead_code_fraction": 1.0, |
|
"dead_code_fraction/layer0": 1.0, |
|
"epoch": 1.04, |
|
"eval_MSE/layer0": 611.1571513346564, |
|
"eval_accuracy": 0.5429091526514649, |
|
"eval_dead_code_fraction/layer0": 0.0, |
|
"eval_input_norm/layer0": 31.997479090978388, |
|
"eval_loss": 1.89570152759552, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 15.087154228553715, |
|
"eval_runtime": 73.291, |
|
"eval_samples": 4623, |
|
"eval_samples_per_second": 63.077, |
|
"eval_steps_per_second": 7.886, |
|
"input_norm": 0.0, |
|
"input_norm/layer0": 0.0, |
|
"loss": 2.0762174885749816, |
|
"max_norm": 79.63946533203125, |
|
"max_norm/layer0": 79.63946533203125, |
|
"mean_norm": 68.03947448730469, |
|
"mean_norm/layer0": 68.03947448730469, |
|
"multicode_k": 1, |
|
"output_norm": 0.0, |
|
"output_norm/layer0": 0.0, |
|
"perplexity": 6.657216988297924, |
|
"runtime": 12054.7701, |
|
"samples_per_second": 39.818, |
|
"steps_per_second": 0.83, |
|
"train_samples": 459760 |
|
} |