{ "optimizer_state_offchip": true, "replicated_tensor_sharding": true, "enable_half_partials": true, "gradient_accumulation_steps": 32, "executable_cache_dir": "./exe_cache", "layers_per_ipu": [1, 2, 3, 3, 3, 0, 6, 6], "matmul_proportion": [0.6, 0.6, 0.2, 0.2, 0.2, 0.6, 0.2, 0.2], "serialized_projection_splits_per_ipu": [0, 0, 0, 0, 0, 4, 0, 0] }