{
    "optimizer_state_offchip": true,
    "replicated_tensor_sharding": true,
    "enable_half_partials": true,

    "gradient_accumulation_steps": 32,
    "executable_cache_dir": "./exe_cache",

    "layers_per_ipu": [1, 2, 3, 3, 3, 0, 6, 6],
    "matmul_proportion": [0.6, 0.6, 0.2, 0.2, 0.2, 0.6, 0.2, 0.2],
    "serialized_projection_splits_per_ipu": [0, 0, 0, 0, 0, 4, 0, 0]
}