# Flat mapping of configuration values, one key per line.
# NOTE(review): key meanings are not defined in this file; presumably these are
# model/training hyperparameters -- confirm against the code that loads it.
d_model: 384
dim_feedforward: 1024
learning_rate: 0.001
n_heads: 4
n_layers: 4
num_speakers: 3
num_steps: 384
weight_decay: 0.001