---
# Flat mapping of numeric settings, one key per line so the file parses as a
# YAML block mapping (several `key: value` pairs on one line is a parse error).
# NOTE(review): the key names suggest model hyperparameters; the consuming
# code is not visible here, so confirm each key's meaning and units there.
bins: 128
dropout: 0.1  # presumably a probability in [0, 1] - TODO confirm
filter_size: 256
hidden_dim: 512
kernel_size: 7
max_length: 512
n_heads: 2
n_layers: 16