# Note that some of the fields in this template haven't been filled in yet.
# Please resolve any `null` fields before launching!

precision: amp_bf16
max_seq_len: 32768

# Tokenizer for dataset creation
tokenizer_name: bert-base-uncased

# Base model config
model:
  name: bert
  # Reuse the tokenizer identifier for the pretrained checkpoint and tokenizer
  # via interpolation (resolved by the config loader, e.g. OmegaConf).
  pretrained_model_name: ${tokenizer_name}
  tokenizer_name: ${tokenizer_name}
  model_config:
    num_attention_heads: 12
    num_hidden_layers: 12
    attention_probs_dropout_prob: 0.0
    max_position_embeddings: 32768
    # Monarch Mixer sequence-mixing (long-convolution) settings
    monarch_mixer_sequence_mixing: true
    long_conv_l_max: 32768
    long_conv_kernel_learning_rate: 1e-3
    hyena_lr_pos_emb: 1e-5
    hyena_w: 10
    hyena_wd: 0.1
    hyena_emb_dim: 5
    hyena_filter_order: 128
    hyena_training_additions: false
    bidirectional: true
    residual_long_conv: true
    # MLP block structure
    use_glu_mlp: true
    use_monarch_mlp: true
    monarch_mlp_nblocks: 4
    use_positional_encodings: true