File size: 922 Bytes

4f7610c

# Note that some of the fields in this template haven't been filled in yet.
# Please resolve any `null` fields before launching!

# Numeric precision mode for the run.
# NOTE(review): `amp_bf16` reads like automatic mixed precision in bfloat16;
# confirm the accepted values against the trainer that consumes this file.
precision: amp_bf16
# Maximum sequence length. The same value (8192) is repeated under
# model.model_config as max_position_embeddings and long_conv_l_max.
max_seq_len: 8192

# Tokenizer for dataset creation.
# The model section also reads this value through the `${tokenizer_name}`
# interpolation (model.pretrained_model_name and model.tokenizer_name), so
# changing it here changes all three settings.
tokenizer_name: bert-base-uncased

# Base model config
model:
  name: bert
  # Both names below resolve to the top-level `tokenizer_name` value.
  pretrained_model_name: ${tokenizer_name}
  tokenizer_name: ${tokenizer_name}
  model_config:
    num_attention_heads: 12
    num_hidden_layers: 12
    attention_probs_dropout_prob: 0.0
    # Same value as the top-level max_seq_len (8192).
    # NOTE(review): presumably these must be kept in sync; confirm.
    max_position_embeddings: 8192

    # Sequence-mixing / long-convolution settings.
    # NOTE(review): semantics inferred from key names only; verify against
    # the model implementation before tuning.
    monarch_mixer_sequence_mixing: true
    # Same value as max_seq_len and max_position_embeddings (8192).
    long_conv_l_max: 8192
    # Floats are written with an explicit decimal point (1.0e-3, not 1e-3):
    # YAML 1.1 loaders such as PyYAML resolve `1e-3` as a string.
    long_conv_kernel_learning_rate: 1.0e-3
    hyena_lr_pos_emb: 1.0e-5
    hyena_w: 10
    hyena_wd: 0.1
    hyena_emb_dim: 5
    hyena_filter_order: 128
    hyena_training_additions: false

    bidirectional: true
    residual_long_conv: true

    # MLP and positional-encoding switches.
    # NOTE(review): meaning of monarch_mlp_nblocks is not visible here;
    # presumably the block count of the Monarch MLP; confirm.
    use_glu_mlp: true
    use_monarch_mlp: true
    monarch_mlp_nblocks: 4
    use_positional_encodings: true