togethercomputer/m2-bert-80M-32k

Model card Files Files and versions Community

m2-bert-80M-32k / config.yaml

Dan Fu

32K partial checkpoint

afc7050 7 months ago

raw history blame contribute delete

No virus

925 Bytes

	# Note that some of the fields in this template haven't been filled in yet.
	# Please resolve any `null` fields before launching!

	precision: amp_bf16
	max_seq_len: 32768

	# Tokenizer for dataset creation
	tokenizer_name: bert-base-uncased

	# Base model config
	model:
	name: bert
	pretrained_model_name: ${tokenizer_name}
	tokenizer_name: ${tokenizer_name}
	model_config:
	num_attention_heads: 12
	num_hidden_layers: 12
	attention_probs_dropout_prob: 0.0
	max_position_embeddings: 32768

	monarch_mixer_sequence_mixing: True
	long_conv_l_max: 32768
	long_conv_kernel_learning_rate: 1e-3
	hyena_lr_pos_emb: 1e-5
	hyena_w: 10
	hyena_wd: 0.1
	hyena_emb_dim: 5
	hyena_filter_order: 128
	hyena_training_additions: False

	bidirectional: true
	residual_long_conv: true

	use_glu_mlp: True
	use_monarch_mlp: True
	monarch_mlp_nblocks: 4
	use_positional_encodings: True