# MERaLiON-SpeechEncoder-v1 — configuration_bestrq_conformer.py
# Provenance (model-hub page header): uploaded by huzy0, commit f903aee (verified), "Upload config".
from typing import List, Optional

from transformers import PretrainedConfig
class MeralionBestRqConformerEncoderConfig(PretrainedConfig):
    """Configuration for the MERaLiON BEST-RQ Conformer speech encoder.

    Holds the hyper-parameters of the Conformer encoder (and its optional
    CTC head) and inherits serialization / ``from_pretrained`` behavior
    from :class:`transformers.PretrainedConfig`.

    Args:
        input_dim: Dimensionality of the input features per frame.
        input_channels: Number of input feature channels.
        num_attention_heads: Attention heads per self-attention layer.
        hidden_size: Encoder embedding dimension (embed_dim).
        ffn_dim: Inner dimension of the feed-forward modules.
        num_hidden_layers: Number of Conformer blocks.
        conv_depthwise_kernel_size: Kernel size of the depthwise convolution
            inside each Conformer convolution module.
        feat_proj_dropout: Dropout applied in the input projection.
        activation_dropout: Dropout after the feed-forward activation.
        hidden_dropout: Dropout on hidden states.
        max_source_positions: Maximum supported input sequence length.
        no_scale_embedding: If ``True``, do not scale embeddings.
        hidden_act: Activation function name (default ``"swish"``).
        conformer_conv_dropout: Dropout inside the convolution module.
        position_embeddings_type: Positional embedding scheme
            (default ``"relative"``).
        attention_dropout: Dropout on attention probabilities.
        rotary_embedding_base: Base for rotary position embeddings
            (used when the rotary scheme is selected).
        layerdrop: Probability of dropping a whole encoder layer.
        final_dropout: Dropout before the CTC head.
        vocab_size: Output vocabulary size for the CTC head; ``None`` when
            no CTC head is used.
        ctc_loss_reduction: Reduction applied to the CTC loss
            (default ``"sum"``).
        ctc_zero_infinity: Whether to zero infinite CTC losses.
        **kwargs: Forwarded to :class:`transformers.PretrainedConfig`.
    """

    model_type = "meralion_bestrq"

    def __init__(
        self,
        input_dim: int = 80,
        input_channels: int = 1,
        num_attention_heads: int = 8,
        hidden_size: int = 1024,  # embed_dim
        ffn_dim: int = 4096,
        num_hidden_layers: int = 24,
        conv_depthwise_kernel_size: int = 5,
        feat_proj_dropout: float = 0.0,  # for input_projection
        activation_dropout: float = 0.0,
        hidden_dropout: float = 0.0,
        max_source_positions: int = 3000,
        no_scale_embedding: bool = False,
        hidden_act: str = "swish",
        conformer_conv_dropout: float = 0.0,
        position_embeddings_type: str = "relative",
        attention_dropout: float = 0.0,
        rotary_embedding_base: int = 10000,
        layerdrop: float = 0.0,
        final_dropout: float = 0.0,  # ctc
        vocab_size: Optional[int] = None,  # ctc
        ctc_loss_reduction: str = "sum",  # ctc
        ctc_zero_infinity: bool = False,  # ctc
        **kwargs,
    ):
        self.input_dim = input_dim
        self.input_channels = input_channels
        self.num_attention_heads = num_attention_heads
        self.hidden_size = hidden_size
        self.ffn_dim = ffn_dim
        self.num_hidden_layers = num_hidden_layers
        self.conv_depthwise_kernel_size = conv_depthwise_kernel_size
        self.feat_proj_dropout = feat_proj_dropout
        self.activation_dropout = activation_dropout
        self.hidden_dropout = hidden_dropout
        self.max_source_positions = max_source_positions
        self.no_scale_embedding = no_scale_embedding
        self.hidden_act = hidden_act
        self.conformer_conv_dropout = conformer_conv_dropout
        self.position_embeddings_type = position_embeddings_type
        self.attention_dropout = attention_dropout
        self.rotary_embedding_base = rotary_embedding_base
        self.layerdrop = layerdrop
        self.final_dropout = final_dropout
        self.vocab_size = vocab_size
        self.ctc_loss_reduction = ctc_loss_reduction
        self.ctc_zero_infinity = ctc_zero_infinity
        # Base-class init last, consuming any remaining PretrainedConfig kwargs.
        super().__init__(**kwargs)