from typing import Optional

from transformers import PretrainedConfig


class MeralionBestRqConformerEncoderConfig(PretrainedConfig):
    """Configuration for a MERaLiON BEST-RQ Conformer encoder.

    The defaults describe a 24-layer Conformer with a hidden size of 1024,
    8 attention heads, and relative position embeddings, operating on
    80-dimensional input features. ``vocab_size`` and the ``ctc_*`` options
    configure an optional CTC head on top of the encoder.
    """

    model_type = "meralion_bestrq"

    def __init__(
        self,
        input_dim: int = 80,
        input_channels: int = 1,
        num_attention_heads: int = 8,
        hidden_size: int = 1024,
        ffn_dim: int = 4096,
        num_hidden_layers: int = 24,
        conv_depthwise_kernel_size: int = 5,
        feat_proj_dropout: float = 0.0,
        activation_dropout: float = 0.0,
        hidden_dropout: float = 0.0,
        max_source_positions: int = 3000,
        no_scale_embedding: bool = False,
        hidden_act: str = "swish",
        conformer_conv_dropout: float = 0.0,
        position_embeddings_type: str = "relative",
        attention_dropout: float = 0.0,
        rotary_embedding_base: int = 10000,
        layerdrop: float = 0.0,
        final_dropout: float = 0.0,
        vocab_size: Optional[int] = None,
        ctc_loss_reduction: str = "sum",
        ctc_zero_infinity: bool = False,
        **kwargs,
    ):
        self.input_dim = input_dim
        self.input_channels = input_channels
        self.num_attention_heads = num_attention_heads
        self.hidden_size = hidden_size
        self.ffn_dim = ffn_dim
        self.num_hidden_layers = num_hidden_layers
        self.conv_depthwise_kernel_size = conv_depthwise_kernel_size
        self.feat_proj_dropout = feat_proj_dropout
        self.activation_dropout = activation_dropout
        self.hidden_dropout = hidden_dropout
        self.max_source_positions = max_source_positions
        self.no_scale_embedding = no_scale_embedding
        self.hidden_act = hidden_act
        self.conformer_conv_dropout = conformer_conv_dropout
        self.position_embeddings_type = position_embeddings_type
        self.attention_dropout = attention_dropout
        self.rotary_embedding_base = rotary_embedding_base
        self.layerdrop = layerdrop
        self.final_dropout = final_dropout

        # Settings for the optional CTC head.
        self.vocab_size = vocab_size
        self.ctc_loss_reduction = ctc_loss_reduction
        self.ctc_zero_infinity = ctc_zero_infinity

        super().__init__(**kwargs)
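

# Usage sketch: constructing, saving, and reloading the config via the
# standard ``PretrainedConfig`` round-trip. The hyperparameter overrides
# below are illustrative assumptions, not recommended values, and the
# model class that would consume this config is not shown here.
if __name__ == "__main__":
    config = MeralionBestRqConformerEncoderConfig(
        hidden_size=512,  # assumption: a smaller encoder for the demo
        num_hidden_layers=12,
        vocab_size=5000,  # only needed when a CTC head is attached
        ctc_loss_reduction="mean",
    )
    config.save_pretrained("./meralion_bestrq_demo")
    reloaded = MeralionBestRqConformerEncoderConfig.from_pretrained(
        "./meralion_bestrq_demo"
    )
    assert reloaded.hidden_size == config.hidden_size
    print(reloaded)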