from transformers.configuration_utils import PretrainedConfig


class XvectorConfig(PretrainedConfig):
    """Configuration container for the ``xvector`` model type.

    Every constructor argument is stored unchanged as an instance attribute
    of the same name; remaining keyword arguments are forwarded to
    ``PretrainedConfig.__init__``.

    Args:
        n_mels: Feature-computation setting (presumably the number of mel
            bins -- confirm against the feature extractor).
        sample_rate: Feature-computation setting (presumably in Hz --
            TODO confirm).
        win_length: Feature-computation setting (units are not visible
            here; presumably milliseconds -- TODO confirm).
        hop_length: Feature-computation setting (same unit caveat as
            ``win_length``).
        mean_norm: Mean/variance-normalization flag.
        std_norm: Mean/variance-normalization flag.
        norm_type: Mean/variance-normalization mode name.
        tdnn_blocks: Embedding-model setting (presumably the number of
            TDNN blocks; it is not validated against the list lengths
            below).
        tdnn_channels: Embedding-model setting, one entry per block by
            default. ``None`` selects ``[512, 512, 512, 512, 1500]``.
        tdnn_kernel_sizes: Embedding-model setting. ``None`` selects
            ``[5, 3, 3, 1, 1]``.
        tdnn_dilations: Embedding-model setting. ``None`` selects
            ``[1, 2, 3, 1, 1]``.
        hidden_size: Embedding-model setting.
        num_classes: Classifier setting.
        loss_fn: Classifier setting (name of the loss; how it is
            interpreted is decided by the model code, not here).
        auto_map: Mapping from ``Auto*`` class names to
            ``module.ClassName`` strings. ``None`` selects the mapping to
            the ``configuration_xvector`` / ``modeling_xvector`` classes.
        initializer_range: Stored as-is (presumably the std-dev used for
            weight initialisation -- confirm against the model code).
        **kwargs: Forwarded to ``PretrainedConfig.__init__``.
    """

    model_type = 'xvector'

    def __init__(
        self,
        n_mels=40,
        sample_rate=16000,
        win_length=25,
        hop_length=10,
        mean_norm=True,
        std_norm=False,
        norm_type='sentence',
        tdnn_blocks=5,
        tdnn_channels=None,
        tdnn_kernel_sizes=None,
        tdnn_dilations=None,
        hidden_size=512,
        num_classes=1251,
        loss_fn='aam',
        auto_map=None,
        initializer_range=0.02,
        **kwargs
    ):
        # The list/dict defaults are built per call rather than in the
        # signature: a default written in the signature is created once and
        # would be shared (and mutable) across every instance.
        if tdnn_channels is None:
            tdnn_channels = [512, 512, 512, 512, 1500]
        if tdnn_kernel_sizes is None:
            tdnn_kernel_sizes = [5, 3, 3, 1, 1]
        if tdnn_dilations is None:
            tdnn_dilations = [1, 2, 3, 1, 1]
        if auto_map is None:
            auto_map = {
                "AutoConfig": "configuration_xvector.XvectorConfig",
                "AutoModel": "modeling_xvector.XvectorModel",
                "AutoModelForAudioClassification": "modeling_xvector.XvectorModelForSequenceClassification",
            }

        # Compute features
        self.n_mels = n_mels
        self.sample_rate = sample_rate
        self.win_length = win_length
        self.hop_length = hop_length

        # Mean variance norm
        self.mean_norm = mean_norm
        self.std_norm = std_norm
        self.norm_type = norm_type

        # Embedding model
        self.tdnn_blocks = tdnn_blocks
        self.tdnn_channels = tdnn_channels
        self.tdnn_kernel_sizes = tdnn_kernel_sizes
        self.tdnn_dilations = tdnn_dilations
        self.hidden_size = hidden_size

        # Classifier
        self.num_classes = num_classes
        self.loss_fn = loss_fn

        # Others
        self.auto_map = auto_map
        self.initializer_range = initializer_range

        # Base-class initialisation runs last, exactly as in the original.
        super().__init__(**kwargs)