from transformers import PretrainedConfig


class BilmaConfig(PretrainedConfig):
    """Configuration for the ``bilma`` model type.

    The configuration works in one of two modes, selected by ``weights``:

    * ``weights="MX"`` (the default): the architecture hyperparameters are
      pinned to the fixed values listed in ``__init__`` and any
      hyperparameter arguments supplied by the caller are ignored.
    * ``weights=None``: the hyperparameter arguments supplied by the caller
      are stored as given.
    """

    model_type = "bilma"

    def __init__(
        self,
        weights="MX",
        include_top=True,
        add_head=None,
        pooling=None,
        num_attention_heads: int = 4,
        num_hidden_layers: int = 2,
        seq_max_length: int = 280,
        hidden_size: int = 512,
        vocab_size: int = 29025,
        hidden_dropout_prob: float = 0.1,
        **kwargs,
    ):
        """Validate the arguments and store them as config attributes.

        Args:
            weights: Name of the weight set; must be ``"MX"`` or ``None``.
                When not ``None`` the hyperparameters below are overridden
                with fixed values.
            include_top: Must not be true when ``add_head`` or ``pooling``
                is given. Stored as-is.
                NOTE(review): presumably toggles the model's top layer --
                confirm against the model code.
            add_head: Optional head specification, stored as-is; only
                allowed when ``include_top`` is false.
            pooling: ``None`` or one of ``"mean"``, ``"cls"``, ``"max"``;
                only allowed when ``include_top`` is false.
            num_attention_heads: Used only when ``weights`` is ``None``.
            num_hidden_layers: Used only when ``weights`` is ``None``.
            seq_max_length: Used only when ``weights`` is ``None``.
            hidden_size: Used only when ``weights`` is ``None``.
            vocab_size: Used only when ``weights`` is ``None``.
            hidden_dropout_prob: Used only when ``weights`` is ``None``.
            **kwargs: Forwarded unchanged to ``PretrainedConfig.__init__``.

        Raises:
            ValueError: If ``weights`` or ``pooling`` is not an accepted
                value, or if ``add_head``/``pooling`` is combined with a
                true ``include_top``.
        """
        countries = ["MX"]
        poolings = ["mean", "cls", "max"]

        # ``None`` has to pass validation: it is the only value that selects
        # the caller-supplied hyperparameters below. Rejecting it here (as
        # the previous check did) made that path unreachable.
        if weights is not None and weights not in countries:
            raise ValueError(f"`weights` must be one of {countries}, got {weights}.")
        # Kept as an equality test (not plain truthiness) so the outcome for
        # non-bool ``include_top`` values is the same as before.
        if add_head is not None and include_top == True:
            raise ValueError("To add a head, 'include_top' must be False")
        if pooling is not None and include_top == True:
            raise ValueError("To specify a pooling, 'include_top' must be False")
        if pooling is not None and pooling not in poolings:
            raise ValueError(f"`pooling` must be one of {poolings}, got {pooling}.")

        if weights is not None:
            # A named weight set pins the architecture, so the caller's
            # hyperparameter arguments are deliberately discarded.
            num_attention_heads = 4
            num_hidden_layers = 2
            seq_max_length = 280
            hidden_size = 512
            vocab_size = 29025
            hidden_dropout_prob = 0.1

        self.weights = weights
        self.include_top = include_top
        self.add_head = add_head
        self.pooling = pooling
        self.num_attention_heads = num_attention_heads
        self.num_hidden_layers = num_hidden_layers
        self.seq_max_length = seq_max_length
        self.hidden_size = hidden_size
        self.vocab_size = vocab_size
        self.hidden_dropout_prob = hidden_dropout_prob
        # Attributes are set before delegating, matching the original order.
        super().__init__(**kwargs)