from transformers.configuration_utils import PretrainedConfig
from transformers.utils import logging

logger = logging.get_logger(__name__)


class RITAConfig(PretrainedConfig):
    """Configuration class for RITA models; stores the model hyperparameters."""

    model_type = "rita"

    def __init__(
        self,
        vocab_size=26,
        d_model=768,
        num_layers=12,
        max_seq_len=1024,
        num_heads=12,
        dropout=0.0,
        ff_ratio=4,
        eos_token_id=2,
        initializer_range=0.02,
        **kwargs,
    ):
        super().__init__(eos_token_id=eos_token_id, **kwargs)
        self.vocab_size = vocab_size
        self.d_model = d_model
        self.num_heads = num_heads
        # Feed-forward hidden size is derived from the model width.
        self.d_feedforward = d_model * ff_ratio
        self.num_layers = num_layers
        self.max_seq_len = max_seq_len
        self.dropout = dropout
        self.eos_token_id = eos_token_id
        # Use the passed-in value instead of hard-coding 0.02.
        self.initializer_range = initializer_range
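

# Usage sketch (illustrative, not part of the original module): constructing a
# config with the defaults above and overriding a few hyperparameters. The
# values below are placeholders, not those of any published RITA checkpoint.
if __name__ == "__main__":
    config = RITAConfig(d_model=1024, num_layers=24, num_heads=16)
    # d_feedforward is derived as d_model * ff_ratio (1024 * 4 = 4096).
    print(config.d_feedforward)
    # PretrainedConfig provides JSON serialization out of the box.
    print(config.to_json_string())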