from transformers.models.gpt2.configuration_gpt2 import GPT2Config


class MidmBitextConfig(GPT2Config):
    """Configuration for the Midm bitext model.

    Extends GPT2Config with options for position embeddings, the
    normalization variant, and query-key scaling.
    """

    model_type = "midm-bitext-S"

    def __init__(
        self,
        use_absolute_position_embedding: bool = True,
        use_rotary_position_embedding: bool = False,
        rotary_percentage: float = 1.0,
        normalization_type: str = 'layernorm',
        scale_qk_by_inverse_layer_idx: bool = False,
        *args,
        **kwargs,
    ):
        super().__init__(*args, **kwargs)
        # Toggle learned absolute position embeddings vs. rotary embeddings.
        self.use_absolute_position_embedding = use_absolute_position_embedding
        self.use_rotary_position_embedding = use_rotary_position_embedding
        # Fraction of each attention head's dimensions that rotary
        # embeddings are applied to (1.0 = the full head dimension).
        self.rotary_percentage = rotary_percentage
        # Which normalization layer variant the model uses.
        self.normalization_type = normalization_type
        # If True, scale query-key scores by the inverse of the layer
        # index (a numerical-stability technique).
        self.scale_qk_by_inverse_layer_idx = scale_qk_by_inverse_layer_idx
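

# A minimal usage sketch, assuming the standard `transformers` package is
# installed and that inherited GPT2Config keyword arguments (e.g. `n_layer`)
# pass through **kwargs as usual. The values below are illustrative only,
# not released Midm hyperparameters.
if __name__ == "__main__":
    config = MidmBitextConfig(
        n_layer=12,                            # inherited GPT2Config field
        use_absolute_position_embedding=False,
        use_rotary_position_embedding=True,    # switch to rotary embeddings
        rotary_percentage=0.5,                 # rotate half of each head dim
    )
    print(config.model_type)         # -> "midm-bitext-S"
    print(config.rotary_percentage)  # -> 0.5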