from transformers.models.clip.configuration_clip import CLIPVisionConfig


class AestheticsPredictorConfig(CLIPVisionConfig):
    """Configuration for the aesthetics predictor.

    Mirrors the CLIPVisionConfig defaults (CLIP ViT-B/32 vision tower) and only
    changes the registered model type.
    """

    model_type = "aesthetics_predictor"

    def __init__(
        self,
        hidden_size: int = 768,
        intermediate_size: int = 3072,
        projection_dim: int = 512,
        num_hidden_layers: int = 12,
        num_attention_heads: int = 12,
        num_channels: int = 3,
        image_size: int = 224,
        patch_size: int = 32,
        hidden_act: str = "quick_gelu",
        layer_norm_eps: float = 1e-5,
        attention_dropout: float = 0.0,
        initializer_range: float = 0.02,
        initializer_factor: float = 1.0,
        **kwargs,
    ):
        # Forward everything to CLIPVisionConfig; keyword arguments keep the call
        # robust against changes in the parent signature's parameter order.
        super().__init__(
            hidden_size=hidden_size,
            intermediate_size=intermediate_size,
            projection_dim=projection_dim,
            num_hidden_layers=num_hidden_layers,
            num_attention_heads=num_attention_heads,
            num_channels=num_channels,
            image_size=image_size,
            patch_size=patch_size,
            hidden_act=hidden_act,
            layer_norm_eps=layer_norm_eps,
            attention_dropout=attention_dropout,
            initializer_range=initializer_range,
            initializer_factor=initializer_factor,
            **kwargs,
        )
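

# --- Usage sketch (illustrative, not part of the original module) ---
# Shows how the config behaves like any transformers config: defaults come from
# CLIPVisionConfig, fields can be overridden by keyword, and inherited helpers
# such as to_dict() are available. Everything below only uses the class defined
# above and standard transformers config behavior.
if __name__ == "__main__":
    # Defaults correspond to a CLIP ViT-B/32-style vision tower.
    config = AestheticsPredictorConfig()
    print(config.model_type)                      # aesthetics_predictor
    print(config.hidden_size, config.patch_size)  # 768 32

    # Override individual fields by keyword, exactly as with CLIPVisionConfig.
    large = AestheticsPredictorConfig(hidden_size=1024, num_hidden_layers=24, patch_size=14)
    print(large.to_dict()["hidden_size"])         # 1024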