shunk031 committed on
Commit
e336b9d
1 Parent(s): 0b03efe

Upload AestheticsPredictorV2ReLU

Browse files
Files changed (2) hide show
  1. config.json +2 -2
  2. modeling_v2.py +12 -2
config.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "_name_or_path": "openai/clip-vit-large-patch14",
3
  "architectures": [
4
  "AestheticsPredictorV2ReLU"
5
  ],
6
  "attention_dropout": 0.0,
7
  "auto_map": {
 
8
  "AutoModel": "modeling_v2.AestheticsPredictorV2ReLU"
9
  },
10
  "dropout": 0.0,
@@ -15,7 +15,7 @@
15
  "initializer_range": 0.02,
16
  "intermediate_size": 4096,
17
  "layer_norm_eps": 1e-05,
18
- "model_type": "clip_vision_model",
19
  "num_attention_heads": 16,
20
  "num_channels": 3,
21
  "num_hidden_layers": 24,
 
1
  {
 
2
  "architectures": [
3
  "AestheticsPredictorV2ReLU"
4
  ],
5
  "attention_dropout": 0.0,
6
  "auto_map": {
7
+ "AutoConfig": "configuration_predictor.AestheticsPredictorConfig",
8
  "AutoModel": "modeling_v2.AestheticsPredictorV2ReLU"
9
  },
10
  "dropout": 0.0,
 
15
  "initializer_range": 0.02,
16
  "intermediate_size": 4096,
17
  "layer_norm_eps": 1e-05,
18
+ "model_type": "aesthetics_predictor",
19
  "num_attention_heads": 16,
20
  "num_channels": 3,
21
  "num_hidden_layers": 24,
modeling_v2.py CHANGED
@@ -97,8 +97,13 @@ class AestheticsPredictorV2ReLU(AestheticsPredictorV2Linear):
97
  def convert_v2_linear_from_openai_clip(
98
  predictor_head_name: str,
99
  openai_model_name: str = "openai/clip-vit-large-patch14",
 
100
  ) -> AestheticsPredictorV2Linear:
101
- model = AestheticsPredictorV2Linear.from_pretrained(openai_model_name)
 
 
 
 
102
 
103
  state_dict = torch.hub.load_state_dict_from_url(
104
  URLS_LINEAR[predictor_head_name], map_location="cpu"
@@ -119,8 +124,13 @@ def convert_v2_linear_from_openai_clip(
119
  def convert_v2_relu_from_openai_clip(
120
  predictor_head_name: str,
121
  openai_model_name: str = "openai/clip-vit-large-patch14",
 
122
  ) -> AestheticsPredictorV2ReLU:
123
- model = AestheticsPredictorV2ReLU.from_pretrained(openai_model_name)
 
 
 
 
124
 
125
  state_dict = torch.hub.load_state_dict_from_url(
126
  URLS_RELU[predictor_head_name], map_location="cpu"
 
97
  def convert_v2_linear_from_openai_clip(
98
  predictor_head_name: str,
99
  openai_model_name: str = "openai/clip-vit-large-patch14",
100
+ config: Optional[AestheticsPredictorConfig] = None,
101
  ) -> AestheticsPredictorV2Linear:
102
+ config = config or AestheticsPredictorConfig.from_pretrained(openai_model_name)
103
+ model = AestheticsPredictorV2Linear(config)
104
+
105
+ clip_model = CLIPVisionModelWithProjection.from_pretrained(openai_model_name)
106
+ model.load_state_dict(clip_model.state_dict(), strict=False)
107
 
108
  state_dict = torch.hub.load_state_dict_from_url(
109
  URLS_LINEAR[predictor_head_name], map_location="cpu"
 
124
  def convert_v2_relu_from_openai_clip(
125
  predictor_head_name: str,
126
  openai_model_name: str = "openai/clip-vit-large-patch14",
127
+ config: Optional[AestheticsPredictorConfig] = None,
128
  ) -> AestheticsPredictorV2ReLU:
129
+ config = config or AestheticsPredictorConfig.from_pretrained(openai_model_name)
130
+ model = AestheticsPredictorV2ReLU(config)
131
+
132
+ clip_model = CLIPVisionModelWithProjection.from_pretrained(openai_model_name)
133
+ model.load_state_dict(clip_model.state_dict(), strict=False)
134
 
135
  state_dict = torch.hub.load_state_dict_from_url(
136
  URLS_RELU[predictor_head_name], map_location="cpu"