kenjiqq committed
Commit b732a2c • 1 Parent(s): 80db878
.gitignore ADDED
@@ -0,0 +1,2 @@
+ venv/
+ __pycache__/
README.md CHANGED
@@ -1,7 +1,7 @@
  ---
  title: Aesthetics Scorer
- emoji: 🐒
- colorFrom: green
+ emoji: 📊
+ colorFrom: indigo
  colorTo: green
  sdk: gradio
  sdk_version: 3.24.1
aesthetics_scorer_artifacts_openclip_vit_l_14.config ADDED
@@ -0,0 +1,8 @@
+ {
+     "input_size": 1024,
+     "use_activation": false,
+     "dropout": 0.0,
+     "hidden_dim": 1024,
+     "reduce_dims": false,
+     "output_activation": null
+ }
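This config mirrors the keyword arguments of the AestheticScorer constructor in model.py below; load_model reads the .config file that sits next to the matching .pth weights and rebuilds the scoring head from it.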
aesthetics_scorer_artifacts_openclip_vit_l_14.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c4a9481fdbce5ff02b252bcb25109b9f3b29841289fadf7e79e884d59f9357d5
+ size 16801743
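The .pth entries are Git LFS pointer files: the commit records only a hash and size (about 16 MB per head), and running `git lfs pull` after cloning fetches the actual weights that app.py loads.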
aesthetics_scorer_rating_openclip_vit_l_14.config ADDED
@@ -0,0 +1,8 @@
+ {
+     "input_size": 1024,
+     "use_activation": false,
+     "dropout": 0.0,
+     "hidden_dim": 1024,
+     "reduce_dims": false,
+     "output_activation": null
+ }
aesthetics_scorer_rating_openclip_vit_l_14.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:eb7fe561369ab6c7dad34b9316a56d2c6070582f0323656148e1107a242cd666
+ size 16801623
app.py ADDED
@@ -0,0 +1,34 @@
+ import gradio as gr
+ import torch
+ from model import preprocess, load_model
+ from transformers import CLIPModel, CLIPProcessor
+ 
+ MODEL = "laion/CLIP-ViT-L-14-laion2B-s32B-b82K"
+ DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
+ 
+ model = CLIPModel.from_pretrained(MODEL)
+ vision_model = model.vision_model
+ vision_model.to(DEVICE)
+ del model
+ clip_processor = CLIPProcessor.from_pretrained(MODEL)
+ 
+ rating_model = load_model("aesthetics_scorer_rating_openclip_vit_l_14.pth").to(DEVICE)
+ artifacts_model = load_model("aesthetics_scorer_artifacts_openclip_vit_l_14.pth").to(DEVICE)
+ 
+ def predict(img):
+     inputs = clip_processor(images=img, return_tensors="pt").to(DEVICE)
+     with torch.no_grad():
+         vision_output = vision_model(**inputs)
+     pooled_output = vision_output.pooler_output
+     embedding = preprocess(pooled_output)
+     with torch.no_grad():
+         rating = rating_model(embedding)
+         artifact = artifacts_model(embedding)
+     return rating.detach().cpu().item(), artifact.detach().cpu().item()
+ 
+ gr.Interface(
+     title="Aesthetics Scorer",
+     fn=predict,
+     inputs=gr.Image(type="pil"),
+     outputs=[gr.Number(label="Rating ~1-10 (high is good)"), gr.Number(label="Artifacts ~1-5 (low is good)")]
+ ).launch()
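For reference, the same scoring pipeline can be driven without the Gradio UI. A minimal sketch, assuming this repo's files are importable; "test.jpg" is a hypothetical local image, not part of the commit:

import torch
from PIL import Image
from transformers import CLIPModel, CLIPProcessor
from model import preprocess, load_model

MODEL = "laion/CLIP-ViT-L-14-laion2B-s32B-b82K"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Same setup as app.py: keep only the CLIP vision tower.
vision_model = CLIPModel.from_pretrained(MODEL).vision_model.to(DEVICE).eval()
clip_processor = CLIPProcessor.from_pretrained(MODEL)
rating_model = load_model("aesthetics_scorer_rating_openclip_vit_l_14.pth").to(DEVICE)

img = Image.open("test.jpg").convert("RGB")  # hypothetical input image
inputs = clip_processor(images=img, return_tensors="pt").to(DEVICE)
with torch.no_grad():
    embedding = preprocess(vision_model(**inputs).pooler_output)  # L2-normalized pooled embedding
    print("rating:", rating_model(embedding).item())  # ~1-10, higher is better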
model.py ADDED
@@ -0,0 +1,82 @@
+ import torch
+ import torch.nn as nn
+ import json
+ import os
+ 
+ class AestheticScorer(nn.Module):
+     def __init__(self, input_size=0, use_activation=False, dropout=0.2, config=None, hidden_dim=1024, reduce_dims=False, output_activation=None):
+         super().__init__()
+         self.config = {
+             "input_size": input_size,
+             "use_activation": use_activation,
+             "dropout": dropout,
+             "hidden_dim": hidden_dim,
+             "reduce_dims": reduce_dims,
+             "output_activation": output_activation
+         }
+         if config is not None:
+             self.config.update(config)
+ 
+         layers = [
+             nn.Linear(self.config["input_size"], self.config["hidden_dim"]),
+             nn.ReLU() if self.config["use_activation"] else None,
+             nn.Dropout(self.config["dropout"]),
+ 
+             nn.Linear(self.config["hidden_dim"], round(self.config["hidden_dim"] / (2 if self.config["reduce_dims"] else 1))),
+             nn.ReLU() if self.config["use_activation"] else None,
+             nn.Dropout(self.config["dropout"]),
+ 
+             nn.Linear(round(self.config["hidden_dim"] / (2 if self.config["reduce_dims"] else 1)), round(self.config["hidden_dim"] / (4 if self.config["reduce_dims"] else 1))),
+             nn.ReLU() if self.config["use_activation"] else None,
+             nn.Dropout(self.config["dropout"]),
+ 
+             nn.Linear(round(self.config["hidden_dim"] / (4 if self.config["reduce_dims"] else 1)), round(self.config["hidden_dim"] / (8 if self.config["reduce_dims"] else 1))),
+             nn.ReLU() if self.config["use_activation"] else None,
+             nn.Linear(round(self.config["hidden_dim"] / (8 if self.config["reduce_dims"] else 1)), 1),
+         ]
+         if self.config["output_activation"] == "sigmoid":
+             layers.append(
+                 nn.Sigmoid()
+             )
+         layers = [x for x in layers if x is not None]
+         self.layers = nn.Sequential(
+             *layers
+         )
+ 
+     def forward(self, x):
+         if self.config["output_activation"] == "sigmoid":
+             upper, lower = 10, 1
+             scale = upper - lower
+             return (self.layers(x) * scale) + lower
+         else:
+             return self.layers(x)
+ 
+     def save(self, save_name):
+         split_name = os.path.splitext(save_name)
+         with open(f"{split_name[0]}.config", "w") as outfile:
+             outfile.write(json.dumps(self.config, indent=4))
+ 
+         for i in range(6):  # saving sometimes fails, so retry up to 5 times; might be a Windows issue
+             try:
+                 torch.save(self.state_dict(), save_name)
+                 break
+             except RuntimeError as e:
+                 # retry only if the error says the file cannot be opened
+                 if "cannot be opened" in str(e) and i < 5:
+                     print("Model save failed, retrying...")
+                 else:
+                     raise e
+ 
+ 
+ def preprocess(embeddings):
+     return embeddings / embeddings.norm(p=2, dim=-1, keepdim=True)
+ 
+ 
+ def load_model(weight_path, device='cuda' if torch.cuda.is_available() else 'cpu'):
+     split_path = os.path.splitext(weight_path)
+     with open(f"{split_path[0]}.config", "r") as config_file:
+         config = json.load(config_file)
+     model = AestheticScorer(config=config)
+     model.load_state_dict(torch.load(weight_path, map_location=device))
+     model.eval()
+     return model
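With the shipped configs (use_activation false, reduce_dims false, dropout 0.0, no output activation), each head reduces to four 1024→1024 linear layers followed by a final 1024→1 projection. A minimal smoke test, assuming the .pth and .config files above are present; the random tensor is only a stand-in for a real CLIP embedding:

import torch
from model import preprocess, load_model

embedding = preprocess(torch.randn(1, 1024))  # stand-in for a CLIP ViT-L/14 pooled output
rating_model = load_model("aesthetics_scorer_rating_openclip_vit_l_14.pth", device="cpu")
with torch.no_grad():
    score = rating_model(embedding)
print(score.shape)  # torch.Size([1, 1]): one scalar score per input row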
requirements.txt ADDED
@@ -0,0 +1,3 @@
+ torch
+ gradio
+ transformers
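To run the Space locally, it should be enough to fetch the LFS weights, install the dependencies with `pip install -r requirements.txt`, and start the demo with `python app.py`; the gr.Interface(...).launch() call at the end of app.py serves the UI.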