add demo
- .gitignore +2 -0
- README.md +2 -2
- aesthetics_scorer_artifacts_openclip_vit_l_14.config +8 -0
- aesthetics_scorer_artifacts_openclip_vit_l_14.pth +3 -0
- aesthetics_scorer_rating_openclip_vit_l_14.config +8 -0
- aesthetics_scorer_rating_openclip_vit_l_14.pth +3 -0
- app.py +34 -0
- model.py +82 -0
- requirements.txt +3 -0
.gitignore
ADDED
@@ -0,0 +1,2 @@
+venv/
+__pycache__/
README.md
CHANGED
@@ -1,7 +1,7 @@
 ---
 title: Aesthetics Scorer
-emoji:
-colorFrom:
+emoji: π
+colorFrom: indigo
 colorTo: green
 sdk: gradio
 sdk_version: 3.24.1
aesthetics_scorer_artifacts_openclip_vit_l_14.config
ADDED
@@ -0,0 +1,8 @@
+{
+    "input_size": 1024,
+    "use_activation": false,
+    "dropout": 0.0,
+    "hidden_dim": 1024,
+    "reduce_dims": false,
+    "output_activation": null
+}
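These fields mirror the keyword arguments of `AestheticScorer` in `model.py` (added below): `load_model` reads the `.config` file sitting next to the weights and passes it in as the `config` dict. A minimal sketch of the equivalent manual construction:

    import json
    from model import AestheticScorer

    with open("aesthetics_scorer_artifacts_openclip_vit_l_14.config") as f:
        config = json.load(f)
    scorer = AestheticScorer(config=config)  # what load_model does before loading the state dict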
aesthetics_scorer_artifacts_openclip_vit_l_14.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c4a9481fdbce5ff02b252bcb25109b9f3b29841289fadf7e79e884d59f9357d5
+size 16801743
aesthetics_scorer_rating_openclip_vit_l_14.config
ADDED
@@ -0,0 +1,8 @@
+{
+    "input_size": 1024,
+    "use_activation": false,
+    "dropout": 0.0,
+    "hidden_dim": 1024,
+    "reduce_dims": false,
+    "output_activation": null
+}
aesthetics_scorer_rating_openclip_vit_l_14.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eb7fe561369ab6c7dad34b9316a56d2c6070582f0323656148e1107a242cd666
+size 16801623
app.py
ADDED
@@ -0,0 +1,34 @@
+import gradio as gr
+import torch
+from model import preprocess, load_model
+from transformers import CLIPModel, CLIPProcessor
+
+MODEL = "laion/CLIP-ViT-L-14-laion2B-s32B-b82K"
+DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
+
+model = CLIPModel.from_pretrained(MODEL)
+vision_model = model.vision_model
+vision_model.to(DEVICE)
+del model
+clip_processor = CLIPProcessor.from_pretrained(MODEL)
+
+rating_model = load_model("aesthetics_scorer_rating_openclip_vit_l_14.pth").to(DEVICE)
+artifacts_model = load_model("aesthetics_scorer_artifacts_openclip_vit_l_14.pth").to(DEVICE)
+
+def predict(img):
+    inputs = clip_processor(images=img, return_tensors="pt").to(DEVICE)
+    with torch.no_grad():
+        vision_output = vision_model(**inputs)
+    pooled_output = vision_output.pooler_output
+    embedding = preprocess(pooled_output)
+    with torch.no_grad():
+        rating = rating_model(embedding)
+        artifact = artifacts_model(embedding)
+    return rating.detach().cpu().item(), artifact.detach().cpu().item()
+
+gr.Interface(
+    title="Aesthetics Scorer",
+    fn=predict,
+    inputs=gr.Image(type="pil"),
+    outputs=[gr.Number(label="Rating ~1-10 (high is good)"), gr.Number(label="Artifacts ~1-5 (low is good)")]
+).launch()
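The UI wraps a single `predict` call. For quick testing without launching Gradio, a minimal sketch of the same pipeline; `example.jpg` is a placeholder path, not part of the commit:

    import torch
    from PIL import Image
    from transformers import CLIPModel, CLIPProcessor
    from model import preprocess, load_model

    MODEL = "laion/CLIP-ViT-L-14-laion2B-s32B-b82K"
    device = "cuda" if torch.cuda.is_available() else "cpu"

    vision_model = CLIPModel.from_pretrained(MODEL).vision_model.to(device)
    processor = CLIPProcessor.from_pretrained(MODEL)
    rating_model = load_model("aesthetics_scorer_rating_openclip_vit_l_14.pth").to(device)

    img = Image.open("example.jpg")  # placeholder image path
    inputs = processor(images=img, return_tensors="pt").to(device)
    with torch.no_grad():
        pooled = vision_model(**inputs).pooler_output  # 1024-dim pooled CLIP embedding
        rating = rating_model(preprocess(pooled))      # preprocess() L2-normalizes first
    print(round(rating.item(), 2))  # roughly 1-10, higher is better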
model.py
ADDED
@@ -0,0 +1,82 @@
+import torch
+import torch.nn as nn
+import json
+import os
+
+class AestheticScorer(nn.Module):
+    def __init__(self, input_size=0, use_activation=False, dropout=0.2, config=None, hidden_dim=1024, reduce_dims=False, output_activation=None):
+        super().__init__()
+        self.config = {
+            "input_size": input_size,
+            "use_activation": use_activation,
+            "dropout": dropout,
+            "hidden_dim": hidden_dim,
+            "reduce_dims": reduce_dims,
+            "output_activation": output_activation
+        }
+        if config is not None:
+            self.config.update(config)
+
+        layers = [
+            nn.Linear(self.config["input_size"], self.config["hidden_dim"]),
+            nn.ReLU() if self.config["use_activation"] else None,
+            nn.Dropout(self.config["dropout"]),
+
+            nn.Linear(self.config["hidden_dim"], round(self.config["hidden_dim"] / (2 if reduce_dims else 1))),
+            nn.ReLU() if self.config["use_activation"] else None,
+            nn.Dropout(self.config["dropout"]),
+
+            nn.Linear(round(self.config["hidden_dim"] / (2 if reduce_dims else 1)), round(self.config["hidden_dim"] / (4 if reduce_dims else 1))),
+            nn.ReLU() if self.config["use_activation"] else None,
+            nn.Dropout(self.config["dropout"]),
+
+            nn.Linear(round(self.config["hidden_dim"] / (4 if reduce_dims else 1)), round(self.config["hidden_dim"] / (8 if reduce_dims else 1))),
+            nn.ReLU() if self.config["use_activation"] else None,
+            nn.Linear(round(self.config["hidden_dim"] / (8 if reduce_dims else 1)), 1),
+        ]
+        if self.config["output_activation"] == "sigmoid":
+            layers.append(
+                nn.Sigmoid()
+            )
+        layers = [x for x in layers if x is not None]
+        self.layers = nn.Sequential(
+            *layers
+        )
+
+    def forward(self, x):
+        if self.config["output_activation"] == "sigmoid":
+            upper, lower = 10, 1
+            scale = upper - lower
+            return (self.layers(x) * scale) + lower
+        else:
+            return self.layers(x)
+
+    def save(self, save_name):
+        split_name = os.path.splitext(save_name)
+        with open(f"{split_name[0]}.config", "w") as outfile:
+            outfile.write(json.dumps(self.config, indent=4))
+
+        for i in range(6):  # saving sometimes fails (possibly a Windows issue), so retry up to 5 times
+            try:
+                torch.save(self.state_dict(), save_name)
+                break
+            except RuntimeError as e:
+                # retry only when the error says the file could not be opened
+                if "cannot be opened" in str(e) and i < 5:
+                    print("Model save failed, retrying...")
+                else:
+                    raise e
+
+
+def preprocess(embeddings):
+    return embeddings / embeddings.norm(p=2, dim=-1, keepdim=True)
+
+
+def load_model(weight_path, device='cuda' if torch.cuda.is_available() else 'cpu'):
+    split_path = os.path.splitext(weight_path)
+    with open(f"{split_path[0]}.config", "r") as config_file:
+        config = json.load(config_file)
+    model = AestheticScorer(config=config)
+    model.load_state_dict(torch.load(weight_path, map_location=device))
+    model.eval()
+    return model
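With `use_activation: false` and `reduce_dims: false`, as in both shipped configs, every `nn.ReLU()` entry is filtered out and the head reduces to four `Linear(1024, 1024)` layers plus a final `Linear(1024, 1)` (the 0.0 dropout is a no-op), with no output activation since `output_activation` is `null`. A minimal sanity check of `load_model`, assuming the `.pth`/`.config` pairs from this commit are in the working directory:

    import torch
    from model import load_model, preprocess

    model = load_model("aesthetics_scorer_rating_openclip_vit_l_14.pth")
    fake = preprocess(torch.randn(1, 1024))  # unit-norm stand-in for a CLIP pooled embedding
    with torch.no_grad():
        print(model(fake).item())  # a single scalar score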
requirements.txt
ADDED
@@ -0,0 +1,3 @@
+torch
+gradio
+transformers
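None of the dependencies are pinned. Since the README declares `sdk_version: 3.24.1`, pinning the same Gradio version locally (a suggestion, not part of this commit) would keep local runs in step with the Space's runtime:

    torch
    gradio==3.24.1
    transformers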