kenjiqq committed
Commit b732a2c • 1 Parent(s): 80db878
.gitignore ADDED
@@ -0,0 +1,2 @@
+ venv/
+ __pycache__/
README.md CHANGED
@@ -1,7 +1,7 @@
  ---
  title: Aesthetics Scorer
- emoji: 🐒
- colorFrom: green
+ emoji: 📊
+ colorFrom: indigo
  colorTo: green
  sdk: gradio
  sdk_version: 3.24.1
aesthetics_scorer_artifacts_openclip_vit_l_14.config ADDED
@@ -0,0 +1,8 @@
+ {
+     "input_size": 1024,
+     "use_activation": false,
+     "dropout": 0.0,
+     "hidden_dim": 1024,
+     "reduce_dims": false,
+     "output_activation": null
+ }
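This config mirrors the keyword arguments of the AestheticScorer constructor in model.py below; load_model reads the .config file that sits next to the matching .pth weights and rebuilds the scoring head from it.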
aesthetics_scorer_artifacts_openclip_vit_l_14.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c4a9481fdbce5ff02b252bcb25109b9f3b29841289fadf7e79e884d59f9357d5
+ size 16801743
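The .pth entries are Git LFS pointer files: the commit records only a hash and size (about 16 MB per head), and running `git lfs pull` after cloning fetches the actual weights that app.py loads.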
aesthetics_scorer_rating_openclip_vit_l_14.config ADDED
@@ -0,0 +1,8 @@
+ {
+     "input_size": 1024,
+     "use_activation": false,
+     "dropout": 0.0,
+     "hidden_dim": 1024,
+     "reduce_dims": false,
+     "output_activation": null
+ }
aesthetics_scorer_rating_openclip_vit_l_14.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:eb7fe561369ab6c7dad34b9316a56d2c6070582f0323656148e1107a242cd666
+ size 16801623
app.py ADDED
@@ -0,0 +1,34 @@
+ import gradio as gr
+ import torch
+ from model import preprocess, load_model
+ from transformers import CLIPModel, CLIPProcessor
+ 
+ MODEL = "laion/CLIP-ViT-L-14-laion2B-s32B-b82K"
+ DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
+ 
+ model = CLIPModel.from_pretrained(MODEL)
+ vision_model = model.vision_model
+ vision_model.to(DEVICE)
+ del model
+ clip_processor = CLIPProcessor.from_pretrained(MODEL)
+ 
+ rating_model = load_model("aesthetics_scorer_rating_openclip_vit_l_14.pth").to(DEVICE)
+ artifacts_model = load_model("aesthetics_scorer_artifacts_openclip_vit_l_14.pth").to(DEVICE)
+ 
+ def predict(img):
+     inputs = clip_processor(images=img, return_tensors="pt").to(DEVICE)
+     with torch.no_grad():
+         vision_output = vision_model(**inputs)
+     pooled_output = vision_output.pooler_output
+     embedding = preprocess(pooled_output)
+     with torch.no_grad():
+         rating = rating_model(embedding)
+         artifact = artifacts_model(embedding)
+     return rating.detach().cpu().item(), artifact.detach().cpu().item()
+ 
+ gr.Interface(
+     title="Aesthetics Scorer",
+     fn=predict,
+     inputs=gr.Image(type="pil"),
+     outputs=[gr.Number(label="Rating ~1-10 (high is good)"), gr.Number(label="Artifacts ~1-5 (low is good)")]
+ ).launch()
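For reference, the same scoring pipeline can be driven without the Gradio UI. A minimal sketch, assuming this repo's files are importable; "test.jpg" is a hypothetical local image, not part of the commit:

import torch
from PIL import Image
from transformers import CLIPModel, CLIPProcessor
from model import preprocess, load_model

MODEL = "laion/CLIP-ViT-L-14-laion2B-s32B-b82K"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Same setup as app.py: keep only the CLIP vision tower.
vision_model = CLIPModel.from_pretrained(MODEL).vision_model.to(DEVICE).eval()
clip_processor = CLIPProcessor.from_pretrained(MODEL)
rating_model = load_model("aesthetics_scorer_rating_openclip_vit_l_14.pth").to(DEVICE)

img = Image.open("test.jpg").convert("RGB")  # hypothetical input image
inputs = clip_processor(images=img, return_tensors="pt").to(DEVICE)
with torch.no_grad():
    embedding = preprocess(vision_model(**inputs).pooler_output)  # L2-normalized pooled embedding
    print("rating:", rating_model(embedding).item())  # ~1-10, higher is better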
model.py ADDED
@@ -0,0 +1,82 @@
+ import torch
+ import torch.nn as nn
+ import json
+ import os
+ 
+ class AestheticScorer(nn.Module):
+     def __init__(self, input_size=0, use_activation=False, dropout=0.2, config=None, hidden_dim=1024, reduce_dims=False, output_activation=None):
+         super().__init__()
+         self.config = {
+             "input_size": input_size,
+             "use_activation": use_activation,
+             "dropout": dropout,
+             "hidden_dim": hidden_dim,
+             "reduce_dims": reduce_dims,
+             "output_activation": output_activation
+         }
+         if config is not None:
+             self.config.update(config)
+ 
+         layers = [
+             nn.Linear(self.config["input_size"], self.config["hidden_dim"]),
+             nn.ReLU() if self.config["use_activation"] else None,
+             nn.Dropout(self.config["dropout"]),
+ 
+             nn.Linear(self.config["hidden_dim"], round(self.config["hidden_dim"] / (2 if self.config["reduce_dims"] else 1))),
+             nn.ReLU() if self.config["use_activation"] else None,
+             nn.Dropout(self.config["dropout"]),
+ 
+             nn.Linear(round(self.config["hidden_dim"] / (2 if self.config["reduce_dims"] else 1)), round(self.config["hidden_dim"] / (4 if self.config["reduce_dims"] else 1))),
+             nn.ReLU() if self.config["use_activation"] else None,
+             nn.Dropout(self.config["dropout"]),
+ 
+             nn.Linear(round(self.config["hidden_dim"] / (4 if self.config["reduce_dims"] else 1)), round(self.config["hidden_dim"] / (8 if self.config["reduce_dims"] else 1))),
+             nn.ReLU() if self.config["use_activation"] else None,
+             nn.Linear(round(self.config["hidden_dim"] / (8 if self.config["reduce_dims"] else 1)), 1),
+         ]
+         if self.config["output_activation"] == "sigmoid":
+             layers.append(
+                 nn.Sigmoid()
+             )
+         layers = [x for x in layers if x is not None]
+         self.layers = nn.Sequential(
+             *layers
+         )
+ 
+     def forward(self, x):
+         if self.config["output_activation"] == "sigmoid":
+             upper, lower = 10, 1
+             scale = upper - lower
+             return (self.layers(x) * scale) + lower
+         else:
+             return self.layers(x)
+ 
+     def save(self, save_name):
+         split_name = os.path.splitext(save_name)
+         with open(f"{split_name[0]}.config", "w") as outfile:
+             outfile.write(json.dumps(self.config, indent=4))
+ 
+         for i in range(6):  # saving sometimes fails, so retry up to 5 times; might be a Windows issue
+             try:
+                 torch.save(self.state_dict(), save_name)
+                 break
+             except RuntimeError as e:
+                 # retry only if the error says the file cannot be opened
+                 if "cannot be opened" in str(e) and i < 5:
+                     print("Model save failed, retrying...")
+                 else:
+                     raise e
+ 
+ 
+ def preprocess(embeddings):
+     return embeddings / embeddings.norm(p=2, dim=-1, keepdim=True)
+ 
+ 
+ def load_model(weight_path, device='cuda' if torch.cuda.is_available() else 'cpu'):
+     split_path = os.path.splitext(weight_path)
+     with open(f"{split_path[0]}.config", "r") as config_file:
+         config = json.load(config_file)
+     model = AestheticScorer(config=config)
+     model.load_state_dict(torch.load(weight_path, map_location=device))
+     model.eval()
+     return model
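With the shipped configs (use_activation false, reduce_dims false, dropout 0.0, no output activation), each head reduces to four 1024→1024 linear layers followed by a final 1024→1 projection. A minimal smoke test, assuming the .pth and .config files above are present; the random tensor is only a stand-in for a real CLIP embedding:

import torch
from model import preprocess, load_model

embedding = preprocess(torch.randn(1, 1024))  # stand-in for a CLIP ViT-L/14 pooled output
rating_model = load_model("aesthetics_scorer_rating_openclip_vit_l_14.pth", device="cpu")
with torch.no_grad():
    score = rating_model(embedding)
print(score.shape)  # torch.Size([1, 1]): one scalar score per input row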
requirements.txt ADDED
@@ -0,0 +1,3 @@
+ torch
+ gradio
+ transformers
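To run the Space locally, it should be enough to fetch the LFS weights, install the dependencies with `pip install -r requirements.txt`, and start the demo with `python app.py`; the gr.Interface(...).launch() call at the end of app.py serves the UI.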