City committed on
Commit 218e10f
1 Parent(s): a5ed63a

Initial version

README.md CHANGED
@@ -1,11 +1,12 @@
 ---
 title: CityAesthetics Demo
-emoji: 🐨
+emoji: 🏙️
 colorFrom: blue
 colorTo: yellow
 sdk: gradio
-sdk_version: 4.3.0
+sdk_version: 3.23.0
 app_file: app.py
+models: [city96/CityAesthetics]
 pinned: false
 license: apache-2.0
 ---
app.py ADDED
@@ -0,0 +1,113 @@
+import os
+import torch
+import gradio as gr
+from transformers import CLIPVisionModelWithProjection, CLIPImageProcessor
+from huggingface_hub import hf_hub_download
+from safetensors.torch import load_file
+
+from model import AestheticPredictorModel
+
+HFREPO = "City96/CityAesthetics"
+MODELS = [
+    "CityAesthetics-Anime-v1.8",
+]
+
+class CityAestheticsPipeline:
+    """
+    Demo pipeline for [image=>score] prediction
+    Accepts a list of model paths on initialization.
+    Resulting object can be called directly with a PIL image as the input.
+    Returns a dict with the model name as key and the score [0.0;1.0] as a value.
+    """
+    def __init__(self, model_paths):
+        self.models = {}
+        for path in model_paths:
+            name = os.path.splitext(os.path.basename(path))[0]
+            self.models[name] = self.load_model(path)
+
+        clip_ver = "openai/clip-vit-large-patch14"
+        self.proc = CLIPImageProcessor.from_pretrained(clip_ver)
+        self.clip = CLIPVisionModelWithProjection.from_pretrained(clip_ver)
+        print("CityAesthetics: Pipeline init ok") # debug
+
+    def load_model(self, path):
+        sd = load_file(path)
+        assert tuple(sd["up.0.weight"].shape) == (1024, 768) # only allow CLIP ver
+        model = AestheticPredictorModel()
+        model.load_state_dict(sd)
+        model.eval()
+        return model
+
+    def __call__(self, raw):
+        img = self.proc(images=raw, return_tensors="pt")
+        with torch.no_grad():
+            emb = self.clip(pixel_values=img["pixel_values"])
+        emb = emb["image_embeds"].detach().cpu()
+        out = {}
+        for name, model in self.models.items():
+            pred = model(emb)
+            out[name] = float(pred.squeeze(0))
+        return out
+
+def get_model_path(name):
+    fname = f"{name}.safetensors"
+
+    # local path: [models/AesPred-Anime-v1.8.safetensors]
+    path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "models")
+    if os.path.isfile(os.path.join(path, fname)):
+        print("CityAesthetics: Using local model")
+        return os.path.join(path, fname)
+
+    # huggingface hub fallback
+    print("CityAesthetics: Using HF Hub model")
+    return str(hf_hub_download(
+        token = os.environ.get("HFS_TOKEN") or True,
+        repo_id = HFREPO,
+        filename = fname,
+        # subfolder = fname.split('-')[1],
+    ))
+
+article = """\
+# About
+
+This is the live demo for the CityAesthetics class of predictors.
+
+For more information, you can check out the [Huggingface Hub](https://huggingface.co/city96/CityAesthetics) or [GitHub page](https://github.com/city96/CityAesthetics).
+
+## CityAesthetics-Anime
+
+This flavor is optimized for scoring anime images with at least one subject present.
+
+### Intentional biases:
+
+- Completely negative towards real-life photos (ideal score of 0%)
+- Strongly negative towards text (subtitles, memes, etc.) and manga panels
+- Fairly negative towards 3D and, to some extent, 2.5D images
+- Negative towards western cartoons and stylized images (chibi, parody)
+
+### Expected output scores:
+
+- Non-anime images should always score below 20%
+- Sketches/rough lineart/oekaki get around 20-40%
+- Flat shading/TV anime gets around 40-50%
+- Above 50% is mostly scored based on my personal style preferences
+
+### Issues:
+
+- Tends to filter male characters.
+- Requires at least one subject; won't work for scenery/landscapes.
+- Noticeable positive bias towards anime characters with animal ears.
+- Hit-or-miss with AI-generated images due to style/quality not being correlated.
+"""
+
+pipeline = CityAestheticsPipeline([get_model_path(x) for x in MODELS])
+gr.Interface(
+    fn = pipeline,
+    title = "CityAesthetics demo",
+    article = article,
+    inputs = gr.Image(label="Input image", type="pil"),
+    outputs = gr.Label(label="Model prediction", show_label=False),
+    examples = "./examples",
+    allow_flagging = "never",
+    analytics_enabled = False,
+).launch()
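
For reference, a minimal sketch of the same [image => CLIP embedding => score] flow that `CityAestheticsPipeline` implements, run without the Gradio wrapper. This is only an illustration: `test.png` is a placeholder input, and the repo id and filename simply mirror the `HFREPO` and `MODELS` constants above (a public checkpoint at the repo root is assumed, so no token is passed).

```python
import torch
from PIL import Image
from transformers import CLIPVisionModelWithProjection, CLIPImageProcessor
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file

from model import AestheticPredictorModel

# CLIP image encoder used to turn the image into a 768-dim embedding
clip_ver = "openai/clip-vit-large-patch14"
proc = CLIPImageProcessor.from_pretrained(clip_ver)
clip = CLIPVisionModelWithProjection.from_pretrained(clip_ver)

# Download the predictor weights and load them into the model from model.py
predictor = AestheticPredictorModel()
predictor.load_state_dict(load_file(hf_hub_download(
    repo_id="City96/CityAesthetics",
    filename="CityAesthetics-Anime-v1.8.safetensors",
)))
predictor.eval()

# Score a single image ("test.png" is a placeholder path)
image = Image.open("test.png").convert("RGB")
inputs = proc(images=image, return_tensors="pt")
with torch.no_grad():
    emb = clip(pixel_values=inputs["pixel_values"])["image_embeds"]
    score = float(predictor(emb).squeeze())
print(f"CityAesthetics-Anime-v1.8: {score:.2%}")
```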
examples/eru1.webp ADDED
examples/eru2.webp ADDED
examples/pass1.webp ADDED
examples/pass2.webp ADDED
model.py ADDED
@@ -0,0 +1,44 @@
+import torch
+import torch.nn as nn
+
+class ResBlock(nn.Module):
+    """Block with residuals"""
+    def __init__(self, ch):
+        super().__init__()
+        self.join = nn.ReLU()
+        self.long = nn.Sequential(
+            nn.Linear(ch, ch),
+            nn.LeakyReLU(0.1),
+            nn.Linear(ch, ch),
+            nn.LeakyReLU(0.1),
+            nn.Linear(ch, ch),
+        )
+    def forward(self, x):
+        return self.join(self.long(x) + x)
+
+class AestheticPredictorModel(nn.Module):
+    """
+    Main predictor class. Original:
+    https://github.com/city96/CityAesthetics/blob/main/model.py
+    """
+    def __init__(self, features=768, hidden=1024):
+        super().__init__()
+        self.features = features
+        self.hidden = hidden
+        self.up = nn.Sequential(
+            nn.Linear(self.features, self.hidden),
+            ResBlock(ch=self.hidden),
+        )
+        self.down = nn.Sequential(
+            nn.Linear(self.hidden, 128),
+            nn.Linear(128, 64),
+            nn.Dropout(0.1),
+            nn.LeakyReLU(),
+            nn.Linear(64, 32),
+            nn.Linear(32, 1),
+            nn.Tanh(),
+        )
+    def forward(self, x):
+        y = self.up(x)
+        z = self.down(y)
+        return (z+1.0)/2.0
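
As a quick sanity check on the predictor's contract (768-dim CLIP embeddings in, one score per row in [0.0;1.0] out, since the `Tanh` output `z` is rescaled to `(z+1)/2`), a short sketch using randomly initialized weights rather than the trained checkpoint:

```python
import torch
from model import AestheticPredictorModel

model = AestheticPredictorModel()  # untrained weights, only to show shapes/ranges
model.eval()
with torch.no_grad():
    scores = model(torch.randn(4, 768))  # a batch of 4 fake CLIP embeddings
print(scores.shape)                              # torch.Size([4, 1])
print(scores.min().item(), scores.max().item())  # both within [0.0, 1.0]
```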
requirements.txt ADDED
@@ -0,0 +1,3 @@
+torch==2.1.0
+safetensors==0.4.0
+transformers==4.35.0