Mavthunder committed
Commit de7814e · verified · 1 Parent(s): cf39539

Update app.py

Files changed (1):
  app.py  +86 -76
app.py CHANGED
@@ -1,91 +1,101 @@
import gradio as gr
- from PIL import Image, ImageEnhance
- import numpy as np
import torch
- from transformers import AutoProcessor, AutoModel, pipeline, ViTFeatureExtractor, ViTForImageClassification, CLIPProcessor
- import cv2

- device = "cuda" if torch.cuda.is_available() else "cpu"

- # Aesthetic Scorer: rsinema/aesthetic-scorer (public)
- ae_processor = CLIPProcessor.from_pretrained("rsinema/aesthetic-scorer")
- ae_model = AutoModel.from_pretrained("rsinema/aesthetic-scorer").to(device)
- ae_model.eval()

- def aesthetic_score(img_pil):
-     inputs = ae_processor(images=img_pil, return_tensors="pt")["pixel_values"].to(device)
    with torch.no_grad():
-         scores = ae_model(inputs)
-     # scores returns 7 dims; first is overall aesthetic
-     return float(scores[0][0].item())
-
- # Enhancement using public Zero-DCE model
- zero_dce_pipe = pipeline(
-     "image-enhancement",
-     model="nateraw/zero-dce",
-     device=0 if torch.cuda.is_available() else -1
- )

- def enhance_image(img_pil):
-     enhanced = zero_dce_pipe(img_pil)
-     return enhanced[0]

- # Image Classifier (ViT)
- cls_ext = ViTFeatureExtractor.from_pretrained("google/vit-base-patch16-224")
- cls_model = ViTForImageClassification.from_pretrained("google/vit-base-patch16-224").to(device)
- cls_model.eval()

- def classify_image(img_pil):
-     inputs = cls_ext(images=img_pil, return_tensors="pt").to(device)
    with torch.no_grad():
-         logits = cls_model(**inputs).logits
-     label = cls_model.config.id2label[logits.argmax(-1).item()].lower()
-     return label

- # Category-specific vibes
- CATEGORY_VIBES = {
-     "person": [...],  # same presets as before
-     "food": [...],
-     "landscape": [...],
-     "default": [...],
- }

- def apply_adjustments(img, exposure, contrast, saturation, warmth, clarity):
-     img = img.convert("RGB")
-     if exposure: img = ImageEnhance.Brightness(img).enhance(2**exposure)
-     if contrast: img = ImageEnhance.Contrast(img).enhance(1 + contrast)
-     if saturation: img = ImageEnhance.Color(img).enhance(1 + saturation)
-     if clarity:
-         arr = np.array(img).astype(np.float32)
-         arr = np.clip(arr * (1 + clarity), 0, 255).astype(np.uint8)
-         img = Image.fromarray(arr)
-     if warmth:
-         r, g, b = img.split()
-         r = r.point(lambda i: min(255, i*(1+warmth)))
-         b = b.point(lambda i: min(255, i*(1-warmth)))
-         img = Image.merge("RGB",(r,g,b))
-     return img

- def process(image):
-     enhanced = enhance_image(image)
-     label = classify_image(enhanced)
-     vibes = CATEGORY_VIBES.get(label, CATEGORY_VIBES["default"])
-
-     best, best_score, best_name = None, -float("inf"), None
-     for vibe in vibes:
-         out = apply_adjustments(enhanced, **vibe)
-         score = aesthetic_score(out)
-         if score > best_score:
-             best, best_score, best_name = out, score, vibe["name"]

-     return best, f"Classified as {label} → Chosen style: {best_name} (score {best_score:.2f})"

- demo = gr.Interface(
-     fn=process,
-     inputs=gr.Image(type="pil"),
-     outputs=[gr.Image(type="pil"), gr.Text()],
-     title="Content-Aware Aesthetic AI (Public)",
-     description="Enhance → classify → apply category vibes → score with public aesthetic model"
- )
- if __name__ == "__main__":
-     demo.launch()
import gradio as gr
import torch
+ import torch.nn as nn
+ from transformers import CLIPProcessor, CLIPModel
+ from PIL import Image
+ import numpy as np

+ # -----------------------------
+ # 1. Zero-DCE model (light enhancement)
+ # -----------------------------
+ class ZeroDCE(nn.Module):
+     def __init__(self):
+         super(ZeroDCE, self).__init__()
+         self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
+         self.conv2 = nn.Conv2d(32, 32, 3, padding=1)
+         self.conv3 = nn.Conv2d(32, 32, 3, padding=1)
+         self.conv4 = nn.Conv2d(32, 24, 3, padding=1)
+         self.relu = nn.ReLU(inplace=True)

+     def forward(self, x):
+         x1 = self.relu(self.conv1(x))
+         x2 = self.relu(self.conv2(x1))
+         x3 = self.relu(self.conv3(x2))
+         x_r = torch.tanh(self.conv4(x3))
+         return x_r

+ def enhance_image(img, model):
+     img_tensor = torch.from_numpy(np.array(img)).float() / 255.0
+     img_tensor = img_tensor.permute(2, 0, 1).unsqueeze(0).to(device)
    with torch.no_grad():
+         enhanced = model(img_tensor)
+     enhanced = enhanced.squeeze(0).permute(1, 2, 0).cpu().numpy()
+     enhanced = np.clip(enhanced * 255, 0, 255).astype(np.uint8)
+     return Image.fromarray(enhanced)

+ # -----------------------------
+ # 2. Aesthetic Scoring Model
+ # -----------------------------
+ class AestheticPredictor(nn.Module):
+     def __init__(self):
+         super().__init__()
+         self.clip = CLIPModel.from_pretrained("openai/clip-vit-base-patch16")
+         self.mlp = nn.Sequential(
+             nn.Linear(self.clip.config.projection_dim, 512),
+             nn.ReLU(),
+             nn.Linear(512, 1)
+         )

+     def forward(self, pixel_values, input_ids, attention_mask):
+         outputs = self.clip(pixel_values=pixel_values, input_ids=input_ids, attention_mask=attention_mask)
+         pooled_output = outputs.pooler_output
+         return self.mlp(pooled_output)

+ def score_image(image, processor, model):
+     inputs = processor(text=["aesthetic photo"], images=image, return_tensors="pt", padding=True).to(device)
    with torch.no_grad():
+         score = model(**inputs)
+     return score.item()

+ # -----------------------------
+ # 3. Pipeline function
+ # -----------------------------
+ def process_image(input_img):
+     # Step 1: enhance
+     enhanced_img = enhance_image(input_img, zero_dce)

+     # Step 2: aesthetic scoring
+     original_score = score_image(input_img, processor, ae_model)
+     enhanced_score = score_image(enhanced_img, processor, ae_model)

+     # Step 3: choose best
+     if enhanced_score > original_score:
+         return enhanced_img, f"Enhanced chosen (score {enhanced_score:.2f} vs {original_score:.2f})"
+     else:
+         return input_img, f"Original kept (score {original_score:.2f} vs {enhanced_score:.2f})"
+
+ # -----------------------------
+ # 4. Setup
+ # -----------------------------
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+ zero_dce = ZeroDCE().to(device)
+ ae_model = AestheticPredictor().to(device)
+ processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch16")
+
+ # -----------------------------
+ # 5. Gradio UI
+ # -----------------------------
+ with gr.Blocks() as demo:
+     gr.Markdown("## 📸 AI Photo Enhancer with Aesthetic Scoring")
+
+     with gr.Row():
+         inp = gr.Image(type="pil", label="Upload your photo")
+         out = gr.Image(type="pil", label="Best looking result")

+     info = gr.Label(label="Result Info")
+
+     btn = gr.Button("Enhance ✨")
+     btn.click(process_image, inputs=inp, outputs=[out, info])

+ demo.launch()
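
In the new enhance_image, the raw network output is converted straight back to an image; in the original Zero-DCE formulation, though, the 24 channels produced by conv4 are eight iterations of per-pixel RGB curve maps, not pixel values. A minimal sketch of how those curve maps are typically applied, assuming the 8 x 3-channel layout used by the reference implementation (apply_curve_maps is an illustrative helper, not part of app.py):

import torch

def apply_curve_maps(x, x_r, iterations=8):
    # x:   (1, 3, H, W) input image scaled to [0, 1]
    # x_r: (1, 24, H, W) curve maps in [-1, 1], split into 8 RGB maps
    curve_maps = torch.split(x_r, 3, dim=1)
    for r in curve_maps[:iterations]:
        # Zero-DCE light-enhancement curve: LE(x) = x + r * (x^2 - x)
        x = x + r * (x ** 2 - x)
    return x.clamp(0, 1)

Under that reading, enhance_image would run apply_curve_maps(img_tensor, model(img_tensor)) and convert the result to a PIL image; without pretrained Zero-DCE weights the predicted curves remain random in either case.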
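
For the aesthetic scorer (section 2), AestheticPredictor.forward reads outputs.pooler_output, but the combined CLIPModel output in current transformers releases keeps its pooled vectors inside text_model_output and vision_model_output rather than exposing a top-level pooler_output, and the MLP head starts from random weights, so its scores are not yet meaningful. One possible image-only variant built on CLIPModel.get_image_features, whose output width matches projection_dim (ImageAestheticScorer is an illustrative name, and loading trained head weights is assumed to happen separately):

import torch
import torch.nn as nn
from transformers import CLIPModel, CLIPProcessor

class ImageAestheticScorer(nn.Module):
    def __init__(self, clip_name="openai/clip-vit-base-patch16"):
        super().__init__()
        self.clip = CLIPModel.from_pretrained(clip_name)
        self.head = nn.Sequential(
            nn.Linear(self.clip.config.projection_dim, 512),
            nn.ReLU(),
            nn.Linear(512, 1),
        )

    @torch.no_grad()
    def score(self, image, processor):
        # Embed the image with CLIP's projected image features, then score it
        device = next(self.parameters()).device
        pixel_values = processor(images=image, return_tensors="pt")["pixel_values"].to(device)
        feats = self.clip.get_image_features(pixel_values=pixel_values)  # (1, projection_dim)
        return self.head(feats).item()

In process_image this would stand in for score_image, e.g. original_score = scorer.score(input_img, processor), applied identically to the original and the enhanced image.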