|
|
""" |
|
|
π¨ AI Image Editor Pro - Streamlit Version |
|
|
============================================= |
|
|
A private, self-hosted AI image editing tool using open-source models. |
|
|
Runs on Hugging Face Spaces with Streamlit SDK. |
|
|
Now with advanced Gemini-style instruction understanding! |
|
|
""" |
|
|
|
|
|
import os |
|
|
import gc |
|
|
import re |
|
|
import torch |
|
|
import numpy as np |
|
|
import streamlit as st |
|
|
from PIL import Image |
|
|
from typing import Tuple, Optional, Dict, List |
|
|
from io import BytesIO |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Configure the Streamlit page. This must run before any other st.* command
# (Streamlit requires set_page_config to be the first call on each rerun).
st.set_page_config(
    page_title="π¨ AI Image Editor Pro",
    page_icon="π¨",
    layout="wide",
    initial_sidebar_state="expanded"
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Pick the compute device once at import time. Half precision is only safe
# (and beneficial) on CUDA, so CPU execution falls back to float32.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
DTYPE = torch.float16 if DEVICE == "cuda" else torch.float32
# Hugging Face model ids loaded by the cached loaders below.
# NOTE(review): the "runwayml/stable-diffusion-inpainting" repo has previously
# been removed from the Hugging Face Hub — confirm it still resolves, or pin a
# maintained mirror of the SD 1.5 inpainting weights.
INPAINT_MODEL = "runwayml/stable-diffusion-inpainting"
CLIPSEG_MODEL = "CIDAS/clipseg-rd64-refined"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class GeminiStyleParser:
    """
    Advanced natural language parser that understands complex editing instructions
    like Google Gemini. Handles various phrasings, synonyms, and compound commands.

    parse() classifies an instruction as remove/add/replace/change/enhance and
    returns a (target, replacement_prompt, confidence) triple ready for the
    CLIPSeg + inpainting pipeline.

    All keyword detection is whole-word based (regex word boundaries) so that,
    for example, "place" inside "replace" or "tan" inside "standing" can never
    trigger a false match.
    """

    # Verbs that signal deleting an object. Checked first in
    # detect_action_type because several overlap with other intents.
    REMOVE_KEYWORDS = [
        "remove", "delete", "erase", "get rid of", "take out", "eliminate",
        "clear", "wipe", "clean up", "take away", "disappear", "vanish",
        "make disappear", "get away", "rid of", "cut out", "crop out",
        "hide", "discard", "throw away", "dispose", "extract", "pull out",
        "subtract", "minus", "without", "lose", "drop", "ditch", "nix",
        "scratch", "strike", "zap", "nuke", "kill", "destroy", "obliterate"
    ]

    # Verbs that signal substituting one thing for another.
    REPLACE_KEYWORDS = [
        "replace", "swap", "switch", "substitute", "exchange", "trade",
        "put", "place", "add", "insert", "set", "change to", "turn into",
        "transform to", "convert to", "make it", "make this", "transform into",
        "morph into", "become", "evolve into", "shift to"
    ]

    # Verbs that signal modifying an existing object (often a color change).
    CHANGE_KEYWORDS = [
        "change", "modify", "alter", "adjust", "edit", "transform",
        "convert", "turn", "make", "update", "recolor", "repaint",
        "tint", "color", "paint", "dye", "shade", "hue", "tone",
        "brighten", "darken", "lighten", "saturate", "desaturate"
    ]

    # Verbs that signal inserting new content.
    ADD_KEYWORDS = [
        "add", "insert", "put", "place", "include", "attach",
        "append", "introduce", "bring", "create", "generate",
        "draw", "paint", "render", "give", "apply", "overlay"
    ]

    # Verbs that signal a general quality improvement.
    ENHANCE_KEYWORDS = [
        "enhance", "improve", "beautify", "upgrade", "refine",
        "polish", "perfect", "optimize", "boost", "amplify",
        "sharpen", "clarify", "fix", "repair", "restore"
    ]

    # Connectors that separate "<target>" from "<new content>" in
    # replace/change instructions ("replace X with Y").
    PREPOSITIONS = [
        "with", "to", "into", "as", "by", "for", "from",
        "using", "via", "through", "in place of", "instead of"
    ]

    # Bare color name -> richer color description used in generated prompts.
    COLORS = {
        "red": "vibrant red colored",
        "blue": "deep blue colored",
        "green": "lush green colored",
        "yellow": "bright yellow colored",
        "orange": "warm orange colored",
        "purple": "rich purple colored",
        "pink": "soft pink colored",
        "black": "pure black colored",
        "white": "clean white colored",
        "gold": "shimmering golden colored",
        "silver": "metallic silver colored",
        "brown": "natural brown colored",
        "gray": "neutral gray colored",
        "grey": "neutral grey colored",
        "cyan": "cyan turquoise colored",
        "magenta": "vivid magenta colored",
        "teal": "elegant teal colored",
        "navy": "deep navy blue colored",
        "maroon": "rich maroon colored",
        "olive": "earthy olive colored",
        "coral": "beautiful coral colored",
        "beige": "soft beige colored",
        "tan": "warm tan colored",
        "cream": "creamy off-white colored",
        "mint": "fresh mint green colored",
        "lavender": "delicate lavender colored",
        "rose": "romantic rose colored",
        "burgundy": "deep burgundy colored",
        "bronze": "warm bronze colored"
    }

    # Canonical CLIPSeg label -> phrasings users may type for it.
    OBJECT_SYNONYMS = {
        "person": ["person", "human", "man", "woman", "people", "guy", "girl", "boy", "lady", "gentleman", "individual", "figure", "someone", "somebody", "pedestrian"],
        "sky": ["sky", "clouds", "heaven", "atmosphere", "air above", "skyline"],
        "car": ["car", "vehicle", "automobile", "auto", "ride", "wheels", "sedan", "suv", "truck", "van"],
        "background": ["background", "backdrop", "behind", "scenery", "setting", "surroundings", "environment"],
        "text": ["text", "words", "letters", "writing", "inscription", "watermark", "logo", "signature", "label", "caption"],
        "grass": ["grass", "lawn", "turf", "field", "meadow", "greenery"],
        "tree": ["tree", "plant", "vegetation", "foliage", "bush", "shrub"],
        "water": ["water", "ocean", "sea", "lake", "river", "pond", "pool", "stream"],
        "building": ["building", "house", "structure", "architecture", "construction", "edifice"],
        "animal": ["animal", "pet", "creature", "dog", "cat", "bird"],
        "face": ["face", "facial", "head", "portrait", "visage"],
        "hair": ["hair", "hairstyle", "locks", "mane", "tresses"],
        "clothes": ["clothes", "clothing", "outfit", "dress", "shirt", "pants", "garment", "attire", "wear"],
        "wall": ["wall", "walls", "surface", "partition"],
        "floor": ["floor", "ground", "flooring", "surface below"],
        "window": ["window", "glass", "pane", "windowpane"],
        "door": ["door", "doorway", "entrance", "entry", "gate"]
    }

    # Style/scene word -> full hand-tuned prompt replacement.
    STYLE_TRANSFORMS = {
        "sunset": "beautiful golden sunset sky with orange and pink clouds, dramatic lighting",
        "sunrise": "stunning sunrise with warm golden light, peaceful morning atmosphere",
        "night": "dark nighttime scene with stars, moonlit atmosphere",
        "day": "bright daylight, clear blue sky, natural sunlight",
        "winter": "snowy winter scene, frost covered, cold atmosphere",
        "summer": "bright summer day, warm sunny atmosphere",
        "autumn": "fall colors, orange and brown leaves, autumn atmosphere",
        "spring": "fresh spring scene, blooming flowers, new growth",
        "rain": "rainy weather, wet surfaces, overcast sky",
        "snow": "heavy snowfall, white snow covered, winter wonderland",
        "foggy": "misty foggy atmosphere, soft diffused light",
        "stormy": "dramatic stormy sky, dark clouds, lightning",
        "vintage": "vintage retro aesthetic, warm sepia tones, nostalgic feel",
        "cyberpunk": "neon cyberpunk aesthetic, futuristic, glowing lights",
        "fantasy": "magical fantasy scene, ethereal atmosphere, dreamlike",
        "realistic": "photorealistic, natural, lifelike quality",
        "cartoon": "cartoon animated style, colorful, illustrated",
        "anime": "anime style, japanese animation aesthetic",
        "watercolor": "watercolor painting style, soft brushstrokes",
        "oil painting": "oil painting style, rich textures, artistic",
        "sketch": "pencil sketch style, hand-drawn look",
        "cinematic": "cinematic movie quality, dramatic lighting, film-like",
        "hdr": "high dynamic range, vivid colors, enhanced contrast",
        "dreamy": "soft dreamy atmosphere, ethereal glow, romantic",
        "dramatic": "dramatic lighting, high contrast, intense mood",
        "peaceful": "calm peaceful atmosphere, serene, tranquil",
        "scary": "dark scary atmosphere, horror aesthetic, ominous",
        "happy": "bright cheerful atmosphere, joyful, vibrant colors",
        "sad": "melancholic atmosphere, muted colors, somber mood"
    }

    def __init__(self):
        # Confidence (0.0 - 1.0) of the most recent parse() call.
        self.last_confidence = 0.0
        # Human-readable summary of how the last instruction was understood.
        self.interpretation = ""

    @staticmethod
    def _has_word(text: str, phrase: str) -> bool:
        """Return True if `phrase` occurs in `text` as (a) whole word(s).

        Fixes the substring-matching bug where e.g. "place" matched inside
        "replace" and "nix" matched inside "unix".
        """
        return re.search(r'\b' + re.escape(phrase) + r'\b', text) is not None

    @staticmethod
    def _split_after(text: str, phrase: str) -> str:
        """Return the text following the first whole-word occurrence of `phrase`."""
        return re.split(r'\b' + re.escape(phrase) + r'\b', text, maxsplit=1)[-1].strip()

    def normalize_text(self, text: str) -> str:
        """Normalize input text for better parsing."""
        text = text.lower().strip()
        # Collapse runs of whitespace to single spaces.
        text = re.sub(r'\s+', ' ', text)
        # Drop trailing punctuation; it would otherwise stick to the last word.
        text = re.sub(r'[.,!?;:]+$', '', text)
        # Expand common contractions so downstream word matching is uniform.
        text = text.replace("don't", "do not")
        text = text.replace("can't", "cannot")
        text = text.replace("won't", "will not")
        text = text.replace("i'd", "i would")
        text = text.replace("i'm", "i am")
        text = text.replace("it's", "it is")
        return text

    def extract_target_object(self, text: str) -> str:
        """Strip filler words (articles, politeness phrases) from a description."""
        filler_words = ["the", "a", "an", "this", "that", "those", "these", "my", "your", "please", "kindly", "can you", "could you", "would you", "i want to", "i'd like to", "i would like to"]
        result = text
        for filler in filler_words:
            # re.escape is a safety net; current fillers have no metacharacters.
            result = re.sub(r'\b' + re.escape(filler) + r'\b', '', result, flags=re.IGNORECASE)
        return result.strip()

    def find_best_synonym(self, target: str) -> str:
        """Map a free-form object description onto a canonical CLIPSeg label."""
        target_lower = target.lower()

        # Guard: an empty string is a substring of everything and would
        # spuriously match the first synonym group ("person").
        if not target_lower:
            return target

        for main_object, synonyms in self.OBJECT_SYNONYMS.items():
            for synonym in synonyms:
                if synonym in target_lower or target_lower in synonym:
                    return main_object

        # Unknown object: pass it straight through to CLIPSeg.
        return target

    def enhance_prompt(self, prompt: str) -> str:
        """Enhance the replacement prompt for better inpainting results."""
        prompt_lower = prompt.lower()

        # A recognized style/scene word replaces the whole prompt with a
        # richer, hand-tuned description.
        for style_key, style_value in self.STYLE_TRANSFORMS.items():
            if self._has_word(prompt_lower, style_key):
                return f"{style_value}, high quality, detailed, professional"

        # Expand bare color names into fuller color descriptions. Whole-word
        # substitution only ("tan" must not rewrite "standing").
        for color_key, color_value in self.COLORS.items():
            prompt = re.sub(r'\b' + re.escape(color_key) + r'\b', color_value, prompt)

        # Append generic quality boosters unless the prompt already has some.
        quality_terms = ["high quality", "detailed", "professional", "beautiful", "stunning"]
        has_quality = any(term in prompt_lower for term in quality_terms)
        if not has_quality:
            prompt = f"{prompt}, high quality, detailed, professional photography"

        return prompt

    def detect_action_type(self, text: str) -> str:
        """Detect the type of editing action requested.

        Order matters: remove wins over add, add over replace, etc., because
        several verbs appear in more than one keyword list.
        """
        text_lower = text.lower()

        for keyword in self.REMOVE_KEYWORDS:
            if self._has_word(text_lower, keyword):
                return "remove"

        for keyword in self.ADD_KEYWORDS:
            if self._has_word(text_lower, keyword):
                return "add"

        for keyword in self.REPLACE_KEYWORDS:
            if self._has_word(text_lower, keyword):
                return "replace"

        for keyword in self.CHANGE_KEYWORDS:
            if self._has_word(text_lower, keyword):
                return "change"

        for keyword in self.ENHANCE_KEYWORDS:
            if self._has_word(text_lower, keyword):
                return "enhance"

        return "general"

    def parse(self, instruction: str) -> Tuple[str, str, float]:
        """
        Parse the instruction and return (target, replacement_prompt, confidence).
        This is the main parsing method that handles all types of instructions.
        """
        normalized = self.normalize_text(instruction)
        action_type = self.detect_action_type(normalized)

        target = ""
        replacement = ""
        confidence = 0.5

        if action_type == "remove":
            # Everything after the first remove-verb is the object description.
            for keyword in self.REMOVE_KEYWORDS:
                if self._has_word(normalized, keyword):
                    target = self._split_after(normalized, keyword)
                    break

            target = self.extract_target_object(target)
            target = self.find_best_synonym(target)
            replacement = "clean empty background, seamless natural texture, nothing there, blank space"
            confidence = 0.85
            self.interpretation = f"ποΈ Remove: Detecting and removing '{target}'"

        elif action_type == "add":
            # The mask covers the main subject; the text after the verb
            # describes what to paint in.
            for keyword in self.ADD_KEYWORDS:
                if self._has_word(normalized, keyword):
                    target = "main subject area"
                    replacement = self._split_after(normalized, keyword)
                    break

            replacement = self.extract_target_object(replacement)
            replacement = self.enhance_prompt(replacement)
            confidence = 0.75
            self.interpretation = f"β Add: Adding '{replacement}' to the image"

        elif action_type == "replace":
            # Pattern: "<verb> <target> <preposition> <new content>".
            preposition_found = False
            for prep in self.PREPOSITIONS:
                if f" {prep} " in normalized:
                    parts = normalized.split(f" {prep} ", 1)

                    # Left side: drop the action verbs, keep the object.
                    first_part = parts[0]
                    for keyword in self.REPLACE_KEYWORDS + self.CHANGE_KEYWORDS:
                        first_part = re.sub(r'\b' + re.escape(keyword) + r'\b', '', first_part)
                    target = self.extract_target_object(first_part)
                    target = self.find_best_synonym(target)

                    # Right side: the replacement description.
                    replacement = self.extract_target_object(parts[1])
                    replacement = self.enhance_prompt(replacement)

                    preposition_found = True
                    confidence = 0.9
                    break

            if not preposition_found:
                # No "... with/to ..." pattern: fall back to a generic swap
                # of whatever follows the verb.
                for keyword in self.REPLACE_KEYWORDS:
                    if self._has_word(normalized, keyword):
                        target = self._split_after(normalized, keyword)
                        target = self.extract_target_object(target)
                        target = self.find_best_synonym(target)
                        replacement = "something different, new object, alternative"
                        confidence = 0.6
                        break

            self.interpretation = f"π Replace: Replacing '{target}' with '{replacement[:50]}...'"

        elif action_type == "change":
            # Pattern: "change <target> to/into/as <new state>".
            preposition_found = False
            for prep in ["to", "into", "as"]:
                if f" {prep} " in normalized:
                    parts = normalized.split(f" {prep} ", 1)

                    first_part = parts[0]
                    for keyword in self.CHANGE_KEYWORDS:
                        first_part = re.sub(r'\b' + re.escape(keyword) + r'\b', '', first_part)
                    target = self.extract_target_object(first_part)
                    target = self.find_best_synonym(target)

                    new_state = self.extract_target_object(parts[1])

                    # Keep the object identity while describing its new state.
                    replacement = f"{target} that is {new_state}, {self.enhance_prompt(new_state)}"

                    preposition_found = True
                    confidence = 0.85
                    break

            if not preposition_found:
                # "make the car red" style: look for a bare color word.
                for color in self.COLORS.keys():
                    if self._has_word(normalized, color):
                        target = "main subject"
                        replacement = f"{self.COLORS[color]}, high quality, detailed"
                        confidence = 0.8
                        preposition_found = True
                        break

            if not preposition_found:
                # Last resort: treat the whole instruction as the new look.
                target = "main subject"
                replacement = self.enhance_prompt(normalized)
                confidence = 0.6

            self.interpretation = f"βοΈ Change: Modifying '{target}' β '{replacement[:50]}...'"

        elif action_type == "enhance":
            target = "main subject"
            replacement = "enhanced improved professional high quality detailed stunning beautiful"
            confidence = 0.7
            self.interpretation = "β¨ Enhance: Improving overall image quality"

        else:
            # Unrecognized verb. A very short instruction is probably just an
            # object name the user wants removed; anything longer is treated
            # as a creative free-form prompt.
            words = normalized.split()
            if len(words) <= 3:
                target = self.find_best_synonym(normalized)
                replacement = "clean empty background, seamless natural texture"
                confidence = 0.5
                self.interpretation = f"π€ Guessing: You might want to remove '{target}'?"
            else:
                target = "main subject area"
                replacement = self.enhance_prompt(normalized)
                confidence = 0.5
                self.interpretation = f"π¨ Creative: Applying '{replacement[:50]}...'"

        # Never return empty strings; downstream code assumes both are usable.
        target = target.strip() if target else "main subject"
        replacement = replacement.strip() if replacement else "improved version"

        self.last_confidence = confidence

        return target, replacement, confidence
|
|
|
|
|
|
|
|
|
|
|
# Shared parser instance reused across reruns of the Streamlit script.
gemini_parser = GeminiStyleParser()


def parse_instruction(instruction: str) -> Tuple[str, str]:
    """Backward-compatible wrapper around GeminiStyleParser.parse.

    Returns only the (target, replacement) pair, discarding the
    confidence score that newer callers read from the parser directly.
    """
    target, replacement, _confidence = gemini_parser.parse(instruction)
    return (target, replacement)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@st.cache_resource
def load_inpaint_pipeline():
    """Build the Stable Diffusion inpainting pipeline once per process.

    The pipeline is cached by Streamlit, moved to the active device, and
    configured with memory-saving options appropriate for that device.
    """
    # Imported lazily so the heavy diffusers dependency is only paid for
    # when an edit is actually requested.
    from diffusers import StableDiffusionInpaintPipeline

    pipeline = StableDiffusionInpaintPipeline.from_pretrained(
        INPAINT_MODEL,
        torch_dtype=DTYPE,
        safety_checker=None,
        requires_safety_checker=False
    ).to(DEVICE)

    if DEVICE != "cuda":
        # Maximum attention slicing keeps peak memory low on CPU.
        pipeline.enable_attention_slicing(1)
        return pipeline

    pipeline.enable_attention_slicing()
    try:
        # Optional speed-up; xformers may not be installed.
        pipeline.enable_xformers_memory_efficient_attention()
    except Exception:
        pass
    return pipeline
|
|
|
|
|
|
|
|
@st.cache_resource
def load_clipseg():
    """Load the CLIPSeg processor/model pair used for text-driven masking.

    Cached by Streamlit so the weights are downloaded and initialized
    only once per process. Returns (processor, model).
    """
    # Lazy import keeps app start-up fast when masking is never used.
    from transformers import CLIPSegProcessor, CLIPSegForImageSegmentation

    seg_processor = CLIPSegProcessor.from_pretrained(CLIPSEG_MODEL)
    seg_model = CLIPSegForImageSegmentation.from_pretrained(CLIPSEG_MODEL).to(DEVICE)
    seg_model.eval()  # inference only
    return seg_processor, seg_model
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def generate_mask_clipseg(
    image: Image.Image,
    target_text: str,
    threshold: float = 0.3,
    expand_pixels: int = 10
) -> Optional[Image.Image]:
    """Generate a segmentation mask using CLIPSeg with enhanced detection.

    Tries several phrasings of `target_text`, keeps the prediction with the
    highest peak activation, binarizes it at `threshold`, then dilates and
    feathers the result so the inpainting blends at the edges.

    Returns a grayscale ("L") mask the same size as `image` (white = edit
    region), or None if detection failed or raised.
    """
    try:
        processor, model = load_clipseg()

        # Several phrasings of the same target; CLIPSeg can respond quite
        # differently to "cat" vs "a cat" vs "photo of cat".
        target_variations = [
            target_text,
            f"a {target_text}",
            f"the {target_text}",
            f"{target_text} in photo",
            f"photo of {target_text}"
        ]

        best_mask = None
        best_score = 0

        for variation in target_variations:
            inputs = processor(
                text=[variation],
                images=[image],
                padding=True,
                return_tensors="pt"
            )
            # Processor returns CPU tensors; move them to the model's device.
            inputs = {k: v.to(DEVICE) for k, v in inputs.items()}

            with torch.no_grad():
                outputs = model(**inputs)
                preds = outputs.logits

            # Sigmoid maps logits to [0, 1] per-pixel confidence.
            pred = torch.sigmoid(preds[0]).cpu().numpy()
            # Peak activation as a proxy for "how confidently was anything found".
            score = pred.max()

            if score > best_score:
                best_score = score
                best_mask = pred

        if best_mask is None:
            return None

        # CLIPSeg predicts at a fixed low resolution; upscale to image size.
        pred_pil = Image.fromarray((best_mask * 255).astype(np.uint8))
        pred_resized = pred_pil.resize(image.size, Image.BILINEAR)
        pred_array = np.array(pred_resized)

        # Binarize: confidence above `threshold` becomes white (edit region).
        mask = (pred_array > (threshold * 255)).astype(np.uint8) * 255

        if expand_pixels > 0:
            from PIL import ImageFilter
            mask_image = Image.fromarray(mask, mode="L")
            # Morphological dilation; MaxFilter size must be odd (2k + 1).
            mask_image = mask_image.filter(
                ImageFilter.MaxFilter(size=expand_pixels * 2 + 1)
            )
            # Feather the edge so the inpainted patch blends smoothly.
            mask_image = mask_image.filter(
                ImageFilter.GaussianBlur(radius=3)
            )
            return mask_image

        return Image.fromarray(mask, mode="L")

    except Exception as e:
        # Best-effort: surface the error in the UI and let the caller
        # treat None as "no mask available".
        st.error(f"Mask generation error: {str(e)}")
        return None
|
|
|
|
|
|
|
|
def process_manual_mask(mask_image: Image.Image, target_size: Tuple[int, int]) -> Image.Image:
    """Normalize a user-supplied mask: grayscale, resized, hard-binarized.

    White (255) marks pixels to repaint, black (0) pixels to keep.
    """
    resized = mask_image.convert("L").resize(target_size, Image.LANCZOS)
    # Threshold at the midpoint so anti-aliased edges from the resize
    # collapse back into a crisp 0/255 mask.
    binary = np.where(np.array(resized) > 127, 255, 0).astype(np.uint8)
    return Image.fromarray(binary, mode="L")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def inpaint_image(
    image: Image.Image,
    mask: Image.Image,
    prompt: str,
    negative_prompt: str = "blurry, bad quality, distorted, ugly, deformed, low resolution, pixelated, jpeg artifacts, watermark, text, logo",
    num_inference_steps: int = 30,
    guidance_scale: float = 7.5
) -> Optional[Image.Image]:
    """Inpaint the masked region of an image with enhanced prompts.

    The image/mask pair is resized to the 512x512 resolution SD 1.x was
    trained at, the diffusion pipeline repaints the white mask area, and the
    result is scaled back to the original size.

    Returns the edited image, or None if the pipeline raised.
    """
    try:
        pipe = load_inpaint_pipeline()

        # Remember the caller's resolution so the result can be scaled back.
        original_size = image.size
        target_size = (512, 512)

        image_resized = image.resize(target_size, Image.LANCZOS)
        # NEAREST keeps the mask hard-edged; smooth interpolation would
        # create gray "partially masked" pixels.
        mask_resized = mask.resize(target_size, Image.NEAREST)

        if image_resized.mode != "RGB":
            image_resized = image_resized.convert("RGB")

        # Cap steps on CPU to keep a single edit under a few minutes.
        if DEVICE == "cpu":
            num_inference_steps = min(num_inference_steps, 20)

        # Generic quality boosters appended to every prompt.
        enhanced_prompt = f"{prompt}, masterpiece, best quality, highly detailed, sharp focus, professional"

        with torch.inference_mode():
            result = pipe(
                prompt=enhanced_prompt,
                negative_prompt=negative_prompt,
                image=image_resized,
                mask_image=mask_resized,
                num_inference_steps=num_inference_steps,
                guidance_scale=guidance_scale
            ).images[0]

        # Back to the caller's resolution.
        result = result.resize(original_size, Image.LANCZOS)

        # Reclaim the pipeline's intermediate buffers on memory-tight CPU runs.
        if DEVICE == "cpu":
            gc.collect()

        return result

    except Exception as e:
        # Best-effort: show the error in the UI; caller treats None as failure.
        st.error(f"Inpainting error: {str(e)}")
        return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def inject_custom_css():
    """Inject custom CSS for a more professional look.

    Streamlit has no first-class theming hook for these details, so the
    styles are injected as a raw <style> block via st.markdown with
    unsafe_allow_html. Must be called once near the top of main().
    """
    st.markdown("""
    <style>
    /* Dark theme with gradients */
    .stApp {
        background: linear-gradient(135deg, #1a1a2e 0%, #16213e 50%, #0f3460 100%);
    }

    /* Styled headers */
    h1 {
        background: linear-gradient(90deg, #e94560, #0f3460);
        -webkit-background-clip: text;
        -webkit-text-fill-color: transparent;
        font-size: 2.5rem !important;
    }

    /* Card-like containers */
    .stButton > button {
        background: linear-gradient(90deg, #e94560, #533483);
        border: none;
        border-radius: 10px;
        font-weight: bold;
        transition: all 0.3s ease;
    }

    .stButton > button:hover {
        transform: translateY(-2px);
        box-shadow: 0 5px 20px rgba(233, 69, 96, 0.4);
    }

    /* Styled file uploader */
    .stFileUploader {
        border: 2px dashed #e94560;
        border-radius: 15px;
        padding: 20px;
    }

    /* Confidence indicator */
    .confidence-high {
        color: #4ade80;
        font-weight: bold;
    }

    .confidence-medium {
        color: #fbbf24;
        font-weight: bold;
    }

    .confidence-low {
        color: #f87171;
        font-weight: bold;
    }

    /* Interpretation box */
    .interpretation-box {
        background: rgba(233, 69, 96, 0.1);
        border-left: 4px solid #e94560;
        padding: 10px 15px;
        border-radius: 0 10px 10px 0;
        margin: 10px 0;
    }

    /* Pro badge */
    .pro-badge {
        background: linear-gradient(90deg, #e94560, #533483);
        padding: 2px 10px;
        border-radius: 20px;
        font-size: 0.8rem;
        font-weight: bold;
        color: white;
    }

    /* Smooth transitions */
    * {
        transition: background-color 0.3s ease, color 0.3s ease;
    }
    </style>
    """, unsafe_allow_html=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def main():
    """Render the full Streamlit UI and drive the edit workflow.

    Layout: sidebar with settings, two columns (input on the left, result on
    the right), then a tips section and footer. Re-runs top-to-bottom on
    every user interaction, as all Streamlit scripts do.
    """
    inject_custom_css()

    # --- Header -----------------------------------------------------------
    st.markdown("""
    <div style="display: flex; align-items: center; gap: 10px;">
        <h1>π¨ AI Image Editor</h1>
        <span class="pro-badge">PRO</span>
    </div>
    """, unsafe_allow_html=True)

    st.markdown("**Gemini-style image editing with advanced prompt understanding - 100% Private!**")

    # --- Sidebar: detection + generation settings -------------------------
    with st.sidebar:
        st.header("βοΈ Settings")

        auto_mask = st.checkbox(
            "π Auto-detect region",
            value=True,
            help="Automatically find the object to edit using AI"
        )

        st.markdown("---")
        st.subheader("ποΈ Advanced Options")

        # Threshold fed to generate_mask_clipseg; lower accepts more pixels.
        mask_threshold = st.slider(
            "Detection Sensitivity",
            min_value=0.1,
            max_value=0.9,
            value=0.25,
            step=0.05,
            help="Lower = larger detection area"
        )

        # Dilation radius applied to the detected mask before inpainting.
        mask_expansion = st.slider(
            "Mask Expansion (px)",
            min_value=0,
            max_value=50,
            value=15,
            step=2,
            help="Expand the detected area for better blending"
        )

        # Default lower on CPU to keep edit times tolerable.
        num_steps = st.slider(
            "Quality Steps",
            min_value=10,
            max_value=50,
            value=20 if DEVICE == "cpu" else 35,
            step=5,
            help="More = better quality but slower"
        )

        guidance_scale = st.slider(
            "Prompt Strength",
            min_value=1.0,
            max_value=15.0,
            value=8.5,
            step=0.5,
            help="Higher = more closely follows your instructions"
        )

        st.markdown("---")
        device_emoji = "π" if DEVICE == "cuda" else "π»"
        st.info(f"{device_emoji} Device: **{DEVICE.upper()}**")

        if DEVICE == "cpu":
            st.warning("β οΈ Running on CPU. Edits may take 1-3 minutes.")
        else:
            st.success("β
GPU detected! Fast processing enabled.")

    # --- Main two-column layout -------------------------------------------
    col1, col2 = st.columns(2)

    with col1:
        st.subheader("π· Upload Image")
        uploaded_file = st.file_uploader(
            "Choose an image",
            type=["png", "jpg", "jpeg", "webp", "bmp"],
            label_visibility="collapsed"
        )

        image = None
        if uploaded_file is not None:
            # Force RGB so downstream resize/inpaint never sees RGBA/palette.
            image = Image.open(uploaded_file).convert("RGB")
            st.image(image, caption="Original Image", use_container_width=True)

        st.subheader("βοΈ What would you like to change?")
        instruction = st.text_area(
            "Describe your edit naturally",
            placeholder="Examples:\nβ’ 'Remove the person in the background'\nβ’ 'Replace the sky with a sunset'\nβ’ 'Make the car red'\nβ’ 'Add a rainbow'\nβ’ 'Turn the grass into snow'\nβ’ 'Delete the watermark'",
            label_visibility="collapsed",
            height=120
        )

        # Live preview of how the parser understood the instruction, shown
        # before the user commits to running the (slow) edit.
        if instruction:
            target_preview, replacement_preview, confidence = gemini_parser.parse(instruction)

            confidence_class = "high" if confidence >= 0.8 else "medium" if confidence >= 0.6 else "low"
            confidence_pct = int(confidence * 100)

            st.markdown(f"""
            <div class="interpretation-box">
                <strong>π§ Understanding:</strong> {gemini_parser.interpretation}<br>
                <span class="confidence-{confidence_class}">Confidence: {confidence_pct}%</span>
            </div>
            """, unsafe_allow_html=True)

        # Manual mask upload is only offered when auto-detection is off.
        mask_file = None
        if not auto_mask:
            st.subheader("π Manual Mask")
            mask_file = st.file_uploader(
                "Upload a black & white mask (white = area to edit)",
                type=["png", "jpg", "jpeg"],
                key="mask"
            )

        edit_clicked = st.button(
            "π¨ Apply Edit",
            type="primary",
            use_container_width=True,
            disabled=(uploaded_file is None or not instruction)
        )

    with col2:
        st.subheader("β¨ Result")
        # Placeholders so result/mask/status/download can be filled in-place
        # later without reordering the layout.
        result_placeholder = st.empty()
        mask_placeholder = st.empty()
        status_placeholder = st.empty()
        download_placeholder = st.empty()

        if edit_clicked and image is not None and instruction:
            try:
                # Re-parse (text may have changed since the preview render).
                target, replacement_prompt, confidence = gemini_parser.parse(instruction)

                status_placeholder.info(f"π― **Target:** `{target}`\n\nβ¨ **Generating:** `{replacement_prompt[:100]}...`")

                # Choose the mask source: manual upload beats auto-detection.
                # NOTE(review): st.stop() raises Streamlit's internal
                # StopException, which the broad `except Exception` below will
                # catch and report as a spurious "Error: ..." — confirm, and
                # consider re-raising it.
                if mask_file is not None:
                    mask_img = Image.open(mask_file)
                    final_mask = process_manual_mask(mask_img, image.size)
                    status_placeholder.info("π Using manual mask...")
                elif auto_mask:
                    with st.spinner(f"π AI detecting '{target}'..."):
                        final_mask = generate_mask_clipseg(
                            image=image,
                            target_text=target,
                            threshold=mask_threshold,
                            expand_pixels=mask_expansion
                        )
                    if final_mask is None:
                        st.error("Failed to generate mask")
                        st.stop()
                else:
                    st.error("Please upload a mask or enable auto-detection!")
                    st.stop()

                # An all-dark mask means nothing was detected; retry once with
                # half the threshold and double the expansion.
                mask_array = np.array(final_mask)
                if mask_array.max() < 128:
                    st.warning(f"β οΈ Could not confidently detect '{target}'. Trying with broader detection...")

                    final_mask = generate_mask_clipseg(
                        image=image,
                        target_text=target,
                        threshold=mask_threshold * 0.5,
                        expand_pixels=mask_expansion * 2
                    )
                    if final_mask is None or np.array(final_mask).max() < 128:
                        st.error(f"β Still could not detect '{target}'. Try different wording or upload a mask.")
                        st.stop()

                mask_placeholder.image(final_mask, caption="π Detected Area", use_container_width=True)

                # Run the actual diffusion edit.
                with st.spinner("π¨ AI is editing your image... This may take a moment."):
                    result = inpaint_image(
                        image=image,
                        mask=final_mask,
                        prompt=replacement_prompt,
                        num_inference_steps=num_steps,
                        guidance_scale=guidance_scale
                    )

                if result is not None:
                    result_placeholder.image(result, caption="β
Edited Image", use_container_width=True)
                    status_placeholder.success("β
Edit complete!")

                    # Offer the result as a PNG download.
                    buf = BytesIO()
                    result.save(buf, format="PNG")
                    download_placeholder.download_button(
                        label="π₯ Download Result",
                        data=buf.getvalue(),
                        file_name="edited_image.png",
                        mime="image/png",
                        use_container_width=True
                    )
                else:
                    st.error("Inpainting failed")

            except Exception as e:
                st.error(f"β Error: {str(e)}")

        elif uploaded_file is None:
            result_placeholder.info("π Upload an image to get started")

    # --- Tips section ------------------------------------------------------
    st.markdown("---")
    st.subheader("π‘ Pro Tips & Examples")

    c1, c2, c3, c4 = st.columns(4)

    with c1:
        st.markdown("""
        **ποΈ Remove Objects:**
        - `remove the person`
        - `delete the watermark`
        - `erase the car`
        - `get rid of the background`
        - `take out the text`
        """)

    with c2:
        st.markdown("""
        **π Replace Objects:**
        - `replace sky with sunset`
        - `swap the car with a bike`
        - `change background to beach`
        - `turn grass into snow`
        """)

    with c3:
        st.markdown("""
        **π¨ Change Colors:**
        - `make the car red`
        - `change dress to blue`
        - `turn hair blonde`
        - `paint walls white`
        """)

    with c4:
        st.markdown("""
        **β¨ Transform Styles:**
        - `make it sunset lighting`
        - `turn into winter scene`
        - `add cyberpunk aesthetic`
        - `make it cinematic`
        """)

    # --- Footer -------------------------------------------------------------
    st.markdown("---")
    st.markdown(
        """<center>π <b>Privacy First</b> - All processing happens locally. No data sent to external APIs.<br>
        <span style="color: #888;">Powered by Stable Diffusion + CLIPSeg | Created with β€οΈ</span></center>""",
        unsafe_allow_html=True
    )
|
|
|
|
|
|
|
|
# Script entry point: render the Streamlit app.
if __name__ == "__main__":
    main()
|
|
|