# -*- coding: utf-8 -*-
"""
Streamlit app: Prompt Generator from Image (NSFW-ready, self-hosted on Hugging Face Spaces)
- Backends: Gemini API (optional) + local open-source (Qwen2-VL 2B/7B)
- Detail modes: soft / artistic / raw
- JSONL export with policy fields (adult-only, consent)
- Simple keyword tag extractor (can be swapped for WD14/DeepDanbooru later)

NOTE: To use the local backend you must select a Qwen2-VL model that fits your Space hardware.
Suggested default for T4/low VRAM: "Qwen/Qwen2-VL-2B-Instruct" (loads in 4-bit if bitsandbytes is available).

Requirements (put these lines into requirements.txt):
----- requirements.txt -----
streamlit==1.37.1
Pillow
transformers>=4.45.0  # Qwen2-VL support landed in transformers 4.45
accelerate>=0.33.0
sentencepiece
safetensors
huggingface_hub
bitsandbytes; platform_system != 'Darwin'
google-generativeai==0.7.2  # only if you keep the Gemini option
---------------------------
"""
import os
import json
from datetime import datetime, timezone

import streamlit as st
from PIL import Image
# ===== Gemini (optional) =====
USE_GEMINI = True
try:
    import google.generativeai as genai  # type: ignore
except Exception:
    USE_GEMINI = False


def get_gemini_api_key() -> str:
    """Return the Gemini API key from SECRET_KEY or GOOGLE_API_KEY, if present."""
    return os.getenv("SECRET_KEY") or os.getenv("GOOGLE_API_KEY") or ""
# ===== Transformers (open-source backend) =====
import torch
from transformers import AutoProcessor, AutoModelForVision2Seq, BitsAndBytesConfig
# ---------------- UI CONFIG ----------------
st.set_page_config(page_title="🖼️ Prompt Generator from Image (NSFW-ready)", layout="wide")
st.title("🖼️ Prompt Generator from Image")
st.markdown("> Please try my other tool at: https://imgkey.lovable.app")
with st.sidebar:
    st.header("⚙️ Settings")

    # Gemini availability message
    gem_key = get_gemini_api_key() if USE_GEMINI else ""
    gem_ready = bool(gem_key)

    backend_opts = ["Local Qwen2-VL (Open-Source)"]
    if USE_GEMINI and gem_ready:
        backend_opts.append("Gemini API")
    elif USE_GEMINI and not gem_ready:
        backend_opts.append("Gemini API (key missing)")
    else:
        backend_opts.append("Gemini API (unavailable)")

    backend = st.selectbox("Backend", backend_opts, index=0)
    mode = st.selectbox("Detail level", ["soft", "artistic", "raw"], index=2)
    model_id = st.text_input(
        "HF Model (local backend)",
        value="Qwen/Qwen2-VL-2B-Instruct",
        help="Pick a Qwen2-VL Instruct model that fits your GPU (e.g., 2B/7B).",
    )
    max_tokens = st.slider("Max new tokens", 64, 512, 220, 8)
    temperature = st.slider("Temperature", 0.0, 1.2, 0.6, 0.05)

    # Gemini status badge
    if USE_GEMINI:
        if gem_ready:
            st.success("Gemini key detected (SECRET_KEY / GOOGLE_API_KEY)")
        else:
            st.warning("Gemini key not found. Add SECRET_KEY or GOOGLE_API_KEY in Space Secrets.")

    st.divider()
    st.subheader("🔐 Policy")
    st.caption("This app only describes consenting adults. It refuses illegal/underage/forced content.")
# ---------------- Helpers ----------------
@st.cache_resource(show_spinner=False)
def load_qwen(model_id: str):
    """Load the Qwen2-VL model + processor with best effort for the available hardware.

    Cached with st.cache_resource so the model loads once per Space,
    not on every button click.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    load_kwargs = {
        "torch_dtype": torch.float16 if device == "cuda" else torch.float32,
        "low_cpu_mem_usage": True,
        "trust_remote_code": True,
    }
    if device == "cuda":
        load_kwargs["device_map"] = "auto"
        try:
            import bitsandbytes  # noqa: F401 -- only checking availability
            load_kwargs["quantization_config"] = BitsAndBytesConfig(load_in_4bit=True)
        except Exception:
            pass  # fall back to plain fp16 if bitsandbytes is unavailable
    processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
    model = AutoModelForVision2Seq.from_pretrained(model_id, **load_kwargs)
    if device == "cpu":
        model.to(device)
    model.eval()
    return processor, model
def _system_prompt() -> str:
    return (
        "You are an expert adult-image describer. Describe only consenting adults. "
        "Never mention or imply minors, coercion, violence, incest, or illegal content."
    )


def _style_prompt(mode: str) -> str:
    if mode == "soft":
        return (
            "Write an elegant, suggestive description focusing on mood, wardrobe, and lighting. "
            "Avoid explicit anatomy terms."
        )
    if mode == "artistic":
        return (
            "Describe composition, pose, camera angle, lens, lighting, colors, materials, and setting."
        )
    # raw
    return (
        "Use precise, clinical adult anatomy/act terms when visible. "
        "Mention positions, garments, accessories, fluids, and interactions explicitly."
    )
def caption_with_qwen(image: Image.Image, mode: str, model_id: str, max_new_tokens=220, temperature=0.6):
    processor, model = load_qwen(model_id)
    messages = [
        {"role": "system", "content": _system_prompt()},
        {
            "role": "user",
            "content": [
                {"type": "image", "image": image},
                {
                    "type": "text",
                    "text": (
                        f"Describe the image in the '{mode}' style. "
                        + _style_prompt(mode)
                        + " Return a single coherent paragraph."
                    ),
                },
            ],
        },
    ]
    # Build chat template -> tokens
    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = processor(text=[text], images=[image], return_tensors="pt")
    device = next(model.parameters()).device
    inputs = {k: v.to(device) if hasattr(v, "to") else v for k, v in inputs.items()}
    with torch.no_grad():
        out = model.generate(
            **inputs,
            max_new_tokens=int(max_new_tokens),
            do_sample=True,
            temperature=float(temperature),
            top_p=0.9,
            repetition_penalty=1.05,
        )
    # Decode only the newly generated tokens, not the echoed prompt
    prompt_len = inputs["input_ids"].shape[1]
    decoded = processor.batch_decode(out[:, prompt_len:], skip_special_tokens=True)[0]
    return decoded.strip()
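# Example usage outside Streamlit (illustrative; "sample.jpg" is a placeholder):
#   from PIL import Image
#   img = Image.open("sample.jpg").convert("RGB")
#   print(caption_with_qwen(img, mode="artistic", model_id="Qwen/Qwen2-VL-2B-Instruct"))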
# Minimal keyword tagger from caption text
KEYWORDS = {
    "acts": [
        "oral", "blowjob", "doggy", "missionary", "cowgirl", "anal", "handjob",
        "cumshot", "facial", "masturbation", "kissing", "threesome", "spanking",
    ],
    "nudity": ["topless", "nude", "full", "see-through", "lingerie", "panties", "stockings"],
    "style": ["photo", "studio", "candid", "mirror", "selfie", "soft lighting", "hard lighting", "bokeh"],
}


def extract_tags(text: str):
    """Return the sorted set of known keywords that appear in `text` (case-insensitive)."""
    tags = set()
    low = text.lower()
    for group in KEYWORDS.values():
        for w in group:
            if w in low:
                tags.add(w)
    return sorted(tags)
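# Illustrative example (the caption text below is made up):
#   extract_tags("studio photo with soft lighting, model in lingerie")
#   -> ["lingerie", "photo", "soft lighting", "studio"]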
# Gemini helper
def get_gemini_response(image: Image.Image):
    if not USE_GEMINI:
        raise RuntimeError("Gemini library not available in this Space.")
    secret_key = get_gemini_api_key()
    if not secret_key:
        raise RuntimeError("Gemini API key not found. Set SECRET_KEY or GOOGLE_API_KEY in Space secrets.")
    genai.configure(api_key=secret_key)
    model = genai.GenerativeModel("gemini-2.0-flash")
    sys = (
        "You are a prompt generator for text-to-image models. "
        "When content is adult, describe it clinically without euphemism."
    )
    resp = model.generate_content([sys, image])
    return getattr(resp, "text", "")
# ---------------- LAYOUT ----------------
col1, col2 = st.columns(2)

with col1:
    st.markdown("### 📤 Upload Your Image")
    uploaded_file = st.file_uploader(
        "Drag and drop or click to upload an image...",
        type=["jpg", "jpeg", "png", "webp"],
        label_visibility="collapsed",
    )
    image = None
    if uploaded_file is not None:
        try:
            image = Image.open(uploaded_file).convert("RGB")
            st.image(image, caption="Uploaded Image", use_column_width=True)
        except Exception as e:
            st.error(f"Failed to open image: {e}")
with col2:
    st.markdown("### 🎯 Generated Prompt")
    if image is None:
        st.info("Please upload an image to generate a prompt.")
    elif st.button("✨ Generate Prompt", use_container_width=True):
        with st.spinner("Generating prompt..."):
            try:
                if backend.startswith("Local Qwen2-VL"):
                    prompt = caption_with_qwen(
                        image,
                        mode=mode,
                        model_id=model_id,
                        max_new_tokens=max_tokens,
                        temperature=temperature,
                    )
                else:
                    prompt = get_gemini_response(image)
                if not prompt:
                    st.warning("No text generated.")
                else:
                    st.code(prompt, language="markdown")
                    # Build JSON record
                    record = {
                        "timestamp": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
                        "image": uploaded_file.name,
                        "mode": mode if backend.startswith("Local") else "gemini_default",
                        "prompt": prompt,
                        "tags": extract_tags(prompt),
                        "policy": {"age": "adult_only", "consent": True},
                        "backend": "qwen2-vl" if backend.startswith("Local") else "gemini",
                        "model": model_id if backend.startswith("Local") else "gemini-2.0-flash",
                    }
                    st.json(record)
                    # Append to JSONL (note: Space disk is ephemeral unless persistent storage is enabled)
                    out_path = "captions.jsonl"
                    with open(out_path, "a", encoding="utf-8") as f:
                        f.write(json.dumps(record, ensure_ascii=False) + "\n")
                    st.success(f"Appended to {out_path}")
            except torch.cuda.OutOfMemoryError:
                st.error("CUDA OOM. Try a smaller model (e.g., Qwen2-VL-2B) or reduce max tokens.")
            except Exception as e:
                st.error(f"Generation failed: {e}")
# Footer
st.markdown("---")
st.caption(
    "This Space is intended for lawful, adult-only NSFW dataset preparation. "
    "You are responsible for compliance with local laws and platform policies."
)