Spaces:

Athspi
/

Gttg

Sleeping

File size: 6,393 Bytes

605bf7b
b0a339e
8e6ca2b
a36d15c
 
8e6ca2b
 
 
e5c238d
 
b0a339e
9479bea
8e6ca2b
 
b0a339e
a36d15c
 
 
 
 
b0a339e
5a42ff8
b0a339e
 
5a42ff8
 
b0a339e
5a42ff8
 
 
 
 
 
 
 
 
b0a339e
4c9f740
 
8489828
7b62da2
 
 
5a42ff8
4c9f740
7b62da2
4c9f740
7b62da2
 
ca3c534
61acb23
ca3c534
61acb23
ca3c534
61acb23
0327eea
 
 
 
ca3c534
0327eea
ca3c534
0327eea
 
 
ca3c534
0327eea
ca3c534
0327eea
ca3c534
0327eea
7b62da2
 
 
 
4c9f740
7b62da2
 
 
 
 
 
 
 
 
 
4c9f740
7b62da2
b0a339e
5a42ff8
4c9f740
b0a339e
 
 
5a42ff8
b0a339e
4c9f740
68780eb
4c9f740
b0a339e
5a42ff8
 
 
 
 
4c9f740
 
 
68780eb
4c9f740
5a42ff8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4c9f740
5a42ff8
8e6ca2b
a36d15c
68780eb
a36d15c
 
 
 
 
5a42ff8
 
7b62da2
b0a339e
5a42ff8
 
b0a339e
4c9f740
b0a339e
4c9f740
 
 
7b62da2
 
4c9f740
7b62da2
4c9f740
 
5a42ff8
4c9f740
5a42ff8
b0a339e
4c9f740
b0a339e
 
5a42ff8
b0a339e
5a42ff8
a36d15c
5a42ff8
8e6ca2b
 
2269b2d

import os
import base64
import mimetypes
from flask import Flask, render_template, request, jsonify
from werkzeug.utils import secure_filename
from google import genai
from google.genai import types

# Flask application object; the route decorators further down register on it.
app = Flask(__name__)

# The API key is read eagerly: a missing GEMINI_API_KEY raises KeyError here.
GEMINI_API_KEY = os.environ["GEMINI_API_KEY"]
client = genai.Client(api_key=GEMINI_API_KEY)

# Directory for staged uploads and directory for generated results.
UPLOAD_FOLDER = "uploads"
RESULT_FOLDER = "static"
for _folder in (UPLOAD_FOLDER, RESULT_FOLDER):
    os.makedirs(_folder, exist_ok=True)

def upload_image(image_data_url):
    """Decode a base64 data URL, stage it on disk, and upload it to Gemini.

    Args:
        image_data_url: A ``<header>,<base64 payload>`` string. Everything
            before the first comma is the header; a header containing "png"
            selects a ``.png`` suffix, anything else ``.jpg``.

    Returns:
        The object returned by ``client.files.upload`` for the staged file.

    Raises:
        ValueError: If the input is not a string, has no comma separator,
            is not decodable base64, or decodes to zero bytes.
    """
    import tempfile  # local import keeps this block self-contained

    if not isinstance(image_data_url, str):
        raise ValueError("Invalid image data")

    header, separator, encoded = image_data_url.partition(',')
    if not separator:
        raise ValueError("Invalid image data")

    try:
        binary_data = base64.b64decode(encoded)
    except ValueError as err:  # binascii.Error is a ValueError subclass
        raise ValueError("Invalid image data") from err
    if not binary_data:
        # An empty payload can never be a usable image; fail before uploading.
        raise ValueError("Invalid image data")

    ext = ".png" if "png" in header.lower() else ".jpg"

    # A unique name per call keeps concurrent requests from overwriting each
    # other's staged image (the previous fixed "temp_image" name was shared).
    with tempfile.NamedTemporaryFile(
        mode="wb",
        prefix="temp_image_",
        suffix=ext,
        dir=UPLOAD_FOLDER,
        delete=False,
    ) as f:
        f.write(binary_data)
        temp_filepath = f.name

    try:
        return client.files.upload(file=temp_filepath)
    finally:
        # The local copy is only needed for the upload call; remove it so the
        # upload folder does not grow without bound.
        try:
            os.remove(temp_filepath)
        except OSError:
            pass

def is_prohibited_request(uploaded_file, object_type):
    """Ask a Gemini text model whether the removal request must be rejected.

    The uploaded image and the text ``Remove <object_type>`` are sent together
    with a system instruction that tells the model to answer "yes" (reject)
    or "no" (approve).

    Args:
        uploaded_file: Object exposing ``uri`` and ``mime_type`` for the
            already-uploaded image.
        object_type: User-supplied description of what should be removed.

    Returns:
        True when the model's answer starts with "yes". False for any other
        answer, for an empty response, and when the API call raises (the
        check fails open; the error is printed).
    """
    model = "gemini-2.0-pro-exp-02-05"
    parts = [
        types.Part.from_uri(file_uri=uploaded_file.uri, mime_type=uploaded_file.mime_type),
        types.Part.from_text(text=f"Remove {object_type}")
    ]

    contents = [types.Content(role="user", parts=parts)]

    # The heading of section 2 previously read (Respond with "No"), which
    # contradicted both its own body and the yes=reject convention used by the
    # rest of the prompt; it now says "yes". The rest of the prompt is unchanged.
    generate_content_config = types.GenerateContentConfig(
        system_instruction=[
            types.Part.from_text(text="""Reject Requests That Require AI to Generate a Human, Animal, or Their Features (Respond with "yes")

If the user asks to remove or modify human or animal features (e.g., "Remove eyes," "Remove mouth," "Remove face," "Remove hair from person," "Remove a tattoo on skin," "Remove a logo printed on a person's body") → AI should reject the request and respond with "yes."

If removing something would require AI to generate missing human or animal body parts → AI should reject the request and respond with "yes."



2. Reject Requests That Require AI to Recreate Text on a Person's Body (Respond with "yes")

If the user asks to remove a watermark, logo, or text that is on a person’s body → AI should reject the request and respond with "yes."

However, if the watermark or text is on an object or background (not a person/animal) → **AI can approve and respond with "no."



3. Approve Object and Background Removal (Respond with "no")

If the user asks to remove non-living objects (e.g., "Remove sunglasses on table," "Remove phone in hand," "Remove background") → **AI should approve and respond with "no."

If the user asks to remove a person or animal without generating a replacement (e.g., "Remove person," "Remove animal") → **AI should approve and respond with "no." answer by yes or no
  """)
        ],
        temperature=0.0,
        max_output_tokens=1,
    )

    try:
        response = client.models.generate_content(
            model=model,
            contents=contents,
            config=generate_content_config
        )
        if not response.candidates:
            return False
        content = response.candidates[0].content
        if not content or not content.parts:
            return False
        # The text may be missing, or carry casing/punctuation ("Yes."), so
        # normalise it and compare on the prefix instead of exact equality.
        answer = (content.parts[0].text or "").strip().lower()
        return answer.startswith("yes")
    except Exception as e:
        # Fail open: an error in the check lets the request through.
        print(f"Prohibition check error: {str(e)}")
        return False

def generate_gemini_output(object_type, uploaded_file):
    """Stream an edited image from Gemini and save it under RESULT_FOLDER.

    Sends the uploaded image plus the text ``Remove <object_type> from the
    image`` to the image-generation model and writes any inline image data
    found in the streamed response to ``generated_output<ext>``.

    Args:
        object_type: User-supplied description of what should be removed.
        uploaded_file: Object exposing ``uri`` and ``mime_type`` for the
            already-uploaded image.

    Returns:
        Path of the written image file, or None when the stream contained no
        inline image data. If several image parts arrive, the last one wins.
    """
    model = "gemini-2.0-flash-exp-image-generation"
    parts = [
        types.Part.from_uri(file_uri=uploaded_file.uri, mime_type=uploaded_file.mime_type),
        types.Part.from_text(text=f"Remove {object_type} from the image")
    ]

    contents = [types.Content(role="user", parts=parts)]

    generate_content_config = types.GenerateContentConfig(
        temperature=1,
        top_p=0.95,
        top_k=40,
        max_output_tokens=8192,
        response_modalities=["image", "text"],
        safety_settings=[
            types.SafetySetting(category="HARM_CATEGORY_CIVIC_INTEGRITY", threshold="OFF"),
        ],
    )

    result_image = None
    for chunk in client.models.generate_content_stream(
        model=model,
        contents=contents,
        config=generate_content_config,
    ):
        if not chunk.candidates:
            continue
        content = chunk.candidates[0].content
        # A candidate may carry no content at all; skip it instead of
        # failing on ``None.parts``.
        if not content or not content.parts:
            continue

        # Inspect every part: the image is not guaranteed to be the first
        # part of a chunk when text and image are both requested.
        for part in content.parts:
            inline = part.inline_data
            if not inline or not inline.data:
                continue
            # guess_extension() cannot take None, so fall back to "" (which
            # yields no match and therefore the ".png" default).
            file_extension = mimetypes.guess_extension(inline.mime_type or "") or ".png"
            # NOTE(review): the output name is fixed, so concurrent requests
            # overwrite each other's result; consider a per-request name.
            output_filename = secure_filename("generated_output" + file_extension)
            result_image_path = os.path.join(RESULT_FOLDER, output_filename)
            with open(result_image_path, "wb") as f:
                f.write(inline.data)
            result_image = result_image_path

    return result_image

@app.route("/")
def index():
    """Serve the landing page rendered from the ``index.html`` template."""
    page = render_template("index.html")
    return page

@app.route("/process", methods=["POST"])
def process():
    """Handle an object-removal request posted as JSON.

    Expects a JSON object with ``image`` (a base64 data URL) and
    ``objectType`` (text describing what to remove).

    Returns:
        A ``(json, status)`` pair or a bare JSON response:
        * 400 when the body is not a JSON object, a field is missing, empty
          or not a string, the image data cannot be decoded, or the request
          is classified as prohibited;
        * 500 when no image was generated or an unexpected error occurred;
        * 200 with ``resultPath`` pointing at the generated file otherwise.
    """
    try:
        # silent=True yields None for an unparsable body instead of raising,
        # so malformed JSON is reported as a client error rather than a 500.
        data = request.get_json(force=True, silent=True)
        if not isinstance(data, dict):
            return jsonify({"success": False, "message": "Missing required data"}), 400

        image_data = data.get("image")
        object_type = data.get("objectType", "")
        # Non-string values would otherwise blow up on .strip()/.split().
        if not isinstance(image_data, str) or not isinstance(object_type, str):
            return jsonify({"success": False, "message": "Missing required data"}), 400

        object_type = object_type.strip()
        if not image_data or not object_type:
            return jsonify({"success": False, "message": "Missing required data"}), 400

        # Upload image once; bad image data is the caller's fault, not ours.
        try:
            uploaded_file = upload_image(image_data)
        except ValueError as err:
            return jsonify({"success": False, "message": f"Error: {str(err)}"}), 400

        # Check for prohibited requests
        if is_prohibited_request(uploaded_file, object_type):
            return jsonify({
                "success": False,
                "message": "Sorry, I can't assist with removing people or animals."
            }), 400

        # Generate output if allowed
        result_image = generate_gemini_output(object_type, uploaded_file)

        if not result_image:
            return jsonify({"success": False, "message": "Failed to generate image"}), 500

        return jsonify({
            "success": True,
            "resultPath": f"/static/{os.path.basename(result_image)}"
        })

    except Exception as e:
        # Last-resort boundary so the client always receives a JSON body.
        return jsonify({"success": False, "message": f"Error: {str(e)}"}), 500

if __name__ == "__main__":
    # When executed as a script, serve on every interface at port 7860.
    app.run(port=7860, host="0.0.0.0")