"""Flask service that removes a user-named object from an uploaded image.

Flow for ``POST /process``:

1. The browser sends JSON ``{"image": <data URL>, "objectType": <text>}``.
2. The image is uploaded once to the Gemini Files API.
3. A text model classifies the request as prohibited ("yes") or allowed ("no").
4. If allowed, an image-generation model produces the edited image, which is
   written under ``static/`` and its URL path is returned to the client.
"""
import base64
import contextlib
import mimetypes
import os
import tempfile

from flask import Flask, render_template, request, jsonify
from werkzeug.utils import secure_filename
from google import genai
from google.genai import types

app = Flask(__name__)

# Initialize Gemini client (raises KeyError at import time if the key is unset).
GEMINI_API_KEY = os.environ["GEMINI_API_KEY"]
client = genai.Client(api_key=GEMINI_API_KEY)

# Configure upload folders
UPLOAD_FOLDER = 'uploads'
RESULT_FOLDER = 'static'
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
os.makedirs(RESULT_FOLDER, exist_ok=True)

# Models used by the two Gemini calls below.
PROHIBITION_MODEL = "gemini-2.0-pro-exp-02-05"
IMAGE_MODEL = "gemini-2.0-flash-exp-image-generation"

# System instruction for the prohibition classifier. The model must answer
# "yes" (reject) or "no" (approve). The heading of section 2 previously said
# 'Respond with "No"' although the section is a *reject* rule whose body says
# "yes"; the heading now agrees with the body.
PROHIBITION_SYSTEM_PROMPT = """Reject Requests That Require AI to Generate a Human, Animal, or Their Features (Respond with "yes")
If the user asks to remove or modify human or animal features (e.g., "Remove eyes," "Remove mouth," "Remove face," "Remove hair from person," "Remove a tattoo on skin," "Remove a logo printed on a person's body") → AI should reject the request and respond with "yes."
If removing something would require AI to generate missing human or animal body parts → AI should reject the request and respond with "yes."

2. Reject Requests That Require AI to Recreate Text on a Person's Body (Respond with "yes")
If the user asks to remove a watermark, logo, or text that is on a person’s body → AI should reject the request and respond with "yes."
However, if the watermark or text is on an object or background (not a person/animal) → **AI can approve and respond with "no."

3. Approve Object and Background Removal (Respond with "no")
If the user asks to remove non-living objects (e.g., "Remove sunglasses on table," "Remove phone in hand," "Remove background") → **AI should approve and respond with "no."
If the user asks to remove a person or animal without generating a replacement (e.g., "Remove person," "Remove animal") → **AI should approve and respond with "no."

answer by yes or no
"""


class InvalidImageError(ValueError):
    """Raised when the client-supplied image data URL cannot be decoded.

    Subclasses ``ValueError`` so existing ``except ValueError`` callers of
    ``upload_image`` keep working.
    """


def _candidate_parts(response):
    """Return the parts of the first candidate, or ``[]`` if there are none.

    Tolerates a response/chunk with no candidates, a candidate whose
    ``content`` is ``None``, or content with no parts.
    """
    candidates = getattr(response, "candidates", None)
    if not candidates:
        return []
    content = candidates[0].content
    if content is None or not content.parts:
        return []
    return content.parts


def upload_image(image_data_url):
    """Decode a base64 data URL and upload the image to the Gemini Files API.

    Args:
        image_data_url: String of the form ``"<header>,<base64 payload>"``,
            e.g. ``"data:image/png;base64,...."``.

    Returns:
        The file object returned by ``client.files.upload``.

    Raises:
        InvalidImageError: If the value is not a string containing a comma,
            the payload is not valid base64, or it decodes to zero bytes.
    """
    try:
        header, encoded = image_data_url.split(',', 1)
        binary_data = base64.b64decode(encoded)
    except (AttributeError, TypeError, ValueError) as err:
        # binascii.Error (bad padding etc.) is a ValueError subclass.
        raise InvalidImageError("Invalid image data") from err
    if not binary_data:
        raise InvalidImageError("Invalid image data")

    # The upload call receives only a path, so the extension is what
    # distinguishes PNG from JPEG here.
    ext = ".png" if "png" in header.lower() else ".jpg"

    # A unique temp file per call: a fixed name would let concurrent requests
    # overwrite each other's image before it is uploaded.
    fd, temp_filepath = tempfile.mkstemp(
        prefix="temp_image_", suffix=ext, dir=UPLOAD_FOLDER
    )
    try:
        with os.fdopen(fd, "wb") as f:
            f.write(binary_data)
        return client.files.upload(file=temp_filepath)
    finally:
        # The local copy is not needed once the upload call has returned.
        with contextlib.suppress(OSError):
            os.remove(temp_filepath)


def is_prohibited_request(uploaded_file, object_type):
    """Ask the classifier model whether the removal request must be rejected.

    Sends the uploaded image plus ``"Remove {object_type}"`` to
    ``PROHIBITION_MODEL`` with ``PROHIBITION_SYSTEM_PROMPT`` and reads a
    one-token answer.

    Args:
        uploaded_file: File object returned by ``upload_image`` (its ``uri``
            and ``mime_type`` are read).
        object_type: User-supplied description of what to remove.

    Returns:
        ``True`` only if the model's first text part is exactly "yes"
        (case-insensitive, surrounding whitespace ignored). Returns ``False``
        for any other answer, an empty response, or any error: the check
        fails open, as in the original implementation.
    """
    parts = [
        types.Part.from_uri(file_uri=uploaded_file.uri, mime_type=uploaded_file.mime_type),
        types.Part.from_text(text=f"Remove {object_type}"),
    ]
    contents = [types.Content(role="user", parts=parts)]
    generate_content_config = types.GenerateContentConfig(
        system_instruction=[types.Part.from_text(text=PROHIBITION_SYSTEM_PROMPT)],
        temperature=0.0,
        max_output_tokens=1,
    )

    try:
        response = client.models.generate_content(
            model=PROHIBITION_MODEL,
            contents=contents,
            config=generate_content_config,
        )
        answer_parts = _candidate_parts(response)
        answer = answer_parts[0].text if answer_parts else None
        return bool(answer) and answer.strip().lower() == "yes"
    except Exception as e:
        # NOTE(review): deliberately best-effort; a failed check allows the
        # request through. Confirm fail-open is the intended policy.
        app.logger.error("Prohibition check error: %s", e)
        return False


def generate_gemini_output(object_type, uploaded_file):
    """Generate the edited image and save it under ``RESULT_FOLDER``.

    Streams a response from ``IMAGE_MODEL`` for the prompt
    ``"Remove {object_type} from the image"`` and writes any inline image
    data it contains to ``generated_output<ext>``. If several images arrive,
    the last one written wins.

    Args:
        object_type: User-supplied description of what to remove.
        uploaded_file: File object returned by ``upload_image``.

    Returns:
        Path of the written image file, or ``None`` if the model returned no
        image data.
    """
    parts = [
        types.Part.from_uri(file_uri=uploaded_file.uri, mime_type=uploaded_file.mime_type),
        types.Part.from_text(text=f"Remove {object_type} from the image"),
    ]
    contents = [types.Content(role="user", parts=parts)]
    generate_content_config = types.GenerateContentConfig(
        temperature=1,
        top_p=0.95,
        top_k=40,
        max_output_tokens=8192,
        response_modalities=["image", "text"],
        safety_settings=[
            types.SafetySetting(category="HARM_CATEGORY_CIVIC_INTEGRITY", threshold="OFF"),
        ],
    )

    result_image = None
    stream = client.models.generate_content_stream(
        model=IMAGE_MODEL,
        contents=contents,
        config=generate_content_config,
    )
    for chunk in stream:
        # Scan every part: the image is not guaranteed to be the first part
        # of a chunk, and some chunks carry no content at all.
        for part in _candidate_parts(chunk):
            inline = part.inline_data
            if not inline or not inline.data:
                continue
            file_extension = mimetypes.guess_extension(inline.mime_type or "") or ".png"
            # NOTE(review): the output name is fixed, so concurrent requests
            # overwrite each other's result. Kept because the front end may
            # depend on this path; consider a unique name per request.
            output_filename = secure_filename("generated_output" + file_extension)
            result_image_path = os.path.join(RESULT_FOLDER, output_filename)
            with open(result_image_path, "wb") as f:
                f.write(inline.data)
            result_image = result_image_path

    return result_image


@app.route("/")
def index():
    """Serve the single-page UI."""
    return render_template("index.html")


@app.route("/process", methods=["POST"])
def process():
    """Handle an object-removal request.

    Expects a JSON object with string fields ``image`` (base64 data URL) and
    ``objectType``.

    Returns:
        JSON ``{"success": True, "resultPath": ...}`` on success; otherwise
        ``{"success": False, "message": ...}`` with status 400 for bad input
        or a prohibited request, and 500 for generation/service failures.
    """
    # silent=True: malformed JSON yields None instead of raising, so it can be
    # reported as a client error rather than falling into the 500 handler.
    data = request.get_json(force=True, silent=True)
    if not isinstance(data, dict):
        return jsonify({"success": False, "message": "Invalid JSON body"}), 400

    image_data = data.get("image")
    object_type = data.get("objectType", "")
    if not isinstance(image_data, str) or not isinstance(object_type, str):
        return jsonify({"success": False, "message": "Missing required data"}), 400

    object_type = object_type.strip()
    if not image_data or not object_type:
        return jsonify({"success": False, "message": "Missing required data"}), 400

    try:
        # Upload image once
        uploaded_file = upload_image(image_data)

        # Check for prohibited requests
        if is_prohibited_request(uploaded_file, object_type):
            # NOTE(review): this wording does not match the classifier policy,
            # which approves plain "Remove person"/"Remove animal" requests.
            return jsonify({
                "success": False,
                "message": "Sorry, I can't assist with removing people or animals."
            }), 400

        # Generate output if allowed
        result_image = generate_gemini_output(object_type, uploaded_file)
    except InvalidImageError as e:
        # Undecodable image payload is the client's fault, not a server error.
        return jsonify({"success": False, "message": f"Error: {str(e)}"}), 400
    except Exception as e:
        app.logger.exception("Processing failed")
        return jsonify({"success": False, "message": f"Error: {str(e)}"}), 500

    if not result_image:
        return jsonify({"success": False, "message": "Failed to generate image"}), 500

    return jsonify({
        "success": True,
        "resultPath": f"/static/{os.path.basename(result_image)}"
    })


if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)