# Gttg / app.py
# Athspi's picture
# Update app.py
# ca3c534 verified
import base64
import mimetypes
import os
import tempfile

from flask import Flask, render_template, request, jsonify
from google import genai
from google.genai import types
from werkzeug.utils import secure_filename
# Flask application object; the route handlers below register on it.
app = Flask(__name__)

# Gemini client. The API key is read from the environment at import time,
# so a missing GEMINI_API_KEY raises KeyError before the app starts.
GEMINI_API_KEY = os.environ["GEMINI_API_KEY"]
client = genai.Client(api_key=GEMINI_API_KEY)

# Working directories: decoded uploads go to UPLOAD_FOLDER, generated images
# go to RESULT_FOLDER. Each is created right after it is named.
UPLOAD_FOLDER = 'uploads'
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

RESULT_FOLDER = 'static'
os.makedirs(RESULT_FOLDER, exist_ok=True)
def upload_image(image_data_url):
    """Decode a base64 data URL and upload the image to Gemini.

    Args:
        image_data_url: String of the form ``"<header>,<base64 payload>"``.
            The header is only inspected for the substring ``"png"`` to pick
            the temporary file's extension.

    Returns:
        The value returned by ``client.files.upload`` for the uploaded file.

    Raises:
        ValueError: With the message ``"Invalid image data"`` when the input
            is not a string, has no comma separator, or carries a payload
            that cannot be base64-decoded.
    """
    if not isinstance(image_data_url, str):
        raise ValueError("Invalid image data")

    try:
        header, encoded = image_data_url.split(',', 1)
    except ValueError:
        raise ValueError("Invalid image data") from None

    try:
        binary_data = base64.b64decode(encoded)
    except ValueError:
        # binascii.Error (bad padding etc.) is a ValueError subclass; report
        # it with the same message as every other malformed-input case.
        raise ValueError("Invalid image data") from None

    ext = ".png" if "png" in header.lower() else ".jpg"

    # A unique file per call: a single shared "temp_image.<ext>" would let
    # concurrent requests overwrite each other's bytes before the upload.
    fd, temp_filepath = tempfile.mkstemp(
        prefix="temp_image_", suffix=ext, dir=UPLOAD_FOLDER
    )
    try:
        with os.fdopen(fd, "wb") as f:
            f.write(binary_data)
        return client.files.upload(file=temp_filepath)
    finally:
        # The local copy is only a staging file for the upload; remove it so
        # the uploads folder does not grow without bound.
        try:
            os.remove(temp_filepath)
        except OSError:
            pass
# System prompt for the moderation call. The model must answer "yes" when the
# request has to be rejected and "no" when it may proceed. The heading of
# section 2 previously said 'Respond with "No"', contradicting both its own
# body and the word "Reject"; it now says "yes" like the rule it introduces.
_PROHIBITION_SYSTEM_PROMPT = """Reject Requests That Require AI to Generate a Human, Animal, or Their Features (Respond with "yes")
If the user asks to remove or modify human or animal features (e.g., "Remove eyes," "Remove mouth," "Remove face," "Remove hair from person," "Remove a tattoo on skin," "Remove a logo printed on a person's body") → AI should reject the request and respond with "yes."
If removing something would require AI to generate missing human or animal body parts → AI should reject the request and respond with "yes."
2. Reject Requests That Require AI to Recreate Text on a Person's Body (Respond with "yes")
If the user asks to remove a watermark, logo, or text that is on a person’s body → AI should reject the request and respond with "yes."
However, if the watermark or text is on an object or background (not a person/animal) → **AI can approve and respond with "no."
3. Approve Object and Background Removal (Respond with "no")
If the user asks to remove non-living objects (e.g., "Remove sunglasses on table," "Remove phone in hand," "Remove background") → **AI should approve and respond with "no."
If the user asks to remove a person or animal without generating a replacement (e.g., "Remove person," "Remove animal") → **AI should approve and respond with "no." answer by yes or no
"""


def is_prohibited_request(uploaded_file, object_type):
    """Ask Gemini whether the removal request must be refused.

    Sends the uploaded image plus the text ``"Remove <object_type>"`` to the
    ``gemini-2.0-pro-exp-02-05`` model together with the moderation system
    prompt, and interprets a "yes" reply as "prohibited".

    Args:
        uploaded_file: Object exposing ``uri`` and ``mime_type`` (as returned
            by the Gemini file upload).
        object_type: User-supplied description of what to remove.

    Returns:
        True if the model answered "yes"; False for any other answer, for an
        empty response, or when the call raised.
    """
    model = "gemini-2.0-pro-exp-02-05"
    parts = [
        types.Part.from_uri(file_uri=uploaded_file.uri, mime_type=uploaded_file.mime_type),
        types.Part.from_text(text=f"Remove {object_type}"),
    ]
    contents = [types.Content(role="user", parts=parts)]
    generate_content_config = types.GenerateContentConfig(
        system_instruction=[
            types.Part.from_text(text=_PROHIBITION_SYSTEM_PROMPT),
        ],
        temperature=0.0,
        max_output_tokens=1,
    )
    try:
        response = client.models.generate_content(
            model=model,
            contents=contents,
            config=generate_content_config,
        )
        if not response.candidates:
            return False
        content = response.candidates[0].content
        if content is None or not content.parts:
            return False
        answer = content.parts[0].text
        if not answer:
            return False
        # The prompt itself spells the answer as "yes." in quotes, so accept
        # the reply with or without surrounding quotes/punctuation.
        return answer.strip().strip("\"'.!").lower() == "yes"
    except Exception as e:
        # NOTE(review): this check fails open -- any API error lets the
        # request through. Confirm that is the intended policy.
        print(f"Prohibition check error: {str(e)}")
        return False
def generate_gemini_output(object_type, uploaded_file):
    """Generate the edited image with gemini-2.0-flash-exp-image-generation.

    Streams a response for the prompt ``"Remove <object_type> from the image"``
    and writes every inline image found in the stream to RESULT_FOLDER as
    ``generated_output<ext>``. When several images arrive, later ones
    overwrite earlier ones with the same extension and the last path wins.

    Args:
        object_type: User-supplied description of what to remove.
        uploaded_file: Object exposing ``uri`` and ``mime_type`` (as returned
            by the Gemini file upload).

    Returns:
        Path of the written image file, or None if the stream contained no
        inline image data.
    """
    model = "gemini-2.0-flash-exp-image-generation"
    parts = [
        types.Part.from_uri(file_uri=uploaded_file.uri, mime_type=uploaded_file.mime_type),
        types.Part.from_text(text=f"Remove {object_type} from the image"),
    ]
    contents = [types.Content(role="user", parts=parts)]
    generate_content_config = types.GenerateContentConfig(
        temperature=1,
        top_p=0.95,
        top_k=40,
        max_output_tokens=8192,
        response_modalities=["image", "text"],
        safety_settings=[
            types.SafetySetting(category="HARM_CATEGORY_CIVIC_INTEGRITY", threshold="OFF"),
        ],
    )

    result_image = None
    for chunk in client.models.generate_content_stream(
        model=model,
        contents=contents,
        config=generate_content_config,
    ):
        if not chunk.candidates:
            continue
        content = chunk.candidates[0].content
        # A candidate may carry no content at all; skip it instead of
        # failing on ``None.parts``.
        if content is None or not content.parts:
            continue

        # Inspect every part: with mixed image/text output the image is not
        # guaranteed to be the first part of the chunk.
        for part in content.parts:
            inline = part.inline_data
            if not inline or not inline.data:
                continue
            mime_type = inline.mime_type
            guessed = mimetypes.guess_extension(mime_type) if mime_type else None
            file_extension = guessed or ".png"
            # NOTE(review): the output name is fixed, so concurrent requests
            # overwrite each other's result and browsers may cache it.
            output_filename = secure_filename("generated_output" + file_extension)
            result_image_path = os.path.join(RESULT_FOLDER, output_filename)
            with open(result_image_path, "wb") as f:
                f.write(inline.data)
            result_image = result_image_path

    return result_image
@app.route("/")
def index():
    """Serve the landing page rendered from the ``index.html`` template."""
    page = render_template("index.html")
    return page
@app.route("/process", methods=["POST"])
def process():
    """Handle an object-removal request.

    Expects a JSON object with ``image`` (a base64 data URL) and
    ``objectType`` (text describing what to remove). The image is uploaded
    once, checked by ``is_prohibited_request``, and then passed to
    ``generate_gemini_output``.

    Returns:
        A JSON response. On success: ``{"success": True, "resultPath": ...}``.
        On failure: ``{"success": False, "message": ...}`` with status 400
        for bad or prohibited input and 500 for generation or unexpected
        errors.
    """
    try:
        # silent=True yields None for malformed JSON instead of raising, so a
        # bad body is reported as a client error (400) rather than a 500.
        data = request.get_json(force=True, silent=True)
        if not isinstance(data, dict):
            return jsonify({"success": False, "message": "Missing required data"}), 400

        image_data = data.get("image")
        object_type = data.get("objectType", "")
        # Non-string values (null, numbers, lists) are treated as missing
        # rather than crashing on .strip() or .split() further down.
        if not isinstance(image_data, str) or not isinstance(object_type, str):
            return jsonify({"success": False, "message": "Missing required data"}), 400
        object_type = object_type.strip()
        if not image_data or not object_type:
            return jsonify({"success": False, "message": "Missing required data"}), 400

        # Upload image once
        try:
            uploaded_file = upload_image(image_data)
        except ValueError:
            # Malformed data URL or undecodable base64 payload: client error.
            return jsonify({"success": False, "message": "Invalid image data"}), 400

        # Check for prohibited requests
        if is_prohibited_request(uploaded_file, object_type):
            return jsonify({
                "success": False,
                "message": "Sorry, I can't assist with removing people or animals."
            }), 400

        # Generate output if allowed
        result_image = generate_gemini_output(object_type, uploaded_file)
        if not result_image:
            return jsonify({"success": False, "message": "Failed to generate image"}), 500

        return jsonify({
            "success": True,
            "resultPath": f"/static/{os.path.basename(result_image)}"
        })
    except Exception as e:
        # Top-level boundary: record the traceback server-side, then report.
        app.logger.exception("Unhandled error while processing request")
        return jsonify({"success": False, "message": f"Error: {str(e)}"}), 500
# Script entry point: start Flask's built-in server, listening on all
# interfaces on port 7860.
if __name__ == "__main__":
    app.run(port=7860, host="0.0.0.0")