File size: 6,393 Bytes
605bf7b
b0a339e
8e6ca2b
a36d15c
 
8e6ca2b
 
 
e5c238d
 
b0a339e
9479bea
8e6ca2b
 
b0a339e
a36d15c
 
 
 
 
b0a339e
5a42ff8
b0a339e
 
5a42ff8
 
b0a339e
5a42ff8
 
 
 
 
 
 
 
 
b0a339e
4c9f740
 
8489828
7b62da2
 
 
5a42ff8
4c9f740
7b62da2
4c9f740
7b62da2
 
ca3c534
61acb23
ca3c534
61acb23
ca3c534
61acb23
0327eea
 
 
 
ca3c534
0327eea
ca3c534
0327eea
 
 
ca3c534
0327eea
ca3c534
0327eea
ca3c534
0327eea
7b62da2
 
 
 
4c9f740
7b62da2
 
 
 
 
 
 
 
 
 
4c9f740
7b62da2
b0a339e
5a42ff8
4c9f740
b0a339e
 
 
5a42ff8
b0a339e
4c9f740
68780eb
4c9f740
b0a339e
5a42ff8
 
 
 
 
4c9f740
 
 
68780eb
4c9f740
5a42ff8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4c9f740
5a42ff8
8e6ca2b
a36d15c
68780eb
a36d15c
 
 
 
 
5a42ff8
 
7b62da2
b0a339e
5a42ff8
 
b0a339e
4c9f740
b0a339e
4c9f740
 
 
7b62da2
 
4c9f740
7b62da2
4c9f740
 
5a42ff8
4c9f740
5a42ff8
b0a339e
4c9f740
b0a339e
 
5a42ff8
b0a339e
5a42ff8
a36d15c
5a42ff8
8e6ca2b
 
2269b2d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
import base64
import mimetypes
import os
import tempfile

from flask import Flask, render_template, request, jsonify
from werkzeug.utils import secure_filename
from google import genai
from google.genai import types

# Flask application object; the route handlers below are registered on it.
app = Flask(__name__)

# Gemini client, authenticated with the API key read from the environment.
# A missing GEMINI_API_KEY variable raises KeyError as soon as this module loads.
GEMINI_API_KEY = os.environ["GEMINI_API_KEY"]
client = genai.Client(api_key=GEMINI_API_KEY)

# Working directories: incoming uploads, and the folder generated results
# are written to. Both are created up front if they do not exist yet.
UPLOAD_FOLDER = 'uploads'
RESULT_FOLDER = 'static'
os.makedirs(name=UPLOAD_FOLDER, exist_ok=True)
os.makedirs(name=RESULT_FOLDER, exist_ok=True)

def upload_image(image_data_url):
    """Decode a base64 data URL and upload the image to Gemini.

    The payload is written to a uniquely named temporary file inside
    ``UPLOAD_FOLDER`` (so concurrent requests cannot overwrite each other's
    data), uploaded with ``client.files.upload``, and the temporary file is
    removed afterwards.

    Args:
        image_data_url: String of the form ``<header>,<base64 payload>``.

    Returns:
        The object returned by ``client.files.upload``.

    Raises:
        ValueError: If the string contains no comma separating header and
            payload, or if the payload is not valid base64.
    """
    try:
        header, encoded = image_data_url.split(',', 1)
    except ValueError:
        raise ValueError("Invalid image data") from None

    try:
        binary_data = base64.b64decode(encoded)
    except ValueError as err:
        # binascii.Error (bad padding etc.) is a ValueError subclass; report
        # it with the same message as a malformed data URL.
        raise ValueError("Invalid image data") from err

    # The extension is kept on the temp file name so the upload still sees a
    # .png / .jpg path, exactly as before.
    ext = ".png" if "png" in header.lower() else ".jpg"

    tmp = tempfile.NamedTemporaryFile(
        mode="wb",
        prefix="temp_image_",
        suffix=ext,
        dir=UPLOAD_FOLDER,
        delete=False,
    )
    temp_filepath = tmp.name
    try:
        # Close the file before uploading so it can be reopened by path.
        with tmp:
            tmp.write(binary_data)
        return client.files.upload(file=temp_filepath)
    finally:
        # Best-effort cleanup: a leftover temp file must not mask the result
        # (or the original exception) of the upload.
        try:
            os.remove(temp_filepath)
        except OSError:
            pass

def is_prohibited_request(uploaded_file, object_type):
    """Ask a Gemini model whether the removal request must be rejected.

    The uploaded image and the text ``Remove {object_type}`` are sent to the
    model named in ``model`` below, together with a system instruction that
    asks for a "yes" (reject) or "no" (approve) answer.

    Args:
        uploaded_file: Uploaded file object; its ``uri`` and ``mime_type``
            attributes are read.
        object_type: Description of what the user wants removed.

    Returns:
        True only when the first part of the first candidate is the text
        "yes" (case-insensitive, surrounding whitespace ignored). False in
        every other case, including when the API call raises.
    """
    model = "gemini-2.0-pro-exp-02-05"
    parts = [
        types.Part.from_uri(file_uri=uploaded_file.uri, mime_type=uploaded_file.mime_type),
        types.Part.from_text(text=f"Remove {object_type}")
    ]

    contents = [types.Content(role="user", parts=parts)]

    # The heading of section 2 previously read (Respond with "No"), which
    # contradicted the section's own body ("reject the request and respond
    # with \"yes\""); the heading now agrees with the body.
    generate_content_config = types.GenerateContentConfig(
        system_instruction=[
            types.Part.from_text(text="""Reject Requests That Require AI to Generate a Human, Animal, or Their Features (Respond with "yes")

If the user asks to remove or modify human or animal features (e.g., "Remove eyes," "Remove mouth," "Remove face," "Remove hair from person," "Remove a tattoo on skin," "Remove a logo printed on a person's body") → AI should reject the request and respond with "yes."

If removing something would require AI to generate missing human or animal body parts → AI should reject the request and respond with "yes."



2. Reject Requests That Require AI to Recreate Text on a Person's Body (Respond with "yes")

If the user asks to remove a watermark, logo, or text that is on a person’s body → AI should reject the request and respond with "yes."

However, if the watermark or text is on an object or background (not a person/animal) → **AI can approve and respond with "no."



3. Approve Object and Background Removal (Respond with "no")

If the user asks to remove non-living objects (e.g., "Remove sunglasses on table," "Remove phone in hand," "Remove background") → **AI should approve and respond with "no."

If the user asks to remove a person or animal without generating a replacement (e.g., "Remove person," "Remove animal") → **AI should approve and respond with "no." answer by yes or no
  """)
        ],
        temperature=0.0,
        # A single output token is enough for a bare "yes" / "no" verdict.
        max_output_tokens=1,
    )

    try:
        response = client.models.generate_content(
            model=model,
            contents=contents,
            config=generate_content_config
        )
        if not response.candidates:
            return False
        content = response.candidates[0].content
        if content is None or not content.parts:
            return False
        answer = content.parts[0].text
        if not answer:
            # First part carried no text (None or empty): nothing to compare.
            return False
        return answer.strip().lower() == "yes"
    except Exception as e:
        # NOTE(review): this check fails open - any API error lets the
        # request through. Kept as-is; confirm this is the intended policy.
        print(f"Prohibition check error: {str(e)}")
        return False

def generate_gemini_output(object_type, uploaded_file):
    """Stream an edited image from Gemini and save it under ``RESULT_FOLDER``.

    Sends the uploaded image plus the prompt
    ``Remove {object_type} from the image`` to the model named in ``model``
    and writes every inline-data part found in the streamed response to
    ``generated_output<ext>`` inside ``RESULT_FOLDER``.

    Args:
        object_type: Description of what should be removed from the image.
        uploaded_file: Uploaded file object; its ``uri`` and ``mime_type``
            attributes are read.

    Returns:
        Path of the last image file written, or None when the stream
        contained no inline image data.
    """
    model = "gemini-2.0-flash-exp-image-generation"
    parts = [
        types.Part.from_uri(file_uri=uploaded_file.uri, mime_type=uploaded_file.mime_type),
        types.Part.from_text(text=f"Remove {object_type} from the image")
    ]

    contents = [types.Content(role="user", parts=parts)]

    generate_content_config = types.GenerateContentConfig(
        temperature=1,
        top_p=0.95,
        top_k=40,
        max_output_tokens=8192,
        response_modalities=["image", "text"],
        safety_settings=[
            types.SafetySetting(category="HARM_CATEGORY_CIVIC_INTEGRITY", threshold="OFF"),
        ],
    )

    result_image = None
    for chunk in client.models.generate_content_stream(
        model=model,
        contents=contents,
        config=generate_content_config,
    ):
        if not chunk.candidates:
            continue
        content = chunk.candidates[0].content
        # A candidate may come back without content; skip it instead of
        # failing on attribute access.
        if content is None or not content.parts:
            continue
        # Inspect every part: with mixed image/text output the image is not
        # guaranteed to be the first part of a chunk.
        for part in content.parts:
            inline_data = part.inline_data
            if not inline_data or not inline_data.data:
                continue
            file_extension = mimetypes.guess_extension(inline_data.mime_type) or ".png"
            output_filename = secure_filename("generated_output" + file_extension)
            result_image_path = os.path.join(RESULT_FOLDER, output_filename)
            with open(result_image_path, "wb") as f:
                f.write(inline_data.data)
            result_image = result_image_path

    return result_image

@app.route("/")
def index():
    """Serve the root URL by rendering the ``index.html`` template."""
    page = render_template("index.html")
    return page

@app.route("/process", methods=["POST"])
def process():
    """Handle an object-removal request posted as JSON.

    Expects a JSON object with an ``image`` data-URL string and an
    ``objectType`` string. The image is uploaded once, screened with
    ``is_prohibited_request`` and, if allowed, passed to
    ``generate_gemini_output``.

    Returns:
        A JSON response and, on failure, an HTTP status:
        * 400 when the body is not a JSON object, a field is missing, empty
          or not a string, the image data is invalid, or the request is
          prohibited;
        * 500 when no image was generated or an unexpected error occurred;
        * otherwise ``{"success": True, "resultPath": "/static/<file>"}``.
    """
    try:
        # silent=True: an unparsable body yields None instead of raising,
        # so it is reported as a client error rather than a 500.
        data = request.get_json(force=True, silent=True)
        if not isinstance(data, dict):
            return jsonify({"success": False, "message": "Missing required data"}), 400

        image_data = data.get("image")
        object_type = data.get("objectType", "")

        # Non-string values (null, numbers, lists...) are malformed input;
        # reject them before calling string methods on them.
        if not isinstance(image_data, str) or not isinstance(object_type, str):
            return jsonify({"success": False, "message": "Missing required data"}), 400

        object_type = object_type.strip()
        if not image_data or not object_type:
            return jsonify({"success": False, "message": "Missing required data"}), 400

        # Upload image once
        try:
            uploaded_file = upload_image(image_data)
        except ValueError as e:
            # Bad data URL / bad base64 is the caller's fault: 400, not 500.
            return jsonify({"success": False, "message": f"Error: {str(e)}"}), 400

        # Check for prohibited requests
        if is_prohibited_request(uploaded_file, object_type):
            return jsonify({
                "success": False,
                "message": "Sorry, I can't assist with removing people or animals."
            }), 400

        # Generate output if allowed
        result_image = generate_gemini_output(object_type, uploaded_file)

        if not result_image:
            return jsonify({"success": False, "message": "Failed to generate image"}), 500

        return jsonify({
            "success": True,
            "resultPath": f"/static/{os.path.basename(result_image)}"
        })

    except Exception as e:
        # Top-level boundary: record the traceback server-side, then report.
        app.logger.exception("Unhandled error while processing request")
        return jsonify({"success": False, "message": f"Error: {str(e)}"}), 500

if __name__ == "__main__":
    # Only when executed directly as a script: start the Flask server,
    # bound to all interfaces on port 7860.
    app.run(port=7860, host="0.0.0.0")