shohrukhdadakhon commited on
Commit
4ad42b5
·
1 Parent(s): f8dc15a
Files changed (3) hide show
  1. .gitignore +14 -0
  2. app.py +520 -0
  3. requirements.txt +6 -0
.gitignore ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ignore Python cache
2
+ __pycache__/
3
+ *.pyc
4
+ *.pyo
5
+
6
+ # Ignore environment variable file
7
+ .env
8
+
9
+ # Ignore local virtual environments
10
+ venv/
11
+ env/
12
+
13
+ # Ignore Hugging Face Space build artifacts
14
+ gradio_cached_examples/
app.py ADDED
@@ -0,0 +1,520 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from gradio_client import Client, handle_file
3
+ from PIL import Image
4
+ import io, base64, requests, os
5
+ from dotenv import load_dotenv
6
+ from google import genai
7
+ from google.genai import types
8
+ import time
9
+ import mimetypes
10
+ import tempfile
11
+ from io import BytesIO
12
+
13
+ load_dotenv()
14
+ MODAL_KEY = os.getenv("MODAL_LABS_KEY")
15
+ MODAL_ENDPOINT = os.getenv("MODAL_LABS_ENDPOINT")
16
+ GOOGLE_API_KEY = os.getenv("GEMINI_API")
17
+ CLARITY_API = "jbilcke-hf/clarity-upscaler"
18
+
19
+ client = genai.Client(api_key=GOOGLE_API_KEY)
20
+
21
+ # ── Function 1: Remove Background ─────────────────────────────── BIREFNET
22
+ def remove_background_image(path: str, output_path: str = None) -> Image.Image:
23
+ """Edit a local image file using the 'Remove Background' method. Optionally save result.
24
+
25
+ Args:
26
+ path (str): Absolute or relative path to a PNG/JPEG on disk.
27
+ output_path (str, optional): Where to save the edited image, e.g. Downloads/bg_removed.png
28
+
29
+ Returns:
30
+ image: The edited image (PIL.Image) with background removed.
31
+ """
32
+ if not path or not os.path.exists(path):
33
+ raise gr.Error("Valid input image path is required.")
34
+
35
+ with Image.open(path).convert("RGB") as img:
36
+ buf = io.BytesIO()
37
+ img.save(buf, format="PNG")
38
+ img_b64 = base64.b64encode(buf.getvalue()).decode()
39
+
40
+ resp = requests.post(
41
+ MODAL_ENDPOINT,
42
+ json={"input_base64": img_b64, "model_type": "bg_removal"},
43
+ headers={"x-api-key": MODAL_KEY},
44
+ timeout=60
45
+ )
46
+ if resp.status_code != 200:
47
+ raise RuntimeError(f"Modal error: {resp.text}")
48
+
49
+ result_img = Image.open(io.BytesIO(base64.b64decode(resp.json()["output_base64"])))
50
+ if output_path:
51
+ result_img.save(output_path)
52
+ return result_img
53
+
54
+
55
+ # ── Function 2: Clarity Upscaler ────────────────────────────────
56
+ def upscale_image(
57
+ path: str,
58
+ output_path: str = None,
59
+ scale: float = 2,
60
+ dynamic: float = 6,
61
+ creativity: float = 0.35,
62
+ resemblance: float = 0.6,
63
+ tiling_width: str = "112",
64
+ tiling_height: str = "144",
65
+ model: str = "juggernaut_reborn.safetensors [338b85bc4f]",
66
+ scheduler: str = "DPM++ 3M SDE Karras",
67
+ steps: int = 18,
68
+ seed: int = 1337,
69
+ downscale: bool = False,
70
+ downscale_resolution: int = 768
71
+ ) -> Image.Image:
72
+ """Edit a local image using the 'Clarity Upscaler' method. Optionally save result. Useful for stylized upscaling with fractal detail control.
73
+
74
+ Args:
75
+ path (str): Absolute or relative path to a PNG/JPEG on disk.
76
+ output_path (str, optional): Path to save the edited image, e.g. Downloads/clarity_upscaled.png.
77
+ scale (float, optional): Upscale factor (default: 2).
78
+ dynamic (float, optional): Controls responsiveness of upscale. Range: 1–50 (default: 6).
79
+ creativity (float, optional): Controls creative generation. Range: 0.3–0.9 (default: 0.35).
80
+ resemblance (float, optional): How much result resembles original image. Range: 0.3–1.6 (default: 0.6).
81
+ tiling_width (str, optional): Tiling width for fractal detail (lower = more fractality). Options: 16–256 (default: "112").
82
+ tiling_height (str, optional): Tiling height for fractal detail (lower = more fractality). Options: 16–256 (default: "144").
83
+ model (str, optional): Base SD model. Options: juggernaut, epicrealism, flat2DAnimerge (default: juggernaut).
84
+ scheduler (str, optional): Sampling algorithm used. Options include DPM++, Euler, LMS, etc. (default: DPM++ 3M SDE Karras).
85
+ steps (int, optional): Number of inference steps. Range: 1–100 (default: 18).
86
+ seed (int, optional): Random seed. Default: 1337.
87
+ downscale (bool, optional): Whether to apply post-upscale downscaling. Default: False.
88
+ downscale_resolution (int, optional): Resolution to downscale to (if downscale=True). Default: 768.
89
+
90
+ Returns:
91
+ image: The edited image (PIL.Image) upscaled via AI model.
92
+ """
93
+ if not path or not os.path.exists(path):
94
+ raise gr.Error("Valid input image path is required.")
95
+
96
+ client = Client(CLARITY_API)
97
+ result_path = client.predict(
98
+ handle_file(path),
99
+ "", "", # prompt / neg prompt
100
+ scale,
101
+ dynamic,
102
+ creativity,
103
+ resemblance,
104
+ tiling_width,
105
+ tiling_height,
106
+ model,
107
+ scheduler,
108
+ steps,
109
+ seed,
110
+ downscale,
111
+ downscale_resolution,
112
+ "", "", # lora / custom model
113
+ api_name="/predict"
114
+ )
115
+
116
+ result_img = Image.open(result_path)
117
+ if output_path:
118
+ result_img.save(output_path)
119
+ return result_img
120
+
121
+
122
+
123
+ # ── Function 3: Tile ControlNet Upscaler (Preferred) ────────────
124
+ def upscale_image_preferred(
125
+ path: str,
126
+ output_path: str = None,
127
+ resolution: int = 512,
128
+ steps: int = 18,
129
+ strength: float = 0.4,
130
+ hdr: float = 0.1,
131
+ guidance: float = 3
132
+ ) -> Image.Image:
133
+ """Edit a local image file using the 'Tile Upscaler' method. This is the preferred upscale method. Optionally save the result.
134
+
135
+ Args:
136
+ path (str): Absolute or relative path to a PNG/JPEG on disk.
137
+ output_path (str, optional): Where to save the edited image, e.g., Downloads/upscaled_tile.png.
138
+ resolution (int, optional): Tile conditioning resolution before inference. Valid range: 256–2048. Default is 512.
139
+ This affects detail level. Output image is roughly 2x this resolution.
140
+ e.g. if 1024 is set, output is ~2048x2048.
141
+ Claude should decide based on image quality β€” for low-res input, try 1024.
142
+ steps (int, optional): Number of inference steps. Range: 1–50. Default is 18.
143
+ strength (float, optional): Strength of transformation (0–1). Default is 0.4.
144
+ hdr (float, optional): Intensity of HDR effect (0–1). Default is 0.1.
145
+ guidance (float, optional): Guidance scale (CFG). Range: 0–20. Default is 3.
146
+
147
+ Returns:
148
+ image: The upscaled image (PIL.Image) generated using ControlNet + RealESRGAN.
149
+ """
150
+ if not path or not os.path.exists(path):
151
+ raise gr.Error("Valid input image path is required.")
152
+
153
+ with Image.open(path).convert("RGB") as img:
154
+ buf = io.BytesIO()
155
+ img.save(buf, format="PNG")
156
+ img_b64 = base64.b64encode(buf.getvalue()).decode()
157
+
158
+ resp = requests.post(
159
+ MODAL_ENDPOINT,
160
+ json={
161
+ "input_base64": img_b64,
162
+ "model_type": "tile_upscale",
163
+ "resolution": resolution,
164
+ "steps": steps,
165
+ "strength": strength,
166
+ "hdr": hdr,
167
+ "guidance": guidance
168
+ },
169
+ headers={"x-api-key": MODAL_KEY},
170
+ timeout=300
171
+ )
172
+ if resp.status_code != 200:
173
+ raise RuntimeError(f"Modal error: {resp.text}")
174
+
175
+ result_img = Image.open(io.BytesIO(base64.b64decode(resp.json()["output_base64"])))
176
+ if output_path:
177
+ result_img.save(output_path)
178
+ return result_img
179
+
180
+
181
+ def generate_video_from_image(
182
+ path: str,
183
+ prompt: str = "",
184
+ aspect_ratio: str = "16:9",
185
+ duration: int = 8,
186
+ output_path: str = None
187
+ ) -> str:
188
+ """
189
+ Generate a video from an image and a prompt using the Google Veo-2.0 model.
190
+
191
+ Args:
192
+ path (str): Path to input image on disk (JPG/PNG). This image will be used both as visual input for the video generation and as context for generating a descriptive prompt using the Veo prompt guide.
193
+ prompt (str): Prompt text to guide the generation. If generated dynamically, it should include subject, style, action, camera motion, composition, and ambiance where possible.
194
+ aspect_ratio (str): Desired aspect ratio, e.g., "16:9" or "9:16".
195
+ duration (int): Duration of the generated video in seconds. Valid range: 5–8.
196
+ output_path (str): Optional path to save the generated MP4 file locally.
197
+
198
+ Returns:
199
+ str: Path to the generated video file (temporary file used for Gradio display).
200
+ """
201
+
202
+ if not path or not os.path.exists(path):
203
+ raise gr.Error("Input image path is invalid or missing.")
204
+
205
+ with open(path, "rb") as f:
206
+ image_bytes = f.read()
207
+
208
+ # 2. Determine the MIME type from the file path
209
+ mime_type = mimetypes.guess_type(path)[0]
210
+ if not mime_type or not mime_type.startswith('image/'):
211
+ # Fallback for robustness, e.g., if mimetypes fails
212
+ if path.lower().endswith('.png'):
213
+ mime_type = 'image/png'
214
+ elif path.lower().endswith(('.jpg', '.jpeg')):
215
+ mime_type = 'image/jpeg'
216
+ else:
217
+ raise gr.Error(f"Could not determine image type for {path}. Please use JPG or PNG.")
218
+
219
+ # 3. Create the Image object with BOTH correct keywords
220
+ image_type = types.Image(image_bytes=image_bytes, mime_type=mime_type)
221
+
222
+ # --- End of corrected block ---
223
+
224
+ operation = client.models.generate_videos(
225
+ model="veo-2.0-generate-001",
226
+ prompt=prompt,
227
+ image=image_type,
228
+ config=types.GenerateVideosConfig(
229
+ person_generation="allow_adult",
230
+ aspect_ratio=aspect_ratio,
231
+ number_of_videos=1,
232
+ duration_seconds=duration,
233
+ )
234
+ )
235
+
236
+ print("Video generation started. Waiting for completion...")
237
+ while not operation.done:
238
+ time.sleep(20)
239
+ operation = client.operations.get(operation)
240
+ print("...")
241
+
242
+ # --- START OF CRUCIAL FIX ---
243
+ #
244
+ # !! CHECK IF THE OPERATION FAILED !!
245
+ # The 'response' attribute will be None if there was an error.
246
+ #
247
+ if not operation.response:
248
+ print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
249
+ print("!! Video Generation FAILED. !!")
250
+ print("!! The operation finished but had no result.!!")
251
+ print("!! Printing the full operation object below. !!")
252
+ print("!! Look for an 'error' field for the reason. !!")
253
+ print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
254
+ print(operation) # THIS IS THE MOST IMPORTANT LINE FOR DEBUGGING
255
+ raise gr.Error("Video generation failed. Check the server console for the detailed error from the API.")
256
+
257
+ # --- END OF CRUCIAL FIX ---
258
+
259
+ # If we get here, it means operation.response is valid.
260
+ print("Operation successful. Downloading video...")
261
+ video_data = operation.response.generated_videos[0].video
262
+ video_bytes = client.files.download(file=video_data)
263
+
264
+ if output_path:
265
+ with open(output_path, "wb") as out_file:
266
+ out_file.write(video_bytes)
267
+ print(f"Video saved to {output_path}")
268
+
269
+ # βœ… Always save to a Gradio-accessible temp file for UI display
270
+ temp_file = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
271
+ temp_file.write(video_bytes)
272
+ temp_file.close()
273
+
274
+ return temp_file.name
275
+
276
+
277
+ def edit_image_with_gemini(
278
+ path: str,
279
+ prompt: str,
280
+ output_path: str = None
281
+ ) -> str:
282
+ """
283
+ Edits an image using Gemini 2.0 Flash Preview Image Generation by applying a prompt to a reference image.
284
+
285
+ This is typically used to generate a background scene behind a subject (e.g., a person or object with background removed),
286
+ in preparation for video generation. The prompt should clearly describe the desired environment or context **without altering
287
+ the subject itself**. For example: "Place this car in the desert of Mars, but do not change the car."
288
+
289
+ Args:
290
+ path (str): Path to the reference image (JPG/PNG), typically a background-removed subject.
291
+ prompt (str): Instruction describing the desired background or scene to add. Must explicitly state that the subject should remain unchanged.
292
+ output_path (str): Optional path to save the resulting image file.
293
+
294
+ Returns:
295
+ str: Path to the generated image (temporary file used for Gradio display or further processing).
296
+ """
297
+
298
+ if not path or not os.path.exists(path):
299
+ raise gr.Error("Input image path is invalid or missing.")
300
+
301
+ original_image = Image.open(path)
302
+
303
+ response = client.models.generate_content(
304
+ model="gemini-2.0-flash-preview-image-generation",
305
+ contents=[prompt, original_image],
306
+ config=types.GenerateContentConfig(
307
+ response_modalities=["TEXT", "IMAGE"]
308
+ )
309
+ )
310
+
311
+ # Parse response
312
+ image_data = None
313
+ for part in response.candidates[0].content.parts:
314
+ if part.inline_data is not None:
315
+ image_data = Image.open(BytesIO(part.inline_data.data))
316
+ break
317
+
318
+ if image_data is None:
319
+ raise gr.Error("No image was returned by Gemini.")
320
+
321
+ # Save to optional output path
322
+ if output_path:
323
+ image_data.save(output_path)
324
+ print(f"Image saved to {output_path}")
325
+
326
+ # Save to temp path for Gradio UI
327
+ temp_file = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
328
+ image_data.save(temp_file.name)
329
+ return temp_file.name
330
+
331
+
332
+ # ── UI: Background Removal ──────────────────────────────────────
333
+ remove_bg_ui = gr.Interface(
334
+ fn=remove_background_image,
335
+ inputs=[
336
+ gr.Textbox(label="Input Image Path", placeholder=r"C:\path\to\input.png"),
337
+ gr.Textbox(label="Optional Output Save Path", placeholder=r"C:\Users\shokh\Downloads\bg_removed.png"),
338
+ ],
339
+ outputs=gr.Image(type="pil", label="Result"),
340
+ title="Remove Background",
341
+ )
342
+
343
+ # ── UI: Clarity Upscaler ────────────────────────────────────────
344
+ upscale_ui = gr.Interface(
345
+ fn=upscale_image,
346
+ inputs=[
347
+ gr.Textbox(label="Input Image Path", placeholder=r"C:\path\to\input.png"),
348
+ gr.Textbox(label="Optional Output Save Path", placeholder=r"C:\Users\shokh\Downloads\clarity_upscaled.png"),
349
+ gr.Slider(1, 4, step=0.1, value=2, label="Scale Factor"),
350
+ gr.Slider(1, 50, step=1, value=6, label="Dynamic"),
351
+ gr.Slider(0.3, 0.9, step=0.01, value=0.35, label="Creativity"),
352
+ gr.Slider(0.3, 1.6, step=0.01, value=0.6, label="Resemblance"),
353
+ gr.Dropdown(choices=[str(i) for i in range(16, 257, 16)], value="112", label="Tiling Width"),
354
+ gr.Dropdown(choices=[str(i) for i in range(16, 257, 16)], value="144", label="Tiling Height"),
355
+ gr.Dropdown(
356
+ choices=[
357
+ "juggernaut_reborn.safetensors [338b85bc4f]",
358
+ "epicrealism_naturalSinRC1VAE.safetensors [84d76a0328]",
359
+ "flat2DAnimerge_v45Sharp.safetensors"
360
+ ],
361
+ value="juggernaut_reborn.safetensors [338b85bc4f]",
362
+ label="Model"
363
+ ),
364
+ gr.Dropdown(
365
+ choices=[
366
+ "DPM++ 3M SDE Karras", "DPM++ 2M Karras", "Euler a", "Euler", "LMS", "Heun",
367
+ "DPM++ SDE", "DPM++ 2S a Karras", "DPM2", "UniPC", "DDIM", "PLMS"
368
+ ],
369
+ value="DPM++ 3M SDE Karras",
370
+ label="Scheduler"
371
+ ),
372
+ gr.Slider(1, 100, step=1, value=18, label="Inference Steps"),
373
+ gr.Number(value=1337, label="Seed"),
374
+ gr.Checkbox(label="Apply Downscaling", value=False),
375
+ gr.Number(value=768, label="Downscaling Resolution (if enabled)")
376
+ ],
377
+ outputs=gr.Image(type="pil", label="Result"),
378
+ title="Clarity Upscaler"
379
+ )
380
+
381
+
382
+ # ── UI: Tile Upscaler (Preferred) ───────────────────────────────
383
+ tile_upscale_ui = gr.Interface(
384
+ fn=upscale_image_preferred,
385
+ inputs=[
386
+ gr.Textbox(label="Input Image Path", placeholder=r"C:\path\to\input.png"),
387
+ gr.Textbox(label="Optional Output Save Path", placeholder=r"C:\Users\shokh\Downloads\tile_upscaled.png"),
388
+ gr.Slider(256, 2048, step=64, value=512, label="Resolution"),
389
+ gr.Slider(1, 50, step=1, value=18, label="Inference Steps"),
390
+ gr.Slider(0, 1, step=0.01, value=0.4, label="Strength (0-1)"),
391
+ gr.Slider(0, 1, step=0.01, value=0.1, label="HDR Effect (0-1)"),
392
+ gr.Slider(0, 20, step=0.1, value=3, label="Guidance Scale (0-20)")
393
+ ],
394
+ outputs=gr.Image(type="pil", label="Result"),
395
+ title="Tile Upscaler (Preferred)"
396
+ )
397
+
398
+ generate_video_ui = gr.Interface(
399
+ fn=generate_video_from_image,
400
+ inputs=[
401
+ gr.Textbox(label="Image Path", placeholder="C:\\Users\\shokh\\Desktop\\img.png"),
402
+ gr.Textbox(label="Prompt", placeholder="A scenic view of mountains at sunset"),
403
+ gr.Dropdown(choices=["16:9", "9:16"], value="16:9", label="Aspect Ratio"),
404
+ gr.Slider(minimum=5, maximum=8, step=1, value=8, label="Duration (seconds)"),
405
+ gr.Textbox(label="Optional Output Save Path", placeholder="C:\\Users\\shokh\\Downloads\\video.mp4"),
406
+ ],
407
+ outputs=gr.Video(label="Generated Video"),
408
+ title="Image to Video",
409
+ )
410
+
411
+ generate_image_ui = gr.Interface(
412
+ fn=edit_image_with_gemini,
413
+ inputs=[
414
+ gr.Textbox(label="Image Path", placeholder="C:\\Users\\shokh\\Desktop\\no_bg_img.png"),
415
+ gr.Textbox(label="Prompt", placeholder="Place me in a futuristic cityscape at sunset"),
416
+ gr.Textbox(label="Optional Output Save Path", placeholder="C:\\Users\\shokh\\Downloads\\edited.png"),
417
+ ],
418
+ outputs=gr.Image(label="Edited Image"),
419
+ title="Edit Image with Gemini"
420
+ )
421
+
422
+
423
+ # Final UI with new tab added
424
+ demo = gr.TabbedInterface(
425
+ interface_list=[
426
+ remove_bg_ui,
427
+ tile_upscale_ui,
428
+ upscale_ui,
429
+ generate_video_ui,
430
+ generate_image_ui # <- Add here
431
+ ],
432
+ tab_names=[
433
+ "Remove Background",
434
+ "Upscale (Tile - Preferred)",
435
+ "Upscale (Clarity)",
436
+ "Image-to-Video",
437
+ "Edit Image with Gemini" # <- And name the tab
438
+ ]
439
+ )
440
+
441
+ explanation_md = gr.Markdown(
442
+ """
443
+ # 🧠 How This AI Image & Video Editing MCP Server Works
444
+
445
+ This toolchain provides AI-powered image and video editing capabilities using multiple models connected via the [Claude MCP (Model Context Protocol)](https://modelcontextprotocol.io/) system. You can control and automate these tools from Claude Desktop.
446
+
447
+ ---
448
+
449
+ ### πŸ”§ Tools Available
450
+
451
+ #### 1. **Remove Background**
452
+ - **Model**: BiRefNet v2 (hosted on Modal Labs)
453
+ - **Input**: Image with background
454
+ - **Output**: Transparent PNG
455
+
456
+ #### 2. **Upscale**
457
+ - **Tile Upscaler**: Highly accurate enhancer using tiled upscaling (hosted on Modal Labs)
458
+ - **Clarity Upscaler**: General quality enhancer (calls external Gradio Space API)
459
+
460
+ #### 3. **Image-to-Video**
461
+ - **Model**: Google Veo 2
462
+ - **Input**: Image + Prompt
463
+ - **Output**: Cinematic video clip (5–8 sec)
464
+ - ⚠️ **Note**: Image must be visually coherent; typically used after background editing
465
+
466
+ #### 4. **Edit Image with Gemini**
467
+ - **Model**: Gemini 2.0 Flash Preview Image Generation
468
+ - **Purpose**: Add backgrounds/scenes to background-removed subjects
469
+ - βœ… **Important**: Prompt must specify to **not alter the subject**, only modify the environment.
470
+
471
+ ---
472
+
473
+ ### πŸ§‘β€πŸ’» How to Use With Claude Desktop (MCP)
474
+
475
+ To use this space as an MCP server:
476
+
477
+ 1. **Download [Claude Desktop](https://claude.ai)**
478
+ 2. In Claude's MCP config, add this server and filesystem:
479
+
480
+ ```json
481
+ {
482
+ "mcpServers": {
483
+ "gradio": {
484
+ "command": "npx",
485
+ "args": [
486
+ "mcp-remote",
487
+ "http://127.0.0.1:7860/gradio_api/mcp/sse"
488
+ ]
489
+ },
490
+ "filesystem": {
491
+ "command": "npx",
492
+ "args": [
493
+ "-y",
494
+ "@modelcontextprotocol/server-filesystem",
495
+ "C:\\Users\\YOUR_USERNAME\\Desktop\\claude-accessible-folder"
496
+ ]
497
+ }
498
+ }
499
+ }
500
+ ```
501
+
502
+ > πŸ—‚οΈ Replace `YOUR_USERNAME` with your actual Windows username. Make sure the folder `claude-accessible-folder` exists on your Desktop. Claude will use it to share image/video files with the tools.
503
+
504
+ ---
505
+
506
+ ### πŸ“Ί Demo Video
507
+
508
+ πŸ‘‰ [Watch how it works (Loom)](https://www.loom.com/share/90b7c72f4eda47e1a94ba6859b14d13e?sid=f268bb09-6a8d-4c83-8435-cf8f85085a93)
509
+
510
+ ---
511
+
512
+ ### 🧡 Built by: [@shdkhasan](https://x.com/shdkhasan)
513
+ """
514
+ )
515
+
516
+ with gr.Blocks() as full_ui:
517
+ demo.render()
518
+ explanation_md.render()
519
+
520
+ full_ui.launch(mcp_server=True, show_error=True)
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ gradio
2
+ gradio_client
3
+ Pillow
4
+ requests
5
+ python-dotenv
6
+ google-genai