Muhammad Waqas committed on
Commit
80ca3a0
·
1 Parent(s): d6f45c3

Added OmniGen Image to Image

Browse files
app.py CHANGED
@@ -6,7 +6,6 @@ import random
6
  import urllib.request
7
  import urllib.parse
8
  import websocket
9
- import requests
10
  import uuid
11
  from dotenv import load_dotenv
12
  from flask import Flask, request, jsonify, render_template, send_file, send_from_directory
@@ -15,7 +14,6 @@ from werkzeug.utils import secure_filename
15
  import urllib.parse
16
  import urllib.request
17
  import time
18
- from werkzeug.serving import WSGIRequestHandler
19
 
20
  # Load environment variables from the .env file
21
  load_dotenv()
@@ -23,9 +21,6 @@ load_dotenv()
23
  # Initialize Flask app
24
  app = Flask(__name__)
25
 
26
- # Increase timeout limit
27
- # WSGIRequestHandler.timeout = 3600
28
-
29
  ALLOWED_EXTENSIONS = {'jpg', 'jpeg', 'png', 'webp'} # Define supported image types
30
 
31
  # Set server and websocket addresses from environment variables
@@ -176,6 +171,115 @@ def generate_image():
176
 
177
  return jsonify({'images': output_images_base64})
178
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
  # Get image route
180
  @app.route('/get_image/<filename>', methods=['GET'])
181
  def get_image_file(filename):
@@ -256,9 +360,9 @@ def get_video_data(filename, subfolder, token):
256
  print(f"URL Error: {e.reason}")
257
  raise
258
 
259
- ##################################################
260
- # Generate image to video using CogVideoX-5B-12V #
261
- ##################################################
262
 
263
  # Route: Image to Video
264
  @app.route('/v1/image_to_video', methods=['POST'])
@@ -362,6 +466,70 @@ def v1_image_to_video():
362
  # os.remove(image_path)
363
  # print(f"Deleted temporary image: {image_path}", flush=True)
364
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
365
  # Get video_tasks route
366
  @app.route('/v1/video_tasks/<prompt_id>', methods=['GET'])
367
  def video_tasks(prompt_id):
@@ -393,12 +561,14 @@ def video_tasks(prompt_id):
393
  'status': 'pending',
394
  'prompts_in_queue': queue_remaining
395
  }), 202
 
396
  video_data = None
397
 
398
  # Extract video or GIF details
399
  for node_id, node_output in history.get('outputs', {}).items():
400
  if 'gifs' in node_output:
401
  video = node_output['gifs'][0] # Take the first available GIF/video
 
402
  try:
403
  video_data = get_video_data(video['filename'], video['subfolder'], token)
404
  break # Stop after fetching the first valid video
 
6
  import urllib.request
7
  import urllib.parse
8
  import websocket
 
9
  import uuid
10
  from dotenv import load_dotenv
11
  from flask import Flask, request, jsonify, render_template, send_file, send_from_directory
 
14
  import urllib.parse
15
  import urllib.request
16
  import time
 
17
 
18
  # Load environment variables from the .env file
19
  load_dotenv()
 
21
  # Initialize Flask app
22
  app = Flask(__name__)
23
 
 
 
 
24
  ALLOWED_EXTENSIONS = {'jpg', 'jpeg', 'png', 'webp'} # Define supported image types
25
 
26
  # Set server and websocket addresses from environment variables
 
171
 
172
  return jsonify({'images': output_images_base64})
173
 
174
+
175
+ ###################################################
176
+ # Edit image with text prompt using OmniGen Model #
177
+ ###################################################
178
+
179
+ # Route: OmniGen image to image
180
@app.route('/omnigen/image_to_image', methods=['POST'])
def omnigen_image_to_image():
    """Edit an input image according to a text prompt via the OmniGen workflow.

    Request:
        Header ``Authorization: Bearer <token>`` (required).
        Fields, read from the JSON body or, when the image is uploaded as
        multipart data, from the accompanying form fields:
            text_prompt: required, non-empty instruction text.
            steps: optional positive integer of inference steps (default 50).
            base64_image: base64 image, used when no ``image`` file is sent.
        File part ``image``: optional upload; its extension must pass
        ``allowed_file``.

    Returns:
        (response, 200) with ``images`` (list of base64-encoded PNGs) and a
        ``message`` on success; (response, 400) for invalid input;
        (response, 500) when the workflow could not be run.
    """
    # request.json cannot parse multipart uploads, so read JSON leniently and
    # fall back to the form fields that travel with an uploaded file.
    payload = request.get_json(silent=True)
    data = payload if isinstance(payload, dict) else request.form

    # Extract and validate token
    token = request.headers.get('Authorization')
    if not token or not token.startswith("Bearer "):
        return jsonify({'error': 'Valid Bearer token required'}), 400
    token = token.split(" ")[1]
    if not token:
        return jsonify({'error': 'Valid Bearer token required'}), 400

    # Validate text prompt
    text_prompt = data.get('text_prompt')
    if not isinstance(text_prompt, str) or not text_prompt.strip():
        return jsonify({'error': 'Text prompt is required'}), 400

    # Validate steps; form fields arrive as strings, so coerce to int
    raw_steps = data.get('steps')
    if not raw_steps:
        steps = 50
    else:
        try:
            steps = int(raw_steps)
        except (TypeError, ValueError):
            return jsonify({'error': 'Steps must be a positive integer'}), 400
        if steps <= 0:
            return jsonify({'error': 'Steps must be a positive integer'}), 400

    # Handle uploaded image or base64 image
    image_file = request.files.get('image')
    base64_image = data.get('base64_image')

    image_path = None  # Set once a temporary image has been written
    ws = None  # Set once the WebSocket has been created

    try:
        if image_file:
            # Check if the file has an allowed extension
            if not allowed_file(image_file.filename):
                return jsonify({'error': 'Unsupported image format'}), 400

            # Secure the filename and make it unique
            filename = secure_filename(image_file.filename)
            unique_filename = f"{uuid.uuid4()}_{filename}"
            image_path = os.path.join('static', unique_filename)

            # Ensure the 'static' directory exists, then save the upload
            os.makedirs('static', exist_ok=True)
            image_file.save(image_path)

            # Construct the public URL to access the image
            image_url = f"https://gosign-de-comfyui-api.hf.space/{image_path}"

        elif base64_image:
            # Bad image data is a client error, not a server failure
            try:
                image_path, image_url = save_base64_image(base64_image)
            except Exception as e:
                return jsonify({'error': f'Invalid base64 image data: {str(e)}'}), 400

        else:
            return jsonify({'error': 'Image is required (file or base64)'}), 400

        # Load workflow configuration
        current_dir = os.path.dirname(os.path.abspath(__file__))
        workflow_path = os.path.join(current_dir, 'workflows/omnigen_image_to_image_workflow_api.json')
        with open(workflow_path, 'r', encoding='utf-8') as f:
            workflow = json.load(f)

        # Modify workflow with inputs
        workflow["6"]["inputs"]["prompt"] = "in image_1 " + text_prompt
        workflow["6"]["inputs"]["num_inference_steps"] = steps
        workflow["12"]["inputs"]["url"] = image_url

        # WebSocket connection to queue the prompt
        ws = websocket.WebSocket()
        ws.connect(f"{ws_address}?clientId={client_id}&token={token}",
                   header={"Authorization": f"Bearer {token}"})

        # Queue the prompt
        # NOTE(review): get_images() is defined elsewhere; if it also queues
        # the workflow, this call submits it twice - confirm.
        queue_prompt(workflow, token)

        images = get_images(ws, workflow, token)

        # Re-encode every returned image as a base64 PNG
        output_images_base64 = []
        for node_id in images:
            for image_data in images[node_id]:
                image = Image.open(io.BytesIO(image_data))
                buffered = io.BytesIO()
                image.save(buffered, format="PNG")
                img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
                output_images_base64.append(img_str)

        return jsonify({'images': output_images_base64, 'message': 'Image generated successfully'}), 200

    except Exception as e:
        return jsonify({'message': 'Unable to connect to the server. Make sure the server is running', 'error': str(e)}), 500

    finally:
        try:
            # Close the socket on every path, not only on success
            if ws is not None:
                ws.close()
        finally:
            # Always delete the image if it was saved
            if image_path and os.path.exists(image_path):
                os.remove(image_path)
                print(f"Deleted temporary image: {image_path}", flush=True)
281
+
282
+
283
  # Get image route
284
  @app.route('/get_image/<filename>', methods=['GET'])
285
  def get_image_file(filename):
 
360
  print(f"URL Error: {e.reason}")
361
  raise
362
 
363
+ ########################################################
364
+ # Generate image to video using CogVideoX-5B-12V Model #
365
+ ########################################################
366
 
367
  # Route: Image to Video
368
  @app.route('/v1/image_to_video', methods=['POST'])
 
466
  # os.remove(image_path)
467
  # print(f"Deleted temporary image: {image_path}", flush=True)
468
 
469
+
470
+ ###################################################
471
+ # Generate text to video using CogVideoX-5B Model #
472
+ ###################################################
473
+
474
+ # Route: Text to Video
475
@app.route('/v1/text_to_video', methods=['POST'])
def v1_text_to_video():
    """Queue a CogVideoX text-to-video workflow and return its prompt id.

    Request:
        Header ``Authorization: Bearer <token>`` (required).
        JSON body fields:
            text_prompt: required, non-empty description of the video.
            frame_rate: optional; one of 8, 12 or 24 (default 24).
            steps: optional positive integer of sampler steps (default 50).

    Returns:
        (response, 202) with ``prompt_id``, ``message`` and ``get_video_url``
        once the prompt is queued; (response, 400) for invalid input;
        (response, 500) when the workflow could not be queued.
    """
    # Parse leniently so a missing or non-JSON body yields a 400 below
    # instead of an AttributeError on None.
    payload = request.get_json(silent=True)
    data = payload if isinstance(payload, dict) else {}

    # Extract and validate token
    token = request.headers.get('Authorization')
    if not token or not token.startswith("Bearer "):
        return jsonify({'error': 'Valid Bearer token required'}), 400
    token = token.split(" ")[1]
    if not token:
        return jsonify({'error': 'Valid Bearer token required'}), 400

    # Validate text prompt
    text_prompt = data.get('text_prompt')
    frame_rate = data.get('frame_rate')
    raw_steps = data.get('steps')
    if not isinstance(text_prompt, str) or not text_prompt.strip():
        return jsonify({'error': 'Text prompt is required'}), 400

    # Check if frame_rate is missing or invalid
    if not frame_rate:  # If frame_rate is None, empty, or 0
        frame_rate = 24  # Default to 24 fps
    else:
        try:
            frame_rate = int(frame_rate)
        except (TypeError, ValueError):  # e.g. "abc" or a list/dict value
            return jsonify({'error': 'Frame rate must be a valid number (8, 12, or 24).'}), 400
        if frame_rate not in [8, 12, 24]:  # Ensure it's one of the allowed values
            return jsonify({'error': 'Frame rate must be a valid number (8, 12, or 24).'}), 400

    # Validate steps so only a positive integer reaches the workflow
    if not raw_steps:
        steps = 50
    else:
        try:
            steps = int(raw_steps)
        except (TypeError, ValueError):
            return jsonify({'error': 'Steps must be a positive integer'}), 400
        if steps <= 0:
            return jsonify({'error': 'Steps must be a positive integer'}), 400

    ws = None  # Set once the WebSocket has been created

    try:
        # Load workflow configuration
        current_dir = os.path.dirname(os.path.abspath(__file__))
        workflow_path = os.path.join(current_dir, 'workflows/cogvideox_text_to_video_workflow_api.json')
        with open(workflow_path, 'r', encoding='utf-8') as f:
            workflow = json.load(f)

        # Modify workflow with inputs
        workflow["30"]["inputs"]["prompt"] = text_prompt
        workflow["31"]["inputs"]["prompt"] = "Low quality, watermark, strange motion, blur"
        workflow["33"]["inputs"]["frame_rate"] = frame_rate
        workflow["34"]["inputs"]["steps"] = steps

        # WebSocket connection to queue the prompt
        ws = websocket.WebSocket()
        ws.connect(f"{ws_address}?clientId={client_id}&token={token}",
                   header={"Authorization": f"Bearer {token}"})

        # Queue the prompt
        prompt_id = queue_prompt(workflow, token)

        return jsonify({'prompt_id': prompt_id, 'message': 'Prompt queued successfully', 'get_video_url': f'https://gosign-de-comfyui-api.hf.space/v1/video_tasks/{prompt_id}'}), 202

    except Exception as e:
        return jsonify({'message': 'Unable to connect to the server. Make sure the server is running', 'error': str(e)}), 500

    finally:
        # The socket is not used after queuing; close it so each request
        # does not leave a connection open.
        if ws is not None:
            ws.close()
531
+
532
+
533
  # Get video_tasks route
534
  @app.route('/v1/video_tasks/<prompt_id>', methods=['GET'])
535
  def video_tasks(prompt_id):
 
561
  'status': 'pending',
562
  'prompts_in_queue': queue_remaining
563
  }), 202
564
+
565
  video_data = None
566
 
567
  # Extract video or GIF details
568
  for node_id, node_output in history.get('outputs', {}).items():
569
  if 'gifs' in node_output:
570
  video = node_output['gifs'][0] # Take the first available GIF/video
571
+
572
  try:
573
  video_data = get_video_data(video['filename'], video['subfolder'], token)
574
  break # Stop after fetching the first valid video
workflows/cogvideox_text_to_video_workflow_api.json ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "1": {
3
+ "inputs": {
4
+ "model": "THUDM/CogVideoX-5b",
5
+ "precision": "bf16",
6
+ "fp8_transformer": "disabled",
7
+ "compile": "disabled",
8
+ "enable_sequential_cpu_offload": false
9
+ },
10
+ "class_type": "DownloadAndLoadCogVideoModel",
11
+ "_meta": {
12
+ "title": "(Down)load CogVideo Model"
13
+ }
14
+ },
15
+ "11": {
16
+ "inputs": {
17
+ "enable_vae_tiling": false,
18
+ "tile_sample_min_height": 240,
19
+ "tile_sample_min_width": 360,
20
+ "tile_overlap_factor_height": 0.2,
21
+ "tile_overlap_factor_width": 0.2,
22
+ "auto_tile_size": true,
23
+ "pipeline": [
24
+ "34",
25
+ 0
26
+ ],
27
+ "samples": [
28
+ "34",
29
+ 1
30
+ ]
31
+ },
32
+ "class_type": "CogVideoDecode",
33
+ "_meta": {
34
+ "title": "CogVideo Decode"
35
+ }
36
+ },
37
+ "20": {
38
+ "inputs": {
39
+ "clip_name": "t5\\google_t5-v1_1-xxl_encoderonly-fp8_e4m3fn.safetensors",
40
+ "type": "sd3"
41
+ },
42
+ "class_type": "CLIPLoader",
43
+ "_meta": {
44
+ "title": "Load CLIP"
45
+ }
46
+ },
47
+ "30": {
48
+ "inputs": {
49
+ "prompt": "A golden retriever, sporting sleek black sunglasses, with its lengthy fur flowing in the breeze, sprints playfully across a rooftop terrace, recently refreshed by a light rain. The scene unfolds from a distance, the dog's energetic bounds growing larger as it approaches the camera, its tail wagging with unrestrained joy, while droplets of water glisten on the concrete behind it. The overcast sky provides a dramatic backdrop, emphasizing the vibrant golden coat of the canine as it dashes towards the viewer.\n\n",
50
+ "strength": 1,
51
+ "force_offload": true,
52
+ "clip": [
53
+ "20",
54
+ 0
55
+ ]
56
+ },
57
+ "class_type": "CogVideoTextEncode",
58
+ "_meta": {
59
+ "title": "CogVideo TextEncode"
60
+ }
61
+ },
62
+ "31": {
63
+ "inputs": {
64
+ "prompt": "",
65
+ "strength": 1,
66
+ "force_offload": true,
67
+ "clip": [
68
+ "20",
69
+ 0
70
+ ]
71
+ },
72
+ "class_type": "CogVideoTextEncode",
73
+ "_meta": {
74
+ "title": "CogVideo TextEncode"
75
+ }
76
+ },
77
+ "33": {
78
+ "inputs": {
79
+ "frame_rate": 24,
80
+ "loop_count": 0,
81
+ "filename_prefix": "CogVideoX5B",
82
+ "format": "video/h264-mp4",
83
+ "pix_fmt": "yuv420p",
84
+ "crf": 19,
85
+ "save_metadata": true,
86
+ "pingpong": false,
87
+ "save_output": true,
88
+ "images": [
89
+ "11",
90
+ 0
91
+ ]
92
+ },
93
+ "class_type": "VHS_VideoCombine",
94
+ "_meta": {
95
+ "title": "Video Combine 🎥🅥🅗🅢"
96
+ }
97
+ },
98
+ "34": {
99
+ "inputs": {
100
+ "height": 480,
101
+ "width": 720,
102
+ "num_frames": 49,
103
+ "steps": 50,
104
+ "cfg": 6,
105
+ "seed": 806286757407563,
106
+ "scheduler": "DPM++",
107
+ "denoise_strength": 1,
108
+ "pipeline": [
109
+ "1",
110
+ 0
111
+ ],
112
+ "positive": [
113
+ "30",
114
+ 0
115
+ ],
116
+ "negative": [
117
+ "31",
118
+ 0
119
+ ]
120
+ },
121
+ "class_type": "CogVideoSampler",
122
+ "_meta": {
123
+ "title": "CogVideo Sampler"
124
+ }
125
+ }
126
+ }
workflows/omnigen_image_to_image_workflow_api.json ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "6": {
3
+ "inputs": {
4
+ "preset_prompt": "None",
5
+ "prompt": "in image_1 the boy is standing on a road",
6
+ "model_precision": "Auto",
7
+ "memory_management": "Balanced",
8
+ "guidance_scale": 3.5,
9
+ "img_guidance_scale": 1.8,
10
+ "num_inference_steps": 50,
11
+ "separate_cfg_infer": true,
12
+ "use_input_image_size_as_output": false,
13
+ "width": 768,
14
+ "height": 768,
15
+ "seed": 582248950683741,
16
+ "max_input_image_size": 1024,
17
+ "image_1": [
18
+ "12",
19
+ 0
20
+ ]
21
+ },
22
+ "class_type": "ailab_OmniGen",
23
+ "_meta": {
24
+ "title": "OmniGen 🖼️"
25
+ }
26
+ },
27
+ "7": {
28
+ "inputs": {
29
+ "filename_prefix": "ComfyUI",
30
+ "images": [
31
+ "6",
32
+ 0
33
+ ]
34
+ },
35
+ "class_type": "SaveImage",
36
+ "_meta": {
37
+ "title": "Save Image"
38
+ }
39
+ },
40
+ "12": {
41
+ "inputs": {
42
+ "url": "",
43
+ "cache": true
44
+ },
45
+ "class_type": "LoadImageByUrl //Browser",
46
+ "_meta": {
47
+ "title": "Load Image By URL"
48
+ }
49
+ }
50
+ }