Spaces:

gosign-de
/

comfyui-api

Sleeping

App Files Files Community

Muhammad Waqas committed on Nov 18, 2024

Commit

80ca3a0

1 Parent(s): d6f45c3

Added OmniGen Image to Image

Browse files

Files changed (3) hide show

app.py +178 -8
workflows/cogvideox_text_to_video_workflow_api.json +126 -0
workflows/omnigen_image_to_image_workflow_api.json +50 -0

app.py CHANGED Viewed

@@ -6,7 +6,6 @@ import random
 import urllib.request
 import urllib.parse
 import websocket
-import requests
 import uuid
 from dotenv import load_dotenv
 from flask import Flask, request, jsonify, render_template, send_file, send_from_directory
@@ -15,7 +14,6 @@ from werkzeug.utils import secure_filename
 import urllib.parse
 import urllib.request
 import time
-from werkzeug.serving import WSGIRequestHandler
 # Load environment variables from the .env file
 load_dotenv()
@@ -23,9 +21,6 @@ load_dotenv()
 # Initialize Flask app
 app = Flask(__name__)
-# Increase timeout limit
-# WSGIRequestHandler.timeout = 3600
 ALLOWED_EXTENSIONS = {'jpg', 'jpeg', 'png', 'webp'}  # Define supported image types
 # Set server and websocket addresses from environment variables
@@ -176,6 +171,115 @@ def generate_image():
     return jsonify({'images': output_images_base64})
 # Get image route
 @app.route('/get_image/<filename>', methods=['GET'])
 def get_image_file(filename):
@@ -256,9 +360,9 @@ def get_video_data(filename, subfolder, token):
         print(f"URL Error: {e.reason}")
         raise
-                ##################################################
-                # Generate image to video using CogVideoX-5B-12V #
-                ##################################################
 # Route: Image to Video
 @app.route('/v1/image_to_video', methods=['POST'])
@@ -362,6 +466,70 @@ def v1_image_to_video():
         #     os.remove(image_path)
         #     print(f"Deleted temporary image: {image_path}", flush=True)
 # Get video_tasks route
 @app.route('/v1/video_tasks/<prompt_id>', methods=['GET'])
 def video_tasks(prompt_id):
@@ -393,12 +561,14 @@ def video_tasks(prompt_id):
                 'status': 'pending',
                 'prompts_in_queue': queue_remaining
             }), 202
         video_data = None
         # Extract video or GIF details
         for node_id, node_output in history.get('outputs', {}).items():
             if 'gifs' in node_output:
                 video = node_output['gifs'][0]  # Take the first available GIF/video
                 try:
                     video_data = get_video_data(video['filename'], video['subfolder'], token)
                     break  # Stop after fetching the first valid video

 import urllib.request
 import urllib.parse
 import websocket
 import uuid
 from dotenv import load_dotenv
 from flask import Flask, request, jsonify, render_template, send_file, send_from_directory
 import urllib.parse
 import urllib.request
 import time
 # Load environment variables from the .env file
 load_dotenv()
 # Initialize Flask app
 app = Flask(__name__)
 ALLOWED_EXTENSIONS = {'jpg', 'jpeg', 'png', 'webp'}  # Define supported image types
 # Set server and websocket addresses from environment variables
     return jsonify({'images': output_images_base64})
+                ###################################################
+                # Edit image with text prompt using OmniGen Model #
+                ###################################################
+# Route: OmniGen image to image
+@app.route('/omnigen/image_to_image', methods=['POST'])
+def omnigen_image_to_image():
+    data = request.json
+    # Extract and validate token
+    token = request.headers.get('Authorization')
+    if not token or not token.startswith("Bearer "):
+        return jsonify({'error': 'Valid Bearer token required'}), 400
+    token = token.split(" ")[1]
+    # Validate text prompt
+    text_prompt = data.get('text_prompt')
+    if not text_prompt or not text_prompt.strip():
+        return jsonify({'error': 'Text prompt is required'}), 400
+    steps = data.get('steps')
+    if not steps:
+        steps = 50
+    # Handle uploaded image or base64 image
+    image_file = request.files.get('image')
+    base64_image = data.get('base64_image')
+    image_path = None  # Initialize image path
+    try:
+        if image_file:
+            # Check if the file has an allowed extension
+            if not allowed_file(image_file.filename):
+                return jsonify({'error': 'Unsupported image format'}), 400
+            # Secure the filename
+            filename = secure_filename(image_file.filename)
+            # Generate a unique path for the image
+            unique_filename = f"{uuid.uuid4()}_{filename}"
+            image_path = os.path.join('static', unique_filename)
+            # Ensure the 'static' directory exists
+            os.makedirs('static', exist_ok=True)
+            # Save the image to the static directory
+            image_file.save(image_path)
+            # Construct the public URL to access the image
+            image_url = f"https://gosign-de-comfyui-api.hf.space/{image_path}"
+        elif base64_image:
+            # Save base64 image
+            try:
+                image_path, image_url = save_base64_image(base64_image)
+                # image_url = "https://drive.google.com/uc?id=1JEHEy0zCVWOob4421hLQIPMbO_ebeCPS&export=download"
+            except Exception as e:
+                raise ValueError(f'Invalid base64 image data: {str(e)}')
+        else:
+            return jsonify({'error': 'Image is required (file or base64)'}), 400
+        # Load workflow configuration
+        current_dir = os.path.dirname(os.path.abspath(__file__))
+        workflow_path = os.path.join(current_dir, 'workflows/omnigen_image_to_image_workflow_api.json')
+        with open(workflow_path, 'r', encoding='utf-8') as f:
+            workflow = json.load(f)
+        # Modify workflow with inputs
+        workflow["6"]["inputs"]["prompt"] = "in image_1 " + text_prompt
+        workflow["6"]["inputs"]["num_inference_steps"] = steps
+        workflow["12"]["inputs"]["url"] = image_url
+        # WebSocket connection to queue the prompt
+        ws = websocket.WebSocket()
+        ws.connect(f"{ws_address}?clientId={client_id}&token={token}",
+                   header={"Authorization": f"Bearer {token}"})
+        # Queue the prompt
+        prompt_id = queue_prompt(workflow, token)
+        images = get_images(ws, workflow, token)
+        ws.close()
+        output_images_base64 = []
+        for node_id in images:
+            for image_data in images[node_id]:
+                image = Image.open(io.BytesIO(image_data))
+                buffered = io.BytesIO()
+                image.save(buffered, format="PNG")
+                img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
+                output_images_base64.append(img_str)
+        return jsonify({'images': output_images_base64, 'message': 'Image generated successfully'}), 200
+    except Exception as e:
+        return jsonify({'message': 'Unable to connect to the server. Make sure the server is running', 'error': str(e)}), 500
+    finally:
+        pass
+        # Always delete the image if it was saved
+        if image_path and os.path.exists(image_path):
+            os.remove(image_path)
+            print(f"Deleted temporary image: {image_path}", flush=True)
 # Get image route
 @app.route('/get_image/<filename>', methods=['GET'])
 def get_image_file(filename):
         print(f"URL Error: {e.reason}")
         raise
+                ########################################################
+                # Generate image to video using CogVideoX-5B-I2V Model #
+                ########################################################
 # Route: Image to Video
 @app.route('/v1/image_to_video', methods=['POST'])
         #     os.remove(image_path)
         #     print(f"Deleted temporary image: {image_path}", flush=True)
+                ###################################################
+                # Generate text to video using CogVideoX-5B Model #
+                ###################################################
+# Route: Text to Video
+@app.route('/v1/text_to_video', methods=['POST'])
+def v1_text_to_video():
+    data = request.json
+    # Extract and validate token
+    token = request.headers.get('Authorization')
+    if not token or not token.startswith("Bearer "):
+        return jsonify({'error': 'Valid Bearer token required'}), 400
+    token = token.split(" ")[1]
+    # Validate text prompt
+    text_prompt = data.get('text_prompt')
+    frame_rate = data.get('frame_rate')
+    steps = data.get('steps')
+    if not text_prompt or not text_prompt.strip():
+        return jsonify({'error': 'Text prompt is required'}), 400
+    # Check if frame_rate is missing or invalid
+    if not frame_rate:  # If frame_rate is None, empty, or 0
+        frame_rate = 24  # Default to 24 fps
+    else:
+        try:
+            frame_rate = int(frame_rate)
+            if frame_rate not in [8, 12, 24]:  # Ensure it's one of the allowed values
+                return jsonify({'error': 'Frame rate must be a valid number (8, 12, or 24).'}), 400
+        except ValueError:
+            return jsonify({'error': 'Frame rate must be a valid number (8, 12, or 24).'}), 400
+    if not steps:
+        steps = 50
+    try:
+        # Load workflow configuration
+        current_dir = os.path.dirname(os.path.abspath(__file__))
+        workflow_path = os.path.join(current_dir, 'workflows/cogvideox_text_to_video_workflow_api.json')
+        with open(workflow_path, 'r', encoding='utf-8') as f:
+            workflow = json.load(f)
+        # Modify workflow with inputs
+        workflow["30"]["inputs"]["prompt"] = text_prompt
+        workflow["31"]["inputs"]["prompt"] = "Low quality, watermark, strange motion, blur"
+        workflow["33"]["inputs"]["frame_rate"] = frame_rate
+        workflow["34"]["inputs"]["steps"] = steps
+        # WebSocket connection to queue the prompt
+        ws = websocket.WebSocket()
+        ws.connect(f"{ws_address}?clientId={client_id}&token={token}",
+                   header={"Authorization": f"Bearer {token}"})
+        # Queue the prompt
+        prompt_id = queue_prompt(workflow, token)
+        return jsonify({'prompt_id': prompt_id, 'message': 'Prompt queued successfully', 'get_video_url': f'https://gosign-de-comfyui-api.hf.space/v1/video_tasks/{prompt_id}'}), 202
+    except Exception as e:
+        return jsonify({'message': 'Unbale to connect to the server. Make sure the server is running', 'error': str(e)}), 500
 # Get video_tasks route
 @app.route('/v1/video_tasks/<prompt_id>', methods=['GET'])
 def video_tasks(prompt_id):
                 'status': 'pending',
                 'prompts_in_queue': queue_remaining
             }), 202
         video_data = None
         # Extract video or GIF details
         for node_id, node_output in history.get('outputs', {}).items():
             if 'gifs' in node_output:
                 video = node_output['gifs'][0]  # Take the first available GIF/video
                 try:
                     video_data = get_video_data(video['filename'], video['subfolder'], token)
                     break  # Stop after fetching the first valid video

workflows/cogvideox_text_to_video_workflow_api.json ADDED Viewed

	@@ -0,0 +1,126 @@

+{
+  "1": {
+    "inputs": {
+      "model": "THUDM/CogVideoX-5b",
+      "precision": "bf16",
+      "fp8_transformer": "disabled",
+      "compile": "disabled",
+      "enable_sequential_cpu_offload": false
+    },
+    "class_type": "DownloadAndLoadCogVideoModel",
+    "_meta": {
+      "title": "(Down)load CogVideo Model"
+    }
+  },
+  "11": {
+    "inputs": {
+      "enable_vae_tiling": false,
+      "tile_sample_min_height": 240,
+      "tile_sample_min_width": 360,
+      "tile_overlap_factor_height": 0.2,
+      "tile_overlap_factor_width": 0.2,
+      "auto_tile_size": true,
+      "pipeline": [
+        "34",
+        0
+      ],
+      "samples": [
+        "34",
+        1
+      ]
+    },
+    "class_type": "CogVideoDecode",
+    "_meta": {
+      "title": "CogVideo Decode"
+    }
+  },
+  "20": {
+    "inputs": {
+      "clip_name": "t5\\google_t5-v1_1-xxl_encoderonly-fp8_e4m3fn.safetensors",
+      "type": "sd3"
+    },
+    "class_type": "CLIPLoader",
+    "_meta": {
+      "title": "Load CLIP"
+    }
+  },
+  "30": {
+    "inputs": {
+      "prompt": "A golden retriever, sporting sleek black sunglasses, with its lengthy fur flowing in the breeze, sprints playfully across a rooftop terrace, recently refreshed by a light rain. The scene unfolds from a distance, the dog's energetic bounds growing larger as it approaches the camera, its tail wagging with unrestrained joy, while droplets of water glisten on the concrete behind it. The overcast sky provides a dramatic backdrop, emphasizing the vibrant golden coat of the canine as it dashes towards the viewer.\n\n",
+      "strength": 1,
+      "force_offload": true,
+      "clip": [
+        "20",
+        0
+      ]
+    },
+    "class_type": "CogVideoTextEncode",
+    "_meta": {
+      "title": "CogVideo TextEncode"
+    }
+  },
+  "31": {
+    "inputs": {
+      "prompt": "",
+      "strength": 1,
+      "force_offload": true,
+      "clip": [
+        "20",
+        0
+      ]
+    },
+    "class_type": "CogVideoTextEncode",
+    "_meta": {
+      "title": "CogVideo TextEncode"
+    }
+  },
+  "33": {
+    "inputs": {
+      "frame_rate": 24,
+      "loop_count": 0,
+      "filename_prefix": "CogVideoX5B",
+      "format": "video/h264-mp4",
+      "pix_fmt": "yuv420p",
+      "crf": 19,
+      "save_metadata": true,
+      "pingpong": false,
+      "save_output": true,
+      "images": [
+        "11",
+        0
+      ]
+    },
+    "class_type": "VHS_VideoCombine",
+    "_meta": {
+      "title": "Video Combine 🎥🅥🅗🅢"
+    }
+  },
+  "34": {
+    "inputs": {
+      "height": 480,
+      "width": 720,
+      "num_frames": 49,
+      "steps": 50,
+      "cfg": 6,
+      "seed": 806286757407563,
+      "scheduler": "DPM++",
+      "denoise_strength": 1,
+      "pipeline": [
+        "1",
+        0
+      ],
+      "positive": [
+        "30",
+        0
+      ],
+      "negative": [
+        "31",
+        0
+      ]
+    },
+    "class_type": "CogVideoSampler",
+    "_meta": {
+      "title": "CogVideo Sampler"
+    }
+  }
+}

workflows/omnigen_image_to_image_workflow_api.json ADDED Viewed

	@@ -0,0 +1,50 @@

+{
+  "6": {
+    "inputs": {
+      "preset_prompt": "None",
+      "prompt": "in image_1 the boy is standing on a road",
+      "model_precision": "Auto",
+      "memory_management": "Balanced",
+      "guidance_scale": 3.5,
+      "img_guidance_scale": 1.8,
+      "num_inference_steps": 50,
+      "separate_cfg_infer": true,
+      "use_input_image_size_as_output": false,
+      "width": 768,
+      "height": 768,
+      "seed": 582248950683741,
+      "max_input_image_size": 1024,
+      "image_1": [
+        "12",
+        0
+      ]
+    },
+    "class_type": "ailab_OmniGen",
+    "_meta": {
+      "title": "OmniGen 🖼️"
+    }
+  },
+  "7": {
+    "inputs": {
+      "filename_prefix": "ComfyUI",
+      "images": [
+        "6",
+        0
+      ]
+    },
+    "class_type": "SaveImage",
+    "_meta": {
+      "title": "Save Image"
+    }
+  },
+  "12": {
+    "inputs": {
+      "url": "",
+      "cache": true
+    },
+    "class_type": "LoadImageByUrl //Browser",
+    "_meta": {
+      "title": "Load Image By URL"
+    }
+  }
+}